From 6a907738ab9840ca3d71c22cd28fba4cbae7f7ce Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Apr 2015 10:51:26 +0100 Subject: x86/asm: Enable fast 32-bit put_user_64() for copy_to_user() For fixed sized copies, copy_to_user() will utilize __put_user_size() fastpaths. However, it is missing the translation for 64-bit copies on x86/32. Testing on a Pinetrail Atom, the 64 bit put_user() fastpath is substantially faster than the generic copy_to_user() fallback. Signed-off-by: Chris Wilson Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: intel-gfx@lists.freedesktop.org Link: http://lkml.kernel.org/r/1429091486-11443-1-git-send-email-chris@chris-wilson.co.uk Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_32.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 3c03a5de64d3..0ed5504c6060 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -59,6 +59,10 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) __put_user_size(*(u32 *)from, (u32 __user *)to, 4, ret, 4); return ret; + case 8: + __put_user_size(*(u64 *)from, (u64 __user *)to, + 8, ret, 8); + return ret; } } return __copy_to_user_ll(to, from, n); -- cgit v1.2.3 From aac82d319148c6a84e1bf90b86d3e0ec8bf0ee38 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 3 Apr 2015 15:51:54 -0700 Subject: x86, paravirt, xen: Remove the 64-bit ->irq_enable_sysexit() pvop We don't use irq_enable_sysexit on 64-bit kernels any more. Remove all the paravirt and Xen machinery to support it on 64-bit kernels. Tested-by: Boris Ostrovsky Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/8a03355698fe5b94194e9e7360f19f91c1b2cf1f.1428100853.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 6 ------ arch/x86/include/asm/paravirt_types.h | 7 ++++--- arch/x86/kernel/asm-offsets.c | 2 ++ arch/x86/kernel/paravirt.c | 4 +++- arch/x86/kernel/paravirt_patch_64.c | 1 - arch/x86/xen/enlighten.c | 3 ++- arch/x86/xen/xen-asm_64.S | 16 ---------------- arch/x86/xen/xen-ops.h | 2 ++ 8 files changed, 13 insertions(+), 28 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index a821b1cd4fa7..3cdb9eafbf8c 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -77,12 +77,6 @@ ENTRY(native_usergs_sysret32) swapgs sysretl ENDPROC(native_usergs_sysret32) - -ENTRY(native_irq_enable_sysexit) - swapgs - sti - sysexit -ENDPROC(native_irq_enable_sysexit) #endif /* diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 7549b8b369e4..38a0ff9ef06e 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -160,13 +160,14 @@ struct pv_cpu_ops { u64 (*read_pmc)(int counter); unsigned long long (*read_tscp)(unsigned int *aux); +#ifdef CONFIG_X86_32 /* * Atomically enable interrupts and return to userspace. This - * is only ever used to return to 32-bit processes; in a - * 64-bit kernel, it's used for 32-on-64 compat processes, but - * never native 64-bit processes. (Jump, not call.) + * is only used in 32-bit kernels. 64-bit kernels use + * usergs_sysret32 instead. */ void (*irq_enable_sysexit)(void); +#endif /* * Switch to usermode gs and return to 64-bit usermode using diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index b27f6ec90caa..8e3d22a1af94 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -68,7 +68,9 @@ void common(void) { OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); +#ifdef CONFIG_X86_32 OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); +#endif OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); #endif diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 548d25f00c90..7563114d9c3a 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -154,7 +154,9 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, ret = paravirt_patch_ident_64(insnbuf, len); else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || +#ifdef CONFIG_X86_32 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || +#endif type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) /* If operation requires a jmp, then jmp */ @@ -371,7 +373,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .load_sp0 = native_load_sp0, -#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +#if defined(CONFIG_X86_32) .irq_enable_sysexit = native_irq_enable_sysexit, #endif #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index a1da6737ba5b..0de21c62c348 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -49,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); - PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_cpu_ops, usergs_sysret32); PATCH_SITE(pv_cpu_ops, usergs_sysret64); PATCH_SITE(pv_cpu_ops, swapgs); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 81665c9f2132..3797b6b31f95 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1267,10 +1267,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_tscp = native_read_tscp, .iret = xen_iret, - .irq_enable_sysexit = xen_sysexit, #ifdef CONFIG_X86_64 .usergs_sysret32 = xen_sysret32, .usergs_sysret64 = xen_sysret64, +#else + .irq_enable_sysexit = xen_sysexit, #endif .load_tr_desc = paravirt_nop, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 985fc3ee0973..a2cabb8bd6bf 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -47,22 +47,6 @@ ENTRY(xen_iret) ENDPATCH(xen_iret) RELOC(xen_iret, 1b+1) -/* - * sysexit is not used for 64-bit processes, so it's only ever used to - * return to 32-bit compat userspace. - */ -ENTRY(xen_sysexit) - pushq $__USER32_DS - pushq %rcx - pushq $X86_EFLAGS_IF - pushq $__USER32_CS - pushq %rdx - - pushq $0 -1: jmp hypercall_iret -ENDPATCH(xen_sysexit) -RELOC(xen_sysexit, 1b+1) - ENTRY(xen_sysret64) /* * We're already on the usermode stack at this point, but diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9e195c683549..c20fe29e65f4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -134,7 +134,9 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long); /* These are not functions, and cannot be called normally */ __visible void xen_iret(void); +#ifdef CONFIG_X86_32 __visible void xen_sysexit(void); +#endif __visible void xen_sysret32(void); __visible void xen_sysret64(void); __visible void xen_adjust_exception_frame(void); -- cgit v1.2.3 From 3462bd2adeadc49d9e126bca3b5536a3437a902d Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Mon, 20 Apr 2015 23:27:11 +0200 Subject: x86/asm: Always inline atomics During some code analysis I realized that atomic_add(), atomic_sub() and friends are not necessarily inlined AND that each function is defined multiple times: atomic_inc: 544 duplicates atomic_dec: 215 duplicates atomic_dec_and_test: 107 duplicates atomic64_inc: 38 duplicates [...] Each definition is exact equally, e.g.: ffffffff813171b8 : 55 push %rbp 48 89 e5 mov %rsp,%rbp f0 01 3e lock add %edi,(%rsi) 5d pop %rbp c3 retq In turn each definition has one or more callsites (sure): ffffffff81317c78: e8 3b f5 ff ff callq ffffffff813171b8 [...] ffffffff8131a062: e8 51 d1 ff ff callq ffffffff813171b8 [...] ffffffff8131a190: e8 23 d0 ff ff callq ffffffff813171b8 [...] The other way around would be to remove the static linkage - but I prefer an enforced inlining here. Before: text data bss dec hex filename 81467393 19874720 20168704 121510817 73e1ba1 vmlinux.orig After: text data bss dec hex filename 81461323 19874720 20168704 121504747 73e03eb vmlinux.inlined Yes, the inlining here makes the kernel even smaller! ;) Linus further observed: "I have this memory of having seen that before - the size heuristics for gcc getting confused by inlining. [...] It might be a good idea to mark things that are basically just wrappers around a single (or a couple of) asm instruction to be always_inline." Signed-off-by: Hagen Paul Pfeifer Acked-by: Linus Torvalds Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1429565231-4609-1-git-send-email-hagen@jauu.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/atomic.h | 16 ++++++++-------- arch/x86/include/asm/atomic64_64.h | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 5e5cd123fdfb..75a9ee8529f3 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -46,7 +46,7 @@ static inline void atomic_set(atomic_t *v, int i) * * Atomically adds @i to @v. */ -static inline void atomic_add(int i, atomic_t *v) +static __always_inline void atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (v->counter) @@ -60,7 +60,7 @@ static inline void atomic_add(int i, atomic_t *v) * * Atomically subtracts @i from @v. */ -static inline void atomic_sub(int i, atomic_t *v) +static __always_inline void atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" : "+m" (v->counter) @@ -76,7 +76,7 @@ static inline void atomic_sub(int i, atomic_t *v) * true if the result is zero, or false for all * other cases. */ -static inline int atomic_sub_and_test(int i, atomic_t *v) +static __always_inline int atomic_sub_and_test(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e"); } @@ -87,7 +87,7 @@ static inline int atomic_sub_and_test(int i, atomic_t *v) * * Atomically increments @v by 1. */ -static inline void atomic_inc(atomic_t *v) +static __always_inline void atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter)); @@ -99,7 +99,7 @@ static inline void atomic_inc(atomic_t *v) * * Atomically decrements @v by 1. */ -static inline void atomic_dec(atomic_t *v) +static __always_inline void atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter)); @@ -113,7 +113,7 @@ static inline void atomic_dec(atomic_t *v) * returns true if the result is 0, or false for all other * cases. */ -static inline int atomic_dec_and_test(atomic_t *v) +static __always_inline int atomic_dec_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); } @@ -152,7 +152,7 @@ static inline int atomic_add_negative(int i, atomic_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static inline int atomic_add_return(int i, atomic_t *v) +static __always_inline int atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } @@ -191,7 +191,7 @@ static inline int atomic_xchg(atomic_t *v, int new) * Atomically adds @a to @v, so long as @v was not already @u. * Returns the old value of @v. */ -static inline int __atomic_add_unless(atomic_t *v, int a, int u) +static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) { int c, old; c = atomic_read(v); diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index f8d273e18516..b965f9e03f2a 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -40,7 +40,7 @@ static inline void atomic64_set(atomic64_t *v, long i) * * Atomically adds @i to @v. */ -static inline void atomic64_add(long i, atomic64_t *v) +static __always_inline void atomic64_add(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "addq %1,%0" : "=m" (v->counter) @@ -81,7 +81,7 @@ static inline int atomic64_sub_and_test(long i, atomic64_t *v) * * Atomically increments @v by 1. */ -static inline void atomic64_inc(atomic64_t *v) +static __always_inline void atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) @@ -94,7 +94,7 @@ static inline void atomic64_inc(atomic64_t *v) * * Atomically decrements @v by 1. */ -static inline void atomic64_dec(atomic64_t *v) +static __always_inline void atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) @@ -148,7 +148,7 @@ static inline int atomic64_add_negative(long i, atomic64_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static inline long atomic64_add_return(long i, atomic64_t *v) +static __always_inline long atomic64_add_return(long i, atomic64_t *v) { return i + xadd(&v->counter, i); } -- cgit v1.2.3 From 5f0052f9522b84269e1b3b435a806f873d992702 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:23 +0800 Subject: x86/irq: Save destination CPU ID in irq_cfg Cache destination CPU APIC ID into struct irq_cfg when assigning vector for interrupt. Upper layer just needs to read the cached APIC ID instead of calling apic->cpu_mask_to_apicid_and(), it helps to hide APIC driver details from IOAPIC/HPET/MSI drivers.. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Link: http://lkml.kernel.org/r/1428905519-23704-2-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hw_irq.h | 1 + arch/x86/kernel/apic/vector.c | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index e9571ddabc4f..cda96954cbbf 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -116,6 +116,7 @@ struct irq_data; struct irq_cfg { cpumask_var_t domain; cpumask_var_t old_domain; + unsigned int dest_apicid; u8 vector; u8 move_in_progress : 1; #ifdef CONFIG_IRQ_REMAP diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6cedd7914581..c724ef6b218c 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -188,6 +188,12 @@ next: } free_cpumask_var(tmp_mask); + if (!err) { + /* cache destination APIC IDs into cfg->dest_apicid */ + err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, + &cfg->dest_apicid); + } + return err; } -- cgit v1.2.3 From b5dc8e6c21e7ffba0246bf39cea97805c142bf85 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:24 +0800 Subject: x86/irq: Use hierarchical irqdomain to manage CPU interrupt vectors Abstract CPU local APIC as an interrupt controller and create an irqdomain for it to manage CPU interrupt vectors. It's the base to enable hierarchical irqdomains on x86 systems. The final irqdomain hierarchy will look like this: IOAPIC domain ----| MSI/MSI-x domain ----> [Interrupt Remapping domain] -> CPU vector domain HPET_IRQ domain ----| ^ | DMAR domain ----------------------------------------------| HT_IRQ domain ----------------------------------------------| Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Prarit Bhargava Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Grant Likely Link: http://lkml.kernel.org/r/1428905519-23704-3-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 3 +- arch/x86/include/asm/hw_irq.h | 17 +++++ arch/x86/kernel/apic/io_apic.c | 3 - arch/x86/kernel/apic/vector.c | 155 +++++++++++++++++++++++++++++++++++++---- 4 files changed, 160 insertions(+), 18 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6049d587599e..e75a96c38c58 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -465,7 +465,6 @@ config X86_INTEL_CE select X86_REBOOTFIXUPS select OF select OF_EARLY_FLATTREE - select IRQ_DOMAIN ---help--- Select for the Intel CE media processor (CE4100) SOC. This option compiles in support for the CE4100 SOC for settop @@ -914,11 +913,11 @@ config X86_LOCAL_APIC def_bool y depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ + select IRQ_DOMAIN_HIERARCHY config X86_IO_APIC def_bool y depends on X86_LOCAL_APIC || X86_UP_IOAPIC - select IRQ_DOMAIN config X86_REROUTE_FOR_BROKEN_BOOT_IRQS bool "Reroute for broken boot IRQs" diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index cda96954cbbf..5b951ac56aa1 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -112,6 +112,17 @@ struct irq_2_irte { #ifdef CONFIG_X86_LOCAL_APIC struct irq_data; +struct irq_domain; + +struct irq_alloc_info { + u32 flags; + const struct cpumask *mask; /* CPU mask for vector allocation */ +}; + +enum { + /* Allocate contiguous CPU vectors */ + X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1, +}; struct irq_cfg { cpumask_var_t domain; @@ -135,6 +146,12 @@ struct irq_cfg { }; }; +extern struct irq_domain *x86_vector_domain; + +extern void init_irq_alloc_info(struct irq_alloc_info *info, + const struct cpumask *mask); +extern void copy_irq_alloc_info(struct irq_alloc_info *dst, + struct irq_alloc_info *src); extern struct irq_cfg *irq_cfg(unsigned int irq); extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); extern struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f4dc2462a1ac..56d532106ef3 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2356,9 +2356,6 @@ static int mp_irqdomain_create(int ioapic) ioapic_dynirq_base = max(ioapic_dynirq_base, gsi_cfg->gsi_end + 1); - if (gsi_cfg->gsi_base == 0) - irq_set_default_host(ip->irqdomain); - return 0; } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index c724ef6b218c..6358d8d351f5 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -3,6 +3,8 @@ * * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo * Moved from arch/x86/kernel/apic/io_apic.c. + * Jiang Liu + * Enable support of hierarchical irqdomains * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -19,7 +21,9 @@ #include #include +struct irq_domain *x86_vector_domain; static DEFINE_RAW_SPINLOCK(vector_lock); +static struct irq_chip lapic_controller; void lock_vector_lock(void) { @@ -36,15 +40,21 @@ void unlock_vector_lock(void) struct irq_cfg *irq_cfg(unsigned int irq) { - return irq_get_chip_data(irq); + return irqd_cfg(irq_get_irq_data(irq)); } struct irq_cfg *irqd_cfg(struct irq_data *irq_data) { + if (!irq_data) + return NULL; + + while (irq_data->parent_data) + irq_data = irq_data->parent_data; + return irq_data->chip_data; } -static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) +static struct irq_cfg *alloc_irq_cfg(int node) { struct irq_cfg *cfg; @@ -79,7 +89,7 @@ struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) return cfg; } - cfg = alloc_irq_cfg(at, node); + cfg = alloc_irq_cfg(node); if (cfg) irq_set_chip_data(at, cfg); else @@ -87,14 +97,13 @@ struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) return cfg; } -static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) +static void free_irq_cfg(struct irq_cfg *cfg) { - if (!cfg) - return; - irq_set_chip_data(at, NULL); - free_cpumask_var(cfg->domain); - free_cpumask_var(cfg->old_domain); - kfree(cfg); + if (cfg) { + free_cpumask_var(cfg->domain); + free_cpumask_var(cfg->old_domain); + kfree(cfg); + } } static int @@ -241,6 +250,90 @@ void clear_irq_vector(int irq, struct irq_cfg *cfg) raw_spin_unlock_irqrestore(&vector_lock, flags); } +void init_irq_alloc_info(struct irq_alloc_info *info, + const struct cpumask *mask) +{ + memset(info, 0, sizeof(*info)); + info->mask = mask; +} + +void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) +{ + if (src) + *dst = *src; + else + memset(dst, 0, sizeof(*dst)); +} + +static inline const struct cpumask * +irq_alloc_info_get_mask(struct irq_alloc_info *info) +{ + return (!info || !info->mask) ? apic->target_cpus() : info->mask; +} + +static void x86_vector_free_irqs(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + struct irq_data *irq_data; + int i; + + for (i = 0; i < nr_irqs; i++) { + irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i); + if (irq_data && irq_data->chip_data) { + free_remapped_irq(virq); + clear_irq_vector(virq + i, irq_data->chip_data); + free_irq_cfg(irq_data->chip_data); + irq_domain_reset_irq_data(irq_data); + } + } +} + +static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct irq_alloc_info *info = arg; + const struct cpumask *mask; + struct irq_data *irq_data; + struct irq_cfg *cfg; + int i, err; + + if (disable_apic) + return -ENXIO; + + /* Currently vector allocator can't guarantee contiguous allocations */ + if ((info->flags & X86_IRQ_ALLOC_CONTIGUOUS_VECTORS) && nr_irqs > 1) + return -ENOSYS; + + mask = irq_alloc_info_get_mask(info); + for (i = 0; i < nr_irqs; i++) { + irq_data = irq_domain_get_irq_data(domain, virq + i); + BUG_ON(!irq_data); + cfg = alloc_irq_cfg(irq_data->node); + if (!cfg) { + err = -ENOMEM; + goto error; + } + + irq_data->chip = &lapic_controller; + irq_data->chip_data = cfg; + irq_data->hwirq = virq + i; + err = assign_irq_vector(virq, cfg, mask); + if (err) + goto error; + } + + return 0; + +error: + x86_vector_free_irqs(domain, virq, i + 1); + return err; +} + +static struct irq_domain_ops x86_vector_domain_ops = { + .alloc = x86_vector_alloc_irqs, + .free = x86_vector_free_irqs, +}; + int __init arch_probe_nr_irqs(void) { int nr; @@ -266,6 +359,11 @@ int __init arch_probe_nr_irqs(void) int __init arch_early_irq_init(void) { + x86_vector_domain = irq_domain_add_tree(NULL, &x86_vector_domain_ops, + NULL); + BUG_ON(x86_vector_domain == NULL); + irq_set_default_host(x86_vector_domain); + return arch_early_ioapic_init(); } @@ -380,6 +478,36 @@ int apic_set_affinity(struct irq_data *data, const struct cpumask *mask, return 0; } +static int vector_set_affinity(struct irq_data *irq_data, + const struct cpumask *dest, bool force) +{ + struct irq_cfg *cfg = irq_data->chip_data; + int err, irq = irq_data->irq; + + if (!config_enabled(CONFIG_SMP)) + return -EPERM; + + if (!cpumask_intersects(dest, cpu_online_mask)) + return -EINVAL; + + err = assign_irq_vector(irq, cfg, dest); + if (err) { + struct irq_data *top = irq_get_irq_data(irq); + + if (assign_irq_vector(irq, cfg, top->affinity)) + pr_err("Failed to recover vector for irq %d\n", irq); + return err; + } + + return IRQ_SET_MASK_OK; +} + +static struct irq_chip lapic_controller = { + .irq_ack = apic_ack_edge, + .irq_set_affinity = vector_set_affinity, + .irq_retrigger = apic_retrigger_irq, +}; + #ifdef CONFIG_SMP void send_cleanup_vector(struct irq_cfg *cfg) { @@ -497,7 +625,7 @@ int arch_setup_hwirq(unsigned int irq, int node) unsigned long flags; int ret; - cfg = alloc_irq_cfg(irq, node); + cfg = alloc_irq_cfg(node); if (!cfg) return -ENOMEM; @@ -508,7 +636,7 @@ int arch_setup_hwirq(unsigned int irq, int node) if (!ret) irq_set_chip_data(irq, cfg); else - free_irq_cfg(irq, cfg); + free_irq_cfg(cfg); return ret; } @@ -518,7 +646,8 @@ void arch_teardown_hwirq(unsigned int irq) free_remapped_irq(irq); clear_irq_vector(irq, cfg); - free_irq_cfg(irq, cfg); + irq_set_chip_data(irq, NULL); + free_irq_cfg(cfg); } static void __init print_APIC_field(int base) -- cgit v1.2.3 From a62b32cdd0a6324c959f40b3c9b928b275297066 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:29 +0800 Subject: x86/dmar: Use new irqdomain interfaces to allocate/free IRQ Use new irqdomain interfaces to allocate/free IRQ for DMAR and interrupt remapping, so we can remove GENERIC_IRQ_LEGACY_ALLOC_HWIRQ later. The private definitions of irq_alloc_hwirqs()/irq_free_hwirqs() are a temporary solution, they will be removed once we have converted the interrupt remapping driver to use irqdomain framework. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Greg Kroah-Hartman Cc: iommu@lists.linux-foundation.org Cc: Joerg Roedel Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Joerg Roedel Link: http://lkml.kernel.org/r/1428905519-23704-8-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/irq_remapping.h | 4 ++-- arch/x86/kernel/apic/msi.c | 10 ++++++++++ drivers/iommu/irq_remapping.c | 17 +++++++++++++++-- 3 files changed, 27 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 6224d316c405..86d897bc15dd 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -99,7 +99,7 @@ static inline bool setup_remapped_irq(int irq, } #endif /* CONFIG_IRQ_REMAP */ -#define dmar_alloc_hwirq() irq_alloc_hwirq(-1) -#define dmar_free_hwirq irq_free_hwirq +extern int dmar_alloc_hwirq(void); +extern void dmar_free_hwirq(int irq); #endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 76cc2c902176..9be7d6d8a579 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -223,6 +223,16 @@ int arch_setup_dmar_msi(unsigned int irq) "edge"); return 0; } + +int dmar_alloc_hwirq(void) +{ + return irq_domain_alloc_irqs(NULL, 1, NUMA_NO_NODE, NULL); +} + +void dmar_free_hwirq(int irq) +{ + irq_domain_free_irqs(irq, 1); +} #endif /* diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 390079ee1350..5617150fd8fb 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -49,6 +50,18 @@ static void irq_remapping_disable_io_apic(void) disconnect_bsp_APIC(0); } +#ifndef CONFIG_GENERIC_IRQ_LEGACY_ALLOC_HWIRQ +static unsigned int irq_alloc_hwirqs(int cnt, int node) +{ + return irq_domain_alloc_irqs(NULL, -1, cnt, node, NULL); +} + +static void irq_free_hwirqs(unsigned int from, int cnt) +{ + irq_domain_free_irqs(from, cnt); +} +#endif + static int do_setup_msi_irqs(struct pci_dev *dev, int nvec) { int ret, sub_handle, nvec_pow2, index = 0; @@ -104,7 +117,7 @@ static int do_setup_msix_irqs(struct pci_dev *dev, int nvec) list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = irq_alloc_hwirq(node); + irq = irq_alloc_hwirqs(1, node); if (irq == 0) return -1; @@ -127,7 +140,7 @@ static int do_setup_msix_irqs(struct pci_dev *dev, int nvec) return 0; error: - irq_free_hwirq(irq); + irq_free_hwirqs(irq, 1); return ret; } -- cgit v1.2.3 From 947045a2aac1157c85a24984c9a8128846ae7266 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:30 +0800 Subject: irq_remapping: Introduce new interfaces to support hierarchical irqdomains Introduce new interfaces for interrupt remapping drivers to support hierarchical irqdomains: 1) irq_remapping_get_ir_irq_domain(): get irqdomain associated with an interrupt remapping unit. IOAPIC/HPET drivers use this interface to get parent interrupt remapping irqdomain. 2) irq_remapping_get_irq_domain(): get irqdomain for an IRQ allocation. This is mainly used to support MSI irqdomain. We must build one MSI irqdomain for each interrupt remapping unit. MSI driver calls this interface to get MSI irqdomain associated with an IR irqdomain which manages the PCI devices. In a further step we will store the irqdomain pointer in the device struct to avoid this call in the irq allocation path. Architecture specific hooks: 1) arch_get_ir_parent_domain(): get parent irqdomain for IR irqdomain, which is x86_vector_domain on x86 platforms. 2) arch_create_msi_irq_domain(): create an MSI irqdomain associated with the interrupt remapping unit. We also add following callbacks into struct irq_remap_ops: struct irq_domain *(*get_ir_irq_domain)(struct irq_alloc_info *); struct irq_domain *(*get_irq_domain)(struct irq_alloc_info *); Once all clients of IR have been converted to the new hierarchical irqdomain interfaces, we will: 1) Remove set_ioapic_entry, set_affinity, free_irq, compose_msi_msg, msi_alloc_irq, msi_setup_irq, setup_hpet_msi from struct remap_osp 2) Remove setup_ioapic_remapped_entry, free_remapped_irq, compose_remapped_msi_msg, setup_hpet_msi_remapped, setup_remapped_irq. 3) Simplify x86_io_apic_ops and x86_msi. We can achieve a way clearer architecture with all these changes applied. Signed-off-by: Jiang Liu Acked-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Greg Kroah-Hartman Cc: iommu@lists.linux-foundation.org Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Joerg Roedel Link: http://lkml.kernel.org/r/1428905519-23704-9-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hw_irq.h | 37 ++++++++++++++++++++++++++ arch/x86/include/asm/irq_remapping.h | 36 ++++++++++++++++++++++++++ drivers/iommu/irq_remapping.c | 50 +++++++++++++++++++++++++++++++++++- drivers/iommu/irq_remapping.h | 10 ++++++++ 4 files changed, 132 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 5b951ac56aa1..75a97a5bbfa8 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -113,10 +113,47 @@ struct irq_2_irte { #ifdef CONFIG_X86_LOCAL_APIC struct irq_data; struct irq_domain; +struct pci_dev; +struct msi_desc; + +enum irq_alloc_type { + X86_IRQ_ALLOC_TYPE_IOAPIC = 1, + X86_IRQ_ALLOC_TYPE_HPET, + X86_IRQ_ALLOC_TYPE_MSI, + X86_IRQ_ALLOC_TYPE_MSIX, +}; struct irq_alloc_info { + enum irq_alloc_type type; u32 flags; const struct cpumask *mask; /* CPU mask for vector allocation */ + union { + int unused; +#ifdef CONFIG_HPET_TIMER + struct { + int hpet_id; + int hpet_index; + void *hpet_data; + }; +#endif +#ifdef CONFIG_PCI_MSI + struct { + struct pci_dev *msi_dev; + irq_hw_number_t msi_hwirq; + }; +#endif +#ifdef CONFIG_X86_IO_APIC + struct { + int ioapic_id; + int ioapic_pin; + int ioapic_node; + u32 ioapic_trigger : 1; + u32 ioapic_polarity : 1; + u32 ioapic_valid : 1; + struct IO_APIC_route_entry *ioapic_entry; + }; +#endif + }; }; enum { diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 86d897bc15dd..0cd6195cc375 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -22,6 +22,8 @@ #ifndef __X86_IRQ_REMAPPING_H #define __X86_IRQ_REMAPPING_H +#include +#include #include struct IO_APIC_route_entry; @@ -30,6 +32,7 @@ struct irq_chip; struct msi_msg; struct pci_dev; struct irq_cfg; +struct irq_alloc_info; #ifdef CONFIG_IRQ_REMAP @@ -56,6 +59,25 @@ extern bool setup_remapped_irq(int irq, void irq_remap_modify_chip_defaults(struct irq_chip *chip); +extern struct irq_domain * +irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info); +extern struct irq_domain * +irq_remapping_get_irq_domain(struct irq_alloc_info *info); +extern void irq_remapping_print_chip(struct irq_data *data, struct seq_file *p); + +/* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */ +static inline struct irq_domain * +arch_create_msi_irq_domain(struct irq_domain *parent) +{ + return NULL; +} + +/* Get parent irqdomain for interrupt remapping irqdomain */ +static inline struct irq_domain *arch_get_ir_parent_domain(void) +{ + return x86_vector_domain; +} + #else /* CONFIG_IRQ_REMAP */ static inline void set_irq_remapping_broken(void) { } @@ -97,6 +119,20 @@ static inline bool setup_remapped_irq(int irq, { return false; } + +static inline struct irq_domain * +irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info) +{ + return NULL; +} + +static inline struct irq_domain * +irq_remapping_get_irq_domain(struct irq_alloc_info *info) +{ + return NULL; +} + +#define irq_remapping_print_chip NULL #endif /* CONFIG_IRQ_REMAP */ extern int dmar_alloc_hwirq(void); diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 5617150fd8fb..c306421d86c1 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -345,7 +345,7 @@ void panic_if_irq_remap(const char *msg) panic(msg); } -static void ir_ack_apic_edge(struct irq_data *data) +void ir_ack_apic_edge(struct irq_data *data) { ack_APIC_irq(); } @@ -356,6 +356,19 @@ static void ir_ack_apic_level(struct irq_data *data) eoi_ioapic_irq(data->irq, irqd_cfg(data)); } +void irq_remapping_print_chip(struct irq_data *data, struct seq_file *p) +{ + /* + * Assume interrupt is remapped if the parent irqdomain isn't the + * vector domain, which is true for MSI, HPET and IOAPIC on x86 + * platforms. + */ + if (data->domain && data->domain->parent != arch_get_ir_parent_domain()) + seq_printf(p, " IR-%s", data->chip->name); + else + seq_printf(p, " %s", data->chip->name); +} + static void ir_print_prefix(struct irq_data *data, struct seq_file *p) { seq_printf(p, " IR-%s", data->chip->name); @@ -377,3 +390,38 @@ bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip) irq_remap_modify_chip_defaults(chip); return true; } + +/** + * irq_remapping_get_ir_irq_domain - Get the irqdomain associated with the IOMMU + * device serving request @info + * @info: interrupt allocation information, used to identify the IOMMU device + * + * It's used to get parent irqdomain for HPET and IOAPIC irqdomains. + * Returns pointer to IRQ domain, or NULL on failure. + */ +struct irq_domain * +irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info) +{ + if (!remap_ops || !remap_ops->get_ir_irq_domain) + return NULL; + + return remap_ops->get_ir_irq_domain(info); +} + +/** + * irq_remapping_get_irq_domain - Get the irqdomain serving the request @info + * @info: interrupt allocation information, used to identify the IOMMU device + * + * There will be one PCI MSI/MSIX irqdomain associated with each interrupt + * remapping device, so this interface is used to retrieve the PCI MSI/MSIX + * irqdomain serving request @info. + * Returns pointer to IRQ domain, or NULL on failure. + */ +struct irq_domain * +irq_remapping_get_irq_domain(struct irq_alloc_info *info) +{ + if (!remap_ops || !remap_ops->get_irq_domain) + return NULL; + + return remap_ops->get_irq_domain(info); +} diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index 7c70cc29ffe6..3e109b1ea688 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -30,6 +30,8 @@ struct irq_data; struct cpumask; struct pci_dev; struct msi_msg; +struct irq_domain; +struct irq_alloc_info; extern int irq_remap_broken; extern int disable_sourceid_checking; @@ -77,11 +79,19 @@ struct irq_remap_ops { /* Setup interrupt remapping for an HPET MSI */ int (*alloc_hpet_msi)(unsigned int, unsigned int); + + /* Get the irqdomain associated the IOMMU device */ + struct irq_domain *(*get_ir_irq_domain)(struct irq_alloc_info *); + + /* Get the MSI irqdomain associated with the IOMMU device */ + struct irq_domain *(*get_irq_domain)(struct irq_alloc_info *); }; extern struct irq_remap_ops intel_irq_remap_ops; extern struct irq_remap_ops amd_iommu_irq_ops; +extern void ir_ack_apic_edge(struct irq_data *data); + #else /* CONFIG_IRQ_REMAP */ #define irq_remapping_enabled 0 -- cgit v1.2.3 From 3cb96f0c97330834929abe9bd2ca3c252a83def0 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:34 +0800 Subject: x86/hpet: Enhance HPET IRQ to support hierarchical irqdomains Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Srivatsa S. Bhat Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Andy Lutomirski Link: http://lkml.kernel.org/r/1428905519-23704-13-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hpet.h | 7 +- arch/x86/kernel/apic/msi.c | 166 +++++++++++++++++++++++++++++++++++++++----- arch/x86/kernel/hpet.c | 57 ++++----------- 3 files changed, 167 insertions(+), 63 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 36f7125945e3..e87e9faf87a9 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -74,11 +74,16 @@ extern unsigned int hpet_readl(unsigned int a); extern void force_hpet_resume(void); struct irq_data; +struct hpet_dev; +struct irq_domain; + extern void hpet_msi_unmask(struct irq_data *data); extern void hpet_msi_mask(struct irq_data *data); -struct hpet_dev; extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg); extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg); +extern struct irq_domain *hpet_create_irq_domain(int hpet_id); +extern int hpet_assign_irq(struct irq_domain *domain, + struct hpet_dev *dev, int dev_num); #ifdef CONFIG_PCI_MSI extern int default_setup_hpet_msi(unsigned int irq, unsigned int id); diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 9be7d6d8a579..10d9ae8f2166 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -51,6 +51,44 @@ void native_compose_msi_msg(struct pci_dev *pdev, MSI_DATA_VECTOR(cfg->vector); } +static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +{ + struct irq_cfg *cfg = irqd_cfg(data); + + msg->address_hi = MSI_ADDR_BASE_HI; + + if (x2apic_enabled()) + msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid); + + msg->address_lo = + MSI_ADDR_BASE_LO | + ((apic->irq_dest_mode == 0) ? + MSI_ADDR_DEST_MODE_PHYSICAL : + MSI_ADDR_DEST_MODE_LOGICAL) | + ((apic->irq_delivery_mode != dest_LowestPrio) ? + MSI_ADDR_REDIRECTION_CPU : + MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_DEST_ID(cfg->dest_apicid); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + ((apic->irq_delivery_mode != dest_LowestPrio) ? + MSI_DATA_DELIVERY_FIXED : + MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_VECTOR(cfg->vector); +} + +static void msi_update_msg(struct msi_msg *msg, struct irq_data *irq_data) +{ + struct irq_cfg *cfg = irqd_cfg(irq_data); + + msg->data &= ~MSI_DATA_VECTOR_MASK; + msg->data |= MSI_DATA_VECTOR(cfg->vector); + msg->address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg->address_lo |= MSI_ADDR_DEST_ID(cfg->dest_apicid); +} + static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg, u8 hpet_id) { @@ -239,44 +277,43 @@ void dmar_free_hwirq(int irq) * MSI message composition */ #ifdef CONFIG_HPET_TIMER +static inline int hpet_dev_id(struct irq_domain *domain) +{ + return (int)(long)domain->host_data; +} static int hpet_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - struct irq_cfg *cfg = irqd_cfg(data); + struct irq_data *parent = data->parent_data; struct msi_msg msg; - unsigned int dest; int ret; - ret = apic_set_affinity(data, mask, &dest); - if (ret) - return ret; - - hpet_msi_read(data->handler_data, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - hpet_msi_write(data->handler_data, &msg); + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) { + hpet_msi_read(data->handler_data, &msg); + msi_update_msg(&msg, data); + hpet_msi_write(data->handler_data, &msg); + } - return IRQ_SET_MASK_OK_NOCOPY; + return ret; } -static struct irq_chip hpet_msi_type = { +static struct irq_chip hpet_msi_controller = { .name = "HPET_MSI", .irq_unmask = hpet_msi_unmask, .irq_mask = hpet_msi_mask, - .irq_ack = apic_ack_edge, + .irq_ack = irq_chip_ack_parent, .irq_set_affinity = hpet_msi_set_affinity, - .irq_retrigger = apic_retrigger_irq, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_print_chip = irq_remapping_print_chip, + .irq_compose_msi_msg = irq_msi_compose_msg, .flags = IRQCHIP_SKIP_SET_WAKE, }; int default_setup_hpet_msi(unsigned int irq, unsigned int id) { - struct irq_chip *chip = &hpet_msi_type; + struct irq_chip *chip = &hpet_msi_controller; struct msi_msg msg; int ret; @@ -291,4 +328,95 @@ int default_setup_hpet_msi(unsigned int irq, unsigned int id) irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); return 0; } + +static int hpet_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct irq_alloc_info *info = arg; + int ret; + + if (nr_irqs > 1 || !info || info->type != X86_IRQ_ALLOC_TYPE_HPET) + return -EINVAL; + if (irq_find_mapping(domain, info->hpet_index)) { + pr_warn("IRQ for HPET%d already exists.\n", info->hpet_index); + return -EEXIST; + } + + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); + if (ret >= 0) { + irq_set_status_flags(virq, IRQ_MOVE_PCNTXT); + irq_domain_set_hwirq_and_chip(domain, virq, info->hpet_index, + &hpet_msi_controller, NULL); + irq_set_handler_data(virq, info->hpet_data); + __irq_set_handler(virq, handle_edge_irq, 0, "edge"); + } + + return ret; +} + +static void hpet_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + BUG_ON(nr_irqs > 1); + irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT); + irq_domain_free_irqs_top(domain, virq, nr_irqs); +} + +static void hpet_domain_activate(struct irq_domain *domain, + struct irq_data *irq_data) +{ + struct msi_msg msg; + + BUG_ON(irq_chip_compose_msi_msg(irq_data, &msg)); + hpet_msi_write(irq_get_handler_data(irq_data->irq), &msg); +} + +static void hpet_domain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data) +{ + struct msi_msg msg; + + memset(&msg, 0, sizeof(msg)); + hpet_msi_write(irq_get_handler_data(irq_data->irq), &msg); +} + +static struct irq_domain_ops hpet_domain_ops = { + .alloc = hpet_domain_alloc, + .free = hpet_domain_free, + .activate = hpet_domain_activate, + .deactivate = hpet_domain_deactivate, +}; + +struct irq_domain *hpet_create_irq_domain(int hpet_id) +{ + struct irq_domain *parent; + struct irq_alloc_info info; + + if (x86_vector_domain == NULL) + return NULL; + + init_irq_alloc_info(&info, NULL); + info.type = X86_IRQ_ALLOC_TYPE_HPET; + info.hpet_id = hpet_id; + parent = irq_remapping_get_ir_irq_domain(&info); + if (parent == NULL) + parent = x86_vector_domain; + + return irq_domain_add_hierarchy(parent, 0, 0, NULL, &hpet_domain_ops, + (void *)(long)hpet_id); +} + +int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev, + int dev_num) +{ + struct irq_alloc_info info; + + init_irq_alloc_info(&info, NULL); + info.type = X86_IRQ_ALLOC_TYPE_HPET; + info.hpet_data = dev; + info.hpet_id = hpet_dev_id(domain); + info.hpet_index = dev_num; + + return irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, NULL); +} #endif diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ae29554f57ea..e3bc18080052 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -306,8 +306,6 @@ static void hpet_legacy_clockevent_register(void) printk(KERN_DEBUG "hpet clockevent registered\n"); } -static int hpet_setup_msi_irq(unsigned int irq); - static void hpet_set_mode(enum clock_event_mode mode, struct clock_event_device *evt, int timer) { @@ -358,7 +356,7 @@ static void hpet_set_mode(enum clock_event_mode mode, hpet_enable_legacy_int(); } else { struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - hpet_setup_msi_irq(hdev->irq); + irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); disable_irq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); @@ -424,6 +422,7 @@ static int hpet_legacy_next_event(unsigned long delta, static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); static struct hpet_dev *hpet_devs; +static struct irq_domain *hpet_domain; void hpet_msi_unmask(struct irq_data *data) { @@ -474,32 +473,6 @@ static int hpet_msi_next_event(unsigned long delta, return hpet_next_event(delta, evt, hdev->num); } -static int hpet_setup_msi_irq(unsigned int irq) -{ - if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) { - irq_domain_free_irqs(irq, 1); - return -EINVAL; - } - return 0; -} - -static int hpet_assign_irq(struct hpet_dev *dev) -{ - int irq; - - irq = irq_domain_alloc_irqs(NULL, 1, NUMA_NO_NODE, NULL); - if (irq <= 0) - return -EINVAL; - - irq_set_handler_data(irq, dev); - - if (hpet_setup_msi_irq(irq)) - return -EINVAL; - - dev->irq = irq; - return 0; -} - static irqreturn_t hpet_interrupt_handler(int irq, void *data) { struct hpet_dev *dev = (struct hpet_dev *)data; @@ -542,9 +515,6 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) if (!(hdev->flags & HPET_DEV_VALID)) return; - if (hpet_setup_msi_irq(hdev->irq)) - return; - hdev->cpu = cpu; per_cpu(cpu_hpet_dev, cpu) = hdev; evt->name = hdev->name; @@ -576,7 +546,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) unsigned int id; unsigned int num_timers; unsigned int num_timers_used = 0; - int i; + int i, irq; if (hpet_msi_disable) return; @@ -589,6 +559,10 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) num_timers++; /* Value read out starts from 0 */ hpet_print_config(); + hpet_domain = hpet_create_irq_domain(hpet_blockid); + if (!hpet_domain) + return; + hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL); if (!hpet_devs) return; @@ -603,15 +577,16 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) if (!(cfg & HPET_TN_FSB_CAP)) continue; + irq = hpet_assign_irq(hpet_domain, hdev, hdev->num); + if (irq < 0) + continue; + + sprintf(hdev->name, "hpet%d", i); + hdev->num = i; + hdev->irq = irq; hdev->flags = 0; if (cfg & HPET_TN_PERIODIC_CAP) hdev->flags |= HPET_DEV_PERI_CAP; - hdev->num = i; - - sprintf(hdev->name, "hpet%d", i); - if (hpet_assign_irq(hdev)) - continue; - hdev->flags |= HPET_DEV_FSB_CAP; hdev->flags |= HPET_DEV_VALID; num_timers_used++; @@ -711,10 +686,6 @@ static int hpet_cpuhp_notify(struct notifier_block *n, } #else -static int hpet_setup_msi_irq(unsigned int irq) -{ - return 0; -} static void hpet_msi_capability_lookup(unsigned int start_timer) { return; -- cgit v1.2.3 From 52f518a3a7c2f80551a38d38be28bc9f335e713c Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:35 +0800 Subject: x86/MSI: Use hierarchical irqdomains to manage MSI interrupts Enhance MSI code to support hierarchical irqdomains, it helps to make the architecture more clear. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Greg Kroah-Hartman Cc: iommu@lists.linux-foundation.org Cc: Joerg Roedel Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Joerg Roedel Link: http://lkml.kernel.org/r/1428905519-23704-14-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 1 + arch/x86/include/asm/hw_irq.h | 9 ++- arch/x86/include/asm/irq_remapping.h | 6 +- arch/x86/include/asm/msi.h | 7 ++ arch/x86/kernel/apic/msi.c | 141 +++++++++++++++++++---------------- arch/x86/kernel/apic/vector.c | 2 + drivers/iommu/irq_remapping.c | 1 - 7 files changed, 94 insertions(+), 73 deletions(-) create mode 100644 arch/x86/include/asm/msi.h (limited to 'arch/x86/include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e75a96c38c58..26e066579637 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -914,6 +914,7 @@ config X86_LOCAL_APIC depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ select IRQ_DOMAIN_HIERARCHY + select PCI_MSI_IRQ_DOMAIN if PCI_MSI config X86_IO_APIC def_bool y diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 75a97a5bbfa8..05829e973a2a 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -110,9 +110,10 @@ struct irq_2_irte { }; #endif /* CONFIG_IRQ_REMAP */ +struct irq_domain; + #ifdef CONFIG_X86_LOCAL_APIC struct irq_data; -struct irq_domain; struct pci_dev; struct msi_desc; @@ -214,6 +215,12 @@ static inline void lock_vector_lock(void) {} static inline void unlock_vector_lock(void) {} #endif /* CONFIG_X86_LOCAL_APIC */ +#ifdef CONFIG_PCI_MSI +extern void arch_init_msi_domain(struct irq_domain *domain); +#else +static inline void arch_init_msi_domain(struct irq_domain *domain) { } +#endif + /* Statistics */ extern atomic_t irq_err_count; extern atomic_t irq_mis_count; diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 0cd6195cc375..0d3bbd0e2c42 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -66,11 +66,7 @@ irq_remapping_get_irq_domain(struct irq_alloc_info *info); extern void irq_remapping_print_chip(struct irq_data *data, struct seq_file *p); /* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */ -static inline struct irq_domain * -arch_create_msi_irq_domain(struct irq_domain *parent) -{ - return NULL; -} +extern struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent); /* Get parent irqdomain for interrupt remapping irqdomain */ static inline struct irq_domain *arch_get_ir_parent_domain(void) diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h new file mode 100644 index 000000000000..93724cc62177 --- /dev/null +++ b/arch/x86/include/asm/msi.h @@ -0,0 +1,7 @@ +#ifndef _ASM_X86_MSI_H +#define _ASM_X86_MSI_H +#include + +typedef struct irq_alloc_info msi_alloc_info_t; + +#endif /* _ASM_X86_MSI_H */ diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 10d9ae8f2166..c426cd58844e 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -3,6 +3,8 @@ * * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo * Moved from arch/x86/kernel/apic/io_apic.c. + * Jiang Liu + * Convert to hierarchical irqdomain * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -21,6 +23,8 @@ #include #include +static struct irq_domain *msi_default_domain; + void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id) @@ -114,102 +118,107 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, return 0; } -static int -msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) -{ - struct irq_cfg *cfg = irqd_cfg(data); - struct msi_msg msg; - unsigned int dest; - int ret; - - ret = apic_set_affinity(data, mask, &dest); - if (ret) - return ret; - - __get_cached_msi_msg(data->msi_desc, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - __pci_write_msi_msg(data->msi_desc, &msg); - - return IRQ_SET_MASK_OK_NOCOPY; -} - /* * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, * which implement the MSI or MSI-X Capability Structure. */ -static struct irq_chip msi_chip = { +static struct irq_chip pci_msi_controller = { .name = "PCI-MSI", .irq_unmask = pci_msi_unmask_irq, .irq_mask = pci_msi_mask_irq, - .irq_ack = apic_ack_edge, - .irq_set_affinity = msi_set_affinity, - .irq_retrigger = apic_retrigger_irq, + .irq_ack = irq_chip_ack_parent, + .irq_set_affinity = msi_domain_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_print_chip = irq_remapping_print_chip, + .irq_compose_msi_msg = irq_msi_compose_msg, + .irq_write_msi_msg = pci_msi_domain_write_msg, .flags = IRQCHIP_SKIP_SET_WAKE, }; -int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, - unsigned int irq_base, unsigned int irq_offset) +int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - struct irq_chip *chip = &msi_chip; - struct msi_msg msg; - unsigned int irq = irq_base + irq_offset; - int ret; + struct irq_domain *domain; + struct irq_alloc_info info; - ret = msi_compose_msg(dev, irq, &msg, -1); - if (ret < 0) - return ret; + init_irq_alloc_info(&info, NULL); + info.type = X86_IRQ_ALLOC_TYPE_MSI; + info.msi_dev = dev; - irq_set_msi_desc_off(irq_base, irq_offset, msidesc); + domain = irq_remapping_get_irq_domain(&info); + if (domain == NULL) + domain = msi_default_domain; + if (domain == NULL) + return -ENOSYS; - /* - * MSI-X message is written per-IRQ, the offset is always 0. - * MSI message denotes a contiguous group of IRQs, written for 0th IRQ. - */ - if (!irq_offset) - pci_write_msi_msg(irq, &msg); + return pci_msi_domain_alloc_irqs(domain, dev, nvec, type); +} - setup_remapped_irq(irq, irq_cfg(irq), chip); +void native_teardown_msi_irq(unsigned int irq) +{ + irq_domain_free_irqs(irq, 1); +} - irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); +static irq_hw_number_t pci_msi_get_hwirq(struct msi_domain_info *info, + msi_alloc_info_t *arg) +{ + return arg->msi_hwirq; +} - dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", irq); +static int pci_msi_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *arg) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct msi_desc *desc = first_pci_msi_entry(pdev); + + init_irq_alloc_info(arg, NULL); + arg->msi_dev = pdev; + if (desc->msi_attrib.is_msix) { + arg->type = X86_IRQ_ALLOC_TYPE_MSIX; + } else { + arg->type = X86_IRQ_ALLOC_TYPE_MSI; + arg->flags |= X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; + } return 0; } -int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +static void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) { - struct msi_desc *msidesc; - int irq, ret; + arg->msi_hwirq = pci_msi_domain_calc_hwirq(arg->msi_dev, desc); +} - /* Multiple MSI vectors only supported with interrupt remapping */ - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; +static struct msi_domain_ops pci_msi_domain_ops = { + .get_hwirq = pci_msi_get_hwirq, + .msi_prepare = pci_msi_prepare, + .set_desc = pci_msi_set_desc, +}; - list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = irq_domain_alloc_irqs(NULL, 1, NUMA_NO_NODE, NULL); - if (irq <= 0) - return -ENOSPC; +static struct msi_domain_info pci_msi_domain_info = { + .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX, + .ops = &pci_msi_domain_ops, + .chip = &pci_msi_controller, + .handler = handle_edge_irq, + .handler_name = "edge", +}; - ret = setup_msi_irq(dev, msidesc, irq, 0); - if (ret < 0) { - irq_domain_free_irqs(irq, 1); - return ret; - } +void arch_init_msi_domain(struct irq_domain *parent) +{ + if (disable_apic) + return; - } - return 0; + msi_default_domain = pci_msi_create_irq_domain(NULL, + &pci_msi_domain_info, parent); + if (!msi_default_domain) + pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n"); } -void native_teardown_msi_irq(unsigned int irq) +#ifdef CONFIG_IRQ_REMAP +struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent) { - irq_domain_free_irqs(irq, 1); + return msi_create_irq_domain(NULL, &pci_msi_domain_info, parent); } +#endif #ifdef CONFIG_DMAR_TABLE static int diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6358d8d351f5..a8d82896be75 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -364,6 +364,8 @@ int __init arch_early_irq_init(void) BUG_ON(x86_vector_domain == NULL); irq_set_default_host(x86_vector_domain); + arch_init_msi_domain(x86_vector_domain); + return arch_early_ioapic_init(); } diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index c306421d86c1..d77e3711c2aa 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -170,7 +170,6 @@ static void __init irq_remapping_modify_x86_ops(void) x86_io_apic_ops.set_affinity = set_remapped_irq_affinity; x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry; x86_io_apic_ops.eoi_ioapic_pin = eoi_ioapic_pin_remapped; - x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs; x86_msi.setup_hpet_msi = setup_hpet_msi_remapped; x86_msi.compose_msi_msg = compose_remapped_msi_msg; } -- cgit v1.2.3 From 7a53a12162cbe5feb66380b96cc794a031a8f39a Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:39 +0800 Subject: irq_remapping: Clean up unused MSI related code Now MSI interrupt has been converted to new hierarchical irqdomain interfaces, so remove legacy MSI related code and interfaces. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Greg Kroah-Hartman Cc: iommu@lists.linux-foundation.org Cc: Rafael J. Wysocki Cc: Joerg Roedel Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Joerg Roedel Cc: Yijing Wang Link: http://lkml.kernel.org/r/1428905519-23704-18-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/irq_remapping.h | 13 --- arch/x86/include/asm/pci.h | 5 -- arch/x86/kernel/x86_init.c | 2 - drivers/iommu/irq_remapping.c | 151 ----------------------------------- drivers/iommu/irq_remapping.h | 14 ---- 5 files changed, 185 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 0d3bbd0e2c42..b978c68f5d29 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -48,10 +48,6 @@ extern int setup_ioapic_remapped_entry(int irq, int vector, struct io_apic_irq_attr *attr); extern void free_remapped_irq(int irq); -extern void compose_remapped_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id); -extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); extern void panic_if_irq_remap(const char *msg); extern bool setup_remapped_irq(int irq, struct irq_cfg *cfg, @@ -91,15 +87,6 @@ static inline int setup_ioapic_remapped_entry(int irq, return -ENODEV; } static inline void free_remapped_irq(int irq) { } -static inline void compose_remapped_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id) -{ -} -static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) -{ - return -ENODEV; -} static inline void panic_if_irq_remap(const char *msg) { diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 4e370a5d8117..d8c80ff32e8c 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -96,15 +96,10 @@ extern void pci_iommu_alloc(void); #ifdef CONFIG_PCI_MSI /* implemented in arch/x86/kernel/apic/io_apic. */ struct msi_desc; -void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq, - unsigned int dest, struct msi_msg *msg, u8 hpet_id); int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void native_teardown_msi_irq(unsigned int irq); void native_restore_msi_irqs(struct pci_dev *dev); -int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, - unsigned int irq_base, unsigned int irq_offset); #else -#define native_compose_msi_msg NULL #define native_setup_msi_irqs NULL #define native_teardown_msi_irq NULL #endif diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 234b0722de53..b094d691f2fe 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -111,11 +111,9 @@ EXPORT_SYMBOL_GPL(x86_platform); #if defined(CONFIG_PCI_MSI) struct x86_msi_ops x86_msi = { .setup_msi_irqs = native_setup_msi_irqs, - .compose_msi_msg = native_compose_msi_msg, .teardown_msi_irq = native_teardown_msi_irq, .teardown_msi_irqs = default_teardown_msi_irqs, .restore_msi_irqs = default_restore_msi_irqs, - .setup_hpet_msi = default_setup_hpet_msi, }; /* MSI arch specific hooks */ diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index d77e3711c2aa..3eaa822c30a9 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -25,9 +25,6 @@ int no_x2apic_optout; static int disable_irq_remap; static struct irq_remap_ops *remap_ops; -static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec); -static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, - int index, int sub_handle); static int set_remapped_irq_affinity(struct irq_data *data, const struct cpumask *mask, bool force); @@ -50,109 +47,6 @@ static void irq_remapping_disable_io_apic(void) disconnect_bsp_APIC(0); } -#ifndef CONFIG_GENERIC_IRQ_LEGACY_ALLOC_HWIRQ -static unsigned int irq_alloc_hwirqs(int cnt, int node) -{ - return irq_domain_alloc_irqs(NULL, -1, cnt, node, NULL); -} - -static void irq_free_hwirqs(unsigned int from, int cnt) -{ - irq_domain_free_irqs(from, cnt); -} -#endif - -static int do_setup_msi_irqs(struct pci_dev *dev, int nvec) -{ - int ret, sub_handle, nvec_pow2, index = 0; - unsigned int irq; - struct msi_desc *msidesc; - - msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); - - irq = irq_alloc_hwirqs(nvec, dev_to_node(&dev->dev)); - if (irq == 0) - return -ENOSPC; - - nvec_pow2 = __roundup_pow_of_two(nvec); - for (sub_handle = 0; sub_handle < nvec; sub_handle++) { - if (!sub_handle) { - index = msi_alloc_remapped_irq(dev, irq, nvec_pow2); - if (index < 0) { - ret = index; - goto error; - } - } else { - ret = msi_setup_remapped_irq(dev, irq + sub_handle, - index, sub_handle); - if (ret < 0) - goto error; - } - ret = setup_msi_irq(dev, msidesc, irq, sub_handle); - if (ret < 0) - goto error; - } - return 0; - -error: - irq_free_hwirqs(irq, nvec); - - /* - * Restore altered MSI descriptor fields and prevent just destroyed - * IRQs from tearing down again in default_teardown_msi_irqs() - */ - msidesc->irq = 0; - - return ret; -} - -static int do_setup_msix_irqs(struct pci_dev *dev, int nvec) -{ - int node, ret, sub_handle, index = 0; - struct msi_desc *msidesc; - unsigned int irq; - - node = dev_to_node(&dev->dev); - sub_handle = 0; - - list_for_each_entry(msidesc, &dev->msi_list, list) { - - irq = irq_alloc_hwirqs(1, node); - if (irq == 0) - return -1; - - if (sub_handle == 0) - ret = index = msi_alloc_remapped_irq(dev, irq, nvec); - else - ret = msi_setup_remapped_irq(dev, irq, index, sub_handle); - - if (ret < 0) - goto error; - - ret = setup_msi_irq(dev, msidesc, irq, 0); - if (ret < 0) - goto error; - - sub_handle += 1; - irq += 1; - } - - return 0; - -error: - irq_free_hwirqs(irq, 1); - return ret; -} - -static int irq_remapping_setup_msi_irqs(struct pci_dev *dev, - int nvec, int type) -{ - if (type == PCI_CAP_ID_MSI) - return do_setup_msi_irqs(dev, nvec); - else - return do_setup_msix_irqs(dev, nvec); -} - static void eoi_ioapic_pin_remapped(int apic, int pin, int vector) { /* @@ -170,8 +64,6 @@ static void __init irq_remapping_modify_x86_ops(void) x86_io_apic_ops.set_affinity = set_remapped_irq_affinity; x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry; x86_io_apic_ops.eoi_ioapic_pin = eoi_ioapic_pin_remapped; - x86_msi.setup_hpet_msi = setup_hpet_msi_remapped; - x86_msi.compose_msi_msg = compose_remapped_msi_msg; } static __init int setup_nointremap(char *str) @@ -295,49 +187,6 @@ void free_remapped_irq(int irq) remap_ops->free_irq(irq); } -void compose_remapped_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id) -{ - struct irq_cfg *cfg = irq_cfg(irq); - - if (!irq_remapped(cfg)) - native_compose_msi_msg(pdev, irq, dest, msg, hpet_id); - else if (remap_ops->compose_msi_msg) - remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id); -} - -static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) -{ - if (!remap_ops->msi_alloc_irq) - return -ENODEV; - - return remap_ops->msi_alloc_irq(pdev, irq, nvec); -} - -static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, - int index, int sub_handle) -{ - if (!remap_ops->msi_setup_irq) - return -ENODEV; - - return remap_ops->msi_setup_irq(pdev, irq, index, sub_handle); -} - -int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) -{ - int ret; - - if (!remap_ops->alloc_hpet_msi) - return -ENODEV; - - ret = remap_ops->alloc_hpet_msi(irq, id); - if (ret) - return -EINVAL; - - return default_setup_hpet_msi(irq, id); -} - void panic_if_irq_remap(const char *msg) { if (irq_remapping_enabled) diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index 3e109b1ea688..16b7d814e6fe 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -66,20 +66,6 @@ struct irq_remap_ops { /* Free an IRQ */ int (*free_irq)(int); - /* Create MSI msg to use for interrupt remapping */ - void (*compose_msi_msg)(struct pci_dev *, - unsigned int, unsigned int, - struct msi_msg *, u8); - - /* Allocate remapping resources for MSI */ - int (*msi_alloc_irq)(struct pci_dev *, int, int); - - /* Setup the remapped MSI irq */ - int (*msi_setup_irq)(struct pci_dev *, unsigned int, int, int); - - /* Setup interrupt remapping for an HPET MSI */ - int (*alloc_hpet_msi)(unsigned int, unsigned int); - /* Get the irqdomain associated the IOMMU device */ struct irq_domain *(*get_ir_irq_domain)(struct irq_alloc_info *); -- cgit v1.2.3 From b1855c752e67d1125d41fadb499014b49a245db8 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:40 +0800 Subject: x86/MSI: Clean up unused MSI related code and interfaces Now MSI interrupt has been converted to new hierarchical irqdomain interfaces, so remove legacy MSI related code and interfaces. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Yijing Wang Link: http://lkml.kernel.org/r/1428905519-23704-19-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hpet.h | 9 ------- arch/x86/include/asm/x86_init.h | 4 --- arch/x86/kernel/apic/msi.c | 55 +++-------------------------------------- 3 files changed, 4 insertions(+), 64 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index e87e9faf87a9..5fa9fb0f8809 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -85,15 +85,6 @@ extern struct irq_domain *hpet_create_irq_domain(int hpet_id); extern int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev, int dev_num); -#ifdef CONFIG_PCI_MSI -extern int default_setup_hpet_msi(unsigned int irq, unsigned int id); -#else -static inline int default_setup_hpet_msi(unsigned int irq, unsigned int id) -{ - return -EINVAL; -} -#endif - #ifdef CONFIG_HPET_EMULATE_RTC #include diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index f58a9c7a3c86..1649bb9ca27c 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -175,13 +175,9 @@ struct msi_msg; struct x86_msi_ops { int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); - void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq, - unsigned int dest, struct msi_msg *msg, - u8 hpet_id); void (*teardown_msi_irq)(unsigned int irq); void (*teardown_msi_irqs)(struct pci_dev *dev); void (*restore_msi_irqs)(struct pci_dev *dev); - int (*setup_hpet_msi)(unsigned int irq, unsigned int id); }; struct IO_APIC_route_entry; diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 9adb87100ffe..9fe7a08479fa 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -25,16 +25,12 @@ static struct irq_domain *msi_default_domain; -void native_compose_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id) +static void native_compose_msi_msg(struct irq_cfg *cfg, struct msi_msg *msg) { - struct irq_cfg *cfg = irq_cfg(irq); - msg->address_hi = MSI_ADDR_BASE_HI; if (x2apic_enabled()) - msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest); + msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid); msg->address_lo = MSI_ADDR_BASE_LO | @@ -44,7 +40,7 @@ void native_compose_msi_msg(struct pci_dev *pdev, ((apic->irq_delivery_mode != dest_LowestPrio) ? MSI_ADDR_REDIRECTION_CPU : MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); + MSI_ADDR_DEST_ID(cfg->dest_apicid); msg->data = MSI_DATA_TRIGGER_EDGE | @@ -93,31 +89,6 @@ static void msi_update_msg(struct msi_msg *msg, struct irq_data *irq_data) msg->address_lo |= MSI_ADDR_DEST_ID(cfg->dest_apicid); } -static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, - struct msi_msg *msg, u8 hpet_id) -{ - struct irq_cfg *cfg; - int err; - unsigned dest; - - if (disable_apic) - return -ENXIO; - - cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (err) - return err; - - err = apic->cpu_mask_to_apicid_and(cfg->domain, - apic->target_cpus(), &dest); - if (err) - return err; - - x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id); - - return 0; -} - /* * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, * which implement the MSI or MSI-X Capability Structure. @@ -262,7 +233,7 @@ int arch_setup_dmar_msi(unsigned int irq) struct msi_msg msg; struct irq_cfg *cfg = irq_cfg(irq); - native_compose_msi_msg(NULL, irq, cfg->dest_apicid, &msg, -1); + native_compose_msi_msg(cfg, &msg); dmar_msi_write(irq, &msg); irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, "edge"); @@ -318,24 +289,6 @@ static struct irq_chip hpet_msi_controller = { .flags = IRQCHIP_SKIP_SET_WAKE, }; -int default_setup_hpet_msi(unsigned int irq, unsigned int id) -{ - struct irq_chip *chip = &hpet_msi_controller; - struct msi_msg msg; - int ret; - - ret = msi_compose_msg(NULL, irq, &msg, id); - if (ret < 0) - return ret; - - hpet_msi_write(irq_get_handler_data(irq), &msg); - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - setup_remapped_irq(irq, irq_cfg(irq), chip); - - irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); - return 0; -} - static int hpet_domain_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *arg) { -- cgit v1.2.3 From 34742db8eaf9ff364034f214ee5827701e131d4b Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:41 +0800 Subject: iommu/vt-d: Refine the interfaces to create IRQ for DMAR unit Refine the interfaces to create IRQ for DMAR unit. It's a preparation for converting DMAR IRQ to hierarchical irqdomain on x86. It also moves dmar_alloc_hwirq()/dmar_free_hwirq() from irq_remapping.h to dmar.h. They are not irq_remapping specific. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Greg Kroah-Hartman Cc: iommu@lists.linux-foundation.org Cc: Vinod Koul Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Tony Luck Cc: Fenghua Yu Cc: Joerg Roedel Link: http://lkml.kernel.org/r/1428905519-23704-20-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/ia64/include/asm/irq_remapping.h | 2 -- arch/ia64/kernel/msi_ia64.c | 30 +++++++++++++++++++----------- arch/x86/include/asm/irq_remapping.h | 4 ---- arch/x86/kernel/apic/msi.c | 24 +++++++++++++----------- drivers/iommu/dmar.c | 19 +++++-------------- include/linux/dmar.h | 3 ++- 6 files changed, 39 insertions(+), 43 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/ia64/include/asm/irq_remapping.h b/arch/ia64/include/asm/irq_remapping.h index e3b3556e2e1b..a8687b1d8906 100644 --- a/arch/ia64/include/asm/irq_remapping.h +++ b/arch/ia64/include/asm/irq_remapping.h @@ -1,6 +1,4 @@ #ifndef __IA64_INTR_REMAPPING_H #define __IA64_INTR_REMAPPING_H #define irq_remapping_enabled 0 -#define dmar_alloc_hwirq create_irq -#define dmar_free_hwirq destroy_irq #endif diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 9dd7464f8c17..d70bf15c690a 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -165,7 +165,7 @@ static struct irq_chip dmar_msi_type = { .irq_retrigger = ia64_msi_retrigger_irq, }; -static int +static void msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) { struct irq_cfg *cfg = irq_cfg + irq; @@ -186,21 +186,29 @@ msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) MSI_DATA_LEVEL_ASSERT | MSI_DATA_DELIVERY_FIXED | MSI_DATA_VECTOR(cfg->vector); - return 0; } -int arch_setup_dmar_msi(unsigned int irq) +int dmar_alloc_hwirq(int id, int node, void *arg) { - int ret; + int irq; struct msi_msg msg; - ret = msi_compose_msg(NULL, irq, &msg); - if (ret < 0) - return ret; - dmar_msi_write(irq, &msg); - irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, - "edge"); - return 0; + irq = create_irq(); + if (irq > 0) { + irq_set_handler_data(irq, arg); + irq_set_chip_and_handler_name(irq, &dmar_msi_type, + handle_edge_irq, "edge"); + msi_compose_msg(NULL, irq, &msg); + dmar_msi_write(irq, &msg); + } + + return irq; +} + +void dmar_free_hwirq(int irq) +{ + irq_set_handler_data(irq, NULL); + destroy_irq(irq); } #endif /* CONFIG_INTEL_IOMMU */ diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index b978c68f5d29..bacac1052262 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -117,8 +117,4 @@ irq_remapping_get_irq_domain(struct irq_alloc_info *info) #define irq_remapping_print_chip NULL #endif /* CONFIG_IRQ_REMAP */ - -extern int dmar_alloc_hwirq(void); -extern void dmar_free_hwirq(int irq); - #endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 9fe7a08479fa..ca6250439acc 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -228,25 +228,27 @@ static struct irq_chip dmar_msi_type = { .flags = IRQCHIP_SKIP_SET_WAKE, }; -int arch_setup_dmar_msi(unsigned int irq) +int dmar_alloc_hwirq(int id, int node, void *arg) { + int irq; struct msi_msg msg; - struct irq_cfg *cfg = irq_cfg(irq); - native_compose_msi_msg(cfg, &msg); - dmar_msi_write(irq, &msg); - irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, - "edge"); - return 0; -} + irq = irq_domain_alloc_irqs(NULL, 1, node, NULL); + if (irq > 0) { + irq_set_handler_data(irq, arg); + irq_set_chip_and_handler_name(irq, &dmar_msi_type, + handle_edge_irq, "edge"); + native_compose_msi_msg(irq_cfg(irq), &msg); + dmar_msi_write(irq, &msg); + } -int dmar_alloc_hwirq(void) -{ - return irq_domain_alloc_irqs(NULL, 1, NUMA_NO_NODE, NULL); + return irq; } void dmar_free_hwirq(int irq) { + irq_set_handler_data(irq, NULL); + irq_set_handler(irq, NULL); irq_domain_free_irqs(irq, 1); } #endif diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 9847613085e1..536f2d8ea41a 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1087,8 +1087,8 @@ static void free_iommu(struct intel_iommu *iommu) if (iommu->irq) { free_irq(iommu->irq, iommu); - irq_set_handler_data(iommu->irq, NULL); dmar_free_hwirq(iommu->irq); + iommu->irq = 0; } if (iommu->qi) { @@ -1642,23 +1642,14 @@ int dmar_set_interrupt(struct intel_iommu *iommu) if (iommu->irq) return 0; - irq = dmar_alloc_hwirq(); - if (irq <= 0) { + irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu); + if (irq > 0) { + iommu->irq = irq; + } else { pr_err("IOMMU: no free vectors\n"); return -EINVAL; } - irq_set_handler_data(irq, iommu); - iommu->irq = irq; - - ret = arch_setup_dmar_msi(irq); - if (ret) { - irq_set_handler_data(irq, NULL); - iommu->irq = 0; - dmar_free_hwirq(irq); - return ret; - } - ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu); if (ret) pr_err("IOMMU: can't request irq\n"); diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 30624954dec5..84737565c1fd 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -227,6 +227,7 @@ extern void dmar_msi_read(int irq, struct msi_msg *msg); extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); extern irqreturn_t dmar_fault(int irq, void *dev_id); -extern int arch_setup_dmar_msi(unsigned int irq); +extern int dmar_alloc_hwirq(int id, int node, void *arg); +extern void dmar_free_hwirq(int irq); #endif /* __DMAR_H__ */ -- cgit v1.2.3 From 0921f1da6425f05a1f56803069124b7ec13b79e2 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:42 +0800 Subject: x86/irq: Use hierarchical irqdomain to manage DMAR interrupts Enhance DMAR code to support hierarchical irqdomain, it helps to make the architecture more clear. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Link: http://lkml.kernel.org/r/1428905519-23704-21-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hw_irq.h | 7 ++ arch/x86/kernel/apic/msi.c | 153 ++++++++++++++++++++++++++---------------- 2 files changed, 103 insertions(+), 57 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 05829e973a2a..bf1725047c27 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -122,6 +122,7 @@ enum irq_alloc_type { X86_IRQ_ALLOC_TYPE_HPET, X86_IRQ_ALLOC_TYPE_MSI, X86_IRQ_ALLOC_TYPE_MSIX, + X86_IRQ_ALLOC_TYPE_DMAR, }; struct irq_alloc_info { @@ -153,6 +154,12 @@ struct irq_alloc_info { u32 ioapic_valid : 1; struct IO_APIC_route_entry *ioapic_entry; }; +#endif +#ifdef CONFIG_DMAR_TABLE + struct { + int dmar_id; + void *dmar_data; + }; #endif }; }; diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index ca6250439acc..f23d17d759b6 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -25,32 +25,6 @@ static struct irq_domain *msi_default_domain; -static void native_compose_msi_msg(struct irq_cfg *cfg, struct msi_msg *msg) -{ - msg->address_hi = MSI_ADDR_BASE_HI; - - if (x2apic_enabled()) - msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid); - - msg->address_lo = - MSI_ADDR_BASE_LO | - ((apic->irq_dest_mode == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL : - MSI_ADDR_DEST_MODE_LOGICAL) | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU : - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(cfg->dest_apicid); - - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED : - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(cfg->vector); -} - static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) { struct irq_cfg *cfg = irqd_cfg(data); @@ -87,6 +61,9 @@ static void msi_update_msg(struct msi_msg *msg, struct irq_data *irq_data) msg->data |= MSI_DATA_VECTOR(cfg->vector); msg->address_lo &= ~MSI_ADDR_DEST_ID_MASK; msg->address_lo |= MSI_ADDR_DEST_ID(cfg->dest_apicid); + if (x2apic_enabled()) + msg->address_hi = MSI_ADDR_BASE_HI | + MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid); } /* @@ -196,59 +173,121 @@ static int dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - struct irq_cfg *cfg = irqd_cfg(data); - unsigned int dest, irq = data->irq; + struct irq_data *parent = data->parent_data; struct msi_msg msg; int ret; - ret = apic_set_affinity(data, mask, &dest); - if (ret) - return ret; - - dmar_msi_read(irq, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest); - - dmar_msi_write(irq, &msg); + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret >= 0) { + dmar_msi_read(data->irq, &msg); + msi_update_msg(&msg, data); + dmar_msi_write(data->irq, &msg); + } - return IRQ_SET_MASK_OK_NOCOPY; + return ret; } -static struct irq_chip dmar_msi_type = { +static struct irq_chip dmar_msi_controller = { .name = "DMAR_MSI", .irq_unmask = dmar_msi_unmask, .irq_mask = dmar_msi_mask, - .irq_ack = apic_ack_edge, + .irq_ack = irq_chip_ack_parent, .irq_set_affinity = dmar_msi_set_affinity, - .irq_retrigger = apic_retrigger_irq, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg = irq_msi_compose_msg, .flags = IRQCHIP_SKIP_SET_WAKE, }; -int dmar_alloc_hwirq(int id, int node, void *arg) +static int dmar_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct irq_alloc_info *info = arg; + int ret; + + if (nr_irqs > 1 || !info || info->type != X86_IRQ_ALLOC_TYPE_DMAR) + return -EINVAL; + if (irq_find_mapping(domain, info->dmar_id)) { + pr_warn("IRQ for DMAR%d already exists.\n", info->dmar_id); + return -EEXIST; + } + + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); + if (ret >= 0) { + irq_domain_set_hwirq_and_chip(domain, virq, info->dmar_id, + &dmar_msi_controller, NULL); + irq_set_handler_data(virq, info->dmar_data); + __irq_set_handler(virq, handle_edge_irq, 0, "edge"); + } + + return ret; +} + +static void dmar_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + BUG_ON(nr_irqs > 1); + irq_domain_free_irqs_top(domain, virq, nr_irqs); +} + +static void dmar_domain_activate(struct irq_domain *domain, + struct irq_data *irq_data) { - int irq; struct msi_msg msg; - irq = irq_domain_alloc_irqs(NULL, 1, node, NULL); - if (irq > 0) { - irq_set_handler_data(irq, arg); - irq_set_chip_and_handler_name(irq, &dmar_msi_type, - handle_edge_irq, "edge"); - native_compose_msi_msg(irq_cfg(irq), &msg); - dmar_msi_write(irq, &msg); + BUG_ON(irq_chip_compose_msi_msg(irq_data, &msg)); + dmar_msi_write(irq_data->irq, &msg); +} + +static void dmar_domain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data) +{ + struct msi_msg msg; + + memset(&msg, 0, sizeof(msg)); + dmar_msi_write(irq_data->irq, &msg); +} + +static struct irq_domain_ops dmar_domain_ops = { + .alloc = dmar_domain_alloc, + .free = dmar_domain_free, + .activate = dmar_domain_activate, + .deactivate = dmar_domain_deactivate, +}; + +static struct irq_domain *dmar_get_irq_domain(void) +{ + static struct irq_domain *dmar_domain; + static DEFINE_MUTEX(dmar_lock); + + mutex_lock(&dmar_lock); + if (dmar_domain == NULL) { + dmar_domain = irq_domain_add_tree(NULL, &dmar_domain_ops, NULL); + if (dmar_domain) + dmar_domain->parent = x86_vector_domain; } + mutex_unlock(&dmar_lock); + + return dmar_domain; +} + +int dmar_alloc_hwirq(int id, int node, void *arg) +{ + struct irq_domain *domain = dmar_get_irq_domain(); + struct irq_alloc_info info; + + if (!domain) + return -1; + + init_irq_alloc_info(&info, NULL); + info.type = X86_IRQ_ALLOC_TYPE_DMAR; + info.dmar_id = id; + info.dmar_data = arg; - return irq; + return irq_domain_alloc_irqs(domain, 1, node, &info); } void dmar_free_hwirq(int irq) { - irq_set_handler_data(irq, NULL); - irq_set_handler(irq, NULL); irq_domain_free_irqs(irq, 1); } #endif -- cgit v1.2.3 From 49e07d8f28c05347f237146a9ec66f6d958db83e Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:43 +0800 Subject: x86/htirq: Use hierarchical irqdomain to manage Hypertransport interrupts We have slightly changed the architecture interfaces to support htirq PCI driver. It's safe because currently Hypertransport interrupt is only enabled on x86 platforms. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Link: http://lkml.kernel.org/r/1428905519-23704-22-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hw_irq.h | 13 ++++ arch/x86/kernel/apic/htirq.c | 161 +++++++++++++++++++++++++++++++----------- arch/x86/kernel/apic/vector.c | 1 + drivers/pci/htirq.c | 47 ++---------- include/linux/htirq.h | 24 +++++-- 5 files changed, 158 insertions(+), 88 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index bf1725047c27..5e0b031d1a7d 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -160,6 +160,14 @@ struct irq_alloc_info { int dmar_id; void *dmar_data; }; +#endif +#ifdef CONFIG_HT_IRQ + struct { + int ht_pos; + int ht_idx; + struct pci_dev *ht_dev; + void *ht_update; + }; #endif }; }; @@ -227,6 +235,11 @@ extern void arch_init_msi_domain(struct irq_domain *domain); #else static inline void arch_init_msi_domain(struct irq_domain *domain) { } #endif +#ifdef CONFIG_HT_IRQ +extern void arch_init_htirq_domain(struct irq_domain *domain); +#else +static inline void arch_init_htirq_domain(struct irq_domain *domain) { } +#endif /* Statistics */ extern atomic_t irq_err_count; diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c index b307ee7a7148..1cae104415ea 100644 --- a/arch/x86/kernel/apic/htirq.c +++ b/arch/x86/kernel/apic/htirq.c @@ -3,6 +3,8 @@ * * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo * Moved from arch/x86/kernel/apic/io_apic.c. + * Jiang Liu + * Add support of hierarchical irqdomain * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -19,70 +21,104 @@ #include #include +static struct irq_domain *htirq_domain; + /* * Hypertransport interrupt support */ -static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) -{ - struct ht_irq_msg msg; - - fetch_ht_irq_msg(irq, &msg); - - msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); - msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - - msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); - msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); - - write_ht_irq_msg(irq, &msg); -} - static int ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - struct irq_cfg *cfg = irqd_cfg(data); - unsigned int dest; + struct irq_data *parent = data->parent_data; int ret; - ret = apic_set_affinity(data, mask, &dest); - if (ret) - return ret; - - target_ht_irq(data->irq, dest, cfg->vector); - return IRQ_SET_MASK_OK_NOCOPY; + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret >= 0) { + struct ht_irq_msg msg; + struct irq_cfg *cfg = irqd_cfg(data); + + fetch_ht_irq_msg(data->irq, &msg); + msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | + HT_IRQ_LOW_DEST_ID_MASK); + msg.address_lo |= HT_IRQ_LOW_VECTOR(cfg->vector) | + HT_IRQ_LOW_DEST_ID(cfg->dest_apicid); + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid); + write_ht_irq_msg(data->irq, &msg); + } + + return ret; } static struct irq_chip ht_irq_chip = { .name = "PCI-HT", .irq_mask = mask_ht_irq, .irq_unmask = unmask_ht_irq, - .irq_ack = apic_ack_edge, + .irq_ack = irq_chip_ack_parent, .irq_set_affinity = ht_set_affinity, - .irq_retrigger = apic_retrigger_irq, + .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE, }; -int arch_alloc_ht_irq(struct pci_dev *dev) +static int htirq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) { - return irq_domain_alloc_irqs(NULL, 1, dev_to_node(&dev->dev), NULL); + struct ht_irq_cfg *ht_cfg; + struct irq_alloc_info *info = arg; + struct pci_dev *dev; + irq_hw_number_t hwirq; + int ret; + + if (nr_irqs > 1 || !info) + return -EINVAL; + + dev = info->ht_dev; + hwirq = (info->ht_idx & 0xFF) | + PCI_DEVID(dev->bus->number, dev->devfn) << 8 | + (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 24; + if (irq_find_mapping(domain, hwirq) > 0) + return -EEXIST; + + ht_cfg = kmalloc(sizeof(*ht_cfg), GFP_KERNEL); + if (!ht_cfg) + return -ENOMEM; + + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info); + if (ret < 0) { + kfree(ht_cfg); + return ret; + } + + /* Initialize msg to a value that will never match the first write. */ + ht_cfg->msg.address_lo = 0xffffffff; + ht_cfg->msg.address_hi = 0xffffffff; + ht_cfg->dev = info->ht_dev; + ht_cfg->update = info->ht_update; + ht_cfg->pos = info->ht_pos; + ht_cfg->idx = 0x10 + (info->ht_idx * 2); + irq_domain_set_info(domain, virq, hwirq, &ht_irq_chip, ht_cfg, + handle_edge_irq, ht_cfg, "edge"); + + return 0; } -void arch_free_ht_irq(int irq) +static void htirq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) { - irq_domain_free_irqs(irq, 1); + struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); + + BUG_ON(nr_irqs != 1); + kfree(irq_data->chip_data); + irq_domain_free_irqs_top(domain, virq, nr_irqs); } -int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) +static void htirq_domain_activate(struct irq_domain *domain, + struct irq_data *irq_data) { - struct irq_cfg *cfg; struct ht_irq_msg msg; + struct irq_cfg *cfg = irqd_cfg(irq_data); - if (disable_apic) - return -ENXIO; - - cfg = irq_cfg(irq); msg.address_hi = HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid); - msg.address_lo = HT_IRQ_LOW_BASE | HT_IRQ_LOW_DEST_ID(cfg->dest_apicid) | @@ -95,13 +131,56 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) HT_IRQ_LOW_MT_FIXED : HT_IRQ_LOW_MT_ARBITRATED) | HT_IRQ_LOW_IRQ_MASKED; + write_ht_irq_msg(irq_data->irq, &msg); +} - write_ht_irq_msg(irq, &msg); +static void htirq_domain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data) +{ + struct ht_irq_msg msg; - irq_set_chip_and_handler_name(irq, &ht_irq_chip, - handle_edge_irq, "edge"); + memset(&msg, 0, sizeof(msg)); + write_ht_irq_msg(irq_data->irq, &msg); +} - dev_dbg(&dev->dev, "irq %d for HT\n", irq); +static struct irq_domain_ops htirq_domain_ops = { + .alloc = htirq_domain_alloc, + .free = htirq_domain_free, + .activate = htirq_domain_activate, + .deactivate = htirq_domain_deactivate, +}; - return 0; +void arch_init_htirq_domain(struct irq_domain *parent) +{ + if (disable_apic) + return; + + htirq_domain = irq_domain_add_tree(NULL, &htirq_domain_ops, NULL); + if (!htirq_domain) + pr_warn("failed to initialize irqdomain for HTIRQ.\n"); + else + htirq_domain->parent = parent; +} + +int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev, + ht_irq_update_t *update) +{ + struct irq_alloc_info info; + + if (!htirq_domain) + return -ENOSYS; + + init_irq_alloc_info(&info, NULL); + info.ht_idx = idx; + info.ht_pos = pos; + info.ht_dev = dev; + info.ht_update = update; + + return irq_domain_alloc_irqs(htirq_domain, 1, dev_to_node(&dev->dev), + &info); +} + +void arch_teardown_ht_irq(unsigned int irq) +{ + irq_domain_free_irqs(irq, 1); } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index a8d82896be75..b4b6b5a13440 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -365,6 +365,7 @@ int __init arch_early_irq_init(void) irq_set_default_host(x86_vector_domain); arch_init_msi_domain(x86_vector_domain); + arch_init_htirq_domain(x86_vector_domain); return arch_early_ioapic_init(); } diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c index ceb0ebeb7b5f..7eb4109a3df4 100644 --- a/drivers/pci/htirq.c +++ b/drivers/pci/htirq.c @@ -23,20 +23,11 @@ */ static DEFINE_SPINLOCK(ht_irq_lock); -struct ht_irq_cfg { - struct pci_dev *dev; - /* Update callback used to cope with buggy hardware */ - ht_irq_update_t *update; - unsigned pos; - unsigned idx; - struct ht_irq_msg msg; -}; - - void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg) { struct ht_irq_cfg *cfg = irq_get_handler_data(irq); unsigned long flags; + spin_lock_irqsave(&ht_irq_lock, flags); if (cfg->msg.address_lo != msg->address_lo) { pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx); @@ -55,6 +46,7 @@ void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg) void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg) { struct ht_irq_cfg *cfg = irq_get_handler_data(irq); + *msg = cfg->msg; } @@ -86,7 +78,6 @@ void unmask_ht_irq(struct irq_data *data) */ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update) { - struct ht_irq_cfg *cfg; int max_irq, pos, irq; unsigned long flags; u32 data; @@ -105,29 +96,9 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update) if (idx > max_irq) return -EINVAL; - cfg = kmalloc(sizeof(*cfg), GFP_KERNEL); - if (!cfg) - return -ENOMEM; - - cfg->dev = dev; - cfg->update = update; - cfg->pos = pos; - cfg->idx = 0x10 + (idx * 2); - /* Initialize msg to a value that will never match the first write. */ - cfg->msg.address_lo = 0xffffffff; - cfg->msg.address_hi = 0xffffffff; - - irq = arch_alloc_ht_irq(dev); - if (irq <= 0) { - kfree(cfg); - return -EBUSY; - } - irq_set_handler_data(irq, cfg); - - if (arch_setup_ht_irq(irq, dev) < 0) { - ht_destroy_irq(irq); - return -EBUSY; - } + irq = arch_setup_ht_irq(idx, pos, dev, update); + if (irq > 0) + dev_dbg(&dev->dev, "irq %d for HT\n", irq); return irq; } @@ -158,12 +129,6 @@ EXPORT_SYMBOL(ht_create_irq); */ void ht_destroy_irq(unsigned int irq) { - struct ht_irq_cfg *cfg; - - cfg = irq_get_handler_data(irq); - irq_set_chip(irq, NULL); - irq_set_handler_data(irq, NULL); - arch_free_ht_irq(irq); - kfree(cfg); + arch_teardown_ht_irq(irq); } EXPORT_SYMBOL(ht_destroy_irq); diff --git a/include/linux/htirq.h b/include/linux/htirq.h index 5caa51b7b95c..d4a527e58434 100644 --- a/include/linux/htirq.h +++ b/include/linux/htirq.h @@ -1,26 +1,38 @@ #ifndef LINUX_HTIRQ_H #define LINUX_HTIRQ_H +struct pci_dev; +struct irq_data; + struct ht_irq_msg { u32 address_lo; /* low 32 bits of the ht irq message */ u32 address_hi; /* high 32 bits of the it irq message */ }; +typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq, + struct ht_irq_msg *msg); + +struct ht_irq_cfg { + struct pci_dev *dev; + /* Update callback used to cope with buggy hardware */ + ht_irq_update_t *update; + unsigned pos; + unsigned idx; + struct ht_irq_msg msg; +}; + /* Helper functions.. */ void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); -struct irq_data; void mask_ht_irq(struct irq_data *data); void unmask_ht_irq(struct irq_data *data); /* The arch hook for getting things started */ -int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev); -int arch_alloc_ht_irq(struct pci_dev *dev); -void arch_free_ht_irq(int irq); +int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev, + ht_irq_update_t *update); +void arch_teardown_ht_irq(unsigned int irq); /* For drivers of buggy hardware */ -typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq, - struct ht_irq_msg *msg); int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update); #endif /* LINUX_HTIRQ_H */ -- cgit v1.2.3 From 43fe1abc18a237581663a51da4c2f8e57684c223 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:44 +0800 Subject: x86/uv: Use hierarchical irqdomain to manage UV interrupts Enhance UV code to support hierarchical irqdomain, it helps to make the architecture more clear. We construct hwirq based on mmr_blade and mmr_offset, but mmr_offset has type unsigned long, it may exceed the range of irq_hw_number_t. So help about the way to construct hwirq based on mmr_blade and mmr_offset is welcomed! Folded a patch from Dimitri Sivanich to fix a bug on UV platforms, please refer to: http://lkml.org/lkml/2014/12/16/351 Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Russ Anderson Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Link: http://lkml.kernel.org/r/1428905519-23704-23-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hw_irq.h | 9 ++ arch/x86/platform/uv/uv_irq.c | 290 ++++++++++++++++-------------------------- 2 files changed, 119 insertions(+), 180 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 5e0b031d1a7d..75d0db1db8a0 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -123,6 +123,7 @@ enum irq_alloc_type { X86_IRQ_ALLOC_TYPE_MSI, X86_IRQ_ALLOC_TYPE_MSIX, X86_IRQ_ALLOC_TYPE_DMAR, + X86_IRQ_ALLOC_TYPE_UV, }; struct irq_alloc_info { @@ -168,6 +169,14 @@ struct irq_alloc_info { struct pci_dev *ht_dev; void *ht_update; }; +#endif +#ifdef CONFIG_X86_UV + struct { + int uv_limit; + int uv_blade; + unsigned long uv_offset; + char *uv_name; + }; #endif }; }; diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 474912d03f40..54af6e388a12 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -19,17 +19,31 @@ #include /* MMR offset and pnode of hub sourcing interrupts for a given irq */ -struct uv_irq_2_mmr_pnode{ - struct rb_node list; +struct uv_irq_2_mmr_pnode { unsigned long offset; int pnode; - int irq; }; -static DEFINE_SPINLOCK(uv_irq_lock); -static struct rb_root uv_irq_root; +static void uv_program_mmr(struct irq_cfg *cfg, struct uv_irq_2_mmr_pnode *info) +{ + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = cfg->dest_apicid; -static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool); + uv_write_global_mmr64(info->pnode, info->offset, mmr_value); +} static void uv_noop(struct irq_data *data) { } @@ -38,6 +52,24 @@ static void uv_ack_apic(struct irq_data *data) ack_APIC_irq(); } +static int +uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_data *parent = data->parent_data; + struct irq_cfg *cfg = irqd_cfg(data); + int ret; + + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret >= 0) { + uv_program_mmr(cfg, data->chip_data); + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + } + + return ret; +} + static struct irq_chip uv_irq_chip = { .name = "UV-CORE", .irq_mask = uv_noop, @@ -46,179 +78,99 @@ static struct irq_chip uv_irq_chip = { .irq_set_affinity = uv_set_irq_affinity, }; -/* - * Add offset and pnode information of the hub sourcing interrupts to the - * rb tree for a specific irq. - */ -static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) +static int uv_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) { - struct rb_node **link = &uv_irq_root.rb_node; - struct rb_node *parent = NULL; - struct uv_irq_2_mmr_pnode *n; - struct uv_irq_2_mmr_pnode *e; - unsigned long irqflags; - - n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, - uv_blade_to_memory_nid(blade)); - if (!n) + struct uv_irq_2_mmr_pnode *chip_data; + struct irq_alloc_info *info = arg; + struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); + int ret; + + if (nr_irqs > 1 || !info || info->type != X86_IRQ_ALLOC_TYPE_UV) + return -EINVAL; + + chip_data = kmalloc_node(sizeof(*chip_data), GFP_KERNEL, + irq_data->node); + if (!chip_data) return -ENOMEM; - n->irq = irq; - n->offset = offset; - n->pnode = uv_blade_to_pnode(blade); - spin_lock_irqsave(&uv_irq_lock, irqflags); - /* Find the right place in the rbtree: */ - while (*link) { - parent = *link; - e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); - - if (unlikely(irq == e->irq)) { - /* irq entry exists */ - e->pnode = uv_blade_to_pnode(blade); - e->offset = offset; - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - kfree(n); - return 0; - } - - if (irq < e->irq) - link = &(*link)->rb_left; + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); + if (ret >= 0) { + if (info->uv_limit == UV_AFFINITY_CPU) + irq_set_status_flags(virq, IRQ_NO_BALANCING); else - link = &(*link)->rb_right; + irq_set_status_flags(virq, IRQ_MOVE_PCNTXT); + + chip_data->pnode = uv_blade_to_pnode(info->uv_blade); + chip_data->offset = info->uv_offset; + irq_domain_set_info(domain, virq, virq, &uv_irq_chip, chip_data, + handle_percpu_irq, NULL, info->uv_name); + } else { + kfree(chip_data); } - /* Insert the node into the rbtree. */ - rb_link_node(&n->list, parent, link); - rb_insert_color(&n->list, &uv_irq_root); - - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - return 0; + return ret; } -/* Retrieve offset and pnode information from the rb tree for a specific irq */ -int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) +static void uv_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) { - struct uv_irq_2_mmr_pnode *e; - struct rb_node *n; - unsigned long irqflags; - - spin_lock_irqsave(&uv_irq_lock, irqflags); - n = uv_irq_root.rb_node; - while (n) { - e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); - - if (e->irq == irq) { - *offset = e->offset; - *pnode = e->pnode; - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - return 0; - } - - if (irq < e->irq) - n = n->rb_left; - else - n = n->rb_right; - } - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - return -1; + struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); + + BUG_ON(nr_irqs != 1); + kfree(irq_data->chip_data); + irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT); + irq_clear_status_flags(virq, IRQ_NO_BALANCING); + irq_domain_free_irqs_top(domain, virq, nr_irqs); } /* * Re-target the irq to the specified CPU and enable the specified MMR located * on the specified blade to allow the sending of MSIs to the specified CPU. */ -static int -arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, - unsigned long mmr_offset, int limit) +static void uv_domain_activate(struct irq_domain *domain, + struct irq_data *irq_data) { - struct irq_cfg *cfg = irq_cfg(irq); - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - int mmr_pnode; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != - sizeof(unsigned long)); - - if (limit == UV_AFFINITY_CPU) - irq_set_status_flags(irq, IRQ_NO_BALANCING); - else - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - - irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, - irq_name); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = cfg->dest_apicid; - - mmr_pnode = uv_blade_to_pnode(mmr_blade); - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); - - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return irq; + uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); } /* * Disable the specified MMR located on the specified blade so that MSIs are * longer allowed to be sent. */ -static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) +static void uv_domain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data) { unsigned long mmr_value; struct uv_IO_APIC_route_entry *entry; - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != - sizeof(unsigned long)); - mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; entry->mask = 1; - - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); } -static int -uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - struct irq_cfg *cfg = irqd_cfg(data); - unsigned int dest; - unsigned long mmr_value, mmr_offset; - struct uv_IO_APIC_route_entry *entry; - int mmr_pnode; - - if (apic_set_affinity(data, mask, &dest)) - return -1; - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = dest; - - /* Get previously stored MMR and pnode of hub sourcing interrupts */ - if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode)) - return -1; - - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); +static struct irq_domain_ops uv_domain_ops = { + .alloc = uv_domain_alloc, + .free = uv_domain_free, + .activate = uv_domain_activate, + .deactivate = uv_domain_deactivate, +}; - if (cfg->move_in_progress) - send_cleanup_vector(cfg); +static struct irq_domain *uv_get_irq_domain(void) +{ + static struct irq_domain *uv_domain; + static DEFINE_MUTEX(uv_lock); + + mutex_lock(&uv_lock); + if (uv_domain == NULL) { + uv_domain = irq_domain_add_tree(NULL, &uv_domain_ops, NULL); + if (uv_domain) + uv_domain->parent = x86_vector_domain; + } + mutex_unlock(&uv_lock); - return IRQ_SET_MASK_OK_NOCOPY; + return uv_domain; } /* @@ -229,23 +181,21 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, unsigned long mmr_offset, int limit) { - int ret, irq; struct irq_alloc_info info; + struct irq_domain *domain = uv_get_irq_domain(); - init_irq_alloc_info(&info, cpumask_of(cpu)); - irq = irq_domain_alloc_irqs(NULL, 1, uv_blade_to_memory_nid(mmr_blade), - &info); - if (irq <= 0) - return -EBUSY; - - ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, - limit); - if (ret == irq) - uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); - else - irq_domain_free_irqs(irq, 1); + if (!domain) + return -ENOMEM; - return ret; + init_irq_alloc_info(&info, cpumask_of(cpu)); + info.type = X86_IRQ_ALLOC_TYPE_UV; + info.uv_limit = limit; + info.uv_blade = mmr_blade; + info.uv_offset = mmr_offset; + info.uv_name = irq_name; + + return irq_domain_alloc_irqs(domain, 1, + uv_blade_to_memory_nid(mmr_blade), &info); } EXPORT_SYMBOL_GPL(uv_setup_irq); @@ -258,26 +208,6 @@ EXPORT_SYMBOL_GPL(uv_setup_irq); */ void uv_teardown_irq(unsigned int irq) { - struct uv_irq_2_mmr_pnode *e; - struct rb_node *n; - unsigned long irqflags; - - spin_lock_irqsave(&uv_irq_lock, irqflags); - n = uv_irq_root.rb_node; - while (n) { - e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); - if (e->irq == irq) { - arch_disable_uv_irq(e->pnode, e->offset); - rb_erase(n, &uv_irq_root); - kfree(e); - break; - } - if (irq < e->irq) - n = n->rb_left; - else - n = n->rb_right; - } - spin_unlock_irqrestore(&uv_irq_lock, irqflags); irq_domain_free_irqs(irq, 1); } EXPORT_SYMBOL_GPL(uv_teardown_irq); -- cgit v1.2.3 From 0cddfc79462423bf86cbe34560bad07f4a25ded6 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:50 +0800 Subject: irq_remapping: Remove unused function irq_remapping_print_chip() Now there's no user of irq_remapping_print_chip() anymore, so remove it. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Greg Kroah-Hartman Cc: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Joerg Roedel Link: http://lkml.kernel.org/r/1428905519-23704-29-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/irq_remapping.h | 2 -- drivers/iommu/irq_remapping.c | 13 ------------- 2 files changed, 15 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index bacac1052262..52e1a1f337b2 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -59,7 +59,6 @@ extern struct irq_domain * irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info); extern struct irq_domain * irq_remapping_get_irq_domain(struct irq_alloc_info *info); -extern void irq_remapping_print_chip(struct irq_data *data, struct seq_file *p); /* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */ extern struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent); @@ -115,6 +114,5 @@ irq_remapping_get_irq_domain(struct irq_alloc_info *info) return NULL; } -#define irq_remapping_print_chip NULL #endif /* CONFIG_IRQ_REMAP */ #endif /* __X86_IRQ_REMAPPING_H */ diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 3eaa822c30a9..558c804dbe11 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -204,19 +204,6 @@ static void ir_ack_apic_level(struct irq_data *data) eoi_ioapic_irq(data->irq, irqd_cfg(data)); } -void irq_remapping_print_chip(struct irq_data *data, struct seq_file *p) -{ - /* - * Assume interrupt is remapped if the parent irqdomain isn't the - * vector domain, which is true for MSI, HPET and IOAPIC on x86 - * platforms. - */ - if (data->domain && data->domain->parent != arch_get_ir_parent_domain()) - seq_printf(p, " IR-%s", data->chip->name); - else - seq_printf(p, " %s", data->chip->name); -} - static void ir_print_prefix(struct irq_data *data, struct seq_file *p) { seq_printf(p, " IR-%s", data->chip->name); -- cgit v1.2.3 From 4e69d7eab4c24aa88fb0ec99fad7feac254d9ece Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:53 +0800 Subject: x86/irq: Remove unused pre_init_apic_IRQ0() Now there's no user of pre_init_apic_IRQ0(), so remove it. Signed-off-by: Jiang Liu Tested-by: Andy Shevchenko Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Jan Beulich Cc: Grant Likely Link: http://lkml.kernel.org/r/1428905519-23704-32-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 1 - arch/x86/kernel/apic/io_apic.c | 17 ----------------- 2 files changed, 18 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 2f91685fe1cd..c976de126a91 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -204,7 +204,6 @@ extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq); extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq); extern int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node); -extern void __init pre_init_apic_IRQ0(void); extern void mp_save_irq(struct mpc_intsrc *m); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 56d532106ef3..540598c77e55 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3091,20 +3091,3 @@ int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node) return ret; } - -/* Enable IOAPIC early just for system timer */ -void __init pre_init_apic_IRQ0(void) -{ - struct io_apic_irq_attr attr = { 0, 0, 0, 0 }; - - printk(KERN_INFO "Early APIC setup for system timer0\n"); -#ifndef CONFIG_SMP - physid_set_mask_of_physid(boot_cpu_physical_apicid, - &phys_cpu_present_map); -#endif - setup_local_APIC(); - - io_apic_setup_irq_pin(0, 0, &attr); - irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, - "edge"); -} -- cgit v1.2.3 From c4d05a2c354b15965c9b2a5f46016a5d9f43e224 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:54 +0800 Subject: x86/irq: Prepare IOAPIC interfaces to support hierarchical irqdomains Introduce helper functions to manipulate struct irq_alloc_info for IOAPIC. Also add an extra parameter to IOAPIC interfaces to prepare for hierarchical irqdomain. Function mp_set_gsi_attr() will be removed once we have switched to hierarchical irqdomains. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Len Brown Cc: Pavel Machek Cc: Jan Beulich Cc: Grant Likely Cc: David Cohen Link: http://lkml.kernel.org/r/1428905519-23704-33-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 14 ++++++-- arch/x86/kernel/acpi/boot.c | 6 ++-- arch/x86/kernel/apic/io_apic.c | 39 ++++++++++++++-------- arch/x86/pci/intel_mid_pci.c | 4 ++- .../platform/intel-mid/device_libs/platform_wdt.c | 4 ++- arch/x86/platform/intel-mid/sfi.c | 9 +++-- 6 files changed, 54 insertions(+), 22 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index c976de126a91..1fbeda50a77d 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -95,6 +95,8 @@ struct IR_IO_APIC_route_entry { index : 15; } __attribute__ ((packed)); +struct irq_alloc_info; + #define IOAPIC_AUTO -1 #define IOAPIC_EDGE 0 #define IOAPIC_LEVEL 1 @@ -194,7 +196,8 @@ extern u32 gsi_top; extern int mp_find_ioapic(u32 gsi); extern int mp_find_ioapic_pin(int ioapic, u32 gsi); extern u32 mp_pin_to_gsi(int ioapic, int pin); -extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags); +extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, + struct irq_alloc_info *info); extern void mp_unmap_irq(int irq); extern int mp_register_ioapic(int id, u32 address, u32 gsi_base, struct ioapic_domain_cfg *cfg); @@ -203,6 +206,8 @@ extern int mp_ioapic_registered(u32 gsi_base); extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq); extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq); +extern void ioapic_set_alloc_attr(struct irq_alloc_info *info, + int node, int trigger, int polarity); extern int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node); extern void mp_save_irq(struct mpc_intsrc *m); @@ -253,7 +258,12 @@ static inline void print_IO_APICs(void) {} #define gsi_top (NR_IRQS_LEGACY) static inline int mp_find_ioapic(u32 gsi) { return 0; } static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; } -static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; } +static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, + struct irq_alloc_info *info) +{ + return gsi; +} + static inline void mp_unmap_irq(int irq) { } static inline int save_ioapic_entries(void) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 803b684676ff..a43a4d3c60e1 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -404,6 +404,7 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) { int irq, node; + struct irq_alloc_info info; if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) return gsi; @@ -416,7 +417,8 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, return -1; } - irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC); + ioapic_set_alloc_attr(&info, node, trigger, polarity); + irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info); if (irq < 0) return irq; @@ -434,7 +436,7 @@ static void mp_unregister_gsi(u32 gsi) if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) return; - irq = mp_map_gsi_to_irq(gsi, 0); + irq = mp_map_gsi_to_irq(gsi, 0, NULL); if (irq > 0) mp_unmap_irq(irq); } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 540598c77e55..5c953bb96ecf 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -938,7 +938,19 @@ static int irq_trigger(int idx) return trigger; } -static int alloc_irq_from_domain(struct irq_domain *domain, u32 gsi, int pin) +void ioapic_set_alloc_attr(struct irq_alloc_info *info, int node, + int trigger, int polarity) +{ + init_irq_alloc_info(info, NULL); + info->type = X86_IRQ_ALLOC_TYPE_IOAPIC; + info->ioapic_node = node; + info->ioapic_trigger = trigger; + info->ioapic_polarity = polarity; + info->ioapic_valid = 1; +} + +static int alloc_irq_from_domain(struct irq_domain *domain, u32 gsi, int pin, + struct irq_alloc_info *info) { int irq = -1; int ioapic = (int)(long)domain->host_data; @@ -971,11 +983,11 @@ static int alloc_irq_from_domain(struct irq_domain *domain, u32 gsi, int pin) } static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, - unsigned int flags) + unsigned int flags, struct irq_alloc_info *info) { int irq; struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); - struct mp_pin_info *info = mp_pin_info(ioapic, pin); + struct mp_pin_info *pinfo = mp_pin_info(ioapic, pin); if (!domain) return -1; @@ -997,30 +1009,30 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) { irq = mp_irqs[idx].srcbusirq; if (flags & IOAPIC_MAP_ALLOC) { - if (info->count == 0 && + if (pinfo->count == 0 && mp_irqdomain_map(domain, irq, pin) != 0) irq = -1; /* special handling for timer IRQ0 */ if (irq == 0) - info->count++; + pinfo->count++; } } else { irq = irq_find_mapping(domain, pin); if (irq <= 0 && (flags & IOAPIC_MAP_ALLOC)) - irq = alloc_irq_from_domain(domain, gsi, pin); + irq = alloc_irq_from_domain(domain, gsi, pin, info); } if (flags & IOAPIC_MAP_ALLOC) { /* special handling for legacy IRQs */ - if (irq < nr_legacy_irqs() && info->count == 1 && + if (irq < nr_legacy_irqs() && pinfo->count == 1 && mp_irqdomain_map(domain, irq, pin) != 0) irq = -1; if (irq > 0) - info->count++; - else if (info->count == 0) - info->set = 0; + pinfo->count++; + else if (pinfo->count == 0) + pinfo->set = 0; } mutex_unlock(&ioapic_mutex); @@ -1058,10 +1070,11 @@ static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags) } #endif - return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags); + return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, NULL); } -int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) +int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, + struct irq_alloc_info *info) { int ioapic, pin, idx; @@ -1074,7 +1087,7 @@ int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) if ((flags & IOAPIC_MAP_CHECK) && idx < 0) return -1; - return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags); + return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, info); } void mp_unmap_irq(int irq) diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 852aa4c92da0..57b5719b617b 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -208,6 +208,7 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, static int intel_mid_pci_irq_enable(struct pci_dev *dev) { + struct irq_alloc_info info; int polarity; if (dev->irq_managed && dev->irq > 0) @@ -217,6 +218,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) polarity = 0; /* active high */ else polarity = 1; /* active low */ + ioapic_set_alloc_attr(&info, dev_to_node(&dev->dev), 1, polarity); /* * MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to @@ -224,7 +226,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) */ if (mp_set_gsi_attr(dev->irq, 1, polarity, dev_to_node(&dev->dev))) return -EBUSY; - if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC) < 0) + if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC, &info) < 0) return -EBUSY; dev->irq_managed = 1; diff --git a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c index 0b283d4d0ad7..de0009f6d555 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c @@ -27,6 +27,7 @@ static struct platform_device wdt_dev = { static int tangier_probe(struct platform_device *pdev) { int gsi; + struct irq_alloc_info info; struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data; if (!pdata) @@ -34,8 +35,9 @@ static int tangier_probe(struct platform_device *pdev) /* IOAPIC builds identity mapping between GSI and IRQ on MID */ gsi = pdata->irq; + ioapic_set_alloc_attr(&info, cpu_to_node(0), 1, 0); if (mp_set_gsi_attr(gsi, 1, 0, cpu_to_node(0)) || - mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC) <= 0) { + mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info) <= 0) { dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n", gsi); return -EINVAL; diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index 9a16749935d4..7d17355d820e 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -104,7 +104,7 @@ int __init sfi_parse_mtmr(struct sfi_table_header *table) mp_irq.dstapic = MP_APIC_ALL; mp_irq.dstirq = pentry->irq; mp_save_irq(&mp_irq); - mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC); + mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC, NULL); } return 0; @@ -175,7 +175,7 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table) mp_irq.dstapic = MP_APIC_ALL; mp_irq.dstirq = pentry->irq; mp_save_irq(&mp_irq); - mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC); + mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC, NULL); } return 0; } @@ -434,6 +434,7 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) struct devs_id *dev = NULL; int num, i, ret; int polarity; + struct irq_alloc_info info; sb = (struct sfi_table_simple *)table; num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry); @@ -467,9 +468,11 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) polarity = 1; } + ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 1, polarity); ret = mp_set_gsi_attr(irq, 1, polarity, NUMA_NO_NODE); if (ret == 0) - ret = mp_map_gsi_to_irq(irq, IOAPIC_MAP_ALLOC); + ret = mp_map_gsi_to_irq(irq, IOAPIC_MAP_ALLOC, + &info); WARN_ON(ret < 0); } -- cgit v1.2.3 From 49c7e60022912d10da88ba67e8eb2927f1143f6a Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:55 +0800 Subject: x86/irq: Implement callbacks to enable hierarchical irqdomains on IOAPICs Implement required callbacks to prepare for enabling hierarchical irqdomains on IOAPICs. After the conversion we can remove quite some code from the old implementation. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Jan Beulich Cc: Grant Likely Link: http://lkml.kernel.org/r/1428905519-23704-34-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 10 +++ arch/x86/kernel/apic/io_apic.c | 159 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 166 insertions(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 1fbeda50a77d..ecc192624eaf 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -96,6 +96,7 @@ struct IR_IO_APIC_route_entry { } __attribute__ ((packed)); struct irq_alloc_info; +struct irq_data; #define IOAPIC_AUTO -1 #define IOAPIC_EDGE 0 @@ -206,6 +207,15 @@ extern int mp_ioapic_registered(u32 gsi_base); extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq); extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq); +extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg); +extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs); +extern void mp_irqdomain_activate(struct irq_domain *domain, + struct irq_data *irq_data); +extern void mp_irqdomain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data); +extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); extern void ioapic_set_alloc_attr(struct irq_alloc_info *info, int node, int trigger, int polarity); extern int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5c953bb96ecf..3406dbec1570 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -78,6 +78,13 @@ static DEFINE_MUTEX(ioapic_mutex); static unsigned int ioapic_dynirq_base; static int ioapic_initialized; +struct mp_chip_data { + struct IO_APIC_route_entry entry; + int trigger; + int polarity; + bool isa_irq; +}; + struct mp_pin_info { int trigger; int polarity; @@ -949,11 +956,28 @@ void ioapic_set_alloc_attr(struct irq_alloc_info *info, int node, info->ioapic_valid = 1; } +static void mp_register_handler(unsigned int irq, unsigned long trigger) +{ + irq_flow_handler_t hdl; + bool fasteoi; + + if (trigger) { + irq_set_status_flags(irq, IRQ_LEVEL); + fasteoi = true; + } else { + irq_clear_status_flags(irq, IRQ_LEVEL); + fasteoi = false; + } + + hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; + __irq_set_handler(irq, hdl, 0, fasteoi ? "fasteoi" : "edge"); +} + static int alloc_irq_from_domain(struct irq_domain *domain, u32 gsi, int pin, struct irq_alloc_info *info) { int irq = -1; - int ioapic = (int)(long)domain->host_data; + int ioapic = mp_irqdomain_ioapic_idx(domain); int type = ioapics[ioapic].irqdomain_cfg.type; switch (type) { @@ -3029,7 +3053,7 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq) { - int ioapic = (int)(long)domain->host_data; + int ioapic = mp_irqdomain_ioapic_idx(domain); struct mp_pin_info *info = mp_pin_info(ioapic, hwirq); struct io_apic_irq_attr attr; @@ -3067,7 +3091,7 @@ void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq) { struct irq_data *data = irq_get_irq_data(virq); struct irq_cfg *cfg = irq_cfg(virq); - int ioapic = (int)(long)domain->host_data; + int ioapic = mp_irqdomain_ioapic_idx(domain); int pin = (int)data->hwirq; ioapic_mask_entry(ioapic, pin); @@ -3076,6 +3100,130 @@ void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq) arch_teardown_hwirq(virq); } +static void mp_irqdomain_get_attr(u32 gsi, struct mp_chip_data *data, + struct irq_alloc_info *info) +{ + if (info && info->ioapic_valid) { + data->trigger = info->ioapic_trigger; + data->polarity = info->ioapic_polarity; + } else if (acpi_get_override_irq(gsi, &data->trigger, + &data->polarity) < 0) { + /* PCI interrupts are always polarity one level triggered. */ + data->trigger = 1; + data->polarity = 1; + } +} + +static void mp_setup_entry(struct irq_cfg *cfg, struct mp_chip_data *data, + struct IO_APIC_route_entry *entry) +{ + memset(entry, 0, sizeof(*entry)); + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->dest = cfg->dest_apicid; + entry->vector = cfg->vector; + entry->mask = 0; /* enable IRQ */ + entry->trigger = data->trigger; + entry->polarity = data->polarity; + /* + * Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (data->trigger) + entry->mask = 1; +} + +int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + int ret, ioapic, pin; + struct irq_cfg *cfg; + struct irq_data *irq_data; + struct mp_chip_data *data; + struct irq_alloc_info *info = arg; + + if (!info || nr_irqs > 1) + return -EINVAL; + irq_data = irq_domain_get_irq_data(domain, virq); + if (!irq_data) + return -EINVAL; + + ioapic = mp_irqdomain_ioapic_idx(domain); + pin = info->ioapic_pin; + if (irq_find_mapping(domain, (irq_hw_number_t)pin) > 0) + return -EEXIST; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + info->ioapic_entry = &data->entry; + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info); + if (ret < 0) { + kfree(data); + return ret; + } + + irq_data->hwirq = info->ioapic_pin; + irq_data->chip = &ioapic_chip; + irq_data->chip_data = data; + mp_irqdomain_get_attr(mp_pin_to_gsi(ioapic, pin), data, info); + + cfg = irqd_cfg(irq_data); + add_pin_to_irq_node(cfg, info->ioapic_node, ioapic, pin); + if (info->ioapic_entry) + mp_setup_entry(cfg, data, info->ioapic_entry); + mp_register_handler(virq, data->trigger); + if (virq < nr_legacy_irqs()) + legacy_pic->mask(virq); + + apic_printk(APIC_VERBOSE, KERN_DEBUG + "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i Dest:%d)\n", + ioapic, mpc_ioapic_id(ioapic), pin, cfg->vector, + virq, data->trigger, data->polarity, cfg->dest_apicid); + + return 0; +} + +void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_cfg *cfg = irq_cfg(virq); + struct irq_data *irq_data; + + BUG_ON(nr_irqs != 1); + irq_data = irq_domain_get_irq_data(domain, virq); + if (irq_data && irq_data->chip_data) { + __remove_pin_from_irq(cfg, mp_irqdomain_ioapic_idx(domain), + (int)irq_data->hwirq); + WARN_ON(!list_empty(&cfg->irq_2_pin)); + kfree(irq_data->chip_data); + } + irq_domain_free_irqs_top(domain, virq, nr_irqs); +} + +void mp_irqdomain_activate(struct irq_domain *domain, + struct irq_data *irq_data) +{ + unsigned long flags; + struct irq_pin_list *entry; + struct mp_chip_data *data = irq_data->chip_data; + struct irq_cfg *cfg = irqd_cfg(irq_data); + + raw_spin_lock_irqsave(&ioapic_lock, flags); + for_each_irq_pin(entry, cfg->irq_2_pin) + __ioapic_write_entry(entry->apic, entry->pin, data->entry); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); +} + +void mp_irqdomain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data) +{ + /* It won't be called for IRQ with multiple IOAPIC pins associated */ + ioapic_mask_entry(mp_irqdomain_ioapic_idx(domain), + (int)irq_data->hwirq); +} + int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node) { int ret = 0; @@ -3104,3 +3252,8 @@ int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node) return ret; } + +int mp_irqdomain_ioapic_idx(struct irq_domain *domain) +{ + return (int)(long)domain->host_data; +} -- cgit v1.2.3 From 5ad274d41c1b3f3ccf73591078efaa8ed6828a8d Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:29:38 +0800 Subject: x86/irq: Remove unused old IOAPIC irqdomain interfaces Now we have converted to hierarchical irqdomain, so remove unused old IOAPIC interfaces and code. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Grant Likely Link: http://lkml.kernel.org/r/1428978610-28986-2-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 4 - arch/x86/kernel/apic/io_apic.c | 202 +---------------------------------------- 2 files changed, 1 insertion(+), 205 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index ecc192624eaf..705c425c9c3d 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -204,9 +204,6 @@ extern int mp_register_ioapic(int id, u32 address, u32 gsi_base, struct ioapic_domain_cfg *cfg); extern int mp_unregister_ioapic(u32 gsi_base); extern int mp_ioapic_registered(u32 gsi_base); -extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq); -extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq); extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *arg); extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, @@ -218,7 +215,6 @@ extern void mp_irqdomain_deactivate(struct irq_domain *domain, extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); extern void ioapic_set_alloc_attr(struct irq_alloc_info *info, int node, int trigger, int polarity); -extern int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node); extern void mp_save_irq(struct mpc_intsrc *m); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ba50f8d6f6b0..523b326d71c2 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -89,7 +89,6 @@ struct mp_chip_data { struct mp_pin_info { int trigger; int polarity; - int node; int set; u32 count; }; @@ -1310,30 +1309,6 @@ static inline int IO_APIC_irq_trigger(int irq) } #endif -static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, - unsigned long trigger) -{ - struct irq_chip *chip = &ioapic_chip; - irq_flow_handler_t hdl; - bool fasteoi; - - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) { - irq_set_status_flags(irq, IRQ_LEVEL); - fasteoi = true; - } else { - irq_clear_status_flags(irq, IRQ_LEVEL); - fasteoi = false; - } - - if (setup_remapped_irq(irq, cfg, chip)) - fasteoi = trigger != 0; - - hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; - irq_set_chip_and_handler_name(irq, chip, hdl, - fasteoi ? "fasteoi" : "edge"); -} - int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, unsigned int destination, int vector, struct io_apic_irq_attr *attr) @@ -1358,48 +1333,6 @@ int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, return 0; } -static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, - struct io_apic_irq_attr *attr) -{ - struct IO_APIC_route_entry entry; - unsigned int dest; - - if (!IO_APIC_IRQ(irq)) - return; - - if (assign_irq_vector(irq, cfg, apic->target_cpus())) - return; - - if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(), - &dest)) { - pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n", - mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); - clear_irq_vector(irq, cfg); - - return; - } - - apic_printk(APIC_VERBOSE,KERN_DEBUG - "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " - "IRQ %d Mode:%i Active:%i Dest:%d)\n", - attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin, - cfg->vector, irq, attr->trigger, attr->polarity, dest); - - if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) { - pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); - clear_irq_vector(irq, cfg); - - return; - } - - ioapic_register_intr(irq, cfg, attr->trigger); - if (irq < nr_legacy_irqs()) - legacy_pic->mask(irq); - - ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry); -} - static void __init setup_IO_APIC_irqs(void) { unsigned int ioapic, pin; @@ -1419,46 +1352,6 @@ static void __init setup_IO_APIC_irqs(void) } } -/* - * Set up the timer pin, possibly with the 8259A-master behind. - */ -static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, - unsigned int pin, int vector) -{ - struct IO_APIC_route_entry entry; - unsigned int dest; - - memset(&entry, 0, sizeof(entry)); - - /* - * We use logical delivery to get the timer IRQ - * to the first CPU. - */ - if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(), - apic->target_cpus(), &dest))) - dest = BAD_APICID; - - entry.dest_mode = apic->irq_dest_mode; - entry.mask = 0; /* don't mask IRQ for edge */ - entry.dest = dest; - entry.delivery_mode = apic->irq_delivery_mode; - entry.polarity = 0; - entry.trigger = 0; - entry.vector = vector; - - /* - * The timer IRQ doesn't have to know that behind the - * scene we may have a 8259A-master in AEOI mode ... - */ - irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, - "edge"); - - /* - * Add it to the IO-APIC irq-routing table: - */ - ioapic_write_entry(ioapic_idx, pin, entry); -} - void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries) { int i; @@ -2669,20 +2562,6 @@ static int __init ioapic_init_ops(void) device_initcall(ioapic_init_ops); -static int -io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) -{ - struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node); - int ret; - - if (!cfg) - return -EINVAL; - ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin); - if (!ret) - setup_ioapic_irq(irq, cfg, attr); - return ret; -} - static int io_apic_get_redir_entries(int ioapic) { union IO_APIC_reg_01 reg_01; @@ -3239,58 +3118,8 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, irq_attr->polarity = polarity; } -int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq) -{ - int ioapic = mp_irqdomain_ioapic_idx(domain); - struct mp_pin_info *info = mp_pin_info(ioapic, hwirq); - struct io_apic_irq_attr attr; - - /* Get default attribute if not set by caller yet */ - if (!info->set) { - u32 gsi = mp_pin_to_gsi(ioapic, hwirq); - - if (acpi_get_override_irq(gsi, &info->trigger, - &info->polarity) < 0) { - /* - * PCI interrupts are always polarity one level - * triggered. - */ - info->trigger = 1; - info->polarity = 1; - } - info->node = NUMA_NO_NODE; - - /* - * setup_IO_APIC_irqs() programs all legacy IRQs with default - * trigger and polarity attributes. Don't set the flag for that - * case so the first legacy IRQ user could reprogram the pin - * with real trigger and polarity attributes. - */ - if (virq >= nr_legacy_irqs() || info->count) - info->set = 1; - } - set_io_apic_irq_attr(&attr, ioapic, hwirq, info->trigger, - info->polarity); - - return io_apic_setup_irq_pin(virq, info->node, &attr); -} - -void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq) -{ - struct irq_data *data = irq_get_irq_data(virq); - struct irq_cfg *cfg = irq_cfg(virq); - int ioapic = mp_irqdomain_ioapic_idx(domain); - int pin = (int)data->hwirq; - - ioapic_mask_entry(ioapic, pin); - __remove_pin_from_irq(cfg, ioapic, pin); - WARN_ON(!list_empty(&cfg->irq_2_pin)); - arch_teardown_hwirq(virq); -} - static void mp_irqdomain_get_attr(u32 gsi, struct mp_chip_data *data, - struct irq_alloc_info *info) + struct irq_alloc_info *info) { if (info && info->ioapic_valid) { data->trigger = info->ioapic_trigger; @@ -3414,35 +3243,6 @@ void mp_irqdomain_deactivate(struct irq_domain *domain, (int)irq_data->hwirq); } -int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node) -{ - int ret = 0; - int ioapic, pin; - struct mp_pin_info *info; - - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return -ENODEV; - - pin = mp_find_ioapic_pin(ioapic, gsi); - info = mp_pin_info(ioapic, pin); - trigger = trigger ? 1 : 0; - polarity = polarity ? 1 : 0; - - mutex_lock(&ioapic_mutex); - if (!info->set) { - info->trigger = trigger; - info->polarity = polarity; - info->node = node; - info->set = 1; - } else if (info->trigger != trigger || info->polarity != polarity) { - ret = -EBUSY; - } - mutex_unlock(&ioapic_mutex); - - return ret; -} - int mp_irqdomain_ioapic_idx(struct irq_domain *domain) { return (int)(long)domain->host_data; -- cgit v1.2.3 From 84bea5cc7709dffdadfa9885a66efd67d9ffc24c Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:29:40 +0800 Subject: x86/irq: Remove x86_io_apic_ops.print_entries and related interfaces Now there is no user of x86_io_apic_ops.print_entries anymore, so remove it. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Greg Kroah-Hartman Cc: iommu@lists.linux-foundation.org Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Joerg Roedel Cc: Yijing Wang Cc: Grant Likely Link: http://lkml.kernel.org/r/1428978610-28986-4-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 3 -- arch/x86/include/asm/x86_init.h | 1 - arch/x86/kernel/apic/io_apic.c | 55 ------------------------------------- arch/x86/kernel/x86_init.c | 1 - drivers/iommu/intel_irq_remapping.c | 7 ----- 5 files changed, 67 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 705c425c9c3d..47af5a7af358 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -225,8 +225,6 @@ extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val); extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); extern void native_disable_io_apic(void); -extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries); -extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries); extern int native_ioapic_set_affinity(struct irq_data *, const struct cpumask *, bool); @@ -290,7 +288,6 @@ static inline void disable_ioapic_support(void) { } #define native_io_apic_write NULL #define native_io_apic_modify NULL #define native_disable_io_apic NULL -#define native_io_apic_print_entries NULL #define native_ioapic_set_affinity NULL #define native_setup_ioapic_entry NULL #define native_eoi_ioapic_pin NULL diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 1649bb9ca27c..2924bc88034a 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -191,7 +191,6 @@ struct x86_io_apic_ops { void (*write) (unsigned int apic, unsigned int reg, unsigned int value); void (*modify) (unsigned int apic, unsigned int reg, unsigned int value); void (*disable)(void); - void (*print_entries)(unsigned int apic, unsigned int nr_entries); int (*set_affinity)(struct irq_data *data, const struct cpumask *mask, bool force); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3506e8aeba91..acb91c19f318 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1339,61 +1339,6 @@ static void __init setup_IO_APIC_irqs(void) } } -void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries) -{ - int i; - - pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n"); - - for (i = 0; i <= nr_entries; i++) { - struct IO_APIC_route_entry entry; - - entry = ioapic_read_entry(apic, i); - - pr_debug(" %02x %02X ", i, entry.dest); - pr_cont("%1d %1d %1d %1d %1d " - "%1d %1d %02X\n", - entry.mask, - entry.trigger, - entry.irr, - entry.polarity, - entry.delivery_status, - entry.dest_mode, - entry.delivery_mode, - entry.vector); - } -} - -void intel_ir_io_apic_print_entries(unsigned int apic, - unsigned int nr_entries) -{ - int i; - - pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n"); - - for (i = 0; i <= nr_entries; i++) { - struct IR_IO_APIC_route_entry *ir_entry; - struct IO_APIC_route_entry entry; - - entry = ioapic_read_entry(apic, i); - - ir_entry = (struct IR_IO_APIC_route_entry *)&entry; - - pr_debug(" %02x %04X ", i, ir_entry->index); - pr_cont("%1d %1d %1d %1d %1d " - "%1d %1d %X %02X\n", - ir_entry->format, - ir_entry->mask, - ir_entry->trigger, - ir_entry->irr, - ir_entry->polarity, - ir_entry->delivery_status, - ir_entry->index2, - ir_entry->zero, - ir_entry->vector); - } -} - void ioapic_zap_locks(void) { raw_spin_lock_init(&ioapic_lock); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index b094d691f2fe..d6f36c7594d7 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -144,7 +144,6 @@ struct x86_io_apic_ops x86_io_apic_ops = { .write = native_io_apic_write, .modify = native_io_apic_modify, .disable = native_disable_io_apic, - .print_entries = native_io_apic_print_entries, .set_affinity = native_ioapic_set_affinity, .setup_entry = native_setup_ioapic_entry, .eoi_ioapic_pin = native_eoi_ioapic_pin, diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 71a25aeee203..0c8935cd6229 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -696,13 +696,6 @@ static int __init intel_enable_irq_remapping(void) irq_remapping_enabled = 1; - /* - * VT-d has a different layout for IO-APIC entries when - * interrupt remapping is enabled. So it needs a special routine - * to print IO-APIC entries for debugging purposes too. - */ - x86_io_apic_ops.print_entries = intel_ir_io_apic_print_entries; - pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic"); return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE; -- cgit v1.2.3 From 35d50d8fd5b8f932b3e71311a4cbd4384501ab9a Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:29:41 +0800 Subject: x86/irq: Remove x86_io_apic_ops.setup_entry and related interfaces Now there is no user of x86_io_apic_ops.setup_entry anymore, so remove it. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Greg Kroah-Hartman Cc: iommu@lists.linux-foundation.org Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Joerg Roedel Cc: Yijing Wang Cc: Grant Likely Link: http://lkml.kernel.org/r/1428978610-28986-5-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 4 ---- arch/x86/include/asm/irq_remapping.h | 13 ------------- arch/x86/include/asm/x86_init.h | 3 --- arch/x86/kernel/apic/io_apic.c | 24 ------------------------ arch/x86/kernel/x86_init.c | 1 - drivers/iommu/irq_remapping.c | 13 ------------- 6 files changed, 58 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 47af5a7af358..d2a34e4718d2 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -150,9 +150,6 @@ struct irq_cfg; extern void ioapic_insert_resources(void); extern int arch_early_ioapic_init(void); -extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, - unsigned int, int, - struct io_apic_irq_attr *); extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg); extern void native_eoi_ioapic_pin(int apic, int pin, int vector); @@ -289,7 +286,6 @@ static inline void disable_ioapic_support(void) { } #define native_io_apic_modify NULL #define native_disable_io_apic NULL #define native_ioapic_set_affinity NULL -#define native_setup_ioapic_entry NULL #define native_eoi_ioapic_pin NULL static inline void setup_IO_APIC(void) { } diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 52e1a1f337b2..4b5bbd104ae2 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -42,11 +42,6 @@ extern int irq_remapping_enable(void); extern void irq_remapping_disable(void); extern int irq_remapping_reenable(int); extern int irq_remap_enable_fault_handling(void); -extern int setup_ioapic_remapped_entry(int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, - int vector, - struct io_apic_irq_attr *attr); extern void free_remapped_irq(int irq); extern void panic_if_irq_remap(const char *msg); extern bool setup_remapped_irq(int irq, @@ -77,14 +72,6 @@ static inline int irq_remapping_enable(void) { return -ENODEV; } static inline void irq_remapping_disable(void) { } static inline int irq_remapping_reenable(int eim) { return -ENODEV; } static inline int irq_remap_enable_fault_handling(void) { return -ENODEV; } -static inline int setup_ioapic_remapped_entry(int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, - int vector, - struct io_apic_irq_attr *attr) -{ - return -ENODEV; -} static inline void free_remapped_irq(int irq) { } static inline void panic_if_irq_remap(const char *msg) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2924bc88034a..0c690574efae 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -194,9 +194,6 @@ struct x86_io_apic_ops { int (*set_affinity)(struct irq_data *data, const struct cpumask *mask, bool force); - int (*setup_entry)(int irq, struct IO_APIC_route_entry *entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr); void (*eoi_ioapic_pin)(int apic, int pin, int vector); }; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index acb91c19f318..cf5cd19b74e3 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1296,30 +1296,6 @@ static inline int IO_APIC_irq_trigger(int irq) } #endif -int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr) -{ - memset(entry, 0, sizeof(*entry)); - - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->dest = destination; - entry->vector = vector; - entry->mask = 0; /* enable IRQ */ - entry->trigger = attr->trigger; - entry->polarity = attr->polarity; - - /* - * Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (attr->trigger) - entry->mask = 1; - - return 0; -} - static void __init setup_IO_APIC_irqs(void) { unsigned int ioapic, pin; diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index d6f36c7594d7..066cdaa6503e 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -145,6 +145,5 @@ struct x86_io_apic_ops x86_io_apic_ops = { .modify = native_io_apic_modify, .disable = native_disable_io_apic, .set_affinity = native_ioapic_set_affinity, - .setup_entry = native_setup_ioapic_entry, .eoi_ioapic_pin = native_eoi_ioapic_pin, }; diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 558c804dbe11..5bb1a04c91f5 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -62,7 +62,6 @@ static void __init irq_remapping_modify_x86_ops(void) { x86_io_apic_ops.disable = irq_remapping_disable_io_apic; x86_io_apic_ops.set_affinity = set_remapped_irq_affinity; - x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry; x86_io_apic_ops.eoi_ioapic_pin = eoi_ioapic_pin_remapped; } @@ -158,18 +157,6 @@ int __init irq_remap_enable_fault_handling(void) return remap_ops->enable_faulting(); } -int setup_ioapic_remapped_entry(int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr) -{ - if (!remap_ops->setup_ioapic_entry) - return -ENODEV; - - return remap_ops->setup_ioapic_entry(irq, entry, destination, - vector, attr); -} - static int set_remapped_irq_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { -- cgit v1.2.3 From aa5cb97f14a2dd5aefabed6538c35ebc087d7c24 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:29:42 +0800 Subject: x86/irq: Remove x86_io_apic_ops.set_affinity and related interfaces Now there is no user of x86_io_apic_ops.set_affinity anymore, so remove it. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Greg Kroah-Hartman Cc: iommu@lists.linux-foundation.org Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Joerg Roedel Cc: Yijing Wang Cc: Grant Likely Link: http://lkml.kernel.org/r/1428978610-28986-6-git-send-email-jiang.liu@linux.intel.com --- arch/x86/include/asm/io_apic.h | 4 ---- arch/x86/include/asm/x86_init.h | 3 --- arch/x86/kernel/apic/io_apic.c | 25 +------------------------ arch/x86/kernel/x86_init.c | 1 - drivers/iommu/irq_remapping.c | 15 --------------- 5 files changed, 1 insertion(+), 47 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index d2a34e4718d2..0ff68daa9949 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -222,9 +222,6 @@ extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val); extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); extern void native_disable_io_apic(void); -extern int native_ioapic_set_affinity(struct irq_data *, - const struct cpumask *, - bool); static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) { @@ -285,7 +282,6 @@ static inline void disable_ioapic_support(void) { } #define native_io_apic_write NULL #define native_io_apic_modify NULL #define native_disable_io_apic NULL -#define native_ioapic_set_affinity NULL #define native_eoi_ioapic_pin NULL static inline void setup_IO_APIC(void) { } diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 0c690574efae..f9f83cfabcaa 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -191,9 +191,6 @@ struct x86_io_apic_ops { void (*write) (unsigned int apic, unsigned int reg, unsigned int value); void (*modify) (unsigned int apic, unsigned int reg, unsigned int value); void (*disable)(void); - int (*set_affinity)(struct irq_data *data, - const struct cpumask *mask, - bool force); void (*eoi_ioapic_pin)(int apic, int pin, int vector); }; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index cf5cd19b74e3..9ef964512b86 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1787,29 +1787,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq } } -int native_ioapic_set_affinity(struct irq_data *data, - const struct cpumask *mask, - bool force) -{ - unsigned int dest, irq = data->irq; - unsigned long flags; - int ret; - - if (!config_enabled(CONFIG_SMP)) - return -EPERM; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - ret = apic_set_affinity(data, mask, &dest); - if (!ret) { - /* Only the high 8 bits are valid. */ - dest = SET_APIC_LOGICAL_ID(dest); - __target_IO_APIC_irq(irq, dest, irqd_cfg(data)); - ret = IRQ_SET_MASK_OK_NOCOPY; - } - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - return ret; -} - atomic_t irq_mis_count; #ifdef CONFIG_GENERIC_PENDING_IRQ @@ -2686,7 +2663,7 @@ void __init setup_ioapic_dest(void) else mask = apic->target_cpus(); - x86_io_apic_ops.set_affinity(idata, mask, false); + irq_set_affinity(irq, mask); } } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 066cdaa6503e..f7e8eab3a7c4 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -144,6 +144,5 @@ struct x86_io_apic_ops x86_io_apic_ops = { .write = native_io_apic_write, .modify = native_io_apic_modify, .disable = native_disable_io_apic, - .set_affinity = native_ioapic_set_affinity, .eoi_ioapic_pin = native_eoi_ioapic_pin, }; diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 5bb1a04c91f5..7baa54a13921 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -25,10 +25,6 @@ int no_x2apic_optout; static int disable_irq_remap; static struct irq_remap_ops *remap_ops; -static int set_remapped_irq_affinity(struct irq_data *data, - const struct cpumask *mask, - bool force); - static bool irq_remapped(struct irq_cfg *cfg) { return (cfg->remapped == 1); @@ -61,7 +57,6 @@ static void eoi_ioapic_pin_remapped(int apic, int pin, int vector) static void __init irq_remapping_modify_x86_ops(void) { x86_io_apic_ops.disable = irq_remapping_disable_io_apic; - x86_io_apic_ops.set_affinity = set_remapped_irq_affinity; x86_io_apic_ops.eoi_ioapic_pin = eoi_ioapic_pin_remapped; } @@ -157,15 +152,6 @@ int __init irq_remap_enable_fault_handling(void) return remap_ops->enable_faulting(); } -static int set_remapped_irq_affinity(struct irq_data *data, - const struct cpumask *mask, bool force) -{ - if (!config_enabled(CONFIG_SMP) || !remap_ops->set_affinity) - return 0; - - return remap_ops->set_affinity(data, mask, force); -} - void free_remapped_irq(int irq) { struct irq_cfg *cfg = irq_cfg(irq); @@ -201,7 +187,6 @@ void irq_remap_modify_chip_defaults(struct irq_chip *chip) chip->irq_print_chip = ir_print_prefix; chip->irq_ack = ir_ack_apic_edge; chip->irq_eoi = ir_ack_apic_level; - chip->irq_set_affinity = x86_io_apic_ops.set_affinity; } bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip) -- cgit v1.2.3 From ad66e1efc95e548598b032c1fe5bbc34f6460547 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:29:43 +0800 Subject: x86/irq: Remove x86_io_apic_ops.eoi_ioapic_pin and related interfaces Now there is no user of x86_io_apic_ops.eoi_ioapic_pin anymore, so remove it. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Greg Kroah-Hartman Cc: iommu@lists.linux-foundation.org Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Joerg Roedel Cc: Yijing Wang Cc: Grant Likely Link: http://lkml.kernel.org/r/1428978610-28986-7-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 7 ------- arch/x86/include/asm/x86_init.h | 1 - arch/x86/kernel/apic/io_apic.c | 20 ++++---------------- arch/x86/kernel/x86_init.c | 1 - drivers/iommu/irq_remapping.c | 19 ------------------- 5 files changed, 4 insertions(+), 44 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 0ff68daa9949..fa4b25ebd658 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -150,10 +150,6 @@ struct irq_cfg; extern void ioapic_insert_resources(void); extern int arch_early_ioapic_init(void); -extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg); - -extern void native_eoi_ioapic_pin(int apic, int pin, int vector); - extern int save_ioapic_entries(void); extern void mask_ioapic_entries(void); extern int restore_ioapic_entries(void); @@ -237,8 +233,6 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned x86_io_apic_ops.modify(apic, reg, value); } -extern void io_apic_eoi(unsigned int apic, unsigned int vector); - extern void setup_IO_APIC(void); extern void enable_IO_APIC(void); extern void disable_IO_APIC(void); @@ -282,7 +276,6 @@ static inline void disable_ioapic_support(void) { } #define native_io_apic_write NULL #define native_io_apic_modify NULL #define native_disable_io_apic NULL -#define native_eoi_ioapic_pin NULL static inline void setup_IO_APIC(void) { } static inline void enable_IO_APIC(void) { } diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index f9f83cfabcaa..4ada3d3a0e86 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -191,7 +191,6 @@ struct x86_io_apic_ops { void (*write) (unsigned int apic, unsigned int reg, unsigned int value); void (*modify) (unsigned int apic, unsigned int reg, unsigned int value); void (*disable)(void); - void (*eoi_ioapic_pin)(int apic, int pin, int vector); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9ef964512b86..998fefad820e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -271,7 +271,7 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) + (mpc_ioapic_addr(idx) & ~PAGE_MASK); } -void io_apic_eoi(unsigned int apic, unsigned int vector) +static inline void io_apic_eoi(unsigned int apic, unsigned int vector) { struct io_apic __iomem *io_apic = io_apic_base(apic); writel(vector, &io_apic->eoi); @@ -527,7 +527,7 @@ static void unmask_ioapic_irq(struct irq_data *data) * Otherwise, we simulate the EOI message manually by changing the trigger * mode to edge and then back to level, with RTE being masked during this. */ -void native_eoi_ioapic_pin(int apic, int pin, int vector) +static void __eoi_ioapic_pin(int apic, int pin, int vector) { if (mpc_ioapic_ver(apic) >= 0x20) { io_apic_eoi(apic, vector); @@ -558,19 +558,7 @@ void eoi_ioapic_pin(int vector, struct irq_cfg *cfg) raw_spin_lock_irqsave(&ioapic_lock, flags); for_each_irq_pin(entry, cfg->irq_2_pin) - native_eoi_ioapic_pin(entry->apic, entry->pin, vector); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); -} - -void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) -{ - struct irq_pin_list *entry; - unsigned long flags; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - for_each_irq_pin(entry, cfg->irq_2_pin) - x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin, - cfg->vector); + __eoi_ioapic_pin(entry->apic, entry->pin, vector); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -606,7 +594,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) ioapic_write_entry(apic, pin, entry); } raw_spin_lock_irqsave(&ioapic_lock, flags); - native_eoi_ioapic_pin(apic, pin, entry.vector); + __eoi_ioapic_pin(apic, pin, entry.vector); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index f7e8eab3a7c4..f612dc018fb6 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -144,5 +144,4 @@ struct x86_io_apic_ops x86_io_apic_ops = { .write = native_io_apic_write, .modify = native_io_apic_modify, .disable = native_disable_io_apic, - .eoi_ioapic_pin = native_eoi_ioapic_pin, }; diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 7baa54a13921..bca42550b1ad 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -43,21 +43,9 @@ static void irq_remapping_disable_io_apic(void) disconnect_bsp_APIC(0); } -static void eoi_ioapic_pin_remapped(int apic, int pin, int vector) -{ - /* - * Intr-remapping uses pin number as the virtual vector - * in the RTE. Actual vector is programmed in - * intr-remapping table entry. Hence for the io-apic - * EOI we use the pin number. - */ - io_apic_eoi(apic, pin); -} - static void __init irq_remapping_modify_x86_ops(void) { x86_io_apic_ops.disable = irq_remapping_disable_io_apic; - x86_io_apic_ops.eoi_ioapic_pin = eoi_ioapic_pin_remapped; } static __init int setup_nointremap(char *str) @@ -171,12 +159,6 @@ void ir_ack_apic_edge(struct irq_data *data) ack_APIC_irq(); } -static void ir_ack_apic_level(struct irq_data *data) -{ - ack_APIC_irq(); - eoi_ioapic_irq(data->irq, irqd_cfg(data)); -} - static void ir_print_prefix(struct irq_data *data, struct seq_file *p) { seq_printf(p, " IR-%s", data->chip->name); @@ -186,7 +168,6 @@ void irq_remap_modify_chip_defaults(struct irq_chip *chip) { chip->irq_print_chip = ir_print_prefix; chip->irq_ack = ir_ack_apic_edge; - chip->irq_eoi = ir_ack_apic_level; } bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip) -- cgit v1.2.3 From 3dd786ea3a0753bb19a5fd5103739a7cb9ec92c1 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:29:45 +0800 Subject: x86/irq: Clean up unused forward declarations in x86_init.h Clean up unused forward declarations in x86_init.h. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Yijing Wang Link: http://lkml.kernel.org/r/1428978610-28986-9-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/x86_init.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 4ada3d3a0e86..09d4dab9302f 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -171,7 +171,6 @@ struct x86_platform_ops { }; struct pci_dev; -struct msi_msg; struct x86_msi_ops { int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); @@ -180,11 +179,6 @@ struct x86_msi_ops { void (*restore_msi_irqs)(struct pci_dev *dev); }; -struct IO_APIC_route_entry; -struct io_apic_irq_attr; -struct irq_data; -struct cpumask; - struct x86_io_apic_ops { void (*init) (void); unsigned int (*read) (unsigned int apic, unsigned int reg); -- cgit v1.2.3 From 9880534989ba96faad26aebc01dcdb2c1b5793aa Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:29:46 +0800 Subject: irq_remapping: Clean up unsued code to support IOAPIC Now we have converted to hierarchical irqdomains, so clean up unused code. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Greg Kroah-Hartman Cc: iommu@lists.linux-foundation.org Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Joerg Roedel Link: http://lkml.kernel.org/r/1428978610-28986-10-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/irq_remapping.h | 23 ----------------------- arch/x86/kernel/apic/vector.c | 1 - drivers/iommu/irq_remapping.c | 33 --------------------------------- 3 files changed, 57 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 4b5bbd104ae2..09efa358c831 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -26,12 +26,7 @@ #include #include -struct IO_APIC_route_entry; -struct io_apic_irq_attr; -struct irq_chip; struct msi_msg; -struct pci_dev; -struct irq_cfg; struct irq_alloc_info; #ifdef CONFIG_IRQ_REMAP @@ -42,13 +37,7 @@ extern int irq_remapping_enable(void); extern void irq_remapping_disable(void); extern int irq_remapping_reenable(int); extern int irq_remap_enable_fault_handling(void); -extern void free_remapped_irq(int irq); extern void panic_if_irq_remap(const char *msg); -extern bool setup_remapped_irq(int irq, - struct irq_cfg *cfg, - struct irq_chip *chip); - -void irq_remap_modify_chip_defaults(struct irq_chip *chip); extern struct irq_domain * irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info); @@ -72,23 +61,11 @@ static inline int irq_remapping_enable(void) { return -ENODEV; } static inline void irq_remapping_disable(void) { } static inline int irq_remapping_reenable(int eim) { return -ENODEV; } static inline int irq_remap_enable_fault_handling(void) { return -ENODEV; } -static inline void free_remapped_irq(int irq) { } static inline void panic_if_irq_remap(const char *msg) { } -static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) -{ -} - -static inline bool setup_remapped_irq(int irq, - struct irq_cfg *cfg, - struct irq_chip *chip) -{ - return false; -} - static inline struct irq_domain * irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info) { diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index d0e5ea0fb947..37bb9e82b919 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -283,7 +283,6 @@ static void x86_vector_free_irqs(struct irq_domain *domain, for (i = 0; i < nr_irqs; i++) { irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i); if (irq_data && irq_data->chip_data) { - free_remapped_irq(virq); clear_irq_vector(virq + i, irq_data->chip_data); free_irq_cfg(irq_data->chip_data); #ifdef CONFIG_X86_IO_APIC diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index bca42550b1ad..fc78b0d41f71 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -25,11 +25,6 @@ int no_x2apic_optout; static int disable_irq_remap; static struct irq_remap_ops *remap_ops; -static bool irq_remapped(struct irq_cfg *cfg) -{ - return (cfg->remapped == 1); -} - static void irq_remapping_disable_io_apic(void) { /* @@ -140,14 +135,6 @@ int __init irq_remap_enable_fault_handling(void) return remap_ops->enable_faulting(); } -void free_remapped_irq(int irq) -{ - struct irq_cfg *cfg = irq_cfg(irq); - - if (irq_remapped(cfg) && remap_ops->free_irq) - remap_ops->free_irq(irq); -} - void panic_if_irq_remap(const char *msg) { if (irq_remapping_enabled) @@ -159,26 +146,6 @@ void ir_ack_apic_edge(struct irq_data *data) ack_APIC_irq(); } -static void ir_print_prefix(struct irq_data *data, struct seq_file *p) -{ - seq_printf(p, " IR-%s", data->chip->name); -} - -void irq_remap_modify_chip_defaults(struct irq_chip *chip) -{ - chip->irq_print_chip = ir_print_prefix; - chip->irq_ack = ir_ack_apic_edge; -} - -bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip) -{ - if (!irq_remapped(cfg)) - return false; - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - irq_remap_modify_chip_defaults(chip); - return true; -} - /** * irq_remapping_get_ir_irq_domain - Get the irqdomain associated with the IOMMU * device serving request @info -- cgit v1.2.3 From bac4f90784efb858cfafdd7401dede6ef9563818 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:29:50 +0800 Subject: x86/irq: Remove irq_cfg.irq_remapped Now there is no user of irq_cfg.irq_remapped, so remove it. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Greg Kroah-Hartman Cc: iommu@lists.linux-foundation.org Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Joerg Roedel Link: http://lkml.kernel.org/r/1428978610-28986-14-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hw_irq.h | 1 - drivers/iommu/amd_iommu.c | 1 - drivers/iommu/intel_irq_remapping.c | 2 -- 3 files changed, 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 75d0db1db8a0..86e4698a0025 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -193,7 +193,6 @@ struct irq_cfg { u8 vector; u8 move_in_progress : 1; #ifdef CONFIG_IRQ_REMAP - u8 remapped : 1; union { struct irq_2_iommu irq_2_iommu; struct irq_2_irte irq_2_irte; diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 7d9f5acd06f3..9ebc81dd9d75 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4125,7 +4125,6 @@ static void irq_remapping_prepare_irte(struct amd_ir_data *data, union irte *irte = &data->irte_entry; struct IO_APIC_route_entry *entry; - irq_cfg->remapped = 1; data->irq_2_irte.devid = devid; data->irq_2_irte.index = index + sub_handle; diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 2697ad6f6a84..9e19800c7ff8 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -67,7 +67,6 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, struct irq_2_iommu *irq_iommu, u16 count) { struct ir_table *table = iommu->ir_table; - struct irq_cfg *cfg = irq_cfg(irq); unsigned int mask = 0; unsigned long flags; int index; @@ -94,7 +93,6 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, if (index < 0) { pr_warn("IR%d: can't allocate an IRTE\n", iommu->seq_id); } else { - cfg->remapped = 1; irq_iommu->iommu = iommu; irq_iommu->irte_index = index; irq_iommu->sub_handle = 0; -- cgit v1.2.3 From 099c5c03487f6bca30c628e14e666788dd61fb33 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:29:51 +0800 Subject: irq_remapping/vt-d: Move struct irq_2_iommu into intel_irq_remapping.c Now only intel_irq_remapping.c access irq_2_iommu, so move it from hw_irq.h into intel_irq_remapping.c. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Greg Kroah-Hartman Cc: iommu@lists.linux-foundation.org Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Joerg Roedel Link: http://lkml.kernel.org/r/1428978610-28986-15-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hw_irq.h | 9 --------- drivers/iommu/intel_irq_remapping.c | 7 +++++++ 2 files changed, 7 insertions(+), 9 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 86e4698a0025..1e0ee1029ef6 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -95,14 +95,6 @@ extern void trace_call_function_single_interrupt(void); #endif /* CONFIG_TRACING */ #ifdef CONFIG_IRQ_REMAP -/* Intel specific interrupt remapping information */ -struct irq_2_iommu { - struct intel_iommu *iommu; - u16 irte_index; - u16 sub_handle; - u8 irte_mask; -}; - /* AMD specific interrupt remapping information */ struct irq_2_irte { u16 devid; /* Device ID for IRTE table */ @@ -194,7 +186,6 @@ struct irq_cfg { u8 move_in_progress : 1; #ifdef CONFIG_IRQ_REMAP union { - struct irq_2_iommu irq_2_iommu; struct irq_2_irte irq_2_irte; }; #endif diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 9e19800c7ff8..34642d3f7fbd 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -32,6 +32,13 @@ struct hpet_scope { unsigned int devfn; }; +struct irq_2_iommu { + struct intel_iommu *iommu; + u16 irte_index; + u16 sub_handle; + u8 irte_mask; +}; + struct intel_ir_data { struct irq_2_iommu irq_2_iommu; struct irte irte_entry; -- cgit v1.2.3 From 9c72496698a4dadd406d159f7735851a63ef9412 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:29:52 +0800 Subject: irq_remapping/amd: Move struct irq_2_irte into amd_iommu.c Now only amd_iommu.c access irq_2_irte, so move it from hw_irq.h into amd_iommu.c. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Greg Kroah-Hartman Cc: iommu@lists.linux-foundation.org Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Joerg Roedel Link: http://lkml.kernel.org/r/1428978610-28986-16-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hw_irq.h | 13 ------------- drivers/iommu/amd_iommu.c | 5 +++++ 2 files changed, 5 insertions(+), 13 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 1e0ee1029ef6..e47bc4de5630 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -94,14 +94,6 @@ extern void trace_call_function_single_interrupt(void); #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi #endif /* CONFIG_TRACING */ -#ifdef CONFIG_IRQ_REMAP -/* AMD specific interrupt remapping information */ -struct irq_2_irte { - u16 devid; /* Device ID for IRTE table */ - u16 index; /* Index into IRTE table*/ -}; -#endif /* CONFIG_IRQ_REMAP */ - struct irq_domain; #ifdef CONFIG_X86_LOCAL_APIC @@ -184,11 +176,6 @@ struct irq_cfg { unsigned int dest_apicid; u8 vector; u8 move_in_progress : 1; -#ifdef CONFIG_IRQ_REMAP - union { - struct irq_2_irte irq_2_irte; - }; -#endif union { #ifdef CONFIG_X86_IO_APIC struct { diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 9ebc81dd9d75..b5d903cb2804 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3852,6 +3852,11 @@ union irte { } fields; }; +struct irq_2_irte { + u16 devid; /* Device ID for IRTE table */ + u16 index; /* Index into IRTE table*/ +}; + struct amd_ir_data { struct irq_2_irte irq_2_irte; union irte irte_entry; -- cgit v1.2.3 From 4467715a44cca2fa41d25f3d32b737bd2331a8d9 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:29:53 +0800 Subject: x86/irq: Move irq_cfg.irq_2_pin into io_apic.c Now only io_apic.c accesses struct irq_cfg.irq_2_pin, so move irq_2_pin into struct mp_chip_data in io_apic.c to clean up struct irq_cfg further. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Grant Likely Link: http://lkml.kernel.org/r/1428978610-28986-17-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hw_irq.h | 7 -- arch/x86/kernel/apic/io_apic.c | 164 +++++++++++++++++++---------------------- arch/x86/kernel/apic/vector.c | 3 - 3 files changed, 77 insertions(+), 97 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index e47bc4de5630..f000b58cbc0c 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -176,13 +176,6 @@ struct irq_cfg { unsigned int dest_apicid; u8 vector; u8 move_in_progress : 1; - union { -#ifdef CONFIG_X86_IO_APIC - struct { - struct list_head irq_2_pin; - }; -#endif - }; }; extern struct irq_domain *x86_vector_domain; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 998fefad820e..a1abdcf2cb5f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -78,7 +78,13 @@ static DEFINE_MUTEX(ioapic_mutex); static unsigned int ioapic_dynirq_base; static int ioapic_initialized; +struct irq_pin_list { + struct list_head list; + int apic, pin; +}; + struct mp_chip_data { + struct list_head irq_2_pin; struct IO_APIC_route_entry entry; int trigger; int polarity; @@ -215,16 +221,6 @@ void mp_save_irq(struct mpc_intsrc *m) panic("Max # of irq sources exceeded!!\n"); } -struct irq_pin_list { - struct list_head list; - int apic, pin; -}; - -static struct irq_pin_list *alloc_irq_pin_list(int node) -{ - return kzalloc_node(sizeof(struct irq_pin_list), GFP_ATOMIC, node); -} - static void alloc_ioapic_saved_registers(int idx) { size_t size; @@ -379,16 +375,17 @@ static void ioapic_mask_entry(int apic, int pin) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) +static int __add_pin_to_irq_node(struct mp_chip_data *data, + int node, int apic, int pin) { struct irq_pin_list *entry; /* don't allow duplicates */ - for_each_irq_pin(entry, cfg->irq_2_pin) + for_each_irq_pin(entry, data->irq_2_pin) if (entry->apic == apic && entry->pin == pin) return 0; - entry = alloc_irq_pin_list(node); + entry = kzalloc_node(sizeof(struct irq_pin_list), GFP_ATOMIC, node); if (!entry) { pr_err("can not alloc irq_pin_list (%d,%d,%d)\n", node, apic, pin); @@ -396,16 +393,16 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi } entry->apic = apic; entry->pin = pin; + list_add_tail(&entry->list, &data->irq_2_pin); - list_add_tail(&entry->list, &cfg->irq_2_pin); return 0; } -static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin) +static void __remove_pin_from_irq(struct mp_chip_data *data, int apic, int pin) { struct irq_pin_list *tmp, *entry; - list_for_each_entry_safe(entry, tmp, &cfg->irq_2_pin, list) + list_for_each_entry_safe(entry, tmp, &data->irq_2_pin, list) if (entry->apic == apic && entry->pin == pin) { list_del(&entry->list); kfree(entry); @@ -413,22 +410,23 @@ static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin) } } -static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) +static void add_pin_to_irq_node(struct mp_chip_data *data, + int node, int apic, int pin) { - if (__add_pin_to_irq_node(cfg, node, apic, pin)) + if (__add_pin_to_irq_node(data, node, apic, pin)) panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); } /* * Reroute an IRQ to a different pin. */ -static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, +static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node, int oldapic, int oldpin, int newapic, int newpin) { struct irq_pin_list *entry; - for_each_irq_pin(entry, cfg->irq_2_pin) { + for_each_irq_pin(entry, data->irq_2_pin) { if (entry->apic == oldapic && entry->pin == oldpin) { entry->apic = newapic; entry->pin = newpin; @@ -438,7 +436,7 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, } /* old apic/pin didn't exist, so just add new ones */ - add_pin_to_irq_node(cfg, node, newapic, newpin); + add_pin_to_irq_node(data, node, newapic, newpin); } static void __io_apic_modify_irq(struct irq_pin_list *entry, @@ -456,13 +454,13 @@ static void __io_apic_modify_irq(struct irq_pin_list *entry, final(entry); } -static void io_apic_modify_irq(struct irq_cfg *cfg, +static void io_apic_modify_irq(struct mp_chip_data *data, int mask_and, int mask_or, void (*final)(struct irq_pin_list *entry)) { struct irq_pin_list *entry; - for_each_irq_pin(entry, cfg->irq_2_pin) + for_each_irq_pin(entry, data->irq_2_pin) __io_apic_modify_irq(entry, mask_and, mask_or, final); } @@ -478,39 +476,31 @@ static void io_apic_sync(struct irq_pin_list *entry) readl(&io_apic->data); } -static void mask_ioapic(struct irq_cfg *cfg) +static void mask_ioapic_irq(struct irq_data *irq_data) { + struct mp_chip_data *data = irq_data->chip_data; unsigned long flags; raw_spin_lock_irqsave(&ioapic_lock, flags); - io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); + io_apic_modify_irq(data, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } -static void mask_ioapic_irq(struct irq_data *data) -{ - mask_ioapic(irqd_cfg(data)); -} - -static void __unmask_ioapic(struct irq_cfg *cfg) +static void __unmask_ioapic(struct mp_chip_data *data) { - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); + io_apic_modify_irq(data, ~IO_APIC_REDIR_MASKED, 0, NULL); } -static void unmask_ioapic(struct irq_cfg *cfg) +static void unmask_ioapic_irq(struct irq_data *irq_data) { + struct mp_chip_data *data = irq_data->chip_data; unsigned long flags; raw_spin_lock_irqsave(&ioapic_lock, flags); - __unmask_ioapic(cfg); + __unmask_ioapic(data); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } -static void unmask_ioapic_irq(struct irq_data *data) -{ - unmask_ioapic(irqd_cfg(data)); -} - /* * IO-APIC versions below 0x20 don't support EOI register. * For the record, here is the information about various versions: @@ -551,13 +541,13 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector) } } -void eoi_ioapic_pin(int vector, struct irq_cfg *cfg) +void eoi_ioapic_pin(int vector, struct mp_chip_data *data) { unsigned long flags; struct irq_pin_list *entry; raw_spin_lock_irqsave(&ioapic_lock, flags); - for_each_irq_pin(entry, cfg->irq_2_pin) + for_each_irq_pin(entry, data->irq_2_pin) __eoi_ioapic_pin(entry->apic, entry->pin, vector); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -1068,11 +1058,10 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain, * entry. The IOAPIC entry */ if (irq_data && irq_data->parent_data) { - struct irq_cfg *cfg = irqd_cfg(irq_data); - if (!mp_check_pin_attr(irq, info)) return -EBUSY; - if (__add_pin_to_irq_node(cfg, node, ioapic, info->ioapic_pin)) + if (__add_pin_to_irq_node(irq_data->chip_data, node, ioapic, + info->ioapic_pin)) return -ENOMEM; } else { irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true); @@ -1394,9 +1383,7 @@ static void __init print_IO_APIC(int ioapic_idx) void __init print_IO_APICs(void) { int ioapic_idx; - struct irq_cfg *cfg; unsigned int irq; - struct irq_chip *chip; printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for_each_ioapic(ioapic_idx) @@ -1416,18 +1403,20 @@ void __init print_IO_APICs(void) printk(KERN_DEBUG "IRQ to pin mappings:\n"); for_each_active_irq(irq) { struct irq_pin_list *entry; + struct irq_chip *chip; + struct mp_chip_data *data; chip = irq_get_chip(irq); if (chip != &ioapic_chip && chip != &ioapic_ir_chip) continue; - - cfg = irq_cfg(irq); - if (!cfg) + data = irq_get_chip_data(irq); + if (!data) continue; - if (list_empty(&cfg->irq_2_pin)) + if (list_empty(&data->irq_2_pin)) continue; + printk(KERN_DEBUG "IRQ%d ", irq); - for_each_irq_pin(entry, cfg->irq_2_pin) + for_each_irq_pin(entry, data->irq_2_pin) pr_cont("-> %d:%d", entry->apic, entry->pin); pr_cont("\n"); } @@ -1740,7 +1729,7 @@ static unsigned int startup_ioapic_irq(struct irq_data *data) if (legacy_pic->irq_pending(irq)) was_pending = 1; } - __unmask_ioapic(irqd_cfg(data)); + __unmask_ioapic(data->chip_data); raw_spin_unlock_irqrestore(&ioapic_lock, flags); return was_pending; @@ -1755,13 +1744,15 @@ static unsigned int startup_ioapic_irq(struct irq_data *data) * races. */ -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) +static void __target_IO_APIC_irq(unsigned int irq, struct irq_cfg *cfg, + struct mp_chip_data *data) { int apic, pin; struct irq_pin_list *entry; u8 vector = cfg->vector; + unsigned int dest = SET_APIC_LOGICAL_ID(cfg->dest_apicid); - for_each_irq_pin(entry, cfg->irq_2_pin) { + for_each_irq_pin(entry, data->irq_2_pin) { unsigned int reg; apic = entry->apic; @@ -1778,13 +1769,13 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq atomic_t irq_mis_count; #ifdef CONFIG_GENERIC_PENDING_IRQ -static bool io_apic_level_ack_pending(struct irq_cfg *cfg) +static bool io_apic_level_ack_pending(struct mp_chip_data *data) { struct irq_pin_list *entry; unsigned long flags; raw_spin_lock_irqsave(&ioapic_lock, flags); - for_each_irq_pin(entry, cfg->irq_2_pin) { + for_each_irq_pin(entry, data->irq_2_pin) { unsigned int reg; int pin; @@ -1801,18 +1792,17 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) return false; } -static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) +static inline bool ioapic_irqd_mask(struct irq_data *data) { /* If we are moving the irq we need to mask it */ if (unlikely(irqd_is_setaffinity_pending(data))) { - mask_ioapic(cfg); + mask_ioapic_irq(data); return true; } return false; } -static inline void ioapic_irqd_unmask(struct irq_data *data, - struct irq_cfg *cfg, bool masked) +static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) { if (unlikely(masked)) { /* Only migrate the irq if the ack has been received. @@ -1841,31 +1831,30 @@ static inline void ioapic_irqd_unmask(struct irq_data *data, * accurate and is causing problems then it is a hardware bug * and you can go talk to the chipset vendor about it. */ - if (!io_apic_level_ack_pending(cfg)) + if (!io_apic_level_ack_pending(data->chip_data)) irq_move_masked_irq(data); - unmask_ioapic(cfg); + unmask_ioapic_irq(data); } } #else -static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) +static inline bool ioapic_irqd_mask(struct irq_data *data) { return false; } -static inline void ioapic_irqd_unmask(struct irq_data *data, - struct irq_cfg *cfg, bool masked) +static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) { } #endif -static void ioapic_ack_level(struct irq_data *data) +static void ioapic_ack_level(struct irq_data *irq_data) { - struct irq_cfg *cfg = irqd_cfg(data); + struct irq_cfg *cfg = irqd_cfg(irq_data); unsigned long v; bool masked; int i; irq_complete_move(cfg); - masked = ioapic_irqd_mask(data, cfg); + masked = ioapic_irqd_mask(irq_data); /* * It appears there is an erratum which affects at least version 0x11 @@ -1917,10 +1906,10 @@ static void ioapic_ack_level(struct irq_data *data) */ if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); - eoi_ioapic_pin(cfg->vector, cfg); + eoi_ioapic_pin(cfg->vector, irq_data->chip_data); } - ioapic_irqd_unmask(data, cfg, masked); + ioapic_irqd_unmask(irq_data, masked); } static void ioapic_ir_ack_level(struct irq_data *irq_data) @@ -1934,7 +1923,7 @@ static void ioapic_ir_ack_level(struct irq_data *irq_data) * EOI we use the pin number. */ ack_APIC_irq(); - eoi_ioapic_pin(data->entry.vector, irqd_cfg(irq_data)); + eoi_ioapic_pin(data->entry.vector, data); } static int ioapic_set_affinity(struct irq_data *irq_data, @@ -1942,7 +1931,6 @@ static int ioapic_set_affinity(struct irq_data *irq_data, { struct irq_data *parent = irq_data->parent_data; struct mp_chip_data *data = irq_data->chip_data; - unsigned int dest, irq = irq_data->irq; struct irq_cfg *cfg; unsigned long flags; int ret; @@ -1953,9 +1941,7 @@ static int ioapic_set_affinity(struct irq_data *irq_data, cfg = irqd_cfg(irq_data); data->entry.dest = cfg->dest_apicid; data->entry.vector = cfg->vector; - /* Only the high 8 bits are valid. */ - dest = SET_APIC_LOGICAL_ID(cfg->dest_apicid); - __target_IO_APIC_irq(irq, dest, cfg); + __target_IO_APIC_irq(irq_data->irq, cfg, irq_data->chip_data); } raw_spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2116,10 +2102,11 @@ early_param("disable_timer_pin_1", disable_timer_pin_setup); static int mp_alloc_timer_irq(int ioapic, int pin) { int irq = -1; - struct irq_alloc_info info; struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); if (domain) { + struct irq_alloc_info info; + ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 0, 0); info.ioapic_id = mpc_ioapic_id(ioapic); info.ioapic_pin = pin; @@ -2141,7 +2128,9 @@ static int mp_alloc_timer_irq(int ioapic, int pin) */ static inline void __init check_timer(void) { - struct irq_cfg *cfg = irq_cfg(0); + struct irq_data *irq_data = irq_get_irq_data(0); + struct mp_chip_data *data = irq_data->chip_data; + struct irq_cfg *cfg = irqd_cfg(irq_data); int node = cpu_to_node(0); int apic1, pin1, apic2, pin2; unsigned long flags; @@ -2205,9 +2194,9 @@ static inline void __init check_timer(void) int idx; idx = find_irq_entry(apic1, pin1, mp_INT); if (idx != -1 && irq_trigger(idx)) - unmask_ioapic(cfg); + unmask_ioapic_irq(irq_get_chip_data(0)); } - irq_domain_activate_irq(irq_get_irq_data(0)); + irq_domain_activate_irq(irq_data); if (timer_irq_works()) { if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); @@ -2227,8 +2216,8 @@ static inline void __init check_timer(void) /* * legacy devices should be connected to IO APIC #0 */ - replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); - irq_domain_activate_irq(irq_get_irq_data(0)); + replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2); + irq_domain_activate_irq(irq_data); legacy_pic->unmask(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); @@ -3044,6 +3033,7 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, return ret; } + INIT_LIST_HEAD(&data->irq_2_pin); irq_data->hwirq = info->ioapic_pin; irq_data->chip = (domain->parent == x86_vector_domain) ? &ioapic_chip : &ioapic_ir_chip; @@ -3051,7 +3041,7 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, mp_irqdomain_get_attr(mp_pin_to_gsi(ioapic, pin), data, info); cfg = irqd_cfg(irq_data); - add_pin_to_irq_node(cfg, info->ioapic_node, ioapic, pin); + add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin); if (info->ioapic_entry) mp_setup_entry(cfg, data, info->ioapic_entry); mp_register_handler(virq, data->trigger); @@ -3069,15 +3059,16 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { - struct irq_cfg *cfg = irq_cfg(virq); struct irq_data *irq_data; + struct mp_chip_data *data; BUG_ON(nr_irqs != 1); irq_data = irq_domain_get_irq_data(domain, virq); if (irq_data && irq_data->chip_data) { - __remove_pin_from_irq(cfg, mp_irqdomain_ioapic_idx(domain), + data = irq_data->chip_data; + __remove_pin_from_irq(data, mp_irqdomain_ioapic_idx(domain), (int)irq_data->hwirq); - WARN_ON(!list_empty(&cfg->irq_2_pin)); + WARN_ON(!list_empty(&data->irq_2_pin)); kfree(irq_data->chip_data); } irq_domain_free_irqs_top(domain, virq, nr_irqs); @@ -3089,10 +3080,9 @@ void mp_irqdomain_activate(struct irq_domain *domain, unsigned long flags; struct irq_pin_list *entry; struct mp_chip_data *data = irq_data->chip_data; - struct irq_cfg *cfg = irqd_cfg(irq_data); raw_spin_lock_irqsave(&ioapic_lock, flags); - for_each_irq_pin(entry, cfg->irq_2_pin) + for_each_irq_pin(entry, data->irq_2_pin) __ioapic_write_entry(entry->apic, entry->pin, data->entry); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 37bb9e82b919..af224e6774d8 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -68,9 +68,6 @@ static struct irq_cfg *alloc_irq_cfg(int node) goto out_cfg; if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node)) goto out_domain; -#ifdef CONFIG_X86_IO_APIC - INIT_LIST_HEAD(&cfg->irq_2_pin); -#endif return cfg; out_domain: free_cpumask_var(cfg->domain); -- cgit v1.2.3 From 50a6ad84b2a2c971e76d57884d61a5a55d7c1601 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:29:54 +0800 Subject: x86/irq: Remove struct io_apic_irq_attr Now there's no user of struct io_apic_irq_attr anymore, so remove it. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Grant Likely Link: http://lkml.kernel.org/r/1428978610-28986-18-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 7 ------- arch/x86/kernel/apic/io_apic.c | 10 ---------- 2 files changed, 17 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index fa4b25ebd658..4eb4bcc5f219 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -157,13 +157,6 @@ extern int restore_ioapic_entries(void); extern void setup_ioapic_ids_from_mpc(void); extern void setup_ioapic_ids_from_mpc_nocheck(void); -struct io_apic_irq_attr { - int ioapic; - int ioapic_pin; - int trigger; - int polarity; -}; - enum ioapic_domain_type { IOAPIC_DOMAIN_INVALID, IOAPIC_DOMAIN_LEGACY, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index a1abdcf2cb5f..76dc9f5bfdbc 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2959,16 +2959,6 @@ int mp_ioapic_registered(u32 gsi_base) return 0; } -static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, - int ioapic, int ioapic_pin, - int trigger, int polarity) -{ - irq_attr->ioapic = ioapic; - irq_attr->ioapic_pin = ioapic_pin; - irq_attr->trigger = trigger; - irq_attr->polarity = polarity; -} - static void mp_irqdomain_get_attr(u32 gsi, struct mp_chip_data *data, struct irq_alloc_info *info) { -- cgit v1.2.3 From 9a93d4736ec5ec322ec8f240a292c1a86cd0876d Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:29:55 +0800 Subject: x86/irq: Remove x86_io_apic_ops.write and x86_io_apic_ops.modify x86_io_apic_ops.write is always set to native_io_apic_write(), and nobody overrides it. So get rid of the indirection by changing native_io_apic_write() as io_apic_write() and removing x86_io_apic_ops.write. Do the same for x86_io_apic_ops.modify and native_io_apic_modify(). Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Yijing Wang Cc: Grant Likely Link: http://lkml.kernel.org/r/1428978610-28986-19-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 13 ------------- arch/x86/include/asm/x86_init.h | 2 -- arch/x86/kernel/apic/io_apic.c | 6 ++++-- arch/x86/kernel/x86_init.c | 2 -- 4 files changed, 4 insertions(+), 19 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 4eb4bcc5f219..c6486dd44418 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -208,8 +208,6 @@ extern void disable_ioapic_support(void); extern void __init native_io_apic_init_mappings(void); extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); -extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val); -extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); extern void native_disable_io_apic(void); static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) @@ -217,15 +215,6 @@ static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) return x86_io_apic_ops.read(apic, reg); } -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) -{ - x86_io_apic_ops.write(apic, reg, value); -} -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) -{ - x86_io_apic_ops.modify(apic, reg, value); -} - extern void setup_IO_APIC(void); extern void enable_IO_APIC(void); extern void disable_IO_APIC(void); @@ -266,8 +255,6 @@ static inline void mp_save_irq(struct mpc_intsrc *m) { }; static inline void disable_ioapic_support(void) { } #define native_io_apic_init_mappings NULL #define native_io_apic_read NULL -#define native_io_apic_write NULL -#define native_io_apic_modify NULL #define native_disable_io_apic NULL static inline void setup_IO_APIC(void) { } diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 09d4dab9302f..844b37d55a44 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -182,8 +182,6 @@ struct x86_msi_ops { struct x86_io_apic_ops { void (*init) (void); unsigned int (*read) (unsigned int apic, unsigned int reg); - void (*write) (unsigned int apic, unsigned int reg, unsigned int value); - void (*modify) (unsigned int apic, unsigned int reg, unsigned int value); void (*disable)(void); }; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 76dc9f5bfdbc..d687a10ed3a2 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -280,7 +280,8 @@ unsigned int native_io_apic_read(unsigned int apic, unsigned int reg) return readl(&io_apic->data); } -void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +static void io_apic_write(unsigned int apic, unsigned int reg, + unsigned int value) { struct io_apic __iomem *io_apic = io_apic_base(apic); @@ -294,7 +295,8 @@ void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int valu * * Older SiS APIC requires we rewrite the index register */ -void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) +static void io_apic_modify(unsigned int apic, unsigned int reg, + unsigned int value) { struct io_apic __iomem *io_apic = io_apic_base(apic); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index f612dc018fb6..633f07845099 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -141,7 +141,5 @@ void arch_restore_msi_irqs(struct pci_dev *dev) struct x86_io_apic_ops x86_io_apic_ops = { .init = native_io_apic_init_mappings, .read = native_io_apic_read, - .write = native_io_apic_write, - .modify = native_io_apic_modify, .disable = native_disable_io_apic, }; -- cgit v1.2.3 From ca1b88622e9c16df7b1e0a57e9c6c2300321bed4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 24 Apr 2015 13:57:48 +0200 Subject: x86: Remove more unmodified io_apic_ops io_apic_ops.init() is either NULL, if IO-APIC support is disabled at compile time or native_io_apic_init_mappings(). No point to have that as we can achieve the same thing with an empty inline. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 6 +++--- arch/x86/include/asm/x86_init.h | 1 - arch/x86/kernel/apic/io_apic.c | 2 +- arch/x86/kernel/setup.c | 3 +-- arch/x86/kernel/x86_init.c | 1 - 5 files changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index c6486dd44418..f80f4efa1717 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -206,7 +206,7 @@ extern void mp_save_irq(struct mpc_intsrc *m); extern void disable_ioapic_support(void); -extern void __init native_io_apic_init_mappings(void); +extern void __init io_apic_init_mappings(void); extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); extern void native_disable_io_apic(void); @@ -251,9 +251,9 @@ static inline int restore_ioapic_entries(void) return -ENOMEM; } -static inline void mp_save_irq(struct mpc_intsrc *m) { }; +static inline void mp_save_irq(struct mpc_intsrc *m) { } static inline void disable_ioapic_support(void) { } -#define native_io_apic_init_mappings NULL +static inline void io_apic_init_mappings(void) { } #define native_io_apic_read NULL #define native_disable_io_apic NULL diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 844b37d55a44..48d34d28f5a6 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -180,7 +180,6 @@ struct x86_msi_ops { }; struct x86_io_apic_ops { - void (*init) (void); unsigned int (*read) (unsigned int apic, unsigned int reg); void (*disable)(void); }; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d687a10ed3a2..3029502b0a50 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2687,7 +2687,7 @@ static struct resource * __init ioapic_setup_resources(void) return res; } -void __init native_io_apic_init_mappings(void) +void __init io_apic_init_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; struct resource *ioapic_res; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d74ac33290ae..8d04a7594a03 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1222,8 +1222,7 @@ void __init setup_arch(char **cmdline_p) init_cpu_to_node(); init_apic_mappings(); - if (x86_io_apic_ops.init) - x86_io_apic_ops.init(); + io_apic_init_mappings(); kvm_guest_init(); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 633f07845099..3cee10abf01d 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -139,7 +139,6 @@ void arch_restore_msi_irqs(struct pci_dev *dev) #endif struct x86_io_apic_ops x86_io_apic_ops = { - .init = native_io_apic_init_mappings, .read = native_io_apic_read, .disable = native_disable_io_apic, }; -- cgit v1.2.3 From 154d9e50e413ee144d48ccd6c402633ffbecbfff Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:29:56 +0800 Subject: x86/irq: Clean up io_apic.h Clean up io_apic.h by: 1) moving definition of struct mp_ioapic_gsi into io_apic.c 2) changing mp_pin_to_gsi() and mp_ioapic_gsi_routing() as static 3) removing unused MP_MAX_IOAPIC_PIN 4) removing useless forward declaration 5) removing useless comments Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Grant Likely Link: http://lkml.kernel.org/r/1428978610-28986-20-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 13 ++----------- arch/x86/kernel/apic/io_apic.c | 23 ++++++++--------------- 2 files changed, 10 insertions(+), 26 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index f80f4efa1717..9b9050129ee7 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -113,9 +113,6 @@ extern int nr_ioapics; extern int mpc_ioapic_id(int ioapic); extern unsigned int mpc_ioapic_addr(int ioapic); -extern struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic); - -#define MP_MAX_IOAPIC_PIN 127 /* # of MP IRQ source entries */ extern int mp_irq_entries; @@ -135,6 +132,8 @@ extern int noioapicquirk; /* -1 if "noapic" boot option passed */ extern int noioapicreroute; +extern u32 gsi_top; + extern unsigned long io_apic_irqs; #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1 << (x)) & io_apic_irqs)) @@ -174,15 +173,8 @@ struct ioapic_domain_cfg { struct device_node *dev; }; -struct mp_ioapic_gsi{ - u32 gsi_base; - u32 gsi_end; -}; -extern u32 gsi_top; - extern int mp_find_ioapic(u32 gsi); extern int mp_find_ioapic_pin(int ioapic, u32 gsi); -extern u32 mp_pin_to_gsi(int ioapic, int pin); extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, struct irq_alloc_info *info); extern void mp_unmap_irq(int irq); @@ -231,7 +223,6 @@ static inline int arch_early_ioapic_init(void) { return 0; } static inline void print_IO_APICs(void) {} #define gsi_top (NR_IRQS_LEGACY) static inline int mp_find_ioapic(u32 gsi) { return 0; } -static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; } static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, struct irq_alloc_info *info) { diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3029502b0a50..4c7da8483398 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -63,7 +63,6 @@ #define for_each_ioapic_pin(idx, pin) \ for_each_ioapic((idx)) \ for_each_pin((idx), (pin)) - #define for_each_irq_pin(entry, head) \ list_for_each_entry(entry, &head, list) @@ -92,6 +91,11 @@ struct mp_chip_data { bool isa_irq; }; +struct mp_ioapic_gsi { + u32 gsi_base; + u32 gsi_end; +}; + static struct ioapic { /* * # of IRQ routing registers @@ -122,7 +126,7 @@ unsigned int mpc_ioapic_addr(int ioapic_idx) return ioapics[ioapic_idx].mp_config.apicaddr; } -struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx) +static inline struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx) { return &ioapics[ioapic_idx].gsi_config; } @@ -134,7 +138,7 @@ static inline int mp_ioapic_pin_count(int ioapic) return gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; } -u32 mp_pin_to_gsi(int ioapic, int pin) +static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return mp_ioapic_gsi_routing(ioapic)->gsi_base + pin; } @@ -1153,8 +1157,7 @@ static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags) return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, NULL); } -int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, - struct irq_alloc_info *info) +int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, struct irq_alloc_info *info) { int ioapic, pin, idx; @@ -1719,7 +1722,6 @@ static int __init timer_irq_works(void) * This is not complete - we should be able to fake * an edge even if it isn't on the 8259A... */ - static unsigned int startup_ioapic_irq(struct irq_data *data) { int was_pending = 0, irq = data->irq; @@ -1737,15 +1739,6 @@ static unsigned int startup_ioapic_irq(struct irq_data *data) return was_pending; } -/* - * Level and edge triggered IO-APIC interrupts need different handling, - * so we use two separate IRQ descriptors. Edge triggered IRQs can be - * handled with the level-triggered descriptor, but that one has slightly - * more overhead. Level-triggered interrupts cannot be handled with the - * edge-triggered handler, without risking IRQ storms and other ugly - * races. - */ - static void __target_IO_APIC_irq(unsigned int irq, struct irq_cfg *cfg, struct mp_chip_data *data) { -- cgit v1.2.3 From 1f934641294ca2e09016c689862378fbb15da4d4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 10:29:58 +0800 Subject: x86/irq: Remove sis apic bug workaround The SiS apic bug workaround is now obsolete as we cache the register values for performance reasons. Signed-off-by: Thomas Gleixner Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Grant Likely Link: http://lkml.kernel.org/r/1428978610-28986-22-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 3 --- arch/x86/kernel/apic/io_apic.c | 35 ++++++++++------------------------- drivers/pci/quirks.c | 7 ------- 3 files changed, 10 insertions(+), 35 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 9b9050129ee7..d58f1c6b5608 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -120,9 +120,6 @@ extern int mp_irq_entries; /* MP IRQ source entries */ extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; -/* Older SiS APIC requires we rewrite the index register */ -extern int sis_apic_bug; - /* 1 if "noapic" boot option passed */ extern int skip_ioapic_setup; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 4fb347f01653..9806f9605bc4 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -18,6 +18,16 @@ * and Rolf G. Tews * for testing these extensively * Paul Diefenbaugh : Added full ACPI support + * + * Historical information which is worth to be preserved: + * + * - SiS APIC rmw bug: + * + * We used to have a workaround for a bug in SiS chips which + * required to rewrite the index register for a read-modify-write + * operation as the chip lost the index information which was + * setup for the read already. We cache the data now, so that + * workaround has been removed. */ #include @@ -66,17 +76,6 @@ #define for_each_irq_pin(entry, head) \ list_for_each_entry(entry, &head, list) -/* - * Is the SiS APIC rmw bug present ? - * -1 = don't know, 0 = no, 1 = yes - * When doing a read-modify-write operation on IOAPIC registers, older SiS APIC - * requires we rewrite the index register again where the read already set up - * the index register. - * The code to make use of sis_apic_bug has been removed, but we don't want to - * lose this knowledge. - */ -int sis_apic_bug = -1; - static DEFINE_RAW_SPINLOCK(ioapic_lock); static DEFINE_MUTEX(ioapic_mutex); static unsigned int ioapic_dynirq_base; @@ -2320,20 +2319,6 @@ void __init setup_IO_APIC(void) ioapic_initialized = 1; } -/* - * Called after all the initialization is done. If we didn't find any - * APIC bugs then we can allow the modify fast path - */ - -static int __init io_apic_bug_finalize(void) -{ - if (sis_apic_bug == -1) - sis_apic_bug = 0; - return 0; -} - -late_initcall(io_apic_bug_finalize); - static void resume_ioapic_id(int ioapic_idx) { unsigned long flags; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index c6dc1dfd25d5..2890ad7cf7c6 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -819,13 +819,6 @@ static void quirk_amd_ioapic(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7410, quirk_amd_ioapic); - -static void quirk_ioapic_rmw(struct pci_dev *dev) -{ - if (dev->devfn == 0 && dev->bus->number == 0) - sis_apic_bug = 1; -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SI, PCI_ANY_ID, quirk_ioapic_rmw); #endif /* CONFIG_X86_IO_APIC */ /* -- cgit v1.2.3 From a2cbbb47fd90ef1161ce22b099de5c6095f8365f Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:29:59 +0800 Subject: x86/irq: Remove unused alloc_irq_and_cfg_at() There's no caller of alloc_irq_and_cfg_at() anymore, so remove it. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Link: http://lkml.kernel.org/r/1428978610-28986-23-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hw_irq.h | 1 - arch/x86/kernel/apic/vector.c | 21 --------------------- 2 files changed, 22 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index f000b58cbc0c..bb2c990b97ef 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -186,7 +186,6 @@ extern void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src); extern struct irq_cfg *irq_cfg(unsigned int irq); extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); -extern struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node); extern void lock_vector_lock(void); extern void unlock_vector_lock(void); extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index af224e6774d8..51cd46bfc46e 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -76,27 +76,6 @@ out_cfg: return NULL; } -struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) -{ - int res = irq_alloc_desc_at(at, node); - struct irq_cfg *cfg; - - if (res < 0) { - if (res != -EEXIST) - return NULL; - cfg = irq_cfg(at); - if (cfg) - return cfg; - } - - cfg = alloc_irq_cfg(node); - if (cfg) - irq_set_chip_data(at, cfg); - else - irq_free_desc(at); - return cfg; -} - static void free_irq_cfg(struct irq_cfg *cfg) { if (cfg) { -- cgit v1.2.3 From f970510cc55e41d21ca30feb56873aaeb57ec18d Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:30:00 +0800 Subject: x86/irq: Make functions only used in vector.c static Function {assign|clear}_irq_vector() and apic_retrigger_irq() are only used in vector.c, so make them static. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Link: http://lkml.kernel.org/r/1428978610-28986-24-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hw_irq.h | 3 --- arch/x86/kernel/apic/vector.c | 7 ++++--- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index bb2c990b97ef..9a797682fef5 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -188,8 +188,6 @@ extern struct irq_cfg *irq_cfg(unsigned int irq); extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); extern void lock_vector_lock(void); extern void unlock_vector_lock(void); -extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); -extern void clear_irq_vector(int irq, struct irq_cfg *cfg); extern void setup_vector_irq(int cpu); #ifdef CONFIG_SMP extern void send_cleanup_vector(struct irq_cfg *); @@ -199,7 +197,6 @@ static inline void send_cleanup_vector(struct irq_cfg *c) { } static inline void irq_complete_move(struct irq_cfg *c) { } #endif -extern int apic_retrigger_irq(struct irq_data *data); extern void apic_ack_edge(struct irq_data *data); extern int apic_set_affinity(struct irq_data *data, const struct cpumask *mask, unsigned int *dest_id); diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 51cd46bfc46e..d52af4d805db 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -185,7 +185,8 @@ next: return err; } -int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +static int assign_irq_vector(int irq, struct irq_cfg *cfg, + const struct cpumask *mask) { int err; unsigned long flags; @@ -196,7 +197,7 @@ int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) return err; } -void clear_irq_vector(int irq, struct irq_cfg *cfg) +static void clear_irq_vector(int irq, struct irq_cfg *cfg) { int cpu, vector; unsigned long flags; @@ -441,7 +442,7 @@ void setup_vector_irq(int cpu) __setup_vector_irq(cpu); } -int apic_retrigger_irq(struct irq_data *data) +static int apic_retrigger_irq(struct irq_data *data) { struct irq_cfg *cfg = irqd_cfg(data); unsigned long flags; -- cgit v1.2.3 From 68f9f4404d74f859dc84973db8731b41a51d929a Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:30:01 +0800 Subject: x86/irq: Remove function apic_set_affinity() Now there's no user of apic_set_affinity(), so remove it. Also rename vector_set_affinity() to apic_set_affinity() for consistency. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Link: http://lkml.kernel.org/r/1428978610-28986-25-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hw_irq.h | 2 -- arch/x86/kernel/apic/vector.c | 40 +++------------------------------------- 2 files changed, 3 insertions(+), 39 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 9a797682fef5..727c62378a65 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -198,8 +198,6 @@ static inline void irq_complete_move(struct irq_cfg *c) { } #endif extern void apic_ack_edge(struct irq_data *data); -extern int apic_set_affinity(struct irq_data *data, const struct cpumask *mask, - unsigned int *dest_id); #else /* CONFIG_X86_LOCAL_APIC */ static inline void lock_vector_lock(void) {} static inline void unlock_vector_lock(void) {} diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index d52af4d805db..1aea62d60cf2 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -463,42 +463,8 @@ void apic_ack_edge(struct irq_data *data) ack_APIC_irq(); } -/* - * Either sets data->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and - * leaves data->affinity untouched. - */ -int apic_set_affinity(struct irq_data *data, const struct cpumask *mask, - unsigned int *dest_id) -{ - struct irq_cfg *cfg = irqd_cfg(data); - unsigned int irq = data->irq; - int err; - - if (!config_enabled(CONFIG_SMP)) - return -EPERM; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return -EINVAL; - - err = assign_irq_vector(irq, cfg, mask); - if (err) - return err; - - err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id); - if (err) { - if (assign_irq_vector(irq, cfg, data->affinity)) - pr_err("Failed to recover vector for irq %d\n", irq); - return err; - } - - cpumask_copy(data->affinity, mask); - - return 0; -} - -static int vector_set_affinity(struct irq_data *irq_data, - const struct cpumask *dest, bool force) +static int apic_set_affinity(struct irq_data *irq_data, + const struct cpumask *dest, bool force) { struct irq_cfg *cfg = irq_data->chip_data; int err, irq = irq_data->irq; @@ -523,7 +489,7 @@ static int vector_set_affinity(struct irq_data *irq_data, static struct irq_chip lapic_controller = { .irq_ack = apic_ack_edge, - .irq_set_affinity = vector_set_affinity, + .irq_set_affinity = apic_set_affinity, .irq_retrigger = apic_retrigger_irq, }; -- cgit v1.2.3 From 7f3262edcdf623296b514377d52911b115c7ab49 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:30:03 +0800 Subject: x86/irq: Move private data in struct irq_cfg into dedicated data structure Several fields in struct irq_cfg are private to vector.c, so move it into dedicated data structure. This helps to hide implementation details. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Link: http://lkml.kernel.org/r/1428978610-28986-27-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Link: http://lkml.kernel.org/r/1416901802-24211-35-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner Tested-by: Joerg Roedel --- arch/x86/include/asm/hw_irq.h | 3 - arch/x86/kernel/apic/vector.c | 221 +++++++++++++++++++++++------------------- 2 files changed, 119 insertions(+), 105 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 727c62378a65..3b8233a26348 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -171,11 +171,8 @@ enum { }; struct irq_cfg { - cpumask_var_t domain; - cpumask_var_t old_domain; unsigned int dest_apicid; u8 vector; - u8 move_in_progress : 1; }; extern struct irq_domain *x86_vector_domain; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 0092a6e0d5ee..60047495041c 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -21,11 +21,18 @@ #include #include +struct apic_chip_data { + struct irq_cfg cfg; + cpumask_var_t domain; + cpumask_var_t old_domain; + u8 move_in_progress : 1; +}; + struct irq_domain *x86_vector_domain; static DEFINE_RAW_SPINLOCK(vector_lock); static struct irq_chip lapic_controller; #ifdef CONFIG_X86_IO_APIC -static struct irq_cfg *legacy_irq_cfgs[NR_IRQS_LEGACY]; +static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY]; #endif void lock_vector_lock(void) @@ -41,12 +48,7 @@ void unlock_vector_lock(void) raw_spin_unlock(&vector_lock); } -struct irq_cfg *irq_cfg(unsigned int irq) -{ - return irqd_cfg(irq_get_irq_data(irq)); -} - -struct irq_cfg *irqd_cfg(struct irq_data *irq_data) +static struct apic_chip_data *apic_chip_data(struct irq_data *irq_data) { if (!irq_data) return NULL; @@ -57,36 +59,48 @@ struct irq_cfg *irqd_cfg(struct irq_data *irq_data) return irq_data->chip_data; } -static struct irq_cfg *alloc_irq_cfg(int node) +struct irq_cfg *irqd_cfg(struct irq_data *irq_data) +{ + struct apic_chip_data *data = apic_chip_data(irq_data); + + return data ? &data->cfg : NULL; +} + +struct irq_cfg *irq_cfg(unsigned int irq) { - struct irq_cfg *cfg; + return irqd_cfg(irq_get_irq_data(irq)); +} - cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node); - if (!cfg) +static struct apic_chip_data *alloc_apic_chip_data(int node) +{ + struct apic_chip_data *data; + + data = kzalloc_node(sizeof(*data), GFP_KERNEL, node); + if (!data) return NULL; - if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node)) - goto out_cfg; - if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node)) + if (!zalloc_cpumask_var_node(&data->domain, GFP_KERNEL, node)) + goto out_data; + if (!zalloc_cpumask_var_node(&data->old_domain, GFP_KERNEL, node)) goto out_domain; - return cfg; + return data; out_domain: - free_cpumask_var(cfg->domain); -out_cfg: - kfree(cfg); + free_cpumask_var(data->domain); +out_data: + kfree(data); return NULL; } -static void free_irq_cfg(struct irq_cfg *cfg) +static void free_apic_chip_data(struct apic_chip_data *data) { - if (cfg) { - free_cpumask_var(cfg->domain); - free_cpumask_var(cfg->old_domain); - kfree(cfg); + if (data) { + free_cpumask_var(data->domain); + free_cpumask_var(data->old_domain); + kfree(data); } } -static int -__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +static int __assign_irq_vector(int irq, struct apic_chip_data *d, + const struct cpumask *mask) { /* * NOTE! The local APIC isn't very good at handling @@ -104,7 +118,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) int cpu, err; cpumask_var_t tmp_mask; - if (cfg->move_in_progress) + if (d->move_in_progress) return -EBUSY; if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) @@ -112,26 +126,26 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) /* Only try and allocate irqs on cpus that are present */ err = -ENOSPC; - cpumask_clear(cfg->old_domain); + cpumask_clear(d->old_domain); cpu = cpumask_first_and(mask, cpu_online_mask); while (cpu < nr_cpu_ids) { int new_cpu, vector, offset; apic->vector_allocation_domain(cpu, tmp_mask, mask); - if (cpumask_subset(tmp_mask, cfg->domain)) { + if (cpumask_subset(tmp_mask, d->domain)) { err = 0; - if (cpumask_equal(tmp_mask, cfg->domain)) + if (cpumask_equal(tmp_mask, d->domain)) break; /* * New cpumask using the vector is a proper subset of * the current in use mask. So cleanup the vector * allocation for the members that are not used anymore. */ - cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask); - cfg->move_in_progress = - cpumask_intersects(cfg->old_domain, cpu_online_mask); - cpumask_and(cfg->domain, cfg->domain, tmp_mask); + cpumask_andnot(d->old_domain, d->domain, tmp_mask); + d->move_in_progress = + cpumask_intersects(d->old_domain, cpu_online_mask); + cpumask_and(d->domain, d->domain, tmp_mask); break; } @@ -145,8 +159,8 @@ next: } if (unlikely(current_vector == vector)) { - cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask); - cpumask_andnot(tmp_mask, mask, cfg->old_domain); + cpumask_or(d->old_domain, d->old_domain, tmp_mask); + cpumask_andnot(tmp_mask, mask, d->old_domain); cpu = cpumask_first_and(tmp_mask, cpu_online_mask); continue; } @@ -162,15 +176,15 @@ next: /* Found one! */ current_vector = vector; current_offset = offset; - if (cfg->vector) { - cpumask_copy(cfg->old_domain, cfg->domain); - cfg->move_in_progress = - cpumask_intersects(cfg->old_domain, cpu_online_mask); + if (d->cfg.vector) { + cpumask_copy(d->old_domain, d->domain); + d->move_in_progress = + cpumask_intersects(d->old_domain, cpu_online_mask); } for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) per_cpu(vector_irq, new_cpu)[vector] = irq; - cfg->vector = vector; - cpumask_copy(cfg->domain, tmp_mask); + d->cfg.vector = vector; + cpumask_copy(d->domain, tmp_mask); err = 0; break; } @@ -178,46 +192,46 @@ next: if (!err) { /* cache destination APIC IDs into cfg->dest_apicid */ - err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, - &cfg->dest_apicid); + err = apic->cpu_mask_to_apicid_and(mask, d->domain, + &d->cfg.dest_apicid); } return err; } -static int assign_irq_vector(int irq, struct irq_cfg *cfg, +static int assign_irq_vector(int irq, struct apic_chip_data *data, const struct cpumask *mask) { int err; unsigned long flags; raw_spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, cfg, mask); + err = __assign_irq_vector(irq, data, mask); raw_spin_unlock_irqrestore(&vector_lock, flags); return err; } -static void clear_irq_vector(int irq, struct irq_cfg *cfg) +static void clear_irq_vector(int irq, struct apic_chip_data *data) { int cpu, vector; unsigned long flags; raw_spin_lock_irqsave(&vector_lock, flags); - BUG_ON(!cfg->vector); + BUG_ON(!data->cfg.vector); - vector = cfg->vector; - for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) + vector = data->cfg.vector; + for_each_cpu_and(cpu, data->domain, cpu_online_mask) per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; - cfg->vector = 0; - cpumask_clear(cfg->domain); + data->cfg.vector = 0; + cpumask_clear(data->domain); - if (likely(!cfg->move_in_progress)) { + if (likely(!data->move_in_progress)) { raw_spin_unlock_irqrestore(&vector_lock, flags); return; } - for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { + for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) { for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (per_cpu(vector_irq, cpu)[vector] != irq) @@ -226,7 +240,7 @@ static void clear_irq_vector(int irq, struct irq_cfg *cfg) break; } } - cfg->move_in_progress = 0; + data->move_in_progress = 0; raw_spin_unlock_irqrestore(&vector_lock, flags); } @@ -261,10 +275,10 @@ static void x86_vector_free_irqs(struct irq_domain *domain, irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i); if (irq_data && irq_data->chip_data) { clear_irq_vector(virq + i, irq_data->chip_data); - free_irq_cfg(irq_data->chip_data); + free_apic_chip_data(irq_data->chip_data); #ifdef CONFIG_X86_IO_APIC if (virq + i < nr_legacy_irqs()) - legacy_irq_cfgs[virq + i] = NULL; + legacy_irq_data[virq + i] = NULL; #endif irq_domain_reset_irq_data(irq_data); } @@ -275,9 +289,9 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *arg) { struct irq_alloc_info *info = arg; + struct apic_chip_data *data; const struct cpumask *mask; struct irq_data *irq_data; - struct irq_cfg *cfg; int i, err; if (disable_apic) @@ -292,20 +306,20 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, irq_data = irq_domain_get_irq_data(domain, virq + i); BUG_ON(!irq_data); #ifdef CONFIG_X86_IO_APIC - if (virq + i < nr_legacy_irqs() && legacy_irq_cfgs[virq + i]) - cfg = legacy_irq_cfgs[virq + i]; + if (virq + i < nr_legacy_irqs() && legacy_irq_data[virq + i]) + data = legacy_irq_data[virq + i]; else #endif - cfg = alloc_irq_cfg(irq_data->node); - if (!cfg) { + data = alloc_apic_chip_data(irq_data->node); + if (!data) { err = -ENOMEM; goto error; } irq_data->chip = &lapic_controller; - irq_data->chip_data = cfg; + irq_data->chip_data = data; irq_data->hwirq = virq + i; - err = assign_irq_vector(virq, cfg, mask); + err = assign_irq_vector(virq, data, mask); if (err) goto error; } @@ -349,22 +363,22 @@ int __init arch_probe_nr_irqs(void) static void init_legacy_irqs(void) { int i, node = cpu_to_node(0); - struct irq_cfg *cfg; + struct apic_chip_data *data; /* * For legacy IRQ's, start with assigning irq0 to irq15 to * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's. */ for (i = 0; i < nr_legacy_irqs(); i++) { - cfg = legacy_irq_cfgs[i] = alloc_irq_cfg(node); - BUG_ON(!cfg); + data = legacy_irq_data[i] = alloc_apic_chip_data(node); + BUG_ON(!data); /* * For legacy IRQ's, start with assigning irq0 to irq15 to * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's. */ - cfg->vector = IRQ0_VECTOR + i; - cpumask_setall(cfg->domain); - irq_set_chip_data(i, cfg); + data->cfg.vector = IRQ0_VECTOR + i; + cpumask_setall(data->domain); + irq_set_chip_data(i, data); } } #else @@ -390,7 +404,7 @@ static void __setup_vector_irq(int cpu) { /* Initialize vector_irq on a new cpu */ int irq, vector; - struct irq_cfg *cfg; + struct apic_chip_data *data; /* * vector_lock will make sure that we don't run into irq vector @@ -400,13 +414,13 @@ static void __setup_vector_irq(int cpu) raw_spin_lock(&vector_lock); /* Mark the inuse vectors */ for_each_active_irq(irq) { - cfg = irq_cfg(irq); - if (!cfg) + data = apic_chip_data(irq_get_irq_data(irq)); + if (!data) continue; - if (!cpumask_test_cpu(cpu, cfg->domain)) + if (!cpumask_test_cpu(cpu, data->domain)) continue; - vector = cfg->vector; + vector = data->cfg.vector; per_cpu(vector_irq, cpu)[vector] = irq; } /* Mark the free vectors */ @@ -415,8 +429,8 @@ static void __setup_vector_irq(int cpu) if (irq <= VECTOR_UNDEFINED) continue; - cfg = irq_cfg(irq); - if (!cpumask_test_cpu(cpu, cfg->domain)) + data = apic_chip_data(irq_get_irq_data(irq)); + if (!cpumask_test_cpu(cpu, data->domain)) per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; } raw_spin_unlock(&vector_lock); @@ -442,15 +456,15 @@ void setup_vector_irq(int cpu) __setup_vector_irq(cpu); } -static int apic_retrigger_irq(struct irq_data *data) +static int apic_retrigger_irq(struct irq_data *irq_data) { - struct irq_cfg *cfg = irqd_cfg(data); + struct apic_chip_data *data = apic_chip_data(irq_data); unsigned long flags; int cpu; raw_spin_lock_irqsave(&vector_lock, flags); - cpu = cpumask_first_and(cfg->domain, cpu_online_mask); - apic->send_IPI_mask(cpumask_of(cpu), cfg->vector); + cpu = cpumask_first_and(data->domain, cpu_online_mask); + apic->send_IPI_mask(cpumask_of(cpu), data->cfg.vector); raw_spin_unlock_irqrestore(&vector_lock, flags); return 1; @@ -466,7 +480,7 @@ void apic_ack_edge(struct irq_data *data) static int apic_set_affinity(struct irq_data *irq_data, const struct cpumask *dest, bool force) { - struct irq_cfg *cfg = irq_data->chip_data; + struct apic_chip_data *data = irq_data->chip_data; int err, irq = irq_data->irq; if (!config_enabled(CONFIG_SMP)) @@ -475,11 +489,11 @@ static int apic_set_affinity(struct irq_data *irq_data, if (!cpumask_intersects(dest, cpu_online_mask)) return -EINVAL; - err = assign_irq_vector(irq, cfg, dest); + err = assign_irq_vector(irq, data, dest); if (err) { struct irq_data *top = irq_get_irq_data(irq); - if (assign_irq_vector(irq, cfg, top->affinity)) + if (assign_irq_vector(irq, data, top->affinity)) pr_err("Failed to recover vector for irq %d\n", irq); return err; } @@ -494,28 +508,31 @@ static struct irq_chip lapic_controller = { }; #ifdef CONFIG_SMP -static void __send_cleanup_vector(struct irq_cfg *cfg) +static void __send_cleanup_vector(struct apic_chip_data *data) { cpumask_var_t cleanup_mask; if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { unsigned int i; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + for_each_cpu_and(i, data->old_domain, cpu_online_mask) apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); } else { - cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); + cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask); apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); free_cpumask_var(cleanup_mask); } - cfg->move_in_progress = 0; + data->move_in_progress = 0; } void send_cleanup_vector(struct irq_cfg *cfg) { - if (cfg->move_in_progress) - __send_cleanup_vector(cfg); + struct apic_chip_data *data; + + data = container_of(cfg, struct apic_chip_data, cfg); + if (data->move_in_progress) + __send_cleanup_vector(data); } asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) @@ -531,7 +548,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) int irq; unsigned int irr; struct irq_desc *desc; - struct irq_cfg *cfg; + struct apic_chip_data *data; irq = __this_cpu_read(vector_irq[vector]); @@ -542,8 +559,8 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) if (!desc) continue; - cfg = irq_cfg(irq); - if (!cfg) + data = apic_chip_data(&desc->irq_data); + if (!data) continue; raw_spin_lock(&desc->lock); @@ -552,10 +569,11 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) * Check if the irq migration is in progress. If so, we * haven't received the cleanup request yet for this irq. */ - if (cfg->move_in_progress) + if (data->move_in_progress) goto unlock; - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) + if (vector == data->cfg.vector && + cpumask_test_cpu(me, data->domain)) goto unlock; irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); @@ -581,14 +599,15 @@ unlock: static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) { unsigned me; + struct apic_chip_data *data; - if (likely(!cfg->move_in_progress)) + data = container_of(cfg, struct apic_chip_data, cfg); + if (likely(!data->move_in_progress)) return; me = smp_processor_id(); - - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) - __send_cleanup_vector(cfg); + if (vector == data->cfg.vector && cpumask_test_cpu(me, data->domain)) + __send_cleanup_vector(data); } void irq_complete_move(struct irq_cfg *cfg) @@ -600,10 +619,8 @@ void irq_force_complete_move(int irq) { struct irq_cfg *cfg = irq_cfg(irq); - if (!cfg) - return; - - __irq_complete_move(cfg, cfg->vector); + if (cfg) + __irq_complete_move(cfg, cfg->vector); } #endif -- cgit v1.2.3 From 4399b14fa75c8d8225a0739fbcef575f02c6c6a5 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:30:04 +0800 Subject: x86/irq: Refine the way to calculate NR_IRQS Now we have made MSI independent of IOAPIC, so we need to refine the way to calculate NR_IRQS to support configuration with MSI enabled but IOAPIC disabled. Signed-off-by: Jiang Liu Tested-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Jan Beulich Link: http://lkml.kernel.org/r/1428978610-28986-28-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/irq_vectors.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 666c89ec4bd7..b26cb124a4f1 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -155,18 +155,22 @@ static inline int invalid_vm86_irq(int irq) * static arrays. */ -#define NR_IRQS_LEGACY 16 +#define NR_IRQS_LEGACY 16 -#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) +#define CPU_VECTOR_LIMIT (64 * NR_CPUS) +#define IO_APIC_VECTOR_LIMIT (32 * MAX_IO_APICS) -#ifdef CONFIG_X86_IO_APIC -# define CPU_VECTOR_LIMIT (64 * NR_CPUS) -# define NR_IRQS \ +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_PCI_MSI) +#define NR_IRQS \ (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ (NR_VECTORS + CPU_VECTOR_LIMIT) : \ (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) -#else /* !CONFIG_X86_IO_APIC: */ -# define NR_IRQS NR_IRQS_LEGACY +#elif defined(CONFIG_X86_IO_APIC) +#define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) +#elif defined(CONFIG_PCI_MSI) +#define NR_IRQS (NR_VECTORS + CPU_VECTOR_LIMIT) +#else +#define NR_IRQS NR_IRQS_LEGACY #endif #endif /* _ASM_X86_IRQ_VECTORS_H */ -- cgit v1.2.3 From 335efdf57da39d3949c3ef9338de5737e85cbe52 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 10:30:06 +0800 Subject: x86, ioapic: Use proper defines for the entry fields While looking at the printout issue, I stumbled more than once over the various 0/1 assignments which are either commented in strange ways or force to lookup the meaning. Use proper constants and fix the misleading comments. While at it remove pointless 0 assignments in native_disable_io_apic() which have no value for understanding the code. Signed-off-by: Thomas Gleixner Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Grant Likely Cc: x86@kernel.org Link: http://lkml.kernel.org/r/1428978610-28986-30-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 16 +++++-- arch/x86/kernel/apic/io_apic.c | 100 ++++++++++++++++++++--------------------- 2 files changed, 63 insertions(+), 53 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index d58f1c6b5608..984afb732efe 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -98,9 +98,19 @@ struct IR_IO_APIC_route_entry { struct irq_alloc_info; struct irq_data; -#define IOAPIC_AUTO -1 -#define IOAPIC_EDGE 0 -#define IOAPIC_LEVEL 1 +#define IOAPIC_AUTO -1 +#define IOAPIC_EDGE 0 +#define IOAPIC_LEVEL 1 + +#define IOAPIC_MASKED 1 +#define IOAPIC_UNMASKED 0 + +#define IOAPIC_POL_HIGH 0 +#define IOAPIC_POL_LOW 1 + +#define IOAPIC_DEST_MODE_PHYSICAL 0 +#define IOAPIC_DEST_MODE_LOGICAL 1 + #define IOAPIC_MAP_ALLOC 0x1 #define IOAPIC_MAP_CHECK 0x2 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9806f9605bc4..a63167f96126 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -356,7 +356,7 @@ static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) static void ioapic_mask_entry(int apic, int pin) { unsigned long flags; - union entry_union eu = { .entry.mask = 1 }; + union entry_union eu = { .entry.mask = IOAPIC_MASKED }; raw_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x10 + 2*pin, eu.w1); @@ -517,7 +517,7 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector) /* * Mask the entry and change the trigger mode to edge. */ - entry1.mask = 1; + entry1.mask = IOAPIC_MASKED; entry1.trigger = IOAPIC_EDGE; __ioapic_write_entry(apic, pin, entry1); @@ -553,8 +553,8 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) * Make sure the entry is masked and re-read the contents to check * if it is a level triggered pin and if the remote-IRR is set. */ - if (!entry.mask) { - entry.mask = 1; + if (entry.mask == IOAPIC_UNMASKED) { + entry.mask = IOAPIC_MASKED; ioapic_write_entry(apic, pin, entry); entry = ioapic_read_entry(apic, pin); } @@ -567,7 +567,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) * doesn't clear the remote-IRR if the trigger mode is not * set to level. */ - if (!entry.trigger) { + if (entry.trigger == IOAPIC_EDGE) { entry.trigger = IOAPIC_LEVEL; ioapic_write_entry(apic, pin, entry); } @@ -670,8 +670,8 @@ void mask_ioapic_entries(void) struct IO_APIC_route_entry entry; entry = ioapics[apic].saved_registers[pin]; - if (!entry.mask) { - entry.mask = 1; + if (entry.mask == IOAPIC_UNMASKED) { + entry.mask = IOAPIC_MASKED; ioapic_write_entry(apic, pin, entry); } } @@ -773,11 +773,11 @@ static int EISA_ELCR(unsigned int irq) #endif -/* ISA interrupts are always polarity zero edge triggered, +/* ISA interrupts are always active high edge triggered, * when listed as conforming in the MP table. */ -#define default_ISA_trigger(idx) (0) -#define default_ISA_polarity(idx) (0) +#define default_ISA_trigger(idx) (IOAPIC_EDGE) +#define default_ISA_polarity(idx) (IOAPIC_POL_HIGH) /* EISA interrupts are always polarity zero and can be edge or level * trigger depending on the ELCR value. If an interrupt is listed as @@ -787,11 +787,11 @@ static int EISA_ELCR(unsigned int irq) #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq)) #define default_EISA_polarity(idx) default_ISA_polarity(idx) -/* PCI interrupts are always polarity one level triggered, +/* PCI interrupts are always active low level triggered, * when listed as conforming in the MP table. */ -#define default_PCI_trigger(idx) (1) -#define default_PCI_polarity(idx) (1) +#define default_PCI_trigger(idx) (IOAPIC_LEVEL) +#define default_PCI_polarity(idx) (IOAPIC_POL_LOW) static int irq_polarity(int idx) { @@ -811,24 +811,24 @@ static int irq_polarity(int idx) break; case 1: /* high active */ { - polarity = 0; + polarity = IOAPIC_POL_HIGH; break; } case 2: /* reserved */ { pr_warn("broken BIOS!!\n"); - polarity = 1; + polarity = IOAPIC_POL_LOW; break; } case 3: /* low active */ { - polarity = 1; + polarity = IOAPIC_POL_LOW; break; } default: /* invalid */ { pr_warn("broken BIOS!!\n"); - polarity = 1; + polarity = IOAPIC_POL_LOW; break; } } @@ -870,7 +870,7 @@ static int irq_trigger(int idx) default: { pr_warn("broken BIOS!!\n"); - trigger = 1; + trigger = IOAPIC_LEVEL; break; } } @@ -878,24 +878,24 @@ static int irq_trigger(int idx) break; case 1: /* edge */ { - trigger = 0; + trigger = IOAPIC_EDGE; break; } case 2: /* reserved */ { pr_warn("broken BIOS!!\n"); - trigger = 1; + trigger = IOAPIC_LEVEL; break; } case 3: /* level */ { - trigger = 1; + trigger = IOAPIC_LEVEL; break; } default: /* invalid */ { pr_warn("broken BIOS!!\n"); - trigger = 0; + trigger = IOAPIC_EDGE; break; } } @@ -939,11 +939,11 @@ static void ioapic_copy_alloc_attr(struct irq_alloc_info *dst, dst->ioapic_polarity = polarity; } else { /* - * PCI interrupts are always polarity one level + * PCI interrupts are always active low level * triggered. */ - dst->ioapic_trigger = 1; - dst->ioapic_polarity = 1; + dst->ioapic_trigger = IOAPIC_LEVEL; + dst->ioapic_polarity = IOAPIC_POL_LOW; } } } @@ -1296,9 +1296,10 @@ static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries) entry = ioapic_read_entry(apic, i); snprintf(buf, sizeof(buf), " pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)", - i, entry.mask ? "disabled" : "enabled ", - entry.trigger ? "level" : "edge ", - entry.polarity ? "low " : "high", + i, + entry.mask == IOAPIC_MASKED ? "disabled" : "enabled ", + entry.trigger == IOAPIC_LEVEL ? "level" : "edge ", + entry.polarity == IOAPIC_POL_LOW ? "low " : "high", entry.vector, entry.irr, entry.delivery_status); if (ir_entry->format) printk(KERN_DEBUG "%s, remapped, I(%04X), Z(%X)\n", @@ -1306,7 +1307,9 @@ static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries) ir_entry->zero); else printk(KERN_DEBUG "%s, %s, D(%02X), M(%1d)\n", - buf, entry.dest_mode ? "logical " : "physical", + buf, + entry.dest_mode == IOAPIC_DEST_MODE_LOGICAL ? + "logical " : "physical", entry.dest, entry.delivery_mode); } } @@ -1476,15 +1479,12 @@ void native_disable_io_apic(void) struct IO_APIC_route_entry entry; memset(&entry, 0, sizeof(entry)); - entry.mask = 0; /* Enabled */ - entry.trigger = 0; /* Edge */ - entry.irr = 0; - entry.polarity = 0; /* High */ - entry.delivery_status = 0; - entry.dest_mode = 0; /* Physical */ - entry.delivery_mode = dest_ExtINT; /* ExtInt */ - entry.vector = 0; - entry.dest = read_apic_id(); + entry.mask = IOAPIC_UNMASKED; + entry.trigger = IOAPIC_EDGE; + entry.polarity = IOAPIC_POL_HIGH; + entry.dest_mode = IOAPIC_DEST_MODE_PHYSICAL; + entry.delivery_mode = dest_ExtINT; + entry.dest = read_apic_id(); /* * Add it to the IO-APIC irq-routing table: @@ -1494,7 +1494,6 @@ void native_disable_io_apic(void) if (cpu_has_apic || apic_from_smp_config()) disconnect_bsp_APIC(ioapic_i8259.pin != -1); - } /* @@ -2018,12 +2017,12 @@ static inline void __init unlock_ExtINT_logic(void) memset(&entry1, 0, sizeof(entry1)); - entry1.dest_mode = 0; /* physical delivery */ - entry1.mask = 0; /* unmask IRQ now */ + entry1.dest_mode = IOAPIC_DEST_MODE_PHYSICAL; + entry1.mask = IOAPIC_UNMASKED; entry1.dest = hard_smp_processor_id(); entry1.delivery_mode = dest_ExtINT; entry1.polarity = entry0.polarity; - entry1.trigger = 0; + entry1.trigger = IOAPIC_EDGE; entry1.vector = 0; ioapic_write_entry(apic, pin, entry1); @@ -2911,9 +2910,9 @@ static void mp_irqdomain_get_attr(u32 gsi, struct mp_chip_data *data, data->polarity = info->ioapic_polarity; } else if (acpi_get_override_irq(gsi, &data->trigger, &data->polarity) < 0) { - /* PCI interrupts are always polarity one level triggered. */ - data->trigger = 1; - data->polarity = 1; + /* PCI interrupts are always active low level triggered. */ + data->trigger = IOAPIC_LEVEL; + data->polarity = IOAPIC_POL_LOW; } } @@ -2925,15 +2924,16 @@ static void mp_setup_entry(struct irq_cfg *cfg, struct mp_chip_data *data, entry->dest_mode = apic->irq_dest_mode; entry->dest = cfg->dest_apicid; entry->vector = cfg->vector; - entry->mask = 0; /* enable IRQ */ entry->trigger = data->trigger; entry->polarity = data->polarity; /* - * Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + * Mask level triggered irqs. Edge triggered irqs are masked + * by the irq core code in case they fire. */ - if (data->trigger) - entry->mask = 1; + if (data->trigger == IOAPIC_LEVEL) + entry->mask = IOAPIC_MASKED; + else + entry->mask = IOAPIC_UNMASKED; } int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, -- cgit v1.2.3 From f7a0c78669ee79443a91ea89652766c1be8d9e04 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 10:30:08 +0800 Subject: x86: Cleanup irq_domain ops We have 3 identical copies of the ioapic domain ops for acpi, mpparse, and sfi. Have a global one in the io_apic code and be done with it. To avoid include hell in io_apic.h, create a private irqdomain header and include the generic irqdomain header from there. Signed-off-by: Thomas Gleixner Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: sfi-devel@simplefirmware.org Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Len Brown Cc: Pavel Machek Cc: Grant Likely Cc: Rob Herring Cc: x86@kernel.org Link: http://lkml.kernel.org/r/1428978610-28986-32-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_apic.h | 29 ++--------------------------- arch/x86/include/asm/irqdomain.h | 34 ++++++++++++++++++++++++++++++++++ arch/x86/kernel/acpi/boot.c | 13 +++---------- arch/x86/kernel/apic/io_apic.c | 9 ++++++++- arch/x86/kernel/devicetree.c | 12 ++++++------ arch/x86/kernel/mpparse.c | 9 +-------- arch/x86/platform/sfi/sfi.c | 10 ++-------- 7 files changed, 56 insertions(+), 60 deletions(-) create mode 100644 arch/x86/include/asm/irqdomain.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 984afb732efe..6cbf2cfb3f8a 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -96,7 +96,7 @@ struct IR_IO_APIC_route_entry { } __attribute__ ((packed)); struct irq_alloc_info; -struct irq_data; +struct ioapic_domain_cfg; #define IOAPIC_AUTO -1 #define IOAPIC_EDGE 0 @@ -163,23 +163,6 @@ extern int restore_ioapic_entries(void); extern void setup_ioapic_ids_from_mpc(void); extern void setup_ioapic_ids_from_mpc_nocheck(void); -enum ioapic_domain_type { - IOAPIC_DOMAIN_INVALID, - IOAPIC_DOMAIN_LEGACY, - IOAPIC_DOMAIN_STRICT, - IOAPIC_DOMAIN_DYNAMIC, -}; - -struct device_node; -struct irq_domain; -struct irq_domain_ops; - -struct ioapic_domain_cfg { - enum ioapic_domain_type type; - const struct irq_domain_ops *ops; - struct device_node *dev; -}; - extern int mp_find_ioapic(u32 gsi); extern int mp_find_ioapic_pin(int ioapic, u32 gsi); extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, @@ -189,15 +172,7 @@ extern int mp_register_ioapic(int id, u32 address, u32 gsi_base, struct ioapic_domain_cfg *cfg); extern int mp_unregister_ioapic(u32 gsi_base); extern int mp_ioapic_registered(u32 gsi_base); -extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, - unsigned int nr_irqs, void *arg); -extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, - unsigned int nr_irqs); -extern void mp_irqdomain_activate(struct irq_domain *domain, - struct irq_data *irq_data); -extern void mp_irqdomain_deactivate(struct irq_domain *domain, - struct irq_data *irq_data); -extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); + extern void ioapic_set_alloc_attr(struct irq_alloc_info *info, int node, int trigger, int polarity); diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h new file mode 100644 index 000000000000..fe0d4c6636ec --- /dev/null +++ b/arch/x86/include/asm/irqdomain.h @@ -0,0 +1,34 @@ +#ifndef _ASM_IRQDOMAIN_H +#define _ASM_IRQDOMAIN_H + +#include + +enum ioapic_domain_type { + IOAPIC_DOMAIN_INVALID, + IOAPIC_DOMAIN_LEGACY, + IOAPIC_DOMAIN_STRICT, + IOAPIC_DOMAIN_DYNAMIC, +}; + +struct device_node; +struct irq_data; + +struct ioapic_domain_cfg { + enum ioapic_domain_type type; + const struct irq_domain_ops *ops; + struct device_node *dev; +}; + +extern const struct irq_domain_ops mp_ioapic_irqdomain_ops; + +extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg); +extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs); +extern void mp_irqdomain_activate(struct irq_domain *domain, + struct irq_data *irq_data); +extern void mp_irqdomain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data); +extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); + +#endif diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 91a10120ed10..cb9f6f12246b 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -31,12 +31,12 @@ #include #include #include -#include #include #include #include #include +#include #include #include #include @@ -400,20 +400,13 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, return 0; } -static struct irq_domain_ops acpi_irqdomain_ops = { - .alloc = mp_irqdomain_alloc, - .free = mp_irqdomain_free, - .activate = mp_irqdomain_activate, - .deactivate = mp_irqdomain_deactivate, -}; - static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) { struct acpi_madt_io_apic *ioapic = NULL; struct ioapic_domain_cfg cfg = { .type = IOAPIC_DOMAIN_DYNAMIC, - .ops = &acpi_irqdomain_ops, + .ops = &mp_ioapic_irqdomain_ops, }; ioapic = (struct acpi_madt_io_apic *)header; @@ -764,7 +757,7 @@ int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) u64 addr; struct ioapic_domain_cfg cfg = { .type = IOAPIC_DOMAIN_DYNAMIC, - .ops = &acpi_irqdomain_ops, + .ops = &mp_ioapic_irqdomain_ops, }; ioapic_id = acpi_get_ioapic_id(handle, gsi_base, &addr); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9fcca68b183d..845dc0df2002 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -41,13 +41,13 @@ #include #include #include -#include #include #include #include /* time_after() */ #include #include +#include #include #include #include @@ -2995,3 +2995,10 @@ int mp_irqdomain_ioapic_idx(struct irq_domain *domain) { return (int)(long)domain->host_data; } + +const struct irq_domain_ops mp_ioapic_irqdomain_ops = { + .alloc = mp_irqdomain_alloc, + .free = mp_irqdomain_free, + .activate = mp_irqdomain_activate, + .deactivate = mp_irqdomain_deactivate, +}; diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 05103d398ed7..5ee771859b6f 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -17,6 +16,7 @@ #include #include +#include #include #include #include @@ -216,11 +216,11 @@ static int dt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, return mp_irqdomain_alloc(domain, virq, nr_irqs, &tmp); } -const struct irq_domain_ops ioapic_irq_domain_ops = { - .alloc = dt_irqdomain_alloc, - .free = mp_irqdomain_free, - .activate = mp_irqdomain_activate, - .deactivate = mp_irqdomain_deactivate, +static const struct irq_domain_ops ioapic_irq_domain_ops = { + .alloc = dt_irqdomain_alloc, + .free = mp_irqdomain_free, + .activate = mp_irqdomain_activate, + .deactivate = mp_irqdomain_deactivate, }; static void __init dtb_add_ioapic(struct device_node *dn) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index aa4feee74dbe..30ca7607cbbb 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -19,8 +19,8 @@ #include #include #include -#include +#include #include #include #include @@ -113,13 +113,6 @@ static void __init MP_bus_info(struct mpc_bus *m) pr_warn("Unknown bustype %s - ignoring\n", str); } -static struct irq_domain_ops mp_ioapic_irqdomain_ops = { - .alloc = mp_irqdomain_alloc, - .free = mp_irqdomain_free, - .activate = mp_irqdomain_activate, - .deactivate = mp_irqdomain_deactivate, -}; - static void __init MP_ioapic_info(struct mpc_ioapic *m) { struct ioapic_domain_cfg cfg = { diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c index b66b194f9900..6c7111bbd1e9 100644 --- a/arch/x86/platform/sfi/sfi.c +++ b/arch/x86/platform/sfi/sfi.c @@ -25,8 +25,8 @@ #include #include #include -#include +#include #include #include #include @@ -71,12 +71,6 @@ static int __init sfi_parse_cpus(struct sfi_table_header *table) #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_IO_APIC -static struct irq_domain_ops sfi_ioapic_irqdomain_ops = { - .alloc = mp_irqdomain_alloc, - .free = mp_irqdomain_free, - .activate = mp_irqdomain_activate, - .deactivate = mp_irqdomain_deactivate, -}; static int __init sfi_parse_ioapic(struct sfi_table_header *table) { @@ -85,7 +79,7 @@ static int __init sfi_parse_ioapic(struct sfi_table_header *table) int i, num; struct ioapic_domain_cfg cfg = { .type = IOAPIC_DOMAIN_STRICT, - .ops = &sfi_ioapic_irqdomain_ops, + .ops = &mp_ioapic_irqdomain_ops, }; sb = (struct sfi_table_simple *)table; -- cgit v1.2.3 From d746d1ebd30c48562a3fb512ab18d5822f137820 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 14 Apr 2015 10:30:09 +0800 Subject: x86/irq: Move irqdomain specific code into asm/irqdomain.h Now we have dedicated asm/irqdomain.h, so move irqdomain specific code into it. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Joerg Roedel Cc: Andy Lutomirski Link: http://lkml.kernel.org/r/1428978610-28986-33-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hw_irq.h | 24 ------------------------ arch/x86/include/asm/irq_remapping.h | 2 +- arch/x86/include/asm/irqdomain.h | 35 ++++++++++++++++++++++++++++++++--- arch/x86/kernel/apic/htirq.c | 2 +- arch/x86/kernel/apic/msi.c | 2 +- arch/x86/kernel/apic/vector.c | 2 +- arch/x86/kernel/hpet.c | 2 +- arch/x86/platform/uv/uv_irq.c | 2 +- 8 files changed, 38 insertions(+), 33 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 3b8233a26348..1f88e719fa78 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -94,8 +94,6 @@ extern void trace_call_function_single_interrupt(void); #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi #endif /* CONFIG_TRACING */ -struct irq_domain; - #ifdef CONFIG_X86_LOCAL_APIC struct irq_data; struct pci_dev; @@ -165,22 +163,11 @@ struct irq_alloc_info { }; }; -enum { - /* Allocate contiguous CPU vectors */ - X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1, -}; - struct irq_cfg { unsigned int dest_apicid; u8 vector; }; -extern struct irq_domain *x86_vector_domain; - -extern void init_irq_alloc_info(struct irq_alloc_info *info, - const struct cpumask *mask); -extern void copy_irq_alloc_info(struct irq_alloc_info *dst, - struct irq_alloc_info *src); extern struct irq_cfg *irq_cfg(unsigned int irq); extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); extern void lock_vector_lock(void); @@ -200,17 +187,6 @@ static inline void lock_vector_lock(void) {} static inline void unlock_vector_lock(void) {} #endif /* CONFIG_X86_LOCAL_APIC */ -#ifdef CONFIG_PCI_MSI -extern void arch_init_msi_domain(struct irq_domain *domain); -#else -static inline void arch_init_msi_domain(struct irq_domain *domain) { } -#endif -#ifdef CONFIG_HT_IRQ -extern void arch_init_htirq_domain(struct irq_domain *domain); -#else -static inline void arch_init_htirq_domain(struct irq_domain *domain) { } -#endif - /* Statistics */ extern atomic_t irq_err_count; extern atomic_t irq_mis_count; diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 09efa358c831..78974fbc33b4 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -22,7 +22,7 @@ #ifndef __X86_IRQ_REMAPPING_H #define __X86_IRQ_REMAPPING_H -#include +#include #include #include diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h index fe0d4c6636ec..d26075b52885 100644 --- a/arch/x86/include/asm/irqdomain.h +++ b/arch/x86/include/asm/irqdomain.h @@ -2,6 +2,25 @@ #define _ASM_IRQDOMAIN_H #include +#include + +#ifdef CONFIG_X86_LOCAL_APIC +enum { + /* Allocate contiguous CPU vectors */ + X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1, +}; + +extern struct irq_domain *x86_vector_domain; + +extern void init_irq_alloc_info(struct irq_alloc_info *info, + const struct cpumask *mask); +extern void copy_irq_alloc_info(struct irq_alloc_info *dst, + struct irq_alloc_info *src); +#endif /* CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_X86_IO_APIC +struct device_node; +struct irq_data; enum ioapic_domain_type { IOAPIC_DOMAIN_INVALID, @@ -10,9 +29,6 @@ enum ioapic_domain_type { IOAPIC_DOMAIN_DYNAMIC, }; -struct device_node; -struct irq_data; - struct ioapic_domain_cfg { enum ioapic_domain_type type; const struct irq_domain_ops *ops; @@ -30,5 +46,18 @@ extern void mp_irqdomain_activate(struct irq_domain *domain, extern void mp_irqdomain_deactivate(struct irq_domain *domain, struct irq_data *irq_data); extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); +#endif /* CONFIG_X86_IO_APIC */ + +#ifdef CONFIG_PCI_MSI +extern void arch_init_msi_domain(struct irq_domain *domain); +#else +static inline void arch_init_msi_domain(struct irq_domain *domain) { } +#endif + +#ifdef CONFIG_HT_IRQ +extern void arch_init_htirq_domain(struct irq_domain *domain); +#else +static inline void arch_init_htirq_domain(struct irq_domain *domain) { } +#endif #endif diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c index 1cae104415ea..341e99be42b8 100644 --- a/arch/x86/kernel/apic/htirq.c +++ b/arch/x86/kernel/apic/htirq.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 109584261c4e..58fde664e7c0 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 60047495041c..ad786f8a7cc7 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -13,8 +13,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index e3bc18080052..e2449cf38b06 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -11,8 +11,8 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 091b36ac44c4..cdf86cd3fd97 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -12,8 +12,8 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3 From 5b673a48c54594108aec368014efc7334743f06a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 4 Apr 2015 16:40:45 +0200 Subject: x86/alternatives: Document macros Add some text to the macro magic for future reference and against failing human memory. Requested-by: Ingo Molnar Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/alternative-asm.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index bdf02eeee765..e7636bac7372 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -18,6 +18,12 @@ .endm #endif +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ .macro altinstruction_entry orig alt feature orig_len alt_len pad_len .long \orig - . .long \alt - . @@ -27,6 +33,12 @@ .byte \pad_len .endm +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".skip" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. + */ .macro ALTERNATIVE oldinstr, newinstr, feature 140: \oldinstr @@ -55,6 +67,12 @@ */ #define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) + +/* + * Same as ALTERNATIVE macro above but for two alternatives. If CPU + * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has + * @feature2, it replaces @oldinstr with @feature2. + */ .macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 140: \oldinstr -- cgit v1.2.3 From 7559e13fb4abe7880dfaf985d6a1630ca90a67ce Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Wed, 6 May 2015 06:58:55 -0500 Subject: x86/mce: Add support for deferred errors on AMD Deferred errors indicate error conditions that were not corrected, but those errors have not been consumed yet. They require no action from S/W (or action is optional). These errors provide info about a latent uncorrectable MCE that can occur when a poisoned data is consumed by the processor. Newer AMD processors can generate deferred errors and can be configured to generate APIC interrupts on such events. SUCCOR stands for S/W UnCorrectable error COntainment and Recovery. It indicates support for data poisoning in HW and deferred error interrupts. Add new bitfield to mce_vendor_flags for this. We use this to verify presence of deferred error interrupts before we enable them in mce_amd.c While at it, clarify comments in mce_vendor_flags to provide an indication of usages of the bitfields. Signed-off-by: Aravind Gopalakrishnan Cc: Tony Luck Cc: x86-ml Cc: linux-edac Link: http://lkml.kernel.org/r/1430913538-1415-4-git-send-email-Aravind.Gopalakrishnan@amd.com [ beef up commit message, do CPUID(8000_0007) only once. ] Signed-off-by: Borislav Petkov --- arch/x86/include/asm/mce.h | 15 +++++++++++++-- arch/x86/kernel/cpu/mcheck/mce.c | 10 ++++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 1f5a86d518db..407ced642ac1 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -117,8 +117,19 @@ struct mca_config { }; struct mce_vendor_flags { - __u64 overflow_recov : 1, /* cpuid_ebx(80000007) */ - __reserved_0 : 63; + /* + * overflow recovery cpuid bit indicates that overflow + * conditions are not fatal + */ + __u64 overflow_recov : 1, + + /* + * SUCCOR stands for S/W UnCorrectable error COntainment + * and Recovery. It indicates support for data poisoning + * in HW and deferred error interrupts. + */ + succor : 1, + __reserved_0 : 62; }; extern struct mce_vendor_flags mce_flags; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e535533d5ab8..521e5016aca6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1637,10 +1637,16 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) mce_intel_feature_init(c); mce_adjust_timer = cmci_intel_adjust_timer; break; - case X86_VENDOR_AMD: + + case X86_VENDOR_AMD: { + u32 ebx = cpuid_ebx(0x80000007); + mce_amd_feature_init(c); - mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1; + mce_flags.overflow_recov = !!(ebx & BIT(0)); + mce_flags.succor = !!(ebx & BIT(1)); break; + } + default: break; } -- cgit v1.2.3 From 24fd78a81f6d3fe7f7a440c8629f9c52cd5f830e Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Wed, 6 May 2015 06:58:56 -0500 Subject: x86/mce/amd: Introduce deferred error interrupt handler Deferred errors indicate error conditions that were not corrected, but require no action from S/W (or action is optional).These errors provide info about a latent UC MCE that can occur when a poisoned data is consumed by the processor. Processors that report these errors can be configured to generate APIC interrupts to notify OS about the error. Provide an interrupt handler in this patch so that OS can catch these errors as and when they happen. Currently, we simply log the errors and exit the handler as S/W action is not mandated. Signed-off-by: Aravind Gopalakrishnan Cc: Tony Luck Cc: x86-ml Cc: linux-edac Link: http://lkml.kernel.org/r/1430913538-1415-5-git-send-email-Aravind.Gopalakrishnan@amd.com Signed-off-by: Borislav Petkov --- arch/x86/include/asm/entry_arch.h | 3 ++ arch/x86/include/asm/hardirq.h | 3 ++ arch/x86/include/asm/hw_irq.h | 2 + arch/x86/include/asm/irq_vectors.h | 1 + arch/x86/include/asm/mce.h | 3 ++ arch/x86/include/asm/trace/irq_vectors.h | 6 +++ arch/x86/include/asm/traps.h | 3 +- arch/x86/kernel/cpu/mcheck/mce_amd.c | 93 ++++++++++++++++++++++++++++++++ arch/x86/kernel/entry_64.S | 5 ++ arch/x86/kernel/irq.c | 6 +++ arch/x86/kernel/irqinit.c | 4 ++ arch/x86/kernel/traps.c | 5 ++ 12 files changed, 133 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index dc5fa661465f..6da46dbaac87 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -50,4 +50,7 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) #endif +#ifdef CONFIG_X86_MCE_AMD +BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR) +#endif #endif diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 0f5fb6b6567e..db9f536f482f 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -33,6 +33,9 @@ typedef struct { #ifdef CONFIG_X86_MCE_THRESHOLD unsigned int irq_threshold_count; #endif +#ifdef CONFIG_X86_MCE_AMD + unsigned int irq_deferred_error_count; +#endif #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) unsigned int irq_hv_callback_count; #endif diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index e9571ddabc4f..f71e489d7537 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -73,6 +73,7 @@ extern asmlinkage void invalidate_interrupt31(void); extern asmlinkage void irq_move_cleanup_interrupt(void); extern asmlinkage void reboot_interrupt(void); extern asmlinkage void threshold_interrupt(void); +extern asmlinkage void deferred_error_interrupt(void); extern asmlinkage void call_function_interrupt(void); extern asmlinkage void call_function_single_interrupt(void); @@ -87,6 +88,7 @@ extern void trace_spurious_interrupt(void); extern void trace_thermal_interrupt(void); extern void trace_reschedule_interrupt(void); extern void trace_threshold_interrupt(void); +extern void trace_deferred_error_interrupt(void); extern void trace_call_function_interrupt(void); extern void trace_call_function_single_interrupt(void); #define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 666c89ec4bd7..026fc1e1599c 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -113,6 +113,7 @@ #define IRQ_WORK_VECTOR 0xf6 #define UV_BAU_MESSAGE 0xf5 +#define DEFERRED_ERROR_VECTOR 0xf4 /* Vector on which hypervisor callbacks will be delivered */ #define HYPERVISOR_CALLBACK_VECTOR 0xf3 diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 407ced642ac1..6a3034a0a072 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -234,6 +234,9 @@ void do_machine_check(struct pt_regs *, long); extern void (*mce_threshold_vector)(void); extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); +/* Deferred error interrupt handler */ +extern void (*deferred_error_int_vector)(void); + /* * Thermal handler */ diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 4cab890007a7..38a09a13a9bc 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -100,6 +100,12 @@ DEFINE_IRQ_VECTOR_EVENT(call_function_single); */ DEFINE_IRQ_VECTOR_EVENT(threshold_apic); +/* + * deferred_error_apic - called when entering/exiting a deferred apic interrupt + * vector handler + */ +DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic); + /* * thermal_apic - called when entering/exiting a thermal apic interrupt * vector handler diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 4e49d7dff78e..c5380bea2a36 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -108,7 +108,8 @@ extern int panic_on_unrecovered_nmi; void math_emulate(struct math_emu_info *); #ifndef CONFIG_X86_32 asmlinkage void smp_thermal_interrupt(void); -asmlinkage void mce_threshold_interrupt(void); +asmlinkage void smp_threshold_interrupt(void); +asmlinkage void smp_deferred_error_interrupt(void); #endif extern enum ctx_state ist_enter(struct pt_regs *regs); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 607075726e10..2e7ebe7e1e80 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -12,6 +12,8 @@ * - added support for AMD Family 0x10 processors * May 2012 * - major scrubbing + * May 2015 + * - add support for deferred error interrupts (Aravind Gopalakrishnan) * * All MC4_MISCi registers are shared between multi-cores */ @@ -32,6 +34,7 @@ #include #include #include +#include #define NR_BLOCKS 9 #define THRESHOLD_MAX 0xFFF @@ -47,6 +50,13 @@ #define MASK_BLKPTR_LO 0xFF000000 #define MCG_XBLK_ADDR 0xC0000400 +/* Deferred error settings */ +#define MSR_CU_DEF_ERR 0xC0000410 +#define MASK_DEF_LVTOFF 0x000000F0 +#define MASK_DEF_INT_TYPE 0x00000006 +#define DEF_LVT_OFF 0x2 +#define DEF_INT_TYPE_APIC 0x2 + static const char * const th_names[] = { "load_store", "insn_fetch", @@ -60,6 +70,13 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ static void amd_threshold_interrupt(void); +static void amd_deferred_error_interrupt(void); + +static void default_deferred_error_interrupt(void) +{ + pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR); +} +void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; /* * CPU Initialization @@ -205,6 +222,39 @@ static int setup_APIC_mce(int reserved, int new) return reserved; } +static int setup_APIC_deferred_error(int reserved, int new) +{ + if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR, + APIC_EILVT_MSG_FIX, 0)) + return new; + + return reserved; +} + +static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) +{ + u32 low = 0, high = 0; + int def_offset = -1, def_new; + + if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high)) + return; + + def_new = (low & MASK_DEF_LVTOFF) >> 4; + if (!(low & MASK_DEF_LVTOFF)) { + pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n"); + def_new = DEF_LVT_OFF; + low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4); + } + + def_offset = setup_APIC_deferred_error(def_offset, def_new); + if ((def_offset == def_new) && + (deferred_error_int_vector != amd_deferred_error_interrupt)) + deferred_error_int_vector = amd_deferred_error_interrupt; + + low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC; + wrmsr(MSR_CU_DEF_ERR, low, high); +} + /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { @@ -262,6 +312,9 @@ init: mce_threshold_block_init(&b, offset); } } + + if (mce_flags.succor) + deferred_error_interrupt_enable(c); } static void __log_error(unsigned int bank, bool threshold_err, u64 misc) @@ -288,6 +341,46 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc) wrmsrl(MSR_IA32_MCx_STATUS(bank), 0); } +static inline void __smp_deferred_error_interrupt(void) +{ + inc_irq_stat(irq_deferred_error_count); + deferred_error_int_vector(); +} + +asmlinkage __visible void smp_deferred_error_interrupt(void) +{ + entering_irq(); + __smp_deferred_error_interrupt(); + exiting_ack_irq(); +} + +asmlinkage __visible void smp_trace_deferred_error_interrupt(void) +{ + entering_irq(); + trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR); + __smp_deferred_error_interrupt(); + trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR); + exiting_ack_irq(); +} + +/* APIC interrupt handler for deferred errors */ +static void amd_deferred_error_interrupt(void) +{ + u64 status; + unsigned int bank; + + for (bank = 0; bank < mca_cfg.banks; ++bank) { + rdmsrl(MSR_IA32_MCx_STATUS(bank), status); + + if (!(status & MCI_STATUS_VAL) || + !(status & MCI_STATUS_DEFERRED)) + continue; + + __log_error(bank, false, 0); + break; + } +} + /* * APIC Interrupt Handler */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 02c2eff7478d..12aea85fe738 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -935,6 +935,11 @@ apicinterrupt THRESHOLD_APIC_VECTOR \ threshold_interrupt smp_threshold_interrupt #endif +#ifdef CONFIG_X86_MCE_AMD +apicinterrupt DEFERRED_ERROR_VECTOR \ + deferred_error_interrupt smp_deferred_error_interrupt +#endif + #ifdef CONFIG_X86_THERMAL_VECTOR apicinterrupt THERMAL_APIC_VECTOR \ thermal_interrupt smp_thermal_interrupt diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index e5952c225532..590ed6c1bf51 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -116,6 +116,12 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); seq_puts(p, " Threshold APIC interrupts\n"); #endif +#ifdef CONFIG_X86_MCE_AMD + seq_printf(p, "%*s: ", prec, "DFR"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count); + seq_puts(p, " Deferred Error APIC interrupts\n"); +#endif #ifdef CONFIG_X86_MCE seq_printf(p, "%*s: ", prec, "MCE"); for_each_online_cpu(j) diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index cd10a6437264..d7ec6e7b2b5b 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -135,6 +135,10 @@ static void __init apic_intr_init(void) alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); #endif +#ifdef CONFIG_X86_MCE_AMD + alloc_intr_gate(DEFERRED_ERROR_VECTOR, deferred_error_interrupt); +#endif + #ifdef CONFIG_X86_LOCAL_APIC /* self generated IPI for local APIC timer */ alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 324ab5247687..68b1d5979a46 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -827,6 +827,11 @@ asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void) { } +asmlinkage __visible void __attribute__((weak)) +smp_deferred_error_interrupt(void) +{ +} + /* * 'math_state_restore()' saves the current math information in the * old math state array, and gets the new ones from the current task -- cgit v1.2.3 From 5c0d728e1a8ccbaf68ec37181e466627ba0a6efc Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Wed, 6 May 2015 06:58:57 -0500 Subject: x86/irq: Cleanup ordering of vector numbers Sort vector number assignments in proper descending order. No functional change. Signed-off-by: Aravind Gopalakrishnan Cc: Tony Luck Cc: x86-ml Link: http://lkml.kernel.org/r/1430913538-1415-6-git-send-email-Aravind.Gopalakrishnan@amd.com Signed-off-by: Borislav Petkov --- arch/x86/include/asm/irq_vectors.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 026fc1e1599c..8900ba444ad7 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -102,11 +102,6 @@ */ #define X86_PLATFORM_IPI_VECTOR 0xf7 -/* Vector for KVM to deliver posted interrupt IPI */ -#ifdef CONFIG_HAVE_KVM -#define POSTED_INTR_VECTOR 0xf2 -#endif - /* * IRQ work vector: */ @@ -118,6 +113,11 @@ /* Vector on which hypervisor callbacks will be delivered */ #define HYPERVISOR_CALLBACK_VECTOR 0xf3 +/* Vector for KVM to deliver posted interrupt IPI */ +#ifdef CONFIG_HAVE_KVM +#define POSTED_INTR_VECTOR 0xf2 +#endif + /* * Local APIC timer IRQ vector is on a different priority level, * to work around the 'lost local interrupt if more than 2 IRQ -- cgit v1.2.3 From 2a4e90b18c256d52a7f3f77d58114f6d4e4a7f9f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 8 May 2015 12:26:02 +0200 Subject: x86: Force inlining of atomic ops With both gcc 4.7.2 and 4.9.2, sometimes gcc mysteriously doesn't inline very small functions we expect to be inlined: $ nm --size-sort vmlinux | grep -iF ' t ' | uniq -c | grep -v '^ *1 ' | sort -rn 473 000000000000000b t spin_unlock_irqrestore 449 000000000000005f t rcu_read_unlock 355 0000000000000009 t atomic_inc <== THIS 353 000000000000006e t rcu_read_lock 350 0000000000000075 t rcu_read_lock_sched_held 291 000000000000000b t spin_unlock 266 0000000000000019 t arch_local_irq_restore 215 000000000000000b t spin_lock 180 0000000000000011 t kzalloc 165 0000000000000012 t list_add_tail 161 0000000000000019 t arch_local_save_flags 153 0000000000000016 t test_and_set_bit 134 000000000000000b t spin_unlock_irq 134 0000000000000009 t atomic_dec <== THIS 130 000000000000000b t spin_unlock_bh 122 0000000000000010 t brelse 120 0000000000000016 t test_and_clear_bit 120 000000000000000b t spin_lock_irq 119 000000000000001e t get_dma_ops 117 0000000000000053 t cpumask_next 116 0000000000000036 t kref_get 114 000000000000001a t schedule_work 106 000000000000000b t spin_lock_bh 103 0000000000000019 t arch_local_irq_disable ... Note sizes of marked functions. They are merely 9 bytes long! Selecting function with 'atomic' in their names: 355 0000000000000009 t atomic_inc 134 0000000000000009 t atomic_dec 98 0000000000000014 t atomic_dec_and_test 31 000000000000000e t atomic_add_return 27 000000000000000a t atomic64_inc 26 000000000000002f t kmap_atomic 24 0000000000000009 t atomic_add 12 0000000000000009 t atomic_sub 10 0000000000000021 t __atomic_add_unless 10 000000000000000a t atomic64_add 5 000000000000001f t __atomic_add_unless.constprop.7 5 000000000000000a t atomic64_dec 4 000000000000001f t __atomic_add_unless.constprop.18 4 000000000000001f t __atomic_add_unless.constprop.12 4 000000000000001f t __atomic_add_unless.constprop.10 3 000000000000001f t __atomic_add_unless.constprop.13 3 0000000000000011 t atomic64_add_return 2 000000000000001f t __atomic_add_unless.constprop.9 2 000000000000001f t __atomic_add_unless.constprop.8 2 000000000000001f t __atomic_add_unless.constprop.6 2 000000000000001f t __atomic_add_unless.constprop.5 2 000000000000001f t __atomic_add_unless.constprop.3 2 000000000000001f t __atomic_add_unless.constprop.22 2 000000000000001f t __atomic_add_unless.constprop.14 2 000000000000001f t __atomic_add_unless.constprop.11 2 000000000000001e t atomic_dec_if_positive 2 0000000000000014 t atomic_inc_and_test 2 0000000000000011 t atomic_add_return.constprop.4 2 0000000000000011 t atomic_add_return.constprop.17 2 0000000000000011 t atomic_add_return.constprop.16 2 000000000000000d t atomic_inc.constprop.4 2 000000000000000c t atomic_cmpxchg This patch fixes this for x86 atomic ops via s/inline/__always_inline/. This decreases allyesconfig kernel by about 25k: text data bss dec hex filename 82399481 22255416 20627456 125282353 777a831 vmlinux.before 82375570 22255544 20627456 125258570 7774b4a vmlinux Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1431080762-17797-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/atomic.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 75a9ee8529f3..e9168955c42f 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -22,7 +22,7 @@ * * Atomically reads the value of @v. */ -static inline int atomic_read(const atomic_t *v) +static __always_inline int atomic_read(const atomic_t *v) { return ACCESS_ONCE((v)->counter); } @@ -34,7 +34,7 @@ static inline int atomic_read(const atomic_t *v) * * Atomically sets the value of @v to @i. */ -static inline void atomic_set(atomic_t *v, int i) +static __always_inline void atomic_set(atomic_t *v, int i) { v->counter = i; } @@ -126,7 +126,7 @@ static __always_inline int atomic_dec_and_test(atomic_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static inline int atomic_inc_and_test(atomic_t *v) +static __always_inline int atomic_inc_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e"); } @@ -140,7 +140,7 @@ static inline int atomic_inc_and_test(atomic_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline int atomic_add_negative(int i, atomic_t *v) +static __always_inline int atomic_add_negative(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s"); } @@ -164,7 +164,7 @@ static __always_inline int atomic_add_return(int i, atomic_t *v) * * Atomically subtracts @i from @v and returns @v - @i */ -static inline int atomic_sub_return(int i, atomic_t *v) +static __always_inline int atomic_sub_return(int i, atomic_t *v) { return atomic_add_return(-i, v); } @@ -172,7 +172,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) #define atomic_inc_return(v) (atomic_add_return(1, v)) #define atomic_dec_return(v) (atomic_sub_return(1, v)) -static inline int atomic_cmpxchg(atomic_t *v, int old, int new) +static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) { return cmpxchg(&v->counter, old, new); } @@ -213,7 +213,7 @@ static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) * Atomically adds 1 to @v * Returns the new value of @u */ -static inline short int atomic_inc_short(short int *v) +static __always_inline short int atomic_inc_short(short int *v) { asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v)); return *v; -- cgit v1.2.3 From 63332a8455d8310b77d38779c6c21a660a8d9feb Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 24 Apr 2015 17:31:33 +0200 Subject: x86/entry: Stop using PER_CPU_VAR(kernel_stack) PER_CPU_VAR(kernel_stack) is redundant: - On the 64-bit build, we can use PER_CPU_VAR(cpu_tss + TSS_sp0). - On the 32-bit build, we can use PER_CPU_VAR(cpu_current_top_of_stack). PER_CPU_VAR(kernel_stack) will be deleted by a separate change. Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1429889495-27850-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 2 +- arch/x86/include/asm/thread_info.h | 8 +++++++- arch/x86/kernel/entry_64.S | 2 +- arch/x86/xen/xen-asm_64.S | 5 +++-- 4 files changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 2ab0f7182df3..1b1330c07971 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -346,7 +346,7 @@ ENTRY(ia32_cstar_target) SWAPGS_UNSAFE_STACK movl %esp,%r8d CFI_REGISTER rsp,r8 - movq PER_CPU_VAR(kernel_stack),%rsp + movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp ENABLE_INTERRUPTS(CLBR_NONE) /* Zero-extending 32-bit regs, do not remove */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index b4bdec3e9523..d656a363e1eb 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -198,9 +198,15 @@ static inline unsigned long current_stack_pointer(void) #else /* !__ASSEMBLY__ */ /* Load thread_info address into "reg" */ +#ifdef CONFIG_X86_32 #define GET_THREAD_INFO(reg) \ - _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ + _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ _ASM_SUB $(THREAD_SIZE),reg ; +#else +#define GET_THREAD_INFO(reg) \ + _ASM_MOV PER_CPU_VAR(cpu_tss + TSS_sp0),reg ; \ + _ASM_SUB $(THREAD_SIZE),reg ; +#endif /* * ASM operand which evaluates to a 'thread_info' address of diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7423e3e2f5c5..c13b86b40176 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -216,7 +216,7 @@ ENTRY(system_call) GLOBAL(system_call_after_swapgs) movq %rsp,PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(kernel_stack),%rsp + movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp /* Construct struct pt_regs on stack */ pushq_cfi $__USER_DS /* pt_regs->ss */ diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index a2cabb8bd6bf..5aa7ec607b9e 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -53,7 +54,7 @@ ENTRY(xen_sysret64) * still with the kernel gs, so we can easily switch back */ movq %rsp, PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(kernel_stack), %rsp + movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp pushq $__USER_DS pushq PER_CPU_VAR(rsp_scratch) @@ -72,7 +73,7 @@ ENTRY(xen_sysret32) * still with the kernel gs, so we can easily switch back */ movq %rsp, PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(kernel_stack), %rsp + movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp pushq $__USER32_DS pushq PER_CPU_VAR(rsp_scratch) -- cgit v1.2.3 From fed7c3f0f750f225317828d691e9eb76eec887b3 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 24 Apr 2015 17:31:34 +0200 Subject: x86/entry: Remove unused 'kernel_stack' per-cpu variable Signed-off-by: Denys Vlasenko Acked-by: Andy Lutomirski Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1429889495-27850-2-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/thread_info.h | 2 -- arch/x86/kernel/cpu/common.c | 4 ---- arch/x86/kernel/process_32.c | 5 +---- arch/x86/kernel/process_64.c | 3 --- arch/x86/kernel/smpboot.c | 2 -- 5 files changed, 1 insertion(+), 15 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index d656a363e1eb..472288962c99 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -177,8 +177,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -DECLARE_PER_CPU(unsigned long, kernel_stack); - static inline struct thread_info *current_thread_info(void) { return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a62cf04dac8a..6bec0b55863e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1155,10 +1155,6 @@ static __init int setup_disablecpuid(char *arg) } __setup("clearcpuid=", setup_disablecpuid); -DEFINE_PER_CPU(unsigned long, kernel_stack) = - (unsigned long)&init_thread_union + THREAD_SIZE; -EXPORT_PER_CPU_SYMBOL(kernel_stack); - #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8ed2106b06da..a99900cedc22 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -302,13 +302,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) arch_end_context_switch(next_p); /* - * Reload esp0, kernel_stack, and current_top_of_stack. This changes + * Reload esp0 and cpu_current_top_of_stack. This changes * current_thread_info(). */ load_sp0(tss, next); - this_cpu_write(kernel_stack, - (unsigned long)task_stack_page(next_p) + - THREAD_SIZE); this_cpu_write(cpu_current_top_of_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ddfdbf74f174..82134506faa8 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -409,9 +409,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Reload esp0 and ss1. This changes current_thread_info(). */ load_sp0(tss, next); - this_cpu_write(kernel_stack, - (unsigned long)task_stack_page(next_p) + THREAD_SIZE); - /* * Now maybe reload the debug registers and handle I/O bitmaps */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 50e547eac8cd..023cccf5a4ae 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -792,8 +792,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) clear_tsk_thread_flag(idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); #endif - per_cpu(kernel_stack, cpu) = - (unsigned long)task_stack_page(idle) + THREAD_SIZE; } /* -- cgit v1.2.3 From 3a23208e69679597e767cf3547b1a30dd845d9b5 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 24 Apr 2015 17:31:35 +0200 Subject: x86/entry: Define 'cpu_current_top_of_stack' for 64-bit code 32-bit code has PER_CPU_VAR(cpu_current_top_of_stack). 64-bit code uses somewhat more obscure: PER_CPU_VAR(cpu_tss + TSS_sp0). Define the 'cpu_current_top_of_stack' macro on CONFIG_X86_64 as well so that the PER_CPU_VAR(cpu_current_top_of_stack) expression can be used in both 32-bit and 64-bit code. Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Drewry Link: http://lkml.kernel.org/r/1429889495-27850-3-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 4 ++-- arch/x86/include/asm/thread_info.h | 10 ++++------ arch/x86/kernel/entry_64.S | 2 +- arch/x86/xen/xen-asm_64.S | 5 +++-- 4 files changed, 10 insertions(+), 11 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 1b1330c07971..63450a596800 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -113,7 +113,7 @@ ENTRY(ia32_sysenter_target) * it is too small to ever cause noticeable irq latency. */ SWAPGS_UNSAFE_STACK - movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp ENABLE_INTERRUPTS(CLBR_NONE) /* Zero-extending 32-bit regs, do not remove */ @@ -346,7 +346,7 @@ ENTRY(ia32_cstar_target) SWAPGS_UNSAFE_STACK movl %esp,%r8d CFI_REGISTER rsp,r8 - movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp + movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp ENABLE_INTERRUPTS(CLBR_NONE) /* Zero-extending 32-bit regs, do not remove */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 472288962c99..225ee545e1a0 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -195,16 +195,14 @@ static inline unsigned long current_stack_pointer(void) #else /* !__ASSEMBLY__ */ +#ifdef CONFIG_X86_64 +# define cpu_current_top_of_stack (cpu_tss + TSS_sp0) +#endif + /* Load thread_info address into "reg" */ -#ifdef CONFIG_X86_32 #define GET_THREAD_INFO(reg) \ _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ _ASM_SUB $(THREAD_SIZE),reg ; -#else -#define GET_THREAD_INFO(reg) \ - _ASM_MOV PER_CPU_VAR(cpu_tss + TSS_sp0),reg ; \ - _ASM_SUB $(THREAD_SIZE),reg ; -#endif /* * ASM operand which evaluates to a 'thread_info' address of diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c13b86b40176..09c3f9e0e07e 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -216,7 +216,7 @@ ENTRY(system_call) GLOBAL(system_call_after_swapgs) movq %rsp,PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp + movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp /* Construct struct pt_regs on stack */ pushq_cfi $__USER_DS /* pt_regs->ss */ diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 5aa7ec607b9e..04529e620559 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -54,7 +55,7 @@ ENTRY(xen_sysret64) * still with the kernel gs, so we can easily switch back */ movq %rsp, PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp pushq $__USER_DS pushq PER_CPU_VAR(rsp_scratch) @@ -73,7 +74,7 @@ ENTRY(xen_sysret32) * still with the kernel gs, so we can easily switch back */ movq %rsp, PER_CPU_VAR(rsp_scratch) - movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp pushq $__USER32_DS pushq PER_CPU_VAR(rsp_scratch) -- cgit v1.2.3 From c6e692f95dacddff5f3607717fb2246c60bbb714 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 9 May 2015 11:36:51 -0400 Subject: x86/asm/entry/irq: Remove unused invalidate_interrupt prototypes The invalidate_interrupt* functions no longer exist. Signed-off-by: Brian Gerst Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431185813-15413-3-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 35 ----------------------------------- 1 file changed, 35 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index e9571ddabc4f..014c6382ffce 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -36,40 +36,6 @@ extern asmlinkage void spurious_interrupt(void); extern asmlinkage void thermal_interrupt(void); extern asmlinkage void reschedule_interrupt(void); -extern asmlinkage void invalidate_interrupt(void); -extern asmlinkage void invalidate_interrupt0(void); -extern asmlinkage void invalidate_interrupt1(void); -extern asmlinkage void invalidate_interrupt2(void); -extern asmlinkage void invalidate_interrupt3(void); -extern asmlinkage void invalidate_interrupt4(void); -extern asmlinkage void invalidate_interrupt5(void); -extern asmlinkage void invalidate_interrupt6(void); -extern asmlinkage void invalidate_interrupt7(void); -extern asmlinkage void invalidate_interrupt8(void); -extern asmlinkage void invalidate_interrupt9(void); -extern asmlinkage void invalidate_interrupt10(void); -extern asmlinkage void invalidate_interrupt11(void); -extern asmlinkage void invalidate_interrupt12(void); -extern asmlinkage void invalidate_interrupt13(void); -extern asmlinkage void invalidate_interrupt14(void); -extern asmlinkage void invalidate_interrupt15(void); -extern asmlinkage void invalidate_interrupt16(void); -extern asmlinkage void invalidate_interrupt17(void); -extern asmlinkage void invalidate_interrupt18(void); -extern asmlinkage void invalidate_interrupt19(void); -extern asmlinkage void invalidate_interrupt20(void); -extern asmlinkage void invalidate_interrupt21(void); -extern asmlinkage void invalidate_interrupt22(void); -extern asmlinkage void invalidate_interrupt23(void); -extern asmlinkage void invalidate_interrupt24(void); -extern asmlinkage void invalidate_interrupt25(void); -extern asmlinkage void invalidate_interrupt26(void); -extern asmlinkage void invalidate_interrupt27(void); -extern asmlinkage void invalidate_interrupt28(void); -extern asmlinkage void invalidate_interrupt29(void); -extern asmlinkage void invalidate_interrupt30(void); -extern asmlinkage void invalidate_interrupt31(void); - extern asmlinkage void irq_move_cleanup_interrupt(void); extern asmlinkage void reboot_interrupt(void); extern asmlinkage void threshold_interrupt(void); @@ -178,7 +144,6 @@ extern asmlinkage void smp_irq_move_cleanup_interrupt(void); extern __visible void smp_reschedule_interrupt(struct pt_regs *); extern __visible void smp_call_function_interrupt(struct pt_regs *); extern __visible void smp_call_function_single_interrupt(struct pt_regs *); -extern __visible void smp_invalidate_interrupt(struct pt_regs *); #endif extern char irq_entries_start[]; -- cgit v1.2.3 From 51bb92843edcba5a58138cad25ced97923048add Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 9 May 2015 11:36:52 -0400 Subject: x86/asm/entry: Remove SYSCALL_VECTOR Use IA32_SYSCALL_VECTOR for both compat and native. Signed-off-by: Brian Gerst Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431185813-15413-4-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 3 --- arch/x86/kernel/traps.c | 4 ++-- arch/x86/lguest/boot.c | 4 ++-- 3 files changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 666c89ec4bd7..07f27926d473 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -47,9 +47,6 @@ #define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR #define IA32_SYSCALL_VECTOR 0x80 -#ifdef CONFIG_X86_32 -# define SYSCALL_VECTOR 0x80 -#endif /* * Vectors 0x30-0x3f are used for ISA interrupts. diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 324ab5247687..5e0791f9d3dc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -997,8 +997,8 @@ void __init trap_init(void) #endif #ifdef CONFIG_X86_32 - set_system_trap_gate(SYSCALL_VECTOR, &system_call); - set_bit(SYSCALL_VECTOR, used_vectors); + set_system_trap_gate(IA32_SYSCALL_VECTOR, &system_call); + set_bit(IA32_SYSCALL_VECTOR, used_vectors); #endif /* diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 8f9a133cc099..cab9aaa7802c 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -90,7 +90,7 @@ struct lguest_data lguest_data = { .noirq_iret = (u32)lguest_noirq_iret, .kernel_address = PAGE_OFFSET, .blocked_interrupts = { 1 }, /* Block timer interrupts */ - .syscall_vec = SYSCALL_VECTOR, + .syscall_vec = IA32_SYSCALL_VECTOR, }; /*G:037 @@ -866,7 +866,7 @@ static void __init lguest_init_IRQ(void) for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) { /* Some systems map "vectors" to interrupts weirdly. Not us! */ __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR); - if (i != SYSCALL_VECTOR) + if (i != IA32_SYSCALL_VECTOR) set_intr_gate(i, irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR)); } -- cgit v1.2.3 From 8b455e6577f325289cf2d1b20f493b2fe5c6c316 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 9 May 2015 11:36:53 -0400 Subject: x86/asm/entry/irq: Clean up IRQn_VECTOR macros Since the ISA irqs are in a single block, use ISA_IRQ_VECTOR(irq) instead of individual macros. Signed-off-by: Brian Gerst Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431185813-15413-5-git-send-email-brgerst@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 18 +----------------- arch/x86/kernel/apic/io_apic.c | 4 ++-- arch/x86/kernel/apic/vector.c | 2 +- arch/x86/kernel/i8259.c | 8 ++++---- arch/x86/kernel/irqinit.c | 4 ++-- 5 files changed, 10 insertions(+), 26 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 07f27926d473..117db96ad5fb 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -52,23 +52,7 @@ * Vectors 0x30-0x3f are used for ISA interrupts. * round up to the next 16-vector boundary */ -#define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15) - -#define IRQ1_VECTOR (IRQ0_VECTOR + 1) -#define IRQ2_VECTOR (IRQ0_VECTOR + 2) -#define IRQ3_VECTOR (IRQ0_VECTOR + 3) -#define IRQ4_VECTOR (IRQ0_VECTOR + 4) -#define IRQ5_VECTOR (IRQ0_VECTOR + 5) -#define IRQ6_VECTOR (IRQ0_VECTOR + 6) -#define IRQ7_VECTOR (IRQ0_VECTOR + 7) -#define IRQ8_VECTOR (IRQ0_VECTOR + 8) -#define IRQ9_VECTOR (IRQ0_VECTOR + 9) -#define IRQ10_VECTOR (IRQ0_VECTOR + 10) -#define IRQ11_VECTOR (IRQ0_VECTOR + 11) -#define IRQ12_VECTOR (IRQ0_VECTOR + 12) -#define IRQ13_VECTOR (IRQ0_VECTOR + 13) -#define IRQ14_VECTOR (IRQ0_VECTOR + 14) -#define IRQ15_VECTOR (IRQ0_VECTOR + 15) +#define ISA_IRQ_VECTOR(irq) (((FIRST_EXTERNAL_VECTOR + 16) & ~15) + irq) /* * Special IRQ vectors used by the SMP architecture, 0xf0-0xff diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f4dc2462a1ac..e01e4117188a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -258,11 +258,11 @@ int __init arch_early_ioapic_init(void) /* * For legacy IRQ's, start with assigning irq0 to irq15 to - * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's. + * ISA_IRQ_VECTOR(irq) for all cpu's. */ for (i = 0; i < nr_legacy_irqs(); i++) { cfg = alloc_irq_and_cfg_at(i, node); - cfg->vector = IRQ0_VECTOR + i; + cfg->vector = ISA_IRQ_VECTOR(i); cpumask_setall(cfg->domain); } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6cedd7914581..82d44c314a3f 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -314,7 +314,7 @@ void setup_vector_irq(int cpu) * legacy vector to irq mapping: */ for (irq = 0; irq < nr_legacy_irqs(); irq++) - per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq; + per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq; __setup_vector_irq(cpu); } diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index e7cc5370cd2f..16cb827a5b27 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -329,8 +329,8 @@ static void init_8259A(int auto_eoi) */ outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ - outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); + /* ICW2: 8259A-1 IR0-7 mapped to ISA_IRQ_VECTOR(0) */ + outb_pic(ISA_IRQ_VECTOR(0), PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); @@ -342,8 +342,8 @@ static void init_8259A(int auto_eoi) outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - /* ICW2: 8259A-2 IR0-7 mapped to IRQ8_VECTOR */ - outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); + /* ICW2: 8259A-2 IR0-7 mapped to ISA_IRQ_VECTOR(8) */ + outb_pic(ISA_IRQ_VECTOR(8), PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index cd10a6437264..dc1e08d23552 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -86,7 +86,7 @@ void __init init_IRQ(void) int i; /* - * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. + * On cpu 0, Assign ISA_IRQ_VECTOR(irq) to IRQ 0..15. * If these IRQ's are handled by legacy interrupt-controllers like PIC, * then this configuration will likely be static after the boot. If * these IRQ's are handled by more mordern controllers like IO-APIC, @@ -94,7 +94,7 @@ void __init init_IRQ(void) * irq's migrate etc. */ for (i = 0; i < nr_legacy_irqs(); i++) - per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; + per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = i; x86_init.irqs.intr_init(); } -- cgit v1.2.3 From ca7d9b795e6bc78c80a1771ada867994fabcfc01 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 11 May 2015 10:15:51 +0200 Subject: x86/mm: Add kerneldoc comments for pcommit_sfence() Add kerneldoc comments for pcommit_sfence() describing the purpose of the PCOMMIT instruction and demonstrating its usage with an example. Signed-off-by: Ross Zwisler Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H Peter Anvin Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/1430261196-2401-1-git-send-email-ross.zwisler@linux.intel.com Link: http://lkml.kernel.org/r/1431332153-18566-7-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/special_insns.h | 38 ++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index aeb4666e0c0a..2270e41b32fd 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -215,6 +215,44 @@ static inline void clwb(volatile void *__p) : [pax] "a" (p)); } +/** + * pcommit_sfence() - persistent commit and fence + * + * The PCOMMIT instruction ensures that data that has been flushed from the + * processor's cache hierarchy with CLWB, CLFLUSHOPT or CLFLUSH is accepted to + * memory and is durable on the DIMM. The primary use case for this is + * persistent memory. + * + * This function shows how to properly use CLWB/CLFLUSHOPT/CLFLUSH and PCOMMIT + * with appropriate fencing. + * + * Example: + * void flush_and_commit_buffer(void *vaddr, unsigned int size) + * { + * unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1; + * void *vend = vaddr + size; + * void *p; + * + * for (p = (void *)((unsigned long)vaddr & ~clflush_mask); + * p < vend; p += boot_cpu_data.x86_clflush_size) + * clwb(p); + * + * // SFENCE to order CLWB/CLFLUSHOPT/CLFLUSH cache flushes + * // MFENCE via mb() also works + * wmb(); + * + * // PCOMMIT and the required SFENCE for ordering + * pcommit_sfence(); + * } + * + * After this function completes the data pointed to by 'vaddr' has been + * accepted to memory and will be durable if the 'vaddr' points to persistent + * memory. + * + * PCOMMIT must always be ordered by an MFENCE or SFENCE, so to help simplify + * things we include both the PCOMMIT and the required SFENCE in the + * alternatives generated by pcommit_sfence(). + */ static inline void pcommit_sfence(void) { alternative(ASM_NOP7, -- cgit v1.2.3 From e4b6be33c28923d8cde53023e0888b1c5d1a9027 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 11 May 2015 10:15:53 +0200 Subject: x86/mm: Add ioremap_uc() helper to map memory uncacheable (not UC-) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ioremap_nocache() currently uses UC- by default. Our goal is to eventually make UC the default. Linux maps UC- to PCD=1, PWT=0 page attributes on non-PAT systems. Linux maps UC to PCD=1, PWT=1 page attributes on non-PAT systems. On non-PAT and PAT systems a WC MTRR has different effects on pages with either of these attributes. In order to help with a smooth transition its best to enable use of UC (PCD,1, PWT=1) on a region as that ensures a WC MTRR will have no effect on a region, this however requires us to have an way to declare a region as UC and we currently do not have a way to do this. WC MTRR on non-PAT system with PCD=1, PWT=0 (UC-) yields WC. WC MTRR on non-PAT system with PCD=1, PWT=1 (UC) yields UC. WC MTRR on PAT system with PCD=1, PWT=0 (UC-) yields WC. WC MTRR on PAT system with PCD=1, PWT=1 (UC) yields UC. A flip of the default ioremap_nocache() behaviour from UC- to UC can therefore regress a memory region from effective memory type WC to UC if MTRRs are used. Use of MTRRs should be phased out and in the best case only arch_phys_wc_add() use will remain, even if this happens arch_phys_wc_add() will have an effect on non-PAT systems and changes to default ioremap_nocache() behaviour could regress drivers. Now, ideally we'd use ioremap_nocache() on the regions in which we'd need uncachable memory types and avoid any MTRRs on those regions. There are however some restrictions on MTRRs use, such as the requirement of having the base and size of variable sized MTRRs to be powers of two, which could mean having to use a WC MTRR over a large area which includes a region in which write-combining effects are undesirable. Add ioremap_uc() to help with the both phasing out of MTRR use and also provide a way to blacklist small WC undesirable regions in devices with mixed regions which are size-implicated to use large WC MTRRs. Use of ioremap_uc() helps phase out MTRR use by avoiding regressions with an eventual flip of default behaviour or ioremap_nocache() from UC- to UC. Drivers working with WC MTRRs can use the below table to review and consider the use of ioremap*() and similar helpers to ensure appropriate behaviour long term even if default ioremap_nocache() behaviour changes from UC- to UC. Although ioremap_uc() is being added we leave set_memory_uc() to use UC- as only initial memory type setup is required to be able to accommodate existing device drivers and phase out MTRR use. It should also be clarified that set_memory_uc() cannot be used with IO memory, even though its use will not return any errors, it really has no effect. ---------------------------------------------------------------------- MTRR Non-PAT PAT Linux ioremap value Effective memory type ---------------------------------------------------------------------- Non-PAT | PAT PAT |PCD ||PWT ||| WC 000 WB _PAGE_CACHE_MODE_WB WC | WC WC 001 WC _PAGE_CACHE_MODE_WC WC* | WC WC 010 UC- _PAGE_CACHE_MODE_UC_MINUS WC* | WC WC 011 UC _PAGE_CACHE_MODE_UC UC | UC ---------------------------------------------------------------------- Signed-off-by: Luis R. Rodriguez Signed-off-by: Borislav Petkov Acked-by: H. Peter Anvin Cc: Andy Lutomirski Cc: Antonino Daplas Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Brian Gerst Cc: Daniel Vetter Cc: Dave Airlie Cc: Davidlohr Bueso Cc: Denys Vlasenko Cc: Jean-Christophe Plagniol-Villard Cc: Juergen Gross Cc: Linus Torvalds Cc: Mel Gorman Cc: Mike Travis Cc: Peter Zijlstra Cc: Suresh Siddha Cc: Thierry Reding Cc: Thomas Gleixner Cc: Tomi Valkeinen Cc: Toshi Kani Cc: Ville Syrjälä Cc: Vlastimil Babka Cc: Will Deacon Cc: linux-fbdev@vger.kernel.org Link: http://lkml.kernel.org/r/1430343851-967-2-git-send-email-mcgrof@do-not-panic.com Link: http://lkml.kernel.org/r/1431332153-18566-9-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 1 + arch/x86/mm/ioremap.c | 36 +++++++++++++++++++++++++++++++++++- arch/x86/mm/pageattr.c | 3 +++ include/asm-generic/io.h | 8 ++++++++ 4 files changed, 47 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 34a5b93704d3..4afc05ffa566 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -177,6 +177,7 @@ static inline unsigned int isa_virt_to_bus(volatile void *address) * look at pci_iomap(). */ extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 70e7444c6835..a493bb83aa89 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -237,7 +237,8 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) * pat_enabled ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS; * * Till we fix all X drivers to use ioremap_wc(), we will use - * UC MINUS. + * UC MINUS. Drivers that are certain they need or can already + * be converted over to strong UC can use ioremap_uc(). */ enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS; @@ -246,6 +247,39 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) } EXPORT_SYMBOL(ioremap_nocache); +/** + * ioremap_uc - map bus memory into CPU space as strongly uncachable + * @phys_addr: bus address of the memory + * @size: size of the resource to map + * + * ioremap_uc performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * This version of ioremap ensures that the memory is marked with a strong + * preference as completely uncachable on the CPU when possible. For non-PAT + * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT + * systems this will set the PAT entry for the pages as strong UC. This call + * will honor existing caching rules from things like the PCI bus. Note that + * there are other caches and buffers on many busses. In particular driver + * authors should read up on PCI writes. + * + * It's useful if some control registers are in such an area and + * write combining or read caching is not desirable: + * + * Must be freed with iounmap. + */ +void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size) +{ + enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC; + + return __ioremap_caller(phys_addr, size, pcm, + __builtin_return_address(0)); +} +EXPORT_SYMBOL_GPL(ioremap_uc); + /** * ioremap_wc - map memory into CPU space write combined * @phys_addr: bus address of the memory diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 5ddd9005f6c3..c77abd7f92a2 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1467,6 +1467,9 @@ int _set_memory_uc(unsigned long addr, int numpages) { /* * for now UC MINUS. see comments in ioremap_nocache() + * If you really need strong UC use ioremap_uc(), but note + * that you cannot override IO areas with set_memory_*() as + * these helpers cannot work with IO memory. */ return change_page_attr_set(&addr, numpages, cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS), diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 9db042304df3..90ccba7f9f9a 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -769,6 +769,14 @@ static inline void __iomem *ioremap_nocache(phys_addr_t offset, size_t size) } #endif +#ifndef ioremap_uc +#define ioremap_uc ioremap_uc +static inline void __iomem *ioremap_uc(phys_addr_t offset, size_t size) +{ + return ioremap_nocache(offset, size); +} +#endif + #ifndef ioremap_wc #define ioremap_wc ioremap_wc static inline void __iomem *ioremap_wc(phys_addr_t offset, size_t size) -- cgit v1.2.3 From 9e6b13f761d5914a8c9b83610e8d459653515c94 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 13 May 2015 19:42:23 +0200 Subject: x86/asm/uaccess: Unify the ALIGN_DESTINATION macro Pull it up into the header and kill duplicate versions. Separately, both macros are identical: 35948b2bd3431aee7149e85cfe4becbc /tmp/a 35948b2bd3431aee7149e85cfe4becbc /tmp/b Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431538944-27724-3-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/asm.h | 25 +++++++++++++++++++++++++ arch/x86/lib/copy_user_64.S | 24 ------------------------ arch/x86/lib/copy_user_nocache_64.S | 24 ------------------------ 3 files changed, 25 insertions(+), 48 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 7730c1c5c83a..189679aba703 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -63,6 +63,31 @@ _ASM_ALIGN ; \ _ASM_PTR (entry); \ .popsection + +.macro ALIGN_DESTINATION + /* check for bad alignment of destination */ + movl %edi,%ecx + andl $7,%ecx + jz 102f /* already aligned */ + subl $8,%ecx + negl %ecx + subl %ecx,%edx +100: movb (%rsi),%al +101: movb %al,(%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz 100b +102: + .section .fixup,"ax" +103: addl %ecx,%edx /* ecx is zerorest also */ + jmp copy_user_handle_tail + .previous + + _ASM_EXTABLE(100b,103b) + _ASM_EXTABLE(101b,103b) + .endm + #else # define _ASM_EXTABLE(from,to) \ " .pushsection \"__ex_table\",\"a\"\n" \ diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index fa997dfaef24..06ce685c3a5d 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -16,30 +16,6 @@ #include #include - .macro ALIGN_DESTINATION - /* check for bad alignment of destination */ - movl %edi,%ecx - andl $7,%ecx - jz 102f /* already aligned */ - subl $8,%ecx - negl %ecx - subl %ecx,%edx -100: movb (%rsi),%al -101: movb %al,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz 100b -102: - .section .fixup,"ax" -103: addl %ecx,%edx /* ecx is zerorest also */ - jmp copy_user_handle_tail - .previous - - _ASM_EXTABLE(100b,103b) - _ASM_EXTABLE(101b,103b) - .endm - /* Standard copy_to_user with segment limit checking */ ENTRY(_copy_to_user) CFI_STARTPROC diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index 42eeb12e0cd9..b836a2bace15 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S @@ -14,30 +14,6 @@ #include #include - .macro ALIGN_DESTINATION - /* check for bad alignment of destination */ - movl %edi,%ecx - andl $7,%ecx - jz 102f /* already aligned */ - subl $8,%ecx - negl %ecx - subl %ecx,%edx -100: movb (%rsi),%al -101: movb %al,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz 100b -102: - .section .fixup,"ax" -103: addl %ecx,%edx /* ecx is zerorest also */ - jmp copy_user_handle_tail - .previous - - _ASM_EXTABLE(100b,103b) - _ASM_EXTABLE(101b,103b) - .endm - /* * copy_user_nocache - Uncached memory copy with exception handling * This will force destination/source out of cache for more performance. -- cgit v1.2.3 From 6dc178760553605c58d78bd403dfcb4e042c5b72 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 May 2015 15:50:45 +0200 Subject: x86: Consolidate irq entering inlines smp.c and irq_work.c implement the same inline helper. Move it to apic.h and use it everywhere. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra --- arch/x86/include/asm/apic.h | 6 ++++++ arch/x86/kernel/irq_work.c | 10 ++-------- arch/x86/kernel/smp.c | 19 ++++++------------- 3 files changed, 14 insertions(+), 21 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 976b86a325e5..c8393634ca0c 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -644,6 +644,12 @@ static inline void entering_ack_irq(void) entering_irq(); } +static inline void ipi_entering_ack_irq(void) +{ + ack_APIC_irq(); + irq_enter(); +} + static inline void exiting_irq(void) { irq_exit(); diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index 15d741ddfeeb..dc5fa6a1e8d6 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -10,12 +10,6 @@ #include #include -static inline void irq_work_entering_irq(void) -{ - irq_enter(); - ack_APIC_irq(); -} - static inline void __smp_irq_work_interrupt(void) { inc_irq_stat(apic_irq_work_irqs); @@ -24,14 +18,14 @@ static inline void __smp_irq_work_interrupt(void) __visible void smp_irq_work_interrupt(struct pt_regs *regs) { - irq_work_entering_irq(); + ipi_entering_ack_irq(); __smp_irq_work_interrupt(); exiting_irq(); } __visible void smp_trace_irq_work_interrupt(struct pt_regs *regs) { - irq_work_entering_irq(); + ipi_entering_ack_irq(); trace_irq_work_entry(IRQ_WORK_VECTOR); __smp_irq_work_interrupt(); trace_irq_work_exit(IRQ_WORK_VECTOR); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index be8e1bde07aa..15aaa69bbb5e 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -170,8 +170,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) asmlinkage __visible void smp_reboot_interrupt(void) { - ack_APIC_irq(); - irq_enter(); + ipi_entering_ack_irq(); stop_this_cpu(NULL); irq_exit(); } @@ -265,12 +264,6 @@ __visible void smp_reschedule_interrupt(struct pt_regs *regs) */ } -static inline void smp_entering_irq(void) -{ - ack_APIC_irq(); - irq_enter(); -} - __visible void smp_trace_reschedule_interrupt(struct pt_regs *regs) { /* @@ -279,7 +272,7 @@ __visible void smp_trace_reschedule_interrupt(struct pt_regs *regs) * scheduler_ipi(). This is OK, since those functions are allowed * to nest. */ - smp_entering_irq(); + ipi_entering_ack_irq(); trace_reschedule_entry(RESCHEDULE_VECTOR); __smp_reschedule_interrupt(); trace_reschedule_exit(RESCHEDULE_VECTOR); @@ -297,14 +290,14 @@ static inline void __smp_call_function_interrupt(void) __visible void smp_call_function_interrupt(struct pt_regs *regs) { - smp_entering_irq(); + ipi_entering_ack_irq(); __smp_call_function_interrupt(); exiting_irq(); } __visible void smp_trace_call_function_interrupt(struct pt_regs *regs) { - smp_entering_irq(); + ipi_entering_ack_irq(); trace_call_function_entry(CALL_FUNCTION_VECTOR); __smp_call_function_interrupt(); trace_call_function_exit(CALL_FUNCTION_VECTOR); @@ -319,14 +312,14 @@ static inline void __smp_call_function_single_interrupt(void) __visible void smp_call_function_single_interrupt(struct pt_regs *regs) { - smp_entering_irq(); + ipi_entering_ack_irq(); __smp_call_function_single_interrupt(); exiting_irq(); } __visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs) { - smp_entering_irq(); + ipi_entering_ack_irq(); trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR); __smp_call_function_single_interrupt(); trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR); -- cgit v1.2.3 From ea6cd25058f39ac69623efdcbd94a7fc7d4d13f0 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Sat, 9 May 2015 20:27:37 -0400 Subject: x86: Rename eisa_set_level_irq to elcr_set_level_irq This routine has been around for over a decade, but with EISA being dead and abandoned for about twice that long, the name can be kind of confusing. The function is going at the PIC Edge/Level Configuration Registers (ELCR), so rename it as such and mentally decouple it from the long since dead EISA bus. Signed-off-by: Paul Gortmaker Reviewed-by: Maciej W. Rozycki Acked-by: Pavel Machek Cc: Rafael J. Wysocki Cc: Len Brown Cc: Bjorn Helgaas Cc: x86@kernel.org Link: http://lkml.kernel.org/r/1431217657-934-1-git-send-email-paul.gortmaker@windriver.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hw_irq.h | 3 +-- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/pci/irq.c | 13 +++++++------ 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 9472c9aff26d..9ec5d37d8da3 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -157,8 +157,7 @@ static inline void unlock_vector_lock(void) {} extern atomic_t irq_err_count; extern atomic_t irq_mis_count; -/* EISA */ -extern void eisa_set_level_irq(unsigned int irq); +extern void elcr_set_level_irq(unsigned int irq); /* SMP */ extern __visible void smp_apic_timer_interrupt(struct pt_regs *); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 271293ad89d7..e49ee24da85e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -608,7 +608,7 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi, * Make sure all (legacy) PCI IRQs are set as level-triggered. */ if (trigger == ACPI_LEVEL_SENSITIVE) - eisa_set_level_irq(gsi); + elcr_set_level_irq(gsi); #endif return gsi; diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 5dc6ca5e1741..9bd115484745 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -146,19 +146,20 @@ static void __init pirq_peer_trick(void) /* * Code for querying and setting of IRQ routes on various interrupt routers. + * PIC Edge/Level Control Registers (ELCR) 0x4d0 & 0x4d1. */ -void eisa_set_level_irq(unsigned int irq) +void elcr_set_level_irq(unsigned int irq) { unsigned char mask = 1 << (irq & 7); unsigned int port = 0x4d0 + (irq >> 3); unsigned char val; - static u16 eisa_irq_mask; + static u16 elcr_irq_mask; - if (irq >= 16 || (1 << irq) & eisa_irq_mask) + if (irq >= 16 || (1 << irq) & elcr_irq_mask) return; - eisa_irq_mask |= (1 << irq); + elcr_irq_mask |= (1 << irq); printk(KERN_DEBUG "PCI: setting IRQ %u as level-triggered\n", irq); val = inb(port); if (!(val & mask)) { @@ -965,11 +966,11 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { msg = "found"; - eisa_set_level_irq(irq); + elcr_set_level_irq(irq); } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { if (r->set(pirq_router_dev, dev, pirq, newirq)) { - eisa_set_level_irq(newirq); + elcr_set_level_irq(newirq); msg = "assigned"; irq = newirq; } -- cgit v1.2.3 From f6b3c72c23661e5534cd2eede16e9bac7ebb761c Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Tue, 19 May 2015 17:07:16 +0800 Subject: x86/irq: Define a global vector for VT-d Posted-Interrupts Currently, we use a global vector as the Posted-Interrupts Notification Event for all the vCPUs in the system. We need to introduce another global vector for VT-d Posted-Interrtups, which will be used to wakeup the sleep vCPU when an external interrupt from a direct-assigned device happens for that vCPU. [ tglx: Removed a gazillion of extra newlines ] Signed-off-by: Feng Wu Cc: jiang.liu@linux.intel.com Link: http://lkml.kernel.org/r/1432026437-16560-4-git-send-email-feng.wu@intel.com Suggested-by: Yang Zhang Acked-by: H. Peter Anvin Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/entry_arch.h | 2 ++ arch/x86/include/asm/hardirq.h | 1 + arch/x86/include/asm/hw_irq.h | 2 ++ arch/x86/include/asm/irq.h | 4 ++++ arch/x86/include/asm/irq_vectors.h | 1 + arch/x86/kernel/entry_64.S | 2 ++ arch/x86/kernel/irq.c | 26 ++++++++++++++++++++++++++ arch/x86/kernel/irqinit.c | 2 ++ 8 files changed, 40 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index dc5fa661465f..27ca0afcccd7 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -23,6 +23,8 @@ BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) #ifdef CONFIG_HAVE_KVM BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR, smp_kvm_posted_intr_ipi) +BUILD_INTERRUPT3(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR, + smp_kvm_posted_intr_wakeup_ipi) #endif /* diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 0f5fb6b6567e..986606539395 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -14,6 +14,7 @@ typedef struct { #endif #ifdef CONFIG_HAVE_KVM unsigned int kvm_posted_intr_ipis; + unsigned int kvm_posted_intr_wakeup_ipis; #endif unsigned int x86_platform_ipis; /* arch dependent */ unsigned int apic_perf_irqs; diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 9ec5d37d8da3..10c80d4f8386 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -29,6 +29,7 @@ extern asmlinkage void apic_timer_interrupt(void); extern asmlinkage void x86_platform_ipi(void); extern asmlinkage void kvm_posted_intr_ipi(void); +extern asmlinkage void kvm_posted_intr_wakeup_ipi(void); extern asmlinkage void error_interrupt(void); extern asmlinkage void irq_work_interrupt(void); @@ -58,6 +59,7 @@ extern void trace_call_function_single_interrupt(void); #define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt #define trace_reboot_interrupt reboot_interrupt #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi +#define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi #endif /* CONFIG_TRACING */ #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index a80cbb88ea91..8008d06581c7 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -30,6 +30,10 @@ extern void fixup_irqs(void); extern void irq_force_complete_move(int); #endif +#ifdef CONFIG_HAVE_KVM +extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)); +#endif + extern void (*x86_platform_ipi_callback)(void); extern void native_init_IRQ(void); extern bool handle_irq(unsigned irq, struct pt_regs *regs); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index bf55235d7772..0ed29ac13a9d 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -86,6 +86,7 @@ /* Vector for KVM to deliver posted interrupt IPI */ #ifdef CONFIG_HAVE_KVM #define POSTED_INTR_VECTOR 0xf2 +#define POSTED_INTR_WAKEUP_VECTOR 0xf1 #endif /* diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 47b95813dc37..22aadc917868 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -916,6 +916,8 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \ #ifdef CONFIG_HAVE_KVM apicinterrupt3 POSTED_INTR_VECTOR \ kvm_posted_intr_ipi smp_kvm_posted_intr_ipi +apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR \ + kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi #endif #ifdef CONFIG_X86_MCE_THRESHOLD diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index be3894512820..90b2f7052f5b 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -242,6 +242,18 @@ __visible void smp_x86_platform_ipi(struct pt_regs *regs) } #ifdef CONFIG_HAVE_KVM +static void dummy_handler(void) {} +static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler; + +void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)) +{ + if (handler) + kvm_posted_intr_wakeup_handler = handler; + else + kvm_posted_intr_wakeup_handler = dummy_handler; +} +EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler); + /* * Handler for POSTED_INTERRUPT_VECTOR. */ @@ -254,6 +266,20 @@ __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs) exiting_irq(); set_irq_regs(old_regs); } + +/* + * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. + */ +__visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + entering_ack_irq(); + inc_irq_stat(kvm_posted_intr_wakeup_ipis); + kvm_posted_intr_wakeup_handler(); + exiting_irq(); + set_irq_regs(old_regs); +} #endif __visible void smp_trace_x86_platform_ipi(struct pt_regs *regs) diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index dc1e08d23552..680723a8e4b6 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -144,6 +144,8 @@ static void __init apic_intr_init(void) #ifdef CONFIG_HAVE_KVM /* IPI for KVM to deliver posted interrupt */ alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); + /* IPI for KVM to deliver interrupt to wake up tasks */ + alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi); #endif /* IPI vectors for APIC spurious and error interrupts */ -- cgit v1.2.3 From cdeb6048940fa4bfb429e2f1cba0d28a11e20cd5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 22 May 2015 16:15:47 -0700 Subject: x86/asm/irq: Stop relying on magic JMP behavior for early_idt_handlers The early_idt_handlers asm code generates an array of entry points spaced nine bytes apart. It's not really clear from that code or from the places that reference it what's going on, and the code only works in the first place because GAS never generates two-byte JMP instructions when jumping to global labels. Clean up the code to generate the correct array stride (member size) explicitly. This should be considerably more robust against screw-ups, as GAS will warn if a .fill directive has a negative count. Using '. =' to advance would have been even more robust (it would generate an actual error if it tried to move backwards), but it would pad with nulls, confusing anyone who tries to disassemble the code. The new scheme should be much clearer to future readers. While we're at it, improve the comments and rename the array and common code. Binutils may start relaxing jumps to non-weak labels. If so, this change will fix our build, and we may need to backport this change. Before, on x86_64: 0000000000000000 : 0: 6a 00 pushq $0x0 2: 6a 00 pushq $0x0 4: e9 00 00 00 00 jmpq 9 5: R_X86_64_PC32 early_idt_handler-0x4 ... 48: 66 90 xchg %ax,%ax 4a: 6a 08 pushq $0x8 4c: e9 00 00 00 00 jmpq 51 4d: R_X86_64_PC32 early_idt_handler-0x4 ... 117: 6a 00 pushq $0x0 119: 6a 1f pushq $0x1f 11b: e9 00 00 00 00 jmpq 120 11c: R_X86_64_PC32 early_idt_handler-0x4 After: 0000000000000000 : 0: 6a 00 pushq $0x0 2: 6a 00 pushq $0x0 4: e9 14 01 00 00 jmpq 11d ... 48: 6a 08 pushq $0x8 4a: e9 d1 00 00 00 jmpq 120 4f: cc int3 50: cc int3 ... 117: 6a 00 pushq $0x0 119: 6a 1f pushq $0x1f 11b: eb 03 jmp 120 11d: cc int3 11e: cc int3 11f: cc int3 Signed-off-by: Andy Lutomirski Acked-by: H. Peter Anvin Cc: Binutils Cc: Borislav Petkov Cc: H.J. Lu Cc: Jan Beulich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/ac027962af343b0c599cbfcf50b945ad2ef3d7a8.1432336324.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/segment.h | 14 ++++++++++++-- arch/x86/kernel/head64.c | 2 +- arch/x86/kernel/head_32.S | 33 ++++++++++++++++++--------------- arch/x86/kernel/head_64.S | 20 +++++++++++--------- 4 files changed, 42 insertions(+), 27 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 5a9856eb12ba..7d5a1929d76b 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -231,11 +231,21 @@ #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8) #ifdef __KERNEL__ + +/* + * early_idt_handler_array is an array of entry points referenced in the + * early IDT. For simplicity, it's a real array with one entry point + * every nine bytes. That leaves room for an optional 'push $0' if the + * vector has no error code (two bytes), a 'push $vector_number' (two + * bytes), and a jump to the common entry code (up to five bytes). + */ +#define EARLY_IDT_HANDLER_SIZE 9 + #ifndef __ASSEMBLY__ -extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5]; +extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; #ifdef CONFIG_TRACING -# define trace_early_idt_handlers early_idt_handlers +# define trace_early_idt_handler_array early_idt_handler_array #endif /* diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 2b55ee6db053..5a4668136e98 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -167,7 +167,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) clear_bss(); for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) - set_intr_gate(i, early_idt_handlers[i]); + set_intr_gate(i, early_idt_handler_array[i]); load_idt((const struct desc_ptr *)&idt_descr); copy_bootdata(__va(real_mode_data)); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 02d257256200..544dec4cc605 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -478,21 +478,22 @@ is486: __INIT setup_once: /* - * Set up a idt with 256 entries pointing to ignore_int, - * interrupt gates. It doesn't actually load idt - that needs - * to be done on each CPU. Interrupts are enabled elsewhere, - * when we can be relatively sure everything is ok. + * Set up a idt with 256 interrupt gates that push zero if there + * is no error code and then jump to early_idt_handler_common. + * It doesn't actually load the idt - that needs to be done on + * each CPU. Interrupts are enabled elsewhere, when we can be + * relatively sure everything is ok. */ movl $idt_table,%edi - movl $early_idt_handlers,%eax + movl $early_idt_handler_array,%eax movl $NUM_EXCEPTION_VECTORS,%ecx 1: movl %eax,(%edi) movl %eax,4(%edi) /* interrupt gate, dpl=0, present */ movl $(0x8E000000 + __KERNEL_CS),2(%edi) - addl $9,%eax + addl $EARLY_IDT_HANDLER_SIZE,%eax addl $8,%edi loop 1b @@ -524,26 +525,28 @@ setup_once: andl $0,setup_once_ref /* Once is enough, thanks */ ret -ENTRY(early_idt_handlers) +ENTRY(early_idt_handler_array) # 36(%esp) %eflags # 32(%esp) %cs # 28(%esp) %eip # 24(%rsp) error code i = 0 .rept NUM_EXCEPTION_VECTORS - .if (EXCEPTION_ERRCODE_MASK >> i) & 1 - ASM_NOP2 - .else + .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 pushl $0 # Dummy error code, to make stack frame uniform .endif pushl $i # 20(%esp) Vector number - jmp early_idt_handler + jmp early_idt_handler_common i = i + 1 + .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr -ENDPROC(early_idt_handlers) +ENDPROC(early_idt_handler_array) - /* This is global to keep gas from relaxing the jumps */ -ENTRY(early_idt_handler) +early_idt_handler_common: + /* + * The stack is the hardware frame, an error code or zero, and the + * vector number. + */ cld cmpl $2,(%esp) # X86_TRAP_NMI @@ -603,7 +606,7 @@ ex_entry: .Lis_nmi: addl $8,%esp /* drop vector number and error code */ iret -ENDPROC(early_idt_handler) +ENDPROC(early_idt_handler_common) /* This is the default interrupt "handler" :-) */ ALIGN diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 43eafc8afb69..e5c27f729a38 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -321,26 +321,28 @@ bad_address: jmp bad_address __INIT - .globl early_idt_handlers -early_idt_handlers: +ENTRY(early_idt_handler_array) # 104(%rsp) %rflags # 96(%rsp) %cs # 88(%rsp) %rip # 80(%rsp) error code i = 0 .rept NUM_EXCEPTION_VECTORS - .if (EXCEPTION_ERRCODE_MASK >> i) & 1 - ASM_NOP2 - .else + .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 pushq $0 # Dummy error code, to make stack frame uniform .endif pushq $i # 72(%rsp) Vector number - jmp early_idt_handler + jmp early_idt_handler_common i = i + 1 + .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr +ENDPROC(early_idt_handler_array) -/* This is global to keep gas from relaxing the jumps */ -ENTRY(early_idt_handler) +early_idt_handler_common: + /* + * The stack is the hardware frame, an error code or zero, and the + * vector number. + */ cld cmpl $2,(%rsp) # X86_TRAP_NMI @@ -412,7 +414,7 @@ ENTRY(early_idt_handler) .Lis_nmi: addq $16,%rsp # drop vector number and error code INTERRUPT_RETURN -ENDPROC(early_idt_handler) +ENDPROC(early_idt_handler_common) __INITDATA -- cgit v1.2.3 From 9b3aca620883fc06636737c82a4d024b22182281 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 May 2015 10:28:06 +0200 Subject: x86/mm/mtrr: Fix MTRR state checks in mtrr_type_lookup() 'mtrr_state.enabled' contains the FE (fixed MTRRs enabled) and E (MTRRs enabled) flags in MSR_MTRRdefType. Intel SDM, section 11.11.2.1, defines these flags as follows: - All MTRRs are disabled when the E flag is clear. The FE flag has no affect when the E flag is clear. - The default type is enabled when the E flag is set. - MTRR variable ranges are enabled when the E flag is set. - MTRR fixed ranges are enabled when both E and FE flags are set. MTRR state checks in __mtrr_type_lookup() do not match with SDM. Hence, this patch makes the following changes: - The current code detects MTRRs disabled when both E and FE flags are clear in mtrr_state.enabled. Fix to detect MTRRs disabled when the E flag is clear. - The current code does not check if the FE bit is set in mtrr_state.enabled when looking at the fixed entries. Fix to check the FE flag. - The current code returns the default type when the E flag is clear in mtrr_state.enabled. However, the default type is UC when the E flag is clear. Remove the code as this case is handled as MTRR disabled with the 1st change. In addition, this patch defines the E and FE flags in mtrr_state.enabled as follows. - FE flag: MTRR_STATE_MTRR_FIXED_ENABLED - E flag: MTRR_STATE_MTRR_ENABLED print_mtrr_state() and x86_get_mtrr_mem_range() are also updated accordingly. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave.hansen@intel.com Cc: linux-mm Cc: pebolle@tiscali.nl Link: http://lkml.kernel.org/r/1431714237-880-4-git-send-email-toshi.kani@hp.com Link: http://lkml.kernel.org/r/1432628901-18044-4-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mtrr.h | 4 ++++ arch/x86/kernel/cpu/mtrr/cleanup.c | 3 ++- arch/x86/kernel/cpu/mtrr/generic.c | 15 ++++++++------- 3 files changed, 14 insertions(+), 8 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index f768f6298419..ef927948657c 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -127,4 +127,8 @@ struct mtrr_gentry32 { _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry32) #endif /* CONFIG_COMPAT */ +/* Bit fields for enabled in struct mtrr_state_type */ +#define MTRR_STATE_MTRR_FIXED_ENABLED 0x01 +#define MTRR_STATE_MTRR_ENABLED 0x02 + #endif /* _ASM_X86_MTRR_H */ diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 5f90b85ff22e..70d7c93f4550 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -98,7 +98,8 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, continue; base = range_state[i].base_pfn; if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed && - (mtrr_state.enabled & 1)) { + (mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) && + (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) { /* Var MTRR contains UC entry below 1M? Skip it: */ printk(BIOS_BUG_MSG, i); if (base + size <= (1<<(20-PAGE_SHIFT))) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index e202d26f64a2..b0599dbb899a 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -119,14 +119,16 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) if (!mtrr_state_set) return 0xFF; - if (!mtrr_state.enabled) + if (!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED)) return 0xFF; /* Make end inclusive end, instead of exclusive */ end--; /* Look in fixed ranges. Just return the type as per start */ - if (mtrr_state.have_fixed && (start < 0x100000)) { + if ((start < 0x100000) && + (mtrr_state.have_fixed) && + (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) { int idx; if (start < 0x80000) { @@ -149,9 +151,6 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) * Look of multiple ranges matching this address and pick type * as per MTRR precedence */ - if (!(mtrr_state.enabled & 2)) - return mtrr_state.def_type; - prev_match = 0xFF; for (i = 0; i < num_var_ranges; ++i) { unsigned short start_state, end_state, inclusive; @@ -355,7 +354,9 @@ static void __init print_mtrr_state(void) mtrr_attrib_to_str(mtrr_state.def_type)); if (mtrr_state.have_fixed) { pr_debug("MTRR fixed ranges %sabled:\n", - mtrr_state.enabled & 1 ? "en" : "dis"); + ((mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) && + (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) ? + "en" : "dis"); print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); for (i = 0; i < 2; ++i) print_fixed(0x80000 + i * 0x20000, 0x04000, @@ -368,7 +369,7 @@ static void __init print_mtrr_state(void) print_fixed_last(); } pr_debug("MTRR variable ranges %sabled:\n", - mtrr_state.enabled & 2 ? "en" : "dis"); + mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED ? "en" : "dis"); high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4; for (i = 0; i < num_var_ranges; ++i) { -- cgit v1.2.3 From 3d3ca416d9b0784cfcf244eeeba1bcaf421bc64d Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 May 2015 10:28:07 +0200 Subject: x86/mm/mtrr: Use symbolic define as a retval for disabled MTRRs mtrr_type_lookup() returns verbatim 0xFF when MTRRs are disabled. This patch defines MTRR_TYPE_INVALID to clarify the meaning of this value, and documents its usage. Document the return values of the kernel virtual address mapping helpers pud_set_huge(), pmd_set_huge, pud_clear_huge() and pmd_clear_huge(). There is no functional change in this patch. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave.hansen@intel.com Cc: linux-mm Cc: pebolle@tiscali.nl Link: http://lkml.kernel.org/r/1431714237-880-5-git-send-email-toshi.kani@hp.com Link: http://lkml.kernel.org/r/1432628901-18044-5-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mtrr.h | 2 +- arch/x86/include/uapi/asm/mtrr.h | 8 +++++++- arch/x86/kernel/cpu/mtrr/generic.c | 14 ++++++------- arch/x86/mm/pgtable.c | 42 +++++++++++++++++++++++++++++--------- 4 files changed, 47 insertions(+), 19 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index ef927948657c..bb03a547c1ab 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -55,7 +55,7 @@ static inline u8 mtrr_type_lookup(u64 addr, u64 end) /* * Return no-MTRRs: */ - return 0xff; + return MTRR_TYPE_INVALID; } #define mtrr_save_fixed_ranges(arg) do {} while (0) #define mtrr_save_state() do {} while (0) diff --git a/arch/x86/include/uapi/asm/mtrr.h b/arch/x86/include/uapi/asm/mtrr.h index d0acb658c8f4..7528dcf59691 100644 --- a/arch/x86/include/uapi/asm/mtrr.h +++ b/arch/x86/include/uapi/asm/mtrr.h @@ -103,7 +103,7 @@ struct mtrr_state_type { #define MTRRIOC_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry) #define MTRRIOC_KILL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry) -/* These are the region types */ +/* MTRR memory types, which are defined in SDM */ #define MTRR_TYPE_UNCACHABLE 0 #define MTRR_TYPE_WRCOMB 1 /*#define MTRR_TYPE_ 2*/ @@ -113,5 +113,11 @@ struct mtrr_state_type { #define MTRR_TYPE_WRBACK 6 #define MTRR_NUM_TYPES 7 +/* + * Invalid MTRR memory type. mtrr_type_lookup() returns this value when + * MTRRs are disabled. Note, this value is allocated from the reserved + * values (0x7-0xff) of the MTRR memory types. + */ +#define MTRR_TYPE_INVALID 0xff #endif /* _UAPI_ASM_X86_MTRR_H */ diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index b0599dbb899a..7b1491c6232d 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -104,7 +104,7 @@ static int check_type_overlap(u8 *prev, u8 *curr) /* * Error/Semi-error returns: - * 0xFF - when MTRR is not enabled + * MTRR_TYPE_INVALID - when MTRR is not enabled * *repeat == 1 implies [start:end] spanned across MTRR range and type returned * corresponds only to [start:*partial_end]. * Caller has to lookup again for [*partial_end:end]. @@ -117,10 +117,10 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) *repeat = 0; if (!mtrr_state_set) - return 0xFF; + return MTRR_TYPE_INVALID; if (!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED)) - return 0xFF; + return MTRR_TYPE_INVALID; /* Make end inclusive end, instead of exclusive */ end--; @@ -151,7 +151,7 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) * Look of multiple ranges matching this address and pick type * as per MTRR precedence */ - prev_match = 0xFF; + prev_match = MTRR_TYPE_INVALID; for (i = 0; i < num_var_ranges; ++i) { unsigned short start_state, end_state, inclusive; @@ -206,7 +206,7 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) continue; curr_match = mtrr_state.var_ranges[i].base_lo & 0xff; - if (prev_match == 0xFF) { + if (prev_match == MTRR_TYPE_INVALID) { prev_match = curr_match; continue; } @@ -220,7 +220,7 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) return MTRR_TYPE_WRBACK; } - if (prev_match != 0xFF) + if (prev_match != MTRR_TYPE_INVALID) return prev_match; return mtrr_state.def_type; @@ -229,7 +229,7 @@ static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) /* * Returns the effective MTRR type for the region * Error return: - * 0xFF - when MTRR is not enabled + * MTRR_TYPE_INVALID - when MTRR is not enabled */ u8 mtrr_type_lookup(u64 start, u64 end) { diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 0b97d2c75df3..c30f9819786b 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -563,16 +563,22 @@ void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, } #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +/** + * pud_set_huge - setup kernel PUD mapping + * + * MTRR can override PAT memory types with 4KiB granularity. Therefore, + * this function does not set up a huge page when the range is covered + * by a non-WB type of MTRR. MTRR_TYPE_INVALID indicates that MTRR are + * disabled. + * + * Returns 1 on success and 0 on failure. + */ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { u8 mtrr; - /* - * Do not use a huge page when the range is covered by non-WB type - * of MTRRs. - */ mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE); - if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF)) + if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != MTRR_TYPE_INVALID)) return 0; prot = pgprot_4k_2_large(prot); @@ -584,16 +590,22 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) return 1; } +/** + * pmd_set_huge - setup kernel PMD mapping + * + * MTRR can override PAT memory types with 4KiB granularity. Therefore, + * this function does not set up a huge page when the range is covered + * by a non-WB type of MTRR. MTRR_TYPE_INVALID indicates that MTRR are + * disabled. + * + * Returns 1 on success and 0 on failure. + */ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) { u8 mtrr; - /* - * Do not use a huge page when the range is covered by non-WB type - * of MTRRs. - */ mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE); - if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF)) + if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != MTRR_TYPE_INVALID)) return 0; prot = pgprot_4k_2_large(prot); @@ -605,6 +617,11 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) return 1; } +/** + * pud_clear_huge - clear kernel PUD mapping when it is set + * + * Returns 1 on success and 0 on failure (no PUD map is found). + */ int pud_clear_huge(pud_t *pud) { if (pud_large(*pud)) { @@ -615,6 +632,11 @@ int pud_clear_huge(pud_t *pud) return 0; } +/** + * pmd_clear_huge - clear kernel PMD mapping when it is set + * + * Returns 1 on success and 0 on failure (no PMD map is found). + */ int pmd_clear_huge(pmd_t *pmd) { if (pmd_large(*pmd)) { -- cgit v1.2.3 From b73522e0c1be58d3c69b124985b8ccf94e3677f7 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 May 2015 10:28:10 +0200 Subject: x86/mm/mtrr: Enhance MTRR checks in kernel mapping helpers This patch adds the argument 'uniform' to mtrr_type_lookup(), which gets set to 1 when a given range is covered uniformly by MTRRs, i.e. the range is fully covered by a single MTRR entry or the default type. Change pud_set_huge() and pmd_set_huge() to honor the 'uniform' flag to see if it is safe to create a huge page mapping in the range. This allows them to create a huge page mapping in a range covered by a single MTRR entry of any memory type. It also detects a non-optimal request properly. They continue to check with the WB type since it does not effectively change the uniform mapping even if a request spans multiple MTRR entries. pmd_set_huge() logs a warning message to a non-optimal request so that driver writers will be aware of such a case. Drivers should make a mapping request aligned to a single MTRR entry when the range is covered by MTRRs. Signed-off-by: Toshi Kani [ Realign, flesh out comments, improve warning message. ] Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave.hansen@intel.com Cc: linux-mm Cc: pebolle@tiscali.nl Link: http://lkml.kernel.org/r/1431714237-880-7-git-send-email-toshi.kani@hp.com Link: http://lkml.kernel.org/r/1432628901-18044-8-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mtrr.h | 4 ++-- arch/x86/kernel/cpu/mtrr/generic.c | 40 ++++++++++++++++++++++++++++---------- arch/x86/mm/pat.c | 4 ++-- arch/x86/mm/pgtable.c | 38 +++++++++++++++++++++++------------- 4 files changed, 58 insertions(+), 28 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index bb03a547c1ab..a31759e1edd9 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -31,7 +31,7 @@ * arch_phys_wc_add and arch_phys_wc_del. */ # ifdef CONFIG_MTRR -extern u8 mtrr_type_lookup(u64 addr, u64 end); +extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform); extern void mtrr_save_fixed_ranges(void *); extern void mtrr_save_state(void); extern int mtrr_add(unsigned long base, unsigned long size, @@ -50,7 +50,7 @@ extern int mtrr_trim_uncached_memory(unsigned long end_pfn); extern int amd_special_default_mtrr(void); extern int phys_wc_to_mtrr_index(int handle); # else -static inline u8 mtrr_type_lookup(u64 addr, u64 end) +static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform) { /* * Return no-MTRRs: diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index e51100c49eea..f782d9b62cb3 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -147,19 +147,24 @@ static u8 mtrr_type_lookup_fixed(u64 start, u64 end) * Return Value: * MTRR_TYPE_(type) - Matched memory type or default memory type (unmatched) * - * Output Argument: + * Output Arguments: * repeat - Set to 1 when [start:end] spanned across MTRR range and type * returned corresponds only to [start:*partial_end]. Caller has * to lookup again for [*partial_end:end]. + * + * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the + * region is fully covered by a single MTRR entry or the default + * type. */ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, - int *repeat) + int *repeat, u8 *uniform) { int i; u64 base, mask; u8 prev_match, curr_match; *repeat = 0; + *uniform = 1; /* Make end inclusive instead of exclusive */ end--; @@ -214,6 +219,7 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, end = *partial_end - 1; /* end is inclusive */ *repeat = 1; + *uniform = 0; } if ((start & mask) != (base & mask)) @@ -225,6 +231,7 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, continue; } + *uniform = 0; if (check_type_overlap(&prev_match, &curr_match)) return curr_match; } @@ -241,10 +248,15 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, * Return Values: * MTRR_TYPE_(type) - The effective MTRR type for the region * MTRR_TYPE_INVALID - MTRR is disabled + * + * Output Argument: + * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the + * region is fully covered by a single MTRR entry or the default + * type. */ -u8 mtrr_type_lookup(u64 start, u64 end) +u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform) { - u8 type, prev_type; + u8 type, prev_type, is_uniform = 1, dummy; int repeat; u64 partial_end; @@ -260,14 +272,18 @@ u8 mtrr_type_lookup(u64 start, u64 end) */ if ((start < 0x100000) && (mtrr_state.have_fixed) && - (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) - return mtrr_type_lookup_fixed(start, end); + (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) { + is_uniform = 0; + type = mtrr_type_lookup_fixed(start, end); + goto out; + } /* * Look up the variable ranges. Look of multiple ranges matching * this address and pick type as per MTRR precedence. */ - type = mtrr_type_lookup_variable(start, end, &partial_end, &repeat); + type = mtrr_type_lookup_variable(start, end, &partial_end, + &repeat, &is_uniform); /* * Common path is with repeat = 0. @@ -278,15 +294,19 @@ u8 mtrr_type_lookup(u64 start, u64 end) while (repeat) { prev_type = type; start = partial_end; - type = mtrr_type_lookup_variable(start, end, &partial_end, &repeat); + is_uniform = 0; + type = mtrr_type_lookup_variable(start, end, &partial_end, + &repeat, &dummy); if (check_type_overlap(&prev_type, &type)) - return type; + goto out; } if (mtrr_tom2 && (start >= (1ULL<<32)) && (end < mtrr_tom2)) - return MTRR_TYPE_WRBACK; + type = MTRR_TYPE_WRBACK; +out: + *uniform = is_uniform; return type; } diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 35af6771a95a..372ad422c2c3 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -267,9 +267,9 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, * request is for WB. */ if (req_type == _PAGE_CACHE_MODE_WB) { - u8 mtrr_type; + u8 mtrr_type, uniform; - mtrr_type = mtrr_type_lookup(start, end); + mtrr_type = mtrr_type_lookup(start, end, &uniform); if (mtrr_type != MTRR_TYPE_WRBACK) return _PAGE_CACHE_MODE_UC_MINUS; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index c30f9819786b..fb0a9dd1d6e4 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -566,19 +566,28 @@ void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, /** * pud_set_huge - setup kernel PUD mapping * - * MTRR can override PAT memory types with 4KiB granularity. Therefore, - * this function does not set up a huge page when the range is covered - * by a non-WB type of MTRR. MTRR_TYPE_INVALID indicates that MTRR are - * disabled. + * MTRRs can override PAT memory types with 4KiB granularity. Therefore, this + * function sets up a huge page only if any of the following conditions are met: + * + * - MTRRs are disabled, or + * + * - MTRRs are enabled and the range is completely covered by a single MTRR, or + * + * - MTRRs are enabled and the corresponding MTRR memory type is WB, which + * has no effect on the requested PAT memory type. + * + * Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger + * page mapping attempt fails. * * Returns 1 on success and 0 on failure. */ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { - u8 mtrr; + u8 mtrr, uniform; - mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE); - if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != MTRR_TYPE_INVALID)) + mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform); + if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) && + (mtrr != MTRR_TYPE_WRBACK)) return 0; prot = pgprot_4k_2_large(prot); @@ -593,20 +602,21 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) /** * pmd_set_huge - setup kernel PMD mapping * - * MTRR can override PAT memory types with 4KiB granularity. Therefore, - * this function does not set up a huge page when the range is covered - * by a non-WB type of MTRR. MTRR_TYPE_INVALID indicates that MTRR are - * disabled. + * See text over pud_set_huge() above. * * Returns 1 on success and 0 on failure. */ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) { - u8 mtrr; + u8 mtrr, uniform; - mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE); - if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != MTRR_TYPE_INVALID)) + mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform); + if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) && + (mtrr != MTRR_TYPE_WRBACK)) { + pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n", + __func__, addr, addr + PMD_SIZE); return 0; + } prot = pgprot_4k_2_large(prot); -- cgit v1.2.3 From 7d010fdf299929f9583ce5e17da629dcd83c36ef Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 26 May 2015 10:28:13 +0200 Subject: x86/mm/mtrr: Avoid #ifdeffery with phys_wc_to_mtrr_index() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is only one user but since we're going to bury MTRR next out of access to drivers, expose this last piece of API to drivers in a general fashion only needing io.h for access to helpers. Signed-off-by: Luis R. Rodriguez Signed-off-by: Borislav Petkov Cc: Abhilash Kesavan Cc: Andrew Morton Cc: Andy Lutomirski Cc: Antonino Daplas Cc: Borislav Petkov Cc: Brian Gerst Cc: Catalin Marinas Cc: Cristian Stoica Cc: Daniel Vetter Cc: Dave Airlie Cc: Dave Hansen Cc: Davidlohr Bueso Cc: Denys Vlasenko Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Jean-Christophe Plagniol-Villard Cc: Juergen Gross Cc: Linus Torvalds Cc: Matthias Brugger Cc: Mel Gorman Cc: Peter Zijlstra Cc: Suresh Siddha Cc: Thierry Reding Cc: Thomas Gleixner Cc: Tomi Valkeinen Cc: Toshi Kani Cc: Ville Syrjälä Cc: Vlastimil Babka Cc: Will Deacon Cc: dri-devel@lists.freedesktop.org Link: http://lkml.kernel.org/r/1429722736-4473-1-git-send-email-mcgrof@do-not-panic.com Link: http://lkml.kernel.org/r/1432628901-18044-11-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 3 +++ arch/x86/include/asm/mtrr.h | 5 ----- arch/x86/kernel/cpu/mtrr/main.c | 6 +++--- drivers/gpu/drm/drm_ioctl.c | 14 +------------- include/linux/io.h | 7 +++++++ 5 files changed, 14 insertions(+), 21 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 4afc05ffa566..a2b97404922d 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -339,6 +339,9 @@ extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, #define IO_SPACE_LIMIT 0xffff #ifdef CONFIG_MTRR +extern int __must_check arch_phys_wc_index(int handle); +#define arch_phys_wc_index arch_phys_wc_index + extern int __must_check arch_phys_wc_add(unsigned long base, unsigned long size); extern void arch_phys_wc_del(int handle); diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index a31759e1edd9..b94f6f64e23d 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -48,7 +48,6 @@ extern void mtrr_aps_init(void); extern void mtrr_bp_restore(void); extern int mtrr_trim_uncached_memory(unsigned long end_pfn); extern int amd_special_default_mtrr(void); -extern int phys_wc_to_mtrr_index(int handle); # else static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform) { @@ -84,10 +83,6 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn) static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) { } -static inline int phys_wc_to_mtrr_index(int handle) -{ - return -1; -} #define mtrr_ap_init() do {} while (0) #define mtrr_bp_init() do {} while (0) diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 04aceb7e6443..81baf5fee0e1 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -580,7 +580,7 @@ void arch_phys_wc_del(int handle) EXPORT_SYMBOL(arch_phys_wc_del); /* - * phys_wc_to_mtrr_index - translates arch_phys_wc_add's return value + * arch_phys_wc_index - translates arch_phys_wc_add's return value * @handle: Return value from arch_phys_wc_add * * This will turn the return value from arch_phys_wc_add into an mtrr @@ -590,14 +590,14 @@ EXPORT_SYMBOL(arch_phys_wc_del); * in printk line. Alas there is an illegitimate use in some ancient * drm ioctls. */ -int phys_wc_to_mtrr_index(int handle) +int arch_phys_wc_index(int handle) { if (handle < MTRR_TO_PHYS_WC_OFFSET) return -1; else return handle - MTRR_TO_PHYS_WC_OFFSET; } -EXPORT_SYMBOL_GPL(phys_wc_to_mtrr_index); +EXPORT_SYMBOL_GPL(arch_phys_wc_index); /* * HACK ALERT! diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 266dcd6cdf3b..0a957828b3bd 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -36,9 +36,6 @@ #include #include -#ifdef CONFIG_X86 -#include -#endif static int drm_version(struct drm_device *dev, void *data, struct drm_file *file_priv); @@ -197,16 +194,7 @@ static int drm_getmap(struct drm_device *dev, void *data, map->type = r_list->map->type; map->flags = r_list->map->flags; map->handle = (void *)(unsigned long) r_list->user_token; - -#ifdef CONFIG_X86 - /* - * There appears to be exactly one user of the mtrr index: dritest. - * It's easy enough to keep it working on non-PAT systems. - */ - map->mtrr = phys_wc_to_mtrr_index(r_list->map->mtrr); -#else - map->mtrr = -1; -#endif + map->mtrr = arch_phys_wc_index(r_list->map->mtrr); mutex_unlock(&dev->struct_mutex); diff --git a/include/linux/io.h b/include/linux/io.h index 986f2bffea1e..04cce4da3685 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -111,6 +111,13 @@ static inline void arch_phys_wc_del(int handle) } #define arch_phys_wc_add arch_phys_wc_add +#ifndef arch_phys_wc_index +static inline int arch_phys_wc_index(int handle) +{ + return -1; +} +#define arch_phys_wc_index arch_phys_wc_index +#endif #endif #endif /* _LINUX_IO_H */ -- cgit v1.2.3 From cb32edf65bf2197a2d2226e94c7602dc92e295bb Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 26 May 2015 10:28:15 +0200 Subject: x86/mm/pat: Wrap pat_enabled into a function API We use pat_enabled in x86-specific code to see if PAT is enabled or not but we're granting full access to it even though readers do not need to set it. If, for instance, we granted access to it to modules later they then could override the variable setting... no bueno. This renames pat_enabled to a new static variable __pat_enabled. Folks are redirected to use pat_enabled() now. Code that sets this can only be internal to pat.c. Apart from the early kernel parameter "nopat" to disable PAT, we also have a few cases that disable it later and make use of a helper pat_disable(). It is wrapped under an ifdef but since that code cannot run unless PAT was enabled its not required to wrap it with ifdefs, unwrap that. Likewise, since "nopat" doesn't really change non-PAT systems just remove that ifdef as well. Although we could add and use an early_param_off(), these helpers don't use __read_mostly but we want to keep __read_mostly for __pat_enabled as this is a hot path -- upon boot, for instance, a simple guest may see ~4k accesses to pat_enabled(). Since __read_mostly early boot params are not that common we don't add a helper for them just yet. Signed-off-by: Luis R. Rodriguez Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Andy Walls Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Brian Gerst Cc: Christoph Lameter Cc: Daniel Vetter Cc: Dave Airlie Cc: Denys Vlasenko Cc: Doug Ledford Cc: H. Peter Anvin Cc: Juergen Gross Cc: Kyle McMartin Cc: Linus Torvalds Cc: Michael S. Tsirkin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1430425520-22275-3-git-send-email-mcgrof@do-not-panic.com Link: http://lkml.kernel.org/r/1432628901-18044-13-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pat.h | 7 +------ arch/x86/kernel/cpu/mtrr/main.c | 2 +- arch/x86/mm/iomap_32.c | 2 +- arch/x86/mm/ioremap.c | 4 ++-- arch/x86/mm/pageattr.c | 2 +- arch/x86/mm/pat.c | 33 +++++++++++++++------------------ arch/x86/pci/i386.c | 6 +++--- 7 files changed, 24 insertions(+), 32 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 91bc4ba95f91..cdcff7f7f694 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -4,12 +4,7 @@ #include #include -#ifdef CONFIG_X86_PAT -extern int pat_enabled; -#else -static const int pat_enabled; -#endif - +bool pat_enabled(void); extern void pat_init(void); void pat_init_cache_modes(void); diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 383efb26e516..e7ed0d8ebacb 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -558,7 +558,7 @@ int arch_phys_wc_add(unsigned long base, unsigned long size) { int ret; - if (pat_enabled || !mtrr_enabled()) + if (pat_enabled() || !mtrr_enabled()) return 0; /* Success! (We don't need to do anything.) */ ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true); diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 9ca35fc60cfe..3a2ec8790ca7 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -82,7 +82,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) * MTRR is UC or WC. UC_MINUS gets the real intention, of the * user, which is "WC if the MTRR is WC, UC if you can't do that." */ - if (!pat_enabled && pgprot_val(prot) == + if (!pat_enabled() && pgprot_val(prot) == (__PAGE_KERNEL | cachemode2protval(_PAGE_CACHE_MODE_WC))) prot = __pgprot(__PAGE_KERNEL | cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS)); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index a493bb83aa89..82d63ed70045 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -234,7 +234,7 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) { /* * Ideally, this should be: - * pat_enabled ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS; + * pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS; * * Till we fix all X drivers to use ioremap_wc(), we will use * UC MINUS. Drivers that are certain they need or can already @@ -292,7 +292,7 @@ EXPORT_SYMBOL_GPL(ioremap_uc); */ void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size) { - if (pat_enabled) + if (pat_enabled()) return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC, __builtin_return_address(0)); else diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 397838eb292b..70d221fe2eb4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1571,7 +1571,7 @@ int set_memory_wc(unsigned long addr, int numpages) { int ret; - if (!pat_enabled) + if (!pat_enabled()) return set_memory_uc(addr, numpages); ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 8c50b9bfa996..484dce7f759b 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -36,12 +36,11 @@ #undef pr_fmt #define pr_fmt(fmt) "" fmt -#ifdef CONFIG_X86_PAT -int __read_mostly pat_enabled = 1; +static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT); static inline void pat_disable(const char *reason) { - pat_enabled = 0; + __pat_enabled = 0; pr_info("x86/PAT: %s\n", reason); } @@ -51,13 +50,11 @@ static int __init nopat(char *str) return 0; } early_param("nopat", nopat); -#else -static inline void pat_disable(const char *reason) + +bool pat_enabled(void) { - (void)reason; + return !!__pat_enabled; } -#endif - int pat_debug_enable; @@ -201,7 +198,7 @@ void pat_init(void) u64 pat; bool boot_cpu = !boot_pat_state; - if (!pat_enabled) + if (!pat_enabled()) return; if (!cpu_has_pat) { @@ -402,7 +399,7 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, BUG_ON(start >= end); /* end is exclusive */ - if (!pat_enabled) { + if (!pat_enabled()) { /* This is identical to page table setting without PAT */ if (new_type) { if (req_type == _PAGE_CACHE_MODE_WC) @@ -477,7 +474,7 @@ int free_memtype(u64 start, u64 end) int is_range_ram; struct memtype *entry; - if (!pat_enabled) + if (!pat_enabled()) return 0; /* Low ISA region is always mapped WB. No need to track */ @@ -625,7 +622,7 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) u64 to = from + size; u64 cursor = from; - if (!pat_enabled) + if (!pat_enabled()) return 1; while (cursor < to) { @@ -661,7 +658,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, * caching for the high addresses through the KEN pin, but * we maintain the tradition of paranoia in this code. */ - if (!pat_enabled && + if (!pat_enabled() && !(boot_cpu_has(X86_FEATURE_MTRR) || boot_cpu_has(X86_FEATURE_K6_MTRR) || boot_cpu_has(X86_FEATURE_CYRIX_ARR) || @@ -730,7 +727,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, * the type requested matches the type of first page in the range. */ if (is_ram) { - if (!pat_enabled) + if (!pat_enabled()) return 0; pcm = lookup_memtype(paddr); @@ -844,7 +841,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, return ret; } - if (!pat_enabled) + if (!pat_enabled()) return 0; /* @@ -872,7 +869,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, { enum page_cache_mode pcm; - if (!pat_enabled) + if (!pat_enabled()) return 0; /* Set prot based on lookup */ @@ -913,7 +910,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, pgprot_t pgprot_writecombine(pgprot_t prot) { - if (pat_enabled) + if (pat_enabled()) return __pgprot(pgprot_val(prot) | cachemode2protval(_PAGE_CACHE_MODE_WC)); else @@ -996,7 +993,7 @@ static const struct file_operations memtype_fops = { static int __init pat_memtype_list_init(void) { - if (pat_enabled) { + if (pat_enabled()) { debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir, NULL, &memtype_fops); } diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 349c0d32cc0b..0a9f2caf358f 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -429,12 +429,12 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, * Caller can followup with UC MINUS request and add a WC mtrr if there * is a free mtrr slot. */ - if (!pat_enabled && write_combine) + if (!pat_enabled() && write_combine) return -EINVAL; - if (pat_enabled && write_combine) + if (pat_enabled() && write_combine) prot |= cachemode2protval(_PAGE_CACHE_MODE_WC); - else if (pat_enabled || boot_cpu_data.x86 > 3) + else if (pat_enabled() || boot_cpu_data.x86 > 3) /* * ioremap() and ioremap_nocache() defaults to UC MINUS for now. * To avoid attribute conflicts, request UC MINUS here -- cgit v1.2.3 From 131484c8da97ed600c18dd9d03b661e8ae052df6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 28 May 2015 12:21:47 +0200 Subject: x86/debug: Remove perpetually broken, unmaintainable dwarf annotations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So the dwarf2 annotations in low level assembly code have become an increasing hindrance: unreadable, messy macros mixed into some of the most security sensitive code paths of the Linux kernel. These debug info annotations don't even buy the upstream kernel anything: dwarf driven stack unwinding has caused problems in the past so it's out of tree, and the upstream kernel only uses the much more robust framepointers based stack unwinding method. In addition to that there's a steady, slow bitrot going on with these annotations, requiring frequent fixups. There's no tooling and no functionality upstream that keeps it correct. So burn down the sick forest, allowing new, healthier growth: 27 files changed, 350 insertions(+), 1101 deletions(-) Someone who has the willingness and time to do this properly can attempt to reintroduce dwarf debuginfo in x86 assembly code plus dwarf unwinding from first principles, with the following conditions: - it should be maximally readable, and maximally low-key to 'ordinary' code reading and maintenance. - find a build time method to insert dwarf annotations automatically in the most common cases, for pop/push instructions that manipulate the stack pointer. This could be done for example via a preprocessing step that just looks for common patterns - plus special annotations for the few cases where we want to depart from the default. We have hundreds of CFI annotations, so automating most of that makes sense. - it should come with build tooling checks that ensure that CFI annotations are sensible. We've seen such efforts from the framepointer side, and there's no reason it couldn't be done on the dwarf side. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Frédéric Weisbecker Cc: Jan Beulich Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 10 +- arch/x86/ia32/ia32entry.S | 133 ++++----------- arch/x86/include/asm/calling.h | 94 +++++------ arch/x86/include/asm/dwarf2.h | 170 ------------------- arch/x86/include/asm/frame.h | 7 +- arch/x86/kernel/entry_32.S | 368 ++++++++++++----------------------------- arch/x86/kernel/entry_64.S | 288 ++++++-------------------------- arch/x86/lib/atomic64_386_32.S | 7 +- arch/x86/lib/atomic64_cx8_32.S | 61 +++---- arch/x86/lib/checksum_32.S | 52 +++--- arch/x86/lib/clear_page_64.S | 7 - arch/x86/lib/cmpxchg16b_emu.S | 12 +- arch/x86/lib/cmpxchg8b_emu.S | 11 +- arch/x86/lib/copy_page_64.S | 11 -- arch/x86/lib/copy_user_64.S | 15 -- arch/x86/lib/csum-copy_64.S | 17 -- arch/x86/lib/getuser.S | 13 -- arch/x86/lib/iomap_copy_64.S | 3 - arch/x86/lib/memcpy_64.S | 3 - arch/x86/lib/memmove_64.S | 3 - arch/x86/lib/memset_64.S | 5 - arch/x86/lib/msr-reg.S | 44 ++--- arch/x86/lib/putuser.S | 8 +- arch/x86/lib/rwsem.S | 49 +++--- arch/x86/lib/thunk_32.S | 15 +- arch/x86/lib/thunk_64.S | 44 +++-- arch/x86/net/bpf_jit.S | 1 - 27 files changed, 350 insertions(+), 1101 deletions(-) delete mode 100644 arch/x86/include/asm/dwarf2.h (limited to 'arch/x86/include') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 57996ee840dd..43e8328a23e4 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -149,12 +149,6 @@ endif sp-$(CONFIG_X86_32) := esp sp-$(CONFIG_X86_64) := rsp -# do binutils support CFI? -cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1) -# is .cfi_signal_frame supported too? -cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1) -cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1) - # does binutils support specific instructions? asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1) asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1) @@ -162,8 +156,8 @@ asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1) avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1) avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1) -KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) -KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) +KBUILD_AFLAGS += $(asinstr) $(avx_instr) $(avx2_instr) +KBUILD_CFLAGS += $(asinstr) $(avx_instr) $(avx2_instr) LDFLAGS := -m elf_$(UTS_MACHINE) diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 63450a596800..2be23c734db5 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -4,7 +4,6 @@ * Copyright 2000-2002 Andi Kleen, SuSE Labs. */ -#include #include #include #include @@ -60,17 +59,6 @@ movl %eax,%eax /* zero extension */ .endm - .macro CFI_STARTPROC32 simple - CFI_STARTPROC \simple - CFI_UNDEFINED r8 - CFI_UNDEFINED r9 - CFI_UNDEFINED r10 - CFI_UNDEFINED r11 - CFI_UNDEFINED r12 - CFI_UNDEFINED r13 - CFI_UNDEFINED r14 - CFI_UNDEFINED r15 - .endm #ifdef CONFIG_PARAVIRT ENTRY(native_usergs_sysret32) @@ -102,11 +90,6 @@ ENDPROC(native_usergs_sysret32) * with the int 0x80 path. */ ENTRY(ia32_sysenter_target) - CFI_STARTPROC32 simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,0 - CFI_REGISTER rsp,rbp - /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, @@ -121,25 +104,21 @@ ENTRY(ia32_sysenter_target) movl %eax, %eax movl ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d - CFI_REGISTER rip,r10 /* Construct struct pt_regs on stack */ - pushq_cfi $__USER32_DS /* pt_regs->ss */ - pushq_cfi %rbp /* pt_regs->sp */ - CFI_REL_OFFSET rsp,0 - pushfq_cfi /* pt_regs->flags */ - pushq_cfi $__USER32_CS /* pt_regs->cs */ - pushq_cfi %r10 /* pt_regs->ip = thread_info->sysenter_return */ - CFI_REL_OFFSET rip,0 - pushq_cfi_reg rax /* pt_regs->orig_ax */ - pushq_cfi_reg rdi /* pt_regs->di */ - pushq_cfi_reg rsi /* pt_regs->si */ - pushq_cfi_reg rdx /* pt_regs->dx */ - pushq_cfi_reg rcx /* pt_regs->cx */ - pushq_cfi $-ENOSYS /* pt_regs->ax */ + pushq $__USER32_DS /* pt_regs->ss */ + pushq %rbp /* pt_regs->sp */ + pushfq /* pt_regs->flags */ + pushq $__USER32_CS /* pt_regs->cs */ + pushq %r10 /* pt_regs->ip = thread_info->sysenter_return */ + pushq %rax /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq %rdx /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq $-ENOSYS /* pt_regs->ax */ cld sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ - CFI_ADJUST_CFA_OFFSET 10*8 /* * no need to do an access_ok check here because rbp has been @@ -161,8 +140,8 @@ sysenter_flags_fixed: orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - CFI_REMEMBER_STATE jnz sysenter_tracesys + sysenter_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -193,14 +172,12 @@ sysexit_from_sys_call: */ andl $~TS_COMPAT,ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) movl RIP(%rsp),%ecx /* User %eip */ - CFI_REGISTER rip,rcx RESTORE_RSI_RDI xorl %edx,%edx /* avoid info leaks */ xorq %r8,%r8 xorq %r9,%r9 xorq %r10,%r10 movl EFLAGS(%rsp),%r11d /* User eflags */ - /*CFI_RESTORE rflags*/ TRACE_IRQS_ON /* @@ -231,8 +208,6 @@ sysexit_from_sys_call: */ USERGS_SYSRET32 - CFI_RESTORE_STATE - #ifdef CONFIG_AUDITSYSCALL .macro auditsys_entry_common movl %esi,%r8d /* 5th arg: 4th syscall arg */ @@ -282,8 +257,8 @@ sysexit_audit: #endif sysenter_fix_flags: - pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) - popfq_cfi + pushq $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) + popfq jmp sysenter_flags_fixed sysenter_tracesys: @@ -298,7 +273,6 @@ sysenter_tracesys: LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS jmp sysenter_do_call - CFI_ENDPROC ENDPROC(ia32_sysenter_target) /* @@ -332,12 +306,6 @@ ENDPROC(ia32_sysenter_target) * with the int 0x80 path. */ ENTRY(ia32_cstar_target) - CFI_STARTPROC32 simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,0 - CFI_REGISTER rip,rcx - /*CFI_REGISTER rflags,r11*/ - /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, @@ -345,7 +313,6 @@ ENTRY(ia32_cstar_target) */ SWAPGS_UNSAFE_STACK movl %esp,%r8d - CFI_REGISTER rsp,r8 movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp ENABLE_INTERRUPTS(CLBR_NONE) @@ -353,22 +320,19 @@ ENTRY(ia32_cstar_target) movl %eax,%eax /* Construct struct pt_regs on stack */ - pushq_cfi $__USER32_DS /* pt_regs->ss */ - pushq_cfi %r8 /* pt_regs->sp */ - CFI_REL_OFFSET rsp,0 - pushq_cfi %r11 /* pt_regs->flags */ - pushq_cfi $__USER32_CS /* pt_regs->cs */ - pushq_cfi %rcx /* pt_regs->ip */ - CFI_REL_OFFSET rip,0 - pushq_cfi_reg rax /* pt_regs->orig_ax */ - pushq_cfi_reg rdi /* pt_regs->di */ - pushq_cfi_reg rsi /* pt_regs->si */ - pushq_cfi_reg rdx /* pt_regs->dx */ - pushq_cfi_reg rbp /* pt_regs->cx */ + pushq $__USER32_DS /* pt_regs->ss */ + pushq %r8 /* pt_regs->sp */ + pushq %r11 /* pt_regs->flags */ + pushq $__USER32_CS /* pt_regs->cs */ + pushq %rcx /* pt_regs->ip */ + pushq %rax /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq %rdx /* pt_regs->dx */ + pushq %rbp /* pt_regs->cx */ movl %ebp,%ecx - pushq_cfi $-ENOSYS /* pt_regs->ax */ + pushq $-ENOSYS /* pt_regs->ax */ sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ - CFI_ADJUST_CFA_OFFSET 10*8 /* * no need to do an access_ok check here because r8 has been @@ -380,8 +344,8 @@ ENTRY(ia32_cstar_target) ASM_CLAC orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - CFI_REMEMBER_STATE jnz cstar_tracesys + cstar_do_call: /* 32bit syscall -> 64bit C ABI argument conversion */ movl %edi,%r8d /* arg5 */ @@ -403,15 +367,12 @@ sysretl_from_sys_call: andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) RESTORE_RSI_RDI_RDX movl RIP(%rsp),%ecx - CFI_REGISTER rip,rcx movl EFLAGS(%rsp),%r11d - /*CFI_REGISTER rflags,r11*/ xorq %r10,%r10 xorq %r9,%r9 xorq %r8,%r8 TRACE_IRQS_ON movl RSP(%rsp),%esp - CFI_RESTORE rsp /* * 64bit->32bit SYSRET restores eip from ecx, * eflags from r11 (but RF and VM bits are forced to 0), @@ -430,7 +391,6 @@ sysretl_from_sys_call: #ifdef CONFIG_AUDITSYSCALL cstar_auditsys: - CFI_RESTORE_STATE movl %r9d,R9(%rsp) /* register to be clobbered by call */ auditsys_entry_common movl R9(%rsp),%r9d /* reload 6th syscall arg */ @@ -460,7 +420,6 @@ ia32_badarg: ASM_CLAC movq $-EFAULT,%rax jmp ia32_sysret - CFI_ENDPROC /* * Emulated IA32 system calls via int 0x80. @@ -484,15 +443,6 @@ ia32_badarg: */ ENTRY(ia32_syscall) - CFI_STARTPROC32 simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,5*8 - /*CFI_REL_OFFSET ss,4*8 */ - CFI_REL_OFFSET rsp,3*8 - /*CFI_REL_OFFSET rflags,2*8 */ - /*CFI_REL_OFFSET cs,1*8 */ - CFI_REL_OFFSET rip,0*8 - /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, @@ -506,15 +456,14 @@ ENTRY(ia32_syscall) movl %eax,%eax /* Construct struct pt_regs on stack (iret frame is already on stack) */ - pushq_cfi_reg rax /* pt_regs->orig_ax */ - pushq_cfi_reg rdi /* pt_regs->di */ - pushq_cfi_reg rsi /* pt_regs->si */ - pushq_cfi_reg rdx /* pt_regs->dx */ - pushq_cfi_reg rcx /* pt_regs->cx */ - pushq_cfi $-ENOSYS /* pt_regs->ax */ + pushq %rax /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq %rdx /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq $-ENOSYS /* pt_regs->ax */ cld sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ - CFI_ADJUST_CFA_OFFSET 10*8 orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) @@ -544,7 +493,6 @@ ia32_tracesys: LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ RESTORE_EXTRA_REGS jmp ia32_do_call - CFI_ENDPROC END(ia32_syscall) .macro PTREGSCALL label, func @@ -554,8 +502,6 @@ GLOBAL(\label) jmp ia32_ptregs_common .endm - CFI_STARTPROC32 - PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn PTREGSCALL stub32_sigreturn, sys32_sigreturn PTREGSCALL stub32_fork, sys_fork @@ -569,23 +515,8 @@ GLOBAL(stub32_clone) ALIGN ia32_ptregs_common: - CFI_ENDPROC - CFI_STARTPROC32 simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,SIZEOF_PTREGS - CFI_REL_OFFSET rax,RAX - CFI_REL_OFFSET rcx,RCX - CFI_REL_OFFSET rdx,RDX - CFI_REL_OFFSET rsi,RSI - CFI_REL_OFFSET rdi,RDI - CFI_REL_OFFSET rip,RIP -/* CFI_REL_OFFSET cs,CS*/ -/* CFI_REL_OFFSET rflags,EFLAGS*/ - CFI_REL_OFFSET rsp,RSP -/* CFI_REL_OFFSET ss,SS*/ SAVE_EXTRA_REGS 8 call *%rax RESTORE_EXTRA_REGS 8 ret - CFI_ENDPROC END(ia32_ptregs_common) diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index 1c8b50edb2db..0d76accde45b 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -46,8 +46,6 @@ For 32-bit we have the following conventions - kernel is built with */ -#include - #ifdef CONFIG_X86_64 /* @@ -92,27 +90,26 @@ For 32-bit we have the following conventions - kernel is built with .macro ALLOC_PT_GPREGS_ON_STACK addskip=0 subq $15*8+\addskip, %rsp - CFI_ADJUST_CFA_OFFSET 15*8+\addskip .endm .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1 .if \r11 - movq_cfi r11, 6*8+\offset + movq %r11, 6*8+\offset(%rsp) .endif .if \r8910 - movq_cfi r10, 7*8+\offset - movq_cfi r9, 8*8+\offset - movq_cfi r8, 9*8+\offset + movq %r10, 7*8+\offset(%rsp) + movq %r9, 8*8+\offset(%rsp) + movq %r8, 9*8+\offset(%rsp) .endif .if \rax - movq_cfi rax, 10*8+\offset + movq %rax, 10*8+\offset(%rsp) .endif .if \rcx - movq_cfi rcx, 11*8+\offset + movq %rcx, 11*8+\offset(%rsp) .endif - movq_cfi rdx, 12*8+\offset - movq_cfi rsi, 13*8+\offset - movq_cfi rdi, 14*8+\offset + movq %rdx, 12*8+\offset(%rsp) + movq %rsi, 13*8+\offset(%rsp) + movq %rdi, 14*8+\offset(%rsp) .endm .macro SAVE_C_REGS offset=0 SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1 @@ -131,24 +128,24 @@ For 32-bit we have the following conventions - kernel is built with .endm .macro SAVE_EXTRA_REGS offset=0 - movq_cfi r15, 0*8+\offset - movq_cfi r14, 1*8+\offset - movq_cfi r13, 2*8+\offset - movq_cfi r12, 3*8+\offset - movq_cfi rbp, 4*8+\offset - movq_cfi rbx, 5*8+\offset + movq %r15, 0*8+\offset(%rsp) + movq %r14, 1*8+\offset(%rsp) + movq %r13, 2*8+\offset(%rsp) + movq %r12, 3*8+\offset(%rsp) + movq %rbp, 4*8+\offset(%rsp) + movq %rbx, 5*8+\offset(%rsp) .endm .macro SAVE_EXTRA_REGS_RBP offset=0 - movq_cfi rbp, 4*8+\offset + movq %rbp, 4*8+\offset(%rsp) .endm .macro RESTORE_EXTRA_REGS offset=0 - movq_cfi_restore 0*8+\offset, r15 - movq_cfi_restore 1*8+\offset, r14 - movq_cfi_restore 2*8+\offset, r13 - movq_cfi_restore 3*8+\offset, r12 - movq_cfi_restore 4*8+\offset, rbp - movq_cfi_restore 5*8+\offset, rbx + movq 0*8+\offset(%rsp), %r15 + movq 1*8+\offset(%rsp), %r14 + movq 2*8+\offset(%rsp), %r13 + movq 3*8+\offset(%rsp), %r12 + movq 4*8+\offset(%rsp), %rbp + movq 5*8+\offset(%rsp), %rbx .endm .macro ZERO_EXTRA_REGS @@ -162,24 +159,24 @@ For 32-bit we have the following conventions - kernel is built with .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 .if \rstor_r11 - movq_cfi_restore 6*8, r11 + movq 6*8(%rsp), %r11 .endif .if \rstor_r8910 - movq_cfi_restore 7*8, r10 - movq_cfi_restore 8*8, r9 - movq_cfi_restore 9*8, r8 + movq 7*8(%rsp), %r10 + movq 8*8(%rsp), %r9 + movq 9*8(%rsp), %r8 .endif .if \rstor_rax - movq_cfi_restore 10*8, rax + movq 10*8(%rsp), %rax .endif .if \rstor_rcx - movq_cfi_restore 11*8, rcx + movq 11*8(%rsp), %rcx .endif .if \rstor_rdx - movq_cfi_restore 12*8, rdx + movq 12*8(%rsp), %rdx .endif - movq_cfi_restore 13*8, rsi - movq_cfi_restore 14*8, rdi + movq 13*8(%rsp), %rsi + movq 14*8(%rsp), %rdi .endm .macro RESTORE_C_REGS RESTORE_C_REGS_HELPER 1,1,1,1,1 @@ -205,7 +202,6 @@ For 32-bit we have the following conventions - kernel is built with .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 addq $15*8+\addskip, %rsp - CFI_ADJUST_CFA_OFFSET -(15*8+\addskip) .endm .macro icebp @@ -224,23 +220,23 @@ For 32-bit we have the following conventions - kernel is built with */ .macro SAVE_ALL - pushl_cfi_reg eax - pushl_cfi_reg ebp - pushl_cfi_reg edi - pushl_cfi_reg esi - pushl_cfi_reg edx - pushl_cfi_reg ecx - pushl_cfi_reg ebx + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx .endm .macro RESTORE_ALL - popl_cfi_reg ebx - popl_cfi_reg ecx - popl_cfi_reg edx - popl_cfi_reg esi - popl_cfi_reg edi - popl_cfi_reg ebp - popl_cfi_reg eax + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax .endm #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h deleted file mode 100644 index de1cdaf4d743..000000000000 --- a/arch/x86/include/asm/dwarf2.h +++ /dev/null @@ -1,170 +0,0 @@ -#ifndef _ASM_X86_DWARF2_H -#define _ASM_X86_DWARF2_H - -#ifndef __ASSEMBLY__ -#warning "asm/dwarf2.h should be only included in pure assembly files" -#endif - -/* - * Macros for dwarf2 CFI unwind table entries. - * See "as.info" for details on these pseudo ops. Unfortunately - * they are only supported in very new binutils, so define them - * away for older version. - */ - -#ifdef CONFIG_AS_CFI - -#define CFI_STARTPROC .cfi_startproc -#define CFI_ENDPROC .cfi_endproc -#define CFI_DEF_CFA .cfi_def_cfa -#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register -#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset -#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset -#define CFI_OFFSET .cfi_offset -#define CFI_REL_OFFSET .cfi_rel_offset -#define CFI_REGISTER .cfi_register -#define CFI_RESTORE .cfi_restore -#define CFI_REMEMBER_STATE .cfi_remember_state -#define CFI_RESTORE_STATE .cfi_restore_state -#define CFI_UNDEFINED .cfi_undefined -#define CFI_ESCAPE .cfi_escape - -#ifdef CONFIG_AS_CFI_SIGNAL_FRAME -#define CFI_SIGNAL_FRAME .cfi_signal_frame -#else -#define CFI_SIGNAL_FRAME -#endif - -#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__) - /* - * Emit CFI data in .debug_frame sections, not .eh_frame sections. - * The latter we currently just discard since we don't do DWARF - * unwinding at runtime. So only the offline DWARF information is - * useful to anyone. Note we should not use this directive if this - * file is used in the vDSO assembly, or if vmlinux.lds.S gets - * changed so it doesn't discard .eh_frame. - */ - .cfi_sections .debug_frame -#endif - -#else - -/* - * Due to the structure of pre-exisiting code, don't use assembler line - * comment character # to ignore the arguments. Instead, use a dummy macro. - */ -.macro cfi_ignore a=0, b=0, c=0, d=0 -.endm - -#define CFI_STARTPROC cfi_ignore -#define CFI_ENDPROC cfi_ignore -#define CFI_DEF_CFA cfi_ignore -#define CFI_DEF_CFA_REGISTER cfi_ignore -#define CFI_DEF_CFA_OFFSET cfi_ignore -#define CFI_ADJUST_CFA_OFFSET cfi_ignore -#define CFI_OFFSET cfi_ignore -#define CFI_REL_OFFSET cfi_ignore -#define CFI_REGISTER cfi_ignore -#define CFI_RESTORE cfi_ignore -#define CFI_REMEMBER_STATE cfi_ignore -#define CFI_RESTORE_STATE cfi_ignore -#define CFI_UNDEFINED cfi_ignore -#define CFI_ESCAPE cfi_ignore -#define CFI_SIGNAL_FRAME cfi_ignore - -#endif - -/* - * An attempt to make CFI annotations more or less - * correct and shorter. It is implied that you know - * what you're doing if you use them. - */ -#ifdef __ASSEMBLY__ -#ifdef CONFIG_X86_64 - .macro pushq_cfi reg - pushq \reg - CFI_ADJUST_CFA_OFFSET 8 - .endm - - .macro pushq_cfi_reg reg - pushq %\reg - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET \reg, 0 - .endm - - .macro popq_cfi reg - popq \reg - CFI_ADJUST_CFA_OFFSET -8 - .endm - - .macro popq_cfi_reg reg - popq %\reg - CFI_ADJUST_CFA_OFFSET -8 - CFI_RESTORE \reg - .endm - - .macro pushfq_cfi - pushfq - CFI_ADJUST_CFA_OFFSET 8 - .endm - - .macro popfq_cfi - popfq - CFI_ADJUST_CFA_OFFSET -8 - .endm - - .macro movq_cfi reg offset=0 - movq %\reg, \offset(%rsp) - CFI_REL_OFFSET \reg, \offset - .endm - - .macro movq_cfi_restore offset reg - movq \offset(%rsp), %\reg - CFI_RESTORE \reg - .endm -#else /*!CONFIG_X86_64*/ - .macro pushl_cfi reg - pushl \reg - CFI_ADJUST_CFA_OFFSET 4 - .endm - - .macro pushl_cfi_reg reg - pushl %\reg - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET \reg, 0 - .endm - - .macro popl_cfi reg - popl \reg - CFI_ADJUST_CFA_OFFSET -4 - .endm - - .macro popl_cfi_reg reg - popl %\reg - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE \reg - .endm - - .macro pushfl_cfi - pushfl - CFI_ADJUST_CFA_OFFSET 4 - .endm - - .macro popfl_cfi - popfl - CFI_ADJUST_CFA_OFFSET -4 - .endm - - .macro movl_cfi reg offset=0 - movl %\reg, \offset(%esp) - CFI_REL_OFFSET \reg, \offset - .endm - - .macro movl_cfi_restore offset reg - movl \offset(%esp), %\reg - CFI_RESTORE \reg - .endm -#endif /*!CONFIG_X86_64*/ -#endif /*__ASSEMBLY__*/ - -#endif /* _ASM_X86_DWARF2_H */ diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h index 3b629f47eb65..793179cf8e21 100644 --- a/arch/x86/include/asm/frame.h +++ b/arch/x86/include/asm/frame.h @@ -1,20 +1,17 @@ #ifdef __ASSEMBLY__ #include -#include /* The annotation hides the frame from the unwinder and makes it look like a ordinary ebp save/restore. This avoids some special cases for frame pointer later */ #ifdef CONFIG_FRAME_POINTER .macro FRAME - __ASM_SIZE(push,_cfi) %__ASM_REG(bp) - CFI_REL_OFFSET __ASM_REG(bp), 0 + __ASM_SIZE(push,) %__ASM_REG(bp) __ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp) .endm .macro ENDFRAME - __ASM_SIZE(pop,_cfi) %__ASM_REG(bp) - CFI_RESTORE __ASM_REG(bp) + __ASM_SIZE(pop,) %__ASM_REG(bp) .endm #else .macro FRAME diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 1c309763e321..0ac73de925d1 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -50,7 +50,6 @@ #include #include #include -#include #include #include #include @@ -113,11 +112,10 @@ /* unfortunately push/pop can't be no-op */ .macro PUSH_GS - pushl_cfi $0 + pushl $0 .endm .macro POP_GS pop=0 addl $(4 + \pop), %esp - CFI_ADJUST_CFA_OFFSET -(4 + \pop) .endm .macro POP_GS_EX .endm @@ -137,16 +135,13 @@ #else /* CONFIG_X86_32_LAZY_GS */ .macro PUSH_GS - pushl_cfi %gs - /*CFI_REL_OFFSET gs, 0*/ + pushl %gs .endm .macro POP_GS pop=0 -98: popl_cfi %gs - /*CFI_RESTORE gs*/ +98: popl %gs .if \pop <> 0 add $\pop, %esp - CFI_ADJUST_CFA_OFFSET -\pop .endif .endm .macro POP_GS_EX @@ -170,11 +165,9 @@ .macro GS_TO_REG reg movl %gs, \reg - /*CFI_REGISTER gs, \reg*/ .endm .macro REG_TO_PTGS reg movl \reg, PT_GS(%esp) - /*CFI_REL_OFFSET gs, PT_GS*/ .endm .macro SET_KERNEL_GS reg movl $(__KERNEL_STACK_CANARY), \reg @@ -186,26 +179,16 @@ .macro SAVE_ALL cld PUSH_GS - pushl_cfi %fs - /*CFI_REL_OFFSET fs, 0;*/ - pushl_cfi %es - /*CFI_REL_OFFSET es, 0;*/ - pushl_cfi %ds - /*CFI_REL_OFFSET ds, 0;*/ - pushl_cfi %eax - CFI_REL_OFFSET eax, 0 - pushl_cfi %ebp - CFI_REL_OFFSET ebp, 0 - pushl_cfi %edi - CFI_REL_OFFSET edi, 0 - pushl_cfi %esi - CFI_REL_OFFSET esi, 0 - pushl_cfi %edx - CFI_REL_OFFSET edx, 0 - pushl_cfi %ecx - CFI_REL_OFFSET ecx, 0 - pushl_cfi %ebx - CFI_REL_OFFSET ebx, 0 + pushl %fs + pushl %es + pushl %ds + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx movl $(__USER_DS), %edx movl %edx, %ds movl %edx, %es @@ -215,30 +198,20 @@ .endm .macro RESTORE_INT_REGS - popl_cfi %ebx - CFI_RESTORE ebx - popl_cfi %ecx - CFI_RESTORE ecx - popl_cfi %edx - CFI_RESTORE edx - popl_cfi %esi - CFI_RESTORE esi - popl_cfi %edi - CFI_RESTORE edi - popl_cfi %ebp - CFI_RESTORE ebp - popl_cfi %eax - CFI_RESTORE eax + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax .endm .macro RESTORE_REGS pop=0 RESTORE_INT_REGS -1: popl_cfi %ds - /*CFI_RESTORE ds;*/ -2: popl_cfi %es - /*CFI_RESTORE es;*/ -3: popl_cfi %fs - /*CFI_RESTORE fs;*/ +1: popl %ds +2: popl %es +3: popl %fs POP_GS \pop .pushsection .fixup, "ax" 4: movl $0, (%esp) @@ -254,64 +227,27 @@ POP_GS_EX .endm -.macro RING0_INT_FRAME - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA esp, 3*4 - /*CFI_OFFSET cs, -2*4;*/ - CFI_OFFSET eip, -3*4 -.endm - -.macro RING0_EC_FRAME - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA esp, 4*4 - /*CFI_OFFSET cs, -2*4;*/ - CFI_OFFSET eip, -3*4 -.endm - -.macro RING0_PTREGS_FRAME - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA esp, PT_OLDESP-PT_EBX - /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/ - CFI_OFFSET eip, PT_EIP-PT_OLDESP - /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/ - /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/ - CFI_OFFSET eax, PT_EAX-PT_OLDESP - CFI_OFFSET ebp, PT_EBP-PT_OLDESP - CFI_OFFSET edi, PT_EDI-PT_OLDESP - CFI_OFFSET esi, PT_ESI-PT_OLDESP - CFI_OFFSET edx, PT_EDX-PT_OLDESP - CFI_OFFSET ecx, PT_ECX-PT_OLDESP - CFI_OFFSET ebx, PT_EBX-PT_OLDESP -.endm - ENTRY(ret_from_fork) - CFI_STARTPROC - pushl_cfi %eax + pushl %eax call schedule_tail GET_THREAD_INFO(%ebp) - popl_cfi %eax - pushl_cfi $0x0202 # Reset kernel eflags - popfl_cfi + popl %eax + pushl $0x0202 # Reset kernel eflags + popfl jmp syscall_exit - CFI_ENDPROC END(ret_from_fork) ENTRY(ret_from_kernel_thread) - CFI_STARTPROC - pushl_cfi %eax + pushl %eax call schedule_tail GET_THREAD_INFO(%ebp) - popl_cfi %eax - pushl_cfi $0x0202 # Reset kernel eflags - popfl_cfi + popl %eax + pushl $0x0202 # Reset kernel eflags + popfl movl PT_EBP(%esp),%eax call *PT_EBX(%esp) movl $0,PT_EAX(%esp) jmp syscall_exit - CFI_ENDPROC ENDPROC(ret_from_kernel_thread) /* @@ -323,7 +259,6 @@ ENDPROC(ret_from_kernel_thread) # userspace resumption stub bypassing syscall exit tracing ALIGN - RING0_PTREGS_FRAME ret_from_exception: preempt_stop(CLBR_ANY) ret_from_intr: @@ -367,17 +302,12 @@ need_resched: jmp need_resched END(resume_kernel) #endif - CFI_ENDPROC /* SYSENTER_RETURN points to after the "sysenter" instruction in the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ # sysenter call handler stub ENTRY(ia32_sysenter_target) - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA esp, 0 - CFI_REGISTER esp, ebp movl TSS_sysenter_sp0(%esp),%esp sysenter_past_esp: /* @@ -385,14 +315,11 @@ sysenter_past_esp: * enough kernel state to call TRACE_IRQS_OFF can be called - but * we immediately enable interrupts at that point anyway. */ - pushl_cfi $__USER_DS - /*CFI_REL_OFFSET ss, 0*/ - pushl_cfi %ebp - CFI_REL_OFFSET esp, 0 - pushfl_cfi + pushl $__USER_DS + pushl %ebp + pushfl orl $X86_EFLAGS_IF, (%esp) - pushl_cfi $__USER_CS - /*CFI_REL_OFFSET cs, 0*/ + pushl $__USER_CS /* * Push current_thread_info()->sysenter_return to the stack. * A tiny bit of offset fixup is necessary: TI_sysenter_return @@ -401,10 +328,9 @@ sysenter_past_esp: * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack; * and THREAD_SIZE takes us to the bottom. */ - pushl_cfi ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp) - CFI_REL_OFFSET eip, 0 + pushl ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp) - pushl_cfi %eax + pushl %eax SAVE_ALL ENABLE_INTERRUPTS(CLBR_NONE) @@ -453,11 +379,11 @@ sysenter_audit: /* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */ movl PT_EBX(%esp), %edx /* ebx/a0: 2nd arg to audit */ /* movl PT_ECX(%esp), %ecx already set, a1: 3nd arg to audit */ - pushl_cfi PT_ESI(%esp) /* a3: 5th arg */ - pushl_cfi PT_EDX+4(%esp) /* a2: 4th arg */ + pushl PT_ESI(%esp) /* a3: 5th arg */ + pushl PT_EDX+4(%esp) /* a2: 4th arg */ call __audit_syscall_entry - popl_cfi %ecx /* get that remapped edx off the stack */ - popl_cfi %ecx /* get that remapped esi off the stack */ + popl %ecx /* get that remapped edx off the stack */ + popl %ecx /* get that remapped esi off the stack */ movl PT_EAX(%esp),%eax /* reload syscall number */ jmp sysenter_do_call @@ -480,7 +406,6 @@ sysexit_audit: jmp sysenter_exit #endif - CFI_ENDPROC .pushsection .fixup,"ax" 2: movl $0,PT_FS(%esp) jmp 1b @@ -491,9 +416,8 @@ ENDPROC(ia32_sysenter_target) # system call handler stub ENTRY(system_call) - RING0_INT_FRAME # can't unwind into user space anyway ASM_CLAC - pushl_cfi %eax # save orig_eax + pushl %eax # save orig_eax SAVE_ALL GET_THREAD_INFO(%ebp) # system call tracing in operation / emulation @@ -527,7 +451,6 @@ restore_all_notrace: movb PT_CS(%esp), %al andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax - CFI_REMEMBER_STATE je ldt_ss # returning to user-space with LDT SS #endif restore_nocheck: @@ -543,7 +466,6 @@ ENTRY(iret_exc) _ASM_EXTABLE(irq_return,iret_exc) #ifdef CONFIG_X86_ESPFIX32 - CFI_RESTORE_STATE ldt_ss: #ifdef CONFIG_PARAVIRT /* @@ -577,22 +499,19 @@ ldt_ss: shr $16, %edx mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ - pushl_cfi $__ESPFIX_SS - pushl_cfi %eax /* new kernel esp */ + pushl $__ESPFIX_SS + pushl %eax /* new kernel esp */ /* Disable interrupts, but do not irqtrace this section: we * will soon execute iret and the tracer was already set to * the irqstate after the iret */ DISABLE_INTERRUPTS(CLBR_EAX) lss (%esp), %esp /* switch to espfix segment */ - CFI_ADJUST_CFA_OFFSET -8 jmp restore_nocheck #endif - CFI_ENDPROC ENDPROC(system_call) # perform work that needs to be done immediately before resumption ALIGN - RING0_PTREGS_FRAME # can't unwind into user space anyway work_pending: testb $_TIF_NEED_RESCHED, %cl jz work_notifysig @@ -634,9 +553,9 @@ work_notifysig: # deal with pending signals and #ifdef CONFIG_VM86 ALIGN work_notifysig_v86: - pushl_cfi %ecx # save ti_flags for do_notify_resume + pushl %ecx # save ti_flags for do_notify_resume call save_v86_state # %eax contains pt_regs pointer - popl_cfi %ecx + popl %ecx movl %eax, %esp jmp 1b #endif @@ -666,9 +585,7 @@ syscall_exit_work: call syscall_trace_leave jmp resume_userspace END(syscall_exit_work) - CFI_ENDPROC - RING0_INT_FRAME # can't unwind into user space anyway syscall_fault: ASM_CLAC GET_THREAD_INFO(%ebp) @@ -685,7 +602,6 @@ sysenter_badsys: movl $-ENOSYS,%eax jmp sysenter_after_call END(sysenter_badsys) - CFI_ENDPROC .macro FIXUP_ESPFIX_STACK /* @@ -701,10 +617,9 @@ END(sysenter_badsys) mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ shl $16, %eax addl %esp, %eax /* the adjusted stack pointer */ - pushl_cfi $__KERNEL_DS - pushl_cfi %eax + pushl $__KERNEL_DS + pushl %eax lss (%esp), %esp /* switch to the normal stack segment */ - CFI_ADJUST_CFA_OFFSET -8 #endif .endm .macro UNWIND_ESPFIX_STACK @@ -728,13 +643,11 @@ END(sysenter_badsys) */ .align 8 ENTRY(irq_entries_start) - RING0_INT_FRAME vector=FIRST_EXTERNAL_VECTOR .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) - pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */ + pushl $(~vector+0x80) /* Note: always in signed byte range */ vector=vector+1 jmp common_interrupt - CFI_ADJUST_CFA_OFFSET -4 .align 8 .endr END(irq_entries_start) @@ -753,19 +666,16 @@ common_interrupt: call do_IRQ jmp ret_from_intr ENDPROC(common_interrupt) - CFI_ENDPROC #define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ - RING0_INT_FRAME; \ ASM_CLAC; \ - pushl_cfi $~(nr); \ + pushl $~(nr); \ SAVE_ALL; \ TRACE_IRQS_OFF \ movl %esp,%eax; \ call fn; \ jmp ret_from_intr; \ - CFI_ENDPROC; \ ENDPROC(name) @@ -784,37 +694,31 @@ ENDPROC(name) #include ENTRY(coprocessor_error) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 - pushl_cfi $do_coprocessor_error + pushl $0 + pushl $do_coprocessor_error jmp error_code - CFI_ENDPROC END(coprocessor_error) ENTRY(simd_coprocessor_error) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 + pushl $0 #ifdef CONFIG_X86_INVD_BUG /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ - ALTERNATIVE "pushl_cfi $do_general_protection", \ + ALTERNATIVE "pushl $do_general_protection", \ "pushl $do_simd_coprocessor_error", \ X86_FEATURE_XMM #else - pushl_cfi $do_simd_coprocessor_error + pushl $do_simd_coprocessor_error #endif jmp error_code - CFI_ENDPROC END(simd_coprocessor_error) ENTRY(device_not_available) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $-1 # mark this as an int - pushl_cfi $do_device_not_available + pushl $-1 # mark this as an int + pushl $do_device_not_available jmp error_code - CFI_ENDPROC END(device_not_available) #ifdef CONFIG_PARAVIRT @@ -830,115 +734,89 @@ END(native_irq_enable_sysexit) #endif ENTRY(overflow) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 - pushl_cfi $do_overflow + pushl $0 + pushl $do_overflow jmp error_code - CFI_ENDPROC END(overflow) ENTRY(bounds) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 - pushl_cfi $do_bounds + pushl $0 + pushl $do_bounds jmp error_code - CFI_ENDPROC END(bounds) ENTRY(invalid_op) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 - pushl_cfi $do_invalid_op + pushl $0 + pushl $do_invalid_op jmp error_code - CFI_ENDPROC END(invalid_op) ENTRY(coprocessor_segment_overrun) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 - pushl_cfi $do_coprocessor_segment_overrun + pushl $0 + pushl $do_coprocessor_segment_overrun jmp error_code - CFI_ENDPROC END(coprocessor_segment_overrun) ENTRY(invalid_TSS) - RING0_EC_FRAME ASM_CLAC - pushl_cfi $do_invalid_TSS + pushl $do_invalid_TSS jmp error_code - CFI_ENDPROC END(invalid_TSS) ENTRY(segment_not_present) - RING0_EC_FRAME ASM_CLAC - pushl_cfi $do_segment_not_present + pushl $do_segment_not_present jmp error_code - CFI_ENDPROC END(segment_not_present) ENTRY(stack_segment) - RING0_EC_FRAME ASM_CLAC - pushl_cfi $do_stack_segment + pushl $do_stack_segment jmp error_code - CFI_ENDPROC END(stack_segment) ENTRY(alignment_check) - RING0_EC_FRAME ASM_CLAC - pushl_cfi $do_alignment_check + pushl $do_alignment_check jmp error_code - CFI_ENDPROC END(alignment_check) ENTRY(divide_error) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 # no error code - pushl_cfi $do_divide_error + pushl $0 # no error code + pushl $do_divide_error jmp error_code - CFI_ENDPROC END(divide_error) #ifdef CONFIG_X86_MCE ENTRY(machine_check) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 - pushl_cfi machine_check_vector + pushl $0 + pushl machine_check_vector jmp error_code - CFI_ENDPROC END(machine_check) #endif ENTRY(spurious_interrupt_bug) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $0 - pushl_cfi $do_spurious_interrupt_bug + pushl $0 + pushl $do_spurious_interrupt_bug jmp error_code - CFI_ENDPROC END(spurious_interrupt_bug) #ifdef CONFIG_XEN /* Xen doesn't set %esp to be precisely what the normal sysenter entrypoint expects, so fix it up before using the normal path. */ ENTRY(xen_sysenter_target) - RING0_INT_FRAME addl $5*4, %esp /* remove xen-provided frame */ - CFI_ADJUST_CFA_OFFSET -5*4 jmp sysenter_past_esp - CFI_ENDPROC ENTRY(xen_hypervisor_callback) - CFI_STARTPROC - pushl_cfi $-1 /* orig_ax = -1 => not a system call */ + pushl $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL TRACE_IRQS_OFF @@ -962,7 +840,6 @@ ENTRY(xen_do_upcall) call xen_maybe_preempt_hcall #endif jmp ret_from_intr - CFI_ENDPROC ENDPROC(xen_hypervisor_callback) # Hypervisor uses this for application faults while it executes. @@ -976,8 +853,7 @@ ENDPROC(xen_hypervisor_callback) # to pop the stack frame we end up in an infinite loop of failsafe callbacks. # We distinguish between categories by maintaining a status value in EAX. ENTRY(xen_failsafe_callback) - CFI_STARTPROC - pushl_cfi %eax + pushl %eax movl $1,%eax 1: mov 4(%esp),%ds 2: mov 8(%esp),%es @@ -986,15 +862,13 @@ ENTRY(xen_failsafe_callback) /* EAX == 0 => Category 1 (Bad segment) EAX != 0 => Category 2 (Bad IRET) */ testl %eax,%eax - popl_cfi %eax + popl %eax lea 16(%esp),%esp - CFI_ADJUST_CFA_OFFSET -16 jz 5f jmp iret_exc -5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */ +5: pushl $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL jmp ret_from_exception - CFI_ENDPROC .section .fixup,"ax" 6: xorl %eax,%eax @@ -1195,34 +1069,28 @@ return_to_handler: #ifdef CONFIG_TRACING ENTRY(trace_page_fault) - RING0_EC_FRAME ASM_CLAC - pushl_cfi $trace_do_page_fault + pushl $trace_do_page_fault jmp error_code - CFI_ENDPROC END(trace_page_fault) #endif ENTRY(page_fault) - RING0_EC_FRAME ASM_CLAC - pushl_cfi $do_page_fault + pushl $do_page_fault ALIGN error_code: /* the function address is in %gs's slot on the stack */ - pushl_cfi %fs - /*CFI_REL_OFFSET fs, 0*/ - pushl_cfi %es - /*CFI_REL_OFFSET es, 0*/ - pushl_cfi %ds - /*CFI_REL_OFFSET ds, 0*/ - pushl_cfi_reg eax - pushl_cfi_reg ebp - pushl_cfi_reg edi - pushl_cfi_reg esi - pushl_cfi_reg edx - pushl_cfi_reg ecx - pushl_cfi_reg ebx + pushl %fs + pushl %es + pushl %ds + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx cld movl $(__KERNEL_PERCPU), %ecx movl %ecx, %fs @@ -1240,7 +1108,6 @@ error_code: movl %esp,%eax # pt_regs pointer call *%edi jmp ret_from_exception - CFI_ENDPROC END(page_fault) /* @@ -1261,29 +1128,24 @@ END(page_fault) jne \ok \label: movl TSS_sysenter_sp0 + \offset(%esp), %esp - CFI_DEF_CFA esp, 0 - CFI_UNDEFINED eip - pushfl_cfi - pushl_cfi $__KERNEL_CS - pushl_cfi $sysenter_past_esp - CFI_REL_OFFSET eip, 0 + pushfl + pushl $__KERNEL_CS + pushl $sysenter_past_esp .endm ENTRY(debug) - RING0_INT_FRAME ASM_CLAC cmpl $ia32_sysenter_target,(%esp) jne debug_stack_correct FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn debug_stack_correct: - pushl_cfi $-1 # mark this as an int + pushl $-1 # mark this as an int SAVE_ALL TRACE_IRQS_OFF xorl %edx,%edx # error code 0 movl %esp,%eax # pt_regs pointer call do_debug jmp ret_from_exception - CFI_ENDPROC END(debug) /* @@ -1295,45 +1157,40 @@ END(debug) * fault happened on the sysenter path. */ ENTRY(nmi) - RING0_INT_FRAME ASM_CLAC #ifdef CONFIG_X86_ESPFIX32 - pushl_cfi %eax + pushl %eax movl %ss, %eax cmpw $__ESPFIX_SS, %ax - popl_cfi %eax + popl %eax je nmi_espfix_stack #endif cmpl $ia32_sysenter_target,(%esp) je nmi_stack_fixup - pushl_cfi %eax + pushl %eax movl %esp,%eax /* Do not access memory above the end of our stack page, * it might not exist. */ andl $(THREAD_SIZE-1),%eax cmpl $(THREAD_SIZE-20),%eax - popl_cfi %eax + popl %eax jae nmi_stack_correct cmpl $ia32_sysenter_target,12(%esp) je nmi_debug_stack_check nmi_stack_correct: - /* We have a RING0_INT_FRAME here */ - pushl_cfi %eax + pushl %eax SAVE_ALL xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi jmp restore_all_notrace - CFI_ENDPROC nmi_stack_fixup: - RING0_INT_FRAME FIX_STACK 12, nmi_stack_correct, 1 jmp nmi_stack_correct nmi_debug_stack_check: - /* We have a RING0_INT_FRAME here */ cmpw $__KERNEL_CS,16(%esp) jne nmi_stack_correct cmpl $debug,(%esp) @@ -1345,57 +1202,48 @@ nmi_debug_stack_check: #ifdef CONFIG_X86_ESPFIX32 nmi_espfix_stack: - /* We have a RING0_INT_FRAME here. - * + /* * create the pointer to lss back */ - pushl_cfi %ss - pushl_cfi %esp + pushl %ss + pushl %esp addl $4, (%esp) /* copy the iret frame of 12 bytes */ .rept 3 - pushl_cfi 16(%esp) + pushl 16(%esp) .endr - pushl_cfi %eax + pushl %eax SAVE_ALL FIXUP_ESPFIX_STACK # %eax == %esp xorl %edx,%edx # zero error code call do_nmi RESTORE_REGS lss 12+4(%esp), %esp # back to espfix stack - CFI_ADJUST_CFA_OFFSET -24 jmp irq_return #endif - CFI_ENDPROC END(nmi) ENTRY(int3) - RING0_INT_FRAME ASM_CLAC - pushl_cfi $-1 # mark this as an int + pushl $-1 # mark this as an int SAVE_ALL TRACE_IRQS_OFF xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_int3 jmp ret_from_exception - CFI_ENDPROC END(int3) ENTRY(general_protection) - RING0_EC_FRAME - pushl_cfi $do_general_protection + pushl $do_general_protection jmp error_code - CFI_ENDPROC END(general_protection) #ifdef CONFIG_KVM_GUEST ENTRY(async_page_fault) - RING0_EC_FRAME ASM_CLAC - pushl_cfi $do_async_page_fault + pushl $do_async_page_fault jmp error_code - CFI_ENDPROC END(async_page_fault) #endif diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 47b95813dc37..b84cec50c8cf 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -19,8 +19,6 @@ * at the top of the kernel process stack. * * Some macro usage: - * - CFI macros are used to generate dwarf2 unwind information for better - * backtraces. They don't change any code. * - ENTRY/END Define functions in the symbol table. * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. * - idtentry - Define exception entry points. @@ -30,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -112,61 +109,6 @@ ENDPROC(native_usergs_sysret64) # define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ #endif -/* - * empty frame - */ - .macro EMPTY_FRAME start=1 offset=0 - .if \start - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,8+\offset - .else - CFI_DEF_CFA_OFFSET 8+\offset - .endif - .endm - -/* - * initial frame state for interrupts (and exceptions without error code) - */ - .macro INTR_FRAME start=1 offset=0 - EMPTY_FRAME \start, 5*8+\offset - /*CFI_REL_OFFSET ss, 4*8+\offset*/ - CFI_REL_OFFSET rsp, 3*8+\offset - /*CFI_REL_OFFSET rflags, 2*8+\offset*/ - /*CFI_REL_OFFSET cs, 1*8+\offset*/ - CFI_REL_OFFSET rip, 0*8+\offset - .endm - -/* - * initial frame state for exceptions with error code (and interrupts - * with vector already pushed) - */ - .macro XCPT_FRAME start=1 offset=0 - INTR_FRAME \start, 1*8+\offset - .endm - -/* - * frame that enables passing a complete pt_regs to a C function. - */ - .macro DEFAULT_FRAME start=1 offset=0 - XCPT_FRAME \start, ORIG_RAX+\offset - CFI_REL_OFFSET rdi, RDI+\offset - CFI_REL_OFFSET rsi, RSI+\offset - CFI_REL_OFFSET rdx, RDX+\offset - CFI_REL_OFFSET rcx, RCX+\offset - CFI_REL_OFFSET rax, RAX+\offset - CFI_REL_OFFSET r8, R8+\offset - CFI_REL_OFFSET r9, R9+\offset - CFI_REL_OFFSET r10, R10+\offset - CFI_REL_OFFSET r11, R11+\offset - CFI_REL_OFFSET rbx, RBX+\offset - CFI_REL_OFFSET rbp, RBP+\offset - CFI_REL_OFFSET r12, R12+\offset - CFI_REL_OFFSET r13, R13+\offset - CFI_REL_OFFSET r14, R14+\offset - CFI_REL_OFFSET r15, R15+\offset - .endm - /* * 64bit SYSCALL instruction entry. Up to 6 arguments in registers. * @@ -196,12 +138,6 @@ ENDPROC(native_usergs_sysret64) */ ENTRY(system_call) - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,0 - CFI_REGISTER rip,rcx - /*CFI_REGISTER rflags,r11*/ - /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, @@ -219,8 +155,8 @@ GLOBAL(system_call_after_swapgs) movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp /* Construct struct pt_regs on stack */ - pushq_cfi $__USER_DS /* pt_regs->ss */ - pushq_cfi PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ + pushq $__USER_DS /* pt_regs->ss */ + pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ /* * Re-enable interrupts. * We use 'rsp_scratch' as a scratch space, hence irq-off block above @@ -229,22 +165,20 @@ GLOBAL(system_call_after_swapgs) * with using rsp_scratch: */ ENABLE_INTERRUPTS(CLBR_NONE) - pushq_cfi %r11 /* pt_regs->flags */ - pushq_cfi $__USER_CS /* pt_regs->cs */ - pushq_cfi %rcx /* pt_regs->ip */ - CFI_REL_OFFSET rip,0 - pushq_cfi_reg rax /* pt_regs->orig_ax */ - pushq_cfi_reg rdi /* pt_regs->di */ - pushq_cfi_reg rsi /* pt_regs->si */ - pushq_cfi_reg rdx /* pt_regs->dx */ - pushq_cfi_reg rcx /* pt_regs->cx */ - pushq_cfi $-ENOSYS /* pt_regs->ax */ - pushq_cfi_reg r8 /* pt_regs->r8 */ - pushq_cfi_reg r9 /* pt_regs->r9 */ - pushq_cfi_reg r10 /* pt_regs->r10 */ - pushq_cfi_reg r11 /* pt_regs->r11 */ + pushq %r11 /* pt_regs->flags */ + pushq $__USER_CS /* pt_regs->cs */ + pushq %rcx /* pt_regs->ip */ + pushq %rax /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq %rdx /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq $-ENOSYS /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ + pushq %r9 /* pt_regs->r9 */ + pushq %r10 /* pt_regs->r10 */ + pushq %r11 /* pt_regs->r11 */ sub $(6*8),%rsp /* pt_regs->bp,bx,r12-15 not saved */ - CFI_ADJUST_CFA_OFFSET 6*8 testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz tracesys @@ -282,13 +216,9 @@ system_call_fastpath: testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */ - CFI_REMEMBER_STATE - RESTORE_C_REGS_EXCEPT_RCX_R11 movq RIP(%rsp),%rcx - CFI_REGISTER rip,rcx movq EFLAGS(%rsp),%r11 - /*CFI_REGISTER rflags,r11*/ movq RSP(%rsp),%rsp /* * 64bit SYSRET restores rip from rcx, @@ -307,8 +237,6 @@ system_call_fastpath: */ USERGS_SYSRET64 - CFI_RESTORE_STATE - /* Do syscall entry tracing */ tracesys: movq %rsp, %rdi @@ -374,9 +302,9 @@ int_careful: jnc int_very_careful TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) - pushq_cfi %rdi + pushq %rdi SCHEDULE_USER - popq_cfi %rdi + popq %rdi DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF jmp int_with_check @@ -389,10 +317,10 @@ int_very_careful: /* Check for syscall exit trace */ testl $_TIF_WORK_SYSCALL_EXIT,%edx jz int_signal - pushq_cfi %rdi + pushq %rdi leaq 8(%rsp),%rdi # &ptregs -> arg1 call syscall_trace_leave - popq_cfi %rdi + popq %rdi andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi jmp int_restore_rest @@ -475,27 +403,21 @@ syscall_return: * perf profiles. Nothing jumps here. */ syscall_return_via_sysret: - CFI_REMEMBER_STATE /* rcx and r11 are already restored (see code above) */ RESTORE_C_REGS_EXCEPT_RCX_R11 movq RSP(%rsp),%rsp USERGS_SYSRET64 - CFI_RESTORE_STATE opportunistic_sysret_failed: SWAPGS jmp restore_c_regs_and_iret - CFI_ENDPROC END(system_call) .macro FORK_LIKE func ENTRY(stub_\func) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 /* offset 8: return address */ SAVE_EXTRA_REGS 8 jmp sys_\func - CFI_ENDPROC END(stub_\func) .endm @@ -504,8 +426,6 @@ END(stub_\func) FORK_LIKE vfork ENTRY(stub_execve) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 call sys_execve return_from_execve: testl %eax, %eax @@ -515,11 +435,9 @@ return_from_execve: 1: /* must use IRET code path (pt_regs->cs may have changed) */ addq $8, %rsp - CFI_ADJUST_CFA_OFFSET -8 ZERO_EXTRA_REGS movq %rax,RAX(%rsp) jmp int_ret_from_sys_call - CFI_ENDPROC END(stub_execve) /* * Remaining execve stubs are only 7 bytes long. @@ -527,32 +445,23 @@ END(stub_execve) */ .align 8 GLOBAL(stub_execveat) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 call sys_execveat jmp return_from_execve - CFI_ENDPROC END(stub_execveat) #if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION) .align 8 GLOBAL(stub_x32_execve) GLOBAL(stub32_execve) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 call compat_sys_execve jmp return_from_execve - CFI_ENDPROC END(stub32_execve) END(stub_x32_execve) .align 8 GLOBAL(stub_x32_execveat) GLOBAL(stub32_execveat) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 call compat_sys_execveat jmp return_from_execve - CFI_ENDPROC END(stub32_execveat) END(stub_x32_execveat) #endif @@ -562,8 +471,6 @@ END(stub_x32_execveat) * This cannot be done with SYSRET, so use the IRET return path instead. */ ENTRY(stub_rt_sigreturn) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 /* * SAVE_EXTRA_REGS result is not normally needed: * sigreturn overwrites all pt_regs->GPREGS. @@ -575,21 +482,16 @@ ENTRY(stub_rt_sigreturn) call sys_rt_sigreturn return_from_stub: addq $8, %rsp - CFI_ADJUST_CFA_OFFSET -8 RESTORE_EXTRA_REGS movq %rax,RAX(%rsp) jmp int_ret_from_sys_call - CFI_ENDPROC END(stub_rt_sigreturn) #ifdef CONFIG_X86_X32_ABI ENTRY(stub_x32_rt_sigreturn) - CFI_STARTPROC - DEFAULT_FRAME 0, 8 SAVE_EXTRA_REGS 8 call sys32_x32_rt_sigreturn jmp return_from_stub - CFI_ENDPROC END(stub_x32_rt_sigreturn) #endif @@ -599,12 +501,11 @@ END(stub_x32_rt_sigreturn) * rdi: prev task we switched from */ ENTRY(ret_from_fork) - DEFAULT_FRAME LOCK ; btr $TIF_FORK,TI_flags(%r8) - pushq_cfi $0x0002 - popfq_cfi # reset kernel eflags + pushq $0x0002 + popfq # reset kernel eflags call schedule_tail # rdi: 'prev' task parameter @@ -628,7 +529,6 @@ ENTRY(ret_from_fork) movl $0, RAX(%rsp) RESTORE_EXTRA_REGS jmp int_ret_from_sys_call - CFI_ENDPROC END(ret_from_fork) /* @@ -637,16 +537,13 @@ END(ret_from_fork) */ .align 8 ENTRY(irq_entries_start) - INTR_FRAME vector=FIRST_EXTERNAL_VECTOR .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) - pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ + pushq $(~vector+0x80) /* Note: always in signed byte range */ vector=vector+1 jmp common_interrupt - CFI_ADJUST_CFA_OFFSET -8 .align 8 .endr - CFI_ENDPROC END(irq_entries_start) /* @@ -688,17 +585,7 @@ END(irq_entries_start) movq %rsp, %rsi incl PER_CPU_VAR(irq_count) cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp - CFI_DEF_CFA_REGISTER rsi pushq %rsi - /* - * For debugger: - * "CFA (Current Frame Address) is the value on stack + offset" - */ - CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ - 0x77 /* DW_OP_breg7 (rsp) */, 0, \ - 0x06 /* DW_OP_deref */, \ - 0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \ - 0x22 /* DW_OP_plus */ /* We entered an interrupt context - irqs are off: */ TRACE_IRQS_OFF @@ -711,7 +598,6 @@ END(irq_entries_start) */ .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: - XCPT_FRAME ASM_CLAC addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ interrupt do_IRQ @@ -723,11 +609,8 @@ ret_from_intr: /* Restore saved previous stack */ popq %rsi - CFI_DEF_CFA rsi,SIZEOF_PTREGS-RBP /* reg/off reset after def_cfa_expr */ /* return code expects complete pt_regs - adjust rsp accordingly: */ leaq -RBP(%rsi),%rsp - CFI_DEF_CFA_REGISTER rsp - CFI_ADJUST_CFA_OFFSET RBP testb $3, CS(%rsp) jz retint_kernel @@ -743,7 +626,6 @@ retint_check: LOCKDEP_SYS_EXIT_IRQ movl TI_flags(%rcx),%edx andl %edi,%edx - CFI_REMEMBER_STATE jnz retint_careful retint_swapgs: /* return to user-space */ @@ -807,8 +689,8 @@ native_irq_return_iret: #ifdef CONFIG_X86_ESPFIX64 native_irq_return_ldt: - pushq_cfi %rax - pushq_cfi %rdi + pushq %rax + pushq %rdi SWAPGS movq PER_CPU_VAR(espfix_waddr),%rdi movq %rax,(0*8)(%rdi) /* RAX */ @@ -823,24 +705,23 @@ native_irq_return_ldt: movq (5*8)(%rsp),%rax /* RSP */ movq %rax,(4*8)(%rdi) andl $0xffff0000,%eax - popq_cfi %rdi + popq %rdi orq PER_CPU_VAR(espfix_stack),%rax SWAPGS movq %rax,%rsp - popq_cfi %rax + popq %rax jmp native_irq_return_iret #endif /* edi: workmask, edx: work */ retint_careful: - CFI_RESTORE_STATE bt $TIF_NEED_RESCHED,%edx jnc retint_signal TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) - pushq_cfi %rdi + pushq %rdi SCHEDULE_USER - popq_cfi %rdi + popq %rdi GET_THREAD_INFO(%rcx) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF @@ -862,7 +743,6 @@ retint_signal: GET_THREAD_INFO(%rcx) jmp retint_with_reschedule - CFI_ENDPROC END(common_interrupt) /* @@ -870,13 +750,11 @@ END(common_interrupt) */ .macro apicinterrupt3 num sym do_sym ENTRY(\sym) - INTR_FRAME ASM_CLAC - pushq_cfi $~(\num) + pushq $~(\num) .Lcommon_\sym: interrupt \do_sym jmp ret_from_intr - CFI_ENDPROC END(\sym) .endm @@ -959,24 +837,17 @@ ENTRY(\sym) .error "using shift_ist requires paranoid=1" .endif - .if \has_error_code - XCPT_FRAME - .else - INTR_FRAME - .endif - ASM_CLAC PARAVIRT_ADJUST_EXCEPTION_FRAME .ifeq \has_error_code - pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ + pushq $-1 /* ORIG_RAX: no syscall to restart */ .endif ALLOC_PT_GPREGS_ON_STACK .if \paranoid .if \paranoid == 1 - CFI_REMEMBER_STATE testb $3, CS(%rsp) /* If coming from userspace, switch */ jnz 1f /* stacks. */ .endif @@ -986,8 +857,6 @@ ENTRY(\sym) .endif /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ - DEFAULT_FRAME 0 - .if \paranoid .if \shift_ist != -1 TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */ @@ -1023,7 +892,6 @@ ENTRY(\sym) .endif .if \paranoid == 1 - CFI_RESTORE_STATE /* * Paranoid entry from userspace. Switch stacks and treat it * as a normal entry. This means that paranoid handlers @@ -1032,7 +900,6 @@ ENTRY(\sym) 1: call error_entry - DEFAULT_FRAME 0 movq %rsp,%rdi /* pt_regs pointer */ call sync_regs @@ -1051,8 +918,6 @@ ENTRY(\sym) jmp error_exit /* %ebx: no swapgs flag */ .endif - - CFI_ENDPROC END(\sym) .endm @@ -1085,17 +950,15 @@ idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 /* Reload gs selector with exception handling */ /* edi: new selector */ ENTRY(native_load_gs_index) - CFI_STARTPROC - pushfq_cfi + pushfq DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) SWAPGS gs_change: movl %edi,%gs 2: mfence /* workaround */ SWAPGS - popfq_cfi + popfq ret - CFI_ENDPROC END(native_load_gs_index) _ASM_EXTABLE(gs_change,bad_gs) @@ -1110,22 +973,15 @@ bad_gs: /* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(do_softirq_own_stack) - CFI_STARTPROC - pushq_cfi %rbp - CFI_REL_OFFSET rbp,0 + pushq %rbp mov %rsp,%rbp - CFI_DEF_CFA_REGISTER rbp incl PER_CPU_VAR(irq_count) cmove PER_CPU_VAR(irq_stack_ptr),%rsp push %rbp # backlink for old unwinder call __do_softirq leaveq - CFI_RESTORE rbp - CFI_DEF_CFA_REGISTER rsp - CFI_ADJUST_CFA_OFFSET -8 decl PER_CPU_VAR(irq_count) ret - CFI_ENDPROC END(do_softirq_own_stack) #ifdef CONFIG_XEN @@ -1145,28 +1001,22 @@ idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 * activation and restart the handler using the previous one. */ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) - CFI_STARTPROC /* * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will * see the correct pointer to the pt_regs */ movq %rdi, %rsp # we don't return, adjust the stack frame - CFI_ENDPROC - DEFAULT_FRAME 11: incl PER_CPU_VAR(irq_count) movq %rsp,%rbp - CFI_DEF_CFA_REGISTER rbp cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp pushq %rbp # backlink for old unwinder call xen_evtchn_do_upcall popq %rsp - CFI_DEF_CFA_REGISTER rsp decl PER_CPU_VAR(irq_count) #ifndef CONFIG_PREEMPT call xen_maybe_preempt_hcall #endif jmp error_exit - CFI_ENDPROC END(xen_do_hypervisor_callback) /* @@ -1183,16 +1033,8 @@ END(xen_do_hypervisor_callback) * with its current contents: any discrepancy means we in category 1. */ ENTRY(xen_failsafe_callback) - INTR_FRAME 1 (6*8) - /*CFI_REL_OFFSET gs,GS*/ - /*CFI_REL_OFFSET fs,FS*/ - /*CFI_REL_OFFSET es,ES*/ - /*CFI_REL_OFFSET ds,DS*/ - CFI_REL_OFFSET r11,8 - CFI_REL_OFFSET rcx,0 movl %ds,%ecx cmpw %cx,0x10(%rsp) - CFI_REMEMBER_STATE jne 1f movl %es,%ecx cmpw %cx,0x18(%rsp) @@ -1205,29 +1047,21 @@ ENTRY(xen_failsafe_callback) jne 1f /* All segments match their saved values => Category 2 (Bad IRET). */ movq (%rsp),%rcx - CFI_RESTORE rcx movq 8(%rsp),%r11 - CFI_RESTORE r11 addq $0x30,%rsp - CFI_ADJUST_CFA_OFFSET -0x30 - pushq_cfi $0 /* RIP */ - pushq_cfi %r11 - pushq_cfi %rcx + pushq $0 /* RIP */ + pushq %r11 + pushq %rcx jmp general_protection - CFI_RESTORE_STATE 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ movq (%rsp),%rcx - CFI_RESTORE rcx movq 8(%rsp),%r11 - CFI_RESTORE r11 addq $0x30,%rsp - CFI_ADJUST_CFA_OFFSET -0x30 - pushq_cfi $-1 /* orig_ax = -1 => not a system call */ + pushq $-1 /* orig_ax = -1 => not a system call */ ALLOC_PT_GPREGS_ON_STACK SAVE_C_REGS SAVE_EXTRA_REGS jmp error_exit - CFI_ENDPROC END(xen_failsafe_callback) apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ @@ -1263,7 +1097,6 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector( * Return: ebx=0: need swapgs on exit, ebx=1: otherwise */ ENTRY(paranoid_entry) - XCPT_FRAME 1 15*8 cld SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 @@ -1275,7 +1108,6 @@ ENTRY(paranoid_entry) SWAPGS xorl %ebx,%ebx 1: ret - CFI_ENDPROC END(paranoid_entry) /* @@ -1290,7 +1122,6 @@ END(paranoid_entry) */ /* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ ENTRY(paranoid_exit) - DEFAULT_FRAME DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF_DEBUG testl %ebx,%ebx /* swapgs needed? */ @@ -1305,7 +1136,6 @@ paranoid_exit_restore: RESTORE_C_REGS REMOVE_PT_GPREGS_FROM_STACK 8 INTERRUPT_RETURN - CFI_ENDPROC END(paranoid_exit) /* @@ -1313,7 +1143,6 @@ END(paranoid_exit) * Return: ebx=0: need swapgs on exit, ebx=1: otherwise */ ENTRY(error_entry) - XCPT_FRAME 1 15*8 cld SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 @@ -1333,7 +1162,6 @@ error_sti: * for these here too. */ error_kernelspace: - CFI_REL_OFFSET rcx, RCX+8 incl %ebx leaq native_irq_return_iret(%rip),%rcx cmpq %rcx,RIP+8(%rsp) @@ -1357,13 +1185,11 @@ error_bad_iret: mov %rax,%rsp decl %ebx /* Return to usergs */ jmp error_sti - CFI_ENDPROC END(error_entry) /* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ ENTRY(error_exit) - DEFAULT_FRAME movl %ebx,%eax RESTORE_EXTRA_REGS DISABLE_INTERRUPTS(CLBR_NONE) @@ -1377,12 +1203,10 @@ ENTRY(error_exit) andl %edi,%edx jnz retint_careful jmp retint_swapgs - CFI_ENDPROC END(error_exit) /* Runs on exception stack */ ENTRY(nmi) - INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME /* * We allow breakpoints in NMIs. If a breakpoint occurs, then @@ -1417,8 +1241,7 @@ ENTRY(nmi) */ /* Use %rdx as our temp variable throughout */ - pushq_cfi %rdx - CFI_REL_OFFSET rdx, 0 + pushq %rdx /* * If %cs was not the kernel segment, then the NMI triggered in user @@ -1452,8 +1275,6 @@ ENTRY(nmi) jb first_nmi /* Ah, it is within the NMI stack, treat it as nested */ - CFI_REMEMBER_STATE - nested_nmi: /* * Do nothing if we interrupted the fixup in repeat_nmi. @@ -1471,26 +1292,22 @@ nested_nmi: /* Set up the interrupted NMIs stack to jump to repeat_nmi */ leaq -1*8(%rsp), %rdx movq %rdx, %rsp - CFI_ADJUST_CFA_OFFSET 1*8 leaq -10*8(%rsp), %rdx - pushq_cfi $__KERNEL_DS - pushq_cfi %rdx - pushfq_cfi - pushq_cfi $__KERNEL_CS - pushq_cfi $repeat_nmi + pushq $__KERNEL_DS + pushq %rdx + pushfq + pushq $__KERNEL_CS + pushq $repeat_nmi /* Put stack back */ addq $(6*8), %rsp - CFI_ADJUST_CFA_OFFSET -6*8 nested_nmi_out: - popq_cfi %rdx - CFI_RESTORE rdx + popq %rdx /* No need to check faults here */ INTERRUPT_RETURN - CFI_RESTORE_STATE first_nmi: /* * Because nested NMIs will use the pushed location that we @@ -1529,22 +1346,19 @@ first_nmi: */ /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ movq (%rsp), %rdx - CFI_RESTORE rdx /* Set the NMI executing variable on the stack. */ - pushq_cfi $1 + pushq $1 /* * Leave room for the "copied" frame */ subq $(5*8), %rsp - CFI_ADJUST_CFA_OFFSET 5*8 /* Copy the stack frame to the Saved frame */ .rept 5 - pushq_cfi 11*8(%rsp) + pushq 11*8(%rsp) .endr - CFI_DEF_CFA_OFFSET 5*8 /* Everything up to here is safe from nested NMIs */ @@ -1567,12 +1381,10 @@ repeat_nmi: /* Make another copy, this one may be modified by nested NMIs */ addq $(10*8), %rsp - CFI_ADJUST_CFA_OFFSET -10*8 .rept 5 - pushq_cfi -6*8(%rsp) + pushq -6*8(%rsp) .endr subq $(5*8), %rsp - CFI_DEF_CFA_OFFSET 5*8 end_repeat_nmi: /* @@ -1580,7 +1392,7 @@ end_repeat_nmi: * NMI if the first NMI took an exception and reset our iret stack * so that we repeat another NMI. */ - pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ + pushq $-1 /* ORIG_RAX: no syscall to restart */ ALLOC_PT_GPREGS_ON_STACK /* @@ -1591,7 +1403,6 @@ end_repeat_nmi: * exceptions might do. */ call paranoid_entry - DEFAULT_FRAME 0 /* * Save off the CR2 register. If we take a page fault in the NMI then @@ -1628,13 +1439,10 @@ nmi_restore: /* Clear the NMI executing stack variable */ movq $0, 5*8(%rsp) jmp irq_return - CFI_ENDPROC END(nmi) ENTRY(ignore_sysret) - CFI_STARTPROC mov $-ENOSYS,%eax sysret - CFI_ENDPROC END(ignore_sysret) diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S index 00933d5e992f..9b0ca8fe80fc 100644 --- a/arch/x86/lib/atomic64_386_32.S +++ b/arch/x86/lib/atomic64_386_32.S @@ -11,26 +11,23 @@ #include #include -#include /* if you want SMP support, implement these with real spinlocks */ .macro LOCK reg - pushfl_cfi + pushfl cli .endm .macro UNLOCK reg - popfl_cfi + popfl .endm #define BEGIN(op) \ .macro endp; \ - CFI_ENDPROC; \ ENDPROC(atomic64_##op##_386); \ .purgem endp; \ .endm; \ ENTRY(atomic64_##op##_386); \ - CFI_STARTPROC; \ LOCK v; #define ENDP endp diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index 082a85167a5b..db3ae85440ff 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -11,7 +11,6 @@ #include #include -#include .macro read64 reg movl %ebx, %eax @@ -22,16 +21,11 @@ .endm ENTRY(atomic64_read_cx8) - CFI_STARTPROC - read64 %ecx ret - CFI_ENDPROC ENDPROC(atomic64_read_cx8) ENTRY(atomic64_set_cx8) - CFI_STARTPROC - 1: /* we don't need LOCK_PREFIX since aligned 64-bit writes * are atomic on 586 and newer */ @@ -39,28 +33,23 @@ ENTRY(atomic64_set_cx8) jne 1b ret - CFI_ENDPROC ENDPROC(atomic64_set_cx8) ENTRY(atomic64_xchg_cx8) - CFI_STARTPROC - 1: LOCK_PREFIX cmpxchg8b (%esi) jne 1b ret - CFI_ENDPROC ENDPROC(atomic64_xchg_cx8) .macro addsub_return func ins insc ENTRY(atomic64_\func\()_return_cx8) - CFI_STARTPROC - pushl_cfi_reg ebp - pushl_cfi_reg ebx - pushl_cfi_reg esi - pushl_cfi_reg edi + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi movl %eax, %esi movl %edx, %edi @@ -79,12 +68,11 @@ ENTRY(atomic64_\func\()_return_cx8) 10: movl %ebx, %eax movl %ecx, %edx - popl_cfi_reg edi - popl_cfi_reg esi - popl_cfi_reg ebx - popl_cfi_reg ebp + popl %edi + popl %esi + popl %ebx + popl %ebp ret - CFI_ENDPROC ENDPROC(atomic64_\func\()_return_cx8) .endm @@ -93,8 +81,7 @@ addsub_return sub sub sbb .macro incdec_return func ins insc ENTRY(atomic64_\func\()_return_cx8) - CFI_STARTPROC - pushl_cfi_reg ebx + pushl %ebx read64 %esi 1: @@ -109,9 +96,8 @@ ENTRY(atomic64_\func\()_return_cx8) 10: movl %ebx, %eax movl %ecx, %edx - popl_cfi_reg ebx + popl %ebx ret - CFI_ENDPROC ENDPROC(atomic64_\func\()_return_cx8) .endm @@ -119,8 +105,7 @@ incdec_return inc add adc incdec_return dec sub sbb ENTRY(atomic64_dec_if_positive_cx8) - CFI_STARTPROC - pushl_cfi_reg ebx + pushl %ebx read64 %esi 1: @@ -136,18 +121,16 @@ ENTRY(atomic64_dec_if_positive_cx8) 2: movl %ebx, %eax movl %ecx, %edx - popl_cfi_reg ebx + popl %ebx ret - CFI_ENDPROC ENDPROC(atomic64_dec_if_positive_cx8) ENTRY(atomic64_add_unless_cx8) - CFI_STARTPROC - pushl_cfi_reg ebp - pushl_cfi_reg ebx + pushl %ebp + pushl %ebx /* these just push these two parameters on the stack */ - pushl_cfi_reg edi - pushl_cfi_reg ecx + pushl %edi + pushl %ecx movl %eax, %ebp movl %edx, %edi @@ -168,21 +151,18 @@ ENTRY(atomic64_add_unless_cx8) movl $1, %eax 3: addl $8, %esp - CFI_ADJUST_CFA_OFFSET -8 - popl_cfi_reg ebx - popl_cfi_reg ebp + popl %ebx + popl %ebp ret 4: cmpl %edx, 4(%esp) jne 2b xorl %eax, %eax jmp 3b - CFI_ENDPROC ENDPROC(atomic64_add_unless_cx8) ENTRY(atomic64_inc_not_zero_cx8) - CFI_STARTPROC - pushl_cfi_reg ebx + pushl %ebx read64 %esi 1: @@ -199,7 +179,6 @@ ENTRY(atomic64_inc_not_zero_cx8) movl $1, %eax 3: - popl_cfi_reg ebx + popl %ebx ret - CFI_ENDPROC ENDPROC(atomic64_inc_not_zero_cx8) diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 9bc944a91274..c1e623209853 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -26,7 +26,6 @@ */ #include -#include #include #include @@ -50,9 +49,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) * alignment for the unrolled loop. */ ENTRY(csum_partial) - CFI_STARTPROC - pushl_cfi_reg esi - pushl_cfi_reg ebx + pushl %esi + pushl %ebx movl 20(%esp),%eax # Function arg: unsigned int sum movl 16(%esp),%ecx # Function arg: int len movl 12(%esp),%esi # Function arg: unsigned char *buff @@ -129,10 +127,9 @@ ENTRY(csum_partial) jz 8f roll $8, %eax 8: - popl_cfi_reg ebx - popl_cfi_reg esi + popl %ebx + popl %esi ret - CFI_ENDPROC ENDPROC(csum_partial) #else @@ -140,9 +137,8 @@ ENDPROC(csum_partial) /* Version for PentiumII/PPro */ ENTRY(csum_partial) - CFI_STARTPROC - pushl_cfi_reg esi - pushl_cfi_reg ebx + pushl %esi + pushl %ebx movl 20(%esp),%eax # Function arg: unsigned int sum movl 16(%esp),%ecx # Function arg: int len movl 12(%esp),%esi # Function arg: const unsigned char *buf @@ -249,10 +245,9 @@ ENTRY(csum_partial) jz 90f roll $8, %eax 90: - popl_cfi_reg ebx - popl_cfi_reg esi + popl %ebx + popl %esi ret - CFI_ENDPROC ENDPROC(csum_partial) #endif @@ -287,12 +282,10 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst, #define FP 12 ENTRY(csum_partial_copy_generic) - CFI_STARTPROC subl $4,%esp - CFI_ADJUST_CFA_OFFSET 4 - pushl_cfi_reg edi - pushl_cfi_reg esi - pushl_cfi_reg ebx + pushl %edi + pushl %esi + pushl %ebx movl ARGBASE+16(%esp),%eax # sum movl ARGBASE+12(%esp),%ecx # len movl ARGBASE+4(%esp),%esi # src @@ -401,12 +394,11 @@ DST( movb %cl, (%edi) ) .previous - popl_cfi_reg ebx - popl_cfi_reg esi - popl_cfi_reg edi - popl_cfi %ecx # equivalent to addl $4,%esp + popl %ebx + popl %esi + popl %edi + popl %ecx # equivalent to addl $4,%esp ret - CFI_ENDPROC ENDPROC(csum_partial_copy_generic) #else @@ -426,10 +418,9 @@ ENDPROC(csum_partial_copy_generic) #define ARGBASE 12 ENTRY(csum_partial_copy_generic) - CFI_STARTPROC - pushl_cfi_reg ebx - pushl_cfi_reg edi - pushl_cfi_reg esi + pushl %ebx + pushl %edi + pushl %esi movl ARGBASE+4(%esp),%esi #src movl ARGBASE+8(%esp),%edi #dst movl ARGBASE+12(%esp),%ecx #len @@ -489,11 +480,10 @@ DST( movb %dl, (%edi) ) jmp 7b .previous - popl_cfi_reg esi - popl_cfi_reg edi - popl_cfi_reg ebx + popl %esi + popl %edi + popl %ebx ret - CFI_ENDPROC ENDPROC(csum_partial_copy_generic) #undef ROUND diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index e67e579c93bd..a2fe51b00cce 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -1,5 +1,4 @@ #include -#include #include #include @@ -15,7 +14,6 @@ * %rdi - page */ ENTRY(clear_page) - CFI_STARTPROC ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \ "jmp clear_page_c_e", X86_FEATURE_ERMS @@ -24,11 +22,9 @@ ENTRY(clear_page) xorl %eax,%eax rep stosq ret - CFI_ENDPROC ENDPROC(clear_page) ENTRY(clear_page_orig) - CFI_STARTPROC xorl %eax,%eax movl $4096/64,%ecx @@ -48,14 +44,11 @@ ENTRY(clear_page_orig) jnz .Lloop nop ret - CFI_ENDPROC ENDPROC(clear_page_orig) ENTRY(clear_page_c_e) - CFI_STARTPROC movl $4096,%ecx xorl %eax,%eax rep stosb ret - CFI_ENDPROC ENDPROC(clear_page_c_e) diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S index 40a172541ee2..9b330242e740 100644 --- a/arch/x86/lib/cmpxchg16b_emu.S +++ b/arch/x86/lib/cmpxchg16b_emu.S @@ -6,7 +6,6 @@ * */ #include -#include #include .text @@ -21,7 +20,6 @@ * %al : Operation successful */ ENTRY(this_cpu_cmpxchg16b_emu) -CFI_STARTPROC # # Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not @@ -32,7 +30,7 @@ CFI_STARTPROC # *atomic* on a single cpu (as provided by the this_cpu_xx class of # macros). # - pushfq_cfi + pushfq cli cmpq PER_CPU_VAR((%rsi)), %rax @@ -43,17 +41,13 @@ CFI_STARTPROC movq %rbx, PER_CPU_VAR((%rsi)) movq %rcx, PER_CPU_VAR(8(%rsi)) - CFI_REMEMBER_STATE - popfq_cfi + popfq mov $1, %al ret - CFI_RESTORE_STATE .Lnot_same: - popfq_cfi + popfq xor %al,%al ret -CFI_ENDPROC - ENDPROC(this_cpu_cmpxchg16b_emu) diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S index b4807fce5177..ad5349778490 100644 --- a/arch/x86/lib/cmpxchg8b_emu.S +++ b/arch/x86/lib/cmpxchg8b_emu.S @@ -7,7 +7,6 @@ */ #include -#include .text @@ -20,14 +19,13 @@ * %ecx : high 32 bits of new value */ ENTRY(cmpxchg8b_emu) -CFI_STARTPROC # # Emulate 'cmpxchg8b (%esi)' on UP except we don't # set the whole ZF thing (caller will just compare # eax:edx with the expected value) # - pushfl_cfi + pushfl cli cmpl (%esi), %eax @@ -38,18 +36,15 @@ CFI_STARTPROC movl %ebx, (%esi) movl %ecx, 4(%esi) - CFI_REMEMBER_STATE - popfl_cfi + popfl ret - CFI_RESTORE_STATE .Lnot_same: movl (%esi), %eax .Lhalf_same: movl 4(%esi), %edx - popfl_cfi + popfl ret -CFI_ENDPROC ENDPROC(cmpxchg8b_emu) diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index 8239dbcbf984..009f98216b7e 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -1,7 +1,6 @@ /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ #include -#include #include #include @@ -13,22 +12,16 @@ */ ALIGN ENTRY(copy_page) - CFI_STARTPROC ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD movl $4096/8, %ecx rep movsq ret - CFI_ENDPROC ENDPROC(copy_page) ENTRY(copy_page_regs) - CFI_STARTPROC subq $2*8, %rsp - CFI_ADJUST_CFA_OFFSET 2*8 movq %rbx, (%rsp) - CFI_REL_OFFSET rbx, 0 movq %r12, 1*8(%rsp) - CFI_REL_OFFSET r12, 1*8 movl $(4096/64)-5, %ecx .p2align 4 @@ -87,11 +80,7 @@ ENTRY(copy_page_regs) jnz .Loop2 movq (%rsp), %rbx - CFI_RESTORE rbx movq 1*8(%rsp), %r12 - CFI_RESTORE r12 addq $2*8, %rsp - CFI_ADJUST_CFA_OFFSET -2*8 ret - CFI_ENDPROC ENDPROC(copy_page_regs) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index e4b3beee83bd..982ce34f4a9b 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -7,7 +7,6 @@ */ #include -#include #include #include #include @@ -18,7 +17,6 @@ /* Standard copy_to_user with segment limit checking */ ENTRY(_copy_to_user) - CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rdi,%rcx addq %rdx,%rcx @@ -30,12 +28,10 @@ ENTRY(_copy_to_user) X86_FEATURE_REP_GOOD, \ "jmp copy_user_enhanced_fast_string", \ X86_FEATURE_ERMS - CFI_ENDPROC ENDPROC(_copy_to_user) /* Standard copy_from_user with segment limit checking */ ENTRY(_copy_from_user) - CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rsi,%rcx addq %rdx,%rcx @@ -47,14 +43,12 @@ ENTRY(_copy_from_user) X86_FEATURE_REP_GOOD, \ "jmp copy_user_enhanced_fast_string", \ X86_FEATURE_ERMS - CFI_ENDPROC ENDPROC(_copy_from_user) .section .fixup,"ax" /* must zero dest */ ENTRY(bad_from_user) bad_from_user: - CFI_STARTPROC movl %edx,%ecx xorl %eax,%eax rep @@ -62,7 +56,6 @@ bad_from_user: bad_to_user: movl %edx,%eax ret - CFI_ENDPROC ENDPROC(bad_from_user) .previous @@ -80,7 +73,6 @@ ENDPROC(bad_from_user) * eax uncopied bytes or 0 if successful. */ ENTRY(copy_user_generic_unrolled) - CFI_STARTPROC ASM_STAC cmpl $8,%edx jb 20f /* less then 8 bytes, go to byte copy loop */ @@ -162,7 +154,6 @@ ENTRY(copy_user_generic_unrolled) _ASM_EXTABLE(19b,40b) _ASM_EXTABLE(21b,50b) _ASM_EXTABLE(22b,50b) - CFI_ENDPROC ENDPROC(copy_user_generic_unrolled) /* Some CPUs run faster using the string copy instructions. @@ -184,7 +175,6 @@ ENDPROC(copy_user_generic_unrolled) * eax uncopied bytes or 0 if successful. */ ENTRY(copy_user_generic_string) - CFI_STARTPROC ASM_STAC cmpl $8,%edx jb 2f /* less than 8 bytes, go to byte copy loop */ @@ -209,7 +199,6 @@ ENTRY(copy_user_generic_string) _ASM_EXTABLE(1b,11b) _ASM_EXTABLE(3b,12b) - CFI_ENDPROC ENDPROC(copy_user_generic_string) /* @@ -225,7 +214,6 @@ ENDPROC(copy_user_generic_string) * eax uncopied bytes or 0 if successful. */ ENTRY(copy_user_enhanced_fast_string) - CFI_STARTPROC ASM_STAC movl %edx,%ecx 1: rep @@ -240,7 +228,6 @@ ENTRY(copy_user_enhanced_fast_string) .previous _ASM_EXTABLE(1b,12b) - CFI_ENDPROC ENDPROC(copy_user_enhanced_fast_string) /* @@ -248,7 +235,6 @@ ENDPROC(copy_user_enhanced_fast_string) * This will force destination/source out of cache for more performance. */ ENTRY(__copy_user_nocache) - CFI_STARTPROC ASM_STAC cmpl $8,%edx jb 20f /* less then 8 bytes, go to byte copy loop */ @@ -332,5 +318,4 @@ ENTRY(__copy_user_nocache) _ASM_EXTABLE(19b,40b) _ASM_EXTABLE(21b,50b) _ASM_EXTABLE(22b,50b) - CFI_ENDPROC ENDPROC(__copy_user_nocache) diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index 9734182966f3..7e48807b2fa1 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S @@ -6,7 +6,6 @@ * for more details. No warranty for anything given at all. */ #include -#include #include #include @@ -47,23 +46,16 @@ ENTRY(csum_partial_copy_generic) - CFI_STARTPROC cmpl $3*64, %edx jle .Lignore .Lignore: subq $7*8, %rsp - CFI_ADJUST_CFA_OFFSET 7*8 movq %rbx, 2*8(%rsp) - CFI_REL_OFFSET rbx, 2*8 movq %r12, 3*8(%rsp) - CFI_REL_OFFSET r12, 3*8 movq %r14, 4*8(%rsp) - CFI_REL_OFFSET r14, 4*8 movq %r13, 5*8(%rsp) - CFI_REL_OFFSET r13, 5*8 movq %rbp, 6*8(%rsp) - CFI_REL_OFFSET rbp, 6*8 movq %r8, (%rsp) movq %r9, 1*8(%rsp) @@ -206,22 +198,14 @@ ENTRY(csum_partial_copy_generic) addl %ebx, %eax adcl %r9d, %eax /* carry */ - CFI_REMEMBER_STATE .Lende: movq 2*8(%rsp), %rbx - CFI_RESTORE rbx movq 3*8(%rsp), %r12 - CFI_RESTORE r12 movq 4*8(%rsp), %r14 - CFI_RESTORE r14 movq 5*8(%rsp), %r13 - CFI_RESTORE r13 movq 6*8(%rsp), %rbp - CFI_RESTORE rbp addq $7*8, %rsp - CFI_ADJUST_CFA_OFFSET -7*8 ret - CFI_RESTORE_STATE /* Exception handlers. Very simple, zeroing is done in the wrappers */ .Lbad_source: @@ -237,5 +221,4 @@ ENTRY(csum_partial_copy_generic) jz .Lende movl $-EFAULT, (%rax) jmp .Lende - CFI_ENDPROC ENDPROC(csum_partial_copy_generic) diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index a4512359656a..46668cda4ffd 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -26,7 +26,6 @@ */ #include -#include #include #include #include @@ -36,7 +35,6 @@ .text ENTRY(__get_user_1) - CFI_STARTPROC GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user @@ -45,11 +43,9 @@ ENTRY(__get_user_1) xor %eax,%eax ASM_CLAC ret - CFI_ENDPROC ENDPROC(__get_user_1) ENTRY(__get_user_2) - CFI_STARTPROC add $1,%_ASM_AX jc bad_get_user GET_THREAD_INFO(%_ASM_DX) @@ -60,11 +56,9 @@ ENTRY(__get_user_2) xor %eax,%eax ASM_CLAC ret - CFI_ENDPROC ENDPROC(__get_user_2) ENTRY(__get_user_4) - CFI_STARTPROC add $3,%_ASM_AX jc bad_get_user GET_THREAD_INFO(%_ASM_DX) @@ -75,11 +69,9 @@ ENTRY(__get_user_4) xor %eax,%eax ASM_CLAC ret - CFI_ENDPROC ENDPROC(__get_user_4) ENTRY(__get_user_8) - CFI_STARTPROC #ifdef CONFIG_X86_64 add $7,%_ASM_AX jc bad_get_user @@ -104,28 +96,23 @@ ENTRY(__get_user_8) ASM_CLAC ret #endif - CFI_ENDPROC ENDPROC(__get_user_8) bad_get_user: - CFI_STARTPROC xor %edx,%edx mov $(-EFAULT),%_ASM_AX ASM_CLAC ret - CFI_ENDPROC END(bad_get_user) #ifdef CONFIG_X86_32 bad_get_user_8: - CFI_STARTPROC xor %edx,%edx xor %ecx,%ecx mov $(-EFAULT),%_ASM_AX ASM_CLAC ret - CFI_ENDPROC END(bad_get_user_8) #endif diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S index 05a95e713da8..33147fef3452 100644 --- a/arch/x86/lib/iomap_copy_64.S +++ b/arch/x86/lib/iomap_copy_64.S @@ -16,15 +16,12 @@ */ #include -#include /* * override generic version in lib/iomap_copy.c */ ENTRY(__iowrite32_copy) - CFI_STARTPROC movl %edx,%ecx rep movsd ret - CFI_ENDPROC ENDPROC(__iowrite32_copy) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index b046664f5a1c..16698bba87de 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -2,7 +2,6 @@ #include #include -#include #include /* @@ -53,7 +52,6 @@ ENTRY(memcpy_erms) ENDPROC(memcpy_erms) ENTRY(memcpy_orig) - CFI_STARTPROC movq %rdi, %rax cmpq $0x20, %rdx @@ -178,5 +176,4 @@ ENTRY(memcpy_orig) .Lend: retq - CFI_ENDPROC ENDPROC(memcpy_orig) diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 0f8a0d0331b9..ca2afdd6d98e 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -6,7 +6,6 @@ * - Copyright 2011 Fenghua Yu */ #include -#include #include #include @@ -27,7 +26,6 @@ ENTRY(memmove) ENTRY(__memmove) - CFI_STARTPROC /* Handle more 32 bytes in loop */ mov %rdi, %rax @@ -207,6 +205,5 @@ ENTRY(__memmove) movb %r11b, (%rdi) 13: retq - CFI_ENDPROC ENDPROC(__memmove) ENDPROC(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 93118fb23976..2661fad05827 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -1,7 +1,6 @@ /* Copyright 2002 Andi Kleen, SuSE Labs */ #include -#include #include #include @@ -66,7 +65,6 @@ ENTRY(memset_erms) ENDPROC(memset_erms) ENTRY(memset_orig) - CFI_STARTPROC movq %rdi,%r10 /* expand byte value */ @@ -78,7 +76,6 @@ ENTRY(memset_orig) movl %edi,%r9d andl $7,%r9d jnz .Lbad_alignment - CFI_REMEMBER_STATE .Lafter_bad_alignment: movq %rdx,%rcx @@ -128,7 +125,6 @@ ENTRY(memset_orig) movq %r10,%rax ret - CFI_RESTORE_STATE .Lbad_alignment: cmpq $7,%rdx jbe .Lhandle_7 @@ -139,5 +135,4 @@ ENTRY(memset_orig) subq %r8,%rdx jmp .Lafter_bad_alignment .Lfinal: - CFI_ENDPROC ENDPROC(memset_orig) diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index 3ca5218fbece..c81556409bbb 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -13,9 +12,8 @@ */ .macro op_safe_regs op ENTRY(\op\()_safe_regs) - CFI_STARTPROC - pushq_cfi_reg rbx - pushq_cfi_reg rbp + pushq %rbx + pushq %rbp movq %rdi, %r10 /* Save pointer */ xorl %r11d, %r11d /* Return value */ movl (%rdi), %eax @@ -25,7 +23,6 @@ ENTRY(\op\()_safe_regs) movl 20(%rdi), %ebp movl 24(%rdi), %esi movl 28(%rdi), %edi - CFI_REMEMBER_STATE 1: \op 2: movl %eax, (%r10) movl %r11d, %eax /* Return value */ @@ -35,16 +32,14 @@ ENTRY(\op\()_safe_regs) movl %ebp, 20(%r10) movl %esi, 24(%r10) movl %edi, 28(%r10) - popq_cfi_reg rbp - popq_cfi_reg rbx + popq %rbp + popq %rbx ret 3: - CFI_RESTORE_STATE movl $-EIO, %r11d jmp 2b _ASM_EXTABLE(1b, 3b) - CFI_ENDPROC ENDPROC(\op\()_safe_regs) .endm @@ -52,13 +47,12 @@ ENDPROC(\op\()_safe_regs) .macro op_safe_regs op ENTRY(\op\()_safe_regs) - CFI_STARTPROC - pushl_cfi_reg ebx - pushl_cfi_reg ebp - pushl_cfi_reg esi - pushl_cfi_reg edi - pushl_cfi $0 /* Return value */ - pushl_cfi %eax + pushl %ebx + pushl %ebp + pushl %esi + pushl %edi + pushl $0 /* Return value */ + pushl %eax movl 4(%eax), %ecx movl 8(%eax), %edx movl 12(%eax), %ebx @@ -66,32 +60,28 @@ ENTRY(\op\()_safe_regs) movl 24(%eax), %esi movl 28(%eax), %edi movl (%eax), %eax - CFI_REMEMBER_STATE 1: \op -2: pushl_cfi %eax +2: pushl %eax movl 4(%esp), %eax - popl_cfi (%eax) + popl (%eax) addl $4, %esp - CFI_ADJUST_CFA_OFFSET -4 movl %ecx, 4(%eax) movl %edx, 8(%eax) movl %ebx, 12(%eax) movl %ebp, 20(%eax) movl %esi, 24(%eax) movl %edi, 28(%eax) - popl_cfi %eax - popl_cfi_reg edi - popl_cfi_reg esi - popl_cfi_reg ebp - popl_cfi_reg ebx + popl %eax + popl %edi + popl %esi + popl %ebp + popl %ebx ret 3: - CFI_RESTORE_STATE movl $-EIO, 4(%esp) jmp 2b _ASM_EXTABLE(1b, 3b) - CFI_ENDPROC ENDPROC(\op\()_safe_regs) .endm diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index fc6ba17a7eec..e0817a12d323 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -11,7 +11,6 @@ * return value. */ #include -#include #include #include #include @@ -30,11 +29,9 @@ * as they get called from within inline assembly. */ -#define ENTER CFI_STARTPROC ; \ - GET_THREAD_INFO(%_ASM_BX) +#define ENTER GET_THREAD_INFO(%_ASM_BX) #define EXIT ASM_CLAC ; \ - ret ; \ - CFI_ENDPROC + ret .text ENTRY(__put_user_1) @@ -87,7 +84,6 @@ ENTRY(__put_user_8) ENDPROC(__put_user_8) bad_put_user: - CFI_STARTPROC movl $-EFAULT,%eax EXIT END(bad_put_user) diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S index 2322abe4da3b..40027db99140 100644 --- a/arch/x86/lib/rwsem.S +++ b/arch/x86/lib/rwsem.S @@ -15,7 +15,6 @@ #include #include -#include #define __ASM_HALF_REG(reg) __ASM_SEL(reg, e##reg) #define __ASM_HALF_SIZE(inst) __ASM_SEL(inst##w, inst##l) @@ -34,10 +33,10 @@ */ #define save_common_regs \ - pushl_cfi_reg ecx + pushl %ecx #define restore_common_regs \ - popl_cfi_reg ecx + popl %ecx /* Avoid uglifying the argument copying x86-64 needs to do. */ .macro movq src, dst @@ -64,50 +63,45 @@ */ #define save_common_regs \ - pushq_cfi_reg rdi; \ - pushq_cfi_reg rsi; \ - pushq_cfi_reg rcx; \ - pushq_cfi_reg r8; \ - pushq_cfi_reg r9; \ - pushq_cfi_reg r10; \ - pushq_cfi_reg r11 + pushq %rdi; \ + pushq %rsi; \ + pushq %rcx; \ + pushq %r8; \ + pushq %r9; \ + pushq %r10; \ + pushq %r11 #define restore_common_regs \ - popq_cfi_reg r11; \ - popq_cfi_reg r10; \ - popq_cfi_reg r9; \ - popq_cfi_reg r8; \ - popq_cfi_reg rcx; \ - popq_cfi_reg rsi; \ - popq_cfi_reg rdi + popq %r11; \ + popq %r10; \ + popq %r9; \ + popq %r8; \ + popq %rcx; \ + popq %rsi; \ + popq %rdi #endif /* Fix up special calling conventions */ ENTRY(call_rwsem_down_read_failed) - CFI_STARTPROC save_common_regs - __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx) + __ASM_SIZE(push,) %__ASM_REG(dx) movq %rax,%rdi call rwsem_down_read_failed - __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx) + __ASM_SIZE(pop,) %__ASM_REG(dx) restore_common_regs ret - CFI_ENDPROC ENDPROC(call_rwsem_down_read_failed) ENTRY(call_rwsem_down_write_failed) - CFI_STARTPROC save_common_regs movq %rax,%rdi call rwsem_down_write_failed restore_common_regs ret - CFI_ENDPROC ENDPROC(call_rwsem_down_write_failed) ENTRY(call_rwsem_wake) - CFI_STARTPROC /* do nothing if still outstanding active readers */ __ASM_HALF_SIZE(dec) %__ASM_HALF_REG(dx) jnz 1f @@ -116,17 +110,14 @@ ENTRY(call_rwsem_wake) call rwsem_wake restore_common_regs 1: ret - CFI_ENDPROC ENDPROC(call_rwsem_wake) ENTRY(call_rwsem_downgrade_wake) - CFI_STARTPROC save_common_regs - __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx) + __ASM_SIZE(push,) %__ASM_REG(dx) movq %rax,%rdi call rwsem_downgrade_wake - __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx) + __ASM_SIZE(pop,) %__ASM_REG(dx) restore_common_regs ret - CFI_ENDPROC ENDPROC(call_rwsem_downgrade_wake) diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index 5eb715087b80..e9acf5f4fc92 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S @@ -6,16 +6,14 @@ */ #include #include - #include /* put return address in eax (arg1) */ .macro THUNK name, func, put_ret_addr_in_eax=0 .globl \name \name: - CFI_STARTPROC - pushl_cfi_reg eax - pushl_cfi_reg ecx - pushl_cfi_reg edx + pushl %eax + pushl %ecx + pushl %edx .if \put_ret_addr_in_eax /* Place EIP in the arg1 */ @@ -23,11 +21,10 @@ .endif call \func - popl_cfi_reg edx - popl_cfi_reg ecx - popl_cfi_reg eax + popl %edx + popl %ecx + popl %eax ret - CFI_ENDPROC _ASM_NOKPROBE(\name) .endm diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index f89ba4e93025..10f555e435e1 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -6,7 +6,6 @@ * Subject to the GNU public license, v.2. No warranty of any kind. */ #include -#include #include #include @@ -14,27 +13,25 @@ .macro THUNK name, func, put_ret_addr_in_rdi=0 .globl \name \name: - CFI_STARTPROC /* this one pushes 9 elems, the next one would be %rIP */ - pushq_cfi_reg rdi - pushq_cfi_reg rsi - pushq_cfi_reg rdx - pushq_cfi_reg rcx - pushq_cfi_reg rax - pushq_cfi_reg r8 - pushq_cfi_reg r9 - pushq_cfi_reg r10 - pushq_cfi_reg r11 + pushq %rdi + pushq %rsi + pushq %rdx + pushq %rcx + pushq %rax + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r11 .if \put_ret_addr_in_rdi /* 9*8(%rsp) is return addr on stack */ - movq_cfi_restore 9*8, rdi + movq 9*8(%rsp), %rdi .endif call \func jmp restore - CFI_ENDPROC _ASM_NOKPROBE(\name) .endm @@ -57,19 +54,16 @@ #if defined(CONFIG_TRACE_IRQFLAGS) \ || defined(CONFIG_DEBUG_LOCK_ALLOC) \ || defined(CONFIG_PREEMPT) - CFI_STARTPROC - CFI_ADJUST_CFA_OFFSET 9*8 restore: - popq_cfi_reg r11 - popq_cfi_reg r10 - popq_cfi_reg r9 - popq_cfi_reg r8 - popq_cfi_reg rax - popq_cfi_reg rcx - popq_cfi_reg rdx - popq_cfi_reg rsi - popq_cfi_reg rdi + popq %r11 + popq %r10 + popq %r9 + popq %r8 + popq %rax + popq %rcx + popq %rdx + popq %rsi + popq %rdi ret - CFI_ENDPROC _ASM_NOKPROBE(restore) #endif diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S index 6440221ced0d..4093216b3791 100644 --- a/arch/x86/net/bpf_jit.S +++ b/arch/x86/net/bpf_jit.S @@ -8,7 +8,6 @@ * of the License. */ #include -#include /* * Calling convention : -- cgit v1.2.3 From 2bf557ea3f49576fabe24cd5daf1a34e9ee22c3c Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 1 Jun 2015 13:02:55 +0100 Subject: x86/asm/entry/64: Use negative immediates for stack adjustments Doing so allows adjustments by 128 bytes (occurring for REMOVE_PT_GPREGS_FROM_STACK 8 uses) to be expressed with a single byte immediate. Signed-off-by: Jan Beulich Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/556C660F020000780007FB60@mail.emea.novell.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/calling.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index 0d76accde45b..f4e6308c4200 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -89,7 +89,7 @@ For 32-bit we have the following conventions - kernel is built with #define SIZEOF_PTREGS 21*8 .macro ALLOC_PT_GPREGS_ON_STACK addskip=0 - subq $15*8+\addskip, %rsp + addq $-(15*8+\addskip), %rsp .endm .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1 @@ -201,7 +201,7 @@ For 32-bit we have the following conventions - kernel is built with .endm .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 - addq $15*8+\addskip, %rsp + subq $-(15*8+\addskip), %rsp .endm .macro icebp -- cgit v1.2.3 From d6472302f242559d45dcf4ebace62508dc4d8aeb Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 2 Jun 2015 19:01:38 +1000 Subject: x86/mm: Decouple from Nothing in uses anything from , so remove it from there and fix up the resulting build problems triggered on x86 {64|32}-bit {def|allmod|allno}configs. The breakages were triggering in places where x86 builds relied on vmalloc() facilities but did not include explicitly and relied on the implicit inclusion via . Also add: - to - to ... which were two other implicit header file dependencies. Suggested-by: David Miller Signed-off-by: Stephen Rothwell [ Tidied up the changelog. ] Acked-by: David Miller Acked-by: Takashi Iwai Acked-by: Viresh Kumar Acked-by: Vinod Koul Cc: Andrew Morton Cc: Anton Vorontsov Cc: Boris Ostrovsky Cc: Colin Cross Cc: David Vrabel Cc: H. Peter Anvin Cc: Haiyang Zhang Cc: James E.J. Bottomley Cc: Jaroslav Kysela Cc: K. Y. Srinivasan Cc: Kees Cook Cc: Konrad Rzeszutek Wilk Cc: Kristen Carlson Accardi Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Suma Ramars Cc: Thomas Gleixner Cc: Tony Luck Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 3 +-- arch/x86/kernel/crash.c | 1 + arch/x86/kernel/machine_kexec_64.c | 1 + arch/x86/mm/pageattr-test.c | 1 + arch/x86/mm/pageattr.c | 1 + arch/x86/xen/p2m.c | 1 + drivers/acpi/apei/erst.c | 1 + drivers/cpufreq/intel_pstate.c | 1 + drivers/dma/mic_x100_dma.c | 1 + drivers/net/hyperv/netvsc.c | 1 + drivers/net/hyperv/rndis_filter.c | 1 + drivers/scsi/fnic/fnic_debugfs.c | 1 + drivers/scsi/fnic/fnic_trace.c | 1 + include/linux/io.h | 1 + sound/pci/asihpi/hpioctl.c | 1 + 15 files changed, 15 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index a2b97404922d..a94463063b46 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -40,6 +40,7 @@ #include #include #include +#include #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ @@ -198,8 +199,6 @@ extern void set_iounmap_nonlazy(void); #include -#include - /* * Convert a virtual cached pointer to an uncached pointer */ diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index c76d3e37c6e1..e068d6683dba 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 415480d3ea84..11546b462fa6 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index 6629f397b467..8ff686aa7e8c 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 70d221fe2eb4..fae3c5366ac0 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index b47124d4cd67..8b7f18e200aa 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index ed65e9c4b5b0..3670bbab57a3 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "apei-internal.h" diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6414661ac1c4..2ba53f4f6af2 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/drivers/dma/mic_x100_dma.c b/drivers/dma/mic_x100_dma.c index 6de2e677be04..74d9db05a5ad 100644 --- a/drivers/dma/mic_x100_dma.c +++ b/drivers/dma/mic_x100_dma.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "mic_x100_dma.h" diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index ea091bc5ff09..1e09243d5449 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "hyperv_net.h" diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 9118cea91882..35a482d526d9 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "hyperv_net.h" diff --git a/drivers/scsi/fnic/fnic_debugfs.c b/drivers/scsi/fnic/fnic_debugfs.c index 5980c10c734d..d6498fabe628 100644 --- a/drivers/scsi/fnic/fnic_debugfs.c +++ b/drivers/scsi/fnic/fnic_debugfs.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "fnic.h" static struct dentry *fnic_trace_debugfs_root; diff --git a/drivers/scsi/fnic/fnic_trace.c b/drivers/scsi/fnic/fnic_trace.c index 65a9bde26974..4e15c4bf0795 100644 --- a/drivers/scsi/fnic/fnic_trace.c +++ b/drivers/scsi/fnic/fnic_trace.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "fnic_io.h" #include "fnic.h" diff --git a/include/linux/io.h b/include/linux/io.h index 04cce4da3685..fb5a99800e77 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -19,6 +19,7 @@ #define _LINUX_IO_H #include +#include #include #include diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c index 6610bd096fc9..d17937b92331 100644 --- a/sound/pci/asihpi/hpioctl.c +++ b/sound/pci/asihpi/hpioctl.c @@ -32,6 +32,7 @@ #include #include #include +#include #ifdef MODULE_FIRMWARE MODULE_FIRMWARE("asihpi/dsp5000.bin"); -- cgit v1.2.3 From d36f947904dba0935a0e74dc9df2ef57caeb7d03 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 3 Jun 2015 18:29:26 +0200 Subject: x86/asm/entry: Move arch/x86/include/asm/calling.h to arch/x86/entry/ asm/calling.h is private to the entry code, make this more apparent by moving it to the new arch/x86/entry/ directory. Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Denys Vlasenko Cc: Brian Gerst Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/calling.h | 243 +++++++++++++++++++++++++++++++++++++++++ arch/x86/entry/entry_64.S | 2 +- arch/x86/entry/ia32entry.S | 2 +- arch/x86/entry/thunk_64.S | 2 +- arch/x86/include/asm/calling.h | 243 ----------------------------------------- 5 files changed, 246 insertions(+), 246 deletions(-) create mode 100644 arch/x86/entry/calling.h delete mode 100644 arch/x86/include/asm/calling.h (limited to 'arch/x86/include') diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h new file mode 100644 index 000000000000..f4e6308c4200 --- /dev/null +++ b/arch/x86/entry/calling.h @@ -0,0 +1,243 @@ +/* + + x86 function call convention, 64-bit: + ------------------------------------- + arguments | callee-saved | extra caller-saved | return + [callee-clobbered] | | [callee-clobbered] | + --------------------------------------------------------------------------- + rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11 | rax, rdx [**] + + ( rsp is obviously invariant across normal function calls. (gcc can 'merge' + functions when it sees tail-call optimization possibilities) rflags is + clobbered. Leftover arguments are passed over the stack frame.) + + [*] In the frame-pointers case rbp is fixed to the stack frame. + + [**] for struct return values wider than 64 bits the return convention is a + bit more complex: up to 128 bits width we return small structures + straight in rax, rdx. For structures larger than that (3 words or + larger) the caller puts a pointer to an on-stack return struct + [allocated in the caller's stack frame] into the first argument - i.e. + into rdi. All other arguments shift up by one in this case. + Fortunately this case is rare in the kernel. + +For 32-bit we have the following conventions - kernel is built with +-mregparm=3 and -freg-struct-return: + + x86 function calling convention, 32-bit: + ---------------------------------------- + arguments | callee-saved | extra caller-saved | return + [callee-clobbered] | | [callee-clobbered] | + ------------------------------------------------------------------------- + eax edx ecx | ebx edi esi ebp [*] | | eax, edx [**] + + ( here too esp is obviously invariant across normal function calls. eflags + is clobbered. Leftover arguments are passed over the stack frame. ) + + [*] In the frame-pointers case ebp is fixed to the stack frame. + + [**] We build with -freg-struct-return, which on 32-bit means similar + semantics as on 64-bit: edx can be used for a second return value + (i.e. covering integer and structure sizes up to 64 bits) - after that + it gets more complex and more expensive: 3-word or larger struct returns + get done in the caller's frame and the pointer to the return struct goes + into regparm0, i.e. eax - the other arguments shift up and the + function's register parameters degenerate to regparm=2 in essence. + +*/ + +#ifdef CONFIG_X86_64 + +/* + * 64-bit system call stack frame layout defines and helpers, + * for assembly code: + */ + +/* The layout forms the "struct pt_regs" on the stack: */ +/* + * C ABI says these regs are callee-preserved. They aren't saved on kernel entry + * unless syscall needs a complete, fully filled "struct pt_regs". + */ +#define R15 0*8 +#define R14 1*8 +#define R13 2*8 +#define R12 3*8 +#define RBP 4*8 +#define RBX 5*8 +/* These regs are callee-clobbered. Always saved on kernel entry. */ +#define R11 6*8 +#define R10 7*8 +#define R9 8*8 +#define R8 9*8 +#define RAX 10*8 +#define RCX 11*8 +#define RDX 12*8 +#define RSI 13*8 +#define RDI 14*8 +/* + * On syscall entry, this is syscall#. On CPU exception, this is error code. + * On hw interrupt, it's IRQ number: + */ +#define ORIG_RAX 15*8 +/* Return frame for iretq */ +#define RIP 16*8 +#define CS 17*8 +#define EFLAGS 18*8 +#define RSP 19*8 +#define SS 20*8 + +#define SIZEOF_PTREGS 21*8 + + .macro ALLOC_PT_GPREGS_ON_STACK addskip=0 + addq $-(15*8+\addskip), %rsp + .endm + + .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1 + .if \r11 + movq %r11, 6*8+\offset(%rsp) + .endif + .if \r8910 + movq %r10, 7*8+\offset(%rsp) + movq %r9, 8*8+\offset(%rsp) + movq %r8, 9*8+\offset(%rsp) + .endif + .if \rax + movq %rax, 10*8+\offset(%rsp) + .endif + .if \rcx + movq %rcx, 11*8+\offset(%rsp) + .endif + movq %rdx, 12*8+\offset(%rsp) + movq %rsi, 13*8+\offset(%rsp) + movq %rdi, 14*8+\offset(%rsp) + .endm + .macro SAVE_C_REGS offset=0 + SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1 + .endm + .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0 + SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1 + .endm + .macro SAVE_C_REGS_EXCEPT_R891011 + SAVE_C_REGS_HELPER 0, 1, 1, 0, 0 + .endm + .macro SAVE_C_REGS_EXCEPT_RCX_R891011 + SAVE_C_REGS_HELPER 0, 1, 0, 0, 0 + .endm + .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11 + SAVE_C_REGS_HELPER 0, 0, 0, 1, 0 + .endm + + .macro SAVE_EXTRA_REGS offset=0 + movq %r15, 0*8+\offset(%rsp) + movq %r14, 1*8+\offset(%rsp) + movq %r13, 2*8+\offset(%rsp) + movq %r12, 3*8+\offset(%rsp) + movq %rbp, 4*8+\offset(%rsp) + movq %rbx, 5*8+\offset(%rsp) + .endm + .macro SAVE_EXTRA_REGS_RBP offset=0 + movq %rbp, 4*8+\offset(%rsp) + .endm + + .macro RESTORE_EXTRA_REGS offset=0 + movq 0*8+\offset(%rsp), %r15 + movq 1*8+\offset(%rsp), %r14 + movq 2*8+\offset(%rsp), %r13 + movq 3*8+\offset(%rsp), %r12 + movq 4*8+\offset(%rsp), %rbp + movq 5*8+\offset(%rsp), %rbx + .endm + + .macro ZERO_EXTRA_REGS + xorl %r15d, %r15d + xorl %r14d, %r14d + xorl %r13d, %r13d + xorl %r12d, %r12d + xorl %ebp, %ebp + xorl %ebx, %ebx + .endm + + .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 + .if \rstor_r11 + movq 6*8(%rsp), %r11 + .endif + .if \rstor_r8910 + movq 7*8(%rsp), %r10 + movq 8*8(%rsp), %r9 + movq 9*8(%rsp), %r8 + .endif + .if \rstor_rax + movq 10*8(%rsp), %rax + .endif + .if \rstor_rcx + movq 11*8(%rsp), %rcx + .endif + .if \rstor_rdx + movq 12*8(%rsp), %rdx + .endif + movq 13*8(%rsp), %rsi + movq 14*8(%rsp), %rdi + .endm + .macro RESTORE_C_REGS + RESTORE_C_REGS_HELPER 1,1,1,1,1 + .endm + .macro RESTORE_C_REGS_EXCEPT_RAX + RESTORE_C_REGS_HELPER 0,1,1,1,1 + .endm + .macro RESTORE_C_REGS_EXCEPT_RCX + RESTORE_C_REGS_HELPER 1,0,1,1,1 + .endm + .macro RESTORE_C_REGS_EXCEPT_R11 + RESTORE_C_REGS_HELPER 1,1,0,1,1 + .endm + .macro RESTORE_C_REGS_EXCEPT_RCX_R11 + RESTORE_C_REGS_HELPER 1,0,0,1,1 + .endm + .macro RESTORE_RSI_RDI + RESTORE_C_REGS_HELPER 0,0,0,0,0 + .endm + .macro RESTORE_RSI_RDI_RDX + RESTORE_C_REGS_HELPER 0,0,0,0,1 + .endm + + .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 + subq $-(15*8+\addskip), %rsp + .endm + + .macro icebp + .byte 0xf1 + .endm + +#else /* CONFIG_X86_64 */ + +/* + * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These + * are different from the entry_32.S versions in not changing the segment + * registers. So only suitable for in kernel use, not when transitioning + * from or to user space. The resulting stack frame is not a standard + * pt_regs frame. The main use case is calling C code from assembler + * when all the registers need to be preserved. + */ + + .macro SAVE_ALL + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx + .endm + + .macro RESTORE_ALL + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + .endm + +#endif /* CONFIG_X86_64 */ + diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4ad79e946f5a..f7380ea75777 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -28,7 +28,7 @@ #include #include #include -#include +#include "calling.h" #include #include #include diff --git a/arch/x86/entry/ia32entry.S b/arch/x86/entry/ia32entry.S index 2be23c734db5..f16767417385 100644 --- a/arch/x86/entry/ia32entry.S +++ b/arch/x86/entry/ia32entry.S @@ -4,7 +4,7 @@ * Copyright 2000-2002 Andi Kleen, SuSE Labs. */ -#include +#include "calling.h" #include #include #include diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index 10f555e435e1..3e95681b4e2d 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -6,7 +6,7 @@ * Subject to the GNU public license, v.2. No warranty of any kind. */ #include -#include +#include "calling.h" #include /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h deleted file mode 100644 index f4e6308c4200..000000000000 --- a/arch/x86/include/asm/calling.h +++ /dev/null @@ -1,243 +0,0 @@ -/* - - x86 function call convention, 64-bit: - ------------------------------------- - arguments | callee-saved | extra caller-saved | return - [callee-clobbered] | | [callee-clobbered] | - --------------------------------------------------------------------------- - rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11 | rax, rdx [**] - - ( rsp is obviously invariant across normal function calls. (gcc can 'merge' - functions when it sees tail-call optimization possibilities) rflags is - clobbered. Leftover arguments are passed over the stack frame.) - - [*] In the frame-pointers case rbp is fixed to the stack frame. - - [**] for struct return values wider than 64 bits the return convention is a - bit more complex: up to 128 bits width we return small structures - straight in rax, rdx. For structures larger than that (3 words or - larger) the caller puts a pointer to an on-stack return struct - [allocated in the caller's stack frame] into the first argument - i.e. - into rdi. All other arguments shift up by one in this case. - Fortunately this case is rare in the kernel. - -For 32-bit we have the following conventions - kernel is built with --mregparm=3 and -freg-struct-return: - - x86 function calling convention, 32-bit: - ---------------------------------------- - arguments | callee-saved | extra caller-saved | return - [callee-clobbered] | | [callee-clobbered] | - ------------------------------------------------------------------------- - eax edx ecx | ebx edi esi ebp [*] | | eax, edx [**] - - ( here too esp is obviously invariant across normal function calls. eflags - is clobbered. Leftover arguments are passed over the stack frame. ) - - [*] In the frame-pointers case ebp is fixed to the stack frame. - - [**] We build with -freg-struct-return, which on 32-bit means similar - semantics as on 64-bit: edx can be used for a second return value - (i.e. covering integer and structure sizes up to 64 bits) - after that - it gets more complex and more expensive: 3-word or larger struct returns - get done in the caller's frame and the pointer to the return struct goes - into regparm0, i.e. eax - the other arguments shift up and the - function's register parameters degenerate to regparm=2 in essence. - -*/ - -#ifdef CONFIG_X86_64 - -/* - * 64-bit system call stack frame layout defines and helpers, - * for assembly code: - */ - -/* The layout forms the "struct pt_regs" on the stack: */ -/* - * C ABI says these regs are callee-preserved. They aren't saved on kernel entry - * unless syscall needs a complete, fully filled "struct pt_regs". - */ -#define R15 0*8 -#define R14 1*8 -#define R13 2*8 -#define R12 3*8 -#define RBP 4*8 -#define RBX 5*8 -/* These regs are callee-clobbered. Always saved on kernel entry. */ -#define R11 6*8 -#define R10 7*8 -#define R9 8*8 -#define R8 9*8 -#define RAX 10*8 -#define RCX 11*8 -#define RDX 12*8 -#define RSI 13*8 -#define RDI 14*8 -/* - * On syscall entry, this is syscall#. On CPU exception, this is error code. - * On hw interrupt, it's IRQ number: - */ -#define ORIG_RAX 15*8 -/* Return frame for iretq */ -#define RIP 16*8 -#define CS 17*8 -#define EFLAGS 18*8 -#define RSP 19*8 -#define SS 20*8 - -#define SIZEOF_PTREGS 21*8 - - .macro ALLOC_PT_GPREGS_ON_STACK addskip=0 - addq $-(15*8+\addskip), %rsp - .endm - - .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1 - .if \r11 - movq %r11, 6*8+\offset(%rsp) - .endif - .if \r8910 - movq %r10, 7*8+\offset(%rsp) - movq %r9, 8*8+\offset(%rsp) - movq %r8, 9*8+\offset(%rsp) - .endif - .if \rax - movq %rax, 10*8+\offset(%rsp) - .endif - .if \rcx - movq %rcx, 11*8+\offset(%rsp) - .endif - movq %rdx, 12*8+\offset(%rsp) - movq %rsi, 13*8+\offset(%rsp) - movq %rdi, 14*8+\offset(%rsp) - .endm - .macro SAVE_C_REGS offset=0 - SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1 - .endm - .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0 - SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1 - .endm - .macro SAVE_C_REGS_EXCEPT_R891011 - SAVE_C_REGS_HELPER 0, 1, 1, 0, 0 - .endm - .macro SAVE_C_REGS_EXCEPT_RCX_R891011 - SAVE_C_REGS_HELPER 0, 1, 0, 0, 0 - .endm - .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11 - SAVE_C_REGS_HELPER 0, 0, 0, 1, 0 - .endm - - .macro SAVE_EXTRA_REGS offset=0 - movq %r15, 0*8+\offset(%rsp) - movq %r14, 1*8+\offset(%rsp) - movq %r13, 2*8+\offset(%rsp) - movq %r12, 3*8+\offset(%rsp) - movq %rbp, 4*8+\offset(%rsp) - movq %rbx, 5*8+\offset(%rsp) - .endm - .macro SAVE_EXTRA_REGS_RBP offset=0 - movq %rbp, 4*8+\offset(%rsp) - .endm - - .macro RESTORE_EXTRA_REGS offset=0 - movq 0*8+\offset(%rsp), %r15 - movq 1*8+\offset(%rsp), %r14 - movq 2*8+\offset(%rsp), %r13 - movq 3*8+\offset(%rsp), %r12 - movq 4*8+\offset(%rsp), %rbp - movq 5*8+\offset(%rsp), %rbx - .endm - - .macro ZERO_EXTRA_REGS - xorl %r15d, %r15d - xorl %r14d, %r14d - xorl %r13d, %r13d - xorl %r12d, %r12d - xorl %ebp, %ebp - xorl %ebx, %ebx - .endm - - .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 - .if \rstor_r11 - movq 6*8(%rsp), %r11 - .endif - .if \rstor_r8910 - movq 7*8(%rsp), %r10 - movq 8*8(%rsp), %r9 - movq 9*8(%rsp), %r8 - .endif - .if \rstor_rax - movq 10*8(%rsp), %rax - .endif - .if \rstor_rcx - movq 11*8(%rsp), %rcx - .endif - .if \rstor_rdx - movq 12*8(%rsp), %rdx - .endif - movq 13*8(%rsp), %rsi - movq 14*8(%rsp), %rdi - .endm - .macro RESTORE_C_REGS - RESTORE_C_REGS_HELPER 1,1,1,1,1 - .endm - .macro RESTORE_C_REGS_EXCEPT_RAX - RESTORE_C_REGS_HELPER 0,1,1,1,1 - .endm - .macro RESTORE_C_REGS_EXCEPT_RCX - RESTORE_C_REGS_HELPER 1,0,1,1,1 - .endm - .macro RESTORE_C_REGS_EXCEPT_R11 - RESTORE_C_REGS_HELPER 1,1,0,1,1 - .endm - .macro RESTORE_C_REGS_EXCEPT_RCX_R11 - RESTORE_C_REGS_HELPER 1,0,0,1,1 - .endm - .macro RESTORE_RSI_RDI - RESTORE_C_REGS_HELPER 0,0,0,0,0 - .endm - .macro RESTORE_RSI_RDI_RDX - RESTORE_C_REGS_HELPER 0,0,0,0,1 - .endm - - .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 - subq $-(15*8+\addskip), %rsp - .endm - - .macro icebp - .byte 0xf1 - .endm - -#else /* CONFIG_X86_64 */ - -/* - * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These - * are different from the entry_32.S versions in not changing the segment - * registers. So only suitable for in kernel use, not when transitioning - * from or to user space. The resulting stack frame is not a standard - * pt_regs frame. The main use case is calling C code from assembler - * when all the registers need to be preserved. - */ - - .macro SAVE_ALL - pushl %eax - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - pushl %ecx - pushl %ebx - .endm - - .macro RESTORE_ALL - popl %ebx - popl %ecx - popl %edx - popl %esi - popl %edi - popl %ebp - popl %eax - .endm - -#endif /* CONFIG_X86_64 */ - -- cgit v1.2.3 From cf991de2f614f454b3cb2a300c06ecdf69f3a70d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 4 Jun 2015 17:13:44 -0700 Subject: x86/asm/msr: Make wrmsrl_safe() a function The wrmsrl_safe macro performs invalid shifts if the value argument is 32 bits. This makes it unnecessarily awkward to write code that puts an unsigned long into an MSR. Convert it to a real inline function. For inspiration, see: 7c74d5b7b7b6 ("x86/asm/entry/64: Fix MSR_IA32_SYSENTER_CS MSR value"). Signed-off-by: Andy Lutomirski Cc: Cc: Andrew Morton Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner [ Applied small improvements. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index de36f22eb0b9..13bf576c401d 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -205,8 +205,13 @@ do { \ #endif /* !CONFIG_PARAVIRT */ -#define wrmsrl_safe(msr, val) wrmsr_safe((msr), (u32)(val), \ - (u32)((val) >> 32)) +/* + * 64-bit version of wrmsr_safe(): + */ +static inline int wrmsrl_safe(u32 msr, u64 val) +{ + return wrmsr_safe(msr, (u32)val, (u32)(val >> 32)); +} #define write_tsc(low, high) wrmsr(MSR_IA32_TSC, (low), (high)) -- cgit v1.2.3 From 9cd25aac1f44f269de5ecea11f7d927f37f1d01c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 4 Jun 2015 18:55:10 +0200 Subject: x86/mm/pat: Emulate PAT when it is disabled In the case when PAT is disabled on the command line with "nopat" or when virtualization doesn't support PAT (correctly) - see 9d34cfdf4796 ("x86: Don't rely on VMWare emulating PAT MSR correctly"). we emulate it using the PWT and PCD cache attribute bits. Get rid of boot_pat_state while at it. Based on a conglomerate patch from Toshi Kani. Signed-off-by: Borislav Petkov Reviewed-by: Toshi Kani Acked-by: Juergen Gross Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-3-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pat.h | 2 +- arch/x86/mm/init.c | 6 ++-- arch/x86/mm/pat.c | 81 ++++++++++++++++++++++++++++++---------------- arch/x86/xen/enlighten.c | 5 +-- 4 files changed, 60 insertions(+), 34 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index cdcff7f7f694..ca6c228d5e62 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -6,7 +6,7 @@ bool pat_enabled(void); extern void pat_init(void); -void pat_init_cache_modes(void); +void pat_init_cache_modes(u64); extern int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 1d553186c434..8533b46e6bee 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -40,7 +40,7 @@ */ uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { [_PAGE_CACHE_MODE_WB ] = 0 | 0 , - [_PAGE_CACHE_MODE_WC ] = _PAGE_PWT | 0 , + [_PAGE_CACHE_MODE_WC ] = 0 | _PAGE_PCD, [_PAGE_CACHE_MODE_UC_MINUS] = 0 | _PAGE_PCD, [_PAGE_CACHE_MODE_UC ] = _PAGE_PWT | _PAGE_PCD, [_PAGE_CACHE_MODE_WT ] = 0 | _PAGE_PCD, @@ -50,11 +50,11 @@ EXPORT_SYMBOL(__cachemode2pte_tbl); uint8_t __pte2cachemode_tbl[8] = { [__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB, - [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_WC, + [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_UC_MINUS, [__pte2cm_idx( 0 | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC_MINUS, [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC, [__pte2cm_idx( 0 | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB, - [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC, + [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, [__pte2cm_idx(0 | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, }; diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 476d0780560f..6dc7826e4797 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -68,8 +68,6 @@ static int __init pat_debug_setup(char *str) } __setup("debugpat", pat_debug_setup); -static u64 __read_mostly boot_pat_state; - #ifdef CONFIG_X86_PAT /* * X86 PAT uses page flags WC and Uncached together to keep track of @@ -177,14 +175,12 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg) * configuration. * Using lower indices is preferred, so we start with highest index. */ -void pat_init_cache_modes(void) +void pat_init_cache_modes(u64 pat) { - int i; enum page_cache_mode cache; char pat_msg[33]; - u64 pat; + int i; - rdmsrl(MSR_IA32_CR_PAT, pat); pat_msg[32] = 0; for (i = 7; i >= 0; i--) { cache = pat_get_cache_mode((pat >> (i * 8)) & 7, @@ -198,24 +194,33 @@ void pat_init_cache_modes(void) static void pat_bsp_init(u64 pat) { + u64 tmp_pat; + if (!cpu_has_pat) { pat_disable("PAT not supported by CPU."); return; } - rdmsrl(MSR_IA32_CR_PAT, boot_pat_state); - if (!boot_pat_state) { + if (!pat_enabled()) + goto done; + + rdmsrl(MSR_IA32_CR_PAT, tmp_pat); + if (!tmp_pat) { pat_disable("PAT MSR is 0, disabled."); return; } wrmsrl(MSR_IA32_CR_PAT, pat); - pat_init_cache_modes(); +done: + pat_init_cache_modes(pat); } static void pat_ap_init(u64 pat) { + if (!pat_enabled()) + return; + if (!cpu_has_pat) { /* * If this happens we are on a secondary CPU, but switched to @@ -231,25 +236,45 @@ void pat_init(void) { u64 pat; - if (!pat_enabled()) - return; - - /* - * Set PWT to Write-Combining. All other bits stay the same: - * - * PTE encoding used in Linux: - * PAT - * |PCD - * ||PWT - * ||| - * 000 WB _PAGE_CACHE_WB - * 001 WC _PAGE_CACHE_WC - * 010 UC- _PAGE_CACHE_UC_MINUS - * 011 UC _PAGE_CACHE_UC - * PAT bit unused - */ - pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | - PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); + if (!pat_enabled()) { + /* + * No PAT. Emulate the PAT table that corresponds to the two + * cache bits, PWT (Write Through) and PCD (Cache Disable). This + * setup is the same as the BIOS default setup when the system + * has PAT but the "nopat" boot option has been specified. This + * emulated PAT table is used when MSR_IA32_CR_PAT returns 0. + * + * PTE encoding used: + * + * PCD + * |PWT PAT + * || slot + * 00 0 WB : _PAGE_CACHE_MODE_WB + * 01 1 WT : _PAGE_CACHE_MODE_WT + * 10 2 UC-: _PAGE_CACHE_MODE_UC_MINUS + * 11 3 UC : _PAGE_CACHE_MODE_UC + * + * NOTE: When WC or WP is used, it is redirected to UC- per + * the default setup in __cachemode2pte_tbl[]. + */ + pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) | + PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC); + } else { + /* + * PTE encoding used in Linux: + * PAT + * |PCD + * ||PWT + * ||| + * 000 WB _PAGE_CACHE_WB + * 001 WC _PAGE_CACHE_WC + * 010 UC- _PAGE_CACHE_UC_MINUS + * 011 UC _PAGE_CACHE_UC + * PAT bit unused + */ + pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | + PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); + } if (!boot_cpu_done) { pat_bsp_init(pat); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 46957ead3060..53233a9beea9 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1467,6 +1467,7 @@ asmlinkage __visible void __init xen_start_kernel(void) { struct physdev_set_iopl set_iopl; unsigned long initrd_start = 0; + u64 pat; int rc; if (!xen_start_info) @@ -1574,8 +1575,8 @@ asmlinkage __visible void __init xen_start_kernel(void) * Modify the cache mode translation tables to match Xen's PAT * configuration. */ - - pat_init_cache_modes(); + rdmsrl(MSR_IA32_CR_PAT, pat); + pat_init_cache_modes(pat); /* keep using Xen gdt for now; no urgent need to change it */ -- cgit v1.2.3 From ecb2febaaa3945e1578359adc30ca818e78540fb Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 4 Jun 2015 18:55:14 +0200 Subject: x86/mm: Teach is_new_memtype_allowed() about Write-Through type __ioremap_caller() calls reserve_memtype() and the passed down @new_pcm contains the actual page cache type it reserved in the success case. is_new_memtype_allowed() verifies if converting to the new page cache type is allowed when @pcm (the requested type) is different from @new_pcm. When WT is requested, the caller expects that writes are ordered and uncached. Therefore, enhance is_new_memtype_allowed() to disallow the following cases: - If the request is WT, mapping type cannot be WB - If the request is WT, mapping type cannot be WC Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-7-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index fe57e7a98839..2562e303405b 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -398,11 +398,17 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, * requested memtype: * - request is uncached, return cannot be write-back * - request is write-combine, return cannot be write-back + * - request is write-through, return cannot be write-back + * - request is write-through, return cannot be write-combine */ if ((pcm == _PAGE_CACHE_MODE_UC_MINUS && new_pcm == _PAGE_CACHE_MODE_WB) || (pcm == _PAGE_CACHE_MODE_WC && - new_pcm == _PAGE_CACHE_MODE_WB)) { + new_pcm == _PAGE_CACHE_MODE_WB) || + (pcm == _PAGE_CACHE_MODE_WT && + new_pcm == _PAGE_CACHE_MODE_WB) || + (pcm == _PAGE_CACHE_MODE_WT && + new_pcm == _PAGE_CACHE_MODE_WC)) { return 0; } -- cgit v1.2.3 From d838270e2516db11084bed4e294017eb7b646a75 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 4 Jun 2015 18:55:15 +0200 Subject: x86/mm, asm-generic: Add ioremap_wt() for creating Write-Through mappings Add ioremap_wt() for creating Write-Through mappings on x86. It follows the same model as ioremap_wc() for multi-arch support. Define ARCH_HAS_IOREMAP_WT in the x86 version of io.h to indicate that ioremap_wt() is implemented on x86. Also update the PAT documentation file to cover ioremap_wt(). Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-8-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- Documentation/x86/pat.txt | 4 +++- arch/x86/include/asm/io.h | 2 ++ arch/x86/mm/ioremap.c | 21 +++++++++++++++++++++ include/asm-generic/io.h | 9 +++++++++ include/asm-generic/iomap.h | 4 ++++ 5 files changed, 39 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt index 521bd8adc3b8..db0de6cfc351 100644 --- a/Documentation/x86/pat.txt +++ b/Documentation/x86/pat.txt @@ -12,7 +12,7 @@ virtual addresses. PAT allows for different types of memory attributes. The most commonly used ones that will be supported at this time are Write-back, Uncached, -Write-combined and Uncached Minus. +Write-combined, Write-through and Uncached Minus. PAT APIs @@ -40,6 +40,8 @@ ioremap_nocache | -- | UC- | UC- | | | | | ioremap_wc | -- | -- | WC | | | | | +ioremap_wt | -- | -- | WT | + | | | | set_memory_uc | UC- | -- | -- | set_memory_wb | | | | | | | | diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index a94463063b46..83ec9b1d77cc 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -35,6 +35,7 @@ */ #define ARCH_HAS_IOREMAP_WC +#define ARCH_HAS_IOREMAP_WT #include #include @@ -320,6 +321,7 @@ extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, enum page_cache_mode pcm); extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size); extern bool is_early_ioremap_ptep(pte_t *ptep); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index cc0f17c5ad9f..07cd46a8f30a 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -172,6 +172,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, prot = __pgprot(pgprot_val(prot) | cachemode2protval(_PAGE_CACHE_MODE_WC)); break; + case _PAGE_CACHE_MODE_WT: + prot = __pgprot(pgprot_val(prot) | + cachemode2protval(_PAGE_CACHE_MODE_WT)); + break; case _PAGE_CACHE_MODE_WB: break; } @@ -297,6 +301,23 @@ void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size) } EXPORT_SYMBOL(ioremap_wc); +/** + * ioremap_wt - map memory into CPU space write through + * @phys_addr: bus address of the memory + * @size: size of the resource to map + * + * This version of ioremap ensures that the memory is marked write through. + * Write through stores data into memory while keeping the cache up-to-date. + * + * Must be freed with iounmap. + */ +void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size) +{ + return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_wt); + void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) { return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB, diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 90ccba7f9f9a..f56094cfdeff 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -785,8 +785,17 @@ static inline void __iomem *ioremap_wc(phys_addr_t offset, size_t size) } #endif +#ifndef ioremap_wt +#define ioremap_wt ioremap_wt +static inline void __iomem *ioremap_wt(phys_addr_t offset, size_t size) +{ + return ioremap_nocache(offset, size); +} +#endif + #ifndef iounmap #define iounmap iounmap + static inline void iounmap(void __iomem *addr) { } diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h index 1b41011643a5..d8f8622fa044 100644 --- a/include/asm-generic/iomap.h +++ b/include/asm-generic/iomap.h @@ -66,6 +66,10 @@ extern void ioport_unmap(void __iomem *); #define ioremap_wc ioremap_nocache #endif +#ifndef ARCH_HAS_IOREMAP_WT +#define ioremap_wt ioremap_nocache +#endif + #ifdef CONFIG_PCI /* Destroy a virtual mapping cookie for a PCI BAR (memory or IO) */ struct pci_dev; -- cgit v1.2.3 From d1b4bfbfac32da43bfc8425be1c4303548e20527 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 4 Jun 2015 18:55:18 +0200 Subject: x86/mm/pat: Add pgprot_writethrough() Add pgprot_writethrough() for setting page protection flags to Write-Through mode. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-11-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_types.h | 3 +++ arch/x86/mm/pat.c | 7 +++++++ include/asm-generic/pgtable.h | 4 ++++ 3 files changed, 14 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 78f0c8cbe316..13f310bfc09a 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -367,6 +367,9 @@ extern int nx_enabled; #define pgprot_writecombine pgprot_writecombine extern pgprot_t pgprot_writecombine(pgprot_t prot); +#define pgprot_writethrough pgprot_writethrough +extern pgprot_t pgprot_writethrough(pgprot_t prot); + /* Indicate that x86 has its own track and untrack pfn vma functions */ #define __HAVE_PFNMAP_TRACKING diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 6311193fd1a2..076187c76d7a 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -1000,6 +1000,13 @@ pgprot_t pgprot_writecombine(pgprot_t prot) } EXPORT_SYMBOL_GPL(pgprot_writecombine); +pgprot_t pgprot_writethrough(pgprot_t prot) +{ + return __pgprot(pgprot_val(prot) | + cachemode2protval(_PAGE_CACHE_MODE_WT)); +} +EXPORT_SYMBOL_GPL(pgprot_writethrough); + #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) static struct memtype *memtype_get_idx(loff_t pos) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 39f1d6a2b04d..bd910ceaccfa 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -262,6 +262,10 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) #define pgprot_writecombine pgprot_noncached #endif +#ifndef pgprot_writethrough +#define pgprot_writethrough pgprot_noncached +#endif + #ifndef pgprot_device #define pgprot_device pgprot_noncached #endif -- cgit v1.2.3 From 623dffb2a2e059e1ace45b59b3ff21c66c419614 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 4 Jun 2015 18:55:20 +0200 Subject: x86/mm/pat: Add set_memory_wt() for Write-Through type Now that reserve_ram_pages_type() accepts the WT type, add set_memory_wt(), set_memory_array_wt() and set_pages_array_wt() in order to be able to set memory to Write-Through page cache mode. Also, extend ioremap_change_attr() to accept the WT type. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-13-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- Documentation/x86/pat.txt | 9 ++++-- arch/x86/include/asm/cacheflush.h | 6 +++- arch/x86/mm/ioremap.c | 3 ++ arch/x86/mm/pageattr.c | 62 +++++++++++++++++++++++++++++++-------- 4 files changed, 63 insertions(+), 17 deletions(-) (limited to 'arch/x86/include') diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt index db0de6cfc351..54944c71b819 100644 --- a/Documentation/x86/pat.txt +++ b/Documentation/x86/pat.txt @@ -48,6 +48,9 @@ set_memory_uc | UC- | -- | -- | set_memory_wc | WC | -- | -- | set_memory_wb | | | | | | | | +set_memory_wt | WT | -- | -- | + set_memory_wb | | | | + | | | | pci sysfs resource | -- | -- | UC- | | | | | pci sysfs resource_wc | -- | -- | WC | @@ -150,8 +153,8 @@ can be more restrictive, in case of any existing aliasing for that address. For example: If there is an existing uncached mapping, a new ioremap_wc can return uncached mapping in place of write-combine requested. -set_memory_[uc|wc] and set_memory_wb should be used in pairs, where driver will -first make a region uc or wc and switch it back to wb after use. +set_memory_[uc|wc|wt] and set_memory_wb should be used in pairs, where driver +will first make a region uc, wc or wt and switch it back to wb after use. Over time writes to /proc/mtrr will be deprecated in favor of using PAT based interfaces. Users writing to /proc/mtrr are suggested to use above interfaces. @@ -159,7 +162,7 @@ interfaces. Users writing to /proc/mtrr are suggested to use above interfaces. Drivers should use ioremap_[uc|wc] to access PCI BARs with [uc|wc] access types. -Drivers should use set_memory_[uc|wc] to set access type for RAM ranges. +Drivers should use set_memory_[uc|wc|wt] to set access type for RAM ranges. PAT debugging diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 47c8e32f621a..b6f7457d12e4 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -8,7 +8,7 @@ /* * The set_memory_* API can be used to change various attributes of a virtual * address range. The attributes include: - * Cachability : UnCached, WriteCombining, WriteBack + * Cachability : UnCached, WriteCombining, WriteThrough, WriteBack * Executability : eXeutable, NoteXecutable * Read/Write : ReadOnly, ReadWrite * Presence : NotPresent @@ -35,9 +35,11 @@ int _set_memory_uc(unsigned long addr, int numpages); int _set_memory_wc(unsigned long addr, int numpages); +int _set_memory_wt(unsigned long addr, int numpages); int _set_memory_wb(unsigned long addr, int numpages); int set_memory_uc(unsigned long addr, int numpages); int set_memory_wc(unsigned long addr, int numpages); +int set_memory_wt(unsigned long addr, int numpages); int set_memory_wb(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); @@ -48,10 +50,12 @@ int set_memory_4k(unsigned long addr, int numpages); int set_memory_array_uc(unsigned long *addr, int addrinarray); int set_memory_array_wc(unsigned long *addr, int addrinarray); +int set_memory_array_wt(unsigned long *addr, int addrinarray); int set_memory_array_wb(unsigned long *addr, int addrinarray); int set_pages_array_uc(struct page **pages, int addrinarray); int set_pages_array_wc(struct page **pages, int addrinarray); +int set_pages_array_wt(struct page **pages, int addrinarray); int set_pages_array_wb(struct page **pages, int addrinarray); /* diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 07cd46a8f30a..8405c0c6a535 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -42,6 +42,9 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, case _PAGE_CACHE_MODE_WC: err = _set_memory_wc(vaddr, nrpages); break; + case _PAGE_CACHE_MODE_WT: + err = _set_memory_wt(vaddr, nrpages); + break; case _PAGE_CACHE_MODE_WB: err = _set_memory_wb(vaddr, nrpages); break; diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 31b4f3fd1207..727158cb3b3c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1503,12 +1503,10 @@ EXPORT_SYMBOL(set_memory_uc); static int _set_memory_array(unsigned long *addr, int addrinarray, enum page_cache_mode new_type) { + enum page_cache_mode set_type; int i, j; int ret; - /* - * for now UC MINUS. see comments in ioremap_nocache() - */ for (i = 0; i < addrinarray; i++) { ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE, new_type, NULL); @@ -1516,9 +1514,12 @@ static int _set_memory_array(unsigned long *addr, int addrinarray, goto out_free; } + /* If WC, set to UC- first and then WC */ + set_type = (new_type == _PAGE_CACHE_MODE_WC) ? + _PAGE_CACHE_MODE_UC_MINUS : new_type; + ret = change_page_attr_set(addr, addrinarray, - cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS), - 1); + cachemode2pgprot(set_type), 1); if (!ret && new_type == _PAGE_CACHE_MODE_WC) ret = change_page_attr_set_clr(addr, addrinarray, @@ -1550,6 +1551,12 @@ int set_memory_array_wc(unsigned long *addr, int addrinarray) } EXPORT_SYMBOL(set_memory_array_wc); +int set_memory_array_wt(unsigned long *addr, int addrinarray) +{ + return _set_memory_array(addr, addrinarray, _PAGE_CACHE_MODE_WT); +} +EXPORT_SYMBOL_GPL(set_memory_array_wt); + int _set_memory_wc(unsigned long addr, int numpages) { int ret; @@ -1575,21 +1582,39 @@ int set_memory_wc(unsigned long addr, int numpages) ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, _PAGE_CACHE_MODE_WC, NULL); if (ret) - goto out_err; + return ret; ret = _set_memory_wc(addr, numpages); if (ret) - goto out_free; + free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); - return 0; - -out_free: - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); -out_err: return ret; } EXPORT_SYMBOL(set_memory_wc); +int _set_memory_wt(unsigned long addr, int numpages) +{ + return change_page_attr_set(&addr, numpages, + cachemode2pgprot(_PAGE_CACHE_MODE_WT), 0); +} + +int set_memory_wt(unsigned long addr, int numpages) +{ + int ret; + + ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, + _PAGE_CACHE_MODE_WT, NULL); + if (ret) + return ret; + + ret = _set_memory_wt(addr, numpages); + if (ret) + free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); + + return ret; +} +EXPORT_SYMBOL_GPL(set_memory_wt); + int _set_memory_wb(unsigned long addr, int numpages) { /* WB cache mode is hard wired to all cache attribute bits being 0 */ @@ -1680,6 +1705,7 @@ static int _set_pages_array(struct page **pages, int addrinarray, { unsigned long start; unsigned long end; + enum page_cache_mode set_type; int i; int free_idx; int ret; @@ -1693,8 +1719,12 @@ static int _set_pages_array(struct page **pages, int addrinarray, goto err_out; } + /* If WC, set to UC- first and then WC */ + set_type = (new_type == _PAGE_CACHE_MODE_WC) ? + _PAGE_CACHE_MODE_UC_MINUS : new_type; + ret = cpa_set_pages_array(pages, addrinarray, - cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS)); + cachemode2pgprot(set_type)); if (!ret && new_type == _PAGE_CACHE_MODE_WC) ret = change_page_attr_set_clr(NULL, addrinarray, cachemode2pgprot( @@ -1728,6 +1758,12 @@ int set_pages_array_wc(struct page **pages, int addrinarray) } EXPORT_SYMBOL(set_pages_array_wc); +int set_pages_array_wt(struct page **pages, int addrinarray) +{ + return _set_pages_array(pages, addrinarray, _PAGE_CACHE_MODE_WT); +} +EXPORT_SYMBOL_GPL(set_pages_array_wt); + int set_pages_wb(struct page *page, int numpages) { unsigned long addr = (unsigned long)page_address(page); -- cgit v1.2.3 From bc12edb8739be10fae9f86691a3708d36d00b4f8 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Thu, 4 Jun 2015 18:55:22 +0200 Subject: x86/mce: Add Local MCE definitions Add required definitions to support Local Machine Check Exceptions. Historically, machine check exceptions on Intel x86 processors have been broadcast to all logical processors in the system. Upcoming CPUs will support an opt-in mechanism to request some machine check exceptions be delivered to a single logical processor experiencing the fault. See http://www.intel.com/sdm Volume 3, System Programming Guide, chapter 15 for more information on MSRs and documentation on Local MCE. Signed-off-by: Ashok Raj Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/1433436928-31903-15-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 5 +++++ arch/x86/include/uapi/asm/msr-index.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 6a3034a0a072..ae2bfb895994 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -17,11 +17,16 @@ #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) #define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ #define MCG_ELOG_P (1ULL<<26) /* Extended error log supported */ +#define MCG_LMCE_P (1ULL<<27) /* Local machine check supported */ /* MCG_STATUS register defines */ #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ #define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ #define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ +#define MCG_STATUS_LMCES (1ULL<<3) /* LMCE signaled */ + +/* MCG_EXT_CTL register defines */ +#define MCG_EXT_CTL_LMCE_EN (1ULL<<0) /* Enable LMCE */ /* MCi_STATUS register defines */ #define MCI_STATUS_VAL (1ULL<<63) /* valid error */ diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index c469490db4a8..32c69d5fbeac 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -56,6 +56,7 @@ #define MSR_IA32_MCG_CAP 0x00000179 #define MSR_IA32_MCG_STATUS 0x0000017a #define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_IA32_MCG_EXT_CTL 0x000004d0 #define MSR_OFFCORE_RSP_0 0x000001a6 #define MSR_OFFCORE_RSP_1 0x000001a7 @@ -379,6 +380,7 @@ #define FEATURE_CONTROL_LOCKED (1<<0) #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) +#define FEATURE_CONTROL_LMCE (1<<20) #define MSR_IA32_APICBASE 0x0000001b #define MSR_IA32_APICBASE_BSP (1<<8) -- cgit v1.2.3 From 88d538672ea26223bca08225bc49f4e65e71683d Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Thu, 4 Jun 2015 18:55:23 +0200 Subject: x86/mce: Add infrastructure to support Local MCE Initialize and prepare for handling LMCEs. Add a boot-time option to disable LMCEs. Signed-off-by: Ashok Raj [ Simplify stuff, align statements for better readability, reflow comments; kill unused lmce_clear(); save us an MSR write if LMCE is already enabled. ] Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/1433436928-31903-16-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- Documentation/x86/x86_64/boot-options.txt | 3 +++ arch/x86/include/asm/mce.h | 5 ++++ arch/x86/kernel/cpu/mcheck/mce.c | 3 +++ arch/x86/kernel/cpu/mcheck/mce_intel.c | 43 +++++++++++++++++++++++++++++++ 4 files changed, 54 insertions(+) (limited to 'arch/x86/include') diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index 5223479291a2..68ed3114c363 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt @@ -31,6 +31,9 @@ Machine check (e.g. BIOS or hardware monitoring applications), conflicting with OS's error handling, and you cannot deactivate the agent, then this option will be a help. + mce=no_lmce + Do not opt-in to Local MCE delivery. Use legacy method + to broadcast MCEs. mce=bootlog Enable logging of machine checks left over from booting. Disabled by default on AMD because some BIOS leave bogus ones. diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ae2bfb895994..982dfc3679ad 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -109,6 +109,7 @@ struct mce_log { struct mca_config { bool dont_log_ce; bool cmci_disabled; + bool lmce_disabled; bool ignore_ce; bool disabled; bool ser; @@ -184,12 +185,16 @@ void cmci_clear(void); void cmci_reenable(void); void cmci_rediscover(void); void cmci_recheck(void); +void lmce_clear(void); +void lmce_enable(void); #else static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { } static inline void cmci_clear(void) {} static inline void cmci_reenable(void) {} static inline void cmci_rediscover(void) {} static inline void cmci_recheck(void) {} +static inline void lmce_clear(void) {} +static inline void lmce_enable(void) {} #endif #ifdef CONFIG_X86_MCE_AMD diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0cbcd3183acf..c8c6577b4ada 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1982,6 +1982,7 @@ void mce_disable_bank(int bank) /* * mce=off Disables machine check * mce=no_cmci Disables CMCI + * mce=no_lmce Disables LMCE * mce=dont_log_ce Clears corrected events silently, no log created for CEs. * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared. * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above) @@ -2005,6 +2006,8 @@ static int __init mcheck_enable(char *str) cfg->disabled = true; else if (!strcmp(str, "no_cmci")) cfg->cmci_disabled = true; + else if (!strcmp(str, "no_lmce")) + cfg->lmce_disabled = true; else if (!strcmp(str, "dont_log_ce")) cfg->dont_log_ce = true; else if (!strcmp(str, "ignore_ce")) diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index b4a41cf030ed..2d872deb2c50 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -91,6 +91,36 @@ static int cmci_supported(int *banks) return !!(cap & MCG_CMCI_P); } +static bool lmce_supported(void) +{ + u64 tmp; + + if (mca_cfg.lmce_disabled) + return false; + + rdmsrl(MSR_IA32_MCG_CAP, tmp); + + /* + * LMCE depends on recovery support in the processor. Hence both + * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP. + */ + if ((tmp & (MCG_SER_P | MCG_LMCE_P)) != + (MCG_SER_P | MCG_LMCE_P)) + return false; + + /* + * BIOS should indicate support for LMCE by setting bit 20 in + * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will + * generate a #GP fault. + */ + rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp); + if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) == + (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) + return true; + + return false; +} + bool mce_intel_cmci_poll(void) { if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE) @@ -405,6 +435,19 @@ static void intel_init_cmci(void) cmci_recheck(); } +void intel_init_lmce(void) +{ + u64 val; + + if (!lmce_supported()) + return; + + rdmsrl(MSR_IA32_MCG_EXT_CTL, val); + + if (!(val & MCG_EXT_CTL_LMCE_EN)) + wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN); +} + void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_thermal(c); -- cgit v1.2.3 From c8e56d20f2d190d54c0615775dcb6a23c1091681 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 4 Jun 2015 18:55:25 +0200 Subject: x86: Kill CONFIG_X86_HT In talking to Aravind recently about making certain AMD topology attributes available to the MCE injection module, it seemed like that CONFIG_X86_HT thing is more or less superfluous. It is def_bool y, depends on SMP and gets enabled in the majority of .configs - distro and otherwise - out there. So let's kill it and make code behind it depend directly on SMP. Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aravind Gopalakrishnan Cc: Bartosz Golaszewski Cc: Catalin Marinas Cc: Daniel Walter Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Igor Mammedov Cc: Jacob Shin Cc: Linus Torvalds Cc: Mel Gorman Cc: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1433436928-31903-18-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 8 ++------ arch/x86/include/asm/topology.h | 2 +- arch/x86/kernel/cpu/amd.c | 6 +++--- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/cpu/intel_cacheinfo.c | 8 ++++---- 5 files changed, 12 insertions(+), 16 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8a5cca39e74f..7e39f9b22705 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -261,10 +261,6 @@ config X86_64_SMP def_bool y depends on X86_64 && SMP -config X86_HT - def_bool y - depends on SMP - config X86_32_LAZY_GS def_bool y depends on X86_32 && !CC_STACKPROTECTOR @@ -865,7 +861,7 @@ config NR_CPUS config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" - depends on X86_HT + depends on SMP ---help--- SMT scheduler support improves the CPU scheduler's decision making when dealing with Intel Pentium 4 chips with HyperThreading at a @@ -875,7 +871,7 @@ config SCHED_SMT config SCHED_MC def_bool y prompt "Multi-core scheduler support" - depends on X86_HT + depends on SMP ---help--- Multi-core scheduler support improves the CPU scheduler's decision making when dealing with multi-core CPU chips at a cost of slightly diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 0e8f04f2c26f..8d717faeed22 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -26,7 +26,7 @@ #define _ASM_X86_TOPOLOGY_H #ifdef CONFIG_X86_32 -# ifdef CONFIG_X86_HT +# ifdef CONFIG_SMP # define ENABLE_TOPO_DEFINES # endif #else diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e4cf63301ff4..eb4f01269b5d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -288,7 +288,7 @@ static int nearby_node(int apicid) * Assumption: Number of cores in each internal node is the same. * (2) AMD processors supporting compute units */ -#ifdef CONFIG_X86_HT +#ifdef CONFIG_SMP static void amd_get_topology(struct cpuinfo_x86 *c) { u32 nodes, cores_per_cu = 1; @@ -341,7 +341,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) */ static void amd_detect_cmp(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_HT +#ifdef CONFIG_SMP unsigned bits; int cpu = smp_processor_id(); @@ -420,7 +420,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c) static void early_init_amd_mc(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_HT +#ifdef CONFIG_SMP unsigned bits, ecx; /* Multi core CPU? */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6bec0b55863e..b6fe2e47f7f1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -508,7 +508,7 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c) void detect_ht(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_HT +#ifdef CONFIG_SMP u32 eax, ebx, ecx, edx; int index_msb, core_bits; static bool printed; @@ -844,7 +844,7 @@ static void generic_identify(struct cpuinfo_x86 *c) if (c->cpuid_level >= 0x00000001) { c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; #ifdef CONFIG_X86_32 -# ifdef CONFIG_X86_HT +# ifdef CONFIG_SMP c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); # else c->apicid = c->initial_apicid; diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index edcb0e28c336..be4febc58b94 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -654,7 +654,7 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; -#ifdef CONFIG_X86_HT +#ifdef CONFIG_SMP unsigned int cpu = c->cpu_index; #endif @@ -773,19 +773,19 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c) if (new_l2) { l2 = new_l2; -#ifdef CONFIG_X86_HT +#ifdef CONFIG_SMP per_cpu(cpu_llc_id, cpu) = l2_id; #endif } if (new_l3) { l3 = new_l3; -#ifdef CONFIG_X86_HT +#ifdef CONFIG_SMP per_cpu(cpu_llc_id, cpu) = l3_id; #endif } -#ifdef CONFIG_X86_HT +#ifdef CONFIG_SMP /* * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in * turns means that the only possibility is SMT (as indicated in -- cgit v1.2.3 From b72e7464e4cf80117938e6adb8c22fdc1ca46d42 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 4 Jun 2015 18:55:26 +0200 Subject: x86/uapi: Do not export as part of the user API headers This header containing all MSRs and respective bit definitions got exported to userspace in conjunction with the big UAPI shuffle. But, it doesn't belong in the UAPI headers because userspace can do its own MSR defines and exporting them from the kernel blocks us from doing cleanups/renames in that header. Which is ridiculous - it is not kernel's job to export such a header and keep MSRs list and their names stable. Signed-off-by: Borislav Petkov Acked-by: H. Peter Anvin Cc: Andrew Morton Cc: Andy Lutomirski Cc: David Howells Cc: Linus Torvalds Cc: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1433436928-31903-19-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr-index.h | 665 ++++++++++++++++++++++++++++++++++ arch/x86/include/asm/msr.h | 3 +- arch/x86/include/uapi/asm/msr-index.h | 665 ---------------------------------- arch/x86/include/uapi/asm/msr.h | 2 - tools/power/x86/turbostat/Makefile | 2 +- 5 files changed, 668 insertions(+), 669 deletions(-) create mode 100644 arch/x86/include/asm/msr-index.h delete mode 100644 arch/x86/include/uapi/asm/msr-index.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h new file mode 100644 index 000000000000..9ebc3d009373 --- /dev/null +++ b/arch/x86/include/asm/msr-index.h @@ -0,0 +1,665 @@ +#ifndef _ASM_X86_MSR_INDEX_H +#define _ASM_X86_MSR_INDEX_H + +/* CPU model specific register (MSR) numbers */ + +/* x86-64 specific MSRs */ +#define MSR_EFER 0xc0000080 /* extended feature register */ +#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ +#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ +#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ +#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ +#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ +#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ +#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ +#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ + +/* EFER bits: */ +#define _EFER_SCE 0 /* SYSCALL/SYSRET */ +#define _EFER_LME 8 /* Long mode enable */ +#define _EFER_LMA 10 /* Long mode active (read-only) */ +#define _EFER_NX 11 /* No execute enable */ +#define _EFER_SVME 12 /* Enable virtualization */ +#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ +#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ + +#define EFER_SCE (1<<_EFER_SCE) +#define EFER_LME (1<<_EFER_LME) +#define EFER_LMA (1<<_EFER_LMA) +#define EFER_NX (1<<_EFER_NX) +#define EFER_SVME (1<<_EFER_SVME) +#define EFER_LMSLE (1<<_EFER_LMSLE) +#define EFER_FFXSR (1<<_EFER_FFXSR) + +/* Intel MSRs. Some also available on other CPUs */ +#define MSR_IA32_PERFCTR0 0x000000c1 +#define MSR_IA32_PERFCTR1 0x000000c2 +#define MSR_FSB_FREQ 0x000000cd +#define MSR_NHM_PLATFORM_INFO 0x000000ce + +#define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 +#define NHM_C3_AUTO_DEMOTE (1UL << 25) +#define NHM_C1_AUTO_DEMOTE (1UL << 26) +#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) +#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) + +#define MSR_PLATFORM_INFO 0x000000ce +#define MSR_MTRRcap 0x000000fe +#define MSR_IA32_BBL_CR_CTL 0x00000119 +#define MSR_IA32_BBL_CR_CTL3 0x0000011e + +#define MSR_IA32_SYSENTER_CS 0x00000174 +#define MSR_IA32_SYSENTER_ESP 0x00000175 +#define MSR_IA32_SYSENTER_EIP 0x00000176 + +#define MSR_IA32_MCG_CAP 0x00000179 +#define MSR_IA32_MCG_STATUS 0x0000017a +#define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_IA32_MCG_EXT_CTL 0x000004d0 + +#define MSR_OFFCORE_RSP_0 0x000001a6 +#define MSR_OFFCORE_RSP_1 0x000001a7 +#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad +#define MSR_IVT_TURBO_RATIO_LIMIT 0x000001ae +#define MSR_TURBO_RATIO_LIMIT 0x000001ad +#define MSR_TURBO_RATIO_LIMIT1 0x000001ae +#define MSR_TURBO_RATIO_LIMIT2 0x000001af + +#define MSR_LBR_SELECT 0x000001c8 +#define MSR_LBR_TOS 0x000001c9 +#define MSR_LBR_NHM_FROM 0x00000680 +#define MSR_LBR_NHM_TO 0x000006c0 +#define MSR_LBR_CORE_FROM 0x00000040 +#define MSR_LBR_CORE_TO 0x00000060 + +#define MSR_IA32_PEBS_ENABLE 0x000003f1 +#define MSR_IA32_DS_AREA 0x00000600 +#define MSR_IA32_PERF_CAPABILITIES 0x00000345 +#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 + +#define MSR_IA32_RTIT_CTL 0x00000570 +#define RTIT_CTL_TRACEEN BIT(0) +#define RTIT_CTL_OS BIT(2) +#define RTIT_CTL_USR BIT(3) +#define RTIT_CTL_CR3EN BIT(7) +#define RTIT_CTL_TOPA BIT(8) +#define RTIT_CTL_TSC_EN BIT(10) +#define RTIT_CTL_DISRETC BIT(11) +#define RTIT_CTL_BRANCH_EN BIT(13) +#define MSR_IA32_RTIT_STATUS 0x00000571 +#define RTIT_STATUS_CONTEXTEN BIT(1) +#define RTIT_STATUS_TRIGGEREN BIT(2) +#define RTIT_STATUS_ERROR BIT(4) +#define RTIT_STATUS_STOPPED BIT(5) +#define MSR_IA32_RTIT_CR3_MATCH 0x00000572 +#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560 +#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561 + +#define MSR_MTRRfix64K_00000 0x00000250 +#define MSR_MTRRfix16K_80000 0x00000258 +#define MSR_MTRRfix16K_A0000 0x00000259 +#define MSR_MTRRfix4K_C0000 0x00000268 +#define MSR_MTRRfix4K_C8000 0x00000269 +#define MSR_MTRRfix4K_D0000 0x0000026a +#define MSR_MTRRfix4K_D8000 0x0000026b +#define MSR_MTRRfix4K_E0000 0x0000026c +#define MSR_MTRRfix4K_E8000 0x0000026d +#define MSR_MTRRfix4K_F0000 0x0000026e +#define MSR_MTRRfix4K_F8000 0x0000026f +#define MSR_MTRRdefType 0x000002ff + +#define MSR_IA32_CR_PAT 0x00000277 + +#define MSR_IA32_DEBUGCTLMSR 0x000001d9 +#define MSR_IA32_LASTBRANCHFROMIP 0x000001db +#define MSR_IA32_LASTBRANCHTOIP 0x000001dc +#define MSR_IA32_LASTINTFROMIP 0x000001dd +#define MSR_IA32_LASTINTTOIP 0x000001de + +/* DEBUGCTLMSR bits (others vary by model): */ +#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_TR (1UL << 6) +#define DEBUGCTLMSR_BTS (1UL << 7) +#define DEBUGCTLMSR_BTINT (1UL << 8) +#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) +#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) +#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) + +#define MSR_IA32_POWER_CTL 0x000001fc + +#define MSR_IA32_MC0_CTL 0x00000400 +#define MSR_IA32_MC0_STATUS 0x00000401 +#define MSR_IA32_MC0_ADDR 0x00000402 +#define MSR_IA32_MC0_MISC 0x00000403 + +/* C-state Residency Counters */ +#define MSR_PKG_C3_RESIDENCY 0x000003f8 +#define MSR_PKG_C6_RESIDENCY 0x000003f9 +#define MSR_PKG_C7_RESIDENCY 0x000003fa +#define MSR_CORE_C3_RESIDENCY 0x000003fc +#define MSR_CORE_C6_RESIDENCY 0x000003fd +#define MSR_CORE_C7_RESIDENCY 0x000003fe +#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff +#define MSR_PKG_C2_RESIDENCY 0x0000060d +#define MSR_PKG_C8_RESIDENCY 0x00000630 +#define MSR_PKG_C9_RESIDENCY 0x00000631 +#define MSR_PKG_C10_RESIDENCY 0x00000632 + +/* Run Time Average Power Limiting (RAPL) Interface */ + +#define MSR_RAPL_POWER_UNIT 0x00000606 + +#define MSR_PKG_POWER_LIMIT 0x00000610 +#define MSR_PKG_ENERGY_STATUS 0x00000611 +#define MSR_PKG_PERF_STATUS 0x00000613 +#define MSR_PKG_POWER_INFO 0x00000614 + +#define MSR_DRAM_POWER_LIMIT 0x00000618 +#define MSR_DRAM_ENERGY_STATUS 0x00000619 +#define MSR_DRAM_PERF_STATUS 0x0000061b +#define MSR_DRAM_POWER_INFO 0x0000061c + +#define MSR_PP0_POWER_LIMIT 0x00000638 +#define MSR_PP0_ENERGY_STATUS 0x00000639 +#define MSR_PP0_POLICY 0x0000063a +#define MSR_PP0_PERF_STATUS 0x0000063b + +#define MSR_PP1_POWER_LIMIT 0x00000640 +#define MSR_PP1_ENERGY_STATUS 0x00000641 +#define MSR_PP1_POLICY 0x00000642 + +#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 +#define MSR_PKG_ANY_CORE_C0_RES 0x00000659 +#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A +#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B + +#define MSR_CORE_C1_RES 0x00000660 + +#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 +#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 + +#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 +#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 +#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 + +/* Hardware P state interface */ +#define MSR_PPERF 0x0000064e +#define MSR_PERF_LIMIT_REASONS 0x0000064f +#define MSR_PM_ENABLE 0x00000770 +#define MSR_HWP_CAPABILITIES 0x00000771 +#define MSR_HWP_REQUEST_PKG 0x00000772 +#define MSR_HWP_INTERRUPT 0x00000773 +#define MSR_HWP_REQUEST 0x00000774 +#define MSR_HWP_STATUS 0x00000777 + +/* CPUID.6.EAX */ +#define HWP_BASE_BIT (1<<7) +#define HWP_NOTIFICATIONS_BIT (1<<8) +#define HWP_ACTIVITY_WINDOW_BIT (1<<9) +#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10) +#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) + +/* IA32_HWP_CAPABILITIES */ +#define HWP_HIGHEST_PERF(x) (x & 0xff) +#define HWP_GUARANTEED_PERF(x) ((x & (0xff << 8)) >>8) +#define HWP_MOSTEFFICIENT_PERF(x) ((x & (0xff << 16)) >>16) +#define HWP_LOWEST_PERF(x) ((x & (0xff << 24)) >>24) + +/* IA32_HWP_REQUEST */ +#define HWP_MIN_PERF(x) (x & 0xff) +#define HWP_MAX_PERF(x) ((x & 0xff) << 8) +#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) +#define HWP_ENERGY_PERF_PREFERENCE(x) ((x & 0xff) << 24) +#define HWP_ACTIVITY_WINDOW(x) ((x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((x & 0x1) << 42) + +/* IA32_HWP_STATUS */ +#define HWP_GUARANTEED_CHANGE(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4) + +/* IA32_HWP_INTERRUPT */ +#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2) + +#define MSR_AMD64_MC0_MASK 0xc0010044 + +#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) +#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x)) +#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) +#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) + +#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x)) + +/* These are consecutive and not in the normal 4er MCE bank block */ +#define MSR_IA32_MC0_CTL2 0x00000280 +#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) + +#define MSR_P6_PERFCTR0 0x000000c1 +#define MSR_P6_PERFCTR1 0x000000c2 +#define MSR_P6_EVNTSEL0 0x00000186 +#define MSR_P6_EVNTSEL1 0x00000187 + +#define MSR_KNC_PERFCTR0 0x00000020 +#define MSR_KNC_PERFCTR1 0x00000021 +#define MSR_KNC_EVNTSEL0 0x00000028 +#define MSR_KNC_EVNTSEL1 0x00000029 + +/* Alternative perfctr range with full access. */ +#define MSR_IA32_PMC0 0x000004c1 + +/* AMD64 MSRs. Not complete. See the architecture manual for a more + complete list. */ + +#define MSR_AMD64_PATCH_LEVEL 0x0000008b +#define MSR_AMD64_TSC_RATIO 0xc0000104 +#define MSR_AMD64_NB_CFG 0xc001001f +#define MSR_AMD64_PATCH_LOADER 0xc0010020 +#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 +#define MSR_AMD64_OSVW_STATUS 0xc0010141 +#define MSR_AMD64_LS_CFG 0xc0011020 +#define MSR_AMD64_DC_CFG 0xc0011022 +#define MSR_AMD64_BU_CFG2 0xc001102a +#define MSR_AMD64_IBSFETCHCTL 0xc0011030 +#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 +#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 +#define MSR_AMD64_IBSFETCH_REG_COUNT 3 +#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL< +#include "msr-index.h" #ifndef __ASSEMBLY__ #include #include #include +#include struct msr { union { diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h deleted file mode 100644 index 9ebc3d009373..000000000000 --- a/arch/x86/include/uapi/asm/msr-index.h +++ /dev/null @@ -1,665 +0,0 @@ -#ifndef _ASM_X86_MSR_INDEX_H -#define _ASM_X86_MSR_INDEX_H - -/* CPU model specific register (MSR) numbers */ - -/* x86-64 specific MSRs */ -#define MSR_EFER 0xc0000080 /* extended feature register */ -#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ -#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ -#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ -#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ -#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ -#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ -#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ -#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ - -/* EFER bits: */ -#define _EFER_SCE 0 /* SYSCALL/SYSRET */ -#define _EFER_LME 8 /* Long mode enable */ -#define _EFER_LMA 10 /* Long mode active (read-only) */ -#define _EFER_NX 11 /* No execute enable */ -#define _EFER_SVME 12 /* Enable virtualization */ -#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ -#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ - -#define EFER_SCE (1<<_EFER_SCE) -#define EFER_LME (1<<_EFER_LME) -#define EFER_LMA (1<<_EFER_LMA) -#define EFER_NX (1<<_EFER_NX) -#define EFER_SVME (1<<_EFER_SVME) -#define EFER_LMSLE (1<<_EFER_LMSLE) -#define EFER_FFXSR (1<<_EFER_FFXSR) - -/* Intel MSRs. Some also available on other CPUs */ -#define MSR_IA32_PERFCTR0 0x000000c1 -#define MSR_IA32_PERFCTR1 0x000000c2 -#define MSR_FSB_FREQ 0x000000cd -#define MSR_NHM_PLATFORM_INFO 0x000000ce - -#define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 -#define NHM_C3_AUTO_DEMOTE (1UL << 25) -#define NHM_C1_AUTO_DEMOTE (1UL << 26) -#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) -#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) -#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) - -#define MSR_PLATFORM_INFO 0x000000ce -#define MSR_MTRRcap 0x000000fe -#define MSR_IA32_BBL_CR_CTL 0x00000119 -#define MSR_IA32_BBL_CR_CTL3 0x0000011e - -#define MSR_IA32_SYSENTER_CS 0x00000174 -#define MSR_IA32_SYSENTER_ESP 0x00000175 -#define MSR_IA32_SYSENTER_EIP 0x00000176 - -#define MSR_IA32_MCG_CAP 0x00000179 -#define MSR_IA32_MCG_STATUS 0x0000017a -#define MSR_IA32_MCG_CTL 0x0000017b -#define MSR_IA32_MCG_EXT_CTL 0x000004d0 - -#define MSR_OFFCORE_RSP_0 0x000001a6 -#define MSR_OFFCORE_RSP_1 0x000001a7 -#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad -#define MSR_IVT_TURBO_RATIO_LIMIT 0x000001ae -#define MSR_TURBO_RATIO_LIMIT 0x000001ad -#define MSR_TURBO_RATIO_LIMIT1 0x000001ae -#define MSR_TURBO_RATIO_LIMIT2 0x000001af - -#define MSR_LBR_SELECT 0x000001c8 -#define MSR_LBR_TOS 0x000001c9 -#define MSR_LBR_NHM_FROM 0x00000680 -#define MSR_LBR_NHM_TO 0x000006c0 -#define MSR_LBR_CORE_FROM 0x00000040 -#define MSR_LBR_CORE_TO 0x00000060 - -#define MSR_IA32_PEBS_ENABLE 0x000003f1 -#define MSR_IA32_DS_AREA 0x00000600 -#define MSR_IA32_PERF_CAPABILITIES 0x00000345 -#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 - -#define MSR_IA32_RTIT_CTL 0x00000570 -#define RTIT_CTL_TRACEEN BIT(0) -#define RTIT_CTL_OS BIT(2) -#define RTIT_CTL_USR BIT(3) -#define RTIT_CTL_CR3EN BIT(7) -#define RTIT_CTL_TOPA BIT(8) -#define RTIT_CTL_TSC_EN BIT(10) -#define RTIT_CTL_DISRETC BIT(11) -#define RTIT_CTL_BRANCH_EN BIT(13) -#define MSR_IA32_RTIT_STATUS 0x00000571 -#define RTIT_STATUS_CONTEXTEN BIT(1) -#define RTIT_STATUS_TRIGGEREN BIT(2) -#define RTIT_STATUS_ERROR BIT(4) -#define RTIT_STATUS_STOPPED BIT(5) -#define MSR_IA32_RTIT_CR3_MATCH 0x00000572 -#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560 -#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561 - -#define MSR_MTRRfix64K_00000 0x00000250 -#define MSR_MTRRfix16K_80000 0x00000258 -#define MSR_MTRRfix16K_A0000 0x00000259 -#define MSR_MTRRfix4K_C0000 0x00000268 -#define MSR_MTRRfix4K_C8000 0x00000269 -#define MSR_MTRRfix4K_D0000 0x0000026a -#define MSR_MTRRfix4K_D8000 0x0000026b -#define MSR_MTRRfix4K_E0000 0x0000026c -#define MSR_MTRRfix4K_E8000 0x0000026d -#define MSR_MTRRfix4K_F0000 0x0000026e -#define MSR_MTRRfix4K_F8000 0x0000026f -#define MSR_MTRRdefType 0x000002ff - -#define MSR_IA32_CR_PAT 0x00000277 - -#define MSR_IA32_DEBUGCTLMSR 0x000001d9 -#define MSR_IA32_LASTBRANCHFROMIP 0x000001db -#define MSR_IA32_LASTBRANCHTOIP 0x000001dc -#define MSR_IA32_LASTINTFROMIP 0x000001dd -#define MSR_IA32_LASTINTTOIP 0x000001de - -/* DEBUGCTLMSR bits (others vary by model): */ -#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ -#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ -#define DEBUGCTLMSR_TR (1UL << 6) -#define DEBUGCTLMSR_BTS (1UL << 7) -#define DEBUGCTLMSR_BTINT (1UL << 8) -#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) -#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) -#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) - -#define MSR_IA32_POWER_CTL 0x000001fc - -#define MSR_IA32_MC0_CTL 0x00000400 -#define MSR_IA32_MC0_STATUS 0x00000401 -#define MSR_IA32_MC0_ADDR 0x00000402 -#define MSR_IA32_MC0_MISC 0x00000403 - -/* C-state Residency Counters */ -#define MSR_PKG_C3_RESIDENCY 0x000003f8 -#define MSR_PKG_C6_RESIDENCY 0x000003f9 -#define MSR_PKG_C7_RESIDENCY 0x000003fa -#define MSR_CORE_C3_RESIDENCY 0x000003fc -#define MSR_CORE_C6_RESIDENCY 0x000003fd -#define MSR_CORE_C7_RESIDENCY 0x000003fe -#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff -#define MSR_PKG_C2_RESIDENCY 0x0000060d -#define MSR_PKG_C8_RESIDENCY 0x00000630 -#define MSR_PKG_C9_RESIDENCY 0x00000631 -#define MSR_PKG_C10_RESIDENCY 0x00000632 - -/* Run Time Average Power Limiting (RAPL) Interface */ - -#define MSR_RAPL_POWER_UNIT 0x00000606 - -#define MSR_PKG_POWER_LIMIT 0x00000610 -#define MSR_PKG_ENERGY_STATUS 0x00000611 -#define MSR_PKG_PERF_STATUS 0x00000613 -#define MSR_PKG_POWER_INFO 0x00000614 - -#define MSR_DRAM_POWER_LIMIT 0x00000618 -#define MSR_DRAM_ENERGY_STATUS 0x00000619 -#define MSR_DRAM_PERF_STATUS 0x0000061b -#define MSR_DRAM_POWER_INFO 0x0000061c - -#define MSR_PP0_POWER_LIMIT 0x00000638 -#define MSR_PP0_ENERGY_STATUS 0x00000639 -#define MSR_PP0_POLICY 0x0000063a -#define MSR_PP0_PERF_STATUS 0x0000063b - -#define MSR_PP1_POWER_LIMIT 0x00000640 -#define MSR_PP1_ENERGY_STATUS 0x00000641 -#define MSR_PP1_POLICY 0x00000642 - -#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 -#define MSR_PKG_ANY_CORE_C0_RES 0x00000659 -#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A -#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B - -#define MSR_CORE_C1_RES 0x00000660 - -#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 -#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 - -#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 -#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 -#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 - -/* Hardware P state interface */ -#define MSR_PPERF 0x0000064e -#define MSR_PERF_LIMIT_REASONS 0x0000064f -#define MSR_PM_ENABLE 0x00000770 -#define MSR_HWP_CAPABILITIES 0x00000771 -#define MSR_HWP_REQUEST_PKG 0x00000772 -#define MSR_HWP_INTERRUPT 0x00000773 -#define MSR_HWP_REQUEST 0x00000774 -#define MSR_HWP_STATUS 0x00000777 - -/* CPUID.6.EAX */ -#define HWP_BASE_BIT (1<<7) -#define HWP_NOTIFICATIONS_BIT (1<<8) -#define HWP_ACTIVITY_WINDOW_BIT (1<<9) -#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10) -#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) - -/* IA32_HWP_CAPABILITIES */ -#define HWP_HIGHEST_PERF(x) (x & 0xff) -#define HWP_GUARANTEED_PERF(x) ((x & (0xff << 8)) >>8) -#define HWP_MOSTEFFICIENT_PERF(x) ((x & (0xff << 16)) >>16) -#define HWP_LOWEST_PERF(x) ((x & (0xff << 24)) >>24) - -/* IA32_HWP_REQUEST */ -#define HWP_MIN_PERF(x) (x & 0xff) -#define HWP_MAX_PERF(x) ((x & 0xff) << 8) -#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) -#define HWP_ENERGY_PERF_PREFERENCE(x) ((x & 0xff) << 24) -#define HWP_ACTIVITY_WINDOW(x) ((x & 0xff3) << 32) -#define HWP_PACKAGE_CONTROL(x) ((x & 0x1) << 42) - -/* IA32_HWP_STATUS */ -#define HWP_GUARANTEED_CHANGE(x) (x & 0x1) -#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4) - -/* IA32_HWP_INTERRUPT */ -#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1) -#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2) - -#define MSR_AMD64_MC0_MASK 0xc0010044 - -#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) -#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x)) -#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) -#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) - -#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x)) - -/* These are consecutive and not in the normal 4er MCE bank block */ -#define MSR_IA32_MC0_CTL2 0x00000280 -#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) - -#define MSR_P6_PERFCTR0 0x000000c1 -#define MSR_P6_PERFCTR1 0x000000c2 -#define MSR_P6_EVNTSEL0 0x00000186 -#define MSR_P6_EVNTSEL1 0x00000187 - -#define MSR_KNC_PERFCTR0 0x00000020 -#define MSR_KNC_PERFCTR1 0x00000021 -#define MSR_KNC_EVNTSEL0 0x00000028 -#define MSR_KNC_EVNTSEL1 0x00000029 - -/* Alternative perfctr range with full access. */ -#define MSR_IA32_PMC0 0x000004c1 - -/* AMD64 MSRs. Not complete. See the architecture manual for a more - complete list. */ - -#define MSR_AMD64_PATCH_LEVEL 0x0000008b -#define MSR_AMD64_TSC_RATIO 0xc0000104 -#define MSR_AMD64_NB_CFG 0xc001001f -#define MSR_AMD64_PATCH_LOADER 0xc0010020 -#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 -#define MSR_AMD64_OSVW_STATUS 0xc0010141 -#define MSR_AMD64_LS_CFG 0xc0011020 -#define MSR_AMD64_DC_CFG 0xc0011022 -#define MSR_AMD64_BU_CFG2 0xc001102a -#define MSR_AMD64_IBSFETCHCTL 0xc0011030 -#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 -#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 -#define MSR_AMD64_IBSFETCH_REG_COUNT 3 -#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL< - #ifndef __ASSEMBLY__ #include diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 4039854560d0..e367b1a85d70 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -9,7 +9,7 @@ endif turbostat : turbostat.c CFLAGS += -Wall -CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/uapi/asm/msr-index.h"' +CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' %: %.c @mkdir -p $(BUILD_OUTPUT) -- cgit v1.2.3 From 2cd23553b488589f287457b7396470f5e3c40698 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Jun 2015 08:28:07 +0200 Subject: x86/asm/entry: Rename compat syscall entry points Rename the following system call entry points: ia32_cstar_target -> entry_SYSCALL_compat ia32_syscall -> entry_INT80_compat The generic naming scheme for x86 system call entry points is: entry_MNEMONIC_qualifier where 'qualifier' is one of _32, _64 or _compat. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- Documentation/x86/entry_64.txt | 4 ++-- arch/x86/entry/entry_64_compat.S | 8 ++++---- arch/x86/entry/syscall_32.c | 6 +++--- arch/x86/include/asm/proto.h | 4 ++-- arch/x86/kernel/asm-offsets_64.c | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/traps.c | 2 +- arch/x86/xen/xen-asm_64.S | 2 +- 8 files changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/x86/include') diff --git a/Documentation/x86/entry_64.txt b/Documentation/x86/entry_64.txt index 9132b86176a3..33884d156125 100644 --- a/Documentation/x86/entry_64.txt +++ b/Documentation/x86/entry_64.txt @@ -18,10 +18,10 @@ Some of these entries are: - system_call: syscall instruction from 64-bit code. - - ia32_syscall: int 0x80 from 32-bit or 64-bit code; compat syscall + - entry_INT80_compat: int 0x80 from 32-bit or 64-bit code; compat syscall either way. - - ia32_syscall, ia32_sysenter: syscall and sysenter from 32-bit + - entry_INT80_compat, ia32_sysenter: syscall and sysenter from 32-bit code - interrupt: An array of entries. Every IDT vector that doesn't diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 9558dacf32b9..8058892fb5ff 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -288,7 +288,7 @@ ENDPROC(ia32_sysenter_target) * path below. We set up a complete hardware stack frame to share code * with the int 0x80 path. */ -ENTRY(ia32_cstar_target) +ENTRY(entry_SYSCALL_compat) /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, @@ -409,7 +409,7 @@ cstar_tracesys: RESTORE_EXTRA_REGS jmp cstar_do_call -END(ia32_cstar_target) +END(entry_SYSCALL_compat) ia32_badarg: ASM_CLAC @@ -445,7 +445,7 @@ ia32_ret_from_sys_call: * Assumes it is only called from user space and entered with interrupts off. */ -ENTRY(ia32_syscall) +ENTRY(entry_INT80_compat) /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, @@ -511,7 +511,7 @@ ia32_tracesys: movl %eax, %eax /* zero extension */ RESTORE_EXTRA_REGS jmp ia32_do_call -END(ia32_syscall) +END(entry_INT80_compat) .macro PTREGSCALL label, func ALIGN diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 3777189c4a19..e398d033673f 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -10,7 +10,7 @@ #else #define SYM(sym, compat) sym #define ia32_sys_call_table sys_call_table -#define __NR_ia32_syscall_max __NR_syscall_max +#define __NR_entry_INT80_compat_max __NR_syscall_max #endif #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ; @@ -23,11 +23,11 @@ typedef asmlinkage void (*sys_call_ptr_t)(void); extern asmlinkage void sys_ni_syscall(void); -__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = { +__visible const sys_call_ptr_t ia32_sys_call_table[__NR_entry_INT80_compat_max+1] = { /* * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ - [0 ... __NR_ia32_syscall_max] = &sys_ni_syscall, + [0 ... __NR_entry_INT80_compat_max] = &sys_ni_syscall, #include }; diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index a90f8972dad5..7d2961a231f1 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -8,8 +8,8 @@ void system_call(void); void syscall_init(void); -void ia32_syscall(void); -void ia32_cstar_target(void); +void entry_INT80_compat(void); +void entry_SYSCALL_compat(void); void ia32_sysenter_target(void); void x86_configure_nx(void); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index dcaab87da629..599afcf0005f 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -66,7 +66,7 @@ int main(void) DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); DEFINE(NR_syscalls, sizeof(syscalls_64)); - DEFINE(__NR_ia32_syscall_max, sizeof(syscalls_ia32) - 1); + DEFINE(__NR_entry_INT80_compat_max, sizeof(syscalls_ia32) - 1); DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32)); return 0; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6bec0b55863e..f0b85c401014 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1207,7 +1207,7 @@ void syscall_init(void) wrmsrl(MSR_LSTAR, system_call); #ifdef CONFIG_IA32_EMULATION - wrmsrl(MSR_CSTAR, ia32_cstar_target); + wrmsrl(MSR_CSTAR, entry_SYSCALL_compat); /* * This only works on Intel CPUs. * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP. diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5e0791f9d3dc..edf97986a53d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -992,7 +992,7 @@ void __init trap_init(void) set_bit(i, used_vectors); #ifdef CONFIG_IA32_EMULATION - set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); + set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_compat); set_bit(IA32_SYSCALL_VECTOR, used_vectors); #endif diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 04529e620559..3c43c03a499c 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -122,7 +122,7 @@ ENDPROC(xen_syscall_target) /* 32-bit compat syscall target */ ENTRY(xen_syscall32_target) undo_xen_syscall - jmp ia32_cstar_target + jmp entry_SYSCALL_compat ENDPROC(xen_syscall32_target) /* 32-bit compat sysenter target */ -- cgit v1.2.3 From 4c8cd0c50d0b1559727bf0ec7ff27caeba2dfe09 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Jun 2015 08:33:56 +0200 Subject: x86/asm/entry: Untangle 'ia32_sysenter_target' into two entry points: entry_SYSENTER_32 and entry_SYSENTER_compat So the SYSENTER instruction is pretty quirky and it has different behavior depending on bitness and CPU maker. Yet we create a false sense of coherency by naming it 'ia32_sysenter_target' in both of the cases. Split the name into its two uses: ia32_sysenter_target (32) -> entry_SYSENTER_32 ia32_sysenter_target (64) -> entry_SYSENTER_compat As per the generic naming scheme for x86 system call entry points: entry_MNEMONIC_qualifier where 'qualifier' is one of _32, _64 or _compat. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 10 +++++----- arch/x86/entry/entry_64_compat.S | 4 ++-- arch/x86/include/asm/proto.h | 3 ++- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/xen/xen-asm_64.S | 2 +- 5 files changed, 12 insertions(+), 11 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 0ac73de925d1..a65f46c3b8e1 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -307,7 +307,7 @@ END(resume_kernel) the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ # sysenter call handler stub -ENTRY(ia32_sysenter_target) +ENTRY(entry_SYSENTER_32) movl TSS_sysenter_sp0(%esp),%esp sysenter_past_esp: /* @@ -412,7 +412,7 @@ sysexit_audit: .popsection _ASM_EXTABLE(1b,2b) PTGS_TO_GS_EX -ENDPROC(ia32_sysenter_target) +ENDPROC(entry_SYSENTER_32) # system call handler stub ENTRY(system_call) @@ -1135,7 +1135,7 @@ END(page_fault) ENTRY(debug) ASM_CLAC - cmpl $ia32_sysenter_target,(%esp) + cmpl $entry_SYSENTER_32,(%esp) jne debug_stack_correct FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn debug_stack_correct: @@ -1165,7 +1165,7 @@ ENTRY(nmi) popl %eax je nmi_espfix_stack #endif - cmpl $ia32_sysenter_target,(%esp) + cmpl $entry_SYSENTER_32,(%esp) je nmi_stack_fixup pushl %eax movl %esp,%eax @@ -1176,7 +1176,7 @@ ENTRY(nmi) cmpl $(THREAD_SIZE-20),%eax popl %eax jae nmi_stack_correct - cmpl $ia32_sysenter_target,12(%esp) + cmpl $entry_SYSENTER_32,12(%esp) je nmi_debug_stack_check nmi_stack_correct: pushl %eax diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 8058892fb5ff..59840e33d203 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -57,7 +57,7 @@ ENDPROC(native_usergs_sysret32) * path below. We set up a complete hardware stack frame to share code * with the int 0x80 path. */ -ENTRY(ia32_sysenter_target) +ENTRY(entry_SYSENTER_compat) /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, @@ -256,7 +256,7 @@ sysenter_tracesys: RESTORE_EXTRA_REGS jmp sysenter_do_call -ENDPROC(ia32_sysenter_target) +ENDPROC(entry_SYSENTER_compat) /* * 32-bit SYSCALL instruction entry. diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 7d2961a231f1..83a7f8227949 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -10,7 +10,8 @@ void syscall_init(void); void entry_INT80_compat(void); void entry_SYSCALL_compat(void); -void ia32_sysenter_target(void); +void entry_SYSENTER_32(void); +void entry_SYSENTER_compat(void); void x86_configure_nx(void); void x86_report_nx(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f0b85c401014..b2ae7cec33ca 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1026,7 +1026,7 @@ void enable_sep_cpu(void) (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack), 0); - wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)ia32_sysenter_target, 0); + wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); out: put_cpu(); @@ -1216,7 +1216,7 @@ void syscall_init(void) */ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); - wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); + wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); #else wrmsrl(MSR_CSTAR, ignore_sysret); wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 3c43c03a499c..ccac1b1e6e93 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -128,7 +128,7 @@ ENDPROC(xen_syscall32_target) /* 32-bit compat sysenter target */ ENTRY(xen_sysenter_target) undo_xen_syscall - jmp ia32_sysenter_target + jmp entry_SYSENTER_compat ENDPROC(xen_sysenter_target) #else /* !CONFIG_IA32_EMULATION */ -- cgit v1.2.3 From b2502b418e63fcde0fe1857732a476b5aa3789b1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Jun 2015 08:42:03 +0200 Subject: x86/asm/entry: Untangle 'system_call' into two entry points: entry_SYSCALL_64 and entry_INT80_32 The 'system_call' entry points differ starkly between native 32-bit and 64-bit kernels: on 32-bit kernels it defines the INT 0x80 entry point, while on 64-bit it's the SYSCALL entry point. This is pretty confusing when looking at generic code, and it also obscures the nature of the entry point at the assembly level. So unangle this by splitting the name into its two uses: system_call (32) -> entry_INT80_32 system_call (64) -> entry_SYSCALL_64 As per the generic naming scheme for x86 system call entry points: entry_MNEMONIC_qualifier where 'qualifier' is one of _32, _64 or _compat. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 4 ++-- arch/x86/entry/entry_64.S | 10 +++++----- arch/x86/include/asm/proto.h | 5 +++-- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/traps.c | 5 ++--- arch/x86/xen/xen-asm_64.S | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index a65f46c3b8e1..d59461032625 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -415,7 +415,7 @@ sysexit_audit: ENDPROC(entry_SYSENTER_32) # system call handler stub -ENTRY(system_call) +ENTRY(entry_INT80_32) ASM_CLAC pushl %eax # save orig_eax SAVE_ALL @@ -508,7 +508,7 @@ ldt_ss: lss (%esp), %esp /* switch to espfix segment */ jmp restore_nocheck #endif -ENDPROC(system_call) +ENDPROC(entry_INT80_32) # perform work that needs to be done immediately before resumption ALIGN diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4cf3dd36aa0d..e1852c407155 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -137,7 +137,7 @@ ENDPROC(native_usergs_sysret64) * with them due to bugs in both AMD and Intel CPUs. */ -ENTRY(system_call) +ENTRY(entry_SYSCALL_64) /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, @@ -149,7 +149,7 @@ ENTRY(system_call) * after the swapgs, so that it can do the swapgs * for the guest and jump here on syscall. */ -GLOBAL(system_call_after_swapgs) +GLOBAL(entry_SYSCALL_64_after_swapgs) movq %rsp,PER_CPU_VAR(rsp_scratch) movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp @@ -182,7 +182,7 @@ GLOBAL(system_call_after_swapgs) testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz tracesys -system_call_fastpath: +entry_SYSCALL_64_fastpath: #if __SYSCALL_MASK == ~0 cmpq $__NR_syscall_max,%rax #else @@ -246,7 +246,7 @@ tracesys: jnz tracesys_phase2 /* if needed, run the slow path */ RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */ movq ORIG_RAX(%rsp), %rax - jmp system_call_fastpath /* and return to the fast path */ + jmp entry_SYSCALL_64_fastpath /* and return to the fast path */ tracesys_phase2: SAVE_EXTRA_REGS @@ -411,7 +411,7 @@ syscall_return_via_sysret: opportunistic_sysret_failed: SWAPGS jmp restore_c_regs_and_iret -END(system_call) +END(entry_SYSCALL_64) .macro FORK_LIKE func diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 83a7f8227949..a4a77286cb1d 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -5,11 +5,12 @@ /* misc architecture specific prototypes */ -void system_call(void); void syscall_init(void); -void entry_INT80_compat(void); +void entry_SYSCALL_64(void); void entry_SYSCALL_compat(void); +void entry_INT80_32(void); +void entry_INT80_compat(void); void entry_SYSENTER_32(void); void entry_SYSENTER_compat(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b2ae7cec33ca..914be4bbc2e5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1204,7 +1204,7 @@ void syscall_init(void) * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. */ wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); - wrmsrl(MSR_LSTAR, system_call); + wrmsrl(MSR_LSTAR, entry_SYSCALL_64); #ifdef CONFIG_IA32_EMULATION wrmsrl(MSR_CSTAR, entry_SYSCALL_compat); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index edf97986a53d..001ddac221a1 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -72,8 +72,7 @@ gate_desc debug_idt_table[NR_VECTORS] __page_aligned_bss; #else #include #include - -asmlinkage int system_call(void); +#include #endif /* Must be page-aligned because the real IDT is used in a fixmap. */ @@ -997,7 +996,7 @@ void __init trap_init(void) #endif #ifdef CONFIG_X86_32 - set_system_trap_gate(IA32_SYSCALL_VECTOR, &system_call); + set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32); set_bit(IA32_SYSCALL_VECTOR, used_vectors); #endif diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index ccac1b1e6e93..f22667abf7b9 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -114,7 +114,7 @@ RELOC(xen_sysret32, 1b+1) /* Normal 64-bit system call target */ ENTRY(xen_syscall_target) undo_xen_syscall - jmp system_call_after_swapgs + jmp entry_SYSCALL_64_after_swapgs ENDPROC(xen_syscall_target) #ifdef CONFIG_IA32_EMULATION -- cgit v1.2.3 From 6f281923949a4944a7d5625e4f900ec02fefee59 Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Tue, 9 Jun 2015 13:20:28 +0800 Subject: iommu: Add new member capability to struct irq_remap_ops Add a new member 'capability' to struct irq_remap_ops for storing information about available capabilities such as VT-d Posted-Interrupts. Signed-off-by: Feng Wu Reviewed-by: Jiang Liu Reviewed-by: Thomas Gleixner Acked-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: dwmw2@infradead.org Link: http://lkml.kernel.org/r/1433827237-3382-2-git-send-email-feng.wu@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/irq_remapping.h | 4 ++++ drivers/iommu/irq_remapping.h | 3 +++ 2 files changed, 7 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 78974fbc33b4..8b432dd8f170 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -29,6 +29,10 @@ struct msi_msg; struct irq_alloc_info; +enum irq_remap_cap { + IRQ_POSTING_CAP = 0, +}; + #ifdef CONFIG_IRQ_REMAP extern void set_irq_remapping_broken(void); diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index 91d5a119956a..b6ca30d31986 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -35,6 +35,9 @@ extern int no_x2apic_optout; extern int irq_remapping_enabled; struct irq_remap_ops { + /* The supported capabilities */ + int capability; + /* Initializes hardware and makes it ready for remapping interrupts */ int (*prepare)(void); -- cgit v1.2.3 From 8541186faf3b59623c86022cc5f21ce9bd1332c5 Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Tue, 9 Jun 2015 13:20:31 +0800 Subject: iommu, x86: Implement irq_set_vcpu_affinity for intel_ir_chip Interrupt chip callback to set the VCPU affinity for posted interrupts. [ tglx: Use the helper function to copy from the remap irte instead of open coding it. Massage the comment as well ] Signed-off-by: Feng Wu Reviewed-by: Jiang Liu Reviewed-by: Thomas Gleixner Acked-by: David Woodhouse Cc: iommu@lists.linux-foundation.org Cc: joro@8bytes.org Cc: dwmw2@infradead.org Link: http://lkml.kernel.org/r/1433827237-3382-5-git-send-email-feng.wu@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/irq_remapping.h | 5 +++++ drivers/iommu/intel_irq_remapping.c | 37 ++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 8b432dd8f170..e479fbda55cf 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -57,6 +57,11 @@ static inline struct irq_domain *arch_get_ir_parent_domain(void) return x86_vector_domain; } +struct vcpu_data { + u64 pi_desc_addr; /* Physical address of PI Descriptor */ + u32 vector; /* Guest vector of the interrupt */ +}; + #else /* CONFIG_IRQ_REMAP */ static inline void set_irq_remapping_broken(void) { } diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 8fad71cc27e7..a643eecf9d29 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -1013,10 +1013,47 @@ static void intel_ir_compose_msi_msg(struct irq_data *irq_data, *msg = ir_data->msi_entry; } +static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info) +{ + struct intel_ir_data *ir_data = data->chip_data; + struct vcpu_data *vcpu_pi_info = info; + + /* stop posting interrupts, back to remapping mode */ + if (!vcpu_pi_info) { + modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry); + } else { + struct irte irte_pi; + + /* + * We are not caching the posted interrupt entry. We + * copy the data from the remapped entry and modify + * the fields which are relevant for posted mode. The + * cached remapped entry is used for switching back to + * remapped mode. + */ + memset(&irte_pi, 0, sizeof(irte_pi)); + dmar_copy_shared_irte(&irte_pi, &ir_data->irte_entry); + + /* Update the posted mode fields */ + irte_pi.p_pst = 1; + irte_pi.p_urgent = 0; + irte_pi.p_vector = vcpu_pi_info->vector; + irte_pi.pda_l = (vcpu_pi_info->pi_desc_addr >> + (32 - PDA_LOW_BIT)) & ~(-1UL << PDA_LOW_BIT); + irte_pi.pda_h = (vcpu_pi_info->pi_desc_addr >> 32) & + ~(-1UL << PDA_HIGH_BIT); + + modify_irte(&ir_data->irq_2_iommu, &irte_pi); + } + + return 0; +} + static struct irq_chip intel_ir_chip = { .irq_ack = ir_ack_apic_edge, .irq_set_affinity = intel_ir_set_affinity, .irq_compose_msi_msg = intel_ir_compose_msi_msg, + .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, }; static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data, -- cgit v1.2.3 From 959c870f7305be019d9316bc4e038dc6119d51ad Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Tue, 9 Jun 2015 13:20:36 +0800 Subject: iommu, x86: Provide irq_remapping_cap() interface Add a new interface irq_remapping_cap() to detect whether irq remapping supports new features, such as VT-d Posted-Interrupts. Export the function, so that KVM code can check this and use this mechanism properly. Signed-off-by: Feng Wu Reviewed-by: Jiang Liu Reviewed-by: Thomas Gleixner Acked-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: dwmw2@infradead.org Link: http://lkml.kernel.org/r/1433827237-3382-10-git-send-email-feng.wu@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/irq_remapping.h | 2 ++ drivers/iommu/irq_remapping.c | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index e479fbda55cf..046c7fb1ca43 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -35,6 +35,7 @@ enum irq_remap_cap { #ifdef CONFIG_IRQ_REMAP +extern bool irq_remapping_cap(enum irq_remap_cap cap); extern void set_irq_remapping_broken(void); extern int irq_remapping_prepare(void); extern int irq_remapping_enable(void); @@ -64,6 +65,7 @@ struct vcpu_data { #else /* CONFIG_IRQ_REMAP */ +static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; } static inline void set_irq_remapping_broken(void) { } static inline int irq_remapping_prepare(void) { return -ENODEV; } static inline int irq_remapping_enable(void) { return -ENODEV; } diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index ed605a924ca2..2d9993062ded 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -81,6 +81,15 @@ void set_irq_remapping_broken(void) irq_remap_broken = 1; } +bool irq_remapping_cap(enum irq_remap_cap cap) +{ + if (!remap_ops || disable_irq_post) + return 0; + + return (remap_ops->capability & (1 << cap)); +} +EXPORT_SYMBOL_GPL(irq_remapping_cap); + int __init irq_remapping_prepare(void) { if (disable_irq_remap) -- cgit v1.2.3