From 7c3576d261ce046789a7db14f43303f8120910c7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] i386: Convert PDA into the percpu section Currently x86 (similar to x86-64) has a special per-cpu structure called "i386_pda" which can be easily and efficiently referenced via the %fs register. An ELF section is more flexible than a structure, allowing any piece of code to use this area. Indeed, such a section already exists: the per-cpu area. So this patch: (1) Removes the PDA and uses per-cpu variables for each current member. (2) Replaces the __KERNEL_PDA segment with __KERNEL_PERCPU. (3) Creates a per-cpu mirror of __per_cpu_offset called this_cpu_off, which can be used to calculate addresses for this CPU's variables. (4) Simplifies startup, because %fs doesn't need to be loaded with a special segment at early boot; it can be deferred until the first percpu area is allocated (or never for UP). The result is less code and one less x86-specific concept. 
Signed-off-by: Rusty Russell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Andi Kleen --- arch/i386/kernel/asm-offsets.c | 5 ----- arch/i386/kernel/cpu/common.c | 17 +++-------------- arch/i386/kernel/entry.S | 5 ++--- arch/i386/kernel/head.S | 31 ++++++------------------------- arch/i386/kernel/i386_ksyms.c | 2 -- arch/i386/kernel/irq.c | 3 +++ arch/i386/kernel/process.c | 12 +++++++++--- arch/i386/kernel/smpboot.c | 30 ++++++++++++++---------------- arch/i386/kernel/vmi.c | 6 +----- arch/i386/kernel/vmlinux.lds.S | 1 - 10 files changed, 38 insertions(+), 74 deletions(-) (limited to 'arch') diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index d558adfc293c..b05e85fd1c1e 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c @@ -15,7 +15,6 @@ #include #include #include -#include #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) @@ -101,10 +100,6 @@ void foo(void) OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); - BLANK(); - OFFSET(PDA_cpu, i386_pda, cpu_number); - OFFSET(PDA_pcurrent, i386_pda, pcurrent); - #ifdef CONFIG_PARAVIRT BLANK(); OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled); diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 7a4c036d93c8..27e00565f5e4 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -18,7 +18,6 @@ #include #include #endif -#include #include "cpu.h" @@ -47,13 +46,10 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */ [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 }, - [GDT_ENTRY_PDA] = { 0x00000000, 0x00c09200 }, /* set in setup_pda */ + [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 }, } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); -DEFINE_PER_CPU(struct i386_pda, _cpu_pda); -EXPORT_PER_CPU_SYMBOL(_cpu_pda); - static int cachesize_override __cpuinitdata = -1; static int disable_x86_fxsr 
__cpuinitdata; static int disable_x86_serial_nr __cpuinitdata = 1; @@ -634,21 +630,14 @@ void __init early_cpu_init(void) #endif } -/* Make sure %gs is initialized properly in idle threads */ +/* Make sure %fs is initialized properly in idle threads */ struct pt_regs * __devinit idle_regs(struct pt_regs *regs) { memset(regs, 0, sizeof(struct pt_regs)); - regs->xfs = __KERNEL_PDA; + regs->xfs = __KERNEL_PERCPU; return regs; } -/* Initial PDA used by boot CPU */ -struct i386_pda boot_pda = { - ._pda = &boot_pda, - .cpu_number = 0, - .pcurrent = &init_task, -}; - /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 3e4aa1fd33e2..7f92ceb428ad 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -132,7 +132,7 @@ VM_MASK = 0x00020000 movl $(__USER_DS), %edx; \ movl %edx, %ds; \ movl %edx, %es; \ - movl $(__KERNEL_PDA), %edx; \ + movl $(__KERNEL_PERCPU), %edx; \ movl %edx, %fs #define RESTORE_INT_REGS \ @@ -556,7 +556,6 @@ END(syscall_badsys) #define FIXUP_ESPFIX_STACK \ /* since we are on a wrong stack, we cant make it a C code :( */ \ - movl %fs:PDA_cpu, %ebx; \ PER_CPU(gdt_page, %ebx); \ GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ addl %esp, %eax; \ @@ -681,7 +680,7 @@ error_code: pushl %fs CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET fs, 0*/ - movl $(__KERNEL_PDA), %ecx + movl $(__KERNEL_PERCPU), %ecx movl %ecx, %fs UNWIND_ESPFIX_STACK popl %ecx diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index bb36c24311b4..12277d8938df 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -317,12 +317,12 @@ is386: movl $2,%ecx # set MP movl %eax,%cr0 call check_x87 - call setup_pda lgdt early_gdt_descr lidt idt_descr ljmp $(__KERNEL_CS),$1f 1: movl $(__KERNEL_DS),%eax # reload all the segment registers movl %eax,%ss # after changing gdt. 
+ movl %eax,%fs # gets reset once there's real percpu movl $(__USER_DS),%eax # DS/ES contains default USER segment movl %eax,%ds @@ -332,16 +332,17 @@ is386: movl $2,%ecx # set MP movl %eax,%gs lldt %ax - movl $(__KERNEL_PDA),%eax - mov %eax,%fs - cld # gcc2 wants the direction flag cleared at all times pushl $0 # fake return address for unwinder #ifdef CONFIG_SMP movb ready, %cl movb $1, ready cmpb $0,%cl # the first CPU calls start_kernel - jne initialize_secondary # all other CPUs call initialize_secondary + je 1f + movl $(__KERNEL_PERCPU), %eax + movl %eax,%fs # set this cpu's percpu + jmp initialize_secondary # all other CPUs call initialize_secondary +1: #endif /* CONFIG_SMP */ jmp start_kernel @@ -364,23 +365,6 @@ check_x87: .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */ ret -/* - * Point the GDT at this CPU's PDA. On boot this will be - * cpu_gdt_table and boot_pda; for secondary CPUs, these will be - * that CPU's GDT and PDA. - */ -ENTRY(setup_pda) - /* get the PDA pointer */ - movl start_pda, %eax - - /* slot the PDA address into the GDT */ - mov early_gdt_descr+2, %ecx - mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ - shr $16, %eax - mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ - mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */ - ret - /* * setup_idt * @@ -553,9 +537,6 @@ ENTRY(empty_zero_page) * This starts the data section. 
*/ .data -ENTRY(start_pda) - .long boot_pda - ENTRY(stack_start) .long init_thread_union+THREAD_SIZE .long __BOOT_DS diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index 4afe26e86260..e3d4b73bfdb0 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c @@ -28,5 +28,3 @@ EXPORT_SYMBOL(__read_lock_failed); #endif EXPORT_SYMBOL(csum_partial); - -EXPORT_SYMBOL(_proxy_pda); diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 8db8d514c9c0..d2daf672f4a2 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -24,6 +24,9 @@ DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; EXPORT_PER_CPU_SYMBOL(irq_stat); +DEFINE_PER_CPU(struct pt_regs *, irq_regs); +EXPORT_PER_CPU_SYMBOL(irq_regs); + /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 5fb9524c6f4b..61999479b7a4 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -57,7 +58,6 @@ #include #include -#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -66,6 +66,12 @@ static int hlt_counter; unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); +DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; +EXPORT_PER_CPU_SYMBOL(current_task); + +DEFINE_PER_CPU(int, cpu_number); +EXPORT_PER_CPU_SYMBOL(cpu_number); + /* * Return saved PC of a blocked thread. 
*/ @@ -342,7 +348,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) regs.xds = __USER_DS; regs.xes = __USER_DS; - regs.xfs = __KERNEL_PDA; + regs.xfs = __KERNEL_PERCPU; regs.orig_eax = -1; regs.eip = (unsigned long) kernel_thread_helper; regs.xcs = __KERNEL_CS | get_kernel_rpl(); @@ -711,7 +717,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas if (prev->gs | next->gs) loadsegment(gs, next->gs); - write_pda(pcurrent, next_p); + x86_write_percpu(current_task, next_p); return prev_p; } diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 61e2842add36..f79b6233db78 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -53,7 +53,6 @@ #include #include #include -#include #include #include @@ -99,6 +98,9 @@ EXPORT_SYMBOL(x86_cpu_to_apicid); u8 apicid_2_node[MAX_APICID]; +DEFINE_PER_CPU(unsigned long, this_cpu_off); +EXPORT_PER_CPU_SYMBOL(this_cpu_off); + /* * Trampoline 80x86 program as an array. */ @@ -456,7 +458,6 @@ extern struct { void * esp; unsigned short ss; } stack_start; -extern struct i386_pda *start_pda; #ifdef CONFIG_NUMA @@ -784,20 +785,17 @@ static inline struct task_struct * alloc_idle_task(int cpu) /* Initialize the CPU's GDT. This is either the boot CPU doing itself (still using the master per-cpu area), or a CPU doing it for a secondary which will soon come up. 
*/ -static __cpuinit void init_gdt(int cpu, struct task_struct *idle) +static __cpuinit void init_gdt(int cpu) { struct desc_struct *gdt = get_cpu_gdt_table(cpu); - struct i386_pda *pda = &per_cpu(_cpu_pda, cpu); - pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a, - (u32 *)&gdt[GDT_ENTRY_PDA].b, - (unsigned long)pda, sizeof(*pda) - 1, - 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */ + pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a, + (u32 *)&gdt[GDT_ENTRY_PERCPU].b, + __per_cpu_offset[cpu], 0xFFFFF, + 0x80 | DESCTYPE_S | 0x2, 0x8); - memset(pda, 0, sizeof(*pda)); - pda->_pda = pda; - pda->cpu_number = cpu; - pda->pcurrent = idle; + per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; + per_cpu(cpu_number, cpu) = cpu; } /* Defined in head.S */ @@ -824,9 +822,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) if (IS_ERR(idle)) panic("failed fork for CPU %d", cpu); - init_gdt(cpu, idle); + init_gdt(cpu); + per_cpu(current_task, cpu) = idle; early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); - start_pda = cpu_pda(cpu); idle->thread.eip = (unsigned long) start_secondary; /* start_eip had better be page-aligned! 
*/ @@ -1188,14 +1186,14 @@ static inline void switch_to_new_gdt(void) gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); - asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory"); + asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); } void __init native_smp_prepare_boot_cpu(void) { unsigned int cpu = smp_processor_id(); - init_gdt(cpu, current); + init_gdt(cpu); switch_to_new_gdt(); cpu_set(cpu, cpu_online_map); diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c index ccad7ee960aa..12312988c626 100644 --- a/arch/i386/kernel/vmi.c +++ b/arch/i386/kernel/vmi.c @@ -504,8 +504,6 @@ static void vmi_pmd_clear(pmd_t *pmd) #endif #ifdef CONFIG_SMP -extern void setup_pda(void); - static void __devinit vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, unsigned long start_esp) @@ -530,13 +528,11 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, ap.ds = __USER_DS; ap.es = __USER_DS; - ap.fs = __KERNEL_PDA; + ap.fs = __KERNEL_PERCPU; ap.gs = 0; ap.eflags = 0; - setup_pda(); - #ifdef CONFIG_X86_PAE /* efer should match BSP efer. */ if (cpu_has_nx) { diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 2ce4aa185fc8..d125784ddf5e 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -26,7 +26,6 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) ENTRY(phys_startup_32) jiffies = jiffies_64; -_proxy_pda = 1; PHDRS { text PT_LOAD FLAGS(5); /* R_E */ -- cgit v1.2.3