diff options
Diffstat (limited to 'arch/powerpc/kernel')
42 files changed, 2740 insertions, 1277 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 8f619342f14c..f960a7944553 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -7,7 +7,7 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ifeq ($(CONFIG_PPC64),y) -CFLAGS_prom_init.o += -mno-minimal-toc +CFLAGS_prom_init.o += $(NO_MINIMAL_TOC) endif ifeq ($(CONFIG_PPC32),y) CFLAGS_prom_init.o += -fPIC @@ -75,8 +75,8 @@ endif obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o obj-$(CONFIG_MODULES) += module.o module_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_44x) += cpu_setup_44x.o -obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o dbell.o -obj-$(CONFIG_PPC_BOOK3E_64) += dbell.o +obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o +obj-$(CONFIG_PPC_DOORBELL) += dbell.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o extra-y := head_$(CONFIG_WORD_SIZE).o @@ -91,7 +91,6 @@ obj-$(CONFIG_RELOCATABLE_PPC32) += reloc_32.o obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o obj-$(CONFIG_KGDB) += kgdb.o -obj-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o obj-$(CONFIG_MODULES) += ppc_ksyms.o obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_SMP) += smp.o @@ -122,6 +121,8 @@ ifneq ($(CONFIG_PPC_INDIRECT_IO),y) obj-y += iomap.o endif +obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o + obj-$(CONFIG_PPC64) += $(obj64-y) obj-$(CONFIG_PPC32) += $(obj32-y) @@ -142,6 +143,7 @@ GCOV_PROFILE_kprobes.o := n extra-$(CONFIG_PPC_FPU) += fpu.o extra-$(CONFIG_ALTIVEC) += vector.o extra-$(CONFIG_PPC64) += entry_64.o +extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o extra-y += systbl_chk.i $(obj)/systbl.o: systbl_chk diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 4e23ba2f3ca7..b6c17ec9b169 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -77,6 +77,7 @@ int main(void) DEFINE(NMI_MASK, NMI_MASK); DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr)); DEFINE(THREAD_DSCR_INHERIT, offsetof(struct thread_struct, dscr_inherit)); + DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); #endif /* CONFIG_PPC64 */ @@ -117,10 +118,38 @@ int main(void) #ifdef CONFIG_KVM_BOOK3S_32_HANDLER DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); #endif -#ifdef CONFIG_KVM_BOOKE_HV +#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu)); #endif +#ifdef CONFIG_PPC_BOOK3S_64 + DEFINE(THREAD_TAR, offsetof(struct thread_struct, tar)); +#endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch)); + DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar)); + DEFINE(THREAD_TM_TEXASR, offsetof(struct thread_struct, tm_texasr)); + DEFINE(THREAD_TM_TFIAR, offsetof(struct thread_struct, tm_tfiar)); + DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs)); + DEFINE(THREAD_TRANSACT_VR0, offsetof(struct thread_struct, + transact_vr[0])); + DEFINE(THREAD_TRANSACT_VSCR, offsetof(struct thread_struct, + transact_vscr)); + DEFINE(THREAD_TRANSACT_VRSAVE, offsetof(struct thread_struct, + transact_vrsave)); + DEFINE(THREAD_TRANSACT_FPR0, offsetof(struct thread_struct, + transact_fpr[0])); + DEFINE(THREAD_TRANSACT_FPSCR, offsetof(struct thread_struct, + transact_fpscr)); +#ifdef CONFIG_VSX + DEFINE(THREAD_TRANSACT_VSR0, offsetof(struct thread_struct, + transact_fpr[0])); +#endif + /* Local pt_regs on stack for Transactional Memory funcs. */ + DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD + + sizeof(struct pt_regs) + 16); +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); @@ -474,6 +503,7 @@ int main(void) DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid)); + DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); @@ -553,6 +583,10 @@ int main(void) DEFINE(IPI_PRIORITY, IPI_PRIORITY); #endif /* CONFIG_KVM_BOOK3S_64_HV */ +#ifdef CONFIG_PPC_BOOK3S_64 + HSTATE_FIELD(HSTATE_CFAR, cfar); +#endif /* CONFIG_PPC_BOOK3S_64 */ + #else /* CONFIG_PPC_BOOK3S */ DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 57cf14065aec..ea847abb0d0a 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -48,6 +48,7 @@ _GLOBAL(__restore_cpu_power7) _GLOBAL(__setup_cpu_power8) mflr r11 + bl __init_FSCR bl __init_hvmode_206 mtlr r11 beqlr @@ -62,6 +63,7 @@ _GLOBAL(__setup_cpu_power8) _GLOBAL(__restore_cpu_power8) mflr r11 + bl __init_FSCR mfmsr r3 rldicl. r0,r3,4,63 beqlr @@ -112,6 +114,12 @@ __init_LPCR: isync blr +__init_FSCR: + mfspr r3,SPRN_FSCR + ori r3,r3,FSCR_TAR|FSCR_DSCR + mtspr SPRN_FSCR,r3 + blr + __init_TLB: /* Clear the TLB */ li r6,128 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 75a3d71b895d..19599ef352bc 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -275,7 +275,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index a892680668d8..9ebbc24bb23c 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -21,7 +21,7 @@ #ifdef CONFIG_SMP void doorbell_setup_this_cpu(void) { - unsigned long tag = mfspr(SPRN_PIR) & 0x3fff; + unsigned long tag = mfspr(SPRN_DOORBELL_CPUTAG) & PPC_DBELL_TAG_MASK; smp_muxed_ipi_set_data(smp_processor_id(), tag); } @@ -30,7 +30,7 @@ void doorbell_cause_ipi(int cpu, unsigned long data) { /* Order previous accesses vs. msgsnd, which is treated as a store */ mb(); - ppc_msgsnd(PPC_DBELL, 0, data); + ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, data); } void doorbell_exception(struct pt_regs *regs) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3d990d3bd8ba..256c5bf0adb7 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -62,8 +62,9 @@ system_call_common: std r12,_MSR(r1) std r0,GPR0(r1) std r10,GPR1(r1) + beq 2f /* if from kernel mode */ ACCOUNT_CPU_USER_ENTRY(r10, r11) - std r2,GPR2(r1) +2: std r2,GPR2(r1) std r3,GPR3(r1) mfcr r2 std r4,GPR4(r1) @@ -94,7 +95,7 @@ system_call_common: addi r9,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r9) /* "regshere" marker */ -#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR) +#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR) BEGIN_FW_FTR_SECTION beq 33f /* if from user, see if there are any DTL entries to process */ @@ -110,7 +111,7 @@ BEGIN_FW_FTR_SECTION addi r9,r1,STACK_FRAME_OVERHEAD 33: END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) -#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */ /* * A syscall should always be called with interrupts enabled @@ -226,6 +227,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) beq- 1f ACCOUNT_CPU_USER_EXIT(r11, r12) + HMT_MEDIUM_LOW_HAS_PPR ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ 1: ld r2,GPR2(r1) ld r1,GPR1(r1) @@ -302,6 +304,7 @@ syscall_exit_work: subi r12,r12,TI_FLAGS 4: /* Anything else left to do? */ + SET_DEFAULT_THREAD_PPR(r3, r9) /* Set thread.ppr = 3 */ andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) beq .ret_from_except_lite @@ -445,6 +448,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_DSCR) std r23,_CCR(r1) std r1,KSP(r3) /* Set old stack pointer */ +#ifdef CONFIG_PPC_BOOK3S_64 +BEGIN_FTR_SECTION + /* + * Back up the TAR across context switches. Note that the TAR is not + * available for use in the kernel. (To provide this, the TAR should + * be backed up/restored on exception entry/exit instead, and be in + * pt_regs. FIXME, this should be in pt_regs anyway (for debug).) + */ + mfspr r0,SPRN_TAR + std r0,THREAD_TAR(r3) +END_FTR_SECTION_IFSET(CPU_FTR_BCTAR) +#endif + #ifdef CONFIG_SMP /* We need a sync somewhere here to make sure that if the * previous task gets rescheduled on another CPU, it sees all @@ -527,6 +543,13 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) mr r1,r8 /* start using new stack pointer */ std r7,PACAKSAVE(r13) +#ifdef CONFIG_PPC_BOOK3S_64 +BEGIN_FTR_SECTION + ld r0,THREAD_TAR(r4) + mtspr SPRN_TAR,r0 +END_FTR_SECTION_IFSET(CPU_FTR_BCTAR) +#endif + #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION ld r0,THREAD_VRSAVE(r4) @@ -762,6 +785,10 @@ fast_exception_return: andc r4,r4,r0 /* r0 contains MSR_RI here */ mtmsrd r4,1 +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* TM debug */ + std r3, PACATMSCRATCH(r13) /* Stash returned-to MSR */ +#endif /* * r13 is our per cpu area, only restore it if we are returning to * userspace the value stored in the stack frame may belong to @@ -770,6 +797,7 @@ fast_exception_return: andi. r0,r3,MSR_PR beq 1f ACCOUNT_CPU_USER_EXIT(r2, r4) + RESTORE_PPR(r2, r4) REST_GPR(13, r1) 1: mtspr SPRN_SRR1,r3 @@ -849,13 +877,22 @@ restore_check_irq_replay: addi r3,r1,STACK_FRAME_OVERHEAD; bl .timer_interrupt b .ret_from_except +#ifdef CONFIG_PPC_DOORBELL +1: #ifdef CONFIG_PPC_BOOK3E -1: cmpwi cr0,r3,0x280 + cmpwi cr0,r3,0x280 +#else + BEGIN_FTR_SECTION + cmpwi cr0,r3,0xe80 + FTR_SECTION_ELSE + cmpwi cr0,r3,0xa00 + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) +#endif /* CONFIG_PPC_BOOK3E */ bne 1f addi r3,r1,STACK_FRAME_OVERHEAD; bl .doorbell_exception b .ret_from_except -#endif /* CONFIG_PPC_BOOK3E */ +#endif /* CONFIG_PPC_DOORBELL */ 1: b .ret_from_except /* What else to do here ? */ unrecov_restore: diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index f3eab8594d9f..d44a571e45a7 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -23,8 +23,10 @@ #include <asm/code-patching.h> #include <asm/machdep.h> +#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) extern void epapr_ev_idle(void); extern u32 epapr_ev_idle_start[]; +#endif bool epapr_paravirt_enabled; @@ -47,11 +49,15 @@ static int __init epapr_paravirt_init(void) for (i = 0; i < (len / 4); i++) { patch_instruction(epapr_hypercall_start + i, insts[i]); +#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) patch_instruction(epapr_ev_idle_start + i, insts[i]); +#endif } +#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) if (of_get_property(hyper_node, "has-idle", NULL)) ppc_md.power_save = epapr_ev_idle; +#endif epapr_paravirt_enabled = true; diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 4684e33a26c3..ae54553eacd9 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -159,8 +159,9 @@ exc_##n##_common: \ std r9,GPR9(r1); /* save r9 in stackframe */ \ std r10,_NIP(r1); /* save SRR0 to stackframe */ \ std r11,_MSR(r1); /* save SRR1 to stackframe */ \ + beq 2f; /* if from kernel mode */ \ ACCOUNT_CPU_USER_ENTRY(r10,r11);/* accounting (uses cr0+eq) */ \ - ld r3,excf+EX_R10(r13); /* get back r10 */ \ +2: ld r3,excf+EX_R10(r13); /* get back r10 */ \ ld r4,excf+EX_R11(r13); /* get back r11 */ \ mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 */ \ std r12,GPR12(r1); /* save r12 in stackframe */ \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4665e82fa377..56bd92362ce1 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -74,13 +74,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ mflr r10 ; \ ld r12,PACAKBASE(r13) ; \ LOAD_HANDLER(r12, system_call_entry_direct) ; \ - mtlr r12 ; \ + mtctr r12 ; \ mfspr r12,SPRN_SRR1 ; \ /* Re-use of r13... No spare regs to do this */ \ li r13,MSR_RI ; \ mtmsrd r13,1 ; \ GET_PACA(r13) ; /* get r13 back */ \ - blr ; + bctr ; #else /* We can branch directly */ #define SYSCALL_PSERIES_2_DIRECT \ @@ -104,7 +104,7 @@ __start_interrupts: .globl system_reset_pSeries; system_reset_pSeries: - HMT_MEDIUM; + HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) #ifdef CONFIG_PPC_P7_NAP BEGIN_FTR_SECTION @@ -153,12 +153,15 @@ machine_check_pSeries_1: * some code path might still want to branch into the original * vector */ - b machine_check_pSeries + HMT_MEDIUM_PPR_DISCARD + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_0(PACA_EXMC) + b machine_check_pSeries_0 . = 0x300 .globl data_access_pSeries data_access_pSeries: - HMT_MEDIUM + HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) BEGIN_FTR_SECTION b data_access_check_stab @@ -170,8 +173,9 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) . = 0x380 .globl data_access_slb_pSeries data_access_slb_pSeries: - HMT_MEDIUM + HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_DAR @@ -201,8 +205,9 @@ data_access_slb_pSeries: . = 0x480 .globl instruction_access_slb_pSeries instruction_access_slb_pSeries: - HMT_MEDIUM + HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ @@ -252,7 +257,7 @@ hardware_interrupt_hv: MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer) STD_EXCEPTION_HV(0x980, 0x982, hdecrementer) - STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a) + MASKABLE_EXCEPTION_PSERIES(0xa00, 0xa00, doorbell_super) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xa00) STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b) @@ -284,16 +289,30 @@ system_call_pSeries: */ . = 0xe00 hv_exception_trampoline: + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b h_data_storage_hv + . = 0xe20 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b h_instr_storage_hv + . = 0xe40 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b emulation_assist_hv - . = 0xe50 - b hmi_exception_hv + . = 0xe60 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b hmi_exception_hv + . = 0xe80 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b h_doorbell_hv + /* We need to deal with the Altivec unavailable exception * here which is at 0xf20, thus in the middle of the * prolog code of the PerformanceMonitor one. A little @@ -301,16 +320,27 @@ hv_exception_trampoline: */ performance_monitor_pSeries_1: . = 0xf00 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b performance_monitor_pSeries altivec_unavailable_pSeries_1: . = 0xf20 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b altivec_unavailable_pSeries vsx_unavailable_pSeries_1: . = 0xf40 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b vsx_unavailable_pSeries + . = 0xf60 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b tm_unavailable_pSeries + #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202) @@ -322,11 +352,9 @@ vsx_unavailable_pSeries_1: . = 0x1500 .global denorm_exception_hv denorm_exception_hv: - HMT_MEDIUM + HMT_MEDIUM_PPR_DISCARD mtspr SPRN_SPRG_HSCRATCH0,r13 - mfspr r13,SPRN_SPRG_HPACA - std r9,PACA_EXGEN+EX_R9(r13) - std r10,PACA_EXGEN+EX_R10(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) std r11,PACA_EXGEN+EX_R11(r13) std r12,PACA_EXGEN+EX_R12(r13) mfspr r9,SPRN_SPRG_HSCRATCH0 @@ -367,10 +395,12 @@ denorm_exception_hv: machine_check_pSeries: .globl machine_check_fwnmi machine_check_fwnmi: - HMT_MEDIUM + HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, - EXC_STD, KVMTEST, 0x200) + EXCEPTION_PROLOG_0(PACA_EXMC) +machine_check_pSeries_0: + EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST, 0x200) + EXCEPTION_PROLOG_PSERIES_1(machine_check_common, EXC_STD) KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200) /* moved from 0x300 */ @@ -496,6 +526,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206) mtspr SPRN_HSRR0,r11 mtcrf 0x80,r9 ld r9,PACA_EXGEN+EX_R9(r13) + RESTORE_PPR_PACA(PACA_EXGEN, r10) ld r10,PACA_EXGEN+EX_R10(r13) ld r11,PACA_EXGEN+EX_R11(r13) ld r12,PACA_EXGEN+EX_R12(r13) @@ -506,28 +537,34 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206) .align 7 /* moved from 0xe00 */ - STD_EXCEPTION_HV(., 0xe02, h_data_storage) + STD_EXCEPTION_HV_OOL(0xe02, h_data_storage) KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0xe02) - STD_EXCEPTION_HV(., 0xe22, h_instr_storage) + STD_EXCEPTION_HV_OOL(0xe22, h_instr_storage) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe22) - STD_EXCEPTION_HV(., 0xe42, emulation_assist) + STD_EXCEPTION_HV_OOL(0xe42, emulation_assist) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe42) - STD_EXCEPTION_HV(., 0xe62, hmi_exception) /* need to flush cache ? */ + STD_EXCEPTION_HV_OOL(0xe62, hmi_exception) /* need to flush cache ? */ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62) + MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82) /* moved from 0xf00 */ - STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor) + STD_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf00) - STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable) + STD_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20) - STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable) + STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) + STD_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60) /* - * An interrupt came in while soft-disabled. We set paca->irq_happened, - * then, if it was a decrementer interrupt, we bump the dec to max and - * and return, else we hard disable and return. This is called with - * r10 containing the value to OR to the paca field. + * An interrupt came in while soft-disabled. We set paca->irq_happened, then: + * - If it was a decrementer interrupt, we bump the dec to max and and return. + * - If it was a doorbell we return immediately since doorbells are edge + * triggered and won't automatically refire. + * - else we hard disable and return. + * This is called with r10 containing the value to OR to the paca field. */ #define MASKED_INTERRUPT(_H) \ masked_##_H##interrupt: \ @@ -535,13 +572,15 @@ masked_##_H##interrupt: \ lbz r11,PACAIRQHAPPENED(r13); \ or r11,r11,r10; \ stb r11,PACAIRQHAPPENED(r13); \ - andi. r10,r10,PACA_IRQ_DEC; \ - beq 1f; \ + cmpwi r10,PACA_IRQ_DEC; \ + bne 1f; \ lis r10,0x7fff; \ ori r10,r10,0xffff; \ mtspr SPRN_DEC,r10; \ b 2f; \ -1: mfspr r10,SPRN_##_H##SRR1; \ +1: cmpwi r10,PACA_IRQ_DBELL; \ + beq 2f; \ + mfspr r10,SPRN_##_H##SRR1; \ rldicl r10,r10,48,1; /* clear MSR_EE */ \ rotldi r10,r10,16; \ mtspr SPRN_##_H##SRR1,r10; \ @@ -558,8 +597,8 @@ masked_##_H##interrupt: \ /* * Called from arch_local_irq_enable when an interrupt needs - * to be resent. r3 contains 0x500 or 0x900 to indicate which - * kind of interrupt. MSR:EE is already off. We generate a + * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate + * which kind of interrupt. MSR:EE is already off. We generate a * stackframe like if a real interrupt had happened. * * Note: While MSR:EE is off, we need to make sure that _MSR @@ -575,9 +614,18 @@ _GLOBAL(__replay_interrupt) mflr r11 mfcr r9 ori r12,r12,MSR_EE - andi. r3,r3,0x0800 - bne decrementer_common - b hardware_interrupt_common + cmpwi r3,0x900 + beq decrementer_common + cmpwi r3,0x500 + beq hardware_interrupt_common +BEGIN_FTR_SECTION + cmpwi r3,0xe80 + beq h_doorbell_common +FTR_SECTION_ELSE + cmpwi r3,0xa00 + beq doorbell_super_common +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) + blr #ifdef CONFIG_PPC_PSERIES /* @@ -586,7 +634,7 @@ _GLOBAL(__replay_interrupt) .globl system_reset_fwnmi .align 7 system_reset_fwnmi: - HMT_MEDIUM + HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) /* save r13 */ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, NOTEST, 0x100) @@ -651,12 +699,21 @@ machine_check_common: STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ) STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt) STD_EXCEPTION_COMMON(0x980, hdecrementer, .hdec_interrupt) - STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception) +#ifdef CONFIG_PPC_DOORBELL + STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, .doorbell_exception) +#else + STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, .unknown_exception) +#endif STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception) STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception) +#ifdef CONFIG_PPC_DOORBELL + STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, .doorbell_exception) +#else + STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, .unknown_exception) +#endif STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, .performance_monitor_exception) STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception) STD_EXCEPTION_COMMON(0x1502, denorm, .unknown_exception) @@ -690,8 +747,8 @@ machine_check_common: . = 0x4380 .globl data_access_slb_relon_pSeries data_access_slb_relon_pSeries: - HMT_MEDIUM SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_DAR @@ -715,8 +772,8 @@ data_access_slb_relon_pSeries: . = 0x4480 .globl instruction_access_slb_relon_pSeries instruction_access_slb_relon_pSeries: - HMT_MEDIUM SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ @@ -746,6 +803,7 @@ hardware_interrupt_relon_hv: STD_RELON_EXCEPTION_PSERIES(0x4800, 0x800, fp_unavailable) MASKABLE_RELON_EXCEPTION_PSERIES(0x4900, 0x900, decrementer) STD_RELON_EXCEPTION_HV(0x4980, 0x982, hdecrementer) + MASKABLE_RELON_EXCEPTION_PSERIES(0x4a00, 0xa00, doorbell_super) STD_RELON_EXCEPTION_PSERIES(0x4b00, 0xb00, trap_0b) . = 0x4c00 @@ -759,56 +817,64 @@ system_call_relon_pSeries: STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step) . = 0x4e00 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b h_data_storage_relon_hv . = 0x4e20 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b h_instr_storage_relon_hv . = 0x4e40 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b emulation_assist_relon_hv - . = 0x4e50 - b hmi_exception_relon_hv - . = 0x4e60 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b hmi_exception_relon_hv - /* For when we support the doorbell interrupt: - STD_RELON_EXCEPTION_HYPERVISOR(0x4e80, 0xe80, doorbell_hyper) - */ + . = 0x4e80 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b h_doorbell_relon_hv performance_monitor_relon_pSeries_1: . = 0x4f00 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b performance_monitor_relon_pSeries altivec_unavailable_relon_pSeries_1: . = 0x4f20 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b altivec_unavailable_relon_pSeries vsx_unavailable_relon_pSeries_1: . = 0x4f40 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b vsx_unavailable_relon_pSeries -#ifdef CONFIG_CBE_RAS - STD_RELON_EXCEPTION_HV(0x5200, 0x1202, cbe_system_error) -#endif /* CONFIG_CBE_RAS */ +tm_unavailable_relon_pSeries_1: + . = 0x4f60 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b tm_unavailable_relon_pSeries + STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint) #ifdef CONFIG_PPC_DENORMALISATION . = 0x5500 b denorm_exception_hv #endif -#ifdef CONFIG_CBE_RAS - STD_RELON_EXCEPTION_HV(0x5600, 0x1602, cbe_maintenance) -#else #ifdef CONFIG_HVC_SCOM STD_RELON_EXCEPTION_HV(0x5600, 0x1600, maintence_interrupt) KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1600) #endif /* CONFIG_HVC_SCOM */ -#endif /* CONFIG_CBE_RAS */ STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist) -#ifdef CONFIG_CBE_RAS - STD_RELON_EXCEPTION_HV(0x5800, 0x1802, cbe_thermal) -#endif /* CONFIG_CBE_RAS */ /* Other future vectors */ .align 7 @@ -1000,77 +1066,6 @@ unrecov_user_slb: #endif /* __DISABLED__ */ -/* - * r13 points to the PACA, r9 contains the saved CR, - * r12 contain the saved SRR1, SRR0 is still ready for return - * r3 has the faulting address - * r9 - r13 are saved in paca->exslb. - * r3 is saved in paca->slb_r3 - * We assume we aren't going to take any exceptions during this procedure. - */ -_GLOBAL(slb_miss_realmode) - mflr r10 -#ifdef CONFIG_RELOCATABLE - mtctr r11 -#endif - - stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ - std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ - - bl .slb_allocate_realmode - - /* All done -- return from exception. */ - - ld r10,PACA_EXSLB+EX_LR(r13) - ld r3,PACA_EXSLB+EX_R3(r13) - lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ - - mtlr r10 - - andi. r10,r12,MSR_RI /* check for unrecoverable exception */ - beq- 2f - -.machine push -.machine "power4" - mtcrf 0x80,r9 - mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ -.machine pop - - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - ld r11,PACA_EXSLB+EX_R11(r13) - ld r12,PACA_EXSLB+EX_R12(r13) - ld r13,PACA_EXSLB+EX_R13(r13) - rfid - b . /* prevent speculative execution */ - -2: mfspr r11,SPRN_SRR0 - ld r10,PACAKBASE(r13) - LOAD_HANDLER(r10,unrecov_slb) - mtspr SPRN_SRR0,r10 - ld r10,PACAKMSR(r13) - mtspr SPRN_SRR1,r10 - rfid - b . - -unrecov_slb: - EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) - DISABLE_INTS - bl .save_nvgprs -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception - b 1b - - -#ifdef CONFIG_PPC_970_NAP -power4_fixup_nap: - andc r9,r9,r10 - std r9,TI_LOCAL_FLAGS(r11) - ld r10,_LINK(r1) /* make idle task do the */ - std r10,_NIP(r1) /* equivalent of a blr */ - blr -#endif - .align 7 .globl alignment_common alignment_common: @@ -1109,9 +1104,26 @@ fp_unavailable_common: addi r3,r1,STACK_FRAME_OVERHEAD bl .kernel_fp_unavailable_exception BUG_OPCODE -1: bl .load_up_fpu +1: +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + /* Test if 2 TM state bits are zero. If non-zero (ie. userspace was in + * transaction), go do TM stuff + */ + rldicl. r0, r12, (64-MSR_TS_LG), (64-2) + bne- 2f +END_FTR_SECTION_IFSET(CPU_FTR_TM) +#endif + bl .load_up_fpu b fast_exception_return - +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +2: /* User process was in a transaction */ + bl .save_nvgprs + DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .fp_unavailable_tm + b .ret_from_except +#endif .align 7 .globl altivec_unavailable_common altivec_unavailable_common: @@ -1119,8 +1131,25 @@ altivec_unavailable_common: #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION beq 1f +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + BEGIN_FTR_SECTION_NESTED(69) + /* Test if 2 TM state bits are zero. If non-zero (ie. userspace was in + * transaction), go do TM stuff + */ + rldicl. r0, r12, (64-MSR_TS_LG), (64-2) + bne- 2f + END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69) +#endif bl .load_up_altivec b fast_exception_return +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +2: /* User process was in a transaction */ + bl .save_nvgprs + DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .altivec_unavailable_tm + b .ret_from_except +#endif 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif @@ -1137,7 +1166,24 @@ vsx_unavailable_common: #ifdef CONFIG_VSX BEGIN_FTR_SECTION beq 1f +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + BEGIN_FTR_SECTION_NESTED(69) + /* Test if 2 TM state bits are zero. If non-zero (ie. userspace was in + * transaction), go do TM stuff + */ + rldicl. r0, r12, (64-MSR_TS_LG), (64-2) + bne- 2f + END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69) +#endif b .load_up_vsx +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +2: /* User process was in a transaction */ + bl .save_nvgprs + DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .vsx_unavailable_tm + b .ret_from_except +#endif 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif @@ -1148,9 +1194,147 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) b .ret_from_except .align 7 + .globl tm_unavailable_common +tm_unavailable_common: + EXCEPTION_PROLOG_COMMON(0xf60, PACA_EXGEN) + bl .save_nvgprs + DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .tm_unavailable_exception + b .ret_from_except + + .align 7 .globl __end_handlers __end_handlers: + /* Equivalents to the above handlers for relocation-on interrupt vectors */ + STD_RELON_EXCEPTION_HV_OOL(0xe00, h_data_storage) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe00) + STD_RELON_EXCEPTION_HV_OOL(0xe20, h_instr_storage) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe20) + STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe40) + STD_RELON_EXCEPTION_HV_OOL(0xe60, hmi_exception) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe60) + MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe80) + + STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) + STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) + STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) + STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) + +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) +/* + * Data area reserved for FWNMI option. + * This address (0x7000) is fixed by the RPA. + */ + .= 0x7000 + .globl fwnmi_data_area +fwnmi_data_area: + + /* pseries and powernv need to keep the whole page from + * 0x7000 to 0x8000 free for use by the firmware + */ + . = 0x8000 +#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ + +/* Space for CPU0's segment table */ + .balign 4096 + .globl initial_stab +initial_stab: + .space 4096 + +#ifdef CONFIG_PPC_POWERNV +_GLOBAL(opal_mc_secondary_handler) + HMT_MEDIUM_PPR_DISCARD + SET_SCRATCH0(r13) + GET_PACA(r13) + clrldi r3,r3,2 + tovirt(r3,r3) + std r3,PACA_OPAL_MC_EVT(r13) + ld r13,OPAL_MC_SRR0(r3) + mtspr SPRN_SRR0,r13 + ld r13,OPAL_MC_SRR1(r3) + mtspr SPRN_SRR1,r13 + ld r3,OPAL_MC_GPR3(r3) + GET_SCRATCH0(r13) + b machine_check_pSeries +#endif /* CONFIG_PPC_POWERNV */ + + +/* + * r13 points to the PACA, r9 contains the saved CR, + * r12 contain the saved SRR1, SRR0 is still ready for return + * r3 has the faulting address + * r9 - r13 are saved in paca->exslb. + * r3 is saved in paca->slb_r3 + * We assume we aren't going to take any exceptions during this procedure. + */ +_GLOBAL(slb_miss_realmode) + mflr r10 +#ifdef CONFIG_RELOCATABLE + mtctr r11 +#endif + + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + + bl .slb_allocate_realmode + + /* All done -- return from exception. */ + + ld r10,PACA_EXSLB+EX_LR(r13) + ld r3,PACA_EXSLB+EX_R3(r13) + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ + + mtlr r10 + + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- 2f + +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + + RESTORE_PPR_PACA(PACA_EXSLB, r9) + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + rfid + b . /* prevent speculative execution */ + +2: mfspr r11,SPRN_SRR0 + ld r10,PACAKBASE(r13) + LOAD_HANDLER(r10,unrecov_slb) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + mtspr SPRN_SRR1,r10 + rfid + b . + +unrecov_slb: + EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + + +#ifdef CONFIG_PPC_970_NAP +power4_fixup_nap: + andc r9,r9,r10 + std r9,TI_LOCAL_FLAGS(r11) + ld r10,_LINK(r1) /* make idle task do the */ + std r10,_NIP(r1) /* equivalent of a blr */ + blr +#endif + /* * Hash table stuff */ @@ -1222,7 +1406,7 @@ handle_dabr_fault: ld r4,_DAR(r1) ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_dabr + bl .do_break 12: b .ret_from_except_lite @@ -1268,20 +1452,36 @@ do_ste_alloc: _GLOBAL(do_stab_bolted) stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */ + mfspr r11,SPRN_DAR /* ea */ + + /* + * check for bad kernel/user address + * (ea & ~REGION_MASK) >= PGTABLE_RANGE + */ + rldicr. r9,r11,4,(63 - 46 - 4) + li r9,0 /* VSID = 0 for bad address */ + bne- 0f + + /* + * Calculate VSID: + * This is the kernel vsid, we take the top for context from + * the range. context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 + * Here we know that (ea >> 60) == 0xc + */ + lis r9,(MAX_USER_CONTEXT + 1)@ha + addi r9,r9,(MAX_USER_CONTEXT + 1)@l + srdi r10,r11,SID_SHIFT + rldimi r10,r9,ESID_BITS,0 /* proto vsid */ + ASM_VSID_SCRAMBLE(r10, r9, 256M) + rldic r9,r10,12,16 /* r9 = vsid << 12 */ + +0: /* Hash to the primary group */ ld r10,PACASTABVIRT(r13) - mfspr r11,SPRN_DAR - srdi r11,r11,28 + srdi r11,r11,SID_SHIFT rldimi r10,r11,7,52 /* r10 = first ste of the group */ - /* Calculate VSID */ - /* This is a kernel address, so protovsid = ESID | 1 << 37 */ - li r9,0x1 - rldimi r11,r9,(CONTEXT_BITS + USER_ESID_BITS),0 - ASM_VSID_SCRAMBLE(r11, r9, 256M) - rldic r9,r11,12,16 /* r9 = vsid << 12 */ - /* Search the primary group for a free entry */ 1: ld r11,0(r10) /* Test valid bit of the current ste */ andi. r11,r11,0x80 @@ -1344,56 +1544,3 @@ _GLOBAL(do_stab_bolted) ld r13,PACA_EXSLB+EX_R13(r13) rfid b . /* prevent speculative execution */ - - - /* Equivalents to the above handlers for relocation-on interrupt vectors */ - STD_RELON_EXCEPTION_HV(., 0xe00, h_data_storage) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe00) - STD_RELON_EXCEPTION_HV(., 0xe20, h_instr_storage) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe20) - STD_RELON_EXCEPTION_HV(., 0xe40, emulation_assist) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe40) - STD_RELON_EXCEPTION_HV(., 0xe60, hmi_exception) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe60) - - STD_RELON_EXCEPTION_PSERIES(., 0xf00, performance_monitor) - STD_RELON_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable) - STD_RELON_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable) - -#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) -/* - * Data area reserved for FWNMI option. - * This address (0x7000) is fixed by the RPA. - */ - .= 0x7000 - .globl fwnmi_data_area -fwnmi_data_area: - - /* pseries and powernv need to keep the whole page from - * 0x7000 to 0x8000 free for use by the firmware - */ - . = 0x8000 -#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ - -/* Space for CPU0's segment table */ - .balign 4096 - .globl initial_stab -initial_stab: - .space 4096 - -#ifdef CONFIG_PPC_POWERNV -_GLOBAL(opal_mc_secondary_handler) - HMT_MEDIUM - SET_SCRATCH0(r13) - GET_PACA(r13) - clrldi r3,r3,2 - tovirt(r3,r3) - std r3,PACA_OPAL_MC_EVT(r13) - ld r13,OPAL_MC_SRR0(r3) - mtspr SPRN_SRR0,r13 - ld r13,OPAL_MC_SRR1(r3) - mtspr SPRN_SRR1,r13 - ld r3,OPAL_MC_GPR3(r3) - GET_SCRATCH0(r13) - b machine_check_pSeries -#endif /* CONFIG_PPC_POWERNV */ diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index e0ada05f2df3..caeaabf11a2f 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -35,6 +35,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 2: REST_32VSRS(n,c,base); \ 3: +#define __REST_32FPVSRS_TRANSACT(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + REST_32FPRS_TRANSACT(n,base); \ + b 3f; \ +2: REST_32VSRS_TRANSACT(n,c,base); \ +3: + #define __SAVE_32FPVSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ @@ -45,11 +54,68 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 3: #else #define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) +#define __REST_32FPVSRS_TRANSACT(n,b,base) REST_32FPRS(n, base) #define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) #endif #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) +#define REST_32FPVSRS_TRANSACT(n,c,base) \ + __REST_32FPVSRS_TRANSACT(n,__REG_##c,__REG_##base) #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Wrapper to call load_up_fpu from C. + * void do_load_up_fpu(struct pt_regs *regs); + */ +_GLOBAL(do_load_up_fpu) + mflr r0 + std r0, 16(r1) + stdu r1, -112(r1) + + subi r6, r3, STACK_FRAME_OVERHEAD + /* load_up_fpu expects r12=MSR, r13=PACA, and returns + * with r12 = new MSR. + */ + ld r12,_MSR(r6) + GET_PACA(r13) + + bl load_up_fpu + std r12,_MSR(r6) + + ld r0, 112+16(r1) + addi r1, r1, 112 + mtlr r0 + blr + + +/* void do_load_up_transact_fpu(struct thread_struct *thread) + * + * This is similar to load_up_fpu but for the transactional version of the FP + * register set. It doesn't mess with the task MSR or valid flags. + * Furthermore, we don't do lazy FP with TM currently. + */ +_GLOBAL(do_load_up_transact_fpu) + mfmsr r6 + ori r5,r6,MSR_FP +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + oris r5,r5,MSR_VSX@h +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif + SYNC + MTMSRD(r5) + + lfd fr0,THREAD_TRANSACT_FPSCR(r3) + MTFSF_L(fr0) + REST_32FPVSRS_TRANSACT(0, R4, R3) + + /* FP/VSX off again */ + MTMSRD(r6) + SYNC + + blr +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + /* * This task wants to use the FPU now. * On UP, disable FP for the task which had the FPU previously, diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 4989661b710b..8a9b6f59822d 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -430,30 +430,18 @@ label: EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE) /* 0x1000 - Programmable Interval Timer (PIT) Exception */ - START_EXCEPTION(0x1000, Decrementer) - NORMAL_EXCEPTION_PROLOG - lis r0,TSR_PIS@h - mtspr SPRN_TSR,r0 /* Clear the PIT exception */ - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0x1000, timer_interrupt) - -#if 0 -/* NOTE: - * FIT and WDT handlers are not implemented yet. - */ + . = 0x1000 + b Decrementer /* 0x1010 - Fixed Interval Timer (FIT) Exception */ - STND_EXCEPTION(0x1010, FITException, unknown_exception) + . = 0x1010 + b FITException /* 0x1020 - Watchdog Timer (WDT) Exception */ -#ifdef CONFIG_BOOKE_WDT - CRITICAL_EXCEPTION(0x1020, WDTException, WatchdogException) -#else - CRITICAL_EXCEPTION(0x1020, WDTException, unknown_exception) -#endif -#endif + . = 0x1020 + b WDTException /* 0x1100 - Data TLB Miss Exception * As the name implies, translation is not in the MMU, so search the @@ -738,6 +726,29 @@ label: (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ NOCOPY, crit_transfer_to_handler, ret_from_crit_exc) + /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */ +Decrementer: + NORMAL_EXCEPTION_PROLOG + lis r0,TSR_PIS@h + mtspr SPRN_TSR,r0 /* Clear the PIT exception */ + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_LITE(0x1000, timer_interrupt) + + /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */ +FITException: + NORMAL_EXCEPTION_PROLOG + addi r3,r1,STACK_FRAME_OVERHEAD; + EXC_XFER_EE(0x1010, unknown_exception) + + /* Watchdog Timer (WDT) Exception. (from 0x1020) */ +WDTException: + CRITICAL_EXCEPTION_PROLOG; + addi r3,r1,STACK_FRAME_OVERHEAD; + EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2, + (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), + NOCOPY, crit_transfer_to_handler, + ret_from_crit_exc) + /* * The other Data TLB exceptions bail out to this point * if they can't resolve the lightweight TLB fault. diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 116f0868695b..0886ae6dd5be 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -122,6 +122,8 @@ __secondary_hold: #endif /* Grab our physical cpu number */ mr r24,r3 + /* stash r4 for book3e */ + mr r25,r4 /* Tell the master cpu we're here */ /* Relocation is off & we are located at an address less */ @@ -129,16 +131,31 @@ __secondary_hold: std r24,__secondary_hold_acknowledge-_stext(0) sync + li r26,0 +#ifdef CONFIG_PPC_BOOK3E + tovirt(r26,r26) +#endif /* All secondary cpus wait here until told to start. */ -100: ld r4,__secondary_hold_spinloop-_stext(0) +100: ld r4,__secondary_hold_spinloop-_stext(r26) cmpdi 0,r4,0 beq 100b #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC) +#ifdef CONFIG_PPC_BOOK3E + tovirt(r4,r4) +#endif ld r4,0(r4) /* deref function descriptor */ mtctr r4 mr r3,r24 + /* + * it may be the case that other platforms have r4 right to + * begin with, this gives us some safety in case it is not + */ +#ifdef CONFIG_PPC_BOOK3E + mr r4,r25 +#else li r4,0 +#endif /* Make sure that patched code is visible */ isync bctr @@ -169,6 +186,7 @@ _GLOBAL(generic_secondary_thread_init) /* get a valid TOC pointer, wherever we're mapped at */ bl .relative_toc + tovirt(r2,r2) #ifdef CONFIG_PPC_BOOK3E /* Book3E initialization */ @@ -195,6 +213,7 @@ _GLOBAL(generic_secondary_smp_init) /* get a valid TOC pointer, wherever we're mapped at */ bl .relative_toc + tovirt(r2,r2) #ifdef CONFIG_PPC_BOOK3E /* Book3E initialization */ @@ -531,6 +550,7 @@ _GLOBAL(pmac_secondary_start) /* get TOC pointer (real address) */ bl .relative_toc + tovirt(r2,r2) /* Copy some CPU settings from CPU 0 */ bl .__restore_cpu_ppc970 @@ -665,6 +685,13 @@ _GLOBAL(enable_64b_mode) * This puts the TOC pointer into r2, offset by 0x8000 (as expected * by the toolchain). It computes the correct value for wherever we * are running at the moment, using position-independent code. + * + * Note: The compiler constructs pointers using offsets from the + * TOC in -mcmodel=medium mode. After we relocate to 0 but before + * the MMU is on we need our TOC to be a virtual address otherwise + * these pointers will be real addresses which may get stored and + * accessed later with the MMU on. We use tovirt() at the call + * sites to handle this. */ _GLOBAL(relative_toc) mflr r0 @@ -681,8 +708,9 @@ p_toc: .llong __toc_start + 0x8000 - 0b * This is where the main kernel code starts. */ _INIT_STATIC(start_here_multiplatform) - /* set up the TOC (real address) */ - bl .relative_toc + /* set up the TOC */ + bl .relative_toc + tovirt(r2,r2) /* Clear out the BSS. It may have been done in prom_init, * already but that's irrelevant since prom_init will soon diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index a89cae481b04..a949bdfc9623 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -73,7 +73,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) * If so, DABR will be populated in single_step_dabr_instruction(). */ if (current->thread.last_hit_ubp != bp) - set_dabr(info->address | info->type | DABR_TRANSLATION, info->dabrx); + set_breakpoint(info); return 0; } @@ -97,7 +97,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) } *slot = NULL; - set_dabr(0, 0); + hw_breakpoint_disable(); } /* @@ -127,19 +127,13 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) int arch_bp_generic_fields(int type, int *gen_bp_type) { - switch (type) { - case DABR_DATA_READ: - *gen_bp_type = HW_BREAKPOINT_R; - break; - case DABR_DATA_WRITE: - *gen_bp_type = HW_BREAKPOINT_W; - break; - case (DABR_DATA_WRITE | DABR_DATA_READ): - *gen_bp_type = (HW_BREAKPOINT_W | HW_BREAKPOINT_R); - break; - default: + *gen_bp_type = 0; + if (type & HW_BRK_TYPE_READ) + *gen_bp_type |= HW_BREAKPOINT_R; + if (type & HW_BRK_TYPE_WRITE) + *gen_bp_type |= HW_BREAKPOINT_W; + if (*gen_bp_type == 0) return -EINVAL; - } return 0; } @@ -148,35 +142,28 @@ int arch_bp_generic_fields(int type, int *gen_bp_type) */ int arch_validate_hwbkpt_settings(struct perf_event *bp) { - int ret = -EINVAL; + int ret = -EINVAL, length_max; struct arch_hw_breakpoint *info = counter_arch_bp(bp); if (!bp) return ret; - switch (bp->attr.bp_type) { - case HW_BREAKPOINT_R: - info->type = DABR_DATA_READ; - break; - case HW_BREAKPOINT_W: - info->type = DABR_DATA_WRITE; - break; - case HW_BREAKPOINT_R | HW_BREAKPOINT_W: - info->type = (DABR_DATA_READ | DABR_DATA_WRITE); - break; - default: + info->type = HW_BRK_TYPE_TRANSLATE; + if (bp->attr.bp_type & HW_BREAKPOINT_R) + info->type |= HW_BRK_TYPE_READ; + if (bp->attr.bp_type & HW_BREAKPOINT_W) + info->type |= HW_BRK_TYPE_WRITE; + if (info->type == HW_BRK_TYPE_TRANSLATE) + /* must set alteast read or write */ return ret; - } - + if (!(bp->attr.exclude_user)) + info->type |= HW_BRK_TYPE_USER; + if (!(bp->attr.exclude_kernel)) + info->type |= HW_BRK_TYPE_KERNEL; + if (!(bp->attr.exclude_hv)) + info->type |= HW_BRK_TYPE_HYP; info->address = bp->attr.bp_addr; info->len = bp->attr.bp_len; - info->dabrx = DABRX_ALL; - if (bp->attr.exclude_user) - info->dabrx &= ~DABRX_USER; - if (bp->attr.exclude_kernel) - info->dabrx &= ~DABRX_KERNEL; - if (bp->attr.exclude_hv) - info->dabrx &= ~DABRX_HYP; /* * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8) @@ -184,8 +171,16 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the * 'symbolsize' should satisfy the check below. */ + length_max = 8; /* DABR */ + if (cpu_has_feature(CPU_FTR_DAWR)) { + length_max = 512 ; /* 64 doublewords */ + /* DAWR region can't cross 512 boundary */ + if ((bp->attr.bp_addr >> 10) != + ((bp->attr.bp_addr + bp->attr.bp_len) >> 10)) + return -EINVAL; + } if (info->len > - (HW_BREAKPOINT_LEN - (info->address & HW_BREAKPOINT_ALIGN))) + (length_max - (info->address & HW_BREAKPOINT_ALIGN))) return -EINVAL; return 0; } @@ -204,7 +199,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) info = counter_arch_bp(tsk->thread.last_hit_ubp); regs->msr &= ~MSR_SE; - set_dabr(info->address | info->type | DABR_TRANSLATION, info->dabrx); + set_breakpoint(info); tsk->thread.last_hit_ubp = NULL; } @@ -222,7 +217,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) unsigned long dar = regs->dar; /* Disable breakpoints during exception handling */ - set_dabr(0, 0); + hw_breakpoint_disable(); /* * The counter may be concurrently released but that can only @@ -255,8 +250,9 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) * we still need to single-step the instruction, but we don't * generate an event. */ - info->extraneous_interrupt = !((bp->attr.bp_addr <= dar) && - (dar - bp->attr.bp_addr < bp->attr.bp_len)); + if (!((bp->attr.bp_addr <= dar) && + (dar - bp->attr.bp_addr < bp->attr.bp_len))) + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; /* Do not emulate user-space instructions, instead single-step them */ if (user_mode(regs)) { @@ -285,10 +281,10 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) * As a policy, the callback is invoked in a 'trigger-after-execute' * fashion */ - if (!info->extraneous_interrupt) + if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) perf_bp_event(bp, regs); - set_dabr(info->address | info->type | DABR_TRANSLATION, info->dabrx); + set_breakpoint(info); out: rcu_read_unlock(); return rc; @@ -317,10 +313,10 @@ int __kprobes single_step_dabr_instruction(struct die_args *args) * We shall invoke the user-defined callback function in the single * stepping handler to confirm to 'trigger-after-execute' semantics */ - if (!info->extraneous_interrupt) + if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) perf_bp_event(bp, regs); - set_dabr(info->address | info->type | DABR_TRANSLATION, info->dabrx); + set_breakpoint(info); current->thread.last_hit_ubp = NULL; /* diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index c862fd716fe3..31c4fdc6859c 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -717,6 +717,13 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) return; } + /* + * In case we have reserved the first bit, we should not emit + * the warning below. + */ + if (tbl->it_offset == 0) + clear_bit(0, tbl->it_map); + /* verify that table contains no entries */ if (!bitmap_empty(tbl->it_map, tbl->it_size)) pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 71413f41278f..4f97fe345526 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -122,8 +122,8 @@ static inline notrace int decrementer_check_overflow(void) } /* This is called whenever we are re-enabling interrupts - * and returns either 0 (nothing to do) or 500/900 if there's - * either an EE or a DEC to generate. + * and returns either 0 (nothing to do) or 500/900/280/a00/e80 if + * there's an EE, DEC or DBELL to generate. * * This is called in two contexts: From arch_local_irq_restore() * before soft-enabling interrupts, and from the exception exit @@ -182,6 +182,13 @@ notrace unsigned int __check_irq_replay(void) local_paca->irq_happened &= ~PACA_IRQ_DBELL; if (happened & PACA_IRQ_DBELL) return 0x280; +#else + local_paca->irq_happened &= ~PACA_IRQ_DBELL; + if (happened & PACA_IRQ_DBELL) { + if (cpu_has_feature(CPU_FTR_HVMODE)) + return 0xe80; + return 0xa00; + } #endif /* CONFIG_PPC_BOOK3E */ /* There should be nothing left ! */ diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index a7bc7521c064..5ca82cd4a374 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -199,7 +199,7 @@ static int kgdb_iabr_match(struct pt_regs *regs) return 1; } -static int kgdb_dabr_match(struct pt_regs *regs) +static int kgdb_break_match(struct pt_regs *regs) { if (user_mode(regs)) return 0; @@ -459,7 +459,7 @@ static void *old__debugger; static void *old__debugger_bpt; static void *old__debugger_sstep; static void *old__debugger_iabr_match; -static void *old__debugger_dabr_match; +static void *old__debugger_break_match; static void *old__debugger_fault_handler; int kgdb_arch_init(void) @@ -469,7 +469,7 @@ int kgdb_arch_init(void) old__debugger_bpt = __debugger_bpt; old__debugger_sstep = __debugger_sstep; old__debugger_iabr_match = __debugger_iabr_match; - old__debugger_dabr_match = __debugger_dabr_match; + old__debugger_break_match = __debugger_break_match; old__debugger_fault_handler = __debugger_fault_handler; __debugger_ipi = kgdb_call_nmi_hook; @@ -477,7 +477,7 @@ int kgdb_arch_init(void) __debugger_bpt = kgdb_handle_breakpoint; __debugger_sstep = kgdb_singlestep; __debugger_iabr_match = kgdb_iabr_match; - __debugger_dabr_match = kgdb_dabr_match; + __debugger_break_match = kgdb_break_match; __debugger_fault_handler = kgdb_not_implemented; return 0; @@ -490,6 +490,6 @@ void kgdb_arch_exit(void) __debugger_bpt = old__debugger_bpt; __debugger_sstep = old__debugger_sstep; __debugger_iabr_match = old__debugger_iabr_match; - __debugger_dabr_match = old__debugger_dabr_match; + __debugger_break_match = old__debugger_break_match; __debugger_fault_handler = old__debugger_fault_handler; } diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index e88c64331819..11f5b03a0b06 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -310,7 +310,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; @@ -330,7 +330,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, * real return address, and all the rest will point to * kretprobe_trampoline */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -357,7 +357,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 7206701b1ff1..466a2908bb63 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -162,6 +162,8 @@ static int kexec_all_irq_disabled = 0; static void kexec_smp_down(void *arg) { local_irq_disable(); + hard_irq_disable(); + mb(); /* make sure our irqs are disabled before we say they are */ get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF; while(kexec_all_irq_disabled == 0) @@ -244,6 +246,8 @@ static void kexec_prepare_cpus(void) wake_offline_cpus(); smp_call_function(kexec_smp_down, NULL, /* wait */0); local_irq_disable(); + hard_irq_disable(); + mb(); /* make sure IRQs are disabled before we say they are */ get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF; @@ -281,6 +285,7 @@ static void kexec_prepare_cpus(void) if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(0, 0); local_irq_disable(); + hard_irq_disable(); } #endif /* SMP */ diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 9f44a775a106..6ee59a0eb268 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -386,6 +386,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | (value & 0xffff); break; + case R_PPC64_TOC16_LO: + /* Subtract TOC pointer */ + value -= my_r2(sechdrs, me); + *((uint16_t *) location) + = (*((uint16_t *) location) & ~0xffff) + | (value & 0xffff); + break; + case R_PPC64_TOC16_DS: /* Subtract TOC pointer */ value -= my_r2(sechdrs, me); @@ -399,6 +407,28 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | (value & 0xfffc); break; + case R_PPC64_TOC16_LO_DS: + /* Subtract TOC pointer */ + value -= my_r2(sechdrs, me); + if ((value & 3) != 0) { + printk("%s: bad TOC16_LO_DS relocation (%lu)\n", + me->name, value); + return -ENOEXEC; + } + *((uint16_t *) location) + = (*((uint16_t *) location) & ~0xfffc) + | (value & 0xfffc); + break; + + case R_PPC64_TOC16_HA: + /* Subtract TOC pointer */ + value -= my_r2(sechdrs, me); + value = ((value + 0x8000) >> 16); + *((uint16_t *) location) + = (*((uint16_t *) location) & ~0xffff) + | (value & 0xffff); + break; + case R_PPC_REL24: /* FIXME: Handle weak symbols here --RR */ if (sym->st_shndx == SHN_UNDEF) { diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 07c12697d708..a7b743076720 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -71,10 +71,8 @@ static int of_pci_phb_probe(struct platform_device *dev) eeh_dev_phb_init_dynamic(phb); /* Register devices with EEH */ -#ifdef CONFIG_EEH if (dev->dev.of_node->child) eeh_add_device_tree_early(dev->dev.of_node); -#endif /* CONFIG_EEH */ /* Scan the bus */ pcibios_scan_phb(phb); @@ -88,13 +86,14 @@ static int of_pci_phb_probe(struct platform_device *dev) pcibios_claim_one_bus(phb->bus); /* Finish EEH setup */ -#ifdef CONFIG_EEH eeh_add_device_tree_late(phb->bus); -#endif /* Add probed PCI devices to the device model */ pci_bus_add_devices(phb->bus); + /* sysfs files should only be added after devices are added */ + eeh_add_sysfs_files(phb->bus); + return 0; } diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index cd6da855090c..f8f24685f10a 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -120,8 +120,6 @@ struct slb_shadow slb_shadow[] __cacheline_aligned = { struct paca_struct *paca; EXPORT_SYMBOL(paca); -struct paca_struct boot_paca; - void __init initialise_paca(struct paca_struct *new_paca, int cpu) { /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 7c37379ea9b1..fa12ae42d98c 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1477,11 +1477,14 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus) pcibios_allocate_bus_resources(bus); pcibios_claim_one_bus(bus); + /* Fixup EEH */ + eeh_add_device_tree_late(bus); + /* Add new devices to global lists. Register in proc, sysfs. */ pci_bus_add_devices(bus); - /* Fixup EEH */ - eeh_add_device_tree_late(bus); + /* sysfs files should only be added after devices are added */ + eeh_add_sysfs_files(bus); } EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus); diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/kernel/ppc32.h index 02fb0ee26093..a27c914d5802 100644 --- a/arch/powerpc/kernel/ppc32.h +++ b/arch/powerpc/kernel/ppc32.h @@ -16,30 +16,6 @@ /* These are here to support 32-bit syscalls on a 64-bit kernel. */ -#define __old_sigaction32 old_sigaction32 - -struct __old_sigaction32 { - compat_uptr_t sa_handler; - compat_old_sigset_t sa_mask; - unsigned int sa_flags; - compat_uptr_t sa_restorer; /* not used by Linux/SPARC yet */ -}; - - - -struct sigaction32 { - compat_uptr_t sa_handler; /* Really a pointer, but need to deal with 32 bits */ - unsigned int sa_flags; - compat_uptr_t sa_restorer; /* Another 32 bit pointer */ - compat_sigset_t sa_mask; /* A 32 bit mask */ -}; - -typedef struct sigaltstack_32 { - unsigned int ss_sp; - int ss_flags; - compat_size_t ss_size; -} stack_32_t; - struct pt_regs32 { unsigned int gpr[32]; unsigned int nip; @@ -75,7 +51,7 @@ struct mcontext32 { struct ucontext32 { unsigned int uc_flags; unsigned int uc_link; - stack_32_t uc_stack; + compat_stack_t uc_stack; int uc_pad[7]; compat_uptr_t uc_regs; /* points to uc_mcontext field */ compat_sigset_t uc_sigmask; /* mask last for extensibility */ diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index c8ae3714e79b..f19d0bdc3241 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -32,7 +32,7 @@ static loff_t page_map_seek( struct file *file, loff_t off, int whence) { loff_t new; - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); switch(whence) { case 0: @@ -55,13 +55,13 @@ static loff_t page_map_seek( struct file *file, loff_t off, int whence) static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size); } static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); if ((vma->vm_end - vma->vm_start) > dp->size) return -EINVAL; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 81430674e71c..59dd545fdde1 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -50,6 +50,7 @@ #include <asm/runlatch.h> #include <asm/syscalls.h> #include <asm/switch_to.h> +#include <asm/tm.h> #include <asm/debug.h> #ifdef CONFIG_PPC64 #include <asm/firmware.h> @@ -57,6 +58,13 @@ #include <linux/kprobes.h> #include <linux/kdebug.h> +/* Transactional Memory debug */ +#ifdef TM_DEBUG_SW +#define TM_DEBUG(x...) printk(KERN_INFO x) +#else +#define TM_DEBUG(x...) do { } while(0) +#endif + extern unsigned long _get_SP(void); #ifndef CONFIG_SMP @@ -271,7 +279,7 @@ void do_send_trap(struct pt_regs *regs, unsigned long address, force_sig_info(SIGTRAP, &info, current); } #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ -void do_dabr(struct pt_regs *regs, unsigned long address, +void do_break (struct pt_regs *regs, unsigned long address, unsigned long error_code) { siginfo_t info; @@ -281,11 +289,11 @@ void do_dabr(struct pt_regs *regs, unsigned long address, 11, SIGSEGV) == NOTIFY_STOP) return; - if (debugger_dabr_match(regs)) + if (debugger_break_match(regs)) return; - /* Clear the DABR */ - set_dabr(0, 0); + /* Clear the breakpoint */ + hw_breakpoint_disable(); /* Deliver the signal to userspace */ info.si_signo = SIGTRAP; @@ -296,7 +304,7 @@ void do_dabr(struct pt_regs *regs, unsigned long address, } #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ -static DEFINE_PER_CPU(unsigned long, current_dabr); +static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk); #ifdef CONFIG_PPC_ADV_DEBUG_REGS /* @@ -364,39 +372,214 @@ static void switch_booke_debug_regs(struct thread_struct *new_thread) #ifndef CONFIG_HAVE_HW_BREAKPOINT static void set_debug_reg_defaults(struct thread_struct *thread) { - if (thread->dabr) { - thread->dabr = 0; - thread->dabrx = 0; - set_dabr(0, 0); - } + thread->hw_brk.address = 0; + thread->hw_brk.type = 0; + set_breakpoint(&thread->hw_brk); } #endif /* !CONFIG_HAVE_HW_BREAKPOINT */ #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ -int set_dabr(unsigned long dabr, unsigned long dabrx) -{ - __get_cpu_var(current_dabr) = dabr; - - if (ppc_md.set_dabr) - return ppc_md.set_dabr(dabr, dabrx); - - /* XXX should we have a CPU_FTR_HAS_DABR ? */ #ifdef CONFIG_PPC_ADV_DEBUG_REGS +static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) +{ mtspr(SPRN_DAC1, dabr); #ifdef CONFIG_PPC_47x isync(); #endif + return 0; +} #elif defined(CONFIG_PPC_BOOK3S) +static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) +{ mtspr(SPRN_DABR, dabr); mtspr(SPRN_DABRX, dabrx); + return 0; +} +#else +static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) +{ + return -EINVAL; +} #endif + +static inline int set_dabr(struct arch_hw_breakpoint *brk) +{ + unsigned long dabr, dabrx; + + dabr = brk->address | (brk->type & HW_BRK_TYPE_DABR); + dabrx = ((brk->type >> 3) & 0x7); + + if (ppc_md.set_dabr) + return ppc_md.set_dabr(dabr, dabrx); + + return __set_dabr(dabr, dabrx); +} + +static inline int set_dawr(struct arch_hw_breakpoint *brk) +{ + unsigned long dawr, dawrx, mrd; + + dawr = brk->address; + + dawrx = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE)) \ + << (63 - 58); //* read/write bits */ + dawrx |= ((brk->type & (HW_BRK_TYPE_TRANSLATE)) >> 2) \ + << (63 - 59); //* translate */ + dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) \ + >> 3; //* PRIM bits */ + /* dawr length is stored in field MDR bits 48:53. Matches range in + doublewords (64 bits) baised by -1 eg. 0b000000=1DW and + 0b111111=64DW. + brk->len is in bytes. + This aligns up to double word size, shifts and does the bias. + */ + mrd = ((brk->len + 7) >> 3) - 1; + dawrx |= (mrd & 0x3f) << (63 - 53); + + if (ppc_md.set_dawr) + return ppc_md.set_dawr(dawr, dawrx); + mtspr(SPRN_DAWR, dawr); + mtspr(SPRN_DAWRX, dawrx); return 0; } +int set_breakpoint(struct arch_hw_breakpoint *brk) +{ + __get_cpu_var(current_brk) = *brk; + + if (cpu_has_feature(CPU_FTR_DAWR)) + return set_dawr(brk); + + return set_dabr(brk); +} + #ifdef CONFIG_PPC64 DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array); #endif +static inline bool hw_brk_match(struct arch_hw_breakpoint *a, + struct arch_hw_breakpoint *b) +{ + if (a->address != b->address) + return false; + if (a->type != b->type) + return false; + if (a->len != b->len) + return false; + return true; +} +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static inline void tm_reclaim_task(struct task_struct *tsk) +{ + /* We have to work out if we're switching from/to a task that's in the + * middle of a transaction. + * + * In switching we need to maintain a 2nd register state as + * oldtask->thread.ckpt_regs. We tm_reclaim(oldproc); this saves the + * checkpointed (tbegin) state in ckpt_regs and saves the transactional + * (current) FPRs into oldtask->thread.transact_fpr[]. + * + * We also context switch (save) TFHAR/TEXASR/TFIAR in here. + */ + struct thread_struct *thr = &tsk->thread; + + if (!thr->regs) + return; + + if (!MSR_TM_ACTIVE(thr->regs->msr)) + goto out_and_saveregs; + + /* Stash the original thread MSR, as giveup_fpu et al will + * modify it. We hold onto it to see whether the task used + * FP & vector regs. + */ + thr->tm_orig_msr = thr->regs->msr; + + TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, " + "ccr=%lx, msr=%lx, trap=%lx)\n", + tsk->pid, thr->regs->nip, + thr->regs->ccr, thr->regs->msr, + thr->regs->trap); + + tm_reclaim(thr, thr->regs->msr, TM_CAUSE_RESCHED); + + TM_DEBUG("--- tm_reclaim on pid %d complete\n", + tsk->pid); + +out_and_saveregs: + /* Always save the regs here, even if a transaction's not active. + * This context-switches a thread's TM info SPRs. We do it here to + * be consistent with the restore path (in recheckpoint) which + * cannot happen later in _switch(). + */ + tm_save_sprs(thr); +} + +static inline void tm_recheckpoint_new_task(struct task_struct *new) +{ + unsigned long msr; + + if (!cpu_has_feature(CPU_FTR_TM)) + return; + + /* Recheckpoint the registers of the thread we're about to switch to. + * + * If the task was using FP, we non-lazily reload both the original and + * the speculative FP register states. This is because the kernel + * doesn't see if/when a TM rollback occurs, so if we take an FP + * unavoidable later, we are unable to determine which set of FP regs + * need to be restored. + */ + if (!new->thread.regs) + return; + + /* The TM SPRs are restored here, so that TEXASR.FS can be set + * before the trecheckpoint and no explosion occurs. + */ + tm_restore_sprs(&new->thread); + + if (!MSR_TM_ACTIVE(new->thread.regs->msr)) + return; + msr = new->thread.tm_orig_msr; + /* Recheckpoint to restore original checkpointed register state. */ + TM_DEBUG("*** tm_recheckpoint of pid %d " + "(new->msr 0x%lx, new->origmsr 0x%lx)\n", + new->pid, new->thread.regs->msr, msr); + + /* This loads the checkpointed FP/VEC state, if used */ + tm_recheckpoint(&new->thread, msr); + + /* This loads the speculative FP/VEC state, if used */ + if (msr & MSR_FP) { + do_load_up_transact_fpu(&new->thread); + new->thread.regs->msr |= + (MSR_FP | new->thread.fpexc_mode); + } + if (msr & MSR_VEC) { + do_load_up_transact_altivec(&new->thread); + new->thread.regs->msr |= MSR_VEC; + } + /* We may as well turn on VSX too since all the state is restored now */ + if (msr & MSR_VSX) + new->thread.regs->msr |= MSR_VSX; + + TM_DEBUG("*** tm_recheckpoint of pid %d complete " + "(kernel msr 0x%lx)\n", + new->pid, mfmsr()); +} + +static inline void __switch_to_tm(struct task_struct *prev) +{ + if (cpu_has_feature(CPU_FTR_TM)) { + tm_enable(); + tm_reclaim_task(prev); + } +} +#else +#define tm_recheckpoint_new_task(new) +#define __switch_to_tm(prev) +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *new) { @@ -407,6 +590,8 @@ struct task_struct *__switch_to(struct task_struct *prev, struct ppc64_tlb_batch *batch; #endif + __switch_to_tm(prev); + #ifdef CONFIG_SMP /* avoid complexity of lazy save/restore of fpu * by just saving it every time we switch out if @@ -481,8 +666,8 @@ struct task_struct *__switch_to(struct task_struct *prev, * schedule DABR */ #ifndef CONFIG_HAVE_HW_BREAKPOINT - if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) - set_dabr(new->thread.dabr, new->thread.dabrx); + if (unlikely(hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk))) + set_breakpoint(&new->thread.hw_brk); #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif @@ -522,6 +707,9 @@ struct task_struct *__switch_to(struct task_struct *prev, * of sync. Hard disable here. */ hard_irq_disable(); + + tm_recheckpoint_new_task(new); + last = _switch(old_thread, new_thread); #ifdef CONFIG_PPC_BOOK3S_64 @@ -683,6 +871,9 @@ void show_regs(struct pt_regs * regs) printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + printk("PACATMSCRATCH [%llx]\n", get_paca()->tm_scratch); +#endif show_stack(current, (unsigned long *) regs->gpr[1]); if (!user_mode(regs)) show_instructions(regs); @@ -813,6 +1004,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.dscr_inherit = current->thread.dscr_inherit; p->thread.dscr = current->thread.dscr; } + if (cpu_has_feature(CPU_FTR_HAS_PPR)) + p->thread.ppr = INIT_PPR; #endif /* * The PPC64 ABI makes use of a TOC to contain function @@ -892,7 +1085,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) regs->msr = MSR_USER32; } #endif - discard_lazy_cpu_state(); #ifdef CONFIG_VSX current->thread.used_vsr = 0; @@ -912,6 +1104,13 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.spefscr = 0; current->thread.used_spe = 0; #endif /* CONFIG_SPE */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (cpu_has_feature(CPU_FTR_TM)) + regs->msr |= MSR_TM; + current->thread.tm_tfhar = 0; + current->thread.tm_texasr = 0; + current->thread.tm_tfiar = 0; +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ } #define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 779f34049a56..13f8d168b3f1 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -66,8 +66,8 @@ * is running at whatever address it has been loaded at. * On ppc32 we compile with -mrelocatable, which means that references * to extern and static variables get relocated automatically. - * On ppc64 we have to relocate the references explicitly with - * RELOC. (Note that strings count as static variables.) + * ppc64 objects are always relocatable, we just need to relocate the + * TOC. * * Because OF may have mapped I/O devices into the area starting at * KERNELBASE, particularly on CHRP machines, we can't safely call @@ -79,13 +79,11 @@ * On ppc64, 64 bit values are truncated to 32 bits (and * fortunately don't get interpreted as two arguments). */ +#define ADDR(x) (u32)(unsigned long)(x) + #ifdef CONFIG_PPC64 -#define RELOC(x) (*PTRRELOC(&(x))) -#define ADDR(x) (u32) add_reloc_offset((unsigned long)(x)) #define OF_WORKAROUNDS 0 #else -#define RELOC(x) (x) -#define ADDR(x) (u32) (x) #define OF_WORKAROUNDS of_workarounds int of_workarounds; #endif @@ -95,7 +93,7 @@ int of_workarounds; #define PROM_BUG() do { \ prom_printf("kernel BUG at %s line 0x%x!\n", \ - RELOC(__FILE__), __LINE__); \ + __FILE__, __LINE__); \ __asm__ __volatile__(".long " BUG_ILLEGAL_INSTR); \ } while (0) @@ -233,7 +231,7 @@ static int __init call_prom(const char *service, int nargs, int nret, ...) for (i = 0; i < nret; i++) args.args[nargs+i] = 0; - if (enter_prom(&args, RELOC(prom_entry)) < 0) + if (enter_prom(&args, prom_entry) < 0) return PROM_ERROR; return (nret > 0) ? args.args[nargs] : 0; @@ -258,7 +256,7 @@ static int __init call_prom_ret(const char *service, int nargs, int nret, for (i = 0; i < nret; i++) args.args[nargs+i] = 0; - if (enter_prom(&args, RELOC(prom_entry)) < 0) + if (enter_prom(&args, prom_entry) < 0) return PROM_ERROR; if (rets != NULL) @@ -272,20 +270,19 @@ static int __init call_prom_ret(const char *service, int nargs, int nret, static void __init prom_print(const char *msg) { const char *p, *q; - struct prom_t *_prom = &RELOC(prom); - if (_prom->stdout == 0) + if (prom.stdout == 0) return; for (p = msg; *p != 0; p = q) { for (q = p; *q != 0 && *q != '\n'; ++q) ; if (q > p) - call_prom("write", 3, 1, _prom->stdout, p, q - p); + call_prom("write", 3, 1, prom.stdout, p, q - p); if (*q == 0) break; ++q; - call_prom("write", 3, 1, _prom->stdout, ADDR("\r\n"), 2); + call_prom("write", 3, 1, prom.stdout, ADDR("\r\n"), 2); } } @@ -294,7 +291,6 @@ static void __init prom_print_hex(unsigned long val) { int i, nibbles = sizeof(val)*2; char buf[sizeof(val)*2+1]; - struct prom_t *_prom = &RELOC(prom); for (i = nibbles-1; i >= 0; i--) { buf[i] = (val & 0xf) + '0'; @@ -303,7 +299,7 @@ static void __init prom_print_hex(unsigned long val) val >>= 4; } buf[nibbles] = '\0'; - call_prom("write", 3, 1, _prom->stdout, buf, nibbles); + call_prom("write", 3, 1, prom.stdout, buf, nibbles); } /* max number of decimal digits in an unsigned long */ @@ -312,7 +308,6 @@ static void __init prom_print_dec(unsigned long val) { int i, size; char buf[UL_DIGITS+1]; - struct prom_t *_prom = &RELOC(prom); for (i = UL_DIGITS-1; i >= 0; i--) { buf[i] = (val % 10) + '0'; @@ -322,7 +317,7 @@ static void __init prom_print_dec(unsigned long val) } /* shift stuff down */ size = UL_DIGITS - i; - call_prom("write", 3, 1, _prom->stdout, buf+i, size); + call_prom("write", 3, 1, prom.stdout, buf+i, size); } static void __init prom_printf(const char *format, ...) @@ -331,22 +326,18 @@ static void __init prom_printf(const char *format, ...) va_list args; unsigned long v; long vs; - struct prom_t *_prom = &RELOC(prom); va_start(args, format); -#ifdef CONFIG_PPC64 - format = PTRRELOC(format); -#endif for (p = format; *p != 0; p = q) { for (q = p; *q != 0 && *q != '\n' && *q != '%'; ++q) ; if (q > p) - call_prom("write", 3, 1, _prom->stdout, p, q - p); + call_prom("write", 3, 1, prom.stdout, p, q - p); if (*q == 0) break; if (*q == '\n') { ++q; - call_prom("write", 3, 1, _prom->stdout, + call_prom("write", 3, 1, prom.stdout, ADDR("\r\n"), 2); continue; } @@ -368,7 +359,7 @@ static void __init prom_printf(const char *format, ...) ++q; vs = va_arg(args, int); if (vs < 0) { - prom_print(RELOC("-")); + prom_print("-"); vs = -vs; } prom_print_dec(vs); @@ -389,7 +380,7 @@ static void __init prom_printf(const char *format, ...) ++q; vs = va_arg(args, long); if (vs < 0) { - prom_print(RELOC("-")); + prom_print("-"); vs = -vs; } prom_print_dec(vs); @@ -403,7 +394,6 @@ static void __init prom_printf(const char *format, ...) static unsigned int __init prom_claim(unsigned long virt, unsigned long size, unsigned long align) { - struct prom_t *_prom = &RELOC(prom); if (align == 0 && (OF_WORKAROUNDS & OF_WA_CLAIM)) { /* @@ -414,21 +404,21 @@ static unsigned int __init prom_claim(unsigned long virt, unsigned long size, prom_arg_t result; ret = call_prom_ret("call-method", 5, 2, &result, - ADDR("claim"), _prom->memory, + ADDR("claim"), prom.memory, align, size, virt); if (ret != 0 || result == -1) return -1; ret = call_prom_ret("call-method", 5, 2, &result, - ADDR("claim"), _prom->mmumap, + ADDR("claim"), prom.mmumap, align, size, virt); if (ret != 0) { call_prom("call-method", 4, 1, ADDR("release"), - _prom->memory, size, virt); + prom.memory, size, virt); return -1; } /* the 0x12 is M (coherence) + PP == read/write */ call_prom("call-method", 6, 1, - ADDR("map"), _prom->mmumap, 0x12, size, virt, virt); + ADDR("map"), prom.mmumap, 0x12, size, virt, virt); return virt; } return call_prom("claim", 3, 1, (prom_arg_t)virt, (prom_arg_t)size, @@ -437,13 +427,10 @@ static unsigned int __init prom_claim(unsigned long virt, unsigned long size, static void __init __attribute__((noreturn)) prom_panic(const char *reason) { -#ifdef CONFIG_PPC64 - reason = PTRRELOC(reason); -#endif prom_print(reason); /* Do not call exit because it clears the screen on pmac * it also causes some sort of double-fault on early pmacs */ - if (RELOC(of_platform) == PLATFORM_POWERMAC) + if (of_platform == PLATFORM_POWERMAC) asm("trap\n"); /* ToDo: should put up an SRC here on pSeries */ @@ -525,13 +512,13 @@ static int __init prom_setprop(phandle node, const char *nodename, add_string(&p, tohex((u32)(unsigned long) value)); add_string(&p, tohex(valuelen)); add_string(&p, tohex(ADDR(pname))); - add_string(&p, tohex(strlen(RELOC(pname)))); + add_string(&p, tohex(strlen(pname))); add_string(&p, "property"); *p = 0; return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd); } -/* We can't use the standard versions because of RELOC headaches. */ +/* We can't use the standard versions because of relocation headaches. */ #define isxdigit(c) (('0' <= (c) && (c) <= '9') \ || ('a' <= (c) && (c) <= 'f') \ || ('A' <= (c) && (c) <= 'F')) @@ -598,43 +585,42 @@ unsigned long prom_memparse(const char *ptr, const char **retptr) */ static void __init early_cmdline_parse(void) { - struct prom_t *_prom = &RELOC(prom); const char *opt; char *p; int l = 0; - RELOC(prom_cmd_line[0]) = 0; - p = RELOC(prom_cmd_line); - if ((long)_prom->chosen > 0) - l = prom_getprop(_prom->chosen, "bootargs", p, COMMAND_LINE_SIZE-1); + prom_cmd_line[0] = 0; + p = prom_cmd_line; + if ((long)prom.chosen > 0) + l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1); #ifdef CONFIG_CMDLINE if (l <= 0 || p[0] == '\0') /* dbl check */ - strlcpy(RELOC(prom_cmd_line), - RELOC(CONFIG_CMDLINE), sizeof(prom_cmd_line)); + strlcpy(prom_cmd_line, + CONFIG_CMDLINE, sizeof(prom_cmd_line)); #endif /* CONFIG_CMDLINE */ - prom_printf("command line: %s\n", RELOC(prom_cmd_line)); + prom_printf("command line: %s\n", prom_cmd_line); #ifdef CONFIG_PPC64 - opt = strstr(RELOC(prom_cmd_line), RELOC("iommu=")); + opt = strstr(prom_cmd_line, "iommu="); if (opt) { prom_printf("iommu opt is: %s\n", opt); opt += 6; while (*opt && *opt == ' ') opt++; - if (!strncmp(opt, RELOC("off"), 3)) - RELOC(prom_iommu_off) = 1; - else if (!strncmp(opt, RELOC("force"), 5)) - RELOC(prom_iommu_force_on) = 1; + if (!strncmp(opt, "off", 3)) + prom_iommu_off = 1; + else if (!strncmp(opt, "force", 5)) + prom_iommu_force_on = 1; } #endif - opt = strstr(RELOC(prom_cmd_line), RELOC("mem=")); + opt = strstr(prom_cmd_line, "mem="); if (opt) { opt += 4; - RELOC(prom_memory_limit) = prom_memparse(opt, (const char **)&opt); + prom_memory_limit = prom_memparse(opt, (const char **)&opt); #ifdef CONFIG_PPC64 /* Align to 16 MB == size of ppc64 large page */ - RELOC(prom_memory_limit) = ALIGN(RELOC(prom_memory_limit), 0x1000000); + prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000); #endif } } @@ -887,7 +873,7 @@ static int __init prom_count_smt_threads(void) type[0] = 0; prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, RELOC("cpu"))) + if (strcmp(type, "cpu")) continue; /* * There is an entry for each smt thread, each entry being @@ -929,7 +915,7 @@ static void __init prom_send_capabilities(void) * (we assume this is the same for all cores) and use it to * divide NR_CPUS. */ - cores = (u32 *)PTRRELOC(&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]); + cores = (u32 *)&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]; if (*cores != NR_CPUS) { prom_printf("WARNING ! " "ibm_architecture_vec structure inconsistent: %lu!\n", @@ -1005,21 +991,21 @@ static void __init prom_send_capabilities(void) */ static unsigned long __init alloc_up(unsigned long size, unsigned long align) { - unsigned long base = RELOC(alloc_bottom); + unsigned long base = alloc_bottom; unsigned long addr = 0; if (align) base = _ALIGN_UP(base, align); prom_debug("alloc_up(%x, %x)\n", size, align); - if (RELOC(ram_top) == 0) + if (ram_top == 0) prom_panic("alloc_up() called with mem not initialized\n"); if (align) - base = _ALIGN_UP(RELOC(alloc_bottom), align); + base = _ALIGN_UP(alloc_bottom, align); else - base = RELOC(alloc_bottom); + base = alloc_bottom; - for(; (base + size) <= RELOC(alloc_top); + for(; (base + size) <= alloc_top; base = _ALIGN_UP(base + 0x100000, align)) { prom_debug(" trying: 0x%x\n\r", base); addr = (unsigned long)prom_claim(base, size, 0); @@ -1031,14 +1017,14 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align) } if (addr == 0) return 0; - RELOC(alloc_bottom) = addr + size; + alloc_bottom = addr + size; prom_debug(" -> %x\n", addr); - prom_debug(" alloc_bottom : %x\n", RELOC(alloc_bottom)); - prom_debug(" alloc_top : %x\n", RELOC(alloc_top)); - prom_debug(" alloc_top_hi : %x\n", RELOC(alloc_top_high)); - prom_debug(" rmo_top : %x\n", RELOC(rmo_top)); - prom_debug(" ram_top : %x\n", RELOC(ram_top)); + prom_debug(" alloc_bottom : %x\n", alloc_bottom); + prom_debug(" alloc_top : %x\n", alloc_top); + prom_debug(" alloc_top_hi : %x\n", alloc_top_high); + prom_debug(" rmo_top : %x\n", rmo_top); + prom_debug(" ram_top : %x\n", ram_top); return addr; } @@ -1054,32 +1040,32 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, unsigned long base, addr = 0; prom_debug("alloc_down(%x, %x, %s)\n", size, align, - highmem ? RELOC("(high)") : RELOC("(low)")); - if (RELOC(ram_top) == 0) + highmem ? "(high)" : "(low)"); + if (ram_top == 0) prom_panic("alloc_down() called with mem not initialized\n"); if (highmem) { /* Carve out storage for the TCE table. */ - addr = _ALIGN_DOWN(RELOC(alloc_top_high) - size, align); - if (addr <= RELOC(alloc_bottom)) + addr = _ALIGN_DOWN(alloc_top_high - size, align); + if (addr <= alloc_bottom) return 0; /* Will we bump into the RMO ? If yes, check out that we * didn't overlap existing allocations there, if we did, * we are dead, we must be the first in town ! */ - if (addr < RELOC(rmo_top)) { + if (addr < rmo_top) { /* Good, we are first */ - if (RELOC(alloc_top) == RELOC(rmo_top)) - RELOC(alloc_top) = RELOC(rmo_top) = addr; + if (alloc_top == rmo_top) + alloc_top = rmo_top = addr; else return 0; } - RELOC(alloc_top_high) = addr; + alloc_top_high = addr; goto bail; } - base = _ALIGN_DOWN(RELOC(alloc_top) - size, align); - for (; base > RELOC(alloc_bottom); + base = _ALIGN_DOWN(alloc_top - size, align); + for (; base > alloc_bottom; base = _ALIGN_DOWN(base - 0x100000, align)) { prom_debug(" trying: 0x%x\n\r", base); addr = (unsigned long)prom_claim(base, size, 0); @@ -1089,15 +1075,15 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, } if (addr == 0) return 0; - RELOC(alloc_top) = addr; + alloc_top = addr; bail: prom_debug(" -> %x\n", addr); - prom_debug(" alloc_bottom : %x\n", RELOC(alloc_bottom)); - prom_debug(" alloc_top : %x\n", RELOC(alloc_top)); - prom_debug(" alloc_top_hi : %x\n", RELOC(alloc_top_high)); - prom_debug(" rmo_top : %x\n", RELOC(rmo_top)); - prom_debug(" ram_top : %x\n", RELOC(ram_top)); + prom_debug(" alloc_bottom : %x\n", alloc_bottom); + prom_debug(" alloc_top : %x\n", alloc_top); + prom_debug(" alloc_top_hi : %x\n", alloc_top_high); + prom_debug(" rmo_top : %x\n", rmo_top); + prom_debug(" ram_top : %x\n", ram_top); return addr; } @@ -1137,7 +1123,7 @@ static unsigned long __init prom_next_cell(int s, cell_t **cellp) static void __init reserve_mem(u64 base, u64 size) { u64 top = base + size; - unsigned long cnt = RELOC(mem_reserve_cnt); + unsigned long cnt = mem_reserve_cnt; if (size == 0) return; @@ -1152,9 +1138,9 @@ static void __init reserve_mem(u64 base, u64 size) if (cnt >= (MEM_RESERVE_MAP_SIZE - 1)) prom_panic("Memory reserve map exhausted !\n"); - RELOC(mem_reserve_map)[cnt].base = base; - RELOC(mem_reserve_map)[cnt].size = size; - RELOC(mem_reserve_cnt) = cnt + 1; + mem_reserve_map[cnt].base = base; + mem_reserve_map[cnt].size = size; + mem_reserve_cnt = cnt + 1; } /* @@ -1167,7 +1153,6 @@ static void __init prom_init_mem(void) char *path, type[64]; unsigned int plen; cell_t *p, *endp; - struct prom_t *_prom = &RELOC(prom); u32 rac, rsc; /* @@ -1176,14 +1161,14 @@ static void __init prom_init_mem(void) * 2) top of memory */ rac = 2; - prom_getprop(_prom->root, "#address-cells", &rac, sizeof(rac)); + prom_getprop(prom.root, "#address-cells", &rac, sizeof(rac)); rsc = 1; - prom_getprop(_prom->root, "#size-cells", &rsc, sizeof(rsc)); + prom_getprop(prom.root, "#size-cells", &rsc, sizeof(rsc)); prom_debug("root_addr_cells: %x\n", (unsigned long) rac); prom_debug("root_size_cells: %x\n", (unsigned long) rsc); prom_debug("scanning memory:\n"); - path = RELOC(prom_scratch); + path = prom_scratch; for (node = 0; prom_next_node(&node); ) { type[0] = 0; @@ -1196,15 +1181,15 @@ static void __init prom_init_mem(void) */ prom_getprop(node, "name", type, sizeof(type)); } - if (strcmp(type, RELOC("memory"))) + if (strcmp(type, "memory")) continue; - plen = prom_getprop(node, "reg", RELOC(regbuf), sizeof(regbuf)); + plen = prom_getprop(node, "reg", regbuf, sizeof(regbuf)); if (plen > sizeof(regbuf)) { prom_printf("memory node too large for buffer !\n"); plen = sizeof(regbuf); } - p = RELOC(regbuf); + p = regbuf; endp = p + (plen / sizeof(cell_t)); #ifdef DEBUG_PROM @@ -1222,14 +1207,14 @@ static void __init prom_init_mem(void) if (size == 0) continue; prom_debug(" %x %x\n", base, size); - if (base == 0 && (RELOC(of_platform) & PLATFORM_LPAR)) - RELOC(rmo_top) = size; - if ((base + size) > RELOC(ram_top)) - RELOC(ram_top) = base + size; + if (base == 0 && (of_platform & PLATFORM_LPAR)) + rmo_top = size; + if ((base + size) > ram_top) + ram_top = base + size; } } - RELOC(alloc_bottom) = PAGE_ALIGN((unsigned long)&RELOC(_end) + 0x4000); + alloc_bottom = PAGE_ALIGN((unsigned long)&_end + 0x4000); /* * If prom_memory_limit is set we reduce the upper limits *except* for @@ -1237,20 +1222,20 @@ static void __init prom_init_mem(void) * TCE's up there. */ - RELOC(alloc_top_high) = RELOC(ram_top); + alloc_top_high = ram_top; - if (RELOC(prom_memory_limit)) { - if (RELOC(prom_memory_limit) <= RELOC(alloc_bottom)) { + if (prom_memory_limit) { + if (prom_memory_limit <= alloc_bottom) { prom_printf("Ignoring mem=%x <= alloc_bottom.\n", - RELOC(prom_memory_limit)); - RELOC(prom_memory_limit) = 0; - } else if (RELOC(prom_memory_limit) >= RELOC(ram_top)) { + prom_memory_limit); + prom_memory_limit = 0; + } else if (prom_memory_limit >= ram_top) { prom_printf("Ignoring mem=%x >= ram_top.\n", - RELOC(prom_memory_limit)); - RELOC(prom_memory_limit) = 0; + prom_memory_limit); + prom_memory_limit = 0; } else { - RELOC(ram_top) = RELOC(prom_memory_limit); - RELOC(rmo_top) = min(RELOC(rmo_top), RELOC(prom_memory_limit)); + ram_top = prom_memory_limit; + rmo_top = min(rmo_top, prom_memory_limit); } } @@ -1262,36 +1247,35 @@ static void __init prom_init_mem(void) * Since 768MB is plenty of room, and we need to cap to something * reasonable on 32-bit, cap at 768MB on all machines. */ - if (!RELOC(rmo_top)) - RELOC(rmo_top) = RELOC(ram_top); - RELOC(rmo_top) = min(0x30000000ul, RELOC(rmo_top)); - RELOC(alloc_top) = RELOC(rmo_top); - RELOC(alloc_top_high) = RELOC(ram_top); + if (!rmo_top) + rmo_top = ram_top; + rmo_top = min(0x30000000ul, rmo_top); + alloc_top = rmo_top; + alloc_top_high = ram_top; /* * Check if we have an initrd after the kernel but still inside * the RMO. If we do move our bottom point to after it. */ - if (RELOC(prom_initrd_start) && - RELOC(prom_initrd_start) < RELOC(rmo_top) && - RELOC(prom_initrd_end) > RELOC(alloc_bottom)) - RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end)); + if (prom_initrd_start && + prom_initrd_start < rmo_top && + prom_initrd_end > alloc_bottom) + alloc_bottom = PAGE_ALIGN(prom_initrd_end); prom_printf("memory layout at init:\n"); - prom_printf(" memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit)); - prom_printf(" alloc_bottom : %x\n", RELOC(alloc_bottom)); - prom_printf(" alloc_top : %x\n", RELOC(alloc_top)); - prom_printf(" alloc_top_hi : %x\n", RELOC(alloc_top_high)); - prom_printf(" rmo_top : %x\n", RELOC(rmo_top)); - prom_printf(" ram_top : %x\n", RELOC(ram_top)); + prom_printf(" memory_limit : %x (16 MB aligned)\n", prom_memory_limit); + prom_printf(" alloc_bottom : %x\n", alloc_bottom); + prom_printf(" alloc_top : %x\n", alloc_top); + prom_printf(" alloc_top_hi : %x\n", alloc_top_high); + prom_printf(" rmo_top : %x\n", rmo_top); + prom_printf(" ram_top : %x\n", ram_top); } static void __init prom_close_stdin(void) { - struct prom_t *_prom = &RELOC(prom); ihandle val; - if (prom_getprop(_prom->chosen, "stdin", &val, sizeof(val)) > 0) + if (prom_getprop(prom.chosen, "stdin", &val, sizeof(val)) > 0) call_prom("close", 1, 0, val); } @@ -1332,19 +1316,19 @@ static void __init prom_query_opal(void) } prom_printf("Querying for OPAL presence... "); - rc = opal_query_takeover(&RELOC(prom_opal_size), - &RELOC(prom_opal_align)); + rc = opal_query_takeover(&prom_opal_size, + &prom_opal_align); prom_debug("(rc = %ld) ", rc); if (rc != 0) { prom_printf("not there.\n"); return; } - RELOC(of_platform) = PLATFORM_OPAL; + of_platform = PLATFORM_OPAL; prom_printf(" there !\n"); - prom_debug(" opal_size = 0x%lx\n", RELOC(prom_opal_size)); - prom_debug(" opal_align = 0x%lx\n", RELOC(prom_opal_align)); - if (RELOC(prom_opal_align) < 0x10000) - RELOC(prom_opal_align) = 0x10000; + prom_debug(" opal_size = 0x%lx\n", prom_opal_size); + prom_debug(" opal_align = 0x%lx\n", prom_opal_align); + if (prom_opal_align < 0x10000) + prom_opal_align = 0x10000; } static int prom_rtas_call(int token, int nargs, int nret, int *outputs, ...) @@ -1365,8 +1349,8 @@ static int prom_rtas_call(int token, int nargs, int nret, int *outputs, ...) for (i = 0; i < nret; ++i) rtas_args.rets[i] = 0; - opal_enter_rtas(&rtas_args, RELOC(prom_rtas_data), - RELOC(prom_rtas_entry)); + opal_enter_rtas(&rtas_args, prom_rtas_data, + prom_rtas_entry); if (nret > 1 && outputs != NULL) for (i = 0; i < nret-1; ++i) @@ -1381,9 +1365,8 @@ static void __init prom_opal_hold_cpus(void) phandle node; char type[64]; u32 servers[8]; - struct prom_t *_prom = &RELOC(prom); - void *entry = (unsigned long *)&RELOC(opal_secondary_entry); - struct opal_secondary_data *data = &RELOC(opal_secondary_data); + void *entry = (unsigned long *)&opal_secondary_entry; + struct opal_secondary_data *data = &opal_secondary_data; prom_debug("prom_opal_hold_cpus: start...\n"); prom_debug(" - entry = 0x%x\n", entry); @@ -1396,12 +1379,12 @@ static void __init prom_opal_hold_cpus(void) for (node = 0; prom_next_node(&node); ) { type[0] = 0; prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, RELOC("cpu")) != 0) + if (strcmp(type, "cpu") != 0) continue; /* Skip non-configured cpus. */ if (prom_getprop(node, "status", type, sizeof(type)) > 0) - if (strcmp(type, RELOC("okay")) != 0) + if (strcmp(type, "okay") != 0) continue; cnt = prom_getprop(node, "ibm,ppc-interrupt-server#s", servers, @@ -1412,7 +1395,7 @@ static void __init prom_opal_hold_cpus(void) for (i = 0; i < cnt; i++) { cpu = servers[i]; prom_debug("CPU %d ... ", cpu); - if (cpu == _prom->cpu) { + if (cpu == prom.cpu) { prom_debug("booted !\n"); continue; } @@ -1423,7 +1406,7 @@ static void __init prom_opal_hold_cpus(void) * spinloop. */ data->ack = -1; - rc = prom_rtas_call(RELOC(prom_rtas_start_cpu), 3, 1, + rc = prom_rtas_call(prom_rtas_start_cpu, 3, 1, NULL, cpu, entry, data); prom_debug("rtas rc=%d ...", rc); @@ -1443,21 +1426,21 @@ static void __init prom_opal_hold_cpus(void) static void __init prom_opal_takeover(void) { - struct opal_secondary_data *data = &RELOC(opal_secondary_data); + struct opal_secondary_data *data = &opal_secondary_data; struct opal_takeover_args *args = &data->args; - u64 align = RELOC(prom_opal_align); + u64 align = prom_opal_align; u64 top_addr, opal_addr; - args->k_image = (u64)RELOC(_stext); + args->k_image = (u64)_stext; args->k_size = _end - _stext; args->k_entry = 0; args->k_entry2 = 0x60; top_addr = _ALIGN_UP(args->k_size, align); - if (RELOC(prom_initrd_start) != 0) { - args->rd_image = RELOC(prom_initrd_start); - args->rd_size = RELOC(prom_initrd_end) - args->rd_image; + if (prom_initrd_start != 0) { + args->rd_image = prom_initrd_start; + args->rd_size = prom_initrd_end - args->rd_image; args->rd_loc = top_addr; top_addr = _ALIGN_UP(args->rd_loc + args->rd_size, align); } @@ -1469,13 +1452,13 @@ static void __init prom_opal_takeover(void) * has plenty of memory, and we ask for the HAL for now to * be just below the 1G point, or above the initrd */ - opal_addr = _ALIGN_DOWN(0x40000000 - RELOC(prom_opal_size), align); + opal_addr = _ALIGN_DOWN(0x40000000 - prom_opal_size, align); if (opal_addr < top_addr) opal_addr = top_addr; args->hal_addr = opal_addr; /* Copy the command line to the kernel image */ - strlcpy(RELOC(boot_command_line), RELOC(prom_cmd_line), + strlcpy(boot_command_line, prom_cmd_line, COMMAND_LINE_SIZE); prom_debug(" k_image = 0x%lx\n", args->k_image); @@ -1557,8 +1540,8 @@ static void __init prom_instantiate_opal(void) &entry, sizeof(entry)); #ifdef CONFIG_PPC_EARLY_DEBUG_OPAL - RELOC(prom_opal_base) = base; - RELOC(prom_opal_entry) = entry; + prom_opal_base = base; + prom_opal_entry = entry; #endif prom_debug("prom_instantiate_opal: end...\n"); } @@ -1616,9 +1599,9 @@ static void __init prom_instantiate_rtas(void) #ifdef CONFIG_PPC_POWERNV /* PowerVN takeover hack */ - RELOC(prom_rtas_data) = base; - RELOC(prom_rtas_entry) = entry; - prom_getprop(rtas_node, "start-cpu", &RELOC(prom_rtas_start_cpu), 4); + prom_rtas_data = base; + prom_rtas_entry = entry; + prom_getprop(rtas_node, "start-cpu", &prom_rtas_start_cpu, 4); #endif prom_debug("rtas base = 0x%x\n", base); prom_debug("rtas entry = 0x%x\n", entry); @@ -1693,20 +1676,20 @@ static void __init prom_initialize_tce_table(void) phandle node; ihandle phb_node; char compatible[64], type[64], model[64]; - char *path = RELOC(prom_scratch); + char *path = prom_scratch; u64 base, align; u32 minalign, minsize; u64 tce_entry, *tce_entryp; u64 local_alloc_top, local_alloc_bottom; u64 i; - if (RELOC(prom_iommu_off)) + if (prom_iommu_off) return; prom_debug("starting prom_initialize_tce_table\n"); /* Cache current top of allocs so we reserve a single block */ - local_alloc_top = RELOC(alloc_top_high); + local_alloc_top = alloc_top_high; local_alloc_bottom = local_alloc_top; /* Search all nodes looking for PHBs. */ @@ -1719,19 +1702,19 @@ static void __init prom_initialize_tce_table(void) prom_getprop(node, "device_type", type, sizeof(type)); prom_getprop(node, "model", model, sizeof(model)); - if ((type[0] == 0) || (strstr(type, RELOC("pci")) == NULL)) + if ((type[0] == 0) || (strstr(type, "pci") == NULL)) continue; /* Keep the old logic intact to avoid regression. */ if (compatible[0] != 0) { - if ((strstr(compatible, RELOC("python")) == NULL) && - (strstr(compatible, RELOC("Speedwagon")) == NULL) && - (strstr(compatible, RELOC("Winnipeg")) == NULL)) + if ((strstr(compatible, "python") == NULL) && + (strstr(compatible, "Speedwagon") == NULL) && + (strstr(compatible, "Winnipeg") == NULL)) continue; } else if (model[0] != 0) { - if ((strstr(model, RELOC("ython")) == NULL) && - (strstr(model, RELOC("peedwagon")) == NULL) && - (strstr(model, RELOC("innipeg")) == NULL)) + if ((strstr(model, "ython") == NULL) && + (strstr(model, "peedwagon") == NULL) && + (strstr(model, "innipeg") == NULL)) continue; } @@ -1810,8 +1793,8 @@ static void __init prom_initialize_tce_table(void) /* These are only really needed if there is a memory limit in * effect, but we don't know so export them always. */ - RELOC(prom_tce_alloc_start) = local_alloc_bottom; - RELOC(prom_tce_alloc_end) = local_alloc_top; + prom_tce_alloc_start = local_alloc_bottom; + prom_tce_alloc_end = local_alloc_top; /* Flag the first invalid entry */ prom_debug("ending prom_initialize_tce_table\n"); @@ -1848,7 +1831,6 @@ static void __init prom_hold_cpus(void) unsigned int reg; phandle node; char type[64]; - struct prom_t *_prom = &RELOC(prom); unsigned long *spinloop = (void *) LOW_ADDR(__secondary_hold_spinloop); unsigned long *acknowledge @@ -1874,12 +1856,12 @@ static void __init prom_hold_cpus(void) for (node = 0; prom_next_node(&node); ) { type[0] = 0; prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, RELOC("cpu")) != 0) + if (strcmp(type, "cpu") != 0) continue; /* Skip non-configured cpus. */ if (prom_getprop(node, "status", type, sizeof(type)) > 0) - if (strcmp(type, RELOC("okay")) != 0) + if (strcmp(type, "okay") != 0) continue; reg = -1; @@ -1893,7 +1875,7 @@ static void __init prom_hold_cpus(void) */ *acknowledge = (unsigned long)-1; - if (reg != _prom->cpu) { + if (reg != prom.cpu) { /* Primary Thread of non-boot cpu or any thread */ prom_printf("starting cpu hw idx %lu... ", reg); call_prom("start-cpu", 3, 0, node, @@ -1920,22 +1902,20 @@ static void __init prom_hold_cpus(void) static void __init prom_init_client_services(unsigned long pp) { - struct prom_t *_prom = &RELOC(prom); - /* Get a handle to the prom entry point before anything else */ - RELOC(prom_entry) = pp; + prom_entry = pp; /* get a handle for the stdout device */ - _prom->chosen = call_prom("finddevice", 1, 1, ADDR("/chosen")); - if (!PHANDLE_VALID(_prom->chosen)) + prom.chosen = call_prom("finddevice", 1, 1, ADDR("/chosen")); + if (!PHANDLE_VALID(prom.chosen)) prom_panic("cannot find chosen"); /* msg won't be printed :( */ /* get device tree root */ - _prom->root = call_prom("finddevice", 1, 1, ADDR("/")); - if (!PHANDLE_VALID(_prom->root)) + prom.root = call_prom("finddevice", 1, 1, ADDR("/")); + if (!PHANDLE_VALID(prom.root)) prom_panic("cannot find device tree root"); /* msg won't be printed :( */ - _prom->mmumap = 0; + prom.mmumap = 0; } #ifdef CONFIG_PPC32 @@ -1946,7 +1926,6 @@ static void __init prom_init_client_services(unsigned long pp) */ static void __init prom_find_mmu(void) { - struct prom_t *_prom = &RELOC(prom); phandle oprom; char version[64]; @@ -1964,10 +1943,10 @@ static void __init prom_find_mmu(void) call_prom("interpret", 1, 1, "dev /memory 0 to allow-reclaim"); } else return; - _prom->memory = call_prom("open", 1, 1, ADDR("/memory")); - prom_getprop(_prom->chosen, "mmu", &_prom->mmumap, - sizeof(_prom->mmumap)); - if (!IHANDLE_VALID(_prom->memory) || !IHANDLE_VALID(_prom->mmumap)) + prom.memory = call_prom("open", 1, 1, ADDR("/memory")); + prom_getprop(prom.chosen, "mmu", &prom.mmumap, + sizeof(prom.mmumap)); + if (!IHANDLE_VALID(prom.memory) || !IHANDLE_VALID(prom.mmumap)) of_workarounds &= ~OF_WA_CLAIM; /* hmmm */ } #else @@ -1976,36 +1955,34 @@ static void __init prom_find_mmu(void) static void __init prom_init_stdout(void) { - struct prom_t *_prom = &RELOC(prom); - char *path = RELOC(of_stdout_device); + char *path = of_stdout_device; char type[16]; u32 val; - if (prom_getprop(_prom->chosen, "stdout", &val, sizeof(val)) <= 0) + if (prom_getprop(prom.chosen, "stdout", &val, sizeof(val)) <= 0) prom_panic("cannot find stdout"); - _prom->stdout = val; + prom.stdout = val; /* Get the full OF pathname of the stdout device */ memset(path, 0, 256); - call_prom("instance-to-path", 3, 1, _prom->stdout, path, 255); - val = call_prom("instance-to-package", 1, 1, _prom->stdout); - prom_setprop(_prom->chosen, "/chosen", "linux,stdout-package", + call_prom("instance-to-path", 3, 1, prom.stdout, path, 255); + val = call_prom("instance-to-package", 1, 1, prom.stdout); + prom_setprop(prom.chosen, "/chosen", "linux,stdout-package", &val, sizeof(val)); - prom_printf("OF stdout device is: %s\n", RELOC(of_stdout_device)); - prom_setprop(_prom->chosen, "/chosen", "linux,stdout-path", + prom_printf("OF stdout device is: %s\n", of_stdout_device); + prom_setprop(prom.chosen, "/chosen", "linux,stdout-path", path, strlen(path) + 1); /* If it's a display, note it */ memset(type, 0, sizeof(type)); prom_getprop(val, "device_type", type, sizeof(type)); - if (strcmp(type, RELOC("display")) == 0) + if (strcmp(type, "display") == 0) prom_setprop(val, path, "linux,boot-display", NULL, 0); } static int __init prom_find_machine_type(void) { - struct prom_t *_prom = &RELOC(prom); char compat[256]; int len, i = 0; #ifdef CONFIG_PPC64 @@ -2014,7 +1991,7 @@ static int __init prom_find_machine_type(void) #endif /* Look for a PowerMac or a Cell */ - len = prom_getprop(_prom->root, "compatible", + len = prom_getprop(prom.root, "compatible", compat, sizeof(compat)-1); if (len > 0) { compat[len] = 0; @@ -2023,16 +2000,16 @@ static int __init prom_find_machine_type(void) int sl = strlen(p); if (sl == 0) break; - if (strstr(p, RELOC("Power Macintosh")) || - strstr(p, RELOC("MacRISC"))) + if (strstr(p, "Power Macintosh") || + strstr(p, "MacRISC")) return PLATFORM_POWERMAC; #ifdef CONFIG_PPC64 /* We must make sure we don't detect the IBM Cell * blades as pSeries due to some firmware issues, * so we do it here. */ - if (strstr(p, RELOC("IBM,CBEA")) || - strstr(p, RELOC("IBM,CPBW-1.0"))) + if (strstr(p, "IBM,CBEA") || + strstr(p, "IBM,CPBW-1.0")) return PLATFORM_GENERIC; #endif /* CONFIG_PPC64 */ i += sl + 1; @@ -2049,11 +2026,11 @@ static int __init prom_find_machine_type(void) * non-IBM designs ! * - it has /rtas */ - len = prom_getprop(_prom->root, "device_type", + len = prom_getprop(prom.root, "device_type", compat, sizeof(compat)-1); if (len <= 0) return PLATFORM_GENERIC; - if (strcmp(compat, RELOC("chrp"))) + if (strcmp(compat, "chrp")) return PLATFORM_GENERIC; /* Default to pSeries. We need to know if we are running LPAR */ @@ -2115,11 +2092,11 @@ static void __init prom_check_displays(void) for (node = 0; prom_next_node(&node); ) { memset(type, 0, sizeof(type)); prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, RELOC("display")) != 0) + if (strcmp(type, "display") != 0) continue; /* It seems OF doesn't null-terminate the path :-( */ - path = RELOC(prom_scratch); + path = prom_scratch; memset(path, 0, PROM_SCRATCH_SIZE); /* @@ -2143,15 +2120,15 @@ static void __init prom_check_displays(void) /* Setup a usable color table when the appropriate * method is available. Should update this to set-colors */ - clut = RELOC(default_colors); + clut = default_colors; for (i = 0; i < 16; i++, clut += 3) if (prom_set_color(ih, i, clut[0], clut[1], clut[2]) != 0) break; #ifdef CONFIG_LOGO_LINUX_CLUT224 - clut = PTRRELOC(RELOC(logo_linux_clut224.clut)); - for (i = 0; i < RELOC(logo_linux_clut224.clutsize); i++, clut += 3) + clut = PTRRELOC(logo_linux_clut224.clut); + for (i = 0; i < logo_linux_clut224.clutsize; i++, clut += 3) if (prom_set_color(ih, i + 32, clut[0], clut[1], clut[2]) != 0) break; @@ -2171,8 +2148,8 @@ static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end, unsigned long room, chunk; prom_debug("Chunk exhausted, claiming more at %x...\n", - RELOC(alloc_bottom)); - room = RELOC(alloc_top) - RELOC(alloc_bottom); + alloc_bottom); + room = alloc_top - alloc_bottom; if (room > DEVTREE_CHUNK_SIZE) room = DEVTREE_CHUNK_SIZE; if (room < PAGE_SIZE) @@ -2198,9 +2175,9 @@ static unsigned long __init dt_find_string(char *str) { char *s, *os; - s = os = (char *)RELOC(dt_string_start); + s = os = (char *)dt_string_start; s += 4; - while (s < (char *)RELOC(dt_string_end)) { + while (s < (char *)dt_string_end) { if (strcmp(s, str) == 0) return s - os; s += strlen(s) + 1; @@ -2222,10 +2199,10 @@ static void __init scan_dt_build_strings(phandle node, unsigned long soff; phandle child; - sstart = (char *)RELOC(dt_string_start); + sstart = (char *)dt_string_start; /* get and store all property names */ - prev_name = RELOC(""); + prev_name = ""; for (;;) { /* 64 is max len of name including nul. */ namep = make_room(mem_start, mem_end, MAX_PROPERTY_NAME, 1); @@ -2236,9 +2213,9 @@ static void __init scan_dt_build_strings(phandle node, } /* skip "name" */ - if (strcmp(namep, RELOC("name")) == 0) { + if (strcmp(namep, "name") == 0) { *mem_start = (unsigned long)namep; - prev_name = RELOC("name"); + prev_name = "name"; continue; } /* get/create string entry */ @@ -2249,7 +2226,7 @@ static void __init scan_dt_build_strings(phandle node, } else { /* Trim off some if we can */ *mem_start = (unsigned long)namep + strlen(namep) + 1; - RELOC(dt_string_end) = *mem_start; + dt_string_end = *mem_start; } prev_name = namep; } @@ -2304,35 +2281,35 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, } /* get it again for debugging */ - path = RELOC(prom_scratch); + path = prom_scratch; memset(path, 0, PROM_SCRATCH_SIZE); call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); /* get and store all properties */ - prev_name = RELOC(""); - sstart = (char *)RELOC(dt_string_start); + prev_name = ""; + sstart = (char *)dt_string_start; for (;;) { if (call_prom("nextprop", 3, 1, node, prev_name, - RELOC(pname)) != 1) + pname) != 1) break; /* skip "name" */ - if (strcmp(RELOC(pname), RELOC("name")) == 0) { - prev_name = RELOC("name"); + if (strcmp(pname, "name") == 0) { + prev_name = "name"; continue; } /* find string offset */ - soff = dt_find_string(RELOC(pname)); + soff = dt_find_string(pname); if (soff == 0) { prom_printf("WARNING: Can't find string index for" - " <%s>, node %s\n", RELOC(pname), path); + " <%s>, node %s\n", pname, path); break; } prev_name = sstart + soff; /* get length */ - l = call_prom("getproplen", 2, 1, node, RELOC(pname)); + l = call_prom("getproplen", 2, 1, node, pname); /* sanity checks */ if (l == PROM_ERROR) @@ -2345,10 +2322,10 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, /* push property content */ valp = make_room(mem_start, mem_end, l, 4); - call_prom("getprop", 4, 1, node, RELOC(pname), valp, l); + call_prom("getprop", 4, 1, node, pname, valp, l); *mem_start = _ALIGN(*mem_start, 4); - if (!strcmp(RELOC(pname), RELOC("phandle"))) + if (!strcmp(pname, "phandle")) has_phandle = 1; } @@ -2356,7 +2333,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, * existed (can happen with OPAL) */ if (!has_phandle) { - soff = dt_find_string(RELOC("linux,phandle")); + soff = dt_find_string("linux,phandle"); if (soff == 0) prom_printf("WARNING: Can't find string index for" " <linux-phandle> node %s\n", path); @@ -2384,7 +2361,6 @@ static void __init flatten_device_tree(void) phandle root; unsigned long mem_start, mem_end, room; struct boot_param_header *hdr; - struct prom_t *_prom = &RELOC(prom); char *namep; u64 *rsvmap; @@ -2392,10 +2368,10 @@ static void __init flatten_device_tree(void) * Check how much room we have between alloc top & bottom (+/- a * few pages), crop to 1MB, as this is our "chunk" size */ - room = RELOC(alloc_top) - RELOC(alloc_bottom) - 0x4000; + room = alloc_top - alloc_bottom - 0x4000; if (room > DEVTREE_CHUNK_SIZE) room = DEVTREE_CHUNK_SIZE; - prom_debug("starting device tree allocs at %x\n", RELOC(alloc_bottom)); + prom_debug("starting device tree allocs at %x\n", alloc_bottom); /* Now try to claim that */ mem_start = (unsigned long)alloc_up(room, PAGE_SIZE); @@ -2412,66 +2388,66 @@ static void __init flatten_device_tree(void) mem_start = _ALIGN(mem_start, 4); hdr = make_room(&mem_start, &mem_end, sizeof(struct boot_param_header), 4); - RELOC(dt_header_start) = (unsigned long)hdr; + dt_header_start = (unsigned long)hdr; rsvmap = make_room(&mem_start, &mem_end, sizeof(mem_reserve_map), 8); /* Start of strings */ mem_start = PAGE_ALIGN(mem_start); - RELOC(dt_string_start) = mem_start; + dt_string_start = mem_start; mem_start += 4; /* hole */ /* Add "linux,phandle" in there, we'll need it */ namep = make_room(&mem_start, &mem_end, 16, 1); - strcpy(namep, RELOC("linux,phandle")); + strcpy(namep, "linux,phandle"); mem_start = (unsigned long)namep + strlen(namep) + 1; /* Build string array */ prom_printf("Building dt strings...\n"); scan_dt_build_strings(root, &mem_start, &mem_end); - RELOC(dt_string_end) = mem_start; + dt_string_end = mem_start; /* Build structure */ mem_start = PAGE_ALIGN(mem_start); - RELOC(dt_struct_start) = mem_start; + dt_struct_start = mem_start; prom_printf("Building dt structure...\n"); scan_dt_build_struct(root, &mem_start, &mem_end); dt_push_token(OF_DT_END, &mem_start, &mem_end); - RELOC(dt_struct_end) = PAGE_ALIGN(mem_start); + dt_struct_end = PAGE_ALIGN(mem_start); /* Finish header */ - hdr->boot_cpuid_phys = _prom->cpu; + hdr->boot_cpuid_phys = prom.cpu; hdr->magic = OF_DT_HEADER; - hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start); - hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start); - hdr->off_dt_strings = RELOC(dt_string_start) - RELOC(dt_header_start); - hdr->dt_strings_size = RELOC(dt_string_end) - RELOC(dt_string_start); - hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - RELOC(dt_header_start); + hdr->totalsize = dt_struct_end - dt_header_start; + hdr->off_dt_struct = dt_struct_start - dt_header_start; + hdr->off_dt_strings = dt_string_start - dt_header_start; + hdr->dt_strings_size = dt_string_end - dt_string_start; + hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - dt_header_start; hdr->version = OF_DT_VERSION; /* Version 16 is not backward compatible */ hdr->last_comp_version = 0x10; /* Copy the reserve map in */ - memcpy(rsvmap, RELOC(mem_reserve_map), sizeof(mem_reserve_map)); + memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map)); #ifdef DEBUG_PROM { int i; prom_printf("reserved memory map:\n"); - for (i = 0; i < RELOC(mem_reserve_cnt); i++) + for (i = 0; i < mem_reserve_cnt; i++) prom_printf(" %x - %x\n", - RELOC(mem_reserve_map)[i].base, - RELOC(mem_reserve_map)[i].size); + mem_reserve_map[i].base, + mem_reserve_map[i].size); } #endif /* Bump mem_reserve_cnt to cause further reservations to fail * since it's too late. */ - RELOC(mem_reserve_cnt) = MEM_RESERVE_MAP_SIZE; + mem_reserve_cnt = MEM_RESERVE_MAP_SIZE; prom_printf("Device tree strings 0x%x -> 0x%x\n", - RELOC(dt_string_start), RELOC(dt_string_end)); + dt_string_start, dt_string_end); prom_printf("Device tree struct 0x%x -> 0x%x\n", - RELOC(dt_struct_start), RELOC(dt_struct_end)); + dt_struct_start, dt_struct_end); } @@ -2526,7 +2502,6 @@ static void __init fixup_device_tree_maple_memory_controller(void) phandle mc; u32 mc_reg[4]; char *name = "/hostbridge@f8000000"; - struct prom_t *_prom = &RELOC(prom); u32 ac, sc; mc = call_prom("finddevice", 1, 1, ADDR(name)); @@ -2536,8 +2511,8 @@ static void __init fixup_device_tree_maple_memory_controller(void) if (prom_getproplen(mc, "reg") != 8) return; - prom_getprop(_prom->root, "#address-cells", &ac, sizeof(ac)); - prom_getprop(_prom->root, "#size-cells", &sc, sizeof(sc)); + prom_getprop(prom.root, "#address-cells", &ac, sizeof(ac)); + prom_getprop(prom.root, "#size-cells", &sc, sizeof(sc)); if ((ac != 2) || (sc != 2)) return; @@ -2806,50 +2781,94 @@ static void __init fixup_device_tree(void) static void __init prom_find_boot_cpu(void) { - struct prom_t *_prom = &RELOC(prom); u32 getprop_rval; ihandle prom_cpu; phandle cpu_pkg; - _prom->cpu = 0; - if (prom_getprop(_prom->chosen, "cpu", &prom_cpu, sizeof(prom_cpu)) <= 0) + prom.cpu = 0; + if (prom_getprop(prom.chosen, "cpu", &prom_cpu, sizeof(prom_cpu)) <= 0) return; cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu); prom_getprop(cpu_pkg, "reg", &getprop_rval, sizeof(getprop_rval)); - _prom->cpu = getprop_rval; + prom.cpu = getprop_rval; - prom_debug("Booting CPU hw index = %lu\n", _prom->cpu); + prom_debug("Booting CPU hw index = %lu\n", prom.cpu); } static void __init prom_check_initrd(unsigned long r3, unsigned long r4) { #ifdef CONFIG_BLK_DEV_INITRD - struct prom_t *_prom = &RELOC(prom); - if (r3 && r4 && r4 != 0xdeadbeef) { unsigned long val; - RELOC(prom_initrd_start) = is_kernel_addr(r3) ? __pa(r3) : r3; - RELOC(prom_initrd_end) = RELOC(prom_initrd_start) + r4; + prom_initrd_start = is_kernel_addr(r3) ? __pa(r3) : r3; + prom_initrd_end = prom_initrd_start + r4; - val = RELOC(prom_initrd_start); - prom_setprop(_prom->chosen, "/chosen", "linux,initrd-start", + val = prom_initrd_start; + prom_setprop(prom.chosen, "/chosen", "linux,initrd-start", &val, sizeof(val)); - val = RELOC(prom_initrd_end); - prom_setprop(_prom->chosen, "/chosen", "linux,initrd-end", + val = prom_initrd_end; + prom_setprop(prom.chosen, "/chosen", "linux,initrd-end", &val, sizeof(val)); - reserve_mem(RELOC(prom_initrd_start), - RELOC(prom_initrd_end) - RELOC(prom_initrd_start)); + reserve_mem(prom_initrd_start, + prom_initrd_end - prom_initrd_start); - prom_debug("initrd_start=0x%x\n", RELOC(prom_initrd_start)); - prom_debug("initrd_end=0x%x\n", RELOC(prom_initrd_end)); + prom_debug("initrd_start=0x%x\n", prom_initrd_start); + prom_debug("initrd_end=0x%x\n", prom_initrd_end); } #endif /* CONFIG_BLK_DEV_INITRD */ } +#ifdef CONFIG_PPC64 +#ifdef CONFIG_RELOCATABLE +static void reloc_toc(void) +{ +} + +static void unreloc_toc(void) +{ +} +#else +static void __reloc_toc(unsigned long offset, unsigned long nr_entries) +{ + unsigned long i; + unsigned long *toc_entry; + + /* Get the start of the TOC by using r2 directly. */ + asm volatile("addi %0,2,-0x8000" : "=b" (toc_entry)); + + for (i = 0; i < nr_entries; i++) { + *toc_entry = *toc_entry + offset; + toc_entry++; + } +} + +static void reloc_toc(void) +{ + unsigned long offset = reloc_offset(); + unsigned long nr_entries = + (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long); + + __reloc_toc(offset, nr_entries); + + mb(); +} + +static void unreloc_toc(void) +{ + unsigned long offset = reloc_offset(); + unsigned long nr_entries = + (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long); + + mb(); + + __reloc_toc(-offset, nr_entries); +} +#endif +#endif /* * We enter here early on, when the Open Firmware prom is still @@ -2861,20 +2880,19 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long r6, unsigned long r7, unsigned long kbase) { - struct prom_t *_prom; unsigned long hdr; #ifdef CONFIG_PPC32 unsigned long offset = reloc_offset(); reloc_got2(offset); +#else + reloc_toc(); #endif - _prom = &RELOC(prom); - /* * First zero the BSS */ - memset(&RELOC(__bss_start), 0, __bss_stop - __bss_start); + memset(&__bss_start, 0, __bss_stop - __bss_start); /* * Init interface to Open Firmware, get some node references, @@ -2893,14 +2911,14 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ prom_init_stdout(); - prom_printf("Preparing to boot %s", RELOC(linux_banner)); + prom_printf("Preparing to boot %s", linux_banner); /* * Get default machine type. At this point, we do not differentiate * between pSeries SMP and pSeries LPAR */ - RELOC(of_platform) = prom_find_machine_type(); - prom_printf("Detected machine type: %x\n", RELOC(of_platform)); + of_platform = prom_find_machine_type(); + prom_printf("Detected machine type: %x\n", of_platform); #ifndef CONFIG_NONSTATIC_KERNEL /* Bail if this is a kdump kernel. */ @@ -2917,15 +2935,15 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, /* * On pSeries, inform the firmware about our capabilities */ - if (RELOC(of_platform) == PLATFORM_PSERIES || - RELOC(of_platform) == PLATFORM_PSERIES_LPAR) + if (of_platform == PLATFORM_PSERIES || + of_platform == PLATFORM_PSERIES_LPAR) prom_send_capabilities(); #endif /* * Copy the CPU hold code */ - if (RELOC(of_platform) != PLATFORM_POWERMAC) + if (of_platform != PLATFORM_POWERMAC) copy_and_flush(0, kbase, 0x100, 0); /* @@ -2954,7 +2972,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * that uses the allocator, we need to make sure we get the top of memory * available for us here... */ - if (RELOC(of_platform) == PLATFORM_PSERIES) + if (of_platform == PLATFORM_PSERIES) prom_initialize_tce_table(); #endif @@ -2962,19 +2980,19 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * On non-powermacs, try to instantiate RTAS. PowerMacs don't * have a usable RTAS implementation. */ - if (RELOC(of_platform) != PLATFORM_POWERMAC && - RELOC(of_platform) != PLATFORM_OPAL) + if (of_platform != PLATFORM_POWERMAC && + of_platform != PLATFORM_OPAL) prom_instantiate_rtas(); #ifdef CONFIG_PPC_POWERNV /* Detect HAL and try instanciating it & doing takeover */ - if (RELOC(of_platform) == PLATFORM_PSERIES_LPAR) { + if (of_platform == PLATFORM_PSERIES_LPAR) { prom_query_opal(); - if (RELOC(of_platform) == PLATFORM_OPAL) { + if (of_platform == PLATFORM_OPAL) { prom_opal_hold_cpus(); prom_opal_takeover(); } - } else if (RELOC(of_platform) == PLATFORM_OPAL) + } else if (of_platform == PLATFORM_OPAL) prom_instantiate_opal(); #endif @@ -2988,32 +3006,32 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * * PowerMacs use a different mechanism to spin CPUs */ - if (RELOC(of_platform) != PLATFORM_POWERMAC && - RELOC(of_platform) != PLATFORM_OPAL) + if (of_platform != PLATFORM_POWERMAC && + of_platform != PLATFORM_OPAL) prom_hold_cpus(); /* * Fill in some infos for use by the kernel later on */ - if (RELOC(prom_memory_limit)) - prom_setprop(_prom->chosen, "/chosen", "linux,memory-limit", - &RELOC(prom_memory_limit), + if (prom_memory_limit) + prom_setprop(prom.chosen, "/chosen", "linux,memory-limit", + &prom_memory_limit, sizeof(prom_memory_limit)); #ifdef CONFIG_PPC64 - if (RELOC(prom_iommu_off)) - prom_setprop(_prom->chosen, "/chosen", "linux,iommu-off", + if (prom_iommu_off) + prom_setprop(prom.chosen, "/chosen", "linux,iommu-off", NULL, 0); - if (RELOC(prom_iommu_force_on)) - prom_setprop(_prom->chosen, "/chosen", "linux,iommu-force-on", + if (prom_iommu_force_on) + prom_setprop(prom.chosen, "/chosen", "linux,iommu-force-on", NULL, 0); - if (RELOC(prom_tce_alloc_start)) { - prom_setprop(_prom->chosen, "/chosen", "linux,tce-alloc-start", - &RELOC(prom_tce_alloc_start), + if (prom_tce_alloc_start) { + prom_setprop(prom.chosen, "/chosen", "linux,tce-alloc-start", + &prom_tce_alloc_start, sizeof(prom_tce_alloc_start)); - prom_setprop(_prom->chosen, "/chosen", "linux,tce-alloc-end", - &RELOC(prom_tce_alloc_end), + prom_setprop(prom.chosen, "/chosen", "linux,tce-alloc-end", + &prom_tce_alloc_end, sizeof(prom_tce_alloc_end)); } #endif @@ -3035,8 +3053,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * closed stdin already (in particular the powerbook 101). It * appears that the OPAL version of OFW doesn't like it either. */ - if (RELOC(of_platform) != PLATFORM_POWERMAC && - RELOC(of_platform) != PLATFORM_OPAL) + if (of_platform != PLATFORM_POWERMAC && + of_platform != PLATFORM_OPAL) prom_close_stdin(); /* @@ -3051,22 +3069,24 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * tree and NULL as r5, thus triggering the new entry point which * is common to us and kexec */ - hdr = RELOC(dt_header_start); + hdr = dt_header_start; /* Don't print anything after quiesce under OPAL, it crashes OFW */ - if (RELOC(of_platform) != PLATFORM_OPAL) { + if (of_platform != PLATFORM_OPAL) { prom_printf("returning from prom_init\n"); prom_debug("->dt_header_start=0x%x\n", hdr); } #ifdef CONFIG_PPC32 reloc_got2(-offset); +#else + unreloc_toc(); #endif #ifdef CONFIG_PPC_EARLY_DEBUG_OPAL /* OPAL early debug gets the OPAL base & entry in r8 and r9 */ __start(hdr, kbase, 0, 0, 0, - RELOC(prom_opal_base), RELOC(prom_opal_entry)); + prom_opal_base, prom_opal_entry); #else __start(hdr, kbase, 0, 0, 0, 0, 0); #endif diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 70f4286eaa7a..3765da6be4f2 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -22,7 +22,7 @@ __secondary_hold_acknowledge __secondary_hold_spinloop __start strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224 reloc_got2 kernstart_addr memstart_addr linux_banner _stext opal_query_takeover opal_do_takeover opal_enter_rtas opal_secondary_entry -boot_command_line" +boot_command_line __prom_init_toc_start __prom_init_toc_end" NM="$1" OBJ="$2" diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index c4970004d44d..f9b30c68ba47 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -179,6 +179,30 @@ static int set_user_msr(struct task_struct *task, unsigned long msr) return 0; } +#ifdef CONFIG_PPC64 +static unsigned long get_user_dscr(struct task_struct *task) +{ + return task->thread.dscr; +} + +static int set_user_dscr(struct task_struct *task, unsigned long dscr) +{ + task->thread.dscr = dscr; + task->thread.dscr_inherit = 1; + return 0; +} +#else +static unsigned long get_user_dscr(struct task_struct *task) +{ + return -EIO; +} + +static int set_user_dscr(struct task_struct *task, unsigned long dscr) +{ + return -EIO; +} +#endif + /* * We prevent mucking around with the reserved area of trap * which are used internally by the kernel. @@ -200,6 +224,9 @@ unsigned long ptrace_get_reg(struct task_struct *task, int regno) if (regno == PT_MSR) return get_user_msr(task); + if (regno == PT_DSCR) + return get_user_dscr(task); + if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) return ((unsigned long *)task->thread.regs)[regno]; @@ -218,6 +245,8 @@ int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) return set_user_msr(task, data); if (regno == PT_TRAP) return set_user_trap(task, data); + if (regno == PT_DSCR) + return set_user_dscr(task, data); if (regno <= PT_MAX_PUT_REG) { ((unsigned long *)task->thread.regs)[regno] = data; @@ -905,6 +934,9 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, struct perf_event *bp; struct perf_event_attr attr; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ +#ifndef CONFIG_PPC_ADV_DEBUG_REGS + struct arch_hw_breakpoint hw_brk; +#endif /* For ppc64 we support one DABR and no IABR's at the moment (ppc64). * For embedded processors we support one DAC and no IAC's at the @@ -931,14 +963,17 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, */ /* Ensure breakpoint translation bit is set */ - if (data && !(data & DABR_TRANSLATION)) + if (data && !(data & HW_BRK_TYPE_TRANSLATE)) return -EIO; + hw_brk.address = data & (~HW_BRK_TYPE_DABR); + hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; + hw_brk.len = 8; #ifdef CONFIG_HAVE_HW_BREAKPOINT if (ptrace_get_breakpoints(task) < 0) return -ESRCH; bp = thread->ptrace_bps[0]; - if ((!data) || !(data & (DABR_DATA_WRITE | DABR_DATA_READ))) { + if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) { if (bp) { unregister_hw_breakpoint(bp); thread->ptrace_bps[0] = NULL; @@ -948,10 +983,8 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, } if (bp) { attr = bp->attr; - attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN; - arch_bp_generic_fields(data & - (DABR_DATA_WRITE | DABR_DATA_READ), - &attr.bp_type); + attr.bp_addr = hw_brk.address; + arch_bp_generic_fields(hw_brk.type, &attr.bp_type); /* Enable breakpoint */ attr.disabled = false; @@ -963,16 +996,15 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, } thread->ptrace_bps[0] = bp; ptrace_put_breakpoints(task); - thread->dabr = data; - thread->dabrx = DABRX_ALL; + thread->hw_brk = hw_brk; return 0; } /* Create a new breakpoint request if one doesn't exist already */ hw_breakpoint_init(&attr); - attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN; - arch_bp_generic_fields(data & (DABR_DATA_WRITE | DABR_DATA_READ), - &attr.bp_type); + attr.bp_addr = hw_brk.address; + arch_bp_generic_fields(hw_brk.type, + &attr.bp_type); thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, ptrace_triggered, NULL, task); @@ -985,10 +1017,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, ptrace_put_breakpoints(task); #endif /* CONFIG_HAVE_HW_BREAKPOINT */ - - /* Move contents to the DABR register */ - task->thread.dabr = data; - task->thread.dabrx = DABRX_ALL; + task->thread.hw_brk = hw_brk; #else /* CONFIG_PPC_ADV_DEBUG_REGS */ /* As described above, it was assumed 3 bits were passed with the data * address, but we will assume only the mode bits will be passed @@ -1349,7 +1378,7 @@ static long ppc_set_hwdebug(struct task_struct *child, struct perf_event_attr attr; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #ifndef CONFIG_PPC_ADV_DEBUG_REGS - unsigned long dabr; + struct arch_hw_breakpoint brk; #endif if (bp_info->version != 1) @@ -1397,12 +1426,13 @@ static long ppc_set_hwdebug(struct task_struct *child, if ((unsigned long)bp_info->addr >= TASK_SIZE) return -EIO; - dabr = (unsigned long)bp_info->addr & ~7UL; - dabr |= DABR_TRANSLATION; + brk.address = bp_info->addr & ~7UL; + brk.type = HW_BRK_TYPE_TRANSLATE; + brk.len = 8; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) - dabr |= DABR_DATA_READ; + brk.type |= HW_BRK_TYPE_READ; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) - dabr |= DABR_DATA_WRITE; + brk.type |= HW_BRK_TYPE_WRITE; #ifdef CONFIG_HAVE_HW_BREAKPOINT if (ptrace_get_breakpoints(child) < 0) return -ESRCH; @@ -1427,8 +1457,7 @@ static long ppc_set_hwdebug(struct task_struct *child, hw_breakpoint_init(&attr); attr.bp_addr = (unsigned long)bp_info->addr & ~HW_BREAKPOINT_ALIGN; attr.bp_len = len; - arch_bp_generic_fields(dabr & (DABR_DATA_WRITE | DABR_DATA_READ), - &attr.bp_type); + arch_bp_generic_fields(brk.type, &attr.bp_type); thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, ptrace_triggered, NULL, child); @@ -1445,11 +1474,10 @@ static long ppc_set_hwdebug(struct task_struct *child, if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) return -EINVAL; - if (child->thread.dabr) + if (child->thread.hw_brk.address) return -ENOSPC; - child->thread.dabr = dabr; - child->thread.dabrx = DABRX_ALL; + child->thread.hw_brk = brk; return 1; #endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */ @@ -1495,10 +1523,11 @@ static long ppc_del_hwdebug(struct task_struct *child, long data) ptrace_put_breakpoints(child); return ret; #else /* CONFIG_HAVE_HW_BREAKPOINT */ - if (child->thread.dabr == 0) + if (child->thread.hw_brk.address == 0) return -ENOENT; - child->thread.dabr = 0; + child->thread.hw_brk.address = 0; + child->thread.hw_brk.type = 0; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ return 0; @@ -1642,6 +1671,9 @@ long arch_ptrace(struct task_struct *child, long request, } case PTRACE_GET_DEBUGREG: { +#ifndef CONFIG_PPC_ADV_DEBUG_REGS + unsigned long dabr_fake; +#endif ret = -EINVAL; /* We only support one DABR and no IABRS at the moment */ if (addr > 0) @@ -1649,7 +1681,9 @@ long arch_ptrace(struct task_struct *child, long request, #ifdef CONFIG_PPC_ADV_DEBUG_REGS ret = put_user(child->thread.dac1, datalp); #else - ret = put_user(child->thread.dabr, datalp); + dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | + (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); + ret = put_user(dabr_fake, datalp); #endif break; } diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 8c21658719d9..c0244e766834 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -252,6 +252,9 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, } case PTRACE_GET_DEBUGREG: { +#ifndef CONFIG_PPC_ADV_DEBUG_REGS + unsigned long dabr_fake; +#endif ret = -EINVAL; /* We only support one DABR and no IABRS at the moment */ if (addr > 0) @@ -259,7 +262,10 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, #ifdef CONFIG_PPC_ADV_DEBUG_REGS ret = put_user(child->thread.dac1, (u32 __user *)data); #else - ret = put_user(child->thread.dabr, (u32 __user *)data); + dabr_fake = ( + (child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | + (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); + ret = put_user(dabr_fake, (u32 __user *)data); #endif break; } diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 8329190312c1..c642f0132988 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -191,7 +191,7 @@ static void free_flash_list(struct flash_block_list *f) static int rtas_flash_release(struct inode *inode, struct file *file) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); struct rtas_update_flash_t *uf; uf = (struct rtas_update_flash_t *) dp->data; @@ -253,7 +253,7 @@ static void get_flash_status_msg(int status, char *buf) static ssize_t rtas_flash_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); struct rtas_update_flash_t *uf; char msg[RTAS_MSG_MAXLEN]; @@ -282,7 +282,7 @@ void rtas_block_ctor(void *ptr) static ssize_t rtas_flash_write(struct file *file, const char __user *buffer, size_t count, loff_t *off) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); struct rtas_update_flash_t *uf; char *p; int next_free; @@ -374,7 +374,7 @@ static void manage_flash(struct rtas_manage_flash_t *args_buf) static ssize_t manage_flash_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); struct rtas_manage_flash_t *args_buf; char msg[RTAS_MSG_MAXLEN]; int msglen; @@ -391,7 +391,7 @@ static ssize_t manage_flash_read(struct file *file, char __user *buf, static ssize_t manage_flash_write(struct file *file, const char __user *buf, size_t count, loff_t *off) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); struct rtas_manage_flash_t *args_buf; const char reject_str[] = "0"; const char commit_str[] = "1"; @@ -462,7 +462,7 @@ static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf, static ssize_t validate_flash_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); struct rtas_validate_flash_t *args_buf; char msg[RTAS_MSG_MAXLEN]; int msglen; @@ -477,7 +477,7 @@ static ssize_t validate_flash_read(struct file *file, char __user *buf, static ssize_t validate_flash_write(struct file *file, const char __user *buf, size_t count, loff_t *off) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); struct rtas_validate_flash_t *args_buf; int rc; @@ -526,7 +526,7 @@ done: static int validate_flash_release(struct inode *inode, struct file *file) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); struct rtas_validate_flash_t *args_buf; args_buf = (struct rtas_validate_flash_t *) dp->data; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6da881b35dac..75fbaceb5c87 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -156,6 +156,15 @@ early_param("smt-enabled", early_smt_enabled); #define check_smt_enabled() #endif /* CONFIG_SMP */ +/** Fix up paca fields required for the boot cpu */ +static void fixup_boot_paca(void) +{ + /* The boot cpu is started */ + get_paca()->cpu_start = 1; + /* Allow percpu accesses to work until we setup percpu data */ + get_paca()->data_offset = 0; +} + /* * Early initialization entry point. This is called by head.S * with MMU translation disabled. We rely on the "feature" of @@ -177,6 +186,8 @@ early_param("smt-enabled", early_smt_enabled); void __init early_setup(unsigned long dt_ptr) { + static __initdata struct paca_struct boot_paca; + /* -------- printk is _NOT_ safe to use here ! ------- */ /* Identify CPU type */ @@ -185,6 +196,7 @@ void __init early_setup(unsigned long dt_ptr) /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ initialise_paca(&boot_paca, 0); setup_paca(&boot_paca); + fixup_boot_paca(); /* Initialize lockdep early or else spinlocks will blow */ lockdep_init(); @@ -205,11 +217,7 @@ void __init early_setup(unsigned long dt_ptr) /* Now we know the logical id of our boot cpu, setup the paca. */ setup_paca(&paca[boot_cpuid]); - - /* Fix up paca fields required for the boot cpu */ - get_paca()->cpu_start = 1; - /* Allow percpu accesses to "work" until we setup percpu data */ - get_paca()->data_offset = 0; + fixup_boot_paca(); /* Probe the machine type */ probe_machine(); diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 3b997118df50..cf12eae02de5 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -130,8 +130,9 @@ static int do_signal(struct pt_regs *regs) * user space. The DABR will have been cleared if it * triggered inside the kernel. */ - if (current->thread.dabr) - set_dabr(current->thread.dabr, current->thread.dabrx); + if (current->thread.hw_brk.address && + current->thread.hw_brk.type) + set_breakpoint(¤t->thread.hw_brk); #endif /* Re-enable the breakpoints for the signal stack */ thread_change_pc(current, regs); @@ -169,10 +170,3 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) tracehook_notify_resume(regs); } } - -long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, - unsigned long r5, unsigned long r6, unsigned long r7, - unsigned long r8, struct pt_regs *regs) -{ - return do_sigaltstack(uss, uoss, regs->gpr[1]); -} diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index e00acb413934..ec84c901ceab 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -25,13 +25,21 @@ extern int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, extern unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task); +extern unsigned long copy_transact_fpr_to_user(void __user *to, + struct task_struct *task); extern unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from); +extern unsigned long copy_transact_fpr_from_user(struct task_struct *task, + void __user *from); #ifdef CONFIG_VSX extern unsigned long copy_vsx_to_user(void __user *to, struct task_struct *task); +extern unsigned long copy_transact_vsx_to_user(void __user *to, + struct task_struct *task); extern unsigned long copy_vsx_from_user(struct task_struct *task, void __user *from); +extern unsigned long copy_transact_vsx_from_user(struct task_struct *task, + void __user *from); #endif #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 804e323c139d..3acb28e245b4 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -43,6 +43,7 @@ #include <asm/sigcontext.h> #include <asm/vdso.h> #include <asm/switch_to.h> +#include <asm/tm.h> #ifdef CONFIG_PPC64 #include "ppc32.h" #include <asm/unistd.h> @@ -56,10 +57,7 @@ #undef DEBUG_SIG #ifdef CONFIG_PPC64 -#define sys_sigsuspend compat_sys_sigsuspend -#define sys_rt_sigsuspend compat_sys_rt_sigsuspend #define sys_rt_sigreturn compat_sys_rt_sigreturn -#define sys_sigaction compat_sys_sigaction #define sys_swapcontext compat_sys_swapcontext #define sys_sigreturn compat_sys_sigreturn @@ -68,6 +66,8 @@ #define mcontext mcontext32 #define ucontext ucontext32 +#define __save_altstack __compat_save_altstack + /* * Userspace code may pass a ucontext which doesn't include VSX added * at the end. We need to check for this case. @@ -130,23 +130,6 @@ static inline int get_sigset_t(sigset_t *set, return 0; } -static inline int get_old_sigaction(struct k_sigaction *new_ka, - struct old_sigaction __user *act) -{ - compat_old_sigset_t mask; - compat_uptr_t handler, restorer; - - if (get_user(handler, &act->sa_handler) || - __get_user(restorer, &act->sa_restorer) || - __get_user(new_ka->sa.sa_flags, &act->sa_flags) || - __get_user(mask, &act->sa_mask)) - return -EFAULT; - new_ka->sa.sa_handler = compat_ptr(handler); - new_ka->sa.sa_restorer = compat_ptr(restorer); - siginitset(&new_ka->sa.sa_mask, mask); - return 0; -} - #define to_user_ptr(p) ptr_to_compat(p) #define from_user_ptr(p) compat_ptr(p) @@ -196,21 +179,6 @@ static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset) return copy_from_user(set, uset, sizeof(*uset)); } -static inline int get_old_sigaction(struct k_sigaction *new_ka, - struct old_sigaction __user *act) -{ - old_sigset_t mask; - - if (!access_ok(VERIFY_READ, act, sizeof(*act)) || - __get_user(new_ka->sa.sa_handler, &act->sa_handler) || - __get_user(new_ka->sa.sa_restorer, &act->sa_restorer) || - __get_user(new_ka->sa.sa_flags, &act->sa_flags) || - __get_user(mask, &act->sa_mask)) - return -EFAULT; - siginitset(&new_ka->sa.sa_mask, mask); - return 0; -} - #define to_user_ptr(p) ((unsigned long)(p)) #define from_user_ptr(p) ((void __user *)(p)) @@ -234,50 +202,8 @@ static inline int restore_general_regs(struct pt_regs *regs, return -EFAULT; return 0; } - -#endif /* CONFIG_PPC64 */ - -/* - * Atomically swap in the new signal mask, and wait for a signal. - */ -long sys_sigsuspend(old_sigset_t mask) -{ - sigset_t blocked; - siginitset(&blocked, mask); - return sigsuspend(&blocked); -} - -long sys_sigaction(int sig, struct old_sigaction __user *act, - struct old_sigaction __user *oact) -{ - struct k_sigaction new_ka, old_ka; - int ret; - -#ifdef CONFIG_PPC64 - if (sig < 0) - sig = -sig; #endif - if (act) { - if (get_old_sigaction(&new_ka, act)) - return -EFAULT; - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - if (!ret && oact) { - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(to_user_ptr(old_ka.sa.sa_handler), - &oact->sa_handler) || - __put_user(to_user_ptr(old_ka.sa.sa_restorer), - &oact->sa_restorer) || - __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) - return -EFAULT; - } - - return ret; -} - /* * When we have signals to deliver, we set up on the * user stack, going down from the original stack pointer: @@ -293,6 +219,10 @@ long sys_sigaction(int sig, struct old_sigaction __user *act, struct sigframe { struct sigcontext sctx; /* the sigcontext */ struct mcontext mctx; /* all the register values */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + struct sigcontext sctx_transact; + struct mcontext mctx_transact; +#endif /* * Programs using the rs6000/xcoff abi can save up to 19 gp * regs and 18 fp regs below sp before decrementing it. @@ -321,6 +251,9 @@ struct rt_sigframe { struct siginfo info; #endif struct ucontext uc; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + struct ucontext uc_transact; +#endif /* * Programs using the rs6000/xcoff abi can save up to 19 gp * regs and 18 fp regs below sp before decrementing it. @@ -381,6 +314,61 @@ unsigned long copy_vsx_from_user(struct task_struct *task, task->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i]; return 0; } + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +unsigned long copy_transact_fpr_to_user(void __user *to, + struct task_struct *task) +{ + double buf[ELF_NFPREG]; + int i; + + /* save FPR copy to local buffer then write to the thread_struct */ + for (i = 0; i < (ELF_NFPREG - 1) ; i++) + buf[i] = task->thread.TS_TRANS_FPR(i); + memcpy(&buf[i], &task->thread.transact_fpscr, sizeof(double)); + return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double)); +} + +unsigned long copy_transact_fpr_from_user(struct task_struct *task, + void __user *from) +{ + double buf[ELF_NFPREG]; + int i; + + if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double))) + return 1; + for (i = 0; i < (ELF_NFPREG - 1) ; i++) + task->thread.TS_TRANS_FPR(i) = buf[i]; + memcpy(&task->thread.transact_fpscr, &buf[i], sizeof(double)); + + return 0; +} + +unsigned long copy_transact_vsx_to_user(void __user *to, + struct task_struct *task) +{ + double buf[ELF_NVSRHALFREG]; + int i; + + /* save FPR copy to local buffer then write to the thread_struct */ + for (i = 0; i < ELF_NVSRHALFREG; i++) + buf[i] = task->thread.transact_fpr[i][TS_VSRLOWOFFSET]; + return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double)); +} + +unsigned long copy_transact_vsx_from_user(struct task_struct *task, + void __user *from) +{ + double buf[ELF_NVSRHALFREG]; + int i; + + if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double))) + return 1; + for (i = 0; i < ELF_NVSRHALFREG ; i++) + task->thread.transact_fpr[i][TS_VSRLOWOFFSET] = buf[i]; + return 0; +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #else inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) @@ -395,6 +383,22 @@ inline unsigned long copy_fpr_from_user(struct task_struct *task, return __copy_from_user(task->thread.fpr, from, ELF_NFPREG * sizeof(double)); } + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +inline unsigned long copy_transact_fpr_to_user(void __user *to, + struct task_struct *task) +{ + return __copy_to_user(to, task->thread.transact_fpr, + ELF_NFPREG * sizeof(double)); +} + +inline unsigned long copy_transact_fpr_from_user(struct task_struct *task, + void __user *from) +{ + return __copy_from_user(task->thread.transact_fpr, from, + ELF_NFPREG * sizeof(double)); +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #endif /* @@ -483,6 +487,156 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, return 0; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Save the current user registers on the user stack. + * We only save the altivec/spe registers if the process has used + * altivec/spe instructions at some point. + * We also save the transactional registers to a second ucontext in the + * frame. + * + * See save_user_regs() and signal_64.c:setup_tm_sigcontexts(). + */ +static int save_tm_user_regs(struct pt_regs *regs, + struct mcontext __user *frame, + struct mcontext __user *tm_frame, int sigret) +{ + unsigned long msr = regs->msr; + + /* tm_reclaim rolls back all reg states, updating thread.ckpt_regs, + * thread.transact_fpr[], thread.transact_vr[], etc. + */ + tm_enable(); + tm_reclaim(¤t->thread, msr, TM_CAUSE_SIGNAL); + + /* Make sure floating point registers are stored in regs */ + flush_fp_to_thread(current); + + /* Save both sets of general registers */ + if (save_general_regs(¤t->thread.ckpt_regs, frame) + || save_general_regs(regs, tm_frame)) + return 1; + + /* Stash the top half of the 64bit MSR into the 32bit MSR word + * of the transactional mcontext. This way we have a backward-compatible + * MSR in the 'normal' (checkpointed) mcontext and additionally one can + * also look at what type of transaction (T or S) was active at the + * time of the signal. + */ + if (__put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR])) + return 1; + +#ifdef CONFIG_ALTIVEC + /* save altivec registers */ + if (current->thread.used_vr) { + flush_altivec_to_thread(current); + if (__copy_to_user(&frame->mc_vregs, current->thread.vr, + ELF_NVRREG * sizeof(vector128))) + return 1; + if (msr & MSR_VEC) { + if (__copy_to_user(&tm_frame->mc_vregs, + current->thread.transact_vr, + ELF_NVRREG * sizeof(vector128))) + return 1; + } else { + if (__copy_to_user(&tm_frame->mc_vregs, + current->thread.vr, + ELF_NVRREG * sizeof(vector128))) + return 1; + } + + /* set MSR_VEC in the saved MSR value to indicate that + * frame->mc_vregs contains valid data + */ + msr |= MSR_VEC; + } + + /* We always copy to/from vrsave, it's 0 if we don't have or don't + * use altivec. Since VSCR only contains 32 bits saved in the least + * significant bits of a vector, we "cheat" and stuff VRSAVE in the + * most significant bits of that same vector. --BenH + */ + if (__put_user(current->thread.vrsave, + (u32 __user *)&frame->mc_vregs[32])) + return 1; + if (msr & MSR_VEC) { + if (__put_user(current->thread.transact_vrsave, + (u32 __user *)&tm_frame->mc_vregs[32])) + return 1; + } else { + if (__put_user(current->thread.vrsave, + (u32 __user *)&tm_frame->mc_vregs[32])) + return 1; + } +#endif /* CONFIG_ALTIVEC */ + + if (copy_fpr_to_user(&frame->mc_fregs, current)) + return 1; + if (msr & MSR_FP) { + if (copy_transact_fpr_to_user(&tm_frame->mc_fregs, current)) + return 1; + } else { + if (copy_fpr_to_user(&tm_frame->mc_fregs, current)) + return 1; + } + +#ifdef CONFIG_VSX + /* + * Copy VSR 0-31 upper half from thread_struct to local + * buffer, then write that to userspace. Also set MSR_VSX in + * the saved MSR value to indicate that frame->mc_vregs + * contains valid data + */ + if (current->thread.used_vsr) { + __giveup_vsx(current); + if (copy_vsx_to_user(&frame->mc_vsregs, current)) + return 1; + if (msr & MSR_VSX) { + if (copy_transact_vsx_to_user(&tm_frame->mc_vsregs, + current)) + return 1; + } else { + if (copy_vsx_to_user(&tm_frame->mc_vsregs, current)) + return 1; + } + + msr |= MSR_VSX; + } +#endif /* CONFIG_VSX */ +#ifdef CONFIG_SPE + /* SPE regs are not checkpointed with TM, so this section is + * simply the same as in save_user_regs(). + */ + if (current->thread.used_spe) { + flush_spe_to_thread(current); + if (__copy_to_user(&frame->mc_vregs, current->thread.evr, + ELF_NEVRREG * sizeof(u32))) + return 1; + /* set MSR_SPE in the saved MSR value to indicate that + * frame->mc_vregs contains valid data */ + msr |= MSR_SPE; + } + + /* We always copy to/from spefscr */ + if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG)) + return 1; +#endif /* CONFIG_SPE */ + + if (__put_user(msr, &frame->mc_gregs[PT_MSR])) + return 1; + if (sigret) { + /* Set up the sigreturn trampoline: li r0,sigret; sc */ + if (__put_user(0x38000000UL + sigret, &frame->tramp[0]) + || __put_user(0x44000002UL, &frame->tramp[1])) + return 1; + flush_icache_range((unsigned long) &frame->tramp[0], + (unsigned long) &frame->tramp[2]); + } + + return 0; +} +#endif + /* * Restore the current user register values from the user stack, * (except for MSR). @@ -588,90 +742,140 @@ static long restore_user_regs(struct pt_regs *regs, return 0; } -#ifdef CONFIG_PPC64 -long compat_sys_rt_sigaction(int sig, const struct sigaction32 __user *act, - struct sigaction32 __user *oact, size_t sigsetsize) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Restore the current user register values from the user stack, except for + * MSR, and recheckpoint the original checkpointed register state for processes + * in transactions. + */ +static long restore_tm_user_regs(struct pt_regs *regs, + struct mcontext __user *sr, + struct mcontext __user *tm_sr) { - struct k_sigaction new_ka, old_ka; - int ret; + long err; + unsigned long msr; +#ifdef CONFIG_VSX + int i; +#endif - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(compat_sigset_t)) - return -EINVAL; + /* + * restore general registers but not including MSR or SOFTE. Also + * take care of keeping r2 (TLS) intact if not a signal. + * See comment in signal_64.c:restore_tm_sigcontexts(); + * TFHAR is restored from the checkpointed NIP; TEXASR and TFIAR + * were set by the signal delivery. + */ + err = restore_general_regs(regs, tm_sr); + err |= restore_general_regs(¤t->thread.ckpt_regs, sr); - if (act) { - compat_uptr_t handler; + err |= __get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP]); - ret = get_user(handler, &act->sa_handler); - new_ka.sa.sa_handler = compat_ptr(handler); - ret |= get_sigset_t(&new_ka.sa.sa_mask, &act->sa_mask); - ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); - if (ret) - return -EFAULT; - } + err |= __get_user(msr, &sr->mc_gregs[PT_MSR]); + if (err) + return 1; - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - if (!ret && oact) { - ret = put_user(to_user_ptr(old_ka.sa.sa_handler), &oact->sa_handler); - ret |= put_sigset_t(&oact->sa_mask, &old_ka.sa.sa_mask); - ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - } - return ret; -} + /* Restore the previous little-endian mode */ + regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); -/* - * Note: it is necessary to treat how as an unsigned int, with the - * corresponding cast to a signed int to insure that the proper - * conversion (sign extension) between the register representation - * of a signed int (msr in 32-bit mode) and the register representation - * of a signed int (msr in 64-bit mode) is performed. - */ -long compat_sys_rt_sigprocmask(u32 how, compat_sigset_t __user *set, - compat_sigset_t __user *oset, size_t sigsetsize) -{ - sigset_t s; - sigset_t __user *up; - int ret; - mm_segment_t old_fs = get_fs(); + /* + * Do this before updating the thread state in + * current->thread.fpr/vr/evr. That way, if we get preempted + * and another task grabs the FPU/Altivec/SPE, it won't be + * tempted to save the current CPU state into the thread_struct + * and corrupt what we are writing there. + */ + discard_lazy_cpu_state(); - if (set) { - if (get_sigset_t(&s, set)) - return -EFAULT; +#ifdef CONFIG_ALTIVEC + regs->msr &= ~MSR_VEC; + if (msr & MSR_VEC) { + /* restore altivec registers from the stack */ + if (__copy_from_user(current->thread.vr, &sr->mc_vregs, + sizeof(sr->mc_vregs)) || + __copy_from_user(current->thread.transact_vr, + &tm_sr->mc_vregs, + sizeof(sr->mc_vregs))) + return 1; + } else if (current->thread.used_vr) { + memset(current->thread.vr, 0, ELF_NVRREG * sizeof(vector128)); + memset(current->thread.transact_vr, 0, + ELF_NVRREG * sizeof(vector128)); } - set_fs(KERNEL_DS); - /* This is valid because of the set_fs() */ - up = (sigset_t __user *) &s; - ret = sys_rt_sigprocmask((int)how, set ? up : NULL, oset ? up : NULL, - sigsetsize); - set_fs(old_fs); - if (ret) - return ret; - if (oset) { - if (put_sigset_t(oset, &s)) - return -EFAULT; - } - return 0; -} + /* Always get VRSAVE back */ + if (__get_user(current->thread.vrsave, + (u32 __user *)&sr->mc_vregs[32]) || + __get_user(current->thread.transact_vrsave, + (u32 __user *)&tm_sr->mc_vregs[32])) + return 1; +#endif /* CONFIG_ALTIVEC */ -long compat_sys_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize) -{ - sigset_t s; - int ret; - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - /* The __user pointer cast is valid because of the set_fs() */ - ret = sys_rt_sigpending((sigset_t __user *) &s, sigsetsize); - set_fs(old_fs); - if (!ret) { - if (put_sigset_t(set, &s)) - return -EFAULT; + regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); + + if (copy_fpr_from_user(current, &sr->mc_fregs) || + copy_transact_fpr_from_user(current, &tm_sr->mc_fregs)) + return 1; + +#ifdef CONFIG_VSX + regs->msr &= ~MSR_VSX; + if (msr & MSR_VSX) { + /* + * Restore altivec registers from the stack to a local + * buffer, then write this out to the thread_struct + */ + if (copy_vsx_from_user(current, &sr->mc_vsregs) || + copy_transact_vsx_from_user(current, &tm_sr->mc_vsregs)) + return 1; + } else if (current->thread.used_vsr) + for (i = 0; i < 32 ; i++) { + current->thread.fpr[i][TS_VSRLOWOFFSET] = 0; + current->thread.transact_fpr[i][TS_VSRLOWOFFSET] = 0; + } +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_SPE + /* SPE regs are not checkpointed with TM, so this section is + * simply the same as in restore_user_regs(). + */ + regs->msr &= ~MSR_SPE; + if (msr & MSR_SPE) { + if (__copy_from_user(current->thread.evr, &sr->mc_vregs, + ELF_NEVRREG * sizeof(u32))) + return 1; + } else if (current->thread.used_spe) + memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); + + /* Always get SPEFSCR back */ + if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + + ELF_NEVRREG)) + return 1; +#endif /* CONFIG_SPE */ + + /* Now, recheckpoint. This loads up all of the checkpointed (older) + * registers, including FP and V[S]Rs. After recheckpointing, the + * transactional versions should be loaded. + */ + tm_enable(); + /* This loads the checkpointed FP/VEC state, if used */ + tm_recheckpoint(¤t->thread, msr); + /* The task has moved into TM state S, so ensure MSR reflects this */ + regs->msr = (regs->msr & ~MSR_TS_MASK) | MSR_TS_S; + + /* This loads the speculative FP/VEC state, if used */ + if (msr & MSR_FP) { + do_load_up_transact_fpu(¤t->thread); + regs->msr |= (MSR_FP | current->thread.fpexc_mode); + } + if (msr & MSR_VEC) { + do_load_up_transact_altivec(¤t->thread); + regs->msr |= MSR_VEC; } - return ret; -} + return 0; +} +#endif +#ifdef CONFIG_PPC64 int copy_siginfo_to_user32(struct compat_siginfo __user *d, siginfo_t *s) { int err; @@ -740,79 +944,6 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) return 0; } - -/* - * Note: it is necessary to treat pid and sig as unsigned ints, with the - * corresponding cast to a signed int to insure that the proper conversion - * (sign extension) between the register representation of a signed int - * (msr in 32-bit mode) and the register representation of a signed int - * (msr in 64-bit mode) is performed. - */ -long compat_sys_rt_sigqueueinfo(u32 pid, u32 sig, compat_siginfo_t __user *uinfo) -{ - siginfo_t info; - int ret; - mm_segment_t old_fs = get_fs(); - - ret = copy_siginfo_from_user32(&info, uinfo); - if (unlikely(ret)) - return ret; - - set_fs (KERNEL_DS); - /* The __user pointer cast is valid becasuse of the set_fs() */ - ret = sys_rt_sigqueueinfo((int)pid, (int)sig, (siginfo_t __user *) &info); - set_fs (old_fs); - return ret; -} -/* - * Start Alternate signal stack support - * - * System Calls - * sigaltatck compat_sys_sigaltstack - */ - -int compat_sys_sigaltstack(u32 __new, u32 __old, int r5, - int r6, int r7, int r8, struct pt_regs *regs) -{ - stack_32_t __user * newstack = compat_ptr(__new); - stack_32_t __user * oldstack = compat_ptr(__old); - stack_t uss, uoss; - int ret; - mm_segment_t old_fs; - unsigned long sp; - compat_uptr_t ss_sp; - - /* - * set sp to the user stack on entry to the system call - * the system call router sets R9 to the saved registers - */ - sp = regs->gpr[1]; - - /* Put new stack info in local 64 bit stack struct */ - if (newstack) { - if (get_user(ss_sp, &newstack->ss_sp) || - __get_user(uss.ss_flags, &newstack->ss_flags) || - __get_user(uss.ss_size, &newstack->ss_size)) - return -EFAULT; - uss.ss_sp = compat_ptr(ss_sp); - } - - old_fs = get_fs(); - set_fs(KERNEL_DS); - /* The __user pointer casts are valid because of the set_fs() */ - ret = do_sigaltstack( - newstack ? (stack_t __user *) &uss : NULL, - oldstack ? (stack_t __user *) &uoss : NULL, - sp); - set_fs(old_fs); - /* Copy the stack information to the user output buffer */ - if (!ret && oldstack && - (put_user(ptr_to_compat(uoss.ss_sp), &oldstack->ss_sp) || - __put_user(uoss.ss_flags, &oldstack->ss_flags) || - __put_user(uoss.ss_size, &oldstack->ss_size))) - return -EFAULT; - return ret; -} #endif /* CONFIG_PPC64 */ /* @@ -827,6 +958,8 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, struct mcontext __user *frame; void __user *addr; unsigned long newsp = 0; + int sigret; + unsigned long tramp; /* Set up Signal Frame */ /* Put a Real Time Context onto stack */ @@ -838,11 +971,7 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, /* Put the siginfo & fill in most of the ucontext */ if (copy_siginfo_to_user(&rt_sf->info, info) || __put_user(0, &rt_sf->uc.uc_flags) - || __put_user(0, &rt_sf->uc.uc_link) - || __put_user(current->sas_ss_sp, &rt_sf->uc.uc_stack.ss_sp) - || __put_user(sas_ss_flags(regs->gpr[1]), - &rt_sf->uc.uc_stack.ss_flags) - || __put_user(current->sas_ss_size, &rt_sf->uc.uc_stack.ss_size) + || __save_altstack(&rt_sf->uc.uc_stack, regs->gpr[1]) || __put_user(to_user_ptr(&rt_sf->uc.uc_mcontext), &rt_sf->uc.uc_regs) || put_sigset_t(&rt_sf->uc.uc_sigmask, oldset)) @@ -852,14 +981,37 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, frame = &rt_sf->uc.uc_mcontext; addr = frame; if (vdso32_rt_sigtramp && current->mm->context.vdso_base) { - if (save_user_regs(regs, frame, 0, 1)) - goto badframe; - regs->link = current->mm->context.vdso_base + vdso32_rt_sigtramp; + sigret = 0; + tramp = current->mm->context.vdso_base + vdso32_rt_sigtramp; } else { - if (save_user_regs(regs, frame, __NR_rt_sigreturn, 1)) + sigret = __NR_rt_sigreturn; + tramp = (unsigned long) frame->tramp; + } + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(regs->msr)) { + if (save_tm_user_regs(regs, &rt_sf->uc.uc_mcontext, + &rt_sf->uc_transact.uc_mcontext, sigret)) + goto badframe; + } + else +#endif + if (save_user_regs(regs, frame, sigret, 1)) + goto badframe; + regs->link = tramp; + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(regs->msr)) { + if (__put_user((unsigned long)&rt_sf->uc_transact, + &rt_sf->uc.uc_link) + || __put_user(to_user_ptr(&rt_sf->uc_transact.uc_mcontext), + &rt_sf->uc_transact.uc_regs)) goto badframe; - regs->link = (unsigned long) frame->tramp; } + else +#endif + if (__put_user(0, &rt_sf->uc.uc_link)) + goto badframe; current->thread.fpscr.val = 0; /* turn off all fp exceptions */ @@ -878,6 +1030,13 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, regs->nip = (unsigned long) ka->sa.sa_handler; /* enter the signal handler in big-endian mode */ regs->msr &= ~MSR_LE; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Remove TM bits from thread's MSR. The MSR in the sigcontext + * just indicates to userland that we were doing a transaction, but we + * don't want to return in transactional state: + */ + regs->msr &= ~MSR_TS_MASK; +#endif return 1; badframe: @@ -925,6 +1084,35 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int return 0; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static int do_setcontext_tm(struct ucontext __user *ucp, + struct ucontext __user *tm_ucp, + struct pt_regs *regs) +{ + sigset_t set; + struct mcontext __user *mcp; + struct mcontext __user *tm_mcp; + u32 cmcp; + u32 tm_cmcp; + + if (get_sigset_t(&set, &ucp->uc_sigmask)) + return -EFAULT; + + if (__get_user(cmcp, &ucp->uc_regs) || + __get_user(tm_cmcp, &tm_ucp->uc_regs)) + return -EFAULT; + mcp = (struct mcontext __user *)(u64)cmcp; + tm_mcp = (struct mcontext __user *)(u64)tm_cmcp; + /* no need to check access_ok(mcp), since mcp < 4GB */ + + set_current_blocked(&set); + if (restore_tm_user_regs(regs, mcp, tm_mcp)) + return -EFAULT; + + return 0; +} +#endif + long sys_swapcontext(struct ucontext __user *old_ctx, struct ucontext __user *new_ctx, int ctx_size, int r6, int r7, int r8, struct pt_regs *regs) @@ -1020,7 +1208,12 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, struct pt_regs *regs) { struct rt_sigframe __user *rt_sf; - +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + struct ucontext __user *uc_transact; + unsigned long msr_hi; + unsigned long tmp; + int tm_restore = 0; +#endif /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; @@ -1028,6 +1221,34 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, (regs->gpr[1] + __SIGNAL_FRAMESIZE + 16); if (!access_ok(VERIFY_READ, rt_sf, sizeof(*rt_sf))) goto bad; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (__get_user(tmp, &rt_sf->uc.uc_link)) + goto bad; + uc_transact = (struct ucontext __user *)(uintptr_t)tmp; + if (uc_transact) { + u32 cmcp; + struct mcontext __user *mcp; + + if (__get_user(cmcp, &uc_transact->uc_regs)) + return -EFAULT; + mcp = (struct mcontext __user *)(u64)cmcp; + /* The top 32 bits of the MSR are stashed in the transactional + * ucontext. */ + if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR])) + goto bad; + + if (MSR_TM_SUSPENDED(msr_hi<<32)) { + /* We only recheckpoint on return if we're + * transaction. + */ + tm_restore = 1; + if (do_setcontext_tm(&rt_sf->uc, uc_transact, regs)) + goto bad; + } + } + if (!tm_restore) + /* Fall through, for non-TM restore */ +#endif if (do_setcontext(&rt_sf->uc, regs, 1)) goto bad; @@ -1039,14 +1260,11 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, * change it. -- paulus */ #ifdef CONFIG_PPC64 - /* - * We use the compat_sys_ version that does the 32/64 bits conversion - * and takes userland pointer directly. What about error checking ? - * nobody does any... - */ - compat_sys_sigaltstack((u32)(u64)&rt_sf->uc.uc_stack, 0, 0, 0, 0, 0, regs); + if (compat_restore_altstack(&rt_sf->uc.uc_stack)) + goto bad; #else - do_sigaltstack(&rt_sf->uc.uc_stack, NULL, regs->gpr[1]); + if (restore_altstack(&rt_sf->uc.uc_stack)) + goto bad; #endif set_thread_flag(TIF_RESTOREALL); return 0; @@ -1162,7 +1380,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, * always done it up until now so it is probably better not to * change it. -- paulus */ - do_sigaltstack(&ctx->uc_stack, NULL, regs->gpr[1]); + restore_altstack(&ctx->uc_stack); set_thread_flag(TIF_RESTOREALL); out: @@ -1179,6 +1397,8 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, struct sigcontext __user *sc; struct sigframe __user *frame; unsigned long newsp = 0; + int sigret; + unsigned long tramp; /* Set up Signal Frame */ frame = get_sigframe(ka, regs, sizeof(*frame), 1); @@ -1201,14 +1421,25 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, goto badframe; if (vdso32_sigtramp && current->mm->context.vdso_base) { - if (save_user_regs(regs, &frame->mctx, 0, 1)) - goto badframe; - regs->link = current->mm->context.vdso_base + vdso32_sigtramp; + sigret = 0; + tramp = current->mm->context.vdso_base + vdso32_sigtramp; } else { - if (save_user_regs(regs, &frame->mctx, __NR_sigreturn, 1)) + sigret = __NR_sigreturn; + tramp = (unsigned long) frame->mctx.tramp; + } + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(regs->msr)) { + if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, + sigret)) goto badframe; - regs->link = (unsigned long) frame->mctx.tramp; } + else +#endif + if (save_user_regs(regs, &frame->mctx, sigret, 1)) + goto badframe; + + regs->link = tramp; current->thread.fpscr.val = 0; /* turn off all fp exceptions */ @@ -1223,7 +1454,13 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, regs->nip = (unsigned long) ka->sa.sa_handler; /* enter the signal handler in big-endian mode */ regs->msr &= ~MSR_LE; - +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Remove TM bits from thread's MSR. The MSR in the sigcontext + * just indicates to userland that we were doing a transaction, but we + * don't want to return in transactional state: + */ + regs->msr &= ~MSR_TS_MASK; +#endif return 1; badframe: diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 1ca045d44324..995f8543cb57 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -34,6 +34,7 @@ #include <asm/syscalls.h> #include <asm/vdso.h> #include <asm/switch_to.h> +#include <asm/tm.h> #include "signal.h" @@ -56,6 +57,9 @@ struct rt_sigframe { /* sys_rt_sigreturn requires the ucontext be the first field */ struct ucontext uc; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + struct ucontext uc_transact; +#endif unsigned long _unused[2]; unsigned int tramp[TRAMP_SIZE]; struct siginfo __user *pinfo; @@ -145,6 +149,145 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, return err; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * As above, but Transactional Memory is in use, so deliver sigcontexts + * containing checkpointed and transactional register states. + * + * To do this, we treclaim to gather both sets of registers and set up the + * 'normal' sigcontext registers with rolled-back register values such that a + * simple signal handler sees a correct checkpointed register state. + * If interested, a TM-aware sighandler can examine the transactional registers + * in the 2nd sigcontext to determine the real origin of the signal. + */ +static long setup_tm_sigcontexts(struct sigcontext __user *sc, + struct sigcontext __user *tm_sc, + struct pt_regs *regs, + int signr, sigset_t *set, unsigned long handler) +{ + /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the + * process never used altivec yet (MSR_VEC is zero in pt_regs of + * the context). This is very important because we must ensure we + * don't lose the VRSAVE content that may have been set prior to + * the process doing its first vector operation + * Userland shall check AT_HWCAP to know wether it can rely on the + * v_regs pointer or not. + */ +#ifdef CONFIG_ALTIVEC + elf_vrreg_t __user *v_regs = (elf_vrreg_t __user *) + (((unsigned long)sc->vmx_reserve + 15) & ~0xful); + elf_vrreg_t __user *tm_v_regs = (elf_vrreg_t __user *) + (((unsigned long)tm_sc->vmx_reserve + 15) & ~0xful); +#endif + unsigned long msr = regs->msr; + long err = 0; + + BUG_ON(!MSR_TM_ACTIVE(regs->msr)); + + /* tm_reclaim rolls back all reg states, saving checkpointed (older) + * GPRs to thread.ckpt_regs and (if used) FPRs to (newer) + * thread.transact_fp and/or VRs to (newer) thread.transact_vr. + * THEN we save out FP/VRs, if necessary, to the checkpointed (older) + * thread.fr[]/vr[]s. The transactional (newer) GPRs are on the + * stack, in *regs. + */ + tm_enable(); + tm_reclaim(¤t->thread, msr, TM_CAUSE_SIGNAL); + + flush_fp_to_thread(current); + +#ifdef CONFIG_ALTIVEC + err |= __put_user(v_regs, &sc->v_regs); + err |= __put_user(tm_v_regs, &tm_sc->v_regs); + + /* save altivec registers */ + if (current->thread.used_vr) { + flush_altivec_to_thread(current); + /* Copy 33 vec registers (vr0..31 and vscr) to the stack */ + err |= __copy_to_user(v_regs, current->thread.vr, + 33 * sizeof(vector128)); + /* If VEC was enabled there are transactional VRs valid too, + * else they're a copy of the checkpointed VRs. + */ + if (msr & MSR_VEC) + err |= __copy_to_user(tm_v_regs, + current->thread.transact_vr, + 33 * sizeof(vector128)); + else + err |= __copy_to_user(tm_v_regs, + current->thread.vr, + 33 * sizeof(vector128)); + + /* set MSR_VEC in the MSR value in the frame to indicate + * that sc->v_reg contains valid data. + */ + msr |= MSR_VEC; + } + /* We always copy to/from vrsave, it's 0 if we don't have or don't + * use altivec. + */ + err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); + if (msr & MSR_VEC) + err |= __put_user(current->thread.transact_vrsave, + (u32 __user *)&tm_v_regs[33]); + else + err |= __put_user(current->thread.vrsave, + (u32 __user *)&tm_v_regs[33]); + +#else /* CONFIG_ALTIVEC */ + err |= __put_user(0, &sc->v_regs); + err |= __put_user(0, &tm_sc->v_regs); +#endif /* CONFIG_ALTIVEC */ + + /* copy fpr regs and fpscr */ + err |= copy_fpr_to_user(&sc->fp_regs, current); + if (msr & MSR_FP) + err |= copy_transact_fpr_to_user(&tm_sc->fp_regs, current); + else + err |= copy_fpr_to_user(&tm_sc->fp_regs, current); + +#ifdef CONFIG_VSX + /* + * Copy VSX low doubleword to local buffer for formatting, + * then out to userspace. Update v_regs to point after the + * VMX data. + */ + if (current->thread.used_vsr) { + __giveup_vsx(current); + v_regs += ELF_NVRREG; + tm_v_regs += ELF_NVRREG; + + err |= copy_vsx_to_user(v_regs, current); + + if (msr & MSR_VSX) + err |= copy_transact_vsx_to_user(tm_v_regs, current); + else + err |= copy_vsx_to_user(tm_v_regs, current); + + /* set MSR_VSX in the MSR value in the frame to + * indicate that sc->vs_reg) contains valid data. + */ + msr |= MSR_VSX; + } +#endif /* CONFIG_VSX */ + + err |= __put_user(&sc->gp_regs, &sc->regs); + err |= __put_user(&tm_sc->gp_regs, &tm_sc->regs); + WARN_ON(!FULL_REGS(regs)); + err |= __copy_to_user(&tm_sc->gp_regs, regs, GP_REGS_SIZE); + err |= __copy_to_user(&sc->gp_regs, + ¤t->thread.ckpt_regs, GP_REGS_SIZE); + err |= __put_user(msr, &tm_sc->gp_regs[PT_MSR]); + err |= __put_user(msr, &sc->gp_regs[PT_MSR]); + err |= __put_user(signr, &sc->signal); + err |= __put_user(handler, &sc->handler); + if (set != NULL) + err |= __put_user(set->sig[0], &sc->oldmask); + + return err; +} +#endif + /* * Restore the sigcontext from the signal frame. */ @@ -241,6 +384,153 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, return err; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Restore the two sigcontexts from the frame of a transactional processes. + */ + +static long restore_tm_sigcontexts(struct pt_regs *regs, + struct sigcontext __user *sc, + struct sigcontext __user *tm_sc) +{ +#ifdef CONFIG_ALTIVEC + elf_vrreg_t __user *v_regs, *tm_v_regs; +#endif + unsigned long err = 0; + unsigned long msr; +#ifdef CONFIG_VSX + int i; +#endif + /* copy the GPRs */ + err |= __copy_from_user(regs->gpr, tm_sc->gp_regs, sizeof(regs->gpr)); + err |= __copy_from_user(¤t->thread.ckpt_regs, sc->gp_regs, + sizeof(regs->gpr)); + + /* + * TFHAR is restored from the checkpointed 'wound-back' ucontext's NIP. + * TEXASR was set by the signal delivery reclaim, as was TFIAR. + * Users doing anything abhorrent like thread-switching w/ signals for + * TM-Suspended code will have to back TEXASR/TFIAR up themselves. + * For the case of getting a signal and simply returning from it, + * we don't need to re-copy them here. + */ + err |= __get_user(regs->nip, &tm_sc->gp_regs[PT_NIP]); + err |= __get_user(current->thread.tm_tfhar, &sc->gp_regs[PT_NIP]); + + /* get MSR separately, transfer the LE bit if doing signal return */ + err |= __get_user(msr, &sc->gp_regs[PT_MSR]); + regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); + + /* The following non-GPR non-FPR non-VR state is also checkpointed: */ + err |= __get_user(regs->ctr, &tm_sc->gp_regs[PT_CTR]); + err |= __get_user(regs->link, &tm_sc->gp_regs[PT_LNK]); + err |= __get_user(regs->xer, &tm_sc->gp_regs[PT_XER]); + err |= __get_user(regs->ccr, &tm_sc->gp_regs[PT_CCR]); + err |= __get_user(current->thread.ckpt_regs.ctr, + &sc->gp_regs[PT_CTR]); + err |= __get_user(current->thread.ckpt_regs.link, + &sc->gp_regs[PT_LNK]); + err |= __get_user(current->thread.ckpt_regs.xer, + &sc->gp_regs[PT_XER]); + err |= __get_user(current->thread.ckpt_regs.ccr, + &sc->gp_regs[PT_CCR]); + + /* These regs are not checkpointed; they can go in 'regs'. */ + err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]); + err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); + err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); + err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); + + /* + * Do this before updating the thread state in + * current->thread.fpr/vr. That way, if we get preempted + * and another task grabs the FPU/Altivec, it won't be + * tempted to save the current CPU state into the thread_struct + * and corrupt what we are writing there. + */ + discard_lazy_cpu_state(); + + /* + * Force reload of FP/VEC. + * This has to be done before copying stuff into current->thread.fpr/vr + * for the reasons explained in the previous comment. + */ + regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX); + +#ifdef CONFIG_ALTIVEC + err |= __get_user(v_regs, &sc->v_regs); + err |= __get_user(tm_v_regs, &tm_sc->v_regs); + if (err) + return err; + if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128))) + return -EFAULT; + if (tm_v_regs && !access_ok(VERIFY_READ, + tm_v_regs, 34 * sizeof(vector128))) + return -EFAULT; + /* Copy 33 vec registers (vr0..31 and vscr) from the stack */ + if (v_regs != 0 && tm_v_regs != 0 && (msr & MSR_VEC) != 0) { + err |= __copy_from_user(current->thread.vr, v_regs, + 33 * sizeof(vector128)); + err |= __copy_from_user(current->thread.transact_vr, tm_v_regs, + 33 * sizeof(vector128)); + } + else if (current->thread.used_vr) { + memset(current->thread.vr, 0, 33 * sizeof(vector128)); + memset(current->thread.transact_vr, 0, 33 * sizeof(vector128)); + } + /* Always get VRSAVE back */ + if (v_regs != 0 && tm_v_regs != 0) { + err |= __get_user(current->thread.vrsave, + (u32 __user *)&v_regs[33]); + err |= __get_user(current->thread.transact_vrsave, + (u32 __user *)&tm_v_regs[33]); + } + else { + current->thread.vrsave = 0; + current->thread.transact_vrsave = 0; + } +#endif /* CONFIG_ALTIVEC */ + /* restore floating point */ + err |= copy_fpr_from_user(current, &sc->fp_regs); + err |= copy_transact_fpr_from_user(current, &tm_sc->fp_regs); +#ifdef CONFIG_VSX + /* + * Get additional VSX data. Update v_regs to point after the + * VMX data. Copy VSX low doubleword from userspace to local + * buffer for formatting, then into the taskstruct. + */ + if (v_regs && ((msr & MSR_VSX) != 0)) { + v_regs += ELF_NVRREG; + tm_v_regs += ELF_NVRREG; + err |= copy_vsx_from_user(current, v_regs); + err |= copy_transact_vsx_from_user(current, tm_v_regs); + } else { + for (i = 0; i < 32 ; i++) { + current->thread.fpr[i][TS_VSRLOWOFFSET] = 0; + current->thread.transact_fpr[i][TS_VSRLOWOFFSET] = 0; + } + } +#endif + tm_enable(); + /* This loads the checkpointed FP/VEC state, if used */ + tm_recheckpoint(¤t->thread, msr); + /* The task has moved into TM state S, so ensure MSR reflects this: */ + regs->msr = (regs->msr & ~MSR_TS_MASK) | __MASK(33); + + /* This loads the speculative FP/VEC state, if used */ + if (msr & MSR_FP) { + do_load_up_transact_fpu(¤t->thread); + regs->msr |= (MSR_FP | current->thread.fpexc_mode); + } + if (msr & MSR_VEC) { + do_load_up_transact_altivec(¤t->thread); + regs->msr |= MSR_VEC; + } + + return err; +} +#endif + /* * Setup the trampoline code on the stack */ @@ -355,6 +645,9 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, { struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1]; sigset_t set; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + unsigned long msr; +#endif /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; @@ -365,13 +658,26 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set))) goto badframe; set_current_blocked(&set); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) + goto badframe; + if (MSR_TM_SUSPENDED(msr)) { + /* We recheckpoint on return. */ + struct ucontext __user *uc_transact; + if (__get_user(uc_transact, &uc->uc_link)) + goto badframe; + if (restore_tm_sigcontexts(regs, &uc->uc_mcontext, + &uc_transact->uc_mcontext)) + goto badframe; + } + else + /* Fall through, for non-TM restore */ +#endif if (restore_sigcontext(regs, NULL, 1, &uc->uc_mcontext)) goto badframe; - /* do_sigaltstack expects a __user pointer and won't modify - * what's in there anyway - */ - do_sigaltstack(&uc->uc_stack, NULL, regs->gpr[1]); + if (restore_altstack(&uc->uc_stack)) + goto badframe; set_thread_flag(TIF_RESTOREALL); return 0; @@ -415,19 +721,39 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->gpr[1]), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, signr, NULL, - (unsigned long)ka->sa.sa_handler, 1); + err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(regs->msr)) { + /* The ucontext_t passed to userland points to the second + * ucontext_t (for transactional state) with its uc_link ptr. + */ + err |= __put_user(&frame->uc_transact, &frame->uc.uc_link); + err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext, + &frame->uc_transact.uc_mcontext, + regs, signr, + NULL, + (unsigned long)ka->sa.sa_handler); + } else +#endif + { + err |= __put_user(0, &frame->uc.uc_link); + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, signr, + NULL, (unsigned long)ka->sa.sa_handler, + 1); + } err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) goto badframe; /* Make sure signal handler doesn't get spurious FP exceptions */ current->thread.fpscr.val = 0; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Remove TM bits from thread's MSR. The MSR in the sigcontext + * just indicates to userland that we were doing a transaction, but we + * don't want to return in transactional state: + */ + regs->msr &= ~MSR_TS_MASK; +#endif /* Set up to return from userspace. */ if (vdso64_rt_sigtramp && current->mm->context.vdso_base) { diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 793401e65088..76bd9da8cb71 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -610,7 +610,7 @@ static struct device_node *cpu_to_l2cache(int cpu) } /* Activate a secondary processor. */ -void start_secondary(void *unused) +__cpuinit void start_secondary(void *unused) { unsigned int cpu = smp_processor_id(); struct device_node *l2_cache; diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 8a93778ed9f5..d0bafc0cdf06 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -61,16 +61,6 @@ asmlinkage long ppc32_select(u32 n, compat_ulong_t __user *inp, return compat_sys_select((int)n, inp, outp, exp, compat_ptr(tvp_x)); } -/* Note: it is necessary to treat option as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_sysfs(u32 option, u32 arg1, u32 arg2) -{ - return sys_sysfs((int)option, arg1, arg2); -} - #ifdef CONFIG_SYSVIPC long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t ptr, u32 fifth) @@ -156,309 +146,6 @@ asmlinkage long compat_sys_sendfile64_wrapper(u32 out_fd, u32 in_fd, (off_t __user *)offset, count); } -/* Note: it is necessary to treat option as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_prctl(u32 option, u32 arg2, u32 arg3, u32 arg4, u32 arg5) -{ - return sys_prctl((int)option, - (unsigned long) arg2, - (unsigned long) arg3, - (unsigned long) arg4, - (unsigned long) arg5); -} - -/* Note: it is necessary to treat pid as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_sched_rr_get_interval_wrapper(u32 pid, - struct compat_timespec __user *interval) -{ - return compat_sys_sched_rr_get_interval((int)pid, interval); -} - -/* Note: it is necessary to treat mode as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_access(const char __user * filename, u32 mode) -{ - return sys_access(filename, (int)mode); -} - - -/* Note: it is necessary to treat mode as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_creat(const char __user * pathname, u32 mode) -{ - return sys_creat(pathname, (int)mode); -} - - -/* Note: it is necessary to treat pid and options as unsigned ints, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_waitpid(u32 pid, unsigned int __user * stat_addr, u32 options) -{ - return sys_waitpid((int)pid, stat_addr, (int)options); -} - - -/* Note: it is necessary to treat gidsetsize as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_getgroups(u32 gidsetsize, gid_t __user *grouplist) -{ - return sys_getgroups((int)gidsetsize, grouplist); -} - - -/* Note: it is necessary to treat pid as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_getpgid(u32 pid) -{ - return sys_getpgid((int)pid); -} - - - -/* Note: it is necessary to treat pid as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_getsid(u32 pid) -{ - return sys_getsid((int)pid); -} - - -/* Note: it is necessary to treat pid and sig as unsigned ints, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_kill(u32 pid, u32 sig) -{ - return sys_kill((int)pid, (int)sig); -} - - -/* Note: it is necessary to treat mode as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_mkdir(const char __user * pathname, u32 mode) -{ - return sys_mkdir(pathname, (int)mode); -} - -long compat_sys_nice(u32 increment) -{ - /* sign extend increment */ - return sys_nice((int)increment); -} - -off_t ppc32_lseek(unsigned int fd, u32 offset, unsigned int origin) -{ - /* sign extend n */ - return sys_lseek(fd, (int)offset, origin); -} - -long compat_sys_truncate(const char __user * path, u32 length) -{ - /* sign extend length */ - return sys_truncate(path, (int)length); -} - -long compat_sys_ftruncate(int fd, u32 length) -{ - /* sign extend length */ - return sys_ftruncate(fd, (int)length); -} - -/* Note: it is necessary to treat bufsiz as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_readlink(const char __user * path, char __user * buf, u32 bufsiz) -{ - return sys_readlink(path, buf, (int)bufsiz); -} - -/* Note: it is necessary to treat option as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_sched_get_priority_max(u32 policy) -{ - return sys_sched_get_priority_max((int)policy); -} - - -/* Note: it is necessary to treat policy as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_sched_get_priority_min(u32 policy) -{ - return sys_sched_get_priority_min((int)policy); -} - - -/* Note: it is necessary to treat pid as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_sched_getparam(u32 pid, struct sched_param __user *param) -{ - return sys_sched_getparam((int)pid, param); -} - - -/* Note: it is necessary to treat pid as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_sched_getscheduler(u32 pid) -{ - return sys_sched_getscheduler((int)pid); -} - - -/* Note: it is necessary to treat pid as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_sched_setparam(u32 pid, struct sched_param __user *param) -{ - return sys_sched_setparam((int)pid, param); -} - - -/* Note: it is necessary to treat pid and policy as unsigned ints, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_sched_setscheduler(u32 pid, u32 policy, struct sched_param __user *param) -{ - return sys_sched_setscheduler((int)pid, (int)policy, param); -} - - -/* Note: it is necessary to treat len as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_setdomainname(char __user *name, u32 len) -{ - return sys_setdomainname(name, (int)len); -} - - -/* Note: it is necessary to treat gidsetsize as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_setgroups(u32 gidsetsize, gid_t __user *grouplist) -{ - return sys_setgroups((int)gidsetsize, grouplist); -} - - -asmlinkage long compat_sys_sethostname(char __user *name, u32 len) -{ - /* sign extend len */ - return sys_sethostname(name, (int)len); -} - - -/* Note: it is necessary to treat pid and pgid as unsigned ints, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_setpgid(u32 pid, u32 pgid) -{ - return sys_setpgid((int)pid, (int)pgid); -} - -long compat_sys_getpriority(u32 which, u32 who) -{ - /* sign extend which and who */ - return sys_getpriority((int)which, (int)who); -} - -long compat_sys_setpriority(u32 which, u32 who, u32 niceval) -{ - /* sign extend which, who and niceval */ - return sys_setpriority((int)which, (int)who, (int)niceval); -} - -long compat_sys_ioprio_get(u32 which, u32 who) -{ - /* sign extend which and who */ - return sys_ioprio_get((int)which, (int)who); -} - -long compat_sys_ioprio_set(u32 which, u32 who, u32 ioprio) -{ - /* sign extend which, who and ioprio */ - return sys_ioprio_set((int)which, (int)who, (int)ioprio); -} - -/* Note: it is necessary to treat newmask as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_ssetmask(u32 newmask) -{ - return sys_ssetmask((int) newmask); -} - -asmlinkage long compat_sys_syslog(u32 type, char __user * buf, u32 len) -{ - /* sign extend len */ - return sys_syslog(type, buf, (int)len); -} - - -/* Note: it is necessary to treat mask as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long compat_sys_umask(u32 mask) -{ - return sys_umask((int)mask); -} - unsigned long compat_sys_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) @@ -467,12 +154,6 @@ unsigned long compat_sys_mmap2(unsigned long addr, size_t len, return sys_mmap(addr, len, prot, flags, fd, pgoff << 12); } -long compat_sys_tgkill(u32 tgid, u32 pid, int sig) -{ - /* sign extend tgid, pid */ - return sys_tgkill((int)tgid, (int)pid, sig); -} - /* * long long munging: * The 32 bit ABI passes long longs in an odd even register pair. diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 127361e093f4..f77fa22754bc 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -143,7 +143,7 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq); unsigned long ppc_tb_freq; EXPORT_SYMBOL_GPL(ppc_tb_freq); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* * Factors for converting from cputime_t (timebase ticks) to * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds). @@ -347,6 +347,7 @@ void vtime_account_system(struct task_struct *tsk) if (stolen) account_steal_time(stolen); } +EXPORT_SYMBOL_GPL(vtime_account_system); void vtime_account_idle(struct task_struct *tsk) { @@ -377,7 +378,7 @@ void vtime_account_user(struct task_struct *tsk) account_user_time(tsk, utime, utimescaled); } -#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ +#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #define calc_cputime_factors() #endif @@ -668,7 +669,7 @@ int update_persistent_clock(struct timespec now) struct rtc_time tm; if (!ppc_md.set_rtc_time) - return 0; + return -ENODEV; to_tm(now.tv_sec + 1 + timezone_offset, &tm); tm.tm_year -= 1900; diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S new file mode 100644 index 000000000000..84dbace657ce --- /dev/null +++ b/arch/powerpc/kernel/tm.S @@ -0,0 +1,388 @@ +/* + * Transactional memory support routines to reclaim and recheckpoint + * transactional process state. + * + * Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation. + */ + +#include <asm/asm-offsets.h> +#include <asm/ppc_asm.h> +#include <asm/ppc-opcode.h> +#include <asm/ptrace.h> +#include <asm/reg.h> + +#ifdef CONFIG_VSX +/* See fpu.S, this is very similar but to save/restore checkpointed FPRs/VSRs */ +#define __SAVE_32FPRS_VSRS_TRANSACT(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + SAVE_32FPRS_TRANSACT(n,base); \ + b 3f; \ +2: SAVE_32VSRS_TRANSACT(n,c,base); \ +3: +/* ...and this is just plain borrowed from there. */ +#define __REST_32FPRS_VSRS(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + REST_32FPRS(n,base); \ + b 3f; \ +2: REST_32VSRS(n,c,base); \ +3: +#else +#define __SAVE_32FPRS_VSRS_TRANSACT(n,c,base) SAVE_32FPRS_TRANSACT(n, base) +#define __REST_32FPRS_VSRS(n,c,base) REST_32FPRS(n, base) +#endif +#define SAVE_32FPRS_VSRS_TRANSACT(n,c,base) \ + __SAVE_32FPRS_VSRS_TRANSACT(n,__REG_##c,__REG_##base) +#define REST_32FPRS_VSRS(n,c,base) \ + __REST_32FPRS_VSRS(n,__REG_##c,__REG_##base) + +/* Stack frame offsets for local variables. */ +#define TM_FRAME_L0 TM_FRAME_SIZE-16 +#define TM_FRAME_L1 TM_FRAME_SIZE-8 +#define STACK_PARAM(x) (48+((x)*8)) + + +/* In order to access the TM SPRs, TM must be enabled. So, do so: */ +_GLOBAL(tm_enable) + mfmsr r4 + li r3, MSR_TM >> 32 + sldi r3, r3, 32 + and. r0, r4, r3 + bne 1f + or r4, r4, r3 + mtmsrd r4 +1: blr + +_GLOBAL(tm_save_sprs) + mfspr r0, SPRN_TFHAR + std r0, THREAD_TM_TFHAR(r3) + mfspr r0, SPRN_TEXASR + std r0, THREAD_TM_TEXASR(r3) + mfspr r0, SPRN_TFIAR + std r0, THREAD_TM_TFIAR(r3) + blr + +_GLOBAL(tm_restore_sprs) + ld r0, THREAD_TM_TFHAR(r3) + mtspr SPRN_TFHAR, r0 + ld r0, THREAD_TM_TEXASR(r3) + mtspr SPRN_TEXASR, r0 + ld r0, THREAD_TM_TFIAR(r3) + mtspr SPRN_TFIAR, r0 + blr + + /* Passed an 8-bit failure cause as first argument. */ +_GLOBAL(tm_abort) + TABORT(R3) + blr + + +/* void tm_reclaim(struct thread_struct *thread, + * unsigned long orig_msr, + * uint8_t cause) + * + * - Performs a full reclaim. This destroys outstanding + * transactions and updates thread->regs.tm_ckpt_* with the + * original checkpointed state. Note that thread->regs is + * unchanged. + * - FP regs are written back to thread->transact_fpr before + * reclaiming. These are the transactional (current) versions. + * + * Purpose is to both abort transactions of, and preserve the state of, + * a transactions at a context switch. We preserve/restore both sets of process + * state to restore them when the thread's scheduled again. We continue in + * userland as though nothing happened, but when the transaction is resumed + * they will abort back to the checkpointed state we save out here. + * + * Call with IRQs off, stacks get all out of sync for some periods in here! + */ +_GLOBAL(tm_reclaim) + mfcr r6 + mflr r0 + std r6, 8(r1) + std r0, 16(r1) + std r2, 40(r1) + stdu r1, -TM_FRAME_SIZE(r1) + + /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */ + + std r3, STACK_PARAM(0)(r1) + SAVE_NVGPRS(r1) + + mfmsr r14 + mr r15, r14 + ori r15, r15, MSR_FP + oris r15, r15, MSR_VEC@h +#ifdef CONFIG_VSX + BEGIN_FTR_SECTION + oris r15,r15, MSR_VSX@h + END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif + mtmsrd r15 + std r14, TM_FRAME_L0(r1) + + /* Stash the stack pointer away for use after reclaim */ + std r1, PACAR1(r13) + + /* ******************** FPR/VR/VSRs ************ + * Before reclaiming, capture the current/transactional FPR/VR + * versions /if used/. + * + * (If VSX used, FP and VMX are implied. Or, we don't need to look + * at MSR.VSX as copying FP regs if .FP, vector regs if .VMX covers it.) + * + * We're passed the thread's MSR as parameter 2. + * + * We enabled VEC/FP/VSX in the msr above, so we can execute these + * instructions! + */ + andis. r0, r4, MSR_VEC@h + beq dont_backup_vec + + SAVE_32VRS_TRANSACT(0, r6, r3) /* r6 scratch, r3 thread */ + mfvscr vr0 + li r6, THREAD_TRANSACT_VSCR + stvx vr0, r3, r6 + mfspr r0, SPRN_VRSAVE + std r0, THREAD_TRANSACT_VRSAVE(r3) + +dont_backup_vec: + andi. r0, r4, MSR_FP + beq dont_backup_fp + + SAVE_32FPRS_VSRS_TRANSACT(0, R6, R3) /* r6 scratch, r3 thread */ + + mffs fr0 + stfd fr0,THREAD_TRANSACT_FPSCR(r3) + +dont_backup_fp: + /* The moment we treclaim, ALL of our GPRs will switch + * to user register state. (FPRs, CCR etc. also!) + * Use an sprg and a tm_scratch in the PACA to shuffle. + */ + TRECLAIM(R5) /* Cause in r5 */ + + /* ******************** GPRs ******************** */ + /* Stash the checkpointed r13 away in the scratch SPR and get the real + * paca + */ + SET_SCRATCH0(r13) + GET_PACA(r13) + + /* Stash the checkpointed r1 away in paca tm_scratch and get the real + * stack pointer back + */ + std r1, PACATMSCRATCH(r13) + ld r1, PACAR1(r13) + + /* Now get some more GPRS free */ + std r7, GPR7(r1) /* Temporary stash */ + std r12, GPR12(r1) /* '' '' '' */ + ld r12, STACK_PARAM(0)(r1) /* Param 0, thread_struct * */ + + addi r7, r12, PT_CKPT_REGS /* Thread's ckpt_regs */ + + /* Make r7 look like an exception frame so that we + * can use the neat GPRx(n) macros. r7 is NOT a pt_regs ptr! + */ + subi r7, r7, STACK_FRAME_OVERHEAD + + /* Sync the userland GPRs 2-12, 14-31 to thread->regs: */ + SAVE_GPR(0, r7) /* user r0 */ + SAVE_GPR(2, r7) /* user r2 */ + SAVE_4GPRS(3, r7) /* user r3-r6 */ + SAVE_4GPRS(8, r7) /* user r8-r11 */ + ld r3, PACATMSCRATCH(r13) /* user r1 */ + ld r4, GPR7(r1) /* user r7 */ + ld r5, GPR12(r1) /* user r12 */ + GET_SCRATCH0(6) /* user r13 */ + std r3, GPR1(r7) + std r4, GPR7(r7) + std r5, GPR12(r7) + std r6, GPR13(r7) + + SAVE_NVGPRS(r7) /* user r14-r31 */ + + /* ******************** NIP ******************** */ + mfspr r3, SPRN_TFHAR + std r3, _NIP(r7) /* Returns to failhandler */ + /* The checkpointed NIP is ignored when rescheduling/rechkpting, + * but is used in signal return to 'wind back' to the abort handler. + */ + + /* ******************** CR,LR,CCR,MSR ********** */ + mfctr r3 + mflr r4 + mfcr r5 + mfxer r6 + + std r3, _CTR(r7) + std r4, _LINK(r7) + std r5, _CCR(r7) + std r6, _XER(r7) + + /* MSR and flags: We don't change CRs, and we don't need to alter + * MSR. + */ + + /* TM regs, incl TEXASR -- these live in thread_struct. Note they've + * been updated by the treclaim, to explain to userland the failure + * cause (aborted). + */ + mfspr r0, SPRN_TEXASR + mfspr r3, SPRN_TFHAR + mfspr r4, SPRN_TFIAR + std r0, THREAD_TM_TEXASR(r12) + std r3, THREAD_TM_TFHAR(r12) + std r4, THREAD_TM_TFIAR(r12) + + /* AMR and PPR are checkpointed too, but are unsupported by Linux. */ + + /* Restore original MSR/IRQ state & clear TM mode */ + ld r14, TM_FRAME_L0(r1) /* Orig MSR */ + li r15, 0 + rldimi r14, r15, MSR_TS_LG, (63-MSR_TS_LG)-1 + mtmsrd r14 + + REST_NVGPRS(r1) + + addi r1, r1, TM_FRAME_SIZE + ld r4, 8(r1) + ld r0, 16(r1) + mtcr r4 + mtlr r0 + ld r2, 40(r1) + blr + + + /* void tm_recheckpoint(struct thread_struct *thread, + * unsigned long orig_msr) + * - Restore the checkpointed register state saved by tm_reclaim + * when we switch_to a process. + * + * Call with IRQs off, stacks get all out of sync for + * some periods in here! + */ +_GLOBAL(tm_recheckpoint) + mfcr r5 + mflr r0 + std r5, 8(r1) + std r0, 16(r1) + std r2, 40(r1) + stdu r1, -TM_FRAME_SIZE(r1) + + /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. + * This is used for backing up the NVGPRs: + */ + SAVE_NVGPRS(r1) + + std r1, PACAR1(r13) + + /* Load complete register state from ts_ckpt* registers */ + + addi r7, r3, PT_CKPT_REGS /* Thread's ckpt_regs */ + + /* Make r7 look like an exception frame so that we + * can use the neat GPRx(n) macros. r7 is now NOT a pt_regs ptr! + */ + subi r7, r7, STACK_FRAME_OVERHEAD + + SET_SCRATCH0(r1) + + mfmsr r6 + /* R4 = original MSR to indicate whether thread used FP/Vector etc. */ + + /* Enable FP/vec in MSR if necessary! */ + lis r5, MSR_VEC@h + ori r5, r5, MSR_FP + and. r5, r4, r5 + beq restore_gprs /* if neither, skip both */ + +#ifdef CONFIG_VSX + BEGIN_FTR_SECTION + oris r5, r5, MSR_VSX@h + END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif + or r5, r6, r5 /* Set MSR.FP+.VSX/.VEC */ + mtmsr r5 + + /* FP and VEC registers: These are recheckpointed from thread.fpr[] + * and thread.vr[] respectively. The thread.transact_fpr[] version + * is more modern, and will be loaded subsequently by any FPUnavailable + * trap. + */ + andis. r0, r4, MSR_VEC@h + beq dont_restore_vec + + li r5, THREAD_VSCR + lvx vr0, r3, r5 + mtvscr vr0 + REST_32VRS(0, r5, r3) /* r5 scratch, r3 THREAD ptr */ + ld r5, THREAD_VRSAVE(r3) + mtspr SPRN_VRSAVE, r5 + +dont_restore_vec: + andi. r0, r4, MSR_FP + beq dont_restore_fp + + lfd fr0, THREAD_FPSCR(r3) + MTFSF_L(fr0) + REST_32FPRS_VSRS(0, R4, R3) + +dont_restore_fp: + mtmsr r6 /* FP/Vec off again! */ + +restore_gprs: + /* ******************** CR,LR,CCR,MSR ********** */ + ld r3, _CTR(r7) + ld r4, _LINK(r7) + ld r5, _CCR(r7) + ld r6, _XER(r7) + + mtctr r3 + mtlr r4 + mtcr r5 + mtxer r6 + + /* MSR and flags: We don't change CRs, and we don't need to alter + * MSR. + */ + + REST_4GPRS(0, r7) /* GPR0-3 */ + REST_GPR(4, r7) /* GPR4-6 */ + REST_GPR(5, r7) + REST_GPR(6, r7) + REST_4GPRS(8, r7) /* GPR8-11 */ + REST_2GPRS(12, r7) /* GPR12-13 */ + + REST_NVGPRS(r7) /* GPR14-31 */ + + ld r7, GPR7(r7) /* GPR7 */ + + /* Commit register state as checkpointed state: */ + TRECHKPT + + /* Our transactional state has now changed. + * + * Now just get out of here. Transactional (current) state will be + * updated once restore is called on the return path in the _switch-ed + * -to process. + */ + + GET_PACA(r13) + GET_SCRATCH0(r1) + + REST_NVGPRS(r1) + + addi r1, r1, TM_FRAME_SIZE + ld r4, 8(r1) + ld r0, 16(r1) + mtcr r4 + mtlr r0 + ld r2, 40(r1) + blr + + /* ****************************************************************** */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 32518401af68..37cc40ef5043 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -58,6 +58,7 @@ #include <asm/rio.h> #include <asm/fadump.h> #include <asm/switch_to.h> +#include <asm/tm.h> #include <asm/debug.h> #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) @@ -66,7 +67,7 @@ int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly; int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly; int (*__debugger_sstep)(struct pt_regs *regs) __read_mostly; int (*__debugger_iabr_match)(struct pt_regs *regs) __read_mostly; -int (*__debugger_dabr_match)(struct pt_regs *regs) __read_mostly; +int (*__debugger_break_match)(struct pt_regs *regs) __read_mostly; int (*__debugger_fault_handler)(struct pt_regs *regs) __read_mostly; EXPORT_SYMBOL(__debugger); @@ -74,10 +75,17 @@ EXPORT_SYMBOL(__debugger_ipi); EXPORT_SYMBOL(__debugger_bpt); EXPORT_SYMBOL(__debugger_sstep); EXPORT_SYMBOL(__debugger_iabr_match); -EXPORT_SYMBOL(__debugger_dabr_match); +EXPORT_SYMBOL(__debugger_break_match); EXPORT_SYMBOL(__debugger_fault_handler); #endif +/* Transactional Memory trap debug */ +#ifdef TM_DEBUG_SW +#define TM_DEBUG(x...) printk(KERN_INFO x) +#else +#define TM_DEBUG(x...) do { } while(0) +#endif + /* * Trap & Exception support */ @@ -138,7 +146,7 @@ static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, { bust_spinlocks(0); die_owner = -1; - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); die_nest_count--; oops_exit(); printk("\n"); @@ -350,6 +358,7 @@ static inline int check_io_access(struct pt_regs *regs) exception is in the MSR. */ #define get_reason(regs) ((regs)->msr) #define get_mc_reason(regs) ((regs)->msr) +#define REASON_TM 0x200000 #define REASON_FP 0x100000 #define REASON_ILLEGAL 0x80000 #define REASON_PRIVILEGED 0x40000 @@ -1020,6 +1029,38 @@ void __kprobes program_check_exception(struct pt_regs *regs) _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); return; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (reason & REASON_TM) { + /* This is a TM "Bad Thing Exception" program check. + * This occurs when: + * - An rfid/hrfid/mtmsrd attempts to cause an illegal + * transition in TM states. + * - A trechkpt is attempted when transactional. + * - A treclaim is attempted when non transactional. + * - A tend is illegally attempted. + * - writing a TM SPR when transactional. + */ + if (!user_mode(regs) && + report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) { + regs->nip += 4; + return; + } + /* If usermode caused this, it's done something illegal and + * gets a SIGILL slap on the wrist. We call it an illegal + * operand to distinguish from the instruction just being bad + * (e.g. executing a 'tend' on a CPU without TM!); it's an + * illegal /placement/ of a valid instruction. + */ + if (user_mode(regs)) { + _exception(SIGILL, regs, ILL_ILLOPN, regs->nip); + return; + } else { + printk(KERN_EMERG "Unexpected TM Bad Thing exception " + "at %lx (msr 0x%x)\n", regs->nip, reason); + die("Unrecoverable exception", regs, SIGABRT); + } + } +#endif /* We restore the interrupt state now */ if (!arch_irq_disabled_regs(regs)) @@ -1160,6 +1201,109 @@ void vsx_unavailable_exception(struct pt_regs *regs) die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); } +void tm_unavailable_exception(struct pt_regs *regs) +{ + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + + /* Currently we never expect a TMU exception. Catch + * this and kill the process! + */ + printk(KERN_EMERG "Unexpected TM unavailable exception at %lx " + "(msr %lx)\n", + regs->nip, regs->msr); + + if (user_mode(regs)) { + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return; + } + + die("Unexpected TM unavailable exception", regs, SIGABRT); +} + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + +extern void do_load_up_fpu(struct pt_regs *regs); + +void fp_unavailable_tm(struct pt_regs *regs) +{ + /* Note: This does not handle any kind of FP laziness. */ + + TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n", + regs->nip, regs->msr); + tm_enable(); + + /* We can only have got here if the task started using FP after + * beginning the transaction. So, the transactional regs are just a + * copy of the checkpointed ones. But, we still need to recheckpoint + * as we're enabling FP for the process; it will return, abort the + * transaction, and probably retry but now with FP enabled. So the + * checkpointed FP registers need to be loaded. + */ + tm_reclaim(¤t->thread, current->thread.regs->msr, + TM_CAUSE_FAC_UNAV); + /* Reclaim didn't save out any FPRs to transact_fprs. */ + + /* Enable FP for the task: */ + regs->msr |= (MSR_FP | current->thread.fpexc_mode); + + /* This loads and recheckpoints the FP registers from + * thread.fpr[]. They will remain in registers after the + * checkpoint so we don't need to reload them after. + */ + tm_recheckpoint(¤t->thread, regs->msr); +} + +#ifdef CONFIG_ALTIVEC +extern void do_load_up_altivec(struct pt_regs *regs); + +void altivec_unavailable_tm(struct pt_regs *regs) +{ + /* See the comments in fp_unavailable_tm(). This function operates + * the same way. + */ + + TM_DEBUG("Vector Unavailable trap whilst transactional at 0x%lx," + "MSR=%lx\n", + regs->nip, regs->msr); + tm_enable(); + tm_reclaim(¤t->thread, current->thread.regs->msr, + TM_CAUSE_FAC_UNAV); + regs->msr |= MSR_VEC; + tm_recheckpoint(¤t->thread, regs->msr); + current->thread.used_vr = 1; +} +#endif + +#ifdef CONFIG_VSX +void vsx_unavailable_tm(struct pt_regs *regs) +{ + /* See the comments in fp_unavailable_tm(). This works similarly, + * though we're loading both FP and VEC registers in here. + * + * If FP isn't in use, load FP regs. If VEC isn't in use, load VEC + * regs. Either way, set MSR_VSX. + */ + + TM_DEBUG("VSX Unavailable trap whilst transactional at 0x%lx," + "MSR=%lx\n", + regs->nip, regs->msr); + + tm_enable(); + /* This reclaims FP and/or VR regs if they're already enabled */ + tm_reclaim(¤t->thread, current->thread.regs->msr, + TM_CAUSE_FAC_UNAV); + + regs->msr |= MSR_VEC | MSR_FP | current->thread.fpexc_mode | + MSR_VSX; + /* This loads & recheckpoints FP and VRs. */ + tm_recheckpoint(¤t->thread, regs->msr); + current->thread.used_vsr = 1; +} +#endif +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + void performance_monitor_exception(struct pt_regs *regs) { __get_cpu_var(irq_stat).pmu_irqs++; @@ -1515,7 +1659,7 @@ void unrecoverable_exception(struct pt_regs *regs) die("Unrecoverable exception", regs, SIGABRT); } -#ifdef CONFIG_BOOKE_WDT +#if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x) /* * Default handler for a Watchdog exception, * spins until a reboot occurs diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index e830289d2e48..9e20999aaef2 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -7,6 +7,57 @@ #include <asm/page.h> #include <asm/ptrace.h> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Wrapper to call load_up_altivec from C. + * void do_load_up_altivec(struct pt_regs *regs); + */ +_GLOBAL(do_load_up_altivec) + mflr r0 + std r0, 16(r1) + stdu r1, -112(r1) + + subi r6, r3, STACK_FRAME_OVERHEAD + /* load_up_altivec expects r12=MSR, r13=PACA, and returns + * with r12 = new MSR. + */ + ld r12,_MSR(r6) + GET_PACA(r13) + bl load_up_altivec + std r12,_MSR(r6) + + ld r0, 112+16(r1) + addi r1, r1, 112 + mtlr r0 + blr + +/* void do_load_up_transact_altivec(struct thread_struct *thread) + * + * This is similar to load_up_altivec but for the transactional version of the + * vector regs. It doesn't mess with the task MSR or valid flags. + * Furthermore, VEC laziness is not supported with TM currently. + */ +_GLOBAL(do_load_up_transact_altivec) + mfmsr r6 + oris r5,r6,MSR_VEC@h + MTMSRD(r5) + isync + + li r4,1 + stw r4,THREAD_USED_VR(r3) + + li r10,THREAD_TRANSACT_VSCR + lvx vr0,r10,r3 + mtvscr vr0 + REST_32VRS_TRANSACT(0,r4,r3) + + /* Disable VEC again. */ + MTMSRD(r6) + isync + + blr +#endif + /* * load_up_altivec(unused, unused, tsk) * Disable VMX for the task which had it previously, diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 65d1c08cf09e..654e479802f2 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -218,6 +218,11 @@ SECTIONS .got : AT(ADDR(.got) - LOAD_OFFSET) { __toc_start = .; +#ifndef CONFIG_RELOCATABLE + __prom_init_toc_start = .; + arch/powerpc/kernel/prom_init.o*(.toc .got) + __prom_init_toc_end = .; +#endif *(.got) *(.toc) } |