From 8482644aea11e0647867732319ccf35879a9acc2 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 27 Feb 2013 19:45:25 +0900 Subject: KVM: set_memory_region: Refactor commit_memory_region() This patch makes the parameter old a const pointer to the old memory slot and adds a new parameter named change to know the change being requested: the former is for removing extra copying and the latter is for cleaning up the code. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/powerpc/include/asm/kvm_ppc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 44a657adf416..44fa9ad1d62c 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -152,7 +152,7 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old); + const struct kvm_memory_slot *old); extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info); extern void kvmppc_core_flush_memslot(struct kvm *kvm, -- cgit v1.2.3 From 78accda4f888c77122cf3da6185f905d4677eb07 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Sun, 24 Feb 2013 18:57:12 +0000 Subject: KVM: PPC: Added one_reg interface for timer registers If userspace wants to change some specific bits of TSR (timer status register) then it uses GET/SET_SREGS ioctl interface. So the steps will be: i) user-space will make get ioctl, ii) change TSR in userspace iii) then make set ioctl. It can happen that TSR gets changed by kernel after step i) and before step iii). To avoid this we have added below one_reg ioctls for oring and clearing specific bits in TSR. This patch adds one registerface for: 1) setting specific bit in TSR (timer status register) 2) clearing specific bit in TSR (timer status register) 3) setting/getting the TCR register. There are cases where we want to only change TCR and not TSR. Although we can uses SREGS without KVM_SREGS_E_UPDATE_TSR flag but I think one reg is better. I am open if someone feels we should use SREGS only here. 4) getting/setting TSR register Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 4 ++++ arch/powerpc/include/uapi/asm/kvm.h | 5 +++++ arch/powerpc/kvm/booke.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+) (limited to 'arch/powerpc/include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index c16b442556e8..976eb650e7ef 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1788,6 +1788,10 @@ registers, find a list below: PPC | KVM_REG_PPC_VPA_DTL | 128 PPC | KVM_REG_PPC_EPCR | 32 PPC | KVM_REG_PPC_EPR | 32 + PPC | KVM_REG_PPC_TCR | 32 + PPC | KVM_REG_PPC_TSR | 32 + PPC | KVM_REG_PPC_OR_TSR | 32 + PPC | KVM_REG_PPC_CLEAR_TSR | 32 ARM registers are mapped using the lower 32 bits. The upper 16 of that is the register group type, or coprocessor number: diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 16064d00adb9..ef072b1a6e3f 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -417,4 +417,9 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85) #define KVM_REG_PPC_EPR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86) +/* Timer Status Register OR/CLEAR interface */ +#define KVM_REG_PPC_OR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x87) +#define KVM_REG_PPC_CLEAR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x88) +#define KVM_REG_PPC_TCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x89) +#define KVM_REG_PPC_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8a) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index f2fd47d35ab5..11825539e2b1 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1442,6 +1442,12 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr); break; #endif + case KVM_REG_PPC_TCR: + r = put_user(vcpu->arch.tcr, (u32 __user *)(long)reg->addr); + break; + case KVM_REG_PPC_TSR: + r = put_user(vcpu->arch.tsr, (u32 __user *)(long)reg->addr); + break; default: break; } @@ -1485,6 +1491,30 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) break; } #endif + case KVM_REG_PPC_OR_TSR: { + u32 tsr_bits; + r = get_user(tsr_bits, (u32 __user *)(long)reg->addr); + kvmppc_set_tsr_bits(vcpu, tsr_bits); + break; + } + case KVM_REG_PPC_CLEAR_TSR: { + u32 tsr_bits; + r = get_user(tsr_bits, (u32 __user *)(long)reg->addr); + kvmppc_clr_tsr_bits(vcpu, tsr_bits); + break; + } + case KVM_REG_PPC_TSR: { + u32 tsr; + r = get_user(tsr, (u32 __user *)(long)reg->addr); + kvmppc_set_tsr(vcpu, tsr); + break; + } + case KVM_REG_PPC_TCR: { + u32 tcr; + r = get_user(tcr, (u32 __user *)(long)reg->addr); + kvmppc_set_tcr(vcpu, tcr); + break; + } default: break; } -- cgit v1.2.3 From 15b708beee6841e0a59ded702c8bfe3042a5b5a4 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 27 Feb 2013 18:13:10 +0000 Subject: KVM: PPC: booke: Added debug handler Installed debug handler will be used for guest debug support and debug facility emulation features (patches for these features will follow this patch). Signed-off-by: Liu Yu [bharat.bhushan@freescale.com: Substantial changes] Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/booke_interrupts.S | 42 ++++++++++++++++++++++++++++++++++--- 3 files changed, 41 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d1bb86074721..e34f8fee9080 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -504,6 +504,7 @@ struct kvm_vcpu_arch { u32 tlbcfg[4]; u32 mmucfg; u32 epr; + u32 crit_save; struct kvmppc_booke_debug_reg dbg_reg; #endif gpa_t paddr_accessed; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index b6c17ec9b169..d87c90886c75 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -596,6 +596,7 @@ int main(void) DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); + DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save)); #endif /* CONFIG_PPC_BOOK3S */ #endif /* CONFIG_KVM */ diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index f4bb55c96517..2c6deb5ef2fe 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -54,8 +54,7 @@ (1< Date: Thu, 14 Feb 2013 14:00:25 +0000 Subject: KVM: PPC: Remove unused argument to kvmppc_core_dequeue_external Currently kvmppc_core_dequeue_external() takes a struct kvm_interrupt * argument and does nothing with it, in any of its implementations. This removes it in order to make things easier for forthcoming in-kernel interrupt controller emulation code. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 3 +-- arch/powerpc/kvm/book3s.c | 3 +-- arch/powerpc/kvm/booke.c | 3 +-- arch/powerpc/kvm/powerpc.c | 2 +- 4 files changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 44fa9ad1d62c..f58930779ae8 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -104,8 +104,7 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); -extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq); +extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a4b645285240..6548445fd823 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -160,8 +160,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, kvmppc_book3s_queue_irqprio(vcpu, vec); } -void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq) +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) { kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 11825539e2b1..58057d6f146d 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -222,8 +222,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, kvmppc_booke_queue_irqprio(vcpu, prio); } -void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq) +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) { clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 7b5d4d20cdc5..16b45954511c 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -739,7 +739,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { if (irq->irq == KVM_INTERRUPT_UNSET) { - kvmppc_core_dequeue_external(vcpu, irq); + kvmppc_core_dequeue_external(vcpu); return 0; } -- cgit v1.2.3 From 8c32a2ea655d035798d3270717924ad8be903e24 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 20 Mar 2013 20:24:58 +0000 Subject: Added ONE_REG interface for debug instruction This patch adds the one_reg interface to get the special instruction to be used for setting software breakpoint from userspace. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 2 ++ arch/powerpc/include/asm/kvm_booke.h | 2 ++ arch/powerpc/include/uapi/asm/kvm.h | 4 ++++ arch/powerpc/kvm/book3s.c | 6 ++++++ arch/powerpc/kvm/booke.c | 6 ++++++ 5 files changed, 20 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 5a56e1c5f851..bc81842ea25a 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -458,6 +458,8 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) #define OSI_SC_MAGIC_R4 0x77810F9B #define INS_DCBZ 0x7c0007ec +/* TO = 31 for unconditional trap */ +#define INS_TW 0x7fe00008 /* LPIDs we support with this build -- runtime limit may be lower */ #define KVMPPC_NR_LPIDS (LPID_RSVD + 1) diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index b7cd3356a532..d3c1eb34c986 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -26,6 +26,8 @@ /* LPIDs we support with this build -- runtime limit may be lower */ #define KVMPPC_NR_LPIDS 64 +#define KVMPPC_INST_EHPRIV 0x7c00021c + static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) { vcpu->arch.gpr[num] = val; diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index ef072b1a6e3f..c2ff99c01562 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -422,4 +422,8 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_CLEAR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x88) #define KVM_REG_PPC_TCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x89) #define KVM_REG_PPC_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8a) + +/* Debugging: Special instruction for software breakpoint */ +#define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b) + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 6548445fd823..2d32ae4bc439 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -529,6 +529,12 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); break; #endif /* CONFIG_ALTIVEC */ + case KVM_REG_PPC_DEBUG_INST: { + u32 opcode = INS_TW; + r = copy_to_user((u32 __user *)(long)reg->addr, + &opcode, sizeof(u32)); + break; + } default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 58057d6f146d..a49a68a25c39 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1447,6 +1447,12 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_TSR: r = put_user(vcpu->arch.tsr, (u32 __user *)(long)reg->addr); break; + case KVM_REG_PPC_DEBUG_INST: { + u32 opcode = KVMPPC_INST_EHPRIV; + r = copy_to_user((u32 __user *)(long)reg->addr, + &opcode, sizeof(u32)); + break; + } default: break; } -- cgit v1.2.3 From adccf65ca431b41733483f476e8de9e3cf171c44 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 25 Apr 2013 06:33:57 +0000 Subject: KVM: PPC: cache flush for kernel managed pages Kernel can only access pages which maps as memory. So flush only the valid kernel pages. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index f58930779ae8..4794de6ea379 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -282,8 +282,15 @@ void kvmppc_init_lpid(unsigned long nr_lpids); static inline void kvmppc_mmu_flush_icache(pfn_t pfn) { - /* Clear i-cache for new pages */ struct page *page; + /* + * We can only access pages that the kernel maps + * as memory. Bail out for unmapped ones. + */ + if (!pfn_valid(pfn)) + return; + + /* Clear i-cache for new pages */ page = pfn_to_page(pfn); if (!test_bit(PG_arch_1, &page->flags)) { flush_dcache_icache_page(page); -- cgit v1.2.3 From 092d62ee93039bfccbb3a36c69d0c3ee0966a97a Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Mon, 8 Apr 2013 00:32:12 +0000 Subject: KVM: PPC: debug stub interface parameter defined This patch defines the interface parameter for KVM_SET_GUEST_DEBUG ioctl support. Follow up patches will use this for setting up hardware breakpoints, watchpoints and software breakpoints. Also kvm_arch_vcpu_ioctl_set_guest_debug() is brought one level below. This is because I am not sure what is required for book3s. So this ioctl behaviour will not change for book3s. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/uapi/asm/kvm.h | 23 +++++++++++++++++++++++ arch/powerpc/kvm/book3s.c | 6 ++++++ arch/powerpc/kvm/booke.c | 6 ++++++ arch/powerpc/kvm/powerpc.c | 6 ------ 4 files changed, 35 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index c2ff99c01562..c0c38ed9c97d 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -272,8 +272,31 @@ struct kvm_debug_exit_arch { /* for KVM_SET_GUEST_DEBUG */ struct kvm_guest_debug_arch { + struct { + /* H/W breakpoint/watchpoint address */ + __u64 addr; + /* + * Type denotes h/w breakpoint, read watchpoint, write + * watchpoint or watchpoint (both read and write). + */ +#define KVMPPC_DEBUG_NONE 0x0 +#define KVMPPC_DEBUG_BREAKPOINT (1UL << 1) +#define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2) +#define KVMPPC_DEBUG_WATCH_READ (1UL << 3) + __u32 type; + __u32 reserved; + } bp[16]; }; +/* Debug related defines */ +/* + * kvm_guest_debug->control is a 32 bit field. The lower 16 bits are generic + * and upper 16 bits are architecture specific. Architecture specific defines + * that ioctl is for setting hardware breakpoint or software breakpoint. + */ +#define KVM_GUESTDBG_USE_SW_BP 0x00010000 +#define KVM_GUESTDBG_USE_HW_BP 0x00020000 + /* definition of registers in kvm_run */ struct kvm_sync_regs { }; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 2d32ae4bc439..128ed3a856b9 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -612,6 +612,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, return 0; } +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + return -EINVAL; +} + void kvmppc_decrementer_func(unsigned long data) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index a49a68a25c39..a3e2db0cf2f9 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1526,6 +1526,12 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) return r; } +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + return -EINVAL; +} + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -ENOTSUPP; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index a822659db50a..6b8108624851 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -531,12 +531,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) #endif } -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg) -{ - return -EINVAL; -} - static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { -- cgit v1.2.3 From c402a3f457b9689451c4e422781026633a5b6287 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Mon, 8 Apr 2013 00:32:13 +0000 Subject: Rename EMULATE_DO_PAPR to EMULATE_EXIT_USER Instruction emulation return EMULATE_DO_PAPR when it requires exit to userspace on book3s. Similar return is required for booke. EMULATE_DO_PAPR reads out to be confusing so it is renamed to EMULATE_EXIT_USER. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kvm/book3s_emulate.c | 2 +- arch/powerpc/kvm/book3s_pr.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 4794de6ea379..bcc68b1afc66 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -44,7 +44,7 @@ enum emulation_result { EMULATE_DO_DCR, /* kvm_run filled with DCR request */ EMULATE_FAIL, /* can't emulate this instruction */ EMULATE_AGAIN, /* something went wrong. go again */ - EMULATE_DO_PAPR, /* kvm_run filled with PAPR request */ + EMULATE_EXIT_USER, /* emulation requires exit to user-space */ }; extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 836c56975e21..cdd19d64e354 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -194,7 +194,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, run->papr_hcall.args[i] = gpr; } - emulated = EMULATE_DO_PAPR; + emulated = EMULATE_EXIT_USER; break; } #endif diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 286e23e6b92d..b960faf990bf 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -762,7 +762,7 @@ program_interrupt: run->exit_reason = KVM_EXIT_MMIO; r = RESUME_HOST_NV; break; - case EMULATE_DO_PAPR: + case EMULATE_EXIT_USER: run->exit_reason = KVM_EXIT_PAPR_HCALL; vcpu->arch.hcall_needed = 1; r = RESUME_HOST_NV; -- cgit v1.2.3 From a85d2aa23e51a9460e034e283da2513930b4f183 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 11 Apr 2013 00:03:08 +0000 Subject: KVM: PPC: e500: Expose MMU registers via ONE_REG MMU registers were exposed to user-space using sregs interface. Add them to ONE_REG interface using kvmppc_get_one_reg/kvmppc_set_one_reg delegation mechanism. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 11 +++++ arch/powerpc/include/uapi/asm/kvm.h | 17 +++++++ arch/powerpc/kvm/e500.c | 6 ++- arch/powerpc/kvm/e500.h | 4 ++ arch/powerpc/kvm/e500_mmu.c | 94 +++++++++++++++++++++++++++++++++++++ arch/powerpc/kvm/e500mc.c | 6 ++- 6 files changed, 134 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 976eb650e7ef..1a766637ac21 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1792,6 +1792,17 @@ registers, find a list below: PPC | KVM_REG_PPC_TSR | 32 PPC | KVM_REG_PPC_OR_TSR | 32 PPC | KVM_REG_PPC_CLEAR_TSR | 32 + PPC | KVM_REG_PPC_MAS0 | 32 + PPC | KVM_REG_PPC_MAS1 | 32 + PPC | KVM_REG_PPC_MAS2 | 64 + PPC | KVM_REG_PPC_MAS7_3 | 64 + PPC | KVM_REG_PPC_MAS4 | 32 + PPC | KVM_REG_PPC_MAS6 | 32 + PPC | KVM_REG_PPC_MMUCFG | 32 + PPC | KVM_REG_PPC_TLB0CFG | 32 + PPC | KVM_REG_PPC_TLB1CFG | 32 + PPC | KVM_REG_PPC_TLB2CFG | 32 + PPC | KVM_REG_PPC_TLB3CFG | 32 ARM registers are mapped using the lower 32 bits. The upper 16 of that is the register group type, or coprocessor number: diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index c0c38ed9c97d..0c5cffb6a58e 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -449,4 +449,21 @@ struct kvm_get_htab_header { /* Debugging: Special instruction for software breakpoint */ #define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b) +/* MMU registers */ +#define KVM_REG_PPC_MAS0 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8c) +#define KVM_REG_PPC_MAS1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8d) +#define KVM_REG_PPC_MAS2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8e) +#define KVM_REG_PPC_MAS7_3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8f) +#define KVM_REG_PPC_MAS4 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x90) +#define KVM_REG_PPC_MAS6 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x91) +#define KVM_REG_PPC_MMUCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x92) +/* + * TLBnCFG fields TLBnCFG_N_ENTRY and TLBnCFG_ASSOC can be changed only using + * KVM_CAP_SW_TLB ioctl + */ +#define KVM_REG_PPC_TLB0CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x93) +#define KVM_REG_PPC_TLB1CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94) +#define KVM_REG_PPC_TLB2CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95) +#define KVM_REG_PPC_TLB3CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96) + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 576010f6c27d..ce6b73c29612 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -428,13 +428,15 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - return -EINVAL; + int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); + return r; } int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - return -EINVAL; + int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); + return r; } struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 33db48a8ce24..b73ca7a1c09f 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -131,6 +131,10 @@ void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500); void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val); +int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val); #ifdef CONFIG_KVM_E500V2 unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500, diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 5c4475983f78..44f7762694ba 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -596,6 +596,100 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) return 0; } +int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) +{ + int r = 0; + long int i; + + switch (id) { + case KVM_REG_PPC_MAS0: + *val = get_reg_val(id, vcpu->arch.shared->mas0); + break; + case KVM_REG_PPC_MAS1: + *val = get_reg_val(id, vcpu->arch.shared->mas1); + break; + case KVM_REG_PPC_MAS2: + *val = get_reg_val(id, vcpu->arch.shared->mas2); + break; + case KVM_REG_PPC_MAS7_3: + *val = get_reg_val(id, vcpu->arch.shared->mas7_3); + break; + case KVM_REG_PPC_MAS4: + *val = get_reg_val(id, vcpu->arch.shared->mas4); + break; + case KVM_REG_PPC_MAS6: + *val = get_reg_val(id, vcpu->arch.shared->mas6); + break; + case KVM_REG_PPC_MMUCFG: + *val = get_reg_val(id, vcpu->arch.mmucfg); + break; + case KVM_REG_PPC_TLB0CFG: + case KVM_REG_PPC_TLB1CFG: + case KVM_REG_PPC_TLB2CFG: + case KVM_REG_PPC_TLB3CFG: + i = id - KVM_REG_PPC_TLB0CFG; + *val = get_reg_val(id, vcpu->arch.tlbcfg[i]); + break; + default: + r = -EINVAL; + break; + } + + return r; +} + +int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) +{ + int r = 0; + long int i; + + switch (id) { + case KVM_REG_PPC_MAS0: + vcpu->arch.shared->mas0 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MAS1: + vcpu->arch.shared->mas1 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MAS2: + vcpu->arch.shared->mas2 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MAS7_3: + vcpu->arch.shared->mas7_3 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MAS4: + vcpu->arch.shared->mas4 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MAS6: + vcpu->arch.shared->mas6 = set_reg_val(id, *val); + break; + /* Only allow MMU registers to be set to the config supported by KVM */ + case KVM_REG_PPC_MMUCFG: { + u32 reg = set_reg_val(id, *val); + if (reg != vcpu->arch.mmucfg) + r = -EINVAL; + break; + } + case KVM_REG_PPC_TLB0CFG: + case KVM_REG_PPC_TLB1CFG: + case KVM_REG_PPC_TLB2CFG: + case KVM_REG_PPC_TLB3CFG: { + /* MMU geometry (N_ENTRY/ASSOC) can be set only using SW_TLB */ + u32 reg = set_reg_val(id, *val); + i = id - KVM_REG_PPC_TLB0CFG; + if (reg != vcpu->arch.tlbcfg[i]) + r = -EINVAL; + break; + } + default: + r = -EINVAL; + break; + } + + return r; +} + int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, struct kvm_config_tlb *cfg) { diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index b071bdcbe37d..ab073a80cd45 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -258,13 +258,15 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - return -EINVAL; + int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); + return r; } int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - return -EINVAL; + int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); + return r; } struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) -- cgit v1.2.3 From 307d9008ed4f28920e0e78719e10d0f407341e00 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 11 Apr 2013 00:03:10 +0000 Subject: KVM: PPC: e500: Add support for TLBnPS registers Add support for TLBnPS registers available in MMU Architecture Version (MAV) 2.0. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 4 ++++ arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/include/uapi/asm/kvm.h | 4 ++++ arch/powerpc/kvm/e500.h | 18 ++++++++++++++++++ arch/powerpc/kvm/e500_emulate.c | 10 ++++++++++ arch/powerpc/kvm/e500_mmu.c | 22 ++++++++++++++++++++++ 6 files changed, 59 insertions(+) (limited to 'arch/powerpc/include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 1a766637ac21..f045377ae5a0 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1803,6 +1803,10 @@ registers, find a list below: PPC | KVM_REG_PPC_TLB1CFG | 32 PPC | KVM_REG_PPC_TLB2CFG | 32 PPC | KVM_REG_PPC_TLB3CFG | 32 + PPC | KVM_REG_PPC_TLB0PS | 32 + PPC | KVM_REG_PPC_TLB1PS | 32 + PPC | KVM_REG_PPC_TLB2PS | 32 + PPC | KVM_REG_PPC_TLB3PS | 32 ARM registers are mapped using the lower 32 bits. The upper 16 of that is the register group type, or coprocessor number: diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index e34f8fee9080..3b6cee3e33a8 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -502,6 +502,7 @@ struct kvm_vcpu_arch { spinlock_t wdt_lock; struct timer_list wdt_timer; u32 tlbcfg[4]; + u32 tlbps[4]; u32 mmucfg; u32 epr; u32 crit_save; diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 0c5cffb6a58e..4dd36c399842 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -465,5 +465,9 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_TLB1CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94) #define KVM_REG_PPC_TLB2CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95) #define KVM_REG_PPC_TLB3CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96) +#define KVM_REG_PPC_TLB0PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x97) +#define KVM_REG_PPC_TLB1PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98) +#define KVM_REG_PPC_TLB2PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99) +#define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index b73ca7a1c09f..c2e5e98453a6 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -23,6 +23,10 @@ #include #include +enum vcpu_ftr { + VCPU_FTR_MMU_V2 +}; + #define E500_PID_NUM 3 #define E500_TLB_NUM 2 @@ -299,4 +303,18 @@ static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu) #define get_tlb_sts(gtlbe) (MAS1_TS) #endif /* !BOOKE_HV */ +static inline bool has_feature(const struct kvm_vcpu *vcpu, + enum vcpu_ftr ftr) +{ + bool has_ftr; + switch (ftr) { + case VCPU_FTR_MMU_V2: + has_ftr = ((vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2); + break; + default: + return false; + } + return has_ftr; +} + #endif /* KVM_E500_H */ diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index e78f353a836a..12b8de2f91ed 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -284,6 +284,16 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_TLB1CFG: *spr_val = vcpu->arch.tlbcfg[1]; break; + case SPRN_TLB0PS: + if (!has_feature(vcpu, VCPU_FTR_MMU_V2)) + return EMULATE_FAIL; + *spr_val = vcpu->arch.tlbps[0]; + break; + case SPRN_TLB1PS: + if (!has_feature(vcpu, VCPU_FTR_MMU_V2)) + return EMULATE_FAIL; + *spr_val = vcpu->arch.tlbps[1]; + break; case SPRN_L1CSR0: *spr_val = vcpu_e500->l1csr0; break; diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 08a5b0d296fa..a863dc1791eb 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -631,6 +631,13 @@ int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, i = id - KVM_REG_PPC_TLB0CFG; *val = get_reg_val(id, vcpu->arch.tlbcfg[i]); break; + case KVM_REG_PPC_TLB0PS: + case KVM_REG_PPC_TLB1PS: + case KVM_REG_PPC_TLB2PS: + case KVM_REG_PPC_TLB3PS: + i = id - KVM_REG_PPC_TLB0PS; + *val = get_reg_val(id, vcpu->arch.tlbps[i]); + break; default: r = -EINVAL; break; @@ -682,6 +689,16 @@ int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, r = -EINVAL; break; } + case KVM_REG_PPC_TLB0PS: + case KVM_REG_PPC_TLB1PS: + case KVM_REG_PPC_TLB2PS: + case KVM_REG_PPC_TLB3PS: { + u32 reg = set_reg_val(id, *val); + i = id - KVM_REG_PPC_TLB0PS; + if (reg != vcpu->arch.tlbps[i]) + r = -EINVAL; + break; + } default: r = -EINVAL; break; @@ -855,6 +872,11 @@ static int vcpu_mmu_init(struct kvm_vcpu *vcpu, vcpu->arch.tlbcfg[1] |= params[1].entries; vcpu->arch.tlbcfg[1] |= params[1].ways << TLBnCFG_ASSOC_SHIFT; + if (has_feature(vcpu, VCPU_FTR_MMU_V2)) { + vcpu->arch.tlbps[0] = mfspr(SPRN_TLB0PS); + vcpu->arch.tlbps[1] = mfspr(SPRN_TLB1PS); + } + return 0; } -- cgit v1.2.3 From 9a6061d7fdedbf025549adf5c9d920d90bbf4a69 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 11 Apr 2013 00:03:11 +0000 Subject: KVM: PPC: e500: Add support for EPTCFG register EPTCFG register defined by E.PT is accessed unconditionally by Linux guests in the presence of MAV 2.0. Emulate it now. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 1 + arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/include/uapi/asm/kvm.h | 1 + arch/powerpc/kvm/e500_emulate.c | 9 +++++++++ arch/powerpc/kvm/e500_mmu.c | 12 ++++++++++++ 5 files changed, 24 insertions(+) (limited to 'arch/powerpc/include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index f045377ae5a0..a1f2200e43d0 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1807,6 +1807,7 @@ registers, find a list below: PPC | KVM_REG_PPC_TLB1PS | 32 PPC | KVM_REG_PPC_TLB2PS | 32 PPC | KVM_REG_PPC_TLB3PS | 32 + PPC | KVM_REG_PPC_EPTCFG | 32 ARM registers are mapped using the lower 32 bits. The upper 16 of that is the register group type, or coprocessor number: diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 3b6cee3e33a8..8a48e686a755 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -504,6 +504,7 @@ struct kvm_vcpu_arch { u32 tlbcfg[4]; u32 tlbps[4]; u32 mmucfg; + u32 eptcfg; u32 epr; u32 crit_save; struct kvmppc_booke_debug_reg dbg_reg; diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 4dd36c399842..41d59d8bdfe8 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -469,5 +469,6 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_TLB1PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98) #define KVM_REG_PPC_TLB2PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99) #define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a) +#define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 12b8de2f91ed..b10a01243abd 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -317,6 +317,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_MMUCFG: *spr_val = vcpu->arch.mmucfg; break; + case SPRN_EPTCFG: + if (!has_feature(vcpu, VCPU_FTR_MMU_V2)) + return EMULATE_FAIL; + /* + * Legacy Linux guests access EPTCFG register even if the E.PT + * category is disabled in the VM. Give them a chance to live. + */ + *spr_val = vcpu->arch.eptcfg; + break; /* extra exceptions */ case SPRN_IVOR32: diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index a863dc1791eb..1c1c5cb78495 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -624,6 +624,9 @@ int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_MMUCFG: *val = get_reg_val(id, vcpu->arch.mmucfg); break; + case KVM_REG_PPC_EPTCFG: + *val = get_reg_val(id, vcpu->arch.eptcfg); + break; case KVM_REG_PPC_TLB0CFG: case KVM_REG_PPC_TLB1CFG: case KVM_REG_PPC_TLB2CFG: @@ -678,6 +681,12 @@ int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, r = -EINVAL; break; } + case KVM_REG_PPC_EPTCFG: { + u32 reg = set_reg_val(id, *val); + if (reg != vcpu->arch.eptcfg) + r = -EINVAL; + break; + } case KVM_REG_PPC_TLB0CFG: case KVM_REG_PPC_TLB1CFG: case KVM_REG_PPC_TLB2CFG: @@ -875,6 +884,9 @@ static int vcpu_mmu_init(struct kvm_vcpu *vcpu, if (has_feature(vcpu, VCPU_FTR_MMU_V2)) { vcpu->arch.tlbps[0] = mfspr(SPRN_TLB0PS); vcpu->arch.tlbps[1] = mfspr(SPRN_TLB1PS); + + /* Guest mmu emulation currently doesn't handle E.PT */ + vcpu->arch.eptcfg = 0; } return 0; -- cgit v1.2.3 From a1b4a0f6064aacad0d708105e6f60a06e93fbf37 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 18 Apr 2013 19:50:24 +0000 Subject: KVM: PPC: Book3S HV: Make HPT reading code notice R/C bit changes At present, the code that determines whether a HPT entry has changed, and thus needs to be sent to userspace when it is copying the HPT, doesn't consider a hardware update to the reference and change bits (R and C) in the HPT entries to constitute a change that needs to be sent to userspace. This adds code to check for changes in R and C when we are scanning the HPT to find changed entries, and adds code to set the changed flag for the HPTE when we update the R and C bits in the guest view of the HPTE. Since we now need to set the HPTE changed flag in book3s_64_mmu_hv.c as well as book3s_hv_rm_mmu.c, we move the note_hpte_modification() function into kvm_book3s_64.h. Current Linux guest kernels don't use the hardware updates of R and C in the HPT, so this change won't affect them. Linux (or other) kernels might in future want to use the R and C bits and have them correctly transferred across when a guest is migrated, so it is better to correct this deficiency. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s_64.h | 13 +++++++ arch/powerpc/kvm/book3s_64_mmu_hv.c | 59 +++++++++++++++++++++++++++----- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 11 ------ 3 files changed, 63 insertions(+), 20 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 38bec1dc9928..9c1ff330c805 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -268,4 +268,17 @@ static inline int is_vrma_hpte(unsigned long hpte_v) (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16))); } +#ifdef CONFIG_KVM_BOOK3S_64_HV +/* + * Note modification of an HPTE; set the HPTE modified bit + * if anyone is interested. + */ +static inline void note_hpte_modification(struct kvm *kvm, + struct revmap_entry *rev) +{ + if (atomic_read(&kvm->arch.hpte_mod_interest)) + rev->guest_rpte |= HPTE_GR_MODIFIED; +} +#endif /* CONFIG_KVM_BOOK3S_64_HV */ + #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 8cc18abd6dde..d641a6634b02 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -893,7 +893,10 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, /* Harvest R and C */ rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; - rev[i].guest_rpte = ptel | rcbits; + if (rcbits & ~rev[i].guest_rpte) { + rev[i].guest_rpte = ptel | rcbits; + note_hpte_modification(kvm, &rev[i]); + } } unlock_rmap(rmapp); hptep[0] &= ~HPTE_V_HVLOCK; @@ -976,7 +979,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, /* Now check and modify the HPTE */ if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { kvmppc_clear_ref_hpte(kvm, hptep, i); - rev[i].guest_rpte |= HPTE_R_R; + if (!(rev[i].guest_rpte & HPTE_R_R)) { + rev[i].guest_rpte |= HPTE_R_R; + note_hpte_modification(kvm, &rev[i]); + } ret = 1; } hptep[0] &= ~HPTE_V_HVLOCK; @@ -1080,7 +1086,10 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) hptep[1] &= ~HPTE_R_C; eieio(); hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; - rev[i].guest_rpte |= HPTE_R_C; + if (!(rev[i].guest_rpte & HPTE_R_C)) { + rev[i].guest_rpte |= HPTE_R_C; + note_hpte_modification(kvm, &rev[i]); + } ret = 1; } hptep[0] &= ~HPTE_V_HVLOCK; @@ -1193,16 +1202,36 @@ struct kvm_htab_ctx { #define HPTE_SIZE (2 * sizeof(unsigned long)) +/* + * Returns 1 if this HPT entry has been modified or has pending + * R/C bit changes. + */ +static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) +{ + unsigned long rcbits_unset; + + if (revp->guest_rpte & HPTE_GR_MODIFIED) + return 1; + + /* Also need to consider changes in reference and changed bits */ + rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); + if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset)) + return 1; + + return 0; +} + static long record_hpte(unsigned long flags, unsigned long *hptp, unsigned long *hpte, struct revmap_entry *revp, int want_valid, int first_pass) { unsigned long v, r; + unsigned long rcbits_unset; int ok = 1; int valid, dirty; /* Unmodified entries are uninteresting except on the first pass */ - dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); + dirty = hpte_dirty(revp, hptp); if (!first_pass && !dirty) return 0; @@ -1223,16 +1252,28 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) cpu_relax(); v = hptp[0]; + + /* re-evaluate valid and dirty from synchronized HPTE value */ + valid = !!(v & HPTE_V_VALID); + dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); + + /* Harvest R and C into guest view if necessary */ + rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); + if (valid && (rcbits_unset & hptp[1])) { + revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) | + HPTE_GR_MODIFIED; + dirty = 1; + } + if (v & HPTE_V_ABSENT) { v &= ~HPTE_V_ABSENT; v |= HPTE_V_VALID; + valid = 1; } - /* re-evaluate valid and dirty from synchronized HPTE value */ - valid = !!(v & HPTE_V_VALID); if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED)) valid = 0; - r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C)); - dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); + + r = revp->guest_rpte; /* only clear modified if this is the right sort of entry */ if (valid == want_valid && dirty) { r &= ~HPTE_GR_MODIFIED; @@ -1288,7 +1329,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, /* Skip uninteresting entries, i.e. clean on not-first pass */ if (!first_pass) { while (i < kvm->arch.hpt_npte && - !(revp->guest_rpte & HPTE_GR_MODIFIED)) { + !hpte_dirty(revp, hptp)) { ++i; hptp += 2; ++revp; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 19c93bae1aea..6dcbb49105a4 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -97,17 +97,6 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, } EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); -/* - * Note modification of an HPTE; set the HPTE modified bit - * if anyone is interested. - */ -static inline void note_hpte_modification(struct kvm *kvm, - struct revmap_entry *rev) -{ - if (atomic_read(&kvm->arch.hpte_mod_interest)) - rev->guest_rpte |= HPTE_GR_MODIFIED; -} - /* Remove this HPTE from the chain for a real page */ static void remove_revmap_chain(struct kvm *kvm, long pte_index, struct revmap_entry *rev, -- cgit v1.2.3 From c35635efdc0312e013ebda1c8f3b5dd038c0d0e7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 18 Apr 2013 19:51:04 +0000 Subject: KVM: PPC: Book3S HV: Report VPA and DTL modifications in dirty map At present, the KVM_GET_DIRTY_LOG ioctl doesn't report modifications done by the host to the virtual processor areas (VPAs) and dispatch trace logs (DTLs) registered by the guest. This is because those modifications are done either in real mode or in the host kernel context, and in neither case does the access go through the guest's HPT, and thus no change (C) bit gets set in the guest's HPT. However, the changes done by the host do need to be tracked so that the modified pages get transferred when doing live migration. In order to track these modifications, this adds a dirty flag to the struct representing the VPA/DTL areas, and arranges to set the flag when the VPA/DTL gets modified by the host. Then, when we are collecting the dirty log, we also check the dirty flags for the VPA and DTL for each vcpu and set the relevant bit in the dirty log if necessary. Doing this also means we now need to keep track of the guest physical address of the VPA/DTL areas. So as not to lose track of modifications to a VPA/DTL area when it gets unregistered, or when a new area gets registered in its place, we need to transfer the dirty state to the rmap chain. This adds code to kvmppc_unpin_guest_page() to do that if the area was dirty. To simplify that code, we now require that all VPA, DTL and SLB shadow buffer areas fit within a single host page. Guests already comply with this requirement because pHyp requires that these areas not cross a 4k boundary. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 3 +- arch/powerpc/include/asm/kvm_host.h | 2 ++ arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/book3s_64_mmu_hv.c | 61 ++++++++++++++++++++++++++++----- arch/powerpc/kvm/book3s_hv.c | 30 ++++++++++------ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 +++ 6 files changed, 80 insertions(+), 21 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index bc81842ea25a..c55f7e6affaa 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -156,7 +156,8 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, unsigned long pte_index); extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, unsigned long *nb_ret); -extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr); +extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr, + unsigned long gpa, bool dirty); extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel); extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 8a48e686a755..1443768a6588 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -301,11 +301,13 @@ struct kvmppc_vcore { * that a guest can register. */ struct kvmppc_vpa { + unsigned long gpa; /* Current guest phys addr */ void *pinned_addr; /* Address in kernel linear mapping */ void *pinned_end; /* End of region */ unsigned long next_gpa; /* Guest phys addr for update */ unsigned long len; /* Number of bytes required */ u8 update_pending; /* 1 => update pinned_addr from next_gpa */ + bool dirty; /* true => area has been modified by kernel */ }; struct kvmppc_pte { diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d87c90886c75..dbfd5498f440 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -477,6 +477,7 @@ int main(void) DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); + DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); #endif #ifdef CONFIG_PPC_BOOK3S DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index d641a6634b02..69efe0d6cedc 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1099,11 +1099,30 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) return ret; } +static void harvest_vpa_dirty(struct kvmppc_vpa *vpa, + struct kvm_memory_slot *memslot, + unsigned long *map) +{ + unsigned long gfn; + + if (!vpa->dirty || !vpa->pinned_addr) + return; + gfn = vpa->gpa >> PAGE_SHIFT; + if (gfn < memslot->base_gfn || + gfn >= memslot->base_gfn + memslot->npages) + return; + + vpa->dirty = false; + if (map) + __set_bit_le(gfn - memslot->base_gfn, map); +} + long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map) { unsigned long i; unsigned long *rmapp; + struct kvm_vcpu *vcpu; preempt_disable(); rmapp = memslot->arch.rmap; @@ -1112,6 +1131,15 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, __set_bit_le(i, map); ++rmapp; } + + /* Harvest dirty bits from VPA and DTL updates */ + /* Note: we never modify the SLB shadow buffer areas */ + kvm_for_each_vcpu(i, vcpu, kvm) { + spin_lock(&vcpu->arch.vpa_update_lock); + harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map); + harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map); + spin_unlock(&vcpu->arch.vpa_update_lock); + } preempt_enable(); return 0; } @@ -1123,7 +1151,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, unsigned long gfn = gpa >> PAGE_SHIFT; struct page *page, *pages[1]; int npages; - unsigned long hva, psize, offset; + unsigned long hva, offset; unsigned long pa; unsigned long *physp; int srcu_idx; @@ -1155,14 +1183,9 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, } srcu_read_unlock(&kvm->srcu, srcu_idx); - psize = PAGE_SIZE; - if (PageHuge(page)) { - page = compound_head(page); - psize <<= compound_order(page); - } - offset = gpa & (psize - 1); + offset = gpa & (PAGE_SIZE - 1); if (nb_ret) - *nb_ret = psize - offset; + *nb_ret = PAGE_SIZE - offset; return page_address(page) + offset; err: @@ -1170,11 +1193,31 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, return NULL; } -void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) +void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa, + bool dirty) { struct page *page = virt_to_page(va); + struct kvm_memory_slot *memslot; + unsigned long gfn; + unsigned long *rmap; + int srcu_idx; put_page(page); + + if (!dirty || !kvm->arch.using_mmu_notifiers) + return; + + /* We need to mark this page dirty in the rmap chain */ + gfn = gpa >> PAGE_SHIFT; + srcu_idx = srcu_read_lock(&kvm->srcu); + memslot = gfn_to_memslot(kvm, gfn); + if (memslot) { + rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; + lock_rmap(rmap); + *rmap |= KVMPPC_RMAP_CHANGED; + unlock_rmap(rmap); + } + srcu_read_unlock(&kvm->srcu, srcu_idx); } /* diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1e521baf9a7d..5af0f2979833 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -259,7 +259,7 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, len = ((struct reg_vpa *)va)->length.hword; else len = ((struct reg_vpa *)va)->length.word; - kvmppc_unpin_guest_page(kvm, va); + kvmppc_unpin_guest_page(kvm, va, vpa, false); /* Check length */ if (len > nb || len < sizeof(struct reg_vpa)) @@ -359,13 +359,13 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) va = NULL; nb = 0; if (gpa) - va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb); + va = kvmppc_pin_guest_page(kvm, gpa, &nb); spin_lock(&vcpu->arch.vpa_update_lock); if (gpa == vpap->next_gpa) break; /* sigh... unpin that one and try again */ if (va) - kvmppc_unpin_guest_page(kvm, va); + kvmppc_unpin_guest_page(kvm, va, gpa, false); } vpap->update_pending = 0; @@ -375,12 +375,15 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) * has changed the mappings underlying guest memory, * so unregister the region. */ - kvmppc_unpin_guest_page(kvm, va); + kvmppc_unpin_guest_page(kvm, va, gpa, false); va = NULL; } if (vpap->pinned_addr) - kvmppc_unpin_guest_page(kvm, vpap->pinned_addr); + kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa, + vpap->dirty); + vpap->gpa = gpa; vpap->pinned_addr = va; + vpap->dirty = false; if (va) vpap->pinned_end = va + vpap->len; } @@ -472,6 +475,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, /* order writing *dt vs. writing vpa->dtl_idx */ smp_wmb(); vpa->dtl_idx = ++vcpu->arch.dtl_index; + vcpu->arch.dtl.dirty = true; } int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) @@ -913,15 +917,19 @@ out: return ERR_PTR(err); } +static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa) +{ + if (vpa->pinned_addr) + kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa, + vpa->dirty); +} + void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { spin_lock(&vcpu->arch.vpa_update_lock); - if (vcpu->arch.dtl.pinned_addr) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr); - if (vcpu->arch.slb_shadow.pinned_addr) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr); - if (vcpu->arch.vpa.pinned_addr) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr); + unpin_vpa(vcpu->kvm, &vcpu->arch.dtl); + unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow); + unpin_vpa(vcpu->kvm, &vcpu->arch.vpa); spin_unlock(&vcpu->arch.vpa_update_lock); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index e33d11f1b977..0f23bb851711 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -260,6 +260,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) lwz r5, LPPACA_YIELDCOUNT(r3) addi r5, r5, 1 stw r5, LPPACA_YIELDCOUNT(r3) + li r6, 1 + stb r6, VCPU_VPA_DIRTY(r4) 25: /* Load up DAR and DSISR */ ld r5, VCPU_DAR(r4) @@ -1018,6 +1020,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) lwz r3, LPPACA_YIELDCOUNT(r8) addi r3, r3, 1 stw r3, LPPACA_YIELDCOUNT(r8) + li r3, 1 + stb r3, VCPU_VPA_DIRTY(r9) 25: /* Save PMU registers if requested */ /* r8 and cr0.eq are live here */ -- cgit v1.2.3 From 5df554ad5b7522ea62b0ff9d5be35183494efc21 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 12 Apr 2013 14:08:46 +0000 Subject: kvm/ppc/mpic: in-kernel MPIC emulation Hook the MPIC code up to the KVM interfaces, add locking, etc. Signed-off-by: Scott Wood [agraf: add stub function for kvmppc_mpic_set_epr, non-booke, 64bit] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/devices/mpic.txt | 37 ++ arch/powerpc/include/asm/kvm_host.h | 8 +- arch/powerpc/include/asm/kvm_ppc.h | 17 + arch/powerpc/include/uapi/asm/kvm.h | 8 + arch/powerpc/kvm/Kconfig | 9 + arch/powerpc/kvm/Makefile | 2 + arch/powerpc/kvm/booke.c | 8 +- arch/powerpc/kvm/mpic.c | 762 ++++++++++++++++++++++------- arch/powerpc/kvm/powerpc.c | 12 +- include/linux/kvm_host.h | 2 + include/uapi/linux/kvm.h | 3 + virt/kvm/kvm_main.c | 6 + 12 files changed, 674 insertions(+), 200 deletions(-) create mode 100644 Documentation/virtual/kvm/devices/mpic.txt (limited to 'arch/powerpc/include') diff --git a/Documentation/virtual/kvm/devices/mpic.txt b/Documentation/virtual/kvm/devices/mpic.txt new file mode 100644 index 000000000000..ce98e3264fa8 --- /dev/null +++ b/Documentation/virtual/kvm/devices/mpic.txt @@ -0,0 +1,37 @@ +MPIC interrupt controller +========================= + +Device types supported: + KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0 + KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2 + +Only one MPIC instance, of any type, may be instantiated. The created +MPIC will act as the system interrupt controller, connecting to each +vcpu's interrupt inputs. + +Groups: + KVM_DEV_MPIC_GRP_MISC + Attributes: + KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit) + Base address of the 256 KiB MPIC register space. Must be + naturally aligned. A value of zero disables the mapping. + Reset value is zero. + + KVM_DEV_MPIC_GRP_REGISTER (rw, 32-bit) + Access an MPIC register, as if the access were made from the guest. + "attr" is the byte offset into the MPIC register space. Accesses + must be 4-byte aligned. + + MSIs may be signaled by using this attribute group to write + to the relevant MSIIR. + + KVM_DEV_MPIC_GRP_IRQ_ACTIVE (rw, 32-bit) + IRQ input line for each standard openpic source. 0 is inactive and 1 + is active, regardless of interrupt sense. + + For edge-triggered interrupts: Writing 1 is considered an activating + edge, and writing 0 is ignored. Reading returns 1 if a previously + signaled edge has not been acknowledged, and 0 otherwise. + + "attr" is the IRQ number. IRQ numbers for standard sources are the + byte offset of the relevant IVPR from EIVPR0, divided by 32. diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1443768a6588..153c8c2b0f88 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -361,6 +361,11 @@ struct kvmppc_slb { #define KVMPPC_BOOKE_MAX_IAC 4 #define KVMPPC_BOOKE_MAX_DAC 2 +/* KVMPPC_EPR_USER takes precedence over KVMPPC_EPR_KERNEL */ +#define KVMPPC_EPR_NONE 0 /* EPR not supported */ +#define KVMPPC_EPR_USER 1 /* exit to userspace to fill EPR */ +#define KVMPPC_EPR_KERNEL 2 /* in-kernel irqchip */ + struct kvmppc_booke_debug_reg { u32 dbcr0; u32 dbcr1; @@ -526,7 +531,7 @@ struct kvm_vcpu_arch { u8 sane; u8 cpu_type; u8 hcall_needed; - u8 epr_enabled; + u8 epr_flags; /* KVMPPC_EPR_xxx */ u8 epr_needed; u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ @@ -593,5 +598,6 @@ struct kvm_vcpu_arch { #define KVM_MMIO_REG_FQPR 0x0060 #define __KVM_HAVE_ARCH_WQP +#define __KVM_HAVE_CREATE_DEVICE #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index bcc68b1afc66..3810f9c7616c 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -164,6 +164,8 @@ extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); +int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. @@ -245,6 +247,9 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *); void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); +struct openpic; +void kvmppc_mpic_put(struct openpic *opp); + #ifdef CONFIG_KVM_BOOK3S_64_HV static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) { @@ -270,6 +275,18 @@ static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) #endif } +#ifdef CONFIG_KVM_MPIC + +void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu); + +#else + +static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu) +{ +} + +#endif /* CONFIG_KVM_MPIC */ + int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, struct kvm_config_tlb *cfg); int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 41d59d8bdfe8..02ad96606860 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -382,6 +382,14 @@ struct kvm_get_htab_header { __u16 n_invalid; }; +/* Device control API: PPC-specific devices */ +#define KVM_DEV_MPIC_GRP_MISC 1 +#define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */ + +#define KVM_DEV_MPIC_GRP_REGISTER 2 /* 32-bit */ +#define KVM_DEV_MPIC_GRP_IRQ_ACTIVE 3 /* 32-bit */ + +/* One-Reg API: PPC-specific registers */ #define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) #define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2) #define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3) diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 448952035b6e..f47e95e0b6de 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -151,6 +151,15 @@ config KVM_E500MC If unsure, say N. +config KVM_MPIC + bool "KVM in-kernel MPIC emulation" + depends on KVM + help + Enable support for emulating MPIC devices inside the + host kernel, rather than relying on userspace to emulate. + Currently, support is limited to certain versions of + Freescale's MPIC implementation. + source drivers/vhost/Kconfig endif # VIRTUALIZATION diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index b772eded8c26..4a2277a221bb 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -103,6 +103,8 @@ kvm-book3s_32-objs := \ book3s_32_mmu.o kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) +kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o + kvm-objs := $(kvm-objs-m) $(kvm-objs-y) obj-$(CONFIG_KVM_440) += kvm.o diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 02756537cd90..4da11ed48c59 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -346,7 +346,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, keep_irq = true; } - if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled) + if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_flags) update_epr = true; switch (priority) { @@ -427,8 +427,10 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, set_guest_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) set_guest_dear(vcpu, vcpu->arch.queued_dear); - if (update_epr == true) - kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); + if (update_epr == true) { + if (vcpu->arch.epr_flags & KVMPPC_EPR_USER) + kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); + } new_msr &= msr_mask; #if defined(CONFIG_64BIT) diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 1df67aed7a91..cb451b91e342 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -23,6 +23,19 @@ * THE SOFTWARE. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iodev.h" + #define MAX_CPU 32 #define MAX_SRC 256 #define MAX_TMR 4 @@ -36,6 +49,7 @@ #define OPENPIC_FLAG_ILR (2 << 0) /* OpenPIC address map */ +#define OPENPIC_REG_SIZE 0x40000 #define OPENPIC_GLB_REG_START 0x0 #define OPENPIC_GLB_REG_SIZE 0x10F0 #define OPENPIC_TMR_REG_START 0x10F0 @@ -89,6 +103,7 @@ static struct fsl_mpic_info fsl_mpic_42 = { #define ILR_INTTGT_INT 0x00 #define ILR_INTTGT_CINT 0x01 /* critical */ #define ILR_INTTGT_MCP 0x02 /* machine check */ +#define NUM_OUTPUTS 3 #define MSIIR_OFFSET 0x140 #define MSIIR_SRS_SHIFT 29 @@ -98,18 +113,19 @@ static struct fsl_mpic_info fsl_mpic_42 = { static int get_current_cpu(void) { - CPUState *cpu_single_cpu; - - if (!cpu_single_env) - return -1; - - cpu_single_cpu = ENV_GET_CPU(cpu_single_env); - return cpu_single_cpu->cpu_index; +#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) + struct kvm_vcpu *vcpu = current->thread.kvm_vcpu; + return vcpu ? vcpu->vcpu_id : -1; +#else + /* XXX */ + return -1; +#endif } -static uint32_t openpic_cpu_read_internal(void *opaque, gpa_t addr, int idx); -static void openpic_cpu_write_internal(void *opaque, gpa_t addr, - uint32_t val, int idx); +static int openpic_cpu_write_internal(void *opaque, gpa_t addr, + u32 val, int idx); +static int openpic_cpu_read_internal(void *opaque, gpa_t addr, + u32 *ptr, int idx); enum irq_type { IRQ_TYPE_NORMAL = 0, @@ -131,7 +147,7 @@ struct irq_source { uint32_t idr; /* IRQ destination register */ uint32_t destmask; /* bitmap of CPU destinations */ int last_cpu; - int output; /* IRQ level, e.g. OPENPIC_OUTPUT_INT */ + int output; /* IRQ level, e.g. ILR_INTTGT_INT */ int pending; /* TRUE if IRQ is pending */ enum irq_type type; bool level:1; /* level-triggered */ @@ -158,16 +174,27 @@ struct irq_source { #define IDR_CI 0x40000000 /* critical interrupt */ struct irq_dest { + struct kvm_vcpu *vcpu; + int32_t ctpr; /* CPU current task priority */ struct irq_queue raised; struct irq_queue servicing; - qemu_irq *irqs; /* Count of IRQ sources asserting on non-INT outputs */ - uint32_t outputs_active[OPENPIC_OUTPUT_NB]; + uint32_t outputs_active[NUM_OUTPUTS]; }; struct openpic { + struct kvm *kvm; + struct kvm_device *dev; + struct kvm_io_device mmio; + struct list_head mmio_regions; + atomic_t users; + bool mmio_mapped; + + gpa_t reg_base; + spinlock_t lock; + /* Behavior control */ struct fsl_mpic_info *fsl; uint32_t model; @@ -208,6 +235,47 @@ struct openpic { uint32_t irq_msi; }; + +static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst, + int output) +{ + struct kvm_interrupt irq = { + .irq = KVM_INTERRUPT_SET_LEVEL, + }; + + if (!dst->vcpu) { + pr_debug("%s: destination cpu %d does not exist\n", + __func__, (int)(dst - &opp->dst[0])); + return; + } + + pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->vcpu_id, + output); + + if (output != ILR_INTTGT_INT) /* TODO */ + return; + + kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq); +} + +static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst, + int output) +{ + if (!dst->vcpu) { + pr_debug("%s: destination cpu %d does not exist\n", + __func__, (int)(dst - &opp->dst[0])); + return; + } + + pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->vcpu_id, + output); + + if (output != ILR_INTTGT_INT) /* TODO */ + return; + + kvmppc_core_dequeue_external(dst->vcpu); +} + static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ) { set_bit(n_IRQ, q->queue); @@ -268,7 +336,7 @@ static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ, pr_debug("%s: IRQ %d active %d was %d\n", __func__, n_IRQ, active, was_active); - if (src->output != OPENPIC_OUTPUT_INT) { + if (src->output != ILR_INTTGT_INT) { pr_debug("%s: output %d irq %d active %d was %d count %d\n", __func__, src->output, n_IRQ, active, was_active, dst->outputs_active[src->output]); @@ -282,14 +350,14 @@ static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ, dst->outputs_active[src->output]++ == 0) { pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n", __func__, src->output, n_CPU, n_IRQ); - qemu_irq_raise(dst->irqs[src->output]); + mpic_irq_raise(opp, dst, src->output); } } else { if (was_active && --dst->outputs_active[src->output] == 0) { pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n", __func__, src->output, n_CPU, n_IRQ); - qemu_irq_lower(dst->irqs[src->output]); + mpic_irq_lower(opp, dst, src->output); } } @@ -322,8 +390,7 @@ static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ, } else { pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n", __func__, n_CPU, n_IRQ, dst->raised.next); - qemu_irq_raise(opp->dst[n_CPU]. - irqs[OPENPIC_OUTPUT_INT]); + mpic_irq_raise(opp, dst, ILR_INTTGT_INT); } } else { IRQ_get_next(opp, &dst->servicing); @@ -338,8 +405,7 @@ static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ, pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n", __func__, n_IRQ, dst->ctpr, dst->servicing.priority, n_CPU); - qemu_irq_lower(opp->dst[n_CPU]. - irqs[OPENPIC_OUTPUT_INT]); + mpic_irq_lower(opp, dst, ILR_INTTGT_INT); } } } @@ -415,8 +481,8 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level) struct irq_source *src; if (n_IRQ >= MAX_IRQ) { - pr_err("%s: IRQ %d out of range\n", __func__, n_IRQ); - abort(); + WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ); + return; } src = &opp->src[n_IRQ]; @@ -433,7 +499,7 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level) openpic_update_irq(opp, n_IRQ); } - if (src->output != OPENPIC_OUTPUT_INT) { + if (src->output != ILR_INTTGT_INT) { /* Edge-triggered interrupts shouldn't be used * with non-INT delivery, but just in case, * try to make it do something sane rather than @@ -446,15 +512,13 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level) } } -static void openpic_reset(DeviceState *d) +static void openpic_reset(struct openpic *opp) { - struct openpic *opp = FROM_SYSBUS(typeof(*opp), SYS_BUS_DEVICE(d)); int i; opp->gcr = GCR_RESET; /* Initialise controller registers */ opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) | - ((opp->nb_cpus - 1) << FRR_NCPU_SHIFT) | (opp->vid << FRR_VID_SHIFT); opp->pir = 0; @@ -504,7 +568,7 @@ static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ) static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ) { if (opp->flags & OPENPIC_FLAG_ILR) - return output_to_inttgt(opp->src[n_IRQ].output); + return opp->src[n_IRQ].output; return 0xffffffff; } @@ -539,7 +603,7 @@ static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ, __func__); } - src->output = OPENPIC_OUTPUT_CINT; + src->output = ILR_INTTGT_CINT; src->nomask = true; src->destmask = 0; @@ -550,7 +614,7 @@ static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ, src->destmask |= 1UL << i; } } else { - src->output = OPENPIC_OUTPUT_INT; + src->output = ILR_INTTGT_INT; src->nomask = false; src->destmask = src->idr & normal_mask; } @@ -565,7 +629,7 @@ static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ, if (opp->flags & OPENPIC_FLAG_ILR) { struct irq_source *src = &opp->src[n_IRQ]; - src->output = inttgt_to_output(val & ILR_INTTGT_MASK); + src->output = val & ILR_INTTGT_MASK; pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, src->idr, src->output); @@ -614,34 +678,23 @@ static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ, static void openpic_gcr_write(struct openpic *opp, uint64_t val) { - bool mpic_proxy = false; - if (val & GCR_RESET) { - openpic_reset(&opp->busdev.qdev); + openpic_reset(opp); return; } opp->gcr &= ~opp->mpic_mode_mask; opp->gcr |= val & opp->mpic_mode_mask; - - /* Set external proxy mode */ - if ((val & opp->mpic_mode_mask) == GCR_MODE_PROXY) - mpic_proxy = true; - - ppce500_set_mpic_proxy(mpic_proxy); } -static void openpic_gbl_write(void *opaque, gpa_t addr, uint64_t val, - unsigned len) +static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val) { struct openpic *opp = opaque; - struct irq_dest *dst; - int idx; + int err = 0; - pr_debug("%s: addr %#" HWADDR_PRIx " <= %08" PRIx64 "\n", - __func__, addr, val); + pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val); if (addr & 0xF) - return; + return 0; switch (addr) { case 0x00: /* Block Revision Register1 (BRR1) is Readonly */ @@ -654,7 +707,8 @@ static void openpic_gbl_write(void *opaque, gpa_t addr, uint64_t val, case 0x90: case 0xA0: case 0xB0: - openpic_cpu_write_internal(opp, addr, val, get_current_cpu()); + err = openpic_cpu_write_internal(opp, addr, val, + get_current_cpu()); break; case 0x1000: /* FRR */ break; @@ -664,21 +718,11 @@ static void openpic_gbl_write(void *opaque, gpa_t addr, uint64_t val, case 0x1080: /* VIR */ break; case 0x1090: /* PIR */ - for (idx = 0; idx < opp->nb_cpus; idx++) { - if ((val & (1 << idx)) && !(opp->pir & (1 << idx))) { - pr_debug("Raise OpenPIC RESET output for CPU %d\n", - idx); - dst = &opp->dst[idx]; - qemu_irq_raise(dst->irqs[OPENPIC_OUTPUT_RESET]); - } else if (!(val & (1 << idx)) && - (opp->pir & (1 << idx))) { - pr_debug("Lower OpenPIC RESET output for CPU %d\n", - idx); - dst = &opp->dst[idx]; - qemu_irq_lower(dst->irqs[OPENPIC_OUTPUT_RESET]); - } - } - opp->pir = val; + /* + * This register is used to reset a CPU core -- + * let userspace handle it. + */ + err = -ENXIO; break; case 0x10A0: /* IPI_IVPR */ case 0x10B0: @@ -695,21 +739,25 @@ static void openpic_gbl_write(void *opaque, gpa_t addr, uint64_t val, default: break; } + + return err; } -static uint64_t openpic_gbl_read(void *opaque, gpa_t addr, unsigned len) +static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr) { struct openpic *opp = opaque; - uint32_t retval; + u32 retval; + int err = 0; - pr_debug("%s: addr %#" HWADDR_PRIx "\n", __func__, addr); + pr_debug("%s: addr %#llx\n", __func__, addr); retval = 0xFFFFFFFF; if (addr & 0xF) - return retval; + goto out; switch (addr) { case 0x1000: /* FRR */ retval = opp->frr; + retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT; break; case 0x1020: /* GCR */ retval = opp->gcr; @@ -731,8 +779,8 @@ static uint64_t openpic_gbl_read(void *opaque, gpa_t addr, unsigned len) case 0x90: case 0xA0: case 0xB0: - retval = - openpic_cpu_read_internal(opp, addr, get_current_cpu()); + err = openpic_cpu_read_internal(opp, addr, + &retval, get_current_cpu()); break; case 0x10A0: /* IPI_IVPR */ case 0x10B0: @@ -750,28 +798,28 @@ static uint64_t openpic_gbl_read(void *opaque, gpa_t addr, unsigned len) default: break; } - pr_debug("%s: => 0x%08x\n", __func__, retval); - return retval; +out: + pr_debug("%s: => 0x%08x\n", __func__, retval); + *ptr = retval; + return err; } -static void openpic_tmr_write(void *opaque, gpa_t addr, uint64_t val, - unsigned len) +static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val) { struct openpic *opp = opaque; int idx; addr += 0x10f0; - pr_debug("%s: addr %#" HWADDR_PRIx " <= %08" PRIx64 "\n", - __func__, addr, val); + pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val); if (addr & 0xF) - return; + return 0; if (addr == 0x10f0) { /* TFRR */ opp->tfrr = val; - return; + return 0; } idx = (addr >> 6) & 0x3; @@ -795,15 +843,17 @@ static void openpic_tmr_write(void *opaque, gpa_t addr, uint64_t val, write_IRQreg_idr(opp, opp->irq_tim0 + idx, val); break; } + + return 0; } -static uint64_t openpic_tmr_read(void *opaque, gpa_t addr, unsigned len) +static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr) { struct openpic *opp = opaque; uint32_t retval = -1; int idx; - pr_debug("%s: addr %#" HWADDR_PRIx "\n", __func__, addr); + pr_debug("%s: addr %#llx\n", __func__, addr); if (addr & 0xF) goto out; @@ -813,6 +863,7 @@ static uint64_t openpic_tmr_read(void *opaque, gpa_t addr, unsigned len) retval = opp->tfrr; goto out; } + switch (addr & 0x30) { case 0x00: /* TCCR */ retval = opp->timers[idx].tccr; @@ -830,18 +881,16 @@ static uint64_t openpic_tmr_read(void *opaque, gpa_t addr, unsigned len) out: pr_debug("%s: => 0x%08x\n", __func__, retval); - - return retval; + *ptr = retval; + return 0; } -static void openpic_src_write(void *opaque, gpa_t addr, uint64_t val, - unsigned len) +static int openpic_src_write(void *opaque, gpa_t addr, u32 val) { struct openpic *opp = opaque; int idx; - pr_debug("%s: addr %#" HWADDR_PRIx " <= %08" PRIx64 "\n", - __func__, addr, val); + pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val); addr = addr & 0xffff; idx = addr >> 5; @@ -857,15 +906,17 @@ static void openpic_src_write(void *opaque, gpa_t addr, uint64_t val, write_IRQreg_ilr(opp, idx, val); break; } + + return 0; } -static uint64_t openpic_src_read(void *opaque, uint64_t addr, unsigned len) +static int openpic_src_read(void *opaque, gpa_t addr, u32 *ptr) { struct openpic *opp = opaque; uint32_t retval; int idx; - pr_debug("%s: addr %#" HWADDR_PRIx "\n", __func__, addr); + pr_debug("%s: addr %#llx\n", __func__, addr); retval = 0xFFFFFFFF; addr = addr & 0xffff; @@ -884,20 +935,19 @@ static uint64_t openpic_src_read(void *opaque, uint64_t addr, unsigned len) } pr_debug("%s: => 0x%08x\n", __func__, retval); - return retval; + *ptr = retval; + return 0; } -static void openpic_msi_write(void *opaque, gpa_t addr, uint64_t val, - unsigned size) +static int openpic_msi_write(void *opaque, gpa_t addr, u32 val) { struct openpic *opp = opaque; int idx = opp->irq_msi; int srs, ibs; - pr_debug("%s: addr %#" HWADDR_PRIx " <= 0x%08" PRIx64 "\n", - __func__, addr, val); + pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val); if (addr & 0xF) - return; + return 0; switch (addr) { case MSIIR_OFFSET: @@ -911,17 +961,19 @@ static void openpic_msi_write(void *opaque, gpa_t addr, uint64_t val, /* most registers are read-only, thus ignored */ break; } + + return 0; } -static uint64_t openpic_msi_read(void *opaque, gpa_t addr, unsigned size) +static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr) { struct openpic *opp = opaque; - uint64_t r = 0; + uint32_t r = 0; int i, srs; - pr_debug("%s: addr %#" HWADDR_PRIx "\n", __func__, addr); + pr_debug("%s: addr %#llx\n", __func__, addr); if (addr & 0xF) - return -1; + return -ENXIO; srs = addr >> 4; @@ -945,45 +997,47 @@ static uint64_t openpic_msi_read(void *opaque, gpa_t addr, unsigned size) break; } - return r; + pr_debug("%s: => 0x%08x\n", __func__, r); + *ptr = r; + return 0; } -static uint64_t openpic_summary_read(void *opaque, gpa_t addr, unsigned size) +static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr) { - uint64_t r = 0; + uint32_t r = 0; - pr_debug("%s: addr %#" HWADDR_PRIx "\n", __func__, addr); + pr_debug("%s: addr %#llx\n", __func__, addr); /* TODO: EISR/EIMR */ - return r; + *ptr = r; + return 0; } -static void openpic_summary_write(void *opaque, gpa_t addr, uint64_t val, - unsigned size) +static int openpic_summary_write(void *opaque, gpa_t addr, u32 val) { - pr_debug("%s: addr %#" HWADDR_PRIx " <= 0x%08" PRIx64 "\n", - __func__, addr, val); + pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val); /* TODO: EISR/EIMR */ + return 0; } -static void openpic_cpu_write_internal(void *opaque, gpa_t addr, - uint32_t val, int idx) +static int openpic_cpu_write_internal(void *opaque, gpa_t addr, + u32 val, int idx) { struct openpic *opp = opaque; struct irq_source *src; struct irq_dest *dst; int s_IRQ, n_IRQ; - pr_debug("%s: cpu %d addr %#" HWADDR_PRIx " <= 0x%08x\n", __func__, idx, + pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx, addr, val); if (idx < 0) - return; + return 0; if (addr & 0xF) - return; + return 0; dst = &opp->dst[idx]; addr &= 0xFF0; @@ -1008,11 +1062,11 @@ static void openpic_cpu_write_internal(void *opaque, gpa_t addr, if (dst->raised.priority <= dst->ctpr) { pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n", __func__, idx); - qemu_irq_lower(dst->irqs[OPENPIC_OUTPUT_INT]); + mpic_irq_lower(opp, dst, ILR_INTTGT_INT); } else if (dst->raised.priority > dst->servicing.priority) { pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n", __func__, idx, dst->raised.next); - qemu_irq_raise(dst->irqs[OPENPIC_OUTPUT_INT]); + mpic_irq_raise(opp, dst, ILR_INTTGT_INT); } break; @@ -1043,18 +1097,22 @@ static void openpic_cpu_write_internal(void *opaque, gpa_t addr, IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) { pr_debug("Raise OpenPIC INT output cpu %d irq %d\n", idx, n_IRQ); - qemu_irq_raise(opp->dst[idx].irqs[OPENPIC_OUTPUT_INT]); + mpic_irq_raise(opp, dst, ILR_INTTGT_INT); } break; default: break; } + + return 0; } -static void openpic_cpu_write(void *opaque, gpa_t addr, uint64_t val, - unsigned len) +static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val) { - openpic_cpu_write_internal(opaque, addr, val, (addr & 0x1f000) >> 12); + struct openpic *opp = opaque; + + return openpic_cpu_write_internal(opp, addr, val, + (addr & 0x1f000) >> 12); } static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst, @@ -1064,7 +1122,7 @@ static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst, int retval, irq; pr_debug("Lower OpenPIC INT output\n"); - qemu_irq_lower(dst->irqs[OPENPIC_OUTPUT_INT]); + mpic_irq_lower(opp, dst, ILR_INTTGT_INT); irq = IRQ_get_next(opp, &dst->raised); pr_debug("IACK: irq=%d\n", irq); @@ -1107,20 +1165,21 @@ static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst, return retval; } -static uint32_t openpic_cpu_read_internal(void *opaque, gpa_t addr, int idx) +static int openpic_cpu_read_internal(void *opaque, gpa_t addr, + u32 *ptr, int idx) { struct openpic *opp = opaque; struct irq_dest *dst; uint32_t retval; - pr_debug("%s: cpu %d addr %#" HWADDR_PRIx "\n", __func__, idx, addr); + pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr); retval = 0xFFFFFFFF; if (idx < 0) - return retval; + goto out; if (addr & 0xF) - return retval; + goto out; dst = &opp->dst[idx]; addr &= 0xFF0; @@ -1142,49 +1201,67 @@ static uint32_t openpic_cpu_read_internal(void *opaque, gpa_t addr, int idx) } pr_debug("%s: => 0x%08x\n", __func__, retval); - return retval; +out: + *ptr = retval; + return 0; } -static uint64_t openpic_cpu_read(void *opaque, gpa_t addr, unsigned len) +static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr) { - return openpic_cpu_read_internal(opaque, addr, (addr & 0x1f000) >> 12); + struct openpic *opp = opaque; + + return openpic_cpu_read_internal(opp, addr, ptr, + (addr & 0x1f000) >> 12); } -static const struct kvm_io_device_ops openpic_glb_ops_be = { +struct mem_reg { + struct list_head list; + int (*read)(void *opaque, gpa_t addr, u32 *ptr); + int (*write)(void *opaque, gpa_t addr, u32 val); + gpa_t start_addr; + int size; +}; + +static struct mem_reg openpic_gbl_mmio = { .write = openpic_gbl_write, .read = openpic_gbl_read, + .start_addr = OPENPIC_GLB_REG_START, + .size = OPENPIC_GLB_REG_SIZE, }; -static const struct kvm_io_device_ops openpic_tmr_ops_be = { +static struct mem_reg openpic_tmr_mmio = { .write = openpic_tmr_write, .read = openpic_tmr_read, + .start_addr = OPENPIC_TMR_REG_START, + .size = OPENPIC_TMR_REG_SIZE, }; -static const struct kvm_io_device_ops openpic_cpu_ops_be = { +static struct mem_reg openpic_cpu_mmio = { .write = openpic_cpu_write, .read = openpic_cpu_read, + .start_addr = OPENPIC_CPU_REG_START, + .size = OPENPIC_CPU_REG_SIZE, }; -static const struct kvm_io_device_ops openpic_src_ops_be = { +static struct mem_reg openpic_src_mmio = { .write = openpic_src_write, .read = openpic_src_read, + .start_addr = OPENPIC_SRC_REG_START, + .size = OPENPIC_SRC_REG_SIZE, }; -static const struct kvm_io_device_ops openpic_msi_ops_be = { +static struct mem_reg openpic_msi_mmio = { .read = openpic_msi_read, .write = openpic_msi_write, + .start_addr = OPENPIC_MSI_REG_START, + .size = OPENPIC_MSI_REG_SIZE, }; -static const struct kvm_io_device_ops openpic_summary_ops_be = { +static struct mem_reg openpic_summary_mmio = { .read = openpic_summary_read, .write = openpic_summary_write, -}; - -struct mem_reg { - const char *name; - const struct kvm_io_device_ops *ops; - gpa_t start_addr; - int size; + .start_addr = OPENPIC_SUMMARY_REG_START, + .size = OPENPIC_SUMMARY_REG_SIZE, }; static void fsl_common_init(struct openpic *opp) @@ -1192,6 +1269,9 @@ static void fsl_common_init(struct openpic *opp) int i; int virq = MAX_SRC; + list_add(&openpic_msi_mmio.list, &opp->mmio_regions); + list_add(&openpic_summary_mmio.list, &opp->mmio_regions); + opp->vid = VID_REVISION_1_2; opp->vir = VIR_GENERIC; opp->vector_mask = 0xFFFF; @@ -1205,11 +1285,10 @@ static void fsl_common_init(struct openpic *opp) opp->irq_tim0 = virq; virq += MAX_TMR; - assert(virq <= MAX_IRQ); + BUG_ON(virq > MAX_IRQ); opp->irq_msi = 224; - msi_supported = true; for (i = 0; i < opp->fsl->max_ext; i++) opp->src[i].level = false; @@ -1226,63 +1305,352 @@ static void fsl_common_init(struct openpic *opp) } } -static void map_list(struct openpic *opp, const struct mem_reg *list, - int *count) +static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr) { - while (list->name) { - assert(*count < ARRAY_SIZE(opp->sub_io_mem)); + struct list_head *node; - memory_region_init_io(&opp->sub_io_mem[*count], list->ops, opp, - list->name, list->size); + list_for_each(node, &opp->mmio_regions) { + struct mem_reg *mr = list_entry(node, struct mem_reg, list); - memory_region_add_subregion(&opp->mem, list->start_addr, - &opp->sub_io_mem[*count]); + if (mr->start_addr > addr || addr >= mr->start_addr + mr->size) + continue; - (*count)++; - list++; + return mr->read(opp, addr - mr->start_addr, ptr); } + + return -ENXIO; } -static int openpic_init(SysBusDevice *dev) +static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val) { - struct openpic *opp = FROM_SYSBUS(typeof(*opp), dev); - int i, j; - int list_count = 0; - static const struct mem_reg list_le[] = { - {"glb", &openpic_glb_ops_le, - OPENPIC_GLB_REG_START, OPENPIC_GLB_REG_SIZE}, - {"tmr", &openpic_tmr_ops_le, - OPENPIC_TMR_REG_START, OPENPIC_TMR_REG_SIZE}, - {"src", &openpic_src_ops_le, - OPENPIC_SRC_REG_START, OPENPIC_SRC_REG_SIZE}, - {"cpu", &openpic_cpu_ops_le, - OPENPIC_CPU_REG_START, OPENPIC_CPU_REG_SIZE}, - {NULL} - }; - static const struct mem_reg list_be[] = { - {"glb", &openpic_glb_ops_be, - OPENPIC_GLB_REG_START, OPENPIC_GLB_REG_SIZE}, - {"tmr", &openpic_tmr_ops_be, - OPENPIC_TMR_REG_START, OPENPIC_TMR_REG_SIZE}, - {"src", &openpic_src_ops_be, - OPENPIC_SRC_REG_START, OPENPIC_SRC_REG_SIZE}, - {"cpu", &openpic_cpu_ops_be, - OPENPIC_CPU_REG_START, OPENPIC_CPU_REG_SIZE}, - {NULL} - }; - static const struct mem_reg list_fsl[] = { - {"msi", &openpic_msi_ops_be, - OPENPIC_MSI_REG_START, OPENPIC_MSI_REG_SIZE}, - {"summary", &openpic_summary_ops_be, - OPENPIC_SUMMARY_REG_START, OPENPIC_SUMMARY_REG_SIZE}, - {NULL} - }; + struct list_head *node; + + list_for_each(node, &opp->mmio_regions) { + struct mem_reg *mr = list_entry(node, struct mem_reg, list); + + if (mr->start_addr > addr || addr >= mr->start_addr + mr->size) + continue; - memory_region_init(&opp->mem, "openpic", 0x40000); + return mr->write(opp, addr - mr->start_addr, val); + } + + return -ENXIO; +} + +static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr, + int len, void *ptr) +{ + struct openpic *opp = container_of(this, struct openpic, mmio); + int ret; + union { + u32 val; + u8 bytes[4]; + } u; + + if (addr & (len - 1)) { + pr_debug("%s: bad alignment %llx/%d\n", + __func__, addr, len); + return -EINVAL; + } + + spin_lock_irq(&opp->lock); + ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val); + spin_unlock_irq(&opp->lock); + + /* + * Technically only 32-bit accesses are allowed, but be nice to + * people dumping registers a byte at a time -- it works in real + * hardware (reads only, not writes). + */ + if (len == 4) { + *(u32 *)ptr = u.val; + pr_debug("%s: addr %llx ret %d len 4 val %x\n", + __func__, addr, ret, u.val); + } else if (len == 1) { + *(u8 *)ptr = u.bytes[addr & 3]; + pr_debug("%s: addr %llx ret %d len 1 val %x\n", + __func__, addr, ret, u.bytes[addr & 3]); + } else { + pr_debug("%s: bad length %d\n", __func__, len); + return -EINVAL; + } + + return ret; +} + +static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr, + int len, const void *ptr) +{ + struct openpic *opp = container_of(this, struct openpic, mmio); + int ret; + + if (len != 4) { + pr_debug("%s: bad length %d\n", __func__, len); + return -EOPNOTSUPP; + } + if (addr & 3) { + pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len); + return -EOPNOTSUPP; + } + + spin_lock_irq(&opp->lock); + ret = kvm_mpic_write_internal(opp, addr - opp->reg_base, + *(const u32 *)ptr); + spin_unlock_irq(&opp->lock); + + pr_debug("%s: addr %llx ret %d val %x\n", + __func__, addr, ret, *(const u32 *)ptr); + + return ret; +} + +static void kvm_mpic_dtor(struct kvm_io_device *this) +{ + struct openpic *opp = container_of(this, struct openpic, mmio); + + opp->mmio_mapped = false; +} + +static const struct kvm_io_device_ops mpic_mmio_ops = { + .read = kvm_mpic_read, + .write = kvm_mpic_write, + .destructor = kvm_mpic_dtor, +}; + +static void map_mmio(struct openpic *opp) +{ + BUG_ON(opp->mmio_mapped); + opp->mmio_mapped = true; + + kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops); + + kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS, + opp->reg_base, OPENPIC_REG_SIZE, + &opp->mmio); +} + +static void unmap_mmio(struct openpic *opp) +{ + BUG_ON(opp->mmio_mapped); + opp->mmio_mapped = false; + + kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio); +} + +static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr) +{ + u64 base; + + if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64))) + return -EFAULT; + + if (base & 0x3ffff) { + pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n", + __func__, base); + return -EINVAL; + } + + if (base == opp->reg_base) + return 0; + + mutex_lock(&opp->kvm->slots_lock); + + unmap_mmio(opp); + opp->reg_base = base; + + pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n", + __func__, base); + + if (base == 0) + goto out; + + map_mmio(opp); + + mutex_unlock(&opp->kvm->slots_lock); +out: + return 0; +} + +#define ATTR_SET 0 +#define ATTR_GET 1 + +static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type) +{ + int ret; + + if (addr & 3) + return -ENXIO; + + spin_lock_irq(&opp->lock); + + if (type == ATTR_SET) + ret = kvm_mpic_write_internal(opp, addr, *val); + else + ret = kvm_mpic_read_internal(opp, addr, val); + + spin_unlock_irq(&opp->lock); + + pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val); + + return ret; +} + +static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + struct openpic *opp = dev->private; + u32 attr32; + + switch (attr->group) { + case KVM_DEV_MPIC_GRP_MISC: + switch (attr->attr) { + case KVM_DEV_MPIC_BASE_ADDR: + return set_base_addr(opp, attr); + } + + break; + + case KVM_DEV_MPIC_GRP_REGISTER: + if (get_user(attr32, (u32 __user *)(long)attr->addr)) + return -EFAULT; + + return access_reg(opp, attr->attr, &attr32, ATTR_SET); + + case KVM_DEV_MPIC_GRP_IRQ_ACTIVE: + if (attr->attr > MAX_SRC) + return -EINVAL; + + if (get_user(attr32, (u32 __user *)(long)attr->addr)) + return -EFAULT; + + if (attr32 != 0 && attr32 != 1) + return -EINVAL; + + spin_lock_irq(&opp->lock); + openpic_set_irq(opp, attr->attr, attr32); + spin_unlock_irq(&opp->lock); + return 0; + } + + return -ENXIO; +} + +static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + struct openpic *opp = dev->private; + u64 attr64; + u32 attr32; + int ret; + + switch (attr->group) { + case KVM_DEV_MPIC_GRP_MISC: + switch (attr->attr) { + case KVM_DEV_MPIC_BASE_ADDR: + mutex_lock(&opp->kvm->slots_lock); + attr64 = opp->reg_base; + mutex_unlock(&opp->kvm->slots_lock); + + if (copy_to_user((u64 __user *)(long)attr->addr, + &attr64, sizeof(u64))) + return -EFAULT; + + return 0; + } + + break; + + case KVM_DEV_MPIC_GRP_REGISTER: + ret = access_reg(opp, attr->attr, &attr32, ATTR_GET); + if (ret) + return ret; + + if (put_user(attr32, (u32 __user *)(long)attr->addr)) + return -EFAULT; + + return 0; + + case KVM_DEV_MPIC_GRP_IRQ_ACTIVE: + if (attr->attr > MAX_SRC) + return -EINVAL; + + spin_lock_irq(&opp->lock); + attr32 = opp->src[attr->attr].pending; + spin_unlock_irq(&opp->lock); + + if (put_user(attr32, (u32 __user *)(long)attr->addr)) + return -EFAULT; + + return 0; + } + + return -ENXIO; +} + +static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_MPIC_GRP_MISC: + switch (attr->attr) { + case KVM_DEV_MPIC_BASE_ADDR: + return 0; + } + + break; + + case KVM_DEV_MPIC_GRP_REGISTER: + return 0; + + case KVM_DEV_MPIC_GRP_IRQ_ACTIVE: + if (attr->attr > MAX_SRC) + break; + + return 0; + } + + return -ENXIO; +} + +static void mpic_destroy(struct kvm_device *dev) +{ + struct openpic *opp = dev->private; + + if (opp->mmio_mapped) { + /* + * Normally we get unmapped by kvm_io_bus_destroy(), + * which happens before the VCPUs release their references. + * + * Thus, we should only get here if no VCPUs took a reference + * to us in the first place. + */ + WARN_ON(opp->nb_cpus != 0); + unmap_mmio(opp); + } + + kfree(opp); +} + +static int mpic_create(struct kvm_device *dev, u32 type) +{ + struct openpic *opp; + int ret; + + opp = kzalloc(sizeof(struct openpic), GFP_KERNEL); + if (!opp) + return -ENOMEM; + + dev->private = opp; + opp->kvm = dev->kvm; + opp->dev = dev; + opp->model = type; + spin_lock_init(&opp->lock); + + INIT_LIST_HEAD(&opp->mmio_regions); + list_add(&openpic_gbl_mmio.list, &opp->mmio_regions); + list_add(&openpic_tmr_mmio.list, &opp->mmio_regions); + list_add(&openpic_src_mmio.list, &opp->mmio_regions); + list_add(&openpic_cpu_mmio.list, &opp->mmio_regions); switch (opp->model) { - case OPENPIC_MODEL_FSL_MPIC_20: - default: + case KVM_DEV_TYPE_FSL_MPIC_20: opp->fsl = &fsl_mpic_20; opp->brr1 = 0x00400200; opp->flags |= OPENPIC_FLAG_IDR_CRIT; @@ -1290,12 +1658,10 @@ static int openpic_init(SysBusDevice *dev) opp->mpic_mode_mask = GCR_MODE_MIXED; fsl_common_init(opp); - map_list(opp, list_be, &list_count); - map_list(opp, list_fsl, &list_count); break; - case OPENPIC_MODEL_FSL_MPIC_42: + case KVM_DEV_TYPE_FSL_MPIC_42: opp->fsl = &fsl_mpic_42; opp->brr1 = 0x00400402; opp->flags |= OPENPIC_FLAG_ILR; @@ -1303,11 +1669,27 @@ static int openpic_init(SysBusDevice *dev) opp->mpic_mode_mask = GCR_MODE_PROXY; fsl_common_init(opp); - map_list(opp, list_be, &list_count); - map_list(opp, list_fsl, &list_count); break; + + default: + ret = -ENODEV; + goto err; } + openpic_reset(opp); return 0; + +err: + kfree(opp); + return ret; } + +struct kvm_device_ops kvm_mpic_ops = { + .name = "kvm-mpic", + .create = mpic_create, + .destroy = mpic_destroy, + .set_attr = mpic_set_attr, + .get_attr = mpic_get_attr, + .has_attr = mpic_has_attr, +}; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6b8108624851..88d69cf1f953 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -317,6 +317,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_ENABLE_CAP: case KVM_CAP_ONE_REG: case KVM_CAP_IOEVENTFD: + case KVM_CAP_DEVICE_CTRL: r = 1; break; #ifndef CONFIG_KVM_BOOK3S_64_HV @@ -762,7 +763,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, break; case KVM_CAP_PPC_EPR: r = 0; - vcpu->arch.epr_enabled = cap->args[0]; + if (cap->args[0]) + vcpu->arch.epr_flags |= KVMPPC_EPR_USER; + else + vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER; break; #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_WATCHDOG: @@ -908,6 +912,7 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { + struct kvm *kvm __maybe_unused = filp->private_data; void __user *argp = (void __user *)arg; long r; @@ -926,7 +931,6 @@ long kvm_arch_vm_ioctl(struct file *filp, #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CREATE_SPAPR_TCE: { struct kvm_create_spapr_tce create_tce; - struct kvm *kvm = filp->private_data; r = -EFAULT; if (copy_from_user(&create_tce, argp, sizeof(create_tce))) @@ -938,7 +942,6 @@ long kvm_arch_vm_ioctl(struct file *filp, #ifdef CONFIG_KVM_BOOK3S_64_HV case KVM_ALLOCATE_RMA: { - struct kvm *kvm = filp->private_data; struct kvm_allocate_rma rma; r = kvm_vm_ioctl_allocate_rma(kvm, &rma); @@ -948,7 +951,6 @@ long kvm_arch_vm_ioctl(struct file *filp, } case KVM_PPC_ALLOCATE_HTAB: { - struct kvm *kvm = filp->private_data; u32 htab_order; r = -EFAULT; @@ -965,7 +967,6 @@ long kvm_arch_vm_ioctl(struct file *filp, } case KVM_PPC_GET_HTAB_FD: { - struct kvm *kvm = filp->private_data; struct kvm_get_htab_fd ghf; r = -EFAULT; @@ -978,7 +979,6 @@ long kvm_arch_vm_ioctl(struct file *filp, #ifdef CONFIG_PPC_BOOK3S_64 case KVM_PPC_GET_SMMU_INFO: { - struct kvm *kvm = filp->private_data; struct kvm_ppc_smmu_info info; memset(&info, 0, sizeof(info)); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6dab6b5b3e3b..feffbdaf8986 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1099,6 +1099,8 @@ void kvm_device_get(struct kvm_device *dev); void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); +extern struct kvm_device_ops kvm_mpic_ops; + #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 38a0be0c199f..4148becdc93f 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -837,6 +837,9 @@ struct kvm_device_attr { __u64 addr; /* userspace address of attr data */ }; +#define KVM_DEV_TYPE_FSL_MPIC_20 1 +#define KVM_DEV_TYPE_FSL_MPIC_42 2 + /* * ioctls for VM fds */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5f0d78c2537b..f6cd14d2f0d9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2238,6 +2238,12 @@ static int kvm_ioctl_create_device(struct kvm *kvm, int ret; switch (cd->type) { +#ifdef CONFIG_KVM_MPIC + case KVM_DEV_TYPE_FSL_MPIC_20: + case KVM_DEV_TYPE_FSL_MPIC_42: + ops = &kvm_mpic_ops; + break; +#endif default: return -ENODEV; } -- cgit v1.2.3 From eb1e4f43e0f47f2655372c7d32c43db9711c278e Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 12 Apr 2013 14:08:47 +0000 Subject: kvm/ppc/mpic: add KVM_CAP_IRQ_MPIC Enabling this capability connects the vcpu to the designated in-kernel MPIC. Using explicit connections between vcpus and irqchips allows for flexibility, but the main benefit at the moment is that it simplifies the code -- KVM doesn't need vm-global state to remember which MPIC object is associated with this vm, and it doesn't need to care about ordering between irqchip creation and vcpu creation. Signed-off-by: Scott Wood [agraf: add stub functions for kvmppc_mpic_{dis,}connect_vcpu] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 8 ++++ arch/powerpc/include/asm/kvm_host.h | 9 ++++ arch/powerpc/include/asm/kvm_ppc.h | 15 ++++++- arch/powerpc/kvm/booke.c | 4 ++ arch/powerpc/kvm/mpic.c | 82 +++++++++++++++++++++++++++++++++---- arch/powerpc/kvm/powerpc.c | 30 ++++++++++++++ include/uapi/linux/kvm.h | 1 + 7 files changed, 141 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 66b58e494935..149558b1e81e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2744,3 +2744,11 @@ to receive the topmost interrupt vector. When disabled (args[0] == 0), behavior is as if this facility is unsupported. When this capability is enabled, KVM_EXIT_EPR can occur. + +6.6 KVM_CAP_IRQ_MPIC + +Architectures: ppc +Parameters: args[0] is the MPIC device fd + args[1] is the MPIC CPU number for this vcpu + +This capability connects the vcpu to an in-kernel MPIC device. diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 153c8c2b0f88..c3f8ceffa412 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -377,6 +377,11 @@ struct kvmppc_booke_debug_reg { u64 dac[KVMPPC_BOOKE_MAX_DAC]; }; +#define KVMPPC_IRQ_DEFAULT 0 +#define KVMPPC_IRQ_MPIC 1 + +struct openpic; + struct kvm_vcpu_arch { ulong host_stack; u32 host_pid; @@ -558,6 +563,10 @@ struct kvm_vcpu_arch { unsigned long magic_page_pa; /* phys addr to map the magic page to */ unsigned long magic_page_ea; /* effect. addr to map the magic page to */ + int irq_type; /* one of KVM_IRQ_* */ + int irq_cpu_id; + struct openpic *mpic; /* KVM_IRQ_MPIC */ + #ifdef CONFIG_KVM_BOOK3S_64_HV struct kvm_vcpu_arch_shared shregs; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 3810f9c7616c..df9c80b37905 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -248,7 +248,6 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *); void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); struct openpic; -void kvmppc_mpic_put(struct openpic *opp); #ifdef CONFIG_KVM_BOOK3S_64_HV static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) @@ -278,6 +277,9 @@ static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) #ifdef CONFIG_KVM_MPIC void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu); +int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, + u32 cpu); +void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu); #else @@ -285,6 +287,17 @@ static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu) { } +static inline int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, + struct kvm_vcpu *vcpu, u32 cpu) +{ + return -EINVAL; +} + +static inline void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, + struct kvm_vcpu *vcpu) +{ +} + #endif /* CONFIG_KVM_MPIC */ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 4da11ed48c59..1020119226db 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -430,6 +430,10 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, if (update_epr == true) { if (vcpu->arch.epr_flags & KVMPPC_EPR_USER) kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); + else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) { + BUG_ON(vcpu->arch.irq_type != KVMPPC_IRQ_MPIC); + kvmppc_mpic_set_epr(vcpu); + } } new_msr &= msr_mask; diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index cb451b91e342..10bc08a246fd 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -115,7 +115,7 @@ static int get_current_cpu(void) { #if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) struct kvm_vcpu *vcpu = current->thread.kvm_vcpu; - return vcpu ? vcpu->vcpu_id : -1; + return vcpu ? vcpu->arch.irq_cpu_id : -1; #else /* XXX */ return -1; @@ -249,7 +249,7 @@ static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst, return; } - pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->vcpu_id, + pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id, output); if (output != ILR_INTTGT_INT) /* TODO */ @@ -267,7 +267,7 @@ static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst, return; } - pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->vcpu_id, + pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id, output); if (output != ILR_INTTGT_INT) /* TODO */ @@ -1165,6 +1165,20 @@ static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst, return retval; } +void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu) +{ + struct openpic *opp = vcpu->arch.mpic; + int cpu = vcpu->arch.irq_cpu_id; + unsigned long flags; + + spin_lock_irqsave(&opp->lock, flags); + + if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY) + kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu)); + + spin_unlock_irqrestore(&opp->lock, flags); +} + static int openpic_cpu_read_internal(void *opaque, gpa_t addr, u32 *ptr, int idx) { @@ -1431,10 +1445,10 @@ static void map_mmio(struct openpic *opp) static void unmap_mmio(struct openpic *opp) { - BUG_ON(opp->mmio_mapped); - opp->mmio_mapped = false; - - kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio); + if (opp->mmio_mapped) { + opp->mmio_mapped = false; + kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio); + } } static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr) @@ -1693,3 +1707,57 @@ struct kvm_device_ops kvm_mpic_ops = { .get_attr = mpic_get_attr, .has_attr = mpic_has_attr, }; + +int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, + u32 cpu) +{ + struct openpic *opp = dev->private; + int ret = 0; + + if (dev->ops != &kvm_mpic_ops) + return -EPERM; + if (opp->kvm != vcpu->kvm) + return -EPERM; + if (cpu < 0 || cpu >= MAX_CPU) + return -EPERM; + + spin_lock_irq(&opp->lock); + + if (opp->dst[cpu].vcpu) { + ret = -EEXIST; + goto out; + } + if (vcpu->arch.irq_type) { + ret = -EBUSY; + goto out; + } + + opp->dst[cpu].vcpu = vcpu; + opp->nb_cpus = max(opp->nb_cpus, cpu + 1); + + vcpu->arch.mpic = opp; + vcpu->arch.irq_cpu_id = cpu; + vcpu->arch.irq_type = KVMPPC_IRQ_MPIC; + + /* This might need to be changed if GCR gets extended */ + if (opp->mpic_mode_mask == GCR_MODE_PROXY) + vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL; + + kvm_device_get(dev); +out: + spin_unlock_irq(&opp->lock); + return ret; +} + +/* + * This should only happen immediately before the mpic is destroyed, + * so we shouldn't need to worry about anything still trying to + * access the vcpu pointer. + */ +void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu) +{ + BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu); + + opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL; + kvm_device_put(opp->dev); +} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 88d69cf1f953..5d046bbdf11f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -326,6 +327,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_GET_PVINFO: #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) case KVM_CAP_SW_TLB: +#endif +#ifdef CONFIG_KVM_MPIC + case KVM_CAP_IRQ_MPIC: #endif r = 1; break; @@ -460,6 +464,13 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) tasklet_kill(&vcpu->arch.tasklet); kvmppc_remove_vcpu_debugfs(vcpu); + + switch (vcpu->arch.irq_type) { + case KVMPPC_IRQ_MPIC: + kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); + break; + } + kvmppc_core_vcpu_free(vcpu); } @@ -786,6 +797,25 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg); break; } +#endif +#ifdef CONFIG_KVM_MPIC + case KVM_CAP_IRQ_MPIC: { + struct file *filp; + struct kvm_device *dev; + + r = -EBADF; + filp = fget(cap->args[0]); + if (!filp) + break; + + r = -EPERM; + dev = kvm_device_from_filp(filp); + if (dev) + r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]); + + fput(filp); + break; + } #endif default: r = -EINVAL; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4148becdc93f..0ebf59b50ed0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -667,6 +667,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_ARM_PSCI 87 #define KVM_CAP_ARM_SET_DEVICE_ADDR 88 #define KVM_CAP_DEVICE_CTRL 89 +#define KVM_CAP_IRQ_MPIC 90 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From de9ba2f36368d21314860ee24893a6ffef01e548 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 16 Apr 2013 17:42:19 +0200 Subject: KVM: PPC: Support irq routing and irqfd for in-kernel MPIC Now that all the irq routing and irqfd pieces are generic, we can expose real irqchip support to all of KVM's internal helpers. This allows us to use irqfd with the in-kernel MPIC. Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/devices/mpic.txt | 19 +++++ arch/powerpc/include/asm/kvm_host.h | 7 ++ arch/powerpc/include/uapi/asm/kvm.h | 1 + arch/powerpc/kvm/Kconfig | 3 + arch/powerpc/kvm/Makefile | 1 + arch/powerpc/kvm/irq.h | 17 +++++ arch/powerpc/kvm/mpic.c | 111 ++++++++++++++++++++++++++++- 7 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/kvm/irq.h (limited to 'arch/powerpc/include') diff --git a/Documentation/virtual/kvm/devices/mpic.txt b/Documentation/virtual/kvm/devices/mpic.txt index ce98e3264fa8..ad0ac778f20a 100644 --- a/Documentation/virtual/kvm/devices/mpic.txt +++ b/Documentation/virtual/kvm/devices/mpic.txt @@ -35,3 +35,22 @@ Groups: "attr" is the IRQ number. IRQ numbers for standard sources are the byte offset of the relevant IVPR from EIVPR0, divided by 32. + +IRQ Routing: + + The MPIC emulation supports IRQ routing. Only a single MPIC device can + be instantiated. Once that device has been created, it's available as + irqchip id 0. + + This irqchip 0 has 256 interrupt pins, which expose the interrupts in + the main array of interrupt sources (a.k.a. "SRC" interrupts). + + The numbering is the same as the MPIC device tree binding -- based on + the register offset from the beginning of the sources array, without + regard to any subdivisions in chip documentation such as "internal" + or "external" interrupts. + + Default routes are established for these pins, with the GSI being equal + to the pin number. + + Access to non-SRC interrupts is not implemented through IRQ routing mechanisms. diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index c3f8ceffa412..13740a645a6d 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -44,6 +44,10 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif +/* These values are internal and can be increased later */ +#define KVM_NR_IRQCHIPS 1 +#define KVM_IRQCHIP_NUM_PINS 256 + #if !defined(CONFIG_KVM_440) #include @@ -256,6 +260,9 @@ struct kvm_arch { #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; #endif +#ifdef CONFIG_KVM_MPIC + struct openpic *mpic; +#endif }; /* diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 02ad96606860..ca871067a69b 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -25,6 +25,7 @@ /* Select powerpc specific features in */ #define __KVM_HAVE_SPAPR_TCE #define __KVM_HAVE_PPC_SMT +#define __KVM_HAVE_IRQCHIP struct kvm_regs { __u64 pc; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index f47e95e0b6de..4bf10b520765 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -154,6 +154,9 @@ config KVM_E500MC config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING + select HAVE_KVM_MSI help Enable support for emulating MPIC devices inside the host kernel, rather than relying on userspace to emulate. diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 4a2277a221bb..4eada0c01082 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -104,6 +104,7 @@ kvm-book3s_32-objs := \ kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o +kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o) kvm-objs := $(kvm-objs-m) $(kvm-objs-y) diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h new file mode 100644 index 000000000000..f1e27fdc8c2e --- /dev/null +++ b/arch/powerpc/kvm/irq.h @@ -0,0 +1,17 @@ +#ifndef __IRQ_H +#define __IRQ_H + +#include + +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + int ret = 0; + +#ifdef CONFIG_KVM_MPIC + ret = ret || (kvm->arch.mpic != NULL); +#endif + smp_rmb(); + return ret; +} + +#endif diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 10bc08a246fd..89fe1d66a7fb 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1076,7 +1076,9 @@ static int openpic_cpu_write_internal(void *opaque, gpa_t addr, case 0xA0: /* IACK */ /* Read-only register */ break; - case 0xB0: /* EOI */ + case 0xB0: { /* EOI */ + int notify_eoi; + pr_debug("EOI\n"); s_IRQ = IRQ_get_next(opp, &dst->servicing); @@ -1087,6 +1089,8 @@ static int openpic_cpu_write_internal(void *opaque, gpa_t addr, } IRQ_resetbit(&dst->servicing, s_IRQ); + /* Notify listeners that the IRQ is over */ + notify_eoi = s_IRQ; /* Set up next servicing IRQ */ s_IRQ = IRQ_get_next(opp, &dst->servicing); /* Check queued interrupts. */ @@ -1099,7 +1103,13 @@ static int openpic_cpu_write_internal(void *opaque, gpa_t addr, idx, n_IRQ); mpic_irq_raise(opp, dst, ILR_INTTGT_INT); } + + spin_unlock(&opp->lock); + kvm_notify_acked_irq(opp->kvm, 0, notify_eoi); + spin_lock(&opp->lock); + break; + } default: break; } @@ -1639,14 +1649,34 @@ static void mpic_destroy(struct kvm_device *dev) unmap_mmio(opp); } + dev->kvm->arch.mpic = NULL; kfree(opp); } +static int mpic_set_default_irq_routing(struct openpic *opp) +{ + struct kvm_irq_routing_entry *routing; + + /* Create a nop default map, so that dereferencing it still works */ + routing = kzalloc((sizeof(*routing)), GFP_KERNEL); + if (!routing) + return -ENOMEM; + + kvm_set_irq_routing(opp->kvm, routing, 0, 0); + + kfree(routing); + return 0; +} + static int mpic_create(struct kvm_device *dev, u32 type) { struct openpic *opp; int ret; + /* We only support one MPIC at a time for now */ + if (dev->kvm->arch.mpic) + return -EINVAL; + opp = kzalloc(sizeof(struct openpic), GFP_KERNEL); if (!opp) return -ENOMEM; @@ -1691,7 +1721,15 @@ static int mpic_create(struct kvm_device *dev, u32 type) goto err; } + ret = mpic_set_default_irq_routing(opp); + if (ret) + goto err; + openpic_reset(opp); + + smp_wmb(); + dev->kvm->arch.mpic = opp; + return 0; err: @@ -1761,3 +1799,74 @@ void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu) opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL; kvm_device_put(opp->dev); } + +/* + * Return value: + * < 0 Interrupt was ignored (masked or not delivered for other reasons) + * = 0 Interrupt was coalesced (previous irq is still pending) + * > 0 Number of CPUs interrupt was delivered to + */ +static int mpic_set_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + u32 irq = e->irqchip.pin; + struct openpic *opp = kvm->arch.mpic; + unsigned long flags; + + spin_lock_irqsave(&opp->lock, flags); + openpic_set_irq(opp, irq, level); + spin_unlock_irqrestore(&opp->lock, flags); + + /* All code paths we care about don't check for the return value */ + return 0; +} + +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, bool line_status) +{ + struct openpic *opp = kvm->arch.mpic; + unsigned long flags; + + spin_lock_irqsave(&opp->lock, flags); + + /* + * XXX We ignore the target address for now, as we only support + * a single MSI bank. + */ + openpic_msi_write(kvm->arch.mpic, MSIIR_OFFSET, e->msi.data); + spin_unlock_irqrestore(&opp->lock, flags); + + /* All code paths we care about don't check for the return value */ + return 0; +} + +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + e->set = mpic_set_irq; + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin; + if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) + goto out; + rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; + break; + case KVM_IRQ_ROUTING_MSI: + e->set = kvm_set_msi; + e->msi.address_lo = ue->u.msi.address_lo; + e->msi.address_hi = ue->u.msi.address_hi; + e->msi.data = ue->u.msi.data; + break; + default: + goto out; + } + + r = 0; +out: + return r; +} -- cgit v1.2.3 From 5efdb4be598fc2af6937c3387586635ddf6fd2c8 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 17 Apr 2013 00:37:57 +0200 Subject: KVM: PPC: MPIC: Add support for KVM_IRQ_LINE Now that all pieces are in place for reusing generic irq infrastructure, we can copy x86's implementation of KVM_IRQ_LINE irq injection and simply reuse it for PPC, as it will work there just as well. Signed-off-by: Alexander Graf --- arch/powerpc/include/uapi/asm/kvm.h | 1 + arch/powerpc/kvm/powerpc.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index ca871067a69b..03c7819a44a3 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -26,6 +26,7 @@ #define __KVM_HAVE_SPAPR_TCE #define __KVM_HAVE_PPC_SMT #define __KVM_HAVE_IRQCHIP +#define __KVM_HAVE_IRQ_LINE struct kvm_regs { __u64 pc; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 5d046bbdf11f..d8e81e6c1afe 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -33,6 +33,7 @@ #include #include #include "timing.h" +#include "irq.h" #include "../mm/mmu_decl.h" #define CREATE_TRACE_POINTS @@ -939,6 +940,18 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) return 0; } +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, + bool line_status) +{ + if (!irqchip_in_kernel(kvm)) + return -ENXIO; + + irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event->irq, irq_event->level, + line_status); + return 0; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { -- cgit v1.2.3 From 8e591cb7204739efa8e15967ea334eb367039dde Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 17 Apr 2013 20:30:00 +0000 Subject: KVM: PPC: Book3S: Add infrastructure to implement kernel-side RTAS calls For pseries machine emulation, in order to move the interrupt controller code to the kernel, we need to intercept some RTAS calls in the kernel itself. This adds an infrastructure to allow in-kernel handlers to be registered for RTAS services by name. A new ioctl, KVM_PPC_RTAS_DEFINE_TOKEN, then allows userspace to associate token values with those service names. Then, when the guest requests an RTAS service with one of those token values, it will be handled by the relevant in-kernel handler rather than being passed up to userspace as at present. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras [agraf: fix warning] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 19 ++++ arch/powerpc/include/asm/hvcall.h | 3 + arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/include/asm/kvm_ppc.h | 4 + arch/powerpc/include/uapi/asm/kvm.h | 6 ++ arch/powerpc/kvm/Makefile | 1 + arch/powerpc/kvm/book3s_hv.c | 18 +++- arch/powerpc/kvm/book3s_pr.c | 1 + arch/powerpc/kvm/book3s_pr_papr.c | 7 ++ arch/powerpc/kvm/book3s_rtas.c | 182 ++++++++++++++++++++++++++++++++++++ arch/powerpc/kvm/powerpc.c | 8 ++ include/uapi/linux/kvm.h | 3 + 12 files changed, 252 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/kvm/book3s_rtas.c (limited to 'arch/powerpc/include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 149558b1e81e..fb308be8521b 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2341,6 +2341,25 @@ and distributor interface, the ioctl must be called after calling KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the base addresses will return -EEXIST. +4.82 KVM_PPC_RTAS_DEFINE_TOKEN + +Capability: KVM_CAP_PPC_RTAS +Architectures: ppc +Type: vm ioctl +Parameters: struct kvm_rtas_token_args +Returns: 0 on success, -1 on error + +Defines a token value for a RTAS (Run Time Abstraction Services) +service in order to allow it to be handled in the kernel. The +argument struct gives the name of the service, which must be the name +of a service that has a kernel-side implementation. If the token +value is non-zero, it will be associated with that service, and +subsequent RTAS calls by the guest specifying that token will be +handled by the kernel. If the token value is 0, then any token +associated with the service will be forgotten, and subsequent RTAS +calls by the guest for that service will be passed to userspace to be +handled. + 5. The kvm_run structure ------------------------ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 4bc2c3dad6ad..cf4df8e2139a 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -270,6 +270,9 @@ #define H_SET_MODE 0x31C #define MAX_HCALL_OPCODE H_SET_MODE +/* Platform specific hcalls, used by KVM */ +#define H_RTAS 0xf000 + #ifndef __ASSEMBLY__ /** diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 13740a645a6d..311f7e6f09e9 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -259,6 +259,7 @@ struct kvm_arch { #endif /* CONFIG_KVM_BOOK3S_64_HV */ #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; + struct list_head rtas_tokens; #endif #ifdef CONFIG_KVM_MPIC struct openpic *mpic; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index df9c80b37905..8a30eb7f2bec 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -166,6 +166,10 @@ extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); +extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp); +extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu); +extern void kvmppc_rtas_tokens_free(struct kvm *kvm); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 03c7819a44a3..eb9e25c194ad 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -324,6 +324,12 @@ struct kvm_allocate_rma { __u64 rma_size; }; +/* for KVM_CAP_PPC_RTAS */ +struct kvm_rtas_token_args { + char name[120]; + __u64 token; /* Use a token of 0 to undefine a mapping */ +}; + struct kvm_book3e_206_tlb_entry { __u32 mas8; __u32 mas1; diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 4eada0c01082..3faf5c07329c 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -86,6 +86,7 @@ kvm-book3s_64-module-objs := \ emulate.o \ book3s.o \ book3s_64_vio.o \ + book3s_rtas.o \ $(kvm-book3s_64-objs-y) kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 5af0f2979833..f3d7af7981c7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -483,7 +483,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) unsigned long req = kvmppc_get_gpr(vcpu, 3); unsigned long target, ret = H_SUCCESS; struct kvm_vcpu *tvcpu; - int idx; + int idx, rc; switch (req) { case H_ENTER: @@ -519,6 +519,19 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6)); break; + case H_RTAS: + if (list_empty(&vcpu->kvm->arch.rtas_tokens)) + return RESUME_HOST; + + rc = kvmppc_rtas_hcall(vcpu); + + if (rc == -ENOENT) + return RESUME_HOST; + else if (rc == 0) + break; + + /* Send the error out to userspace via KVM_RUN */ + return rc; default: return RESUME_HOST; } @@ -1829,6 +1842,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) cpumask_setall(&kvm->arch.need_tlb_flush); INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); + INIT_LIST_HEAD(&kvm->arch.rtas_tokens); kvm->arch.rma = NULL; @@ -1874,6 +1888,8 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) kvm->arch.rma = NULL; } + kvmppc_rtas_tokens_free(kvm); + kvmppc_free_hpt(kvm); WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index c1cffa882a69..d09baf143500 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1296,6 +1296,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) { #ifdef CONFIG_PPC64 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); + INIT_LIST_HEAD(&kvm->arch.rtas_tokens); #endif if (firmware_has_feature(FW_FEATURE_SET_MODE)) { diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index ee02b30878ed..4efa4a4f3722 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -246,6 +246,13 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) clear_bit(KVM_REQ_UNHALT, &vcpu->requests); vcpu->stat.halt_wakeup++; return EMULATE_DONE; + case H_RTAS: + if (list_empty(&vcpu->kvm->arch.rtas_tokens)) + return RESUME_HOST; + if (kvmppc_rtas_hcall(vcpu)) + break; + kvmppc_set_gpr(vcpu, 3, 0); + return EMULATE_DONE; } return EMULATE_FAIL; diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c new file mode 100644 index 000000000000..6ad7050eb67d --- /dev/null +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -0,0 +1,182 @@ +/* + * Copyright 2012 Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +struct rtas_handler { + void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args); + char *name; +}; + +static struct rtas_handler rtas_handlers[] = { }; + +struct rtas_token_definition { + struct list_head list; + struct rtas_handler *handler; + u64 token; +}; + +static int rtas_name_matches(char *s1, char *s2) +{ + struct kvm_rtas_token_args args; + return !strncmp(s1, s2, sizeof(args.name)); +} + +static int rtas_token_undefine(struct kvm *kvm, char *name) +{ + struct rtas_token_definition *d, *tmp; + + lockdep_assert_held(&kvm->lock); + + list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) { + if (rtas_name_matches(d->handler->name, name)) { + list_del(&d->list); + kfree(d); + return 0; + } + } + + /* It's not an error to undefine an undefined token */ + return 0; +} + +static int rtas_token_define(struct kvm *kvm, char *name, u64 token) +{ + struct rtas_token_definition *d; + struct rtas_handler *h = NULL; + bool found; + int i; + + lockdep_assert_held(&kvm->lock); + + list_for_each_entry(d, &kvm->arch.rtas_tokens, list) { + if (d->token == token) + return -EEXIST; + } + + found = false; + for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) { + h = &rtas_handlers[i]; + if (rtas_name_matches(h->name, name)) { + found = true; + break; + } + } + + if (!found) + return -ENOENT; + + d = kzalloc(sizeof(*d), GFP_KERNEL); + if (!d) + return -ENOMEM; + + d->handler = h; + d->token = token; + + list_add_tail(&d->list, &kvm->arch.rtas_tokens); + + return 0; +} + +int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp) +{ + struct kvm_rtas_token_args args; + int rc; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + mutex_lock(&kvm->lock); + + if (args.token) + rc = rtas_token_define(kvm, args.name, args.token); + else + rc = rtas_token_undefine(kvm, args.name); + + mutex_unlock(&kvm->lock); + + return rc; +} + +int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) +{ + struct rtas_token_definition *d; + struct rtas_args args; + rtas_arg_t *orig_rets; + gpa_t args_phys; + int rc; + + /* r4 contains the guest physical address of the RTAS args */ + args_phys = kvmppc_get_gpr(vcpu, 4); + + rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args)); + if (rc) + goto fail; + + /* + * args->rets is a pointer into args->args. Now that we've + * copied args we need to fix it up to point into our copy, + * not the guest args. We also need to save the original + * value so we can restore it on the way out. + */ + orig_rets = args.rets; + args.rets = &args.args[args.nargs]; + + mutex_lock(&vcpu->kvm->lock); + + rc = -ENOENT; + list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) { + if (d->token == args.token) { + d->handler->handler(vcpu, &args); + rc = 0; + break; + } + } + + mutex_unlock(&vcpu->kvm->lock); + + if (rc == 0) { + args.rets = orig_rets; + rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args)); + if (rc) + goto fail; + } + + return rc; + +fail: + /* + * We only get here if the guest has called RTAS with a bogus + * args pointer. That means we can't get to the args, and so we + * can't fail the RTAS call. So fail right out to userspace, + * which should kill the guest. + */ + return rc; +} + +void kvmppc_rtas_tokens_free(struct kvm *kvm) +{ + struct rtas_token_definition *d, *tmp; + + lockdep_assert_held(&kvm->lock); + + list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) { + list_del(&d->list); + kfree(d); + } +} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index d8e81e6c1afe..d4fd443ae7bd 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -341,6 +341,7 @@ int kvm_dev_ioctl_check_extension(long ext) #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: case KVM_CAP_PPC_ALLOC_HTAB: + case KVM_CAP_PPC_RTAS: r = 1; break; #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -986,6 +987,7 @@ long kvm_arch_vm_ioctl(struct file *filp, #ifdef CONFIG_KVM_BOOK3S_64_HV case KVM_ALLOCATE_RMA: { struct kvm_allocate_rma rma; + struct kvm *kvm = filp->private_data; r = kvm_vm_ioctl_allocate_rma(kvm, &rma); if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) @@ -1030,6 +1032,12 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; break; } + case KVM_PPC_RTAS_DEFINE_TOKEN: { + struct kvm *kvm = filp->private_data; + + r = kvm_vm_ioctl_rtas_define_token(kvm, argp); + break; + } #endif /* CONFIG_PPC_BOOK3S_64 */ default: r = -ENOTTY; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0ebf59b50ed0..d4005192ad6e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -668,6 +668,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_ARM_SET_DEVICE_ADDR 88 #define KVM_CAP_DEVICE_CTRL 89 #define KVM_CAP_IRQ_MPIC 90 +#define KVM_CAP_PPC_RTAS 91 #ifdef KVM_CAP_IRQ_ROUTING @@ -928,6 +929,8 @@ struct kvm_s390_ucas_mapping { #define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd) /* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */ #define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) +/* Available with KVM_CAP_PPC_RTAS */ +#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) -- cgit v1.2.3 From bc5ad3f3701116e7db57268e6f89010ec714697e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 17 Apr 2013 20:30:26 +0000 Subject: KVM: PPC: Book3S: Add kernel emulation for the XICS interrupt controller This adds in-kernel emulation of the XICS (eXternal Interrupt Controller Specification) interrupt controller specified by PAPR, for both HV and PR KVM guests. The XICS emulation supports up to 1048560 interrupt sources. Interrupt source numbers below 16 are reserved; 0 is used to mean no interrupt and 2 is used for IPIs. Internally these are represented in blocks of 1024, called ICS (interrupt controller source) entities, but that is not visible to userspace. Each vcpu gets one ICP (interrupt controller presentation) entity, used to store the per-vcpu state such as vcpu priority, pending interrupt state, IPI request, etc. This does not include any API or any way to connect vcpus to their ICP state; that will be added in later patches. This is based on an initial implementation by Michael Ellerman reworked by Benjamin Herrenschmidt and Paul Mackerras. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras [agraf: fix typo, add dependency on !KVM_MPIC] Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 2 + arch/powerpc/include/asm/kvm_host.h | 11 + arch/powerpc/include/asm/kvm_ppc.h | 29 ++ arch/powerpc/kvm/Kconfig | 8 + arch/powerpc/kvm/Makefile | 3 + arch/powerpc/kvm/book3s.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 9 + arch/powerpc/kvm/book3s_pr_papr.c | 14 + arch/powerpc/kvm/book3s_rtas.c | 54 +- arch/powerpc/kvm/book3s_xics.c | 946 ++++++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_xics.h | 113 ++++ arch/powerpc/kvm/powerpc.c | 3 + 12 files changed, 1192 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/kvm/book3s_xics.c create mode 100644 arch/powerpc/kvm/book3s_xics.h (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index c55f7e6affaa..349ed85c7d61 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -142,6 +142,8 @@ extern int kvmppc_mmu_hv_init(void); extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); +extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, + unsigned int vec); extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags); extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 311f7e6f09e9..af326cde7cb6 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -192,6 +192,10 @@ struct kvmppc_linear_info { int type; }; +/* XICS components, defined in book3s_xics.c */ +struct kvmppc_xics; +struct kvmppc_icp; + /* * The reverse mapping array has one entry for each HPTE, * which stores the guest's view of the second word of the HPTE @@ -264,6 +268,9 @@ struct kvm_arch { #ifdef CONFIG_KVM_MPIC struct openpic *mpic; #endif +#ifdef CONFIG_KVM_XICS + struct kvmppc_xics *xics; +#endif }; /* @@ -387,6 +394,7 @@ struct kvmppc_booke_debug_reg { #define KVMPPC_IRQ_DEFAULT 0 #define KVMPPC_IRQ_MPIC 1 +#define KVMPPC_IRQ_XICS 2 struct openpic; @@ -574,6 +582,9 @@ struct kvm_vcpu_arch { int irq_type; /* one of KVM_IRQ_* */ int irq_cpu_id; struct openpic *mpic; /* KVM_IRQ_MPIC */ +#ifdef CONFIG_KVM_XICS + struct kvmppc_icp *icp; /* XICS presentation controller */ +#endif #ifdef CONFIG_KVM_BOOK3S_64_HV struct kvm_vcpu_arch_shared shregs; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 8a30eb7f2bec..6582eed321ba 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -130,6 +130,7 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm, extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, unsigned long porder); extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); + extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, @@ -169,6 +170,10 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp); extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu); extern void kvmppc_rtas_tokens_free(struct kvm *kvm); +extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, + u32 priority); +extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, + u32 *priority); /* * Cuts out inst bits with ordering according to spec. @@ -267,6 +272,30 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) static inline void kvm_linear_init(void) {} + +#endif + +#ifdef CONFIG_KVM_XICS +static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.irq_type == KVMPPC_IRQ_XICS; +} +extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); +extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server); +extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); +extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd); +#else +static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) + { return 0; } +static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { } +static inline int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, + unsigned long server) + { return -EINVAL; } +static inline int kvm_vm_ioctl_xics_irq(struct kvm *kvm, + struct kvm_irq_level *args) + { return -ENOTTY; } +static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) + { return 0; } #endif static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 656e0bc29fe8..eb643f862579 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -163,6 +163,14 @@ config KVM_MPIC Currently, support is limited to certain versions of Freescale's MPIC implementation. +config KVM_XICS + bool "KVM in-kernel XICS emulation" + depends on KVM_BOOK3S_64 && !KVM_MPIC + ---help--- + Include support for the XICS (eXternal Interrupt Controller + Specification) interrupt controller architecture used on + IBM POWER (pSeries) servers. + source drivers/vhost/Kconfig endif # VIRTUALIZATION diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 3faf5c07329c..f9b87b540450 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -79,6 +79,9 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ book3s_hv_ras.o \ book3s_hv_builtin.o +kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ + book3s_xics.o + kvm-book3s_64-module-objs := \ ../../../virt/kvm/kvm_main.o \ ../../../virt/kvm/eventfd.o \ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 128ed3a856b9..1a4d787df507 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -104,7 +104,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec) return prio; } -static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, +void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) { unsigned long old_pending = vcpu->arch.pending_exceptions; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f3d7af7981c7..82ba00f68b07 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -532,6 +532,15 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) /* Send the error out to userspace via KVM_RUN */ return rc; + + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + if (kvmppc_xics_enabled(vcpu)) { + ret = kvmppc_xics_hcall(vcpu, req); + break; + } /* fallthrough */ default: return RESUME_HOST; } diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 4efa4a4f3722..b24309c6c2d5 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -227,6 +227,13 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) +{ + long rc = kvmppc_xics_hcall(vcpu, cmd); + kvmppc_set_gpr(vcpu, 3, rc); + return EMULATE_DONE; +} + int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) { switch (cmd) { @@ -246,6 +253,13 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) clear_bit(KVM_REQ_UNHALT, &vcpu->requests); vcpu->stat.halt_wakeup++; return EMULATE_DONE; + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + if (kvmppc_xics_enabled(vcpu)) + return kvmppc_h_pr_xics_hcall(vcpu, cmd); + break; case H_RTAS: if (list_empty(&vcpu->kvm->arch.rtas_tokens)) return RESUME_HOST; diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 6ad7050eb67d..77f9aa5f4ba5 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -17,13 +17,65 @@ #include #include +#ifdef CONFIG_KVM_XICS +static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq, server, priority; + int rc; + + if (args->nargs != 3 || args->nret != 1) { + rc = -3; + goto out; + } + + irq = args->args[0]; + server = args->args[1]; + priority = args->args[2]; + + rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); + if (rc) + rc = -3; +out: + args->rets[0] = rc; +} + +static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq, server, priority; + int rc; + + if (args->nargs != 1 || args->nret != 3) { + rc = -3; + goto out; + } + + irq = args->args[0]; + + server = priority = 0; + rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); + if (rc) { + rc = -3; + goto out; + } + + args->rets[1] = server; + args->rets[2] = priority; +out: + args->rets[0] = rc; +} +#endif /* CONFIG_KVM_XICS */ struct rtas_handler { void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args); char *name; }; -static struct rtas_handler rtas_handlers[] = { }; +static struct rtas_handler rtas_handlers[] = { +#ifdef CONFIG_KVM_XICS + { .name = "ibm,set-xive", .handler = kvm_rtas_set_xive }, + { .name = "ibm,get-xive", .handler = kvm_rtas_get_xive }, +#endif +}; struct rtas_token_definition { struct list_head list; diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c new file mode 100644 index 000000000000..53af848116f2 --- /dev/null +++ b/arch/powerpc/kvm/book3s_xics.c @@ -0,0 +1,946 @@ +/* + * Copyright 2012 Michael Ellerman, IBM Corporation. + * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "book3s_xics.h" + +#if 1 +#define XICS_DBG(fmt...) do { } while (0) +#else +#define XICS_DBG(fmt...) trace_printk(fmt) +#endif + +/* + * LOCKING + * ======= + * + * Each ICS has a mutex protecting the information about the IRQ + * sources and avoiding simultaneous deliveries if the same interrupt. + * + * ICP operations are done via a single compare & swap transaction + * (most ICP state fits in the union kvmppc_icp_state) + */ + +/* + * TODO + * ==== + * + * - To speed up resends, keep a bitmap of "resend" set bits in the + * ICS + * + * - Speed up server# -> ICP lookup (array ? hash table ?) + * + * - Make ICS lockless as well, or at least a per-interrupt lock or hashed + * locks array to improve scalability + * + * - ioctl's to save/restore the entire state for snapshot & migration + */ + +/* -- ICS routines -- */ + +static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, + u32 new_irq); + +static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) +{ + struct ics_irq_state *state; + struct kvmppc_ics *ics; + u16 src; + + XICS_DBG("ics deliver %#x (level: %d)\n", irq, level); + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) { + XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq); + return -EINVAL; + } + state = &ics->irq_state[src]; + if (!state->exists) + return -EINVAL; + + /* + * We set state->asserted locklessly. This should be fine as + * we are the only setter, thus concurrent access is undefined + * to begin with. + */ + if (level == KVM_INTERRUPT_SET_LEVEL) + state->asserted = 1; + else if (level == KVM_INTERRUPT_UNSET) { + state->asserted = 0; + return 0; + } + + /* Attempt delivery */ + icp_deliver_irq(xics, NULL, irq); + + return 0; +} + +static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, + struct kvmppc_icp *icp) +{ + int i; + + mutex_lock(&ics->lock); + + for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { + struct ics_irq_state *state = &ics->irq_state[i]; + + if (!state->resend) + continue; + + XICS_DBG("resend %#x prio %#x\n", state->number, + state->priority); + + mutex_unlock(&ics->lock); + icp_deliver_irq(xics, icp, state->number); + mutex_lock(&ics->lock); + } + + mutex_unlock(&ics->lock); +} + +int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_icp *icp; + struct kvmppc_ics *ics; + struct ics_irq_state *state; + u16 src; + bool deliver; + + if (!xics) + return -ENODEV; + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) + return -EINVAL; + state = &ics->irq_state[src]; + + icp = kvmppc_xics_find_server(kvm, server); + if (!icp) + return -EINVAL; + + mutex_lock(&ics->lock); + + XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n", + irq, server, priority, + state->masked_pending, state->resend); + + state->server = server; + state->priority = priority; + deliver = false; + if ((state->masked_pending || state->resend) && priority != MASKED) { + state->masked_pending = 0; + deliver = true; + } + + mutex_unlock(&ics->lock); + + if (deliver) + icp_deliver_irq(xics, icp, irq); + + return 0; +} + +int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_ics *ics; + struct ics_irq_state *state; + u16 src; + + if (!xics) + return -ENODEV; + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) + return -EINVAL; + state = &ics->irq_state[src]; + + mutex_lock(&ics->lock); + *server = state->server; + *priority = state->priority; + mutex_unlock(&ics->lock); + + return 0; +} + +/* -- ICP routines, including hcalls -- */ + +static inline bool icp_try_update(struct kvmppc_icp *icp, + union kvmppc_icp_state old, + union kvmppc_icp_state new, + bool change_self) +{ + bool success; + + /* Calculate new output value */ + new.out_ee = (new.xisr && (new.pending_pri < new.cppr)); + + /* Attempt atomic update */ + success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw; + if (!success) + goto bail; + + XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n", + icp->server_num, + old.cppr, old.mfrr, old.pending_pri, old.xisr, + old.need_resend, old.out_ee); + XICS_DBG("UPD - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n", + new.cppr, new.mfrr, new.pending_pri, new.xisr, + new.need_resend, new.out_ee); + /* + * Check for output state update + * + * Note that this is racy since another processor could be updating + * the state already. This is why we never clear the interrupt output + * here, we only ever set it. The clear only happens prior to doing + * an update and only by the processor itself. Currently we do it + * in Accept (H_XIRR) and Up_Cppr (H_XPPR). + * + * We also do not try to figure out whether the EE state has changed, + * we unconditionally set it if the new state calls for it for the + * same reason. + */ + if (new.out_ee) { + kvmppc_book3s_queue_irqprio(icp->vcpu, + BOOK3S_INTERRUPT_EXTERNAL_LEVEL); + if (!change_self) + kvm_vcpu_kick(icp->vcpu); + } + bail: + return success; +} + +static void icp_check_resend(struct kvmppc_xics *xics, + struct kvmppc_icp *icp) +{ + u32 icsid; + + /* Order this load with the test for need_resend in the caller */ + smp_rmb(); + for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) { + struct kvmppc_ics *ics = xics->ics[icsid]; + + if (!test_and_clear_bit(icsid, icp->resend_map)) + continue; + if (!ics) + continue; + ics_check_resend(xics, ics, icp); + } +} + +static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority, + u32 *reject) +{ + union kvmppc_icp_state old_state, new_state; + bool success; + + XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority, + icp->server_num); + + do { + old_state = new_state = ACCESS_ONCE(icp->state); + + *reject = 0; + + /* See if we can deliver */ + success = new_state.cppr > priority && + new_state.mfrr > priority && + new_state.pending_pri > priority; + + /* + * If we can, check for a rejection and perform the + * delivery + */ + if (success) { + *reject = new_state.xisr; + new_state.xisr = irq; + new_state.pending_pri = priority; + } else { + /* + * If we failed to deliver we set need_resend + * so a subsequent CPPR state change causes us + * to try a new delivery. + */ + new_state.need_resend = true; + } + + } while (!icp_try_update(icp, old_state, new_state, false)); + + return success; +} + +static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, + u32 new_irq) +{ + struct ics_irq_state *state; + struct kvmppc_ics *ics; + u32 reject; + u16 src; + + /* + * This is used both for initial delivery of an interrupt and + * for subsequent rejection. + * + * Rejection can be racy vs. resends. We have evaluated the + * rejection in an atomic ICP transaction which is now complete, + * so potentially the ICP can already accept the interrupt again. + * + * So we need to retry the delivery. Essentially the reject path + * boils down to a failed delivery. Always. + * + * Now the interrupt could also have moved to a different target, + * thus we may need to re-do the ICP lookup as well + */ + + again: + /* Get the ICS state and lock it */ + ics = kvmppc_xics_find_ics(xics, new_irq, &src); + if (!ics) { + XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq); + return; + } + state = &ics->irq_state[src]; + + /* Get a lock on the ICS */ + mutex_lock(&ics->lock); + + /* Get our server */ + if (!icp || state->server != icp->server_num) { + icp = kvmppc_xics_find_server(xics->kvm, state->server); + if (!icp) { + pr_warn("icp_deliver_irq: IRQ 0x%06x server 0x%x not found !\n", + new_irq, state->server); + goto out; + } + } + + /* Clear the resend bit of that interrupt */ + state->resend = 0; + + /* + * If masked, bail out + * + * Note: PAPR doesn't mention anything about masked pending + * when doing a resend, only when doing a delivery. + * + * However that would have the effect of losing a masked + * interrupt that was rejected and isn't consistent with + * the whole masked_pending business which is about not + * losing interrupts that occur while masked. + * + * I don't differenciate normal deliveries and resends, this + * implementation will differ from PAPR and not lose such + * interrupts. + */ + if (state->priority == MASKED) { + XICS_DBG("irq %#x masked pending\n", new_irq); + state->masked_pending = 1; + goto out; + } + + /* + * Try the delivery, this will set the need_resend flag + * in the ICP as part of the atomic transaction if the + * delivery is not possible. + * + * Note that if successful, the new delivery might have itself + * rejected an interrupt that was "delivered" before we took the + * icp mutex. + * + * In this case we do the whole sequence all over again for the + * new guy. We cannot assume that the rejected interrupt is less + * favored than the new one, and thus doesn't need to be delivered, + * because by the time we exit icp_try_to_deliver() the target + * processor may well have alrady consumed & completed it, and thus + * the rejected interrupt might actually be already acceptable. + */ + if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) { + /* + * Delivery was successful, did we reject somebody else ? + */ + if (reject && reject != XICS_IPI) { + mutex_unlock(&ics->lock); + new_irq = reject; + goto again; + } + } else { + /* + * We failed to deliver the interrupt we need to set the + * resend map bit and mark the ICS state as needing a resend + */ + set_bit(ics->icsid, icp->resend_map); + state->resend = 1; + + /* + * If the need_resend flag got cleared in the ICP some time + * between icp_try_to_deliver() atomic update and now, then + * we know it might have missed the resend_map bit. So we + * retry + */ + smp_mb(); + if (!icp->state.need_resend) { + mutex_unlock(&ics->lock); + goto again; + } + } + out: + mutex_unlock(&ics->lock); +} + +static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp, + u8 new_cppr) +{ + union kvmppc_icp_state old_state, new_state; + bool resend; + + /* + * This handles several related states in one operation: + * + * ICP State: Down_CPPR + * + * Load CPPR with new value and if the XISR is 0 + * then check for resends: + * + * ICP State: Resend + * + * If MFRR is more favored than CPPR, check for IPIs + * and notify ICS of a potential resend. This is done + * asynchronously (when used in real mode, we will have + * to exit here). + * + * We do not handle the complete Check_IPI as documented + * here. In the PAPR, this state will be used for both + * Set_MFRR and Down_CPPR. However, we know that we aren't + * changing the MFRR state here so we don't need to handle + * the case of an MFRR causing a reject of a pending irq, + * this will have been handled when the MFRR was set in the + * first place. + * + * Thus we don't have to handle rejects, only resends. + * + * When implementing real mode for HV KVM, resend will lead to + * a H_TOO_HARD return and the whole transaction will be handled + * in virtual mode. + */ + do { + old_state = new_state = ACCESS_ONCE(icp->state); + + /* Down_CPPR */ + new_state.cppr = new_cppr; + + /* + * Cut down Resend / Check_IPI / IPI + * + * The logic is that we cannot have a pending interrupt + * trumped by an IPI at this point (see above), so we + * know that either the pending interrupt is already an + * IPI (in which case we don't care to override it) or + * it's either more favored than us or non existent + */ + if (new_state.mfrr < new_cppr && + new_state.mfrr <= new_state.pending_pri) { + WARN_ON(new_state.xisr != XICS_IPI && + new_state.xisr != 0); + new_state.pending_pri = new_state.mfrr; + new_state.xisr = XICS_IPI; + } + + /* Latch/clear resend bit */ + resend = new_state.need_resend; + new_state.need_resend = 0; + + } while (!icp_try_update(icp, old_state, new_state, true)); + + /* + * Now handle resend checks. Those are asynchronous to the ICP + * state update in HW (ie bus transactions) so we can handle them + * separately here too + */ + if (resend) + icp_check_resend(xics, icp); +} + +static noinline unsigned long h_xirr(struct kvm_vcpu *vcpu) +{ + union kvmppc_icp_state old_state, new_state; + struct kvmppc_icp *icp = vcpu->arch.icp; + u32 xirr; + + /* First, remove EE from the processor */ + kvmppc_book3s_dequeue_irqprio(icp->vcpu, + BOOK3S_INTERRUPT_EXTERNAL_LEVEL); + + /* + * ICP State: Accept_Interrupt + * + * Return the pending interrupt (if any) along with the + * current CPPR, then clear the XISR & set CPPR to the + * pending priority + */ + do { + old_state = new_state = ACCESS_ONCE(icp->state); + + xirr = old_state.xisr | (((u32)old_state.cppr) << 24); + if (!old_state.xisr) + break; + new_state.cppr = new_state.pending_pri; + new_state.pending_pri = 0xff; + new_state.xisr = 0; + + } while (!icp_try_update(icp, old_state, new_state, true)); + + XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr); + + return xirr; +} + +static noinline int h_ipi(struct kvm_vcpu *vcpu, unsigned long server, + unsigned long mfrr) +{ + union kvmppc_icp_state old_state, new_state; + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp; + u32 reject; + bool resend; + bool local; + + XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n", + vcpu->vcpu_id, server, mfrr); + + icp = vcpu->arch.icp; + local = icp->server_num == server; + if (!local) { + icp = kvmppc_xics_find_server(vcpu->kvm, server); + if (!icp) + return H_PARAMETER; + } + + /* + * ICP state: Set_MFRR + * + * If the CPPR is more favored than the new MFRR, then + * nothing needs to be rejected as there can be no XISR to + * reject. If the MFRR is being made less favored then + * there might be a previously-rejected interrupt needing + * to be resent. + * + * If the CPPR is less favored, then we might be replacing + * an interrupt, and thus need to possibly reject it as in + * + * ICP state: Check_IPI + */ + do { + old_state = new_state = ACCESS_ONCE(icp->state); + + /* Set_MFRR */ + new_state.mfrr = mfrr; + + /* Check_IPI */ + reject = 0; + resend = false; + if (mfrr < new_state.cppr) { + /* Reject a pending interrupt if not an IPI */ + if (mfrr <= new_state.pending_pri) + reject = new_state.xisr; + new_state.pending_pri = mfrr; + new_state.xisr = XICS_IPI; + } + + if (mfrr > old_state.mfrr && mfrr > new_state.cppr) { + resend = new_state.need_resend; + new_state.need_resend = 0; + } + } while (!icp_try_update(icp, old_state, new_state, local)); + + /* Handle reject */ + if (reject && reject != XICS_IPI) + icp_deliver_irq(xics, icp, reject); + + /* Handle resend */ + if (resend) + icp_check_resend(xics, icp); + + return H_SUCCESS; +} + +static noinline void h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) +{ + union kvmppc_icp_state old_state, new_state; + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp = vcpu->arch.icp; + u32 reject; + + XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr); + + /* + * ICP State: Set_CPPR + * + * We can safely compare the new value with the current + * value outside of the transaction as the CPPR is only + * ever changed by the processor on itself + */ + if (cppr > icp->state.cppr) + icp_down_cppr(xics, icp, cppr); + else if (cppr == icp->state.cppr) + return; + + /* + * ICP State: Up_CPPR + * + * The processor is raising its priority, this can result + * in a rejection of a pending interrupt: + * + * ICP State: Reject_Current + * + * We can remove EE from the current processor, the update + * transaction will set it again if needed + */ + kvmppc_book3s_dequeue_irqprio(icp->vcpu, + BOOK3S_INTERRUPT_EXTERNAL_LEVEL); + + do { + old_state = new_state = ACCESS_ONCE(icp->state); + + reject = 0; + new_state.cppr = cppr; + + if (cppr <= new_state.pending_pri) { + reject = new_state.xisr; + new_state.xisr = 0; + new_state.pending_pri = 0xff; + } + + } while (!icp_try_update(icp, old_state, new_state, true)); + + /* + * Check for rejects. They are handled by doing a new delivery + * attempt (see comments in icp_deliver_irq). + */ + if (reject && reject != XICS_IPI) + icp_deliver_irq(xics, icp, reject); +} + +static noinline int h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) +{ + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp = vcpu->arch.icp; + struct kvmppc_ics *ics; + struct ics_irq_state *state; + u32 irq = xirr & 0x00ffffff; + u16 src; + + XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr); + + /* + * ICP State: EOI + * + * Note: If EOI is incorrectly used by SW to lower the CPPR + * value (ie more favored), we do not check for rejection of + * a pending interrupt, this is a SW error and PAPR sepcifies + * that we don't have to deal with it. + * + * The sending of an EOI to the ICS is handled after the + * CPPR update + * + * ICP State: Down_CPPR which we handle + * in a separate function as it's shared with H_CPPR. + */ + icp_down_cppr(xics, icp, xirr >> 24); + + /* IPIs have no EOI */ + if (irq == XICS_IPI) + return H_SUCCESS; + /* + * EOI handling: If the interrupt is still asserted, we need to + * resend it. We can take a lockless "peek" at the ICS state here. + * + * "Message" interrupts will never have "asserted" set + */ + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) { + XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq); + return H_PARAMETER; + } + state = &ics->irq_state[src]; + + /* Still asserted, resend it */ + if (state->asserted) + icp_deliver_irq(xics, icp, irq); + + return H_SUCCESS; +} + +int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) +{ + unsigned long res; + int rc = H_SUCCESS; + + /* Check if we have an ICP */ + if (!vcpu->arch.icp || !vcpu->kvm->arch.xics) + return H_HARDWARE; + + switch (req) { + case H_XIRR: + res = h_xirr(vcpu); + kvmppc_set_gpr(vcpu, 4, res); + break; + case H_CPPR: + h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4)); + break; + case H_EOI: + rc = h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4)); + break; + case H_IPI: + rc = h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5)); + break; + } + + return rc; +} + + +/* -- Initialisation code etc. -- */ + +static int xics_debug_show(struct seq_file *m, void *private) +{ + struct kvmppc_xics *xics = m->private; + struct kvm *kvm = xics->kvm; + struct kvm_vcpu *vcpu; + int icsid, i; + + if (!kvm) + return 0; + + seq_printf(m, "=========\nICP state\n=========\n"); + + kvm_for_each_vcpu(i, vcpu, kvm) { + struct kvmppc_icp *icp = vcpu->arch.icp; + union kvmppc_icp_state state; + + if (!icp) + continue; + + state.raw = ACCESS_ONCE(icp->state.raw); + seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n", + icp->server_num, state.xisr, + state.pending_pri, state.cppr, state.mfrr, + state.out_ee, state.need_resend); + } + + for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) { + struct kvmppc_ics *ics = xics->ics[icsid]; + + if (!ics) + continue; + + seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n", + icsid); + + mutex_lock(&ics->lock); + + for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { + struct ics_irq_state *irq = &ics->irq_state[i]; + + seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n", + irq->number, irq->server, irq->priority, + irq->saved_priority, irq->asserted, + irq->resend, irq->masked_pending); + + } + mutex_unlock(&ics->lock); + } + return 0; +} + +static int xics_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, xics_debug_show, inode->i_private); +} + +static const struct file_operations xics_debug_fops = { + .open = xics_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void xics_debugfs_init(struct kvmppc_xics *xics) +{ + char *name; + + name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics); + if (!name) { + pr_err("%s: no memory for name\n", __func__); + return; + } + + xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root, + xics, &xics_debug_fops); + + pr_debug("%s: created %s\n", __func__, name); + kfree(name); +} + +struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm, + struct kvmppc_xics *xics, int irq) +{ + struct kvmppc_ics *ics; + int i, icsid; + + icsid = irq >> KVMPPC_XICS_ICS_SHIFT; + + mutex_lock(&kvm->lock); + + /* ICS already exists - somebody else got here first */ + if (xics->ics[icsid]) + goto out; + + /* Create the ICS */ + ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL); + if (!ics) + goto out; + + mutex_init(&ics->lock); + ics->icsid = icsid; + + for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { + ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i; + ics->irq_state[i].priority = MASKED; + ics->irq_state[i].saved_priority = MASKED; + } + smp_wmb(); + xics->ics[icsid] = ics; + + if (icsid > xics->max_icsid) + xics->max_icsid = icsid; + + out: + mutex_unlock(&kvm->lock); + return xics->ics[icsid]; +} + +int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num) +{ + struct kvmppc_icp *icp; + + if (!vcpu->kvm->arch.xics) + return -ENODEV; + + if (kvmppc_xics_find_server(vcpu->kvm, server_num)) + return -EEXIST; + + icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL); + if (!icp) + return -ENOMEM; + + icp->vcpu = vcpu; + icp->server_num = server_num; + icp->state.mfrr = MASKED; + icp->state.pending_pri = MASKED; + vcpu->arch.icp = icp; + + XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id); + + return 0; +} + +/* -- ioctls -- */ + +int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args) +{ + struct kvmppc_xics *xics; + int r; + + /* locking against multiple callers? */ + + xics = kvm->arch.xics; + if (!xics) + return -ENODEV; + + switch (args->level) { + case KVM_INTERRUPT_SET: + case KVM_INTERRUPT_SET_LEVEL: + case KVM_INTERRUPT_UNSET: + r = ics_deliver_irq(xics, args->irq, args->level); + break; + default: + r = -EINVAL; + } + + return r; +} + +void kvmppc_xics_free(struct kvmppc_xics *xics) +{ + int i; + struct kvm *kvm = xics->kvm; + + debugfs_remove(xics->dentry); + + if (kvm) + kvm->arch.xics = NULL; + + for (i = 0; i <= xics->max_icsid; i++) + kfree(xics->ics[i]); + kfree(xics); +} + +int kvm_xics_create(struct kvm *kvm, u32 type) +{ + struct kvmppc_xics *xics; + int ret = 0; + + xics = kzalloc(sizeof(*xics), GFP_KERNEL); + if (!xics) + return -ENOMEM; + + xics->kvm = kvm; + + /* Already there ? */ + mutex_lock(&kvm->lock); + if (kvm->arch.xics) + ret = -EEXIST; + else + kvm->arch.xics = xics; + mutex_unlock(&kvm->lock); + + if (ret) + return ret; + + xics_debugfs_init(xics); + + return 0; +} + +void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) +{ + if (!vcpu->arch.icp) + return; + kfree(vcpu->arch.icp); + vcpu->arch.icp = NULL; + vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; +} diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h new file mode 100644 index 000000000000..58ee190de5e5 --- /dev/null +++ b/arch/powerpc/kvm/book3s_xics.h @@ -0,0 +1,113 @@ +/* + * Copyright 2012 Michael Ellerman, IBM Corporation. + * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#ifndef _KVM_PPC_BOOK3S_XICS_H +#define _KVM_PPC_BOOK3S_XICS_H + +/* + * We use a two-level tree to store interrupt source information. + * There are up to 1024 ICS nodes, each of which can represent + * 1024 sources. + */ +#define KVMPPC_XICS_MAX_ICS_ID 1023 +#define KVMPPC_XICS_ICS_SHIFT 10 +#define KVMPPC_XICS_IRQ_PER_ICS (1 << KVMPPC_XICS_ICS_SHIFT) +#define KVMPPC_XICS_SRC_MASK (KVMPPC_XICS_IRQ_PER_ICS - 1) + +/* + * Interrupt source numbers below this are reserved, for example + * 0 is "no interrupt", and 2 is used for IPIs. + */ +#define KVMPPC_XICS_FIRST_IRQ 16 +#define KVMPPC_XICS_NR_IRQS ((KVMPPC_XICS_MAX_ICS_ID + 1) * \ + KVMPPC_XICS_IRQ_PER_ICS) + +/* Priority value to use for disabling an interrupt */ +#define MASKED 0xff + +/* State for one irq source */ +struct ics_irq_state { + u32 number; + u32 server; + u8 priority; + u8 saved_priority; /* currently unused */ + u8 resend; + u8 masked_pending; + u8 asserted; /* Only for LSI */ + u8 exists; +}; + +/* Atomic ICP state, updated with a single compare & swap */ +union kvmppc_icp_state { + unsigned long raw; + struct { + u8 out_ee:1; + u8 need_resend:1; + u8 cppr; + u8 mfrr; + u8 pending_pri; + u32 xisr; + }; +}; + +/* One bit per ICS */ +#define ICP_RESEND_MAP_SIZE (KVMPPC_XICS_MAX_ICS_ID / BITS_PER_LONG + 1) + +struct kvmppc_icp { + struct kvm_vcpu *vcpu; + unsigned long server_num; + union kvmppc_icp_state state; + unsigned long resend_map[ICP_RESEND_MAP_SIZE]; +}; + +struct kvmppc_ics { + struct mutex lock; + u16 icsid; + struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS]; +}; + +struct kvmppc_xics { + struct kvm *kvm; + struct dentry *dentry; + u32 max_icsid; + struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1]; +}; + +static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm, + u32 nr) +{ + struct kvm_vcpu *vcpu = NULL; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num) + return vcpu->arch.icp; + } + return NULL; +} + +static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics, + u32 irq, u16 *source) +{ + u32 icsid = irq >> KVMPPC_XICS_ICS_SHIFT; + u16 src = irq & KVMPPC_XICS_SRC_MASK; + struct kvmppc_ics *ics; + + if (source) + *source = src; + if (icsid > KVMPPC_XICS_MAX_ICS_ID) + return NULL; + ics = xics->ics[icsid]; + if (!ics) + return NULL; + return ics; +} + + +#endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index d4fd443ae7bd..31084c6335c9 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -471,6 +471,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) case KVMPPC_IRQ_MPIC: kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); break; + case KVMPPC_IRQ_XICS: + kvmppc_xics_free_icp(vcpu); + break; } kvmppc_core_vcpu_free(vcpu); -- cgit v1.2.3 From 54695c3088a74e25474db8eb6b490b45d1aeb0ca Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 17 Apr 2013 20:30:50 +0000 Subject: KVM: PPC: Book3S HV: Speed up wakeups of CPUs on HV KVM Currently, we wake up a CPU by sending a host IPI with smp_send_reschedule() to thread 0 of that core, which will take all threads out of the guest, and cause them to re-evaluate their interrupt status on the way back in. This adds a mechanism to differentiate real host IPIs from IPIs sent by KVM for guest threads to poke each other, in order to target the guest threads precisely when possible and avoid that global switch of the core to host state. We then use this new facility in the in-kernel XICS code. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s_asm.h | 8 ++- arch/powerpc/include/asm/kvm_ppc.h | 29 +++++++++ arch/powerpc/kernel/asm-offsets.c | 2 + arch/powerpc/kvm/book3s_hv.c | 26 +++++++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 102 +++++++++++++++++++++++++----- arch/powerpc/kvm/book3s_xics.c | 2 +- arch/powerpc/sysdev/xics/icp-native.c | 8 +++ 7 files changed, 158 insertions(+), 19 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index cdc3d2717cc6..9039d3c97eec 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -20,6 +20,11 @@ #ifndef __ASM_KVM_BOOK3S_ASM_H__ #define __ASM_KVM_BOOK3S_ASM_H__ +/* XICS ICP register offsets */ +#define XICS_XIRR 4 +#define XICS_MFRR 0xc +#define XICS_IPI 2 /* interrupt source # for IPIs */ + #ifdef __ASSEMBLY__ #ifdef CONFIG_KVM_BOOK3S_HANDLER @@ -81,10 +86,11 @@ struct kvmppc_host_state { #ifdef CONFIG_KVM_BOOK3S_64_HV u8 hwthread_req; u8 hwthread_state; - + u8 host_ipi; struct kvm_vcpu *kvm_vcpu; struct kvmppc_vcore *kvm_vcore; unsigned long xics_phys; + u32 saved_xirr; u64 dabr; u64 host_mmcr[3]; u32 host_pmc[8]; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 6582eed321ba..1589fd8bf063 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -264,6 +264,21 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) paca[cpu].kvm_hstate.xics_phys = addr; } +static inline u32 kvmppc_get_xics_latch(void) +{ + u32 xirr = get_paca()->kvm_hstate.saved_xirr; + + get_paca()->kvm_hstate.saved_xirr = 0; + + return xirr; +} + +static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) +{ + paca[cpu].kvm_hstate.host_ipi = host_ipi; +} + +extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu); extern void kvm_linear_init(void); #else @@ -273,6 +288,18 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) static inline void kvm_linear_init(void) {} +static inline u32 kvmppc_get_xics_latch(void) +{ + return 0; +} + +static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) +{} + +static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) +{ + kvm_vcpu_kick(vcpu); +} #endif #ifdef CONFIG_KVM_XICS @@ -393,4 +420,6 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb) return ea; } +extern void xics_wake_cpu(int cpu); + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index dbfd5498f440..a791229329cf 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -574,6 +574,8 @@ int main(void) HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); + HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); + HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_MMCR, host_mmcr); HSTATE_FIELD(HSTATE_PMC, host_pmc); HSTATE_FIELD(HSTATE_PURR, host_purr); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 82ba00f68b07..16191915e8d0 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -66,6 +66,31 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); +void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) +{ + int me; + int cpu = vcpu->cpu; + wait_queue_head_t *wqp; + + wqp = kvm_arch_vcpu_wq(vcpu); + if (waitqueue_active(wqp)) { + wake_up_interruptible(wqp); + ++vcpu->stat.halt_wakeup; + } + + me = get_cpu(); + + /* CPU points to the first thread of the core */ + if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) { + int real_cpu = cpu + vcpu->arch.ptid; + if (paca[real_cpu].kvm_hstate.xics_phys) + xics_wake_cpu(real_cpu); + else if (cpu_online(cpu)) + smp_send_reschedule(cpu); + } + put_cpu(); +} + /* * We use the vcpu_load/put functions to measure stolen time. * Stolen time is counted as time when either the vcpu is able to @@ -985,7 +1010,6 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu) } extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); -extern void xics_wake_cpu(int cpu); static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 0f23bb851711..56f8927b0ddf 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -79,10 +79,6 @@ _GLOBAL(kvmppc_hv_entry_trampoline) * * *****************************************************************************/ -#define XICS_XIRR 4 -#define XICS_QIRR 0xc -#define XICS_IPI 2 /* interrupt source # for IPIs */ - /* * We come in here when wakened from nap mode on a secondary hw thread. * Relocation is off and most register values are lost. @@ -122,7 +118,7 @@ kvm_start_guest: beq 27f 25: ld r5,HSTATE_XICS_PHYS(r13) li r0,0xff - li r6,XICS_QIRR + li r6,XICS_MFRR li r7,XICS_XIRR lwzcix r8,r5,r7 /* get and ack the interrupt */ sync @@ -678,17 +674,91 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) cmpwi r12,BOOK3S_INTERRUPT_SYSCALL beq hcall_try_real_mode - /* Check for mediated interrupts (could be done earlier really ...) */ + /* Only handle external interrupts here on arch 206 and later */ BEGIN_FTR_SECTION - cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL - bne+ 1f - andi. r0,r11,MSR_EE - beq 1f - mfspr r5,SPRN_LPCR - andi. r0,r5,LPCR_MER + b ext_interrupt_to_host +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) + + /* External interrupt ? */ + cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL + bne+ ext_interrupt_to_host + + /* External interrupt, first check for host_ipi. If this is + * set, we know the host wants us out so let's do it now + */ + lbz r0, HSTATE_HOST_IPI(r13) + cmpwi r0, 0 + bne ext_interrupt_to_host + + /* Now read the interrupt from the ICP */ + ld r5, HSTATE_XICS_PHYS(r13) + li r7, XICS_XIRR + cmpdi r5, 0 + beq- ext_interrupt_to_host + lwzcix r3, r5, r7 + rlwinm. r0, r3, 0, 0xffffff + sync + bne 1f + + /* Nothing pending in the ICP, check for mediated interrupts + * and bounce it to the guest + */ + andi. r0, r11, MSR_EE + beq ext_interrupt_to_host /* shouldn't happen ?? */ + mfspr r5, SPRN_LPCR + andi. r0, r5, LPCR_MER bne bounce_ext_interrupt -1: -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + b ext_interrupt_to_host /* shouldn't happen ?? */ + +1: /* We found something in the ICP... + * + * If it's not an IPI, stash it in the PACA and return to + * the host, we don't (yet) handle directing real external + * interrupts directly to the guest + */ + cmpwi r0, XICS_IPI + bne ext_stash_for_host + + /* It's an IPI, clear the MFRR and EOI it */ + li r0, 0xff + li r6, XICS_MFRR + stbcix r0, r5, r6 /* clear the IPI */ + stwcix r3, r5, r7 /* EOI it */ + sync + + /* We need to re-check host IPI now in case it got set in the + * meantime. If it's clear, we bounce the interrupt to the + * guest + */ + lbz r0, HSTATE_HOST_IPI(r13) + cmpwi r0, 0 + bne- 1f + + /* Allright, looks like an IPI for the guest, we need to set MER */ + mfspr r8,SPRN_LPCR + ori r8,r8,LPCR_MER + mtspr SPRN_LPCR,r8 + + /* And if the guest EE is set, we can deliver immediately, else + * we return to the guest with MER set + */ + andi. r0, r11, MSR_EE + bne bounce_ext_interrupt + mr r4, r9 + b fast_guest_return + + /* We raced with the host, we need to resend that IPI, bummer */ +1: li r0, IPI_PRIORITY + stbcix r0, r5, r6 /* set the IPI */ + sync + b ext_interrupt_to_host + +ext_stash_for_host: + /* It's not an IPI and it's for the host, stash it in the PACA + * before exit, it will be picked up by the host ICP driver + */ + stw r3, HSTATE_SAVED_XIRR(r13) +ext_interrupt_to_host: guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ /* Save DEC */ @@ -831,7 +901,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) beq 44f ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */ li r0,IPI_PRIORITY - li r7,XICS_QIRR + li r7,XICS_MFRR stbcix r0,r7,r8 /* trigger the IPI */ 44: srdi. r3,r3,1 addi r6,r6,PACA_SIZE @@ -1630,7 +1700,7 @@ secondary_nap: beq 37f sync li r0, 0xff - li r6, XICS_QIRR + li r6, XICS_MFRR stbcix r0, r5, r6 /* clear the IPI */ stwcix r3, r5, r7 /* EOI it */ 37: sync diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 53af848116f2..1417e65b6bbd 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -227,7 +227,7 @@ static inline bool icp_try_update(struct kvmppc_icp *icp, kvmppc_book3s_queue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); if (!change_self) - kvm_vcpu_kick(icp->vcpu); + kvmppc_fast_vcpu_kick(icp->vcpu); } bail: return success; diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index 48861d3fcd07..20b328bb494d 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -51,6 +51,12 @@ static struct icp_ipl __iomem *icp_native_regs[NR_CPUS]; static inline unsigned int icp_native_get_xirr(void) { int cpu = smp_processor_id(); + unsigned int xirr; + + /* Handled an interrupt latched by KVM */ + xirr = kvmppc_get_xics_latch(); + if (xirr) + return xirr; return in_be32(&icp_native_regs[cpu]->xirr.word); } @@ -138,6 +144,7 @@ static unsigned int icp_native_get_irq(void) static void icp_native_cause_ipi(int cpu, unsigned long data) { + kvmppc_set_host_ipi(cpu, 1); icp_native_set_qirr(cpu, IPI_PRIORITY); } @@ -151,6 +158,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id) { int cpu = smp_processor_id(); + kvmppc_set_host_ipi(cpu, 0); icp_native_set_qirr(cpu, 0xff); return smp_ipi_demux(); -- cgit v1.2.3 From 4619ac88b72c43c622ef1eae3069de0e6f2cba9d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 17 Apr 2013 20:31:41 +0000 Subject: KVM: PPC: Book3S HV: Improve real-mode handling of external interrupts This streamlines our handling of external interrupts that come in while we're in the guest. First, when waking up a hardware thread that was napping, we split off the "napping due to H_CEDE" case earlier, and use the code that handles an external interrupt (0x500) in the guest to handle that too. Secondly, the code that handles those external interrupts now checks if any other thread is exiting to the host before bouncing an external interrupt to the guest, and also checks that there is actually an external interrupt pending for the guest before setting the LPCR MER bit (mediated external request). This also makes sure that we clear the "ceded" flag when we handle a wakeup from cede in real mode, and fixes a potential infinite loop in kvmppc_run_vcpu() which can occur if we ever end up with the ceded flag set but MSR[EE] off. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kvm/book3s_hv.c | 5 +- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 138 +++++++++++++++++--------------- 3 files changed, 80 insertions(+), 64 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c9c67fc888c9..799322433620 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -290,6 +290,7 @@ #define LPCR_PECE1 0x00002000 /* decrementer can cause exit */ #define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */ #define LPCR_MER 0x00000800 /* Mediated External Exception */ +#define LPCR_MER_SH 11 #define LPCR_LPES 0x0000000c #define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */ #define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 16191915e8d0..178521e81ce4 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1384,9 +1384,12 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) break; vc->runner = vcpu; n_ceded = 0; - list_for_each_entry(v, &vc->runnable_threads, arch.run_list) + list_for_each_entry(v, &vc->runnable_threads, arch.run_list) { if (!v->arch.pending_exceptions) n_ceded += v->arch.ceded; + else + v->arch.ceded = 0; + } if (n_ceded == vc->n_runnable) kvmppc_vcore_blocked(vc); else diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index fd3b72d5dfe6..b02f91e4c70d 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -97,50 +97,51 @@ kvm_start_guest: li r0,1 stb r0,PACA_NAPSTATELOST(r13) - /* get vcpu pointer, NULL if we have no vcpu to run */ - ld r4,HSTATE_KVM_VCPU(r13) - cmpdi cr1,r4,0 + /* were we napping due to cede? */ + lbz r0,HSTATE_NAPPING(r13) + cmpwi r0,0 + bne kvm_end_cede + + /* + * We weren't napping due to cede, so this must be a secondary + * thread being woken up to run a guest, or being woken up due + * to a stray IPI. (Or due to some machine check or hypervisor + * maintenance interrupt while the core is in KVM.) + */ /* Check the wake reason in SRR1 to see why we got here */ mfspr r3,SPRN_SRR1 rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ cmpwi r3,4 /* was it an external interrupt? */ - bne 27f - - /* - * External interrupt - for now assume it is an IPI, since we - * should never get any other interrupts sent to offline threads. - * Only do this for secondary threads. - */ - beq cr1,25f - lwz r3,VCPU_PTID(r4) - cmpwi r3,0 - beq 27f -25: ld r5,HSTATE_XICS_PHYS(r13) - li r0,0xff - li r6,XICS_MFRR - li r7,XICS_XIRR + bne 27f /* if not */ + ld r5,HSTATE_XICS_PHYS(r13) + li r7,XICS_XIRR /* if it was an external interrupt, */ lwzcix r8,r5,r7 /* get and ack the interrupt */ sync clrldi. r9,r8,40 /* get interrupt source ID. */ - beq 27f /* none there? */ - cmpwi r9,XICS_IPI - bne 26f + beq 28f /* none there? */ + cmpwi r9,XICS_IPI /* was it an IPI? */ + bne 29f + li r0,0xff + li r6,XICS_MFRR stbcix r0,r5,r6 /* clear IPI */ -26: stwcix r8,r5,r7 /* EOI the interrupt */ + stwcix r8,r5,r7 /* EOI the interrupt */ + sync /* order loading of vcpu after that */ -27: /* XXX should handle hypervisor maintenance interrupts etc. here */ - - /* reload vcpu pointer after clearing the IPI */ + /* get vcpu pointer, NULL if we have no vcpu to run */ ld r4,HSTATE_KVM_VCPU(r13) cmpdi r4,0 /* if we have no vcpu to run, go back to sleep */ beq kvm_no_guest + b kvmppc_hv_entry - /* were we napping due to cede? */ - lbz r0,HSTATE_NAPPING(r13) - cmpwi r0,0 - bne kvm_end_cede +27: /* XXX should handle hypervisor maintenance interrupts etc. here */ + b kvm_no_guest +28: /* SRR1 said external but ICP said nope?? */ + b kvm_no_guest +29: /* External non-IPI interrupt to offline secondary thread? help?? */ + stw r8,HSTATE_SAVED_XIRR(r13) + b kvm_no_guest .global kvmppc_hv_entry kvmppc_hv_entry: @@ -483,20 +484,20 @@ toc_tlbie_lock: mtctr r6 mtxer r7 + ld r10, VCPU_PC(r4) + ld r11, VCPU_MSR(r4) kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ ld r6, VCPU_SRR0(r4) ld r7, VCPU_SRR1(r4) - ld r10, VCPU_PC(r4) - ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */ + /* r11 = vcpu->arch.msr & ~MSR_HV */ rldicl r11, r11, 63 - MSR_HV_LG, 1 rotldi r11, r11, 1 + MSR_HV_LG ori r11, r11, MSR_ME /* Check if we can deliver an external or decrementer interrupt now */ ld r0,VCPU_PENDING_EXC(r4) - li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL) - oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h + lis r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h and r0,r0,r8 cmpdi cr1,r0,0 andi. r0,r11,MSR_EE @@ -524,10 +525,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Move SRR0 and SRR1 into the respective regs */ 5: mtspr SPRN_SRR0, r6 mtspr SPRN_SRR1, r7 - li r0,0 - stb r0,VCPU_CEDED(r4) /* cancel cede */ fast_guest_return: + li r0,0 + stb r0,VCPU_CEDED(r4) /* cancel cede */ mtspr SPRN_HSRR0,r10 mtspr SPRN_HSRR1,r11 @@ -686,6 +687,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) /* External interrupt, first check for host_ipi. If this is * set, we know the host wants us out so let's do it now */ +do_ext_interrupt: lbz r0, HSTATE_HOST_IPI(r13) cmpwi r0, 0 bne ext_interrupt_to_host @@ -698,19 +700,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) lwzcix r3, r5, r7 rlwinm. r0, r3, 0, 0xffffff sync - bne 1f + beq 3f /* if nothing pending in the ICP */ - /* Nothing pending in the ICP, check for mediated interrupts - * and bounce it to the guest - */ - andi. r0, r11, MSR_EE - beq ext_interrupt_to_host /* shouldn't happen ?? */ - mfspr r5, SPRN_LPCR - andi. r0, r5, LPCR_MER - bne bounce_ext_interrupt - b ext_interrupt_to_host /* shouldn't happen ?? */ - -1: /* We found something in the ICP... + /* We found something in the ICP... * * If it's not an IPI, stash it in the PACA and return to * the host, we don't (yet) handle directing real external @@ -735,16 +727,33 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) bne- 1f /* Allright, looks like an IPI for the guest, we need to set MER */ - mfspr r8,SPRN_LPCR - ori r8,r8,LPCR_MER - mtspr SPRN_LPCR,r8 +3: + /* Check if any CPU is heading out to the host, if so head out too */ + ld r5, HSTATE_KVM_VCORE(r13) + lwz r0, VCORE_ENTRY_EXIT(r5) + cmpwi r0, 0x100 + bge ext_interrupt_to_host + + /* See if there is a pending interrupt for the guest */ + mfspr r8, SPRN_LPCR + ld r0, VCPU_PENDING_EXC(r9) + /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */ + rldicl. r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63 + rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH + beq 2f /* And if the guest EE is set, we can deliver immediately, else * we return to the guest with MER set */ andi. r0, r11, MSR_EE - bne bounce_ext_interrupt - mr r4, r9 + beq 2f + mtspr SPRN_SRR0, r10 + mtspr SPRN_SRR1, r11 + li r10, BOOK3S_INTERRUPT_EXTERNAL + li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ + rotldi r11, r11, 63 +2: mr r4, r9 + mtspr SPRN_LPCR, r8 b fast_guest_return /* We raced with the host, we need to resend that IPI, bummer */ @@ -1487,15 +1496,6 @@ ignore_hdec: mr r4,r9 b fast_guest_return -bounce_ext_interrupt: - mr r4,r9 - mtspr SPRN_SRR0,r10 - mtspr SPRN_SRR1,r11 - li r10,BOOK3S_INTERRUPT_EXTERNAL - li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ - rotldi r11,r11,63 - b fast_guest_return - _GLOBAL(kvmppc_h_set_dabr) std r4,VCPU_DABR(r3) /* Work around P7 bug where DABR can get corrupted on mtspr */ @@ -1601,6 +1601,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) b . kvm_end_cede: + /* get vcpu pointer */ + ld r4, HSTATE_KVM_VCPU(r13) + /* Woken by external or decrementer interrupt */ ld r1, HSTATE_HOST_R1(r13) @@ -1640,6 +1643,16 @@ kvm_end_cede: li r0,0 stb r0,HSTATE_NAPPING(r13) + /* Check the wake reason in SRR1 to see why we got here */ + mfspr r3, SPRN_SRR1 + rlwinm r3, r3, 44-31, 0x7 /* extract wake reason field */ + cmpwi r3, 4 /* was it an external interrupt? */ + li r12, BOOK3S_INTERRUPT_EXTERNAL + mr r9, r4 + ld r10, VCPU_PC(r9) + ld r11, VCPU_MSR(r9) + beq do_ext_interrupt /* if so */ + /* see if any other thread is already exiting */ lwz r0,VCORE_ENTRY_EXIT(r5) cmpwi r0,0x100 @@ -1659,8 +1672,7 @@ kvm_cede_prodded: /* we've ceded but we want to give control to the host */ kvm_cede_exit: - li r3,H_TOO_HARD - blr + b hcall_real_fallback /* Try to handle a machine check in real mode */ machine_check_realmode: -- cgit v1.2.3 From d19bd86204f85d42873e07bb64a27587fc380b5b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 17 Apr 2013 20:32:04 +0000 Subject: KVM: PPC: Book3S: Add support for ibm,int-on/off RTAS calls This adds support for the ibm,int-on and ibm,int-off RTAS calls to the in-kernel XICS emulation and corrects the handling of the saved priority by the ibm,set-xive RTAS call. With this, ibm,int-off sets the specified interrupt's priority in its saved_priority field and sets the priority to 0xff (the least favoured value). ibm,int-on restores the saved_priority to the priority field, and ibm,set-xive sets both the priority and the saved_priority to the specified priority value. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 2 + arch/powerpc/kvm/book3s_rtas.c | 40 ++++++++++++++++++ arch/powerpc/kvm/book3s_xics.c | 84 +++++++++++++++++++++++++++++++------- arch/powerpc/kvm/book3s_xics.h | 2 +- 4 files changed, 113 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 1589fd8bf063..cfaa47995c0e 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -174,6 +174,8 @@ extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority); extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority); +extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); +extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); /* * Cuts out inst bits with ordering according to spec. diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 77f9aa5f4ba5..3219ba895246 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -63,6 +63,44 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) out: args->rets[0] = rc; } + +static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq; + int rc; + + if (args->nargs != 1 || args->nret != 1) { + rc = -3; + goto out; + } + + irq = args->args[0]; + + rc = kvmppc_xics_int_off(vcpu->kvm, irq); + if (rc) + rc = -3; +out: + args->rets[0] = rc; +} + +static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq; + int rc; + + if (args->nargs != 1 || args->nret != 1) { + rc = -3; + goto out; + } + + irq = args->args[0]; + + rc = kvmppc_xics_int_on(vcpu->kvm, irq); + if (rc) + rc = -3; +out: + args->rets[0] = rc; +} #endif /* CONFIG_KVM_XICS */ struct rtas_handler { @@ -74,6 +112,8 @@ static struct rtas_handler rtas_handlers[] = { #ifdef CONFIG_KVM_XICS { .name = "ibm,set-xive", .handler = kvm_rtas_set_xive }, { .name = "ibm,get-xive", .handler = kvm_rtas_get_xive }, + { .name = "ibm,int-off", .handler = kvm_rtas_int_off }, + { .name = "ibm,int-on", .handler = kvm_rtas_int_on }, #endif }; diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 7fd247cbd0d1..9fb2d3909c46 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -123,6 +123,28 @@ static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, mutex_unlock(&ics->lock); } +static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics, + struct ics_irq_state *state, + u32 server, u32 priority, u32 saved_priority) +{ + bool deliver; + + mutex_lock(&ics->lock); + + state->server = server; + state->priority = priority; + state->saved_priority = saved_priority; + deliver = false; + if ((state->masked_pending || state->resend) && priority != MASKED) { + state->masked_pending = 0; + deliver = true; + } + + mutex_unlock(&ics->lock); + + return deliver; +} + int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) { struct kvmppc_xics *xics = kvm->arch.xics; @@ -130,7 +152,6 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) struct kvmppc_ics *ics; struct ics_irq_state *state; u16 src; - bool deliver; if (!xics) return -ENODEV; @@ -144,23 +165,11 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) if (!icp) return -EINVAL; - mutex_lock(&ics->lock); - XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n", irq, server, priority, state->masked_pending, state->resend); - state->server = server; - state->priority = priority; - deliver = false; - if ((state->masked_pending || state->resend) && priority != MASKED) { - state->masked_pending = 0; - deliver = true; - } - - mutex_unlock(&ics->lock); - - if (deliver) + if (write_xive(xics, ics, state, server, priority, priority)) icp_deliver_irq(xics, icp, irq); return 0; @@ -189,6 +198,53 @@ int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority) return 0; } +int kvmppc_xics_int_on(struct kvm *kvm, u32 irq) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_icp *icp; + struct kvmppc_ics *ics; + struct ics_irq_state *state; + u16 src; + + if (!xics) + return -ENODEV; + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) + return -EINVAL; + state = &ics->irq_state[src]; + + icp = kvmppc_xics_find_server(kvm, state->server); + if (!icp) + return -EINVAL; + + if (write_xive(xics, ics, state, state->server, state->saved_priority, + state->saved_priority)) + icp_deliver_irq(xics, icp, irq); + + return 0; +} + +int kvmppc_xics_int_off(struct kvm *kvm, u32 irq) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_ics *ics; + struct ics_irq_state *state; + u16 src; + + if (!xics) + return -ENODEV; + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) + return -EINVAL; + state = &ics->irq_state[src]; + + write_xive(xics, ics, state, state->server, MASKED, state->priority); + + return 0; +} + /* -- ICP routines, including hcalls -- */ static inline bool icp_try_update(struct kvmppc_icp *icp, diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index c816c5a49c90..e4fdec3dde77 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -36,7 +36,7 @@ struct ics_irq_state { u32 number; u32 server; u8 priority; - u8 saved_priority; /* currently unused */ + u8 saved_priority; u8 resend; u8 masked_pending; u8 asserted; /* Only for LSI */ -- cgit v1.2.3 From 8b78645c93b5d469e8006d68dbc92edc2640c654 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 17 Apr 2013 20:32:26 +0000 Subject: KVM: PPC: Book3S: Facilities to save/restore XICS presentation ctrler state This adds the ability for userspace to save and restore the state of the XICS interrupt presentation controllers (ICPs) via the KVM_GET/SET_ONE_REG interface. Since there is one ICP per vcpu, we simply define a new 64-bit register in the ONE_REG space for the ICP state. The state includes the CPU priority setting, the pending IPI priority, and the priority and source number of any pending external interrupt. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 1 + arch/powerpc/include/asm/kvm_ppc.h | 2 + arch/powerpc/include/uapi/asm/kvm.h | 12 +++++ arch/powerpc/kvm/book3s.c | 19 ++++++++ arch/powerpc/kvm/book3s_xics.c | 90 +++++++++++++++++++++++++++++++++++++ 5 files changed, 124 insertions(+) (limited to 'arch/powerpc/include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index fb308be8521b..c09d1832e935 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1808,6 +1808,7 @@ registers, find a list below: PPC | KVM_REG_PPC_TLB2PS | 32 PPC | KVM_REG_PPC_TLB3PS | 32 PPC | KVM_REG_PPC_EPTCFG | 32 + PPC | KVM_REG_PPC_ICP_STATE | 64 ARM registers are mapped using the lower 32 bits. The upper 16 of that is the register group type, or coprocessor number: diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index cfaa47995c0e..d7339df19259 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -313,6 +313,8 @@ extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server); extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd); +extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu); +extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); #else static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) { return 0; } diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index eb9e25c194ad..427b9aca2a0f 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -390,6 +390,18 @@ struct kvm_get_htab_header { __u16 n_invalid; }; +/* Per-vcpu XICS interrupt controller state */ +#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) + +#define KVM_REG_PPC_ICP_CPPR_SHIFT 56 /* current proc priority */ +#define KVM_REG_PPC_ICP_CPPR_MASK 0xff +#define KVM_REG_PPC_ICP_XISR_SHIFT 32 /* interrupt status field */ +#define KVM_REG_PPC_ICP_XISR_MASK 0xffffff +#define KVM_REG_PPC_ICP_MFRR_SHIFT 24 /* pending IPI priority */ +#define KVM_REG_PPC_ICP_MFRR_MASK 0xff +#define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */ +#define KVM_REG_PPC_ICP_PPRI_MASK 0xff + /* Device control API: PPC-specific devices */ #define KVM_DEV_MPIC_GRP_MISC 1 #define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 1a4d787df507..700df6f1d32c 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -535,6 +535,15 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) &opcode, sizeof(u32)); break; } +#ifdef CONFIG_KVM_XICS + case KVM_REG_PPC_ICP_STATE: + if (!vcpu->arch.icp) { + r = -ENXIO; + break; + } + val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu)); + break; +#endif /* CONFIG_KVM_XICS */ default: r = -EINVAL; break; @@ -597,6 +606,16 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); break; #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_KVM_XICS + case KVM_REG_PPC_ICP_STATE: + if (!vcpu->arch.icp) { + r = -ENXIO; + break; + } + r = kvmppc_xics_set_icp(vcpu, + set_reg_val(reg->id, val)); + break; +#endif /* CONFIG_KVM_XICS */ default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 9fb2d3909c46..ee841ed8a690 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -954,6 +954,96 @@ int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num) return 0; } +u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu) +{ + struct kvmppc_icp *icp = vcpu->arch.icp; + union kvmppc_icp_state state; + + if (!icp) + return 0; + state = icp->state; + return ((u64)state.cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) | + ((u64)state.xisr << KVM_REG_PPC_ICP_XISR_SHIFT) | + ((u64)state.mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) | + ((u64)state.pending_pri << KVM_REG_PPC_ICP_PPRI_SHIFT); +} + +int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval) +{ + struct kvmppc_icp *icp = vcpu->arch.icp; + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + union kvmppc_icp_state old_state, new_state; + struct kvmppc_ics *ics; + u8 cppr, mfrr, pending_pri; + u32 xisr; + u16 src; + bool resend; + + if (!icp || !xics) + return -ENOENT; + + cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT; + xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) & + KVM_REG_PPC_ICP_XISR_MASK; + mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT; + pending_pri = icpval >> KVM_REG_PPC_ICP_PPRI_SHIFT; + + /* Require the new state to be internally consistent */ + if (xisr == 0) { + if (pending_pri != 0xff) + return -EINVAL; + } else if (xisr == XICS_IPI) { + if (pending_pri != mfrr || pending_pri >= cppr) + return -EINVAL; + } else { + if (pending_pri >= mfrr || pending_pri >= cppr) + return -EINVAL; + ics = kvmppc_xics_find_ics(xics, xisr, &src); + if (!ics) + return -EINVAL; + } + + new_state.raw = 0; + new_state.cppr = cppr; + new_state.xisr = xisr; + new_state.mfrr = mfrr; + new_state.pending_pri = pending_pri; + + /* + * Deassert the CPU interrupt request. + * icp_try_update will reassert it if necessary. + */ + kvmppc_book3s_dequeue_irqprio(icp->vcpu, + BOOK3S_INTERRUPT_EXTERNAL_LEVEL); + + /* + * Note that if we displace an interrupt from old_state.xisr, + * we don't mark it as rejected. We expect userspace to set + * the state of the interrupt sources to be consistent with + * the ICP states (either before or afterwards, which doesn't + * matter). We do handle resends due to CPPR becoming less + * favoured because that is necessary to end up with a + * consistent state in the situation where userspace restores + * the ICS states before the ICP states. + */ + do { + old_state = ACCESS_ONCE(icp->state); + + if (new_state.mfrr <= old_state.mfrr) { + resend = false; + new_state.need_resend = old_state.need_resend; + } else { + resend = old_state.need_resend; + new_state.need_resend = 0; + } + } while (!icp_try_update(icp, old_state, new_state, false)); + + if (resend) + icp_check_resend(xics, icp); + + return 0; +} + /* -- ioctls -- */ int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args) -- cgit v1.2.3 From 5975a2e0950291a6bfe9fd5880e7952ff87764be Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 27 Apr 2013 00:28:37 +0000 Subject: KVM: PPC: Book3S: Add API for in-kernel XICS emulation This adds the API for userspace to instantiate an XICS device in a VM and connect VCPUs to it. The API consists of a new device type for the KVM_CREATE_DEVICE ioctl, a new capability KVM_CAP_IRQ_XICS, which functions similarly to KVM_CAP_IRQ_MPIC, and the KVM_IRQ_LINE ioctl, which is used to assert and deassert interrupt inputs of the XICS. The XICS device has one attribute group, KVM_DEV_XICS_GRP_SOURCES. Each attribute within this group corresponds to the state of one interrupt source. The attribute number is the same as the interrupt source number. This does not support irq routing or irqfd yet. Signed-off-by: Paul Mackerras Acked-by: David Gibson Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 8 ++ Documentation/virtual/kvm/devices/xics.txt | 66 ++++++++++ arch/powerpc/include/asm/kvm_ppc.h | 2 + arch/powerpc/include/uapi/asm/kvm.h | 12 ++ arch/powerpc/kvm/book3s_xics.c | 190 +++++++++++++++++++++++++---- arch/powerpc/kvm/book3s_xics.h | 1 + arch/powerpc/kvm/irq.h | 3 + arch/powerpc/kvm/powerpc.c | 22 ++++ include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 2 + virt/kvm/kvm_main.c | 5 + 11 files changed, 287 insertions(+), 25 deletions(-) create mode 100644 Documentation/virtual/kvm/devices/xics.txt (limited to 'arch/powerpc/include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index c09d1832e935..03492f95ed39 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2772,3 +2772,11 @@ Parameters: args[0] is the MPIC device fd args[1] is the MPIC CPU number for this vcpu This capability connects the vcpu to an in-kernel MPIC device. + +6.7 KVM_CAP_IRQ_XICS + +Architectures: ppc +Parameters: args[0] is the XICS device fd + args[1] is the XICS CPU number (server ID) for this vcpu + +This capability connects the vcpu to an in-kernel XICS device. diff --git a/Documentation/virtual/kvm/devices/xics.txt b/Documentation/virtual/kvm/devices/xics.txt new file mode 100644 index 000000000000..42864935ac5d --- /dev/null +++ b/Documentation/virtual/kvm/devices/xics.txt @@ -0,0 +1,66 @@ +XICS interrupt controller + +Device type supported: KVM_DEV_TYPE_XICS + +Groups: + KVM_DEV_XICS_SOURCES + Attributes: One per interrupt source, indexed by the source number. + +This device emulates the XICS (eXternal Interrupt Controller +Specification) defined in PAPR. The XICS has a set of interrupt +sources, each identified by a 20-bit source number, and a set of +Interrupt Control Presentation (ICP) entities, also called "servers", +each associated with a virtual CPU. + +The ICP entities are created by enabling the KVM_CAP_IRQ_ARCH +capability for each vcpu, specifying KVM_CAP_IRQ_XICS in args[0] and +the interrupt server number (i.e. the vcpu number from the XICS's +point of view) in args[1] of the kvm_enable_cap struct. Each ICP has +64 bits of state which can be read and written using the +KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu. The 64 bit +state word has the following bitfields, starting at the +least-significant end of the word: + +* Unused, 16 bits + +* Pending interrupt priority, 8 bits + Zero is the highest priority, 255 means no interrupt is pending. + +* Pending IPI (inter-processor interrupt) priority, 8 bits + Zero is the highest priority, 255 means no IPI is pending. + +* Pending interrupt source number, 24 bits + Zero means no interrupt pending, 2 means an IPI is pending + +* Current processor priority, 8 bits + Zero is the highest priority, meaning no interrupts can be + delivered, and 255 is the lowest priority. + +Each source has 64 bits of state that can be read and written using +the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the +KVM_DEV_XICS_SOURCES attribute group, with the attribute number being +the interrupt source number. The 64 bit state word has the following +bitfields, starting from the least-significant end of the word: + +* Destination (server number), 32 bits + This specifies where the interrupt should be sent, and is the + interrupt server number specified for the destination vcpu. + +* Priority, 8 bits + This is the priority specified for this interrupt source, where 0 is + the highest priority and 255 is the lowest. An interrupt with a + priority of 255 will never be delivered. + +* Level sensitive flag, 1 bit + This bit is 1 for a level-sensitive interrupt source, or 0 for + edge-sensitive (or MSI). + +* Masked flag, 1 bit + This bit is set to 1 if the interrupt is masked (cannot be delivered + regardless of its priority), for example by the ibm,int-off RTAS + call, or 0 if it is not masked. + +* Pending flag, 1 bit + This bit is 1 if the source has a pending interrupt, otherwise 0. + +Only one XICS instance may be created per VM. diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index d7339df19259..a5287fe03d77 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -315,6 +315,8 @@ extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd); extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu); extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); +extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, + struct kvm_vcpu *vcpu, u32 cpu); #else static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) { return 0; } diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 427b9aca2a0f..0fb1a6e9ff90 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -499,4 +499,16 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a) #define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b) +/* PPC64 eXternal Interrupt Controller Specification */ +#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ + +/* Layout of 64-bit source attribute values */ +#define KVM_XICS_DESTINATION_SHIFT 0 +#define KVM_XICS_DESTINATION_MASK 0xffffffffULL +#define KVM_XICS_PRIORITY_SHIFT 32 +#define KVM_XICS_PRIORITY_MASK 0xff +#define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) +#define KVM_XICS_MASKED (1ULL << 41) +#define KVM_XICS_PENDING (1ULL << 42) + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index ee841ed8a690..f7a103756618 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -55,8 +56,6 @@ * * - Make ICS lockless as well, or at least a per-interrupt lock or hashed * locks array to improve scalability - * - * - ioctl's to save/restore the entire state for snapshot & migration */ /* -- ICS routines -- */ @@ -64,7 +63,8 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, u32 new_irq); -static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) +static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level, + bool report_status) { struct ics_irq_state *state; struct kvmppc_ics *ics; @@ -81,6 +81,9 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) if (!state->exists) return -EINVAL; + if (report_status) + return state->asserted; + /* * We set state->asserted locklessly. This should be fine as * we are the only setter, thus concurrent access is undefined @@ -96,7 +99,7 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) /* Attempt delivery */ icp_deliver_irq(xics, NULL, irq); - return 0; + return state->asserted; } static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, @@ -891,8 +894,8 @@ static void xics_debugfs_init(struct kvmppc_xics *xics) kfree(name); } -struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm, - struct kvmppc_xics *xics, int irq) +static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm, + struct kvmppc_xics *xics, int irq) { struct kvmppc_ics *ics; int i, icsid; @@ -1044,34 +1047,138 @@ int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval) return 0; } -/* -- ioctls -- */ +static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr) +{ + int ret; + struct kvmppc_ics *ics; + struct ics_irq_state *irqp; + u64 __user *ubufp = (u64 __user *) addr; + u16 idx; + u64 val, prio; + + ics = kvmppc_xics_find_ics(xics, irq, &idx); + if (!ics) + return -ENOENT; -int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args) + irqp = &ics->irq_state[idx]; + mutex_lock(&ics->lock); + ret = -ENOENT; + if (irqp->exists) { + val = irqp->server; + prio = irqp->priority; + if (prio == MASKED) { + val |= KVM_XICS_MASKED; + prio = irqp->saved_priority; + } + val |= prio << KVM_XICS_PRIORITY_SHIFT; + if (irqp->asserted) + val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING; + else if (irqp->masked_pending || irqp->resend) + val |= KVM_XICS_PENDING; + ret = 0; + } + mutex_unlock(&ics->lock); + + if (!ret && put_user(val, ubufp)) + ret = -EFAULT; + + return ret; +} + +static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr) { - struct kvmppc_xics *xics; - int r; + struct kvmppc_ics *ics; + struct ics_irq_state *irqp; + u64 __user *ubufp = (u64 __user *) addr; + u16 idx; + u64 val; + u8 prio; + u32 server; + + if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS) + return -ENOENT; + + ics = kvmppc_xics_find_ics(xics, irq, &idx); + if (!ics) { + ics = kvmppc_xics_create_ics(xics->kvm, xics, irq); + if (!ics) + return -ENOMEM; + } + irqp = &ics->irq_state[idx]; + if (get_user(val, ubufp)) + return -EFAULT; + + server = val & KVM_XICS_DESTINATION_MASK; + prio = val >> KVM_XICS_PRIORITY_SHIFT; + if (prio != MASKED && + kvmppc_xics_find_server(xics->kvm, server) == NULL) + return -EINVAL; - /* locking against multiple callers? */ + mutex_lock(&ics->lock); + irqp->server = server; + irqp->saved_priority = prio; + if (val & KVM_XICS_MASKED) + prio = MASKED; + irqp->priority = prio; + irqp->resend = 0; + irqp->masked_pending = 0; + irqp->asserted = 0; + if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE)) + irqp->asserted = 1; + irqp->exists = 1; + mutex_unlock(&ics->lock); - xics = kvm->arch.xics; - if (!xics) - return -ENODEV; + if (val & KVM_XICS_PENDING) + icp_deliver_irq(xics, NULL, irqp->number); - switch (args->level) { - case KVM_INTERRUPT_SET: - case KVM_INTERRUPT_SET_LEVEL: - case KVM_INTERRUPT_UNSET: - r = ics_deliver_irq(xics, args->irq, args->level); - break; - default: - r = -EINVAL; + return 0; +} + +int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, + bool line_status) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + + return ics_deliver_irq(xics, irq, level, line_status); +} + +static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + struct kvmppc_xics *xics = dev->private; + + switch (attr->group) { + case KVM_DEV_XICS_GRP_SOURCES: + return xics_set_source(xics, attr->attr, attr->addr); } + return -ENXIO; +} - return r; +static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + struct kvmppc_xics *xics = dev->private; + + switch (attr->group) { + case KVM_DEV_XICS_GRP_SOURCES: + return xics_get_source(xics, attr->attr, attr->addr); + } + return -ENXIO; } -void kvmppc_xics_free(struct kvmppc_xics *xics) +static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { + switch (attr->group) { + case KVM_DEV_XICS_GRP_SOURCES: + if (attr->attr >= KVMPPC_XICS_FIRST_IRQ && + attr->attr < KVMPPC_XICS_NR_IRQS) + return 0; + break; + } + return -ENXIO; +} + +static void kvmppc_xics_free(struct kvm_device *dev) +{ + struct kvmppc_xics *xics = dev->private; int i; struct kvm *kvm = xics->kvm; @@ -1083,17 +1190,21 @@ void kvmppc_xics_free(struct kvmppc_xics *xics) for (i = 0; i <= xics->max_icsid; i++) kfree(xics->ics[i]); kfree(xics); + kfree(dev); } -int kvm_xics_create(struct kvm *kvm, u32 type) +static int kvmppc_xics_create(struct kvm_device *dev, u32 type) { struct kvmppc_xics *xics; + struct kvm *kvm = dev->kvm; int ret = 0; xics = kzalloc(sizeof(*xics), GFP_KERNEL); if (!xics) return -ENOMEM; + dev->private = xics; + xics->dev = dev; xics->kvm = kvm; /* Already there ? */ @@ -1120,6 +1231,35 @@ int kvm_xics_create(struct kvm *kvm, u32 type) return 0; } +struct kvm_device_ops kvm_xics_ops = { + .name = "kvm-xics", + .create = kvmppc_xics_create, + .destroy = kvmppc_xics_free, + .set_attr = xics_set_attr, + .get_attr = xics_get_attr, + .has_attr = xics_has_attr, +}; + +int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, + u32 xcpu) +{ + struct kvmppc_xics *xics = dev->private; + int r = -EBUSY; + + if (dev->ops != &kvm_xics_ops) + return -EPERM; + if (xics->kvm != vcpu->kvm) + return -EPERM; + if (vcpu->arch.irq_type) + return -EBUSY; + + r = kvmppc_xics_create_icp(vcpu, xcpu); + if (!r) + vcpu->arch.irq_type = KVMPPC_IRQ_XICS; + + return r; +} + void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { if (!vcpu->arch.icp) diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index e4fdec3dde77..dd9326c5c19b 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -88,6 +88,7 @@ struct kvmppc_ics { struct kvmppc_xics { struct kvm *kvm; + struct kvm_device *dev; struct dentry *dentry; u32 max_icsid; bool real_mode; diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h index f1e27fdc8c2e..5a9a10b90762 100644 --- a/arch/powerpc/kvm/irq.h +++ b/arch/powerpc/kvm/irq.h @@ -9,6 +9,9 @@ static inline int irqchip_in_kernel(struct kvm *kvm) #ifdef CONFIG_KVM_MPIC ret = ret || (kvm->arch.mpic != NULL); +#endif +#ifdef CONFIG_KVM_XICS + ret = ret || (kvm->arch.xics != NULL); #endif smp_rmb(); return ret; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 270773f0d955..6316ee336e88 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -342,6 +342,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_SPAPR_TCE: case KVM_CAP_PPC_ALLOC_HTAB: case KVM_CAP_PPC_RTAS: +#ifdef CONFIG_KVM_XICS + case KVM_CAP_IRQ_XICS: +#endif r = 1; break; #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -837,6 +840,25 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, break; } #endif +#ifdef CONFIG_KVM_XICS + case KVM_CAP_IRQ_XICS: { + struct file *filp; + struct kvm_device *dev; + + r = -EBADF; + filp = fget(cap->args[0]); + if (!filp) + break; + + r = -EPERM; + dev = kvm_device_from_filp(filp); + if (dev) + r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]); + + fput(filp); + break; + } +#endif /* CONFIG_KVM_XICS */ default: r = -EINVAL; break; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 996661eb8e99..7823b6369c5e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1086,6 +1086,7 @@ void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); extern struct kvm_device_ops kvm_mpic_ops; +extern struct kvm_device_ops kvm_xics_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 965e5b52dee0..a5c86fc34a37 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -665,6 +665,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_DEVICE_CTRL 89 #define KVM_CAP_IRQ_MPIC 90 #define KVM_CAP_PPC_RTAS 91 +#define KVM_CAP_IRQ_XICS 92 #ifdef KVM_CAP_IRQ_ROUTING @@ -837,6 +838,7 @@ struct kvm_device_attr { #define KVM_DEV_TYPE_FSL_MPIC_20 1 #define KVM_DEV_TYPE_FSL_MPIC_42 2 +#define KVM_DEV_TYPE_XICS 3 /* * ioctls for VM fds diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5da9f02a2a67..e6e7abe16d05 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2246,6 +2246,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm, case KVM_DEV_TYPE_FSL_MPIC_42: ops = &kvm_mpic_ops; break; +#endif +#ifdef CONFIG_KVM_XICS + case KVM_DEV_TYPE_XICS: + ops = &kvm_xics_ops; + break; #endif default: return -ENODEV; -- cgit v1.2.3