diff options
30 files changed, 903 insertions, 458 deletions
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 3087ea9c5f2e..93bee7b93854 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -33,15 +33,27 @@ typedef u8 uprobe_opcode_t; #define UPROBE_SWBP_INSN 0xcc #define UPROBE_SWBP_INSN_SIZE 1 +struct uprobe_xol_ops; + struct arch_uprobe { - u16 fixups; union { u8 insn[MAX_UINSN_BYTES]; u8 ixol[MAX_UINSN_BYTES]; }; + + u16 fixups; + const struct uprobe_xol_ops *ops; + + union { #ifdef CONFIG_X86_64 - unsigned long rip_rela_target_address; + unsigned long rip_rela_target_address; #endif + struct { + s32 offs; + u8 ilen; + u8 opc1; + } branch; + }; }; struct arch_uprobe_task { diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ae407f7226c8..89f3b7c1af20 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -721,6 +721,7 @@ int perf_assign_events(struct perf_event **events, int n, return sched.state.unassigned; } +EXPORT_SYMBOL_GPL(perf_assign_events); int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 2ed845928b5f..ace22916ade3 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -53,7 +53,7 @@ #define OPCODE1(insn) ((insn)->opcode.bytes[0]) #define OPCODE2(insn) ((insn)->opcode.bytes[1]) #define OPCODE3(insn) ((insn)->opcode.bytes[2]) -#define MODRM_REG(insn) X86_MODRM_REG(insn->modrm.value) +#define MODRM_REG(insn) X86_MODRM_REG((insn)->modrm.value) #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ @@ -229,63 +229,6 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) return -ENOTSUPP; } -/* - * Figure out which fixups arch_uprobe_post_xol() will need to perform, and - * annotate arch_uprobe->fixups accordingly. To start with, - * arch_uprobe->fixups is either zero or it reflects rip-related fixups. - */ -static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) -{ - bool fix_ip = true, fix_call = false; /* defaults */ - int reg; - - insn_get_opcode(insn); /* should be a nop */ - - switch (OPCODE1(insn)) { - case 0x9d: - /* popf */ - auprobe->fixups |= UPROBE_FIX_SETF; - break; - case 0xc3: /* ret/lret */ - case 0xcb: - case 0xc2: - case 0xca: - /* ip is correct */ - fix_ip = false; - break; - case 0xe8: /* call relative - Fix return addr */ - fix_call = true; - break; - case 0x9a: /* call absolute - Fix return addr, not ip */ - fix_call = true; - fix_ip = false; - break; - case 0xff: - insn_get_modrm(insn); - reg = MODRM_REG(insn); - if (reg == 2 || reg == 3) { - /* call or lcall, indirect */ - /* Fix return addr; ip is correct. */ - fix_call = true; - fix_ip = false; - } else if (reg == 4 || reg == 5) { - /* jmp or ljmp, indirect */ - /* ip is correct. */ - fix_ip = false; - } - break; - case 0xea: /* jmp absolute -- ip is correct */ - fix_ip = false; - break; - default: - break; - } - if (fix_ip) - auprobe->fixups |= UPROBE_FIX_IP; - if (fix_call) - auprobe->fixups |= UPROBE_FIX_CALL; -} - #ifdef CONFIG_X86_64 /* * If arch_uprobe->insn doesn't use rip-relative addressing, return @@ -310,15 +253,11 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) * - The displacement is always 4 bytes. */ static void -handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) +handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) { u8 *cursor; u8 reg; - if (mm->context.ia32_compat) - return; - - auprobe->rip_rela_target_address = 0x0; if (!insn_rip_relative(insn)) return; @@ -372,7 +311,48 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct ins cursor++; memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes); } - return; +} + +/* + * If we're emulating a rip-relative instruction, save the contents + * of the scratch register and store the target address in that register. + */ +static void +pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, + struct arch_uprobe_task *autask) +{ + if (auprobe->fixups & UPROBE_FIX_RIP_AX) { + autask->saved_scratch_register = regs->ax; + regs->ax = current->utask->vaddr; + regs->ax += auprobe->rip_rela_target_address; + } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) { + autask->saved_scratch_register = regs->cx; + regs->cx = current->utask->vaddr; + regs->cx += auprobe->rip_rela_target_address; + } +} + +static void +handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) +{ + if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { + struct arch_uprobe_task *autask; + + autask = ¤t->utask->autask; + if (auprobe->fixups & UPROBE_FIX_RIP_AX) + regs->ax = autask->saved_scratch_register; + else + regs->cx = autask->saved_scratch_register; + + /* + * The original instruction includes a displacement, and so + * is 4 bytes longer than what we've just single-stepped. + * Caller may need to apply other fixups to handle stuff + * like "jmpq *...(%rip)" and "callq *...(%rip)". + */ + if (correction) + *correction += 4; + } } static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) @@ -401,9 +381,19 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, return validate_insn_64bits(auprobe, insn); } #else /* 32-bit: */ -static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) +/* + * No RIP-relative addressing on 32-bit + */ +static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) +{ +} +static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, + struct arch_uprobe_task *autask) +{ +} +static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, + long *correction) { - /* No RIP-relative addressing on 32-bit */ } static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) @@ -412,141 +402,311 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, } #endif /* CONFIG_X86_64 */ -/** - * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. - * @mm: the probed address space. - * @arch_uprobe: the probepoint information. - * @addr: virtual address at which to install the probepoint - * Return 0 on success or a -ve number on error. +struct uprobe_xol_ops { + bool (*emulate)(struct arch_uprobe *, struct pt_regs *); + int (*pre_xol)(struct arch_uprobe *, struct pt_regs *); + int (*post_xol)(struct arch_uprobe *, struct pt_regs *); +}; + +static inline int sizeof_long(void) +{ + return is_ia32_task() ? 4 : 8; +} + +static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + pre_xol_rip_insn(auprobe, regs, ¤t->utask->autask); + return 0; +} + +/* + * Adjust the return address pushed by a call insn executed out of line. */ -int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) +static int adjust_ret_addr(unsigned long sp, long correction) { - int ret; - struct insn insn; + int rasize = sizeof_long(); + long ra; - auprobe->fixups = 0; - ret = validate_insn_bits(auprobe, mm, &insn); - if (ret != 0) - return ret; + if (copy_from_user(&ra, (void __user *)sp, rasize)) + return -EFAULT; - handle_riprel_insn(auprobe, mm, &insn); - prepare_fixups(auprobe, &insn); + ra += correction; + if (copy_to_user((void __user *)sp, &ra, rasize)) + return -EFAULT; return 0; } -#ifdef CONFIG_X86_64 -/* - * If we're emulating a rip-relative instruction, save the contents - * of the scratch register and store the target address in that register. - */ -static void -pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, - struct arch_uprobe_task *autask) +static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { - if (auprobe->fixups & UPROBE_FIX_RIP_AX) { - autask->saved_scratch_register = regs->ax; - regs->ax = current->utask->vaddr; - regs->ax += auprobe->rip_rela_target_address; - } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) { - autask->saved_scratch_register = regs->cx; - regs->cx = current->utask->vaddr; - regs->cx += auprobe->rip_rela_target_address; + struct uprobe_task *utask = current->utask; + long correction = (long)(utask->vaddr - utask->xol_vaddr); + + handle_riprel_post_xol(auprobe, regs, &correction); + if (auprobe->fixups & UPROBE_FIX_IP) + regs->ip += correction; + + if (auprobe->fixups & UPROBE_FIX_CALL) { + if (adjust_ret_addr(regs->sp, correction)) { + regs->sp += sizeof_long(); + return -ERESTART; + } } + + return 0; } -#else -static void -pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, - struct arch_uprobe_task *autask) + +static struct uprobe_xol_ops default_xol_ops = { + .pre_xol = default_pre_xol_op, + .post_xol = default_post_xol_op, +}; + +static bool branch_is_call(struct arch_uprobe *auprobe) { - /* No RIP-relative addressing on 32-bit */ + return auprobe->branch.opc1 == 0xe8; } -#endif -/* - * arch_uprobe_pre_xol - prepare to execute out of line. - * @auprobe: the probepoint information. - * @regs: reflects the saved user state of current task. - */ -int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) -{ - struct arch_uprobe_task *autask; +#define CASE_COND \ + COND(70, 71, XF(OF)) \ + COND(72, 73, XF(CF)) \ + COND(74, 75, XF(ZF)) \ + COND(78, 79, XF(SF)) \ + COND(7a, 7b, XF(PF)) \ + COND(76, 77, XF(CF) || XF(ZF)) \ + COND(7c, 7d, XF(SF) != XF(OF)) \ + COND(7e, 7f, XF(ZF) || XF(SF) != XF(OF)) - autask = ¤t->utask->autask; - autask->saved_trap_nr = current->thread.trap_nr; - current->thread.trap_nr = UPROBE_TRAP_NR; - regs->ip = current->utask->xol_vaddr; - pre_xol_rip_insn(auprobe, regs, autask); +#define COND(op_y, op_n, expr) \ + case 0x ## op_y: DO((expr) != 0) \ + case 0x ## op_n: DO((expr) == 0) - autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF); - regs->flags |= X86_EFLAGS_TF; - if (test_tsk_thread_flag(current, TIF_BLOCKSTEP)) - set_task_blockstep(current, false); +#define XF(xf) (!!(flags & X86_EFLAGS_ ## xf)) - return 0; +static bool is_cond_jmp_opcode(u8 opcode) +{ + switch (opcode) { + #define DO(expr) \ + return true; + CASE_COND + #undef DO + + default: + return false; + } } -/* - * This function is called by arch_uprobe_post_xol() to adjust the return - * address pushed by a call instruction executed out of line. - */ -static int adjust_ret_addr(unsigned long sp, long correction) +static bool check_jmp_cond(struct arch_uprobe *auprobe, struct pt_regs *regs) { - int rasize, ncopied; - long ra = 0; + unsigned long flags = regs->flags; - if (is_ia32_task()) - rasize = 4; - else - rasize = 8; + switch (auprobe->branch.opc1) { + #define DO(expr) \ + return expr; + CASE_COND + #undef DO - ncopied = copy_from_user(&ra, (void __user *)sp, rasize); - if (unlikely(ncopied)) - return -EFAULT; + default: /* not a conditional jmp */ + return true; + } +} - ra += correction; - ncopied = copy_to_user((void __user *)sp, &ra, rasize); - if (unlikely(ncopied)) - return -EFAULT; +#undef XF +#undef COND +#undef CASE_COND - return 0; +static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + unsigned long new_ip = regs->ip += auprobe->branch.ilen; + unsigned long offs = (long)auprobe->branch.offs; + + if (branch_is_call(auprobe)) { + unsigned long new_sp = regs->sp - sizeof_long(); + /* + * If it fails we execute this (mangled, see the comment in + * branch_clear_offset) insn out-of-line. In the likely case + * this should trigger the trap, and the probed application + * should die or restart the same insn after it handles the + * signal, arch_uprobe_post_xol() won't be even called. + * + * But there is corner case, see the comment in ->post_xol(). + */ + if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long())) + return false; + regs->sp = new_sp; + } else if (!check_jmp_cond(auprobe, regs)) { + offs = 0; + } + + regs->ip = new_ip + offs; + return true; } -#ifdef CONFIG_X86_64 -static bool is_riprel_insn(struct arch_uprobe *auprobe) +static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { - return ((auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) != 0); + BUG_ON(!branch_is_call(auprobe)); + /* + * We can only get here if branch_emulate_op() failed to push the ret + * address _and_ another thread expanded our stack before the (mangled) + * "call" insn was executed out-of-line. Just restore ->sp and restart. + * We could also restore ->ip and try to call branch_emulate_op() again. + */ + regs->sp += sizeof_long(); + return -ERESTART; } -static void -handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) +static void branch_clear_offset(struct arch_uprobe *auprobe, struct insn *insn) { - if (is_riprel_insn(auprobe)) { - struct arch_uprobe_task *autask; + /* + * Turn this insn into "call 1f; 1:", this is what we will execute + * out-of-line if ->emulate() fails. We only need this to generate + * a trap, so that the probed task receives the correct signal with + * the properly filled siginfo. + * + * But see the comment in ->post_xol(), in the unlikely case it can + * succeed. So we need to ensure that the new ->ip can not fall into + * the non-canonical area and trigger #GP. + * + * We could turn it into (say) "pushf", but then we would need to + * divorce ->insn[] and ->ixol[]. We need to preserve the 1st byte + * of ->insn[] for set_orig_insn(). + */ + memset(auprobe->insn + insn_offset_immediate(insn), + 0, insn->immediate.nbytes); +} - autask = ¤t->utask->autask; - if (auprobe->fixups & UPROBE_FIX_RIP_AX) - regs->ax = autask->saved_scratch_register; - else - regs->cx = autask->saved_scratch_register; +static struct uprobe_xol_ops branch_xol_ops = { + .emulate = branch_emulate_op, + .post_xol = branch_post_xol_op, +}; + +/* Returns -ENOSYS if branch_xol_ops doesn't handle this insn */ +static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) +{ + u8 opc1 = OPCODE1(insn); + + /* has the side-effect of processing the entire instruction */ + insn_get_length(insn); + if (WARN_ON_ONCE(!insn_complete(insn))) + return -ENOEXEC; + + switch (opc1) { + case 0xeb: /* jmp 8 */ + case 0xe9: /* jmp 32 */ + case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */ + break; + + case 0xe8: /* call relative */ + branch_clear_offset(auprobe, insn); + break; + case 0x0f: + if (insn->opcode.nbytes != 2) + return -ENOSYS; /* - * The original instruction includes a displacement, and so - * is 4 bytes longer than what we've just single-stepped. - * Fall through to handle stuff like "jmpq *...(%rip)" and - * "callq *...(%rip)". + * If it is a "near" conditional jmp, OPCODE2() - 0x10 matches + * OPCODE1() of the "short" jmp which checks the same condition. */ - if (correction) - *correction += 4; + opc1 = OPCODE2(insn) - 0x10; + default: + if (!is_cond_jmp_opcode(opc1)) + return -ENOSYS; } + + auprobe->branch.opc1 = opc1; + auprobe->branch.ilen = insn->length; + auprobe->branch.offs = insn->immediate.value; + + auprobe->ops = &branch_xol_ops; + return 0; } -#else -static void -handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) + +/** + * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. + * @mm: the probed address space. + * @arch_uprobe: the probepoint information. + * @addr: virtual address at which to install the probepoint + * Return 0 on success or a -ve number on error. + */ +int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) +{ + struct insn insn; + bool fix_ip = true, fix_call = false; + int ret; + + ret = validate_insn_bits(auprobe, mm, &insn); + if (ret) + return ret; + + ret = branch_setup_xol_ops(auprobe, &insn); + if (ret != -ENOSYS) + return ret; + + /* + * Figure out which fixups arch_uprobe_post_xol() will need to perform, + * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups + * is either zero or it reflects rip-related fixups. + */ + switch (OPCODE1(&insn)) { + case 0x9d: /* popf */ + auprobe->fixups |= UPROBE_FIX_SETF; + break; + case 0xc3: /* ret or lret -- ip is correct */ + case 0xcb: + case 0xc2: + case 0xca: + fix_ip = false; + break; + case 0x9a: /* call absolute - Fix return addr, not ip */ + fix_call = true; + fix_ip = false; + break; + case 0xea: /* jmp absolute -- ip is correct */ + fix_ip = false; + break; + case 0xff: + insn_get_modrm(&insn); + switch (MODRM_REG(&insn)) { + case 2: case 3: /* call or lcall, indirect */ + fix_call = true; + case 4: case 5: /* jmp or ljmp, indirect */ + fix_ip = false; + } + /* fall through */ + default: + handle_riprel_insn(auprobe, &insn); + } + + if (fix_ip) + auprobe->fixups |= UPROBE_FIX_IP; + if (fix_call) + auprobe->fixups |= UPROBE_FIX_CALL; + + auprobe->ops = &default_xol_ops; + return 0; +} + +/* + * arch_uprobe_pre_xol - prepare to execute out of line. + * @auprobe: the probepoint information. + * @regs: reflects the saved user state of current task. + */ +int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { - /* No RIP-relative addressing on 32-bit */ + struct uprobe_task *utask = current->utask; + + regs->ip = utask->xol_vaddr; + utask->autask.saved_trap_nr = current->thread.trap_nr; + current->thread.trap_nr = UPROBE_TRAP_NR; + + utask->autask.saved_tf = !!(regs->flags & X86_EFLAGS_TF); + regs->flags |= X86_EFLAGS_TF; + if (test_tsk_thread_flag(current, TIF_BLOCKSTEP)) + set_task_blockstep(current, false); + + if (auprobe->ops->pre_xol) + return auprobe->ops->pre_xol(auprobe, regs); + return 0; } -#endif /* * If xol insn itself traps and generates a signal(Say, @@ -592,22 +752,25 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t) */ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { - struct uprobe_task *utask; - long correction; - int result = 0; + struct uprobe_task *utask = current->utask; WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); - utask = current->utask; - current->thread.trap_nr = utask->autask.saved_trap_nr; - correction = (long)(utask->vaddr - utask->xol_vaddr); - handle_riprel_post_xol(auprobe, regs, &correction); - if (auprobe->fixups & UPROBE_FIX_IP) - regs->ip += correction; - - if (auprobe->fixups & UPROBE_FIX_CALL) - result = adjust_ret_addr(regs->sp, correction); + if (auprobe->ops->post_xol) { + int err = auprobe->ops->post_xol(auprobe, regs); + if (err) { + arch_uprobe_abort_xol(auprobe, regs); + /* + * Restart the probed insn. ->post_xol() must ensure + * this is really possible if it returns -ERESTART. + */ + if (err == -ERESTART) + return 0; + return err; + } + } + current->thread.trap_nr = utask->autask.saved_trap_nr; /* * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP * so we can get an extra SIGTRAP if we do not clear TF. We need @@ -618,7 +781,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) else if (!(auprobe->fixups & UPROBE_FIX_SETF)) regs->flags &= ~X86_EFLAGS_TF; - return result; + return 0; } /* callback routine for handling exceptions. */ @@ -652,8 +815,9 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, /* * This function gets called when XOL instruction either gets trapped or - * the thread has a fatal signal, so reset the instruction pointer to its - * probed address. + * the thread has a fatal signal, or if arch_uprobe_post_xol() failed. + * Reset the instruction pointer to its probed address for the potential + * restart or for post mortem analysis. */ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { @@ -668,25 +832,10 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) regs->flags &= ~X86_EFLAGS_TF; } -/* - * Skip these instructions as per the currently known x86 ISA. - * rep=0x66*; nop=0x90 - */ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { - int i; - - for (i = 0; i < MAX_UINSN_BYTES; i++) { - if (auprobe->insn[i] == 0x66) - continue; - - if (auprobe->insn[i] == 0x90) { - regs->ip += i + 1; - return true; - } - - break; - } + if (auprobe->ops->emulate) + return auprobe->ops->emulate(auprobe, regs); return false; } @@ -701,23 +850,21 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs) { - int rasize, ncopied; + int rasize = sizeof_long(), nleft; unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */ - rasize = is_ia32_task() ? 4 : 8; - ncopied = copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize); - if (unlikely(ncopied)) + if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize)) return -1; /* check whether address has been already hijacked */ if (orig_ret_vaddr == trampoline_vaddr) return orig_ret_vaddr; - ncopied = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize); - if (likely(!ncopied)) + nleft = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize); + if (likely(!nleft)) return orig_ret_vaddr; - if (ncopied != rasize) { + if (nleft != rasize) { pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, " "%%ip=%#lx\n", current->pid, regs->sp, regs->ip); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3356abcfff18..af6dcf1d9e47 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -172,6 +172,7 @@ struct perf_event; struct pmu { struct list_head entry; + struct module *module; struct device *dev; const struct attribute_group **attr_groups; const char *name; diff --git a/kernel/events/core.c b/kernel/events/core.c index f83a71a3e46d..5129b1201050 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -39,6 +39,7 @@ #include <linux/hw_breakpoint.h> #include <linux/mm_types.h> #include <linux/cgroup.h> +#include <linux/module.h> #include "internal.h" @@ -3229,6 +3230,9 @@ static void __free_event(struct perf_event *event) if (event->ctx) put_ctx(event->ctx); + if (event->pmu) + module_put(event->pmu->module); + call_rcu(&event->rcu_head, free_event_rcu); } static void free_event(struct perf_event *event) @@ -6551,6 +6555,7 @@ free_pdc: free_percpu(pmu->pmu_disable_count); goto unlock; } +EXPORT_SYMBOL_GPL(perf_pmu_register); void perf_pmu_unregister(struct pmu *pmu) { @@ -6572,6 +6577,7 @@ void perf_pmu_unregister(struct pmu *pmu) put_device(pmu->dev); free_pmu_context(pmu); } +EXPORT_SYMBOL_GPL(perf_pmu_unregister); struct pmu *perf_init_event(struct perf_event *event) { @@ -6585,6 +6591,10 @@ struct pmu *perf_init_event(struct perf_event *event) pmu = idr_find(&pmu_idr, event->attr.type); rcu_read_unlock(); if (pmu) { + if (!try_module_get(pmu->module)) { + pmu = ERR_PTR(-ENODEV); + goto unlock; + } event->pmu = pmu; ret = pmu->event_init(event); if (ret) @@ -6593,6 +6603,10 @@ struct pmu *perf_init_event(struct perf_event *event) } list_for_each_entry_rcu(pmu, &pmus, entry) { + if (!try_module_get(pmu->module)) { + pmu = ERR_PTR(-ENODEV); + goto unlock; + } event->pmu = pmu; ret = pmu->event_init(event); if (!ret) @@ -6771,6 +6785,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, err_pmu: if (event->destroy) event->destroy(event); + module_put(pmu->module); err_ns: if (event->ns) put_pid_ns(event->ns); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 04709b66369d..d1edc5e6fd03 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -60,8 +60,6 @@ static struct percpu_rw_semaphore dup_mmap_sem; /* Have a copy of original instruction */ #define UPROBE_COPY_INSN 0 -/* Can skip singlestep */ -#define UPROBE_SKIP_SSTEP 1 struct uprobe { struct rb_node rb_node; /* node in the rb tree */ @@ -491,12 +489,9 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) uprobe->offset = offset; init_rwsem(&uprobe->register_rwsem); init_rwsem(&uprobe->consumer_rwsem); - /* For now assume that the instruction need not be single-stepped */ - __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags); /* add to uprobes_tree, sorted on inode:offset */ cur_uprobe = insert_uprobe(uprobe); - /* a uprobe exists for this inode:offset combination */ if (cur_uprobe) { kfree(uprobe); @@ -1628,20 +1623,6 @@ bool uprobe_deny_signal(void) return true; } -/* - * Avoid singlestepping the original instruction if the original instruction - * is a NOP or can be emulated. - */ -static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs) -{ - if (test_bit(UPROBE_SKIP_SSTEP, &uprobe->flags)) { - if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) - return true; - clear_bit(UPROBE_SKIP_SSTEP, &uprobe->flags); - } - return false; -} - static void mmf_recalc_uprobes(struct mm_struct *mm) { struct vm_area_struct *vma; @@ -1868,13 +1849,13 @@ static void handle_swbp(struct pt_regs *regs) handler_chain(uprobe, regs); - if (can_skip_sstep(uprobe, regs)) + if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) goto out; if (!pre_ssout(uprobe, regs, bp_vaddr)) return; - /* can_skip_sstep() succeeded, or restart if can't singlestep */ + /* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */ out: put_uprobe(uprobe); } @@ -1886,10 +1867,11 @@ out: static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) { struct uprobe *uprobe; + int err = 0; uprobe = utask->active_uprobe; if (utask->state == UTASK_SSTEP_ACK) - arch_uprobe_post_xol(&uprobe->arch, regs); + err = arch_uprobe_post_xol(&uprobe->arch, regs); else if (utask->state == UTASK_SSTEP_TRAPPED) arch_uprobe_abort_xol(&uprobe->arch, regs); else @@ -1903,6 +1885,11 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) spin_lock_irq(¤t->sighand->siglock); recalc_sigpending(); /* see uprobe_deny_signal() */ spin_unlock_irq(¤t->sighand->siglock); + + if (unlikely(err)) { + uprobe_warn(current, "execute the probed insn, sending SIGILL."); + force_sig_info(SIGILL, SEND_SIG_FORCED, current); + } } /* diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index d55092ceee29..53d26829cd4d 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1017,6 +1017,7 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, return ret; } +EXPORT_SYMBOL_GPL(__hrtimer_start_range_ns); /** * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index fdfceee0ffd0..fbfa1192923c 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -33,17 +33,20 @@ OPTIONS -d:: --dsos=:: Only consider symbols in these dsos. CSV that understands - file://filename entries. + file://filename entries. This option will affect the percentage + of the Baseline/Delta column. See --percentage for more info. -C:: --comms=:: Only consider symbols in these comms. CSV that understands - file://filename entries. + file://filename entries. This option will affect the percentage + of the Baseline/Delta column. See --percentage for more info. -S:: --symbols=:: Only consider these symbols. CSV that understands - file://filename entries. + file://filename entries. This option will affect the percentage + of the Baseline/Delta column. See --percentage for more info. -s:: --sort=:: @@ -89,6 +92,14 @@ OPTIONS --order:: Specify compute sorting column number. +--percentage:: + Determine how to display the overhead percentage of filtered entries. + Filters can be applied by --comms, --dsos and/or --symbols options. + + "relative" means it's relative to filtered entries only so that the + sum of shown entries will be always 100%. "absolute" means it retains + the original value before and after the filter is applied. + COMPARISON ---------- The comparison is governed by the baseline file. The baseline perf.data @@ -157,6 +168,10 @@ with: - period_percent being the % of the hist entry period value within single data file + - with filtering by -C, -d and/or -S, period_percent might be changed + relative to how entries are filtered. Use --percentage=absolute to + prevent such fluctuation. + ratio ~~~~~ If specified the 'Ratio' column is displayed with value 'r' computed as: diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 8eab8a4bdeb8..09af66298564 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -25,10 +25,6 @@ OPTIONS --verbose:: Be more verbose. (show symbol address, etc) --d:: ---dsos=:: - Only consider symbols in these dsos. CSV that understands - file://filename entries. -n:: --show-nr-samples:: Show the number of samples for each symbol @@ -42,11 +38,18 @@ OPTIONS -c:: --comms=:: Only consider symbols in these comms. CSV that understands - file://filename entries. + file://filename entries. This option will affect the percentage of + the overhead column. See --percentage for more info. +-d:: +--dsos=:: + Only consider symbols in these dsos. CSV that understands + file://filename entries. This option will affect the percentage of + the overhead column. See --percentage for more info. -S:: --symbols=:: Only consider these symbols. CSV that understands - file://filename entries. + file://filename entries. This option will affect the percentage of + the overhead column. See --percentage for more info. --symbol-filter=:: Only show symbols that match (partially) with this filter. @@ -237,6 +240,15 @@ OPTIONS Do not show entries which have an overhead under that percent. (Default: 0). +--percentage:: + Determine how to display the overhead percentage of filtered entries. + Filters can be applied by --comms, --dsos and/or --symbols options and + Zoom operations on the TUI (thread, dso, etc). + + "relative" means it's relative to filtered entries only so that the + sum of shown entries will be always 100%. "absolute" means it retains + the original value before and after the filter is applied. + --header:: Show header information in the perf.data file. This includes various information like hostname, OS and perf version, cpu/mem diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 976b00c6cdb1..64ed79c43639 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -123,13 +123,16 @@ Default is to monitor all CPUS. Show a column with the sum of periods. --dsos:: - Only consider symbols in these dsos. + Only consider symbols in these dsos. This option will affect the + percentage of the overhead column. See --percentage for more info. --comms:: - Only consider symbols in these comms. + Only consider symbols in these comms. This option will affect the + percentage of the overhead column. See --percentage for more info. --symbols:: - Only consider these symbols. + Only consider these symbols. This option will affect the + percentage of the overhead column. See --percentage for more info. -M:: --disassembler-style=:: Set disassembler style for objdump. @@ -165,6 +168,15 @@ Default is to monitor all CPUS. Do not show entries which have an overhead under that percent. (Default: 0). +--percentage:: + Determine how to display the overhead percentage of filtered entries. + Filters can be applied by --comms, --dsos and/or --symbols options and + Zoom operations on the TUI (thread, dso, etc). + + "relative" means it's relative to filtered entries only so that the + sum of shown entries will be always 100%. "absolute" means it retains + the original value before and after the filter is applied. + INTERACTIVE PROMPTING KEYS -------------------------- diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 204fffe22532..6ef80f22c1e2 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -220,7 +220,8 @@ static int setup_compute(const struct option *opt, const char *str, static double period_percent(struct hist_entry *he, u64 period) { - u64 total = he->hists->stats.total_period; + u64 total = hists__total_period(he->hists); + return (period * 100.0) / total; } @@ -259,11 +260,18 @@ static s64 compute_wdiff(struct hist_entry *he, struct hist_entry *pair) static int formula_delta(struct hist_entry *he, struct hist_entry *pair, char *buf, size_t size) { + u64 he_total = he->hists->stats.total_period; + u64 pair_total = pair->hists->stats.total_period; + + if (symbol_conf.filter_relative) { + he_total = he->hists->stats.total_non_filtered_period; + pair_total = pair->hists->stats.total_non_filtered_period; + } return scnprintf(buf, size, "(%" PRIu64 " * 100 / %" PRIu64 ") - " "(%" PRIu64 " * 100 / %" PRIu64 ")", - pair->stat.period, pair->hists->stats.total_period, - he->stat.period, he->hists->stats.total_period); + pair->stat.period, pair_total, + he->stat.period, he_total); } static int formula_ratio(struct hist_entry *he, struct hist_entry *pair, @@ -327,15 +335,16 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused, return -1; } - if (al.filtered) - return 0; - if (hists__add_entry(&evsel->hists, &al, sample->period, sample->weight, sample->transaction)) { pr_warning("problem incrementing symbol period, skipping event\n"); return -1; } + if (al.filtered == 0) { + evsel->hists.stats.total_non_filtered_period += sample->period; + evsel->hists.nr_non_filtered_entries++; + } evsel->hists.stats.total_period += sample->period; return 0; } @@ -565,7 +574,9 @@ static void hists__compute_resort(struct hists *hists) next = rb_first(root); hists->nr_entries = 0; + hists->nr_non_filtered_entries = 0; hists->stats.total_period = 0; + hists->stats.total_non_filtered_period = 0; hists__reset_col_len(hists); while (next != NULL) { @@ -732,13 +743,16 @@ static const struct option options[] = { OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", "Look for files with symbols relative to this directory"), OPT_UINTEGER('o', "order", &sort_compute, "Specify compute sorting."), + OPT_CALLBACK(0, "percentage", NULL, "relative|absolute", + "How to display percentage of filtered entries", parse_filter_percentage), OPT_END() }; static double baseline_percent(struct hist_entry *he) { - struct hists *hists = he->hists; - return 100.0 * he->stat.period / hists->stats.total_period; + u64 total = hists__total_period(he->hists); + + return 100.0 * he->stat.period / total; } static int hpp__color_baseline(struct perf_hpp_fmt *fmt, @@ -1120,6 +1134,8 @@ static int data_init(int argc, const char **argv) int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) { + perf_config(perf_default_config, NULL); + sort_order = diff__default_sort_order; argc = parse_options(argc, argv, options, diff_usage, 0); diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 929462aa4943..f91fa4376f4b 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -14,6 +14,7 @@ #include "util/parse-options.h" #include "util/trace-event.h" #include "util/data.h" +#include "util/cpumap.h" #include "util/debug.h" @@ -31,9 +32,6 @@ static int caller_lines = -1; static bool raw_ip; -static int *cpunode_map; -static int max_cpu_num; - struct alloc_stat { u64 call_site; u64 ptr; @@ -55,76 +53,6 @@ static struct rb_root root_caller_sorted; static unsigned long total_requested, total_allocated; static unsigned long nr_allocs, nr_cross_allocs; -#define PATH_SYS_NODE "/sys/devices/system/node" - -static int init_cpunode_map(void) -{ - FILE *fp; - int i, err = -1; - - fp = fopen("/sys/devices/system/cpu/kernel_max", "r"); - if (!fp) { - max_cpu_num = 4096; - return 0; - } - - if (fscanf(fp, "%d", &max_cpu_num) < 1) { - pr_err("Failed to read 'kernel_max' from sysfs"); - goto out_close; - } - - max_cpu_num++; - - cpunode_map = calloc(max_cpu_num, sizeof(int)); - if (!cpunode_map) { - pr_err("%s: calloc failed\n", __func__); - goto out_close; - } - - for (i = 0; i < max_cpu_num; i++) - cpunode_map[i] = -1; - - err = 0; -out_close: - fclose(fp); - return err; -} - -static int setup_cpunode_map(void) -{ - struct dirent *dent1, *dent2; - DIR *dir1, *dir2; - unsigned int cpu, mem; - char buf[PATH_MAX]; - - if (init_cpunode_map()) - return -1; - - dir1 = opendir(PATH_SYS_NODE); - if (!dir1) - return 0; - - while ((dent1 = readdir(dir1)) != NULL) { - if (dent1->d_type != DT_DIR || - sscanf(dent1->d_name, "node%u", &mem) < 1) - continue; - - snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name); - dir2 = opendir(buf); - if (!dir2) - continue; - while ((dent2 = readdir(dir2)) != NULL) { - if (dent2->d_type != DT_LNK || - sscanf(dent2->d_name, "cpu%u", &cpu) < 1) - continue; - cpunode_map[cpu] = mem; - } - closedir(dir2); - } - closedir(dir1); - return 0; -} - static int insert_alloc_stat(unsigned long call_site, unsigned long ptr, int bytes_req, int bytes_alloc, int cpu) { @@ -235,7 +163,7 @@ static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel, int ret = perf_evsel__process_alloc_event(evsel, sample); if (!ret) { - int node1 = cpunode_map[sample->cpu], + int node1 = cpu__get_node(sample->cpu), node2 = perf_evsel__intval(evsel, sample, "node"); if (node1 != node2) @@ -756,11 +684,13 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), OPT_END() }; - const char * const kmem_usage[] = { - "perf kmem [<options>] {record|stat}", + const char *const kmem_subcommands[] = { "record", "stat", NULL }; + const char *kmem_usage[] = { + NULL, NULL }; - argc = parse_options(argc, argv, kmem_options, kmem_usage, 0); + argc = parse_options_subcommand(argc, argv, kmem_options, + kmem_subcommands, kmem_usage, 0); if (!argc) usage_with_options(kmem_usage, kmem_options); @@ -770,7 +700,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) if (!strncmp(argv[0], "rec", 3)) { return __cmd_record(argc, argv); } else if (!strcmp(argv[0], "stat")) { - if (setup_cpunode_map()) + if (cpu__setup_cpunode_map()) return -1; if (list_empty(&caller_sort)) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index c852c7a85d32..6148afc995c6 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -961,8 +961,10 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) "perf lock info [<options>]", NULL }; - const char * const lock_usage[] = { - "perf lock [<options>] {record|report|script|info}", + const char *const lock_subcommands[] = { "record", "report", "script", + "info", NULL }; + const char *lock_usage[] = { + NULL, NULL }; const char * const report_usage[] = { @@ -976,8 +978,8 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) for (i = 0; i < LOCKHASH_SIZE; i++) INIT_LIST_HEAD(lockhash_table + i); - argc = parse_options(argc, argv, lock_options, lock_usage, - PARSE_OPT_STOP_AT_NON_OPTION); + argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands, + lock_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (!argc) usage_with_options(lock_usage, lock_options); diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 2e3ade69a58e..4a1a6c94a5eb 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -21,11 +21,6 @@ struct perf_mem { DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; -static const char * const mem_usage[] = { - "perf mem [<options>] {record <command> |report}", - NULL -}; - static int __cmd_record(int argc, const char **argv) { int rec_argc, i = 0, j; @@ -220,9 +215,15 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) " between columns '.' is reserved."), OPT_END() }; + const char *const mem_subcommands[] = { "record", "report", NULL }; + const char *mem_usage[] = { + NULL, + NULL + }; + - argc = parse_options(argc, argv, mem_options, mem_usage, - PARSE_OPT_STOP_AT_NON_OPTION); + argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands, + mem_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation)) usage_with_options(mem_usage, mem_options); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index c8f21137dfd8..76e2bb6cf571 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -123,6 +123,8 @@ static int report__add_mem_hist_entry(struct report *rep, struct addr_location * evsel->hists.stats.total_period += cost; hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + if (!he->filtered) + evsel->hists.stats.nr_non_filtered_samples++; err = hist_entry__append_callchain(he, sample); out: return err; @@ -176,6 +178,8 @@ static int report__add_branch_hist_entry(struct report *rep, struct addr_locatio evsel->hists.stats.total_period += 1; hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + if (!he->filtered) + evsel->hists.stats.nr_non_filtered_samples++; } else goto out; } @@ -209,6 +213,8 @@ static int report__add_hist_entry(struct report *rep, struct perf_evsel *evsel, err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); evsel->hists.stats.total_period += sample->period; + if (!he->filtered) + evsel->hists.stats.nr_non_filtered_samples++; hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); out: return err; @@ -337,6 +343,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report char buf[512]; size_t size = sizeof(buf); + if (symbol_conf.filter_relative) { + nr_samples = hists->stats.nr_non_filtered_samples; + nr_events = hists->stats.total_non_filtered_period; + } + if (perf_evsel__is_group_event(evsel)) { struct perf_evsel *pos; @@ -344,8 +355,13 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report evname = buf; for_each_group_member(pos, evsel) { - nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE]; - nr_events += pos->hists.stats.total_period; + if (symbol_conf.filter_relative) { + nr_samples += pos->hists.stats.nr_non_filtered_samples; + nr_events += pos->hists.stats.total_non_filtered_period; + } else { + nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE]; + nr_events += pos->hists.stats.total_period; + } } } @@ -573,11 +589,9 @@ static int __cmd_report(struct report *rep) } static int -parse_callchain_opt(const struct option *opt, const char *arg, int unset) +report_parse_callchain_opt(const struct option *opt, const char *arg, int unset) { struct report *rep = (struct report *)opt->value; - char *tok, *tok2; - char *endptr; /* * --no-call-graph @@ -587,80 +601,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset) return 0; } - symbol_conf.use_callchain = true; - - if (!arg) - return 0; - - tok = strtok((char *)arg, ","); - if (!tok) - return -1; - - /* get the output mode */ - if (!strncmp(tok, "graph", strlen(arg))) - callchain_param.mode = CHAIN_GRAPH_ABS; - - else if (!strncmp(tok, "flat", strlen(arg))) - callchain_param.mode = CHAIN_FLAT; - - else if (!strncmp(tok, "fractal", strlen(arg))) - callchain_param.mode = CHAIN_GRAPH_REL; - - else if (!strncmp(tok, "none", strlen(arg))) { - callchain_param.mode = CHAIN_NONE; - symbol_conf.use_callchain = false; - - return 0; - } - - else - return -1; - - /* get the min percentage */ - tok = strtok(NULL, ","); - if (!tok) - goto setup; - - callchain_param.min_percent = strtod(tok, &endptr); - if (tok == endptr) - return -1; - - /* get the print limit */ - tok2 = strtok(NULL, ","); - if (!tok2) - goto setup; - - if (tok2[0] != 'c') { - callchain_param.print_limit = strtoul(tok2, &endptr, 0); - tok2 = strtok(NULL, ","); - if (!tok2) - goto setup; - } - - /* get the call chain order */ - if (!strncmp(tok2, "caller", strlen("caller"))) - callchain_param.order = ORDER_CALLER; - else if (!strncmp(tok2, "callee", strlen("callee"))) - callchain_param.order = ORDER_CALLEE; - else - return -1; - - /* Get the sort key */ - tok2 = strtok(NULL, ","); - if (!tok2) - goto setup; - if (!strncmp(tok2, "function", strlen("function"))) - callchain_param.key = CCKEY_FUNCTION; - else if (!strncmp(tok2, "address", strlen("address"))) - callchain_param.key = CCKEY_ADDRESS; - else - return -1; -setup: - if (callchain_register_param(&callchain_param) < 0) { - pr_err("Can't register callchain params\n"); - return -1; - } - return 0; + return parse_callchain_report_opt(arg); } int @@ -772,7 +713,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "Only display entries with parent-match"), OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " - "Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt), + "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), OPT_INTEGER(0, "max-stack", &report.max_stack, "Set the maximum stack depth when parsing the callchain, " "anything beyond the specified depth will be ignored. " @@ -823,6 +764,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"), OPT_CALLBACK(0, "percent-limit", &report, "percent", "Don't show entries under that percent", parse_percent_limit), + OPT_CALLBACK(0, "percentage", NULL, "relative|absolute", + "how to display percentage of filtered entries", parse_filter_percentage), OPT_END() }; struct perf_data_file file = { diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 9ac0a495c954..d3fb0ed7240a 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1713,8 +1713,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) "perf sched replay [<options>]", NULL }; - const char * const sched_usage[] = { - "perf sched [<options>] {record|latency|map|replay|script}", + const char *const sched_subcommands[] = { "record", "latency", "map", + "replay", "script", NULL }; + const char *sched_usage[] = { + NULL, NULL }; struct trace_sched_handler lat_ops = { @@ -1736,8 +1738,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++) sched.curr_pid[i] = -1; - argc = parse_options(argc, argv, sched_options, sched_usage, - PARSE_OPT_STOP_AT_NON_OPTION); + argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands, + sched_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (!argc) usage_with_options(sched_usage, sched_options); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 65aaa5bbf7ec..37d30460bada 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -253,6 +253,9 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel, return NULL; hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + if (!he->filtered) + evsel->hists.stats.nr_non_filtered_samples++; + return he; } @@ -694,8 +697,7 @@ static void perf_event__process_sample(struct perf_tool *tool, if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) top->exact_samples++; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0 || - al.filtered) + if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) return; if (!top->kptr_restrict_warned && @@ -1116,6 +1118,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"), OPT_CALLBACK(0, "percent-limit", &top, "percent", "Don't show entries under that percent", parse_percent_limit), + OPT_CALLBACK(0, "percentage", NULL, "relative|absolute", + "How to display percentage of filtered entries", parse_filter_percentage), OPT_END() }; const char * const top_usage[] = { diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index ae3a57694b6b..33569847fdcc 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -121,8 +121,8 @@ __perf_main () elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then evts=$($cmd list --raw-dump) __perfcomp_colon "$evts" "$cur" - # List subcommands for 'perf kvm' - elif [[ $prev == "kvm" ]]; then + # List subcommands for perf commands + elif [[ $prev == @(kvm|kmem|mem|lock|sched) ]]; then subcmds=$($cmd $prev --list-cmds) __perfcomp_colon "$subcmds" "$cur" # List long option names diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 7ec871af3f6f..4d416984c59d 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -769,12 +769,15 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) for (nd = browser->top; nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - float percent = h->stat.period * 100.0 / - hb->hists->stats.total_period; + u64 total = hists__total_period(h->hists); + float percent = 0.0; if (h->filtered) continue; + if (total) + percent = h->stat.period * 100.0 / total; + if (percent < hb->min_pcnt) continue; @@ -792,8 +795,11 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd, { while (nd != NULL) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - float percent = h->stat.period * 100.0 / - hists->stats.total_period; + u64 total = hists__total_period(hists); + float percent = 0.0; + + if (total) + percent = h->stat.period * 100.0 / total; if (percent < min_pcnt) return NULL; @@ -813,8 +819,11 @@ static struct rb_node *hists__filter_prev_entries(struct rb_node *nd, { while (nd != NULL) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - float percent = h->stat.period * 100.0 / - hists->stats.total_period; + u64 total = hists__total_period(hists); + float percent = 0.0; + + if (total) + percent = h->stat.period * 100.0 / total; if (!h->filtered && percent >= min_pcnt) return nd; @@ -1189,6 +1198,11 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size, char buf[512]; size_t buflen = sizeof(buf); + if (symbol_conf.filter_relative) { + nr_samples = hists->stats.nr_non_filtered_samples; + nr_events = hists->stats.total_non_filtered_period; + } + if (perf_evsel__is_group_event(evsel)) { struct perf_evsel *pos; @@ -1196,8 +1210,13 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size, ev_name = buf; for_each_group_member(pos, evsel) { - nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE]; - nr_events += pos->hists.stats.total_period; + if (symbol_conf.filter_relative) { + nr_samples += pos->hists.stats.nr_non_filtered_samples; + nr_events += pos->hists.stats.total_non_filtered_period; + } else { + nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE]; + nr_events += pos->hists.stats.total_period; + } } } @@ -1370,6 +1389,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "C Collapse all callchains\n" \ "d Zoom into current DSO\n" \ "E Expand all callchains\n" \ + "F Toggle percentage of filtered entries\n" \ /* help messages are sorted by lexical order of the hotkey */ const char report_help[] = HIST_BROWSER_HELP_COMMON @@ -1475,6 +1495,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (env->arch) tui__header_window(env); continue; + case 'F': + symbol_conf.filter_relative ^= 1; + continue; case K_F1: case 'h': case '?': diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index e395ef9b0ae0..91f10f3f6dd1 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -228,12 +228,15 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); GtkTreeIter iter; - float percent = h->stat.period * 100.0 / - hists->stats.total_period; + u64 total = hists__total_period(h->hists); + float percent = 0.0; if (h->filtered) continue; + if (total) + percent = h->stat.period * 100.0 / total; + if (percent < min_pcnt) continue; @@ -261,12 +264,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, } if (symbol_conf.use_callchain && sort__has_sym) { - u64 total; - if (callchain_param.mode == CHAIN_GRAPH_REL) total = h->stat.period; - else - total = hists->stats.total_period; perf_gtk__add_callchain(&h->sorted_chain, store, &iter, sym_col, total); diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 0f403b83e9d1..0912805c08f4 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -32,10 +32,10 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, if (fmt_percent) { double percent = 0.0; + u64 total = hists__total_period(hists); - if (hists->stats.total_period) - percent = 100.0 * get_field(he) / - hists->stats.total_period; + if (total) + percent = 100.0 * get_field(he) / total; ret += hpp__call_print_fn(hpp, print_fn, fmt, percent); } else @@ -50,7 +50,7 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, list_for_each_entry(pair, &he->pairs.head, pairs.node) { u64 period = get_field(pair); - u64 total = pair->hists->stats.total_period; + u64 total = hists__total_period(pair->hists); if (!total) continue; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 8d9db454f1a9..9a42382b3921 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -25,6 +25,84 @@ __thread struct callchain_cursor callchain_cursor; +int +parse_callchain_report_opt(const char *arg) +{ + char *tok, *tok2; + char *endptr; + + symbol_conf.use_callchain = true; + + if (!arg) + return 0; + + tok = strtok((char *)arg, ","); + if (!tok) + return -1; + + /* get the output mode */ + if (!strncmp(tok, "graph", strlen(arg))) { + callchain_param.mode = CHAIN_GRAPH_ABS; + + } else if (!strncmp(tok, "flat", strlen(arg))) { + callchain_param.mode = CHAIN_FLAT; + } else if (!strncmp(tok, "fractal", strlen(arg))) { + callchain_param.mode = CHAIN_GRAPH_REL; + } else if (!strncmp(tok, "none", strlen(arg))) { + callchain_param.mode = CHAIN_NONE; + symbol_conf.use_callchain = false; + return 0; + } else { + return -1; + } + + /* get the min percentage */ + tok = strtok(NULL, ","); + if (!tok) + goto setup; + + callchain_param.min_percent = strtod(tok, &endptr); + if (tok == endptr) + return -1; + + /* get the print limit */ + tok2 = strtok(NULL, ","); + if (!tok2) + goto setup; + + if (tok2[0] != 'c') { + callchain_param.print_limit = strtoul(tok2, &endptr, 0); + tok2 = strtok(NULL, ","); + if (!tok2) + goto setup; + } + + /* get the call chain order */ + if (!strncmp(tok2, "caller", strlen("caller"))) + callchain_param.order = ORDER_CALLER; + else if (!strncmp(tok2, "callee", strlen("callee"))) + callchain_param.order = ORDER_CALLEE; + else + return -1; + + /* Get the sort key */ + tok2 = strtok(NULL, ","); + if (!tok2) + goto setup; + if (!strncmp(tok2, "function", strlen("function"))) + callchain_param.key = CCKEY_FUNCTION; + else if (!strncmp(tok2, "address", strlen("address"))) + callchain_param.key = CCKEY_ADDRESS; + else + return -1; +setup: + if (callchain_register_param(&callchain_param) < 0) { + pr_err("Can't register callchain params\n"); + return -1; + } + return 0; +} + static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, enum chain_mode mode) diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 8ad97e9b119f..dda4cf8b534c 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -157,4 +157,5 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample); extern const char record_callchain_help[]; +int parse_callchain_report_opt(const char *arg); #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 3e0fdd369ccb..24519e14ac56 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -11,6 +11,7 @@ #include "util.h" #include "cache.h" #include "exec_cmd.h" +#include "util/hist.h" /* perf_hist_config */ #define MAXNAME (256) @@ -355,6 +356,9 @@ int perf_default_config(const char *var, const char *value, if (!prefixcmp(var, "core.")) return perf_default_core_config(var, value); + if (!prefixcmp(var, "hist.")) + return perf_hist_config(var, value); + /* Add other config variables here. */ return 0; } diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 7fe4994eeb63..c4e55b71010c 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -317,3 +317,163 @@ int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep) { return cpu_map__build_map(cpus, corep, cpu_map__get_core); } + +/* setup simple routines to easily access node numbers given a cpu number */ +static int get_max_num(char *path, int *max) +{ + size_t num; + char *buf; + int err = 0; + + if (filename__read_str(path, &buf, &num)) + return -1; + + buf[num] = '\0'; + + /* start on the right, to find highest node num */ + while (--num) { + if ((buf[num] == ',') || (buf[num] == '-')) { + num++; + break; + } + } + if (sscanf(&buf[num], "%d", max) < 1) { + err = -1; + goto out; + } + + /* convert from 0-based to 1-based */ + (*max)++; + +out: + free(buf); + return err; +} + +/* Determine highest possible cpu in the system for sparse allocation */ +static void set_max_cpu_num(void) +{ + const char *mnt; + char path[PATH_MAX]; + int ret = -1; + + /* set up default */ + max_cpu_num = 4096; + + mnt = sysfs__mountpoint(); + if (!mnt) + goto out; + + /* get the highest possible cpu number for a sparse allocation */ + ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/possible", mnt); + if (ret == PATH_MAX) { + pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); + goto out; + } + + ret = get_max_num(path, &max_cpu_num); + +out: + if (ret) + pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num); +} + +/* Determine highest possible node in the system for sparse allocation */ +static void set_max_node_num(void) +{ + const char *mnt; + char path[PATH_MAX]; + int ret = -1; + + /* set up default */ + max_node_num = 8; + + mnt = sysfs__mountpoint(); + if (!mnt) + goto out; + + /* get the highest possible cpu number for a sparse allocation */ + ret = snprintf(path, PATH_MAX, "%s/devices/system/node/possible", mnt); + if (ret == PATH_MAX) { + pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); + goto out; + } + + ret = get_max_num(path, &max_node_num); + +out: + if (ret) + pr_err("Failed to read max nodes, using default of %d\n", max_node_num); +} + +static int init_cpunode_map(void) +{ + int i; + + set_max_cpu_num(); + set_max_node_num(); + + cpunode_map = calloc(max_cpu_num, sizeof(int)); + if (!cpunode_map) { + pr_err("%s: calloc failed\n", __func__); + return -1; + } + + for (i = 0; i < max_cpu_num; i++) + cpunode_map[i] = -1; + + return 0; +} + +int cpu__setup_cpunode_map(void) +{ + struct dirent *dent1, *dent2; + DIR *dir1, *dir2; + unsigned int cpu, mem; + char buf[PATH_MAX]; + char path[PATH_MAX]; + const char *mnt; + int n; + + /* initialize globals */ + if (init_cpunode_map()) + return -1; + + mnt = sysfs__mountpoint(); + if (!mnt) + return 0; + + n = snprintf(path, PATH_MAX, "%s/devices/system/node", mnt); + if (n == PATH_MAX) { + pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); + return -1; + } + + dir1 = opendir(path); + if (!dir1) + return 0; + + /* walk tree and setup map */ + while ((dent1 = readdir(dir1)) != NULL) { + if (dent1->d_type != DT_DIR || sscanf(dent1->d_name, "node%u", &mem) < 1) + continue; + + n = snprintf(buf, PATH_MAX, "%s/%s", path, dent1->d_name); + if (n == PATH_MAX) { + pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); + continue; + } + + dir2 = opendir(buf); + if (!dir2) + continue; + while ((dent2 = readdir(dir2)) != NULL) { + if (dent2->d_type != DT_LNK || sscanf(dent2->d_name, "cpu%u", &cpu) < 1) + continue; + cpunode_map[cpu] = mem; + } + closedir(dir2); + } + closedir(dir1); + return 0; +} diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index b123bb9d6f55..61a654849002 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -4,6 +4,9 @@ #include <stdio.h> #include <stdbool.h> +#include "perf.h" +#include "util/debug.h" + struct cpu_map { int nr; int map[]; @@ -46,4 +49,36 @@ static inline bool cpu_map__empty(const struct cpu_map *map) return map ? map->map[0] == -1 : true; } +int max_cpu_num; +int max_node_num; +int *cpunode_map; + +int cpu__setup_cpunode_map(void); + +static inline int cpu__max_node(void) +{ + if (unlikely(!max_node_num)) + pr_debug("cpu_map not initialized\n"); + + return max_node_num; +} + +static inline int cpu__max_cpu(void) +{ + if (unlikely(!max_cpu_num)) + pr_debug("cpu_map not initialized\n"); + + return max_cpu_num; +} + +static inline int cpu__get_node(int cpu) +{ + if (unlikely(cpunode_map == NULL)) { + pr_debug("cpu_map not initialized\n"); + return -1; + } + + return cpunode_map[cpu]; +} + #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index f38590d7561b..5a892477aa50 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -321,9 +321,11 @@ void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h) { if (!h->filtered) { hists__calc_col_len(hists, h); - ++hists->nr_entries; - hists->stats.total_period += h->stat.period; + hists->nr_non_filtered_entries++; + hists->stats.total_non_filtered_period += h->stat.period; } + hists->nr_entries++; + hists->stats.total_period += h->stat.period; } static u8 symbol__parent_filter(const struct symbol *parent) @@ -674,8 +676,9 @@ void hists__output_resort(struct hists *hists) next = rb_first(root); hists->entries = RB_ROOT; - hists->nr_entries = 0; + hists->nr_non_filtered_entries = 0; hists->stats.total_period = 0; + hists->stats.total_non_filtered_period = 0; hists__reset_col_len(hists); while (next) { @@ -694,12 +697,12 @@ static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h if (h->filtered) return; - ++hists->nr_entries; + ++hists->nr_non_filtered_entries; if (h->ms.unfolded) - hists->nr_entries += h->nr_rows; + hists->nr_non_filtered_entries += h->nr_rows; h->row_offset = 0; - hists->stats.total_period += h->stat.period; - hists->stats.nr_events[PERF_RECORD_SAMPLE] += h->stat.nr_events; + hists->stats.total_non_filtered_period += h->stat.period; + hists->stats.nr_non_filtered_samples += h->stat.nr_events; hists__calc_col_len(hists, h); } @@ -721,8 +724,9 @@ void hists__filter_by_dso(struct hists *hists) { struct rb_node *nd; - hists->nr_entries = hists->stats.total_period = 0; - hists->stats.nr_events[PERF_RECORD_SAMPLE] = 0; + hists->nr_non_filtered_entries = 0; + hists->stats.total_non_filtered_period = 0; + hists->stats.nr_non_filtered_samples = 0; hists__reset_col_len(hists); for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { @@ -754,8 +758,9 @@ void hists__filter_by_thread(struct hists *hists) { struct rb_node *nd; - hists->nr_entries = hists->stats.total_period = 0; - hists->stats.nr_events[PERF_RECORD_SAMPLE] = 0; + hists->nr_non_filtered_entries = 0; + hists->stats.total_non_filtered_period = 0; + hists->stats.nr_non_filtered_samples = 0; hists__reset_col_len(hists); for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { @@ -785,8 +790,9 @@ void hists__filter_by_symbol(struct hists *hists) { struct rb_node *nd; - hists->nr_entries = hists->stats.total_period = 0; - hists->stats.nr_events[PERF_RECORD_SAMPLE] = 0; + hists->nr_non_filtered_entries = 0; + hists->stats.total_non_filtered_period = 0; + hists->stats.nr_non_filtered_samples = 0; hists__reset_col_len(hists); for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { @@ -931,3 +937,30 @@ int hists__link(struct hists *leader, struct hists *other) return 0; } + +u64 hists__total_period(struct hists *hists) +{ + return symbol_conf.filter_relative ? hists->stats.total_non_filtered_period : + hists->stats.total_period; +} + +int parse_filter_percentage(const struct option *opt __maybe_unused, + const char *arg, int unset __maybe_unused) +{ + if (!strcmp(arg, "relative")) + symbol_conf.filter_relative = true; + else if (!strcmp(arg, "absolute")) + symbol_conf.filter_relative = false; + else + return -1; + + return 0; +} + +int perf_hist_config(const char *var, const char *value) +{ + if (!strcmp(var, "hist.percentage")) + return parse_filter_percentage(NULL, value, 0); + + return 0; +} diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 1f1f513dfe7f..5a0343eb22e2 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -37,9 +37,11 @@ enum hist_filter { */ struct events_stats { u64 total_period; + u64 total_non_filtered_period; u64 total_lost; u64 total_invalid_chains; u32 nr_events[PERF_RECORD_HEADER_MAX]; + u32 nr_non_filtered_samples; u32 nr_lost_warned; u32 nr_unknown_events; u32 nr_invalid_chains; @@ -83,6 +85,7 @@ struct hists { struct rb_root entries; struct rb_root entries_collapsed; u64 nr_entries; + u64 nr_non_filtered_entries; const struct thread *thread_filter; const struct dso *dso_filter; const char *uid_filter_str; @@ -112,6 +115,7 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog); void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel); void hists__output_recalc_col_len(struct hists *hists, int max_rows); +u64 hists__total_period(struct hists *hists); void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h); void hists__inc_nr_events(struct hists *hists, u32 type); void events_stats__inc(struct events_stats *stats, u32 type); @@ -250,4 +254,10 @@ static inline int script_browse(const char *script_opt __maybe_unused) #endif unsigned int hists__sort_list_width(struct hists *hists); + +struct option; +int parse_filter_percentage(const struct option *opt __maybe_unused, + const char *arg, int unset __maybe_unused); +int perf_hist_config(const char *var, const char *value); + #endif /* __PERF_HIST_H */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 00a7dcb2f55c..7a811eb61f75 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -284,17 +284,17 @@ static int pmu_aliases(const char *name, struct list_head *head) static int pmu_alias_terms(struct perf_pmu_alias *alias, struct list_head *terms) { - struct parse_events_term *term, *clone; + struct parse_events_term *term, *cloned; LIST_HEAD(list); int ret; list_for_each_entry(term, &alias->terms, list) { - ret = parse_events_term__clone(&clone, term); + ret = parse_events_term__clone(&cloned, term); if (ret) { parse_events__free_terms(&list); return ret; } - list_add_tail(&clone->list, &list); + list_add_tail(&cloned->list, &list); } list_splice(&list, terms); return 0; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 501e4e722e8e..ae94e006a52d 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -115,7 +115,8 @@ struct symbol_conf { annotate_asm_raw, annotate_src, event_group, - demangle; + demangle, + filter_relative; const char *vmlinux_name, *kallsyms_name, *source_prefix, |