From bb871a9c8d68692ed2513b3f0e1c010c2ac12f44 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 23 Oct 2014 12:50:25 -0300 Subject: perf tools: A thread's machine can be found via thread->mg->machine So stop passing both machine and thread to several thread methods, reducing function signature length. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jean Pihet Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ckcy19dcp1jfkmdihdjcqdn1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/perf/util/event.h') diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 5699e7e2a790..5f0e0b89e130 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -322,7 +322,6 @@ bool is_bts_event(struct perf_event_attr *attr); bool sample_addr_correlates_sym(struct perf_event_attr *attr); void perf_event__preprocess_sample_addr(union perf_event *event, struct perf_sample *sample, - struct machine *machine, struct thread *thread, struct addr_location *al); -- cgit v1.2.3 From 3c659eedada2fbf909c5818848753a6647a56426 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 27 Oct 2014 15:49:22 +0200 Subject: perf tools: Add id index Add an index of the event identifiers, in preparation for Intel PT. The event id (also called the sample id) is a unique number allocated by the kernel to the event created by perf_event_open(). Events can include the event id by having a sample type including PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER. Currently the main use of the event id is to match an event back to the evsel to which it belongs i.e. perf_evlist__id2evsel() The purpose of this patch is to make it possible to match an event back to the mmap from which it was read. 
That is useful because the mmap represents a time-ordered context (either for a cpu or for a thread). Intel PT decodes trace information on that basis. In full-trace mode, that information can be recorded when the Intel PT trace is read, but in sample-mode the Intel PT trace data is embedded in a sample and it is in that case that the "id index" is needed. So the mmaps are numbered (idx) and the cpu and tid are recorded against the id by perf_evlist__set_sid_idx() which is called by perf_evlist__mmap_per_evsel(). That information is recorded in the perf.data file in the new "id index". idx, cpu and tid are added to struct perf_sample_id (which is the node of evlist's hash table to match ids to evsels). The information can be retrieved using perf_evlist__id2sid(). Note, however, that this all depends on having a sample type including PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER, otherwise ids are not recorded. The "id index" is a synthesized event record which will be created when Intel PT sampling is used by calling perf_event__synthesize_id_index(). 
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414417770-18602-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 1 + tools/perf/util/event.c | 1 + tools/perf/util/event.h | 15 ++++++ tools/perf/util/evlist.c | 26 ++++++++-- tools/perf/util/evsel.h | 3 ++ tools/perf/util/session.c | 122 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/session.h | 10 ++++ tools/perf/util/tool.h | 3 +- 8 files changed, 177 insertions(+), 4 deletions(-) (limited to 'tools/perf/util/event.h') diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 06f1758951f1..84df2deed988 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -409,6 +409,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) .tracing_data = perf_event__repipe_op2_synth, .finished_round = perf_event__repipe_op2_synth, .build_id = perf_event__repipe_op2_synth, + .id_index = perf_event__repipe_op2_synth, }, .input_name = "-", .samples = LIST_HEAD_INIT(inject.samples), diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index e00a29fb099f..6c6d044e959a 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -28,6 +28,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", [PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID", [PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND", + [PERF_RECORD_ID_INDEX] = "ID_INDEX", }; const char *perf_event__name(unsigned int id) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 5f0e0b89e130..8c7fe9d64e79 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -187,6 +187,7 @@ enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_HEADER_TRACING_DATA = 66, 
PERF_RECORD_HEADER_BUILD_ID = 67, PERF_RECORD_FINISHED_ROUND = 68, + PERF_RECORD_ID_INDEX = 69, PERF_RECORD_HEADER_MAX }; @@ -239,6 +240,19 @@ struct tracing_data_event { u32 size; }; +struct id_index_entry { + u64 id; + u64 idx; + u64 cpu; + u64 tid; +}; + +struct id_index_event { + struct perf_event_header header; + u64 nr; + struct id_index_entry entries[0]; +}; + union perf_event { struct perf_event_header header; struct mmap_event mmap; @@ -253,6 +267,7 @@ union perf_event { struct event_type_event event_type; struct tracing_data_event tracing_data; struct build_id_event build_id; + struct id_index_event id_index; }; void perf_event__print_totals(void); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3c9e77d6b4c2..0babd390963c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -527,6 +527,22 @@ static int perf_evlist__id_add_fd(struct perf_evlist *evlist, return 0; } +static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, + struct perf_evsel *evsel, int idx, int cpu, + int thread) +{ + struct perf_sample_id *sid = SID(evsel, cpu, thread); + sid->idx = idx; + if (evlist->cpus && cpu >= 0) + sid->cpu = evlist->cpus->map[cpu]; + else + sid->cpu = -1; + if (!evsel->system_wide && evlist->threads && thread >= 0) + sid->tid = evlist->threads->map[thread]; + else + sid->tid = -1; +} + struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id) { struct hlist_head *head; @@ -805,9 +821,13 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, return -1; } - if ((evsel->attr.read_format & PERF_FORMAT_ID) && - perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0) - return -1; + if (evsel->attr.read_format & PERF_FORMAT_ID) { + if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, + fd) < 0) + return -1; + perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, + thread); + } } return 0; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 
d3854c4f52e1..979790951bfb 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -36,6 +36,9 @@ struct perf_sample_id { struct hlist_node node; u64 id; struct perf_evsel *evsel; + int idx; + int cpu; + pid_t tid; /* Holds total ID period value for PERF_SAMPLE_READ processing. */ u64 period; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 776010844cdc..27a0049118b5 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -228,6 +228,15 @@ static int process_finished_round(struct perf_tool *tool, union perf_event *event, struct perf_session *session); +static int process_id_index_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *perf_session + __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + void perf_tool__fill_defaults(struct perf_tool *tool) { if (tool->sample == NULL) @@ -262,6 +271,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) else tool->finished_round = process_finished_round_stub; } + if (tool->id_index == NULL) + tool->id_index = process_id_index_stub; } static void swap_sample_id_all(union perf_event *event, void *data) @@ -460,6 +471,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, [PERF_RECORD_HEADER_BUILD_ID] = NULL, + [PERF_RECORD_ID_INDEX] = perf_event__all64_swap, [PERF_RECORD_HEADER_MAX] = NULL, }; @@ -888,6 +900,8 @@ static s64 perf_session__process_user_event(struct perf_session *session, return tool->build_id(tool, event, session); case PERF_RECORD_FINISHED_ROUND: return tool->finished_round(tool, event, session); + case PERF_RECORD_ID_INDEX: + return tool->id_index(tool, event, session); default: return -EINVAL; } @@ -1594,3 +1608,111 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session, out: return err; } + +int 
perf_event__process_id_index(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct id_index_event *ie = &event->id_index; + size_t i, nr, max_nr; + + max_nr = (ie->header.size - sizeof(struct id_index_event)) / + sizeof(struct id_index_entry); + nr = ie->nr; + if (nr > max_nr) + return -EINVAL; + + if (dump_trace) + fprintf(stdout, " nr: %zu\n", nr); + + for (i = 0; i < nr; i++) { + struct id_index_entry *e = &ie->entries[i]; + struct perf_sample_id *sid; + + if (dump_trace) { + fprintf(stdout, " ... id: %"PRIu64, e->id); + fprintf(stdout, " idx: %"PRIu64, e->idx); + fprintf(stdout, " cpu: %"PRId64, e->cpu); + fprintf(stdout, " tid: %"PRId64"\n", e->tid); + } + + sid = perf_evlist__id2sid(evlist, e->id); + if (!sid) + return -ENOENT; + sid->idx = e->idx; + sid->cpu = e->cpu; + sid->tid = e->tid; + } + return 0; +} + +int perf_event__synthesize_id_index(struct perf_tool *tool, + perf_event__handler_t process, + struct perf_evlist *evlist, + struct machine *machine) +{ + union perf_event *ev; + struct perf_evsel *evsel; + size_t nr = 0, i = 0, sz, max_nr, n; + int err; + + pr_debug2("Synthesizing id index\n"); + + max_nr = (UINT16_MAX - sizeof(struct id_index_event)) / + sizeof(struct id_index_entry); + + list_for_each_entry(evsel, &evlist->entries, node) + nr += evsel->ids; + + n = nr > max_nr ? 
max_nr : nr; + sz = sizeof(struct id_index_event) + n * sizeof(struct id_index_entry); + ev = zalloc(sz); + if (!ev) + return -ENOMEM; + + ev->id_index.header.type = PERF_RECORD_ID_INDEX; + ev->id_index.header.size = sz; + ev->id_index.nr = n; + + list_for_each_entry(evsel, &evlist->entries, node) { + u32 j; + + for (j = 0; j < evsel->ids; j++) { + struct id_index_entry *e; + struct perf_sample_id *sid; + + if (i >= n) { + err = process(tool, ev, NULL, machine); + if (err) + goto out_err; + nr -= n; + i = 0; + } + + e = &ev->id_index.entries[i++]; + + e->id = evsel->id[j]; + + sid = perf_evlist__id2sid(evlist, e->id); + if (!sid) { + free(ev); + return -ENOENT; + } + + e->idx = sid->idx; + e->cpu = sid->cpu; + e->tid = sid->tid; + } + } + + sz = sizeof(struct id_index_event) + nr * sizeof(struct id_index_entry); + ev->id_index.header.size = sz; + ev->id_index.nr = nr; + + err = process(tool, ev, NULL, machine); +out_err: + free(ev); + + return err; +} diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index a4be851f1a90..d8521ac73a10 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -126,4 +126,14 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session, extern volatile int session_done; #define session_done() ACCESS_ONCE(session_done) + +int perf_event__process_id_index(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session); + +int perf_event__synthesize_id_index(struct perf_tool *tool, + perf_event__handler_t process, + struct perf_evlist *evlist, + struct machine *machine); + #endif /* __PERF_SESSION_H */ diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index f11636966a0f..bb2708bbfaca 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -39,7 +39,8 @@ struct perf_tool { event_attr_op attr; event_op2 tracing_data; event_op2 finished_round, - build_id; + build_id, + id_index; bool ordered_events; bool ordering_requires_timestamps; }; -- cgit v1.2.3 
From 00447ccdf3335ea467841fc3c7d65ffd30748895 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 30 Oct 2014 16:09:42 +0200 Subject: perf tools: Add a thread stack for synthesizing call chains Add a thread stack for synthesizing call chains from call and return events. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414678188-14946-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 + tools/perf/util/event.h | 26 +++++++ tools/perf/util/thread-stack.c | 172 +++++++++++++++++++++++++++++++++++++++++ tools/perf/util/thread-stack.h | 32 ++++++++ tools/perf/util/thread.c | 3 + tools/perf/util/thread.h | 3 + 6 files changed, 238 insertions(+) create mode 100644 tools/perf/util/thread-stack.c create mode 100644 tools/perf/util/thread-stack.h (limited to 'tools/perf/util/event.h') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 3caf7dab50e8..0ebcc4ad0244 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -317,6 +317,7 @@ LIB_H += ui/util.h LIB_H += ui/ui.h LIB_H += util/data.h LIB_H += util/kvm-stat.h +LIB_H += util/thread-stack.h LIB_OBJS += $(OUTPUT)util/abspath.o LIB_OBJS += $(OUTPUT)util/alias.o @@ -394,6 +395,7 @@ LIB_OBJS += $(OUTPUT)util/srcline.o LIB_OBJS += $(OUTPUT)util/data.o LIB_OBJS += $(OUTPUT)util/tsc.o LIB_OBJS += $(OUTPUT)util/cloexec.o +LIB_OBJS += $(OUTPUT)util/thread-stack.o LIB_OBJS += $(OUTPUT)ui/setup.o LIB_OBJS += $(OUTPUT)ui/helpline.o diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 8c7fe9d64e79..7be389735402 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -143,6 +143,32 @@ struct branch_stack { struct branch_entry entries[0]; }; +enum { + PERF_IP_FLAG_BRANCH = 1ULL << 0, + PERF_IP_FLAG_CALL = 1ULL << 1, + PERF_IP_FLAG_RETURN = 1ULL << 
2, + PERF_IP_FLAG_CONDITIONAL = 1ULL << 3, + PERF_IP_FLAG_SYSCALLRET = 1ULL << 4, + PERF_IP_FLAG_ASYNC = 1ULL << 5, + PERF_IP_FLAG_INTERRUPT = 1ULL << 6, + PERF_IP_FLAG_TX_ABORT = 1ULL << 7, + PERF_IP_FLAG_TRACE_BEGIN = 1ULL << 8, + PERF_IP_FLAG_TRACE_END = 1ULL << 9, + PERF_IP_FLAG_IN_TX = 1ULL << 10, +}; + +#define PERF_BRANCH_MASK (\ + PERF_IP_FLAG_BRANCH |\ + PERF_IP_FLAG_CALL |\ + PERF_IP_FLAG_RETURN |\ + PERF_IP_FLAG_CONDITIONAL |\ + PERF_IP_FLAG_SYSCALLRET |\ + PERF_IP_FLAG_ASYNC |\ + PERF_IP_FLAG_INTERRUPT |\ + PERF_IP_FLAG_TX_ABORT |\ + PERF_IP_FLAG_TRACE_BEGIN |\ + PERF_IP_FLAG_TRACE_END) + struct perf_sample { u64 ip; u32 pid, tid; diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c new file mode 100644 index 000000000000..85b60d2e738f --- /dev/null +++ b/tools/perf/util/thread-stack.c @@ -0,0 +1,172 @@ +/* + * thread-stack.c: Synthesize a thread's stack using call / return events + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#include "thread.h" +#include "event.h" +#include "util.h" +#include "debug.h" +#include "thread-stack.h" + +#define STACK_GROWTH 4096 + +struct thread_stack_entry { + u64 ret_addr; +}; + +struct thread_stack { + struct thread_stack_entry *stack; + size_t cnt; + size_t sz; + u64 trace_nr; +}; + +static int thread_stack__grow(struct thread_stack *ts) +{ + struct thread_stack_entry *new_stack; + size_t sz, new_sz; + + new_sz = ts->sz + STACK_GROWTH; + sz = new_sz * sizeof(struct thread_stack_entry); + + new_stack = realloc(ts->stack, sz); + if (!new_stack) + return -ENOMEM; + + ts->stack = new_stack; + ts->sz = new_sz; + + return 0; +} + +static struct thread_stack *thread_stack__new(void) +{ + struct thread_stack *ts; + + ts = zalloc(sizeof(struct thread_stack)); + if (!ts) + return NULL; + + if (thread_stack__grow(ts)) { + free(ts); + return NULL; + } + + return ts; +} + +static int thread_stack__push(struct thread_stack *ts, u64 ret_addr) +{ + int err = 0; + + if (ts->cnt == ts->sz) { + err = thread_stack__grow(ts); + if (err) { + pr_warning("Out of memory: discarding thread stack\n"); + ts->cnt = 0; + } + } + + ts->stack[ts->cnt++].ret_addr = ret_addr; + + return err; +} + +static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr) +{ + size_t i; + + /* + * In some cases there may be functions which are not seen to return. + * For example when setjmp / longjmp has been used. Or the perf context + * switch in the kernel which doesn't stop and start tracing in exactly + * the same code path. When that happens the return address will be + * further down the stack. If the return address is not found at all, + * we assume the opposite (i.e. this is a return for a call that wasn't + * seen for some reason) and leave the stack alone. 
+ */ + for (i = ts->cnt; i; ) { + if (ts->stack[--i].ret_addr == ret_addr) { + ts->cnt = i; + return; + } + } +} + +int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, + u64 to_ip, u16 insn_len, u64 trace_nr) +{ + if (!thread) + return -EINVAL; + + if (!thread->ts) { + thread->ts = thread_stack__new(); + if (!thread->ts) { + pr_warning("Out of memory: no thread stack\n"); + return -ENOMEM; + } + thread->ts->trace_nr = trace_nr; + } + + /* + * When the trace is discontinuous, the trace_nr changes. In that case + * the stack might be completely invalid. Better to report nothing than + * to report something misleading, so reset the stack count to zero. + */ + if (trace_nr != thread->ts->trace_nr) { + thread->ts->trace_nr = trace_nr; + thread->ts->cnt = 0; + } + + if (flags & PERF_IP_FLAG_CALL) { + u64 ret_addr; + + if (!to_ip) + return 0; + ret_addr = from_ip + insn_len; + if (ret_addr == to_ip) + return 0; /* Zero-length calls are excluded */ + return thread_stack__push(thread->ts, ret_addr); + } else if (flags & PERF_IP_FLAG_RETURN) { + if (!from_ip) + return 0; + thread_stack__pop(thread->ts, to_ip); + } + + return 0; +} + +void thread_stack__free(struct thread *thread) +{ + if (thread->ts) { + zfree(&thread->ts->stack); + zfree(&thread->ts); + } +} + +void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, + size_t sz, u64 ip) +{ + size_t i; + + if (!thread || !thread->ts) + chain->nr = 1; + else + chain->nr = min(sz, thread->ts->cnt + 1); + + chain->ips[0] = ip; + + for (i = 1; i < chain->nr; i++) + chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr; +} diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h new file mode 100644 index 000000000000..7c41579aec74 --- /dev/null +++ b/tools/perf/util/thread-stack.h @@ -0,0 +1,32 @@ +/* + * thread-stack.h: Synthesize a thread's stack using call / return events + * Copyright (c) 2014, Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef __PERF_THREAD_STACK_H +#define __PERF_THREAD_STACK_H + +#include + +#include + +struct thread; +struct ip_callchain; + +int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, + u64 to_ip, u16 insn_len, u64 trace_nr); +void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, + size_t sz, u64 ip); +void thread_stack__free(struct thread *thread); + +#endif diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index bf5bf858b7f6..a2157f0ef1df 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -4,6 +4,7 @@ #include #include "session.h" #include "thread.h" +#include "thread-stack.h" #include "util.h" #include "debug.h" #include "comm.h" @@ -66,6 +67,8 @@ void thread__delete(struct thread *thread) { struct comm *comm, *tmp; + thread_stack__free(thread); + if (thread->mg) { map_groups__put(thread->mg); thread->mg = NULL; diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index d34cf5c0d0d9..160fd066a7d1 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -8,6 +8,8 @@ #include "symbol.h" #include +struct thread_stack; + struct thread { union { struct rb_node rb_node; @@ -26,6 +28,7 @@ struct thread { u64 db_id; void *priv; + struct thread_stack *ts; }; struct machine; -- cgit v1.2.3 From 6a21c0b5c2abd2fdfa6fff79f11df3d6082c1873 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 24 Sep 2014 13:48:39 +0200 Subject: perf tools: Add core support for sampling intr machine state regs Add the 
infrastructure to setup, collect and report the interrupt machine state regs which can be captured by the kernel. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Cc: cebbert.lkml@gmail.com Cc: Adrian Hunter Cc: Andi Kleen Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Don Zickus Cc: Jean Pihet Cc: Jiri Olsa Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Paul Mackerras Cc: Stephane Eranian Cc: Waiman Long Cc: Wang Nan Link: http://lkml.kernel.org/r/1411559322-16548-4-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- tools/perf/perf.h | 1 + tools/perf/util/event.h | 1 + tools/perf/util/evsel.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/header.c | 1 + tools/perf/util/session.c | 44 +++++++++++++++++++++++++++++++++++++++----- 5 files changed, 87 insertions(+), 6 deletions(-) (limited to 'tools/perf/util/event.h') diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 511c2831aa81..1dabb8553499 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -52,6 +52,7 @@ struct record_opts { bool sample_weight; bool sample_time; bool period; + bool sample_intr_regs; unsigned int freq; unsigned int mmap_pages; unsigned int user_freq; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 7be389735402..09b9e8d3fcf7 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -188,6 +188,7 @@ struct perf_sample { struct ip_callchain *callchain; struct branch_stack *branch_stack; struct regs_dump user_regs; + struct regs_dump intr_regs; struct stack_dump user_stack; struct sample_read read; }; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 12b4396c7175..34344ffa79ca 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -661,6 +661,11 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) if (callchain_param.enabled && !evsel->no_aux_samples) perf_evsel__config_callgraph(evsel); + if (opts->sample_intr_regs) { + 
attr->sample_regs_intr = PERF_REGS_MASK; + perf_evsel__set_sample_bit(evsel, REGS_INTR); + } + if (target__has_cpu(&opts->target)) perf_evsel__set_sample_bit(evsel, CPU); @@ -1037,6 +1042,7 @@ static size_t perf_event_attr__fprintf(struct perf_event_attr *attr, FILE *fp) ret += PRINT_ATTR_X64(branch_sample_type); ret += PRINT_ATTR_X64(sample_regs_user); ret += PRINT_ATTR_U32(sample_stack_user); + ret += PRINT_ATTR_X64(sample_regs_intr); ret += fprintf(fp, "%.60s\n", graph_dotted_line); @@ -1536,6 +1542,23 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array++; } + data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE; + if (type & PERF_SAMPLE_REGS_INTR) { + OVERFLOW_CHECK_u64(array); + data->intr_regs.abi = *array; + array++; + + if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 mask = evsel->attr.sample_regs_intr; + + sz = hweight_long(mask) * sizeof(u64); + OVERFLOW_CHECK(array, sz, max_size); + data->intr_regs.mask = mask; + data->intr_regs.regs = (u64 *)array; + array = (void *)array + sz; + } + } + return 0; } @@ -1631,6 +1654,16 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_TRANSACTION) result += sizeof(u64); + if (type & PERF_SAMPLE_REGS_INTR) { + if (sample->intr_regs.abi) { + result += sizeof(u64); + sz = hweight_long(sample->intr_regs.mask) * sizeof(u64); + result += sz; + } else { + result += sizeof(u64); + } + } + return result; } @@ -1809,6 +1842,17 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, array++; } + if (type & PERF_SAMPLE_REGS_INTR) { + if (sample->intr_regs.abi) { + *array++ = sample->intr_regs.abi; + sz = hweight_long(sample->intr_regs.mask) * sizeof(u64); + memcpy(array, sample->intr_regs.regs, sz); + array = (void *)array + sz; + } else { + *array++ = 0; + } + } + return 0; } @@ -1938,7 +1982,7 @@ static int sample_type__fprintf(FILE *fp, bool *first, u64 value) bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), 
bit_name(CPU), bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), - bit_name(IDENTIFIER), + bit_name(IDENTIFIER), bit_name(REGS_INTR), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 76442caca37e..05fab7a188dc 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2143,6 +2143,7 @@ static const int attr_file_abi_sizes[] = { [1] = PERF_ATTR_SIZE_VER1, [2] = PERF_ATTR_SIZE_VER2, [3] = PERF_ATTR_SIZE_VER3, + [4] = PERF_ATTR_SIZE_VER4, 0, }; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f4478ce72fdb..6ac62ae6b8fa 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -592,15 +592,46 @@ static void regs_dump__printf(u64 mask, u64 *regs) } } +static const char *regs_abi[] = { + [PERF_SAMPLE_REGS_ABI_NONE] = "none", + [PERF_SAMPLE_REGS_ABI_32] = "32-bit", + [PERF_SAMPLE_REGS_ABI_64] = "64-bit", +}; + +static inline const char *regs_dump_abi(struct regs_dump *d) +{ + if (d->abi > PERF_SAMPLE_REGS_ABI_64) + return "unknown"; + + return regs_abi[d->abi]; +} + +static void regs__printf(const char *type, struct regs_dump *regs) +{ + u64 mask = regs->mask; + + printf("... %s regs: mask 0x%" PRIx64 " ABI %s\n", + type, + mask, + regs_dump_abi(regs)); + + regs_dump__printf(mask, regs->regs); +} + static void regs_user__printf(struct perf_sample *sample) { struct regs_dump *user_regs = &sample->user_regs; - if (user_regs->regs) { - u64 mask = user_regs->mask; - printf("... 
user regs: mask 0x%" PRIx64 "\n", mask); - regs_dump__printf(mask, user_regs->regs); - } + if (user_regs->regs) + regs__printf("user", user_regs); +} + +static void regs_intr__printf(struct perf_sample *sample) +{ + struct regs_dump *intr_regs = &sample->intr_regs; + + if (intr_regs->regs) + regs__printf("intr", intr_regs); } static void stack_user__printf(struct stack_dump *dump) @@ -699,6 +730,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_REGS_USER) regs_user__printf(sample); + if (sample_type & PERF_SAMPLE_REGS_INTR) + regs_intr__printf(sample); + if (sample_type & PERF_SAMPLE_STACK_USER) stack_user__printf(&sample->user_stack); -- cgit v1.2.3