From 6fb2915df7f0747d9044da9dbff5b46dc2e20830 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 15 Oct 2009 11:21:42 +0800 Subject: tracing/profile: Add filter support - Add an ioctl to allocate a filter for a perf event. - Free the filter when the associated perf event is to be freed. - Do the filtering in perf_swevent_match(). Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <4AD69546.8050401@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2e6d95f97419..df9d964c15fc 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -221,6 +221,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_RESET _IO ('$', 3) #define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64) #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, @@ -633,7 +634,12 @@ struct perf_event { struct pid_namespace *ns; u64 id; + +#ifdef CONFIG_EVENT_PROFILE + struct event_filter *filter; #endif + +#endif /* CONFIG_PERF_EVENTS */ }; /** -- cgit v1.2.3 From f7d7986060b2890fc26db6ab5203efbd33aa2497 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 18 Oct 2009 01:09:29 +0000 Subject: perf_event: Add alignment-faults and emulation-faults software events Add two more software events that are common to many cpus. Alignment faults: When a load or store is not aligned properly. Emulation faults: When an instruction is emulated in software. Both cause a very significant slowdown (100x or worse), so identifying and fixing them is very important. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2e6d95f97419..a33707a3a788 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -102,6 +102,8 @@ enum perf_sw_ids { PERF_COUNT_SW_CPU_MIGRATIONS = 4, PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, + PERF_COUNT_SW_EMULATION_FAULTS = 8, PERF_COUNT_SW_MAX, /* non-ABI */ }; -- cgit v1.2.3 From fb0459d75c1d0a4ba3cafdd2c754e7486968a676 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 25 Sep 2009 12:25:56 +0200 Subject: perf/core: Provide a kernel-internal interface to get to performance counters There are reasons for kernel code to ask for, and use, performance counters. For example, in CPU freq governors this tends to be a good idea, but there are other examples possible as well of course. This patch adds the needed bits to do enable this functionality; they have been tested in an experimental cpufreq driver that I'm working on, and the changes are all that I needed to access counters properly. [fweisbec@gmail.com: added pid to perf_event_create_kernel_counter so that we can profile a particular task too TODO: Have a better error reporting, don't just return NULL in fail case.] v2: Remove the wrong comment about the fact perf_event_create_kernel_counter must be called from a kernel thread. Signed-off-by: Arjan van de Ven Acked-by: Peter Zijlstra Cc: "K.Prasad" Cc: Alan Stern Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Jan Kiszka Cc: Jiri Slaby Cc: Li Zefan Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Paul Mundt Cc: Jan Kiszka Cc: Avi Kivity LKML-Reference: <20090925122556.2f8bd939@infradead.org> Signed-off-by: Frederic Weisbecker --- include/linux/perf_event.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index df9d964c15fc..fa151d49a2ee 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -744,6 +744,12 @@ extern int hw_perf_group_sched_in(struct perf_event *group_leader, struct perf_cpu_context *cpuctx, struct perf_event_context *ctx, int cpu); extern void perf_event_update_userpage(struct perf_event *event); +extern int perf_event_release_kernel(struct perf_event *event); +extern struct perf_event * +perf_event_create_kernel_counter(struct perf_event_attr *attr, + int cpu, + pid_t pid); +extern u64 perf_event_read_value(struct perf_event *event); struct perf_sample_data { u64 type; -- cgit v1.2.3 From 97eaf5300b9d0cd99c310bf8c4a0f2f3296d88a3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 18 Oct 2009 15:33:50 +0200 Subject: perf/core: Add a callback to perf events A simple callback in a perf event can be used for multiple purposes. For example it is useful for triggered based events like hardware breakpoints that need a callback to dispatch a triggered breakpoint event. v2: Simplify a bit the callback attribution as suggested by Paul Mackerras Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: "K.Prasad" Cc: Alan Stern Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Paul Mackerras Cc: Mike Galbraith Cc: Paul Mundt --- include/linux/perf_event.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fa151d49a2ee..8d54e6d25eeb 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -544,6 +544,8 @@ struct perf_pending_entry { void (*func)(struct perf_pending_entry *); }; +typedef void (*perf_callback_t)(struct perf_event *, void *); + /** * struct perf_event - performance event kernel representation: */ @@ -639,6 +641,8 @@ struct perf_event { struct event_filter *filter; #endif + perf_callback_t callback; + #endif /* CONFIG_PERF_EVENTS */ }; @@ -748,7 +752,8 @@ extern int perf_event_release_kernel(struct perf_event *event); extern struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, - pid_t pid); + pid_t pid, + perf_callback_t callback); extern u64 perf_event_read_value(struct perf_event *event); struct perf_sample_data { -- cgit v1.2.3 From 24f1e32c60c45c89a997c73395b69c8af6f0a84e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Sep 2009 19:22:48 +0200 Subject: hw-breakpoints: Rewrite the hw-breakpoints layer on top of perf events This patch rebase the implementation of the breakpoints API on top of perf events instances. Each breakpoints are now perf events that handle the register scheduling, thread/cpu attachment, etc.. The new layering is now made as follows: ptrace kgdb ftrace perf syscall \ | / / \ | / / / Core breakpoint API / / | / | / Breakpoints perf events | | Breakpoints PMU ---- Debug Register constraints handling (Part of core breakpoint API) | | Hardware debug registers Reasons of this rewrite: - Use the centralized/optimized pmu registers scheduling, implying an easier arch integration - More powerful register handling: perf attributes (pinned/flexible events, exclusive/non-exclusive, tunable period, etc...) Impact: - New perf ABI: the hardware breakpoints counters - Ptrace breakpoints setting remains tricky and still needs some per thread breakpoints references. Todo (in the order): - Support breakpoints perf counter events for perf tools (ie: implement perf_bpcounter_event()) - Support from perf tools Changes in v2: - Follow the perf "event " rename - The ptrace regression have been fixed (ptrace breakpoint perf events weren't released when a task ended) - Drop the struct hw_breakpoint and store generic fields in perf_event_attr. - Separate core and arch specific headers, drop asm-generic/hw_breakpoint.h and create linux/hw_breakpoint.h - Use new generic len/type for breakpoint - Handle off case: when breakpoints api is not supported by an arch Changes in v3: - Fix broken CONFIG_KVM, we need to propagate the breakpoint api changes to kvm when we exit the guest and restore the bp registers to the host. Changes in v4: - Drop the hw_breakpoint_restore() stub as it is only used by KVM - EXPORT_SYMBOL_GPL hw_breakpoint_restore() as KVM can be built as a module - Restore the breakpoints unconditionally on kvm guest exit: TIF_DEBUG_THREAD doesn't anymore cover every cases of running breakpoints and vcpu->arch.switch_db_regs might not always be set when the guest used debug registers. (Waiting for a reliable optimization) Changes in v5: - Split-up the asm-generic/hw-breakpoint.h moving to linux/hw_breakpoint.h into a separate patch - Optimize the breakpoints restoring while switching from kvm guest to host. We only want to restore the state if we have active breakpoints to the host, otherwise we don't care about messed-up address registers. - Add asm/hw_breakpoint.h to Kbuild - Fix bad breakpoint type in trace_selftest.c Changes in v6: - Fix wrong header inclusion in trace.h (triggered a build error with CONFIG_FTRACE_SELFTEST Signed-off-by: Frederic Weisbecker Cc: Prasad Cc: Alan Stern Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Jan Kiszka Cc: Jiri Slaby Cc: Li Zefan Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Paul Mundt --- include/linux/perf_event.h | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8d54e6d25eeb..cead64ea6c15 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -18,6 +18,10 @@ #include #include +#ifdef CONFIG_HAVE_HW_BREAKPOINT +#include +#endif + /* * User-space ABI bits: */ @@ -31,6 +35,7 @@ enum perf_type_id { PERF_TYPE_TRACEPOINT = 2, PERF_TYPE_HW_CACHE = 3, PERF_TYPE_RAW = 4, + PERF_TYPE_BREAKPOINT = 5, PERF_TYPE_MAX, /* non-ABI */ }; @@ -207,6 +212,15 @@ struct perf_event_attr { __u32 wakeup_events; /* wakeup every n events */ __u32 wakeup_watermark; /* bytes before wakeup */ }; + + union { + struct { /* Hardware breakpoint info */ + __u64 bp_addr; + __u32 bp_type; + __u32 bp_len; + }; + }; + __u32 __reserved_2; __u64 __reserved_3; @@ -476,6 +490,11 @@ struct hw_perf_event { atomic64_t count; struct hrtimer hrtimer; }; +#ifdef CONFIG_HAVE_HW_BREAKPOINT + union { /* breakpoint */ + struct arch_hw_breakpoint info; + }; +#endif }; atomic64_t prev_count; u64 sample_period; @@ -588,7 +607,7 @@ struct perf_event { u64 tstamp_running; u64 tstamp_stopped; - struct perf_event_attr attr; + struct perf_event_attr attr; struct hw_perf_event hw; struct perf_event_context *ctx; @@ -643,6 +662,8 @@ struct perf_event { perf_callback_t callback; + perf_callback_t event_callback; + #endif /* CONFIG_PERF_EVENTS */ }; @@ -831,6 +852,7 @@ extern int sysctl_perf_event_sample_rate; extern void perf_event_init(void); extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size); +extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags #define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ @@ -865,6 +887,8 @@ static inline int perf_event_task_enable(void) { return -EINVAL; } static inline void perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { } +static inline void +perf_bp_event(struct perf_event *event, void *data) { } static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } -- cgit v1.2.3 From 4c49b12853fbb5eff4849b7b6a1e895776f027a1 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 13 Nov 2009 21:47:33 -0800 Subject: perf_event: Fix invalid type in ioctl definition u64 is invalid in userspace headers, including ioctl definitions; use __u64 instead Signed-off-by: Arjan van de Ven Cc: LKML-Reference: <20091113214733.7cd76be9@infradead.org> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 45b56faf5cdc..ec3768a81058 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -219,7 +219,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_DISABLE _IO ('$', 1) #define PERF_EVENT_IOC_REFRESH _IO ('$', 2) #define PERF_EVENT_IOC_RESET _IO ('$', 3) -#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64) +#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) #define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) -- cgit v1.2.3 From 559fdc3c1b624edb1933a875022fe7e27934d11c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 16 Nov 2009 12:45:14 +0100 Subject: perf_event: Optimize perf_output_lock() The purpose of perf_output_{un,}lock() is to: 1) avoid publishing incomplete data [ possible when publishing a head that is ahead of an entry that is still being written ] 2) guarantee fwd progress [ a simple refcount on pending writers doesn't need to drop to 0, making it so would end up implementing something like forced quiecent states of RCU ] To satisfy the above without undue complexity it serializes between CPUs, this means that a pending writer can only be the same cpu in a nested context, and since (under normal operation) a cpu always makes progress we're good -- if the head is only published when the bottom most writer completes. Now we don't need to disable IRQs in order to serialize between CPUs, disabling preemption ought to be sufficient, esp since we already deal with nesting due to NMIs. This avoids potentially expensive (and needless) local IRQ disable/enable ops. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <1258373161.26714.254.camel@laptop> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index df4e73e33774..7f87563c8485 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -714,7 +714,6 @@ struct perf_output_handle { int nmi; int sample; int locked; - unsigned long flags; }; #ifdef CONFIG_PERF_EVENTS -- cgit v1.2.3 From 453f19eea7dbad837425e9b07d84568d14898794 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:43 +0100 Subject: perf: Allow for custom overflow handlers in-kernel perf users might wish to have custom actions on the sample interrupt. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212508.222339539@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b5cdac0de370..a430ac3074af 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -567,6 +567,8 @@ struct perf_pending_entry { typedef void (*perf_callback_t)(struct perf_event *, void *); +struct perf_sample_data; + /** * struct perf_event - performance event kernel representation: */ @@ -658,6 +660,10 @@ struct perf_event { struct pid_namespace *ns; u64 id; + void (*overflow_handler)(struct perf_event *event, + int nmi, struct perf_sample_data *data, + struct pt_regs *regs); + #ifdef CONFIG_EVENT_PROFILE struct event_filter *filter; #endif -- cgit v1.2.3 From 59ed446f792cc07d37b1536b9c4664d14e25e425 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:55 +0100 Subject: perf: Fix event scaling for inherited counters Properly account the full hierarchy of counters for both the count (we already did so) and the scale times (new). Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212509.153379276@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a430ac3074af..36fe89f72641 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -782,7 +782,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, pid_t pid, perf_callback_t callback); -extern u64 perf_event_read_value(struct perf_event *event); +extern u64 perf_event_read_value(struct perf_event *event, + u64 *enabled, u64 *running); struct perf_sample_data { u64 type; -- cgit v1.2.3 From ce71b9df8893ec954e56c5979df6da274f20f65e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Nov 2009 05:26:55 +0100 Subject: tracing: Use the perf recursion protection from trace event When we commit a trace to perf, we first check if we are recursing in the same buffer so that we don't mess-up the buffer with a recursing trace. But later on, we do the same check from perf to avoid commit recursion. The recursion check is desired early before we touch the buffer but we want to do this check only once. Then export the recursion protection from perf and use it from the trace events before submitting a trace. v2: Put appropriate Reported-by tag Reported-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Steven Rostedt Cc: Masami Hiramatsu Cc: Jason Baron LKML-Reference: <1258864015-10579-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 36fe89f72641..74e98b1d3391 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -874,6 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); +extern int perf_swevent_get_recursion_context(int **recursion); +extern void perf_swevent_put_recursion_context(int *recursion); #else static inline void perf_event_task_sched_in(struct task_struct *task, int cpu) { } @@ -902,6 +904,8 @@ static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } +static int perf_swevent_get_recursion_context(int **recursion) { return -1; } +static void perf_swevent_put_recursion_context(int *recursion) { } #endif -- cgit v1.2.3 From 4ed7c92d68a5387ba5f7030dc76eab03558e27f5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Nov 2009 11:37:29 +0100 Subject: perf_events: Undo some recursion damage Make perf_swevent_get_recursion_context return a context number and disable preemption. This could be used to remove the IRQ disable from the trace bit and index the per-cpu buffer with. Signed-off-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Paul Mackerras LKML-Reference: <20091123103819.993226816@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 74e98b1d3391..43adbd7f0010 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -874,8 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); -extern int perf_swevent_get_recursion_context(int **recursion); -extern void perf_swevent_put_recursion_context(int *recursion); +extern int perf_swevent_get_recursion_context(void); +extern void perf_swevent_put_recursion_context(int rctx); #else static inline void perf_event_task_sched_in(struct task_struct *task, int cpu) { } @@ -904,8 +904,8 @@ static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } -static int perf_swevent_get_recursion_context(int **recursion) { return -1; } -static void perf_swevent_put_recursion_context(int *recursion) { } +static inline int perf_swevent_get_recursion_context(void) { return -1; } +static inline void perf_swevent_put_recursion_context(int rctx) { } #endif -- cgit v1.2.3