From 09e61b4f78498bd9f213b0a536e80b79507ea89f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Wed, 6 Jul 2016 18:02:43 +0200
Subject: perf/x86/intel: Rework the large PEBS setup code

In order to allow optimizing perf_pmu_sched_task() we must ensure
perf_sched_cb_{inc,dec}() are no longer called from NMI context; this
means that pmu::{start,stop}() can no longer use them.

Prepare for this by reworking the whole large PEBS setup code.

The current code relied on the cpuc->pebs_enabled state; however, since
that reflects the current active state as per pmu::{start,stop}(), we
can no longer rely on it.

Introduce two counters: cpuc->n_pebs and cpuc->n_large_pebs, which count
the total number of PEBS events and the number of PEBS events that have
FREERUNNING set, respectively. With this we can tell whether the current
setup requires a single-record interrupt threshold or can use a larger
buffer.

This also improves the code in that it re-enables the large threshold
once the PEBS event that required a single-record threshold is removed.

Signed-off-by: Peter Zijlstra (Intel)
Cc: Alexander Shishkin
Cc: Arnaldo Carvalho de Melo
Cc: Jiri Olsa
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Stephane Eranian
Cc: Thomas Gleixner
Cc: Vince Weaver
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/events/intel/ds.c | 102 +++++++++++++++++++++++++++++----------------
 1 file changed, 67 insertions(+), 35 deletions(-)

(limited to 'arch/x86/events/intel/ds.c')

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 7ce9f3f669e6..c791ff961079 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -806,9 +806,55 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 	return &emptyconstraint;
 }
 
-static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+/*
+ * We need the sched_task callback even for per-cpu events when we use
+ * the large interrupt threshold, such that we can provide PID and TID
+ * to PEBS samples.
+ */
+static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
+{
+	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
+}
+
+static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
+{
+	struct debug_store *ds = cpuc->ds;
+	u64 threshold;
+
+	if (cpuc->n_pebs == cpuc->n_large_pebs) {
+		threshold = ds->pebs_absolute_maximum -
+			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+	} else {
+		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+	}
+
+	ds->pebs_interrupt_threshold = threshold;
+}
+
+static void
+pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
+{
+	if (needed_cb != pebs_needs_sched_cb(cpuc)) {
+		if (!needed_cb)
+			perf_sched_cb_inc(pmu);
+		else
+			perf_sched_cb_dec(pmu);
+
+		pebs_update_threshold(cpuc);
+	}
+}
+
+static void intel_pmu_pebs_add(struct perf_event *event)
 {
-	return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	bool needed_cb = pebs_needs_sched_cb(cpuc);
+
+	cpuc->n_pebs++;
+	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+		cpuc->n_large_pebs++;
+
+	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_enable(struct perf_event *event)
@@ -816,12 +862,11 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	struct debug_store *ds = cpuc->ds;
-	bool first_pebs;
-	u64 threshold;
+
+	intel_pmu_pebs_add(event);
 
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
-	first_pebs = !pebs_is_enabled(cpuc);
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
 
 	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
@@ -830,46 +875,34 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 		cpuc->pebs_enabled |= 1ULL << 63;
 
 	/*
-	 * When the event is constrained enough we can use a larger
-	 * threshold and run the event with less frequent PMI.
+	 * Use auto-reload if possible to save a MSR write in the PMI.
+	 * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
 	 */
-	if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
-		threshold = ds->pebs_absolute_maximum -
-			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
-
-		if (first_pebs)
-			perf_sched_cb_inc(event->ctx->pmu);
-	} else {
-		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
-
-		/*
-		 * If not all events can use larger buffer,
-		 * roll back to threshold = 1
-		 */
-		if (!first_pebs &&
-		    (ds->pebs_interrupt_threshold > threshold))
-			perf_sched_cb_dec(event->ctx->pmu);
-	}
-
-	/* Use auto-reload if possible to save a MSR write in the PMI */
 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
 		ds->pebs_event_reset[hwc->idx] =
 			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
 	}
+}
+
+static void intel_pmu_pebs_del(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	bool needed_cb = pebs_needs_sched_cb(cpuc);
+
+	cpuc->n_pebs--;
+	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+		cpuc->n_large_pebs--;
 
-	if (first_pebs || ds->pebs_interrupt_threshold > threshold)
-		ds->pebs_interrupt_threshold = threshold;
+	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	struct debug_store *ds = cpuc->ds;
-	bool large_pebs = ds->pebs_interrupt_threshold >
-		ds->pebs_buffer_base + x86_pmu.pebs_record_size;
 
-	if (large_pebs)
+	if (cpuc->n_pebs == cpuc->n_large_pebs)
 		intel_pmu_drain_pebs_buffer();
 
 	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
 
@@ -879,13 +912,12 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
 		cpuc->pebs_enabled &= ~(1ULL << 63);
 
-	if (large_pebs && !pebs_is_enabled(cpuc))
-		perf_sched_cb_dec(event->ctx->pmu);
-
 	if (cpuc->enabled)
 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+
+	intel_pmu_pebs_del(event);
 }
 
 void intel_pmu_pebs_enable_all(void)
--
cgit v1.2.3
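
The bookkeeping this patch introduces can be followed with a small, self-contained
userspace model: cpuc->n_pebs counts all active PEBS events, cpuc->n_large_pebs
counts those that tolerate the large buffer, and both the interrupt threshold and
the sched_task callback reference are re-derived whenever the "all events are
large-capable" answer changes. This is an illustrative sketch only, not kernel
code: the struct layouts, the example constants and the sched_cb_refs counter are
made-up stand-ins for the real cpu_hw_events, debug_store and
perf_sched_cb_{inc,dec}() machinery.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct model_ds {
        uint64_t pebs_buffer_base;
        uint64_t pebs_absolute_maximum;
        uint64_t pebs_interrupt_threshold;
};

struct model_cpuc {
        int n_pebs;             /* all PEBS events scheduled on this CPU */
        int n_large_pebs;       /* subset that allows a multi-record buffer */
        int sched_cb_refs;      /* stands in for perf_sched_cb_{inc,dec}() */
        struct model_ds *ds;
};

static const uint64_t max_pebs_events = 4;      /* example value */
static const uint64_t pebs_record_size = 192;   /* example value */

/* Large threshold only when *every* active PEBS event tolerates it. */
static bool pebs_needs_sched_cb(struct model_cpuc *cpuc)
{
        return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
}

static void pebs_update_threshold(struct model_cpuc *cpuc)
{
        struct model_ds *ds = cpuc->ds;

        if (cpuc->n_pebs == cpuc->n_large_pebs)
                ds->pebs_interrupt_threshold = ds->pebs_absolute_maximum -
                        max_pebs_events * pebs_record_size;
        else    /* at least one event needs a PMI per record */
                ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
                        pebs_record_size;
}

static void pebs_update_state(bool needed_cb, struct model_cpuc *cpuc)
{
        /* Mirrors the patch above: act only when the answer changes. */
        if (needed_cb != pebs_needs_sched_cb(cpuc)) {
                if (!needed_cb)
                        cpuc->sched_cb_refs++;  /* perf_sched_cb_inc() */
                else
                        cpuc->sched_cb_refs--;  /* perf_sched_cb_dec() */

                pebs_update_threshold(cpuc);
        }
}

static void pebs_add(struct model_cpuc *cpuc, bool large)
{
        bool needed_cb = pebs_needs_sched_cb(cpuc);

        cpuc->n_pebs++;
        if (large)
                cpuc->n_large_pebs++;
        pebs_update_state(needed_cb, cpuc);
}

static void pebs_del(struct model_cpuc *cpuc, bool large)
{
        bool needed_cb = pebs_needs_sched_cb(cpuc);

        cpuc->n_pebs--;
        if (large)
                cpuc->n_large_pebs--;
        pebs_update_state(needed_cb, cpuc);
}

int main(void)
{
        struct model_ds ds = {
                .pebs_buffer_base = 0x1000,
                .pebs_absolute_maximum = 0x1000 + 64 * pebs_record_size,
        };
        struct model_cpuc cpuc = { .ds = &ds };

        pebs_add(&cpuc, true);  /* large threshold, sched_cb reference taken */
        pebs_add(&cpuc, false); /* falls back to the single-record threshold */
        pebs_del(&cpuc, false); /* large threshold is re-enabled */
        printf("threshold=%#llx sched_cb_refs=%d\n",
               (unsigned long long)ds.pebs_interrupt_threshold,
               cpuc.sched_cb_refs);
        return 0;
}

Adding the second, single-record event drops the threshold back to one record and
releases the callback reference; removing it restores the large threshold, which
is the re-enable behaviour the changelog calls out.
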
From 68f7082ffb0575154ccdec36109e293174f48a4c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Wed, 6 Jul 2016 18:02:43 +0200
Subject: perf/x86: Ensure perf_sched_cb_{inc,dec}() is only called from pmu::{add,del}()

Currently perf_sched_cb_{inc,dec}() are called from pmu::{start,stop}(),
which has the problem that this can happen from NMI context; this makes
it hard to optimize perf_pmu_sched_task().

Furthermore, we really only need this accounting on pmu::{add,del}(), so
doing it from pmu::{start,stop}() is doing more work than we really
need.

Introduce x86_pmu::{add,del}() and wire up the LBR and PEBS.

Signed-off-by: Peter Zijlstra (Intel)
Cc: Alexander Shishkin
Cc: Arnaldo Carvalho de Melo
Cc: Jiri Olsa
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Stephane Eranian
Cc: Thomas Gleixner
Cc: Vince Weaver
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/events/intel/ds.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'arch/x86/events/intel/ds.c')

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index c791ff961079..248023f54c87 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -844,7 +844,7 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
 	}
 }
 
-static void intel_pmu_pebs_add(struct perf_event *event)
+void intel_pmu_pebs_add(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
@@ -863,8 +863,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	struct debug_store *ds = cpuc->ds;
 
-	intel_pmu_pebs_add(event);
-
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
@@ -884,7 +882,7 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	}
 }
 
-static void intel_pmu_pebs_del(struct perf_event *event)
+void intel_pmu_pebs_del(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
@@ -916,8 +914,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
-
-	intel_pmu_pebs_del(event);
 }
 
 void intel_pmu_pebs_enable_all(void)
--
cgit v1.2.3

From b6a32f023fcc9cbd1602f78a467fd8d41bbc9457 Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Thu, 18 Aug 2016 11:09:52 +0200
Subject: perf/x86: Fix PEBS threshold initialization

The latest PEBS rework could skip initialization of
ds->pebs_interrupt_threshold for PEBS events that use the single-record
threshold.

Make sure the PEBS threshold is always initialized.

Signed-off-by: Jiri Olsa
Cc: Alexander Shishkin
Cc: Arnaldo Carvalho de Melo
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Fixes: 09e61b4f7849 ("perf/x86/intel: Rework the large PEBS setup code")
Link: http://lkml.kernel.org/r/1471511392-29875-1-git-send-email-jolsa@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/events/intel/ds.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'arch/x86/events/intel/ds.c')

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 248023f54c87..e0288d555367 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -834,14 +834,24 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
 static void
 pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
 {
+	/*
+	 * Make sure we get updated with the first PEBS
+	 * event. It will trigger also during removal, but
+	 * that does not hurt:
+	 */
+	bool update = cpuc->n_pebs == 1;
+
 	if (needed_cb != pebs_needs_sched_cb(cpuc)) {
 		if (!needed_cb)
 			perf_sched_cb_inc(pmu);
 		else
 			perf_sched_cb_dec(pmu);
 
-		pebs_update_threshold(cpuc);
+		update = true;
 	}
+
+	if (update)
+		pebs_update_threshold(cpuc);
 }
 
 void intel_pmu_pebs_add(struct perf_event *event)
--
cgit v1.2.3
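
The hole closed by this last patch can be reproduced with an equally small
standalone model (again with placeholder values rather than kernel code): if the
very first event added is an ordinary single-record PEBS event,
pebs_needs_sched_cb() is false both before and after the add, the needed_cb
comparison never fires, and with the pre-fix logic ds->pebs_interrupt_threshold
would never be written. Forcing an update for the first event, as the patch does,
guarantees the threshold is programmed.

#include <stdbool.h>
#include <stdio.h>

/* Minimal state: only what the threshold decision needs. */
static int n_pebs, n_large_pebs;
static unsigned long long pebs_interrupt_threshold;     /* 0 = never written */

static bool pebs_needs_sched_cb(void)
{
        return n_pebs && (n_pebs == n_large_pebs);
}

static void pebs_update_threshold(void)
{
        /* Placeholder values; only "was it written at all" matters here. */
        pebs_interrupt_threshold = (n_pebs == n_large_pebs) ? 0x4000 : 0x1100;
}

static void pebs_update_state(bool needed_cb)
{
        /* The fix: always (re)program the threshold for the first event. */
        bool update = (n_pebs == 1);

        if (needed_cb != pebs_needs_sched_cb()) {
                /* perf_sched_cb_inc()/_dec() would go here */
                update = true;
        }

        if (update)
                pebs_update_threshold();
}

int main(void)
{
        /* First event added is a single-record PEBS event (not "large"). */
        bool needed_cb = pebs_needs_sched_cb();         /* false */

        n_pebs++;                                       /* intel_pmu_pebs_add() */
        pebs_update_state(needed_cb);

        /*
         * needed_cb (false) still equals pebs_needs_sched_cb() (false),
         * so only the "update" flag causes the threshold to be written.
         */
        printf("threshold=%#llx\n", pebs_interrupt_threshold);
        return 0;
}

Dropping the n_pebs == 1 initialization of update makes the program print a
threshold of 0, which models the uninitialized-threshold case the Fixes: tag
refers to.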