summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIgor Nabirushkin <inabirushkin@nvidia.com>2014-01-27 12:42:30 +0400
committerRiham Haidar <rhaidar@nvidia.com>2014-02-19 16:09:42 -0800
commita9ba97327795942c221dccd49209a399fb1c3c34 (patch)
tree54f309304308aa0e5f207f5a837d9590e6f14d73
parentd88d5588b4065128fd3eb7300ea498954d982441 (diff)
tegra-profiler: non-disruptive counter tracking
Tegra Profiler: do not affect the already used counters Bug 1447839 Change-Id: I4da319e6c2bc853a63c9e1ae4210be9b5a60cc3b Signed-off-by: Igor Nabirushkin <inabirushkin@nvidia.com> Reviewed-on: http://git-master/r/365898 (cherry picked from commit 5cefbee263b6616ef748cbd848bb14752da36c52) Reviewed-on: http://git-master/r/368216 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Tested-by: Maxim Morin <mmorin@nvidia.com> Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
-rw-r--r--drivers/misc/tegra-profiler/armv7_pmu.c588
-rw-r--r--drivers/misc/tegra-profiler/armv7_pmu.h21
-rw-r--r--drivers/misc/tegra-profiler/main.c1
-rw-r--r--drivers/misc/tegra-profiler/version.h2
4 files changed, 446 insertions, 166 deletions
diff --git a/drivers/misc/tegra-profiler/armv7_pmu.c b/drivers/misc/tegra-profiler/armv7_pmu.c
index 9ca4e535b84d..5a3c20b1f7c0 100644
--- a/drivers/misc/tegra-profiler/armv7_pmu.c
+++ b/drivers/misc/tegra-profiler/armv7_pmu.c
@@ -16,7 +16,10 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/bitmap.h>
+#include <linux/slab.h>
+
#include <asm/cputype.h>
#include <asm/pmu.h>
@@ -28,7 +31,20 @@
static struct armv7_pmu_ctx pmu_ctx;
-DEFINE_PER_CPU(u32[QUADD_MAX_PMU_COUNTERS], pmu_prev_val);
+struct quadd_pmu_info {
+ DECLARE_BITMAP(used_cntrs, QUADD_MAX_PMU_COUNTERS);
+ u32 prev_vals[QUADD_MAX_PMU_COUNTERS];
+ int is_already_active;
+};
+
+struct quadd_cntrs_info {
+ int pcntrs;
+ int ccntr;
+
+ spinlock_t lock;
+};
+
+static DEFINE_PER_CPU(struct quadd_pmu_info, cpu_pmu_info);
static unsigned quadd_armv7_a9_events_map[QUADD_EVENT_TYPE_MAX] = {
[QUADD_EVENT_TYPE_INSTRUCTIONS] =
@@ -79,138 +95,238 @@ static unsigned quadd_armv7_a15_events_map[QUADD_EVENT_TYPE_MAX] = {
QUADD_ARMV7_UNSUPPORTED_EVENT,
};
-static u32 armv7_pmu_pmnc_read(void)
+static inline u32
+armv7_pmu_pmnc_read(void)
{
u32 val;
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
return val;
}
-static void armv7_pmu_pmnc_write(u32 val)
+static inline void
+armv7_pmu_pmnc_write(u32 val)
{
- val &= QUADD_ARMV7_PMNC_MASK;
- asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
+ /* Write Performance MoNitor Control (PMNC) register */
+ asm volatile("mcr p15, 0, %0, c9, c12, 0" : :
+ "r"(val & QUADD_ARMV7_PMNC_MASK));
}
-static void armv7_pmu_pmnc_enable_counter(int index)
+static inline u32
+armv7_pmu_cntens_read(void)
{
u32 val;
- if (index == QUADD_ARMV7_CYCLE_COUNTER)
- val = QUADD_ARMV7_CCNT;
- else
- val = 1 << index;
+ /* Read CouNT ENable Set (CNTENS) register */
+ asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(val));
+ return val;
+}
+static inline void
+armv7_pmu_cntens_write(u32 val)
+{
+ /* Write CouNT ENable Set (CNTENS) register */
asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
}
-static void armv7_pmu_select_counter(unsigned int idx)
+static inline void
+armv7_pmu_cntenc_write(u32 val)
{
- u32 val;
+ /* Write CouNT ENable Clear (CNTENC) register */
+ asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
+}
- val = idx & QUADD_ARMV7_SELECT_MASK;
- asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
+static inline void
+armv7_pmu_pmnxsel_write(u32 val)
+{
+ /* Write Performance Counter SELection (PMNXSEL) register */
+ asm volatile("mcr p15, 0, %0, c9, c12, 5" : :
+ "r" (val & QUADD_ARMV7_SELECT_MASK));
}
-static u32 armv7_pmu_adjust_value(u32 value, int event_id)
+static inline u32
+armv7_pmu_ccnt_read(void)
{
- /*
- * Cortex A8/A9: l1 cache performance counters
- * don't differentiate between read and write data accesses/misses,
- * so currently we are devided by two
- */
- if (pmu_ctx.l1_cache_rw &&
- (pmu_ctx.arch == QUADD_ARM_CPU_TYPE_CORTEX_A8 ||
- pmu_ctx.arch == QUADD_ARM_CPU_TYPE_CORTEX_A9) &&
- (event_id == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES ||
- event_id == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)) {
- return value / 2;
- }
- return value;
+ u32 val;
+
+ /* Read Cycle CouNT (CCNT) register */
+ asm volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r"(val));
+ return val;
}
-static u32 armv7_pmu_read_counter(int idx)
+static inline void
+armv7_pmu_ccnt_write(u32 val)
{
- u32 val = 0;
+ /* Write Cycle CouNT (CCNT) register */
+ asm volatile ("mcr p15, 0, %0, c9, c13, 0" : : "r"(val));
+}
- if (idx == QUADD_ARMV7_CYCLE_COUNTER) {
- /* Cycle count register (PMCCNTR) reading */
- asm volatile ("MRC p15, 0, %0, c9, c13, 0" : "=r"(val));
- } else {
- /* counter selection*/
- armv7_pmu_select_counter(idx);
- /* event count register reading */
- asm volatile ("MRC p15, 0, %0, c9, c13, 2" : "=r"(val));
- }
+static inline u32
+armv7_pmu_pmcnt_read(void)
+{
+ u32 val;
+ /* Read Performance Monitor CouNT (PMCNTx) registers */
+ asm volatile ("mrc p15, 0, %0, c9, c13, 2" : "=r"(val));
return val;
}
-static __attribute__((unused)) void armv7_pmu_write_counter(int idx, u32 value)
+static inline void
+armv7_pmu_pmcnt_write(u32 val)
{
- if (idx == QUADD_ARMV7_CYCLE_COUNTER) {
- /* Cycle count register (PMCCNTR) writing */
- asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
- } else {
- /* counter selection*/
- armv7_pmu_select_counter(idx);
- /* event count register writing */
- asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
- }
+ /* Write Performance Monitor CouNT (PMCNTx) registers */
+ asm volatile ("mcr p15, 0, %0, c9, c13, 2" : : "r"(val));
}
-static void armv7_pmu_event_select(u32 event)
+static inline void
+armv7_pmu_evtsel_write(u32 event)
{
- event &= QUADD_ARMV7_EVTSEL_MASK;
- asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (event));
+ /* Write Event SELection (EVTSEL) register */
+ asm volatile("mcr p15, 0, %0, c9, c13, 1" : :
+ "r" (event & QUADD_ARMV7_EVTSEL_MASK));
}
-static __attribute__((unused)) void armv7_pmnc_enable_interrupt(int idx)
+static inline u32
+armv7_pmu_intens_read(void)
{
u32 val;
- if (idx == QUADD_ARMV7_CYCLE_COUNTER)
- val = QUADD_ARMV7_CCNT;
- else
- val = 1 << idx;
+ /* Read INTerrupt ENable Set (INTENS) register */
+ asm volatile ("mrc p15, 0, %0, c9, c14, 1" : "=r"(val));
+ return val;
+}
+
+static inline void
+armv7_pmu_intens_write(u32 val)
+{
+ /* Write INTerrupt ENable Set (INTENS) register */
+ asm volatile ("mcr p15, 0, %0, c9, c14, 1" : : "r"(val));
+}
+
+static inline void
+armv7_pmu_intenc_write(u32 val)
+{
+ /* Write INTerrupt ENable Clear (INTENC) register */
+ asm volatile ("mcr p15, 0, %0, c9, c14, 2" : : "r"(val));
+}
- asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
+static void enable_counter(int idx)
+{
+ armv7_pmu_cntens_write(1UL << idx);
}
-static __attribute__((unused)) void armv7_pmnc_disable_interrupt(int idx)
+static void disable_counter(int idx)
+{
+ armv7_pmu_cntenc_write(1UL << idx);
+}
+
+static void select_counter(unsigned int counter)
+{
+ armv7_pmu_pmnxsel_write(counter);
+}
+
+static int is_pmu_enabled(void)
+{
+ u32 pmnc = armv7_pmu_pmnc_read();
+
+ if (pmnc & QUADD_ARMV7_PMNC_E) {
+ u32 cnten = armv7_pmu_cntens_read();
+ cnten &= pmu_ctx.counters_mask | QUADD_ARMV7_CCNT;
+ return cnten ? 1 : 0;
+ }
+
+ return 0;
+}
+
+static u32 read_counter(int idx)
{
u32 val;
- if (idx == QUADD_ARMV7_CYCLE_COUNTER)
- val = QUADD_ARMV7_CCNT;
- else
- val = 1 << idx;
+ if (idx == QUADD_ARMV7_CCNT_BIT) {
+ val = armv7_pmu_ccnt_read();
+ } else {
+ select_counter(idx);
+ val = armv7_pmu_pmcnt_read();
+ }
- asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
+ return val;
}
-static void armv7_pmnc_disable_all_interrupts(void)
+static void write_counter(int idx, u32 value)
{
- u32 val = QUADD_ARMV7_CCNT | pmu_ctx.counters_mask;
+ if (idx == QUADD_ARMV7_CCNT_BIT) {
+ armv7_pmu_ccnt_write(value);
+ } else {
+ select_counter(idx);
+ armv7_pmu_pmcnt_write(value);
+ }
+}
+
+static int
+get_free_counters(unsigned long *bitmap, int nbits, int *ccntr)
+{
+ int cc;
+ u32 cntens;
+
+ cntens = armv7_pmu_cntens_read();
+ cntens = ~cntens & (pmu_ctx.counters_mask | QUADD_ARMV7_CCNT);
+
+ bitmap_zero(bitmap, nbits);
+ bitmap_copy(bitmap, (unsigned long *)&cntens,
+ BITS_PER_BYTE * sizeof(u32));
+
+ cc = (cntens & QUADD_ARMV7_CCNT) ? 1 : 0;
+
+ if (ccntr)
+ *ccntr = cc;
+
+ return bitmap_weight(bitmap, BITS_PER_BYTE * sizeof(u32)) - cc;
+}
+
+static u32 armv7_pmu_adjust_value(u32 value, int event_id)
+{
+ /*
+ * Cortex A8/A9: l1 cache performance counters
+ * don't differentiate between read and write data accesses/misses,
+ * so currently the value is divided by two
+ */
+ if (pmu_ctx.l1_cache_rw &&
+ (pmu_ctx.arch == QUADD_ARM_CPU_TYPE_CORTEX_A8 ||
+ pmu_ctx.arch == QUADD_ARM_CPU_TYPE_CORTEX_A9) &&
+ (event_id == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES ||
+ event_id == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)) {
+ return value / 2;
+ }
+ return value;
+}
- asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
+static void __maybe_unused
+disable_interrupt(int idx)
+{
+ armv7_pmu_intenc_write(1UL << idx);
}
-static void armv7_pmnc_reset_overflow_flags(void)
+static void
+disable_all_interrupts(void)
{
u32 val = QUADD_ARMV7_CCNT | pmu_ctx.counters_mask;
+ armv7_pmu_intenc_write(val);
+}
+static void
+armv7_pmnc_reset_overflow_flags(void)
+{
+ u32 val = QUADD_ARMV7_CCNT | pmu_ctx.counters_mask;
asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
}
-static inline void select_event(unsigned int idx, unsigned int event)
+static void
+select_event(unsigned int idx, unsigned int event)
{
- /* counter selection */
- armv7_pmu_select_counter(idx);
- armv7_pmu_event_select(event);
+ select_counter(idx);
+ armv7_pmu_evtsel_write(event);
}
-static inline void disable_all_counters(void)
+static void disable_all_counters(void)
{
u32 val;
@@ -218,9 +334,11 @@ static inline void disable_all_counters(void)
val = armv7_pmu_pmnc_read();
if (val & QUADD_ARMV7_PMNC_E)
armv7_pmu_pmnc_write(val & ~QUADD_ARMV7_PMNC_E);
+
+ armv7_pmu_cntenc_write(QUADD_ARMV7_CCNT | pmu_ctx.counters_mask);
}
-static inline void enable_all_counters(void)
+static void enable_all_counters(void)
{
u32 val;
@@ -230,13 +348,7 @@ static inline void enable_all_counters(void)
armv7_pmu_pmnc_write(val);
}
-static inline void quadd_init_pmu(void)
-{
- armv7_pmnc_reset_overflow_flags();
- armv7_pmnc_disable_all_interrupts();
-}
-
-static inline void reset_all_counters(void)
+static void reset_all_counters(void)
{
u32 val;
@@ -245,6 +357,12 @@ static inline void reset_all_counters(void)
armv7_pmu_pmnc_write(val);
}
+static void quadd_init_pmu(void)
+{
+ armv7_pmnc_reset_overflow_flags();
+ disable_all_interrupts();
+}
+
static int pmu_enable(void)
{
int err;
@@ -258,45 +376,114 @@ static int pmu_enable(void)
return 0;
}
+static void __pmu_disable(void *arg)
+{
+ struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
+
+ if (!pi->is_already_active) {
+ pr_info("[%d] reset all counters\n",
+ smp_processor_id());
+
+ disable_all_counters();
+ reset_all_counters();
+ } else {
+ int idx;
+
+ for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
+ pr_info("[%d] reset counter: %d\n",
+ smp_processor_id(), idx);
+
+ disable_counter(idx);
+ write_counter(idx, 0);
+ }
+ }
+}
+
static void pmu_disable(void)
{
release_pmu(ARM_PMU_DEVICE_CPU);
+ on_each_cpu(__pmu_disable, NULL, 1);
pr_info("pmu was released\n");
}
static void pmu_start(void)
{
- int i, idx;
+ int idx = 0, pcntrs, ccntr;
u32 event;
- u32 *prevp = __get_cpu_var(pmu_prev_val);
+ DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
+ struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
+ u32 *prevp = pi->prev_vals;
+ struct quadd_pmu_event_info *ei;
- disable_all_counters();
- quadd_init_pmu();
+ bitmap_zero(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS);
- for (i = 0; i < pmu_ctx.nr_used_counters; i++) {
- struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i];
+ if (is_pmu_enabled()) {
+ pi->is_already_active = 1;
+ } else {
+ disable_all_counters();
+ quadd_init_pmu();
+
+ pi->is_already_active = 0;
+ }
+
+ pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);
+
+ list_for_each_entry(ei, &pmu_ctx.used_events, list) {
+ int index;
- prevp[i] = 0;
+ *prevp++ = 0;
- event = pmu_event->hw_value;
- idx = pmu_event->counter_idx;
+ event = ei->hw_value;
- if (idx != QUADD_ARMV7_CYCLE_COUNTER)
- select_event(idx, event);
+ if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
+ if (!ccntr) {
+ pr_err_once("Error: cpu cycles counter is already occupied\n");
+ return;
+ }
+ index = QUADD_ARMV7_CCNT_BIT;
+ } else {
+ if (!pcntrs--) {
+ pr_err_once("Error: too many performance events\n");
+ return;
+ }
+
+ index = find_next_bit(free_bitmap,
+ QUADD_MAX_PMU_COUNTERS, idx);
+ if (index >= QUADD_MAX_PMU_COUNTERS) {
+ pr_err_once("Error: too many events\n");
+ return;
+ }
+ idx = index + 1;
+ select_event(index, event);
+ }
+ set_bit(index, pi->used_cntrs);
- armv7_pmu_pmnc_enable_counter(idx);
+ write_counter(index, 0);
+ enable_counter(index);
}
- reset_all_counters();
- enable_all_counters();
+ if (!pi->is_already_active) {
+ reset_all_counters();
+ enable_all_counters();
+ }
qm_debug_start_source(QUADD_EVENT_SOURCE_PMU);
}
static void pmu_stop(void)
{
- reset_all_counters();
- disable_all_counters();
+ int idx;
+ struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
+
+ if (!pi->is_already_active) {
+ disable_all_counters();
+ reset_all_counters();
+ } else {
+ for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
+ disable_counter(idx);
+ write_counter(idx, 0);
+ }
+ }
qm_debug_stop_source(QUADD_EVENT_SOURCE_PMU);
}
@@ -304,104 +491,185 @@ static void pmu_stop(void)
static int __maybe_unused
pmu_read(struct event_data *events, int max_events)
{
- int idx, i, nr;
u32 val;
- u32 *prevp = __get_cpu_var(pmu_prev_val);
+ int idx = 0, i = 0;
+ struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
+ u32 *prevp = pi->prev_vals;
+ struct quadd_pmu_event_info *ei;
- if (pmu_ctx.nr_used_counters == 0) {
- pr_warn_once("error: counters were not initialized\n");
+ if (bitmap_empty(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS)) {
+ pr_err_once("Error: counters were not initialized\n");
return 0;
}
- nr = min_t(int, pmu_ctx.nr_used_counters, max_events);
+ list_for_each_entry(ei, &pmu_ctx.used_events, list) {
+ int index;
+
+ if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
+ if (!test_bit(QUADD_ARMV7_CCNT_BIT, pi->used_cntrs)) {
+ pr_err_once("Error: ccntr is not used\n");
+ return 0;
+ }
+ index = QUADD_ARMV7_CCNT_BIT;
+ } else {
+ index = find_next_bit(pi->used_cntrs,
+ QUADD_MAX_PMU_COUNTERS, idx);
+ idx = index + 1;
+
+ if (index >= QUADD_MAX_PMU_COUNTERS) {
+ pr_err_once("Error: perf counter is not used\n");
+ return 0;
+ }
+ }
- for (i = 0; i < nr; i++) {
- struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i];
+ val = read_counter(index);
+ val = armv7_pmu_adjust_value(val, ei->quadd_event_id);
- idx = pmu_event->counter_idx;
+ events->event_source = QUADD_EVENT_SOURCE_PMU;
+ events->event_id = ei->quadd_event_id;
- val = armv7_pmu_read_counter(idx);
- val = armv7_pmu_adjust_value(val, pmu_event->quadd_event_id);
+ events->val = val;
+ events->prev_val = *prevp;
- events[i].event_source = QUADD_EVENT_SOURCE_PMU;
- events[i].event_id = pmu_event->quadd_event_id;
+ *prevp = val;
- events[i].val = val;
- events[i].prev_val = prevp[i];
+ qm_debug_read_counter(events->event_id, events->prev_val,
+ events->val);
- prevp[i] = val;
+ if (++i >= max_events)
+ break;
- qm_debug_read_counter(events[i].event_id, events[i].prev_val,
- events[i].val);
+ events++;
+ prevp++;
}
- return nr;
+ return i;
}
static int __maybe_unused
pmu_read_emulate(struct event_data *events, int max_events)
{
- int i, nr;
+ int i = 0;
static u32 val = 100;
- u32 *prevp = __get_cpu_var(pmu_prev_val);
+ struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
+ u32 *prevp = pi->prev_vals;
+ struct quadd_pmu_event_info *ei;
- nr = min_t(int, pmu_ctx.nr_used_counters, max_events);
-
- for (i = 0; i < nr; i++) {
+ list_for_each_entry(ei, &pmu_ctx.used_events, list) {
if (val > 200)
val = 100;
- events[i].event_id = prevp[i];
- events[i].val = val;
+ events->event_id = *prevp;
+ events->val = val;
+ *prevp = val;
val += 5;
+
+ if (++i >= max_events)
+ break;
+
+ events++;
+ prevp++;
}
- return nr;
+ return i;
+}
+
+static void __get_free_counters(void *arg)
+{
+ int pcntrs, ccntr;
+ DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
+ struct quadd_cntrs_info *ci = arg;
+
+ pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);
+
+ spin_lock(&ci->lock);
+
+ ci->pcntrs = min_t(int, pcntrs, ci->pcntrs);
+
+ if (!ccntr)
+ ci->ccntr = 0;
+
+ pr_info("[%d] pcntrs/ccntr: %d/%d, free_bitmap: %#lx\n",
+ smp_processor_id(), pcntrs, ccntr, free_bitmap[0]);
+
+ spin_unlock(&ci->lock);
+}
+
+static void free_events(struct list_head *head)
+{
+ struct quadd_pmu_event_info *entry, *next;
+
+ list_for_each_entry_safe(entry, next, head, list) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
}
static int set_events(int *events, int size)
{
- int i, nr_l1_r = 0, nr_l1_w = 0, curr_idx = 0;
+ int free_pcntrs, err;
+ int i, nr_l1_r = 0, nr_l1_w = 0;
+ struct quadd_cntrs_info free_ci;
pmu_ctx.l1_cache_rw = 0;
- pmu_ctx.nr_used_counters = 0;
- if (!events || size == 0)
- return 0;
+ free_events(&pmu_ctx.used_events);
- if (size > QUADD_MAX_PMU_COUNTERS) {
- pr_err("Too many events (> %d)\n", QUADD_MAX_PMU_COUNTERS);
- return -ENOSPC;
- }
+ if (!events || !size)
+ return 0;
if (!pmu_ctx.current_map) {
pr_err("Invalid current_map\n");
return -ENODEV;
}
+ spin_lock_init(&free_ci.lock);
+ free_ci.pcntrs = QUADD_MAX_PMU_COUNTERS;
+ free_ci.ccntr = 1;
+
+ on_each_cpu(__get_free_counters, &free_ci, 1);
+
+ free_pcntrs = free_ci.pcntrs;
+ pr_info("free counters: pcntrs/ccntr: %d/%d\n",
+ free_pcntrs, free_ci.ccntr);
+
for (i = 0; i < size; i++) {
- struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i];
+ struct quadd_pmu_event_info *ei;
if (events[i] > QUADD_EVENT_TYPE_MAX) {
pr_err("Error event: %d\n", events[i]);
- return -EINVAL;
+ err = -EINVAL;
+ goto out_free;
}
- if (curr_idx >= pmu_ctx.nr_counters) {
- pr_err("Too many events (> %d)\n",
- pmu_ctx.nr_counters);
- return -ENOSPC;
+ ei = kzalloc(sizeof(*ei), GFP_KERNEL);
+ if (!ei) {
+ err = -ENOMEM;
+ goto out_free;
}
+ INIT_LIST_HEAD(&ei->list);
+ list_add_tail(&ei->list, &pmu_ctx.used_events);
+
if (events[i] == QUADD_EVENT_TYPE_CPU_CYCLES) {
- pmu_event->hw_value = QUADD_ARMV7_CPU_CYCLE_EVENT;
- pmu_event->counter_idx = QUADD_ARMV7_CYCLE_COUNTER;
+ ei->hw_value = QUADD_ARMV7_CPU_CYCLE_EVENT;
+ if (!free_ci.ccntr) {
+ pr_err("Error: cpu cycles counter is already occupied\n");
+ err = -EBUSY;
+ goto out_free;
+ }
} else {
- pmu_event->hw_value = pmu_ctx.current_map[events[i]];
- pmu_event->counter_idx = curr_idx++;
+ if (!free_pcntrs--) {
+ pr_err("Error: too many performance events\n");
+ err = -ENOSPC;
+ goto out_free;
+ }
+
+ ei->hw_value = pmu_ctx.current_map[events[i]];
}
- pmu_event->quadd_event_id = events[i];
+
+ ei->quadd_event_id = events[i];
if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES)
nr_l1_r++;
@@ -410,14 +678,17 @@ static int set_events(int *events, int size)
pr_info("Event has been added: id/pmu value: %s/%#x\n",
quadd_get_event_str(events[i]),
- pmu_event->hw_value);
+ ei->hw_value);
}
- pmu_ctx.nr_used_counters = size;
if (nr_l1_r > 0 && nr_l1_w > 0)
pmu_ctx.l1_cache_rw = 1;
return 0;
+
+out_free:
+ free_events(&pmu_ctx.used_events);
+ return err;
}
static int get_supported_events(int *events, int max_events)
@@ -435,14 +706,17 @@ static int get_supported_events(int *events, int max_events)
static int get_current_events(int *events, int max_events)
{
- int i;
+ int i = 0;
+ struct quadd_pmu_event_info *ei;
- max_events = min_t(int, pmu_ctx.nr_used_counters, max_events);
+ list_for_each_entry(ei, &pmu_ctx.used_events, list) {
+ events[i++] = ei->quadd_event_id;
- for (i = 0; i < max_events; i++)
- events[i] = pmu_ctx.pmu_events[i].quadd_event_id;
+ if (i >= max_events)
+ break;
+ }
- return max_events;
+ return i;
}
static struct quadd_event_source_interface pmu_armv7_int = {
@@ -476,7 +750,6 @@ struct quadd_event_source_interface *quadd_armv7_pmu_init(void)
case QUADD_ARM_CPU_PART_NUMBER_CORTEX_A9:
pmu_ctx.arch = QUADD_ARM_CPU_TYPE_CORTEX_A9;
strcpy(pmu_ctx.arch_name, "Cortex A9");
- pmu_ctx.nr_counters = 6;
pmu_ctx.counters_mask =
QUADD_ARMV7_COUNTERS_MASK_CORTEX_A9;
pmu_ctx.current_map = quadd_armv7_a9_events_map;
@@ -486,7 +759,6 @@ struct quadd_event_source_interface *quadd_armv7_pmu_init(void)
case QUADD_ARM_CPU_PART_NUMBER_CORTEX_A15:
pmu_ctx.arch = QUADD_ARM_CPU_TYPE_CORTEX_A15;
strcpy(pmu_ctx.arch_name, "Cortex A15");
- pmu_ctx.nr_counters = 6;
pmu_ctx.counters_mask =
QUADD_ARMV7_COUNTERS_MASK_CORTEX_A15;
pmu_ctx.current_map = quadd_armv7_a15_events_map;
@@ -496,13 +768,19 @@ struct quadd_event_source_interface *quadd_armv7_pmu_init(void)
default:
pmu_ctx.arch = QUADD_ARM_CPU_TYPE_UNKNOWN;
strcpy(pmu_ctx.arch_name, "Unknown");
- pmu_ctx.nr_counters = 0;
pmu_ctx.current_map = NULL;
break;
}
}
- pr_info("arch: %s, number of counters: %d\n",
- pmu_ctx.arch_name, pmu_ctx.nr_counters);
+ INIT_LIST_HEAD(&pmu_ctx.used_events);
+
+ pr_info("arch: %s\n", pmu_ctx.arch_name);
+
return pmu;
}
+
+void quadd_armv7_pmu_deinit(void)
+{
+ free_events(&pmu_ctx.used_events);
+}
diff --git a/drivers/misc/tegra-profiler/armv7_pmu.h b/drivers/misc/tegra-profiler/armv7_pmu.h
index 827fe4292a33..1e4b556b18ca 100644
--- a/drivers/misc/tegra-profiler/armv7_pmu.h
+++ b/drivers/misc/tegra-profiler/armv7_pmu.h
@@ -1,7 +1,7 @@
/*
* drivers/misc/tegra-profiler/armv7_pmu.h
*
- * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -17,6 +17,8 @@
#ifndef __ARMV7_PMU_H
#define __ARMV7_PMU_H
+#include <linux/list.h>
+
#define QUADD_ARM_CPU_IMPLEMENTER 0x41
enum {
@@ -32,32 +34,32 @@ enum {
#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A9 0xC090
#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A15 0xC0F0
-
#define QUADD_MAX_PMU_COUNTERS 32
struct quadd_pmu_event_info {
int quadd_event_id;
int hw_value;
- int counter_idx;
+
+ struct list_head list;
};
struct armv7_pmu_ctx {
int arch;
char arch_name[32];
- int nr_counters;
u32 counters_mask;
- struct quadd_pmu_event_info pmu_events[QUADD_MAX_PMU_COUNTERS];
- int nr_used_counters;
+ struct list_head used_events;
int l1_cache_rw;
int *current_map;
};
+
struct quadd_event_source_interface;
extern struct quadd_event_source_interface *quadd_armv7_pmu_init(void);
+extern void quadd_armv7_pmu_deinit(void);
/*
* PMNC Register
@@ -80,10 +82,9 @@ extern struct quadd_event_source_interface *quadd_armv7_pmu_init(void);
/* Mask for writable bits */
#define QUADD_ARMV7_PMNC_MASK 0x3f
-
-#define QUADD_ARMV7_CCNT (1 << 31) /* Cycle counter */
-
-#define QUADD_ARMV7_CYCLE_COUNTER -1
+/* Cycle counter */
+#define QUADD_ARMV7_CCNT_BIT 31
+#define QUADD_ARMV7_CCNT (1 << QUADD_ARMV7_CCNT_BIT)
/*
* CNTENS: counters enable reg
diff --git a/drivers/misc/tegra-profiler/main.c b/drivers/misc/tegra-profiler/main.c
index 01544d0418bc..c28748b01a6b 100644
--- a/drivers/misc/tegra-profiler/main.c
+++ b/drivers/misc/tegra-profiler/main.c
@@ -518,6 +518,7 @@ static void __exit quadd_module_exit(void)
quadd_comm_events_exit();
quadd_auth_deinit();
quadd_proc_deinit();
+ quadd_armv7_pmu_deinit();
}
module_init(quadd_module_init);
diff --git a/drivers/misc/tegra-profiler/version.h b/drivers/misc/tegra-profiler/version.h
index 7260267c4e12..d4a6b60f8ceb 100644
--- a/drivers/misc/tegra-profiler/version.h
+++ b/drivers/misc/tegra-profiler/version.h
@@ -18,7 +18,7 @@
#ifndef __QUADD_VERSION_H
#define __QUADD_VERSION_H
-#define QUADD_MODULE_VERSION "1.45"
+#define QUADD_MODULE_VERSION "1.46"
#define QUADD_MODULE_BRANCH "Dev"
#endif /* __QUADD_VERSION_H */