diff options
author | Igor Nabirushkin <inabirushkin@nvidia.com> | 2014-01-27 12:42:30 +0400 |
---|---|---|
committer | Riham Haidar <rhaidar@nvidia.com> | 2014-02-19 16:09:42 -0800 |
commit | a9ba97327795942c221dccd49209a399fb1c3c34 (patch) | |
tree | 54f309304308aa0e5f207f5a837d9590e6f14d73 | |
parent | d88d5588b4065128fd3eb7300ea498954d982441 (diff) |
tegra-profiler: non-disruptive counter tracking
Tegra Profiler: do not affect the already used counters
Bug 1447839
Change-Id: I4da319e6c2bc853a63c9e1ae4210be9b5a60cc3b
Signed-off-by: Igor Nabirushkin <inabirushkin@nvidia.com>
Reviewed-on: http://git-master/r/365898
(cherry picked from commit 5cefbee263b6616ef748cbd848bb14752da36c52)
Reviewed-on: http://git-master/r/368216
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Tested-by: Maxim Morin <mmorin@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
-rw-r--r-- | drivers/misc/tegra-profiler/armv7_pmu.c | 588 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/armv7_pmu.h | 21 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/main.c | 1 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/version.h | 2 |
4 files changed, 446 insertions, 166 deletions
diff --git a/drivers/misc/tegra-profiler/armv7_pmu.c b/drivers/misc/tegra-profiler/armv7_pmu.c index 9ca4e535b84d..5a3c20b1f7c0 100644 --- a/drivers/misc/tegra-profiler/armv7_pmu.c +++ b/drivers/misc/tegra-profiler/armv7_pmu.c @@ -16,7 +16,10 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/module.h> +#include <linux/err.h> +#include <linux/bitmap.h> +#include <linux/slab.h> + #include <asm/cputype.h> #include <asm/pmu.h> @@ -28,7 +31,20 @@ static struct armv7_pmu_ctx pmu_ctx; -DEFINE_PER_CPU(u32[QUADD_MAX_PMU_COUNTERS], pmu_prev_val); +struct quadd_pmu_info { + DECLARE_BITMAP(used_cntrs, QUADD_MAX_PMU_COUNTERS); + u32 prev_vals[QUADD_MAX_PMU_COUNTERS]; + int is_already_active; +}; + +struct quadd_cntrs_info { + int pcntrs; + int ccntr; + + spinlock_t lock; +}; + +static DEFINE_PER_CPU(struct quadd_pmu_info, cpu_pmu_info); static unsigned quadd_armv7_a9_events_map[QUADD_EVENT_TYPE_MAX] = { [QUADD_EVENT_TYPE_INSTRUCTIONS] = @@ -79,138 +95,238 @@ static unsigned quadd_armv7_a15_events_map[QUADD_EVENT_TYPE_MAX] = { QUADD_ARMV7_UNSUPPORTED_EVENT, }; -static u32 armv7_pmu_pmnc_read(void) +static inline u32 +armv7_pmu_pmnc_read(void) { u32 val; asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); return val; } -static void armv7_pmu_pmnc_write(u32 val) +static inline void +armv7_pmu_pmnc_write(u32 val) { - val &= QUADD_ARMV7_PMNC_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); + /* Read Performance MoNitor Control (PMNC) register */ + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : + "r"(val & QUADD_ARMV7_PMNC_MASK)); } -static void armv7_pmu_pmnc_enable_counter(int index) +static inline u32 +armv7_pmu_cntens_read(void) { u32 val; - if (index == QUADD_ARMV7_CYCLE_COUNTER) - val = QUADD_ARMV7_CCNT; - else - val = 1 << index; + /* Read CouNT ENable Set (CNTENS) register */ + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(val)); + return val; +} +static inline void +armv7_pmu_cntens_write(u32 val) +{ + /* Write CouNT ENable Set (CNTENS) register */ asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); } -static void armv7_pmu_select_counter(unsigned int idx) +static inline void +armv7_pmu_cntenc_write(u32 val) { - u32 val; + /* Write CouNT ENable Clear (CNTENC) register */ + asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); +} - val = idx & QUADD_ARMV7_SELECT_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); +static inline void +armv7_pmu_pmnxsel_write(u32 val) +{ + /* Read Performance Counter SELection (PMNXSEL) register */ + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : + "r" (val & QUADD_ARMV7_SELECT_MASK)); } -static u32 armv7_pmu_adjust_value(u32 value, int event_id) +static inline u32 +armv7_pmu_ccnt_read(void) { - /* - * Cortex A8/A9: l1 cache performance counters - * don't differentiate between read and write data accesses/misses, - * so currently we are devided by two - */ - if (pmu_ctx.l1_cache_rw && - (pmu_ctx.arch == QUADD_ARM_CPU_TYPE_CORTEX_A8 || - pmu_ctx.arch == QUADD_ARM_CPU_TYPE_CORTEX_A9) && - (event_id == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES || - event_id == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)) { - return value / 2; - } - return value; + u32 val; + + /* Read Cycle CouNT (CCNT) register */ + asm volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r"(val)); + return val; } -static u32 armv7_pmu_read_counter(int idx) +static inline void +armv7_pmu_ccnt_write(u32 val) { - u32 val = 0; + /* Write Cycle CouNT (CCNT) register */ + asm volatile ("mcr p15, 0, %0, c9, c13, 0" : : "r"(val)); +} - if (idx == QUADD_ARMV7_CYCLE_COUNTER) { - /* Cycle count register (PMCCNTR) reading */ - asm volatile ("MRC p15, 0, %0, c9, c13, 0" : "=r"(val)); - } else { - /* counter selection*/ - armv7_pmu_select_counter(idx); - /* event count register reading */ - asm volatile ("MRC p15, 0, %0, c9, c13, 2" : "=r"(val)); - } +static inline u32 +armv7_pmu_pmcnt_read(void) +{ + u32 val; + /* Read Performance Monitor CouNT (PMCNTx) registers */ + asm volatile ("mrc p15, 0, %0, c9, c13, 2" : "=r"(val)); return val; } -static __attribute__((unused)) void armv7_pmu_write_counter(int idx, u32 value) +static inline void +armv7_pmu_pmcnt_write(u32 val) { - if (idx == QUADD_ARMV7_CYCLE_COUNTER) { - /* Cycle count register (PMCCNTR) writing */ - asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); - } else { - /* counter selection*/ - armv7_pmu_select_counter(idx); - /* event count register writing */ - asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value)); - } + /* Write Performance Monitor CouNT (PMCNTx) registers */ + asm volatile ("mcr p15, 0, %0, c9, c13, 2" : : "r"(val)); } -static void armv7_pmu_event_select(u32 event) +static inline void +armv7_pmu_evtsel_write(u32 event) { - event &= QUADD_ARMV7_EVTSEL_MASK; - asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (event)); + /* Write Event SELection (EVTSEL) register */ + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : + "r" (event & QUADD_ARMV7_EVTSEL_MASK)); } -static __attribute__((unused)) void armv7_pmnc_enable_interrupt(int idx) +static inline u32 +armv7_pmu_intens_read(void) { u32 val; - if (idx == QUADD_ARMV7_CYCLE_COUNTER) - val = QUADD_ARMV7_CCNT; - else - val = 1 << idx; + /* Read INTerrupt ENable Set (INTENS) register */ + asm volatile ("mrc p15, 0, %0, c9, c14, 1" : "=r"(val)); + return val; +} + +static inline void +armv7_pmu_intens_write(u32 val) +{ + /* Write INTerrupt ENable Set (INTENS) register */ + asm volatile ("mcr p15, 0, %0, c9, c14, 1" : : "r"(val)); +} + +static inline void +armv7_pmu_intenc_write(u32 val) +{ + /* Write INTerrupt ENable Clear (INTENC) register */ + asm volatile ("mcr p15, 0, %0, c9, c14, 2" : : "r"(val)); +} - asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); +static void enable_counter(int idx) +{ + armv7_pmu_cntens_write(1UL << idx); } -static __attribute__((unused)) void armv7_pmnc_disable_interrupt(int idx) +static void disable_counter(int idx) +{ + armv7_pmu_cntenc_write(1UL << idx); +} + +static void select_counter(unsigned int counter) +{ + armv7_pmu_pmnxsel_write(counter); +} + +static int is_pmu_enabled(void) +{ + u32 pmnc = armv7_pmu_pmnc_read(); + + if (pmnc & QUADD_ARMV7_PMNC_E) { + u32 cnten = armv7_pmu_cntens_read(); + cnten &= pmu_ctx.counters_mask | QUADD_ARMV7_CCNT; + return cnten ? 1 : 0; + } + + return 0; +} + +static u32 read_counter(int idx) { u32 val; - if (idx == QUADD_ARMV7_CYCLE_COUNTER) - val = QUADD_ARMV7_CCNT; - else - val = 1 << idx; + if (idx == QUADD_ARMV7_CCNT_BIT) { + val = armv7_pmu_ccnt_read(); + } else { + select_counter(idx); + val = armv7_pmu_pmcnt_read(); + } - asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); + return val; } -static void armv7_pmnc_disable_all_interrupts(void) +static void write_counter(int idx, u32 value) { - u32 val = QUADD_ARMV7_CCNT | pmu_ctx.counters_mask; + if (idx == QUADD_ARMV7_CCNT_BIT) { + armv7_pmu_ccnt_write(value); + } else { + select_counter(idx); + armv7_pmu_pmcnt_write(value); + } +} + +static int +get_free_counters(unsigned long *bitmap, int nbits, int *ccntr) +{ + int cc; + u32 cntens; + + cntens = armv7_pmu_cntens_read(); + cntens = ~cntens & (pmu_ctx.counters_mask | QUADD_ARMV7_CCNT); + + bitmap_zero(bitmap, nbits); + bitmap_copy(bitmap, (unsigned long *)&cntens, + BITS_PER_BYTE * sizeof(u32)); + + cc = (cntens & QUADD_ARMV7_CCNT) ? 1 : 0; + + if (ccntr) + *ccntr = cc; + + return bitmap_weight(bitmap, BITS_PER_BYTE * sizeof(u32)) - cc; +} + +static u32 armv7_pmu_adjust_value(u32 value, int event_id) +{ + /* + * Cortex A8/A9: l1 cache performance counters + * don't differentiate between read and write data accesses/misses, + * so currently we are devided by two + */ + if (pmu_ctx.l1_cache_rw && + (pmu_ctx.arch == QUADD_ARM_CPU_TYPE_CORTEX_A8 || + pmu_ctx.arch == QUADD_ARM_CPU_TYPE_CORTEX_A9) && + (event_id == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES || + event_id == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)) { + return value / 2; + } + return value; +} - asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); +static void __maybe_unused +disable_interrupt(int idx) +{ + armv7_pmu_intenc_write(1UL << idx); } -static void armv7_pmnc_reset_overflow_flags(void) +static void +disable_all_interrupts(void) { u32 val = QUADD_ARMV7_CCNT | pmu_ctx.counters_mask; + armv7_pmu_intenc_write(val); +} +static void +armv7_pmnc_reset_overflow_flags(void) +{ + u32 val = QUADD_ARMV7_CCNT | pmu_ctx.counters_mask; asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); } -static inline void select_event(unsigned int idx, unsigned int event) +static void +select_event(unsigned int idx, unsigned int event) { - /* counter selection */ - armv7_pmu_select_counter(idx); - armv7_pmu_event_select(event); + select_counter(idx); + armv7_pmu_evtsel_write(event); } -static inline void disable_all_counters(void) +static void disable_all_counters(void) { u32 val; @@ -218,9 +334,11 @@ static inline void disable_all_counters(void) val = armv7_pmu_pmnc_read(); if (val & QUADD_ARMV7_PMNC_E) armv7_pmu_pmnc_write(val & ~QUADD_ARMV7_PMNC_E); + + armv7_pmu_cntenc_write(QUADD_ARMV7_CCNT | pmu_ctx.counters_mask); } -static inline void enable_all_counters(void) +static void enable_all_counters(void) { u32 val; @@ -230,13 +348,7 @@ static inline void enable_all_counters(void) armv7_pmu_pmnc_write(val); } -static inline void quadd_init_pmu(void) -{ - armv7_pmnc_reset_overflow_flags(); - armv7_pmnc_disable_all_interrupts(); -} - -static inline void reset_all_counters(void) +static void reset_all_counters(void) { u32 val; @@ -245,6 +357,12 @@ static inline void reset_all_counters(void) armv7_pmu_pmnc_write(val); } +static void quadd_init_pmu(void) +{ + armv7_pmnc_reset_overflow_flags(); + disable_all_interrupts(); +} + static int pmu_enable(void) { int err; @@ -258,45 +376,114 @@ static int pmu_enable(void) return 0; } +static void __pmu_disable(void *arg) +{ + struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info); + + if (!pi->is_already_active) { + pr_info("[%d] reset all counters\n", + smp_processor_id()); + + disable_all_counters(); + reset_all_counters(); + } else { + int idx; + + for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) { + pr_info("[%d] reset counter: %d\n", + smp_processor_id(), idx); + + disable_counter(idx); + write_counter(idx, 0); + } + } +} + static void pmu_disable(void) { release_pmu(ARM_PMU_DEVICE_CPU); + on_each_cpu(__pmu_disable, NULL, 1); pr_info("pmu was released\n"); } static void pmu_start(void) { - int i, idx; + int idx = 0, pcntrs, ccntr; u32 event; - u32 *prevp = __get_cpu_var(pmu_prev_val); + DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS); + struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info); + u32 *prevp = pi->prev_vals; + struct quadd_pmu_event_info *ei; - disable_all_counters(); - quadd_init_pmu(); + bitmap_zero(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS); - for (i = 0; i < pmu_ctx.nr_used_counters; i++) { - struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i]; + if (is_pmu_enabled()) { + pi->is_already_active = 1; + } else { + disable_all_counters(); + quadd_init_pmu(); + + pi->is_already_active = 0; + } + + pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr); + + list_for_each_entry(ei, &pmu_ctx.used_events, list) { + int index; - prevp[i] = 0; + *prevp++ = 0; - event = pmu_event->hw_value; - idx = pmu_event->counter_idx; + event = ei->hw_value; - if (idx != QUADD_ARMV7_CYCLE_COUNTER) - select_event(idx, event); + if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) { + if (!ccntr) { + pr_err_once("Error: cpu cycles counter is already occupied\n"); + return; + } + index = QUADD_ARMV7_CCNT_BIT; + } else { + if (!pcntrs--) { + pr_err_once("Error: too many performance events\n"); + return; + } + + index = find_next_bit(free_bitmap, + QUADD_MAX_PMU_COUNTERS, idx); + if (index >= QUADD_MAX_PMU_COUNTERS) { + pr_err_once("Error: too many events\n"); + return; + } + idx = index + 1; + select_event(index, event); + } + set_bit(index, pi->used_cntrs); - armv7_pmu_pmnc_enable_counter(idx); + write_counter(index, 0); + enable_counter(index); } - reset_all_counters(); - enable_all_counters(); + if (!pi->is_already_active) { + reset_all_counters(); + enable_all_counters(); + } qm_debug_start_source(QUADD_EVENT_SOURCE_PMU); } static void pmu_stop(void) { - reset_all_counters(); - disable_all_counters(); + int idx; + struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info); + + if (!pi->is_already_active) { + disable_all_counters(); + reset_all_counters(); + } else { + for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) { + disable_counter(idx); + write_counter(idx, 0); + } + } qm_debug_stop_source(QUADD_EVENT_SOURCE_PMU); } @@ -304,104 +491,185 @@ static void pmu_stop(void) static int __maybe_unused pmu_read(struct event_data *events, int max_events) { - int idx, i, nr; u32 val; - u32 *prevp = __get_cpu_var(pmu_prev_val); + int idx = 0, i = 0; + struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info); + u32 *prevp = pi->prev_vals; + struct quadd_pmu_event_info *ei; - if (pmu_ctx.nr_used_counters == 0) { - pr_warn_once("error: counters were not initialized\n"); + if (bitmap_empty(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS)) { + pr_err_once("Error: counters were not initialized\n"); return 0; } - nr = min_t(int, pmu_ctx.nr_used_counters, max_events); + list_for_each_entry(ei, &pmu_ctx.used_events, list) { + int index; + + if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) { + if (!test_bit(QUADD_ARMV7_CCNT_BIT, pi->used_cntrs)) { + pr_err_once("Error: ccntr is not used\n"); + return 0; + } + index = QUADD_ARMV7_CCNT_BIT; + } else { + index = find_next_bit(pi->used_cntrs, + QUADD_MAX_PMU_COUNTERS, idx); + idx = index + 1; + + if (index >= QUADD_MAX_PMU_COUNTERS) { + pr_err_once("Error: perf counter is not used\n"); + return 0; + } + } - for (i = 0; i < nr; i++) { - struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i]; + val = read_counter(index); + val = armv7_pmu_adjust_value(val, ei->quadd_event_id); - idx = pmu_event->counter_idx; + events->event_source = QUADD_EVENT_SOURCE_PMU; + events->event_id = ei->quadd_event_id; - val = armv7_pmu_read_counter(idx); - val = armv7_pmu_adjust_value(val, pmu_event->quadd_event_id); + events->val = val; + events->prev_val = *prevp; - events[i].event_source = QUADD_EVENT_SOURCE_PMU; - events[i].event_id = pmu_event->quadd_event_id; + *prevp = val; - events[i].val = val; - events[i].prev_val = prevp[i]; + qm_debug_read_counter(events->event_id, events->prev_val, + events->val); - prevp[i] = val; + if (++i >= max_events) + break; - qm_debug_read_counter(events[i].event_id, events[i].prev_val, - events[i].val); + events++; + prevp++; } - return nr; + return i; } static int __maybe_unused pmu_read_emulate(struct event_data *events, int max_events) { - int i, nr; + int i = 0; static u32 val = 100; - u32 *prevp = __get_cpu_var(pmu_prev_val); + struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info); + u32 *prevp = pi->prev_vals; + struct quadd_pmu_event_info *ei; - nr = min_t(int, pmu_ctx.nr_used_counters, max_events); - - for (i = 0; i < nr; i++) { + list_for_each_entry(ei, &pmu_ctx.used_events, list) { if (val > 200) val = 100; - events[i].event_id = prevp[i]; - events[i].val = val; + events->event_id = *prevp; + events->val = val; + *prevp = val; val += 5; + + if (++i >= max_events) + break; + + events++; + prevp++; } - return nr; + return i; +} + +static void __get_free_counters(void *arg) +{ + int pcntrs, ccntr; + DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS); + struct quadd_cntrs_info *ci = arg; + + pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr); + + spin_lock(&ci->lock); + + ci->pcntrs = min_t(int, pcntrs, ci->pcntrs); + + if (!ccntr) + ci->ccntr = 0; + + pr_info("[%d] pcntrs/ccntr: %d/%d, free_bitmap: %#lx\n", + smp_processor_id(), pcntrs, ccntr, free_bitmap[0]); + + spin_unlock(&ci->lock); +} + +static void free_events(struct list_head *head) +{ + struct quadd_pmu_event_info *entry, *next; + + list_for_each_entry_safe(entry, next, head, list) { + list_del(&entry->list); + kfree(entry); + } } static int set_events(int *events, int size) { - int i, nr_l1_r = 0, nr_l1_w = 0, curr_idx = 0; + int free_pcntrs, err; + int i, nr_l1_r = 0, nr_l1_w = 0; + struct quadd_cntrs_info free_ci; pmu_ctx.l1_cache_rw = 0; - pmu_ctx.nr_used_counters = 0; - if (!events || size == 0) - return 0; + free_events(&pmu_ctx.used_events); - if (size > QUADD_MAX_PMU_COUNTERS) { - pr_err("Too many events (> %d)\n", QUADD_MAX_PMU_COUNTERS); - return -ENOSPC; - } + if (!events || !size) + return 0; if (!pmu_ctx.current_map) { pr_err("Invalid current_map\n"); return -ENODEV; } + spin_lock_init(&free_ci.lock); + free_ci.pcntrs = QUADD_MAX_PMU_COUNTERS; + free_ci.ccntr = 1; + + on_each_cpu(__get_free_counters, &free_ci, 1); + + free_pcntrs = free_ci.pcntrs; + pr_info("free counters: pcntrs/ccntr: %d/%d\n", + free_pcntrs, free_ci.ccntr); + for (i = 0; i < size; i++) { - struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i]; + struct quadd_pmu_event_info *ei; if (events[i] > QUADD_EVENT_TYPE_MAX) { pr_err("Error event: %d\n", events[i]); - return -EINVAL; + err = -EINVAL; + goto out_free; } - if (curr_idx >= pmu_ctx.nr_counters) { - pr_err("Too many events (> %d)\n", - pmu_ctx.nr_counters); - return -ENOSPC; + ei = kzalloc(sizeof(*ei), GFP_KERNEL); + if (!ei) { + err = -ENOMEM; + goto out_free; } + INIT_LIST_HEAD(&ei->list); + list_add_tail(&ei->list, &pmu_ctx.used_events); + if (events[i] == QUADD_EVENT_TYPE_CPU_CYCLES) { - pmu_event->hw_value = QUADD_ARMV7_CPU_CYCLE_EVENT; - pmu_event->counter_idx = QUADD_ARMV7_CYCLE_COUNTER; + ei->hw_value = QUADD_ARMV7_CPU_CYCLE_EVENT; + if (!free_ci.ccntr) { + pr_err("Error: cpu cycles counter is already occupied\n"); + err = -EBUSY; + goto out_free; + } } else { - pmu_event->hw_value = pmu_ctx.current_map[events[i]]; - pmu_event->counter_idx = curr_idx++; + if (!free_pcntrs--) { + pr_err("Error: too many performance events\n"); + err = -ENOSPC; + goto out_free; + } + + ei->hw_value = pmu_ctx.current_map[events[i]]; } - pmu_event->quadd_event_id = events[i]; + + ei->quadd_event_id = events[i]; if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES) nr_l1_r++; @@ -410,14 +678,17 @@ static int set_events(int *events, int size) pr_info("Event has been added: id/pmu value: %s/%#x\n", quadd_get_event_str(events[i]), - pmu_event->hw_value); + ei->hw_value); } - pmu_ctx.nr_used_counters = size; if (nr_l1_r > 0 && nr_l1_w > 0) pmu_ctx.l1_cache_rw = 1; return 0; + +out_free: + free_events(&pmu_ctx.used_events); + return err; } static int get_supported_events(int *events, int max_events) @@ -435,14 +706,17 @@ static int get_supported_events(int *events, int max_events) static int get_current_events(int *events, int max_events) { - int i; + int i = 0; + struct quadd_pmu_event_info *ei; - max_events = min_t(int, pmu_ctx.nr_used_counters, max_events); + list_for_each_entry(ei, &pmu_ctx.used_events, list) { + events[i++] = ei->quadd_event_id; - for (i = 0; i < max_events; i++) - events[i] = pmu_ctx.pmu_events[i].quadd_event_id; + if (i >= max_events) + break; + } - return max_events; + return i; } static struct quadd_event_source_interface pmu_armv7_int = { @@ -476,7 +750,6 @@ struct quadd_event_source_interface *quadd_armv7_pmu_init(void) case QUADD_ARM_CPU_PART_NUMBER_CORTEX_A9: pmu_ctx.arch = QUADD_ARM_CPU_TYPE_CORTEX_A9; strcpy(pmu_ctx.arch_name, "Cortex A9"); - pmu_ctx.nr_counters = 6; pmu_ctx.counters_mask = QUADD_ARMV7_COUNTERS_MASK_CORTEX_A9; pmu_ctx.current_map = quadd_armv7_a9_events_map; @@ -486,7 +759,6 @@ struct quadd_event_source_interface *quadd_armv7_pmu_init(void) case QUADD_ARM_CPU_PART_NUMBER_CORTEX_A15: pmu_ctx.arch = QUADD_ARM_CPU_TYPE_CORTEX_A15; strcpy(pmu_ctx.arch_name, "Cortex A15"); - pmu_ctx.nr_counters = 6; pmu_ctx.counters_mask = QUADD_ARMV7_COUNTERS_MASK_CORTEX_A15; pmu_ctx.current_map = quadd_armv7_a15_events_map; @@ -496,13 +768,19 @@ struct quadd_event_source_interface *quadd_armv7_pmu_init(void) default: pmu_ctx.arch = QUADD_ARM_CPU_TYPE_UNKNOWN; strcpy(pmu_ctx.arch_name, "Unknown"); - pmu_ctx.nr_counters = 0; pmu_ctx.current_map = NULL; break; } } - pr_info("arch: %s, number of counters: %d\n", - pmu_ctx.arch_name, pmu_ctx.nr_counters); + INIT_LIST_HEAD(&pmu_ctx.used_events); + + pr_info("arch: %s\n", pmu_ctx.arch_name); + return pmu; } + +void quadd_armv7_pmu_deinit(void) +{ + free_events(&pmu_ctx.used_events); +} diff --git a/drivers/misc/tegra-profiler/armv7_pmu.h b/drivers/misc/tegra-profiler/armv7_pmu.h index 827fe4292a33..1e4b556b18ca 100644 --- a/drivers/misc/tegra-profiler/armv7_pmu.h +++ b/drivers/misc/tegra-profiler/armv7_pmu.h @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/armv7_pmu.h * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -17,6 +17,8 @@ #ifndef __ARMV7_PMU_H #define __ARMV7_PMU_H +#include <linux/list.h> + #define QUADD_ARM_CPU_IMPLEMENTER 0x41 enum { @@ -32,32 +34,32 @@ enum { #define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A9 0xC090 #define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A15 0xC0F0 - #define QUADD_MAX_PMU_COUNTERS 32 struct quadd_pmu_event_info { int quadd_event_id; int hw_value; - int counter_idx; + + struct list_head list; }; struct armv7_pmu_ctx { int arch; char arch_name[32]; - int nr_counters; u32 counters_mask; - struct quadd_pmu_event_info pmu_events[QUADD_MAX_PMU_COUNTERS]; - int nr_used_counters; + struct list_head used_events; int l1_cache_rw; int *current_map; }; + struct quadd_event_source_interface; extern struct quadd_event_source_interface *quadd_armv7_pmu_init(void); +extern void quadd_armv7_pmu_deinit(void); /* * PMNC Register @@ -80,10 +82,9 @@ extern struct quadd_event_source_interface *quadd_armv7_pmu_init(void); /* Mask for writable bits */ #define QUADD_ARMV7_PMNC_MASK 0x3f - -#define QUADD_ARMV7_CCNT (1 << 31) /* Cycle counter */ - -#define QUADD_ARMV7_CYCLE_COUNTER -1 +/* Cycle counter */ +#define QUADD_ARMV7_CCNT_BIT 31 +#define QUADD_ARMV7_CCNT (1 << QUADD_ARMV7_CCNT_BIT) /* * CNTENS: counters enable reg diff --git a/drivers/misc/tegra-profiler/main.c b/drivers/misc/tegra-profiler/main.c index 01544d0418bc..c28748b01a6b 100644 --- a/drivers/misc/tegra-profiler/main.c +++ b/drivers/misc/tegra-profiler/main.c @@ -518,6 +518,7 @@ static void __exit quadd_module_exit(void) quadd_comm_events_exit(); quadd_auth_deinit(); quadd_proc_deinit(); + quadd_armv7_pmu_deinit(); } module_init(quadd_module_init); diff --git a/drivers/misc/tegra-profiler/version.h b/drivers/misc/tegra-profiler/version.h index 7260267c4e12..d4a6b60f8ceb 100644 --- a/drivers/misc/tegra-profiler/version.h +++ b/drivers/misc/tegra-profiler/version.h @@ -18,7 +18,7 @@ #ifndef __QUADD_VERSION_H #define __QUADD_VERSION_H -#define QUADD_MODULE_VERSION "1.45" +#define QUADD_MODULE_VERSION "1.46" #define QUADD_MODULE_BRANCH "Dev" #endif /* __QUADD_VERSION_H */ |