diff options
author | Igor Nabirushkin <inabirushkin@nvidia.com> | 2014-01-26 17:59:25 +0400 |
---|---|---|
committer | Riham Haidar <rhaidar@nvidia.com> | 2014-02-19 16:09:36 -0800 |
commit | 0dad535ec93d0adc709a498ed4776f07a0196f47 (patch) | |
tree | 43766f55be3f7bcc7d42fdc5c181d9893defc539 | |
parent | 587e0115007c1bb7dbe092462d8491a4e5d7f824 (diff) |
misc: tegra-profiler: add group samples
Group CPU-cycle and cache-miss samples.
To reduce the amount of data passed from the target to the host,
we can group samples that have a lot of common information.
Bug 1447582
Change-Id: I9b16bf4f18455ff6219fd58373eceba4cb71e352
Signed-off-by: Igor Nabirushkin <inabirushkin@nvidia.com>
Reviewed-on: http://git-master/r/365849
(cherry picked from commit 48c34477a883e3b7a7872ab9f3de725f9542060a)
Reviewed-on: http://git-master/r/368205
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Tested-by: Maxim Morin <mmorin@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
-rw-r--r-- | drivers/misc/tegra-profiler/armv7_pmu.c | 43 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/backtrace.c | 10 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/backtrace.h | 4 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/comm.c | 153 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/comm.h | 11 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/debug.c | 53 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/hrt.c | 246 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/hrt.h | 5 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/ma.c | 13 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/main.c | 19 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/mmap.c | 26 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/pl310.c | 35 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/power_clk.c | 25 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/power_clk.h | 4 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/quadd.h | 7 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/quadd_proc.c | 42 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/tegra.h | 19 | ||||
-rw-r--r-- | drivers/misc/tegra-profiler/version.h | 4 | ||||
-rw-r--r-- | include/linux/tegra_profiler.h | 87 |
19 files changed, 467 insertions, 339 deletions
diff --git a/drivers/misc/tegra-profiler/armv7_pmu.c b/drivers/misc/tegra-profiler/armv7_pmu.c index 04436f8c7e02..9ca4e535b84d 100644 --- a/drivers/misc/tegra-profiler/armv7_pmu.c +++ b/drivers/misc/tegra-profiler/armv7_pmu.c @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/armv7_pmu.c * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -301,9 +301,10 @@ static void pmu_stop(void) qm_debug_stop_source(QUADD_EVENT_SOURCE_PMU); } -static int __maybe_unused pmu_read(struct event_data *events) +static int __maybe_unused +pmu_read(struct event_data *events, int max_events) { - int idx, i; + int idx, i, nr; u32 val; u32 *prevp = __get_cpu_var(pmu_prev_val); @@ -312,7 +313,9 @@ static int __maybe_unused pmu_read(struct event_data *events) return 0; } - for (i = 0; i < pmu_ctx.nr_used_counters; i++) { + nr = min_t(int, pmu_ctx.nr_used_counters, max_events); + + for (i = 0; i < nr; i++) { struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i]; idx = pmu_event->counter_idx; @@ -332,16 +335,19 @@ static int __maybe_unused pmu_read(struct event_data *events) events[i].val); } - return pmu_ctx.nr_used_counters; + return nr; } -static int __maybe_unused pmu_read_emulate(struct event_data *events) +static int __maybe_unused +pmu_read_emulate(struct event_data *events, int max_events) { - int i; + int i, nr; static u32 val = 100; u32 *prevp = __get_cpu_var(pmu_prev_val); - for (i = 0; i < pmu_ctx.nr_used_counters; i++) { + nr = min_t(int, pmu_ctx.nr_used_counters, max_events); + + for (i = 0; i < nr; i++) { if (val > 200) val = 100; @@ -351,7 +357,7 @@ static int __maybe_unused pmu_read_emulate(struct event_data *events) val += 5; } - return pmu_ctx.nr_used_counters; + return nr; } static int set_events(int *events, int size) @@ -414,17 +420,31 @@ 
static int set_events(int *events, int size) return 0; } -static int get_supported_events(int *events) +static int get_supported_events(int *events, int max_events) { int i, nr_events = 0; - for (i = 0; i < QUADD_EVENT_TYPE_MAX; i++) { + max_events = min_t(int, QUADD_EVENT_TYPE_MAX, max_events); + + for (i = 0; i < max_events; i++) { if (pmu_ctx.current_map[i] != QUADD_ARMV7_UNSUPPORTED_EVENT) events[nr_events++] = i; } return nr_events; } +static int get_current_events(int *events, int max_events) +{ + int i; + + max_events = min_t(int, pmu_ctx.nr_used_counters, max_events); + + for (i = 0; i < max_events; i++) + events[i] = pmu_ctx.pmu_events[i].quadd_event_id; + + return max_events; +} + static struct quadd_event_source_interface pmu_armv7_int = { .enable = pmu_enable, .disable = pmu_disable, @@ -439,6 +459,7 @@ static struct quadd_event_source_interface pmu_armv7_int = { #endif .set_events = set_events, .get_supported_events = get_supported_events, + .get_current_events = get_current_events, }; struct quadd_event_source_interface *quadd_armv7_pmu_init(void) diff --git a/drivers/misc/tegra-profiler/backtrace.c b/drivers/misc/tegra-profiler/backtrace.c index 3191def82ce0..ce02f82d17e1 100644 --- a/drivers/misc/tegra-profiler/backtrace.c +++ b/drivers/misc/tegra-profiler/backtrace.c @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/backtrace.c * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -28,13 +28,11 @@ #define QUADD_USER_SPACE_MIN_ADDR 0x8000 static inline void -quadd_callchain_store(struct quadd_callchain *callchain_data, u32 ip) +quadd_callchain_store(struct quadd_callchain *callchain_data, + quadd_bt_addr_t ip) { - if (callchain_data->nr < QUADD_MAX_STACK_DEPTH) { - /* pr_debug("[%d] Add entry: %#llx\n", - callchain_data->nr, ip); */ + if (callchain_data->nr < QUADD_MAX_STACK_DEPTH) callchain_data->callchain[callchain_data->nr++] = ip; - } } static int diff --git a/drivers/misc/tegra-profiler/backtrace.h b/drivers/misc/tegra-profiler/backtrace.h index 82b55db496f0..ce7608217629 100644 --- a/drivers/misc/tegra-profiler/backtrace.h +++ b/drivers/misc/tegra-profiler/backtrace.h @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/backtrace.h * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -23,7 +23,7 @@ struct quadd_callchain { int nr; - u32 callchain[QUADD_MAX_STACK_DEPTH]; + quadd_bt_addr_t callchain[QUADD_MAX_STACK_DEPTH]; }; unsigned int diff --git a/drivers/misc/tegra-profiler/comm.c b/drivers/misc/tegra-profiler/comm.c index 35a4a8e15fae..8d15fb1cc296 100644 --- a/drivers/misc/tegra-profiler/comm.c +++ b/drivers/misc/tegra-profiler/comm.c @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/comm.c * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -24,6 +24,7 @@ #include <linux/miscdevice.h> #include <linux/sched.h> #include <linux/poll.h> +#include <linux/bitops.h> #include <linux/tegra_profiler.h> @@ -136,10 +137,10 @@ rb_write(struct quadd_ring_buffer *rb, char *data, size_t length) return length; } -static size_t rb_read_undo(struct quadd_ring_buffer *rb, size_t length) +static ssize_t rb_read_undo(struct quadd_ring_buffer *rb, size_t length) { if (rb_get_free_space(rb) < length) - return 0; + return -EIO; if (rb->pos_read > length) rb->pos_read -= length; @@ -174,7 +175,7 @@ static size_t rb_read(struct quadd_ring_buffer *rb, char *data, size_t length) return length; } -static size_t +static ssize_t rb_read_user(struct quadd_ring_buffer *rb, char __user *data, size_t length) { size_t new_pos_read, chunk1; @@ -186,23 +187,17 @@ rb_read_user(struct quadd_ring_buffer *rb, char __user *data, size_t length) if (new_pos_read < rb->pos_read) { chunk1 = rb->size - rb->pos_read; - if (copy_to_user(data, rb->buf + rb->pos_read, chunk1)) { - pr_err_once("Error: copy_to_user\n"); - return 0; - } + if (copy_to_user(data, rb->buf + rb->pos_read, chunk1)) + return -EFAULT; if (new_pos_read > 0) { if (copy_to_user(data + chunk1, rb->buf, - new_pos_read)) { - pr_err_once("Error: copy_to_user\n"); - return 0; - } + new_pos_read)) + return -EFAULT; } } else { - if (copy_to_user(data, rb->buf + rb->pos_read, length)) { - pr_err_once("Error: copy_to_user\n"); - return 0; - } + if (copy_to_user(data, rb->buf + rb->pos_read, length)) + return -EFAULT; } rb->pos_read = new_pos_read; @@ -212,17 +207,22 @@ rb_read_user(struct quadd_ring_buffer *rb, char __user *data, size_t length) } static void -write_sample(struct quadd_record_data *sample, void *extra_data, - size_t extra_length) +write_sample(struct quadd_record_data *sample, + struct quadd_iovec *vec, int vec_count) { + int i; 
unsigned long flags; struct quadd_ring_buffer *rb = &comm_ctx.rb; - int length_sample = sizeof(struct quadd_record_data) + extra_length; + size_t length_sample; + + length_sample = sizeof(struct quadd_record_data); + for (i = 0; i < vec_count; i++) + length_sample += vec[i].len; spin_lock_irqsave(&rb->lock, flags); if (length_sample > rb_get_free_space(rb)) { - pr_err_once("Error: Buffer overflowed, skip sample\n"); + pr_err_once("Error: Buffer has been overflowed\n"); spin_unlock_irqrestore(&rb->lock, flags); return; } @@ -232,10 +232,10 @@ write_sample(struct quadd_record_data *sample, void *extra_data, return; } - if (extra_data && extra_length > 0) { - if (!rb_write(rb, extra_data, extra_length)) { - pr_err_once("Buffer overflowed, skip sample\n"); + for (i = 0; i < vec_count; i++) { + if (!rb_write(rb, vec[i].base, vec[i].len)) { spin_unlock_irqrestore(&rb->lock, flags); + pr_err_once("%s: error: ring buffer\n", __func__); return; } } @@ -248,55 +248,59 @@ write_sample(struct quadd_record_data *sample, void *extra_data, wake_up_interruptible(&comm_ctx.read_wait); } -static int read_sample(char __user *buffer, size_t max_length) +static ssize_t read_sample(char __user *buffer, size_t max_length) { + int retval = -EIO; unsigned long flags; struct quadd_ring_buffer *rb = &comm_ctx.rb; struct quadd_record_data record; - size_t length_extra = 0; + size_t length_extra = 0, nr_events; + struct quadd_sample_data *sample; spin_lock_irqsave(&rb->lock, flags); if (rb_is_empty(rb)) { - spin_unlock_irqrestore(&rb->lock, flags); - return 0; + retval = 0; + goto out; } - if (rb->fill_count < sizeof(struct quadd_record_data)) { - pr_err_once("Error: data\n"); - spin_unlock_irqrestore(&rb->lock, flags); - return 0; - } + if (rb->fill_count < sizeof(struct quadd_record_data)) + goto out; - if (!rb_read(rb, (char *)&record, sizeof(struct quadd_record_data))) { - pr_err_once("Error: read sample\n"); - spin_unlock_irqrestore(&rb->lock, flags); - return 0; - } + if (!rb_read(rb, 
(char *)&record, sizeof(struct quadd_record_data))) + goto out; if (record.magic != QUADD_RECORD_MAGIC) { - pr_err_once("Bad magic: %#x\n", record.magic); - spin_unlock_irqrestore(&rb->lock, flags); - return 0; + pr_err("Error: bad magic: %#x\n", record.magic); + goto out; } switch (record.record_type) { case QUADD_RECORD_TYPE_SAMPLE: - length_extra = record.sample.callchain_nr * - sizeof(record.sample.ip); + sample = &record.sample; + length_extra = sample->callchain_nr * sizeof(quadd_bt_addr_t); + + nr_events = __sw_hweight32(record.sample.events_flags); + length_extra += nr_events * sizeof(u32); break; case QUADD_RECORD_TYPE_MMAP: if (record.mmap.filename_length > 0) { length_extra = record.mmap.filename_length; } else { - length_extra = 0; - pr_err_once("Error: filename\n"); + pr_err("Error: filename is empty\n"); + goto out; } break; - case QUADD_RECORD_TYPE_DEBUG: case QUADD_RECORD_TYPE_HEADER: + length_extra = record.hdr.nr_events * sizeof(u32); + break; + + case QUADD_RECORD_TYPE_DEBUG: + length_extra = record.debug.extra_length; + break; + case QUADD_RECORD_TYPE_MA: length_extra = 0; break; @@ -310,50 +314,49 @@ static int read_sample(char __user *buffer, size_t max_length) break; default: - pr_err_once("Error: Unknown sample: %u\n", record.record_type); - spin_unlock_irqrestore(&rb->lock, flags); - return 0; + goto out; } if (sizeof(struct quadd_record_data) + length_extra > max_length) { - if (!rb_read_undo(rb, sizeof(struct quadd_record_data))) - pr_err_once("Error: rb_read_undo\n"); - spin_unlock_irqrestore(&rb->lock, flags); - return 0; - } + retval = rb_read_undo(rb, sizeof(struct quadd_record_data)); + if (retval < 0) + goto out; - if (length_extra > rb_get_free_space(rb)) { - pr_err_once("Error: Incompleted sample\n"); - spin_unlock_irqrestore(&rb->lock, flags); - return 0; + retval = 0; + goto out; } - if (copy_to_user(buffer, &record, sizeof(struct quadd_record_data))) { - pr_err_once("Error: copy_to_user\n"); - spin_unlock_irqrestore(&rb->lock, 
flags); - return 0; - } + if (length_extra > rb->fill_count) + goto out; + + if (copy_to_user(buffer, &record, sizeof(struct quadd_record_data))) + goto out_fault_error; if (length_extra > 0) { - if (!rb_read_user(rb, buffer + sizeof(struct quadd_record_data), - length_extra)) { - pr_err_once("Error: copy_to_user\n"); - spin_unlock_irqrestore(&rb->lock, flags); - return 0; - } + retval = rb_read_user(rb, buffer + sizeof(record), + length_extra); + if (retval <= 0) + goto out; } spin_unlock_irqrestore(&rb->lock, flags); return sizeof(struct quadd_record_data) + length_extra; + +out_fault_error: + retval = -EFAULT; + +out: + spin_unlock_irqrestore(&rb->lock, flags); + return retval; } -static void put_sample(struct quadd_record_data *data, char *extra_data, - unsigned int extra_length) +static void put_sample(struct quadd_record_data *data, + struct quadd_iovec *vec, int vec_count) { if (!atomic_read(&comm_ctx.active)) return; - write_sample(data, extra_data, extra_length); + write_sample(data, vec, vec_count); } static void comm_reset(void) @@ -460,11 +463,17 @@ device_read(struct file *filp, if (!atomic_read(&comm_ctx.active)) { mutex_unlock(&comm_ctx.io_mutex); - return -1; + return -EPIPE; } while (was_read + sizeof(struct quadd_record_data) < length) { res = read_sample(buffer + was_read, length - was_read); + if (res < 0) { + mutex_unlock(&comm_ctx.io_mutex); + pr_err("Error: data is corrupted\n"); + return res; + } + if (res == 0) break; diff --git a/drivers/misc/tegra-profiler/comm.h b/drivers/misc/tegra-profiler/comm.h index a19319d0a71f..1bed2d98d7ce 100644 --- a/drivers/misc/tegra-profiler/comm.h +++ b/drivers/misc/tegra-profiler/comm.h @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/comm.h * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -35,6 +35,11 @@ struct quadd_ring_buffer { size_t max_fill_count; }; +struct quadd_iovec { + void *base; + size_t len; +}; + struct quadd_parameters; struct quadd_comm_control_interface { @@ -47,8 +52,8 @@ struct quadd_comm_control_interface { }; struct quadd_comm_data_interface { - void (*put_sample)(struct quadd_record_data *data, char *extra_data, - unsigned int extra_length); + void (*put_sample)(struct quadd_record_data *data, + struct quadd_iovec *vec, int vec_count); void (*reset)(void); int (*is_active)(void); }; diff --git a/drivers/misc/tegra-profiler/debug.c b/drivers/misc/tegra-profiler/debug.c index fbe95b52f473..b7acd9499934 100644 --- a/drivers/misc/tegra-profiler/debug.c +++ b/drivers/misc/tegra-profiler/debug.c @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/debug.c * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -22,12 +22,14 @@ #include "debug.h" #include "hrt.h" #include "tegra.h" +#include "comm.h" #ifdef QM_DEBUG_SAMPLES_ENABLE static inline void init_sample(struct quadd_record_data *record, struct pt_regs *regs) { + unsigned int flags; struct quadd_debug_data *s = &record->debug; record->magic = QUADD_RECORD_MAGIC; @@ -37,19 +39,24 @@ init_sample(struct quadd_record_data *record, struct pt_regs *regs) regs = get_irq_regs(); if (!regs) - record->cpu_mode = QUADD_CPU_MODE_NONE; + s->user_mode = 0; else - record->cpu_mode = user_mode(regs) ? - QUADD_CPU_MODE_USER : QUADD_CPU_MODE_KERNEL; + s->user_mode = user_mode(regs) ? 1 : 0; + + s->cpu = quadd_get_processor_id(regs, &flags); + + s->lp_mode = flags & QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP ? 
1 : 0; + s->thumb_mode = flags & QUADD_CPUMODE_THUMB ? 1 : 0; + + s->reserved = 0; - s->cpu = quadd_get_processor_id(regs); s->pid = 0; s->time = quadd_get_time(); - s->timer_period = 0; - s->extra_value1 = 0; - s->extra_value2 = 0; - s->extra_value3 = 0; + s->extra_value[0] = 0; + s->extra_value[1] = 0; + + s->extra_length = 0; } void qm_debug_handler_sample(struct pt_regs *regs) @@ -72,7 +79,6 @@ void qm_debug_timer_forward(struct pt_regs *regs, u64 period) init_sample(&record, regs); s->type = QM_DEBUG_SAMPLE_TYPE_TIMER_FORWARD; - s->timer_period = period; quadd_put_sample(&record, NULL, 0); } @@ -85,7 +91,6 @@ void qm_debug_timer_start(struct pt_regs *regs, u64 period) init_sample(&record, regs); s->type = QM_DEBUG_SAMPLE_TYPE_TIMER_START; - s->timer_period = period; quadd_put_sample(&record, NULL, 0); } @@ -105,6 +110,7 @@ void qm_debug_timer_cancel(void) void qm_debug_task_sched_in(pid_t prev_pid, pid_t current_pid, int prev_nr_active) { + struct quadd_iovec vec; struct quadd_record_data record; struct quadd_debug_data *s = &record.debug; @@ -112,15 +118,18 @@ qm_debug_task_sched_in(pid_t prev_pid, pid_t current_pid, int prev_nr_active) s->type = QM_DEBUG_SAMPLE_TYPE_SCHED_IN; - s->extra_value1 = prev_pid; - s->extra_value2 = current_pid; - s->extra_value3 = prev_nr_active; + s->extra_value[0] = prev_pid; + s->extra_value[1] = current_pid; - quadd_put_sample(&record, NULL, 0); + vec.base = &prev_nr_active; + vec.len = s->extra_length = sizeof(prev_nr_active); + + quadd_put_sample(&record, &vec, 1); } void qm_debug_read_counter(int event_id, u32 prev_val, u32 val) { + struct quadd_iovec vec; struct quadd_record_data record; struct quadd_debug_data *s = &record.debug; @@ -128,11 +137,13 @@ void qm_debug_read_counter(int event_id, u32 prev_val, u32 val) s->type = QM_DEBUG_SAMPLE_TYPE_READ_COUNTER; - s->extra_value1 = event_id; - s->extra_value2 = prev_val; - s->extra_value3 = val; + s->extra_value[0] = event_id; + s->extra_value[1] = prev_val; - 
quadd_put_sample(&record, NULL, 0); + vec.base = &val; + vec.len = s->extra_length = sizeof(val); + + quadd_put_sample(&record, &vec, 1); } void qm_debug_start_source(int source_type) @@ -143,7 +154,7 @@ void qm_debug_start_source(int source_type) init_sample(&record, NULL); s->type = QM_DEBUG_SAMPLE_TYPE_SOURCE_START; - s->extra_value1 = source_type; + s->extra_value[0] = source_type; quadd_put_sample(&record, NULL, 0); } @@ -156,7 +167,7 @@ void qm_debug_stop_source(int source_type) init_sample(&record, NULL); s->type = QM_DEBUG_SAMPLE_TYPE_SOURCE_STOP; - s->extra_value1 = source_type; + s->extra_value[0] = source_type; quadd_put_sample(&record, NULL, 0); } diff --git a/drivers/misc/tegra-profiler/hrt.c b/drivers/misc/tegra-profiler/hrt.c index fef37de8c288..04fcf1316f11 100644 --- a/drivers/misc/tegra-profiler/hrt.c +++ b/drivers/misc/tegra-profiler/hrt.c @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/hrt.c * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -46,6 +46,11 @@ static void sample_time_prepare(void); static void sample_time_finish(void); static void sample_time_reset(struct quadd_cpu_context *cpu_ctx); +struct hrt_event_value { + int event_id; + u32 value; +}; + static enum hrtimer_restart hrtimer_handler(struct hrtimer *hrtimer) { struct pt_regs *regs; @@ -142,16 +147,21 @@ static void sample_time_reset(struct quadd_cpu_context *cpu_ctx) static void put_header(void) { - int power_rate_period; + int nr_events = 0, max_events = QUADD_MAX_COUNTERS; + unsigned int events[QUADD_MAX_COUNTERS]; struct quadd_record_data record; struct quadd_header_data *hdr = &record.hdr; struct quadd_parameters *param = &hrt.quadd_ctx->param; - struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm; + unsigned int extra = param->reserved[QUADD_PARAM_IDX_EXTRA]; + struct quadd_iovec vec; + struct quadd_ctx *ctx = hrt.quadd_ctx; + struct quadd_event_source_interface *pmu = ctx->pmu; + struct quadd_event_source_interface *pl310 = ctx->pl310; record.magic = QUADD_RECORD_MAGIC; record.record_type = QUADD_RECORD_TYPE_HEADER; - record.cpu_mode = QUADD_CPU_MODE_NONE; + hdr->magic = QUADD_HEADER_MAGIC; hdr->version = QUADD_SAMPLES_VERSION; hdr->backtrace = param->backtrace; @@ -165,152 +175,190 @@ static void put_header(void) hdr->debug_samples = 0; #endif - hdr->period = hrt.sample_period; - hdr->ma_period = hrt.ma_period; + hdr->freq = param->freq; + hdr->ma_freq = param->ma_freq; + hdr->power_rate_freq = param->power_rate_freq; + + hdr->power_rate = hdr->power_rate_freq > 0 ? 1 : 0; + hdr->get_mmap = (extra & QUADD_PARAM_IDX_EXTRA_GET_MMAP) ? 
1 : 0; + + hdr->reserved = 0; + hdr->extra_length = 0; - hdr->power_rate = quadd_power_clk_is_enabled(&power_rate_period); - hdr->power_rate_period = power_rate_period; + if (pmu) + nr_events += pmu->get_current_events(events, max_events); - comm->put_sample(&record, NULL, 0); + if (pl310) + nr_events += pl310->get_current_events(events + nr_events, + max_events - nr_events); + + hdr->nr_events = nr_events; + + vec.base = events; + vec.len = nr_events * sizeof(events[0]); + + quadd_put_sample(&record, &vec, 1); } void quadd_put_sample(struct quadd_record_data *data, - char *extra_data, unsigned int extra_length) + struct quadd_iovec *vec, int vec_count) { struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm; - if (data->record_type == QUADD_RECORD_TYPE_SAMPLE && - data->sample.period > 0x7FFFFFFF) { - struct quadd_sample_data *sample = &data->sample; - pr_err_once("very big period, sample id: %d\n", - sample->event_id); - return; - } - - comm->put_sample(data, extra_data, extra_length); + comm->put_sample(data, vec, vec_count); atomic64_inc(&hrt.counter_samples); } -static int get_sample_data(struct event_data *event, - struct pt_regs *regs, - struct quadd_sample_data *sample) +static int get_sample_data(struct quadd_sample_data *sample, + struct pt_regs *regs, pid_t pid) { - u32 period; - u32 prev_val, val; + unsigned int cpu, flags; + struct quadd_thread_data *t_data; + struct quadd_ctx *quadd_ctx = hrt.quadd_ctx; + struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx); - prev_val = event->prev_val; - val = event->val; + cpu = quadd_get_processor_id(regs, &flags); + sample->cpu = cpu; - sample->event_id = event->event_id; + sample->lp_mode = + (flags & QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP) ? 1 : 0; + sample->thumb_mode = (flags & QUADD_CPUMODE_THUMB) ? 1 : 0; + sample->user_mode = user_mode(regs) ? 
1 : 0; sample->ip = instruction_pointer(regs); - sample->cpu = quadd_get_processor_id(regs); - sample->time = get_sample_time(); - if (prev_val <= val) - period = val - prev_val; + /* For security reasons, hide IPs from the kernel space. */ + if (!sample->user_mode && !quadd_ctx->collect_kernel_ips) + sample->ip = 0; else - period = QUADD_U32_MAX - prev_val + val; + sample->ip = instruction_pointer(regs); + + sample->time = get_sample_time(); + sample->reserved = 0; - if (event->event_source == QUADD_EVENT_SOURCE_PL310) { - int nr_current_active = atomic_read(&hrt.nr_active_all_core); - if (nr_current_active > 1) - period = period / nr_current_active; + if (pid > 0) { + sample->pid = pid; + } else { + t_data = &cpu_ctx->active_thread; + sample->pid = t_data->pid; } - sample->period = period; return 0; } -static void read_source(struct quadd_event_source_interface *source, - struct pt_regs *regs, pid_t pid) +static int read_source(struct quadd_event_source_interface *source, + struct pt_regs *regs, pid_t pid, + struct hrt_event_value *events_vals, int max_events) { int nr_events, i; + u32 prev_val, val, res_val; struct event_data events[QUADD_MAX_COUNTERS]; - struct quadd_record_data record_data; - struct quadd_thread_data *t_data; - char *extra_data = NULL; - unsigned int extra_length = 0, callchain_nr = 0; - struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx); - struct quadd_callchain *callchain_data = &cpu_ctx->callchain_data; - struct quadd_ctx *quadd_ctx = hrt.quadd_ctx; - struct pt_regs *user_regs; if (!source) - return; + return 0; - nr_events = source->read(events); + max_events = min_t(int, max_events, QUADD_MAX_COUNTERS); + nr_events = source->read(events, max_events); - if (nr_events == 0 || nr_events > QUADD_MAX_COUNTERS) { - pr_err_once("Error number of counters: %d, source: %p\n", - nr_events, source); - return; - } - - if (atomic_read(&cpu_ctx->nr_active) == 0) - return; + for (i = 0; i < nr_events; i++) { + struct event_data *s = 
&events[i]; - if (user_mode(regs)) - user_regs = regs; - else - user_regs = task_pt_regs(current); + prev_val = s->prev_val; + val = s->val; - if (hrt.quadd_ctx->param.backtrace) { - callchain_nr = - quadd_get_user_callchain(user_regs, callchain_data); - if (callchain_nr > 0) { - extra_data = (char *)cpu_ctx->callchain_data.callchain; - extra_length = callchain_nr * sizeof(u32); - } - } + if (prev_val <= val) + res_val = val - prev_val; + else + res_val = QUADD_U32_MAX - prev_val + val; - for (i = 0; i < nr_events; i++) { - if (get_sample_data(&events[i], regs, &record_data.sample)) - return; - - record_data.magic = QUADD_RECORD_MAGIC; - record_data.record_type = QUADD_RECORD_TYPE_SAMPLE; - record_data.cpu_mode = user_mode(regs) ? - QUADD_CPU_MODE_USER : QUADD_CPU_MODE_KERNEL; - - /* For security reasons, hide IPs from the kernel space. */ - if (record_data.cpu_mode == QUADD_CPU_MODE_KERNEL && - !quadd_ctx->collect_kernel_ips) - record_data.sample.ip = 0; - - if (pid > 0) { - record_data.sample.pid = pid; - } else { - t_data = &cpu_ctx->active_thread; - record_data.sample.pid = t_data->pid; + if (s->event_source == QUADD_EVENT_SOURCE_PL310) { + int nr_active = atomic_read(&hrt.nr_active_all_core); + if (nr_active > 1) + res_val = res_val / nr_active; } - if (i == 0) { - record_data.sample.callchain_nr = callchain_nr; - quadd_put_sample(&record_data, extra_data, - extra_length); - } else { - record_data.sample.callchain_nr = 0; - quadd_put_sample(&record_data, NULL, 0); - } + events_vals[i].event_id = s->event_id; + events_vals[i].value = res_val; } + + return nr_events; } static void read_all_sources(struct pt_regs *regs, pid_t pid) { + int i, vec_idx = 0, bt_size = 0; + int nr_events = 0, nr_positive_events = 0; + struct pt_regs *user_regs; + struct quadd_iovec vec[2]; + struct hrt_event_value events[QUADD_MAX_COUNTERS]; + u32 events_extra[QUADD_MAX_COUNTERS]; + + struct quadd_record_data record_data; + struct quadd_sample_data *s = &record_data.sample; + struct 
quadd_ctx *ctx = hrt.quadd_ctx; struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx); + struct quadd_callchain *cc_data = &cpu_ctx->callchain_data; if (!regs) return; + if (atomic_read(&cpu_ctx->nr_active) == 0) + return; + quadd_get_mmap_object(cpu_ctx, regs, pid); if (ctx->pmu && ctx->pmu_info.active) - read_source(ctx->pmu, regs, pid); + nr_events += read_source(ctx->pmu, regs, pid, + events, QUADD_MAX_COUNTERS); if (ctx->pl310 && ctx->pl310_info.active) - read_source(ctx->pl310, regs, pid); + nr_events += read_source(ctx->pl310, regs, pid, + events + nr_events, + QUADD_MAX_COUNTERS - nr_events); + + if (!nr_events) + return; + + if (user_mode(regs)) + user_regs = regs; + else + user_regs = task_pt_regs(current); + + if (get_sample_data(s, regs, pid)) + return; + + if (ctx->param.backtrace) { + bt_size = quadd_get_user_callchain(user_regs, cc_data); + if (bt_size > 0) { + vec[vec_idx].base = cc_data->callchain; + vec[vec_idx].len = + bt_size * sizeof(cc_data->callchain[0]); + vec_idx++; + } + } + s->callchain_nr = bt_size; + + record_data.magic = QUADD_RECORD_MAGIC; + record_data.record_type = QUADD_RECORD_TYPE_SAMPLE; + + s->events_flags = 0; + for (i = 0; i < nr_events; i++) { + u32 value = events[i].value; + if (value > 0) { + s->events_flags |= 1 << i; + events_extra[nr_positive_events++] = value; + } + } + + if (nr_positive_events == 0) + return; + + vec[vec_idx].base = events_extra; + vec[vec_idx].len = nr_positive_events * sizeof(events_extra[0]); + vec_idx++; + + quadd_put_sample(&record_data, vec, vec_idx); } static inline int is_profile_process(pid_t pid) @@ -389,7 +437,7 @@ void __quadd_task_sched_in(struct task_struct *prev, ctx->pmu->start(); if (ctx->pl310) - ctx->pl310->read(events); + ctx->pl310->read(events, 1); start_hrtimer(cpu_ctx); atomic_inc(&hrt.nr_active_all_core); diff --git a/drivers/misc/tegra-profiler/hrt.h b/drivers/misc/tegra-profiler/hrt.h index 7eff30a6ec85..1e3eb72fa04c 100644 --- a/drivers/misc/tegra-profiler/hrt.h +++ 
b/drivers/misc/tegra-profiler/hrt.h @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/hrt.h * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -72,6 +72,7 @@ struct quadd_hrt_ctx { struct quadd_hrt_ctx; struct quadd_record_data; struct quadd_module_state; +struct quadd_iovec; struct quadd_hrt_ctx *quadd_hrt_init(struct quadd_ctx *ctx); void quadd_hrt_deinit(void); @@ -80,7 +81,7 @@ int quadd_hrt_start(void); void quadd_hrt_stop(void); void quadd_put_sample(struct quadd_record_data *data, - char *extra_data, unsigned int extra_length); + struct quadd_iovec *vec, int vec_count); void quadd_hrt_get_state(struct quadd_module_state *state); u64 quadd_get_time(void); diff --git a/drivers/misc/tegra-profiler/ma.c b/drivers/misc/tegra-profiler/ma.c index d561a2411c5a..a1422cc98445 100644 --- a/drivers/misc/tegra-profiler/ma.c +++ b/drivers/misc/tegra-profiler/ma.c @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/ma.c * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -39,18 +39,13 @@ static void make_sample(struct quadd_hrt_ctx *hrt_ctx, record.magic = QUADD_RECORD_MAGIC; record.record_type = QUADD_RECORD_TYPE_MA; - record.cpu_mode = QUADD_CPU_MODE_NONE; ma->pid = pid; ma->time = quadd_get_time(); - ma->vm_size = vm_size << PAGE_SHIFT; - ma->rss_size = rss_size << PAGE_SHIFT; -/* - pr_debug("vm: %llu bytes (%llu mb), rss: %llu bytes (%llu mb)\n", - ma->vm_size, ma->vm_size / 0x100000, - ma->rss_size, ma->rss_size / 0x100000); -*/ + ma->vm_size = vm_size << (PAGE_SHIFT-10); + ma->rss_size = rss_size << (PAGE_SHIFT-10); + comm->put_sample(&record, NULL, 0); } diff --git a/drivers/misc/tegra-profiler/main.c b/drivers/misc/tegra-profiler/main.c index 2b9b8ad1b59b..7186e3529602 100644 --- a/drivers/misc/tegra-profiler/main.c +++ b/drivers/misc/tegra-profiler/main.c @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/main.c * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -303,8 +303,9 @@ static void get_capabilities(struct quadd_comm_cap *cap) break; default: - BUG(); - break; + pr_err_once("%s: error: invalid event\n", + __func__); + return; } } } @@ -352,8 +353,9 @@ static void get_capabilities(struct quadd_comm_cap *cap) break; default: - BUG(); - break; + pr_err_once("%s: error: invalid event\n", + __func__); + return; } } } @@ -364,6 +366,7 @@ static void get_capabilities(struct quadd_comm_cap *cap) extra |= QUADD_COMM_CAP_EXTRA_BT_KERNEL_CTX; extra |= QUADD_COMM_CAP_EXTRA_GET_MMAP; + extra |= QUADD_COMM_CAP_EXTRA_GROUP_SAMPLES; cap->reserved[QUADD_COMM_CAP_IDX_EXTRA] = extra; } @@ -417,7 +420,8 @@ static int __init quadd_module_init(void) return -ENODEV; } else { events = ctx.pmu_info.supported_events; - nr_events = ctx.pmu->get_supported_events(events); + nr_events = ctx.pmu->get_supported_events(events, + QUADD_MAX_COUNTERS); ctx.pmu_info.nr_supported_events = nr_events; pr_info("PMU: amount of events: %d\n", nr_events); @@ -430,7 +434,8 @@ static int __init quadd_module_init(void) ctx.pl310 = quadd_l2x0_events_init(); if (ctx.pl310) { events = ctx.pl310_info.supported_events; - nr_events = ctx.pl310->get_supported_events(events); + nr_events = ctx.pl310->get_supported_events(events, + QUADD_MAX_COUNTERS); ctx.pl310_info.nr_supported_events = nr_events; pr_info("pl310 success, amount of events: %d\n", diff --git a/drivers/misc/tegra-profiler/mmap.c b/drivers/misc/tegra-profiler/mmap.c index cf6f408a0d80..bcec4b32fb24 100644 --- a/drivers/misc/tegra-profiler/mmap.c +++ b/drivers/misc/tegra-profiler/mmap.c @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/mmap.c * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -27,6 +27,7 @@ #include <linux/tegra_profiler.h> #include "mmap.h" +#include "comm.h" #include "hrt.h" #include "debug.h" @@ -122,22 +123,25 @@ static int find_file(const char *file_name, unsigned long addr, } static void -put_mmap_sample(struct quadd_mmap_data *s, char *extra_data, - size_t extra_length) +put_mmap_sample(struct quadd_mmap_data *s, char *filename, + size_t length) { struct quadd_record_data r; + struct quadd_iovec vec; r.magic = QUADD_RECORD_MAGIC; r.record_type = QUADD_RECORD_TYPE_MMAP; - r.cpu_mode = QUADD_CPU_MODE_USER; memcpy(&r.mmap, s, sizeof(*s)); - r.mmap.filename_length = extra_length; + r.mmap.filename_length = length; - pr_debug("MMAP: pid: %d, file_name: '%s', addr: %#x, length: %u", - s->pid, extra_data, s->addr, extra_length); + vec.base = filename; + vec.len = length; - quadd_put_sample(&r, extra_data, extra_length); + pr_debug("MMAP: pid: %u, file_name: '%s', addr: %#llx, length: %llu", + s->pid, filename, s->addr, s->len); + + quadd_put_sample(&r, &vec, 1); } void quadd_get_mmap_object(struct quadd_cpu_context *cpu_ctx, @@ -176,8 +180,7 @@ void quadd_get_mmap_object(struct quadd_cpu_context *cpu_ctx, } else { sample.addr = vma->vm_start; sample.len = vma->vm_end - vma->vm_start; - sample.pgoff = - (u64)vma->vm_pgoff << PAGE_SHIFT; + sample.pgoff = vma->vm_pgoff; } break; } @@ -262,12 +265,11 @@ int quadd_get_current_mmap(struct quadd_cpu_context *cpu_ctx, pid_t pid) sample.pid = pid; sample.addr = vma->vm_start; sample.len = vma->vm_end - vma->vm_start; - sample.pgoff = (u64)vma->vm_pgoff << PAGE_SHIFT; + sample.pgoff = vma->vm_pgoff; if (!find_file(file_name, sample.addr, sample.len)) { strcpy(cpu_ctx->mmap_filename, file_name); length_aligned = ALIGN(length, 8); - put_mmap_sample(&sample, file_name, length_aligned); } } diff --git a/drivers/misc/tegra-profiler/pl310.c 
b/drivers/misc/tegra-profiler/pl310.c index 010830823817..f2ad8424f2cd 100644 --- a/drivers/misc/tegra-profiler/pl310.c +++ b/drivers/misc/tegra-profiler/pl310.c @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/pl310.c * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -176,14 +176,16 @@ static void l2x0_events_stop(void) qm_debug_stop_source(QUADD_EVENT_SOURCE_PL310); } -static int __maybe_unused l2x0_events_read(struct event_data *events) +static int __maybe_unused +l2x0_events_read(struct event_data *events, int max_events) { unsigned long flags; - if (l2x0_ctx.l2x0_event_type < 0) { - pr_err_once("pl310 value: %u\n", events[0].val); + if (l2x0_ctx.l2x0_event_type < 0) + return 0; + + if (max_events == 0) return 0; - } events[0].event_source = QUADD_EVENT_SOURCE_PL310; events[0].event_id = l2x0_ctx.event_id; @@ -202,10 +204,14 @@ static int __maybe_unused l2x0_events_read(struct event_data *events) return 1; } -static int __maybe_unused l2x0_events_read_emulate(struct event_data *events) +static int __maybe_unused +l2x0_events_read_emulate(struct event_data *events, int max_events) { static u32 val; + if (max_events == 0) + return 0; + if (val > 100) val = 0; @@ -256,14 +262,28 @@ static int l2x0_set_events(int *events, int size) return 0; } -static int get_supported_events(int *events) +static int get_supported_events(int *events, int max_events) { + if (max_events < 3) + return 0; + events[0] = QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES; events[1] = QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES; events[2] = QUADD_EVENT_TYPE_L2_ICACHE_MISSES; + return 3; } +static int get_current_events(int *events, int max_events) +{ + if (max_events == 0) + return 0; + + *events = l2x0_ctx.event_id; + + return 1; +} + static struct quadd_event_source_interface 
l2x0_int = { .enable = l2x0_events_enable, .disable = l2x0_events_disable, @@ -278,6 +298,7 @@ static struct quadd_event_source_interface l2x0_int = { #endif .set_events = l2x0_set_events, .get_supported_events = get_supported_events, + .get_current_events = get_current_events, }; struct quadd_event_source_interface *quadd_l2x0_events_init(void) diff --git a/drivers/misc/tegra-profiler/power_clk.c b/drivers/misc/tegra-profiler/power_clk.c index b5b73f4afcea..261cf87c79b1 100644 --- a/drivers/misc/tegra-profiler/power_clk.c +++ b/drivers/misc/tegra-profiler/power_clk.c @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/power_clk.c * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -110,7 +110,8 @@ static void read_source(struct power_clk_source *s) break; default: - BUG(); + pr_err_once("%s: error: invalid power_clk type\n", __func__); + return; } mutex_unlock(&s->lock); @@ -155,6 +156,7 @@ static void make_sample(void) int i; u32 extra_cpus[NR_CPUS]; struct power_clk_source *s; + struct quadd_iovec vec; struct quadd_record_data record; struct quadd_power_rate_data *power_rate = &record.power_rate; @@ -162,7 +164,6 @@ static void make_sample(void) record.magic = QUADD_RECORD_MAGIC; record.record_type = QUADD_RECORD_TYPE_POWER_RATE; - record.cpu_mode = QUADD_CPU_MODE_NONE; power_rate->time = quadd_get_time(); @@ -199,8 +200,10 @@ static void make_sample(void) extra_cpus[0], extra_cpus[1], extra_cpus[2], extra_cpus[3], power_rate->gpu, power_rate->emc); */ - comm->put_sample(&record, (char *)extra_cpus, - power_rate->nr_cpus * sizeof(extra_cpus[0])); + vec.base = extra_cpus; + vec.len = power_rate->nr_cpus * sizeof(extra_cpus[0]); + + comm->put_sample(&record, &vec, 1); } static inline int is_data_changed(struct power_clk_source *s) @@ -308,18 
+311,6 @@ static void power_clk_timer(unsigned long data) add_timer(timer); } -int quadd_power_clk_is_enabled(int *period) -{ - struct quadd_parameters *param = &power_ctx.quadd_ctx->param; - - *period = power_ctx.period; - - if (param->power_rate_freq == 0) - return 0; - - return 1; -} - int quadd_power_clk_start(void) { struct power_clk_source *s; diff --git a/drivers/misc/tegra-profiler/power_clk.h b/drivers/misc/tegra-profiler/power_clk.h index 6854687392f3..eae519b8a4d9 100644 --- a/drivers/misc/tegra-profiler/power_clk.h +++ b/drivers/misc/tegra-profiler/power_clk.h @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/power_clk.h * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -25,6 +25,4 @@ int quadd_power_clk_init(struct quadd_ctx *quadd_ctx); int quadd_power_clk_start(void); void quadd_power_clk_stop(void); -int quadd_power_clk_is_enabled(int *period); - #endif /* __QUADD_POWER_CLK_H */ diff --git a/drivers/misc/tegra-profiler/quadd.h b/drivers/misc/tegra-profiler/quadd.h index 41367c3bec96..3815f616eaec 100644 --- a/drivers/misc/tegra-profiler/quadd.h +++ b/drivers/misc/tegra-profiler/quadd.h @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/quadd.h * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -32,9 +32,10 @@ struct quadd_event_source_interface { void (*disable)(void); void (*start)(void); void (*stop)(void); - int (*read)(struct event_data *events); + int (*read)(struct event_data *events, int max_events); int (*set_events)(int *events, int size); - int (*get_supported_events)(int *events); + int (*get_supported_events)(int *events, int max_events); + int (*get_current_events)(int *events, int max_events); }; struct source_info { diff --git a/drivers/misc/tegra-profiler/quadd_proc.c b/drivers/misc/tegra-profiler/quadd_proc.c index 980a810f4e40..36201a77cb5b 100644 --- a/drivers/misc/tegra-profiler/quadd_proc.c +++ b/drivers/misc/tegra-profiler/quadd_proc.c @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/quadd_proc.c * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -58,48 +58,52 @@ static int show_capabilities(struct seq_file *f, void *offset) struct quadd_events_cap *event = &cap->events_cap; unsigned int extra = cap->reserved[QUADD_COMM_CAP_IDX_EXTRA]; - seq_printf(f, "pmu: %s\n", + seq_printf(f, "pmu: %s\n", YES_NO(cap->pmu)); - seq_printf(f, "tegra 3 LP cluster: %s\n", + seq_printf(f, "tegra 3 LP cluster: %s\n", YES_NO(cap->tegra_lp_cluster)); - seq_printf(f, "power rate samples: %s\n", + seq_printf(f, "power rate samples: %s\n", YES_NO(cap->power_rate)); - seq_printf(f, "l2 cache: %s\n", + seq_printf(f, "l2 cache: %s\n", YES_NO(cap->l2_cache)); if (cap->l2_cache) { - seq_printf(f, "multiple l2 events: %s\n", + seq_printf(f, "multiple l2 events: %s\n", YES_NO(cap->l2_multiple_events)); } - seq_printf(f, "support polling mode: %s\n", + seq_printf(f, "support polling mode: %s\n", YES_NO(cap->blocked_read)); - seq_printf(f, "backtrace from the kernel ctx: %s\n", + seq_printf(f, "backtrace from the kernel ctx: %s\n", YES_NO(extra & QUADD_COMM_CAP_EXTRA_BT_KERNEL_CTX)); + seq_printf(f, "send mmap regions at the start: %s\n", + YES_NO(extra & QUADD_COMM_CAP_EXTRA_GET_MMAP)); + seq_printf(f, "group samples: %s\n", + YES_NO(extra & QUADD_COMM_CAP_EXTRA_GROUP_SAMPLES)); seq_puts(f, "\n"); seq_puts(f, "Supported events:\n"); - seq_printf(f, "cpu_cycles: %s\n", + seq_printf(f, "cpu_cycles: %s\n", YES_NO(event->cpu_cycles)); - seq_printf(f, "instructions: %s\n", + seq_printf(f, "instructions: %s\n", YES_NO(event->instructions)); - seq_printf(f, "branch_instructions: %s\n", + seq_printf(f, "branch_instructions: %s\n", YES_NO(event->branch_instructions)); - seq_printf(f, "branch_misses: %s\n", + seq_printf(f, "branch_misses: %s\n", YES_NO(event->branch_misses)); - seq_printf(f, "bus_cycles: %s\n", + seq_printf(f, "bus_cycles: %s\n", YES_NO(event->bus_cycles)); - seq_printf(f, 
"l1_dcache_read_misses: %s\n", + seq_printf(f, "l1_dcache_read_misses: %s\n", YES_NO(event->l1_dcache_read_misses)); - seq_printf(f, "l1_dcache_write_misses: %s\n", + seq_printf(f, "l1_dcache_write_misses: %s\n", YES_NO(event->l1_dcache_write_misses)); - seq_printf(f, "l1_icache_misses: %s\n", + seq_printf(f, "l1_icache_misses: %s\n", YES_NO(event->l1_icache_misses)); - seq_printf(f, "l2_dcache_read_misses: %s\n", + seq_printf(f, "l2_dcache_read_misses: %s\n", YES_NO(event->l2_dcache_read_misses)); - seq_printf(f, "l2_dcache_write_misses: %s\n", + seq_printf(f, "l2_dcache_write_misses: %s\n", YES_NO(event->l2_dcache_write_misses)); - seq_printf(f, "l2_icache_misses: %s\n", + seq_printf(f, "l2_icache_misses: %s\n", YES_NO(event->l2_icache_misses)); return 0; diff --git a/drivers/misc/tegra-profiler/tegra.h b/drivers/misc/tegra-profiler/tegra.h index 65cd6cd0e142..fd7fc31adcad 100644 --- a/drivers/misc/tegra-profiler/tegra.h +++ b/drivers/misc/tegra-profiler/tegra.h @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/tegra.h * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -25,19 +25,22 @@ #include <../../mach-tegra/pm.h> #endif -static inline int quadd_get_processor_id(struct pt_regs *regs) +static inline unsigned int +quadd_get_processor_id(struct pt_regs *regs, unsigned int *flags) { - int cpu_id = smp_processor_id(); + if (flags) { + *flags = 0; #ifdef CONFIG_TEGRA_CLUSTER_CONTROL - if (is_lp_cluster()) - cpu_id |= QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP; + if (is_lp_cluster()) + *flags |= QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP; #endif - if (thumb_mode(regs)) - cpu_id |= QUADD_CPUMODE_THUMB; + if (regs && thumb_mode(regs)) + *flags |= QUADD_CPUMODE_THUMB; + } - return cpu_id; + return smp_processor_id(); } static inline int quadd_is_cpu_with_lp_cluster(void) diff --git a/drivers/misc/tegra-profiler/version.h b/drivers/misc/tegra-profiler/version.h index 3dddadb66156..b6b9a9c9a57f 100644 --- a/drivers/misc/tegra-profiler/version.h +++ b/drivers/misc/tegra-profiler/version.h @@ -1,7 +1,7 @@ /* * drivers/misc/tegra-profiler/hrt.h * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -18,7 +18,7 @@ #ifndef __QUADD_VERSION_H #define __QUADD_VERSION_H -#define QUADD_MODULE_VERSION "1.40" +#define QUADD_MODULE_VERSION "1.41" #define QUADD_MODULE_BRANCH "Dev" #endif /* __QUADD_VERSION_H */ diff --git a/include/linux/tegra_profiler.h b/include/linux/tegra_profiler.h index b122b97dd521..e3f90a2688c4 100644 --- a/include/linux/tegra_profiler.h +++ b/include/linux/tegra_profiler.h @@ -1,7 +1,7 @@ /* * include/linux/tegra_profiler.h * - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -19,7 +19,7 @@ #include <linux/ioctl.h> -#define QUADD_SAMPLES_VERSION 17 +#define QUADD_SAMPLES_VERSION 18 #define QUADD_IO_VERSION 9 #define QUADD_IO_VERSION_DYNAMIC_RB 5 @@ -29,6 +29,7 @@ #define QUADD_IO_VERSION_GET_MMAP 9 #define QUADD_SAMPLE_VERSION_THUMB_MODE_FLAG 17 +#define QUADD_SAMPLE_VERSION_GROUP_SAMPLES 18 #define QUADD_MAX_COUNTERS 32 #define QUADD_MAX_PROCESS 64 @@ -93,8 +94,6 @@ enum quadd_events_id { QUADD_EVENT_TYPE_MAX, }; -#pragma pack(push, 4) - struct event_data { int event_source; int event_id; @@ -125,49 +124,56 @@ enum quadd_cpu_mode { QUADD_CPU_MODE_NONE, }; -struct quadd_sample_data { - u32 event_id; +typedef u32 quadd_bt_addr_t; + +#pragma pack(push, 1) - u32 ip; +struct quadd_sample_data { + u64 ip; u32 pid; u64 time; - u32 cpu; - u64 period; - u32 callchain_nr; + u16 cpu:6, + user_mode:1, + lp_mode:1, + thumb_mode:1, + reserved:7; + + u8 callchain_nr; + u32 events_flags; }; struct quadd_mmap_data { u32 pid; - u32 addr; + u64 addr; u64 len; - u64 pgoff; + u32 pgoff; - u32 filename_length; + u16 filename_length; }; struct quadd_ma_data { u32 pid; u64 time; - u64 vm_size; - u64 rss_size; + u32 vm_size; + u32 rss_size; }; struct quadd_power_rate_data { u64 time; - u32 nr_cpus; + u8 nr_cpus; u32 gpu; u32 emc; }; struct quadd_additional_sample { - u32 type; + u8 type; - u32 values[8]; - u32 extra_length; + u32 values[6]; + u16 extra_length; }; enum { @@ -186,42 +192,48 @@ enum { }; struct quadd_debug_data { - u32 type; + u8 type; - u32 cpu; u32 pid; u64 time; - u64 timer_period; + u16 cpu:6, + user_mode:1, + lp_mode:1, + thumb_mode:1, + reserved:7; - u32 extra_value1; - u32 extra_value2; - u32 extra_value3; + u32 extra_value[2]; + u16 extra_length; }; +#define QUADD_HEADER_MAGIC 0x1122 struct quadd_header_data { - u32 version; + u16 magic; + u16 version; u32 backtrace:1, use_freq:1, 
system_wide:1, power_rate:1, - debug_samples:1; + debug_samples:1, + get_mmap:1, + reserved:26; /* reserved fields for future extensions */ - u64 period; - u32 ma_period; - u32 power_rate_period; + u32 freq; + u16 ma_freq; + u16 power_rate_freq; - u32 reserved[4]; /* reserved fields for future extensions */ + u8 nr_events; + u16 extra_length; }; -#define QUADD_RECORD_MAGIC 0x33557799 +#define QUADD_RECORD_MAGIC 0x335577aa struct quadd_record_data { - u32 magic; /* for debug */ - u32 record_type; - u32 cpu_mode; + u32 magic; /* temporary, it will be removed later */ + u8 record_type; union { struct quadd_sample_data sample; @@ -232,7 +244,9 @@ struct quadd_record_data { struct quadd_power_rate_data power_rate; struct quadd_additional_sample additional_sample; }; -}; +} __aligned(4); + +#pragma pack(4) #define QUADD_MAX_PACKAGE_NAME 320 @@ -286,6 +300,7 @@ enum { #define QUADD_COMM_CAP_EXTRA_BT_KERNEL_CTX (1 << 0) #define QUADD_COMM_CAP_EXTRA_GET_MMAP (1 << 1) +#define QUADD_COMM_CAP_EXTRA_GROUP_SAMPLES (1 << 2) struct quadd_comm_cap { u32 pmu:1, |