From 944ac4259e39801c843a915c3da8194ac9af0440 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 19:26:08 -0400 Subject: ftrace: ftrace dump on oops control Impact: add (default-off) dump-trace-on-oops flag Currently, ftrace is set up to dump its contents to the console if the kernel panics or oops. This can be annoying if you have trace data in the buffers and you experience an oops, but the trace data is old or static. Usually when you want ftrace to dump its contents is when you are debugging your system and you have set up ftrace to trace the events leading to an oops. This patch adds a control variable called "ftrace_dump_on_oops" that will enable the ftrace dump to console on oops. This variable is default off but a developer can enable it either through the kernel command line by adding "ftrace_dump_on_oops" or at run time by setting (or disabling) /proc/sys/kernel/ftrace_dump_on_oops. v2: Replaced /** with /* as Randy explained that kernel-doc does not yet handle variables. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d345d649d073..47f46cbdd860 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -63,6 +63,28 @@ static cpumask_t __read_mostly tracing_buffer_mask; static int tracing_disabled = 1; +/* + * ftrace_dump_on_oops - variable to dump ftrace buffer on oops + * + * If there is an oops (or kernel panic) and the ftrace_dump_on_oops + * is set, then ftrace_dump is called. This will output the contents + * of the ftrace buffers to the console. This is very useful for + * capturing traces that lead to crashes and outputing it to a + * serial console. + * + * It is default off, but you can enable it with either specifying + * "ftrace_dump_on_oops" in the kernel command line, or setting + * /proc/sys/kernel/ftrace_dump_on_oops to true. + */ +int ftrace_dump_on_oops; + +static int __init set_ftrace_dump_on_oops(char *str) +{ + ftrace_dump_on_oops = 1; + return 1; +} +__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); + long ns2usecs(cycle_t nsec) { @@ -3021,7 +3043,8 @@ EXPORT_SYMBOL_GPL(__ftrace_printk); static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { - ftrace_dump(); + if (ftrace_dump_on_oops) + ftrace_dump(); return NOTIFY_OK; } @@ -3037,7 +3060,8 @@ static int trace_die_handler(struct notifier_block *self, { switch (val) { case DIE_OOPS: - ftrace_dump(); + if (ftrace_dump_on_oops) + ftrace_dump(); break; default: break; @@ -3078,7 +3102,6 @@ trace_printk_seq(struct trace_seq *s) trace_seq_reset(s); } - void ftrace_dump(void) { static DEFINE_SPINLOCK(ftrace_dump_lock); -- cgit v1.2.3 From b807c3d0f8e39ed7cbbbe6da162650e305e8de15 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 30 Oct 2008 16:08:33 -0400 Subject: ftrace: nmi update statistics Impact: add more debug info to /debugfs/tracing/dyn_ftrace_total_info This patch adds dynamic ftrace NMI update statistics to the /debugfs/tracing/dyn_ftrace_total_info stat file. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a610ca771558..bc36febc0771 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2815,22 +2815,39 @@ static struct file_operations tracing_mark_fops = { #ifdef CONFIG_DYNAMIC_FTRACE +#define DYN_INFO_BUF_SIZE 1023 +static char ftrace_dyn_info_buffer[DYN_INFO_BUF_SIZE+1]; +static DEFINE_MUTEX(dyn_info_mutex); + +int __weak ftrace_arch_read_dyn_info(char *buf, int size) +{ + return 0; +} + static ssize_t -tracing_read_long(struct file *filp, char __user *ubuf, +tracing_read_dyn_info(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { unsigned long *p = filp->private_data; - char buf[64]; + char *buf = ftrace_dyn_info_buffer; int r; - r = sprintf(buf, "%ld\n", *p); + mutex_lock(&dyn_info_mutex); + r = sprintf(buf, "%ld ", *p); - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); + r += ftrace_arch_read_dyn_info(buf+r, DYN_INFO_BUF_SIZE-r); + buf[r++] = '\n'; + + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); + + mutex_unlock(&dyn_info_mutex); + + return r; } -static struct file_operations tracing_read_long_fops = { +static struct file_operations tracing_dyn_info_fops = { .open = tracing_open_generic, - .read = tracing_read_long, + .read = tracing_read_dyn_info, }; #endif @@ -2939,7 +2956,7 @@ static __init int tracer_init_debugfs(void) #ifdef CONFIG_DYNAMIC_FTRACE entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, - &tracing_read_long_fops); + &tracing_dyn_info_fops); if (!entry) pr_warning("Could not create debugfs " "'dyn_ftrace_total_info' entry\n"); -- cgit v1.2.3 From a26a2a27396c0a0877aa701f8f92d08ba550a6c9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 31 Oct 2008 00:03:22 -0400 Subject: ftrace: nmi safe code clean ups Impact: cleanup This patch cleans up the NMI safe code for dynamic ftrace as suggested by Andrew Morton. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bc36febc0771..7f86067d760c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2815,10 +2815,6 @@ static struct file_operations tracing_mark_fops = { #ifdef CONFIG_DYNAMIC_FTRACE -#define DYN_INFO_BUF_SIZE 1023 -static char ftrace_dyn_info_buffer[DYN_INFO_BUF_SIZE+1]; -static DEFINE_MUTEX(dyn_info_mutex); - int __weak ftrace_arch_read_dyn_info(char *buf, int size) { return 0; @@ -2828,14 +2824,17 @@ static ssize_t tracing_read_dyn_info(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { + static char ftrace_dyn_info_buffer[1024]; + static DEFINE_MUTEX(dyn_info_mutex); unsigned long *p = filp->private_data; char *buf = ftrace_dyn_info_buffer; + int size = ARRAY_SIZE(ftrace_dyn_info_buffer); int r; mutex_lock(&dyn_info_mutex); r = sprintf(buf, "%ld ", *p); - r += ftrace_arch_read_dyn_info(buf+r, DYN_INFO_BUF_SIZE-r); + r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r); buf[r++] = '\n'; r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -- cgit v1.2.3 From d9e540762f5cdd89f24e518ad1fd31142d0b9726 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 1 Nov 2008 19:57:37 +0100 Subject: ftrace: ftrace_dump_on_oops=[tracer] Impact: add new (optional) debug boot option In order to facilitate early boot trouble, allow one to specify a tracer on the kernel boot line. Signed-off-by: Peter Zijlstra Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 58 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 20 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bdb1df00fb10..482583eb8001 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -79,6 +79,15 @@ static int tracing_disabled = 1; */ int ftrace_dump_on_oops; +static int tracing_set_tracer(char *buf); + +static int __init set_ftrace(char *str) +{ + tracing_set_tracer(str); + return 1; +} +__setup("ftrace", set_ftrace); + static int __init set_ftrace_dump_on_oops(char *str) { ftrace_dump_on_oops = 1; @@ -2394,29 +2403,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -static ssize_t -tracing_set_trace_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) +static int tracing_set_tracer(char *buf) { struct trace_array *tr = &global_trace; struct tracer *t; - char buf[max_tracer_type_len+1]; - int i; - size_t ret; - - ret = cnt; - - if (cnt > max_tracer_type_len) - cnt = max_tracer_type_len; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - - /* strip ending whitespace. */ - for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) - buf[i] = 0; + int ret = 0; mutex_lock(&trace_types_lock); for (t = trace_types; t; t = t->next) { @@ -2440,6 +2431,33 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, out: mutex_unlock(&trace_types_lock); + return ret; +} + +static ssize_t +tracing_set_trace_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[max_tracer_type_len+1]; + int i; + size_t ret; + + if (cnt > max_tracer_type_len) + cnt = max_tracer_type_len; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + /* strip ending whitespace. */ + for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) + buf[i] = 0; + + ret = tracing_set_tracer(buf); + if (!ret) + ret = cnt; + if (ret > 0) filp->f_pos += ret; -- cgit v1.2.3 From 182e9f5f704ed6b9175142fe8da33c9ce0c52b52 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 3 Nov 2008 23:15:56 -0500 Subject: ftrace: insert in the ftrace_preempt_disable()/enable() functions Impact: use new, consolidated APIs in ftrace plugins This patch replaces the schedule safe preempt disable code with the ftrace_preempt_disable() and ftrace_preempt_enable() safe functions. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e4c40c868d67..3e7bf5eb9007 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -904,8 +904,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) return; pc = preempt_count(); - resched = need_resched(); - preempt_disable_notrace(); + resched = ftrace_preempt_disable(); local_save_flags(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; @@ -915,10 +914,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) trace_function(tr, data, ip, parent_ip, flags, pc); atomic_dec(&data->disabled); - if (resched) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); + ftrace_preempt_enable(resched); } static struct ftrace_ops trace_ops __read_mostly = -- cgit v1.2.3 From b2a866f9344cb30d7ddf5d67b5b8393daf8bef07 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 3 Nov 2008 23:15:57 -0500 Subject: ftrace: function tracer with irqs disabled Impact: disable interrupts during trace entry creation (as opposed to preempt) To help with performance, I set the ftracer to not disable interrupts, and only to disable preemption. If an interrupt occurred, it would not be traced, because the function tracer protects itself from recursion. This may be faster, but the trace output might miss some traces. This patch makes the fuction trace disable interrupts, but it also adds a runtime feature to disable preemption instead. It does this by having two different tracer functions. When the function tracer is enabled, it will check to see which version is requested (irqs disabled or preemption disabled). Then it will use the corresponding function as the tracer. Irq disabling is the default behaviour, but if the user wants better performance, with the chance of missing traces, then they can choose the preempt disabled version. Running hackbench 3 times with the irqs disabled and 3 times with the preempt disabled function tracer yielded: tracing type times entries recorded ------------ -------- ---------------- irq disabled 43.393 166433066 43.282 166172618 43.298 166256704 preempt disabled 38.969 159871710 38.943 159972935 39.325 161056510 Average: irqs disabled: 43.324 166287462 preempt disabled: 39.079 160300385 preempt is 10.8 percent faster than irqs disabled. I wrote a patch to count function trace recursion and reran hackbench. With irq disabled: 1,150 times the function tracer did not trace due to recursion. with preempt disabled: 5,117,718 times. The thousand times with irq disabled could be due to NMIs, or simply a case where it called a function that was not protected by notrace. But we also see that a large amount of the trace is lost with the preempt version. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Andrew Morton Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3e7bf5eb9007..d576dbd6defe 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -244,6 +244,7 @@ static const char *trace_options[] = { "stacktrace", "sched-tree", "ftrace_printk", + "ftrace_preempt", NULL }; @@ -891,7 +892,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) #ifdef CONFIG_FUNCTION_TRACER static void -function_trace_call(unsigned long ip, unsigned long parent_ip) +function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -917,6 +918,37 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) ftrace_preempt_enable(resched); } +static void +function_trace_call(unsigned long ip, unsigned long parent_ip) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + + if (unlikely(!ftrace_function_enabled)) + return; + + /* + * Need to use raw, since this must be called before the + * recursive protection is performed. + */ + raw_local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) { + pc = preempt_count(); + trace_function(tr, data, ip, parent_ip, flags, pc); + } + + atomic_dec(&data->disabled); + raw_local_irq_restore(flags); +} + static struct ftrace_ops trace_ops __read_mostly = { .func = function_trace_call, @@ -925,6 +957,12 @@ static struct ftrace_ops trace_ops __read_mostly = void tracing_start_function_trace(void) { ftrace_function_enabled = 0; + + if (trace_flags & TRACE_ITER_PREEMPTONLY) + trace_ops.func = function_trace_call_preempt_only; + else + trace_ops.func = function_trace_call; + register_ftrace_function(&trace_ops); if (tracer_enabled) ftrace_function_enabled = 1; -- cgit v1.2.3 From d7ad44b697c9d13e445ddc7d16f736fbac333249 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 31 Oct 2008 13:20:08 +0100 Subject: tracing/fastboot: use sched switch tracer from boot tracer Impact: enhance boot trace output with scheduling events Use the sched_switch tracer from the boot tracer. We also can trace schedule events inside the initcalls. Sched tracing is disabled after the initcall has finished and then reenabled before the next one is started. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e4c40c868d67..50d7018163f6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3251,6 +3251,8 @@ __init static int tracer_alloc_buffers(void) register_tracer(&nop_trace); #ifdef CONFIG_BOOT_TRACER + /* We don't want to launch sched_switch tracer yet */ + global_trace.ctrl = 0; register_tracer(&boot_tracer); current_trace = &boot_tracer; current_trace->init(&global_trace); -- cgit v1.2.3 From 0f04870148ecb825133bc2733f473b1c5773ac0b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 5 Nov 2008 16:05:44 -0500 Subject: ftrace: soft tracing stop and start Impact: add way to quickly start stop tracing from the kernel This patch adds a soft stop and start to the trace. This simply disables function tracing via the ftrace_disabled flag, and disables the trace buffers to prevent recording. The tracing code may still be executed, but the trace will not be recorded. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 29ab40a764c8..113aea9447ec 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -43,6 +43,15 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; + +/* + * Kill all tracing for good (never come back). + * It is initialized to 1 but will turn to zero if the initialization + * of the tracer is successful. But that is the only place that sets + * this back to zero. + */ +int tracing_disabled = 1; + static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); static inline void ftrace_disable_cpu(void) @@ -62,8 +71,6 @@ static cpumask_t __read_mostly tracing_buffer_mask; #define for_each_tracing_cpu(cpu) \ for_each_cpu_mask(cpu, tracing_buffer_mask) -static int tracing_disabled = 1; - /* * ftrace_dump_on_oops - variable to dump ftrace buffer on oops * @@ -613,6 +620,76 @@ static void trace_init_cmdlines(void) cmdline_idx = 0; } +static int trace_stop_count; +static DEFINE_SPINLOCK(tracing_start_lock); + +/** + * tracing_start - quick start of the tracer + * + * If tracing is enabled but was stopped by tracing_stop, + * this will start the tracer back up. + */ +void tracing_start(void) +{ + struct ring_buffer *buffer; + unsigned long flags; + + if (tracing_disabled) + return; + + spin_lock_irqsave(&tracing_start_lock, flags); + if (--trace_stop_count) + goto out; + + if (trace_stop_count < 0) { + /* Someone screwed up their debugging */ + WARN_ON_ONCE(1); + trace_stop_count = 0; + goto out; + } + + + buffer = global_trace.buffer; + if (buffer) + ring_buffer_record_enable(buffer); + + buffer = max_tr.buffer; + if (buffer) + ring_buffer_record_enable(buffer); + + ftrace_start(); + out: + spin_unlock_irqrestore(&tracing_start_lock, flags); +} + +/** + * tracing_stop - quick stop of the tracer + * + * Light weight way to stop tracing. Use in conjunction with + * tracing_start. + */ +void tracing_stop(void) +{ + struct ring_buffer *buffer; + unsigned long flags; + + ftrace_stop(); + spin_lock_irqsave(&tracing_start_lock, flags); + if (trace_stop_count++) + goto out; + + buffer = global_trace.buffer; + if (buffer) + ring_buffer_record_disable(buffer); + + buffer = max_tr.buffer; + if (buffer) + ring_buffer_record_disable(buffer); + + out: + spin_unlock_irqrestore(&tracing_start_lock, flags); +} + void trace_stop_cmdline_recording(void); static void trace_save_cmdline(struct task_struct *tsk) -- cgit v1.2.3 From 9036990d462e09366f7297a2d1da6582c3e6b1d3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 5 Nov 2008 16:05:44 -0500 Subject: ftrace: restructure tracing start/stop infrastructure Impact: change where tracing is started up and stopped Currently, when a new tracer is selected via echo'ing a tracer name into the current_tracer file, the startup is only done if tracing_enabled is set to one. If tracing_enabled is changed to zero (by echo'ing 0 into the tracing_enabled file) a full shutdown is performed. The full startup and shutdown of a tracer can be expensive and the user can lose out traces when echo'ing in 0 to the tracing_enabled file, because the process takes too long. There can also be places that the user would like to start and stop the tracer several times and doing the full startup and shutdown of a tracer might be too expensive. This patch performs the full startup and shutdown when a tracer is selected. It also adds a way to do a quick start or stop of a tracer. The quick version is just a flag that prevents the tracing from taking place, but the overhead of the code is still there. For example, the startup of a tracer may enable tracepoints, or enable the function tracer. The stop and start will just set a flag to have the tracer ignore the calls when the tracepoint or function trace is called. The overhead of the tracer may still be present when the tracer is stopped, but no tracing will occur. Setting the tracer to the 'nop' tracer (or any other tracer) will perform the shutdown of the tracer which will disable the tracepoint or disable the function tracer. The tracing_enabled file will simply start or stop tracing. This change is all internal. The end result for the user should be the same as before. If tracing_enabled is not set, no trace will happen. If tracing_enabled is set, then the trace will happen. The tracing_enabled variable is static between tracers. Enabling tracing_enabled and going to another tracer will keep tracing_enabled enabled. Same is true with disabling tracing_enabled. This patch will now provide a fast start/stop method to the users for enabling or disabling tracing. Note: There were two methods to the struct tracer that were never used: The methods start and stop. These were to be used as a hook to the reading of the trace output, but ended up not being necessary. These two methods are now used to enable the start and stop of each tracer, in case the tracer needs to do more than just not write into the buffer. For example, the irqsoff tracer must stop recording max latencies when tracing is stopped. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 64 +++++++++++++++++++++++----------------------------- 1 file changed, 28 insertions(+), 36 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 113aea9447ec..ff1e9ed9b587 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -150,6 +150,19 @@ static DEFINE_PER_CPU(struct trace_array_cpu, max_data); /* tracer_enabled is used to toggle activation of a tracer */ static int tracer_enabled = 1; +/** + * tracing_is_enabled - return tracer_enabled status + * + * This function is used by other tracers to know the status + * of the tracer_enabled flag. Tracers may use this function + * to know if it should enable their features when starting + * up. See irqsoff tracer for an example (start_irqsoff_tracer). + */ +int tracing_is_enabled(void) +{ + return tracer_enabled; +} + /* function tracing enabled */ int ftrace_function_enabled; @@ -1041,8 +1054,7 @@ void tracing_start_function_trace(void) trace_ops.func = function_trace_call; register_ftrace_function(&trace_ops); - if (tracer_enabled) - ftrace_function_enabled = 1; + ftrace_function_enabled = 1; } void tracing_stop_function_trace(void) @@ -1189,10 +1201,6 @@ static void *s_start(struct seq_file *m, loff_t *pos) atomic_inc(&trace_record_cmdline_disabled); - /* let the tracer grab locks here if needed */ - if (current_trace->start) - current_trace->start(iter); - if (*pos != iter->pos) { iter->ent = NULL; iter->cpu = 0; @@ -1219,14 +1227,7 @@ static void *s_start(struct seq_file *m, loff_t *pos) static void s_stop(struct seq_file *m, void *p) { - struct trace_iterator *iter = m->private; - atomic_dec(&trace_record_cmdline_disabled); - - /* let the tracer release locks here if needed */ - if (current_trace && current_trace == iter->trace && iter->trace->stop) - iter->trace->stop(iter); - mutex_unlock(&trace_types_lock); } @@ -2056,10 +2057,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) m->private = iter; /* stop the trace while dumping */ - if (iter->tr->ctrl) { - tracer_enabled = 0; - ftrace_function_enabled = 0; - } + tracing_stop(); if (iter->trace && iter->trace->open) iter->trace->open(iter); @@ -2104,14 +2102,7 @@ int tracing_release(struct inode *inode, struct file *file) iter->trace->close(iter); /* reenable tracing if it was previously enabled */ - if (iter->tr->ctrl) { - tracer_enabled = 1; - /* - * It is safe to enable function tracing even if it - * isn't used - */ - ftrace_function_enabled = 1; - } + tracing_start(); mutex_unlock(&trace_types_lock); seq_release(inode, file); @@ -2449,11 +2440,10 @@ static ssize_t tracing_ctrl_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct trace_array *tr = filp->private_data; char buf[64]; int r; - r = sprintf(buf, "%ld\n", tr->ctrl); + r = sprintf(buf, "%u\n", tracer_enabled); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -2481,16 +2471,18 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, val = !!val; mutex_lock(&trace_types_lock); - if (tr->ctrl ^ val) { - if (val) + if (tracer_enabled ^ val) { + if (val) { tracer_enabled = 1; - else + if (current_trace->start) + current_trace->start(tr); + tracing_start(); + } else { tracer_enabled = 0; - - tr->ctrl = val; - - if (current_trace && current_trace->ctrl_update) - current_trace->ctrl_update(tr); + tracing_stop(); + if (current_trace->stop) + current_trace->stop(tr); + } } mutex_unlock(&trace_types_lock); @@ -3372,7 +3364,7 @@ __init static int tracer_alloc_buffers(void) #endif /* All seems OK, enable tracing */ - global_trace.ctrl = tracer_enabled; + global_trace.ctrl = 1; tracing_disabled = 0; atomic_notifier_chain_register(&panic_notifier_list, -- cgit v1.2.3 From 49833fc232bd6a5076496994d855f601354501d7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 7 Nov 2008 22:36:02 -0500 Subject: ftrace: enable trace_printk by default Impact: have the ftrace_printk enabled on startup It is confusing to have to "echo trace_printk > /debug/tracing/iter_ctrl" after adding ftrace_printk in the kernel. Currently the trace_printk is set to off by default. ftrace_printk should only be in open kernel code when used for debugging, and thus it should be enabled by default. It may also be used to record data within a tracer, but those ftrace_printks should be within wrappers that are either enabled by trace_points or have a variable protecting the code path from being entered when the tracer is disabled. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d55ccfc8d674..dbbdacfaaf9e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -205,7 +205,7 @@ static DEFINE_MUTEX(trace_types_lock); static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds iter_ctrl options */ -unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; +unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK; /** * trace_wake_up - wake up tasks waiting for trace input -- cgit v1.2.3 From bbf5b1a0cecb56de6236db8b01c5bfb7ab8ba8b2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 7 Nov 2008 22:36:02 -0500 Subject: ftrace: remove ctrl_update method Impact: Remove the ctrl_update tracer method With the new quick start/stop method of tracing, the ctrl_update method is out of date. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dbbdacfaaf9e..9e83188172a1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3357,8 +3357,6 @@ __init static int tracer_alloc_buffers(void) register_tracer(&nop_trace); #ifdef CONFIG_BOOT_TRACER - /* We don't want to launch sched_switch tracer yet */ - global_trace.ctrl = 0; register_tracer(&boot_tracer); current_trace = &boot_tracer; current_trace->init(&global_trace); -- cgit v1.2.3 From c76f06945be50564f925799ddfb6235ee4c26aa0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 7 Nov 2008 22:36:02 -0500 Subject: ftrace: remove trace array ctrl Impact: remove obsolete variable in trace_array structure With the new start / stop method of ftrace, the ctrl variable in the trace_array structure is now obsolete. Remove it. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9e83188172a1..58435415b366 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -537,7 +537,6 @@ int register_tracer(struct tracer *type) if (type->selftest) { struct tracer *saved_tracer = current_trace; struct trace_array *tr = &global_trace; - int saved_ctrl = tr->ctrl; int i; /* * Run a selftest on this tracer. @@ -550,13 +549,11 @@ int register_tracer(struct tracer *type) tracing_reset(tr, i); } current_trace = type; - tr->ctrl = 0; /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); ret = type->selftest(type, tr); /* the test is responsible for resetting too */ current_trace = saved_tracer; - tr->ctrl = saved_ctrl; if (ret) { printk(KERN_CONT "FAILED!\n"); goto out; @@ -966,7 +963,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) int cpu; int pc; - if (tracing_disabled || !tr->ctrl) + if (tracing_disabled) return; pc = preempt_count(); @@ -2820,7 +2817,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, unsigned long val; char buf[64]; int ret; - struct trace_array *tr = filp->private_data; if (cnt >= sizeof(buf)) return -EINVAL; @@ -2840,12 +2836,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, mutex_lock(&trace_types_lock); - if (tr->ctrl) { - cnt = -EBUSY; - pr_info("ftrace: please disable tracing" - " before modifying buffer size\n"); - goto out; - } + tracing_stop(); if (val != global_trace.entries) { ret = ring_buffer_resize(global_trace.buffer, val); @@ -2878,6 +2869,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, if (tracing_disabled) cnt = -ENOMEM; out: + tracing_start(); max_tr.entries = global_trace.entries; mutex_unlock(&trace_types_lock); @@ -2900,9 +2892,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, { char *buf; char *end; - struct trace_array *tr = &global_trace; - if (!tr->ctrl || tracing_disabled) + if (tracing_disabled) return -EINVAL; if (cnt > TRACE_BUF_SIZE) @@ -3131,7 +3122,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) unsigned long flags, irq_flags; int cpu, len = 0, size, pc; - if (!tr->ctrl || tracing_disabled) + if (tracing_disabled) return 0; pc = preempt_count(); @@ -3365,7 +3356,6 @@ __init static int tracer_alloc_buffers(void) #endif /* All seems OK, enable tracing */ - global_trace.ctrl = 1; tracing_disabled = 0; atomic_notifier_chain_register(&panic_notifier_list, -- cgit v1.2.3 From a309720c876d7ad2e224bfd1982c92ae4364c82e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 7 Nov 2008 22:36:02 -0500 Subject: ftrace: display start of CPU buffer in trace output Impact: change in trace output Because the trace buffers are per cpu ring buffers, the start of the trace can be confusing. If one CPU is very active at the end of the trace, its history will not go as far back as the other CPU traces. This means that output for a particular CPU may not appear for the first part of a trace. To help annotate what is happening, and to prevent any more confusion, this patch adds a line that annotates the start of a CPU buffer output. For example: automount-3495 [001] 184.596443: dnotify_parent <-vfs_write [...] automount-3495 [001] 184.596449: dput <-path_put automount-3496 [002] 184.596450: down_read_trylock <-do_page_fault [...] sshd-3497 [001] 184.597069: up_read <-do_page_fault -0 [000] 184.597074: __exit_idle <-exit_idle [...] automount-3496 [002] 184.597257: filemap_fault <-__do_fault -0 [003] 184.597261: exit_idle <-smp_apic_timer_interrupt Note, parsers of a trace output should always ignore any lines that start with a '#'. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 58435415b366..f147f198b9a6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1478,6 +1478,17 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter) trace_seq_putc(s, '\n'); } +static void test_cpu_buff_start(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + + if (cpu_isset(iter->cpu, iter->started)) + return; + + cpu_set(iter->cpu, iter->started); + trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); +} + static enum print_line_t print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) { @@ -1497,6 +1508,8 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) if (entry->type == TRACE_CONT) return TRACE_TYPE_HANDLED; + test_cpu_buff_start(iter); + next_entry = find_next_entry(iter, NULL, &next_ts); if (!next_entry) next_ts = iter->ts; @@ -1612,6 +1625,8 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) if (entry->type == TRACE_CONT) return TRACE_TYPE_HANDLED; + test_cpu_buff_start(iter); + comm = trace_find_cmdline(iter->ent->pid); t = ns2usecs(iter->ts); @@ -2631,6 +2646,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) return -ENOMEM; mutex_lock(&trace_types_lock); + + /* trace pipe does not show start of buffer */ + cpus_setall(iter->started); + iter->tr = &global_trace; iter->trace = current_trace; filp->private_data = iter; -- cgit v1.2.3 From 5aa1ba6a6c710e747838a22d798ac97a8b248745 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 10 Nov 2008 23:07:30 -0500 Subject: ftrace: prevent ftrace_special from recursion Impact: stop ftrace_special from recursion The ftrace_special is used to help debug areas of the kernel. Because of this, if it is put in certain locations, the fact that it allows recursion can become a problem if the kernel developer using does not realize that. This patch changes ftrace_special to not allow recursion into itself to make it more robust. It also changes from preempt disable interrupts disable to prevent any loss of trace entries. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0c22fe2d43a7..216bbe7547a4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -960,6 +960,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; + unsigned long flags; int cpu; int pc; @@ -967,14 +968,15 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) return; pc = preempt_count(); - preempt_disable_notrace(); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; - if (likely(!atomic_read(&data->disabled))) + if (likely(atomic_inc_return(&data->disabled) == 1)) ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); - preempt_enable_notrace(); + atomic_dec(&data->disabled); + local_irq_restore(flags); } #ifdef CONFIG_FUNCTION_TRACER -- cgit v1.2.3 From 15e6cb3673ea6277999642802406a764b49391b0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Nov 2008 07:14:25 +0100 Subject: tracing: add a tracer to catch execution time of kernel functions Impact: add new tracing plugin which can trace full (entry+exit) function calls This tracer uses the low level function return ftrace plugin to measure the execution time of the kernel functions. The first field is the caller of the function, the second is the measured function, and the last one is the execution time in nanoseconds. - v3: - HAVE_FUNCTION_RET_TRACER have been added. Each arch that support ftrace return should enable it. - ftrace_return_stub becomes ftrace_stub. - CONFIG_FUNCTION_RET_TRACER depends now on CONFIG_FUNCTION_TRACER - Return traces printing can be used for other tracers on trace.c - Adapt to the new tracing API (no more ctrl_update callback) - Correct the check of "disabled" during insertion. - Minor changes... Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 65 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 8 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 216bbe7547a4..a3f7ae9cd8e1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -244,13 +244,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs) return nsecs / 1000; } -/* - * TRACE_ITER_SYM_MASK masks the options in trace_flags that - * control the output of kernel symbols. - */ -#define TRACE_ITER_SYM_MASK \ - (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) - /* These must match the bit postions in trace_iterator_flags */ static const char *trace_options[] = { "print-parent", @@ -810,6 +803,35 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, ring_buffer_unlock_commit(tr->buffer, event, irq_flags); } +#ifdef CONFIG_FUNCTION_RET_TRACER +static void __trace_function_return(struct trace_array *tr, + struct trace_array_cpu *data, + struct ftrace_retfunc *trace, + unsigned long flags, + int pc) +{ + struct ring_buffer_event *event; + struct ftrace_ret_entry *entry; + unsigned long irq_flags; + + if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) + return; + + event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_FN_RET; + entry->ip = trace->func; + entry->parent_ip = trace->ret; + entry->rettime = trace->rettime; + entry->calltime = trace->calltime; + ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); +} +#endif + void ftrace(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags, @@ -1038,6 +1060,29 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) raw_local_irq_restore(flags); } +#ifdef CONFIG_FUNCTION_RET_TRACER +void trace_function_return(struct ftrace_retfunc *trace) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + + raw_local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { + pc = preempt_count(); + __trace_function_return(tr, data, trace, flags, pc); + } + atomic_dec(&data->disabled); + raw_local_irq_restore(flags); +} +#endif /* CONFIG_FUNCTION_RET_TRACER */ + static struct ftrace_ops trace_ops __read_mostly = { .func = function_trace_call, @@ -1285,7 +1330,7 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt, # define IP_FMT "%016lx" #endif -static int +int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) { int ret; @@ -1738,6 +1783,10 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) trace_seq_print_cont(s, iter); break; } + case TRACE_FN_RET: { + return print_return_function(iter); + break; + } } return TRACE_TYPE_HANDLED; } -- cgit v1.2.3 From 52f232cb720a7babb752849cbc2cab2d24021209 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 00:14:40 -0500 Subject: tracing: likely/unlikely branch annotation tracer Impact: new likely/unlikely branch tracer This patch adds a way to record the instances of the likely() and unlikely() branch condition annotations. When "unlikely" is set in /debugfs/tracing/iter_ctrl the unlikely conditions will be added to any of the ftrace tracers. The change takes effect when a new tracer is passed into the current_tracer file. For example: bash-3471 [003] 357.014755: [INCORRECT] sched_info_dequeued:sched_stats.h:177 bash-3471 [003] 357.014756: [correct] update_curr:sched_fair.c:489 bash-3471 [003] 357.014758: [correct] calc_delta_fair:sched_fair.c:411 bash-3471 [003] 357.014759: [correct] account_group_exec_runtime:sched_stats.h:356 bash-3471 [003] 357.014761: [correct] update_curr:sched_fair.c:489 bash-3471 [003] 357.014763: [INCORRECT] calc_delta_fair:sched_fair.c:411 bash-3471 [003] 357.014765: [correct] calc_delta_mine:sched.c:1279 Which shows the normal tracer heading, as well as whether the condition was correct "[correct]" or was mistaken "[INCORRECT]", followed by the function, file name and line number. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a3f7ae9cd8e1..83d38634bc90 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -258,6 +258,9 @@ static const char *trace_options[] = { "sched-tree", "ftrace_printk", "ftrace_preempt", +#ifdef CONFIG_UNLIKELY_TRACER + "unlikely", +#endif NULL }; @@ -1648,6 +1651,18 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) trace_seq_print_cont(s, iter); break; } + case TRACE_UNLIKELY: { + struct trace_unlikely *field; + + trace_assign_type(field, entry); + + trace_seq_printf(s, "[%s] %s:%s:%d\n", + field->correct ? "correct" : "INCORRECT", + field->func, + field->file, + field->line); + break; + } default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1787,6 +1802,18 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) return print_return_function(iter); break; } + case TRACE_UNLIKELY: { + struct trace_unlikely *field; + + trace_assign_type(field, entry); + + trace_seq_printf(s, "[%s] %s:%s:%d\n", + field->correct ? "correct" : "INCORRECT", + field->func, + field->file, + field->line); + break; + } } return TRACE_TYPE_HANDLED; } @@ -2592,6 +2619,7 @@ static int tracing_set_tracer(char *buf) if (t == current_trace) goto out; + trace_unlikely_disable(); if (current_trace && current_trace->reset) current_trace->reset(tr); @@ -2599,6 +2627,7 @@ static int tracing_set_tracer(char *buf) if (t->init) t->init(tr); + trace_unlikely_enable(tr); out: mutex_unlock(&trace_types_lock); -- cgit v1.2.3 From f88c4ae9f8c3939bee4337c75c7a673b5de7a8a7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 12 Nov 2008 11:55:41 +0100 Subject: tracing: branch tracer, tweak output Impact: modify the tracer output, to make it a bit easier to read Change the output from: > bash-3471 [003] 357.014755: [INCORRECT] sched_info_dequeued:sched_stats.h:177 > bash-3471 [003] 357.014756: [correct] update_curr:sched_fair.c:489 > bash-3471 [003] 357.014758: [correct] calc_delta_fair:sched_fair.c:411 to: > bash-3471 [003] 357.014755: [ MISS ] sched_info_dequeued:sched_stats.h:177 > bash-3471 [003] 357.014756: [ .... ] update_curr:sched_fair.c:489 > bash-3471 [003] 357.014758: [ .... ] calc_delta_fair:sched_fair.c:411 it's good to have fields aligned vertically, and the only important information is a prediction miss, so display only that information. Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 83d38634bc90..728a46ec6b06 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1657,7 +1657,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) trace_assign_type(field, entry); trace_seq_printf(s, "[%s] %s:%s:%d\n", - field->correct ? "correct" : "INCORRECT", + field->correct ? " .... " : " MISS ", field->func, field->file, field->line); @@ -1808,7 +1808,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) trace_assign_type(field, entry); trace_seq_printf(s, "[%s] %s:%s:%d\n", - field->correct ? "correct" : "INCORRECT", + field->correct ? " .... " : " MISS ", field->func, field->file, field->line); -- cgit v1.2.3 From 68d119f0a66f7e3663304343b072e56a2693446b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 12 Nov 2008 14:09:30 +0100 Subject: tracing: finetune branch-tracer output Steve suggested the to change the output from this: > bash-3471 [003] 357.014755: [ MISS ] sched_info_dequeued:sched_stats.h:177 > bash-3471 [003] 357.014756: [ .... ] update_curr:sched_fair.c:489 > bash-3471 [003] 357.014758: [ .... ] calc_delta_fair:sched_fair.c:411 to this: > bash-3471 [003] 357.014755: [ MISS ] sched_info_dequeued:sched_stats.h:177 > bash-3471 [003] 357.014756: [ ok ] update_curr:sched_fair.c:489 > bash-3471 [003] 357.014758: [ ok ] calc_delta_fair:sched_fair.c:411 as it makes it clearer to the user what it means exactly. Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 728a46ec6b06..d842db14a59b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1657,7 +1657,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) trace_assign_type(field, entry); trace_seq_printf(s, "[%s] %s:%s:%d\n", - field->correct ? " .... " : " MISS ", + field->correct ? " ok " : " MISS ", field->func, field->file, field->line); @@ -1808,7 +1808,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) trace_assign_type(field, entry); trace_seq_printf(s, "[%s] %s:%s:%d\n", - field->correct ? " .... " : " MISS ", + field->correct ? " ok " : " MISS ", field->func, field->file, field->line); -- cgit v1.2.3 From 2ed84eeb8808cf3c9f039213ca137ffd7d753f0e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 15:24:24 -0500 Subject: trace: rename unlikely profiler to branch profiler Impact: name change of unlikely tracer and profiler Ingo Molnar suggested changing the config from UNLIKELY_PROFILE to BRANCH_PROFILING. I never did like the "unlikely" name so I went one step farther, and renamed all the unlikely configurations to a "BRANCH" variant. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d842db14a59b..bad59d32a4a9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -258,7 +258,7 @@ static const char *trace_options[] = { "sched-tree", "ftrace_printk", "ftrace_preempt", -#ifdef CONFIG_UNLIKELY_TRACER +#ifdef CONFIG_BRANCH_TRACER "unlikely", #endif NULL -- cgit v1.2.3 From 9f029e83e968e5661d7be045bbcb620dbb909938 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 15:24:24 -0500 Subject: ftrace: rename unlikely iter_ctrl to branch Impact: rename of iter_ctrl unlikely to branch The unlikely name is ugly. This patch converts the iter_ctrl command "unlikely" and "nounlikely" to "branch" and "nobranch" respectively. It also renames a lot of internal functions to use "branch" instead of "unlikely". Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bad59d32a4a9..4bf070bb5272 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -259,7 +259,7 @@ static const char *trace_options[] = { "ftrace_printk", "ftrace_preempt", #ifdef CONFIG_BRANCH_TRACER - "unlikely", + "branch", #endif NULL }; @@ -1651,8 +1651,8 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) trace_seq_print_cont(s, iter); break; } - case TRACE_UNLIKELY: { - struct trace_unlikely *field; + case TRACE_BRANCH: { + struct trace_branch *field; trace_assign_type(field, entry); @@ -1802,8 +1802,8 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) return print_return_function(iter); break; } - case TRACE_UNLIKELY: { - struct trace_unlikely *field; + case TRACE_BRANCH: { + struct trace_branch *field; trace_assign_type(field, entry); @@ -2619,7 +2619,7 @@ static int tracing_set_tracer(char *buf) if (t == current_trace) goto out; - trace_unlikely_disable(); + trace_branch_disable(); if (current_trace && current_trace->reset) current_trace->reset(tr); @@ -2627,7 +2627,7 @@ static int tracing_set_tracer(char *buf) if (t->init) t->init(tr); - trace_unlikely_enable(tr); + trace_branch_enable(tr); out: mutex_unlock(&trace_types_lock); -- cgit v1.2.3 From a94c80e78bc9f4493ffc25a02d5d7bcd93c399d0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 17:52:36 -0500 Subject: ftrace: rename trace_entries to buffer_size_kb Impact: rename of debugfs file trace_entries to buffer_size_kb The original ftrace had fixed size entries, and the number of entries was shown and modified via the file called trace_entries. By converting to the unified trace buffer, we now allow for variable size entries which makes the meaning of trace_entries pointless. Since trace_size might be confused to the size of the trace, this patch names it "buffer_size_kb" (thanks to Arjan van de Ven for this idea). [ mingo@elte.hu: changed from buffer_size to buffer_size_kb ] ( Note, the units are still bytes - the next patch changes that, to keep the wide rename patch separate from the unit-change patch. ) Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4bf070bb5272..b42d42056fa4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3198,11 +3198,11 @@ static __init int tracer_init_debugfs(void) pr_warning("Could not create debugfs " "'trace_pipe' entry\n"); - entry = debugfs_create_file("trace_entries", 0644, d_tracer, + entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer, &global_trace, &tracing_entries_fops); if (!entry) pr_warning("Could not create debugfs " - "'trace_entries' entry\n"); + "'buffer_size_kb' entry\n"); entry = debugfs_create_file("trace_marker", 0220, d_tracer, NULL, &tracing_mark_fops); -- cgit v1.2.3 From 1696b2b0f44a8d42f3e6b1ea90c21790871c04d9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 13 Nov 2008 00:09:35 -0500 Subject: ftrace: show buffer size in kilobytes Impact: change the units of buffer_size_kb to kilobytes This patch changes the units of the buffer_size_kb file to kilobytes. Reading and writing to the file uses kilobytes as units. To help users to know what units are used, the output of the file now looks like: # cat /debug/tracing/buffer_size_kb 1408 Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b42d42056fa4..d664aae2e10a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2905,7 +2905,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf, char buf[64]; int r; - r = sprintf(buf, "%lu\n", tr->entries); + r = sprintf(buf, "%lu\n", tr->entries >> 10); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -2945,6 +2945,9 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, atomic_inc(&max_tr.data[cpu]->disabled); } + /* value is in KB */ + val <<= 10; + if (val != global_trace.entries) { ret = ring_buffer_resize(global_trace.buffer, val); if (ret < 0) { -- cgit v1.2.3 From ee6bce52276c0717ed3e63296e5d9465d339e923 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 17:52:37 -0500 Subject: ftrace: rename iter_ctrl to trace_options Impact: rename file /debug/tracing/iter_ctrl to /debug/tracing/trace_options The original ftrace had a file called "iter_ctrl" that would control the way the output was iterated. But this file grew into a catch all for different trace options. This patch renames the file from iter_ctrl to trace_options to reflect this change. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d664aae2e10a..240423a9d1af 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -204,7 +204,7 @@ static DEFINE_MUTEX(trace_types_lock); /* trace_wait is a waitqueue for tasks blocked on trace_poll */ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); -/* trace_flags holds iter_ctrl options */ +/* trace_flags holds trace_options default values */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK; /** @@ -2411,7 +2411,7 @@ static struct file_operations tracing_cpumask_fops = { }; static ssize_t -tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, +tracing_trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { char *buf; @@ -2448,7 +2448,7 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, } static ssize_t -tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, +tracing_trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { char buf[64]; @@ -2493,8 +2493,8 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, static struct file_operations tracing_iter_fops = { .open = tracing_open_generic, - .read = tracing_iter_ctrl_read, - .write = tracing_iter_ctrl_write, + .read = tracing_trace_options_read, + .write = tracing_trace_options_write, }; static const char readme_msg[] = @@ -2508,9 +2508,9 @@ static const char readme_msg[] = "# echo sched_switch > /debug/tracing/current_tracer\n" "# cat /debug/tracing/current_tracer\n" "sched_switch\n" - "# cat /debug/tracing/iter_ctrl\n" + "# cat /debug/tracing/trace_options\n" "noprint-parent nosym-offset nosym-addr noverbose\n" - "# echo print-parent > /debug/tracing/iter_ctrl\n" + "# echo print-parent > /debug/tracing/trace_options\n" "# echo 1 > /debug/tracing/tracing_enabled\n" "# cat /debug/tracing/trace > /tmp/trace.txt\n" "echo 0 > /debug/tracing/tracing_enabled\n" @@ -3148,10 +3148,10 @@ static __init int tracer_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); - entry = debugfs_create_file("iter_ctrl", 0644, d_tracer, + entry = debugfs_create_file("trace_options", 0644, d_tracer, NULL, &tracing_iter_fops); if (!entry) - pr_warning("Could not create debugfs 'iter_ctrl' entry\n"); + pr_warning("Could not create debugfs 'trace_options' entry\n"); entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, NULL, &tracing_cpumask_fops); -- cgit v1.2.3 From 12ef7d448613ead2babd41c3ebfa1fe03c20edef Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 17:52:38 -0500 Subject: ftrace: CPU buffer start annotation clean ups Impact: better handling of CPU buffer start annotation Because of the confusion with the per CPU buffers wrapping where one CPU might be more active at the end of the trace than the other CPUs causing that one CPU to have a shorter history. Kernel developers were confused by the "missing" data of that one CPU at the beginning of the trace output. An annotation was added to the trace output to show that the buffer had started: # tracer: function # # TASK-PID CPU# TIMESTAMP FUNCTION # | | | | | ##### CPU 3 buffer started #### -0 [003] 158.192959: smp_apic_timer_interrupt [...] -0 [003] 161.556520: default_idle ##### CPU 1 buffer started #### -0 [001] 161.592494: hrtimer_force_reprogram [etc] But this annotation gets a bit messy when tracers do not fill the buffers. This patch does a couple of things: One) it adds a flag to trace_options to disable these annotations Two) it does not annotate if the tracer did not overflow its buffer. This makes the output much cleaner. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 240423a9d1af..4a904623e05d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -205,7 +205,8 @@ static DEFINE_MUTEX(trace_types_lock); static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds trace_options default values */ -unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK; +unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | + TRACE_ITER_ANNOTATE; /** * trace_wake_up - wake up tasks waiting for trace input @@ -261,6 +262,7 @@ static const char *trace_options[] = { #ifdef CONFIG_BRANCH_TRACER "branch", #endif + "annotate", NULL }; @@ -1113,6 +1115,7 @@ void tracing_stop_function_trace(void) enum trace_file_type { TRACE_FILE_LAT_FMT = 1, + TRACE_FILE_ANNOTATE = 2, }; static void trace_iterator_increment(struct trace_iterator *iter, int cpu) @@ -1532,6 +1535,12 @@ static void test_cpu_buff_start(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; + if (!(trace_flags & TRACE_ITER_ANNOTATE)) + return; + + if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) + return; + if (cpu_isset(iter->cpu, iter->started)) return; @@ -2132,6 +2141,11 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) iter->trace = current_trace; iter->pos = -1; + /* Annotate start of buffers if we had overruns */ + if (ring_buffer_overruns(iter->tr->buffer)) + iter->iter_flags |= TRACE_FILE_ANNOTATE; + + for_each_tracing_cpu(cpu) { iter->buffer_iter[cpu] = -- cgit v1.2.3 From d51ad7ac48f991c4a8834485727efa99a691cb87 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 15 Nov 2008 15:48:29 -0500 Subject: ftrace: replace raw_local_irq_save with local_irq_save Impact: fix lockdep disabling itself when function tracing is enabled The raw_local_irq_saves used in ftrace is causing problems with lockdep. (it thinks the irq flags are out of sync and disables itself with a warning) The raw ops here are not needed, and the normal local_irq_save is fine. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4a904623e05d..dff4bee591b9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1051,7 +1051,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) * Need to use raw, since this must be called before the * recursive protection is performed. */ - raw_local_irq_save(flags); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); @@ -1062,7 +1062,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) } atomic_dec(&data->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); } #ifdef CONFIG_FUNCTION_RET_TRACER -- cgit v1.2.3 From e6e7a65aabdb696cf05a56cfd495c49a11fd4cde Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Nov 2008 05:53:19 +0100 Subject: tracing/ftrace: fix unexpected -EINVAL when longest tracer name is set Impact: fix confusing write() -EINVAL when changing the tracer The following commit d9e540762f5cdd89f24e518ad1fd31142d0b9726 remade alive the bug which made the set of a new tracer returning -EINVAL if this is the longest name of tracer. This patch corrects it. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dff4bee591b9..80898f4870cc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2655,6 +2655,9 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, char buf[max_tracer_type_len+1]; int i; size_t ret; + int err; + + ret = cnt; if (cnt > max_tracer_type_len) cnt = max_tracer_type_len; @@ -2668,12 +2671,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) buf[i] = 0; - ret = tracing_set_tracer(buf); - if (!ret) - ret = cnt; + err = tracing_set_tracer(buf); + if (err) + return err; - if (ret > 0) - filp->f_pos += ret; + filp->f_pos += ret; return ret; } -- cgit v1.2.3 From 1c80025a49855b12fa09bb6db71820e3367b1369 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Nov 2008 05:57:26 +0100 Subject: tracing/ftrace: change the type of the init() callback Impact: extend the ->init() method with the ability to fail This bring a way to know if the initialization of a tracer successed. A tracer must return 0 on success and a traditional error (ie: -ENOMEM) if it fails. If a tracer fails to init, it is free to print a detailed warn. The tracing api will not and switch to a new tracer will just return the error from the init callback. Note: this will be used for the return tracer. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 80898f4870cc..396fda034e3f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2638,8 +2638,11 @@ static int tracing_set_tracer(char *buf) current_trace->reset(tr); current_trace = t; - if (t->init) - t->init(tr); + if (t->init) { + ret = t->init(tr); + if (ret) + goto out; + } trace_branch_enable(tr); out: -- cgit v1.2.3 From 0c726da983de0704254250ef6495ca152e7abcca Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 16 Nov 2008 16:07:58 +0530 Subject: tracing: branch tracer, fix writing to trace/trace_options Impact: fix trace_options behavior writing to trace/trace_options use the index of the array to find the value of the flag. With branch tracer flag defined conditionally, this breaks writing to trace_options with branch tracer disabled. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4a904623e05d..b04923b72ce0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -259,9 +259,7 @@ static const char *trace_options[] = { "sched-tree", "ftrace_printk", "ftrace_preempt", -#ifdef CONFIG_BRANCH_TRACER "branch", -#endif "annotate", NULL }; -- cgit v1.2.3 From adf9f19574334c9a29a2bc956009fcac7edf1a6b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 17 Nov 2008 19:23:42 +0100 Subject: tracing/ftrace: implement a set_flag callback for tracers Impact: give a way to send specific messages to tracers The current implementation of tracing uses some flags to control the output of general tracers. But we have no way to implement custom flags handling for a specific tracer. This patch proposes a new callback for the struct tracer which called set_flag and a structure that represents a 32 bits variable flag. A tracer can implement a struct tracer_flags on which it puts the initial value of the flag integer. Than it can place a range of flags with their name and their flag mask on the flag integer. The structure that implement a single flag is called struct tracer_opt. These custom flags will be available through the trace_options file like the general tracing flags. Changing their value is done like the other general flags. For example if you have a flag that calls "foo", you can activate it by writing "foo" or "nofoo" on trace_options. Note that the set_flag callback is optional and is only needed if you want the flags changing to be signaled to your tracer and let it to accept or refuse their assignment. V2: Some arrangements in coding style.... Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 86 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2596b5a968c4..9531fddcfb8d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -43,6 +43,20 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; +/* For tracers that don't implement custom flags */ +static struct tracer_opt dummy_tracer_opt[] = { + { } +}; + +static struct tracer_flags dummy_tracer_flags = { + .val = 0, + .opts = dummy_tracer_opt +}; + +static int dummy_set_flag(u32 old_flags, u32 bit, int set) +{ + return 0; +} /* * Kill all tracing for good (never come back). @@ -529,6 +543,14 @@ int register_tracer(struct tracer *type) } } + if (!type->set_flag) + type->set_flag = &dummy_set_flag; + if (!type->flags) + type->flags = &dummy_tracer_flags; + else + if (!type->flags->opts) + type->flags->opts = dummy_tracer_opt; + #ifdef CONFIG_FTRACE_STARTUP_TEST if (type->selftest) { struct tracer *saved_tracer = current_trace; @@ -2426,10 +2448,13 @@ static ssize_t tracing_trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { + int i; char *buf; int r = 0; int len = 0; - int i; + u32 tracer_flags = current_trace->flags->val; + struct tracer_opt *trace_opts = current_trace->flags->opts; + /* calulate max size */ for (i = 0; trace_options[i]; i++) { @@ -2437,6 +2462,15 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, len += 3; /* "no" and space */ } + /* + * Increase the size with names of options specific + * of the current tracer. + */ + for (i = 0; trace_opts[i].name; i++) { + len += strlen(trace_opts[i].name); + len += 3; /* "no" and space */ + } + /* +2 for \n and \0 */ buf = kmalloc(len + 2, GFP_KERNEL); if (!buf) @@ -2449,6 +2483,15 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, r += sprintf(buf + r, "no%s ", trace_options[i]); } + for (i = 0; trace_opts[i].name; i++) { + if (tracer_flags & trace_opts[i].bit) + r += sprintf(buf + r, "%s ", + trace_opts[i].name); + else + r += sprintf(buf + r, "no%s ", + trace_opts[i].name); + } + r += sprintf(buf + r, "\n"); WARN_ON(r >= len + 2); @@ -2459,6 +2502,40 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf, return r; } +/* Try to assign a tracer specific option */ +static int set_tracer_option(struct tracer *trace, char *cmp, int neg) +{ + struct tracer_flags *trace_flags = trace->flags; + struct tracer_opt *opts = NULL; + int ret = 0, i = 0; + int len; + + for (i = 0; trace_flags->opts[i].name; i++) { + opts = &trace_flags->opts[i]; + len = strlen(opts->name); + + if (strncmp(cmp, opts->name, len) == 0) { + ret = trace->set_flag(trace_flags->val, + opts->bit, !neg); + break; + } + } + /* Not found */ + if (!trace_flags->opts[i].name) + return -EINVAL; + + /* Refused to handle */ + if (ret) + return ret; + + if (neg) + trace_flags->val &= ~opts->bit; + else + trace_flags->val |= opts->bit; + + return 0; +} + static ssize_t tracing_trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) @@ -2466,6 +2543,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, char buf[64]; char *cmp = buf; int neg = 0; + int ret; int i; if (cnt >= sizeof(buf)) @@ -2492,11 +2570,13 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, break; } } - /* - * If no option could be set, return an error: - */ - if (!trace_options[i]) - return -EINVAL; + + /* If no option could be set, test the specific tracer options */ + if (!trace_options[i]) { + ret = set_tracer_option(current_trace, cmp, neg); + if (ret) + return ret; + } filp->f_pos += cnt; -- cgit v1.2.3 From 0231022cc32d5f2e7f3c06b75691dda0ad6aec33 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 17 Nov 2008 03:22:41 +0100 Subject: tracing/function-return-tracer: add the overrun field Impact: help to find the better depth of trace We decided to arbitrary define the depth of function return trace as "20". Perhaps this is not enough. To help finding an optimal depth, we measure now the overrun: the number of functions that have been missed for the current thread. By default this is not displayed, we have to do set a particular flag on the return tracer: echo overrun > /debug/tracing/trace_options And the overrun will be printed on the right. As the trace shows below, the current 20 depth is not enough. update_wall_time+0x37f/0x8c0 -> update_xtime_cache (345 ns) (Overruns: 2838) update_wall_time+0x384/0x8c0 -> clocksource_get_next (1141 ns) (Overruns: 2838) do_timer+0x23/0x100 -> update_wall_time (3882 ns) (Overruns: 2838) tick_do_update_jiffies64+0xbf/0x160 -> do_timer (5339 ns) (Overruns: 2838) tick_sched_timer+0x6a/0xf0 -> tick_do_update_jiffies64 (7209 ns) (Overruns: 2838) vgacon_set_cursor_size+0x98/0x120 -> native_io_delay (2613 ns) (Overruns: 274) vgacon_cursor+0x16e/0x1d0 -> vgacon_set_cursor_size (33151 ns) (Overruns: 274) set_cursor+0x5f/0x80 -> vgacon_cursor (36432 ns) (Overruns: 274) con_flush_chars+0x34/0x40 -> set_cursor (38790 ns) (Overruns: 274) release_console_sem+0x1ec/0x230 -> up (721 ns) (Overruns: 274) release_console_sem+0x225/0x230 -> wake_up_klogd (316 ns) (Overruns: 274) con_flush_chars+0x39/0x40 -> release_console_sem (2996 ns) (Overruns: 274) con_write+0x22/0x30 -> con_flush_chars (46067 ns) (Overruns: 274) n_tty_write+0x1cc/0x360 -> con_write (292670 ns) (Overruns: 274) smp_apic_timer_interrupt+0x2a/0x90 -> native_apic_mem_write (330 ns) (Overruns: 274) irq_enter+0x17/0x70 -> idle_cpu (413 ns) (Overruns: 274) smp_apic_timer_interrupt+0x2f/0x90 -> irq_enter (1525 ns) (Overruns: 274) ktime_get_ts+0x40/0x70 -> getnstimeofday (465 ns) (Overruns: 274) ktime_get_ts+0x60/0x70 -> set_normalized_timespec (436 ns) (Overruns: 274) ktime_get+0x16/0x30 -> ktime_get_ts (2501 ns) (Overruns: 274) hrtimer_interrupt+0x77/0x1a0 -> ktime_get (3439 ns) (Overruns: 274) Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9531fddcfb8d..e97c29a6e7b0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -853,6 +853,7 @@ static void __trace_function_return(struct trace_array *tr, entry->parent_ip = trace->ret; entry->rettime = trace->rettime; entry->calltime = trace->calltime; + entry->overrun = trace->overrun; ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); } #endif -- cgit v1.2.3 From a22506347d038a66506c6f57e9b97104128e280d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 18 Nov 2008 18:06:35 +0100 Subject: ftrace: preemptoff selftest not working Impact: fix preemptoff and preemptirqsoff tracer self-tests I was wondering why the preemptoff and preemptirqsoff tracer selftests don't work on s390. After all its just that they get called from non-preemptible context: kernel_init() will execute all initcalls, however the first line in kernel_init() is lock_kernel(), which causes the preempt_count to be increased. Any later calls to add_preempt_count() (especially those from the selftests) will therefore not result in a call to trace_preempt_off() since the check below in add_preempt_count() will be false: if (preempt_count() == val) trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); Hence the trace buffer will be empty. Fix this by releasing the BKL during the self-tests. Signed-off-by: Heiko Carstens Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 396fda034e3f..16892121cb7c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -532,6 +532,13 @@ int register_tracer(struct tracer *type) } #ifdef CONFIG_FTRACE_STARTUP_TEST + /* + * When this gets called we hold the BKL which means that preemption + * is disabled. Various trace selftests however need to disable + * and enable preemption for successful tests. So we drop the BKL here + * and grab it after the tests again. + */ + unlock_kernel(); if (type->selftest) { struct tracer *saved_tracer = current_trace; struct trace_array *tr = &global_trace; @@ -562,6 +569,7 @@ int register_tracer(struct tracer *type) } printk(KERN_CONT "PASSED\n"); } + lock_kernel(); #endif type->next = trace_types; -- cgit v1.2.3 From 86fa2f60674540df0b34f5c547ed0c1cf3a8f212 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 19 Nov 2008 10:00:15 +0100 Subject: ftrace: fix selftest locking Impact: fix self-test boot crash Self-test failure forgot to re-lock the BKL - crashing the next initcall: Testing tracer irqsoff: .. no entries found ..FAILED! initcall init_irqsoff_tracer+0x0/0x11 returned 0 after 3906 usecs calling init_mmio_trace+0x0/0xf @ 1 ------------[ cut here ]------------ Kernel BUG at c0c0a915 [verbose debug info unavailable] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC last sysfs file: Pid: 1, comm: swapper Not tainted (2.6.28-rc5-tip #53704) EIP: 0060:[] EFLAGS: 00010286 CPU: 1 EIP is at unlock_kernel+0x10/0x2b EAX: ffffffff EBX: 00000000 ECX: 00000000 EDX: f7030000 ESI: c12da19c EDI: 00000000 EBP: f7039f54 ESP: f7039f54 DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 Process swapper (pid: 1, ti=f7038000 task=f7030000 task.ti=f7038000) Stack: f7039f6c c0164d30 c013fed8 a7d8d7b4 00000000 00000000 f7039f74 c12fb78a f7039fd0 c0101132 c12fb77d 00000000 6f727200 6f632072 2d206564 c1002031 0000000f f7039fa2 f7039fb0 3531b171 00000000 00000000 0000002f c12ca480 Call Trace: [] ? register_tracer+0x66/0x13f [] ? ktime_get+0x19/0x1b [] ? init_mmio_trace+0xd/0xf [] ? do_one_initcall+0x4a/0x111 [] ? init_mmio_trace+0x0/0xf [] ? init_irq_proc+0x46/0x59 [] ? kernel_init+0x104/0x152 [] ? kernel_init+0x0/0x152 [] ? kernel_thread_helper+0x7/0x10 Code: 58 14 43 75 0a b8 00 9b 2d c1 e8 51 43 7a ff 64 a1 00 a0 37 c1 89 58 14 5b 5d c3 55 64 8b 15 00 a0 37 c1 83 7a 14 00 89 e5 79 04 <0f> 0b eb fe 8b 42 14 48 85 c0 89 42 14 79 0a b8 00 9b 2d c1 e8 EIP: [] unlock_kernel+0x10/0x2b SS:ESP 0068:f7039f54 ---[ end trace a7919e7f17c0a725 ]--- Kernel panic - not syncing: Attempted to kill init! So clean up the flow a bit. Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 16892121cb7c..24b6238884f0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -520,7 +520,15 @@ int register_tracer(struct tracer *type) return -1; } + /* + * When this gets called we hold the BKL which means that + * preemption is disabled. Various trace selftests however + * need to disable and enable preemption for successful tests. + * So we drop the BKL here and grab it after the tests again. + */ + unlock_kernel(); mutex_lock(&trace_types_lock); + for (t = trace_types; t; t = t->next) { if (strcmp(type->name, t->name) == 0) { /* already found */ @@ -532,13 +540,6 @@ int register_tracer(struct tracer *type) } #ifdef CONFIG_FTRACE_STARTUP_TEST - /* - * When this gets called we hold the BKL which means that preemption - * is disabled. Various trace selftests however need to disable - * and enable preemption for successful tests. So we drop the BKL here - * and grab it after the tests again. - */ - unlock_kernel(); if (type->selftest) { struct tracer *saved_tracer = current_trace; struct trace_array *tr = &global_trace; @@ -550,9 +551,9 @@ int register_tracer(struct tracer *type) * internal tracing to verify that everything is in order. * If we fail, we do not register this tracer. */ - for_each_tracing_cpu(i) { + for_each_tracing_cpu(i) tracing_reset(tr, i); - } + current_trace = type; /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); @@ -564,12 +565,11 @@ int register_tracer(struct tracer *type) goto out; } /* Only reset on passing, to avoid touching corrupted buffers */ - for_each_tracing_cpu(i) { + for_each_tracing_cpu(i) tracing_reset(tr, i); - } + printk(KERN_CONT "PASSED\n"); } - lock_kernel(); #endif type->next = trace_types; @@ -580,6 +580,7 @@ int register_tracer(struct tracer *type) out: mutex_unlock(&trace_types_lock); + lock_kernel(); return ret; } -- cgit v1.2.3 From 02b67518e2b1c490787dac7f35e1204e74fe21ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B6r=C3=B6k=20Edwin?= Date: Sat, 22 Nov 2008 13:28:47 +0200 Subject: tracing: add support for userspace stacktraces in tracing/iter_ctrl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: add new (default-off) tracing visualization feature Usage example: mount -t debugfs nodev /sys/kernel/debug cd /sys/kernel/debug/tracing echo userstacktrace >iter_ctrl echo sched_switch >current_tracer echo 1 >tracing_enabled .... run application ... echo 0 >tracing_enabled Then read one of 'trace','latency_trace','trace_pipe'. To get the best output you can compile your userspace programs with frame pointers (at least glibc + the app you are tracing). Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4ee6f0375222..ced8b4fa9f51 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -275,6 +275,7 @@ static const char *trace_options[] = { "ftrace_preempt", "branch", "annotate", + "userstacktrace", NULL }; @@ -918,6 +919,44 @@ void __trace_stack(struct trace_array *tr, ftrace_trace_stack(tr, data, flags, skip, preempt_count()); } +static void ftrace_trace_userstack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, int pc) +{ + struct userstack_entry *entry; + struct stack_trace trace; + struct ring_buffer_event *event; + unsigned long irq_flags; + + if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) + return; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_USER_STACK; + + memset(&entry->caller, 0, sizeof(entry->caller)); + + trace.nr_entries = 0; + trace.max_entries = FTRACE_STACK_ENTRIES; + trace.skip = 0; + trace.entries = entry->caller; + + save_stack_trace_user(&trace); + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +} + +void __trace_userstack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags) +{ + ftrace_trace_userstack(tr, data, flags, preempt_count()); +} + static void ftrace_trace_special(void *__tr, void *__data, unsigned long arg1, unsigned long arg2, unsigned long arg3, @@ -941,6 +980,7 @@ ftrace_trace_special(void *__tr, void *__data, entry->arg3 = arg3; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, irq_flags, 4, pc); + ftrace_trace_userstack(tr, data, irq_flags, pc); trace_wake_up(); } @@ -979,6 +1019,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_cpu = task_cpu(next); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, flags, 5, pc); + ftrace_trace_userstack(tr, data, flags, pc); } void @@ -1008,6 +1049,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_cpu = task_cpu(wakee); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, flags, 6, pc); + ftrace_trace_userstack(tr, data, flags, pc); trace_wake_up(); } @@ -1387,6 +1429,31 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) return ret; } +static int +seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, + unsigned long sym_flags) +{ + int ret = 1; + unsigned i; + + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { + unsigned long ip = entry->caller[i]; + + if (ip == ULONG_MAX || !ret) + break; + if (i) + ret = trace_seq_puts(s, " <- "); + if (!ip) { + ret = trace_seq_puts(s, "??"); + continue; + } + if (ret /*&& (sym_flags & TRACE_ITER_SYM_ADDR)*/) + ret = trace_seq_printf(s, " <" IP_FMT ">", ip); + } + + return ret; +} + static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n"); @@ -1702,6 +1769,16 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) field->line); break; } + case TRACE_USER_STACK: { + struct userstack_entry *field; + + trace_assign_type(field, entry); + + seq_print_userip_objs(field, s, sym_flags); + if (entry->flags & TRACE_FLAG_CONT) + trace_seq_print_cont(s, iter); + break; + } default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1853,6 +1930,19 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) field->line); break; } + case TRACE_USER_STACK: { + struct userstack_entry *field; + + trace_assign_type(field, entry); + + ret = seq_print_userip_objs(field, s, sym_flags); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + ret = trace_seq_putc(s, '\n'); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + break; + } } return TRACE_TYPE_HANDLED; } @@ -1912,6 +2002,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; @@ -2000,6 +2091,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; @@ -2054,6 +2146,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; -- cgit v1.2.3 From b54d3de9f3b8956653b06f1a32e9f9321c6d9027 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B6r=C3=B6k=20Edwin?= Date: Sat, 22 Nov 2008 13:28:48 +0200 Subject: tracing: identify which executable object the userspace address belongs to MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: modify+improve the userstacktrace tracing visualization feature Store thread group leader id, and use it to lookup the address in the process's map. We could have looked up the address on thread's map, but the thread might not exist by the time we are called. The process might not exist either, but if you are reading trace_pipe, that is unlikely. Example usage: mount -t debugfs nodev /sys/kernel/debug cd /sys/kernel/debug/tracing echo userstacktrace >iter_ctrl echo sym-userobj >iter_ctrl echo sched_switch >current_tracer echo 1 >tracing_enabled cat trace_pipe >/tmp/trace& .... run application ... echo 0 >tracing_enabled cat /tmp/trace You'll see stack entries like: /lib/libpthread-2.7.so[+0xd370] You can convert them to function/line using: addr2line -fie /lib/libpthread-2.7.so 0xd370 Or: addr2line -fie /usr/lib/debug/libpthread-2.7.so 0xd370 For non-PIC/PIE executables this won't work: a.out[+0x73b] You need to run the following: addr2line -fie a.out 0x40073b (where 0x400000 is the default load address of a.out) Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 79 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ced8b4fa9f51..62776b71b1c5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -276,6 +277,7 @@ static const char *trace_options[] = { "branch", "annotate", "userstacktrace", + "sym-userobj", NULL }; @@ -422,6 +424,28 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) return trace_seq_putmem(s, hex, j); } +static int +trace_seq_path(struct trace_seq *s, struct path *path) +{ + unsigned char *p; + + if (s->len >= (PAGE_SIZE - 1)) + return 0; + p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); + if (!IS_ERR(p)) { + p = mangle_path(s->buffer + s->len, p, "\n"); + if (p) { + s->len = p - s->buffer; + return 1; + } + } else { + s->buffer[s->len++] = '?'; + return 1; + } + + return 0; +} + static void trace_seq_reset(struct trace_seq *s) { @@ -802,6 +826,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; + entry->tgid = (tsk) ? tsk->tgid : 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | @@ -1429,28 +1454,73 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) return ret; } +static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, + unsigned long ip, unsigned long sym_flags) +{ + struct file *file = NULL; + unsigned long vmstart = 0; + int ret = 1; + + if (mm) { + const struct vm_area_struct *vma = find_vma(mm, ip); + if (vma) { + file = vma->vm_file; + vmstart = vma->vm_start; + } + } + if (file) { + ret = trace_seq_path(s, &file->f_path); + if (ret) + ret = trace_seq_printf(s, "[+0x%lx]", + ip - vmstart); + } + if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) + ret = trace_seq_printf(s, " <" IP_FMT ">", ip); + return ret; +} + static int seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, - unsigned long sym_flags) + unsigned long sym_flags) { + struct mm_struct *mm = NULL; int ret = 1; unsigned i; + if (trace_flags & TRACE_ITER_SYM_USEROBJ) { + struct task_struct *task; + /* + * we do the lookup on the thread group leader, + * since individual threads might have already quit! + */ + rcu_read_lock(); + task = find_task_by_vpid(entry->ent.tgid); + rcu_read_unlock(); + + if (task) + mm = get_task_mm(task); + } + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { unsigned long ip = entry->caller[i]; if (ip == ULONG_MAX || !ret) break; - if (i) + if (i && ret) ret = trace_seq_puts(s, " <- "); if (!ip) { - ret = trace_seq_puts(s, "??"); + if (ret) + ret = trace_seq_puts(s, "??"); continue; } - if (ret /*&& (sym_flags & TRACE_ITER_SYM_ADDR)*/) - ret = trace_seq_printf(s, " <" IP_FMT ">", ip); + if (!ret) + break; + if (ret) + ret = seq_print_user_ip(s, mm, ip, sym_flags); } + if (mm) + mmput(mm); return ret; } @@ -1775,8 +1845,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) trace_assign_type(field, entry); seq_print_userip_objs(field, s, sym_flags); - if (entry->flags & TRACE_FLAG_CONT) - trace_seq_print_cont(s, iter); + trace_seq_putc(s, '\n'); break; } default: @@ -3581,6 +3650,9 @@ void ftrace_dump(void) atomic_inc(&global_trace.data[cpu]->disabled); } + /* don't look at user memory in panic mode */ + trace_flags &= ~TRACE_ITER_SYM_USEROBJ; + printk(KERN_TRACE "Dumping ftrace buffer:\n"); iter.tr = &global_trace; -- cgit v1.2.3 From 69bb54ec05f57da7f6fac2cec0820cbc970df20f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 21 Nov 2008 12:59:38 -0500 Subject: ftrace: add ftrace_off_permanent Impact: add new API to disable all of ftrace on anomalies It case of a serious anomaly being detected (like something caught by lockdep) it is a good idea to disable all tracing immediately, without grabing any locks. This patch adds ftrace_off_permanent that disables the tracers, function tracing and ring buffers without a way to enable them again. This should only be used when something serious has been detected. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4ee6f0375222..0dbfb23ced97 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -660,6 +660,21 @@ static void trace_init_cmdlines(void) static int trace_stop_count; static DEFINE_SPINLOCK(tracing_start_lock); +/** + * ftrace_off_permanent - disable all ftrace code permanently + * + * This should only be called when a serious anomally has + * been detected. This will turn off the function tracing, + * ring buffers, and other tracing utilites. It takes no + * locks and can be called from any context. + */ +void ftrace_off_permanent(void) +{ + tracing_disabled = 1; + ftrace_stop(); + tracing_off_permanent(); +} + /** * tracing_start - quick start of the tracer * -- cgit v1.2.3 From 8d7c6a96164651dbbab449ef0b5c20ae1f76a3a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B6r=C3=B6k=20Edwin?= Date: Sun, 23 Nov 2008 12:39:06 +0200 Subject: tracing/stack-tracer: fix style issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: cleanup Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 62776b71b1c5..dedf35f36971 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -948,9 +948,9 @@ static void ftrace_trace_userstack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, int pc) { + struct ring_buffer_event *event; struct userstack_entry *entry; struct stack_trace trace; - struct ring_buffer_event *event; unsigned long irq_flags; if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) @@ -1471,8 +1471,7 @@ static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, if (file) { ret = trace_seq_path(s, &file->f_path); if (ret) - ret = trace_seq_printf(s, "[+0x%lx]", - ip - vmstart); + ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart); } if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) ret = trace_seq_printf(s, " <" IP_FMT ">", ip); @@ -1485,7 +1484,7 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, { struct mm_struct *mm = NULL; int ret = 1; - unsigned i; + unsigned int i; if (trace_flags & TRACE_ITER_SYM_USEROBJ) { struct task_struct *task; -- cgit v1.2.3 From cffa10aecb6891f090a4d53a075bc40c082c45fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B6r=C3=B6k=20Edwin?= Date: Sun, 23 Nov 2008 12:39:07 +0200 Subject: tracing/stack-tracer: fix locking and refcounts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: fix refcounting/object-access bug Hold mmap_sem while looking up/accessing vma. Hold the RCU lock while using the task we looked up. Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dedf35f36971..4c3bd82cec49 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1462,11 +1462,15 @@ static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, int ret = 1; if (mm) { - const struct vm_area_struct *vma = find_vma(mm, ip); + const struct vm_area_struct *vma; + + down_read(&mm->mmap_sem); + vma = find_vma(mm, ip); if (vma) { file = vma->vm_file; vmstart = vma->vm_start; } + up_read(&mm->mmap_sem); } if (file) { ret = trace_seq_path(s, &file->f_path); @@ -1494,10 +1498,9 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, */ rcu_read_lock(); task = find_task_by_vpid(entry->ent.tgid); - rcu_read_unlock(); - if (task) mm = get_task_mm(task); + rcu_read_unlock(); } for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { -- cgit v1.2.3 From e38da59269be8c0196d16dff1be5bb26076afc6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B6r=C3=B6k=20Edwin?= Date: Sun, 23 Nov 2008 13:08:10 +0200 Subject: tracing/stack-tracer: avoid races accessing file Impact: fix race vma->vm_file reference is only stable while holding the mmap_sem, so move usage of it to within the critical section. Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4c3bd82cec49..48d1536f1ca4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1470,13 +1470,13 @@ static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, file = vma->vm_file; vmstart = vma->vm_start; } + if (file) { + ret = trace_seq_path(s, &file->f_path); + if (ret) + ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart); + } up_read(&mm->mmap_sem); } - if (file) { - ret = trace_seq_path(s, &file->f_path); - if (ret) - ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart); - } if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) ret = trace_seq_printf(s, " <" IP_FMT ">", ip); return ret; -- cgit v1.2.3 From 8bba1bf5e2434c83f2fe8b1422604ace9bbe4cb8 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 25 Nov 2008 09:12:31 +0100 Subject: x86, ftrace: call trace->open() before stopping tracing; add trace->print_header() Add a callback to allow an ftrace plug-in to write its own header. Move the call to trace->open() up a few lines. The changes are required by the BTS ftrace plug-in. Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a45b59e53fbc..8df8fdd69c95 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2298,7 +2298,9 @@ static int s_show(struct seq_file *m, void *v) seq_printf(m, "# tracer: %s\n", iter->trace->name); seq_puts(m, "#\n"); } - if (iter->iter_flags & TRACE_FILE_LAT_FMT) { + if (iter->trace && iter->trace->print_header) + iter->trace->print_header(m); + else if (iter->iter_flags & TRACE_FILE_LAT_FMT) { /* print nothing if the buffers are empty */ if (trace_empty(iter)) return 0; @@ -2350,6 +2352,10 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) iter->trace = current_trace; iter->pos = -1; + /* Notify the tracer early; before we stop tracing. */ + if (iter->trace && iter->trace->open) + iter->trace->open(iter); + /* Annotate start of buffers if we had overruns */ if (ring_buffer_overruns(iter->tr->buffer)) iter->iter_flags |= TRACE_FILE_ANNOTATE; @@ -2375,9 +2381,6 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) /* stop the trace while dumping */ tracing_stop(); - if (iter->trace && iter->trace->open) - iter->trace->open(iter); - mutex_unlock(&trace_types_lock); out: -- cgit v1.2.3 From fb52607afcd0629776f1dc9e657647ceae81dd50 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 25 Nov 2008 21:07:04 +0100 Subject: tracing/function-return-tracer: change the name into function-graph-tracer Impact: cleanup This patch changes the name of the "return function tracer" into function-graph-tracer which is a more suitable name for a tracing which makes one able to retrieve the ordered call stack during the code flow. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8df8fdd69c95..f21ab2c68fd4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -878,15 +878,15 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, ring_buffer_unlock_commit(tr->buffer, event, irq_flags); } -#ifdef CONFIG_FUNCTION_RET_TRACER -static void __trace_function_return(struct trace_array *tr, +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static void __trace_function_graph(struct trace_array *tr, struct trace_array_cpu *data, - struct ftrace_retfunc *trace, + struct ftrace_graph_ret *trace, unsigned long flags, int pc) { struct ring_buffer_event *event; - struct ftrace_ret_entry *entry; + struct ftrace_graph_entry *entry; unsigned long irq_flags; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) @@ -1177,8 +1177,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) local_irq_restore(flags); } -#ifdef CONFIG_FUNCTION_RET_TRACER -void trace_function_return(struct ftrace_retfunc *trace) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void trace_function_graph(struct ftrace_graph_ret *trace) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1193,12 +1193,12 @@ void trace_function_return(struct ftrace_retfunc *trace) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); - __trace_function_return(tr, data, trace, flags, pc); + __trace_function_graph(tr, data, trace, flags, pc); } atomic_dec(&data->disabled); raw_local_irq_restore(flags); } -#endif /* CONFIG_FUNCTION_RET_TRACER */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ static struct ftrace_ops trace_ops __read_mostly = { @@ -2001,7 +2001,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) break; } case TRACE_FN_RET: { - return print_return_function(iter); + return print_graph_function(iter); break; } case TRACE_BRANCH: { -- cgit v1.2.3 From 287b6e68ca7209caec40b2f44f837c580a413bae Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 26 Nov 2008 00:57:25 +0100 Subject: tracing/function-return-tracer: set a more human readable output Impact: feature This patch sets a C-like output for the function graph tracing. For this aim, we now call two handler for each function: one on the entry and one other on return. This way we can draw a well-ordered call stack. The pid of the previous trace is loosely stored to be compared against the one of the current trace to see if there were a context switch. Without this little feature, the call tree would seem broken at some locations. We could use the sched_tracer to capture these sched_events but this way of processing is much more simpler. 2 spaces have been chosen for indentation to fit the screen while deep calls. The time of execution in nanosecs is printed just after closed braces, it seems more easy this way to find the corresponding function. If the time was printed as a first column, it would be not so easy to find the corresponding function if it is called on a deep depth. I plan to output the return value but on 32 bits CPU, the return value can be 32 or 64, and its difficult to guess on which case we are. I don't know what would be the better solution on X86-32: only print eax (low-part) or even edx (high-part). Actually it's thee same problem when a function return a 8 bits value, the high part of eax could contain junk values... Here is an example of trace: sys_read() { fget_light() { } 526 vfs_read() { rw_verify_area() { security_file_permission() { cap_file_permission() { } 519 } 1564 } 2640 do_sync_read() { pipe_read() { __might_sleep() { } 511 pipe_wait() { prepare_to_wait() { } 760 deactivate_task() { dequeue_task() { dequeue_task_fair() { dequeue_entity() { update_curr() { update_min_vruntime() { } 504 } 1587 clear_buddies() { } 512 add_cfs_task_weight() { } 519 update_min_vruntime() { } 511 } 5602 dequeue_entity() { update_curr() { update_min_vruntime() { } 496 } 1631 clear_buddies() { } 496 update_min_vruntime() { } 527 } 4580 hrtick_update() { hrtick_start_fair() { } 488 } 1489 } 13700 } 14949 } 16016 msecs_to_jiffies() { } 496 put_prev_task_fair() { } 504 pick_next_task_fair() { } 489 pick_next_task_rt() { } 496 pick_next_task_fair() { } 489 pick_next_task_idle() { } 489 ------------8<---------- thread 4 ------------8<---------- finish_task_switch() { } 1203 do_softirq() { __do_softirq() { __local_bh_disable() { } 669 rcu_process_callbacks() { __rcu_process_callbacks() { cpu_quiet() { rcu_start_batch() { } 503 } 1647 } 3128 __rcu_process_callbacks() { } 542 } 5362 _local_bh_enable() { } 587 } 8880 } 9986 kthread_should_stop() { } 669 deactivate_task() { dequeue_task() { dequeue_task_fair() { dequeue_entity() { update_curr() { calc_delta_mine() { } 511 update_min_vruntime() { } 511 } 2813 Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 67 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 12 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f21ab2c68fd4..9d5f7c94f251 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -879,14 +879,38 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static void __trace_function_graph(struct trace_array *tr, +static void __trace_graph_entry(struct trace_array *tr, + struct trace_array_cpu *data, + struct ftrace_graph_ent *trace, + unsigned long flags, + int pc) +{ + struct ring_buffer_event *event; + struct ftrace_graph_ent_entry *entry; + unsigned long irq_flags; + + if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) + return; + + event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_GRAPH_ENT; + entry->graph_ent = *trace; + ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); +} + +static void __trace_graph_return(struct trace_array *tr, struct trace_array_cpu *data, struct ftrace_graph_ret *trace, unsigned long flags, int pc) { struct ring_buffer_event *event; - struct ftrace_graph_entry *entry; + struct ftrace_graph_ret_entry *entry; unsigned long irq_flags; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) @@ -898,12 +922,8 @@ static void __trace_function_graph(struct trace_array *tr, return; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_FN_RET; - entry->ip = trace->func; - entry->parent_ip = trace->ret; - entry->rettime = trace->rettime; - entry->calltime = trace->calltime; - entry->overrun = trace->overrun; + entry->ent.type = TRACE_GRAPH_RET; + entry->ret = *trace; ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); } #endif @@ -1178,7 +1198,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -void trace_function_graph(struct ftrace_graph_ret *trace) +void trace_graph_entry(struct ftrace_graph_ent *trace) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1193,7 +1213,28 @@ void trace_function_graph(struct ftrace_graph_ret *trace) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); - __trace_function_graph(tr, data, trace, flags, pc); + __trace_graph_entry(tr, data, trace, flags, pc); + } + atomic_dec(&data->disabled); + raw_local_irq_restore(flags); +} + +void trace_graph_return(struct ftrace_graph_ret *trace) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + + raw_local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { + pc = preempt_count(); + __trace_graph_return(tr, data, trace, flags, pc); } atomic_dec(&data->disabled); raw_local_irq_restore(flags); @@ -2000,9 +2041,11 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) trace_seq_print_cont(s, iter); break; } - case TRACE_FN_RET: { + case TRACE_GRAPH_RET: { + return print_graph_function(iter); + } + case TRACE_GRAPH_ENT: { return print_graph_function(iter); - break; } case TRACE_BRANCH: { struct trace_branch *field; -- cgit v1.2.3 From 660c7f9be96321fc80026d76411bd15e6f418a72 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 26 Nov 2008 00:16:26 -0500 Subject: ftrace: add thread comm to function graph tracer Impact: enhancement to function graph tracer Export the trace_find_cmdline so the function graph tracer can use it to print the comms of the threads. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9d5f7c94f251..5811e0a5f732 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -804,7 +804,7 @@ static void trace_save_cmdline(struct task_struct *tsk) spin_unlock(&trace_cmdline_lock); } -static char *trace_find_cmdline(int pid) +char *trace_find_cmdline(int pid) { char *cmdline = "<...>"; unsigned map; -- cgit v1.2.3 From c7425acb42fff1e723b05fbf4ea11e9a455d95dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B6r=C3=B6k=20Edwin?= Date: Fri, 28 Nov 2008 11:17:56 +0200 Subject: tracing, alpha: fix build: add missing #ifdef CONFIG_STACKTRACE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are architectures that still have no stacktrace support. Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5811e0a5f732..91887a280ab9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -983,6 +983,7 @@ static void ftrace_trace_userstack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, int pc) { +#ifdef CONFIG_STACKTRACE struct ring_buffer_event *event; struct userstack_entry *entry; struct stack_trace trace; @@ -1008,6 +1009,7 @@ static void ftrace_trace_userstack(struct trace_array *tr, save_stack_trace_user(&trace); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +#endif } void __trace_userstack(struct trace_array *tr, -- cgit v1.2.3 From a5e25883a445dce94a087ca479b21a5959cd5c18 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 2 Dec 2008 15:34:05 -0500 Subject: ftrace: replace raw_local_irq_save with local_irq_save Impact: fix for lockdep and ftrace The raw_local_irq_save/restore confuses lockdep. This patch converts them to the local_irq_save/restore variants. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 91887a280ab9..380de630ebce 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1209,7 +1209,7 @@ void trace_graph_entry(struct ftrace_graph_ent *trace) int cpu; int pc; - raw_local_irq_save(flags); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); @@ -1218,7 +1218,7 @@ void trace_graph_entry(struct ftrace_graph_ent *trace) __trace_graph_entry(tr, data, trace, flags, pc); } atomic_dec(&data->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); } void trace_graph_return(struct ftrace_graph_ret *trace) @@ -1230,7 +1230,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace) int cpu; int pc; - raw_local_irq_save(flags); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); @@ -1239,7 +1239,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace) __trace_graph_return(tr, data, trace, flags, pc); } atomic_dec(&data->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ @@ -2645,7 +2645,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, if (err) goto err_unlock; - raw_local_irq_disable(); + local_irq_disable(); __raw_spin_lock(&ftrace_max_lock); for_each_tracing_cpu(cpu) { /* @@ -2662,7 +2662,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, } } __raw_spin_unlock(&ftrace_max_lock); - raw_local_irq_enable(); + local_irq_enable(); tracing_cpumask = tracing_cpumask_new; -- cgit v1.2.3 From e49dc19c6a19ea112fcb94b7c62ec62cdd5c08aa Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 2 Dec 2008 23:50:05 -0500 Subject: ftrace: function graph return for function entry Impact: feature, let entry function decide to trace or not This patch lets the graph tracer entry function decide if the tracing should be done at the end as well. This requires all function graph entry functions return 1 if it should trace, or 0 if the return should not be traced. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 380de630ebce..8b6409a62b54 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1200,7 +1200,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -void trace_graph_entry(struct ftrace_graph_ent *trace) +int trace_graph_entry(struct ftrace_graph_ent *trace) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1219,6 +1219,8 @@ void trace_graph_entry(struct ftrace_graph_ent *trace) } atomic_dec(&data->disabled); local_irq_restore(flags); + + return 1; } void trace_graph_return(struct ftrace_graph_ret *trace) -- cgit v1.2.3 From ea4e2bc4d9f7370e57a343ccb5e7c0ad3222ec3c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 3 Dec 2008 15:36:57 -0500 Subject: ftrace: graph of a single function This patch adds the file: /debugfs/tracing/set_graph_function which can be used along with the function graph tracer. When this file is empty, the function graph tracer will act as usual. When the file has a function in it, the function graph tracer will only trace that function. For example: # echo blk_unplug > /debugfs/tracing/set_graph_function # cat /debugfs/tracing/trace [...] ------------------------------------------ | 2) make-19003 => kjournald-2219 ------------------------------------------ 2) | blk_unplug() { 2) | dm_unplug_all() { 2) | dm_get_table() { 2) 1.381 us | _read_lock(); 2) 0.911 us | dm_table_get(); 2) 1. 76 us | _read_unlock(); 2) + 12.912 us | } 2) | dm_table_unplug_all() { 2) | blk_unplug() { 2) 0.778 us | generic_unplug_device(); 2) 2.409 us | } 2) 5.992 us | } 2) 0.813 us | dm_table_put(); 2) + 29. 90 us | } 2) + 34.532 us | } You can add up to 32 functions into this file. Currently we limit it to 32, but this may change with later improvements. To add another function, use the append '>>': # echo sys_read >> /debugfs/tracing/set_graph_function # cat /debugfs/tracing/set_graph_function blk_unplug sys_read Using the '>' will clear out the function and write anew: # echo sys_write > /debug/tracing/set_graph_function # cat /debug/tracing/set_graph_function sys_write Note, if you have function graph running while doing this, the small time between clearing it and updating it will cause the graph to record all functions. This should not be an issue because after it sets the filter, only those functions will be recorded from then on. If you need to only record a particular function then set this file first before starting the function graph tracer. In the future this side effect may be corrected. The set_graph_function file is similar to the set_ftrace_filter but it does not take wild cards nor does it allow for more than one function to be set with a single write. There is no technical reason why this is the case, I just do not have the time yet to implement that. Note, dynamic ftrace must be enabled for this to appear because it uses the dynamic ftrace records to match the name to the mcount call sites. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8b6409a62b54..710b39acd81b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1209,6 +1209,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) int cpu; int pc; + if (!ftrace_graph_addr(trace->func)) + return 0; + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; @@ -1217,6 +1220,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) pc = preempt_count(); __trace_graph_entry(tr, data, trace, flags, pc); } + /* Only do the atomic if it is not already set */ + if (!test_tsk_trace_graph(current)) + set_tsk_trace_graph(current); atomic_dec(&data->disabled); local_irq_restore(flags); @@ -1240,6 +1246,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace) pc = preempt_count(); __trace_graph_return(tr, data, trace, flags, pc); } + if (!trace->depth) + clear_tsk_trace_graph(current); atomic_dec(&data->disabled); local_irq_restore(flags); } -- cgit v1.2.3 From 804a685162a7080386714166776f57255a75238e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 3 Dec 2008 15:36:59 -0500 Subject: ftrace: trace single pid for function graph tracer Impact: New feature This patch makes the changes to set_ftrace_pid apply to the function graph tracer. # echo $$ > /debugfs/tracing/set_ftrace_pid # echo function_graph > /debugfs/tracing/current_tracer Will cause only the current task to be traced. Note, the trace flags are also inherited by child processes, so the children of the shell will also be traced. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 710b39acd81b..1bd9574404e5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1209,6 +1209,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) int cpu; int pc; + if (!ftrace_trace_task(current)) + return 0; + if (!ftrace_graph_addr(trace->func)) return 0; -- cgit v1.2.3 From 1fd8f2a3f9a91b287a876cef830b21baafc8a799 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 3 Dec 2008 23:45:11 +0100 Subject: tracing/function-graph-tracer: handle ftrace_printk entries Handle the TRACE_PRINT entries from the function grapg tracer and output them as a C comment just below the function that called it, as if it was a comment inside this function. Example with an ftrace_printk inside might_sleep() function: void __might_sleep(char *file, int line) { static unsigned long prev_jiffy; /* ratelimiting */ ftrace_printk("Hi I'm a comment in might_sleep() :-)"); A chunk of a resulting trace: 0) | _reiserfs_free_block() { 0) | reiserfs_read_bitmap_block() { 0) | __bread() { 0) | __getblk() { 0) | __find_get_block() { 0) 0.698 us | mark_page_accessed(); 0) 2.267 us | } 0) | __might_sleep() { 0) | /* Hi I'm a comment in might_sleep() :-) */ 0) 1.321 us | } 0) 5.872 us | } 0) 7.313 us | } 0) 8.718 us | } And this patch brings two minor fixes: - The newline after a switch-out task has disappeared - The "|" sign just before the cpu number on task-switch has been deleted. 0) 0.616 us | pick_next_task_rt(); 0) 1.457 us | _spin_trylock(); 0) 0.653 us | _spin_unlock(); 0) 0.728 us | _spin_trylock(); 0) 0.631 us | _spin_unlock(); 0) 0.729 us | native_load_sp0(); 0) 0.593 us | native_load_tls(); ------------------------------------------ 0) cat-2834 => migrati-3 ------------------------------------------ 0) | finish_task_switch() { 0) 0.841 us | _spin_unlock_irq(); 0) 0.616 us | post_schedule_rt(); 0) 3.882 us | } Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8b6409a62b54..1ca74c0cee6a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3335,7 +3335,7 @@ static int mark_printk(const char *fmt, ...) int ret; va_list args; va_start(args, fmt); - ret = trace_vprintk(0, fmt, args); + ret = trace_vprintk(0, -1, fmt, args); va_end(args); return ret; } @@ -3564,9 +3564,16 @@ static __init int tracer_init_debugfs(void) return 0; } -int trace_vprintk(unsigned long ip, const char *fmt, va_list args) +int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) { - static DEFINE_SPINLOCK(trace_buf_lock); + /* + * Raw Spinlock because a normal spinlock would be traced here + * and append an irrelevant couple spin_lock_irqsave/ + * spin_unlock_irqrestore traced by ftrace around this + * TRACE_PRINTK trace. + */ + static raw_spinlock_t trace_buf_lock = + (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; static char trace_buf[TRACE_BUF_SIZE]; struct ring_buffer_event *event; @@ -3587,7 +3594,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) if (unlikely(atomic_read(&data->disabled))) goto out; - spin_lock_irqsave(&trace_buf_lock, flags); + local_irq_save(flags); + __raw_spin_lock(&trace_buf_lock); len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); len = min(len, TRACE_BUF_SIZE-1); @@ -3601,13 +3609,15 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) tracing_generic_entry_update(&entry->ent, flags, pc); entry->ent.type = TRACE_PRINT; entry->ip = ip; + entry->depth = depth; memcpy(&entry->buf, trace_buf, len); entry->buf[len] = 0; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); out_unlock: - spin_unlock_irqrestore(&trace_buf_lock, flags); + __raw_spin_unlock(&trace_buf_lock); + local_irq_restore(flags); out: preempt_enable_notrace(); @@ -3625,7 +3635,13 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) return 0; va_start(ap, fmt); - ret = trace_vprintk(ip, fmt, ap); + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + ret = trace_vprintk(ip, current->curr_ret_stack, fmt, ap); +#else + ret = trace_vprintk(ip, -1, fmt, ap); +#endif + va_end(ap); return ret; } -- cgit v1.2.3 From ff32504fdc56407654584ef187b20022c94a3486 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 4 Dec 2008 23:47:35 +0100 Subject: tracing/ftrace: don't insert TRACE_PRINT during selftests Impact: fix tracer selfstests false results After setting a ftrace_printk somewhere in th kernel, I saw the Function tracer selftest failing. When a selftest occurs, the ring buffer is lurked to see if some entries were inserted. But concurrent insertion such as ftrace_printk could occured at the same time and could give false positive or negative results. This patch prevent prevent from TRACE_PRINT entries insertion during selftests. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ea38652d631c..5dca6ef1fbeb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -44,6 +44,14 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; +/* We need to change this state when a selftest is running. + * A selftest will lurk into the ring-buffer to count the + * entries inserted during the selftest although some concurrent + * insertions into the ring-buffer such as ftrace_printk could occurred + * at the same time, giving false positive or negative results. + */ +static atomic_t tracing_selftest_running = ATOMIC_INIT(0); + /* For tracers that don't implement custom flags */ static struct tracer_opt dummy_tracer_opt[] = { { } @@ -589,6 +597,8 @@ int register_tracer(struct tracer *type) struct tracer *saved_tracer = current_trace; struct trace_array *tr = &global_trace; int i; + + atomic_set(&tracing_selftest_running, 1); /* * Run a selftest on this tracer. * Here we reset the trace buffer, and set the current @@ -603,6 +613,7 @@ int register_tracer(struct tracer *type) /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); ret = type->selftest(type, tr); + atomic_set(&tracing_selftest_running, 0); /* the test is responsible for resetting too */ current_trace = saved_tracer; if (ret) { @@ -3594,7 +3605,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) unsigned long flags, irq_flags; int cpu, len = 0, size, pc; - if (tracing_disabled) + if (tracing_disabled || atomic_read(&tracing_selftest_running)) return 0; pc = preempt_count(); -- cgit v1.2.3 From 21a8c466f99063eeb8567318b4e305eda9015408 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 4 Dec 2008 23:51:23 +0100 Subject: tracing/ftrace: provide the macro task_curr_ret_stack() Impact: cleanup As suggested by Steven Rostedt, this patch provide a new macro task_curr_ret_stack() to move the cpp conditionnal CONFIG into the linux/ftrace.h headers. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5dca6ef1fbeb..7a93c663e52a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3657,13 +3657,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) return 0; va_start(ap, fmt); - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - ret = trace_vprintk(ip, current->curr_ret_stack, fmt, ap); -#else - ret = trace_vprintk(ip, -1, fmt, ap); -#endif - + ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); va_end(ap); return ret; } -- cgit v1.2.3 From 8e1b82e0866befaa0b2920be296c6e4c3fc7f422 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 6 Dec 2008 03:41:33 +0100 Subject: tracing/function-graph-tracer: turn tracing_selftest_running into an int Impact: cleanup Apply some suggestions of Steven Rostedt: _turn tracing_selftest_running into a simple int (no need of an atomic_t) _set it __read_mostly _fix a comment style Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7a93c663e52a..33549537f30f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -44,13 +44,14 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; -/* We need to change this state when a selftest is running. +/* + * We need to change this state when a selftest is running. * A selftest will lurk into the ring-buffer to count the * entries inserted during the selftest although some concurrent * insertions into the ring-buffer such as ftrace_printk could occurred * at the same time, giving false positive or negative results. */ -static atomic_t tracing_selftest_running = ATOMIC_INIT(0); +static bool __read_mostly tracing_selftest_running; /* For tracers that don't implement custom flags */ static struct tracer_opt dummy_tracer_opt[] = { @@ -574,6 +575,8 @@ int register_tracer(struct tracer *type) unlock_kernel(); mutex_lock(&trace_types_lock); + tracing_selftest_running = true; + for (t = trace_types; t; t = t->next) { if (strcmp(type->name, t->name) == 0) { /* already found */ @@ -598,7 +601,6 @@ int register_tracer(struct tracer *type) struct trace_array *tr = &global_trace; int i; - atomic_set(&tracing_selftest_running, 1); /* * Run a selftest on this tracer. * Here we reset the trace buffer, and set the current @@ -613,7 +615,6 @@ int register_tracer(struct tracer *type) /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); ret = type->selftest(type, tr); - atomic_set(&tracing_selftest_running, 0); /* the test is responsible for resetting too */ current_trace = saved_tracer; if (ret) { @@ -635,6 +636,7 @@ int register_tracer(struct tracer *type) max_tracer_type_len = len; out: + tracing_selftest_running = false; mutex_unlock(&trace_types_lock); lock_kernel(); @@ -3605,7 +3607,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) unsigned long flags, irq_flags; int cpu, len = 0, size, pc; - if (tracing_disabled || atomic_read(&tracing_selftest_running)) + if (tracing_disabled || tracing_selftest_running) return 0; pc = preempt_count(); -- cgit v1.2.3 From 380c4b1411ccd6885f92b2c8ceb08433a720f44e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 6 Dec 2008 03:43:41 +0100 Subject: tracing/function-graph-tracer: append the tracing_graph_flag Impact: Provide a way to pause the function graph tracer As suggested by Steven Rostedt, the previous patch that prevented from spinlock function tracing shouldn't use the raw_spinlock to fix it. It's much better to follow lockdep with normal spinlock, so this patch adds a new flag for each task to make the function graph tracer able to be paused. We also can send an ftrace_printk whithout worrying of the irrelevant traced spinlock during insertion. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 33549537f30f..0b8659bd5ad2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3590,14 +3590,7 @@ static __init int tracer_init_debugfs(void) int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) { - /* - * Raw Spinlock because a normal spinlock would be traced here - * and append an irrelevant couple spin_lock_irqsave/ - * spin_unlock_irqrestore traced by ftrace around this - * TRACE_PRINTK trace. - */ - static raw_spinlock_t trace_buf_lock = - (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + static DEFINE_SPINLOCK(trace_buf_lock); static char trace_buf[TRACE_BUF_SIZE]; struct ring_buffer_event *event; @@ -3618,8 +3611,8 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) if (unlikely(atomic_read(&data->disabled))) goto out; - local_irq_save(flags); - __raw_spin_lock(&trace_buf_lock); + pause_graph_tracing(); + spin_lock_irqsave(&trace_buf_lock, irq_flags); len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); len = min(len, TRACE_BUF_SIZE-1); @@ -3640,9 +3633,8 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) ring_buffer_unlock_commit(tr->buffer, event, irq_flags); out_unlock: - __raw_spin_unlock(&trace_buf_lock); - local_irq_restore(flags); - + spin_unlock_irqrestore(&trace_buf_lock, irq_flags); + unpause_graph_tracing(); out: preempt_enable_notrace(); -- cgit v1.2.3 From e726f5f91effd8944c76475a2688093a03ba0d10 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Dec 2008 16:55:53 +0100 Subject: tracing/function-graph-tracer: fix 'flags' variable mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this warning: kernel/trace/trace.c: In function ‘trace_vprintk’: kernel/trace/trace.c:3626: warning: ‘flags’ may be used uninitialized in this function shows some confusion about irq_flags / flags use here. We already have irq_flags so remove the extra flags variable. Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0b8659bd5ad2..8ebe0070c47a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3596,9 +3596,9 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) struct ring_buffer_event *event; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; - struct print_entry *entry; - unsigned long flags, irq_flags; int cpu, len = 0, size, pc; + struct print_entry *entry; + unsigned long irq_flags; if (tracing_disabled || tracing_selftest_running) return 0; @@ -3623,7 +3623,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) if (!event) goto out_unlock; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); + tracing_generic_entry_update(&entry->ent, irq_flags, pc); entry->ent.type = TRACE_PRINT; entry->ip = ip; entry->depth = depth; -- cgit v1.2.3 From a93751cab71d63126687551823ed3e70cd85854a Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Thu, 11 Dec 2008 13:53:26 +0100 Subject: x86, bts, ftrace: adapt the hw-branch-tracer to the ds.c interface Impact: restructure code, cleanup Remove BTS bits from the hw-branch-tracer (renamed from bts-tracer) and use the ds interface. Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8ebe0070c47a..639344a4d3a2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2425,7 +2425,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) /* Notify the tracer early; before we stop tracing. */ if (iter->trace && iter->trace->open) - iter->trace->open(iter); + iter->trace->open(iter); /* Annotate start of buffers if we had overruns */ if (ring_buffer_overruns(iter->tr->buffer)) -- cgit v1.2.3 From 66896a85cf2890b6bbbc4c9ccdcd296600ffbf89 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 13 Dec 2008 20:18:13 +0100 Subject: tracing/ftrace: add the printk-msg-only option Impact: display ftrace_printk messages "as is" By default, ftrace_printk() messages find their output with some other informations like pid, caller, ... Sometimes a developer just want to have the ftrace_printk left "as is", without other information. This is done by providing a default-off option called printk-msg-only. To enable it, just do `echo printk-msg-only > /debugfs/tracing/trace_options` Before the patch: <...>-2739 [000] 145.692153: __might_sleep: I'm an ftrace_printk msg in __might_sleep <...>-2739 [000] 145.692155: __might_sleep: I'm another ftrace_printk msg in __might_sleep After the patch and the printk-msg-only option enabled: I'm an ftrace_printk msg in __might_sleep I'm another ftrace_printk msg in __might_sleep Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 639344a4d3a2..1a3d6b329782 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -287,6 +287,7 @@ static const char *trace_options[] = { "annotate", "userstacktrace", "sym-userobj", + "printk-msg-only", NULL }; @@ -2265,6 +2266,25 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) return TRACE_TYPE_HANDLED; } +static enum print_line_t print_printk_msg_only(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; + struct print_entry *field; + int ret; + + trace_assign_type(field, entry); + + ret = trace_seq_printf(s, field->buf); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + if (entry->flags & TRACE_FLAG_CONT) + trace_seq_print_cont(s, iter); + + return TRACE_TYPE_HANDLED; +} + static enum print_line_t print_bin_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -2345,6 +2365,11 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter) return ret; } + if (iter->ent->type == TRACE_PRINT && + trace_flags & TRACE_ITER_PRINTK && + trace_flags & TRACE_ITER_PRINTK_MSGONLY) + return print_printk_msg_only(iter); + if (trace_flags & TRACE_ITER_BIN) return print_bin_fmt(iter); -- cgit v1.2.3 From 3d9101e92529e1ff6014f95a69afc82f37b9b13a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 17 Dec 2008 22:34:13 +0100 Subject: trace: fix task state printout Impact: fix occasionally incorrect trace output The tracing code has interesting varieties of printing out task state. Unfortunalely only one of the instances is correct as it copies the code from sched.c:sched_show_task(). The others are plain wrong as they treatthe bitfield as an integer offset into the character array. Also the size check of the character array is wrong as it includes the trailing \0. Use a common state decoder inline which does the Right Thing. Signed-off-by: Thomas Gleixner Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d86e3252f300..803100518f11 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1301,6 +1301,13 @@ lat_print_timestamp(struct trace_seq *s, u64 abs_usecs, static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; +static int task_state_char(unsigned long state) +{ + int bit = state ? __ffs(state) + 1 : 0; + + return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?'; +} + /* * The message is supposed to contain an ending newline. * If the printing stops prematurely, try to add a newline of our own. @@ -1396,12 +1403,8 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) trace_assign_type(field, entry); - T = field->next_state < sizeof(state_to_char) ? - state_to_char[field->next_state] : 'X'; - - state = field->prev_state ? - __ffs(field->prev_state) + 1 : 0; - S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X'; + T = task_state_char(field->next_state); + S = task_state_char(field->prev_state); comm = trace_find_cmdline(field->next_pid); trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", field->prev_pid, @@ -1519,10 +1522,8 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) trace_assign_type(field, entry); - S = field->prev_state < sizeof(state_to_char) ? - state_to_char[field->prev_state] : 'X'; - T = field->next_state < sizeof(state_to_char) ? - state_to_char[field->next_state] : 'X'; + T = task_state_char(field->next_state); + S = task_state_char(field->prev_state); ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n", field->prev_pid, field->prev_prio, @@ -1621,12 +1622,9 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) trace_assign_type(field, entry); - S = field->prev_state < sizeof(state_to_char) ? - state_to_char[field->prev_state] : 'X'; - T = field->next_state < sizeof(state_to_char) ? - state_to_char[field->next_state] : 'X'; - if (entry->type == TRACE_WAKE) - S = '+'; + T = task_state_char(field->next_state); + S = entry->type == TRACE_WAKE ? '+' : + task_state_char(field->prev_state); ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n", field->prev_pid, field->prev_prio, @@ -1712,12 +1710,9 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) trace_assign_type(field, entry); - S = field->prev_state < sizeof(state_to_char) ? - state_to_char[field->prev_state] : 'X'; - T = field->next_state < sizeof(state_to_char) ? - state_to_char[field->next_state] : 'X'; - if (entry->type == TRACE_WAKE) - S = '+'; + T = task_state_char(field->next_state); + S = entry->type == TRACE_WAKE ? '+' : + task_state_char(field->prev_state); SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid); SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio); SEQ_PUT_HEX_FIELD_RET(s, S); -- cgit v1.2.3 From 3bddb9a3246f6df5cf3b7655cb541ac10203bb71 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 19 Dec 2008 01:03:29 +0100 Subject: tracing: fix warning in kernel/trace/trace.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this warning: kernel/trace/trace.c: In function ‘print_lat_fmt’: kernel/trace/trace.c:1826: warning: unused variable ‘state’ Triggers because 'state' has become unused - remove it. Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1a3d6b329782..49fc7201295e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1816,7 +1816,6 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) char *comm; int S, T; int i; - unsigned state; if (entry->type == TRACE_CONT) return TRACE_TYPE_HANDLED; -- cgit v1.2.3 From 213cc060797378059a28ebc5c539f3e9a80160bd Mon Sep 17 00:00:00 2001 From: Pekka J Enberg Date: Fri, 19 Dec 2008 12:08:39 +0200 Subject: ftrace: introduce tracing_reset_online_cpus() helper Impact: cleanup This patch factors out common code from multiple tracers into a tracing_reset_online_cpus() function and converts the tracers to use it. Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0eb6d48347f7..79db26e8216e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -679,6 +679,16 @@ void tracing_reset(struct trace_array *tr, int cpu) ftrace_enable_cpu(); } +void tracing_reset_online_cpus(struct trace_array *tr) +{ + int cpu; + + tr->time_start = ftrace_now(tr->cpu); + + for_each_online_cpu(cpu) + tracing_reset(tr, cpu); +} + #define SAVED_CMDLINES 128 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; -- cgit v1.2.3