From 71e308a239c098673570d0b417d42262bb535909 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 18 Jun 2009 12:45:08 -0400 Subject: function-graph: add stack frame test In case gcc does something funny with the stack frames, or the return from function code, we would like to detect that. An arch may implement passing of a variable that is unique to the function and can be saved on entering a function and can be tested when exiting the function. Usually the frame pointer can be used for this purpose. This patch also implements this for x86. Where it passes in the stack frame of the parent function, and will test that frame on exit. There was a case in x86_32 with optimize for size (-Os) where, for a few functions, gcc would align the stack frame and place a copy of the return address into it. The function graph tracer modified the copy and not the actual return address. On return from the funtion, it did not go to the tracer hook, but returned to the parent. This broke the function graph tracer, because the return of the parent (where gcc did not do this funky manipulation) returned to the location that the child function was suppose to. This caused strange kernel crashes. This test detected the problem and pointed out where the issue was. This modifies the parameters of one of the functions that the arch specific code calls, so it includes changes to arch code to accommodate the new prototype. Note, I notice that the parsic arch implements its own push_return_trace. This is now a generic function and the ftrace_push_return_trace should be used instead. This patch does not touch that code. Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Frederic Weisbecker Cc: Helge Deller Cc: Kyle McMartin Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace_functions_graph.c') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 8b592418d8b2..d2249abafb53 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -57,7 +57,8 @@ static struct tracer_flags tracer_flags = { /* Add a function return address to the trace stack on thread info.*/ int -ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) +ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, + unsigned long frame_pointer) { unsigned long long calltime; int index; @@ -85,6 +86,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) current->ret_stack[index].func = func; current->ret_stack[index].calltime = calltime; current->ret_stack[index].subtime = 0; + current->ret_stack[index].fp = frame_pointer; *depth = index; return 0; @@ -92,7 +94,8 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) /* Retrieve a function return address to the trace stack on thread info.*/ static void -ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) +ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, + unsigned long frame_pointer) { int index; @@ -106,6 +109,31 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) return; } +#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST + /* + * The arch may choose to record the frame pointer used + * and check it here to make sure that it is what we expect it + * to be. If gcc does not set the place holder of the return + * address in the frame pointer, and does a copy instead, then + * the function graph trace will fail. This test detects this + * case. + * + * Currently, x86_32 with optimize for size (-Os) makes the latest + * gcc do the above. + */ + if (unlikely(current->ret_stack[index].fp != frame_pointer)) { + ftrace_graph_stop(); + WARN(1, "Bad frame pointer: expected %lx, received %lx\n" + " from func %pF return to %lx\n", + current->ret_stack[index].fp, + frame_pointer, + (void *)current->ret_stack[index].func, + current->ret_stack[index].ret); + *ret = (unsigned long)panic; + return; + } +#endif + *ret = current->ret_stack[index].ret; trace->func = current->ret_stack[index].func; trace->calltime = current->ret_stack[index].calltime; @@ -117,12 +145,12 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) * Send the trace to the ring-buffer. * @return the original return address. */ -unsigned long ftrace_return_to_handler(void) +unsigned long ftrace_return_to_handler(unsigned long frame_pointer) { struct ftrace_graph_ret trace; unsigned long ret; - ftrace_pop_return_trace(&trace, &ret); + ftrace_pop_return_trace(&trace, &ret, frame_pointer); trace.rettime = trace_clock_local(); ftrace_graph_return(&trace); barrier(); -- cgit v1.2.3 From 68baafcfc46074c4bb4e4c3115c2c76a8a85f37d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 9 Jul 2009 04:46:29 +0200 Subject: tracing/function-graph-tracer: Use the %pf format Remove the obsolete seq_print_ip_sym() usage and replace it by the %pf format in order to print function symbols. Signed-off-by: Frederic Weisbecker Reviewed-by: Li Zefan Cc: Steven Rostedt Cc: Lai Jiangshan LKML-Reference: <1247107590-6428-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_functions_graph.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'kernel/trace/trace_functions_graph.c') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index d2249abafb53..abf7c4ae2c8b 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -565,11 +565,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, return TRACE_TYPE_PARTIAL_LINE; } - ret = seq_print_ip_sym(s, call->func, 0); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_printf(s, "();\n"); + ret = trace_seq_printf(s, "%pf();\n", (void *)call->func); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -612,11 +608,7 @@ print_graph_entry_nested(struct trace_iterator *iter, return TRACE_TYPE_PARTIAL_LINE; } - ret = seq_print_ip_sym(s, call->func, 0); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_printf(s, "() {\n"); + ret = trace_seq_printf(s, "%pf() {\n", (void *)call->func); if (!ret) return TRACE_TYPE_PARTIAL_LINE; -- cgit v1.2.3 From 38ceb592fcac9110c6b3c87ea0a27bff68c43486 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 28 Jul 2009 20:11:24 +0800 Subject: tracing: Fix invalid function_graph entry When print_graph_entry() computes a function call entry event, it needs to also check the next entry to guess if it matches the return event of the current function entry. In order to look at this next event, it needs to consume the current entry before going ahead in the ring buffer. However, if the current event that gets consumed is the last one in the ring buffer head page, the ring_buffer may reuse the page for writers. The consumed entry will then become invalid because of possible racy overwriting. Me must then handle this entry by making a copy of it. The fix also applies on 2.6.30 Signed-off-by: Lai Jiangshan Cc: Steven Rostedt Cc: stable@kernel.org LKML-Reference: <4A6EEAEC.3050508@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- kernel/trace/trace_functions_graph.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace_functions_graph.c') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index d2249abafb53..420ec3487579 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -843,9 +843,16 @@ print_graph_function(struct trace_iterator *iter) switch (entry->type) { case TRACE_GRAPH_ENT: { - struct ftrace_graph_ent_entry *field; + /* + * print_graph_entry() may consume the current event, + * thus @field may become invalid, so we need to save it. + * sizeof(struct ftrace_graph_ent_entry) is very small, + * it can be safely saved at the stack. + */ + struct ftrace_graph_ent_entry *field, saved; trace_assign_type(field, entry); - return print_graph_entry(field, s, iter); + saved = *field; + return print_graph_entry(&saved, s, iter); } case TRACE_GRAPH_RET: { struct ftrace_graph_ret_entry *field; -- cgit v1.2.3 From 0c9e6f639aed490202bbc79214f4495cf4bfde58 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 28 Jul 2009 20:26:06 +0800 Subject: tracing: Simplify print_graph_cpu() print_graph_cpu() is little over-designed. And "log10_all" may be wrong when there are holes in cpu_online_mask: the max online cpu id > cpumask_weight(cpu_online_mask) So change it by using a static column length for the cpu matching nr_cpu_ids number of decimal characters. Signed-off-by: Lai Jiangshan Cc: Steven Rostedt LKML-Reference: <4A6EEE5E.2000001@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- kernel/trace/trace_functions_graph.c | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) (limited to 'kernel/trace/trace_functions_graph.c') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index abf7c4ae2c8b..e30472da15d5 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -183,43 +183,19 @@ static void graph_trace_reset(struct trace_array *tr) unregister_ftrace_graph(); } -static inline int log10_cpu(int nb) -{ - if (nb / 100) - return 3; - if (nb / 10) - return 2; - return 1; -} +static int max_bytes_for_cpu; static enum print_line_t print_graph_cpu(struct trace_seq *s, int cpu) { - int i; int ret; - int log10_this = log10_cpu(cpu); - int log10_all = log10_cpu(cpumask_weight(cpu_online_mask)); - /* * Start with a space character - to make it stand out * to the right a bit when trace output is pasted into * email: */ - ret = trace_seq_printf(s, " "); - - /* - * Tricky - we space the CPU field according to the max - * number of online CPUs. On a 2-cpu system it would take - * a maximum of 1 digit - on a 128 cpu system it would - * take up to 3 digits: - */ - for (i = 0; i < log10_all - log10_this; i++) { - ret = trace_seq_printf(s, " "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - ret = trace_seq_printf(s, "%d) ", cpu); + ret = trace_seq_printf(s, " %*d) ", max_bytes_for_cpu, cpu); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -919,6 +895,8 @@ static struct tracer graph_trace __read_mostly = { static __init int init_graph_trace(void) { + max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); + return register_tracer(&graph_trace); } -- cgit v1.2.3 From 1a0799a8fef5acc6503f9c5e79b2cd003317826c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 29 Jul 2009 18:59:58 +0200 Subject: tracing/function-graph-tracer: Move graph event insertion helpers in the graph tracer file The function graph events helpers which insert the function entry and return events into the ring buffer currently reside in trace.c But this file is quite overloaded and the right place for these helpers is in the function graph tracer file. Then move them to trace_functions_graph.c Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 122 ++++++++++++++++++++++++++++++++++- 1 file changed, 119 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace_functions_graph.c') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index e30472da15d5..f97244a41a4f 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -52,7 +52,7 @@ static struct tracer_flags tracer_flags = { .opts = trace_opts }; -/* pid on the last trace processed */ +static struct trace_array *graph_array; /* Add a function return address to the trace stack on thread info.*/ @@ -166,10 +166,121 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) return ret; } +static int __trace_graph_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, + unsigned long flags, + int pc) +{ + struct ftrace_event_call *call = &event_funcgraph_entry; + struct ring_buffer_event *event; + struct ftrace_graph_ent_entry *entry; + + if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) + return 0; + + event = trace_buffer_lock_reserve(tr, TRACE_GRAPH_ENT, + sizeof(*entry), flags, pc); + if (!event) + return 0; + entry = ring_buffer_event_data(event); + entry->graph_ent = *trace; + if (!filter_current_check_discard(call, entry, event)) + ring_buffer_unlock_commit(tr->buffer, event); + + return 1; +} + +int trace_graph_entry(struct ftrace_graph_ent *trace) +{ + struct trace_array *tr = graph_array; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int ret; + int cpu; + int pc; + + if (unlikely(!tr)) + return 0; + + if (!ftrace_trace_task(current)) + return 0; + + if (!ftrace_graph_addr(trace->func)) + return 0; + + local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { + pc = preempt_count(); + ret = __trace_graph_entry(tr, trace, flags, pc); + } else { + ret = 0; + } + /* Only do the atomic if it is not already set */ + if (!test_tsk_trace_graph(current)) + set_tsk_trace_graph(current); + + atomic_dec(&data->disabled); + local_irq_restore(flags); + + return ret; +} + +static void __trace_graph_return(struct trace_array *tr, + struct ftrace_graph_ret *trace, + unsigned long flags, + int pc) +{ + struct ftrace_event_call *call = &event_funcgraph_exit; + struct ring_buffer_event *event; + struct ftrace_graph_ret_entry *entry; + + if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) + return; + + event = trace_buffer_lock_reserve(tr, TRACE_GRAPH_RET, + sizeof(*entry), flags, pc); + if (!event) + return; + entry = ring_buffer_event_data(event); + entry->ret = *trace; + if (!filter_current_check_discard(call, entry, event)) + ring_buffer_unlock_commit(tr->buffer, event); +} + +void trace_graph_return(struct ftrace_graph_ret *trace) +{ + struct trace_array *tr = graph_array; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + + local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { + pc = preempt_count(); + __trace_graph_return(tr, trace, flags, pc); + } + if (!trace->depth) + clear_tsk_trace_graph(current); + atomic_dec(&data->disabled); + local_irq_restore(flags); +} + static int graph_trace_init(struct trace_array *tr) { - int ret = register_ftrace_graph(&trace_graph_return, - &trace_graph_entry); + int ret; + + graph_array = tr; + ret = register_ftrace_graph(&trace_graph_return, + &trace_graph_entry); if (ret) return ret; tracing_start_cmdline_record(); @@ -177,6 +288,11 @@ static int graph_trace_init(struct trace_array *tr) return 0; } +void set_graph_array(struct trace_array *tr) +{ + graph_array = tr; +} + static void graph_trace_reset(struct trace_array *tr) { tracing_stop_cmdline_record(); -- cgit v1.2.3 From e77405ad80f53966524b5c31244e13fbbbecbd84 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 2 Sep 2009 14:17:06 -0400 Subject: tracing: pass around ring buffer instead of tracer The latency tracers (irqsoff and wakeup) can swap trace buffers on the fly. If an event is happening and has reserved data on one of the buffers, and the latency tracer swaps the global buffer with the max buffer, the result is that the event may commit the data to the wrong buffer. This patch changes the API to the trace recording to be recieve the buffer that was used to reserve a commit. Then this buffer can be passed in to the commit. Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace_functions_graph.c') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 3f4a251b7d16..b3749a2c3132 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -173,19 +173,20 @@ static int __trace_graph_entry(struct trace_array *tr, { struct ftrace_event_call *call = &event_funcgraph_entry; struct ring_buffer_event *event; + struct ring_buffer *buffer = tr->buffer; struct ftrace_graph_ent_entry *entry; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return 0; - event = trace_buffer_lock_reserve(tr, TRACE_GRAPH_ENT, + event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, sizeof(*entry), flags, pc); if (!event) return 0; entry = ring_buffer_event_data(event); entry->graph_ent = *trace; - if (!filter_current_check_discard(call, entry, event)) - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_current_check_discard(buffer, call, entry, event)) + ring_buffer_unlock_commit(buffer, event); return 1; } @@ -236,19 +237,20 @@ static void __trace_graph_return(struct trace_array *tr, { struct ftrace_event_call *call = &event_funcgraph_exit; struct ring_buffer_event *event; + struct ring_buffer *buffer = tr->buffer; struct ftrace_graph_ret_entry *entry; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = trace_buffer_lock_reserve(tr, TRACE_GRAPH_RET, + event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); entry->ret = *trace; - if (!filter_current_check_discard(call, entry, event)) - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_current_check_discard(buffer, call, entry, event)) + ring_buffer_unlock_commit(buffer, event); } void trace_graph_return(struct ftrace_graph_ret *trace) -- cgit v1.2.3