From d8a0349c0cea477322c66ea9362f10c62fad5f62 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Tue, 13 Nov 2012 09:53:04 +0800 Subject: tracing: Use this_cpu_ptr per-cpu helper typeof(&buffer) is a pointer to array of 1024 char, or char (*)[1024]. But, typeof(&buffer[0]) is a pointer to char which match the return type of get_trace_buf(). As well-known, the value of &buffer is equal to &buffer[0]. so return this_cpu_ptr(&percpu_buffer->buffer[0]) can avoid type cast. Link: http://lkml.kernel.org/r/50A1A800.3020102@gmail.com Reviewed-by: Christoph Lameter Signed-off-by: Shan Wei Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3c13e46d7d24..f8b7c626f3fd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1517,7 +1517,6 @@ static struct trace_buffer_struct *trace_percpu_nmi_buffer; static char *get_trace_buf(void) { struct trace_buffer_struct *percpu_buffer; - struct trace_buffer_struct *buffer; /* * If we have allocated per cpu buffers, then we do not @@ -1535,9 +1534,7 @@ static char *get_trace_buf(void) if (!percpu_buffer) return NULL; - buffer = per_cpu_ptr(percpu_buffer, smp_processor_id()); - - return buffer->buffer; + return this_cpu_ptr(&percpu_buffer->buffer[0]); } static int alloc_percpu_trace_buffer(void) -- cgit v1.2.3 From a54164114b96b4693b42cdb553260eec41ea4393 Mon Sep 17 00:00:00 2001 From: Hiraku Toyooka Date: Wed, 19 Dec 2012 16:02:34 +0900 Subject: tracing: Add checks if tr->buffer is NULL in tracing_reset{_online_cpus} max_tr->buffer could be NULL in the tracing_reset{_online_cpus}. In this case, a NULL pointer dereference happens, so we should return immediately from these functions. Note, the current code does not call tracing_reset*() with max_tr when its buffer is NULL, but future code will. This patch is needed to prevent the future code from crashing. Link: http://lkml.kernel.org/r/20121219070234.31200.93863.stgit@liselsia Signed-off-by: Hiraku Toyooka Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f8b7c626f3fd..72b171b90e55 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -922,6 +922,9 @@ void tracing_reset(struct trace_array *tr, int cpu) { struct ring_buffer *buffer = tr->buffer; + if (!buffer) + return; + ring_buffer_record_disable(buffer); /* Make sure all commits have finished */ @@ -936,6 +939,9 @@ void tracing_reset_online_cpus(struct trace_array *tr) struct ring_buffer *buffer = tr->buffer; int cpu; + if (!buffer) + return; + ring_buffer_record_disable(buffer); /* Make sure all commits have finished */ -- cgit v1.2.3 From 84c6cf0db6a00601eb43cfc08244a398ffb0894c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 20 Dec 2012 21:43:52 -0500 Subject: tracing: Remove unneeded check of max_tr->buffer before tracing_reset There's now a check in tracing_reset_online_cpus() if the buffer is allocated or NULL. No need to do a check before calling it with max_tr. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 72b171b90e55..d62248dfda76 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4040,8 +4040,7 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, * Reset the buffer so that it doesn't have incomparable timestamps. */ tracing_reset_online_cpus(&global_trace); - if (max_tr.buffer) - tracing_reset_online_cpus(&max_tr); + tracing_reset_online_cpus(&max_tr); mutex_unlock(&trace_types_lock); -- cgit v1.2.3 From b000c8065a92b0fe0e1694f41b2c8d8ba7b7b1ec Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 18 Jan 2013 10:31:20 -0500 Subject: tracing: Remove the extra 4 bytes of padding in events Due to a userspace issue with PowerTop v2beta, which hardcoded the offset of event fields that it was using, it broke when we removed the Big Kernel Lock counter from the event header. (commit e6e1e2593 "tracing: Remove lock_depth from event entry") Because this broke userspace, it was determined that we must keep those 4 bytes around. (commit a3a4a5acd "Regression: partial revert "tracing: Remove lock_depth from event entry"") This unfortunately wastes space in the ring buffer. 4 bytes per event, where a lot of events are just 24 bytes. That's 16% of the buffer wasted. A million events will add 4 megs of white space into the buffer. It was later noticed that PowerTop v2beta could not work on systems where the kernel was 64 bit but the userspace was 32 bits. The reason was because the offsets are different between the two and the hard coded offset of one would not work with the other. With PowerTop v2 final, it implemented the same interface that both perf and trace-cmd use. That is, it reads the format file of the event to find the offsets of the fields it needs. This fixes the problem with running powertop on a 32 bit userspace running on a 64 bit kernel. It also no longer requires the 4 byte padding. As PowerTop v2 has been out for a while, and is included in all major distributions, it is time that we can safely remove the 4 bytes of padding. Users of PowerTop v2beta should upgrade to PowerTop v2 final. Cc: Linus Torvalds Acked-by: Arjan van de Ven Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d62248dfda76..a387bd271e71 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1173,7 +1173,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; - entry->padding = 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | -- cgit v1.2.3 From 34600f0e9c33c9cd48ae87448205f51332b7d5a0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 22 Jan 2013 13:35:11 -0500 Subject: tracing: Fix race with max_tr and changing tracers There's a race condition between the setting of a new tracer and the update of the max trace buffers (the swap). When a new tracer is added, it sets current_trace to nop_trace before disabling the old tracer. At this moment, if the old tracer uses update_max_tr(), the update may trigger the warning against !current_trace->use_max-tr, as nop_trace doesn't have that set. As update_max_tr() requires that interrupts be disabled, we can add a check to see if current_trace == nop_trace and bail if it does. Then when disabling the current_trace, set it to nop_trace and run synchronize_sched(). This will make sure all calls to update_max_tr() have completed (it was called with interrupts disabled). As a clean up, this commit also removes shrinking and recreating the max_tr buffer if the old and new tracers both have use_max_tr set. The old way use to always shrink the buffer, and then expand it for the next tracer. This is a waste of time. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a387bd271e71..d2a658349ca1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -709,10 +709,14 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) return; WARN_ON_ONCE(!irqs_disabled()); - if (!current_trace->use_max_tr) { - WARN_ON_ONCE(1); + + /* If we disabled the tracer, stop now */ + if (current_trace == &nop_trace) return; - } + + if (WARN_ON_ONCE(!current_trace->use_max_tr)) + return; + arch_spin_lock(&ftrace_max_lock); tr->buffer = max_tr.buffer; @@ -3185,6 +3189,7 @@ static int tracing_set_tracer(const char *buf) static struct trace_option_dentry *topts; struct trace_array *tr = &global_trace; struct tracer *t; + bool had_max_tr; int ret = 0; mutex_lock(&trace_types_lock); @@ -3211,7 +3216,19 @@ static int tracing_set_tracer(const char *buf) trace_branch_disable(); if (current_trace && current_trace->reset) current_trace->reset(tr); - if (current_trace && current_trace->use_max_tr) { + + had_max_tr = current_trace && current_trace->use_max_tr; + current_trace = &nop_trace; + + if (had_max_tr && !t->use_max_tr) { + /* + * We need to make sure that the update_max_tr sees that + * current_trace changed to nop_trace to keep it from + * swapping the buffers after we resize it. + * The update_max_tr is called from interrupts disabled + * so a synchronized_sched() is sufficient. + */ + synchronize_sched(); /* * We don't free the ring buffer. instead, resize it because * The max_tr ring buffer has some state (e.g. ring->clock) and @@ -3222,10 +3239,8 @@ static int tracing_set_tracer(const char *buf) } destroy_trace_option_files(topts); - current_trace = &nop_trace; - topts = create_trace_option_files(t); - if (t->use_max_tr) { + if (t->use_max_tr && !had_max_tr) { /* we need to make per cpu buffer sizes equivalent */ ret = resize_buffer_duplicate_size(&max_tr, &global_trace, RING_BUFFER_ALL_CPUS); -- cgit v1.2.3 From b736f48bda54ec75b7dc9306884c3843f1a78a0a Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sun, 18 Nov 2012 21:27:45 -0800 Subject: tracing: Mark tracing_dentry_percpu() static Nothing outside of kernel/trace/trace.c references tracing_dentry_percpu(). Link: http://lkml.kernel.org/r/1353302917-13995-7-git-send-email-josh@joshtriplett.org Signed-off-by: Josh Triplett Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d2a658349ca1..ca9b7dfed8ef 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4506,7 +4506,7 @@ struct dentry *tracing_init_dentry(void) static struct dentry *d_percpu; -struct dentry *tracing_dentry_percpu(void) +static struct dentry *tracing_dentry_percpu(void) { static int once; struct dentry *d_tracer; -- cgit v1.2.3 From 821465295b36136998ef294fe176fba4e09c1cd9 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Mon, 19 Nov 2012 13:21:01 +0800 Subject: tracing: Use __this_cpu_inc/dec operation instead of __get_cpu_var __this_cpu_inc_return() or __this_cpu_dec generates a single instruction, which is faster than __get_cpu_var operation. Link: http://lkml.kernel.org/r/50A9C1BD.1060308@gmail.com Reviewed-by: Christoph Lameter Signed-off-by: Shan Wei Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ca9b7dfed8ef..07888e15c694 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1344,7 +1344,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, */ preempt_disable_notrace(); - use_stack = ++__get_cpu_var(ftrace_stack_reserve); + use_stack = __this_cpu_inc_return(ftrace_stack_reserve); /* * We don't need any atomic variables, just a barrier. * If an interrupt comes in, we don't care, because it would @@ -1398,7 +1398,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, out: /* Again, don't let gcc optimize things here */ barrier(); - __get_cpu_var(ftrace_stack_reserve)--; + __this_cpu_dec(ftrace_stack_reserve); preempt_enable_notrace(); } -- cgit v1.2.3 From 38dbe0b137bfe6ea92be495017885c0785179a02 Mon Sep 17 00:00:00 2001 From: Jovi Zhang Date: Fri, 25 Jan 2013 18:03:07 +0800 Subject: tracing: Remove second iterator initializer The trace iterator is already initialized by trace_init_global_iter(), so there is no need to initialize it again. Link: http://lkml.kernel.org/r/CACV3sb+G1YnO6168JhY3dEadmJi58pA5-2cSZT8E0WVHJNFt9Q@mail.gmail.com Signed-off-by: Jovi Zhang Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 07888e15c694..d399592701aa 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5030,6 +5030,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) if (disable_tracing) ftrace_kill(); + /* Simulate the iterator */ trace_init_global_iter(&iter); for_each_tracing_cpu(cpu) { @@ -5041,10 +5042,6 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) /* don't look at user memory in panic mode */ trace_flags &= ~TRACE_ITER_SYM_USEROBJ; - /* Simulate the iterator */ - iter.tr = &global_trace; - iter.trace = current_trace; - switch (oops_dump_mode) { case DUMP_ALL: iter.cpu_file = TRACE_PIPE_ALL_CPU; -- cgit v1.2.3 From ad964704ba9326d027fc10fd0099b7c880e50172 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 29 Jan 2013 17:45:49 -0500 Subject: ring-buffer: Add stats field for amount read from trace ring buffer Add a stat about the number of events read from the ring buffer: # cat /debug/tracing/per_cpu/cpu0/stats entries: 39869 overrun: 870512 commit overrun: 0 bytes: 1449912 oldest event ts: 6561.368690 now ts: 6565.246426 dropped events: 0 read events: 112 <-- Added Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d399592701aa..90a1c71fdbfc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4430,6 +4430,9 @@ tracing_stats_read(struct file *filp, char __user *ubuf, cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu); trace_seq_printf(s, "dropped events: %ld\n", cnt); + cnt = ring_buffer_read_events_cpu(tr->buffer, cpu); + trace_seq_printf(s, "read events: %ld\n", cnt); + count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); kfree(s); -- cgit v1.2.3 From 2fd196ec1eab2623096e7fc7e6f3976160392bce Mon Sep 17 00:00:00 2001 From: Hiraku Toyooka Date: Wed, 26 Dec 2012 11:52:52 +0900 Subject: tracing: Replace static old_tracer check of tracer name Currently the trace buffer read functions use a static variable "old_tracer" for detecting if the current tracer changes. This was suitable for a single trace file ("trace"), but to add a snapshot feature that will use the same function for its file, a check against a static variable is not sufficient. To use the output functions for two different files, instead of storing the current tracer in a static variable, as the trace iterator descriptor contains a pointer to the original current tracer's name, that pointer can now be used to check if the current tracer has changed between different reads of the trace file. Link: http://lkml.kernel.org/r/20121226025252.3252.9276.stgit@liselsia Signed-off-by: Hiraku Toyooka Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 90a1c71fdbfc..2c724662a3e8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1948,18 +1948,20 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) static void *s_start(struct seq_file *m, loff_t *pos) { struct trace_iterator *iter = m->private; - static struct tracer *old_tracer; int cpu_file = iter->cpu_file; void *p = NULL; loff_t l = 0; int cpu; - /* copy the tracer to avoid using a global lock all around */ + /* + * copy the tracer to avoid using a global lock all around. + * iter->trace is a copy of current_trace, the pointer to the + * name may be used instead of a strcmp(), as iter->trace->name + * will point to the same string as current_trace->name. + */ mutex_lock(&trace_types_lock); - if (unlikely(old_tracer != current_trace && current_trace)) { - old_tracer = current_trace; + if (unlikely(current_trace && iter->trace->name != current_trace->name)) *iter->trace = *current_trace; - } mutex_unlock(&trace_types_lock); atomic_inc(&trace_record_cmdline_disabled); @@ -3494,7 +3496,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_iterator *iter = filp->private_data; - static struct tracer *old_tracer; ssize_t sret; /* return any leftover data */ @@ -3506,10 +3507,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); - if (unlikely(old_tracer != current_trace && current_trace)) { - old_tracer = current_trace; + if (unlikely(current_trace && iter->trace->name != current_trace->name)) *iter->trace = *current_trace; - } mutex_unlock(&trace_types_lock); /* @@ -3665,7 +3664,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, .ops = &tracing_pipe_buf_ops, .spd_release = tracing_spd_release_pipe, }; - static struct tracer *old_tracer; ssize_t ret; size_t rem; unsigned int i; @@ -3675,10 +3673,8 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); - if (unlikely(old_tracer != current_trace && current_trace)) { - old_tracer = current_trace; + if (unlikely(current_trace && iter->trace->name != current_trace->name)) *iter->trace = *current_trace; - } mutex_unlock(&trace_types_lock); mutex_lock(&iter->mutex); -- cgit v1.2.3 From debdd57f5145f3c6a4b3f8d0126abd1a2def7fc6 Mon Sep 17 00:00:00 2001 From: Hiraku Toyooka Date: Wed, 26 Dec 2012 11:53:00 +0900 Subject: tracing: Make a snapshot feature available from userspace Ftrace has a snapshot feature available from kernel space and latency tracers (e.g. irqsoff) are using it. This patch enables user applictions to take a snapshot via debugfs. Add "snapshot" debugfs file in "tracing" directory. snapshot: This is used to take a snapshot and to read the output of the snapshot. # echo 1 > snapshot This will allocate the spare buffer for snapshot (if it is not allocated), and take a snapshot. # cat snapshot This will show contents of the snapshot. # echo 0 > snapshot This will free the snapshot if it is allocated. Any other positive values will clear the snapshot contents if the snapshot is allocated, or return EINVAL if it is not allocated. Link: http://lkml.kernel.org/r/20121226025300.3252.86850.stgit@liselsia Cc: Jiri Olsa Cc: David Sharp Signed-off-by: Hiraku Toyooka [ Fixed irqsoff selftest and also a conflict with a change that fixes the update_max_tr. ] Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 166 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 140 insertions(+), 26 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2c724662a3e8..70dce64b9ecf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -710,12 +710,11 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) WARN_ON_ONCE(!irqs_disabled()); - /* If we disabled the tracer, stop now */ - if (current_trace == &nop_trace) - return; - - if (WARN_ON_ONCE(!current_trace->use_max_tr)) + if (!current_trace->allocated_snapshot) { + /* Only the nop tracer should hit this when disabling */ + WARN_ON_ONCE(current_trace != &nop_trace); return; + } arch_spin_lock(&ftrace_max_lock); @@ -743,10 +742,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) return; WARN_ON_ONCE(!irqs_disabled()); - if (!current_trace->use_max_tr) { - WARN_ON_ONCE(1); + if (WARN_ON_ONCE(!current_trace->allocated_snapshot)) return; - } arch_spin_lock(&ftrace_max_lock); @@ -866,10 +863,13 @@ int register_tracer(struct tracer *type) current_trace = type; - /* If we expanded the buffers, make sure the max is expanded too */ - if (ring_buffer_expanded && type->use_max_tr) - ring_buffer_resize(max_tr.buffer, trace_buf_size, - RING_BUFFER_ALL_CPUS); + if (type->use_max_tr) { + /* If we expanded the buffers, make sure the max is expanded too */ + if (ring_buffer_expanded) + ring_buffer_resize(max_tr.buffer, trace_buf_size, + RING_BUFFER_ALL_CPUS); + type->allocated_snapshot = true; + } /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); @@ -885,10 +885,14 @@ int register_tracer(struct tracer *type) /* Only reset on passing, to avoid touching corrupted buffers */ tracing_reset_online_cpus(tr); - /* Shrink the max buffer again */ - if (ring_buffer_expanded && type->use_max_tr) - ring_buffer_resize(max_tr.buffer, 1, - RING_BUFFER_ALL_CPUS); + if (type->use_max_tr) { + type->allocated_snapshot = false; + + /* Shrink the max buffer again */ + if (ring_buffer_expanded) + ring_buffer_resize(max_tr.buffer, 1, + RING_BUFFER_ALL_CPUS); + } printk(KERN_CONT "PASSED\n"); } @@ -1964,7 +1968,11 @@ static void *s_start(struct seq_file *m, loff_t *pos) *iter->trace = *current_trace; mutex_unlock(&trace_types_lock); - atomic_inc(&trace_record_cmdline_disabled); + if (iter->snapshot && iter->trace->use_max_tr) + return ERR_PTR(-EBUSY); + + if (!iter->snapshot) + atomic_inc(&trace_record_cmdline_disabled); if (*pos != iter->pos) { iter->ent = NULL; @@ -2003,7 +2011,11 @@ static void s_stop(struct seq_file *m, void *p) { struct trace_iterator *iter = m->private; - atomic_dec(&trace_record_cmdline_disabled); + if (iter->snapshot && iter->trace->use_max_tr) + return; + + if (!iter->snapshot) + atomic_dec(&trace_record_cmdline_disabled); trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); } @@ -2438,7 +2450,7 @@ static const struct seq_operations tracer_seq_ops = { }; static struct trace_iterator * -__tracing_open(struct inode *inode, struct file *file) +__tracing_open(struct inode *inode, struct file *file, bool snapshot) { long cpu_file = (long) inode->i_private; struct trace_iterator *iter; @@ -2471,10 +2483,11 @@ __tracing_open(struct inode *inode, struct file *file) if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) goto fail; - if (current_trace && current_trace->print_max) + if ((current_trace && current_trace->print_max) || snapshot) iter->tr = &max_tr; else iter->tr = &global_trace; + iter->snapshot = snapshot; iter->pos = -1; mutex_init(&iter->mutex); iter->cpu_file = cpu_file; @@ -2491,8 +2504,9 @@ __tracing_open(struct inode *inode, struct file *file) if (trace_clocks[trace_clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; - /* stop the trace while dumping */ - tracing_stop(); + /* stop the trace while dumping if we are not opening "snapshot" */ + if (!iter->snapshot) + tracing_stop(); if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { for_each_tracing_cpu(cpu) { @@ -2555,8 +2569,9 @@ static int tracing_release(struct inode *inode, struct file *file) if (iter->trace && iter->trace->close) iter->trace->close(iter); - /* reenable tracing if it was previously enabled */ - tracing_start(); + if (!iter->snapshot) + /* reenable tracing if it was previously enabled */ + tracing_start(); mutex_unlock(&trace_types_lock); mutex_destroy(&iter->mutex); @@ -2584,7 +2599,7 @@ static int tracing_open(struct inode *inode, struct file *file) } if (file->f_mode & FMODE_READ) { - iter = __tracing_open(inode, file); + iter = __tracing_open(inode, file, false); if (IS_ERR(iter)) ret = PTR_ERR(iter); else if (trace_flags & TRACE_ITER_LATENCY_FMT) @@ -3219,7 +3234,7 @@ static int tracing_set_tracer(const char *buf) if (current_trace && current_trace->reset) current_trace->reset(tr); - had_max_tr = current_trace && current_trace->use_max_tr; + had_max_tr = current_trace && current_trace->allocated_snapshot; current_trace = &nop_trace; if (had_max_tr && !t->use_max_tr) { @@ -3238,6 +3253,8 @@ static int tracing_set_tracer(const char *buf) */ ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS); set_buffer_entries(&max_tr, 1); + tracing_reset_online_cpus(&max_tr); + current_trace->allocated_snapshot = false; } destroy_trace_option_files(topts); @@ -3248,6 +3265,7 @@ static int tracing_set_tracer(const char *buf) RING_BUFFER_ALL_CPUS); if (ret < 0) goto out; + t->allocated_snapshot = true; } if (t->init) { @@ -4066,6 +4084,87 @@ static int tracing_clock_open(struct inode *inode, struct file *file) return single_open(file, tracing_clock_show, NULL); } +#ifdef CONFIG_TRACER_SNAPSHOT +static int tracing_snapshot_open(struct inode *inode, struct file *file) +{ + struct trace_iterator *iter; + int ret = 0; + + if (file->f_mode & FMODE_READ) { + iter = __tracing_open(inode, file, true); + if (IS_ERR(iter)) + ret = PTR_ERR(iter); + } + return ret; +} + +static ssize_t +tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + unsigned long val; + int ret; + + ret = tracing_update_buffers(); + if (ret < 0) + return ret; + + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); + if (ret) + return ret; + + mutex_lock(&trace_types_lock); + + if (current_trace && current_trace->use_max_tr) { + ret = -EBUSY; + goto out; + } + + switch (val) { + case 0: + if (current_trace->allocated_snapshot) { + /* free spare buffer */ + ring_buffer_resize(max_tr.buffer, 1, + RING_BUFFER_ALL_CPUS); + set_buffer_entries(&max_tr, 1); + tracing_reset_online_cpus(&max_tr); + current_trace->allocated_snapshot = false; + } + break; + case 1: + if (!current_trace->allocated_snapshot) { + /* allocate spare buffer */ + ret = resize_buffer_duplicate_size(&max_tr, + &global_trace, RING_BUFFER_ALL_CPUS); + if (ret < 0) + break; + current_trace->allocated_snapshot = true; + } + + local_irq_disable(); + /* Now, we're going to swap */ + update_max_tr(&global_trace, current, smp_processor_id()); + local_irq_enable(); + break; + default: + if (current_trace->allocated_snapshot) + tracing_reset_online_cpus(&max_tr); + else + ret = -EINVAL; + break; + } + + if (ret >= 0) { + *ppos += cnt; + ret = cnt; + } +out: + mutex_unlock(&trace_types_lock); + return ret; +} +#endif /* CONFIG_TRACER_SNAPSHOT */ + + static const struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, @@ -4122,6 +4221,16 @@ static const struct file_operations trace_clock_fops = { .write = tracing_clock_write, }; +#ifdef CONFIG_TRACER_SNAPSHOT +static const struct file_operations snapshot_fops = { + .open = tracing_snapshot_open, + .read = seq_read, + .write = tracing_snapshot_write, + .llseek = tracing_seek, + .release = tracing_release, +}; +#endif /* CONFIG_TRACER_SNAPSHOT */ + struct ftrace_buffer_info { struct trace_array *tr; void *spare; @@ -4921,6 +5030,11 @@ static __init int tracer_init_debugfs(void) &ftrace_update_tot_cnt, &tracing_dyn_info_fops); #endif +#ifdef CONFIG_TRACER_SNAPSHOT + trace_create_file("snapshot", 0644, d_tracer, + (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops); +#endif + create_trace_options_dir(); for_each_tracing_cpu(cpu) -- cgit v1.2.3 From d840f718d28715a9833c1a8f46c2493ff3fd219b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 1 Feb 2013 18:38:47 -0500 Subject: tracing: Init current_trace to nop_trace and remove NULL checks On early boot up, when the ftrace ring buffer is initialized, the static variable current_trace is initialized to &nop_trace. Before this initialization, current_trace is NULL and will never become NULL again. It is always reassigned to a ftrace tracer. Several places check if current_trace is NULL before it uses it, and this check is frivolous, because at the point in time when the checks are made the only way current_trace could be NULL is if ftrace failed its allocations at boot up, and the paths to these locations would probably not be possible. By initializing current_trace to &nop_trace where it is declared, current_trace will never be NULL, and we can remove all these checks of current_trace being NULL which never needed to be checked in the first place. Cc: Dan Carpenter Cc: Hiraku Toyooka Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 70dce64b9ecf..5d520b7bb4c5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -249,7 +249,7 @@ static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT; static struct tracer *trace_types __read_mostly; /* current_trace points to the tracer that is currently active */ -static struct tracer *current_trace __read_mostly; +static struct tracer *current_trace __read_mostly = &nop_trace; /* * trace_types_lock is used to protect the trace_types list. @@ -2100,8 +2100,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) unsigned long total; const char *name = "preemption"; - if (type) - name = type->name; + name = type->name; get_total_entries(tr, &total, &entries); @@ -2477,13 +2476,12 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) if (!iter->trace) goto fail; - if (current_trace) - *iter->trace = *current_trace; + *iter->trace = *current_trace; if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) goto fail; - if ((current_trace && current_trace->print_max) || snapshot) + if (current_trace->print_max || snapshot) iter->tr = &max_tr; else iter->tr = &global_trace; @@ -3037,10 +3035,7 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, int r; mutex_lock(&trace_types_lock); - if (current_trace) - r = sprintf(buf, "%s\n", current_trace->name); - else - r = sprintf(buf, "\n"); + r = sprintf(buf, "%s\n", current_trace->name); mutex_unlock(&trace_types_lock); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); @@ -3231,10 +3226,10 @@ static int tracing_set_tracer(const char *buf) goto out; trace_branch_disable(); - if (current_trace && current_trace->reset) + if (current_trace->reset) current_trace->reset(tr); - had_max_tr = current_trace && current_trace->allocated_snapshot; + had_max_tr = current_trace->allocated_snapshot; current_trace = &nop_trace; if (had_max_tr && !t->use_max_tr) { @@ -3373,8 +3368,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) ret = -ENOMEM; goto fail; } - if (current_trace) - *iter->trace = *current_trace; + *iter->trace = *current_trace; if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { ret = -ENOMEM; @@ -3525,7 +3519,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); - if (unlikely(current_trace && iter->trace->name != current_trace->name)) + if (unlikely(iter->trace->name != current_trace->name)) *iter->trace = *current_trace; mutex_unlock(&trace_types_lock); @@ -3691,7 +3685,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); - if (unlikely(current_trace && iter->trace->name != current_trace->name)) + if (unlikely(iter->trace->name != current_trace->name)) *iter->trace = *current_trace; mutex_unlock(&trace_types_lock); @@ -4115,7 +4109,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, mutex_lock(&trace_types_lock); - if (current_trace && current_trace->use_max_tr) { + if (current_trace->use_max_tr) { ret = -EBUSY; goto out; } @@ -5299,7 +5293,7 @@ __init static int tracer_alloc_buffers(void) init_irq_work(&trace_work_wakeup, trace_wake_up); register_tracer(&nop_trace); - current_trace = &nop_trace; + /* All seems OK, enable tracing */ tracing_disabled = 0; -- cgit v1.2.3 From 8bd75c77b7c6a3954140dd2e20346aef3efe4a35 Mon Sep 17 00:00:00 2001 From: Clark Williams Date: Thu, 7 Feb 2013 09:47:07 -0600 Subject: sched/rt: Move rt specific bits into new header file Move rt scheduler definitions out of include/linux/sched.h into new file include/linux/sched/rt.h Signed-off-by: Clark Williams Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20130207094707.7b9f825f@riff.lan Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3c13e46d7d24..4d2e4afd956e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "trace.h" #include "trace_output.h" -- cgit v1.2.3 From d8741e2e88ac9a458765a0c7b4e6542d7c038334 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 5 Mar 2013 10:25:16 -0500 Subject: tracing: Add help of snapshot feature when snapshot is empty When cat'ing the snapshot file, instead of showing an empty trace header like the trace file does, show how to use the snapshot feature. Also, this is a good place to show if the snapshot has been allocated or not. Users may want to "pre allocate" the snapshot to have a fast "swap" of the current buffer. Otherwise, a swap would be slow and might fail as it would need to allocate the snapshot buffer, and that might fail under tight memory constraints. Here's what it looked like before: # tracer: nop # # entries-in-buffer/entries-written: 0/0 #P:4 # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | Here's what it looks like now: # tracer: nop # # # * Snapshot is freed * # # Snapshot commands: # echo 0 > snapshot : Clears and frees snapshot buffer # echo 1 > snapshot : Allocates snapshot buffer, if not already allocated. # Takes a snapshot of the main buffer. # echo 2 > snapshot : Clears snapshot buffer (but does not allocate) # (Doesn't have to be '2' works with any number that # is not a '0' or '1') Acked-by: Hiraku Toyooka Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c2e2c2310374..9e3120b8a2ad 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2400,6 +2400,27 @@ static void test_ftrace_alive(struct seq_file *m) seq_printf(m, "# MAY BE MISSING FUNCTION EVENTS\n"); } +#ifdef CONFIG_TRACER_MAX_TRACE +static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) +{ + if (iter->trace->allocated_snapshot) + seq_printf(m, "#\n# * Snapshot is allocated *\n#\n"); + else + seq_printf(m, "#\n# * Snapshot is freed *\n#\n"); + + seq_printf(m, "# Snapshot commands:\n"); + seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"); + seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"); + seq_printf(m, "# Takes a snapshot of the main buffer.\n"); + seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate)\n"); + seq_printf(m, "# (Doesn't have to be '2' works with any number that\n"); + seq_printf(m, "# is not a '0' or '1')\n"); +} +#else +/* Should never be called */ +static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { } +#endif + static int s_show(struct seq_file *m, void *v) { struct trace_iterator *iter = v; @@ -2411,7 +2432,9 @@ static int s_show(struct seq_file *m, void *v) seq_puts(m, "#\n"); test_ftrace_alive(m); } - if (iter->trace && iter->trace->print_header) + if (iter->snapshot && trace_empty(iter)) + print_snapshot_help(m, iter); + else if (iter->trace && iter->trace->print_header) iter->trace->print_header(m); else trace_default_header(m); -- cgit v1.2.3 From c9960e48543799f168c4c9486f9790fb686ce5a8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 5 Mar 2013 10:53:02 -0500 Subject: tracing: Do not return EINVAL in snapshot when not allocated To use the tracing snapshot feature, writing a '1' into the snapshot file causes the snapshot buffer to be allocated if it has not already been allocated and dose a 'swap' with the main buffer, so that the snapshot now contains what was in the main buffer, and the main buffer now writes to what was the snapshot buffer. To free the snapshot buffer, a '0' is written into the snapshot file. To clear the snapshot buffer, any number but a '0' or '1' is written into the snapshot file. But if the file is not allocated it returns -EINVAL error code. This is rather pointless. It is better just to do nothing and return success. Acked-by: Hiraku Toyooka Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9e3120b8a2ad..1f835a83cb2c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4167,8 +4167,6 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, default: if (current_trace->allocated_snapshot) tracing_reset_online_cpus(&max_tr); - else - ret = -EINVAL; break; } -- cgit v1.2.3