From bcd83ea6cbfee54e33d1527b87538dc99ca2137b Mon Sep 17 00:00:00 2001
From: Daniel Walter <sahne@0x90.at>
Date: Wed, 26 Sep 2012 22:08:38 +0200
Subject: tracing: Replace strict_strto* with kstrto*

 * remove old string conversions with kstrto*

Link: http://lkml.kernel.org/r/20120926200838.GC1244@0x90.at

Signed-off-by: Daniel Walter <sahne@0x90.at>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 31e4f55773f1..f6928edacd6d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -431,7 +431,7 @@ static int __init set_tracing_thresh(char *str)
 
 	if (!str)
 		return 0;
-	ret = strict_strtoul(str, 0, &threshold);
+	ret = kstrtoul(str, 0, &threshold);
 	if (ret < 0)
 		return 0;
 	tracing_thresh = threshold * 1000;
-- 
cgit v1.2.3


From 884bfe89a462fcc85c8abd96171519cf2fe70929 Mon Sep 17 00:00:00 2001
From: Slava Pestov <slavapestov@google.com>
Date: Fri, 15 Jul 2011 14:23:58 -0700
Subject: ring-buffer: Add a 'dropped events' counter

The existing 'overrun' counter is incremented when the ring
buffer wraps around, with overflow on (the default). We wanted
a way to count requests lost from the buffer filling up with
overflow off, too. I decided to add a new counter instead
of retro-fitting the existing one because it seems like a
different statistic to count conceptually, and also because
of how the code was structured.

Link: http://lkml.kernel.org/r/1310765038-26399-1-git-send-email-slavapestov@google.com

Signed-off-by: Slava Pestov <slavapestov@google.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f6928edacd6d..36c213fbfce7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4385,6 +4385,9 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 	usec_rem = do_div(t, USEC_PER_SEC);
 	trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
 
+	cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "dropped events: %ld\n", cnt);
+
 	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
 
 	kfree(s);
-- 
cgit v1.2.3


From b382ede6b5eb8188926b72a9ef42fd2354342a97 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 10 Oct 2012 21:44:34 -0400
Subject: tracing: Expand ring buffer when trace_printk() is used

Since tracing is not used by 99% of Linux users, even though tracing
may be configured in, it does not make sense to allocate 1.4 Megs
per CPU for the ring buffers if they are not used. Thus, on boot up
the ring buffers are set to a minimal size until something needs the
and they are expanded.

This works well for events and tracers (function, etc), but for the
asynchronous use of trace_printk() which can write to the ring buffer
at any time, does not expand the buffers.

On boot up a check is made to see if any trace_printk() is used to
see if the trace_printk() temp buffer pages should be allocated. This
same code can be used to expand the buffers as well.

Suggested-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 36c213fbfce7..a5411b7414b1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1571,6 +1571,9 @@ void trace_printk_init_buffers(void)
 
 	pr_info("ftrace: Allocated trace_printk buffers\n");
 
+	/* Expand the buffers to set size */
+	tracing_update_buffers();
+
 	buffers_allocated = 1;
 }
 
@@ -3030,6 +3033,10 @@ static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
 	 */
 	ring_buffer_expanded = 1;
 
+	/* May be called before buffers are initialized */
+	if (!global_trace.buffer)
+		return 0;
+
 	ret = ring_buffer_resize(global_trace.buffer, size, cpu);
 	if (ret < 0)
 		return ret;
-- 
cgit v1.2.3


From 81698831bc462ff16f76bc11249a1e492424da4c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 11 Oct 2012 10:15:05 -0400
Subject: tracing: Enable comm recording if trace_printk() is used

If comm recording is not enabled when trace_printk() is used then
you just get this type of output:

[ adding trace_printk("hello! %d", irq); in do_IRQ ]

           <...>-2843  [001] d.h.    80.812300: do_IRQ: hello! 14
           <...>-2734  [002] d.h2    80.824664: do_IRQ: hello! 14
           <...>-2713  [003] d.h.    80.829971: do_IRQ: hello! 14
           <...>-2814  [000] d.h.    80.833026: do_IRQ: hello! 14

By enabling the comm recorder when trace_printk is enabled:

       hackbench-6715  [001] d.h.   193.233776: do_IRQ: hello! 21
            sshd-2659  [001] d.h.   193.665862: do_IRQ: hello! 21
          <idle>-0     [001] d.h1   193.665996: do_IRQ: hello! 21

Suggested-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 36 ++++++++++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a5411b7414b1..b90a827a4641 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1559,10 +1559,10 @@ static int alloc_percpu_trace_buffer(void)
 	return -ENOMEM;
 }
 
+static int buffers_allocated;
+
 void trace_printk_init_buffers(void)
 {
-	static int buffers_allocated;
-
 	if (buffers_allocated)
 		return;
 
@@ -1575,6 +1575,34 @@ void trace_printk_init_buffers(void)
 	tracing_update_buffers();
 
 	buffers_allocated = 1;
+
+	/*
+	 * trace_printk_init_buffers() can be called by modules.
+	 * If that happens, then we need to start cmdline recording
+	 * directly here. If the global_trace.buffer is already
+	 * allocated here, then this was called by module code.
+	 */
+	if (global_trace.buffer)
+		tracing_start_cmdline_record();
+}
+
+void trace_printk_start_comm(void)
+{
+	/* Start tracing comms if trace printk is set */
+	if (!buffers_allocated)
+		return;
+	tracing_start_cmdline_record();
+}
+
+static void trace_printk_start_stop_comm(int enabled)
+{
+	if (!buffers_allocated)
+		return;
+
+	if (enabled)
+		tracing_start_cmdline_record();
+	else
+		tracing_stop_cmdline_record();
 }
 
 /**
@@ -2797,6 +2825,9 @@ static void set_tracer_flags(unsigned int mask, int enabled)
 
 	if (mask == TRACE_ITER_OVERWRITE)
 		ring_buffer_change_overwrite(global_trace.buffer, enabled);
+
+	if (mask == TRACE_ITER_PRINTK)
+		trace_printk_start_stop_comm(enabled);
 }
 
 static ssize_t
@@ -5099,6 +5130,7 @@ __init static int tracer_alloc_buffers(void)
 
 	/* Only allocate trace_printk buffers if a trace_printk exists */
 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
+		/* Must be called before global_trace.buffer is allocated */
 		trace_printk_init_buffers();
 
 	/* To save memory, keep the ring buffer size to its minimum */
-- 
cgit v1.2.3


From 7ffbd48d5cab22bcd1120eb2349db1319e2d827a Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 11 Oct 2012 12:14:25 -0400
Subject: tracing: Cache comms only after an event occurred

Whenever an event is registered, the comm of tasks are saved at
every task switch instead of saving them at every event. But if
an event isn't executed much, the comm cache will be filled up
by tasks that did not record the event and you lose out on the comms
that did.

Here's an example, if you enable the following events:

echo 1 > /debug/tracing/events/kvm/kvm_cr/enable
echo 1 > /debug/tracing/events/net/net_dev_xmit/enable

Note, there's no kvm running on this machine so the first event will
never be triggered, but because it is enabled, the storing of comms
will continue. If we now disable the network event:

echo 0 > /debug/tracing/events/net/net_dev_xmit/enable

and look at the trace:

cat /debug/tracing/trace
            sshd-2672  [001] ..s2   375.731616: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=242 rc=0
            sshd-2672  [001] ..s1   375.731617: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=242 rc=0
            sshd-2672  [001] ..s2   375.859356: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=242 rc=0
            sshd-2672  [001] ..s1   375.859357: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=242 rc=0
            sshd-2672  [001] ..s2   375.947351: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=242 rc=0
            sshd-2672  [001] ..s1   375.947352: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=242 rc=0
            sshd-2672  [001] ..s2   376.035383: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=242 rc=0
            sshd-2672  [001] ..s1   376.035383: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=242 rc=0
            sshd-2672  [001] ..s2   377.563806: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=226 rc=0
            sshd-2672  [001] ..s1   377.563807: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=226 rc=0
            sshd-2672  [001] ..s2   377.563834: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6be0 len=114 rc=0
            sshd-2672  [001] ..s1   377.563842: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6be0 len=114 rc=0

We see that process 2672 which triggered the events has the comm "sshd".
But if we run hackbench for a bit and look again:

cat /debug/tracing/trace
           <...>-2672  [001] ..s2   375.731616: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=242 rc=0
           <...>-2672  [001] ..s1   375.731617: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=242 rc=0
           <...>-2672  [001] ..s2   375.859356: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=242 rc=0
           <...>-2672  [001] ..s1   375.859357: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=242 rc=0
           <...>-2672  [001] ..s2   375.947351: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=242 rc=0
           <...>-2672  [001] ..s1   375.947352: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=242 rc=0
           <...>-2672  [001] ..s2   376.035383: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=242 rc=0
           <...>-2672  [001] ..s1   376.035383: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=242 rc=0
           <...>-2672  [001] ..s2   377.563806: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6de0 len=226 rc=0
           <...>-2672  [001] ..s1   377.563807: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6de0 len=226 rc=0
           <...>-2672  [001] ..s2   377.563834: net_dev_xmit: dev=eth0 skbaddr=ffff88005cbb6be0 len=114 rc=0
           <...>-2672  [001] ..s1   377.563842: net_dev_xmit: dev=br0 skbaddr=ffff88005cbb6be0 len=114 rc=0

The stored "sshd" comm has been flushed out and we get a useless "<...>".

But by only storing comms after a trace event occurred, we can run
hackbench all day and still get the same output.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b90a827a4641..88111b08b2c1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -77,6 +77,13 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
 	return 0;
 }
 
+/*
+ * To prevent the comm cache from being overwritten when no
+ * tracing is active, only save the comm when a trace event
+ * occurred.
+ */
+static DEFINE_PER_CPU(bool, trace_cmdline_save);
+
 /*
  * Kill all tracing for good (never come back).
  * It is initialized to 1 but will turn to zero if the initialization
@@ -1135,6 +1142,11 @@ void tracing_record_cmdline(struct task_struct *tsk)
 	    !tracing_is_on())
 		return;
 
+	if (!__this_cpu_read(trace_cmdline_save))
+		return;
+
+	__this_cpu_write(trace_cmdline_save, false);
+
 	trace_save_cmdline(tsk);
 }
 
@@ -1178,13 +1190,20 @@ trace_buffer_lock_reserve(struct ring_buffer *buffer,
 	return event;
 }
 
+void
+__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
+{
+	__this_cpu_write(trace_cmdline_save, true);
+	ring_buffer_unlock_commit(buffer, event);
+}
+
 static inline void
 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
 			     struct ring_buffer_event *event,
 			     unsigned long flags, int pc,
 			     int wake)
 {
-	ring_buffer_unlock_commit(buffer, event);
+	__buffer_unlock_commit(buffer, event);
 
 	ftrace_trace_stack(buffer, flags, 6, pc);
 	ftrace_trace_userstack(buffer, flags, pc);
@@ -1232,7 +1251,7 @@ void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
 					    unsigned long flags, int pc,
 					    struct pt_regs *regs)
 {
-	ring_buffer_unlock_commit(buffer, event);
+	__buffer_unlock_commit(buffer, event);
 
 	ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
 	ftrace_trace_userstack(buffer, flags, pc);
@@ -1269,7 +1288,7 @@ trace_function(struct trace_array *tr,
 	entry->parent_ip		= parent_ip;
 
 	if (!filter_check_discard(call, entry, buffer, event))
-		ring_buffer_unlock_commit(buffer, event);
+		__buffer_unlock_commit(buffer, event);
 }
 
 void
@@ -1362,7 +1381,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 	entry->size = trace.nr_entries;
 
 	if (!filter_check_discard(call, entry, buffer, event))
-		ring_buffer_unlock_commit(buffer, event);
+		__buffer_unlock_commit(buffer, event);
 
  out:
 	/* Again, don't let gcc optimize things here */
@@ -1458,7 +1477,7 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
 
 	save_stack_trace_user(&trace);
 	if (!filter_check_discard(call, entry, buffer, event))
-		ring_buffer_unlock_commit(buffer, event);
+		__buffer_unlock_commit(buffer, event);
 
  out_drop_count:
 	__this_cpu_dec(user_stack_count);
@@ -1653,7 +1672,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 
 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
 	if (!filter_check_discard(call, entry, buffer, event)) {
-		ring_buffer_unlock_commit(buffer, event);
+		__buffer_unlock_commit(buffer, event);
 		ftrace_trace_stack(buffer, flags, 6, pc);
 	}
 
@@ -1724,7 +1743,7 @@ int trace_array_vprintk(struct trace_array *tr,
 	memcpy(&entry->buf, tbuffer, len);
 	entry->buf[len] = '\0';
 	if (!filter_check_discard(call, entry, buffer, event)) {
-		ring_buffer_unlock_commit(buffer, event);
+		__buffer_unlock_commit(buffer, event);
 		ftrace_trace_stack(buffer, flags, 6, pc);
 	}
  out:
@@ -3993,7 +4012,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
 	} else
 		entry->buf[cnt] = '\0';
 
-	ring_buffer_unlock_commit(buffer, event);
+	__buffer_unlock_commit(buffer, event);
 
 	written = cnt;
 
-- 
cgit v1.2.3


From 60303ed3f4b9332b9aa9bc17c68bc174e7343e2d Mon Sep 17 00:00:00 2001
From: David Sharp <dhsharp@google.com>
Date: Thu, 11 Oct 2012 16:27:52 -0700
Subject: tracing: Reset ring buffer when changing trace_clocks

Because the "tsc" clock isn't in nanoseconds, the ring buffer must be
reset when changing clocks so that incomparable timestamps don't end up
in the same trace.

Tested: Confirmed switching clocks resets the trace buffer.

Google-Bug-Id: 6980623
Link: http://lkml.kernel.org/r/1349998076-15495-3-git-send-email-dhsharp@google.com

Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: David Sharp <dhsharp@google.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 88111b08b2c1..6ed6013dff2b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4073,6 +4073,14 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
 	if (max_tr.buffer)
 		ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func);
 
+	/*
+	 * New clock may not be consistent with the previous clock.
+	 * Reset the buffer so that it doesn't have incomparable timestamps.
+	 */
+	tracing_reset_online_cpus(&global_trace);
+	if (max_tr.buffer)
+		tracing_reset_online_cpus(&max_tr);
+
 	mutex_unlock(&trace_types_lock);
 
 	*fpos += cnt;
-- 
cgit v1.2.3


From c7b84ecada9a8b7fe3e6c081e70801703897ed5d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 11 May 2012 20:54:53 -0400
Subject: tracing: Remove unused function unregister_tracer()

The function register_tracer() is only used by kernel core code,
that never needs to remove the tracer. As trace_events have become
the main way to add new tracing to the kernel, the need to
unregister a tracer has diminished. Remove the unused function
unregister_tracer(). If a need arises where we need it, then we
can always add it back.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 26 --------------------------
 1 file changed, 26 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6ed6013dff2b..d1d8039578ab 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -882,32 +882,6 @@ int register_tracer(struct tracer *type)
 	return ret;
 }
 
-void unregister_tracer(struct tracer *type)
-{
-	struct tracer **t;
-
-	mutex_lock(&trace_types_lock);
-	for (t = &trace_types; *t; t = &(*t)->next) {
-		if (*t == type)
-			goto found;
-	}
-	pr_info("Tracer %s not registered\n", type->name);
-	goto out;
-
- found:
-	*t = (*t)->next;
-
-	if (type == current_trace && tracer_enabled) {
-		tracer_enabled = 0;
-		tracing_stop();
-		if (current_trace->stop)
-			current_trace->stop(&global_trace);
-		current_trace = &nop_trace;
-	}
-out:
-	mutex_unlock(&trace_types_lock);
-}
-
 void tracing_reset(struct trace_array *tr, int cpu)
 {
 	struct ring_buffer *buffer = tr->buffer;
-- 
cgit v1.2.3


From 0fb9656d957d79dbe7ae155bb6533b1d465e4a50 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 11 May 2012 14:25:30 -0400
Subject: tracing: Make tracing_enabled be equal to tracing_on

The tracing_enabled file has been deprecated as it never was able
to serve its purpose well. The tracing_on file has taken over.
Instead of having code to keep tracing_enabled, have the tracing_enabled
file just set tracing_on, and remove the tracing_enabled variable.

This allows us to remove the tracing_enabled file. The reason that
the remove is in a different change set and not removed here is
in case we find some lonely userspace tool that requires the file
to exist. Then the removal patch will get reverted, but this one
will not.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 79 ++++------------------------------------------------
 1 file changed, 5 insertions(+), 74 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d1d8039578ab..3c9b96aee51a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -205,20 +205,9 @@ static struct trace_array	max_tr;
 
 static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
 
-/* tracer_enabled is used to toggle activation of a tracer */
-static int			tracer_enabled = 1;
-
-/**
- * tracing_is_enabled - return tracer_enabled status
- *
- * This function is used by other tracers to know the status
- * of the tracer_enabled flag.  Tracers may use this function
- * to know if it should enable their features when starting
- * up. See irqsoff tracer for an example (start_irqsoff_tracer).
- */
 int tracing_is_enabled(void)
 {
-	return tracer_enabled;
+	return tracing_is_on();
 }
 
 /*
@@ -1112,8 +1101,7 @@ void trace_find_cmdline(int pid, char comm[])
 
 void tracing_record_cmdline(struct task_struct *tsk)
 {
-	if (atomic_read(&trace_record_cmdline_disabled) || !tracer_enabled ||
-	    !tracing_is_on())
+	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
 		return;
 
 	if (!__this_cpu_read(trace_cmdline_save))
@@ -2966,56 +2954,6 @@ static const struct file_operations tracing_saved_cmdlines_fops = {
     .llseek	= generic_file_llseek,
 };
 
-static ssize_t
-tracing_ctrl_read(struct file *filp, char __user *ubuf,
-		  size_t cnt, loff_t *ppos)
-{
-	char buf[64];
-	int r;
-
-	r = sprintf(buf, "%u\n", tracer_enabled);
-	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-}
-
-static ssize_t
-tracing_ctrl_write(struct file *filp, const char __user *ubuf,
-		   size_t cnt, loff_t *ppos)
-{
-	struct trace_array *tr = filp->private_data;
-	unsigned long val;
-	int ret;
-
-	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
-	if (ret)
-		return ret;
-
-	val = !!val;
-
-	mutex_lock(&trace_types_lock);
-	if (tracer_enabled ^ val) {
-
-		/* Only need to warn if this is used to change the state */
-		WARN_ONCE(1, "tracing_enabled is deprecated. Use tracing_on");
-
-		if (val) {
-			tracer_enabled = 1;
-			if (current_trace->start)
-				current_trace->start(tr);
-			tracing_start();
-		} else {
-			tracer_enabled = 0;
-			tracing_stop();
-			if (current_trace->stop)
-				current_trace->stop(tr);
-		}
-	}
-	mutex_unlock(&trace_types_lock);
-
-	*ppos += cnt;
-
-	return cnt;
-}
-
 static ssize_t
 tracing_set_trace_read(struct file *filp, char __user *ubuf,
 		       size_t cnt, loff_t *ppos)
@@ -3469,7 +3407,7 @@ static int tracing_wait_pipe(struct file *filp)
 			return -EINTR;
 
 		/*
-		 * We block until we read something and tracing is disabled.
+		 * We block until we read something and tracing is enabled.
 		 * We still block if tracing is disabled, but we have never
 		 * read anything. This allows a user to cat this file, and
 		 * then enable tracing. But after we have read something,
@@ -3477,7 +3415,7 @@ static int tracing_wait_pipe(struct file *filp)
 		 *
 		 * iter->pos will be 0 if we haven't read anything.
 		 */
-		if (!tracer_enabled && iter->pos)
+		if (tracing_is_enabled() && iter->pos)
 			break;
 	}
 
@@ -4076,13 +4014,6 @@ static const struct file_operations tracing_max_lat_fops = {
 	.llseek		= generic_file_llseek,
 };
 
-static const struct file_operations tracing_ctrl_fops = {
-	.open		= tracing_open_generic,
-	.read		= tracing_ctrl_read,
-	.write		= tracing_ctrl_write,
-	.llseek		= generic_file_llseek,
-};
-
 static const struct file_operations set_tracer_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_set_trace_read,
@@ -4858,7 +4789,7 @@ static __init int tracer_init_debugfs(void)
 	d_tracer = tracing_init_dentry();
 
 	trace_create_file("tracing_enabled", 0644, d_tracer,
-			&global_trace, &tracing_ctrl_fops);
+			  &global_trace, &rb_simple_fops);
 
 	trace_create_file("trace_options", 0644, d_tracer,
 			NULL, &tracing_iter_fops);
-- 
cgit v1.2.3


From 02404baf1b47123f1c88c9f9f1f3b00e1e2b10db Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 1 Nov 2012 11:51:40 -0400
Subject: tracing: Remove deprecated tracing_enabled file

The tracing_enabled file was used as a quick way to stop
tracers, and try to bring down overhead for things like
the latency tracers (irqsoff, wakeup, etc). But it didn't
work that well.

The tracing_on file was created as a really fast way to
stop recording into the ftrace ring buffer and can interact
with the kernel. That is a tracing_off() call in the kernel
can disable recording of events, and then from userspace one
could echo 1 into the tracing_on file to continue it. The
tracing_enabled function did too much to allow for this.

The tracing_on has taken over as a way to start and stop tracing
and the tracing_enabled file should not be used. But because of
its existance, it still confuses people. Over a year ago the
following commit was added:

 commit 6752ab4a9c30d5411b2dfdb251a3f1cb18aae487
 Author: Steven Rostedt <srostedt@redhat.com>
 Date:   Tue Feb 8 13:54:06 2011 -0500

    tracing: Deprecate tracing_enabled for tracing_on

This commit added a WARN_ON() if the tracing_enabled file's variable
was changed. After this was added, only LatencyTop complained, and
they soon fixed their tool as there was no reason that LatencyTop
should touch this file as it was using the perf ring buffers which
this file does not interact with. But since that time no one else
has complained about this WARN_ON(). Thus it is safe to assume that
this file is no longer needed. Time to get rid of it.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3c9b96aee51a..d5cbc0d3f209 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4788,9 +4788,6 @@ static __init int tracer_init_debugfs(void)
 
 	d_tracer = tracing_init_dentry();
 
-	trace_create_file("tracing_enabled", 0644, d_tracer,
-			  &global_trace, &rb_simple_fops);
-
 	trace_create_file("trace_options", 0644, d_tracer,
 			NULL, &tracing_iter_fops);
 
-- 
cgit v1.2.3


From 0d5c6e1c19bab82fad4837108c2902f557d62a04 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 1 Nov 2012 20:54:21 -0400
Subject: tracing: Use irq_work for wake ups and remove *_nowake_*() functions

Have the ring buffer commit function use the irq_work infrastructure to
wake up any waiters waiting on the ring buffer for new data. The irq_work
was created for such a purpose, where doing the actual wake up at the
time of adding data is too dangerous, as an event or function trace may
be in the midst of the work queue locks and cause deadlocks. The irq_work
will either delay the action to the next timer interrupt, or trigger an IPI
to itself forcing an interrupt to do the work (in a safe location).

With irq_work, all ring buffer commits can safely do wakeups, removing
the need for the ring buffer commit "nowake" variants, which were used
by events and function tracing. All commits can now safely use the
normal commit, and the "nowake" variants can be removed.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 121 +++++++++++++++++++++++++++++----------------------
 1 file changed, 68 insertions(+), 53 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d5cbc0d3f209..37d1c703e3ec 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -19,6 +19,7 @@
 #include <linux/seq_file.h>
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
+#include <linux/irq_work.h>
 #include <linux/debugfs.h>
 #include <linux/pagemap.h>
 #include <linux/hardirq.h>
@@ -84,6 +85,14 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
  */
 static DEFINE_PER_CPU(bool, trace_cmdline_save);
 
+/*
+ * When a reader is waiting for data, then this variable is
+ * set to true.
+ */
+static bool trace_wakeup_needed;
+
+static struct irq_work trace_work_wakeup;
+
 /*
  * Kill all tracing for good (never come back).
  * It is initialized to 1 but will turn to zero if the initialization
@@ -329,12 +338,18 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
 static int trace_stop_count;
 static DEFINE_RAW_SPINLOCK(tracing_start_lock);
 
-static void wakeup_work_handler(struct work_struct *work)
+/**
+ * trace_wake_up - wake up tasks waiting for trace input
+ *
+ * Schedules a delayed work to wake up any task that is blocked on the
+ * trace_wait queue. These is used with trace_poll for tasks polling the
+ * trace.
+ */
+static void trace_wake_up(struct irq_work *work)
 {
-	wake_up(&trace_wait);
-}
+	wake_up_all(&trace_wait);
 
-static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler);
+}
 
 /**
  * tracing_on - enable tracing buffers
@@ -389,22 +404,6 @@ int tracing_is_on(void)
 }
 EXPORT_SYMBOL_GPL(tracing_is_on);
 
-/**
- * trace_wake_up - wake up tasks waiting for trace input
- *
- * Schedules a delayed work to wake up any task that is blocked on the
- * trace_wait queue. These is used with trace_poll for tasks polling the
- * trace.
- */
-void trace_wake_up(void)
-{
-	const unsigned long delay = msecs_to_jiffies(2);
-
-	if (trace_flags & TRACE_ITER_BLOCK)
-		return;
-	schedule_delayed_work(&wakeup_work, delay);
-}
-
 static int __init set_buf_size(char *str)
 {
 	unsigned long buf_size;
@@ -753,6 +752,40 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 }
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
+static void default_wait_pipe(struct trace_iterator *iter)
+{
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
+
+	/*
+	 * The events can happen in critical sections where
+	 * checking a work queue can cause deadlocks.
+	 * After adding a task to the queue, this flag is set
+	 * only to notify events to try to wake up the queue
+	 * using irq_work.
+	 *
+	 * We don't clear it even if the buffer is no longer
+	 * empty. The flag only causes the next event to run
+	 * irq_work to do the work queue wake up. The worse
+	 * that can happen if we race with !trace_empty() is that
+	 * an event will cause an irq_work to try to wake up
+	 * an empty queue.
+	 *
+	 * There's no reason to protect this flag either, as
+	 * the work queue and irq_work logic will do the necessary
+	 * synchronization for the wake ups. The only thing
+	 * that is necessary is that the wake up happens after
+	 * a task has been queued. It's OK for spurious wake ups.
+	 */
+	trace_wakeup_needed = true;
+
+	if (trace_empty(iter))
+		schedule();
+
+	finish_wait(&trace_wait, &wait);
+}
+
 /**
  * register_tracer - register a tracer with the ftrace system.
  * @type - the plugin for the tracer
@@ -1156,30 +1189,32 @@ void
 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
 {
 	__this_cpu_write(trace_cmdline_save, true);
+	if (trace_wakeup_needed) {
+		trace_wakeup_needed = false;
+		/* irq_work_queue() supplies it's own memory barriers */
+		irq_work_queue(&trace_work_wakeup);
+	}
 	ring_buffer_unlock_commit(buffer, event);
 }
 
 static inline void
 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
 			     struct ring_buffer_event *event,
-			     unsigned long flags, int pc,
-			     int wake)
+			     unsigned long flags, int pc)
 {
 	__buffer_unlock_commit(buffer, event);
 
 	ftrace_trace_stack(buffer, flags, 6, pc);
 	ftrace_trace_userstack(buffer, flags, pc);
-
-	if (wake)
-		trace_wake_up();
 }
 
 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
 				struct ring_buffer_event *event,
 				unsigned long flags, int pc)
 {
-	__trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
+	__trace_buffer_unlock_commit(buffer, event, flags, pc);
 }
+EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
 
 struct ring_buffer_event *
 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
@@ -1196,29 +1231,21 @@ void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
 					struct ring_buffer_event *event,
 					unsigned long flags, int pc)
 {
-	__trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
+	__trace_buffer_unlock_commit(buffer, event, flags, pc);
 }
 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
 
-void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
-				       struct ring_buffer_event *event,
-				       unsigned long flags, int pc)
-{
-	__trace_buffer_unlock_commit(buffer, event, flags, pc, 0);
-}
-EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
-
-void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
-					    struct ring_buffer_event *event,
-					    unsigned long flags, int pc,
-					    struct pt_regs *regs)
+void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
+				     struct ring_buffer_event *event,
+				     unsigned long flags, int pc,
+				     struct pt_regs *regs)
 {
 	__buffer_unlock_commit(buffer, event);
 
 	ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
 	ftrace_trace_userstack(buffer, flags, pc);
 }
-EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs);
+EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
 
 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
 					 struct ring_buffer_event *event)
@@ -3354,19 +3381,6 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table)
 	}
 }
 
-
-void default_wait_pipe(struct trace_iterator *iter)
-{
-	DEFINE_WAIT(wait);
-
-	prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
-
-	if (trace_empty(iter))
-		schedule();
-
-	finish_wait(&trace_wait, &wait);
-}
-
 /*
  * This is a make-shift waitqueue.
  * A tracer might use this callback on some rare cases:
@@ -5107,6 +5121,7 @@ __init static int tracer_alloc_buffers(void)
 #endif
 
 	trace_init_cmdlines();
+	init_irq_work(&trace_work_wakeup, trace_wake_up);
 
 	register_tracer(&nop_trace);
 	current_trace = &nop_trace;
-- 
cgit v1.2.3


From 7bcfaf54f591a0775254c4ea679faf615152ee3a Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 1 Nov 2012 22:56:07 -0400
Subject: tracing: Add trace_options kernel command line parameter

Add trace_options to the kernel command line parameter to be able to
set options at early boot. For example, to enable stack dumps of
events, add the following:

  trace_options=stacktrace

This along with the trace_event option, you can get not only
traces of the events but also the stack dumps with them.

Requested-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 54 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 15 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 37d1c703e3ec..c1434b5ce4d1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -155,6 +155,18 @@ static int __init set_ftrace_dump_on_oops(char *str)
 }
 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 
+
+static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
+static char *trace_boot_options __initdata;
+
+static int __init set_trace_boot_options(char *str)
+{
+	strncpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
+	trace_boot_options = trace_boot_options_buf;
+	return 0;
+}
+__setup("trace_options=", set_trace_boot_options);
+
 unsigned long long ns2usecs(cycle_t nsec)
 {
 	nsec += 500;
@@ -2838,24 +2850,14 @@ static void set_tracer_flags(unsigned int mask, int enabled)
 		trace_printk_start_stop_comm(enabled);
 }
 
-static ssize_t
-tracing_trace_options_write(struct file *filp, const char __user *ubuf,
-			size_t cnt, loff_t *ppos)
+static int trace_set_options(char *option)
 {
-	char buf[64];
 	char *cmp;
 	int neg = 0;
-	int ret;
+	int ret = 0;
 	int i;
 
-	if (cnt >= sizeof(buf))
-		return -EINVAL;
-
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-
-	buf[cnt] = 0;
-	cmp = strstrip(buf);
+	cmp = strstrip(option);
 
 	if (strncmp(cmp, "no", 2) == 0) {
 		neg = 1;
@@ -2874,10 +2876,25 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
 		mutex_lock(&trace_types_lock);
 		ret = set_tracer_option(current_trace, cmp, neg);
 		mutex_unlock(&trace_types_lock);
-		if (ret)
-			return ret;
 	}
 
+	return ret;
+}
+
+static ssize_t
+tracing_trace_options_write(struct file *filp, const char __user *ubuf,
+			size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+
+	if (cnt >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	trace_set_options(buf);
+
 	*ppos += cnt;
 
 	return cnt;
@@ -5133,6 +5150,13 @@ __init static int tracer_alloc_buffers(void)
 
 	register_die_notifier(&trace_die_notifier);
 
+	while (trace_boot_options) {
+		char *option;
+
+		option = strsep(&trace_boot_options, ",");
+		trace_set_options(option);
+	}
+
 	return 0;
 
 out_free_cpumask:
-- 
cgit v1.2.3


From 8cbd9cc6254065c97c4bac42daa55ba1abe73a8e Mon Sep 17 00:00:00 2001
From: David Sharp <dhsharp@google.com>
Date: Tue, 13 Nov 2012 12:18:21 -0800
Subject: tracing,x86: Add a TSC trace_clock

In order to promote interoperability between userspace tracers and ftrace,
add a trace_clock that reports raw TSC values which will then be recorded
in the ring buffer. Userspace tracers that also record TSCs are then on
exactly the same time base as the kernel and events can be unambiguously
interlaced.

Tested: Enabled a tracepoint and the "tsc" trace_clock and saw very large
timestamp values.

v2:
Move arch-specific bits out of generic code.
v3:
Rename "x86-tsc", cleanups
v7:
Generic arch bits in Kbuild.

Google-Bug-Id: 6980623
Link: http://lkml.kernel.org/r/1352837903-32191-1-git-send-email-dhsharp@google.com

Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@linux.intel.com>
Signed-off-by: David Sharp <dhsharp@google.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c1434b5ce4d1..0d20620c0d27 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -488,6 +488,7 @@ static struct {
 	{ trace_clock_local,	"local" },
 	{ trace_clock_global,	"global" },
 	{ trace_clock_counter,	"counter" },
+	ARCH_TRACE_CLOCKS
 };
 
 int trace_clock_id;
-- 
cgit v1.2.3


From 8be0709f10e3dd5d7d07933ad61a9f18c4b93ca5 Mon Sep 17 00:00:00 2001
From: David Sharp <dhsharp@google.com>
Date: Tue, 13 Nov 2012 12:18:22 -0800
Subject: tracing: Format non-nanosec times from tsc clock without a decimal
 point.

With the addition of the "tsc" clock, formatting timestamps to look like
fractional seconds is misleading. Mark clocks as either in nanoseconds or
not, and format non-nanosecond timestamps as decimal integers.

Tested:
$ cd /sys/kernel/debug/tracing/
$ cat trace_clock
[local] global tsc
$ echo sched_switch > set_event
$ echo 1 > tracing_on ; sleep 0.0005 ; echo 0 > tracing_on
$ cat trace
          <idle>-0     [000]  6330.555552: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 next_prio=120
           sleep-29964 [000]  6330.555628: sched_switch: prev_comm=bash prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
  ...
$ echo 1 > options/latency-format
$ cat trace
  <idle>-0       0 4104553247us+: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 next_prio=120
   sleep-29964   0 4104553322us+: sched_switch: prev_comm=bash prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
  ...
$ echo tsc > trace_clock
$ cat trace
$ echo 1 > tracing_on ; sleep 0.0005 ; echo 0 > tracing_on
$ echo 0 > options/latency-format
$ cat trace
          <idle>-0     [000] 16490053398357: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 next_prio=120
           sleep-31128 [000] 16490053588518: sched_switch: prev_comm=bash prev_pid=31128 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
  ...
echo 1 > options/latency-format
$ cat trace
  <idle>-0       0 91557653238+: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 next_prio=120
   sleep-31128   0 91557843399+: sched_switch: prev_comm=bash prev_pid=31128 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
  ...

v2:
Move arch-specific bits out of generic code.
v4:
Fix x86_32 build due to 64-bit division.

Google-Bug-Id: 6980623
Link: http://lkml.kernel.org/r/1352837903-32191-2-git-send-email-dhsharp@google.com

Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: David Sharp <dhsharp@google.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0d20620c0d27..d943e69569cd 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -484,10 +484,11 @@ static const char *trace_options[] = {
 static struct {
 	u64 (*func)(void);
 	const char *name;
+	int in_ns;		/* is this clock in nanoseconds? */
 } trace_clocks[] = {
-	{ trace_clock_local,	"local" },
-	{ trace_clock_global,	"global" },
-	{ trace_clock_counter,	"counter" },
+	{ trace_clock_local,	"local",	1 },
+	{ trace_clock_global,	"global",	1 },
+	{ trace_clock_counter,	"counter",	0 },
 	ARCH_TRACE_CLOCKS
 };
 
@@ -2478,6 +2479,10 @@ __tracing_open(struct inode *inode, struct file *file)
 	if (ring_buffer_overruns(iter->tr->buffer))
 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
 
+	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
+	if (trace_clocks[trace_clock_id].in_ns)
+		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
+
 	/* stop the trace while dumping */
 	tracing_stop();
 
@@ -3339,6 +3344,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 	if (trace_flags & TRACE_ITER_LATENCY_FMT)
 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
 
+	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
+	if (trace_clocks[trace_clock_id].in_ns)
+		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
+
 	iter->cpu_file = cpu_file;
 	iter->tr = &global_trace;
 	mutex_init(&iter->mutex);
-- 
cgit v1.2.3


From 11043d8b125671a32253cddb0b05177be0e976f6 Mon Sep 17 00:00:00 2001
From: Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@hitachi.com>
Date: Tue, 13 Nov 2012 12:18:23 -0800
Subject: tracing: Show raw time stamp on stats per cpu using counter or tsc
 mode for trace_clock

Show raw time stamp values for stats per cpu if you choose counter or tsc mode
for trace_clock. Although a unit of tracing time stamp is nsec in local or global mode,
the units in counter and TSC mode are tracing counter and cycles respectively.
Link: http://lkml.kernel.org/r/1352837903-32191-3-git-send-email-dhsharp@google.com

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@hitachi.com>
Signed-off-by: David Sharp <dhsharp@google.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d943e69569cd..b69cc380322d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4388,13 +4388,24 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 	cnt = ring_buffer_bytes_cpu(tr->buffer, cpu);
 	trace_seq_printf(s, "bytes: %ld\n", cnt);
 
-	t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu));
-	usec_rem = do_div(t, USEC_PER_SEC);
-	trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", t, usec_rem);
+	if (trace_clocks[trace_clock_id].in_ns) {
+		/* local or global for trace_clock */
+		t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu));
+		usec_rem = do_div(t, USEC_PER_SEC);
+		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
+								t, usec_rem);
+
+		t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu));
+		usec_rem = do_div(t, USEC_PER_SEC);
+		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
+	} else {
+		/* counter or tsc mode for trace_clock */
+		trace_seq_printf(s, "oldest event ts: %llu\n",
+				ring_buffer_oldest_event_ts(tr->buffer, cpu));
 
-	t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu));
-	usec_rem = do_div(t, USEC_PER_SEC);
-	trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
+		trace_seq_printf(s, "now ts: %llu\n",
+				ring_buffer_time_stamp(tr->buffer, cpu));
+	}
 
 	cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu);
 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
-- 
cgit v1.2.3


From d60da506cbeb3f1907a740547dd7ef04a93e908e Mon Sep 17 00:00:00 2001
From: Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
Date: Wed, 17 Oct 2012 11:56:16 +0900
Subject: tracing: Add a resize function to make one buffer equivalent to
 another buffer

Trace buffer size is now per-cpu, so that there are the following two
patterns in resizing of buffers.

  (1) resize per-cpu buffers to same given size
  (2) resize per-cpu buffers to another trace_array's buffer size
      for each CPU (such as preparing the max_tr which is equivalent
      to the global_trace's size)

__tracing_resize_ring_buffer() can be used for (1), and had
implemented (2) inside it for resetting the global_trace to the
original size.

(2) was also implemented in another place. So this patch assembles
them in a new function - resize_buffer_duplicate_size().

Link: http://lkml.kernel.org/r/20121017025616.2627.91226.stgit@falsita

Signed-off-by: Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 58 ++++++++++++++++++++++++++++------------------------
 1 file changed, 31 insertions(+), 27 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b69cc380322d..64ad9bc4275b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3034,6 +3034,31 @@ static void set_buffer_entries(struct trace_array *tr, unsigned long val)
 		tr->data[cpu]->entries = val;
 }
 
+/* resize @tr's buffer to the size of @size_tr's entries */
+static int resize_buffer_duplicate_size(struct trace_array *tr,
+					struct trace_array *size_tr, int cpu_id)
+{
+	int cpu, ret = 0;
+
+	if (cpu_id == RING_BUFFER_ALL_CPUS) {
+		for_each_tracing_cpu(cpu) {
+			ret = ring_buffer_resize(tr->buffer,
+					size_tr->data[cpu]->entries, cpu);
+			if (ret < 0)
+				break;
+			tr->data[cpu]->entries = size_tr->data[cpu]->entries;
+		}
+	} else {
+		ret = ring_buffer_resize(tr->buffer,
+					size_tr->data[cpu_id]->entries, cpu_id);
+		if (ret == 0)
+			tr->data[cpu_id]->entries =
+				size_tr->data[cpu_id]->entries;
+	}
+
+	return ret;
+}
+
 static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
 {
 	int ret;
@@ -3058,23 +3083,8 @@ static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
 
 	ret = ring_buffer_resize(max_tr.buffer, size, cpu);
 	if (ret < 0) {
-		int r = 0;
-
-		if (cpu == RING_BUFFER_ALL_CPUS) {
-			int i;
-			for_each_tracing_cpu(i) {
-				r = ring_buffer_resize(global_trace.buffer,
-						global_trace.data[i]->entries,
-						i);
-				if (r < 0)
-					break;
-			}
-		} else {
-			r = ring_buffer_resize(global_trace.buffer,
-						global_trace.data[cpu]->entries,
-						cpu);
-		}
-
+		int r = resize_buffer_duplicate_size(&global_trace,
+						     &global_trace, cpu);
 		if (r < 0) {
 			/*
 			 * AARGH! We are left with different
@@ -3212,17 +3222,11 @@ static int tracing_set_tracer(const char *buf)
 
 	topts = create_trace_option_files(t);
 	if (t->use_max_tr) {
-		int cpu;
 		/* we need to make per cpu buffer sizes equivalent */
-		for_each_tracing_cpu(cpu) {
-			ret = ring_buffer_resize(max_tr.buffer,
-						global_trace.data[cpu]->entries,
-						cpu);
-			if (ret < 0)
-				goto out;
-			max_tr.data[cpu]->entries =
-					global_trace.data[cpu]->entries;
-		}
+		ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
+						   RING_BUFFER_ALL_CPUS);
+		if (ret < 0)
+			goto out;
 	}
 
 	if (t->init) {
-- 
cgit v1.2.3


From bf3071f5a054db9e5bab873355d27a7330ce5187 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Wed, 25 Jul 2012 11:39:08 -0400
Subject: tracing: Remove unnecessary WARN_ONCE's from
 tracing_buffers_splice_read

WARN shouldn't be used as a means of communicating failure to a userspace programmer.

Link: http://lkml.kernel.org/r/20120725153908.GA25203@redhat.com

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 64ad9bc4275b..5bc35907cc6e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4275,13 +4275,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		return -ENOMEM;
 
 	if (*ppos & (PAGE_SIZE - 1)) {
-		WARN_ONCE(1, "Ftrace: previous read must page-align\n");
 		ret = -EINVAL;
 		goto out;
 	}
 
 	if (len & (PAGE_SIZE - 1)) {
-		WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
 		if (len < PAGE_SIZE) {
 			ret = -EINVAL;
 			goto out;
-- 
cgit v1.2.3


From a8dd2176a8e988e3744e863ac39647a6f59fa900 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 9 Jan 2013 20:54:17 -0500
Subject: tracing: Fix regression of trace_options file setting

The latest change to allow trace options to be set on the command
line also broke the trace_options file.

The zeroing of the last byte of the option name that is echoed into
the trace_option file was removed with the consolidation of some
of the code. The compare between the option and what was written to
the trace_options file fails because the string holding the data
written doesn't terminate with a null character.

A zero needs to be added to the end of the string copied from
user space.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e5125677efa0..1bbfa0446507 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2899,6 +2899,8 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
 	if (copy_from_user(&buf, ubuf, cnt))
 		return -EFAULT;
 
+	buf[cnt] = 0;
+
 	trace_set_options(buf);
 
 	*ppos += cnt;
-- 
cgit v1.2.3


From 2df8f8a6a897ebf4c5613b5be6103d33b2a21520 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 11 Jan 2013 16:14:10 -0500
Subject: tracing: Fix regression with irqsoff tracer and tracing_on file

Commit 02404baf1b47 "tracing: Remove deprecated tracing_enabled file"
removed the tracing_enabled file as it never worked properly and
the tracing_on file should be used instead. But the tracing_on file
didn't call into the tracers start/stop routines like the
tracing_enabled file did. This caused trace-cmd to break when it
enabled the irqsoff tracer.

If you just did "echo irqsoff > current_tracer" then it would work
properly. But the tool trace-cmd disables tracing first by writing
"0" into the tracing_on file. Then it writes "irqsoff" into
current_tracer and then writes "1" into tracing_on. Unfortunately,
the above commit changed the irqsoff tracer to check the tracing_on
status instead of the tracing_enabled status. If it's disabled then
it does not start the tracer internals.

The problem is that writing "1" into tracing_on does not call the
tracers "start" routine like writing "1" into tracing_enabled did.
This makes the irqsoff tracer not start when using the trace-cmd
tool, and is a regression for userspace.

Simple fix is to have the tracing_on file call the tracers start()
method when being enabled (and the stop() method when disabled).

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1bbfa0446507..f3ec1cfb0de1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4817,10 +4817,17 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
 		return ret;
 
 	if (buffer) {
-		if (val)
+		mutex_lock(&trace_types_lock);
+		if (val) {
 			ring_buffer_record_on(buffer);
-		else
+			if (current_trace->start)
+				current_trace->start(tr);
+		} else {
 			ring_buffer_record_off(buffer);
+			if (current_trace->stop)
+				current_trace->stop(tr);
+		}
+		mutex_unlock(&trace_types_lock);
 	}
 
 	(*ppos)++;
-- 
cgit v1.2.3


From 250bfd3d8e7e19cb649dd94689f0af2ce3474060 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Mon, 14 Jan 2013 10:54:11 +0800
Subject: tracing: Fix regression of trace_pipe

Commit 0fb9656d "tracing: Make tracing_enabled be equal to tracing_on"
changes the behaviour of trace_pipe, ie. it makes trace_pipe return if
we've read something and tracing is enabled, and this means that we have
to 'cat trace_pipe' again and again while running tests.

IMO the right way is if tracing is enabled, we always block and wait for
ring buffer, or we may lose what we want since ring buffer's size is limited.

Link: http://lkml.kernel.org/r/1358132051-5410-1-git-send-email-bo.li.liu@oracle.com

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f3ec1cfb0de1..3c13e46d7d24 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3454,7 +3454,7 @@ static int tracing_wait_pipe(struct file *filp)
 			return -EINTR;
 
 		/*
-		 * We block until we read something and tracing is enabled.
+		 * We block until we read something and tracing is disabled.
 		 * We still block if tracing is disabled, but we have never
 		 * read anything. This allows a user to cat this file, and
 		 * then enable tracing. But after we have read something,
@@ -3462,7 +3462,7 @@ static int tracing_wait_pipe(struct file *filp)
 		 *
 		 * iter->pos will be 0 if we haven't read anything.
 		 */
-		if (tracing_is_enabled() && iter->pos)
+		if (!tracing_is_enabled() && iter->pos)
 			break;
 	}
 
-- 
cgit v1.2.3