From bc7ee55633867909bb05e71f957a4d3c1aa1b488 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 30 Nov 2011 14:27:08 +0000 Subject: regmap: Add trace event for successful cache reads Currently we only trace physical reads, there's no instrumentation if the read is satisfied from cache. Signed-off-by: Mark Brown --- include/trace/events/regmap.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/regmap.h b/include/trace/events/regmap.h index 1e3193b8fcc8..12fbf43524e9 100644 --- a/include/trace/events/regmap.h +++ b/include/trace/events/regmap.h @@ -55,6 +55,15 @@ DEFINE_EVENT(regmap_reg, regmap_reg_read, ); +DEFINE_EVENT(regmap_reg, regmap_reg_read_cache, + + TP_PROTO(struct device *dev, unsigned int reg, + unsigned int val), + + TP_ARGS(dev, reg, val) + +); + DECLARE_EVENT_CLASS(regmap_block, TP_PROTO(struct device *dev, unsigned int reg, int count), -- cgit v1.2.3 From b781a602ac745ee3d5d745276f1e1905a2c101f9 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Mon, 28 Nov 2011 12:03:35 +0300 Subject: events, sched: Add tracepoint for accounting blocked time This tracepoint shows how long a task is sleeping in uninterruptible state. E.g. it may show how long and where a mutex is waited for. Signed-off-by: Andrew Vagin Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1322471015-107825-8-git-send-email-avagin@openvz.org Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 959ff18b63b6..e33ed1bfa113 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -330,6 +330,13 @@ DEFINE_EVENT(sched_stat_template, sched_stat_iowait, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(tsk, delay)); +/* + * Tracepoint for accounting blocked time (time the task is in uninterruptible). + */ +DEFINE_EVENT(sched_stat_template, sched_stat_blocked, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); + /* * Tracepoint for accounting runtime (time the task is executing * on a CPU). -- cgit v1.2.3 From 9b2e4f1880b789be1f24f9684f7a54b90310b5c0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 30 Sep 2011 12:10:22 -0700 Subject: rcu: Track idleness independent of idle tasks Earlier versions of RCU used the scheduling-clock tick to detect idleness by checking for the idle task, but handled idleness differently for CONFIG_NO_HZ=y. But there are now a number of uses of RCU read-side critical sections in the idle task, for example, for tracing. A more fine-grained detection of idleness is therefore required. This commit presses the old dyntick-idle code into full-time service, so that rcu_idle_enter(), previously known as rcu_enter_nohz(), is always invoked at the beginning of an idle loop iteration. Similarly, rcu_idle_exit(), previously known as rcu_exit_nohz(), is always invoked at the end of an idle-loop iteration. This allows the idle task to use RCU everywhere except between consecutive rcu_idle_enter() and rcu_idle_exit() calls, in turn allowing architecture maintainers to specify exactly where in the idle loop that RCU may be used. Because some of the userspace upcall uses can result in what looks to RCU like half of an interrupt, it is not possible to expect that the irq_enter() and irq_exit() hooks will give exact counts. This patch therefore expands the ->dynticks_nesting counter to 64 bits and uses two separate bitfields to count process/idle transitions and interrupt entry/exit transitions. It is presumed that userspace upcalls do not happen in the idle loop or from usermode execution (though usermode might do a system call that results in an upcall). The counter is hard-reset on each process/idle transition, which avoids the interrupt entry/exit error from accumulating. Overflow is avoided by the 64-bitness of the ->dyntick_nesting counter. This commit also adds warnings if a non-idle task asks RCU to enter idle state (and these checks will need some adjustment before applying Frederic's OS-jitter patches (http://lkml.org/lkml/2011/10/7/246). In addition, validation of ->dynticks and ->dynticks_nesting is added. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/trace/events/rcu.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 669fbd62ec25..e5771804c507 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -246,19 +246,21 @@ TRACE_EVENT(rcu_fqs, */ TRACE_EVENT(rcu_dyntick, - TP_PROTO(char *polarity), + TP_PROTO(char *polarity, int nesting), - TP_ARGS(polarity), + TP_ARGS(polarity, nesting), TP_STRUCT__entry( __field(char *, polarity) + __field(int, nesting) ), TP_fast_assign( __entry->polarity = polarity; + __entry->nesting = nesting; ), - TP_printk("%s", __entry->polarity) + TP_printk("%s %d", __entry->polarity, __entry->nesting) ); /* @@ -443,7 +445,7 @@ TRACE_EVENT(rcu_batch_end, #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) -#define trace_rcu_dyntick(polarity) do { } while (0) +#define trace_rcu_dyntick(polarity, nesting) do { } while (0) #define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) -- cgit v1.2.3 From 91afaf300269aa99a4d646969b3258b74294ac4d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 2 Oct 2011 07:44:32 -0700 Subject: rcu: Add failure tracing to rcutorture Trace the rcutorture RCU accesses and dump the trace buffer when the first failure is detected. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/trace/events/rcu.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index e5771804c507..172620a92b1a 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -437,6 +437,31 @@ TRACE_EVENT(rcu_batch_end, __entry->rcuname, __entry->callbacks_invoked) ); +/* + * Tracepoint for rcutorture readers. The first argument is the name + * of the RCU flavor from rcutorture's viewpoint and the second argument + * is the callback address. + */ +TRACE_EVENT(rcu_torture_read, + + TP_PROTO(char *rcutorturename, struct rcu_head *rhp), + + TP_ARGS(rcutorturename, rhp), + + TP_STRUCT__entry( + __field(char *, rcutorturename) + __field(struct rcu_head *, rhp) + ), + + TP_fast_assign( + __entry->rcutorturename = rcutorturename; + __entry->rhp = rhp; + ), + + TP_printk("%s torture read %p", + __entry->rcutorturename, __entry->rhp) +); + #else /* #ifdef CONFIG_RCU_TRACE */ #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) @@ -452,6 +477,7 @@ TRACE_EVENT(rcu_batch_end, #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) #define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0) +#define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) #endif /* #else #ifdef CONFIG_RCU_TRACE */ -- cgit v1.2.3 From 4145fa7fbee3ec1e61c52825b146192885d9759f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 31 Oct 2011 15:01:54 -0700 Subject: rcu: Deconfuse dynticks entry-exit tracing The trace_rcu_dyntick() trace event did not print both the old and the new value of the nesting level, and furthermore printed only the low-order 32 bits of it. This could result in some confusion when interpreting trace-event dumps, so this commit prints both the old and the new value, prints the full 64 bits, and also selects the process-entry/exit increment to print nicely in hexadecimal. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/trace/events/rcu.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 172620a92b1a..c29fb2f55909 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -246,21 +246,24 @@ TRACE_EVENT(rcu_fqs, */ TRACE_EVENT(rcu_dyntick, - TP_PROTO(char *polarity, int nesting), + TP_PROTO(char *polarity, long long oldnesting, long long newnesting), - TP_ARGS(polarity, nesting), + TP_ARGS(polarity, oldnesting, newnesting), TP_STRUCT__entry( __field(char *, polarity) - __field(int, nesting) + __field(long long, oldnesting) + __field(long long, newnesting) ), TP_fast_assign( __entry->polarity = polarity; - __entry->nesting = nesting; + __entry->oldnesting = oldnesting; + __entry->newnesting = newnesting; ), - TP_printk("%s %d", __entry->polarity, __entry->nesting) + TP_printk("%s %llx %llx", __entry->polarity, + __entry->oldnesting, __entry->newnesting) ); /* @@ -470,7 +473,7 @@ TRACE_EVENT(rcu_torture_read, #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) -#define trace_rcu_dyntick(polarity, nesting) do { } while (0) +#define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0) #define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) -- cgit v1.2.3 From 045fb9315a2129023d70a0eecf0942e18fca4fcd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 22 Nov 2011 12:13:03 -0800 Subject: rcu: Update trace_rcu_dyntick() header comment This commit updates the trace_rcu_dyntick() header comment to reflect events added by commit 4b4f421. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index c29fb2f55909..7f6877a35051 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -241,8 +241,16 @@ TRACE_EVENT(rcu_fqs, /* * Tracepoint for dyntick-idle entry/exit events. These take a string - * as argument: "Start" for entering dyntick-idle mode and "End" for - * leaving it. + * as argument: "Start" for entering dyntick-idle mode, "End" for + * leaving it, "--=" for events moving towards idle, and "++=" for events + * moving away from idle. "Error on entry: not idle task" and "Error on + * exit: not idle task" indicate that a non-idle task is erroneously + * toying with the idle loop. + * + * These events also take a pair of numbers, which indicate the nesting + * depth before and after the event of interest. Note that task-related + * events use the upper bits of each number, while interrupt-related + * events use the lower bits. */ TRACE_EVENT(rcu_dyntick, -- cgit v1.2.3 From 433cdddcd9ac5558068edd7f8d4707a70f7710f5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 22 Nov 2011 14:58:03 -0800 Subject: rcu: Add tracing for RCU_FAST_NO_HZ This commit adds trace_rcu_prep_idle(), which is invoked from rcu_prepare_for_idle() and rcu_wake_cpu() to trace attempts on the part of RCU to force CPUs into dyntick-idle mode. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 7f6877a35051..debe453c9623 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -274,6 +274,42 @@ TRACE_EVENT(rcu_dyntick, __entry->oldnesting, __entry->newnesting) ); +/* + * Tracepoint for RCU preparation for idle, the goal being to get RCU + * processing done so that the current CPU can shut off its scheduling + * clock and enter dyntick-idle mode. One way to accomplish this is + * to drain all RCU callbacks from this CPU, and the other is to have + * done everything RCU requires for the current grace period. In this + * latter case, the CPU will be awakened at the end of the current grace + * period in order to process the remainder of its callbacks. + * + * These tracepoints take a string as argument: + * + * "No callbacks": Nothing to do, no callbacks on this CPU. + * "In holdoff": Nothing to do, holding off after unsuccessful attempt. + * "Dyntick with callbacks": Callbacks remain, but RCU doesn't need CPU. + * "Begin holdoff": Attempt failed, don't retry until next jiffy. + * "More callbacks": Still more callbacks, try again to clear them out. + * "Callbacks drained": All callbacks processed, off to dyntick idle! + * "CPU awakened at GP end": + */ +TRACE_EVENT(rcu_prep_idle, + + TP_PROTO(char *reason), + + TP_ARGS(reason), + + TP_STRUCT__entry( + __field(char *, reason) + ), + + TP_fast_assign( + __entry->reason = reason; + ), + + TP_printk("%s", __entry->reason) +); + /* * Tracepoint for the registration of a single RCU callback function. * The first argument is the type of RCU, the second argument is @@ -482,6 +518,7 @@ TRACE_EVENT(rcu_torture_read, #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) #define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0) +#define trace_rcu_prep_idle(reason) do { } while (0) #define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) -- cgit v1.2.3 From f535a607c13c7b674e0788ca5765779aa74a01c3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 22 Nov 2011 20:43:02 -0800 Subject: rcu: Eliminate RCU_FAST_NO_HZ grace-period hang With the new implementation of RCU_FAST_NO_HZ, it was possible to hang RCU grace periods as follows: o CPU 0 attempts to go idle, cycles several times through the rcu_prepare_for_idle() loop, then goes dyntick-idle when RCU needs nothing more from it, while still having at least on RCU callback pending. o CPU 1 goes idle with no callbacks. Both CPUs can then stay in dyntick-idle mode indefinitely, preventing the RCU grace period from ever completing, possibly hanging the system. This commit therefore prevents CPUs that have RCU callbacks from entering dyntick-idle mode. This approach also eliminates the need for the end-of-grace-period IPIs used previously. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index debe453c9623..8dd6fcb94946 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -287,7 +287,6 @@ TRACE_EVENT(rcu_dyntick, * * "No callbacks": Nothing to do, no callbacks on this CPU. * "In holdoff": Nothing to do, holding off after unsuccessful attempt. - * "Dyntick with callbacks": Callbacks remain, but RCU doesn't need CPU. * "Begin holdoff": Attempt failed, don't retry until next jiffy. * "More callbacks": Still more callbacks, try again to clear them out. * "Callbacks drained": All callbacks processed, off to dyntick idle! -- cgit v1.2.3 From 7cb92499000e3c86dae653077b1465458a039ef6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 Nov 2011 12:28:34 -0800 Subject: rcu: Permit dyntick-idle with callbacks pending The current implementation of RCU_FAST_NO_HZ prevents CPUs from entering dyntick-idle state if they have RCU callbacks pending. Unfortunately, this has the side-effect of often preventing them from entering this state, especially if at least one other CPU is not in dyntick-idle state. However, the resulting per-tick wakeup is wasteful in many cases: if the CPU has already fully responded to the current RCU grace period, there will be nothing for it to do until this grace period ends, which will frequently take several jiffies. This commit therefore permits a CPU that has done everything that the current grace period has asked of it (rcu_pending() == 0) even if it still as RCU callbacks pending. However, such a CPU posts a timer to wake it up several jiffies later (6 jiffies, based on experience with grace-period lengths). This wakeup is required to handle situations that can result in all CPUs being in dyntick-idle mode, thus failing to ever complete the current grace period. If a CPU wakes up before the timer goes off, then it cancels that timer, thus avoiding spurious wakeups. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 8dd6fcb94946..c75418c3ccb8 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -288,9 +288,10 @@ TRACE_EVENT(rcu_dyntick, * "No callbacks": Nothing to do, no callbacks on this CPU. * "In holdoff": Nothing to do, holding off after unsuccessful attempt. * "Begin holdoff": Attempt failed, don't retry until next jiffy. + * "Dyntick with callbacks": Entering dyntick-idle despite callbacks. * "More callbacks": Still more callbacks, try again to clear them out. * "Callbacks drained": All callbacks processed, off to dyntick idle! - * "CPU awakened at GP end": + * "Timer": Timer fired to cause CPU to continue processing callbacks. */ TRACE_EVENT(rcu_prep_idle, -- cgit v1.2.3 From 4968c300e1fa5389fdf1f1ebd8b8e4aec9aa4a9e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 7 Dec 2011 16:32:40 -0800 Subject: rcu: Augment rcu_batch_end tracing for idle and callback state The current rcu_batch_end event trace records only the name of the RCU flavor and the total number of callbacks that remain queued on the current CPU. This is insufficient for testing and tuning the new dyntick-idle RCU_FAST_NO_HZ code, so this commit adds idle state along with whether or not any of the callbacks that were ready to invoke at the beginning of rcu_do_batch() are still queued. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index c75418c3ccb8..d2d88bed891b 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -461,27 +461,46 @@ TRACE_EVENT(rcu_invoke_kfree_callback, /* * Tracepoint for exiting rcu_do_batch after RCU callbacks have been - * invoked. The first argument is the name of the RCU flavor and - * the second argument is number of callbacks actually invoked. + * invoked. The first argument is the name of the RCU flavor, + * the second argument is number of callbacks actually invoked, + * the third argument (cb) is whether or not any of the callbacks that + * were ready to invoke at the beginning of this batch are still + * queued, the fourth argument (nr) is the return value of need_resched(), + * the fifth argument (iit) is 1 if the current task is the idle task, + * and the sixth argument (risk) is the return value from + * rcu_is_callbacks_kthread(). */ TRACE_EVENT(rcu_batch_end, - TP_PROTO(char *rcuname, int callbacks_invoked), + TP_PROTO(char *rcuname, int callbacks_invoked, + bool cb, bool nr, bool iit, bool risk), - TP_ARGS(rcuname, callbacks_invoked), + TP_ARGS(rcuname, callbacks_invoked, cb, nr, iit, risk), TP_STRUCT__entry( __field(char *, rcuname) __field(int, callbacks_invoked) + __field(bool, cb) + __field(bool, nr) + __field(bool, iit) + __field(bool, risk) ), TP_fast_assign( __entry->rcuname = rcuname; __entry->callbacks_invoked = callbacks_invoked; - ), - - TP_printk("%s CBs-invoked=%d", - __entry->rcuname, __entry->callbacks_invoked) + __entry->cb = cb; + __entry->nr = nr; + __entry->iit = iit; + __entry->risk = risk; + ), + + TP_printk("%s CBs-invoked=%d idle=%c%c%c%c", + __entry->rcuname, __entry->callbacks_invoked, + __entry->cb ? 'C' : '.', + __entry->nr ? 'S' : '.', + __entry->iit ? 'I' : '.', + __entry->risk ? 'R' : '.') ); /* @@ -524,7 +543,8 @@ TRACE_EVENT(rcu_torture_read, #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) -#define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0) +#define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \ + do { } while (0) #define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) #endif /* #else #ifdef CONFIG_RCU_TRACE */ -- cgit v1.2.3 From b3bba872ddb0320a7ecb54decae53c13ceb2ed4c Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Thu, 8 Dec 2011 16:53:54 -0600 Subject: writeback: show writeback reason with __print_symbolic This makes the binary trace understandable by trace-cmd. CC: Dave Chinner CC: Curt Wohlgemuth CC: Steven Rostedt Signed-off-by: Wu Fengguang --- include/trace/events/writeback.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index b99caa8b780c..99d1d0decf88 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -21,6 +21,16 @@ {I_REFERENCED, "I_REFERENCED"} \ ) +#define WB_WORK_REASON \ + {WB_REASON_BACKGROUND, "background"}, \ + {WB_REASON_TRY_TO_FREE_PAGES, "try_to_free_pages"}, \ + {WB_REASON_SYNC, "sync"}, \ + {WB_REASON_PERIODIC, "periodic"}, \ + {WB_REASON_LAPTOP_TIMER, "laptop_timer"}, \ + {WB_REASON_FREE_MORE_MEM, "free_more_memory"}, \ + {WB_REASON_FS_FREE_SPACE, "fs_free_space"}, \ + {WB_REASON_FORKER_THREAD, "forker_thread"} + struct wb_writeback_work; DECLARE_EVENT_CLASS(writeback_work_class, @@ -55,7 +65,7 @@ DECLARE_EVENT_CLASS(writeback_work_class, __entry->for_kupdate, __entry->range_cyclic, __entry->for_background, - wb_reason_name[__entry->reason] + __print_symbolic(__entry->reason, WB_WORK_REASON) ) ); #define DEFINE_WRITEBACK_WORK_EVENT(name) \ @@ -184,7 +194,8 @@ TRACE_EVENT(writeback_queue_io, __entry->older, /* older_than_this in jiffies */ __entry->age, /* older_than_this in relative milliseconds */ __entry->moved, - wb_reason_name[__entry->reason]) + __print_symbolic(__entry->reason, WB_WORK_REASON) + ) ); TRACE_EVENT(global_dirty_state, -- cgit v1.2.3 From 83712358ba0a1497ce59a4f84ce4dd0f803fe6fc Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sat, 11 Jun 2011 19:25:42 -0600 Subject: writeback: dirty ratelimit - think time compensation Compensate the task's think time when computing the final pause time, so that ->dirty_ratelimit can be executed accurately. think time := time spend outside of balance_dirty_pages() In the rare case that the task slept longer than the 200ms period time (result in negative pause time), the sleep time will be compensated in the following periods, too, if it's less than 1 second. Accumulated errors are carefully avoided as long as the max pause area is not hitted. Pseudo code: period = pages_dirtied / task_ratelimit; think = jiffies - dirty_paused_when; pause = period - think; 1) normal case: period > think pause = period - think dirty_paused_when = jiffies + pause nr_dirtied = 0 period time |===============================>| think time pause time |===============>|==============>| ------|----------------|---------------|------------------------ dirty_paused_when jiffies 2) no pause case: period <= think don't pause; reduce future pause time by: dirty_paused_when += period nr_dirtied = 0 period time |===============================>| think time |===================================================>| ------|--------------------------------+-------------------|---- dirty_paused_when jiffies Acked-by: Jan Kara Acked-by: Peter Zijlstra Signed-off-by: Wu Fengguang --- include/trace/events/writeback.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 99d1d0decf88..8588a8918023 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -300,12 +300,13 @@ TRACE_EVENT(balance_dirty_pages, unsigned long dirty_ratelimit, unsigned long task_ratelimit, unsigned long dirtied, + unsigned long period, long pause, unsigned long start_time), TP_ARGS(bdi, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty, dirty_ratelimit, task_ratelimit, - dirtied, pause, start_time), + dirtied, period, pause, start_time), TP_STRUCT__entry( __array( char, bdi, 32) @@ -320,6 +321,8 @@ TRACE_EVENT(balance_dirty_pages, __field(unsigned int, dirtied_pause) __field(unsigned long, paused) __field( long, pause) + __field(unsigned long, period) + __field( long, think) ), TP_fast_assign( @@ -336,6 +339,9 @@ TRACE_EVENT(balance_dirty_pages, __entry->task_ratelimit = KBps(task_ratelimit); __entry->dirtied = dirtied; __entry->dirtied_pause = current->nr_dirtied_pause; + __entry->think = current->dirty_paused_when == 0 ? 0 : + (long)(jiffies - current->dirty_paused_when) * 1000/HZ; + __entry->period = period * 1000 / HZ; __entry->pause = pause * 1000 / HZ; __entry->paused = (jiffies - start_time) * 1000 / HZ; ), @@ -346,7 +352,7 @@ TRACE_EVENT(balance_dirty_pages, "bdi_setpoint=%lu bdi_dirty=%lu " "dirty_ratelimit=%lu task_ratelimit=%lu " "dirtied=%u dirtied_pause=%u " - "paused=%lu pause=%ld", + "paused=%lu pause=%ld period=%lu think=%ld", __entry->bdi, __entry->limit, __entry->setpoint, @@ -358,7 +364,9 @@ TRACE_EVENT(balance_dirty_pages, __entry->dirtied, __entry->dirtied_pause, __entry->paused, /* ms */ - __entry->pause /* ms */ + __entry->pause, /* ms */ + __entry->period, /* ms */ + __entry->think /* ms */ ) ); -- cgit v1.2.3 From 60e07cf515e541ea3e13b888d273c9b19a2ad9dd Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Sun, 18 Dec 2011 15:49:54 -0500 Subject: ext4: do not reference pa_inode from group_pa pa_inode in group_pa is set NULL in ext4_mb_new_group_pa, so pa_inode should be not referenced. Reported-by: Wu Fengguang Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 748ff7cbe555..319538bf17d2 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -573,9 +573,9 @@ TRACE_EVENT(ext4_mb_release_inode_pa, ); TRACE_EVENT(ext4_mb_release_group_pa, - TP_PROTO(struct ext4_prealloc_space *pa), + TP_PROTO(struct super_block *sb, struct ext4_prealloc_space *pa), - TP_ARGS(pa), + TP_ARGS(sb, pa), TP_STRUCT__entry( __field( dev_t, dev ) @@ -585,7 +585,7 @@ TRACE_EVENT(ext4_mb_release_group_pa, ), TP_fast_assign( - __entry->dev = pa->pa_inode->i_sb->s_dev; + __entry->dev = sb->s_dev; __entry->pa_pstart = pa->pa_pstart; __entry->pa_len = pa->pa_len; ), -- cgit v1.2.3 From 1ac9bc6943edf7d181b4b1cc734981350d4f6bae Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Wed, 21 Dec 2011 16:15:40 -0800 Subject: sched/tracing: Add a new tracepoint for sleeptime If CONFIG_SCHEDSTATS is defined, the kernel maintains information about how long the task was sleeping or in the case of iowait, blocking in the kernel before getting woken up. This will be useful for sleep time profiling. Note: this information is only provided for sched_fair. Other scheduling classes may choose to provide this in the future. Note: the delay includes the time spent on the runqueue as well. Signed-off-by: Arun Sharma Acked-by: Peter Zijlstra Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Arnaldo Carvalho de Melo Cc: Andrew Vagin Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1324512940-32060-2-git-send-email-asharma@fb.com Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 50 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index e33ed1bfa113..6ba596b07a72 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -370,6 +370,56 @@ TRACE_EVENT(sched_stat_runtime, (unsigned long long)__entry->vruntime) ); +#ifdef CREATE_TRACE_POINTS +static inline u64 trace_get_sleeptime(struct task_struct *tsk) +{ +#ifdef CONFIG_SCHEDSTATS + u64 block, sleep; + + block = tsk->se.statistics.block_start; + sleep = tsk->se.statistics.sleep_start; + tsk->se.statistics.block_start = 0; + tsk->se.statistics.sleep_start = 0; + + return block ? block : sleep ? sleep : 0; +#else + return 0; +#endif +} +#endif + +/* + * Tracepoint for accounting sleeptime (time the task is sleeping + * or waiting for I/O). + */ +TRACE_EVENT(sched_stat_sleeptime, + + TP_PROTO(struct task_struct *tsk, u64 now), + + TP_ARGS(tsk, now), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( u64, sleeptime ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->sleeptime = trace_get_sleeptime(tsk); + __entry->sleeptime = __entry->sleeptime ? + now - __entry->sleeptime : 0; + ) + TP_perf_assign( + __perf_count(__entry->sleeptime); + ), + + TP_printk("comm=%s pid=%d sleeptime=%Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->sleeptime) +); + /* * Tracepoint for showing priority inheritance modifying a tasks * priority. -- cgit v1.2.3 From b413d48aa70605701c0b395b2e350ca15f5d643a Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 10 Jan 2012 15:07:09 -0800 Subject: mm-tracepoint: rename page-free events Rename mm_page_free_direct into mm_page_free and mm_pagevec_free into mm_page_free_batched Since v2.6.33-5426-gc475dab the kernel triggers mm_page_free_direct for all freed pages, not only for directly freed. So, let's name it properly. For pages freed via page-list we also trigger mm_page_free_batched event. Signed-off-by: Konstantin Khlebnikov Cc: Mel Gorman Cc: KOSAKI Motohiro Reviewed-by: Minchan Kim Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index a9c87ad8331c..5f889f16b0c8 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -147,7 +147,7 @@ DEFINE_EVENT(kmem_free, kmem_cache_free, TP_ARGS(call_site, ptr) ); -TRACE_EVENT(mm_page_free_direct, +TRACE_EVENT(mm_page_free, TP_PROTO(struct page *page, unsigned int order), @@ -169,7 +169,7 @@ TRACE_EVENT(mm_page_free_direct, __entry->order) ); -TRACE_EVENT(mm_pagevec_free, +TRACE_EVENT(mm_page_free_batched, TP_PROTO(struct page *page, int cold), -- cgit v1.2.3 From 43d2b113241d6797b890318767e0af78e313414b Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 10 Jan 2012 15:08:09 -0800 Subject: tracepoint: add tracepoints for debugging oom_score_adj oom_score_adj is used for guarding processes from OOM-Killer. One of problem is that it's inherited at fork(). When a daemon set oom_score_adj and make children, it's hard to know where the value is set. This patch adds some tracepoints useful for debugging. This patch adds 3 trace points. - creating new task - renaming a task (exec) - set oom_score_adj To debug, users need to enable some trace pointer. Maybe filtering is useful as # EVENT=/sys/kernel/debug/tracing/events/task/ # echo "oom_score_adj != 0" > $EVENT/task_newtask/filter # echo "oom_score_adj != 0" > $EVENT/task_rename/filter # echo 1 > $EVENT/enable # EVENT=/sys/kernel/debug/tracing/events/oom/ # echo 1 > $EVENT/enable output will be like this. # grep oom /sys/kernel/debug/tracing/trace bash-7699 [007] d..3 5140.744510: oom_score_adj_update: pid=7699 comm=bash oom_score_adj=-1000 bash-7699 [007] ...1 5151.818022: task_newtask: pid=7729 comm=bash clone_flags=1200011 oom_score_adj=-1000 ls-7729 [003] ...2 5151.818504: task_rename: pid=7729 oldcomm=bash newcomm=ls oom_score_adj=-1000 bash-7699 [002] ...1 5175.701468: task_newtask: pid=7730 comm=bash clone_flags=1200011 oom_score_adj=-1000 grep-7730 [007] ...2 5175.701993: task_rename: pid=7730 oldcomm=bash newcomm=grep oom_score_adj=-1000 Signed-off-by: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/oom.h | 33 ++++++++++++++++++++++++ include/trace/events/task.h | 61 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 include/trace/events/oom.h create mode 100644 include/trace/events/task.h (limited to 'include/trace') diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h new file mode 100644 index 000000000000..dd4ba3b92002 --- /dev/null +++ b/include/trace/events/oom.h @@ -0,0 +1,33 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM oom + +#if !defined(_TRACE_OOM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_OOM_H +#include + +TRACE_EVENT(oom_score_adj_update, + + TP_PROTO(struct task_struct *task), + + TP_ARGS(task), + + TP_STRUCT__entry( + __field( pid_t, pid) + __array( char, comm, TASK_COMM_LEN ) + __field( int, oom_score_adj) + ), + + TP_fast_assign( + __entry->pid = task->pid; + memcpy(__entry->comm, task->comm, TASK_COMM_LEN); + __entry->oom_score_adj = task->signal->oom_score_adj; + ), + + TP_printk("pid=%d comm=%s oom_score_adj=%d", + __entry->pid, __entry->comm, __entry->oom_score_adj) +); + +#endif + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/task.h b/include/trace/events/task.h new file mode 100644 index 000000000000..b53add02e929 --- /dev/null +++ b/include/trace/events/task.h @@ -0,0 +1,61 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM task + +#if !defined(_TRACE_TASK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TASK_H +#include + +TRACE_EVENT(task_newtask, + + TP_PROTO(struct task_struct *task, unsigned long clone_flags), + + TP_ARGS(task, clone_flags), + + TP_STRUCT__entry( + __field( pid_t, pid) + __array( char, comm, TASK_COMM_LEN) + __field( unsigned long, clone_flags) + __field( int, oom_score_adj) + ), + + TP_fast_assign( + __entry->pid = task->pid; + memcpy(__entry->comm, task->comm, TASK_COMM_LEN); + __entry->clone_flags = clone_flags; + __entry->oom_score_adj = task->signal->oom_score_adj; + ), + + TP_printk("pid=%d comm=%s clone_flags=%lx oom_score_adj=%d", + __entry->pid, __entry->comm, + __entry->clone_flags, __entry->oom_score_adj) +); + +TRACE_EVENT(task_rename, + + TP_PROTO(struct task_struct *task, char *comm), + + TP_ARGS(task, comm), + + TP_STRUCT__entry( + __field( pid_t, pid) + __array( char, oldcomm, TASK_COMM_LEN) + __array( char, newcomm, TASK_COMM_LEN) + __field( int, oom_score_adj) + ), + + TP_fast_assign( + __entry->pid = task->pid; + memcpy(entry->oldcomm, task->comm, TASK_COMM_LEN); + memcpy(entry->newcomm, comm, TASK_COMM_LEN); + __entry->oom_score_adj = task->signal->oom_score_adj; + ), + + TP_printk("pid=%d oldcomm=%s newcomm=%s oom_score_adj=%d", + __entry->pid, __entry->oldcomm, + __entry->newcomm, __entry->oom_score_adj) +); + +#endif + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From ea4d349ffa8028c655236497c2ba17c17aaa0d65 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 12 Jan 2012 17:19:20 -0800 Subject: vmscan/trace: Add 'file' info to trace_mm_vmscan_lru_isolate() In trace_mm_vmscan_lru_isolate(), we don't output 'file' information to the trace event and it is a bit inconvenient for the user to get the real information(like pasted below). mm_vmscan_lru_isolate: isolate_mode=2 order=0 nr_requested=32 nr_scanned=32 nr_taken=32 contig_taken=0 contig_dirty=0 contig_failed=0 'active' can be obtained by analyzing mode(Thanks go to Minchan and Mel), So this patch adds 'file' to the trace event and it now looks like: mm_vmscan_lru_isolate: isolate_mode=2 order=0 nr_requested=32 nr_scanned=32 nr_taken=32 contig_taken=0 contig_dirty=0 contig_failed=0 file=0 Signed-off-by: Tao Ma Acked-by: KOSAKI Motohiro Reviewed-by: KAMEZAWA Hiroyuki Cc: Mel Gorman Reviewed-by: Minchan Kim Cc: Rik van Riel Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/vmscan.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index edc4b3d25a2d..f64560e204bc 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -266,9 +266,10 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - isolate_mode_t isolate_mode), + isolate_mode_t isolate_mode, + int file), - TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode), + TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file), TP_STRUCT__entry( __field(int, order) @@ -279,6 +280,7 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, __field(unsigned long, nr_lumpy_dirty) __field(unsigned long, nr_lumpy_failed) __field(isolate_mode_t, isolate_mode) + __field(int, file) ), TP_fast_assign( @@ -290,9 +292,10 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, __entry->nr_lumpy_dirty = nr_lumpy_dirty; __entry->nr_lumpy_failed = nr_lumpy_failed; __entry->isolate_mode = isolate_mode; + __entry->file = file; ), - TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu contig_taken=%lu contig_dirty=%lu contig_failed=%lu", + TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu contig_taken=%lu contig_dirty=%lu contig_failed=%lu file=%d", __entry->isolate_mode, __entry->order, __entry->nr_requested, @@ -300,7 +303,8 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, __entry->nr_taken, __entry->nr_lumpy_taken, __entry->nr_lumpy_dirty, - __entry->nr_lumpy_failed) + __entry->nr_lumpy_failed, + __entry->file) ); DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate, @@ -312,9 +316,10 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - isolate_mode_t isolate_mode), + isolate_mode_t isolate_mode, + int file), - TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode) + TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file) ); @@ -327,9 +332,10 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - isolate_mode_t isolate_mode), + isolate_mode_t isolate_mode, + int file), - TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode) + TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file) ); -- cgit v1.2.3 From 3f7de037fb3727b20bc27332cdcf2488b702394c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 10 Nov 2011 08:29:20 -0500 Subject: Btrfs: add allocator tracepoints I used these tracepoints when figuring out what the cluster stuff was doing, so add them to mainline in case we need to profile this stuff again. Thanks, Signed-off-by: Josef Bacik --- include/trace/events/btrfs.h | 173 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index b31702ac15be..1750c0e6660c 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -16,6 +16,8 @@ struct btrfs_delayed_ref_node; struct btrfs_delayed_tree_ref; struct btrfs_delayed_data_ref; struct btrfs_delayed_ref_head; +struct btrfs_block_group_cache; +struct btrfs_free_cluster; struct map_lookup; struct extent_buffer; @@ -44,6 +46,15 @@ struct extent_buffer; obj, ((obj >= BTRFS_DATA_RELOC_TREE_OBJECTID) || \ (obj <= BTRFS_CSUM_TREE_OBJECTID )) ? __show_root_type(obj) : "-" +#define BTRFS_GROUP_FLAGS \ + { BTRFS_BLOCK_GROUP_DATA, "DATA"}, \ + { BTRFS_BLOCK_GROUP_SYSTEM, "SYSTEM"}, \ + { BTRFS_BLOCK_GROUP_METADATA, "METADATA"}, \ + { BTRFS_BLOCK_GROUP_RAID0, "RAID0"}, \ + { BTRFS_BLOCK_GROUP_RAID1, "RAID1"}, \ + { BTRFS_BLOCK_GROUP_DUP, "DUP"}, \ + { BTRFS_BLOCK_GROUP_RAID10, "RAID10"} + TRACE_EVENT(btrfs_transaction_commit, TP_PROTO(struct btrfs_root *root), @@ -659,6 +670,168 @@ DEFINE_EVENT(btrfs__reserved_extent, btrfs_reserved_extent_free, TP_ARGS(root, start, len) ); +TRACE_EVENT(find_free_extent, + + TP_PROTO(struct btrfs_root *root, u64 num_bytes, u64 empty_size, + u64 data), + + TP_ARGS(root, num_bytes, empty_size, data), + + TP_STRUCT__entry( + __field( u64, root_objectid ) + __field( u64, num_bytes ) + __field( u64, empty_size ) + __field( u64, data ) + ), + + TP_fast_assign( + __entry->root_objectid = root->root_key.objectid; + __entry->num_bytes = num_bytes; + __entry->empty_size = empty_size; + __entry->data = data; + ), + + TP_printk("root = %Lu(%s), len = %Lu, empty_size = %Lu, " + "flags = %Lu(%s)", show_root_type(__entry->root_objectid), + __entry->num_bytes, __entry->empty_size, __entry->data, + __print_flags((unsigned long)__entry->data, "|", + BTRFS_GROUP_FLAGS)) +); + +DECLARE_EVENT_CLASS(btrfs__reserve_extent, + + TP_PROTO(struct btrfs_root *root, + struct btrfs_block_group_cache *block_group, u64 start, + u64 len), + + TP_ARGS(root, block_group, start, len), + + TP_STRUCT__entry( + __field( u64, root_objectid ) + __field( u64, bg_objectid ) + __field( u64, flags ) + __field( u64, start ) + __field( u64, len ) + ), + + TP_fast_assign( + __entry->root_objectid = root->root_key.objectid; + __entry->bg_objectid = block_group->key.objectid; + __entry->flags = block_group->flags; + __entry->start = start; + __entry->len = len; + ), + + TP_printk("root = %Lu(%s), block_group = %Lu, flags = %Lu(%s), " + "start = %Lu, len = %Lu", + show_root_type(__entry->root_objectid), __entry->bg_objectid, + __entry->flags, __print_flags((unsigned long)__entry->flags, + "|", BTRFS_GROUP_FLAGS), + __entry->start, __entry->len) +); + +DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent, + + TP_PROTO(struct btrfs_root *root, + struct btrfs_block_group_cache *block_group, u64 start, + u64 len), + + TP_ARGS(root, block_group, start, len) +); + +DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent_cluster, + + TP_PROTO(struct btrfs_root *root, + struct btrfs_block_group_cache *block_group, u64 start, + u64 len), + + TP_ARGS(root, block_group, start, len) +); + +TRACE_EVENT(btrfs_find_cluster, + + TP_PROTO(struct btrfs_block_group_cache *block_group, u64 start, + u64 bytes, u64 empty_size, u64 min_bytes), + + TP_ARGS(block_group, start, bytes, empty_size, min_bytes), + + TP_STRUCT__entry( + __field( u64, bg_objectid ) + __field( u64, flags ) + __field( u64, start ) + __field( u64, bytes ) + __field( u64, empty_size ) + __field( u64, min_bytes ) + ), + + TP_fast_assign( + __entry->bg_objectid = block_group->key.objectid; + __entry->flags = block_group->flags; + __entry->start = start; + __entry->bytes = bytes; + __entry->empty_size = empty_size; + __entry->min_bytes = min_bytes; + ), + + TP_printk("block_group = %Lu, flags = %Lu(%s), start = %Lu, len = %Lu," + " empty_size = %Lu, min_bytes = %Lu", __entry->bg_objectid, + __entry->flags, + __print_flags((unsigned long)__entry->flags, "|", + BTRFS_GROUP_FLAGS), __entry->start, + __entry->bytes, __entry->empty_size, __entry->min_bytes) +); + +TRACE_EVENT(btrfs_failed_cluster_setup, + + TP_PROTO(struct btrfs_block_group_cache *block_group), + + TP_ARGS(block_group), + + TP_STRUCT__entry( + __field( u64, bg_objectid ) + ), + + TP_fast_assign( + __entry->bg_objectid = block_group->key.objectid; + ), + + TP_printk("block_group = %Lu", __entry->bg_objectid) +); + +TRACE_EVENT(btrfs_setup_cluster, + + TP_PROTO(struct btrfs_block_group_cache *block_group, + struct btrfs_free_cluster *cluster, u64 size, int bitmap), + + TP_ARGS(block_group, cluster, size, bitmap), + + TP_STRUCT__entry( + __field( u64, bg_objectid ) + __field( u64, flags ) + __field( u64, start ) + __field( u64, max_size ) + __field( u64, size ) + __field( int, bitmap ) + ), + + TP_fast_assign( + __entry->bg_objectid = block_group->key.objectid; + __entry->flags = block_group->flags; + __entry->start = cluster->window_start; + __entry->max_size = cluster->max_size; + __entry->size = size; + __entry->bitmap = bitmap; + ), + + TP_printk("block_group = %Lu, flags = %Lu(%s), window_start = %Lu, " + "size = %Lu, max_size = %Lu, bitmap = %d", + __entry->bg_objectid, + __entry->flags, + __print_flags((unsigned long)__entry->flags, "|", + BTRFS_GROUP_FLAGS), __entry->start, + __entry->size, __entry->max_size, __entry->bitmap) +); + #endif /* _TRACE_BTRFS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 8c2a3ca20f6233677ac3222c6506174010eb414f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 10 Jan 2012 10:31:31 -0500 Subject: Btrfs: space leak tracepoints This in addition to a script in my btrfs-tracing tree will help track down space leaks when we're getting space left over in block groups on umount. Thanks, Signed-off-by: Josef Bacik --- include/trace/events/btrfs.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 1750c0e6660c..84f3001a568d 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -55,6 +55,8 @@ struct extent_buffer; { BTRFS_BLOCK_GROUP_DUP, "DUP"}, \ { BTRFS_BLOCK_GROUP_RAID10, "RAID10"} +#define BTRFS_UUID_SIZE 16 + TRACE_EVENT(btrfs_transaction_commit, TP_PROTO(struct btrfs_root *root), @@ -632,6 +634,34 @@ TRACE_EVENT(btrfs_cow_block, __entry->cow_level) ); +TRACE_EVENT(btrfs_space_reservation, + + TP_PROTO(struct btrfs_fs_info *fs_info, char *type, u64 val, + u64 bytes, int reserve), + + TP_ARGS(fs_info, type, val, bytes, reserve), + + TP_STRUCT__entry( + __array( u8, fsid, BTRFS_UUID_SIZE ) + __string( type, type ) + __field( u64, val ) + __field( u64, bytes ) + __field( int, reserve ) + ), + + TP_fast_assign( + memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE); + __assign_str(type, type); + __entry->val = val; + __entry->bytes = bytes; + __entry->reserve = reserve; + ), + + TP_printk("%pU: %s: %Lu %s %Lu", __entry->fsid, __get_str(type), + __entry->val, __entry->reserve ? "reserve" : "release", + __entry->bytes) +); + DECLARE_EVENT_CLASS(btrfs__reserved_extent, TP_PROTO(struct btrfs_root *root, u64 start, u64 len), -- cgit v1.2.3