From 94403f8863d0d1d2005291b2ef0719c2534aa303 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 24 Apr 2011 08:18:31 +0200 Subject: perf events: Add stalled cycles generic event - PERF_COUNT_HW_STALLED_CYCLES The new PERF_COUNT_HW_STALLED_CYCLES event tries to approximate cycles the CPU does nothing useful, because it is stalled on a cache-miss or some other condition. Acked-by: Peter Zijlstra Acked-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-fue11vymwqsoo5to72jxxjyl@git.kernel.org Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/perf/util/parse-events.c') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 952b4ae3d954..1869e4c646db 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -38,6 +38,7 @@ static struct event_symbol event_symbols[] = { { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, { CHW(BRANCH_MISSES), "branch-misses", "" }, { CHW(BUS_CYCLES), "bus-cycles", "" }, + { CHW(STALLED_CYCLES), "stalled-cycles", "" }, { CSW(CPU_CLOCK), "cpu-clock", "" }, { CSW(TASK_CLOCK), "task-clock", "" }, -- cgit v1.2.3 From b908debd4eef91471016138569f7a9e292be682e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 Apr 2011 03:55:40 +0200 Subject: perf tools: Accept case-insensitive symbolic event variants We currently fail on something like '-e CPU-migrations', with: invalid or unsupported event: 'CPU-migrations' While 'CPU-migrations' is how we actually print out the event in the default perf stat output: Performance counter stats for 'true': 0.202204 task-clock-msecs # 0.282 CPUs 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec So change the matching to be case-insensitive. Acked-by: Peter Zijlstra Acked-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-omcm3edjjtx83a4kh2e244se@git.kernel.org Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools/perf/util/parse-events.c') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 1869e4c646db..8e54bdb553c3 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -649,13 +649,15 @@ static int check_events(const char *str, unsigned int i) int n; n = strlen(event_symbols[i].symbol); - if (!strncmp(str, event_symbols[i].symbol, n)) + if (!strncasecmp(str, event_symbols[i].symbol, n)) return n; n = strlen(event_symbols[i].alias); - if (n) - if (!strncmp(str, event_symbols[i].alias, n)) + if (n) { + if (!strncasecmp(str, event_symbols[i].alias, n)) return n; + } + return 0; } -- cgit v1.2.3 From ceb53fbf6dbb1df26d38379a262c6981fe73dd36 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 Apr 2011 04:06:33 +0200 Subject: perf stat: Fail more clearly when an invalid modifier is specified Currently we fail without printing any error message on "perf stat -e task-clock-msecs". The reason is that the task-clock event is matched and the "-msecs" postfix is assumed to be an event modifier - but is not recognized. This patch changes the code to be more informative: $ perf stat -e task-clock-msecs true invalid event modifier: '-msecs' Run 'perf list' for a list of valid events and modifiers And restructures the return value of parse_event_modifier() to allow the printing of all variants of invalid event modifiers. Acked-by: Peter Zijlstra Acked-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-wlaw3dvz1ly6wple8l52cfca@git.kernel.org Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'tools/perf/util/parse-events.c') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 8e54bdb553c3..b686269427ea 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -721,15 +721,19 @@ parse_numeric_event(const char **strp, struct perf_event_attr *attr) return EVT_FAILED; } -static enum event_result +static int parse_event_modifier(const char **strp, struct perf_event_attr *attr) { const char *str = *strp; int exclude = 0; int eu = 0, ek = 0, eh = 0, precise = 0; - if (*str++ != ':') + if (!*str) return 0; + + if (*str++ != ':') + return -1; + while (*str) { if (*str == 'u') { if (!exclude) @@ -750,14 +754,16 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr) ++str; } - if (str >= *strp + 2) { - *strp = str; - attr->exclude_user = eu; - attr->exclude_kernel = ek; - attr->exclude_hv = eh; - attr->precise_ip = precise; - return 1; - } + if (str < *strp + 2) + return -1; + + *strp = str; + + attr->exclude_user = eu; + attr->exclude_kernel = ek; + attr->exclude_hv = eh; + attr->precise_ip = precise; + return 0; } @@ -800,7 +806,12 @@ parse_event_symbols(const struct option *opt, const char **str, return EVT_FAILED; modifier: - parse_event_modifier(str, attr); + if (parse_event_modifier(str, attr) < 0) { + fprintf(stderr, "invalid event modifier: '%s'\n", *str); + fprintf(stderr, "Run 'perf list' for a list of valid events and modifiers\n"); + + return EVT_FAILED; + } return ret; } -- cgit v1.2.3 From 749141d926faf23ef811686a8050e7cf13dc223f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 Apr 2011 04:24:57 +0200 Subject: perf stat: Make all displayed event names parseable as well Right now we display this by default: 0.202204 task-clock-msecs # 0.282 CPUs 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 85 page-faults # 0.420 M/sec The task-clock-msecs event cannot actually be passed back as an event name, the event name we recognize is 'task-clock'. So change the output of the cpu-clock and task-clock events to be idempotent. ( Units should be printed out in the right-side column, if needed. ) Acked-by: Peter Zijlstra Acked-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-lexrnbzy09asscgd4f7oac4i@git.kernel.org Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf/util/parse-events.c') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b686269427ea..b5bfef12f399 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -70,8 +70,8 @@ static const char *hw_event_names[] = { }; static const char *sw_event_names[] = { - "cpu-clock-msecs", - "task-clock-msecs", + "cpu-clock", + "task-clock", "page-faults", "context-switches", "CPU-migrations", -- cgit v1.2.3 From 1fc570ad89e55dc32dfa4dda1311948b38f26524 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 Apr 2011 05:20:22 +0200 Subject: perf stat: Add stalled cycles to the default output The new default output looks like this: Performance counter stats for './loop_1b_instructions': 236.010686 task-clock # 0.996 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 99 page-faults # 0.000 M/sec 756,487,646 cycles # 3.205 GHz 354,938,996 stalled-cycles # 46.92% of all cycles are idle 1,001,403,797 instructions # 1.32 insns per cycle # 0.35 stalled cycles per insn 100,279,773 branches # 424.895 M/sec 12,646 branch-misses # 0.013 % of all branches 0.236902540 seconds time elapsed We dropped cache-refs and cache-misses and added stalled-cycles - this is a more generic "how well utilized is the CPU" metric. If the stalled-cycles ratio is too high then more specific measurements can be taken to figure out the source of the inefficiency. Acked-by: Peter Zijlstra Acked-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-pbpl2l4mn797s69bclfpwkwn@git.kernel.org Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'tools/perf/util/parse-events.c') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b5bfef12f399..bbbb735268ef 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -32,13 +32,13 @@ char debugfs_path[MAXPATHLEN]; static struct event_symbol event_symbols[] = { { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, + { CHW(STALLED_CYCLES), "stalled-cycles", "idle-cycles" }, { CHW(INSTRUCTIONS), "instructions", "" }, { CHW(CACHE_REFERENCES), "cache-references", "" }, { CHW(CACHE_MISSES), "cache-misses", "" }, { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, { CHW(BRANCH_MISSES), "branch-misses", "" }, { CHW(BUS_CYCLES), "bus-cycles", "" }, - { CHW(STALLED_CYCLES), "stalled-cycles", "" }, { CSW(CPU_CLOCK), "cpu-clock", "" }, { CSW(TASK_CLOCK), "task-clock", "" }, @@ -54,9 +54,9 @@ static struct event_symbol event_symbols[] = { #define __PERF_EVENT_FIELD(config, name) \ ((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT) -#define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW) +#define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW) #define PERF_EVENT_CONFIG(config) __PERF_EVENT_FIELD(config, CONFIG) -#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) +#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) #define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) static const char *hw_event_names[] = { @@ -67,6 +67,7 @@ static const char *hw_event_names[] = { "branches", "branch-misses", "bus-cycles", + "stalled-cycles", }; static const char *sw_event_names[] = { @@ -308,7 +309,7 @@ const char *__event_name(int type, u64 config) switch (type) { case PERF_TYPE_HARDWARE: - if (config < PERF_COUNT_HW_MAX) + if (config < PERF_COUNT_HW_MAX && hw_event_names[config]) return hw_event_names[config]; return "unknown-hardware"; @@ -334,7 +335,7 @@ const char *__event_name(int type, u64 config) } case PERF_TYPE_SOFTWARE: - if (config < PERF_COUNT_SW_MAX) + if (config < PERF_COUNT_SW_MAX && sw_event_names[config]) return sw_event_names[config]; return "unknown-software"; -- cgit v1.2.3 From 129c04cb8ce2e4bf3f17223f58ef16aa8a2cb3b8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Apr 2011 14:41:28 +0200 Subject: perf tools: Add front-end and back-end stalled cycles support Update perf tooling to deal with front-end and back-end stalled cycles events. Add both the default 'perf stat' output. Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-7y40wib8n002io7hjpn1dsrm@git.kernel.org Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'tools/perf/util/parse-events.c') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index bbbb735268ef..04d2f0a96674 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -31,24 +31,25 @@ char debugfs_path[MAXPATHLEN]; #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x static struct event_symbol event_symbols[] = { - { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, - { CHW(STALLED_CYCLES), "stalled-cycles", "idle-cycles" }, - { CHW(INSTRUCTIONS), "instructions", "" }, - { CHW(CACHE_REFERENCES), "cache-references", "" }, - { CHW(CACHE_MISSES), "cache-misses", "" }, - { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, - { CHW(BRANCH_MISSES), "branch-misses", "" }, - { CHW(BUS_CYCLES), "bus-cycles", "" }, - - { CSW(CPU_CLOCK), "cpu-clock", "" }, - { CSW(TASK_CLOCK), "task-clock", "" }, - { CSW(PAGE_FAULTS), "page-faults", "faults" }, - { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, - { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, - { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, - { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, - { CSW(ALIGNMENT_FAULTS), "alignment-faults", "" }, - { CSW(EMULATION_FAULTS), "emulation-faults", "" }, + { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, + { CHW(STALLED_CYCLES_FRONTEND), "stalled-cycles-frontend", "idle-cycles-frontend" }, + { CHW(STALLED_CYCLES_BACKEND), "stalled-cycles-backend", "idle-cycles-backend" }, + { CHW(INSTRUCTIONS), "instructions", "" }, + { CHW(CACHE_REFERENCES), "cache-references", "" }, + { CHW(CACHE_MISSES), "cache-misses", "" }, + { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, + { CHW(BRANCH_MISSES), "branch-misses", "" }, + { CHW(BUS_CYCLES), "bus-cycles", "" }, + + { CSW(CPU_CLOCK), "cpu-clock", "" }, + { CSW(TASK_CLOCK), "task-clock", "" }, + { CSW(PAGE_FAULTS), "page-faults", "faults" }, + { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, + { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, + { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, + { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, + { CSW(ALIGNMENT_FAULTS), "alignment-faults", "" }, + { CSW(EMULATION_FAULTS), "emulation-faults", "" }, }; #define __PERF_EVENT_FIELD(config, name) \ -- cgit v1.2.3 From d3d1e86da07b4565815e3dbcd082f53017d215f8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Apr 2011 13:49:08 +0200 Subject: perf stat: Analyze front-end and back-end stall counts Sample output: Performance counter stats for './loop_1b': 873.691065 task-clock # 1.000 CPUs utilized 1 context-switches # 0.000 M/sec 1 CPU-migrations # 0.000 M/sec 96 page-faults # 0.000 M/sec 2,012,637,222 cycles # 2.304 GHz (66.58%) 1,001,397,911 stalled-cycles-frontend # 49.76% frontend cycles idle (66.58%) 7,523,398 stalled-cycles-backend # 0.37% backend cycles idle (66.76%) 2,004,551,046 instructions # 1.00 insns per cycle # 0.50 stalled cycles per insn (66.80%) 1,001,304,992 branches # 1146.063 M/sec (66.76%) 39,453 branch-misses # 0.00% of all branches (66.64%) 0.874046121 seconds time elapsed Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-7y40wib8n003io7hjpn1dsrm@git.kernel.org Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools/perf/util/parse-events.c') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 04d2f0a96674..8a407f3e286f 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -60,7 +60,7 @@ static struct event_symbol event_symbols[] = { #define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) #define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) -static const char *hw_event_names[] = { +static const char *hw_event_names[PERF_COUNT_HW_MAX] = { "cycles", "instructions", "cache-references", @@ -68,10 +68,11 @@ static const char *hw_event_names[] = { "branches", "branch-misses", "bus-cycles", - "stalled-cycles", + "stalled-cycles-frontend", + "stalled-cycles-backend", }; -static const char *sw_event_names[] = { +static const char *sw_event_names[PERF_COUNT_SW_MAX] = { "cpu-clock", "task-clock", "page-faults", -- cgit v1.2.3 From 947b4ad1d198b7303ecc961f4939a331be0c48f0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Apr 2011 22:52:42 +0200 Subject: perf list: Fix max event string size Recent stalled-cycles event names were larger than the 40 chars printout used by perf list. Extend that, make it robust for future extensions and also adjust alignments in face of wider event names. Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-7y40wib8n009io7hjpn1dsrm@git.kernel.org Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'tools/perf/util/parse-events.c') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 8a407f3e286f..ffa493a24333 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -929,7 +929,7 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob) snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent.d_name, evt_dirent.d_name); - printf(" %-42s [%s]\n", evt_path, + printf(" %-50s [%s]\n", evt_path, event_type_descriptors[PERF_TYPE_TRACEPOINT]); } closedir(evt_dir); @@ -994,7 +994,7 @@ void print_events_type(u8 type) else snprintf(name, sizeof(name), "%s", syms->symbol); - printf(" %-42s [%s]\n", name, + printf(" %-50s [%s]\n", name, event_type_descriptors[type]); } } @@ -1012,11 +1012,10 @@ int print_hwcache_events(const char *event_glob) for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { char *name = event_cache_name(type, op, i); - if (event_glob != NULL && - !strglobmatch(name, event_glob)) + if (event_glob != NULL && !strglobmatch(name, event_glob)) continue; - printf(" %-42s [%s]\n", name, + printf(" %-50s [%s]\n", name, event_type_descriptors[PERF_TYPE_HW_CACHE]); ++printed; } @@ -1026,14 +1025,16 @@ int print_hwcache_events(const char *event_glob) return printed; } +#define MAX_NAME_LEN 100 + /* * Print the help text for the event symbols: */ void print_events(const char *event_glob) { - struct event_symbol *syms = event_symbols; unsigned int i, type, prev_type = -1, printed = 0, ntypes_printed = 0; - char name[40]; + struct event_symbol *syms = event_symbols; + char name[MAX_NAME_LEN]; printf("\n"); printf("List of pre-defined events (to be used in -e):\n"); @@ -1053,10 +1054,10 @@ void print_events(const char *event_glob) continue; if (strlen(syms->alias)) - sprintf(name, "%s OR %s", syms->symbol, syms->alias); + snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); else - strcpy(name, syms->symbol); - printf(" %-42s [%s]\n", name, + strncpy(name, syms->symbol, MAX_NAME_LEN); + printf(" %-50s [%s]\n", name, event_type_descriptors[type]); prev_type = type; @@ -1073,12 +1074,12 @@ void print_events(const char *event_glob) return; printf("\n"); - printf(" %-42s [%s]\n", + printf(" %-50s [%s]\n", "rNNN (see 'perf list --help' on how to encode it)", event_type_descriptors[PERF_TYPE_RAW]); printf("\n"); - printf(" %-42s [%s]\n", + printf(" %-50s [%s]\n", "mem:[:access]", event_type_descriptors[PERF_TYPE_BREAKPOINT]); printf("\n"); -- cgit v1.2.3 From 94692349c4fc1bc74c19a28f9379509361a06a3b Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 17 May 2011 15:36:19 +0200 Subject: perf: Fix multi-event parsing bug This patch fixes an issue with event parsing. The following commit appears to have broken the ability to specify a comma separated list of events: commit ceb53fbf6dbb1df26d38379a262c6981fe73dd36 Author: Ingo Molnar Date: Wed Apr 27 04:06:33 2011 +0200 perf stat: Fail more clearly when an invalid modifier is specified This patch fixes this while preserving the desired effect: $ perf stat -e instructions:u,instructions:k ls /dev/null /dev/null Performance counter stats for 'ls /dev/null': 365956 instructions:u # 0.00 insns per cycle 731806 instructions:k # 0.00 insns per cycle 0.001108862 seconds time elapsed $ perf stat -e task-clock-msecs true invalid event modifier: '-msecs' Run 'perf list' for a list of valid events and modifiers Signed-off-by: Stephane Eranian Cc: acme@redhat.com Cc: peterz@infradead.org Cc: fweisbec@gmail.com Link: http://lkml.kernel.org/r/20110517133619.GA6999@quad Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/perf/util/parse-events.c') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index ffa493a24333..41982c373faf 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -734,6 +734,9 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr) if (!*str) return 0; + if (*str == ',') + return 0; + if (*str++ != ':') return -1; -- cgit v1.2.3