1 files changed, 182 insertions, 129 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index d46dbb1bc..e77880b50 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -100,6 +100,8 @@ static struct target target = {
 	.uid	= UINT_MAX,
 };
 
+typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);	/* signature shared by the socket/core id getters */
+
 static int			run_count			=  1;
 static bool			no_inherit			= false;
 static volatile pid_t		child_pid			= -1;
@@ -119,7 +121,10 @@ static unsigned int		unit_width			= 4; /* strlen("unit") */
 static bool			forever				= false;
 static struct timespec		ref_time;
 static struct cpu_map		*aggr_map;
-static int			(*aggr_get_id)(struct cpu_map *m, int cpu);
+static aggr_get_id_t		aggr_get_id;
+static bool			append_file;
+static const char		*output_name;
+static int			output_fd;
 
 static volatile int done = 0;
 
@@ -215,7 +220,7 @@ static void read_counters(bool close_counters)
 
 	evlist__for_each(evsel_list, counter) {
 		if (read_counter(counter))
-			pr_warning("failed to read counter %s\n", counter->name);
+			pr_debug("failed to read counter %s\n", counter->name);
 
 		if (perf_stat_process_counter(&stat_config, counter))
 			pr_warning("failed to process counter %s\n", counter->name);
@@ -434,7 +439,7 @@ static void print_noise_pct(double total, double avg)
 
 static void print_noise(struct perf_evsel *evsel, double avg)
 {
-	struct perf_stat *ps;
+	struct perf_stat_evsel *ps;
 
 	if (run_count == 1)
 		return;
@@ -479,6 +484,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
 			csv_sep);
 		break;
 	case AGGR_GLOBAL:
+	case AGGR_UNSET:
 	default:
 		break;
 	}
@@ -510,15 +516,6 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 
 	if (evsel->cgrp)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
-
-	if (csv_output || stat_config.interval)
-		return;
-
-	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
-		fprintf(output, " # %8.3f CPUs utilized          ",
-			avg / avg_stats(&walltime_nsecs_stats));
-	else
-		fprintf(output, "                                   ");
 }
 
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
@@ -526,7 +523,6 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 	FILE *output = stat_config.output;
 	double sc =  evsel->scale;
 	const char *fmt;
-	int cpu = cpu_map__id_to_cpu(id);
 
 	if (csv_output) {
 		fmt = sc != 1.0 ?  "%.2f%s" : "%.0f%s";
@@ -539,9 +535,6 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 
 	aggr_printout(evsel, id, nr);
 
-	if (stat_config.aggr_mode == AGGR_GLOBAL)
-		cpu = 0;
-
 	fprintf(output, fmt, avg, csv_sep);
 
 	if (evsel->unit)
@@ -553,12 +546,24 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 
 	if (evsel->cgrp)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
+}
 
-	if (csv_output || stat_config.interval)
-		return;
+static void printout(int id, int nr, struct perf_evsel *counter, double uval)
+{
+	int cpu = cpu_map__id_to_cpu(id);	/* only consumed by the shadow stats call below */
+
+	if (stat_config.aggr_mode == AGGR_GLOBAL)
+		cpu = 0;	/* global mode always passes cpu 0 to the shadow stats */
 
-	perf_stat__print_shadow_stats(output, evsel, avg, cpu,
-				      stat_config.aggr_mode);
+	if (nsec_counter(counter))	/* value column first: nsec or plain formatter */
+		nsec_printout(id, nr, counter, uval);
+	else
+		abs_printout(id, nr, counter, uval);
+
+	if (!csv_output && !stat_config.interval)	/* shadow stats are skipped for CSV and interval output */
+		perf_stat__print_shadow_stats(stat_config.output, counter,
+					      uval, cpu,
+					      stat_config.aggr_mode);
 }
 
 static void print_aggr(char *prefix)
@@ -614,12 +619,7 @@ static void print_aggr(char *prefix)
 				continue;
 			}
 			uval = val * counter->scale;
-
-			if (nsec_counter(counter))
-				nsec_printout(id, nr, counter, uval);
-			else
-				abs_printout(id, nr, counter, uval);
-
+			printout(id, nr, counter, uval);
 			if (!csv_output)
 				print_noise(counter, 1.0);
 
@@ -650,11 +650,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
 			fprintf(output, "%s", prefix);
 
 		uval = val * counter->scale;
-
-		if (nsec_counter(counter))
-			nsec_printout(thread, 0, counter, uval);
-		else
-			abs_printout(thread, 0, counter, uval);
+		printout(thread, 0, counter, uval);
 
 		if (!csv_output)
 			print_noise(counter, 1.0);
@@ -671,7 +667,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
 static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
 {
 	FILE *output = stat_config.output;
-	struct perf_stat *ps = counter->priv;
+	struct perf_stat_evsel *ps = counter->priv;
 	double avg = avg_stats(&ps->res_stats[0]);
 	int scaled = counter->counts->scaled;
 	double uval;
@@ -704,11 +700,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
 	}
 
 	uval = avg * counter->scale;
-
-	if (nsec_counter(counter))
-		nsec_printout(-1, 0, counter, uval);
-	else
-		abs_printout(-1, 0, counter, uval);
+	printout(-1, 0, counter, uval);
 
 	print_noise(counter, avg);
 
@@ -761,12 +753,7 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
 		}
 
 		uval = val * counter->scale;
-
-		if (nsec_counter(counter))
-			nsec_printout(cpu, 0, counter, uval);
-		else
-			abs_printout(cpu, 0, counter, uval);
-
+		printout(cpu, 0, counter, uval);
 		if (!csv_output)
 			print_noise(counter, 1.0);
 		print_running(run, ena);
@@ -799,6 +786,8 @@ static void print_interval(char *prefix, struct timespec *ts)
 		case AGGR_GLOBAL:
 		default:
 			fprintf(output, "#           time             counts %*s events\n", unit_width, "unit");
+		case AGGR_UNSET:
+			break;
 		}
 	}
 
@@ -880,6 +869,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
 		evlist__for_each(evsel_list, counter)
 			print_counter(counter, prefix);
 		break;
+	case AGGR_UNSET:
 	default:
 		break;
 	}
@@ -940,30 +930,151 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
 	return 0;
 }
 
+static const struct option stat_options[] = {	/* option table handed to parse_options() by cmd_stat() */
+	OPT_BOOLEAN('T', "transaction", &transaction_run,
+		    "hardware transaction statistics"),
+	OPT_CALLBACK('e', "event", &evsel_list, "event",
+		     "event selector. use 'perf list' to list available events",
+		     parse_events_option),
+	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
+		     "event filter", parse_filter),
+	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
+		    "child tasks do not inherit counters"),
+	OPT_STRING('p', "pid", &target.pid, "pid",
+		   "stat events on existing process id"),
+	OPT_STRING('t', "tid", &target.tid, "tid",
+		   "stat events on existing thread id"),
+	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
+		    "system-wide collection from all CPUs"),
+	OPT_BOOLEAN('g', "group", &group,
+		    "put the counters into a counter group"),
+	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show counter open errors, etc)"),
+	OPT_INTEGER('r', "repeat", &run_count,
+		    "repeat command and print average + stddev (max: 100, forever: 0)"),
+	OPT_BOOLEAN('n', "null", &null_run,
+		    "null run - don't start any counters"),
+	OPT_INCR('d', "detailed", &detailed_run,
+		    "detailed run - start a lot of events"),
+	OPT_BOOLEAN('S', "sync", &sync_run,
+		    "call sync() before starting a run"),
+	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
+			   "print large numbers with thousands\' separators",
+			   stat__set_big_num),
+	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
+		    "list of cpus to monitor in system-wide"),
+	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
+		    "disable CPU count aggregation", AGGR_NONE),
+	OPT_STRING('x', "field-separator", &csv_sep, "separator",
+		   "print counts with custom separator"),
+	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
+		     "monitor event in cgroup name only", parse_cgroups),
+	OPT_STRING('o', "output", &output_name, "file", "output file name"),
+	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
+	OPT_INTEGER(0, "log-fd", &output_fd,
+		    "log output to fd, instead of stderr"),
+	OPT_STRING(0, "pre", &pre_cmd, "command",
+			"command to run prior to the measured command"),
+	OPT_STRING(0, "post", &post_cmd, "command",
+			"command to run after the measured command"),
+	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
+		    "print counts at regular interval in ms (>= 10)"),	/* cmd_stat() rejects < 10 and warns below 100 */
+	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
+		     "aggregate counts per processor socket", AGGR_SOCKET),
+	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
+		     "aggregate counts per physical processor core", AGGR_CORE),
+	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
+		     "aggregate counts per thread", AGGR_THREAD),
+	OPT_UINTEGER('D', "delay", &initial_delay,
+		     "ms to wait before starting measurement after program start"),
+	OPT_END()
+};
+
+static int perf_stat__get_socket(struct cpu_map *map, int cpu)
+{
+	return cpu_map__get_socket(map, cpu, NULL);	/* two-argument adapter: the third argument is always NULL */
+}
+
+static int perf_stat__get_core(struct cpu_map *map, int cpu)
+{
+	return cpu_map__get_core(map, cpu, NULL);	/* two-argument adapter: the third argument is always NULL */
+}
+
+static int cpu_map__get_max(struct cpu_map *map)
+{
+	int highest = -1;	/* stays -1 when the map has no entries */
+	int pos = map->nr;
+
+	while (pos-- > 0)	/* visiting order does not matter for a maximum */
+		if (highest < map->map[pos])
+			highest = map->map[pos];
+
+	return highest;
+}
+
+static struct cpu_map *cpus_aggr_map;	/* cache: cpu number -> aggregation id, -1 while not yet looked up */
+
+static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
+{
+	int cpu;	/* cpu number stored at position idx; it indexes the cpus_aggr_map cache */
+
+	if (idx < 0 || idx >= map->nr)	/* idx is a position in map, not a cpu number */
+		return -1;
+
+	cpu = map->map[idx];
+	if (cpu < 0 || cpu >= cpus_aggr_map->nr)	/* no cache slot (assumes ->nr == slots allocated): uncached lookup */
+		return get_id(map, idx);
+	if (cpus_aggr_map->map[cpu] == -1)	/* -1 marks an id that has not been resolved yet */
+		cpus_aggr_map->map[cpu] = get_id(map, idx);
+	return cpus_aggr_map->map[cpu];
+}
+
+static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
+{
+	return perf_stat__get_aggr(perf_stat__get_socket, map, idx);	/* socket id, remembered per cpu number */
+}
+
+static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
+{
+	return perf_stat__get_aggr(perf_stat__get_core, map, idx);	/* core id, remembered per cpu number */
+}
+
 static int perf_stat_init_aggr_mode(void)
 {
+	int nr;	/* highest cpu number found in evsel_list->cpus */
+
 	switch (stat_config.aggr_mode) {
 	case AGGR_SOCKET:
 		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
 			perror("cannot build socket map");
 			return -1;
 		}
-		aggr_get_id = cpu_map__get_socket;
+		aggr_get_id = perf_stat__get_socket_cached;
 		break;
 	case AGGR_CORE:
 		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
 			perror("cannot build core map");
 			return -1;
 		}
-		aggr_get_id = cpu_map__get_core;
+		aggr_get_id = perf_stat__get_core_cached;
 		break;
 	case AGGR_NONE:
 	case AGGR_GLOBAL:
 	case AGGR_THREAD:
+	case AGGR_UNSET:
 	default:
 		break;
 	}
-	return 0;
+
+	/*
+	 * Size the cpu -> aggregation id cache: it is indexed by cpu
+	 * number, so it needs (highest cpu in evsel_list->cpus) + 1
+	 * slots. It is allocated in every aggr mode, not just socket/core.
+	 */
+	nr = cpu_map__get_max(evsel_list->cpus);
+	cpus_aggr_map = cpu_map__empty_new(nr + 1);
+	return cpus_aggr_map ? 0 : -ENOMEM;
 }
 
 /*
@@ -1127,69 +1238,6 @@ static int add_default_attributes(void)
 
 int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 {
-	bool append_file = false;
-	int output_fd = 0;
-	const char *output_name	= NULL;
-	const struct option options[] = {
-	OPT_BOOLEAN('T', "transaction", &transaction_run,
-		    "hardware transaction statistics"),
-	OPT_CALLBACK('e', "event", &evsel_list, "event",
-		     "event selector. use 'perf list' to list available events",
-		     parse_events_option),
-	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
-		     "event filter", parse_filter),
-	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
-		    "child tasks do not inherit counters"),
-	OPT_STRING('p', "pid", &target.pid, "pid",
-		   "stat events on existing process id"),
-	OPT_STRING('t', "tid", &target.tid, "tid",
-		   "stat events on existing thread id"),
-	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
-		    "system-wide collection from all CPUs"),
-	OPT_BOOLEAN('g', "group", &group,
-		    "put the counters into a counter group"),
-	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
-	OPT_INCR('v', "verbose", &verbose,
-		    "be more verbose (show counter open errors, etc)"),
-	OPT_INTEGER('r', "repeat", &run_count,
-		    "repeat command and print average + stddev (max: 100, forever: 0)"),
-	OPT_BOOLEAN('n', "null", &null_run,
-		    "null run - dont start any counters"),
-	OPT_INCR('d', "detailed", &detailed_run,
-		    "detailed run - start a lot of events"),
-	OPT_BOOLEAN('S', "sync", &sync_run,
-		    "call sync() before starting a run"),
-	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
-			   "print large numbers with thousands\' separators",
-			   stat__set_big_num),
-	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
-		    "list of cpus to monitor in system-wide"),
-	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
-		    "disable CPU count aggregation", AGGR_NONE),
-	OPT_STRING('x', "field-separator", &csv_sep, "separator",
-		   "print counts with custom separator"),
-	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
-		     "monitor event in cgroup name only", parse_cgroups),
-	OPT_STRING('o', "output", &output_name, "file", "output file name"),
-	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
-	OPT_INTEGER(0, "log-fd", &output_fd,
-		    "log output to fd, instead of stderr"),
-	OPT_STRING(0, "pre", &pre_cmd, "command",
-			"command to run prior to the measured command"),
-	OPT_STRING(0, "post", &post_cmd, "command",
-			"command to run after to the measured command"),
-	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
-		    "print counts at regular interval in ms (>= 100)"),
-	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
-		     "aggregate counts per processor socket", AGGR_SOCKET),
-	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
-		     "aggregate counts per physical processor core", AGGR_CORE),
-	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
-		     "aggregate counts per thread", AGGR_THREAD),
-	OPT_UINTEGER('D', "delay", &initial_delay,
-		     "ms to wait before starting measurement after program start"),
-	OPT_END()
-	};
 	const char * const stat_usage[] = {
 		"perf stat [<options>] [<command>]",
 		NULL
@@ -1205,7 +1253,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (evsel_list == NULL)
 		return -ENOMEM;
 
-	argc = parse_options(argc, argv, options, stat_usage,
+	argc = parse_options(argc, argv, stat_options, stat_usage,
 		PARSE_OPT_STOP_AT_NON_OPTION);
 
 	interval = stat_config.interval;
@@ -1215,14 +1263,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	if (output_name && output_fd) {
 		fprintf(stderr, "cannot use both --output and --log-fd\n");
-		parse_options_usage(stat_usage, options, "o", 1);
-		parse_options_usage(NULL, options, "log-fd", 0);
+		parse_options_usage(stat_usage, stat_options, "o", 1);
+		parse_options_usage(NULL, stat_options, "log-fd", 0);
 		goto out;
 	}
 
 	if (output_fd < 0) {
 		fprintf(stderr, "argument to --log-fd must be a > 0\n");
-		parse_options_usage(stat_usage, options, "log-fd", 0);
+		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
 		goto out;
 	}
 
@@ -1262,8 +1310,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		/* User explicitly passed -B? */
 		if (big_num_opt == 1) {
 			fprintf(stderr, "-B option not supported with -x\n");
-			parse_options_usage(stat_usage, options, "B", 1);
-			parse_options_usage(NULL, options, "x", 1);
+			parse_options_usage(stat_usage, stat_options, "B", 1);
+			parse_options_usage(NULL, stat_options, "x", 1);
 			goto out;
 		} else /* Nope, so disable big number formatting */
 			big_num = false;
@@ -1271,11 +1319,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		big_num = false;
 
 	if (!argc && target__none(&target))
-		usage_with_options(stat_usage, options);
+		usage_with_options(stat_usage, stat_options);
 
 	if (run_count < 0) {
 		pr_err("Run count must be a positive number\n");
-		parse_options_usage(stat_usage, options, "r", 1);
+		parse_options_usage(stat_usage, stat_options, "r", 1);
 		goto out;
 	} else if (run_count == 0) {
 		forever = true;
@@ -1285,8 +1333,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
 		fprintf(stderr, "The --per-thread option is only available "
 			"when monitoring via -p -t options.\n");
-		parse_options_usage(NULL, options, "p", 1);
-		parse_options_usage(NULL, options, "t", 1);
+		parse_options_usage(NULL, stat_options, "p", 1);
+		parse_options_usage(NULL, stat_options, "t", 1);
 		goto out;
 	}
 
@@ -1300,9 +1348,9 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		fprintf(stderr, "both cgroup and no-aggregation "
 			"modes only available in system-wide mode\n");
 
-		parse_options_usage(stat_usage, options, "G", 1);
-		parse_options_usage(NULL, options, "A", 1);
-		parse_options_usage(NULL, options, "a", 1);
+		parse_options_usage(stat_usage, stat_options, "G", 1);
+		parse_options_usage(NULL, stat_options, "A", 1);
+		parse_options_usage(NULL, stat_options, "a", 1);
 		goto out;
 	}
 
@@ -1314,12 +1362,12 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
 		if (target__has_task(&target)) {
 			pr_err("Problems finding threads of monitor\n");
-			parse_options_usage(stat_usage, options, "p", 1);
-			parse_options_usage(NULL, options, "t", 1);
+			parse_options_usage(stat_usage, stat_options, "p", 1);
+			parse_options_usage(NULL, stat_options, "t", 1);
 		} else if (target__has_cpu(&target)) {
 			perror("failed to parse CPUs map");
-			parse_options_usage(stat_usage, options, "C", 1);
-			parse_options_usage(NULL, options, "a", 1);
+			parse_options_usage(stat_usage, stat_options, "C", 1);
+			parse_options_usage(NULL, stat_options, "a", 1);
 		}
 		goto out;
 	}
@@ -1332,9 +1380,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		thread_map__read_comms(evsel_list->threads);
 
 	if (interval && interval < 100) {
-		pr_err("print interval must be >= 100ms\n");
-		parse_options_usage(stat_usage, options, "I", 1);
-		goto out;
+		if (interval < 10) {
+			pr_err("print interval must be >= 10ms\n");
+			parse_options_usage(stat_usage, stat_options, "I", 1);
+			goto out;
+		} else
+			pr_warning("print interval < 100ms. "
+				   "The overhead percentage could be high in some cases. "
+				   "Please proceed with caution.\n");
 	}
 
 	if (perf_evlist__alloc_stats(evsel_list, interval))