summaryrefslogtreecommitdiff
path: root/tools/power/cpupower/utils/idle_monitor
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-08-05 17:04:01 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-08-05 17:04:01 -0300
commit57f0f512b273f60d52568b8c6b77e17f5636edc0 (patch)
tree5e910f0e82173f4ef4f51111366a3f1299037a7b /tools/power/cpupower/utils/idle_monitor
Initial import
Diffstat (limited to 'tools/power/cpupower/utils/idle_monitor')
-rw-r--r--tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c335
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c196
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c455
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h85
-rw-r--r--tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c196
-rw-r--r--tools/power/cpupower/utils/idle_monitor/idle_monitors.def8
-rw-r--r--tools/power/cpupower/utils/idle_monitor/idle_monitors.h18
-rw-r--r--tools/power/cpupower/utils/idle_monitor/mperf_monitor.c338
-rw-r--r--tools/power/cpupower/utils/idle_monitor/nhm_idle.c216
-rw-r--r--tools/power/cpupower/utils/idle_monitor/snb_idle.c200
10 files changed, 2047 insertions, 0 deletions
diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
new file mode 100644
index 000000000..2116df9ad
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
@@ -0,0 +1,335 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * PCI initialization based on example code from:
+ * Andreas Herrmann <andreas.herrmann3@amd.com>
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <time.h>
+#include <string.h>
+
+#include <pci/pci.h>
+
+#include "idle_monitor/cpupower-monitor.h"
+#include "helpers/helpers.h"
+
+#define PCI_NON_PC0_OFFSET 0xb0
+#define PCI_PC1_OFFSET 0xb4
+#define PCI_PC6_OFFSET 0xb8
+
+#define PCI_MONITOR_ENABLE_REG 0xe0
+
+#define PCI_NON_PC0_ENABLE_BIT 0
+#define PCI_PC1_ENABLE_BIT 1
+#define PCI_PC6_ENABLE_BIT 2
+
+#define PCI_NBP1_STAT_OFFSET 0x98
+#define PCI_NBP1_ACTIVE_BIT 2
+#define PCI_NBP1_ENTERED_BIT 1
+
+#define PCI_NBP1_CAP_OFFSET 0x90
+#define PCI_NBP1_CAPABLE_BIT 31
+
+#define OVERFLOW_MS 343597 /* 32 bit register filled at 12500 HZ
+ (1 tick per 80ns) */
+
+enum amd_fam14h_states {NON_PC0 = 0, PC1, PC6, NBP1,
+ AMD_FAM14H_STATE_NUM};
+
+static int fam14h_get_count_percent(unsigned int self_id, double *percent,
+ unsigned int cpu);
+static int fam14h_nbp1_count(unsigned int id, unsigned long long *count,
+ unsigned int cpu);
+
+static cstate_t amd_fam14h_cstates[AMD_FAM14H_STATE_NUM] = {
+ {
+ .name = "!PC0",
+ .desc = N_("Package in sleep state (PC1 or deeper)"),
+ .id = NON_PC0,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = fam14h_get_count_percent,
+ },
+ {
+ .name = "PC1",
+ .desc = N_("Processor Package C1"),
+ .id = PC1,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = fam14h_get_count_percent,
+ },
+ {
+ .name = "PC6",
+ .desc = N_("Processor Package C6"),
+ .id = PC6,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = fam14h_get_count_percent,
+ },
+ {
+ .name = "NBP1",
+ .desc = N_("North Bridge P1 boolean counter (returns 0 or 1)"),
+ .id = NBP1,
+ .range = RANGE_PACKAGE,
+ .get_count = fam14h_nbp1_count,
+ },
+};
+
+static struct pci_access *pci_acc;
+static struct pci_dev *amd_fam14h_pci_dev;
+static int nbp1_entered;
+
+struct timespec start_time;
+static unsigned long long timediff;
+
+#ifdef DEBUG
+struct timespec dbg_time;
+long dbg_timediff;
+#endif
+
+static unsigned long long *previous_count[AMD_FAM14H_STATE_NUM];
+static unsigned long long *current_count[AMD_FAM14H_STATE_NUM];
+
+static int amd_fam14h_get_pci_info(struct cstate *state,
+ unsigned int *pci_offset,
+ unsigned int *enable_bit,
+ unsigned int cpu)
+{
+ switch (state->id) {
+ case NON_PC0:
+ *enable_bit = PCI_NON_PC0_ENABLE_BIT;
+ *pci_offset = PCI_NON_PC0_OFFSET;
+ break;
+ case PC1:
+ *enable_bit = PCI_PC1_ENABLE_BIT;
+ *pci_offset = PCI_PC1_OFFSET;
+ break;
+ case PC6:
+ *enable_bit = PCI_PC6_ENABLE_BIT;
+ *pci_offset = PCI_PC6_OFFSET;
+ break;
+ case NBP1:
+ *enable_bit = PCI_NBP1_ENTERED_BIT;
+ *pci_offset = PCI_NBP1_STAT_OFFSET;
+ break;
+ default:
+ return -1;
+ };
+ return 0;
+}
+
+static int amd_fam14h_init(cstate_t *state, unsigned int cpu)
+{
+ int enable_bit, pci_offset, ret;
+ uint32_t val;
+
+ ret = amd_fam14h_get_pci_info(state, &pci_offset, &enable_bit, cpu);
+ if (ret)
+ return ret;
+
+ /* NBP1 needs extra treating -> write 1 to D18F6x98 bit 1 for init */
+ if (state->id == NBP1) {
+ val = pci_read_long(amd_fam14h_pci_dev, pci_offset);
+ val |= 1 << enable_bit;
+ val = pci_write_long(amd_fam14h_pci_dev, pci_offset, val);
+ return ret;
+ }
+
+ /* Enable monitor */
+ val = pci_read_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG);
+ dprint("Init %s: read at offset: 0x%x val: %u\n", state->name,
+ PCI_MONITOR_ENABLE_REG, (unsigned int) val);
+ val |= 1 << enable_bit;
+ pci_write_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG, val);
+
+ dprint("Init %s: offset: 0x%x enable_bit: %d - val: %u (%u)\n",
+ state->name, PCI_MONITOR_ENABLE_REG, enable_bit,
+ (unsigned int) val, cpu);
+
+ /* Set counter to zero */
+ pci_write_long(amd_fam14h_pci_dev, pci_offset, 0);
+ previous_count[state->id][cpu] = 0;
+
+ return 0;
+}
+
+static int amd_fam14h_disable(cstate_t *state, unsigned int cpu)
+{
+ int enable_bit, pci_offset, ret;
+ uint32_t val;
+
+ ret = amd_fam14h_get_pci_info(state, &pci_offset, &enable_bit, cpu);
+ if (ret)
+ return ret;
+
+ val = pci_read_long(amd_fam14h_pci_dev, pci_offset);
+ dprint("%s: offset: 0x%x %u\n", state->name, pci_offset, val);
+ if (state->id == NBP1) {
+ /* was the bit whether NBP1 got entered set? */
+ nbp1_entered = (val & (1 << PCI_NBP1_ACTIVE_BIT)) |
+ (val & (1 << PCI_NBP1_ENTERED_BIT));
+
+ dprint("NBP1 was %sentered - 0x%x - enable_bit: "
+ "%d - pci_offset: 0x%x\n",
+ nbp1_entered ? "" : "not ",
+ val, enable_bit, pci_offset);
+ return ret;
+ }
+ current_count[state->id][cpu] = val;
+
+ dprint("%s: Current - %llu (%u)\n", state->name,
+ current_count[state->id][cpu], cpu);
+ dprint("%s: Previous - %llu (%u)\n", state->name,
+ previous_count[state->id][cpu], cpu);
+
+ val = pci_read_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG);
+ val &= ~(1 << enable_bit);
+ pci_write_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG, val);
+
+ return 0;
+}
+
+static int fam14h_nbp1_count(unsigned int id, unsigned long long *count,
+ unsigned int cpu)
+{
+ if (id == NBP1) {
+ if (nbp1_entered)
+ *count = 1;
+ else
+ *count = 0;
+ return 0;
+ }
+ return -1;
+}
+static int fam14h_get_count_percent(unsigned int id, double *percent,
+ unsigned int cpu)
+{
+ unsigned long diff;
+
+ if (id >= AMD_FAM14H_STATE_NUM)
+ return -1;
+ /* residency count in 80ns -> divide through 12.5 to get us residency */
+ diff = current_count[id][cpu] - previous_count[id][cpu];
+
+ if (timediff == 0)
+ *percent = 0.0;
+ else
+ *percent = 100.0 * diff / timediff / 12.5;
+
+ dprint("Timediff: %llu - res~: %lu us - percent: %.2f %%\n",
+ timediff, diff * 10 / 125, *percent);
+
+ return 0;
+}
+
+static int amd_fam14h_start(void)
+{
+ int num, cpu;
+ clock_gettime(CLOCK_REALTIME, &start_time);
+ for (num = 0; num < AMD_FAM14H_STATE_NUM; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++)
+ amd_fam14h_init(&amd_fam14h_cstates[num], cpu);
+ }
+#ifdef DEBUG
+ clock_gettime(CLOCK_REALTIME, &dbg_time);
+ dbg_timediff = timespec_diff_us(start_time, dbg_time);
+ dprint("Enabling counters took: %lu us\n",
+ dbg_timediff);
+#endif
+ return 0;
+}
+
+static int amd_fam14h_stop(void)
+{
+ int num, cpu;
+ struct timespec end_time;
+
+ clock_gettime(CLOCK_REALTIME, &end_time);
+
+ for (num = 0; num < AMD_FAM14H_STATE_NUM; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++)
+ amd_fam14h_disable(&amd_fam14h_cstates[num], cpu);
+ }
+#ifdef DEBUG
+ clock_gettime(CLOCK_REALTIME, &dbg_time);
+ dbg_timediff = timespec_diff_us(end_time, dbg_time);
+ dprint("Disabling counters took: %lu ns\n", dbg_timediff);
+#endif
+ timediff = timespec_diff_us(start_time, end_time);
+ if (timediff / 1000 > OVERFLOW_MS)
+ print_overflow_err((unsigned int)timediff / 1000000,
+ OVERFLOW_MS / 1000);
+
+ return 0;
+}
+
+static int is_nbp1_capable(void)
+{
+ uint32_t val;
+ val = pci_read_long(amd_fam14h_pci_dev, PCI_NBP1_CAP_OFFSET);
+ return val & (1 << 31);
+}
+
+struct cpuidle_monitor *amd_fam14h_register(void)
+{
+ int num;
+
+ if (cpupower_cpu_info.vendor != X86_VENDOR_AMD)
+ return NULL;
+
+ if (cpupower_cpu_info.family == 0x14)
+ strncpy(amd_fam14h_monitor.name, "Fam_14h",
+ MONITOR_NAME_LEN - 1);
+ else if (cpupower_cpu_info.family == 0x12)
+ strncpy(amd_fam14h_monitor.name, "Fam_12h",
+ MONITOR_NAME_LEN - 1);
+ else
+ return NULL;
+
+ /* We do not alloc for nbp1 machine wide counter */
+ for (num = 0; num < AMD_FAM14H_STATE_NUM - 1; num++) {
+ previous_count[num] = calloc(cpu_count,
+ sizeof(unsigned long long));
+ current_count[num] = calloc(cpu_count,
+ sizeof(unsigned long long));
+ }
+
+ /* We need PCI device: Slot 18, Func 6, compare with BKDG
+ for fam 12h/14h */
+ amd_fam14h_pci_dev = pci_slot_func_init(&pci_acc, 0x18, 6);
+ if (amd_fam14h_pci_dev == NULL || pci_acc == NULL)
+ return NULL;
+
+ if (!is_nbp1_capable())
+ amd_fam14h_monitor.hw_states_num = AMD_FAM14H_STATE_NUM - 1;
+
+ amd_fam14h_monitor.name_len = strlen(amd_fam14h_monitor.name);
+ return &amd_fam14h_monitor;
+}
+
+static void amd_fam14h_unregister(void)
+{
+ int num;
+ for (num = 0; num < AMD_FAM14H_STATE_NUM - 1; num++) {
+ free(previous_count[num]);
+ free(current_count[num]);
+ }
+ pci_cleanup(pci_acc);
+}
+
+struct cpuidle_monitor amd_fam14h_monitor = {
+ .name = "",
+ .hw_states = amd_fam14h_cstates,
+ .hw_states_num = AMD_FAM14H_STATE_NUM,
+ .start = amd_fam14h_start,
+ .stop = amd_fam14h_stop,
+ .do_register = amd_fam14h_register,
+ .unregister = amd_fam14h_unregister,
+ .needs_root = 1,
+ .overflow_s = OVERFLOW_MS / 1000,
+};
+#endif /* #if defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
new file mode 100644
index 000000000..bcd22a1a3
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -0,0 +1,196 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+
+#include "helpers/sysfs.h"
+#include "helpers/helpers.h"
+#include "idle_monitor/cpupower-monitor.h"
+
+#define CPUIDLE_STATES_MAX 10
+static cstate_t cpuidle_cstates[CPUIDLE_STATES_MAX];
+struct cpuidle_monitor cpuidle_sysfs_monitor;
+
+static unsigned long long **previous_count;
+static unsigned long long **current_count;
+struct timespec start_time;
+static unsigned long long timediff;
+
+static int cpuidle_get_count_percent(unsigned int id, double *percent,
+ unsigned int cpu)
+{
+ unsigned long long statediff = current_count[cpu][id]
+ - previous_count[cpu][id];
+ dprint("%s: - diff: %llu - percent: %f (%u)\n",
+ cpuidle_cstates[id].name, timediff, *percent, cpu);
+
+ if (timediff == 0)
+ *percent = 0.0;
+ else
+ *percent = ((100.0 * statediff) / timediff);
+
+ dprint("%s: - timediff: %llu - statediff: %llu - percent: %f (%u)\n",
+ cpuidle_cstates[id].name, timediff, statediff, *percent, cpu);
+
+ return 0;
+}
+
+static int cpuidle_start(void)
+{
+ int cpu, state;
+ clock_gettime(CLOCK_REALTIME, &start_time);
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num;
+ state++) {
+ previous_count[cpu][state] =
+ sysfs_get_idlestate_time(cpu, state);
+ dprint("CPU %d - State: %d - Val: %llu\n",
+ cpu, state, previous_count[cpu][state]);
+ }
+ };
+ return 0;
+}
+
+static int cpuidle_stop(void)
+{
+ int cpu, state;
+ struct timespec end_time;
+ clock_gettime(CLOCK_REALTIME, &end_time);
+ timediff = timespec_diff_us(start_time, end_time);
+
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num;
+ state++) {
+ current_count[cpu][state] =
+ sysfs_get_idlestate_time(cpu, state);
+ dprint("CPU %d - State: %d - Val: %llu\n",
+ cpu, state, previous_count[cpu][state]);
+ }
+ };
+ return 0;
+}
+
+void fix_up_intel_idle_driver_name(char *tmp, int num)
+{
+ /* fix up cpuidle name for intel idle driver */
+ if (!strncmp(tmp, "NHM-", 4)) {
+ switch (num) {
+ case 1:
+ strcpy(tmp, "C1");
+ break;
+ case 2:
+ strcpy(tmp, "C3");
+ break;
+ case 3:
+ strcpy(tmp, "C6");
+ break;
+ }
+ } else if (!strncmp(tmp, "SNB-", 4)) {
+ switch (num) {
+ case 1:
+ strcpy(tmp, "C1");
+ break;
+ case 2:
+ strcpy(tmp, "C3");
+ break;
+ case 3:
+ strcpy(tmp, "C6");
+ break;
+ case 4:
+ strcpy(tmp, "C7");
+ break;
+ }
+ } else if (!strncmp(tmp, "ATM-", 4)) {
+ switch (num) {
+ case 1:
+ strcpy(tmp, "C1");
+ break;
+ case 2:
+ strcpy(tmp, "C2");
+ break;
+ case 3:
+ strcpy(tmp, "C4");
+ break;
+ case 4:
+ strcpy(tmp, "C6");
+ break;
+ }
+ }
+}
+
+static struct cpuidle_monitor *cpuidle_register(void)
+{
+ int num;
+ char *tmp;
+
+ /* Assume idle state count is the same for all CPUs */
+ cpuidle_sysfs_monitor.hw_states_num = sysfs_get_idlestate_count(0);
+
+ if (cpuidle_sysfs_monitor.hw_states_num <= 0)
+ return NULL;
+
+ for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) {
+ tmp = sysfs_get_idlestate_name(0, num);
+ if (tmp == NULL)
+ continue;
+
+ fix_up_intel_idle_driver_name(tmp, num);
+ strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1);
+ free(tmp);
+
+ tmp = sysfs_get_idlestate_desc(0, num);
+ if (tmp == NULL)
+ continue;
+ strncpy(cpuidle_cstates[num].desc, tmp, CSTATE_DESC_LEN - 1);
+ free(tmp);
+
+ cpuidle_cstates[num].range = RANGE_THREAD;
+ cpuidle_cstates[num].id = num;
+ cpuidle_cstates[num].get_count_percent =
+ cpuidle_get_count_percent;
+ };
+
+ /* Free this at program termination */
+ previous_count = malloc(sizeof(long long *) * cpu_count);
+ current_count = malloc(sizeof(long long *) * cpu_count);
+ for (num = 0; num < cpu_count; num++) {
+ previous_count[num] = malloc(sizeof(long long) *
+ cpuidle_sysfs_monitor.hw_states_num);
+ current_count[num] = malloc(sizeof(long long) *
+ cpuidle_sysfs_monitor.hw_states_num);
+ }
+
+ cpuidle_sysfs_monitor.name_len = strlen(cpuidle_sysfs_monitor.name);
+ return &cpuidle_sysfs_monitor;
+}
+
+void cpuidle_unregister(void)
+{
+ int num;
+
+ for (num = 0; num < cpu_count; num++) {
+ free(previous_count[num]);
+ free(current_count[num]);
+ }
+ free(previous_count);
+ free(current_count);
+}
+
+struct cpuidle_monitor cpuidle_sysfs_monitor = {
+ .name = "Idle_Stats",
+ .hw_states = cpuidle_cstates,
+ .start = cpuidle_start,
+ .stop = cpuidle_stop,
+ .do_register = cpuidle_register,
+ .unregister = cpuidle_unregister,
+ .needs_root = 0,
+ .overflow_s = UINT_MAX,
+};
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
new file mode 100644
index 000000000..c4bae9203
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -0,0 +1,455 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Output format inspired by Len Brown's <lenb@kernel.org> turbostat tool.
+ *
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <libgen.h>
+
+#include "idle_monitor/cpupower-monitor.h"
+#include "idle_monitor/idle_monitors.h"
+#include "helpers/helpers.h"
+
+/* Define pointers to all monitors. */
+#define DEF(x) & x ## _monitor ,
+struct cpuidle_monitor *all_monitors[] = {
+#include "idle_monitors.def"
+0
+};
+
+static struct cpuidle_monitor *monitors[MONITORS_MAX];
+static unsigned int avail_monitors;
+
+static char *progname;
+
+enum operation_mode_e { list = 1, show, show_all };
+static int mode;
+static int interval = 1;
+static char *show_monitors_param;
+static struct cpupower_topology cpu_top;
+static unsigned int wake_cpus;
+
+/* ToDo: Document this in the manpage */
+static char range_abbr[RANGE_MAX] = { 'T', 'C', 'P', 'M', };
+
+static void print_wrong_arg_exit(void)
+{
+ printf(_("invalid or unknown argument\n"));
+ exit(EXIT_FAILURE);
+}
+
+long long timespec_diff_us(struct timespec start, struct timespec end)
+{
+ struct timespec temp;
+ if ((end.tv_nsec - start.tv_nsec) < 0) {
+ temp.tv_sec = end.tv_sec - start.tv_sec - 1;
+ temp.tv_nsec = 1000000000 + end.tv_nsec - start.tv_nsec;
+ } else {
+ temp.tv_sec = end.tv_sec - start.tv_sec;
+ temp.tv_nsec = end.tv_nsec - start.tv_nsec;
+ }
+ return (temp.tv_sec * 1000000) + (temp.tv_nsec / 1000);
+}
+
+void print_n_spaces(int n)
+{
+ int x;
+ for (x = 0; x < n; x++)
+ printf(" ");
+}
+
+/* size of s must be at least n + 1 */
+int fill_string_with_spaces(char *s, int n)
+{
+ int len = strlen(s);
+ if (len > n)
+ return -1;
+ for (; len < n; len++)
+ s[len] = ' ';
+ s[len] = '\0';
+ return 0;
+}
+
+void print_header(int topology_depth)
+{
+ int unsigned mon;
+ int state, need_len;
+ cstate_t s;
+ char buf[128] = "";
+ int percent_width = 4;
+
+ fill_string_with_spaces(buf, topology_depth * 5 - 1);
+ printf("%s|", buf);
+
+ for (mon = 0; mon < avail_monitors; mon++) {
+ need_len = monitors[mon]->hw_states_num * (percent_width + 3)
+ - 1;
+ if (mon != 0) {
+ printf("|| ");
+ need_len--;
+ }
+ sprintf(buf, "%s", monitors[mon]->name);
+ fill_string_with_spaces(buf, need_len);
+ printf("%s", buf);
+ }
+ printf("\n");
+
+ if (topology_depth > 2)
+ printf("PKG |");
+ if (topology_depth > 1)
+ printf("CORE|");
+ if (topology_depth > 0)
+ printf("CPU |");
+
+ for (mon = 0; mon < avail_monitors; mon++) {
+ if (mon != 0)
+ printf("|| ");
+ else
+ printf(" ");
+ for (state = 0; state < monitors[mon]->hw_states_num; state++) {
+ if (state != 0)
+ printf(" | ");
+ s = monitors[mon]->hw_states[state];
+ sprintf(buf, "%s", s.name);
+ fill_string_with_spaces(buf, percent_width);
+ printf("%s", buf);
+ }
+ printf(" ");
+ }
+ printf("\n");
+}
+
+
+void print_results(int topology_depth, int cpu)
+{
+ unsigned int mon;
+ int state, ret;
+ double percent;
+ unsigned long long result;
+ cstate_t s;
+
+ /* Be careful CPUs may got resorted for pkg value do not just use cpu */
+ if (!bitmask_isbitset(cpus_chosen, cpu_top.core_info[cpu].cpu))
+ return;
+
+ if (topology_depth > 2)
+ printf("%4d|", cpu_top.core_info[cpu].pkg);
+ if (topology_depth > 1)
+ printf("%4d|", cpu_top.core_info[cpu].core);
+ if (topology_depth > 0)
+ printf("%4d|", cpu_top.core_info[cpu].cpu);
+
+ for (mon = 0; mon < avail_monitors; mon++) {
+ if (mon != 0)
+ printf("||");
+
+ for (state = 0; state < monitors[mon]->hw_states_num; state++) {
+ if (state != 0)
+ printf("|");
+
+ s = monitors[mon]->hw_states[state];
+
+ if (s.get_count_percent) {
+ ret = s.get_count_percent(s.id, &percent,
+ cpu_top.core_info[cpu].cpu);
+ if (ret)
+ printf("******");
+ else if (percent >= 100.0)
+ printf("%6.1f", percent);
+ else
+ printf("%6.2f", percent);
+ } else if (s.get_count) {
+ ret = s.get_count(s.id, &result,
+ cpu_top.core_info[cpu].cpu);
+ if (ret)
+ printf("******");
+ else
+ printf("%6llu", result);
+ } else {
+ printf(_("Monitor %s, Counter %s has no count "
+ "function. Implementation error\n"),
+ monitors[mon]->name, s.name);
+ exit(EXIT_FAILURE);
+ }
+ }
+ }
+ /*
+ * The monitor could still provide useful data, for example
+ * AMD HW counters partly sit in PCI config space.
+ * It's up to the monitor plug-in to check .is_online, this one
+ * is just for additional info.
+ */
+ if (!cpu_top.core_info[cpu].is_online) {
+ printf(_(" *is offline\n"));
+ return;
+ } else
+ printf("\n");
+}
+
+
+/* param: string passed by -m param (The list of monitors to show)
+ *
+ * Monitors must have been registered already, matching monitors
+ * are picked out and available monitors array is overridden
+ * with matching ones
+ *
+ * Monitors get sorted in the same order the user passes them
+*/
+
+static void parse_monitor_param(char *param)
+{
+ unsigned int num;
+ int mon, hits = 0;
+ char *tmp = param, *token;
+ struct cpuidle_monitor *tmp_mons[MONITORS_MAX];
+
+
+ for (mon = 0; mon < MONITORS_MAX; mon++, tmp = NULL) {
+ token = strtok(tmp, ",");
+ if (token == NULL)
+ break;
+ if (strlen(token) >= MONITOR_NAME_LEN) {
+ printf(_("%s: max monitor name length"
+ " (%d) exceeded\n"), token, MONITOR_NAME_LEN);
+ continue;
+ }
+
+ for (num = 0; num < avail_monitors; num++) {
+ if (!strcmp(monitors[num]->name, token)) {
+ dprint("Found requested monitor: %s\n", token);
+ tmp_mons[hits] = monitors[num];
+ hits++;
+ }
+ }
+ }
+ if (hits == 0) {
+ printf(_("No matching monitor found in %s, "
+ "try -l option\n"), param);
+ exit(EXIT_FAILURE);
+ }
+ /* Override detected/registerd monitors array with requested one */
+ memcpy(monitors, tmp_mons,
+ sizeof(struct cpuidle_monitor *) * MONITORS_MAX);
+ avail_monitors = hits;
+}
+
+void list_monitors(void)
+{
+ unsigned int mon;
+ int state;
+ cstate_t s;
+
+ for (mon = 0; mon < avail_monitors; mon++) {
+ printf(_("Monitor \"%s\" (%d states) - Might overflow after %u "
+ "s\n"),
+ monitors[mon]->name, monitors[mon]->hw_states_num,
+ monitors[mon]->overflow_s);
+
+ for (state = 0; state < monitors[mon]->hw_states_num; state++) {
+ s = monitors[mon]->hw_states[state];
+ /*
+ * ToDo show more state capabilities:
+ * percent, time (granlarity)
+ */
+ printf("%s\t[%c] -> %s\n", s.name, range_abbr[s.range],
+ gettext(s.desc));
+ }
+ }
+}
+
+int fork_it(char **argv)
+{
+ int status;
+ unsigned int num;
+ unsigned long long timediff;
+ pid_t child_pid;
+ struct timespec start, end;
+
+ child_pid = fork();
+ clock_gettime(CLOCK_REALTIME, &start);
+
+ for (num = 0; num < avail_monitors; num++)
+ monitors[num]->start();
+
+ if (!child_pid) {
+ /* child */
+ execvp(argv[0], argv);
+ } else {
+ /* parent */
+ if (child_pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+
+ signal(SIGINT, SIG_IGN);
+ signal(SIGQUIT, SIG_IGN);
+ if (waitpid(child_pid, &status, 0) == -1) {
+ perror("wait");
+ exit(1);
+ }
+ }
+ clock_gettime(CLOCK_REALTIME, &end);
+ for (num = 0; num < avail_monitors; num++)
+ monitors[num]->stop();
+
+ timediff = timespec_diff_us(start, end);
+ if (WIFEXITED(status))
+ printf(_("%s took %.5f seconds and exited with status %d\n"),
+ argv[0], timediff / (1000.0 * 1000),
+ WEXITSTATUS(status));
+ return 0;
+}
+
+int do_interval_measure(int i)
+{
+ unsigned int num;
+ int cpu;
+
+ if (wake_cpus)
+ for (cpu = 0; cpu < cpu_count; cpu++)
+ bind_cpu(cpu);
+
+ for (num = 0; num < avail_monitors; num++) {
+ dprint("HW C-state residency monitor: %s - States: %d\n",
+ monitors[num]->name, monitors[num]->hw_states_num);
+ monitors[num]->start();
+ }
+
+ sleep(i);
+
+ if (wake_cpus)
+ for (cpu = 0; cpu < cpu_count; cpu++)
+ bind_cpu(cpu);
+
+ for (num = 0; num < avail_monitors; num++)
+ monitors[num]->stop();
+
+
+ return 0;
+}
+
+static void cmdline(int argc, char *argv[])
+{
+ int opt;
+ progname = basename(argv[0]);
+
+ while ((opt = getopt(argc, argv, "+lci:m:")) != -1) {
+ switch (opt) {
+ case 'l':
+ if (mode)
+ print_wrong_arg_exit();
+ mode = list;
+ break;
+ case 'i':
+ /* only allow -i with -m or no option */
+ if (mode && mode != show)
+ print_wrong_arg_exit();
+ interval = atoi(optarg);
+ break;
+ case 'm':
+ if (mode)
+ print_wrong_arg_exit();
+ mode = show;
+ show_monitors_param = optarg;
+ break;
+ case 'c':
+ wake_cpus = 1;
+ break;
+ default:
+ print_wrong_arg_exit();
+ }
+ }
+ if (!mode)
+ mode = show_all;
+}
+
+int cmd_monitor(int argc, char **argv)
+{
+ unsigned int num;
+ struct cpuidle_monitor *test_mon;
+ int cpu;
+
+ cmdline(argc, argv);
+ cpu_count = get_cpu_topology(&cpu_top);
+ if (cpu_count < 0) {
+ printf(_("Cannot read number of available processors\n"));
+ return EXIT_FAILURE;
+ }
+
+ /* Default is: monitor all CPUs */
+ if (bitmask_isallclear(cpus_chosen))
+ bitmask_setall(cpus_chosen);
+
+ dprint("System has up to %d CPU cores\n", cpu_count);
+
+ for (num = 0; all_monitors[num]; num++) {
+ dprint("Try to register: %s\n", all_monitors[num]->name);
+ test_mon = all_monitors[num]->do_register();
+ if (test_mon) {
+ if (test_mon->needs_root && !run_as_root) {
+ fprintf(stderr, _("Available monitor %s needs "
+ "root access\n"), test_mon->name);
+ continue;
+ }
+ monitors[avail_monitors] = test_mon;
+ dprint("%s registered\n", all_monitors[num]->name);
+ avail_monitors++;
+ }
+ }
+
+ if (avail_monitors == 0) {
+ printf(_("No HW Cstate monitors found\n"));
+ return 1;
+ }
+
+ if (mode == list) {
+ list_monitors();
+ exit(EXIT_SUCCESS);
+ }
+
+ if (mode == show)
+ parse_monitor_param(show_monitors_param);
+
+ dprint("Packages: %d - Cores: %d - CPUs: %d\n",
+ cpu_top.pkgs, cpu_top.cores, cpu_count);
+
+ /*
+ * if any params left, it must be a command to fork
+ */
+ if (argc - optind)
+ fork_it(argv + optind);
+ else
+ do_interval_measure(interval);
+
+ /* ToDo: Topology parsing needs fixing first to do
+ this more generically */
+ if (cpu_top.pkgs > 1)
+ print_header(3);
+ else
+ print_header(1);
+
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ if (cpu_top.pkgs > 1)
+ print_results(3, cpu);
+ else
+ print_results(1, cpu);
+ }
+
+ for (num = 0; num < avail_monitors; num++)
+ monitors[num]->unregister();
+
+ cpu_topology_release(cpu_top);
+ return 0;
+}
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
new file mode 100644
index 000000000..9e43f3371
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
@@ -0,0 +1,85 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ */
+
+#ifndef __CPUIDLE_INFO_HW__
+#define __CPUIDLE_INFO_HW__
+
+#include <stdarg.h>
+#include <time.h>
+
+#include "idle_monitor/idle_monitors.h"
+
+#define MONITORS_MAX 20
+#define MONITOR_NAME_LEN 20
+#define CSTATE_NAME_LEN 5
+#define CSTATE_DESC_LEN 60
+
+int cpu_count;
+
+/* Hard to define the right names ...: */
+enum power_range_e {
+ RANGE_THREAD, /* Lowest in topology hierarcy, AMD: core, Intel: thread
+ kernel sysfs: cpu */
+ RANGE_CORE, /* AMD: unit, Intel: core, kernel_sysfs: core_id */
+ RANGE_PACKAGE, /* Package, processor socket */
+ RANGE_MACHINE, /* Machine, platform wide */
+ RANGE_MAX };
+
+typedef struct cstate {
+ int id;
+ enum power_range_e range;
+ char name[CSTATE_NAME_LEN];
+ char desc[CSTATE_DESC_LEN];
+
+ /* either provide a percentage or a general count */
+ int (*get_count_percent)(unsigned int self_id, double *percent,
+ unsigned int cpu);
+ int (*get_count)(unsigned int self_id, unsigned long long *count,
+ unsigned int cpu);
+} cstate_t;
+
+struct cpuidle_monitor {
+ /* Name must not contain whitespaces */
+ char name[MONITOR_NAME_LEN];
+ int name_len;
+ int hw_states_num;
+ cstate_t *hw_states;
+ int (*start) (void);
+ int (*stop) (void);
+ struct cpuidle_monitor* (*do_register) (void);
+ void (*unregister)(void);
+ unsigned int overflow_s;
+ int needs_root;
+};
+
+extern long long timespec_diff_us(struct timespec start, struct timespec end);
+
+#define print_overflow_err(mes, ov) \
+{ \
+ fprintf(stderr, gettext("Measure took %u seconds, but registers could " \
+ "overflow at %u seconds, results " \
+ "could be inaccurate\n"), mes, ov); \
+}
+
+
+/* Taken over from x86info project sources -> return 0 on success */
+#include <sched.h>
+#include <sys/types.h>
+#include <unistd.h>
+static inline int bind_cpu(int cpu)
+{
+ cpu_set_t set;
+
+ if (sched_getaffinity(getpid(), sizeof(set), &set) == 0) {
+ CPU_ZERO(&set);
+ CPU_SET(cpu, &set);
+ return sched_setaffinity(getpid(), sizeof(set), &set);
+ }
+ return 1;
+}
+
+#endif /* __CPUIDLE_INFO_HW__ */
diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
new file mode 100644
index 000000000..ebeaba657
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
@@ -0,0 +1,196 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Based on SandyBridge monitor. Implements the new package C-states
+ * (PC8, PC9, PC10) coming with a specific Haswell (family 0x45) CPU.
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "helpers/helpers.h"
+#include "idle_monitor/cpupower-monitor.h"
+
+#define MSR_PKG_C8_RESIDENCY 0x00000630
+#define MSR_PKG_C9_RESIDENCY 0x00000631
+#define MSR_PKG_C10_RESIDENCY 0x00000632
+
+#define MSR_TSC 0x10
+
+enum intel_hsw_ext_id { PC8 = 0, PC9, PC10, HSW_EXT_CSTATE_COUNT,
+ TSC = 0xFFFF };
+
+static int hsw_ext_get_count_percent(unsigned int self_id, double *percent,
+ unsigned int cpu);
+
+static cstate_t hsw_ext_cstates[HSW_EXT_CSTATE_COUNT] = {
+ {
+ .name = "PC8",
+ .desc = N_("Processor Package C8"),
+ .id = PC8,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = hsw_ext_get_count_percent,
+ },
+ {
+ .name = "PC9",
+ .desc = N_("Processor Package C9"),
+ .desc = N_("Processor Package C2"),
+ .id = PC9,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = hsw_ext_get_count_percent,
+ },
+ {
+ .name = "PC10",
+ .desc = N_("Processor Package C10"),
+ .id = PC10,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = hsw_ext_get_count_percent,
+ },
+};
+
+static unsigned long long tsc_at_measure_start;
+static unsigned long long tsc_at_measure_end;
+static unsigned long long *previous_count[HSW_EXT_CSTATE_COUNT];
+static unsigned long long *current_count[HSW_EXT_CSTATE_COUNT];
+/* valid flag for all CPUs. If a MSR read failed it will be zero */
+static int *is_valid;
+
+static int hsw_ext_get_count(enum intel_hsw_ext_id id, unsigned long long *val,
+ unsigned int cpu)
+{
+ int msr;
+
+ switch (id) {
+ case PC8:
+ msr = MSR_PKG_C8_RESIDENCY;
+ break;
+ case PC9:
+ msr = MSR_PKG_C9_RESIDENCY;
+ break;
+ case PC10:
+ msr = MSR_PKG_C10_RESIDENCY;
+ break;
+ case TSC:
+ msr = MSR_TSC;
+ break;
+ default:
+ return -1;
+ };
+ if (read_msr(cpu, msr, val))
+ return -1;
+ return 0;
+}
+
+static int hsw_ext_get_count_percent(unsigned int id, double *percent,
+ unsigned int cpu)
+{
+ *percent = 0.0;
+
+ if (!is_valid[cpu])
+ return -1;
+
+ *percent = (100.0 *
+ (current_count[id][cpu] - previous_count[id][cpu])) /
+ (tsc_at_measure_end - tsc_at_measure_start);
+
+ dprint("%s: previous: %llu - current: %llu - (%u)\n",
+ hsw_ext_cstates[id].name, previous_count[id][cpu],
+ current_count[id][cpu], cpu);
+
+ dprint("%s: tsc_diff: %llu - count_diff: %llu - percent: %2.f (%u)\n",
+ hsw_ext_cstates[id].name,
+ (unsigned long long) tsc_at_measure_end - tsc_at_measure_start,
+ current_count[id][cpu] - previous_count[id][cpu],
+ *percent, cpu);
+
+ return 0;
+}
+
+static int hsw_ext_start(void)
+{
+ int num, cpu;
+ unsigned long long val;
+
+ for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ hsw_ext_get_count(num, &val, cpu);
+ previous_count[num][cpu] = val;
+ }
+ }
+ hsw_ext_get_count(TSC, &tsc_at_measure_start, 0);
+ return 0;
+}
+
+static int hsw_ext_stop(void)
+{
+ unsigned long long val;
+ int num, cpu;
+
+ hsw_ext_get_count(TSC, &tsc_at_measure_end, 0);
+
+ for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ is_valid[cpu] = !hsw_ext_get_count(num, &val, cpu);
+ current_count[num][cpu] = val;
+ }
+ }
+ return 0;
+}
+
+struct cpuidle_monitor intel_hsw_ext_monitor;
+
+static struct cpuidle_monitor *hsw_ext_register(void)
+{
+ int num;
+
+ if (cpupower_cpu_info.vendor != X86_VENDOR_INTEL
+ || cpupower_cpu_info.family != 6)
+ return NULL;
+
+ switch (cpupower_cpu_info.model) {
+ case 0x45: /* HSW */
+ break;
+ default:
+ return NULL;
+ }
+
+ is_valid = calloc(cpu_count, sizeof(int));
+ for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) {
+ previous_count[num] = calloc(cpu_count,
+ sizeof(unsigned long long));
+ current_count[num] = calloc(cpu_count,
+ sizeof(unsigned long long));
+ }
+ intel_hsw_ext_monitor.name_len = strlen(intel_hsw_ext_monitor.name);
+ return &intel_hsw_ext_monitor;
+}
+
+void hsw_ext_unregister(void)
+{
+ int num;
+ free(is_valid);
+ for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) {
+ free(previous_count[num]);
+ free(current_count[num]);
+ }
+}
+
+struct cpuidle_monitor intel_hsw_ext_monitor = {
+ .name = "HaswellExtended",
+ .hw_states = hsw_ext_cstates,
+ .hw_states_num = HSW_EXT_CSTATE_COUNT,
+ .start = hsw_ext_start,
+ .stop = hsw_ext_stop,
+ .do_register = hsw_ext_register,
+ .unregister = hsw_ext_unregister,
+ .needs_root = 1,
+ .overflow_s = 922000000 /* 922337203 seconds TSC overflow
+ at 20GHz */
+};
+#endif /* defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/idle_monitor/idle_monitors.def b/tools/power/cpupower/utils/idle_monitor/idle_monitors.def
new file mode 100644
index 000000000..0d6ba4dbb
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/idle_monitors.def
@@ -0,0 +1,8 @@
+#if defined(__i386__) || defined(__x86_64__)
+DEF(amd_fam14h)
+DEF(intel_nhm)
+DEF(intel_snb)
+DEF(intel_hsw_ext)
+DEF(mperf)
+#endif
+DEF(cpuidle_sysfs)
diff --git a/tools/power/cpupower/utils/idle_monitor/idle_monitors.h b/tools/power/cpupower/utils/idle_monitor/idle_monitors.h
new file mode 100644
index 000000000..4fcdeb1e0
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/idle_monitors.h
@@ -0,0 +1,18 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Based on the idea from Michael Matz <matz@suse.de>
+ *
+ */
+
+#ifndef _CPUIDLE_IDLE_MONITORS_H_
+#define _CPUIDLE_IDLE_MONITORS_H_
+
+#define DEF(x) extern struct cpuidle_monitor x ##_monitor;
+#include "idle_monitors.def"
+#undef DEF
+extern struct cpuidle_monitor *all_monitors[];
+
+#endif /* _CPUIDLE_IDLE_MONITORS_H_ */
diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
new file mode 100644
index 000000000..90a8c4f07
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
@@ -0,0 +1,338 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+#include <cpufreq.h>
+
+#include "helpers/helpers.h"
+#include "idle_monitor/cpupower-monitor.h"
+
+#define MSR_APERF 0xE8
+#define MSR_MPERF 0xE7
+
+#define MSR_TSC 0x10
+
+#define MSR_AMD_HWCR 0xc0010015
+
+enum mperf_id { C0 = 0, Cx, AVG_FREQ, MPERF_CSTATE_COUNT };
+
+static int mperf_get_count_percent(unsigned int self_id, double *percent,
+ unsigned int cpu);
+static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
+ unsigned int cpu);
+static struct timespec time_start, time_end;
+
+static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = {
+ {
+ .name = "C0",
+ .desc = N_("Processor Core not idle"),
+ .id = C0,
+ .range = RANGE_THREAD,
+ .get_count_percent = mperf_get_count_percent,
+ },
+ {
+ .name = "Cx",
+ .desc = N_("Processor Core in an idle state"),
+ .id = Cx,
+ .range = RANGE_THREAD,
+ .get_count_percent = mperf_get_count_percent,
+ },
+
+ {
+ .name = "Freq",
+ .desc = N_("Average Frequency (including boost) in MHz"),
+ .id = AVG_FREQ,
+ .range = RANGE_THREAD,
+ .get_count = mperf_get_count_freq,
+ },
+};
+
+enum MAX_FREQ_MODE { MAX_FREQ_SYSFS, MAX_FREQ_TSC_REF };
+static int max_freq_mode;
+/*
+ * The max frequency mperf is ticking at (in C0), either retrieved via:
+ * 1) calculated after measurements if we know TSC ticks at mperf/P0 frequency
+ * 2) cpufreq /sys/devices/.../cpu0/cpufreq/cpuinfo_max_freq at init time
+ * 1. Is preferred as it also works without cpufreq subsystem (e.g. on Xen)
+ */
+static unsigned long max_frequency;
+
+static unsigned long long tsc_at_measure_start;
+static unsigned long long tsc_at_measure_end;
+static unsigned long long *mperf_previous_count;
+static unsigned long long *aperf_previous_count;
+static unsigned long long *mperf_current_count;
+static unsigned long long *aperf_current_count;
+
+/* valid flag for all CPUs. If a MSR read failed it will be zero */
+static int *is_valid;
+
+static int mperf_get_tsc(unsigned long long *tsc)
+{
+ int ret;
+ ret = read_msr(0, MSR_TSC, tsc);
+ if (ret)
+ dprint("Reading TSC MSR failed, returning %llu\n", *tsc);
+ return ret;
+}
+
+static int mperf_init_stats(unsigned int cpu)
+{
+ unsigned long long val;
+ int ret;
+
+ ret = read_msr(cpu, MSR_APERF, &val);
+ aperf_previous_count[cpu] = val;
+ ret |= read_msr(cpu, MSR_MPERF, &val);
+ mperf_previous_count[cpu] = val;
+ is_valid[cpu] = !ret;
+
+ return 0;
+}
+
+static int mperf_measure_stats(unsigned int cpu)
+{
+ unsigned long long val;
+ int ret;
+
+ ret = read_msr(cpu, MSR_APERF, &val);
+ aperf_current_count[cpu] = val;
+ ret |= read_msr(cpu, MSR_MPERF, &val);
+ mperf_current_count[cpu] = val;
+ is_valid[cpu] = !ret;
+
+ return 0;
+}
+
+static int mperf_get_count_percent(unsigned int id, double *percent,
+ unsigned int cpu)
+{
+ unsigned long long aperf_diff, mperf_diff, tsc_diff;
+ unsigned long long timediff;
+
+ if (!is_valid[cpu])
+ return -1;
+
+ if (id != C0 && id != Cx)
+ return -1;
+
+ mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
+ aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
+
+ if (max_freq_mode == MAX_FREQ_TSC_REF) {
+ tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
+ *percent = 100.0 * mperf_diff / tsc_diff;
+ dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n",
+ mperf_cstates[id].name, mperf_diff, tsc_diff);
+ } else if (max_freq_mode == MAX_FREQ_SYSFS) {
+ timediff = timespec_diff_us(time_start, time_end);
+ *percent = 100.0 * mperf_diff / timediff;
+ dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n",
+ mperf_cstates[id].name, mperf_diff, timediff);
+ } else
+ return -1;
+
+ if (id == Cx)
+ *percent = 100.0 - *percent;
+
+ dprint("%s: previous: %llu - current: %llu - (%u)\n",
+ mperf_cstates[id].name, mperf_diff, aperf_diff, cpu);
+ dprint("%s: %f\n", mperf_cstates[id].name, *percent);
+ return 0;
+}
+
+static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
+ unsigned int cpu)
+{
+ unsigned long long aperf_diff, mperf_diff, time_diff, tsc_diff;
+
+ if (id != AVG_FREQ)
+ return 1;
+
+ if (!is_valid[cpu])
+ return -1;
+
+ mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
+ aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
+
+ if (max_freq_mode == MAX_FREQ_TSC_REF) {
+ /* Calculate max_freq from TSC count */
+ tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
+ time_diff = timespec_diff_us(time_start, time_end);
+ max_frequency = tsc_diff / time_diff;
+ }
+
+ *count = max_frequency * ((double)aperf_diff / mperf_diff);
+ dprint("%s: Average freq based on %s maximum frequency:\n",
+ mperf_cstates[id].name,
+ (max_freq_mode == MAX_FREQ_TSC_REF) ? "TSC calculated" : "sysfs read");
+ dprint("%max_frequency: %lu", max_frequency);
+ dprint("aperf_diff: %llu\n", aperf_diff);
+ dprint("mperf_diff: %llu\n", mperf_diff);
+ dprint("avg freq: %llu\n", *count);
+ return 0;
+}
+
+static int mperf_start(void)
+{
+ int cpu;
+ unsigned long long dbg;
+
+ clock_gettime(CLOCK_REALTIME, &time_start);
+ mperf_get_tsc(&tsc_at_measure_start);
+
+ for (cpu = 0; cpu < cpu_count; cpu++)
+ mperf_init_stats(cpu);
+
+ mperf_get_tsc(&dbg);
+ dprint("TSC diff: %llu\n", dbg - tsc_at_measure_start);
+ return 0;
+}
+
+static int mperf_stop(void)
+{
+ unsigned long long dbg;
+ int cpu;
+
+ for (cpu = 0; cpu < cpu_count; cpu++)
+ mperf_measure_stats(cpu);
+
+ mperf_get_tsc(&tsc_at_measure_end);
+ clock_gettime(CLOCK_REALTIME, &time_end);
+
+ mperf_get_tsc(&dbg);
+ dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end);
+
+ return 0;
+}
+
+/*
+ * Mperf register is defined to tick at P0 (maximum) frequency
+ *
+ * Instead of reading out P0 which can be tricky to read out from HW,
+ * we use TSC counter if it reliably ticks at P0/mperf frequency.
+ *
+ * Still try to fall back to:
+ * /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq
+ * on older Intel HW without invariant TSC feature.
+ * Or on AMD machines where TSC does not tick at P0 (do not exist yet, but
+ * it's still double checked (MSR_AMD_HWCR)).
+ *
+ * On these machines the user would still get useful mperf
+ * stats when acpi-cpufreq driver is loaded.
+ */
+static int init_maxfreq_mode(void)
+{
+ int ret;
+ unsigned long long hwcr;
+ unsigned long min;
+
+ if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC))
+ goto use_sysfs;
+
+ if (cpupower_cpu_info.vendor == X86_VENDOR_AMD) {
+ /* MSR_AMD_HWCR tells us whether TSC runs at P0/mperf
+ * freq.
+ * A test whether hwcr is accessable/available would be:
+ * (cpupower_cpu_info.family > 0x10 ||
+ * cpupower_cpu_info.family == 0x10 &&
+ * cpupower_cpu_info.model >= 0x2))
+ * This should be the case for all aperf/mperf
+ * capable AMD machines and is therefore safe to test here.
+ * Compare with Linus kernel git commit: acf01734b1747b1ec4
+ */
+ ret = read_msr(0, MSR_AMD_HWCR, &hwcr);
+ /*
+ * If the MSR read failed, assume a Xen system that did
+ * not explicitly provide access to it and assume TSC works
+ */
+ if (ret != 0) {
+ dprint("TSC read 0x%x failed - assume TSC working\n",
+ MSR_AMD_HWCR);
+ return 0;
+ } else if (1 & (hwcr >> 24)) {
+ max_freq_mode = MAX_FREQ_TSC_REF;
+ return 0;
+ } else { /* Use sysfs max frequency if available */ }
+ } else if (cpupower_cpu_info.vendor == X86_VENDOR_INTEL) {
+ /*
+ * On Intel we assume mperf (in C0) is ticking at same
+ * rate than TSC
+ */
+ max_freq_mode = MAX_FREQ_TSC_REF;
+ return 0;
+ }
+use_sysfs:
+ if (cpufreq_get_hardware_limits(0, &min, &max_frequency)) {
+ dprint("Cannot retrieve max freq from cpufreq kernel "
+ "subsystem\n");
+ return -1;
+ }
+ max_freq_mode = MAX_FREQ_SYSFS;
+ return 0;
+}
+
+/*
+ * This monitor provides:
+ *
+ * 1) Average frequency a CPU resided in
+ * This always works if the CPU has aperf/mperf capabilities
+ *
+ * 2) C0 and Cx (any sleep state) time a CPU resided in
+ * Works if mperf timer stops ticking in sleep states which
+ * seem to be the case on all current HW.
+ * Both is directly retrieved from HW registers and is independent
+ * from kernel statistics.
+ */
+struct cpuidle_monitor mperf_monitor;
+struct cpuidle_monitor *mperf_register(void)
+{
+ if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF))
+ return NULL;
+
+ if (init_maxfreq_mode())
+ return NULL;
+
+ /* Free this at program termination */
+ is_valid = calloc(cpu_count, sizeof(int));
+ mperf_previous_count = calloc(cpu_count, sizeof(unsigned long long));
+ aperf_previous_count = calloc(cpu_count, sizeof(unsigned long long));
+ mperf_current_count = calloc(cpu_count, sizeof(unsigned long long));
+ aperf_current_count = calloc(cpu_count, sizeof(unsigned long long));
+
+ mperf_monitor.name_len = strlen(mperf_monitor.name);
+ return &mperf_monitor;
+}
+
+void mperf_unregister(void)
+{
+ free(mperf_previous_count);
+ free(aperf_previous_count);
+ free(mperf_current_count);
+ free(aperf_current_count);
+ free(is_valid);
+}
+
+struct cpuidle_monitor mperf_monitor = {
+ .name = "Mperf",
+ .hw_states_num = MPERF_CSTATE_COUNT,
+ .hw_states = mperf_cstates,
+ .start = mperf_start,
+ .stop = mperf_stop,
+ .do_register = mperf_register,
+ .unregister = mperf_unregister,
+ .needs_root = 1,
+ .overflow_s = 922000000 /* 922337203 seconds TSC overflow
+ at 20GHz */
+};
+#endif /* #if defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
new file mode 100644
index 000000000..d2a91dd0d
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
@@ -0,0 +1,216 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Based on Len Brown's <lenb@kernel.org> turbostat tool.
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "helpers/helpers.h"
+#include "idle_monitor/cpupower-monitor.h"
+
+#define MSR_PKG_C3_RESIDENCY 0x3F8
+#define MSR_PKG_C6_RESIDENCY 0x3F9
+#define MSR_CORE_C3_RESIDENCY 0x3FC
+#define MSR_CORE_C6_RESIDENCY 0x3FD
+
+#define MSR_TSC 0x10
+
+#define NHM_CSTATE_COUNT 4
+
+enum intel_nhm_id { C3 = 0, C6, PC3, PC6, TSC = 0xFFFF };
+
+static int nhm_get_count_percent(unsigned int self_id, double *percent,
+ unsigned int cpu);
+
+static cstate_t nhm_cstates[NHM_CSTATE_COUNT] = {
+ {
+ .name = "C3",
+ .desc = N_("Processor Core C3"),
+ .id = C3,
+ .range = RANGE_CORE,
+ .get_count_percent = nhm_get_count_percent,
+ },
+ {
+ .name = "C6",
+ .desc = N_("Processor Core C6"),
+ .id = C6,
+ .range = RANGE_CORE,
+ .get_count_percent = nhm_get_count_percent,
+ },
+
+ {
+ .name = "PC3",
+ .desc = N_("Processor Package C3"),
+ .id = PC3,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = nhm_get_count_percent,
+ },
+ {
+ .name = "PC6",
+ .desc = N_("Processor Package C6"),
+ .id = PC6,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = nhm_get_count_percent,
+ },
+};
+
+static unsigned long long tsc_at_measure_start;
+static unsigned long long tsc_at_measure_end;
+static unsigned long long *previous_count[NHM_CSTATE_COUNT];
+static unsigned long long *current_count[NHM_CSTATE_COUNT];
+/* valid flag for all CPUs. If a MSR read failed it will be zero */
+static int *is_valid;
+
+static int nhm_get_count(enum intel_nhm_id id, unsigned long long *val,
+ unsigned int cpu)
+{
+ int msr;
+
+ switch (id) {
+ case C3:
+ msr = MSR_CORE_C3_RESIDENCY;
+ break;
+ case C6:
+ msr = MSR_CORE_C6_RESIDENCY;
+ break;
+ case PC3:
+ msr = MSR_PKG_C3_RESIDENCY;
+ break;
+ case PC6:
+ msr = MSR_PKG_C6_RESIDENCY;
+ break;
+ case TSC:
+ msr = MSR_TSC;
+ break;
+ default:
+ return -1;
+ };
+ if (read_msr(cpu, msr, val))
+ return -1;
+
+ return 0;
+}
+
+static int nhm_get_count_percent(unsigned int id, double *percent,
+ unsigned int cpu)
+{
+ *percent = 0.0;
+
+ if (!is_valid[cpu])
+ return -1;
+
+ *percent = (100.0 *
+ (current_count[id][cpu] - previous_count[id][cpu])) /
+ (tsc_at_measure_end - tsc_at_measure_start);
+
+ dprint("%s: previous: %llu - current: %llu - (%u)\n",
+ nhm_cstates[id].name, previous_count[id][cpu],
+ current_count[id][cpu], cpu);
+
+ dprint("%s: tsc_diff: %llu - count_diff: %llu - percent: %2.f (%u)\n",
+ nhm_cstates[id].name,
+ (unsigned long long) tsc_at_measure_end - tsc_at_measure_start,
+ current_count[id][cpu] - previous_count[id][cpu],
+ *percent, cpu);
+
+ return 0;
+}
+
+static int nhm_start(void)
+{
+ int num, cpu;
+ unsigned long long dbg, val;
+
+ nhm_get_count(TSC, &tsc_at_measure_start, 0);
+
+ for (num = 0; num < NHM_CSTATE_COUNT; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ is_valid[cpu] = !nhm_get_count(num, &val, cpu);
+ previous_count[num][cpu] = val;
+ }
+ }
+ nhm_get_count(TSC, &dbg, 0);
+ dprint("TSC diff: %llu\n", dbg - tsc_at_measure_start);
+ return 0;
+}
+
+static int nhm_stop(void)
+{
+ unsigned long long val;
+ unsigned long long dbg;
+ int num, cpu;
+
+ nhm_get_count(TSC, &tsc_at_measure_end, 0);
+
+ for (num = 0; num < NHM_CSTATE_COUNT; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ is_valid[cpu] = !nhm_get_count(num, &val, cpu);
+ current_count[num][cpu] = val;
+ }
+ }
+ nhm_get_count(TSC, &dbg, 0);
+ dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end);
+
+ return 0;
+}
+
+struct cpuidle_monitor intel_nhm_monitor;
+
+struct cpuidle_monitor *intel_nhm_register(void)
+{
+ int num;
+
+ if (cpupower_cpu_info.vendor != X86_VENDOR_INTEL)
+ return NULL;
+
+ if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC))
+ return NULL;
+
+ if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF))
+ return NULL;
+
+ /* Free this at program termination */
+ is_valid = calloc(cpu_count, sizeof(int));
+ for (num = 0; num < NHM_CSTATE_COUNT; num++) {
+ previous_count[num] = calloc(cpu_count,
+ sizeof(unsigned long long));
+ current_count[num] = calloc(cpu_count,
+ sizeof(unsigned long long));
+ }
+
+ intel_nhm_monitor.name_len = strlen(intel_nhm_monitor.name);
+ return &intel_nhm_monitor;
+}
+
+void intel_nhm_unregister(void)
+{
+ int num;
+
+ for (num = 0; num < NHM_CSTATE_COUNT; num++) {
+ free(previous_count[num]);
+ free(current_count[num]);
+ }
+ free(is_valid);
+}
+
+struct cpuidle_monitor intel_nhm_monitor = {
+ .name = "Nehalem",
+ .hw_states_num = NHM_CSTATE_COUNT,
+ .hw_states = nhm_cstates,
+ .start = nhm_start,
+ .stop = nhm_stop,
+ .do_register = intel_nhm_register,
+ .unregister = intel_nhm_unregister,
+ .needs_root = 1,
+ .overflow_s = 922000000 /* 922337203 seconds TSC overflow
+ at 20GHz */
+};
+#endif
diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
new file mode 100644
index 000000000..efc8a69c9
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
@@ -0,0 +1,200 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Based on Len Brown's <lenb@kernel.org> turbostat tool.
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "helpers/helpers.h"
+#include "idle_monitor/cpupower-monitor.h"
+
+#define MSR_PKG_C2_RESIDENCY 0x60D
+#define MSR_PKG_C7_RESIDENCY 0x3FA
+#define MSR_CORE_C7_RESIDENCY 0x3FE
+
+#define MSR_TSC 0x10
+
+enum intel_snb_id { C7 = 0, PC2, PC7, SNB_CSTATE_COUNT, TSC = 0xFFFF };
+
+static int snb_get_count_percent(unsigned int self_id, double *percent,
+ unsigned int cpu);
+
+static cstate_t snb_cstates[SNB_CSTATE_COUNT] = {
+ {
+ .name = "C7",
+ .desc = N_("Processor Core C7"),
+ .id = C7,
+ .range = RANGE_CORE,
+ .get_count_percent = snb_get_count_percent,
+ },
+ {
+ .name = "PC2",
+ .desc = N_("Processor Package C2"),
+ .id = PC2,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = snb_get_count_percent,
+ },
+ {
+ .name = "PC7",
+ .desc = N_("Processor Package C7"),
+ .id = PC7,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = snb_get_count_percent,
+ },
+};
+
+static unsigned long long tsc_at_measure_start;
+static unsigned long long tsc_at_measure_end;
+static unsigned long long *previous_count[SNB_CSTATE_COUNT];
+static unsigned long long *current_count[SNB_CSTATE_COUNT];
+/* valid flag for all CPUs. If a MSR read failed it will be zero */
+static int *is_valid;
+
+static int snb_get_count(enum intel_snb_id id, unsigned long long *val,
+ unsigned int cpu)
+{
+ int msr;
+
+ switch (id) {
+ case C7:
+ msr = MSR_CORE_C7_RESIDENCY;
+ break;
+ case PC2:
+ msr = MSR_PKG_C2_RESIDENCY;
+ break;
+ case PC7:
+ msr = MSR_PKG_C7_RESIDENCY;
+ break;
+ case TSC:
+ msr = MSR_TSC;
+ break;
+ default:
+ return -1;
+ };
+ if (read_msr(cpu, msr, val))
+ return -1;
+ return 0;
+}
+
+static int snb_get_count_percent(unsigned int id, double *percent,
+ unsigned int cpu)
+{
+ *percent = 0.0;
+
+ if (!is_valid[cpu])
+ return -1;
+
+ *percent = (100.0 *
+ (current_count[id][cpu] - previous_count[id][cpu])) /
+ (tsc_at_measure_end - tsc_at_measure_start);
+
+ dprint("%s: previous: %llu - current: %llu - (%u)\n",
+ snb_cstates[id].name, previous_count[id][cpu],
+ current_count[id][cpu], cpu);
+
+ dprint("%s: tsc_diff: %llu - count_diff: %llu - percent: %2.f (%u)\n",
+ snb_cstates[id].name,
+ (unsigned long long) tsc_at_measure_end - tsc_at_measure_start,
+ current_count[id][cpu] - previous_count[id][cpu],
+ *percent, cpu);
+
+ return 0;
+}
+
+static int snb_start(void)
+{
+ int num, cpu;
+ unsigned long long val;
+
+ for (num = 0; num < SNB_CSTATE_COUNT; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ snb_get_count(num, &val, cpu);
+ previous_count[num][cpu] = val;
+ }
+ }
+ snb_get_count(TSC, &tsc_at_measure_start, 0);
+ return 0;
+}
+
+static int snb_stop(void)
+{
+ unsigned long long val;
+ int num, cpu;
+
+ snb_get_count(TSC, &tsc_at_measure_end, 0);
+
+ for (num = 0; num < SNB_CSTATE_COUNT; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ is_valid[cpu] = !snb_get_count(num, &val, cpu);
+ current_count[num][cpu] = val;
+ }
+ }
+ return 0;
+}
+
+struct cpuidle_monitor intel_snb_monitor;
+
+static struct cpuidle_monitor *snb_register(void)
+{
+ int num;
+
+ if (cpupower_cpu_info.vendor != X86_VENDOR_INTEL
+ || cpupower_cpu_info.family != 6)
+ return NULL;
+
+ switch (cpupower_cpu_info.model) {
+ case 0x2A: /* SNB */
+ case 0x2D: /* SNB Xeon */
+ case 0x3A: /* IVB */
+ case 0x3E: /* IVB Xeon */
+ case 0x3C: /* HSW */
+ case 0x3F: /* HSW */
+ case 0x45: /* HSW */
+ case 0x46: /* HSW */
+ break;
+ default:
+ return NULL;
+ }
+
+ is_valid = calloc(cpu_count, sizeof(int));
+ for (num = 0; num < SNB_CSTATE_COUNT; num++) {
+ previous_count[num] = calloc(cpu_count,
+ sizeof(unsigned long long));
+ current_count[num] = calloc(cpu_count,
+ sizeof(unsigned long long));
+ }
+ intel_snb_monitor.name_len = strlen(intel_snb_monitor.name);
+ return &intel_snb_monitor;
+}
+
+void snb_unregister(void)
+{
+ int num;
+ free(is_valid);
+ for (num = 0; num < SNB_CSTATE_COUNT; num++) {
+ free(previous_count[num]);
+ free(current_count[num]);
+ }
+}
+
+struct cpuidle_monitor intel_snb_monitor = {
+ .name = "SandyBridge",
+ .hw_states = snb_cstates,
+ .hw_states_num = SNB_CSTATE_COUNT,
+ .start = snb_start,
+ .stop = snb_stop,
+ .do_register = snb_register,
+ .unregister = snb_unregister,
+ .needs_root = 1,
+ .overflow_s = 922000000 /* 922337203 seconds TSC overflow
+ at 20GHz */
+};
+#endif /* defined(__i386__) || defined(__x86_64__) */