summaryrefslogtreecommitdiff
path: root/drivers/cpufreq/intel_pstate.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cpufreq/intel_pstate.c')
-rw-r--r--drivers/cpufreq/intel_pstate.c465
1 files changed, 328 insertions, 137 deletions
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 3b29308f6..d63074ff6 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -64,6 +64,25 @@ static inline int ceiling_fp(int32_t x)
return ret;
}
+/**
+ * struct sample - Store performance sample
+ * @core_pct_busy: Ratio of APERF/MPERF in percent, which is actual
+ * performance during last sample period
+ * @busy_scaled: Scaled busy value which is used to calculate next
+ * P state. This can be different than core_pct_busy
+ * to account for cpu idle period
+ * @aperf: Difference of actual performance frequency clock count
+ * read from APERF MSR between last and current sample
+ * @mperf: Difference of maximum performance frequency clock count
+ * read from MPERF MSR between last and current sample
+ * @tsc: Difference of time stamp counter between last and
+ * current sample
+ * @freq: Effective frequency calculated from APERF/MPERF
+ * @time: Current time from scheduler
+ *
+ * This structure is used in the cpudata structure to store performance sample
+ * data for choosing next P State.
+ */
struct sample {
int32_t core_pct_busy;
int32_t busy_scaled;
@@ -71,9 +90,23 @@ struct sample {
u64 mperf;
u64 tsc;
int freq;
- ktime_t time;
+ u64 time;
};
+/**
+ * struct pstate_data - Store P state data
+ * @current_pstate: Current requested P state
+ * @min_pstate: Min P state possible for this platform
+ * @max_pstate: Max P state possible for this platform
+ * @max_pstate_physical:This is physical Max P state for a processor
+ * This can be higher than the max_pstate which can
+ * be limited by platform thermal design power limits
+ * @scaling: Scaling factor to convert frequency to cpufreq
+ * frequency units
+ * @turbo_pstate: Max Turbo P state possible for this platform
+ *
+ * Stores the per cpu model P state limits and current P state.
+ */
struct pstate_data {
int current_pstate;
int min_pstate;
@@ -83,6 +116,19 @@ struct pstate_data {
int turbo_pstate;
};
+/**
+ * struct vid_data - Stores voltage information data
+ * @min: VID data for this platform corresponding to
+ * the lowest P state
+ * @max: VID data corresponding to the highest P State.
+ * @turbo: VID data for turbo P state
+ * @ratio: Ratio of (vid max - vid min) /
+ * (max P state - Min P State)
+ *
+ * Stores the voltage data for DVFS (Dynamic Voltage and Frequency Scaling)
+ * This data is used in Atom platforms, where in addition to target P state,
+ * the voltage data needs to be specified to select next P State.
+ */
struct vid_data {
int min;
int max;
@@ -90,6 +136,18 @@ struct vid_data {
int32_t ratio;
};
+/**
+ * struct _pid - Stores PID data
+ * @setpoint: Target set point for busyness or performance
+ * @integral: Storage for accumulated error values
+ * @p_gain: PID proportional gain
+ * @i_gain: PID integral gain
+ * @d_gain: PID derivative gain
+ * @deadband: PID deadband
+ * @last_err: Last error storage for integral part of PID calculation
+ *
+ * Stores PID coefficients and last error for PID controller.
+ */
struct _pid {
int setpoint;
int32_t integral;
@@ -100,16 +158,33 @@ struct _pid {
int32_t last_err;
};
+/**
+ * struct cpudata - Per CPU instance data storage
+ * @cpu: CPU number for this instance data
+ * @update_util: CPUFreq utility callback information
+ * @pstate: Stores P state limits for this CPU
+ * @vid: Stores VID limits for this CPU
+ * @pid: Stores PID parameters for this CPU
+ * @last_sample_time: Last Sample time
+ * @prev_aperf: Last APERF value read from APERF MSR
+ * @prev_mperf: Last MPERF value read from MPERF MSR
+ * @prev_tsc: Last timestamp counter (TSC) value
+ * @prev_cummulative_iowait: IO Wait time difference from last and
+ * current sample
+ * @sample: Storage for storing last Sample data
+ *
+ * This structure stores per CPU instance data for all CPUs.
+ */
struct cpudata {
int cpu;
- struct timer_list timer;
+ struct update_util_data update_util;
struct pstate_data pstate;
struct vid_data vid;
struct _pid pid;
- ktime_t last_sample_time;
+ u64 last_sample_time;
u64 prev_aperf;
u64 prev_mperf;
u64 prev_tsc;
@@ -118,8 +193,22 @@ struct cpudata {
};
static struct cpudata **all_cpu_data;
+
+/**
+ * struct pid_adjust_policy - Stores static PID configuration data
+ * @sample_rate_ms: PID calculation sample rate in ms
+ * @sample_rate_ns: Sample rate calculation in ns
+ * @deadband: PID deadband
+ * @setpoint: PID Setpoint
+ * @p_gain_pct: PID proportional gain
+ * @i_gain_pct: PID integral gain
+ * @d_gain_pct: PID derivative gain
+ *
+ * Stores per CPU model static PID configuration data.
+ */
struct pstate_adjust_policy {
int sample_rate_ms;
+ s64 sample_rate_ns;
int deadband;
int setpoint;
int p_gain_pct;
@@ -127,17 +216,36 @@ struct pstate_adjust_policy {
int i_gain_pct;
};
+/**
+ * struct pstate_funcs - Per CPU model specific callbacks
+ * @get_max: Callback to get maximum non turbo effective P state
+ * @get_max_physical: Callback to get maximum non turbo physical P state
+ * @get_min: Callback to get minimum P state
+ * @get_turbo: Callback to get turbo P state
+ * @get_scaling: Callback to get frequency scaling factor
+ * @get_val: Callback to convert P state to actual MSR write value
+ * @get_vid: Callback to get VID data for Atom platforms
+ * @get_target_pstate: Callback to a function to calculate next P state to use
+ *
+ * Core and Atom CPU models have different way to get P State limits. This
+ * structure is used to store those callbacks.
+ */
struct pstate_funcs {
int (*get_max)(void);
int (*get_max_physical)(void);
int (*get_min)(void);
int (*get_turbo)(void);
int (*get_scaling)(void);
- void (*set)(struct cpudata*, int pstate);
+ u64 (*get_val)(struct cpudata*, int pstate);
void (*get_vid)(struct cpudata *);
int32_t (*get_target_pstate)(struct cpudata *);
};
+/**
+ * struct cpu_defaults- Per CPU model default config data
+ * @pid_policy: PID config data
+ * @funcs: Callback function data
+ */
struct cpu_defaults {
struct pstate_adjust_policy pid_policy;
struct pstate_funcs funcs;
@@ -150,6 +258,34 @@ static struct pstate_adjust_policy pid_params;
static struct pstate_funcs pstate_funcs;
static int hwp_active;
+
+/**
+ * struct perf_limits - Store user and policy limits
+ * @no_turbo: User requested turbo state from intel_pstate sysfs
+ * @turbo_disabled: Platform turbo status either from msr
+ * MSR_IA32_MISC_ENABLE or when maximum available pstate
+ * matches the maximum turbo pstate
+ * @max_perf_pct: Effective maximum performance limit in percentage, this
+ * is minimum of either limits enforced by cpufreq policy
+ * or limits from user set limits via intel_pstate sysfs
+ * @min_perf_pct: Effective minimum performance limit in percentage, this
+ * is maximum of either limits enforced by cpufreq policy
+ * or limits from user set limits via intel_pstate sysfs
+ * @max_perf: This is a scaled value between 0 to 255 for max_perf_pct
+ * This value is used to limit max pstate
+ * @min_perf: This is a scaled value between 0 to 255 for min_perf_pct
+ * This value is used to limit min pstate
+ * @max_policy_pct: The maximum performance in percentage enforced by
+ * cpufreq setpolicy interface
+ * @max_sysfs_pct: The maximum performance in percentage enforced by
+ * intel pstate sysfs interface
+ * @min_policy_pct: The minimum performance in percentage enforced by
+ * cpufreq setpolicy interface
+ * @min_sysfs_pct: The minimum performance in percentage enforced by
+ * intel pstate sysfs interface
+ *
+ * Storage for user and policy defined limits.
+ */
struct perf_limits {
int no_turbo;
int turbo_disabled;
@@ -197,8 +333,8 @@ static struct perf_limits *limits = &powersave_limits;
static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
int deadband, int integral) {
- pid->setpoint = setpoint;
- pid->deadband = deadband;
+ pid->setpoint = int_tofp(setpoint);
+ pid->deadband = int_tofp(deadband);
pid->integral = int_tofp(integral);
pid->last_err = int_tofp(setpoint) - int_tofp(busy);
}
@@ -224,9 +360,9 @@ static signed int pid_calc(struct _pid *pid, int32_t busy)
int32_t pterm, dterm, fp_error;
int32_t integral_limit;
- fp_error = int_tofp(pid->setpoint) - busy;
+ fp_error = pid->setpoint - busy;
- if (abs(fp_error) <= int_tofp(pid->deadband))
+ if (abs(fp_error) <= pid->deadband)
return 0;
pterm = mul_fp(pid->p_gain, fp_error);
@@ -286,7 +422,7 @@ static inline void update_turbo_state(void)
cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}
-static void intel_pstate_hwp_set(void)
+static void intel_pstate_hwp_set(const struct cpumask *cpumask)
{
int min, hw_min, max, hw_max, cpu, range, adj_range;
u64 value, cap;
@@ -296,9 +432,7 @@ static void intel_pstate_hwp_set(void)
hw_max = HWP_HIGHEST_PERF(cap);
range = hw_max - hw_min;
- get_online_cpus();
-
- for_each_online_cpu(cpu) {
+ for_each_cpu(cpu, cpumask) {
rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
adj_range = limits->min_perf_pct * range / 100;
min = hw_min + adj_range;
@@ -317,7 +451,20 @@ static void intel_pstate_hwp_set(void)
value |= HWP_MAX_PERF(max);
wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
}
+}
+static int intel_pstate_hwp_set_policy(struct cpufreq_policy *policy)
+{
+ if (hwp_active)
+ intel_pstate_hwp_set(policy->cpus);
+
+ return 0;
+}
+
+static void intel_pstate_hwp_set_online_cpus(void)
+{
+ get_online_cpus();
+ intel_pstate_hwp_set(cpu_online_mask);
put_online_cpus();
}
@@ -439,7 +586,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
limits->no_turbo = clamp_t(int, input, 0, 1);
if (hwp_active)
- intel_pstate_hwp_set();
+ intel_pstate_hwp_set_online_cpus();
return count;
}
@@ -465,7 +612,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
int_tofp(100));
if (hwp_active)
- intel_pstate_hwp_set();
+ intel_pstate_hwp_set_online_cpus();
return count;
}
@@ -490,7 +637,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
int_tofp(100));
if (hwp_active)
- intel_pstate_hwp_set();
+ intel_pstate_hwp_set_online_cpus();
return count;
}
@@ -531,6 +678,9 @@ static void __init intel_pstate_sysfs_expose_params(void)
static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
+ /* First disable HWP notification interrupt as we don't process them */
+ wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
+
wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
}
@@ -558,7 +708,7 @@ static int atom_get_turbo_pstate(void)
return value & 0x7F;
}
-static void atom_set_pstate(struct cpudata *cpudata, int pstate)
+static u64 atom_get_val(struct cpudata *cpudata, int pstate)
{
u64 val;
int32_t vid_fp;
@@ -583,9 +733,7 @@ static void atom_set_pstate(struct cpudata *cpudata, int pstate)
cpu_nonscaling(cpudata->cpu);
}
- val |= vid;
-
- wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
+ return val | vid;
}
static int silvermont_get_scaling(void)
@@ -714,7 +862,7 @@ static inline int core_get_scaling(void)
return 100000;
}
-static void core_set_pstate(struct cpudata *cpudata, int pstate)
+static u64 core_get_val(struct cpudata *cpudata, int pstate)
{
u64 val;
@@ -722,7 +870,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate)
if (limits->no_turbo && !limits->turbo_disabled)
val |= (u64)1 << 32;
- wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
+ return val;
}
static int knl_get_turbo_pstate(void)
@@ -753,7 +901,7 @@ static struct cpu_defaults core_params = {
.get_min = core_get_min_pstate,
.get_turbo = core_get_turbo_pstate,
.get_scaling = core_get_scaling,
- .set = core_set_pstate,
+ .get_val = core_get_val,
.get_target_pstate = get_target_pstate_use_performance,
},
};
@@ -772,7 +920,7 @@ static struct cpu_defaults silvermont_params = {
.get_max_physical = atom_get_max_pstate,
.get_min = atom_get_min_pstate,
.get_turbo = atom_get_turbo_pstate,
- .set = atom_set_pstate,
+ .get_val = atom_get_val,
.get_scaling = silvermont_get_scaling,
.get_vid = atom_get_vid,
.get_target_pstate = get_target_pstate_use_cpu_load,
@@ -793,7 +941,7 @@ static struct cpu_defaults airmont_params = {
.get_max_physical = atom_get_max_pstate,
.get_min = atom_get_min_pstate,
.get_turbo = atom_get_turbo_pstate,
- .set = atom_set_pstate,
+ .get_val = atom_get_val,
.get_scaling = airmont_get_scaling,
.get_vid = atom_get_vid,
.get_target_pstate = get_target_pstate_use_cpu_load,
@@ -815,7 +963,7 @@ static struct cpu_defaults knl_params = {
.get_min = core_get_min_pstate,
.get_turbo = knl_get_turbo_pstate,
.get_scaling = core_get_scaling,
- .set = core_set_pstate,
+ .get_val = core_get_val,
.get_target_pstate = get_target_pstate_use_performance,
},
};
@@ -834,33 +982,32 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
* policy, or by cpu specific default values determined through
* experimentation.
*/
- max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf));
+ max_perf_adj = fp_toint(max_perf * limits->max_perf);
*max = clamp_t(int, max_perf_adj,
cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
- min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf));
+ min_perf = fp_toint(max_perf * limits->min_perf);
*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
}
-static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
+static inline void intel_pstate_record_pstate(struct cpudata *cpu, int pstate)
{
- int max_perf, min_perf;
-
- if (force) {
- update_turbo_state();
-
- intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
-
- pstate = clamp_t(int, pstate, min_perf, max_perf);
-
- if (pstate == cpu->pstate.current_pstate)
- return;
- }
trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
-
cpu->pstate.current_pstate = pstate;
+}
+
+static void intel_pstate_set_min_pstate(struct cpudata *cpu)
+{
+ int pstate = cpu->pstate.min_pstate;
- pstate_funcs.set(cpu, pstate);
+ intel_pstate_record_pstate(cpu, pstate);
+ /*
+ * Generally, there is no guarantee that this code will always run on
+ * the CPU being updated, so force the register update to run on the
+ * right CPU.
+ */
+ wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
+ pstate_funcs.get_val(cpu, pstate));
}
static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
@@ -873,7 +1020,8 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
if (pstate_funcs.get_vid)
pstate_funcs.get_vid(cpu);
- intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
+
+ intel_pstate_set_min_pstate(cpu);
}
static inline void intel_pstate_calc_busy(struct cpudata *cpu)
@@ -884,16 +1032,10 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu)
core_pct = int_tofp(sample->aperf) * int_tofp(100);
core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
- sample->freq = fp_toint(
- mul_fp(int_tofp(
- cpu->pstate.max_pstate_physical *
- cpu->pstate.scaling / 100),
- core_pct));
-
sample->core_pct_busy = (int32_t)core_pct;
}
-static inline void intel_pstate_sample(struct cpudata *cpu)
+static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
{
u64 aperf, mperf;
unsigned long flags;
@@ -903,14 +1045,14 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
rdmsrl(MSR_IA32_APERF, aperf);
rdmsrl(MSR_IA32_MPERF, mperf);
tsc = rdtsc();
- if ((cpu->prev_mperf == mperf) || (cpu->prev_tsc == tsc)) {
+ if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) {
local_irq_restore(flags);
- return;
+ return false;
}
local_irq_restore(flags);
cpu->last_sample_time = cpu->sample.time;
- cpu->sample.time = ktime_get();
+ cpu->sample.time = time;
cpu->sample.aperf = aperf;
cpu->sample.mperf = mperf;
cpu->sample.tsc = tsc;
@@ -918,27 +1060,24 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
cpu->sample.mperf -= cpu->prev_mperf;
cpu->sample.tsc -= cpu->prev_tsc;
- intel_pstate_calc_busy(cpu);
-
cpu->prev_aperf = aperf;
cpu->prev_mperf = mperf;
cpu->prev_tsc = tsc;
+ /*
+ * First time this function is invoked in a given cycle, all of the
+ * previous sample data fields are equal to zero or stale and they must
+ * be populated with meaningful numbers for things to work, so assume
+ * that sample.time will always be reset before setting the utilization
+ * update hook and make the caller skip the sample then.
+ */
+ return !!cpu->last_sample_time;
}
-static inline void intel_hwp_set_sample_time(struct cpudata *cpu)
-{
- int delay;
-
- delay = msecs_to_jiffies(50);
- mod_timer_pinned(&cpu->timer, jiffies + delay);
-}
-
-static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
+static inline int32_t get_avg_frequency(struct cpudata *cpu)
{
- int delay;
-
- delay = msecs_to_jiffies(pid_params.sample_rate_ms);
- mod_timer_pinned(&cpu->timer, jiffies + delay);
+ return fp_toint(mul_fp(cpu->sample.core_pct_busy,
+ int_tofp(cpu->pstate.max_pstate_physical *
+ cpu->pstate.scaling / 100)));
}
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
@@ -964,7 +1103,6 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
mperf = cpu->sample.mperf + delta_iowait_mperf;
cpu->prev_cummulative_iowait = cummulative_iowait;
-
/*
* The load can be estimated as the ratio of the mperf counter
* running at a constant frequency during active periods
@@ -980,8 +1118,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
{
int32_t core_busy, max_pstate, current_pstate, sample_ratio;
- s64 duration_us;
- u32 sample_time;
+ u64 duration_ns;
/*
* core_busy is the ratio of actual performance to max
@@ -1000,25 +1137,41 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
/*
- * Since we have a deferred timer, it will not fire unless
- * we are in C0. So, determine if the actual elapsed time
- * is significantly greater (3x) than our sample interval. If it
- * is, then we were idle for a long enough period of time
- * to adjust our busyness.
+ * Since our utilization update callback will not run unless we are
+ * in C0, check if the actual elapsed time is significantly greater (3x)
+ * than our sample interval. If it is, then we were idle for a long
+ * enough period of time to adjust our busyness.
*/
- sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC;
- duration_us = ktime_us_delta(cpu->sample.time,
- cpu->last_sample_time);
- if (duration_us > sample_time * 3) {
- sample_ratio = div_fp(int_tofp(sample_time),
- int_tofp(duration_us));
+ duration_ns = cpu->sample.time - cpu->last_sample_time;
+ if ((s64)duration_ns > pid_params.sample_rate_ns * 3) {
+ sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
+ int_tofp(duration_ns));
core_busy = mul_fp(core_busy, sample_ratio);
+ } else {
+ sample_ratio = div_fp(100 * cpu->sample.mperf, cpu->sample.tsc);
+ if (sample_ratio < int_tofp(1))
+ core_busy = 0;
}
cpu->sample.busy_scaled = core_busy;
return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
}
+static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
+{
+ int max_perf, min_perf;
+
+ update_turbo_state();
+
+ intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
+ pstate = clamp_t(int, pstate, min_perf, max_perf);
+ if (pstate == cpu->pstate.current_pstate)
+ return;
+
+ intel_pstate_record_pstate(cpu, pstate);
+ wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
+}
+
static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
{
int from, target_pstate;
@@ -1028,7 +1181,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
target_pstate = pstate_funcs.get_target_pstate(cpu);
- intel_pstate_set_pstate(cpu, target_pstate, true);
+ intel_pstate_update_pstate(cpu, target_pstate);
sample = &cpu->sample;
trace_pstate_sample(fp_toint(sample->core_pct_busy),
@@ -1038,26 +1191,24 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
sample->mperf,
sample->aperf,
sample->tsc,
- sample->freq);
-}
-
-static void intel_hwp_timer_func(unsigned long __data)
-{
- struct cpudata *cpu = (struct cpudata *) __data;
-
- intel_pstate_sample(cpu);
- intel_hwp_set_sample_time(cpu);
+ get_avg_frequency(cpu));
}
-static void intel_pstate_timer_func(unsigned long __data)
+static void intel_pstate_update_util(struct update_util_data *data, u64 time,
+ unsigned long util, unsigned long max)
{
- struct cpudata *cpu = (struct cpudata *) __data;
-
- intel_pstate_sample(cpu);
+ struct cpudata *cpu = container_of(data, struct cpudata, update_util);
+ u64 delta_ns = time - cpu->sample.time;
- intel_pstate_adjust_busy_pstate(cpu);
+ if ((s64)delta_ns >= pid_params.sample_rate_ns) {
+ bool sample_taken = intel_pstate_sample(cpu, time);
- intel_pstate_set_sample_time(cpu);
+ if (sample_taken) {
+ intel_pstate_calc_busy(cpu);
+ if (!hwp_active)
+ intel_pstate_adjust_busy_pstate(cpu);
+ }
+ }
}
#define ICPU(model, policy) \
@@ -1105,24 +1256,17 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
cpu->cpu = cpunum;
- if (hwp_active)
+ if (hwp_active) {
intel_pstate_hwp_enable(cpu);
+ pid_params.sample_rate_ms = 50;
+ pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
+ }
intel_pstate_get_cpu_pstates(cpu);
- init_timer_deferrable(&cpu->timer);
- cpu->timer.data = (unsigned long)cpu;
- cpu->timer.expires = jiffies + HZ/100;
-
- if (!hwp_active)
- cpu->timer.function = intel_pstate_timer_func;
- else
- cpu->timer.function = intel_hwp_timer_func;
-
intel_pstate_busy_pid_reset(cpu);
- intel_pstate_sample(cpu);
- add_timer_on(&cpu->timer, cpunum);
+ cpu->update_util.func = intel_pstate_update_util;
pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
@@ -1138,7 +1282,36 @@ static unsigned int intel_pstate_get(unsigned int cpu_num)
if (!cpu)
return 0;
sample = &cpu->sample;
- return sample->freq;
+ return get_avg_frequency(cpu);
+}
+
+static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
+{
+ struct cpudata *cpu = all_cpu_data[cpu_num];
+
+ /* Prevent intel_pstate_update_util() from using stale data. */
+ cpu->sample.time = 0;
+ cpufreq_set_update_util_data(cpu_num, &cpu->update_util);
+}
+
+static void intel_pstate_clear_update_util_hook(unsigned int cpu)
+{
+ cpufreq_set_update_util_data(cpu, NULL);
+ synchronize_sched();
+}
+
+static void intel_pstate_set_performance_limits(struct perf_limits *limits)
+{
+ limits->no_turbo = 0;
+ limits->turbo_disabled = 0;
+ limits->max_perf_pct = 100;
+ limits->max_perf = int_tofp(1);
+ limits->min_perf_pct = 100;
+ limits->min_perf = int_tofp(1);
+ limits->max_policy_pct = 100;
+ limits->max_sysfs_pct = 100;
+ limits->min_policy_pct = 0;
+ limits->min_sysfs_pct = 0;
}
static int intel_pstate_set_policy(struct cpufreq_policy *policy)
@@ -1146,17 +1319,20 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
if (!policy->cpuinfo.max_freq)
return -ENODEV;
- if (policy->policy == CPUFREQ_POLICY_PERFORMANCE &&
- policy->max >= policy->cpuinfo.max_freq) {
- pr_debug("intel_pstate: set performance\n");
+ intel_pstate_clear_update_util_hook(policy->cpu);
+
+ if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
limits = &performance_limits;
- if (hwp_active)
- intel_pstate_hwp_set();
- return 0;
+ if (policy->max >= policy->cpuinfo.max_freq) {
+ pr_debug("intel_pstate: set performance\n");
+ intel_pstate_set_performance_limits(limits);
+ goto out;
+ }
+ } else {
+ pr_debug("intel_pstate: set powersave\n");
+ limits = &powersave_limits;
}
- pr_debug("intel_pstate: set powersave\n");
- limits = &powersave_limits;
limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100);
limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
@@ -1182,8 +1358,10 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
int_tofp(100));
- if (hwp_active)
- intel_pstate_hwp_set();
+ out:
+ intel_pstate_set_update_util_hook(policy->cpu);
+
+ intel_pstate_hwp_set_policy(policy);
return 0;
}
@@ -1206,11 +1384,12 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
- del_timer_sync(&all_cpu_data[cpu_num]->timer);
+ intel_pstate_clear_update_util_hook(cpu_num);
+
if (hwp_active)
return;
- intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
+ intel_pstate_set_min_pstate(cpu);
}
static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
@@ -1246,6 +1425,7 @@ static struct cpufreq_driver intel_pstate_driver = {
.flags = CPUFREQ_CONST_LOOPS,
.verify = intel_pstate_verify_policy,
.setpolicy = intel_pstate_set_policy,
+ .resume = intel_pstate_hwp_set_policy,
.get = intel_pstate_get,
.init = intel_pstate_cpu_init,
.stop_cpu = intel_pstate_stop_cpu,
@@ -1270,6 +1450,7 @@ static int intel_pstate_msrs_not_valid(void)
static void copy_pid_params(struct pstate_adjust_policy *policy)
{
pid_params.sample_rate_ms = policy->sample_rate_ms;
+ pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
pid_params.p_gain_pct = policy->p_gain_pct;
pid_params.i_gain_pct = policy->i_gain_pct;
pid_params.d_gain_pct = policy->d_gain_pct;
@@ -1284,7 +1465,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
pstate_funcs.get_min = funcs->get_min;
pstate_funcs.get_turbo = funcs->get_turbo;
pstate_funcs.get_scaling = funcs->get_scaling;
- pstate_funcs.set = funcs->set;
+ pstate_funcs.get_val = funcs->get_val;
pstate_funcs.get_vid = funcs->get_vid;
pstate_funcs.get_target_pstate = funcs->get_target_pstate;
@@ -1407,6 +1588,11 @@ static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
#endif /* CONFIG_ACPI */
+static const struct x86_cpu_id hwp_support_ids[] __initconst = {
+ { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP },
+ {}
+};
+
static int __init intel_pstate_init(void)
{
int cpu, rc = 0;
@@ -1416,17 +1602,16 @@ static int __init intel_pstate_init(void)
if (no_load)
return -ENODEV;
+ if (x86_match_cpu(hwp_support_ids) && !no_hwp) {
+ copy_cpu_funcs(&core_params.funcs);
+ hwp_active++;
+ goto hwp_cpu_matched;
+ }
+
id = x86_match_cpu(intel_pstate_cpu_ids);
if (!id)
return -ENODEV;
- /*
- * The Intel pstate driver will be ignored if the platform
- * firmware has its own power management modes.
- */
- if (intel_pstate_platform_pwr_mgmt_exists())
- return -ENODEV;
-
cpu_def = (struct cpu_defaults *)id->driver_data;
copy_pid_params(&cpu_def->pid_policy);
@@ -1435,17 +1620,20 @@ static int __init intel_pstate_init(void)
if (intel_pstate_msrs_not_valid())
return -ENODEV;
+hwp_cpu_matched:
+ /*
+ * The Intel pstate driver will be ignored if the platform
+ * firmware has its own power management modes.
+ */
+ if (intel_pstate_platform_pwr_mgmt_exists())
+ return -ENODEV;
+
pr_info("Intel P-state driver initializing.\n");
all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
if (!all_cpu_data)
return -ENOMEM;
- if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) {
- pr_info("intel_pstate: HWP enabled\n");
- hwp_active++;
- }
-
if (!hwp_active && hwp_only)
goto out;
@@ -1456,12 +1644,15 @@ static int __init intel_pstate_init(void)
intel_pstate_debug_expose_params();
intel_pstate_sysfs_expose_params();
+ if (hwp_active)
+ pr_info("intel_pstate: HWP enabled\n");
+
return rc;
out:
get_online_cpus();
for_each_online_cpu(cpu) {
if (all_cpu_data[cpu]) {
- del_timer_sync(&all_cpu_data[cpu]->timer);
+ intel_pstate_clear_update_util_hook(cpu);
kfree(all_cpu_data[cpu]);
}
}