diff options
Diffstat (limited to 'drivers/cpufreq/intel_pstate.c')
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 465 |
1 files changed, 328 insertions, 137 deletions
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 3b29308f6..d63074ff6 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -64,6 +64,25 @@ static inline int ceiling_fp(int32_t x) return ret; } +/** + * struct sample - Store performance sample + * @core_pct_busy: Ratio of APERF/MPERF in percent, which is actual + * performance during last sample period + * @busy_scaled: Scaled busy value which is used to calculate next + * P state. This can be different than core_pct_busy + * to account for cpu idle period + * @aperf: Difference of actual performance frequency clock count + * read from APERF MSR between last and current sample + * @mperf: Difference of maximum performance frequency clock count + * read from MPERF MSR between last and current sample + * @tsc: Difference of time stamp counter between last and + * current sample + * @freq: Effective frequency calculated from APERF/MPERF + * @time: Current time from scheduler + * + * This structure is used in the cpudata structure to store performance sample + * data for choosing next P State. + */ struct sample { int32_t core_pct_busy; int32_t busy_scaled; @@ -71,9 +90,23 @@ struct sample { u64 mperf; u64 tsc; int freq; - ktime_t time; + u64 time; }; +/** + * struct pstate_data - Store P state data + * @current_pstate: Current requested P state + * @min_pstate: Min P state possible for this platform + * @max_pstate: Max P state possible for this platform + * @max_pstate_physical:This is physical Max P state for a processor + * This can be higher than the max_pstate which can + * be limited by platform thermal design power limits + * @scaling: Scaling factor to convert frequency to cpufreq + * frequency units + * @turbo_pstate: Max Turbo P state possible for this platform + * + * Stores the per cpu model P state limits and current P state. + */ struct pstate_data { int current_pstate; int min_pstate; @@ -83,6 +116,19 @@ struct pstate_data { int turbo_pstate; }; +/** + * struct vid_data - Stores voltage information data + * @min: VID data for this platform corresponding to + * the lowest P state + * @max: VID data corresponding to the highest P State. + * @turbo: VID data for turbo P state + * @ratio: Ratio of (vid max - vid min) / + * (max P state - Min P State) + * + * Stores the voltage data for DVFS (Dynamic Voltage and Frequency Scaling) + * This data is used in Atom platforms, where in addition to target P state, + * the voltage data needs to be specified to select next P State. + */ struct vid_data { int min; int max; @@ -90,6 +136,18 @@ struct vid_data { int32_t ratio; }; +/** + * struct _pid - Stores PID data + * @setpoint: Target set point for busyness or performance + * @integral: Storage for accumulated error values + * @p_gain: PID proportional gain + * @i_gain: PID integral gain + * @d_gain: PID derivative gain + * @deadband: PID deadband + * @last_err: Last error storage for integral part of PID calculation + * + * Stores PID coefficients and last error for PID controller. + */ struct _pid { int setpoint; int32_t integral; @@ -100,16 +158,33 @@ struct _pid { int32_t last_err; }; +/** + * struct cpudata - Per CPU instance data storage + * @cpu: CPU number for this instance data + * @update_util: CPUFreq utility callback information + * @pstate: Stores P state limits for this CPU + * @vid: Stores VID limits for this CPU + * @pid: Stores PID parameters for this CPU + * @last_sample_time: Last Sample time + * @prev_aperf: Last APERF value read from APERF MSR + * @prev_mperf: Last MPERF value read from MPERF MSR + * @prev_tsc: Last timestamp counter (TSC) value + * @prev_cummulative_iowait: IO Wait time difference from last and + * current sample + * @sample: Storage for storing last Sample data + * + * This structure stores per CPU instance data for all CPUs. + */ struct cpudata { int cpu; - struct timer_list timer; + struct update_util_data update_util; struct pstate_data pstate; struct vid_data vid; struct _pid pid; - ktime_t last_sample_time; + u64 last_sample_time; u64 prev_aperf; u64 prev_mperf; u64 prev_tsc; @@ -118,8 +193,22 @@ struct cpudata { }; static struct cpudata **all_cpu_data; + +/** + * struct pid_adjust_policy - Stores static PID configuration data + * @sample_rate_ms: PID calculation sample rate in ms + * @sample_rate_ns: Sample rate calculation in ns + * @deadband: PID deadband + * @setpoint: PID Setpoint + * @p_gain_pct: PID proportional gain + * @i_gain_pct: PID integral gain + * @d_gain_pct: PID derivative gain + * + * Stores per CPU model static PID configuration data. + */ struct pstate_adjust_policy { int sample_rate_ms; + s64 sample_rate_ns; int deadband; int setpoint; int p_gain_pct; @@ -127,17 +216,36 @@ struct pstate_adjust_policy { int i_gain_pct; }; +/** + * struct pstate_funcs - Per CPU model specific callbacks + * @get_max: Callback to get maximum non turbo effective P state + * @get_max_physical: Callback to get maximum non turbo physical P state + * @get_min: Callback to get minimum P state + * @get_turbo: Callback to get turbo P state + * @get_scaling: Callback to get frequency scaling factor + * @get_val: Callback to convert P state to actual MSR write value + * @get_vid: Callback to get VID data for Atom platforms + * @get_target_pstate: Callback to a function to calculate next P state to use + * + * Core and Atom CPU models have different way to get P State limits. This + * structure is used to store those callbacks. + */ struct pstate_funcs { int (*get_max)(void); int (*get_max_physical)(void); int (*get_min)(void); int (*get_turbo)(void); int (*get_scaling)(void); - void (*set)(struct cpudata*, int pstate); + u64 (*get_val)(struct cpudata*, int pstate); void (*get_vid)(struct cpudata *); int32_t (*get_target_pstate)(struct cpudata *); }; +/** + * struct cpu_defaults- Per CPU model default config data + * @pid_policy: PID config data + * @funcs: Callback function data + */ struct cpu_defaults { struct pstate_adjust_policy pid_policy; struct pstate_funcs funcs; @@ -150,6 +258,34 @@ static struct pstate_adjust_policy pid_params; static struct pstate_funcs pstate_funcs; static int hwp_active; + +/** + * struct perf_limits - Store user and policy limits + * @no_turbo: User requested turbo state from intel_pstate sysfs + * @turbo_disabled: Platform turbo status either from msr + * MSR_IA32_MISC_ENABLE or when maximum available pstate + * matches the maximum turbo pstate + * @max_perf_pct: Effective maximum performance limit in percentage, this + * is minimum of either limits enforced by cpufreq policy + * or limits from user set limits via intel_pstate sysfs + * @min_perf_pct: Effective minimum performance limit in percentage, this + * is maximum of either limits enforced by cpufreq policy + * or limits from user set limits via intel_pstate sysfs + * @max_perf: This is a scaled value between 0 to 255 for max_perf_pct + * This value is used to limit max pstate + * @min_perf: This is a scaled value between 0 to 255 for min_perf_pct + * This value is used to limit min pstate + * @max_policy_pct: The maximum performance in percentage enforced by + * cpufreq setpolicy interface + * @max_sysfs_pct: The maximum performance in percentage enforced by + * intel pstate sysfs interface + * @min_policy_pct: The minimum performance in percentage enforced by + * cpufreq setpolicy interface + * @min_sysfs_pct: The minimum performance in percentage enforced by + * intel pstate sysfs interface + * + * Storage for user and policy defined limits. + */ struct perf_limits { int no_turbo; int turbo_disabled; @@ -197,8 +333,8 @@ static struct perf_limits *limits = &powersave_limits; static inline void pid_reset(struct _pid *pid, int setpoint, int busy, int deadband, int integral) { - pid->setpoint = setpoint; - pid->deadband = deadband; + pid->setpoint = int_tofp(setpoint); + pid->deadband = int_tofp(deadband); pid->integral = int_tofp(integral); pid->last_err = int_tofp(setpoint) - int_tofp(busy); } @@ -224,9 +360,9 @@ static signed int pid_calc(struct _pid *pid, int32_t busy) int32_t pterm, dterm, fp_error; int32_t integral_limit; - fp_error = int_tofp(pid->setpoint) - busy; + fp_error = pid->setpoint - busy; - if (abs(fp_error) <= int_tofp(pid->deadband)) + if (abs(fp_error) <= pid->deadband) return 0; pterm = mul_fp(pid->p_gain, fp_error); @@ -286,7 +422,7 @@ static inline void update_turbo_state(void) cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); } -static void intel_pstate_hwp_set(void) +static void intel_pstate_hwp_set(const struct cpumask *cpumask) { int min, hw_min, max, hw_max, cpu, range, adj_range; u64 value, cap; @@ -296,9 +432,7 @@ static void intel_pstate_hwp_set(void) hw_max = HWP_HIGHEST_PERF(cap); range = hw_max - hw_min; - get_online_cpus(); - - for_each_online_cpu(cpu) { + for_each_cpu(cpu, cpumask) { rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); adj_range = limits->min_perf_pct * range / 100; min = hw_min + adj_range; @@ -317,7 +451,20 @@ static void intel_pstate_hwp_set(void) value |= HWP_MAX_PERF(max); wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); } +} +static int intel_pstate_hwp_set_policy(struct cpufreq_policy *policy) +{ + if (hwp_active) + intel_pstate_hwp_set(policy->cpus); + + return 0; +} + +static void intel_pstate_hwp_set_online_cpus(void) +{ + get_online_cpus(); + intel_pstate_hwp_set(cpu_online_mask); put_online_cpus(); } @@ -439,7 +586,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, limits->no_turbo = clamp_t(int, input, 0, 1); if (hwp_active) - intel_pstate_hwp_set(); + intel_pstate_hwp_set_online_cpus(); return count; } @@ -465,7 +612,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, int_tofp(100)); if (hwp_active) - intel_pstate_hwp_set(); + intel_pstate_hwp_set_online_cpus(); return count; } @@ -490,7 +637,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, int_tofp(100)); if (hwp_active) - intel_pstate_hwp_set(); + intel_pstate_hwp_set_online_cpus(); return count; } @@ -531,6 +678,9 @@ static void __init intel_pstate_sysfs_expose_params(void) static void intel_pstate_hwp_enable(struct cpudata *cpudata) { + /* First disable HWP notification interrupt as we don't process them */ + wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); + wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); } @@ -558,7 +708,7 @@ static int atom_get_turbo_pstate(void) return value & 0x7F; } -static void atom_set_pstate(struct cpudata *cpudata, int pstate) +static u64 atom_get_val(struct cpudata *cpudata, int pstate) { u64 val; int32_t vid_fp; @@ -583,9 +733,7 @@ static void atom_set_pstate(struct cpudata *cpudata, int pstate) cpu_nonscaling(cpudata->cpu); } - val |= vid; - - wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); + return val | vid; } static int silvermont_get_scaling(void) @@ -714,7 +862,7 @@ static inline int core_get_scaling(void) return 100000; } -static void core_set_pstate(struct cpudata *cpudata, int pstate) +static u64 core_get_val(struct cpudata *cpudata, int pstate) { u64 val; @@ -722,7 +870,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate) if (limits->no_turbo && !limits->turbo_disabled) val |= (u64)1 << 32; - wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); + return val; } static int knl_get_turbo_pstate(void) @@ -753,7 +901,7 @@ static struct cpu_defaults core_params = { .get_min = core_get_min_pstate, .get_turbo = core_get_turbo_pstate, .get_scaling = core_get_scaling, - .set = core_set_pstate, + .get_val = core_get_val, .get_target_pstate = get_target_pstate_use_performance, }, }; @@ -772,7 +920,7 @@ static struct cpu_defaults silvermont_params = { .get_max_physical = atom_get_max_pstate, .get_min = atom_get_min_pstate, .get_turbo = atom_get_turbo_pstate, - .set = atom_set_pstate, + .get_val = atom_get_val, .get_scaling = silvermont_get_scaling, .get_vid = atom_get_vid, .get_target_pstate = get_target_pstate_use_cpu_load, @@ -793,7 +941,7 @@ static struct cpu_defaults airmont_params = { .get_max_physical = atom_get_max_pstate, .get_min = atom_get_min_pstate, .get_turbo = atom_get_turbo_pstate, - .set = atom_set_pstate, + .get_val = atom_get_val, .get_scaling = airmont_get_scaling, .get_vid = atom_get_vid, .get_target_pstate = get_target_pstate_use_cpu_load, @@ -815,7 +963,7 @@ static struct cpu_defaults knl_params = { .get_min = core_get_min_pstate, .get_turbo = knl_get_turbo_pstate, .get_scaling = core_get_scaling, - .set = core_set_pstate, + .get_val = core_get_val, .get_target_pstate = get_target_pstate_use_performance, }, }; @@ -834,33 +982,32 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) * policy, or by cpu specific default values determined through * experimentation. */ - max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf)); + max_perf_adj = fp_toint(max_perf * limits->max_perf); *max = clamp_t(int, max_perf_adj, cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); - min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf)); + min_perf = fp_toint(max_perf * limits->min_perf); *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); } -static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force) +static inline void intel_pstate_record_pstate(struct cpudata *cpu, int pstate) { - int max_perf, min_perf; - - if (force) { - update_turbo_state(); - - intel_pstate_get_min_max(cpu, &min_perf, &max_perf); - - pstate = clamp_t(int, pstate, min_perf, max_perf); - - if (pstate == cpu->pstate.current_pstate) - return; - } trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); - cpu->pstate.current_pstate = pstate; +} + +static void intel_pstate_set_min_pstate(struct cpudata *cpu) +{ + int pstate = cpu->pstate.min_pstate; - pstate_funcs.set(cpu, pstate); + intel_pstate_record_pstate(cpu, pstate); + /* + * Generally, there is no guarantee that this code will always run on + * the CPU being updated, so force the register update to run on the + * right CPU. + */ + wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL, + pstate_funcs.get_val(cpu, pstate)); } static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) @@ -873,7 +1020,8 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) if (pstate_funcs.get_vid) pstate_funcs.get_vid(cpu); - intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false); + + intel_pstate_set_min_pstate(cpu); } static inline void intel_pstate_calc_busy(struct cpudata *cpu) @@ -884,16 +1032,10 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu) core_pct = int_tofp(sample->aperf) * int_tofp(100); core_pct = div64_u64(core_pct, int_tofp(sample->mperf)); - sample->freq = fp_toint( - mul_fp(int_tofp( - cpu->pstate.max_pstate_physical * - cpu->pstate.scaling / 100), - core_pct)); - sample->core_pct_busy = (int32_t)core_pct; } -static inline void intel_pstate_sample(struct cpudata *cpu) +static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time) { u64 aperf, mperf; unsigned long flags; @@ -903,14 +1045,14 @@ static inline void intel_pstate_sample(struct cpudata *cpu) rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); tsc = rdtsc(); - if ((cpu->prev_mperf == mperf) || (cpu->prev_tsc == tsc)) { + if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) { local_irq_restore(flags); - return; + return false; } local_irq_restore(flags); cpu->last_sample_time = cpu->sample.time; - cpu->sample.time = ktime_get(); + cpu->sample.time = time; cpu->sample.aperf = aperf; cpu->sample.mperf = mperf; cpu->sample.tsc = tsc; @@ -918,27 +1060,24 @@ static inline void intel_pstate_sample(struct cpudata *cpu) cpu->sample.mperf -= cpu->prev_mperf; cpu->sample.tsc -= cpu->prev_tsc; - intel_pstate_calc_busy(cpu); - cpu->prev_aperf = aperf; cpu->prev_mperf = mperf; cpu->prev_tsc = tsc; + /* + * First time this function is invoked in a given cycle, all of the + * previous sample data fields are equal to zero or stale and they must + * be populated with meaningful numbers for things to work, so assume + * that sample.time will always be reset before setting the utilization + * update hook and make the caller skip the sample then. + */ + return !!cpu->last_sample_time; } -static inline void intel_hwp_set_sample_time(struct cpudata *cpu) -{ - int delay; - - delay = msecs_to_jiffies(50); - mod_timer_pinned(&cpu->timer, jiffies + delay); -} - -static inline void intel_pstate_set_sample_time(struct cpudata *cpu) +static inline int32_t get_avg_frequency(struct cpudata *cpu) { - int delay; - - delay = msecs_to_jiffies(pid_params.sample_rate_ms); - mod_timer_pinned(&cpu->timer, jiffies + delay); + return fp_toint(mul_fp(cpu->sample.core_pct_busy, + int_tofp(cpu->pstate.max_pstate_physical * + cpu->pstate.scaling / 100))); } static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) @@ -964,7 +1103,6 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) mperf = cpu->sample.mperf + delta_iowait_mperf; cpu->prev_cummulative_iowait = cummulative_iowait; - /* * The load can be estimated as the ratio of the mperf counter * running at a constant frequency during active periods @@ -980,8 +1118,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) { int32_t core_busy, max_pstate, current_pstate, sample_ratio; - s64 duration_us; - u32 sample_time; + u64 duration_ns; /* * core_busy is the ratio of actual performance to max @@ -1000,25 +1137,41 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); /* - * Since we have a deferred timer, it will not fire unless - * we are in C0. So, determine if the actual elapsed time - * is significantly greater (3x) than our sample interval. If it - * is, then we were idle for a long enough period of time - * to adjust our busyness. + * Since our utilization update callback will not run unless we are + * in C0, check if the actual elapsed time is significantly greater (3x) + * than our sample interval. If it is, then we were idle for a long + * enough period of time to adjust our busyness. */ - sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC; - duration_us = ktime_us_delta(cpu->sample.time, - cpu->last_sample_time); - if (duration_us > sample_time * 3) { - sample_ratio = div_fp(int_tofp(sample_time), - int_tofp(duration_us)); + duration_ns = cpu->sample.time - cpu->last_sample_time; + if ((s64)duration_ns > pid_params.sample_rate_ns * 3) { + sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns), + int_tofp(duration_ns)); core_busy = mul_fp(core_busy, sample_ratio); + } else { + sample_ratio = div_fp(100 * cpu->sample.mperf, cpu->sample.tsc); + if (sample_ratio < int_tofp(1)) + core_busy = 0; } cpu->sample.busy_scaled = core_busy; return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy); } +static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate) +{ + int max_perf, min_perf; + + update_turbo_state(); + + intel_pstate_get_min_max(cpu, &min_perf, &max_perf); + pstate = clamp_t(int, pstate, min_perf, max_perf); + if (pstate == cpu->pstate.current_pstate) + return; + + intel_pstate_record_pstate(cpu, pstate); + wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate)); +} + static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) { int from, target_pstate; @@ -1028,7 +1181,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) target_pstate = pstate_funcs.get_target_pstate(cpu); - intel_pstate_set_pstate(cpu, target_pstate, true); + intel_pstate_update_pstate(cpu, target_pstate); sample = &cpu->sample; trace_pstate_sample(fp_toint(sample->core_pct_busy), @@ -1038,26 +1191,24 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) sample->mperf, sample->aperf, sample->tsc, - sample->freq); -} - -static void intel_hwp_timer_func(unsigned long __data) -{ - struct cpudata *cpu = (struct cpudata *) __data; - - intel_pstate_sample(cpu); - intel_hwp_set_sample_time(cpu); + get_avg_frequency(cpu)); } -static void intel_pstate_timer_func(unsigned long __data) +static void intel_pstate_update_util(struct update_util_data *data, u64 time, + unsigned long util, unsigned long max) { - struct cpudata *cpu = (struct cpudata *) __data; - - intel_pstate_sample(cpu); + struct cpudata *cpu = container_of(data, struct cpudata, update_util); + u64 delta_ns = time - cpu->sample.time; - intel_pstate_adjust_busy_pstate(cpu); + if ((s64)delta_ns >= pid_params.sample_rate_ns) { + bool sample_taken = intel_pstate_sample(cpu, time); - intel_pstate_set_sample_time(cpu); + if (sample_taken) { + intel_pstate_calc_busy(cpu); + if (!hwp_active) + intel_pstate_adjust_busy_pstate(cpu); + } + } } #define ICPU(model, policy) \ @@ -1105,24 +1256,17 @@ static int intel_pstate_init_cpu(unsigned int cpunum) cpu->cpu = cpunum; - if (hwp_active) + if (hwp_active) { intel_pstate_hwp_enable(cpu); + pid_params.sample_rate_ms = 50; + pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC; + } intel_pstate_get_cpu_pstates(cpu); - init_timer_deferrable(&cpu->timer); - cpu->timer.data = (unsigned long)cpu; - cpu->timer.expires = jiffies + HZ/100; - - if (!hwp_active) - cpu->timer.function = intel_pstate_timer_func; - else - cpu->timer.function = intel_hwp_timer_func; - intel_pstate_busy_pid_reset(cpu); - intel_pstate_sample(cpu); - add_timer_on(&cpu->timer, cpunum); + cpu->update_util.func = intel_pstate_update_util; pr_debug("intel_pstate: controlling: cpu %d\n", cpunum); @@ -1138,7 +1282,36 @@ static unsigned int intel_pstate_get(unsigned int cpu_num) if (!cpu) return 0; sample = &cpu->sample; - return sample->freq; + return get_avg_frequency(cpu); +} + +static void intel_pstate_set_update_util_hook(unsigned int cpu_num) +{ + struct cpudata *cpu = all_cpu_data[cpu_num]; + + /* Prevent intel_pstate_update_util() from using stale data. */ + cpu->sample.time = 0; + cpufreq_set_update_util_data(cpu_num, &cpu->update_util); +} + +static void intel_pstate_clear_update_util_hook(unsigned int cpu) +{ + cpufreq_set_update_util_data(cpu, NULL); + synchronize_sched(); +} + +static void intel_pstate_set_performance_limits(struct perf_limits *limits) +{ + limits->no_turbo = 0; + limits->turbo_disabled = 0; + limits->max_perf_pct = 100; + limits->max_perf = int_tofp(1); + limits->min_perf_pct = 100; + limits->min_perf = int_tofp(1); + limits->max_policy_pct = 100; + limits->max_sysfs_pct = 100; + limits->min_policy_pct = 0; + limits->min_sysfs_pct = 0; } static int intel_pstate_set_policy(struct cpufreq_policy *policy) @@ -1146,17 +1319,20 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) if (!policy->cpuinfo.max_freq) return -ENODEV; - if (policy->policy == CPUFREQ_POLICY_PERFORMANCE && - policy->max >= policy->cpuinfo.max_freq) { - pr_debug("intel_pstate: set performance\n"); + intel_pstate_clear_update_util_hook(policy->cpu); + + if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { limits = &performance_limits; - if (hwp_active) - intel_pstate_hwp_set(); - return 0; + if (policy->max >= policy->cpuinfo.max_freq) { + pr_debug("intel_pstate: set performance\n"); + intel_pstate_set_performance_limits(limits); + goto out; + } + } else { + pr_debug("intel_pstate: set powersave\n"); + limits = &powersave_limits; } - pr_debug("intel_pstate: set powersave\n"); - limits = &powersave_limits; limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq; limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100); limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100, @@ -1182,8 +1358,10 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits->max_perf = div_fp(int_tofp(limits->max_perf_pct), int_tofp(100)); - if (hwp_active) - intel_pstate_hwp_set(); + out: + intel_pstate_set_update_util_hook(policy->cpu); + + intel_pstate_hwp_set_policy(policy); return 0; } @@ -1206,11 +1384,12 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) pr_debug("intel_pstate: CPU %d exiting\n", cpu_num); - del_timer_sync(&all_cpu_data[cpu_num]->timer); + intel_pstate_clear_update_util_hook(cpu_num); + if (hwp_active) return; - intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false); + intel_pstate_set_min_pstate(cpu); } static int intel_pstate_cpu_init(struct cpufreq_policy *policy) @@ -1246,6 +1425,7 @@ static struct cpufreq_driver intel_pstate_driver = { .flags = CPUFREQ_CONST_LOOPS, .verify = intel_pstate_verify_policy, .setpolicy = intel_pstate_set_policy, + .resume = intel_pstate_hwp_set_policy, .get = intel_pstate_get, .init = intel_pstate_cpu_init, .stop_cpu = intel_pstate_stop_cpu, @@ -1270,6 +1450,7 @@ static int intel_pstate_msrs_not_valid(void) static void copy_pid_params(struct pstate_adjust_policy *policy) { pid_params.sample_rate_ms = policy->sample_rate_ms; + pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; pid_params.p_gain_pct = policy->p_gain_pct; pid_params.i_gain_pct = policy->i_gain_pct; pid_params.d_gain_pct = policy->d_gain_pct; @@ -1284,7 +1465,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs) pstate_funcs.get_min = funcs->get_min; pstate_funcs.get_turbo = funcs->get_turbo; pstate_funcs.get_scaling = funcs->get_scaling; - pstate_funcs.set = funcs->set; + pstate_funcs.get_val = funcs->get_val; pstate_funcs.get_vid = funcs->get_vid; pstate_funcs.get_target_pstate = funcs->get_target_pstate; @@ -1407,6 +1588,11 @@ static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; } static inline bool intel_pstate_has_acpi_ppc(void) { return false; } #endif /* CONFIG_ACPI */ +static const struct x86_cpu_id hwp_support_ids[] __initconst = { + { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP }, + {} +}; + static int __init intel_pstate_init(void) { int cpu, rc = 0; @@ -1416,17 +1602,16 @@ static int __init intel_pstate_init(void) if (no_load) return -ENODEV; + if (x86_match_cpu(hwp_support_ids) && !no_hwp) { + copy_cpu_funcs(&core_params.funcs); + hwp_active++; + goto hwp_cpu_matched; + } + id = x86_match_cpu(intel_pstate_cpu_ids); if (!id) return -ENODEV; - /* - * The Intel pstate driver will be ignored if the platform - * firmware has its own power management modes. - */ - if (intel_pstate_platform_pwr_mgmt_exists()) - return -ENODEV; - cpu_def = (struct cpu_defaults *)id->driver_data; copy_pid_params(&cpu_def->pid_policy); @@ -1435,17 +1620,20 @@ static int __init intel_pstate_init(void) if (intel_pstate_msrs_not_valid()) return -ENODEV; +hwp_cpu_matched: + /* + * The Intel pstate driver will be ignored if the platform + * firmware has its own power management modes. + */ + if (intel_pstate_platform_pwr_mgmt_exists()) + return -ENODEV; + pr_info("Intel P-state driver initializing.\n"); all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus()); if (!all_cpu_data) return -ENOMEM; - if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) { - pr_info("intel_pstate: HWP enabled\n"); - hwp_active++; - } - if (!hwp_active && hwp_only) goto out; @@ -1456,12 +1644,15 @@ static int __init intel_pstate_init(void) intel_pstate_debug_expose_params(); intel_pstate_sysfs_expose_params(); + if (hwp_active) + pr_info("intel_pstate: HWP enabled\n"); + return rc; out: get_online_cpus(); for_each_online_cpu(cpu) { if (all_cpu_data[cpu]) { - del_timer_sync(&all_cpu_data[cpu]->timer); + intel_pstate_clear_update_util_hook(cpu); kfree(all_cpu_data[cpu]); } } |