From 135919a3a80565070b9645009e65f73e72c661c0 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 9 Sep 2015 13:35:05 -0400 Subject: [PATCH 01/11] intel_idle: Skylake Client Support - updated Addition of PC9 state, and minor tweaks to existing PC6 and PC8 states. Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 3a3738fe016b3..cd4510a633754 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -620,7 +620,7 @@ static struct cpuidle_state skl_cstates[] = { .name = "C6-SKL", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 75, + .exit_latency = 85, .target_residency = 200, .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, @@ -636,10 +636,18 @@ static struct cpuidle_state skl_cstates[] = { .name = "C8-SKL", .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 174, + .exit_latency = 200, .target_residency = 800, .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, + { + .name = "C9-SKL", + .desc = "MWAIT 0x50", + .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 480, + .target_residency = 5000, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C10-SKL", .desc = "MWAIT 0x60", From 680168a58a9315e1301f4ebb062244470d4919b0 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 22 Sep 2015 09:35:31 -0700 Subject: [PATCH 02/11] PM / OPP: of_property_count_u32_elems() can return errors of_property_count_u32_elems() will never return 0, but a negative error value or a positive count. And so the current !count check is wrong. Also, a missing "opp-microvolt" property isn't a problem and so we need to do of_find_property() separately to confirm that.
Fixes: 274659029c9d (PM / OPP: Add support to parse "operating-points-v2" bindings) Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 28cd75c535b04..1194669c2bb55 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -892,10 +892,17 @@ static int opp_get_microvolt(struct dev_pm_opp *opp, struct device *dev) u32 microvolt[3] = {0}; int count, ret; - count = of_property_count_u32_elems(opp->np, "opp-microvolt"); - if (!count) + /* Missing property isn't a problem, but an invalid entry is */ + if (!of_find_property(opp->np, "opp-microvolt", NULL)) return 0; + count = of_property_count_u32_elems(opp->np, "opp-microvolt"); + if (count < 0) { + dev_err(dev, "%s: Invalid opp-microvolt property (%d)\n", + __func__, count); + return count; + } + /* There can be one or three elements here */ if (count != 1 && count != 3) { dev_err(dev, "%s: Invalid number of elements in opp-microvolt property (%d)\n", From a8360062ccfb4b891d3013d0e55826c8bcb02bfb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 18 Sep 2015 03:08:40 +0200 Subject: [PATCH 03/11] PCI / PM: Update runtime PM documentation for PCI devices Section 3.2 "Device Runtime Power Management" of pci.txt has become outdated, so update it to correctly reflect the current code flow. Also update the comment in local_pci_probe() to document the fact that pm_runtime_put_noidle() is not the only runtime PM helper function that can be used to decrement the device's runtime PM usage counter in .probe(). Signed-off-by: Rafael J. 
Wysocki Acked-by: Alan Stern --- Documentation/power/pci.txt | 51 +++++++++++++++++++++++++++---------- drivers/pci/pci-driver.c | 7 ++--- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt index 62328d76b55bd..b0e911e0e8f50 100644 --- a/Documentation/power/pci.txt +++ b/Documentation/power/pci.txt @@ -979,20 +979,45 @@ every time right after the runtime_resume() callback has returned (alternatively, the runtime_suspend() callback will have to check if the device should really be suspended and return -EAGAIN if that is not the case). -The runtime PM of PCI devices is disabled by default. It is also blocked by -pci_pm_init() that runs the pm_runtime_forbid() helper function. If a PCI -driver implements the runtime PM callbacks and intends to use the runtime PM -framework provided by the PM core and the PCI subsystem, it should enable this -feature by executing the pm_runtime_enable() helper function. However, the -driver should not call the pm_runtime_allow() helper function unblocking -the runtime PM of the device. Instead, it should allow user space or some -platform-specific code to do that (user space can do it via sysfs), although -once it has called pm_runtime_enable(), it must be prepared to handle the +The runtime PM of PCI devices is enabled by default by the PCI core. PCI +device drivers do not need to enable it and should not attempt to do so. +However, it is blocked by pci_pm_init() that runs the pm_runtime_forbid() +helper function. In addition to that, the runtime PM usage counter of +each PCI device is incremented by local_pci_probe() before executing the +probe callback provided by the device's driver. + +If a PCI driver implements the runtime PM callbacks and intends to use the +runtime PM framework provided by the PM core and the PCI subsystem, it needs +to decrement the device's runtime PM usage counter in its probe callback +function. 
If it doesn't do that, the counter will always be different from +zero for the device and it will never be runtime-suspended. The simplest +way to do that is by calling pm_runtime_put_noidle(), but if the driver +wants to schedule an autosuspend right away, for example, it may call +pm_runtime_put_autosuspend() instead for this purpose. Generally, it +just needs to call a function that decrements the device's usage counter +from its probe routine to make runtime PM work for the device. + +It is important to remember that the driver's runtime_suspend() callback +may be executed right after the usage counter has been decremented, because +user space may already have caused the pm_runtime_allow() helper function +unblocking the runtime PM of the device to run via sysfs, so the driver must +be prepared to cope with that. + +The driver itself should not call pm_runtime_allow(), though. Instead, it +should let user space or some platform-specific code do that (user space can +do it via sysfs as stated above), but it must be prepared to handle the runtime PM of the device correctly as soon as pm_runtime_allow() is called -(which may happen at any time). [It also is possible that user space causes -pm_runtime_allow() to be called via sysfs before the driver is loaded, so in -fact the driver has to be prepared to handle the runtime PM of the device as -soon as it calls pm_runtime_enable().] +(which may happen at any time, even before the driver is loaded). + +When the driver's remove callback runs, it has to balance the decrementation +of the device's runtime PM usage counter at the probe time. For this reason, +if it has decremented the counter in its probe callback, it must run +pm_runtime_get_noresume() in its remove callback.
[Since the core carries +out a runtime resume of the device and bumps up the device's usage counter +before running the driver's remove callback, the runtime PM of the device +is effectively disabled for the duration of the remove execution and all +runtime PM helper functions incrementing the device's usage counter are +then effectively equivalent to pm_runtime_get_noresume().] The runtime PM framework works by processing requests to suspend or resume devices, or to check if they are idle (in which cases it is reasonable to diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index dd652f2ae03db..108a3118ace7f 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -299,9 +299,10 @@ static long local_pci_probe(void *_ddi) * Unbound PCI devices are always put in D0, regardless of * runtime PM status. During probe, the device is set to * active and the usage count is incremented. If the driver - * supports runtime PM, it should call pm_runtime_put_noidle() - * in its probe routine and pm_runtime_get_noresume() in its - * remove routine. + * supports runtime PM, it should call pm_runtime_put_noidle(), + * or any other runtime PM helper function decrementing the usage + * count, in its probe routine and pm_runtime_get_noresume() in + * its remove routine. */ pm_runtime_get_sync(dev); pci_dev->driver = pci_drv; From e1a2d49cd5ef551c51be95cc037033e9e582b0cd Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 24 Sep 2015 12:28:44 -0700 Subject: [PATCH 04/11] PM / OPP: Fix typo modifcation -> modification Reported-by: Viresh Kumar Signed-off-by: Stephen Boyd Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/opp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 1194669c2bb55..7ae7cd990fbf7 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -1070,7 +1070,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_add); * share a common logic which is isolated here. * * Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the - * copy operation, returns 0 if no modifcation was done OR modification was + * copy operation, returns 0 if no modification was done OR modification was * successful. * * Locking: The internal device_opp and opp structures are RCU protected. @@ -1158,7 +1158,7 @@ static int _opp_set_availability(struct device *dev, unsigned long freq, * mutex locking or synchronize_rcu() blocking calls cannot be used. * * Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the - * copy operation, returns 0 if no modifcation was done OR modification was + * copy operation, returns 0 if no modification was done OR modification was * successful. */ int dev_pm_opp_enable(struct device *dev, unsigned long freq) @@ -1184,7 +1184,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_enable); * mutex locking or synchronize_rcu() blocking calls cannot be used. * * Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the - * copy operation, returns 0 if no modifcation was done OR modification was + * copy operation, returns 0 if no modification was done OR modification was * successful. */ int dev_pm_opp_disable(struct device *dev, unsigned long freq) From 15b94fa32a422f4b97dc34e4b7060ec83d10bee5 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Thu, 24 Sep 2015 14:54:40 +0800 Subject: [PATCH 05/11] ACPI / EC: Fix a memory leak issue in acpi_ec_query() When query handler is not found, "result" is actually still 0, and "struct acpi_ec_query" is not NULL, so the deletion code of "struct acpi_ec_query" at the end of the function cannot be invoked.
As a consequence, memory leak can be observed. The issue is introduced by this commit: Commit: 02b771b64b73226052d6e731a0987db3b47281e9 Subject: ACPI / EC: Fix an issue caused by the serialized _Qxx This patch fixes such memory leakage. Fixes: 02b771b64b73 (ACPI / EC: Fix an issue caused by the serialized _Qxx evaluations) Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 2614a839c60da..42c66b64c12ce 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1044,8 +1044,10 @@ static int acpi_ec_query(struct acpi_ec *ec, u8 *data) goto err_exit; mutex_lock(&ec->mutex); + result = -ENODATA; list_for_each_entry(handler, &ec->list, node) { if (value == handler->query_bit) { + result = 0; q->handler = acpi_ec_get_query_handler(handler); ec_dbg_evt("Query(0x%02x) scheduled", q->handler->query_bit); From 5ebc76035303016ec41bb752bec156ea9fde7c34 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 17 Sep 2015 14:02:45 +0800 Subject: [PATCH 06/11] ACPI, PCI, irq: Do not share PCI IRQ with ISA IRQ Avoid IRQs occupied by ISA IRQs when allocating IRQs for PCI link devices, otherwise it may cause interrupt storm due to incompatible pin attributes. This issue was triggered on a KVM virtual machine, which 1) uses IRQ9 for SCI in high level mode. 2) defines an PCI interrupt link device (LNKS) with IRQ9 as the only possible irq. 3) has an PCI device referring to link device LNKS. So it causes interrupt storm when enabling the PCI device because PCI IRQ works in low level mode. Signed-off-by: Jiang Liu Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/pci_irq.c | 1 + drivers/acpi/pci_link.c | 13 +++++++++++++ include/linux/acpi.h | 1 + 3 files changed, 15 insertions(+) diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 6da0f9beab198..c9336751e5e37 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -372,6 +372,7 @@ static int acpi_isa_register_gsi(struct pci_dev *dev) /* Interrupt Line values above 0xF are forbidden */ if (dev->irq > 0 && (dev->irq <= 0xF) && + acpi_isa_irq_available(dev->irq) && (acpi_isa_irq_to_gsi(dev->irq, &dev_gsi) == 0)) { dev_warn(&dev->dev, "PCI INT %c: no GSI - using ISA IRQ %d\n", pin_name(dev->pin), dev->irq); diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 3b4ea98e3ea06..246e50d221209 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -553,6 +553,13 @@ static int acpi_pci_link_allocate(struct acpi_pci_link *link) irq = link->irq.possible[i]; } } + if (acpi_irq_penalty[irq] >= PIRQ_PENALTY_ISA_ALWAYS) { + printk(KERN_ERR PREFIX "No IRQ available for %s [%s]. " + "Try pci=noacpi or acpi=off\n", + acpi_device_name(link->device), + acpi_device_bid(link->device)); + return -ENODEV; + } /* Attempt to enable the link device at this IRQ. */ if (acpi_pci_link_set(link, irq)) { @@ -821,6 +828,12 @@ void acpi_penalize_isa_irq(int irq, int active) } } +bool acpi_isa_irq_available(int irq) +{ + return irq >= 0 && (irq >= ARRAY_SIZE(acpi_irq_penalty) || + acpi_irq_penalty[irq] < PIRQ_PENALTY_ISA_ALWAYS); +} + /* * Penalize IRQ used by ACPI SCI. 
If ACPI SCI pin attributes conflict with * PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be use for diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 7235c4851460e..43856d19cf4d8 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -217,6 +217,7 @@ struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); +bool acpi_isa_irq_available(int irq); void acpi_penalize_sci_irq(int irq, int trigger, int polarity); void acpi_pci_irq_disable (struct pci_dev *dev); From d323efc786910bcc0c8f8b9f97780c70544ac4df Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 17 Sep 2015 14:02:46 +0800 Subject: [PATCH 07/11] ACPI / PCI: Remove duplicated penalty on SCI IRQ Now we have dedicated interface acpi_penalize_sci_irq() to penalize ISA IRQ used by ACPI SCI, so remove duplicated code to penalize ACPI SCI in acpi_irq_penalty_init(). Signed-off-by: Jiang Liu Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pci_link.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 246e50d221209..7c8408b946ca1 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -498,8 +498,7 @@ int __init acpi_irq_penalty_init(void) PIRQ_PENALTY_PCI_POSSIBLE; } } - /* Add a penalty for the SCI */ - acpi_irq_penalty[acpi_gbl_FADT.sci_interrupt] += PIRQ_PENALTY_PCI_USING; + return 0; } From 756357b8e4b072fd5ee86421f794e071a348802b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 25 Sep 2015 21:12:39 -0400 Subject: [PATCH 08/11] tools/power turbostat: IVB Xeon: fix --debug regression Starting in Linux-4.3-rc1, commit 6fb3143b561c ("tools/power turbostat: dump CONFIG_TDP") touches MSR 0x648, which is not supported on IVB-Xeon.
This results in "turbostat --debug" exiting on those systems: turbostat: /dev/cpu/2/msr offset 0x648 read failed: Input/output error Remove IVB-Xeon from the list of machines supporting that MSR. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9655cb49c7cb8..e05d303366136 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1926,8 +1926,6 @@ int has_config_tdp(unsigned int family, unsigned int model) switch (model) { case 0x3A: /* IVB */ - case 0x3E: /* IVB Xeon */ - case 0x3C: /* HSW */ case 0x3F: /* HSX */ case 0x45: /* HSW */ From b2b34dfe4d9aa4c468fc363b3b666974783ed1f9 Mon Sep 17 00:00:00 2001 From: Hubert Chrzaniuk Date: Mon, 14 Sep 2015 13:31:00 +0200 Subject: [PATCH 09/11] tools/power turbostat: KNL workaround for %Busy and Avg_MHz KNL increments APERF and MPERF every 1024 clocks. This is compliant with the architecture specification, which requires that only the ratio of APERF/MPERF need be valid. However, turbostat takes advantage of the fact that these two MSRs increment every un-halted clock at the actual and base frequency: AVG_MHz = APERF_delta/measurement_interval %Busy = MPERF_delta/TSC_delta This quirk is needed for these calculations to also work on KNL, which would otherwise show a value 1024x smaller than expected.
Signed-off-by: Hubert Chrzaniuk Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index e05d303366136..d333c819fa1fd 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -71,6 +71,7 @@ unsigned int extra_msr_offset32; unsigned int extra_msr_offset64; unsigned int extra_delta_offset32; unsigned int extra_delta_offset64; +unsigned int aperf_mperf_multiplier = 1; int do_smi; double bclk; unsigned int show_pkg; @@ -984,6 +985,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -3; if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) return -4; + t->aperf = t->aperf * aperf_mperf_multiplier; + t->mperf = t->mperf * aperf_mperf_multiplier; } if (do_smi) { @@ -2541,6 +2544,13 @@ int is_knl(unsigned int family, unsigned int model) return 0; } +unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model) +{ + if (is_knl(family, model)) + return 1024; + return 1; +} + #define SLM_BCLK_FREQS 5 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0}; @@ -2742,6 +2752,9 @@ void process_cpuid() } } + if (has_aperf) + aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model); + do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model); do_snb_cstates = has_snb_msrs(family, model); do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2); From a2b7b74945dbfe5d734eafe8aa52f9f1f8bc6931 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 26 Sep 2015 00:12:38 -0400 Subject: [PATCH 10/11] tools/power turbostat: SKL: Adjust for TSC difference from base frequency On a Skylake with 1500MHz base frequency, the TSC runs at 1512MHz. This is because the TSC is no longer in the n*100 MHz BCLK domain, but is now in the m*24MHz crystal clock domain. 
(24 MHz * 63 = 1512 MHz) This adds error to several calculations in turbostat, unless the TSC sample sizes are adjusted for this difference. Note that calculations in the time domain are immune from this issue, as the timing sub-system has already calibrated the TSC against a known wall clock. AVG_MHz = APERF_delta/measurement_interval need no adjustment. APERF_delta is in the BCLK domain, and measurement_interval is in the time domain. TSC_MHz = TSC_delta/measurement_interval needs no adjustment -- as we really do want to report the actual measured TSC delta here, and measurement_interval is in the accurate time domain. %Busy = MPERF_delta/TSC_delta needs adjustment to use TSC_BCLK_DOMAIN_delta. TSC_BCLK_DOMAIN_delta = TSC_delta * base_hz / tsc_hz Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval need adjustment as above. No other metrics in turbostat need to be adjusted. Before: CPU Avg_MHz %Busy Bzy_MHz TSC_MHz - 550 24.84 2216 1512 0 2191 98.73 2219 1514 2 0 0.01 2130 1512 1 9 0.43 2016 1512 3 2 0.08 2016 1512 After: CPU Avg_MHz %Busy Bzy_MHz TSC_MHz - 550 25.05 2198 1512 0 2190 99.62 2199 1512 2 0 0.01 2152 1512 1 9 0.46 2000 1512 3 2 0.10 2000 1512 Note that in this example, the "Before" Bzy_MHz was reported as exceeding the 2200 max turbo rate. Also, even a pinned spin loop would not be reported as over 99% busy. 
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d333c819fa1fd..31d756b4ea787 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -74,6 +74,8 @@ unsigned int extra_delta_offset64; unsigned int aperf_mperf_multiplier = 1; int do_smi; double bclk; +double base_hz; +double tsc_tweak = 1.0; unsigned int show_pkg; unsigned int show_core; unsigned int show_cpu; @@ -503,7 +505,7 @@ int format_counters(struct thread_data *t, struct core_data *c, /* %Busy */ if (has_aperf) { if (!skip_c0) - outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc); + outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); else outp += sprintf(outp, "********"); } @@ -511,7 +513,7 @@ int format_counters(struct thread_data *t, struct core_data *c, /* Bzy_MHz */ if (has_aperf) outp += sprintf(outp, "%8.0f", - 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); + 1.0 * t->tsc * tsc_tweak / units * t->aperf / t->mperf / interval_float); /* TSC_MHz */ outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); @@ -1152,6 +1154,19 @@ int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; + +static void +calculate_tsc_tweak() +{ + unsigned long long msr; + unsigned int base_ratio; + + get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); + base_ratio = (msr >> 8) & 0xFF; + base_hz = base_ratio * bclk * 1000000; + tsc_tweak = base_hz / tsc_hz; +} + static void dump_nhm_platform_info(void) { @@ -2773,6 
+2788,9 @@ void process_cpuid() if (debug) dump_cstate_pstate_config_info(); + if (has_skl_msrs(family, model)) + calculate_tsc_tweak(); + return; } From af71b980c0d20586cc331b617c47094a8ec7e1db Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 26 Sep 2015 09:49:55 -0400 Subject: [PATCH 11/11] tools/power turbostat: update version number Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 31d756b4ea787..bde0ef1a63df4 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3119,7 +3119,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(stderr, "turbostat version 4.7 17-June, 2015" + fprintf(stderr, "turbostat version 4.8 26-Sep, 2015" " - Len Brown \n"); }