From e1689795a784a7c41ac4cf9032794986b095a133 Mon Sep 17 00:00:00 2001 From: Robert Lee Date: Tue, 20 Mar 2012 15:22:42 -0500 Subject: cpuidle: Add common time keeping and irq enabling Make necessary changes to implement time keeping and irq enabling in the core cpuidle code. This will allow the removal of these functionalities from various platform cpuidle implementations whose timekeeping and irq enabling follows the form in this common code. Signed-off-by: Robert Lee Tested-by: Jean Pihet Tested-by: Amit Daniel Tested-by: Robert Lee Reviewed-by: Kevin Hilman Reviewed-by: Daniel Lezcano Reviewed-by: Deepthi Dharwar Acked-by: Jean Pihet Signed-off-by: Len Brown --- drivers/cpuidle/cpuidle.c | 66 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 7 deletions(-) (limited to 'drivers/cpuidle') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 59f4261c753a..4869b5500234 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -53,6 +53,24 @@ static void cpuidle_kick_cpus(void) {} static int __cpuidle_register_device(struct cpuidle_device *dev); +static inline int cpuidle_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + struct cpuidle_state *target_state = &drv->states[index]; + return target_state->enter(dev, drv, index); +} + +static inline int cpuidle_enter_tk(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + return cpuidle_wrap_enter(dev, drv, index, cpuidle_enter); +} + +typedef int (*cpuidle_enter_t)(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index); + +static cpuidle_enter_t cpuidle_enter_ops; + /** * cpuidle_idle_call - the main idle loop * @@ -63,7 +81,6 @@ int cpuidle_idle_call(void) { struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); struct cpuidle_driver *drv = cpuidle_get_driver(); - struct cpuidle_state *target_state; int next_state, entered_state; if (off) @@ -92,12 +109,10 @@ int cpuidle_idle_call(void) return 0; } - target_state = &drv->states[next_state]; - trace_power_start(POWER_CSTATE, next_state, dev->cpu); trace_cpu_idle(next_state, dev->cpu); - entered_state = target_state->enter(dev, drv, next_state); + entered_state = cpuidle_enter_ops(dev, drv, next_state); trace_power_end(dev->cpu); trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu); @@ -110,6 +125,8 @@ int cpuidle_idle_call(void) dev->states_usage[entered_state].time += (unsigned long long)dev->last_residency; dev->states_usage[entered_state].usage++; + } else { + dev->last_residency = 0; } /* give the governor an opportunity to reflect on the outcome */ @@ -164,6 +181,37 @@ void cpuidle_resume_and_unlock(void) EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock); +/** + * cpuidle_wrap_enter - performs timekeeping and irqen around enter function + * @dev: pointer to a valid cpuidle_device object + * @drv: pointer to a valid cpuidle_driver object + * @index: index of the target cpuidle state. + */ +int cpuidle_wrap_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index, + int (*enter)(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index)) +{ + ktime_t time_start, time_end; + s64 diff; + + time_start = ktime_get(); + + index = enter(dev, drv, index); + + time_end = ktime_get(); + + local_irq_enable(); + + diff = ktime_to_us(ktime_sub(time_end, time_start)); + if (diff > INT_MAX) + diff = INT_MAX; + + dev->last_residency = (int) diff; + + return index; +} + #ifdef CONFIG_ARCH_HAS_CPU_RELAX static int poll_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) @@ -212,10 +260,11 @@ static void poll_idle_init(struct cpuidle_driver *drv) {} int cpuidle_enable_device(struct cpuidle_device *dev) { int ret, i; + struct cpuidle_driver *drv = cpuidle_get_driver(); if (dev->enabled) return 0; - if (!cpuidle_get_driver() || !cpuidle_curr_governor) + if (!drv || !cpuidle_curr_governor) return -EIO; if (!dev->state_count) return -EINVAL; @@ -226,13 +275,16 @@ int cpuidle_enable_device(struct cpuidle_device *dev) return ret; } - poll_idle_init(cpuidle_get_driver()); + cpuidle_enter_ops = drv->en_core_tk_irqen ? + cpuidle_enter_tk : cpuidle_enter; + + poll_idle_init(drv); if ((ret = cpuidle_add_state_sysfs(dev))) return ret; if (cpuidle_curr_governor->enable && - (ret = cpuidle_curr_governor->enable(cpuidle_get_driver(), dev))) + (ret = cpuidle_curr_governor->enable(drv, dev))) goto fail_sysfs; for (i = 0; i < dev->state_count; i++) { -- cgit v1.2.3 From 3a53396b0381ec9d5180fd8fe7a681c8ce95fd9a Mon Sep 17 00:00:00 2001 From: ShuoX Liu Date: Wed, 28 Mar 2012 15:19:11 -0700 Subject: cpuidle: add a sysfs entry to disable specific C state for debug purpose. Some C states of new CPU might be not good. One reason is BIOS might configure them incorrectly. To help developers root cause it quickly, the patch adds a new sysfs entry, so developers could disable specific C state manually. In addition, C state might have much impact on performance tuning, as it takes much time to enter/exit C states, which might delay interrupt processing. With the new debug option, developers could check if a deep C state could impact performance and how much impact it could cause. Also add this option in Documentation/cpuidle/sysfs.txt. [akpm@linux-foundation.org: check kstrtol return value] Signed-off-by: ShuoX Liu Reviewed-by: Yanmin Zhang Reviewed-and-Tested-by: Deepthi Dharwar Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- Documentation/cpuidle/sysfs.txt | 5 +++++ drivers/cpuidle/cpuidle.c | 1 + drivers/cpuidle/governors/menu.c | 5 ++++- drivers/cpuidle/sysfs.c | 40 ++++++++++++++++++++++++++++++++++++++++ include/linux/cpuidle.h | 1 + 5 files changed, 51 insertions(+), 1 deletion(-) (limited to 'drivers/cpuidle') diff --git a/Documentation/cpuidle/sysfs.txt b/Documentation/cpuidle/sysfs.txt index 50d7b1642759..9d28a3406e74 100644 --- a/Documentation/cpuidle/sysfs.txt +++ b/Documentation/cpuidle/sysfs.txt @@ -36,6 +36,7 @@ drwxr-xr-x 2 root root 0 Feb 8 10:42 state3 /sys/devices/system/cpu/cpu0/cpuidle/state0: total 0 -r--r--r-- 1 root root 4096 Feb 8 10:42 desc +-rw-r--r-- 1 root root 4096 Feb 8 10:42 disable -r--r--r-- 1 root root 4096 Feb 8 10:42 latency -r--r--r-- 1 root root 4096 Feb 8 10:42 name -r--r--r-- 1 root root 4096 Feb 8 10:42 power @@ -45,6 +46,7 @@ total 0 /sys/devices/system/cpu/cpu0/cpuidle/state1: total 0 -r--r--r-- 1 root root 4096 Feb 8 10:42 desc +-rw-r--r-- 1 root root 4096 Feb 8 10:42 disable -r--r--r-- 1 root root 4096 Feb 8 10:42 latency -r--r--r-- 1 root root 4096 Feb 8 10:42 name -r--r--r-- 1 root root 4096 Feb 8 10:42 power @@ -54,6 +56,7 @@ total 0 /sys/devices/system/cpu/cpu0/cpuidle/state2: total 0 -r--r--r-- 1 root root 4096 Feb 8 10:42 desc +-rw-r--r-- 1 root root 4096 Feb 8 10:42 disable -r--r--r-- 1 root root 4096 Feb 8 10:42 latency -r--r--r-- 1 root root 4096 Feb 8 10:42 name -r--r--r-- 1 root root 4096 Feb 8 10:42 power @@ -63,6 +66,7 @@ total 0 /sys/devices/system/cpu/cpu0/cpuidle/state3: total 0 -r--r--r-- 1 root root 4096 Feb 8 10:42 desc +-rw-r--r-- 1 root root 4096 Feb 8 10:42 disable -r--r--r-- 1 root root 4096 Feb 8 10:42 latency -r--r--r-- 1 root root 4096 Feb 8 10:42 name -r--r--r-- 1 root root 4096 Feb 8 10:42 power @@ -72,6 +76,7 @@ total 0 * desc : Small description about the idle state (string) +* disable : Option to disable this idle state (bool) * latency : Latency to exit out of this idle state (in microseconds) * name : Name of the idle state (string) * power : Power consumed while in this idle state (in milliwatts) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 4869b5500234..77304b6b8aef 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -245,6 +245,7 @@ static void poll_idle_init(struct cpuidle_driver *drv) state->power_usage = -1; state->flags = 0; state->enter = poll_idle; + state->disable = 0; } #else static void poll_idle_init(struct cpuidle_driver *drv) {} diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index ad0952601ae2..5c17ca112fc2 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -280,7 +280,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) * We want to default to C1 (hlt), not to busy polling * unless the timer is happening really really soon. */ - if (data->expected_us > 5) + if (data->expected_us > 5 && + drv->states[CPUIDLE_DRIVER_STATE_START].disable == 0) data->last_state_idx = CPUIDLE_DRIVER_STATE_START; /* @@ -290,6 +291,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) { struct cpuidle_state *s = &drv->states[i]; + if (s->disable) + continue; if (s->target_residency > data->predicted_us) continue; if (s->exit_latency > latency_req) diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 3fe41fe4851a..88032b4dc6d2 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "cpuidle.h" @@ -222,6 +223,9 @@ struct cpuidle_state_attr { #define define_one_state_ro(_name, show) \ static struct cpuidle_state_attr attr_##_name = __ATTR(_name, 0444, show, NULL) +#define define_one_state_rw(_name, show, store) \ +static struct cpuidle_state_attr attr_##_name = __ATTR(_name, 0644, show, store) + #define define_show_state_function(_name) \ static ssize_t show_state_##_name(struct cpuidle_state *state, \ struct cpuidle_state_usage *state_usage, char *buf) \ @@ -229,6 +233,24 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \ return sprintf(buf, "%u\n", state->_name);\ } +#define define_store_state_function(_name) \ +static ssize_t store_state_##_name(struct cpuidle_state *state, \ + const char *buf, size_t size) \ +{ \ + long value; \ + int err; \ + if (!capable(CAP_SYS_ADMIN)) \ + return -EPERM; \ + err = kstrtol(buf, 0, &value); \ + if (err) \ + return err; \ + if (value) \ + state->disable = 1; \ + else \ + state->disable = 0; \ + return size; \ +} + #define define_show_state_ull_function(_name) \ static ssize_t show_state_##_name(struct cpuidle_state *state, \ struct cpuidle_state_usage *state_usage, char *buf) \ @@ -251,6 +273,8 @@ define_show_state_ull_function(usage) define_show_state_ull_function(time) define_show_state_str_function(name) define_show_state_str_function(desc) +define_show_state_function(disable) +define_store_state_function(disable) define_one_state_ro(name, show_state_name); define_one_state_ro(desc, show_state_desc); @@ -258,6 +282,7 @@ define_one_state_ro(latency, show_state_exit_latency); define_one_state_ro(power, show_state_power_usage); define_one_state_ro(usage, show_state_usage); define_one_state_ro(time, show_state_time); +define_one_state_rw(disable, show_state_disable, store_state_disable); static struct attribute *cpuidle_state_default_attrs[] = { &attr_name.attr, @@ -266,6 +291,7 @@ static struct attribute *cpuidle_state_default_attrs[] = { &attr_power.attr, &attr_usage.attr, &attr_time.attr, + &attr_disable.attr, NULL }; @@ -287,8 +313,22 @@ static ssize_t cpuidle_state_show(struct kobject * kobj, return ret; } +static ssize_t cpuidle_state_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t size) +{ + int ret = -EIO; + struct cpuidle_state *state = kobj_to_state(kobj); + struct cpuidle_state_attr *cattr = attr_to_stateattr(attr); + + if (cattr->store) + ret = cattr->store(state, buf, size); + + return ret; +} + static const struct sysfs_ops cpuidle_state_sysfs_ops = { .show = cpuidle_state_show, + .store = cpuidle_state_store, }; static void cpuidle_state_sysfs_release(struct kobject *kobj) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 927db28a2a4c..ca4e4983773f 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -46,6 +46,7 @@ struct cpuidle_state { unsigned int exit_latency; /* in US */ unsigned int power_usage; /* in mW */ unsigned int target_residency; /* in US */ + unsigned int disable; int (*enter) (struct cpuidle_device *dev, struct cpuidle_driver *drv, -- cgit v1.2.3 From fc850f39ea54c760ce438a601cfea8ab80c4898e Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 26 Mar 2012 14:51:26 +0200 Subject: cpuidle: use the driver's state_count as default If the state_count is not initialized for the device use the driver's state count as the default. That will prevent to add it manually in the cpuidle driver initialization routine and will save us from duplicate line of code. Signed-off-by: Daniel Lezcano Signed-off-by: Len Brown --- drivers/cpuidle/cpuidle.c | 2 +- drivers/cpuidle/driver.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/cpuidle') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 77304b6b8aef..f7cab5e9c4d6 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -268,7 +268,7 @@ int cpuidle_enable_device(struct cpuidle_device *dev) if (!drv || !cpuidle_curr_governor) return -EIO; if (!dev->state_count) - return -EINVAL; + dev->state_count = drv->state_count; if (dev->registered == 0) { ret = __cpuidle_register_device(dev); diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 284d7af5a9c8..40cd3f3024df 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -47,7 +47,7 @@ static void __cpuidle_register_driver(struct cpuidle_driver *drv) */ int cpuidle_register_driver(struct cpuidle_driver *drv) { - if (!drv) + if (!drv || !drv->state_count) return -EINVAL; if (cpuidle_disabled()) -- cgit v1.2.3 From 1a022e3f1be11730bd8747b1af96a0274bf6356e Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 13 Mar 2012 19:55:09 +0100 Subject: idle, x86: Allow off-lined CPU to enter deeper C states Currently when a CPU is off-lined it enters either MWAIT-based idle or, if MWAIT is not desired or supported, HLT-based idle (which places the processor in C1 state). This patch allows processors without MWAIT support to stay in states deeper than C1. Signed-off-by: Boris Ostrovsky Signed-off-by: Len Brown --- arch/x86/kernel/smpboot.c | 4 +++- drivers/acpi/processor_idle.c | 31 +++++++++++++++++++++++++++++++ drivers/cpuidle/cpuidle.c | 28 ++++++++++++++++++++++++++++ include/linux/cpuidle.h | 5 +++++ 4 files changed, 67 insertions(+), 1 deletion(-) (limited to 'drivers/cpuidle') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 66d250c00d11..93a2a0932b51 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -1422,7 +1423,8 @@ void native_play_dead(void) tboot_shutdown(TB_SHUTDOWN_WFS); mwait_play_dead(); /* Only returns on failure */ - hlt_play_dead(); + if (cpuidle_play_dead()) + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 0e8e2de2ed3e..6b1d32a161ae 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -770,6 +770,35 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, return index; } + +/** + * acpi_idle_play_dead - enters an ACPI state for long-term idle (i.e. off-lining) + * @dev: the target CPU + * @index: the index of suggested state + */ +static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) +{ + struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; + struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); + + ACPI_FLUSH_CPU_CACHE(); + + while (1) { + + if (cx->entry_method == ACPI_CSTATE_HALT) + halt(); + else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) { + inb(cx->address); + /* See comment in acpi_idle_do_entry() */ + inl(acpi_gbl_FADT.xpm_timer_block.address); + } else + return -ENODEV; + } + + /* Never reached */ + return 0; +} + /** * acpi_idle_enter_simple - enters an ACPI state without BM handling * @dev: the target CPU @@ -1077,12 +1106,14 @@ static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr) state->flags |= CPUIDLE_FLAG_TIME_VALID; state->enter = acpi_idle_enter_c1; + state->enter_dead = acpi_idle_play_dead; drv->safe_state_index = count; break; case ACPI_STATE_C2: state->flags |= CPUIDLE_FLAG_TIME_VALID; state->enter = acpi_idle_enter_simple; + state->enter_dead = acpi_idle_play_dead; drv->safe_state_index = count; break; diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index f7cab5e9c4d6..3e146b2ada4a 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -71,6 +71,34 @@ typedef int (*cpuidle_enter_t)(struct cpuidle_device *dev, static cpuidle_enter_t cpuidle_enter_ops; +/** + * cpuidle_play_dead - cpu off-lining + * + * Only returns in case of an error + */ +int cpuidle_play_dead(void) +{ + struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); + struct cpuidle_driver *drv = cpuidle_get_driver(); + int i, dead_state = -1; + int power_usage = -1; + + /* Find lowest-power state that supports long-term idle */ + for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) { + struct cpuidle_state *s = &drv->states[i]; + + if (s->power_usage < power_usage && s->enter_dead) { + power_usage = s->power_usage; + dead_state = i; + } + } + + if (dead_state != -1) + return drv->states[dead_state].enter_dead(dev, dead_state); + + return -ENODEV; +} + /** * cpuidle_idle_call - the main idle loop * diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index f3ebbba368b3..d557bcd0ada7 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -51,6 +51,8 @@ struct cpuidle_state { int (*enter) (struct cpuidle_device *dev, struct cpuidle_driver *drv, int index); + + int (*enter_dead) (struct cpuidle_device *dev, int index); }; /* Idle State Flags */ @@ -147,6 +149,8 @@ extern int cpuidle_wrap_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index, int (*enter)(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index)); +extern int cpuidle_play_dead(void); + #else static inline void disable_cpuidle(void) { } static inline int cpuidle_idle_call(void) { return -ENODEV; } @@ -168,6 +172,7 @@ static inline int cpuidle_wrap_enter(struct cpuidle_device *dev, int (*enter)(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index)) { return -ENODEV; } +static inline int cpuidle_play_dead(void) {return -ENODEV; } #endif -- cgit v1.2.3 From 02401c06b7f6bec65f314e3cec7894502c973501 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 13 Mar 2012 19:55:10 +0100 Subject: cpuidle: power_usage should be declared signed integer power_usage is always assigned a negative value and should be declared a signed integer Signed-off-by: Boris Ostrovsky Signed-off-by: Len Brown --- drivers/cpuidle/governors/menu.c | 2 +- include/linux/cpuidle.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/cpuidle') diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 5c17ca112fc2..06335756ea14 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -236,7 +236,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = &__get_cpu_var(menu_devices); int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); - unsigned int power_usage = -1; + int power_usage = -1; int i; int multiplier; struct timespec t; diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index d557bcd0ada7..6c26a3da0e03 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -44,7 +44,7 @@ struct cpuidle_state { unsigned int flags; unsigned int exit_latency; /* in US */ - unsigned int power_usage; /* in mW */ + int power_usage; /* in mW */ unsigned int target_residency; /* in US */ unsigned int disable; -- cgit v1.2.3