Re: [patch]: fixing of pmtimer overflow that make Cx states timeincorrect

From: yakui_zhao
Date: Tue Feb 03 2009 - 01:07:32 EST


On Mon, 2009-02-02 at 09:46 +0800, alex.shi wrote:
> Yagui want to give a clear explanation to be used for commitment. So I resend
> this again.
>
> On most boxes the ACPI PM timer is 24-bit counter that runs on 3.579545MHz
> clock. In such case the max C-state sleep time should be less than 4687ms when
> it is used to record C2/C3 duration time.
> But on some boxes the max C-state sleep time is more than 4687ms. In such
> case the overflow happens and the C-state duration time can't be counted
> accurately.
Hi, All

When the clocksource is used to get the C-state idle time, the unit
of idle_time will be microsecond. To be compatible with
the /proc/acpi/processor/*/power interface, it will be converted to ACPI
PM timer sleep ticks by using the macro definition of
US_TO_PM_TIMER_TICKS(this I/F will be used by the powertop tool of early
version). When the macro definition is used, sometimes the overflow
will happen if the 32-bit data type is used.
So the function of div64_u64 is used in the macro definition of
US_TO_PM_TIMER_TICKS.
On the x86_64 platform the function of div64_u64 is very simple. But
on the i386 platform it will be very complex.

In fact in the latest powertop the sys I/F will be preferred instead
of /proc I/F. The time unit of sys I/F is microsecond. In such case it
seems redundant that the C-state idle time is converted to PM timer
sleep ticks.
How about delete the conversion? If so, the time unit of /proc I/F
will also be microsecond. And the div operation will be omitted.

---
drivers/acpi/processor_idle.c | 83
+++++++++++++++++++++---------------------
1 file changed, 42 insertions(+), 41 deletions(-)

Index: linux-2.6/drivers/acpi/processor_idle.c
===================================================================
--- linux-2.6.orig/drivers/acpi/processor_idle.c 2009-02-03
13:41:07.000000000 +0800
+++ linux-2.6/drivers/acpi/processor_idle.c 2009-02-03
14:16:30.000000000 +0800
@@ -67,8 +67,8 @@
#define US_TO_PM_TIMER_TICKS(t) ((t * (PM_TIMER_FREQUENCY/1000)) /
1000)
#define PM_TIMER_TICK_NS (1000000000ULL/PM_TIMER_FREQUENCY)
#ifndef CONFIG_CPU_IDLE
-#define C2_OVERHEAD 4 /* 1us (3.579 ticks per us) */
-#define C3_OVERHEAD 4 /* 1us (3.579 ticks per us) */
+#define C2_OVERHEAD 1 /* 1us (3.579 ticks per us) */
+#define C3_OVERHEAD 1 /* 1us (3.579 ticks per us) */
static void (*pm_idle_save) (void) __read_mostly;
#else
#define C2_OVERHEAD 1 /* 1us */
@@ -396,8 +396,9 @@
struct acpi_processor *pr = NULL;
struct acpi_processor_cx *cx = NULL;
struct acpi_processor_cx *next_state = NULL;
- int sleep_ticks = 0;
- u32 t1, t2 = 0;
+ s64 sleep_ticks = 0;
+ ktime_t kt1, kt2;
+ s64 idle_time;

/*
* Interrupts must be disabled during bus mastering calculations and
@@ -544,26 +545,26 @@

case ACPI_STATE_C2:
/* Get start time (ticks) */
- t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+ kt1 = ktime_get_real();
/* Tell the scheduler that we are going deep-idle: */
sched_clock_idle_sleep_event();
/* Invoke C2 */
acpi_state_timer_broadcast(pr, cx, 1);
acpi_cstate_enter(cx);
/* Get end time (ticks) */
- t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+ kt2 = ktime_get_real();
+ idle_time = ktime_to_us(ktime_sub(kt2, kt1));

#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86)
/* TSC halts in C2, so notify users */
if (tsc_halts_in_c(ACPI_STATE_C2))
mark_tsc_unstable("possible TSC halt in C2");
#endif
- /* Compute time (ticks) that we were actually asleep */
- sleep_ticks = ticks_elapsed(t1, t2);

/* Tell the scheduler how much we idled: */
- sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
-
+ /* the unit of idle_time is us. We need convert it to NS */
+ sched_clock_idle_wakeup_event(idle_time * 1000);
+ sleep_ticks = idle_time;
/* Re-enable interrupts */
local_irq_enable();
/* Do not account our idle-switching overhead: */
@@ -605,13 +606,14 @@
}

/* Get start time (ticks) */
- t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+ kt1 = ktime_get_real();
/* Invoke C3 */
/* Tell the scheduler that we are going deep-idle: */
sched_clock_idle_sleep_event();
acpi_cstate_enter(cx);
/* Get end time (ticks) */
- t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+ kt2 = ktime_get_real();
+ idle_time = ktime_to_us(ktime_sub(kt2, kt1));
if (pr->flags.bm_check && pr->flags.bm_control) {
/* Enable bus master arbitration */
atomic_dec(&c3_cpu_count);
@@ -624,9 +626,10 @@
mark_tsc_unstable("TSC halts in C3");
#endif
/* Compute time (ticks) that we were actually asleep */
- sleep_ticks = ticks_elapsed(t1, t2);
+ sleep_ticks = idle_time;
/* Tell the scheduler how much we idled: */
- sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
+ /* the unit of sleept_ticks is us. We need convert it to NS */
+ sched_clock_idle_wakeup_event(idle_time * 1000);

/* Re-enable interrupts */
local_irq_enable();
@@ -1046,12 +1049,8 @@
* Normalize the C2 latency to expidite policy
*/
cx->valid = 1;
-
-#ifndef CONFIG_CPU_IDLE
- cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
-#else
+ /* the unit of latency_ticks will always be US */
cx->latency_ticks = cx->latency;
-#endif

return;
}
@@ -1132,11 +1131,7 @@
*/
cx->valid = 1;

-#ifndef CONFIG_CPU_IDLE
- cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
-#else
cx->latency_ticks = cx->latency;
-#endif

return;
}
@@ -1455,7 +1450,8 @@
static int acpi_idle_enter_c1(struct cpuidle_device *dev,
struct cpuidle_state *state)
{
- u32 t1, t2;
+ ktime_t kt1, kt2;
+ s64 idle_time;
struct acpi_processor *pr;
struct acpi_processor_cx *cx = cpuidle_get_statedata(state);

@@ -1476,14 +1472,15 @@
if (pr->flags.bm_check)
acpi_idle_update_bm_rld(pr, cx);

- t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+ kt1 = ktime_get_real();
acpi_idle_do_entry(cx);
- t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+ kt2 = ktime_get_real();
+ idle_time = ktime_to_us(ktime_sub(kt2, kt1));

local_irq_enable();
cx->usage++;

- return ticks_elapsed_in_us(t1, t2);
+ return idle_time;
}

/**
@@ -1496,8 +1493,9 @@
{
struct acpi_processor *pr;
struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
- u32 t1, t2;
- int sleep_ticks = 0;
+ ktime_t kt1, kt2;
+ s64 idle_time;
+ s64 sleep_ticks = 0;

pr = __get_cpu_var(processors);

@@ -1533,21 +1531,22 @@
if (cx->type == ACPI_STATE_C3)
ACPI_FLUSH_CPU_CACHE();

- t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+ kt1 = ktime_get_real();
/* Tell the scheduler that we are going deep-idle: */
sched_clock_idle_sleep_event();
acpi_idle_do_entry(cx);
- t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+ kt2 = ktime_get_real();
+ idle_time = ktime_to_us(ktime_sub(kt2, kt1));

#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86)
/* TSC could halt in idle, so notify users */
if (tsc_halts_in_c(cx->type))
mark_tsc_unstable("TSC halts in idle");;
#endif
- sleep_ticks = ticks_elapsed(t1, t2);
+ sleep_ticks = idle_time;

/* Tell the scheduler how much we idled: */
- sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
+ sched_clock_idle_wakeup_event(idle_time * 1000);

local_irq_enable();
current_thread_info()->status |= TS_POLLING;
@@ -1556,7 +1555,7 @@

acpi_state_timer_broadcast(pr, cx, 0);
cx->time += sleep_ticks;
- return ticks_elapsed_in_us(t1, t2);
+ return idle_time;
}

static int c3_cpu_count;
@@ -1574,8 +1573,9 @@
{
struct acpi_processor *pr;
struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
- u32 t1, t2;
- int sleep_ticks = 0;
+ ktime_t kt1, kt2;
+ s64 idle_time;
+ s64 sleep_ticks = 0;

pr = __get_cpu_var(processors);

@@ -1644,9 +1644,10 @@
ACPI_FLUSH_CPU_CACHE();
}

- t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+ kt1 = ktime_get_real();
acpi_idle_do_entry(cx);
- t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+ kt2 = ktime_get_real();
+ idle_time = ktime_to_us(ktime_sub(kt2, kt1));

/* Re-enable bus master arbitration */
if (pr->flags.bm_check && pr->flags.bm_control) {
@@ -1661,9 +1662,9 @@
if (tsc_halts_in_c(ACPI_STATE_C3))
mark_tsc_unstable("TSC halts in idle");
#endif
- sleep_ticks = ticks_elapsed(t1, t2);
+ sleep_ticks = idle_time;
/* Tell the scheduler how much we idled: */
- sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
+ sched_clock_idle_wakeup_event(idle_time * 1000);

local_irq_enable();
current_thread_info()->status |= TS_POLLING;
@@ -1672,7 +1673,7 @@

acpi_state_timer_broadcast(pr, cx, 0);
cx->time += sleep_ticks;
- return ticks_elapsed_in_us(t1, t2);
+ return idle_time;
}

struct cpuidle_driver acpi_idle_driver = {


>
> Use clocksource to get the C-state time instead of ACPI PM timer. and use
> div64_u64 to convert US_TO_PM_TIME_TICKS in i386 mode.
>
> Thanks
> Alex
>
> Asked-by: venkatesh.pallipadi@xxxxxxxxx
> Tested-by: Alex Shi <alex.shi@xxxxxxxxx>
> Signed-off-by: Alex Shi <alex.shi@xxxxxxxxx>
> Signed-off-by: Yakui.zhao <yakui.zhao@xxxxxxxxx>
> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
>
> ---
>
>
> Index: linux-2.6.29-rc3/drivers/acpi/processor_idle.c
> ===================================================================
> --- linux-2.6.29-rc3.orig/drivers/acpi/processor_idle.c
> +++ linux-2.6.29-rc3/drivers/acpi/processor_idle.c
> @@ -64,7 +64,8 @@
> #define _COMPONENT ACPI_PROCESSOR_COMPONENT
> ACPI_MODULE_NAME("processor_idle");
> #define ACPI_PROCESSOR_FILE_POWER "power"
> -#define US_TO_PM_TIMER_TICKS(t) ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
> +#define US_TO_PM_TIMER_TICKS(t) div64_u64(\
> + (t * (PM_TIMER_FREQUENCY/1000)), 1000ULL)
> #define PM_TIMER_TICK_NS (1000000000ULL/PM_TIMER_FREQUENCY)
> #ifndef CONFIG_CPU_IDLE
> #define C2_OVERHEAD 4 /* 1us (3.579 ticks per us) */
> @@ -396,8 +397,9 @@ static void acpi_processor_idle(void)
> struct acpi_processor *pr = NULL;
> struct acpi_processor_cx *cx = NULL;
> struct acpi_processor_cx *next_state = NULL;
> - int sleep_ticks = 0;
> - u32 t1, t2 = 0;
> + s64 sleep_ticks = 0;
> + ktime_t kt1, kt2;
> + s64 idle_time;
>
> /*
> * Interrupts must be disabled during bus mastering calculations and
> @@ -544,14 +546,15 @@ static void acpi_processor_idle(void)
>
> case ACPI_STATE_C2:
> /* Get start time (ticks) */
> - t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
> + kt1 = ktime_get_real();
> /* Tell the scheduler that we are going deep-idle: */
> sched_clock_idle_sleep_event();
> /* Invoke C2 */
> acpi_state_timer_broadcast(pr, cx, 1);
> acpi_cstate_enter(cx);
> /* Get end time (ticks) */
> - t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
> + kt2 = ktime_get_real();
> + idle_time = ktime_to_us(ktime_sub(kt2, kt1));
>
> #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86)
> /* TSC halts in C2, so notify users */
> @@ -559,7 +562,7 @@ static void acpi_processor_idle(void)
> mark_tsc_unstable("possible TSC halt in C2");
> #endif
> /* Compute time (ticks) that we were actually asleep */
> - sleep_ticks = ticks_elapsed(t1, t2);
> + sleep_ticks = US_TO_PM_TIMER_TICKS(idle_time);
>
> /* Tell the scheduler how much we idled: */
> sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
> @@ -605,13 +608,14 @@ static void acpi_processor_idle(void)
> }
>
> /* Get start time (ticks) */
> - t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
> + kt1 = ktime_get_real();
> /* Invoke C3 */
> /* Tell the scheduler that we are going deep-idle: */
> sched_clock_idle_sleep_event();
> acpi_cstate_enter(cx);
> /* Get end time (ticks) */
> - t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
> + kt2 = ktime_get_real();
> + idle_time = ktime_to_us(ktime_sub(kt2, kt1));
> if (pr->flags.bm_check && pr->flags.bm_control) {
> /* Enable bus master arbitration */
> atomic_dec(&c3_cpu_count);
> @@ -624,7 +628,7 @@ static void acpi_processor_idle(void)
> mark_tsc_unstable("TSC halts in C3");
> #endif
> /* Compute time (ticks) that we were actually asleep */
> - sleep_ticks = ticks_elapsed(t1, t2);
> + sleep_ticks = US_TO_PM_TIMER_TICKS(idle_time);
> /* Tell the scheduler how much we idled: */
> sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
>
> @@ -1455,7 +1459,8 @@ static inline void acpi_idle_do_entry(st
> static int acpi_idle_enter_c1(struct cpuidle_device *dev,
> struct cpuidle_state *state)
> {
> - u32 t1, t2;
> + ktime_t kt1, kt2;
> + s64 idle_time;
> struct acpi_processor *pr;
> struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
>
> @@ -1476,14 +1481,15 @@ static int acpi_idle_enter_c1(struct cpu
> if (pr->flags.bm_check)
> acpi_idle_update_bm_rld(pr, cx);
>
> - t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
> + kt1 = ktime_get_real();
> acpi_idle_do_entry(cx);
> - t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
> + kt2 = ktime_get_real();
> + idle_time = ktime_to_us(ktime_sub(kt2, kt1));
>
> local_irq_enable();
> cx->usage++;
>
> - return ticks_elapsed_in_us(t1, t2);
> + return idle_time;
> }
>
> /**
> @@ -1496,8 +1502,9 @@ static int acpi_idle_enter_simple(struct
> {
> struct acpi_processor *pr;
> struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
> - u32 t1, t2;
> - int sleep_ticks = 0;
> + ktime_t kt1, kt2;
> + s64 idle_time;
> + s64 sleep_ticks = 0;
>
> pr = __get_cpu_var(processors);
>
> @@ -1533,18 +1540,19 @@ static int acpi_idle_enter_simple(struct
> if (cx->type == ACPI_STATE_C3)
> ACPI_FLUSH_CPU_CACHE();
>
> - t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
> + kt1 = ktime_get_real();
> /* Tell the scheduler that we are going deep-idle: */
> sched_clock_idle_sleep_event();
> acpi_idle_do_entry(cx);
> - t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
> + kt2 = ktime_get_real();
> + idle_time = ktime_to_us(ktime_sub(kt2, kt1));
>
> #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86)
> /* TSC could halt in idle, so notify users */
> if (tsc_halts_in_c(cx->type))
> mark_tsc_unstable("TSC halts in idle");;
> #endif
> - sleep_ticks = ticks_elapsed(t1, t2);
> + sleep_ticks = US_TO_PM_TIMER_TICKS(idle_time);
>
> /* Tell the scheduler how much we idled: */
> sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
> @@ -1556,7 +1564,7 @@ static int acpi_idle_enter_simple(struct
>
> acpi_state_timer_broadcast(pr, cx, 0);
> cx->time += sleep_ticks;
> - return ticks_elapsed_in_us(t1, t2);
> + return idle_time;
> }
>
> static int c3_cpu_count;
> @@ -1574,8 +1582,9 @@ static int acpi_idle_enter_bm(struct cpu
> {
> struct acpi_processor *pr;
> struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
> - u32 t1, t2;
> - int sleep_ticks = 0;
> + ktime_t kt1, kt2;
> + s64 idle_time;
> + s64 sleep_ticks = 0;
>
> pr = __get_cpu_var(processors);
>
> @@ -1644,9 +1653,10 @@ static int acpi_idle_enter_bm(struct cpu
> ACPI_FLUSH_CPU_CACHE();
> }
>
> - t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
> + kt1 = ktime_get_real();
> acpi_idle_do_entry(cx);
> - t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
> + kt2 = ktime_get_real();
> + idle_time = ktime_to_us(ktime_sub(kt2, kt1));
>
> /* Re-enable bus master arbitration */
> if (pr->flags.bm_check && pr->flags.bm_control) {
> @@ -1661,7 +1671,7 @@ static int acpi_idle_enter_bm(struct cpu
> if (tsc_halts_in_c(ACPI_STATE_C3))
> mark_tsc_unstable("TSC halts in idle");
> #endif
> - sleep_ticks = ticks_elapsed(t1, t2);
> + sleep_ticks = US_TO_PM_TIMER_TICKS(idle_time);
> /* Tell the scheduler how much we idled: */
> sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
>
> @@ -1672,7 +1682,7 @@ static int acpi_idle_enter_bm(struct cpu
>
> acpi_state_timer_broadcast(pr, cx, 0);
> cx->time += sleep_ticks;
> - return ticks_elapsed_in_us(t1, t2);
> + return idle_time;
> }
>
> struct cpuidle_driver acpi_idle_driver = {
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/