Re: [v6, 08/11] powerpc/powernv: Add platform support for stop instruction

From: Michael Ellerman
Date: Wed Jun 15 2016 - 07:15:02 EST


Hi Shreyas,

Comments inline ...

On Wed, 2016-08-06 at 16:54:28 UTC, "Shreyas B. Prabhu" wrote:
> POWER ISA v3 defines a new idle processor core mechanism. In summary,
> a) new instruction named stop is added. This instruction replaces
> instructions like nap, sleep, rvwinkle.
> b) new per thread SPR named Processor Stop Status and Control Register
> (PSSCR) is added which controls the behavior of stop instruction.
>
> PSSCR layout:
> ----------------------------------------------------------
> | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
> ----------------------------------------------------------
> 0 4 41 42 43 44 48 54 56 60
>
> PSSCR key fields:
> Bits 0:3 - Power-Saving Level Status. This field indicates the lowest
> power-saving state the thread entered since stop instruction was last
> executed.
>
> Bit 42 - Enable State Loss
> 0 - No state is lost irrespective of other fields
> 1 - Allows state loss
>
> Bits 44:47 - Power-Saving Level Limit
> This limits the power-saving level that can be entered into.
>
> Bits 60:63 - Requested Level
> Used to specify which power-saving level must be entered on executing
> stop instruction

That would probably be good as a comment somewhere too, maybe idle.c

> diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
> index d2f99ca..3d7fc06 100644
> --- a/arch/powerpc/include/asm/cpuidle.h
> +++ b/arch/powerpc/include/asm/cpuidle.h
> @@ -13,6 +13,8 @@
> #ifndef __ASSEMBLY__
> extern u32 pnv_fastsleep_workaround_at_entry[];
> extern u32 pnv_fastsleep_workaround_at_exit[];
> +
> +extern u64 pnv_first_deep_stop_state;

Should this have some safe initial value?

> diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
> index 6bdcd0d..ae3b155 100644
> --- a/arch/powerpc/include/asm/machdep.h
> +++ b/arch/powerpc/include/asm/machdep.h
> @@ -262,6 +262,7 @@ struct machdep_calls {
> extern void e500_idle(void);
> extern void power4_idle(void);
> extern void power7_idle(void);
> +extern void power_stop0(void);

Can that have a better name please?

> diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
> index 9bb8ddf..7f3f8c6 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -162,13 +162,20 @@
>
> /* Device tree flags */
>
> -/* Flags set in power-mgmt nodes in device tree if
> - * respective idle states are supported in the platform.
> +/*
> + * Flags set in power-mgmt nodes in device tree describing
> + * idle states that are supported in the platform.
> */
> +
> +#define OPAL_PM_TIMEBASE_STOP 0x00000002
> +#define OPAL_PM_LOSE_HYP_CONTEXT 0x00002000
> +#define OPAL_PM_LOSE_FULL_CONTEXT 0x00004000
> #define OPAL_PM_NAP_ENABLED 0x00010000
> #define OPAL_PM_SLEEP_ENABLED 0x00020000
> #define OPAL_PM_WINKLE_ENABLED 0x00040000
> #define OPAL_PM_SLEEP_ENABLED_ER1 0x00080000 /* with workaround */
> +#define OPAL_PM_STOP_INST_FAST 0x00100000
> +#define OPAL_PM_STOP_INST_DEEP 0x00200000

I don't see the above in skiboot yet?

> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index 546540b..ae91b44 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -171,6 +171,8 @@ struct paca_struct {
> /* Mask to denote subcore sibling threads */
> u8 subcore_sibling_mask;
> #endif
> + /* Template for PSSCR with EC, ESL, TR, PSLL, MTL fields set */
> + u64 thread_psscr;

I'm not entirely clear on why that needs to be in the paca. Could it not be global?

> diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
> index 009fab1..7f92fc8 100644
> --- a/arch/powerpc/include/asm/processor.h
> +++ b/arch/powerpc/include/asm/processor.h
> @@ -457,6 +457,7 @@ extern int powersave_nap; /* set if nap mode can be used in idle loop */
> extern unsigned long power7_nap(int check_irq);
> extern unsigned long power7_sleep(void);
> extern unsigned long power7_winkle(void);
> +extern unsigned long power_stop(unsigned long state);

Can that also have a better name?

> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index a0948f4..89a00d9 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -145,6 +145,16 @@
> #define MSR_64BIT 0
> #endif
>
> +/* Power Management - PSSCR Fields */
> +#define PSSCR_RL_MASK 0x0000000F
> +#define PSSCR_MTL_MASK 0x000000F0
> +#define PSSCR_TR_MASK 0x00000300
> +#define PSSCR_PSLL_MASK 0x000F0000
> +#define PSSCR_EC 0x00100000
> +#define PSSCR_ESL 0x00200000
> +#define PSSCR_SD 0x00400000

Can we get a comment for each #define saying what it is?

> @@ -288,6 +298,7 @@
> #define SPRN_PMICR 0x354 /* Power Management Idle Control Reg */
> #define SPRN_PMSR 0x355 /* Power Management Status Reg */
> #define SPRN_PMMAR 0x356 /* Power Management Memory Activity Register */
> +#define SPRN_PSSCR 0x357 /* Processor Stop Status and Control Register */

Can you add ISA 3, eg:

#define SPRN_PSSCR 0x357 /* Processor Stop Status and Control Register (ISA 3.0) */

I know we haven't been very consistent about that in the past, but we can always try :)

> @@ -761,6 +772,9 @@
> #define SIER_SDAR_VALID 0x0200000 /* SDAR contents valid */
> #define SPRN_SIAR 796
> #define SPRN_SDAR 797
> +#define SPRN_LMRR 813
> +#define SPRN_LMSER 814
> +#define SPRN_ASDR 816

Ditto, comments with ISA 3 please.

> diff --git a/arch/powerpc/kernel/idle_power_common.S b/arch/powerpc/kernel/idle_power_common.S
> index 2f909a1..c6c2f66 100644
> --- a/arch/powerpc/kernel/idle_power_common.S
> +++ b/arch/powerpc/kernel/idle_power_common.S
> @@ -50,6 +55,15 @@
> IDLE_INST; \
> b .
>
> +/*
> + * rA - Requested stop state
> + * rB - Spare reg that can be used
> + */
> +#define PSSCR_REQUEST_STATE(rA, rB) \
> + ld rB, PACA_THREAD_PSSCR(r13); \
> + or rB,rB,rA; \
> + mtspr SPRN_PSSCR, rB;

I only see this used once, so it can just be inline.

> +
> .text
>
> /*
> @@ -61,8 +75,19 @@ save_sprs_to_stack:
> * Note all register i.e per-core, per-subcore or per-thread is saved
> * here since any thread in the core might wake up first
> */
> +BEGIN_FTR_SECTION
> + mfspr r3,SPRN_PTCR
> + std r3,_PTCR(r1)
> + mfspr r3,SPRN_LMRR
> + std r3,_LMRR(r1)
> + mfspr r3,SPRN_LMSER
> + std r3,_LMSER(r1)
> + mfspr r3,SPRN_ASDR
> + std r3,_ASDR(r1)
> +FTR_SECTION_ELSE

A comment here saying that SDR1 is removed in ISA 3.0 would be helpful.

> mfspr r3,SPRN_SDR1
> std r3,_SDR1(r1)
> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)

> @@ -293,6 +354,21 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
>
>
> /*
> + * Used for ppc_md.power_save which needs a function with no parameters
> + */
> +_GLOBAL(power_stop0)
> + li r3,0

Zero?

> + /* Fall through to power_stop */

I think I'd rather you just did that as a C function.

> +/*
> + * r3 - requested stop state
> + */
> +_GLOBAL(power_stop)
> + PSSCR_REQUEST_STATE(r3,r4)
> + li r4, 1
> + LOAD_REG_ADDR(r5,power_enter_stop)
> + b pnv_powersave_common
> + /* No return */
> +/*
> * Called from reset vector. Check whether we have woken up with
> * hypervisor state loss. If yes, restore hypervisor state and return
> * back to reset vector.
> @@ -301,7 +377,32 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
> * cr3 - set to gt if waking up with partial/complete hypervisor state loss
> */
> _GLOBAL(pnv_restore_hyp_resource)
> +BEGIN_FTR_SECTION
> /*
> + * POWER ISA 3. Use PSSCR to determine if we
> + * are waking up from deep idle state
> + */
> + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)

That's an @got load using r2, but have we restored r2 yet?

> + ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
> +
> + mfspr r5,SPRN_PSSCR

> @@ -397,8 +507,11 @@ first_thread_in_subcore:
> bne cr4,subcore_state_restored
>
> /* Restore per-subcore state */

We don't have subcores on P9, or did I miss a memo?

A comment somewhere explaining that would help I think, it's not clear AFAICS.

> +BEGIN_FTR_SECTION
> ld r4,_SDR1(r1)
> mtspr SPRN_SDR1,r4
> +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
> +
> ld r4,_RPR(r1)
> mtspr SPRN_RPR,r4
> ld r4,_AMOR(r1)

> @@ -477,6 +601,21 @@ common_exit:
> slbmte r6,r5
> 1: addi r8,r8,16
> .endr
> +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_RADIX)
> +
> + /* Restore per thread state */
> +BEGIN_FTR_SECTION
> + bl __restore_cpu_power9
> +
> + ld r4,_LMRR(r1)
> + mtspr SPRN_LMRR,r4
> + ld r4,_LMSER(r1)
> + mtspr SPRN_LMSER,r4
> + ld r4,_ASDR(r1)
> + mtspr SPRN_ASDR,r4

Should those be in __restore_cpu_power9 ?

> +FTR_SECTION_ELSE
> + bl __restore_cpu_power8
> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)

Then we could potentially do the above by calling through cur_cpu_spec.

> diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
> index fbb09fb..bfbd359 100644
> --- a/arch/powerpc/platforms/powernv/idle.c
> +++ b/arch/powerpc/platforms/powernv/idle.c
> @@ -27,9 +27,11 @@
> #include "powernv.h"
> #include "subcore.h"
>
> +#define MAX_STOP_STATE 0xF

Says who?

> @@ -130,8 +136,8 @@ static void pnv_alloc_idle_core_states(void)
>
> update_subcore_sibling_mask();
>
> - if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED)
> - pnv_save_sprs_for_winkle();
> + if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
> + pnv_save_sprs_for_deep_states();

How does that work on old skiboot that doesn't set OPAL_PM_LOSE_FULL_CONTEXT but
still sets OPAL_PM_WINKLE_ENABLED?

> }
>
> u32 pnv_get_supported_cpuidle_states(void)
> @@ -230,11 +236,18 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
> show_fastsleep_workaround_applyonce,
> store_fastsleep_workaround_applyonce);
>
> +/*
> + * First deep stop state. Used to figure out when to save/restore
> + * hypervisor context.
> + */
> +u64 pnv_first_deep_stop_state;
> +
> static int __init pnv_init_idle_states(void)
> {
> struct device_node *power_mgt;

I prefer just "np" - it's shorter and I immediately know what it is.

> int dt_idle_states;
> u32 *flags;
> + u64 *psscr_val = NULL;
> int i;
>
> supported_cpuidle_states = 0;
> @@ -264,6 +277,32 @@ static int __init pnv_init_idle_states(void)
> goto out_free;
> }
>
> + if (cpu_has_feature(CPU_FTR_ARCH_300)) {

> + psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val),
> + GFP_KERNEL);
> + if (!psscr_val)
> + goto out_free;

Newline please.

> + if (of_property_read_u64_array(power_mgt,
> + "ibm,cpu-idle-state-psscr",
> + psscr_val, dt_idle_states)) {
> + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-states-psscr in DT\n");
> + goto out_free_psscr;
> + }
> +
> + /*
> + * Set pnv_first_deep_stop_state to the first stop level
> + * to cause hypervisor state loss
> + */
> + pnv_first_deep_stop_state = MAX_STOP_STATE;
> + for (i = 0; i < dt_idle_states; i++) {
> + u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK;
> +
> + if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) &&
> + (pnv_first_deep_stop_state > psscr_rl))
> + pnv_first_deep_stop_state = psscr_rl;
> + }
> + }

This function is >110 lines, which is too big for my taste.

The above would be better as a separate function I think.

> for (i = 0; i < dt_idle_states; i++)
> supported_cpuidle_states |= flags[i];
>
> @@ -286,8 +325,29 @@ static int __init pnv_init_idle_states(void)
>
> pnv_alloc_idle_core_states();
>
> + if (supported_cpuidle_states & OPAL_PM_STOP_INST_FAST)
> + for_each_possible_cpu(i) {
> +
> + u64 psscr_init_val = PSSCR_ESL | PSSCR_EC |
> + PSSCR_PSLL_MASK | PSSCR_TR_MASK |
> + PSSCR_MTL_MASK;
> +
> + paca[i].thread_psscr = psscr_init_val;
> + /*
> + * Memory barrier to ensure that the writes to PACA
> + * goes through before ppc_md.power_save is updated
> + * below.
> + */
> + mb();
> + }

And likewise that loop.

cheers