Re: [PATCH v2 5/6] RISC-V: hwprobe: Support probing of misaligned access performance

From: Conor Dooley
Date: Wed Feb 15 2023 - 16:57:27 EST


On Mon, Feb 06, 2023 at 12:14:54PM -0800, Evan Green wrote:
> This allows userspace to select various routines to use based on the
> performance of misaligned access on the target hardware.
>
> Co-developed-by: Palmer Dabbelt <palmer@xxxxxxxxxxxx>
> Signed-off-by: Palmer Dabbelt <palmer@xxxxxxxxxxxx>
> Signed-off-by: Evan Green <evan@xxxxxxxxxxxx>
>
> ---
>
> Changes in v2:
> - Fixed logic error in if(of_property_read_string...) that caused crash
> - Include cpufeature.h in cpufeature.c to avoid undeclared variable
> warning.
> - Added a _MASK define
> - Fix random checkpatch complaints
>
> Documentation/riscv/hwprobe.rst | 13 +++++++++++
> arch/riscv/include/asm/cpufeature.h | 2 ++
> arch/riscv/include/asm/hwprobe.h | 2 +-
> arch/riscv/include/asm/smp.h | 9 ++++++++
> arch/riscv/include/uapi/asm/hwprobe.h | 6 ++++++
> arch/riscv/kernel/cpufeature.c | 31 +++++++++++++++++++++++++--
> arch/riscv/kernel/sys_riscv.c | 23 ++++++++++++++++++++
> 7 files changed, 83 insertions(+), 3 deletions(-)
>
> diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst
> index ce186967861f..0dc75e83e127 100644
> --- a/Documentation/riscv/hwprobe.rst
> +++ b/Documentation/riscv/hwprobe.rst
> @@ -51,3 +51,16 @@ The following keys are defined:
> not minNum/maxNum") of the RISC-V ISA manual.
> * :RISCV_HWPROBE_IMA_C:: The C extension is supported, as defined by
> version 2.2 of the RISC-V ISA manual.
> +* :RISCV_HWPROBE_KEY_PERF_0:: A bitmask that contains performance information

This doesn't match what's defined? The header adds
RISCV_HWPROBE_KEY_CPUPERF_0, but the documentation here says
RISCV_HWPROBE_KEY_PERF_0.

> + about the selected set of processors.
> + * :RISCV_HWPROBE_MISALIGNED_UNKNOWN:: The performance of misaligned
> + accesses is unknown.
> + * :RISCV_HWPROBE_MISALIGNED_EMULATED:: Misaligned accesses are emulated via
> + software, either in or below the kernel. These accesses are always
> + extremely slow.
> + * :RISCV_HWPROBE_MISALIGNED_SLOW:: Misaligned accesses are supported in
> + hardware, but are slower than the corresponding aligned access
> + sequences.
> + * :RISCV_HWPROBE_MISALIGNED_FAST:: Misaligned accesses are supported in
> + hardware and are faster than the corresponding aligned access
> + sequences.

> diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
> index 3831b638ecab..6c1759091e44 100644
> --- a/arch/riscv/include/asm/smp.h
> +++ b/arch/riscv/include/asm/smp.h
> @@ -26,6 +26,15 @@ struct riscv_ipi_ops {
> */
> extern unsigned long __cpuid_to_hartid_map[NR_CPUS];
> #define cpuid_to_hartid_map(cpu) __cpuid_to_hartid_map[cpu]
> +static inline long hartid_to_cpuid_map(unsigned long hartid)
> +{
> + long i;
> +
> + for (i = 0; i < NR_CPUS; ++i)

I'm never (or not yet?) sure about these things.
Should this be for_each_possible_cpu()?

> + if (cpuid_to_hartid_map(i) == hartid)
> + return i;
> + return -1;
> +}
>
> /* print IPI stats */
> void show_ipi_stats(struct seq_file *p, int prec);
> diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
> index ce39d6e74103..5d55e2da2b1f 100644
> --- a/arch/riscv/include/uapi/asm/hwprobe.h
> +++ b/arch/riscv/include/uapi/asm/hwprobe.h
> @@ -25,5 +25,11 @@ struct riscv_hwprobe {
> #define RISCV_HWPROBE_KEY_IMA_EXT_0 4
> #define RISCV_HWPROBE_IMA_FD (1 << 0)
> #define RISCV_HWPROBE_IMA_C (1 << 1)
> +#define RISCV_HWPROBE_KEY_CPUPERF_0 5
> +#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
> +#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
> +#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0)
> +#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0)
> +#define RISCV_HWPROBE_MISALIGNED_MASK (3 << 0)

Why is it UNKNOWN rather than UNSUPPORTED?
I thought I saw Palmer saying that there is no requirement to support
misaligned accesses any more.
Plenty of old DTs are going to lack this property so would be UNKNOWN,
and I *assume* that the user of the syscall is gonna conflate the two,
but the rationale interests me.

> /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
> #endif
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 93e45560af30..12af6f7a2f53 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -14,8 +14,10 @@
> #include <linux/of.h>
> #include <asm/alternative.h>
> #include <asm/cacheflush.h>
> +#include <asm/cpufeature.h>
> #include <asm/errata_list.h>
> #include <asm/hwcap.h>
> +#include <asm/hwprobe.h>
> #include <asm/patch.h>
> #include <asm/pgtable.h>
> #include <asm/processor.h>
> @@ -32,6 +34,9 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
> DEFINE_STATIC_KEY_ARRAY_FALSE(riscv_isa_ext_keys, RISCV_ISA_EXT_KEY_MAX);
> EXPORT_SYMBOL(riscv_isa_ext_keys);
>
> +/* Performance information */
> +DEFINE_PER_CPU(long, misaligned_access_speed);
> +
> /**
> * riscv_isa_extension_base() - Get base extension word
> *
> @@ -89,11 +94,11 @@ static bool riscv_isa_extension_check(int id)
> void __init riscv_fill_hwcap(void)
> {
> struct device_node *node;
> - const char *isa;
> + const char *isa, *misaligned;
> char print_str[NUM_ALPHA_EXTS + 1];
> int i, j, rc;
> unsigned long isa2hwcap[26] = {0};
> - unsigned long hartid;
> + unsigned long hartid, cpu;
>
> isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I;
> isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M;
> @@ -246,6 +251,28 @@ void __init riscv_fill_hwcap(void)
> bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
> else
> bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
> +
> + /*
> + * Check for the performance of misaligned accesses.
> + */
> + cpu = hartid_to_cpuid_map(hartid);
> + if (cpu < 0)
> + continue;
> +
> + if (!of_property_read_string(node, "riscv,misaligned-access-performance",
> + &misaligned)) {
> + if (strcmp(misaligned, "emulated") == 0)
> + per_cpu(misaligned_access_speed, cpu) =
> + RISCV_HWPROBE_MISALIGNED_EMULATED;
> +
> + if (strcmp(misaligned, "slow") == 0)
> + per_cpu(misaligned_access_speed, cpu) =
> + RISCV_HWPROBE_MISALIGNED_SLOW;
> +
> + if (strcmp(misaligned, "fast") == 0)
> + per_cpu(misaligned_access_speed, cpu) =
> + RISCV_HWPROBE_MISALIGNED_FAST;
> + }
> }
>
> /* We don't support systems with F but without D, so mask those out
> diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
> index 74e0d72c877d..73d937c54f4e 100644
> --- a/arch/riscv/kernel/sys_riscv.c
> +++ b/arch/riscv/kernel/sys_riscv.c
> @@ -133,6 +133,25 @@ static long hwprobe_mid(struct riscv_hwprobe __user *pair, size_t key,
> return set_hwprobe(pair, id);
> }
>
> +static long hwprobe_misaligned(cpumask_t *cpus)
> +{
> + long cpu, perf = -1;
> +
> + for_each_cpu(cpu, cpus) {
> + long this_perf = per_cpu(misaligned_access_speed, cpu);
> +
> + if (perf == -1)
> + perf = this_perf;
> +
> + if (perf != this_perf)
> + perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN;

Is there any reason to continue in the loop if this condition is met?

> + }
> +
> + if (perf == -1)
> + return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
> + return perf;

heh, nitpicking the maintainer's use of whitespace... newline before
return please :)

Cheers,
Conor.

> +}
> +
> static
> long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count,
> long cpu_count, unsigned long __user *cpus_user,
> @@ -205,6 +224,10 @@ long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count,
> }
> break;
>
> + case RISCV_HWPROBE_KEY_CPUPERF_0:
> + ret = set_hwprobe(pairs, hwprobe_misaligned(&cpus));
> + break;
> +
> /*
> * For forward compatibility, unknown keys don't fail the whole
> * call, but get their element key set to -1 and value set to 0
> --
> 2.25.1
>

Attachment: signature.asc
Description: PGP signature