Re: [PATCH] RISC-V: Allow drivers to provide custom read_cycles64 for M-mode kernel

From: Palmer Dabbelt
Date: Fri Sep 04 2020 - 21:18:01 EST


On Fri, 04 Sep 2020 09:57:09 PDT (-0700), Christoph Hellwig wrote:
On Fri, Sep 04, 2020 at 10:13:18PM +0530, Anup Patel wrote:
I respectfully disagree. IMHO, the previous code made the RISC-V
timer driver convoluted (both SBI call and CLINT in one place) and
mandated CLINT for NoMMU kernel. In fact, RISC-V spec does not
mandate CLINT or PLIC. The RISC-V SOC vendors are free to
implement their own timer device, IPI device and interrupt controller.

Yes, exactly what we need is everyone coming up with another stupid
non-standard timer and irq driver.

Well, we don't have a standard one so there's really no way around people
coming up with their own. It doesn't seem reasonable to just say "SiFive's
driver landed first, so we will accept no other timer drivers for RISC-V
systems".

But the point is this crap came in after -rc1, and it adds totally
pointless indirect calls to the IPI path, and with your "fix" also
to get_cycles which all have exactly one implementation for MMU or
NOMMU kernels.

So the only sensible thing is to revert all this crap. And if at some
point we actually have to deal with different implementations do it
with alternatives or static_branch infrastructure so that we don't
pay the price for indirect calls in the super hot path.

I'm OK reverting the dynamic stuff, as I can buy it needs more time to bake,
but I'm not sure performance is the right argument -- while this is adding an
indirection, decoupling MMU/NOMMU from the timer driver is the first step
towards getting rid of the traps which are a way bigger performance issue than
the indirection (not to mention the issue of relying on instructions that don't
technically exist in the ISA we're relying on any more).

I'm not really convinced the timers are on such a hot path that an extra load
is that bad, but I don't have that much experience with this stuff so you may
be right. I'd prefer to keep the driver separate, though, and just bring back
the direct CLINT implementation in timex.h -- we've only got one implementation
for now anyway, so it doesn't seem that bad to just inline it (and I suppose I
could buy that the ISA says this register has to behave this way, though I
don't think that's all that strong of an argument).

I'm not convinced this is a big performance hit for IPIs either, but we could
just do the same thing over there -- though I guess I'd be much less convinced
about any arguments as to the ISA having a say in that as IIRC it's a lot more
hands off.

Something like this seems to fix the rdtime issue without any extra overhead,
but I haven't tested it
diff --git a/arch/riscv/include/asm/clint.h b/arch/riscv/include/asm/clint.h
new file mode 100644
index 000000000000..51909ab60ad0
--- /dev/null
+++ b/arch/riscv/include/asm/clint.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Google, Inc
+ */
+
+#ifndef _ASM_RISCV_CLINT_H
+#define _ASM_RISCV_CLINT_H
+
+#include <linux/types.h>
+#include <asm/mmio.h>
+
+#ifdef CONFIG_RISCV_M_MODE
+/*
+ * This lives in the CLINT driver, but is accessed directly by timex.h to avoid
+ * any overhead when accessing the MMIO timer.
+ */
+extern u64 __iomem *clint_time_val;
+#endif
+
+#endif
diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
index a3fb85d505d4..7f659dda0032 100644
--- a/arch/riscv/include/asm/timex.h
+++ b/arch/riscv/include/asm/timex.h
@@ -10,6 +10,31 @@

typedef unsigned long cycles_t;

+#ifdef CONFIG_RISCV_M_MODE
+
+#include <asm/clint.h>
+
+#ifdef CONFIG_64BIT
+static inline cycles_t get_cycles(void)
+{
+ return readq_relaxed(clint_time_val);
+}
+#else /* !CONFIG_64BIT */
+static inline u32 get_cycles(void)
+{
+ return readl_relaxed(((u32 *)clint_time_val));
+}
+#define get_cycles get_cycles
+
+static inline u32 get_cycles_hi(void)
+{
+ return readl_relaxed(((u32 *)clint_time_val) + 1);
+}
+#define get_cycles_hi get_cycles_hi
+#endif /* CONFIG_64BIT */
+
+#else /* CONFIG_RISCV_M_MODE */
+
static inline cycles_t get_cycles(void)
{
return csr_read(CSR_TIME);
@@ -41,6 +66,8 @@ static inline u64 get_cycles64(void)
}
#endif /* CONFIG_64BIT */

+#endif /* !CONFIG_RISCV_M_MODE */
+
#define ARCH_HAS_READ_CURRENT_TIMER
static inline int read_current_timer(unsigned long *timer_val)
{
diff --git a/drivers/clocksource/timer-clint.c b/drivers/clocksource/timer-clint.c
index 8eeafa82c03d..43ae0f885bfa 100644
--- a/drivers/clocksource/timer-clint.c
+++ b/drivers/clocksource/timer-clint.c
@@ -19,6 +19,11 @@
#include <linux/interrupt.h>
#include <linux/of_irq.h>
#include <linux/smp.h>
+#include <linux/timex.h>
+
+#ifndef CONFIG_MMU
+#include <asm/clint.h>
+#endif

#define CLINT_IPI_OFF 0
#define CLINT_TIMER_CMP_OFF 0x4000
@@ -31,6 +36,10 @@ static u64 __iomem *clint_timer_val;
static unsigned long clint_timer_freq;
static unsigned int clint_timer_irq;

+#ifdef CONFIG_RISCV_M_MODE
+u64 __iomem *clint_time_val;
+#endif
+
static void clint_send_ipi(const struct cpumask *target)
{
unsigned int cpu;
@@ -184,6 +193,14 @@ static int __init clint_timer_init_dt(struct device_node *np)
clint_timer_val = base + CLINT_TIMER_VAL_OFF;
clint_timer_freq = riscv_timebase;

+#ifdef CONFIG_RISCV_M_MODE
+ /*
+ * Yes, that's an odd naming scheme. time_val is public, but hopefully
+ * will die in favor of something cleaner.
+ */
+ clint_time_val = clint_timer_val;
+#endif
+
pr_info("%pOFP: timer running at %ld Hz\n", np, clint_timer_freq);

rc = clocksource_register_hz(&clint_clocksource, clint_timer_freq);