Re: [PATCH] MIPS: Introduce aligned IO memory operations

From: Jiaxun Yang
Date: Sat Jan 18 2020 - 10:14:03 EST




18.01.2020, 22:41, "Philippe Mathieu-Daudé" <f4bug@xxxxxxxxx>:
> Hi Jiaxun,
>
> On Tue, Jan 14, 2020 at 1:24 PM Jiaxun Yang <jiaxun.yang@xxxxxxxxxxx> wrote:
>> Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
>> instructions like lwl or lwr in IO memory access. However, our current
>> IO memcpy/memset is wired to the generic implementation, which leads
>> to a fatal result.
>
> Do you have a handy reproducer to try with QEMU/KVM?

It was triggered by the QXL DRM driver while I was working on KVM for
Loongson with Huacai.

See arch/mips/kvm/emulate.c: we didn't have trap emulation for unaligned
instructions on MMIO. You can construct a simple unaligned memcpy_fromio
case to reproduce it.

Thanks.

>
>> Signed-off-by: Jiaxun Yang <jiaxun.yang@xxxxxxxxxxx>
>> ---
>>  arch/mips/Kconfig | 4 ++
>>  arch/mips/include/asm/io.h | 10 ++++
>>  arch/mips/kernel/Makefile | 2 +-
>>  arch/mips/kernel/io.c | 98 ++++++++++++++++++++++++++++++++++++++
>>  4 files changed, 113 insertions(+), 1 deletion(-)
>>  create mode 100644 arch/mips/kernel/io.c
>>
>> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
>> index 8b0cd692a43f..15a331aa23a2 100644
>> --- a/arch/mips/Kconfig
>> +++ b/arch/mips/Kconfig
>> @@ -1450,6 +1450,7 @@ config CPU_LOONGSON64
>>         select CPU_SUPPORTS_HIGHMEM
>>         select CPU_SUPPORTS_HUGEPAGES
>>         select CPU_SUPPORTS_MSA
>> + select CPU_NEEDS_ALIGNED_IO
>>         select CPU_HAS_LOAD_STORE_LR
>>         select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT
>>         select CPU_MIPSR2_IRQ_VI
>> @@ -2598,6 +2599,9 @@ config CPU_HAS_LOAD_STORE_LR
>>           LWL, LWR, SWL, SWR (Load/store word left/right).
>>           LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit systems).
>>
>> +config CPU_NEEDS_ALIGNED_IO
>> + bool
>> +
>>  #
>>  # Vectored interrupt mode is an R2 feature
>>  #
>> diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
>> index 3f6ce74335b4..3b0eb4941f23 100644
>> --- a/arch/mips/include/asm/io.h
>> +++ b/arch/mips/include/asm/io.h
>> @@ -577,6 +577,15 @@ BUILDSTRING(l, u32)
>>  BUILDSTRING(q, u64)
>>  #endif
>>
>> +#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
>> +extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
>> +extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
>> +extern void __memset_io(volatile void __iomem *, int, size_t);
>> +
>> +#define memset_io(c, v, l) __memset_io((c), (v), (l))
>> +#define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l))
>> +#define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l))
>> +#else
>>  static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count)
>>  {
>>         memset((void __force *) addr, val, count);
>> @@ -589,6 +598,7 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int
>>  {
>>         memcpy((void __force *) dst, src, count);
>>  }
>> +#endif
>>
>>  /*
>>   * The caches on some architectures aren't dma-coherent and have need to
>> diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
>> index d6e97df51cfb..b07b97b9385e 100644
>> --- a/arch/mips/kernel/Makefile
>> +++ b/arch/mips/kernel/Makefile
>> @@ -8,7 +8,7 @@ extra-y := head.o vmlinux.lds
>>  obj-y += cmpxchg.o cpu-probe.o branch.o elf.o entry.o genex.o idle.o irq.o \
>>                    process.o prom.o ptrace.o reset.o setup.o signal.o \
>>                    syscall.o time.o topology.o traps.o unaligned.o watch.o \
>> - vdso.o cacheinfo.o
>> + vdso.o cacheinfo.o io.o
>>
>>  ifdef CONFIG_FUNCTION_TRACER
>>  CFLAGS_REMOVE_ftrace.o = -pg
>> diff --git a/arch/mips/kernel/io.c b/arch/mips/kernel/io.c
>> new file mode 100644
>> index 000000000000..ca105aa76d4d
>> --- /dev/null
>> +++ b/arch/mips/kernel/io.c
>> @@ -0,0 +1,98 @@
>> +// SPDX-License-Identifier: GPL-2.0-or-later
>> +
>> +#include <linux/export.h>
>> +#include <linux/types.h>
>> +#include <linux/io.h>
>> +
>> +#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
>> +
>> +#if defined(CONFIG_64BIT)
>> +#define IO_LONG_READ __raw_readq
>> +#define IO_LONG_WRITE __raw_writeq
>> +#define IO_LONG_SIZE 8
>> +#else
>> +#define IO_LONG_READ __raw_readl
>> +#define IO_LONG_WRITE __raw_writel
>> +#define IO_LONG_SIZE 4
>> +#endif
>> +
>> +void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
>> +{
>> + while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
>> + !IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
>> + *(u8 *)to = __raw_readb(from);
>> + from++;
>> + to++;
>> + count--;
>> + }
>> +
>> + while (count >= IO_LONG_SIZE) {
>> + *(unsigned long *)to = IO_LONG_READ(from);
>> + from += IO_LONG_SIZE;
>> + to += IO_LONG_SIZE;
>> + count -= IO_LONG_SIZE;
>> + }
>> +
>> + while (count) {
>> + *(u8 *)to = __raw_readb(from);
>> + from++;
>> + to++;
>> + count--;
>> + }
>> +}
>> +EXPORT_SYMBOL(__memcpy_fromio);
>> +
>> +void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
>> +{
>> + while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
>> + !IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
>> + __raw_writeb(*(u8 *)from, to);
>> + from++;
>> + to++;
>> + count--;
>> + }
>> +
>> + while (count >= IO_LONG_SIZE) {
>> + IO_LONG_WRITE(*(unsigned long *)from, to);
>> + from += IO_LONG_SIZE;
>> + to += IO_LONG_SIZE;
>> + count -= IO_LONG_SIZE;
>> + }
>> +
>> + while (count) {
>> + __raw_writeb(*(u8 *)from, to);
>> + from++;
>> + to++;
>> + count--;
>> + }
>> +}
>> +EXPORT_SYMBOL(__memcpy_toio);
>> +
>> +void __memset_io(volatile void __iomem *dst, int c, size_t count)
>> +{
>> + unsigned long lc = (u8)c;
>> + int i;
>> +
>> + for (i = 1; i < IO_LONG_SIZE; i++)
>> + lc |= (u8)c << (i * BITS_PER_BYTE);
>> +
>> + while (count && !IS_ALIGNED((unsigned long)dst, IO_LONG_SIZE)) {
>> + __raw_writeb((u8)c, dst);
>> + dst++;
>> + count--;
>> + }
>> +
>> + while (count >= IO_LONG_SIZE) {
>> + IO_LONG_WRITE(lc, dst);
>> + dst += IO_LONG_SIZE;
>> + count -= IO_LONG_SIZE;
>> + }
>> +
>> + while (count) {
>> + __raw_writeb(c, dst);
>> + dst++;
>> + count--;
>> + }
>> +}
>> +EXPORT_SYMBOL(__memset_io);
>> +#endif
>> --
>> 2.24.1