Re: [RFC Part1 PATCH v3 13/17] x86/io: Unroll string I/O when SEV is active

From: Brijesh Singh
Date: Wed Jul 26 2017 - 16:07:38 EST




On 07/26/2017 02:26 PM, H. Peter Anvin wrote:
\
static inline void outs##bwl(int port, const void *addr, unsigned
long count) \
{

This will clash with a fix I did to add a "memory" clobber
for the traditional implementation, see
https://patchwork.kernel.org/patch/9854573/

Is it even worth leaving these as inline functions?
Given the speed of IO cycles it is unlikely that the cost of calling
a real
function will be significant.
The code bloat reduction will be significant.

I think the smallest code would be the original "rep insb" etc, which
should be smaller than a function call, unlike the loop. Then again,
there is a rather small number of affected device drivers, almost all
of them for ancient hardware that you won't even build in a 64-bit
x86 kernel, see the list below. The only user I found that is
actually
still relevant is drivers/tty/hvc/hvc_xen.c, which uses it for the
early
console.


There are some indirect user of string I/O functions. The following
functions
defined in lib/iomap.c calls rep version of ins and outs.

- ioread8_rep, ioread16_rep, ioread32_rep
- iowrite8_rep, iowrite16_rep, iowrite32_rep

I found that several drivers use above functions.

Here is one approach to convert it into non-inline functions. In this
approach,
I have added a new file arch/x86/kernel/io.c which provides non rep
version of
string I/O routines. The file gets built and used only when
AMD_MEM_ENCRYPT is
enabled. On positive side, if we don't build kernel with
AMD_MEM_ENCRYPT support
then we use inline routines, when AMD_MEM_ENCRYPT is built then we make
a function
call. Inside the function we unroll only when SEV is active.

Do you see any issue with this approach ? thanks

diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index e080a39..104927d 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -323,8 +323,9 @@ static inline unsigned type in##bwl##_p(int port)
\
unsigned type value = in##bwl(port); \
slow_down_io(); \
return value; \
-}
\
-
\
+}
+
+#define BUILDIO_REP(bwl, bw, type)
\
static inline void outs##bwl(int port, const void *addr, unsigned long
count) \
{
\
asm volatile("rep; outs" #bwl \
@@ -335,12 +336,31 @@ static inline void ins##bwl(int port, void *addr,
unsigned long count) \
{
\
asm volatile("rep; ins" #bwl \
: "+D"(addr), "+c"(count) : "d"(port)); \
-}
+}
\
BUILDIO(b, b, char)
BUILDIO(w, w, short)
BUILDIO(l, , int)
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+extern void outsb_try_rep(int port, const void *addr, unsigned long
count);
+extern void insb_try_rep(int port, void *addr, unsigned long count);
+extern void outsw_try_rep(int port, const void *addr, unsigned long
count);
+extern void insw_try_rep(int port, void *addr, unsigned long count);
+extern void outsl_try_rep(int port, const void *addr, unsigned long
count);
+extern void insl_try_rep(int port, void *addr, unsigned long count);
+#define outsb outsb_try_rep
+#define insb insb_try_rep
+#define outsw outsw_try_rep
+#define insw insw_try_rep
+#define outsl outsl_try_rep
+#define insl insl_try_rep
+#else
+BUILDIO_REP(b, b, char)
+BUILDIO_REP(w, w, short)
+BUILDIO_REP(l, , int)
+#endif
+
extern void *xlate_dev_mem_ptr(phys_addr_t phys);
extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a01892b..3b6e2a3 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -42,6 +42,7 @@ CFLAGS_irq.o := -I$(src)/../include/asm/trace
obj-y := process_$(BITS).o signal.o
obj-$(CONFIG_COMPAT) += signal_compat.o
+obj-$(CONFIG_AMD_MEM_ENCRYPT) += io.o
obj-y += traps.o irq.o irq_$(BITS).o
dumpstack_$(BITS).o
obj-y += time.o ioport.o dumpstack.o nmi.o
obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o
diff --git a/arch/x86/kernel/io.c b/arch/x86/kernel/io.c
new file mode 100644
index 0000000..f58afa9
--- /dev/null
+++ b/arch/x86/kernel/io.c
@@ -0,0 +1,87 @@
+#include <linux/types.h>
+#include <linux/io.h>
+#include <asm/io.h>
+
+void outsb_try_rep(int port, const void *addr, unsigned long count)
+{
+ if (sev_active()) {
+ unsigned char *value = (unsigned char *)addr;
+ while (count) {
+ outb(*value, port);
+ value++;
+ count--;
+ }
+ } else {
+ asm volatile("rep; outsb" : "+S"(addr), "+c"(count) :
"d"(port));
+ }
+}
+
+void insb_try_rep(int port, void *addr, unsigned long count)
+{
+ if (sev_active()) {
+ unsigned char *value = (unsigned char *)addr;
+ while (count) {
+ *value = inb(port);
+ value++;
+ count--;
+ }
+ } else {
+ asm volatile("rep; insb" : "+D"(addr), "+c"(count) :
"d"(port));
+ }
+}
+
+void outsw_try_rep(int port, const void *addr, unsigned long count)
+{
+ if (sev_active()) {
+ unsigned short *value = (unsigned short *)addr;
+ while (count) {
+ outw(*value, port);
+ value++;
+ count--;
+ }
+ } else {
+ asm volatile("rep; outsw" : "+S"(addr), "+c"(count) :
"d"(port));
+ }
+}
+void insw_try_rep(int port, void *addr, unsigned long count)
+{
+ if (sev_active()) {
+ unsigned short *value = (unsigned short *)addr;
+ while (count) {
+ *value = inw(port);
+ value++;
+ count--;
+ }
+ } else {
+ asm volatile("rep; insw" : "+D"(addr), "+c"(count) :
"d"(port));
+ }
+}
+
+void outsl_try_rep(int port, const void *addr, unsigned long count)
+{
+ if (sev_active()) {
+ unsigned int *value = (unsigned int *)addr;
+ while (count) {
+ outl(*value, port);
+ value++;
+ count--;
+ }
+ } else {
+ asm volatile("rep; outsl" : "+S"(addr), "+c"(count) :
"d"(port));
+ }
+}
+
+void insl_try_rep(int port, void *addr, unsigned long count)
+{
+ if (sev_active()) {
+ unsigned int *value = (unsigned int *)addr;
+ while (count) {
+ *value = inl(port);
+ value++;
+ count--;
+ }
+ } else {
+ asm volatile("rep; insl" : "+D"(addr), "+c"(count) :
"d"(port));
+ }
+}

What the heck?


I am not sure if I understand your concern.

Are you commenting on amount of code duplication ? If so, I can certainly improve
and use the similar macro used into header file to generate the functions body.

Are you commenting that we should not attempt to make those functions non-inline
and you prefer them to stay inline ?

thanks