Re: [PATCH v4 2/5] LoongArch: Use la.pcrel instead of la.abs for exception handlers

From: Jinyang He
Date: Thu Feb 16 2023 - 01:59:30 EST


On 2023-02-16 10:32, Youling Tang wrote:

Hi folks,

On 02/10/2023 05:18 PM, Youling Tang wrote:


On 02/10/2023 05:09 PM, Huacai Chen wrote:
Hi, Youling and Ruoyao,

Thank you very much for implementing the per-node exceptions. But I
want to know whether the per-node solution is really worthwhile for a PIE
kernel. So, could you please test the performance? Maybe we can reduce
the complexity if we give up the per-node solution.

Tested on Loongson-3C5000L-LL machine, using CLFS7.3 system.

- nopernode:
  Based on the v1 patch method, with the else branch in
  setup_tlb_handler() removed.

- pernode: Based on the v4 patch method.

- pie: Enable RANDOMIZE_BASE (KASLR).

- nopie: Disable RANDOMIZE_BASE and RELOCATABLE.


The UnixBench test results are as follows:

- nopernode-nopie: 3938.7

- pernode-nopie: 4062.2

- nopernode-pie: 4009.7

- pernode-pie: 4028.7

In general, `pernode` scores higher than `nopernode`, and `nopie` scores
higher than `pie` (except that nopernode-pie is higher than
nopernode-nopie, which is unexpected and may be caused by the
instability of the machine).

Is everyone more inclined to use `pernode` or `nopernode` for the
exception handling implementation?

Youling.

Hi, Youling,


Thanks for your test results.


I wrote an informal patch that keeps la.abs by treating la.abs as a macro. It has only been tested on QEMU.

To test this patch, first apply [PATCH v4 1/5] and [PATCH v4 3/5] as prerequisites.

The following patch just demonstrates one approach. I'm busy with other things, but hopefully it will help you simplify [PATCH v4 2/5].


Thanks,

Jinyang



diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h
index 328bb956f241..6ebad458d662 100644
--- a/arch/loongarch/include/asm/asmmacro.h
+++ b/arch/loongarch/include/asm/asmmacro.h
@@ -667,4 +667,19 @@
     nor    \dst, \src, zero
 .endm

+.macro la.abs reg, sym    /* reserve a 4-insn slot and record how relocate_laabs() must fill it */
+766:    /* start of the placeholder slot */
+    nop    /* four nops: overwritten as insn[0..3] by relocate_laabs() */
+    nop
+    nop
+    nop
+    .pushsection ".laabs", "aw", %progbits    /* emit one patch record out of line */
+768:    /* start of this record */
+    .word 768b-766b    /* distance from this record back to the slot; read as 'offset' */
+    parse_r regno, \reg    /* NOTE(review): parse_r is defined elsewhere; presumably sets regno to the register number - confirm */
+    .word regno    /* read as 'reg' */
+    .dword \sym    /* 64-bit symbol value; read as 'symvalue' */
+    .popsection
+.endm
+
 #endif /* _ASM_ASMMACRO_H */
diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index d2ac26b5b22b..3b273f05be8c 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -86,6 +86,7 @@ SYM_CODE_START(kernel_entry)            # kernel entry point
     PTR_ADD        sp, sp, tp
     set_saved_sp    sp, t0, t1

+    bl        relocate_laabs
     bl        start_kernel
     ASM_BUG()

diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 4344502c0b31..9f8833a2524a 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -582,3 +582,30 @@ void __init setup_arch(char **cmdline_p)

     paging_init();
 }
+
+void __init relocate_laabs(void)    /* fill every la.abs placeholder with a 4-insn load of its 64-bit value */
+{
+    extern void *__laabs_begin;    /* linker-script symbol: start of .laabs */
+    extern void *__laabs_end;    /* linker-script symbol: end of .laabs */
+    struct laabs {    /* one record, laid out as the la.abs macro emits it */
+        int offset;    /* bytes from this record back to its placeholder */
+        int reg;    /* destination register number */
+        long symvalue;    /* 64-bit value to materialize */
+    } *p;
+
+    for (p = (void *)&__laabs_begin; (void *)p < (void *)&__laabs_end; p++)
+    {
+        int lu12iw, ori, lu32id, lu52id;    /* the four instruction words to write */
+        long v = p->symvalue;
+        int reg = p->reg;
+        int *insn = (void *)p - p->offset;    /* address of the 4-instruction placeholder */
+        lu12iw = 0x14000000 | reg | (((v & 0xfffff000) >> 12) << 5);    /* lu12i.w: rd = reg, imm = v[31:12] */
+        ori = 0x03800000 | reg | (reg<<5) | ((v & 0xfff) << 10);    /* ori: rd = rj = reg, imm = v[11:0] */
+        lu32id = 0x16000000 | reg | (((v & 0x000fffff00000000) >> 32) << 5);    /* lu32i.d: rd = reg, imm = v[51:32] */
+        lu52id = 0x03000000 | reg | (reg<<5) | (((v >> 52) & 0xfff) << 10);    /* lu52i.d: rd = rj = reg, imm = v[63:52] */
+        insn[0] = lu12iw;    /* overwrite the placeholder in place, in execution order */
+        insn[1] = ori;
+        insn[2] = lu32id;
+        insn[3] = lu52id;
+    }
+}
diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S
index 733b16e8d55d..4d128e089393 100644
--- a/arch/loongarch/kernel/vmlinux.lds.S
+++ b/arch/loongarch/kernel/vmlinux.lds.S
@@ -66,6 +66,13 @@ SECTIONS
         __alt_instructions_end = .;
     }

+    . = ALIGN(8);    /* each 16-byte record ends in a .dword that is read as a long */
+    .laabs : AT(ADDR(.laabs) - LOAD_OFFSET) {
+        __laabs_begin = .;    /* bounds of the record table walked by relocate_laabs() */
+        *(.laabs)
+        __laabs_end = .;
+    }
+
     .got : ALIGN(16) { *(.got) }
     .plt : ALIGN(16) { *(.plt) }
     .got.plt : ALIGN(16) { *(.got.plt) }