[PATCH v6 03/13] riscv/kprobe: Add skeleton for preparing optimized kprobe

From: Chen Guokai
Date: Fri Jan 27 2023 - 08:06:39 EST


From: Liao Chang <liaochang1@xxxxxxxxxx>

The skeleton for preparing an optprobe consists of three major parts:

- Check if kprobe satisfies the requirements of optimization.
- Search for two free registers to form the AUIPC/JALR instructions.
- Prepare detour buffer for optimized kprobe.

To avoid introducing too much code in a single patch, just add some dummy
implementations so that the code compiles.

Signed-off-by: Liao Chang <liaochang1@xxxxxxxxxx>
Co-developed-by: Chen Guokai <chenguokai17@xxxxxxxxxxxxxxxx>
Signed-off-by: Chen Guokai <chenguokai17@xxxxxxxxxxxxxxxx>
---
arch/riscv/kernel/probes/opt.c | 98 +++++++++++++++++++++++++++++++++-
1 file changed, 97 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/kernel/probes/opt.c b/arch/riscv/kernel/probes/opt.c
index 56c8a227c857..c03cdb1512a6 100644
--- a/arch/riscv/kernel/probes/opt.c
+++ b/arch/riscv/kernel/probes/opt.c
@@ -10,6 +10,53 @@

#include <linux/kprobes.h>
#include <asm/kprobes.h>
+#include <asm/patch.h>
+
+static int in_auipc_jalr_range(long val)
+{
+#ifdef CONFIG_ARCH_RV32I
+ return 1;
+#else
+ /*
+ * RV64: the offsets that can be formed by pairing LUI with LD, AUIPC
+ * with JALR, etc. are [-2^31 - 2^11, 2^31 - 2^11 - 1]; return non-zero
+ * only if val lies in that range. RV32 (above) accepts every offset.
+ */
+ return ((-(1L << 31) - (1L << 11)) <= val) &&
+ (val < ((1L << 31) - (1L << 11)));
+#endif
+}
+
+/*
+ * Copy the optprobe assembly code template into the detour buffer and modify
+ * some instructions for each kprobe. Dummy for now: the body is empty.
+ */
+static void prepare_detour_buffer(kprobe_opcode_t *code, kprobe_opcode_t *slot,
+ int rd, struct optimized_kprobe *op,
+ kprobe_opcode_t opcode)
+{
+}
+
+/*
+ * In the RISC-V ISA, AUIPC/JALR clobbers one register to form the target
+ * address. Inspired by register renaming in OoO processors, search for a
+ * register that is not used as a source and is then used as a destination
+ * before any branch or jump. Dummy for now: *rd and *ra are left untouched.
+ */
+static void find_free_registers(struct kprobe *kp, struct optimized_kprobe *op,
+ int *rd, int *ra)
+{
+}
+
+/*
+ * A breakpoint kprobe only needs the probed instruction to support execution
+ * out-of-line or simulation; an optimized kprobe also needs that no nearby
+ * instruction jumps to one replaced by AUIPC/JALR. Dummy: always false.
+ */
+static bool can_optimize(unsigned long paddr, struct optimized_kprobe *op)
+{
+ return false;
+}

int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
{
@@ -24,7 +71,56 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op)
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
struct kprobe *orig)
{
- return 0;
+ long rel;
+ int rd = 0, ra = 0, ret;
+ kprobe_opcode_t *code = NULL, *slot = NULL;
+
+ if (!can_optimize((unsigned long)orig->addr, op))
+ return -EILSEQ;
+
+ code = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL); /* staging copy, freed before return */
+ slot = get_optinsn_slot(); /* kept in op->optinsn.insn on success */
+ if (!code || !slot) {
+ ret = -ENOMEM;
+ goto on_error;
+ }
+
+ /* The detour buffer must be reachable from the probe via AUIPC/JALR. */
+ rel = (unsigned long)slot - (unsigned long)orig->addr;
+ if (!in_auipc_jalr_range(rel)) {
+ ret = -ERANGE;
+ goto on_error;
+ }
+
+ /*
+ * Find two free registers: rd forms the AUIPC/JALR jumping to the detour
+ * buffer, ra forms the JR jumping back from it. NOTE(review): ra is passed
+ * as the 'rd' parameter of prepare_detour_buffer() below - confirm intent.
+ */
+ find_free_registers(orig, op, &rd, &ra);
+ if (rd == 0 || ra == 0) { /* still the initial 0: treated as failure */
+ ret = -EILSEQ;
+ goto on_error;
+ }
+
+ op->optinsn.rd = rd;
+ prepare_detour_buffer(code, slot, ra, op, orig->opcode);
+
+ ret = patch_text_nosync((void *)slot, code, MAX_OPTINSN_SIZE);
+ if (!ret) {
+ op->optinsn.insn = slot;
+ kfree(code);
+ return 0;
+ }
+
+on_error:
+ if (slot) { /* NULL when get_optinsn_slot() failed */
+ free_optinsn_slot(slot, 0);
+ op->optinsn.insn = NULL;
+ op->optinsn.length = 0;
+ }
+ kfree(code);
+ return ret;
}

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
--
2.34.1