Re: [PATCH bpf-next v12 3/3] arm64/cfi,bpf: Support kCFI + BPF on arm64

From: Xu Kuohai
Date: Tue Jul 22 2025 - 08:05:06 EST


On 7/22/2025 11:44 AM, Xu Kuohai wrote:
On 7/22/2025 4:20 AM, Sami Tolvanen wrote:
From: Puranjay Mohan <puranjay12@xxxxxxxxx>

Currently, bpf_dispatcher_*_func() is marked with `__nocfi`, so
calling BPF programs through this interface doesn't cause CFI warnings.

When BPF programs are called directly from C, i.e. from BPF helpers or
struct_ops, CFI warnings are generated.

Implement proper CFI prologues for the BPF programs and callbacks and
drop __nocfi for arm64. Fix the trampoline generation code to emit kCFI
prologue when a struct_ops trampoline is being prepared.

Signed-off-by: Puranjay Mohan <puranjay12@xxxxxxxxx>
Co-developed-by: Maxwell Bland <mbland@xxxxxxxxxxxx>
Signed-off-by: Maxwell Bland <mbland@xxxxxxxxxxxx>
Co-developed-by: Sami Tolvanen <samitolvanen@xxxxxxxxxx>
Signed-off-by: Sami Tolvanen <samitolvanen@xxxxxxxxxx>
Tested-by: Dao Huang <huangdao1@xxxxxxxx>
Acked-by: Will Deacon <will@xxxxxxxxxx>
---
  arch/arm64/include/asm/cfi.h  |  7 +++++++
  arch/arm64/net/bpf_jit_comp.c | 22 +++++++++++++++++++---
  2 files changed, 26 insertions(+), 3 deletions(-)
  create mode 100644 arch/arm64/include/asm/cfi.h

diff --git a/arch/arm64/include/asm/cfi.h b/arch/arm64/include/asm/cfi.h
new file mode 100644
index 000000000000..ab90f0351b7a
--- /dev/null
+++ b/arch/arm64/include/asm/cfi.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARM64_CFI_H
+#define _ASM_ARM64_CFI_H
+
+#define __bpfcall
+
+#endif /* _ASM_ARM64_CFI_H */
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 89b1b8c248c6..f4a98c1a1583 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -10,6 +10,7 @@
  #include <linux/arm-smccc.h>
  #include <linux/bitfield.h>
  #include <linux/bpf.h>
+#include <linux/cfi.h>
  #include <linux/filter.h>
  #include <linux/memory.h>
  #include <linux/printk.h>
@@ -166,6 +167,12 @@ static inline void emit_bti(u32 insn, struct jit_ctx *ctx)
          emit(insn, ctx);
  }
+static inline void emit_kcfi(u32 hash, struct jit_ctx *ctx)
+{
+    if (IS_ENABLED(CONFIG_CFI_CLANG))
+        emit(hash, ctx);

I guess this won't work on big-endian CPUs, since arm64 instructions
are always stored in little-endian byte order, but data is not.


There is indeed an issue. I built a big-endian kernel with this patch
and tested it on QEMU; a CFI failure is triggered during kernel boot:

CFI failure at kern_sys_bpf+0x2d4/0x4f0 (target: bpf_prog_dc1d7467ed3b3c17___loader.prog+0x0/0x6dc; expected type: 0xd9421881)
Internal error: Oops - CFI: 00000000f2008228 [#1] SMP
Modules linked in:
CPU: 2 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.16.0-rc6-ge72c32d6c27a-dirty #10 NONE
Hardware name: linux,dummy-virt (DT)
pstate: 80000005 (Nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : kern_sys_bpf+0x2d4/0x4f0
lr : kern_sys_bpf+0x290/0x4f0
sp : ffff8000844e7320
x29: ffff8000844e7390 x28: ffff80008436f000 x27: 1fffe00018268040
x26: ffff8000844e7400 x25: ffff8000844e77c0 x24: 0000000000000030
x23: 1ffff0001089ce68 x22: dfff800000000000 x21: ffff80008455b030
x20: ffff0000c1340200 x19: ffff80008455b000 x18: ffffffff00000000
x17: 00000000d9421881 x16: 00000000811842d9 x15: 0000000000000001
x14: 0000000000000001 x13: ffff0001b5b947f4 x12: 1fffe0001807a001
x11: 0000000000000001 x10: 0000000000000000 x9 : 1ffff000108ab606
x8 : ffff800084979894 x7 : ffff8000805a5ce8 x6 : 0000000000000000
x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000010
x2 : 0000000000000000 x1 : ffff80008455b048 x0 : ffff0000c1340200
Call trace:
kern_sys_bpf+0x2d4/0x4f0 (P)
load+0x324/0x7a4
do_one_initcall+0x1e8/0x7a0
do_initcall_level+0x180/0x36c
do_initcalls+0x60/0xa4
do_basic_setup+0x9c/0xb0
kernel_init_freeable+0x270/0x390
kernel_init+0x2c/0x1c8
ret_from_fork+0x10/0x20
Code: 72831031 72bb2851 6b11021f 54000040 (d4304500)
---[ end trace 0000000000000000 ]---
Kernel panic - not syncing: Oops - CFI: Fatal exception
SMP: stopping secondary CPUs
Kernel Offset: disabled
CPU features: 0x1000,000800d0,02000800,0400420b
Memory Limit: none
---[ end Kernel panic - not syncing: Oops - CFI: Fatal exception ]---


And the failure can be fixed with the following change:

--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -107,6 +107,14 @@ static inline void emit(const u32 insn, struct jit_ctx *ctx)
ctx->idx++;
}

+static inline void emit_u32_data(const u32 data, struct jit_ctx *ctx)
+{
+ if (ctx->image != NULL && ctx->write)
+ ctx->image[ctx->idx] = data;
+
+ ctx->idx++;
+}
+
static inline void emit_a64_mov_i(const int is64, const int reg,
const s32 val, struct jit_ctx *ctx)
{
@@ -170,7 +178,7 @@ static inline void emit_bti(u32 insn, struct jit_ctx *ctx)
static inline void emit_kcfi(u32 hash, struct jit_ctx *ctx)
{
if (IS_ENABLED(CONFIG_CFI_CLANG))
- emit(hash, ctx);
+ emit_u32_data(hash, ctx);
}

+}
+
  /*
   * Kernel addresses in the vmalloc space use at most 48 bits, and the
   * remaining bits are guaranteed to be 0x1. So we can compose the address
@@ -476,7 +483,6 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
      const bool is_main_prog = !bpf_is_subprog(prog);
      const u8 fp = bpf2a64[BPF_REG_FP];
      const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
-    const int idx0 = ctx->idx;
      int cur_offset;
      /*
@@ -502,6 +508,9 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
       *
       */
+    emit_kcfi(is_main_prog ? cfi_bpf_hash : cfi_bpf_subprog_hash, ctx);
+    const int idx0 = ctx->idx;

Could you move the idx0 definition back to its original position, to
match the coding style of the rest of the file?

+
      /* bpf function may be invoked by 3 instruction types:
       * 1. bl, attached via freplace to bpf prog via short jump
       * 2. br, attached via freplace to bpf prog via long jump
@@ -2055,9 +2064,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
          jit_data->ro_header = ro_header;
      }
-    prog->bpf_func = (void *)ctx.ro_image;
+    prog->bpf_func = (void *)ctx.ro_image + cfi_get_offset();
      prog->jited = 1;
-    prog->jited_len = prog_size;
+    prog->jited_len = prog_size - cfi_get_offset();
      if (!prog->is_func || extra_pass) {
          int i;
@@ -2426,6 +2435,12 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
      /* return address locates above FP */
      retaddr_off = stack_size + 8;
+    if (flags & BPF_TRAMP_F_INDIRECT) {
+        /*
+         * Indirect call for bpf_struct_ops
+         */
+        emit_kcfi(cfi_get_func_hash(func_addr), ctx);
+    }
      /* bpf trampoline may be invoked by 3 instruction types:
       * 1. bl, attached to bpf prog or kernel function via short jump
       * 2. br, attached to bpf prog or kernel function via long jump
@@ -2942,6 +2957,7 @@ void bpf_jit_free(struct bpf_prog *prog)
                         sizeof(jit_data->header->size));
              kfree(jit_data);
          }
+        prog->bpf_func -= cfi_get_offset();
          hdr = bpf_jit_binary_pack_hdr(prog);
          bpf_jit_binary_pack_free(hdr, NULL);
          WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));