On Wed, 27 Aug 2025 at 17:37, Puranjay Mohan <puranjay@xxxxxxxxxx> wrote: > > Begin reporting arena page faults and the faulting address to BPF > program's stderr, this patch adds support in the arm64 and x86-64 JITs, > support for other archs can be added later. > > The fault handlers receive the 32 bit address in the arena region so > the upper 32 bits of user_vm_start is added to it before printing the > address. This is what the user would expect to see as this is what is > printed by bpf_printk() is you pass it an address returned by > bpf_arena_alloc_pages(); > > Signed-off-by: Puranjay Mohan <puranjay@xxxxxxxxxx> > Acked-by: Yonghong Song <yonghong.song@xxxxxxxxx> > --- > arch/arm64/net/bpf_jit_comp.c | 52 +++++++++++++++++++++++ > arch/x86/net/bpf_jit_comp.c | 79 +++++++++++++++++++++++++++++++++-- > include/linux/bpf.h | 5 +++ > kernel/bpf/arena.c | 20 +++++++++ > 4 files changed, 152 insertions(+), 4 deletions(-) > > diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c > index 42643fd9168fc..5083886d6e66b 100644 > --- a/arch/arm64/net/bpf_jit_comp.c > +++ b/arch/arm64/net/bpf_jit_comp.c > @@ -1066,6 +1066,30 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic) > emit(A64_RET(A64_LR), ctx); > } > > +/* > + * Metadata encoding for exception handling in JITed code. > + * > + * Format of `fixup` field in `struct exception_table_entry`: > + * > + * Bit layout of `fixup` (32-bit): > + * > + * +-----------+--------+-----------+-----------+----------+ > + * | 31-27 | 26-22 | 21 | 20-16 | 15-0 | > + * | | | | | | > + * | FIXUP_REG | Unused | ARENA_ACC | ARENA_REG | OFFSET | > + * +-----------+--------+-----------+-----------+----------+ > + * > + * - OFFSET (16 bits): Offset used to compute address for Load/Store instruction. > + * - ARENA_REG (5 bits): Register that is used to calculate the address for load/store when > + * accessing the arena region. > + * - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region. > + * - FIXUP_REG (5 bits): Destination register for the load instruction (cleared on fault) or set to > + * DONT_CLEAR if it is a store instruction. > + */ > + > +#define BPF_FIXUP_OFFSET_MASK GENMASK(15, 0) > +#define BPF_FIXUP_ARENA_REG_MASK GENMASK(20, 16) > +#define BPF_ARENA_ACCESS BIT(21) > #define BPF_FIXUP_REG_MASK GENMASK(31, 27) > #define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */ > > @@ -1073,11 +1097,22 @@ bool ex_handler_bpf(const struct exception_table_entry *ex, > struct pt_regs *regs) > { > int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); > + s16 off = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); > + int arena_reg = FIELD_GET(BPF_FIXUP_ARENA_REG_MASK, ex->fixup); > + bool is_arena = !!(ex->fixup & BPF_ARENA_ACCESS); > + bool is_write = (dst_reg == DONT_CLEAR); > + unsigned long addr; > > if (dst_reg != DONT_CLEAR) > regs->regs[dst_reg] = 0; > /* Skip the faulting instruction */ > regs->pc += AARCH64_INSN_SIZE; > + > + if (is_arena) { > + addr = regs->regs[arena_reg] + off; What happens when arena_reg == dst_reg? > + bpf_prog_report_arena_violation(is_write, addr); > + } > + > return true; > } > > @@ -1087,6 +1122,9 @@ static int add_exception_handler(const struct bpf_insn *insn, > int dst_reg) > { > off_t ins_offset; > + s16 off = insn->off; > + bool is_arena; > + int arena_reg; > unsigned long pc; > struct exception_table_entry *ex; > > @@ -1100,6 +1138,9 @@ static int add_exception_handler(const struct bpf_insn *insn, > BPF_MODE(insn->code) != BPF_PROBE_ATOMIC) > return 0; > > + is_arena = (BPF_MODE(insn->code) == BPF_PROBE_MEM32) || > + (BPF_MODE(insn->code) == BPF_PROBE_ATOMIC); > + > if (!ctx->prog->aux->extable || > WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries)) > return -EINVAL; > @@ -1131,6 +1172,17 @@ static int add_exception_handler(const struct bpf_insn *insn, > > ex->fixup = FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); > > + if (is_arena) { > + ex->fixup |= BPF_ARENA_ACCESS; > + if (BPF_CLASS(insn->code) == BPF_LDX) > + arena_reg = bpf2a64[insn->src_reg]; > + else > + arena_reg = bpf2a64[insn->dst_reg]; > + > + ex->fixup |= FIELD_PREP(BPF_FIXUP_OFFSET_MASK, off) | > + FIELD_PREP(BPF_FIXUP_ARENA_REG_MASK, arena_reg); > + } > + > ex->type = EX_TYPE_BPF; > > ctx->exentry_idx++; > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c > index 7e3fca1646203..b75dea55df5a2 100644 > --- a/arch/x86/net/bpf_jit_comp.c > +++ b/arch/x86/net/bpf_jit_comp.c > @@ -8,6 +8,7 @@ > #include <linux/netdevice.h> > #include <linux/filter.h> > #include <linux/if_vlan.h> > +#include <linux/bitfield.h> > #include <linux/bpf.h> > #include <linux/memory.h> > #include <linux/sort.h> > @@ -1388,16 +1389,67 @@ static int emit_atomic_ld_st_index(u8 **pprog, u32 atomic_op, u32 size, > return 0; > } > > +/* > + * Metadata encoding for exception handling in JITed code. > + * > + * Format of `fixup` and `data` fields in `struct exception_table_entry`: > + * > + * Bit layout of `fixup` (32-bit): > + * > + * +-----------+--------+-----------+---------+----------+ > + * | 31 | 30-24 | 23-16 | 15-8 | 7-0 | > + * | | | | | | > + * | ARENA_ACC | Unused | ARENA_REG | DST_REG | INSN_LEN | > + * +-----------+--------+-----------+---------+----------+ > + * > + * - INSN_LEN (8 bits): Length of faulting insn (max x86 insn = 15 bytes (fits in 8 bits)). > + * - DST_REG (8 bits): Offset of dst_reg from reg2pt_regs[] (max offset = 112 (fits in 8 bits)). > + * This is set to DONT_CLEAR if the insn is a store. > + * - ARENA_REG (8 bits): Offset of the register that is used to calculate the > + * address for load/store when accessing the arena region. > + * - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region. > + * > + * Bit layout of `data` (32-bit): > + * > + * +--------------+--------+--------------+ > + * | 31-16 | 15-8 | 7-0 | > + * | | | | > + * | ARENA_OFFSET | Unused | EX_TYPE_BPF | > + * +--------------+--------+--------------+ > + * > + * - ARENA_OFFSET (16 bits): Offset used to calculate the address for load/store when > + * accessing the arena region. > + */ > + > #define DONT_CLEAR 1 > +#define FIXUP_INSN_LEN_MASK GENMASK(7, 0) > +#define FIXUP_REG_MASK GENMASK(15, 8) > +#define FIXUP_ARENA_REG_MASK GENMASK(23, 16) > +#define FIXUP_ARENA_ACCESS BIT(31) > +#define DATA_ARENA_OFFSET_MASK GENMASK(31, 16) > > bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs) > { > - u32 reg = x->fixup >> 8; > + u32 reg = FIELD_GET(FIXUP_REG_MASK, x->fixup); > + u32 insn_len = FIELD_GET(FIXUP_INSN_LEN_MASK, x->fixup); > + bool is_arena = !!(x->fixup & FIXUP_ARENA_ACCESS); > + bool is_write = (reg == DONT_CLEAR); > + unsigned long addr; > + s16 off; > + u32 arena_reg; > > /* jump over faulting load and clear dest register */ > if (reg != DONT_CLEAR) > *(unsigned long *)((void *)regs + reg) = 0; > - regs->ip += x->fixup & 0xff; > + regs->ip += insn_len; > + > + if (is_arena) { > + arena_reg = FIELD_GET(FIXUP_ARENA_REG_MASK, x->fixup); > + off = FIELD_GET(DATA_ARENA_OFFSET_MASK, x->data); > + addr = *(unsigned long *)((void *)regs + arena_reg) + off; Same question. I faintly remember I spent a few hours when I implemented this, wondering why the reported address was always zeroed out for x86 before realizing they can be the same. It would be good to add a test for this condition. And also, to work around this, the address needs to be captured before the destination register is cleared. > [...]