Re: [RFC bpf-next 5/9] bpf, x86: add support for indirect jumps

Anton Protopopov <a.s.protopopov@xxxxxxxxx> · Thu, 19 Jun 2025 20:13:47 +0000

On 25/06/18 04:03AM, Eduard Zingerman wrote:
> On Sun, 2025-06-15 at 08:59 +0000, Anton Protopopov wrote:
> 
> [...]
> 
> >     0:   r3 = r1                    # "switch (r3)"
> >     1:   if r3 > 0x13 goto +0x666   # check r3 boundaries
> >     2:   r3 <<= 0x3                 # r3 is void*, point to an address
> >     3:   r1 = 0xbeef ll             # r1 is PTR_TO_MAP_VALUE, r1->map_ptr=M
> >     5:   r1 += r3                   # r1 inherits boundaries from r3
> >     6:   r1 = *(u64 *)(r1 + 0x0)    # r1 now has type INSN_TO_PTR
>                                                         ^^^^^^^^^^^
>                                                         PTR_TO_INSN?

Heh, thanks, fill fix. [It's C, so a[1] and 1[a] means the same :-)]

> >     7:   gotox r1[,imm=fd(M)]       # verifier checks that M == r1->map_ptr
> 
> [...]
> 
> > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> > index 37dc83d91832..d20f6775605d 100644
> > --- a/arch/x86/net/bpf_jit_comp.c
> > +++ b/arch/x86/net/bpf_jit_comp.c
> > @@ -2520,6 +2520,13 @@ st:			if (is_imm8(insn->off))
> >  
> >  			break;
> >  
> > +		case BPF_JMP | BPF_JA | BPF_X:
> > +		case BPF_JMP32 | BPF_JA | BPF_X:
> 
> Is it necessary to add both JMP and JMP32 versions?
> Do we need to extend e.g. bpf_jit_supports_insn() and report an error
> in verifier.c or should we rely on individual jits to report unknown
> instruction?

Hmm, should I just leave BPF_JMP? Or just leave as is and do not distinguish?

> 
> > +			emit_indirect_jump(&prog,
> > +					   reg2hex[insn->dst_reg],
> > +					   is_ereg(insn->dst_reg),
> > +					   image + addrs[i - 1]);
> > +			break;
> >  		case BPF_JMP | BPF_JA:
> >  		case BPF_JMP32 | BPF_JA:
> >  			if (BPF_CLASS(insn->code) == BPF_JMP) {
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index 008bcd44c60e..3c5eaea2b476 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -952,6 +952,7 @@ enum bpf_reg_type {
> >  	PTR_TO_ARENA,
> >  	PTR_TO_BUF,		 /* reg points to a read/write buffer */
> >  	PTR_TO_FUNC,		 /* reg points to a bpf program function */
> > +	PTR_TO_INSN,		 /* reg points to a bpf program instruction */
> >  	CONST_PTR_TO_DYNPTR,	 /* reg points to a const struct bpf_dynptr */
> >  	__BPF_REG_TYPE_MAX,
> >  
> > @@ -3601,6 +3602,7 @@ int bpf_insn_set_ready(struct bpf_map *map);
> >  void bpf_insn_set_release(struct bpf_map *map);
> >  void bpf_insn_set_adjust(struct bpf_map *map, u32 off, u32 len);
> >  void bpf_insn_set_adjust_after_remove(struct bpf_map *map, u32 off, u32 len);
> > +int bpf_insn_set_iter_xlated_offset(struct bpf_map *map, u32 iter_no);
> 
> This is a horrible name:
> - this function is not an iterator;
> - it is way too long.
> 
> Maybe make it a bit more complex but convenient to use, e.g.:
> 
>   struct bpf_iarray_iter {
> 	struct bpf_map *map;
> 	u32 idx;
>   };
> 
>   struct bpf_iset_iter bpf_iset_make_iter(struct bpf_map *map, u32 lo, u32 hi);
>   bool bpf_iset_iter_next(struct bpf_iarray_iter *it, u32 *offset); // still a horrible name
> 
> This would hide the manipulation with unique indices from verifier.c.
> 
> ?

How about just bpf_insn_set_next[_unique]_offset()?

> 
> >  
> >  struct bpf_insn_ptr {
> >  	void *jitted_ip;
> > diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> > index 84b5e6b25c52..80d9afcca488 100644
> > --- a/include/linux/bpf_verifier.h
> > +++ b/include/linux/bpf_verifier.h
> > @@ -229,6 +229,10 @@ struct bpf_reg_state {
> >  	enum bpf_reg_liveness live;
> >  	/* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */
> >  	bool precise;
> > +
> > +	/* Used to track boundaries of a PTR_TO_INSN */
> > +	u32 min_index;
> > +	u32 max_index;
> 
> Use {umin,umax}_value instead?

Please see my comments in the reply to Alexei.

> >  };
> >  
> >  enum bpf_stack_slot_type {
> > diff --git a/kernel/bpf/bpf_insn_set.c b/kernel/bpf/bpf_insn_set.c
> > index c20e99327118..316cecad60a9 100644
> > --- a/kernel/bpf/bpf_insn_set.c
> > +++ b/kernel/bpf/bpf_insn_set.c
> > @@ -9,6 +9,8 @@ struct bpf_insn_set {
> >  	struct bpf_map map;
> >  	struct mutex state_mutex;
> >  	int state;
> > +	u32 **unique_offsets;
> 
> Why is this a pointer to pointer?
> bpf_insn_set_iter_xlated_offset() is only used during check_cfg() and
> main verification. At that point no instruction movement occurred yet,
> so no need to track `&insn_set->ptrs[i].user_value.xlated_off`?
> 
> > +	u32 unique_offsets_cnt;
> >  	long *ips;
> >  	DECLARE_FLEX_ARRAY(struct bpf_insn_ptr, ptrs);
> >  };
> 
> [...]
> 
> > @@ -15296,6 +15330,22 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
> >  		return 0;
> >  	}
> >  
> > +	if (dst_reg->type == PTR_TO_MAP_VALUE && map_is_insn_set(dst_reg->map_ptr)) {
> > +		if (opcode != BPF_ADD) {
> > +			verbose(env, "Operation %s on ptr to instruction set map is prohibited\n",
> > +				bpf_alu_string[opcode >> 4]);
> > +			return -EACCES;
> > +		}
> > +		src_reg = &regs[insn->src_reg];
> > +		if (src_reg->type != SCALAR_VALUE) {
> > +			verbose(env, "Adding non-scalar R%d to an instruction ptr is prohibited\n",
> > +				insn->src_reg);
> > +			return -EACCES;
> > +		}
> > +		dst_reg->min_index = src_reg->umin_value / sizeof(long);
> > +		dst_reg->max_index = src_reg->umax_value / sizeof(long);
> > +	}
> > +
> 
> What if there are several BPF_ADD on the same PTR_TO_MAP_VALUE in a row?
> Shouldn't the {min,max}_index be accumulated in that case?
> 
> Nit: this should be handled inside adjust_ptr_min_max_vals().

Yes, thanks, I've had this in my TBDs list for the next version. (All
"legal" cases generated by LLVM just do one add, so I've skipped it.)

> >  	if (dst_reg->type != SCALAR_VALUE)
> >  		ptr_reg = dst_reg;
> >  
> 
> [...]
> 
> > @@ -17552,6 +17607,62 @@ static int mark_fastcall_patterns(struct bpf_verifier_env *env)
> 
> [...]
> 
> > +/* "conditional jump with N edges" */
> > +static int visit_goto_x_insn(int t, struct bpf_verifier_env *env, int fd)
> > +{
> > +	struct bpf_map *map;
> > +	int ret;
> > +
> > +	ret = add_used_map(env, fd, &map);
> > +	if (ret < 0)
> > +		return ret;
> > +
> > +	if (map->map_type != BPF_MAP_TYPE_INSN_SET)
> > +		return -EINVAL;
> 
> Nit: print something in the log?

Yes, thanks.

> 
> > +
> > +	return push_goto_x_edge(t, env, map);
> > +}
> > +
> 
> [...]
> 
> > @@ -18786,11 +18904,22 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
> >  			      struct bpf_func_state *cur, u32 insn_idx, enum exact_level exact)
> >  {
> >  	u16 live_regs = env->insn_aux_data[insn_idx].live_regs_before;
> > +	struct bpf_insn *insn;
> >  	u16 i;
> >  
> >  	if (old->callback_depth > cur->callback_depth)
> >  		return false;
> >  
> > +	insn = &env->prog->insnsi[insn_idx];
> > +	if (insn_is_gotox(insn)) {
> > +		struct bpf_reg_state *old_dst = &old->regs[insn->dst_reg];
> > +		struct bpf_reg_state *cur_dst = &cur->regs[insn->dst_reg];
> > +
> > +		if (old_dst->min_index != cur_dst->min_index ||
> > +		    old_dst->max_index != cur_dst->max_index)
> > +			return false;
> > +	}
> > +
> 
> Concur with Alexei, this should be handled by regsafe().
> Also, having cur_dst as a subset of old_dst should be fine.

Thanks, yes to both.

> >  	for (i = 0; i < MAX_BPF_REG; i++)
> >  		if (((1 << i) & live_regs) &&
> >  		    !regsafe(env, &old->regs[i], &cur->regs[i],
> > @@ -19654,6 +19783,55 @@ static int process_bpf_exit_full(struct bpf_verifier_env *env,
> >  	return PROCESS_BPF_EXIT;
> >  }
> >  
> > +static int check_indirect_jump(struct bpf_verifier_env *env, struct bpf_insn *insn)
> > +{
> > +	struct bpf_verifier_state *other_branch;
> > +	struct bpf_reg_state *dst_reg;
> > +	struct bpf_map *map;
> > +	int xoff;
> > +	int err;
> > +	u32 i;
> > +
> > +	/* this map should already have been added */
> > +	err = add_used_map(env, insn->imm, &map);
> > +	if (err < 0)
> > +		return err;
> > +
> > +	dst_reg = reg_state(env, insn->dst_reg);
> > +	if (dst_reg->type != PTR_TO_INSN) {
> > +		verbose(env, "BPF_JA|BPF_X R%d has type %d, expected PTR_TO_INSN\n",
> > +				insn->dst_reg, dst_reg->type);
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (dst_reg->map_ptr != map) {
> > +		verbose(env, "BPF_JA|BPF_X R%d was loaded from map id=%u, expected id=%u\n",
> > +				insn->dst_reg, dst_reg->map_ptr->id, map->id);
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (dst_reg->max_index >= map->max_entries)
> > +		return -EINVAL;
> > +
> > +	for (i = dst_reg->min_index + 1; i <= dst_reg->max_index; i++) {
> 
> Why +1 is needed in `i = dst_reg->min_index + 1`?

We want to "jump" to states {min,min+1,...,max} so we push states
{min+1,...,max} and continue the current state with the `jump
M[min]`:

    env->insn_idx = bpf_insn_set_iter_xlated_offset(map, dst_reg->min_index);

> > +		xoff = bpf_insn_set_iter_xlated_offset(map, i);
> > +		if (xoff == -ENOENT)
> > +			break;
> > +		if (xoff < 0)
> > +			return xoff;
> > +
> > +		other_branch = push_stack(env, xoff, env->insn_idx, false);
> > +		if (!other_branch)
> > +			return -EFAULT;
> 
> Nit: `return -ENOMEM`.

Ok, thanks.

> 
> > +	}
> > +
> > +	env->insn_idx = bpf_insn_set_iter_xlated_offset(map, dst_reg->min_index);
> > +	if (env->insn_idx < 0)
> > +		return env->insn_idx;
> > +
> > +	return 0;
> > +}
> > +
> >  static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state)
> >  {
> >  	int err;
> 
> [...]
>