diff options
Diffstat (limited to 'arch/mips/net/bpf_jit.c')
-rw-r--r-- | arch/mips/net/bpf_jit.c | 262 |
1 files changed, 69 insertions, 193 deletions
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index e23fdf2a9..0c4a133f6 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -20,6 +20,7 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/types.h> +#include <asm/asm.h> #include <asm/bitops.h> #include <asm/cacheflush.h> #include <asm/cpu-features.h> @@ -28,14 +29,14 @@ #include "bpf_jit.h" /* ABI - * - * s0 1st scratch register - * s1 2nd scratch register - * s2 offset register - * s3 BPF register A - * s4 BPF register X - * s5 *skb - * s6 *scratch memory + * r_skb_hl SKB header length + * r_data SKB data pointer + * r_off Offset + * r_A BPF register A + * r_X BPF register X + * r_skb *skb + * r_M *scratch memory + * r_skb_len SKB length * * On entry (*bpf_func)(*skb, *filter) * a0 = MIPS_R_A0 = skb; @@ -63,44 +64,8 @@ * ---------------------------------------------------- */ -#define RSIZE (sizeof(unsigned long)) #define ptr typeof(unsigned long) -/* ABI specific return values */ -#ifdef CONFIG_32BIT /* O32 */ -#ifdef CONFIG_CPU_LITTLE_ENDIAN -#define r_err MIPS_R_V1 -#define r_val MIPS_R_V0 -#else /* CONFIG_CPU_LITTLE_ENDIAN */ -#define r_err MIPS_R_V0 -#define r_val MIPS_R_V1 -#endif -#else /* N64 */ -#define r_err MIPS_R_V0 -#define r_val MIPS_R_V0 -#endif - -#define r_ret MIPS_R_V0 - -/* - * Use 2 scratch registers to avoid pipeline interlocks. - * There is no overhead during epilogue and prologue since - * any of the $s0-$s6 registers will only be preserved if - * they are going to actually be used. 
- */ -#define r_s0 MIPS_R_S0 /* scratch reg 1 */ -#define r_s1 MIPS_R_S1 /* scratch reg 2 */ -#define r_off MIPS_R_S2 -#define r_A MIPS_R_S3 -#define r_X MIPS_R_S4 -#define r_skb MIPS_R_S5 -#define r_M MIPS_R_S6 -#define r_tmp_imm MIPS_R_T6 /* No need to preserve this */ -#define r_tmp MIPS_R_T7 /* No need to preserve this */ -#define r_zero MIPS_R_ZERO -#define r_sp MIPS_R_SP -#define r_ra MIPS_R_RA - #define SCRATCH_OFF(k) (4 * (k)) /* JIT flags */ @@ -108,13 +73,13 @@ #define SEEN_SREG_SFT (BPF_MEMWORDS + 1) #define SEEN_SREG_BASE (1 << SEEN_SREG_SFT) #define SEEN_SREG(x) (SEEN_SREG_BASE << (x)) -#define SEEN_S0 SEEN_SREG(0) -#define SEEN_S1 SEEN_SREG(1) #define SEEN_OFF SEEN_SREG(2) #define SEEN_A SEEN_SREG(3) #define SEEN_X SEEN_SREG(4) #define SEEN_SKB SEEN_SREG(5) #define SEEN_MEM SEEN_SREG(6) +/* SEEN_SKB_DATA also implies skb_hl and skb_len */ +#define SEEN_SKB_DATA (SEEN_SREG(7) | SEEN_SREG(1) | SEEN_SREG(0)) /* Arguments used by JIT */ #define ARGS_USED_BY_JIT 2 /* only applicable to 64-bit */ @@ -577,27 +542,13 @@ static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset) /* Adjust the stack pointer */ emit_stack_offset(-align_sp(offset), ctx); - if (ctx->flags & SEEN_CALL) { - /* Argument save area */ - if (config_enabled(CONFIG_64BIT)) - /* Bottom of current frame */ - real_off = align_sp(offset) - RSIZE; - else - /* Top of previous frame */ - real_off = align_sp(offset) + RSIZE; - emit_store_stack_reg(MIPS_R_A0, r_sp, real_off, ctx); - emit_store_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx); - - real_off = 0; - } - tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; /* sflags is essentially a bitmap */ while (tmp_flags) { if ((sflags >> i) & 0x1) { emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off, ctx); - real_off += RSIZE; + real_off += SZREG; } i++; tmp_flags >>= 1; @@ -606,13 +557,13 @@ static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset) /* save return address */ if (ctx->flags & SEEN_CALL) { emit_store_stack_reg(r_ra, 
r_sp, real_off, ctx); - real_off += RSIZE; + real_off += SZREG; } /* Setup r_M leaving the alignment gap if necessary */ if (ctx->flags & SEEN_MEM) { - if (real_off % (RSIZE * 2)) - real_off += RSIZE; + if (real_off % (SZREG * 2)) + real_off += SZREG; emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off); } } @@ -623,19 +574,6 @@ static void restore_bpf_jit_regs(struct jit_ctx *ctx, int i, real_off = 0; u32 sflags, tmp_flags; - if (ctx->flags & SEEN_CALL) { - if (config_enabled(CONFIG_64BIT)) - /* Bottom of current frame */ - real_off = align_sp(offset) - RSIZE; - else - /* Top of previous frame */ - real_off = align_sp(offset) + RSIZE; - emit_load_stack_reg(MIPS_R_A0, r_sp, real_off, ctx); - emit_load_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx); - - real_off = 0; - } - tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; /* sflags is a bitmap */ i = 0; @@ -643,7 +581,7 @@ static void restore_bpf_jit_regs(struct jit_ctx *ctx, if ((sflags >> i) & 0x1) { emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off, ctx); - real_off += RSIZE; + real_off += SZREG; } i++; tmp_flags >>= 1; @@ -663,23 +601,13 @@ static unsigned int get_stack_depth(struct jit_ctx *ctx) /* How may s* regs do we need to preserved? */ - sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * RSIZE; + sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * SZREG; if (ctx->flags & SEEN_MEM) sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */ if (ctx->flags & SEEN_CALL) - /* - * The JIT code make calls to external functions using 2 - * arguments. Therefore, for o32 we don't need to allocate - * space because we don't care if the argumetns are lost - * across calls. We do need however to preserve incoming - * arguments but the space is already allocated for us by - * the caller. On the other hand, for n64, we need to allocate - * this space ourselves. We need to preserve $ra as well. - */ - sp_off += config_enabled(CONFIG_64BIT) ? 
- (ARGS_USED_BY_JIT + 1) * RSIZE : RSIZE; + sp_off += SZREG; /* Space for our ra register */ return sp_off; } @@ -696,6 +624,19 @@ static void build_prologue(struct jit_ctx *ctx) if (ctx->flags & SEEN_SKB) emit_reg_move(r_skb, MIPS_R_A0, ctx); + if (ctx->flags & SEEN_SKB_DATA) { + /* Load packet length */ + emit_load(r_skb_len, r_skb, offsetof(struct sk_buff, len), + ctx); + emit_load(r_tmp, r_skb, offsetof(struct sk_buff, data_len), + ctx); + /* Load the data pointer */ + emit_load_ptr(r_skb_data, r_skb, + offsetof(struct sk_buff, data), ctx); + /* Load the header length */ + emit_subu(r_skb_hl, r_skb_len, r_tmp, ctx); + } + if (ctx->flags & SEEN_X) emit_jit_reg_move(r_X, r_zero, ctx); @@ -718,43 +659,17 @@ static void build_epilogue(struct jit_ctx *ctx) emit_nop(ctx); } -static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset) -{ - u8 ret; - int err; - - err = skb_copy_bits(skb, offset, &ret, 1); - - return (u64)err << 32 | ret; -} - -static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset) -{ - u16 ret; - int err; - - err = skb_copy_bits(skb, offset, &ret, 2); - - return (u64)err << 32 | ntohs(ret); -} - -static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset) -{ - u32 ret; - int err; - - err = skb_copy_bits(skb, offset, &ret, 4); - - return (u64)err << 32 | ntohl(ret); -} +#define CHOOSE_LOAD_FUNC(K, func) \ + ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? 
func##_negative : func) : \ + func##_positive) static int build_body(struct jit_ctx *ctx) { - void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; const struct bpf_prog *prog = ctx->skf; const struct sock_filter *inst; - unsigned int i, off, load_order, condt; + unsigned int i, off, condt; u32 k, b_off __maybe_unused; + u8 (*sk_load_func)(unsigned long *skb, int offset); for (i = 0; i < prog->len; i++) { u16 code; @@ -788,71 +703,46 @@ static int build_body(struct jit_ctx *ctx) break; case BPF_LD | BPF_W | BPF_ABS: /* A <- P[k:4] */ - load_order = 2; + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_word); goto load; case BPF_LD | BPF_H | BPF_ABS: /* A <- P[k:2] */ - load_order = 1; + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_half); goto load; case BPF_LD | BPF_B | BPF_ABS: /* A <- P[k:1] */ - load_order = 0; + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_byte); load: - /* the interpreter will deal with the negative K */ - if ((int)k < 0) - return -ENOTSUPP; - emit_load_imm(r_off, k, ctx); load_common: - /* - * We may got here from the indirect loads so - * return if offset is negative. 
- */ - emit_slt(r_s0, r_off, r_zero, ctx); - emit_bcond(MIPS_COND_NE, r_s0, r_zero, - b_imm(prog->len, ctx), ctx); - emit_reg_move(r_ret, r_zero, ctx); - - ctx->flags |= SEEN_CALL | SEEN_OFF | SEEN_S0 | - SEEN_SKB | SEEN_A; + ctx->flags |= SEEN_CALL | SEEN_OFF | + SEEN_SKB | SEEN_A | SEEN_SKB_DATA; - emit_load_func(r_s0, (ptr)load_func[load_order], - ctx); + emit_load_func(r_s0, (ptr)sk_load_func, ctx); emit_reg_move(MIPS_R_A0, r_skb, ctx); emit_jalr(MIPS_R_RA, r_s0, ctx); /* Load second argument to delay slot */ emit_reg_move(MIPS_R_A1, r_off, ctx); /* Check the error value */ - if (config_enabled(CONFIG_64BIT)) { - /* Get error code from the top 32-bits */ - emit_dsrl32(r_s0, r_val, 0, ctx); - /* Branch to 3 instructions ahead */ - emit_bcond(MIPS_COND_NE, r_s0, r_zero, 3 << 2, - ctx); - } else { - /* Branch to 3 instructions ahead */ - emit_bcond(MIPS_COND_NE, r_err, r_zero, 3 << 2, - ctx); - } - emit_nop(ctx); - /* We are good */ - emit_b(b_imm(i + 1, ctx), ctx); - emit_jit_reg_move(r_A, r_val, ctx); + emit_bcond(MIPS_COND_EQ, r_ret, 0, b_imm(i + 1, ctx), + ctx); + /* Load return register on DS for failures */ + emit_reg_move(r_ret, r_zero, ctx); /* Return with error */ emit_b(b_imm(prog->len, ctx), ctx); - emit_reg_move(r_ret, r_zero, ctx); + emit_nop(ctx); break; case BPF_LD | BPF_W | BPF_IND: /* A <- P[X + k:4] */ - load_order = 2; + sk_load_func = sk_load_word; goto load_ind; case BPF_LD | BPF_H | BPF_IND: /* A <- P[X + k:2] */ - load_order = 1; + sk_load_func = sk_load_half; goto load_ind; case BPF_LD | BPF_B | BPF_IND: /* A <- P[X + k:1] */ - load_order = 0; + sk_load_func = sk_load_byte; load_ind: ctx->flags |= SEEN_OFF | SEEN_X; emit_addiu(r_off, r_X, k, ctx); @@ -874,14 +764,10 @@ load_ind: emit_load(r_X, r_skb, off, ctx); break; case BPF_LDX | BPF_B | BPF_MSH: - /* the interpreter will deal with the negative K */ - if ((int)k < 0) - return -ENOTSUPP; - /* X <- 4 * (P[k:1] & 0xf) */ - ctx->flags |= SEEN_X | SEEN_CALL | SEEN_S0 | SEEN_SKB; + ctx->flags 
|= SEEN_X | SEEN_CALL | SEEN_SKB; /* Load offset to a1 */ - emit_load_func(r_s0, (ptr)jit_get_skb_b, ctx); + emit_load_func(r_s0, (ptr)sk_load_byte, ctx); /* * This may emit two instructions so it may not fit * in the delay slot. So use a0 in the delay slot. @@ -890,25 +776,15 @@ load_ind: emit_jalr(MIPS_R_RA, r_s0, ctx); emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */ /* Check the error value */ - if (config_enabled(CONFIG_64BIT)) { - /* Top 32-bits of $v0 on 64-bit */ - emit_dsrl32(r_s0, r_val, 0, ctx); - emit_bcond(MIPS_COND_NE, r_s0, r_zero, - 3 << 2, ctx); - } else { - emit_bcond(MIPS_COND_NE, r_err, r_zero, - 3 << 2, ctx); - } - /* No need for delay slot */ + emit_bcond(MIPS_COND_NE, r_ret, 0, + b_imm(prog->len, ctx), ctx); + emit_reg_move(r_ret, r_zero, ctx); /* We are good */ /* X <- P[1:K] & 0xf */ - emit_andi(r_X, r_val, 0xf, ctx); + emit_andi(r_X, r_A, 0xf, ctx); /* X << 2 */ emit_b(b_imm(i + 1, ctx), ctx); emit_sll(r_X, r_X, 2, ctx); /* delay slot */ - /* Return with error */ - emit_b(b_imm(prog->len, ctx), ctx); - emit_load_imm(r_ret, 0, ctx); /* delay slot */ break; case BPF_ST: /* M[k] <- A */ @@ -943,7 +819,7 @@ load_ind: case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ /* Load K to scratch register before MUL */ - ctx->flags |= SEEN_A | SEEN_S0; + ctx->flags |= SEEN_A; emit_load_imm(r_s0, k, ctx); emit_mul(r_A, r_A, r_s0, ctx); break; @@ -961,7 +837,7 @@ load_ind: emit_srl(r_A, r_A, k, ctx); break; } - ctx->flags |= SEEN_A | SEEN_S0; + ctx->flags |= SEEN_A; emit_load_imm(r_s0, k, ctx); emit_div(r_A, r_s0, ctx); break; @@ -971,7 +847,7 @@ load_ind: ctx->flags |= SEEN_A; emit_jit_reg_move(r_A, r_zero, ctx); } else { - ctx->flags |= SEEN_A | SEEN_S0; + ctx->flags |= SEEN_A; emit_load_imm(r_s0, k, ctx); emit_mod(r_A, r_s0, ctx); } @@ -982,7 +858,7 @@ load_ind: /* Check if r_X is zero */ emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_imm(prog->len, ctx), ctx); - emit_load_imm(r_val, 0, ctx); /* delay slot */ + emit_load_imm(r_ret, 0, ctx); /* delay slot */ 
emit_div(r_A, r_X, ctx); break; case BPF_ALU | BPF_MOD | BPF_X: @@ -991,7 +867,7 @@ load_ind: /* Check if r_X is zero */ emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_imm(prog->len, ctx), ctx); - emit_load_imm(r_val, 0, ctx); /* delay slot */ + emit_load_imm(r_ret, 0, ctx); /* delay slot */ emit_mod(r_A, r_X, ctx); break; case BPF_ALU | BPF_OR | BPF_K: @@ -1085,10 +961,10 @@ jmp_cmp: if ((condt & MIPS_COND_GE) || (condt & MIPS_COND_GT)) { if (condt & MIPS_COND_K) { /* K */ - ctx->flags |= SEEN_S0 | SEEN_A; + ctx->flags |= SEEN_A; emit_sltiu(r_s0, r_A, k, ctx); } else { /* X */ - ctx->flags |= SEEN_S0 | SEEN_A | + ctx->flags |= SEEN_A | SEEN_X; emit_sltu(r_s0, r_A, r_X, ctx); } @@ -1100,7 +976,7 @@ jmp_cmp: /* A > (K|X) ? scratch = 0 */ if (condt & MIPS_COND_GT) { /* Checking for equality */ - ctx->flags |= SEEN_S0 | SEEN_A | SEEN_X; + ctx->flags |= SEEN_A | SEEN_X; if (condt & MIPS_COND_K) emit_load_imm(r_s0, k, ctx); else @@ -1123,7 +999,7 @@ jmp_cmp: } else { /* A == K|X */ if (condt & MIPS_COND_K) { /* K */ - ctx->flags |= SEEN_S0 | SEEN_A; + ctx->flags |= SEEN_A; emit_load_imm(r_s0, k, ctx); /* jump true */ b_off = b_imm(i + inst->jt + 1, ctx); @@ -1153,7 +1029,7 @@ jmp_cmp: } break; case BPF_JMP | BPF_JSET | BPF_K: - ctx->flags |= SEEN_S0 | SEEN_S1 | SEEN_A; + ctx->flags |= SEEN_A; /* pc += (A & K) ? pc -> jt : pc -> jf */ emit_load_imm(r_s1, k, ctx); emit_and(r_s0, r_A, r_s1, ctx); @@ -1167,7 +1043,7 @@ jmp_cmp: emit_nop(ctx); break; case BPF_JMP | BPF_JSET | BPF_X: - ctx->flags |= SEEN_S0 | SEEN_X | SEEN_A; + ctx->flags |= SEEN_X | SEEN_A; /* pc += (A & X) ? 
pc -> jt : pc -> jf */ emit_and(r_s0, r_A, r_X, ctx); /* jump true */ @@ -1251,7 +1127,7 @@ jmp_cmp: break; case BPF_ANC | SKF_AD_IFINDEX: /* A = skb->dev->ifindex */ - ctx->flags |= SEEN_SKB | SEEN_A | SEEN_S0; + ctx->flags |= SEEN_SKB | SEEN_A; off = offsetof(struct sk_buff, dev); /* Load *dev pointer */ emit_load_ptr(r_s0, r_skb, off, ctx); @@ -1278,7 +1154,7 @@ jmp_cmp: break; case BPF_ANC | SKF_AD_VLAN_TAG: case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: - ctx->flags |= SEEN_SKB | SEEN_S0 | SEEN_A; + ctx->flags |= SEEN_SKB | SEEN_A; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); off = offsetof(struct sk_buff, vlan_tci); |