diff options
Diffstat (limited to 'net/core/filter.c')
-rw-r--r-- | net/core/filter.c | 135 |
1 files changed, 111 insertions, 24 deletions
diff --git a/net/core/filter.c b/net/core/filter.c index bb18c3680..672eefbfb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -49,16 +49,17 @@ #include <net/sch_generic.h> #include <net/cls_cgroup.h> #include <net/dst_metadata.h> +#include <net/dst.h> /** * sk_filter - run a packet through a socket filter * @sk: sock associated with &sk_buff * @skb: buffer to filter * - * Run the filter code and then cut skb->data to correct size returned by - * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller + * Run the eBPF program and then cut skb->data to correct size returned by + * the program. If pkt_len is 0 we toss packet. If skb->len is smaller * than pkt_len we keep whole skb->data. This is the socket level - * wrapper to SK_RUN_FILTER. It returns 0 if the packet should + * wrapper to BPF_PROG_RUN. It returns 0 if the packet should * be accepted or -EPERM if the packet should be tossed. * */ @@ -82,7 +83,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter) { - unsigned int pkt_len = SK_RUN_FILTER(filter, skb); + unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb); err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; } @@ -148,12 +149,6 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return raw_smp_processor_id(); } -/* note that this only generates 32-bit random numbers */ -static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) -{ - return prandom_u32(); -} - static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, struct bpf_insn *insn_buf) { @@ -312,7 +307,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, *insn = BPF_EMIT_CALL(__get_raw_cpu_id); break; case SKF_AD_OFF + SKF_AD_RANDOM: - *insn = BPF_EMIT_CALL(__get_random_u32); + *insn = BPF_EMIT_CALL(bpf_user_rnd_u32); + bpf_user_rnd_init_once(); break; } break; @@ -1001,7 +997,7 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, int err; fp->bpf_func = NULL; - fp->jited = false; + fp->jited = 0; err = bpf_check_classic(fp->insns, fp->len); if (err) { @@ -1083,16 +1079,18 @@ EXPORT_SYMBOL_GPL(bpf_prog_create); * @pfp: the unattached filter that is created * @fprog: the filter program * @trans: post-classic verifier transformation handler + * @save_orig: save classic BPF program * * This function effectively does the same as bpf_prog_create(), only * that it builds up its insns buffer from user space provided buffer. * It also allows for passing a bpf_aux_classic_check_t handler. */ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, - bpf_aux_classic_check_t trans) + bpf_aux_classic_check_t trans, bool save_orig) { unsigned int fsize = bpf_classic_proglen(fprog); struct bpf_prog *fp; + int err; /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) @@ -1108,12 +1106,16 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, } fp->len = fprog->len; - /* Since unattached filters are not copied back to user - * space through sk_get_filter(), we do not need to hold - * a copy here, and can spare us the work. - */ fp->orig_prog = NULL; + if (save_orig) { + err = bpf_prog_store_orig_filter(fp, fprog); + if (err) { + __bpf_prog_free(fp); + return -ENOMEM; + } + } + /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ @@ -1404,9 +1406,6 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) if (unlikely(!dev)) return -EINVAL; - if (unlikely(!(dev->flags & IFF_UP))) - return -EINVAL; - skb2 = skb_clone(skb, GFP_ATOMIC); if (unlikely(!skb2)) return -ENOMEM; @@ -1428,6 +1427,49 @@ const struct bpf_func_proto bpf_clone_redirect_proto = { .arg3_type = ARG_ANYTHING, }; +struct redirect_info { + u32 ifindex; + u32 flags; +}; + +static DEFINE_PER_CPU(struct redirect_info, redirect_info); +static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + + ri->ifindex = ifindex; + ri->flags = flags; + return TC_ACT_REDIRECT; +} + +int skb_do_redirect(struct sk_buff *skb) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct net_device *dev; + + dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex); + ri->ifindex = 0; + if (unlikely(!dev)) { + kfree_skb(skb); + return -EINVAL; + } + + if (BPF_IS_REDIRECT_INGRESS(ri->flags)) + return dev_forward_skb(dev, skb); + + skb->dev = dev; + skb_sender_cpu_clear(skb); + return dev_queue_xmit(skb); +} + +const struct bpf_func_proto bpf_redirect_proto = { + .func = bpf_redirect, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, +}; + static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { return task_get_classid((struct sk_buff *) (unsigned long) r1); @@ -1440,6 +1482,25 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ +#ifdef CONFIG_IP_ROUTE_CLASSID + const struct dst_entry *dst; + + dst = skb_dst((struct sk_buff *) (unsigned long) r1); + if (dst) + return dst->tclassid; +#endif + return 0; +} + +static const struct bpf_func_proto bpf_get_route_realm_proto = { + .func = bpf_get_route_realm, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *) (long) r1; @@ -1580,7 +1641,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; case BPF_FUNC_trace_printk: - return bpf_get_trace_printk_proto(); + if (capable(CAP_SYS_ADMIN)) + return bpf_get_trace_printk_proto(); default: return NULL; } @@ -1608,6 +1670,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_get_tunnel_key_proto; case BPF_FUNC_skb_set_tunnel_key: return bpf_get_skb_set_tunnel_key_proto(); + case BPF_FUNC_redirect: + return &bpf_redirect_proto; + case BPF_FUNC_get_route_realm: + return &bpf_get_route_realm_proto; default: return sk_filter_func_proto(func_id); } @@ -1633,6 +1699,9 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type) static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type) { + if (off == offsetof(struct __sk_buff, tc_classid)) + return false; + if (type == BPF_WRITE) { switch (off) { case offsetof(struct __sk_buff, cb[0]) ... @@ -1649,10 +1718,14 @@ static bool sk_filter_is_valid_access(int off, int size, static bool tc_cls_act_is_valid_access(int off, int size, enum bpf_access_type type) { + if (off == offsetof(struct __sk_buff, tc_classid)) + return type == BPF_WRITE ? true : false; + if (type == BPF_WRITE) { switch (off) { case offsetof(struct __sk_buff, mark): case offsetof(struct __sk_buff, tc_index): + case offsetof(struct __sk_buff, priority): case offsetof(struct __sk_buff, cb[0]) ... offsetof(struct __sk_buff, cb[4]): break; @@ -1665,7 +1738,8 @@ static bool tc_cls_act_is_valid_access(int off, int size, static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, - struct bpf_insn *insn_buf) + struct bpf_insn *insn_buf, + struct bpf_prog *prog) { struct bpf_insn *insn = insn_buf; @@ -1694,8 +1768,12 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, case offsetof(struct __sk_buff, priority): BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, - offsetof(struct sk_buff, priority)); + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, priority)); + else + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, priority)); break; case offsetof(struct __sk_buff, ingress_ifindex): @@ -1752,6 +1830,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, offsetof(struct __sk_buff, cb[4]): BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); + prog->cb_access = 1; ctx_off -= offsetof(struct __sk_buff, cb[0]); ctx_off += offsetof(struct sk_buff, cb); ctx_off += offsetof(struct qdisc_skb_cb, data); @@ -1761,6 +1840,14 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off); break; + case offsetof(struct __sk_buff, tc_classid): + ctx_off -= offsetof(struct __sk_buff, tc_classid); + ctx_off += offsetof(struct sk_buff, cb); + ctx_off += offsetof(struct qdisc_skb_cb, tc_classid); + WARN_ON(type != BPF_WRITE); + *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off); + break; + case offsetof(struct __sk_buff, tc_index): #ifdef CONFIG_NET_SCHED BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); |