diff options
Diffstat (limited to 'samples/bpf')
-rw-r--r-- | samples/bpf/Makefile | 14 | ||||
-rw-r--r-- | samples/bpf/bpf_helpers.h | 10 | ||||
-rw-r--r-- | samples/bpf/bpf_load.c | 57 | ||||
-rw-r--r-- | samples/bpf/lathist_kern.c | 99 | ||||
-rw-r--r-- | samples/bpf/lathist_user.c | 103 | ||||
-rw-r--r-- | samples/bpf/sockex3_kern.c | 290 | ||||
-rw-r--r-- | samples/bpf/sockex3_user.c | 66 | ||||
-rw-r--r-- | samples/bpf/tcbpf1_kern.c | 8 | ||||
-rw-r--r-- | samples/bpf/test_verifier.c | 84 | ||||
-rw-r--r-- | samples/bpf/tracex2_kern.c | 24 | ||||
-rw-r--r-- | samples/bpf/tracex2_user.c | 67 | ||||
-rw-r--r-- | samples/bpf/tracex5_kern.c | 75 | ||||
-rw-r--r-- | samples/bpf/tracex5_user.c | 46 |
13 files changed, 910 insertions, 33 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 76e3458a5..4450fed91 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -6,45 +6,57 @@ hostprogs-y := test_verifier test_maps hostprogs-y += sock_example hostprogs-y += sockex1 hostprogs-y += sockex2 +hostprogs-y += sockex3 hostprogs-y += tracex1 hostprogs-y += tracex2 hostprogs-y += tracex3 hostprogs-y += tracex4 +hostprogs-y += tracex5 +hostprogs-y += lathist test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o sock_example-objs := sock_example.o libbpf.o sockex1-objs := bpf_load.o libbpf.o sockex1_user.o sockex2-objs := bpf_load.o libbpf.o sockex2_user.o +sockex3-objs := bpf_load.o libbpf.o sockex3_user.o tracex1-objs := bpf_load.o libbpf.o tracex1_user.o tracex2-objs := bpf_load.o libbpf.o tracex2_user.o tracex3-objs := bpf_load.o libbpf.o tracex3_user.o tracex4-objs := bpf_load.o libbpf.o tracex4_user.o +tracex5-objs := bpf_load.o libbpf.o tracex5_user.o +lathist-objs := bpf_load.o libbpf.o lathist_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) always += sockex1_kern.o always += sockex2_kern.o +always += sockex3_kern.o always += tracex1_kern.o always += tracex2_kern.o always += tracex3_kern.o always += tracex4_kern.o +always += tracex5_kern.o always += tcbpf1_kern.o +always += lathist_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable HOSTLOADLIBES_sockex1 += -lelf HOSTLOADLIBES_sockex2 += -lelf +HOSTLOADLIBES_sockex3 += -lelf HOSTLOADLIBES_tracex1 += -lelf HOSTLOADLIBES_tracex2 += -lelf HOSTLOADLIBES_tracex3 += -lelf HOSTLOADLIBES_tracex4 += -lelf -lrt +HOSTLOADLIBES_tracex5 += -lelf +HOSTLOADLIBES_lathist += -lelf # point this to your LLVM backend with bpf support LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc -%.o: %.c +$(obj)/%.o: $(src)/%.c clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index f960b5fb3..bdf1c1607 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -21,6 +21,16 @@ static unsigned long long (*bpf_ktime_get_ns)(void) = (void *) BPF_FUNC_ktime_get_ns; static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = (void *) BPF_FUNC_trace_printk; +static void (*bpf_tail_call)(void *ctx, void *map, int index) = + (void *) BPF_FUNC_tail_call; +static unsigned long long (*bpf_get_smp_processor_id)(void) = + (void *) BPF_FUNC_get_smp_processor_id; +static unsigned long long (*bpf_get_current_pid_tgid)(void) = + (void *) BPF_FUNC_get_current_pid_tgid; +static unsigned long long (*bpf_get_current_uid_gid)(void) = + (void *) BPF_FUNC_get_current_uid_gid; +static int (*bpf_get_current_comm)(void *buf, int buf_size) = + (void *) BPF_FUNC_get_current_comm; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 38dac5a53..da86a8e0a 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -16,6 +16,7 @@ #include <sys/ioctl.h> #include <sys/mman.h> #include <poll.h> +#include <ctype.h> #include "libbpf.h" #include "bpf_helpers.h" #include "bpf_load.h" @@ -29,6 +30,19 @@ int map_fd[MAX_MAPS]; int prog_fd[MAX_PROGS]; int event_fd[MAX_PROGS]; int prog_cnt; +int prog_array_fd = -1; + +static int populate_prog_array(const char *event, int prog_fd) +{ + int ind = atoi(event), err; + + err = bpf_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY); + if (err < 0) { + printf("failed to store prog_fd in prog_array\n"); + return -1; + } + return 0; +} static int load_and_attach(const char *event, struct bpf_insn *prog, int size) { @@ -54,12 +68,40 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) return -1; } + fd = bpf_prog_load(prog_type, prog, size, license, kern_version); + if (fd < 0) { + printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf); + return -1; + } + + prog_fd[prog_cnt++] = fd; + + if (is_socket) { + event += 6; + if (*event != '/') + return 0; + event++; + if (!isdigit(*event)) { + printf("invalid prog number\n"); + return -1; + } + return populate_prog_array(event, fd); + } + if (is_kprobe || is_kretprobe) { if (is_kprobe) event += 7; else event += 10; + if (*event == 0) { + printf("event name cannot be empty\n"); + return -1; + } + + if (isdigit(*event)) + return populate_prog_array(event, fd); + snprintf(buf, sizeof(buf), "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events", is_kprobe ? 'p' : 'r', event, event); @@ -71,18 +113,6 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) } } - fd = bpf_prog_load(prog_type, prog, size, license, kern_version); - - if (fd < 0) { - printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf); - return -1; - } - - prog_fd[prog_cnt++] = fd; - - if (is_socket) - return 0; - strcpy(buf, DEBUGFS); strcat(buf, "events/kprobes/"); strcat(buf, event); @@ -130,6 +160,9 @@ static int load_maps(struct bpf_map_def *maps, int len) maps[i].max_entries); if (map_fd[i] < 0) return 1; + + if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY) + prog_array_fd = map_fd[i]; } return 0; } diff --git a/samples/bpf/lathist_kern.c b/samples/bpf/lathist_kern.c new file mode 100644 index 000000000..18fa08847 --- /dev/null +++ b/samples/bpf/lathist_kern.c @@ -0,0 +1,99 @@ +/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com + * Copyright (c) 2015 BMW Car IT GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/version.h> +#include <linux/ptrace.h> +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +#define MAX_ENTRIES 20 +#define MAX_CPU 4 + +/* We need to stick to static allocated memory (an array instead of + * hash table) because managing dynamic memory from the + * trace_preempt_[on|off] tracepoints hooks is not supported. + */ + +struct bpf_map_def SEC("maps") my_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(u64), + .max_entries = MAX_CPU, +}; + +SEC("kprobe/trace_preempt_off") +int bpf_prog1(struct pt_regs *ctx) +{ + int cpu = bpf_get_smp_processor_id(); + u64 *ts = bpf_map_lookup_elem(&my_map, &cpu); + + if (ts) + *ts = bpf_ktime_get_ns(); + + return 0; +} + +static unsigned int log2(unsigned int v) +{ + unsigned int r; + unsigned int shift; + + r = (v > 0xFFFF) << 4; v >>= r; + shift = (v > 0xFF) << 3; v >>= shift; r |= shift; + shift = (v > 0xF) << 2; v >>= shift; r |= shift; + shift = (v > 0x3) << 1; v >>= shift; r |= shift; + r |= (v >> 1); + + return r; +} + +static unsigned int log2l(unsigned long v) +{ + unsigned int hi = v >> 32; + + if (hi) + return log2(hi) + 32; + else + return log2(v); +} + +struct bpf_map_def SEC("maps") my_lat = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(long), + .max_entries = MAX_CPU * MAX_ENTRIES, +}; + +SEC("kprobe/trace_preempt_on") +int bpf_prog2(struct pt_regs *ctx) +{ + u64 *ts, cur_ts, delta; + int key, cpu; + long *val; + + cpu = bpf_get_smp_processor_id(); + ts = bpf_map_lookup_elem(&my_map, &cpu); + if (!ts) + return 0; + + cur_ts = bpf_ktime_get_ns(); + delta = log2l(cur_ts - *ts); + + if (delta > MAX_ENTRIES - 1) + delta = MAX_ENTRIES - 1; + + key = cpu * MAX_ENTRIES + delta; + val = bpf_map_lookup_elem(&my_lat, &key); + if (val) + __sync_fetch_and_add((long *)val, 1); + + return 0; + +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/lathist_user.c b/samples/bpf/lathist_user.c new file mode 100644 index 000000000..65da8c157 --- /dev/null +++ b/samples/bpf/lathist_user.c @@ -0,0 +1,103 @@ +/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com + * Copyright (c) 2015 BMW Car IT GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <signal.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" + +#define MAX_ENTRIES 20 +#define MAX_CPU 4 +#define MAX_STARS 40 + +struct cpu_hist { + long data[MAX_ENTRIES]; + long max; +}; + +static struct cpu_hist cpu_hist[MAX_CPU]; + +static void stars(char *str, long val, long max, int width) +{ + int i; + + for (i = 0; i < (width * val / max) - 1 && i < width - 1; i++) + str[i] = '*'; + if (val > max) + str[i - 1] = '+'; + str[i] = '\0'; +} + +static void print_hist(void) +{ + char starstr[MAX_STARS]; + struct cpu_hist *hist; + int i, j; + + /* clear screen */ + printf("\033[2J"); + + for (j = 0; j < MAX_CPU; j++) { + hist = &cpu_hist[j]; + + /* ignore CPUs without data (maybe offline?) */ + if (hist->max == 0) + continue; + + printf("CPU %d\n", j); + printf(" latency : count distribution\n"); + for (i = 1; i <= MAX_ENTRIES; i++) { + stars(starstr, hist->data[i - 1], hist->max, MAX_STARS); + printf("%8ld -> %-8ld : %-8ld |%-*s|\n", + (1l << i) >> 1, (1l << i) - 1, + hist->data[i - 1], MAX_STARS, starstr); + } + } +} + +static void get_data(int fd) +{ + long key, value; + int c, i; + + for (i = 0; i < MAX_CPU; i++) + cpu_hist[i].max = 0; + + for (c = 0; c < MAX_CPU; c++) { + for (i = 0; i < MAX_ENTRIES; i++) { + key = c * MAX_ENTRIES + i; + bpf_lookup_elem(fd, &key, &value); + + cpu_hist[c].data[i] = value; + if (value > cpu_hist[c].max) + cpu_hist[c].max = value; + } + } +} + +int main(int argc, char **argv) +{ + char filename[256]; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + while (1) { + get_data(map_fd[1]); + print_hist(); + sleep(5); + } + + return 0; +} diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c new file mode 100644 index 000000000..41ae2fd21 --- /dev/null +++ b/samples/bpf/sockex3_kern.c @@ -0,0 +1,290 @@ +/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" +#include <uapi/linux/in.h> +#include <uapi/linux/if.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/ip.h> +#include <uapi/linux/ipv6.h> +#include <uapi/linux/if_tunnel.h> +#include <uapi/linux/mpls.h> +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF + +#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F + +struct bpf_map_def SEC("maps") jmp_table = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u32), + .max_entries = 8, +}; + +#define PARSE_VLAN 1 +#define PARSE_MPLS 2 +#define PARSE_IP 3 +#define PARSE_IPV6 4 + +/* protocol dispatch routine. + * It tail-calls next BPF program depending on eth proto + * Note, we could have used: + * bpf_tail_call(skb, &jmp_table, proto); + * but it would need large prog_array + */ +static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto) +{ + switch (proto) { + case ETH_P_8021Q: + case ETH_P_8021AD: + bpf_tail_call(skb, &jmp_table, PARSE_VLAN); + break; + case ETH_P_MPLS_UC: + case ETH_P_MPLS_MC: + bpf_tail_call(skb, &jmp_table, PARSE_MPLS); + break; + case ETH_P_IP: + bpf_tail_call(skb, &jmp_table, PARSE_IP); + break; + case ETH_P_IPV6: + bpf_tail_call(skb, &jmp_table, PARSE_IPV6); + break; + } +} + +struct vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + +struct flow_keys { + __be32 src; + __be32 dst; + union { + __be32 ports; + __be16 port16[2]; + }; + __u32 ip_proto; +}; + +static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff) +{ + return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off)) + & (IP_MF | IP_OFFSET); +} + +static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) +{ + __u64 w0 = load_word(ctx, off); + __u64 w1 = load_word(ctx, off + 4); + __u64 w2 = load_word(ctx, off + 8); + __u64 w3 = load_word(ctx, off + 12); + + return (__u32)(w0 ^ w1 ^ w2 ^ w3); +} + +struct globals { + struct flow_keys flow; +}; + +struct bpf_map_def SEC("maps") percpu_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct globals), + .max_entries = 32, +}; + +/* user poor man's per_cpu until native support is ready */ +static struct globals *this_cpu_globals(void) +{ + u32 key = bpf_get_smp_processor_id(); + + return bpf_map_lookup_elem(&percpu_map, &key); +} + +/* some simple stats for user space consumption */ +struct pair { + __u64 packets; + __u64 bytes; +}; + +struct bpf_map_def SEC("maps") hash_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct flow_keys), + .value_size = sizeof(struct pair), + .max_entries = 1024, +}; + +static void update_stats(struct __sk_buff *skb, struct globals *g) +{ + struct flow_keys key = g->flow; + struct pair *value; + + value = bpf_map_lookup_elem(&hash_map, &key); + if (value) { + __sync_fetch_and_add(&value->packets, 1); + __sync_fetch_and_add(&value->bytes, skb->len); + } else { + struct pair val = {1, skb->len}; + + bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY); + } +} + +static __always_inline void parse_ip_proto(struct __sk_buff *skb, + struct globals *g, __u32 ip_proto) +{ + __u32 nhoff = skb->cb[0]; + int poff; + + switch (ip_proto) { + case IPPROTO_GRE: { + struct gre_hdr { + __be16 flags; + __be16 proto; + }; + + __u32 gre_flags = load_half(skb, + nhoff + offsetof(struct gre_hdr, flags)); + __u32 gre_proto = load_half(skb, + nhoff + offsetof(struct gre_hdr, proto)); + + if (gre_flags & (GRE_VERSION|GRE_ROUTING)) + break; + + nhoff += 4; + if (gre_flags & GRE_CSUM) + nhoff += 4; + if (gre_flags & GRE_KEY) + nhoff += 4; + if (gre_flags & GRE_SEQ) + nhoff += 4; + + skb->cb[0] = nhoff; + parse_eth_proto(skb, gre_proto); + break; + } + case IPPROTO_IPIP: + parse_eth_proto(skb, ETH_P_IP); + break; + case IPPROTO_IPV6: + parse_eth_proto(skb, ETH_P_IPV6); + break; + case IPPROTO_TCP: + case IPPROTO_UDP: + g->flow.ports = load_word(skb, nhoff); + case IPPROTO_ICMP: + g->flow.ip_proto = ip_proto; + update_stats(skb, g); + break; + default: + break; + } +} + +PROG(PARSE_IP)(struct __sk_buff *skb) +{ + struct globals *g = this_cpu_globals(); + __u32 nhoff, verlen, ip_proto; + + if (!g) + return 0; + + nhoff = skb->cb[0]; + + if (unlikely(ip_is_fragment(skb, nhoff))) + return 0; + + ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol)); + + if (ip_proto != IPPROTO_GRE) { + g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr)); + g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr)); + } + + verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/); + nhoff += (verlen & 0xF) << 2; + + skb->cb[0] = nhoff; + parse_ip_proto(skb, g, ip_proto); + return 0; +} + +PROG(PARSE_IPV6)(struct __sk_buff *skb) +{ + struct globals *g = this_cpu_globals(); + __u32 nhoff, ip_proto; + + if (!g) + return 0; + + nhoff = skb->cb[0]; + + ip_proto = load_byte(skb, + nhoff + offsetof(struct ipv6hdr, nexthdr)); + g->flow.src = ipv6_addr_hash(skb, + nhoff + offsetof(struct ipv6hdr, saddr)); + g->flow.dst = ipv6_addr_hash(skb, + nhoff + offsetof(struct ipv6hdr, daddr)); + nhoff += sizeof(struct ipv6hdr); + + skb->cb[0] = nhoff; + parse_ip_proto(skb, g, ip_proto); + return 0; +} + +PROG(PARSE_VLAN)(struct __sk_buff *skb) +{ + __u32 nhoff, proto; + + nhoff = skb->cb[0]; + + proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, + h_vlan_encapsulated_proto)); + nhoff += sizeof(struct vlan_hdr); + skb->cb[0] = nhoff; + + parse_eth_proto(skb, proto); + + return 0; +} + +PROG(PARSE_MPLS)(struct __sk_buff *skb) +{ + __u32 nhoff, label; + + nhoff = skb->cb[0]; + + label = load_word(skb, nhoff); + nhoff += sizeof(struct mpls_label); + skb->cb[0] = nhoff; + + if (label & MPLS_LS_S_MASK) { + __u8 verlen = load_byte(skb, nhoff); + if ((verlen & 0xF0) == 4) + parse_eth_proto(skb, ETH_P_IP); + else + parse_eth_proto(skb, ETH_P_IPV6); + } else { + parse_eth_proto(skb, ETH_P_MPLS_UC); + } + + return 0; +} + +SEC("socket/0") +int main_prog(struct __sk_buff *skb) +{ + __u32 nhoff = ETH_HLEN; + __u32 proto = load_half(skb, 12); + + skb->cb[0] = nhoff; + parse_eth_proto(skb, proto); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c new file mode 100644 index 000000000..2617772d0 --- /dev/null +++ b/samples/bpf/sockex3_user.c @@ -0,0 +1,66 @@ +#include <stdio.h> +#include <assert.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" +#include <unistd.h> +#include <arpa/inet.h> + +struct flow_keys { + __be32 src; + __be32 dst; + union { + __be32 ports; + __be16 port16[2]; + }; + __u32 ip_proto; +}; + +struct pair { + __u64 packets; + __u64 bytes; +}; + +int main(int argc, char **argv) +{ + char filename[256]; + FILE *f; + int i, sock; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + sock = open_raw_sock("lo"); + + assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4], + sizeof(__u32)) == 0); + + if (argc > 1) + f = popen("ping -c5 localhost", "r"); + else + f = popen("netperf -l 4 localhost", "r"); + (void) f; + + for (i = 0; i < 5; i++) { + struct flow_keys key = {}, next_key; + struct pair value; + + sleep(1); + printf("IP src.port -> dst.port bytes packets\n"); + while (bpf_get_next_key(map_fd[2], &key, &next_key) == 0) { + bpf_lookup_elem(map_fd[2], &next_key, &value); + printf("%s.%05d -> %s.%05d %12lld %12lld\n", + inet_ntoa((struct in_addr){htonl(next_key.src)}), + next_key.port16[0], + inet_ntoa((struct in_addr){htonl(next_key.dst)}), + next_key.port16[1], + value.bytes, value.packets); + key = next_key; + } + } + return 0; +} diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c index 7c27710f8..9bfb2eb34 100644 --- a/samples/bpf/tcbpf1_kern.c +++ b/samples/bpf/tcbpf1_kern.c @@ -21,7 +21,7 @@ static inline void set_dst_mac(struct __sk_buff *skb, char *mac) static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos) { - __u8 old_tos = load_byte(skb, BPF_LL_OFF + TOS_OFF); + __u8 old_tos = load_byte(skb, TOS_OFF); bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2); bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0); @@ -34,7 +34,7 @@ static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos) static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip) { - __u32 old_ip = _htonl(load_word(skb, BPF_LL_OFF + IP_SRC_OFF)); + __u32 old_ip = _htonl(load_word(skb, IP_SRC_OFF)); bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip)); bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip)); @@ -44,7 +44,7 @@ static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip) #define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest)) static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port) { - __u16 old_port = htons(load_half(skb, BPF_LL_OFF + TCP_DPORT_OFF)); + __u16 old_port = htons(load_half(skb, TCP_DPORT_OFF)); bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port)); bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0); @@ -53,7 +53,7 @@ static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port) SEC("classifier") int bpf_prog1(struct __sk_buff *skb) { - __u8 proto = load_byte(skb, BPF_LL_OFF + ETH_HLEN + offsetof(struct iphdr, protocol)); + __u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); long *value; if (proto == IPPROTO_TCP) { diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index 12f3780af..693605997 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -29,6 +29,7 @@ struct bpf_test { ACCEPT, REJECT } result; + enum bpf_prog_type prog_type; }; static struct bpf_test tests[] = { @@ -743,6 +744,84 @@ static struct bpf_test tests[] = { .errstr = "different pointers", .result = REJECT, }, + { + "check skb->mark is not writeable by sockets", + .insns = { + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check skb->tc_index is not writeable by sockets", + .insns = { + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check non-u32 access to cb", + .insns = { + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check out of range skb->cb access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[60])), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_ACT, + }, + { + "write skb fields from socket prog", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4])), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, cb[2])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "write skb fields from tc_cls_act prog", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tc_index)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, }; static int probe_filter_length(struct bpf_insn *fp) @@ -775,6 +854,7 @@ static int test(void) for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_insn *prog = tests[i].insns; + int prog_type = tests[i].prog_type; int prog_len = probe_filter_length(prog); int *fixup = tests[i].fixup; int map_fd = -1; @@ -789,8 +869,8 @@ static int test(void) } printf("#%d %s ", i, tests[i].descr); - prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, - prog_len * sizeof(struct bpf_insn), + prog_fd = bpf_prog_load(prog_type ?: BPF_PROG_TYPE_SOCKET_FILTER, + prog, prog_len * sizeof(struct bpf_insn), "GPL", 0); if (tests[i].result == ACCEPT) { diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c index 19ec1cfc4..dc50f4f29 100644 --- a/samples/bpf/tracex2_kern.c +++ b/samples/bpf/tracex2_kern.c @@ -62,11 +62,18 @@ static unsigned int log2l(unsigned long v) return log2(v); } +struct hist_key { + char comm[16]; + u64 pid_tgid; + u64 uid_gid; + u32 index; +}; + struct bpf_map_def SEC("maps") my_hist_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct hist_key), .value_size = sizeof(long), - .max_entries = 64, + .max_entries = 1024, }; SEC("kprobe/sys_write") @@ -75,11 +82,18 @@ int bpf_prog3(struct pt_regs *ctx) long write_size = ctx->dx; /* arg3 */ long init_val = 1; long *value; - u32 index = log2l(write_size); + struct hist_key key = {}; + + key.index = log2l(write_size); + key.pid_tgid = bpf_get_current_pid_tgid(); + key.uid_gid = bpf_get_current_uid_gid(); + bpf_get_current_comm(&key.comm, sizeof(key.comm)); - value = bpf_map_lookup_elem(&my_hist_map, &index); + value = bpf_map_lookup_elem(&my_hist_map, &key); if (value) __sync_fetch_and_add(value, 1); + else + bpf_map_update_elem(&my_hist_map, &key, &init_val, BPF_ANY); return 0; } char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index 91b8d0896..cd0241c14 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -3,6 +3,7 @@ #include <stdlib.h> #include <signal.h> #include <linux/bpf.h> +#include <string.h> #include "libbpf.h" #include "bpf_load.h" @@ -20,23 +21,42 @@ static void stars(char *str, long val, long max, int width) str[i] = '\0'; } -static void print_hist(int fd) +struct task { + char comm[16]; + __u64 pid_tgid; + __u64 uid_gid; +}; + +struct hist_key { + struct task t; + __u32 index; +}; + +#define SIZE sizeof(struct task) + +static void print_hist_for_pid(int fd, void *task) { - int key; + struct hist_key key = {}, next_key; + char starstr[MAX_STARS]; long value; long data[MAX_INDEX] = {}; - char starstr[MAX_STARS]; - int i; int max_ind = -1; long max_value = 0; + int i, ind; - for (key = 0; key < MAX_INDEX; key++) { - bpf_lookup_elem(fd, &key, &value); - data[key] = value; - if (value && key > max_ind) - max_ind = key; + while (bpf_get_next_key(fd, &key, &next_key) == 0) { + if (memcmp(&next_key, task, SIZE)) { + key = next_key; + continue; + } + bpf_lookup_elem(fd, &next_key, &value); + ind = next_key.index; + data[ind] = value; + if (value && ind > max_ind) + max_ind = ind; if (value > max_value) max_value = value; + key = next_key; } printf(" syscall write() stats\n"); @@ -48,6 +68,35 @@ static void print_hist(int fd) MAX_STARS, starstr); } } + +static void print_hist(int fd) +{ + struct hist_key key = {}, next_key; + static struct task tasks[1024]; + int task_cnt = 0; + int i; + + while (bpf_get_next_key(fd, &key, &next_key) == 0) { + int found = 0; + + for (i = 0; i < task_cnt; i++) + if (memcmp(&tasks[i], &next_key, SIZE) == 0) + found = 1; + if (!found) + memcpy(&tasks[task_cnt++], &next_key, SIZE); + key = next_key; + } + + for (i = 0; i < task_cnt; i++) { + printf("\npid %d cmd %s uid %d\n", + (__u32) tasks[i].pid_tgid, + tasks[i].comm, + (__u32) tasks[i].uid_gid); + print_hist_for_pid(fd, &tasks[i]); + } + +} + static void int_exit(int sig) { print_hist(map_fd[1]); diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c new file mode 100644 index 000000000..b71fe07a7 --- /dev/null +++ b/samples/bpf/tracex5_kern.c @@ -0,0 +1,75 @@ +/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/ptrace.h> +#include <linux/version.h> +#include <uapi/linux/bpf.h> +#include <uapi/linux/seccomp.h> +#include "bpf_helpers.h" + +#define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F + +struct bpf_map_def SEC("maps") progs = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u32), + .max_entries = 1024, +}; + +SEC("kprobe/seccomp_phase1") +int bpf_prog1(struct pt_regs *ctx) +{ + struct seccomp_data sd = {}; + + bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); + + /* dispatch into next BPF program depending on syscall number */ + bpf_tail_call(ctx, &progs, sd.nr); + + /* fall through -> unknown syscall */ + if (sd.nr >= __NR_getuid && sd.nr <= __NR_getsid) { + char fmt[] = "syscall=%d (one of get/set uid/pid/gid)\n"; + bpf_trace_printk(fmt, sizeof(fmt), sd.nr); + } + return 0; +} + +/* we jump here when syscall number == __NR_write */ +PROG(__NR_write)(struct pt_regs *ctx) +{ + struct seccomp_data sd = {}; + + bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); + if (sd.args[2] == 512) { + char fmt[] = "write(fd=%d, buf=%p, size=%d)\n"; + bpf_trace_printk(fmt, sizeof(fmt), + sd.args[0], sd.args[1], sd.args[2]); + } + return 0; +} + +PROG(__NR_read)(struct pt_regs *ctx) +{ + struct seccomp_data sd = {}; + + bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); + if (sd.args[2] > 128 && sd.args[2] <= 1024) { + char fmt[] = "read(fd=%d, buf=%p, size=%d)\n"; + bpf_trace_printk(fmt, sizeof(fmt), + sd.args[0], sd.args[1], sd.args[2]); + } + return 0; +} + +PROG(__NR_mmap)(struct pt_regs *ctx) +{ + char fmt[] = "mmap\n"; + bpf_trace_printk(fmt, sizeof(fmt)); + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c new file mode 100644 index 000000000..a04dd3cd4 --- /dev/null +++ b/samples/bpf/tracex5_user.c @@ -0,0 +1,46 @@ +#include <stdio.h> +#include <linux/bpf.h> +#include <unistd.h> +#include <linux/filter.h> +#include <linux/seccomp.h> +#include <sys/prctl.h> +#include "libbpf.h" +#include "bpf_load.h" + +/* install fake seccomp program to enable seccomp code path inside the kernel, + * so that our kprobe attached to seccomp_phase1() can be triggered + */ +static void install_accept_all_seccomp(void) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])), + .filter = filter, + }; + if (prctl(PR_SET_SECCOMP, 2, &prog)) + perror("prctl"); +} + +int main(int ac, char **argv) +{ + FILE *f; + char filename[256]; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + install_accept_all_seccomp(); + + f = popen("dd if=/dev/zero of=/dev/null count=5", "r"); + (void) f; + + read_trace_pipe(); + + return 0; +} |