summaryrefslogtreecommitdiff
path: root/samples
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-08-05 17:04:01 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-08-05 17:04:01 -0300
commit57f0f512b273f60d52568b8c6b77e17f5636edc0 (patch)
tree5e910f0e82173f4ef4f51111366a3f1299037a7b /samples
Initial import
Diffstat (limited to 'samples')
-rw-r--r--samples/Kconfig80
-rw-r--r--samples/Makefile5
-rw-r--r--samples/bpf/Makefile50
-rw-r--r--samples/bpf/bpf_helpers.h53
-rw-r--r--samples/bpf/bpf_load.c312
-rw-r--r--samples/bpf/bpf_load.h27
-rw-r--r--samples/bpf/libbpf.c135
-rw-r--r--samples/bpf/libbpf.h190
-rw-r--r--samples/bpf/sock_example.c101
-rw-r--r--samples/bpf/sockex1_kern.c29
-rw-r--r--samples/bpf/sockex1_user.c49
-rw-r--r--samples/bpf/sockex2_kern.c221
-rw-r--r--samples/bpf/sockex2_user.c49
-rw-r--r--samples/bpf/tcbpf1_kern.c67
-rw-r--r--samples/bpf/test_maps.c291
-rw-r--r--samples/bpf/test_verifier.c835
-rw-r--r--samples/bpf/tracex1_kern.c50
-rw-r--r--samples/bpf/tracex1_user.c25
-rw-r--r--samples/bpf/tracex2_kern.c86
-rw-r--r--samples/bpf/tracex2_user.c95
-rw-r--r--samples/bpf/tracex3_kern.c89
-rw-r--r--samples/bpf/tracex3_user.c150
-rw-r--r--samples/bpf/tracex4_kern.c54
-rw-r--r--samples/bpf/tracex4_user.c69
-rw-r--r--samples/hidraw/.gitignore1
-rw-r--r--samples/hidraw/Makefile12
-rw-r--r--samples/hidraw/hid-example.c181
-rw-r--r--samples/hw_breakpoint/Makefile1
-rw-r--r--samples/hw_breakpoint/data_breakpoint.c90
-rw-r--r--samples/kdb/Makefile1
-rw-r--r--samples/kdb/kdb_hello.c60
-rw-r--r--samples/kdbus/.gitignore1
-rw-r--r--samples/kdbus/Makefile9
-rw-r--r--samples/kdbus/kdbus-api.h114
-rw-r--r--samples/kdbus/kdbus-workers.c1345
-rw-r--r--samples/kfifo/Makefile1
-rw-r--r--samples/kfifo/bytestream-example.c194
-rw-r--r--samples/kfifo/dma-example.c143
-rw-r--r--samples/kfifo/inttype-example.c185
-rw-r--r--samples/kfifo/record-example.c201
-rw-r--r--samples/kobject/Makefile1
-rw-r--r--samples/kobject/kobject-example.c146
-rw-r--r--samples/kobject/kset-example.c289
-rw-r--r--samples/kprobes/Makefile5
-rw-r--r--samples/kprobes/jprobe_example.c67
-rw-r--r--samples/kprobes/kprobe_example.c109
-rw-r--r--samples/kprobes/kretprobe_example.c107
-rw-r--r--samples/livepatch/Makefile1
-rw-r--r--samples/livepatch/livepatch-sample.c91
-rw-r--r--samples/pktgen/pktgen.conf-1-159
-rwxr-xr-xsamples/pktgen/pktgen.conf-1-1-flows67
-rwxr-xr-xsamples/pktgen/pktgen.conf-1-1-ip660
-rwxr-xr-xsamples/pktgen/pktgen.conf-1-1-ip6-rdos63
-rwxr-xr-xsamples/pktgen/pktgen.conf-1-1-rdos64
-rwxr-xr-xsamples/pktgen/pktgen.conf-1-269
-rw-r--r--samples/pktgen/pktgen.conf-2-166
-rw-r--r--samples/pktgen/pktgen.conf-2-273
-rw-r--r--samples/rpmsg/Makefile1
-rw-r--r--samples/rpmsg/rpmsg_client_sample.c100
-rw-r--r--samples/seccomp/.gitignore3
-rw-r--r--samples/seccomp/Makefile48
-rw-r--r--samples/seccomp/bpf-direct.c190
-rw-r--r--samples/seccomp/bpf-fancy.c104
-rw-r--r--samples/seccomp/bpf-helper.c95
-rw-r--r--samples/seccomp/bpf-helper.h243
-rw-r--r--samples/seccomp/dropper.c68
-rw-r--r--samples/trace_events/Makefile14
-rw-r--r--samples/trace_events/trace-events-sample.c130
-rw-r--r--samples/trace_events/trace-events-sample.h523
-rw-r--r--samples/uhid/Makefile10
-rw-r--r--samples/uhid/uhid-example.c464
71 files changed, 8981 insertions, 0 deletions
diff --git a/samples/Kconfig b/samples/Kconfig
new file mode 100644
index 000000000..a4c6b2f8f
--- /dev/null
+++ b/samples/Kconfig
@@ -0,0 +1,80 @@
+menuconfig SAMPLES
+ bool "Sample kernel code"
+ help
+ You can build and test sample kernel code here.
+
+if SAMPLES
+
+config SAMPLE_TRACE_EVENTS
+ tristate "Build trace_events examples -- loadable modules only"
+ depends on EVENT_TRACING && m
+ help
+ This build trace event example modules.
+
+config SAMPLE_KOBJECT
+ tristate "Build kobject examples -- loadable modules only"
+ depends on m
+ help
+ This config option will allow you to build a number of
+ different kobject sample modules showing how to use kobjects,
+ ksets, and ktypes properly.
+
+ If in doubt, say "N" here.
+
+config SAMPLE_KPROBES
+ tristate "Build kprobes examples -- loadable modules only"
+ depends on KPROBES && m
+ help
+ This build several kprobes example modules.
+
+config SAMPLE_KRETPROBES
+ tristate "Build kretprobes example -- loadable modules only"
+ default m
+ depends on SAMPLE_KPROBES && KRETPROBES
+
+config SAMPLE_HW_BREAKPOINT
+ tristate "Build kernel hardware breakpoint examples -- loadable module only"
+ depends on HAVE_HW_BREAKPOINT && m
+ help
+ This builds kernel hardware breakpoint example modules.
+
+config SAMPLE_KFIFO
+ tristate "Build kfifo examples -- loadable modules only"
+ depends on m
+ help
+ This config option will allow you to build a number of
+ different kfifo sample modules showing how to use the
+ generic kfifo API.
+
+ If in doubt, say "N" here.
+
+config SAMPLE_KDB
+ tristate "Build kdb command example -- loadable modules only"
+ depends on KGDB_KDB && m
+ help
+ Build an example of how to dynamically add the hello
+ command to the kdb shell.
+
+config SAMPLE_KDBUS
+ bool "Build kdbus API example"
+ depends on KDBUS
+ help
+ Build an example of how the kdbus API can be used from
+ userspace.
+
+config SAMPLE_RPMSG_CLIENT
+ tristate "Build rpmsg client sample -- loadable modules only"
+ depends on RPMSG && m
+ help
+ Build an rpmsg client sample driver, which demonstrates how
+ to communicate with an AMP-configured remote processor over
+ the rpmsg bus.
+
+config SAMPLE_LIVEPATCH
+ tristate "Build live patching sample -- loadable modules only"
+ depends on LIVEPATCH && m
+ help
+ Builds a sample live patch that replaces the procfs handler
+ for /proc/cmdline to print "this has been live patched".
+
+endif # SAMPLES
diff --git a/samples/Makefile b/samples/Makefile
new file mode 100644
index 000000000..f0ad51e5b
--- /dev/null
+++ b/samples/Makefile
@@ -0,0 +1,5 @@
+# Makefile for Linux samples code
+
+obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \
+ hw_breakpoint/ kfifo/ kdb/ kdbus/ hidraw/ rpmsg/ \
+ seccomp/
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
new file mode 100644
index 000000000..76e3458a5
--- /dev/null
+++ b/samples/bpf/Makefile
@@ -0,0 +1,50 @@
+# kbuild trick to avoid linker error. Can be omitted if a module is built.
+obj- := dummy.o
+
+# List of programs to build
+hostprogs-y := test_verifier test_maps
+hostprogs-y += sock_example
+hostprogs-y += sockex1
+hostprogs-y += sockex2
+hostprogs-y += tracex1
+hostprogs-y += tracex2
+hostprogs-y += tracex3
+hostprogs-y += tracex4
+
+test_verifier-objs := test_verifier.o libbpf.o
+test_maps-objs := test_maps.o libbpf.o
+sock_example-objs := sock_example.o libbpf.o
+sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
+sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
+tracex1-objs := bpf_load.o libbpf.o tracex1_user.o
+tracex2-objs := bpf_load.o libbpf.o tracex2_user.o
+tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
+tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+always += sockex1_kern.o
+always += sockex2_kern.o
+always += tracex1_kern.o
+always += tracex2_kern.o
+always += tracex3_kern.o
+always += tracex4_kern.o
+always += tcbpf1_kern.o
+
+HOSTCFLAGS += -I$(objtree)/usr/include
+
+HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
+HOSTLOADLIBES_sockex1 += -lelf
+HOSTLOADLIBES_sockex2 += -lelf
+HOSTLOADLIBES_tracex1 += -lelf
+HOSTLOADLIBES_tracex2 += -lelf
+HOSTLOADLIBES_tracex3 += -lelf
+HOSTLOADLIBES_tracex4 += -lelf -lrt
+
+# point this to your LLVM backend with bpf support
+LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
+
+%.o: %.c
+ clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
+ -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
+ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
new file mode 100644
index 000000000..f960b5fb3
--- /dev/null
+++ b/samples/bpf/bpf_helpers.h
@@ -0,0 +1,53 @@
+#ifndef __BPF_HELPERS_H
+#define __BPF_HELPERS_H
+
+/* helper macro to place programs, maps, license in
+ * different sections in elf_bpf file. Section names
+ * are interpreted by elf_bpf loader
+ */
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+/* helper functions called from eBPF programs written in C */
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+ (void *) BPF_FUNC_map_lookup_elem;
+static int (*bpf_map_update_elem)(void *map, void *key, void *value,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_map_update_elem;
+static int (*bpf_map_delete_elem)(void *map, void *key) =
+ (void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
+ (void *) BPF_FUNC_probe_read;
+static unsigned long long (*bpf_ktime_get_ns)(void) =
+ (void *) BPF_FUNC_ktime_get_ns;
+static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
+ (void *) BPF_FUNC_trace_printk;
+
+/* llvm builtin functions that eBPF C program may use to
+ * emit BPF_LD_ABS and BPF_LD_IND instructions
+ */
+struct sk_buff;
+unsigned long long load_byte(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.byte");
+unsigned long long load_half(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.half");
+unsigned long long load_word(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.word");
+
+/* a helper structure used by eBPF C program
+ * to describe map attributes to elf_bpf loader
+ */
+struct bpf_map_def {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+};
+
+static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
+ (void *) BPF_FUNC_skb_store_bytes;
+static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+ (void *) BPF_FUNC_l3_csum_replace;
+static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+ (void *) BPF_FUNC_l4_csum_replace;
+
+#endif
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
new file mode 100644
index 000000000..38dac5a53
--- /dev/null
+++ b/samples/bpf/bpf_load.c
@@ -0,0 +1,312 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <poll.h>
+#include "libbpf.h"
+#include "bpf_helpers.h"
+#include "bpf_load.h"
+
+#define DEBUGFS "/sys/kernel/debug/tracing/"
+
+static char license[128];
+static int kern_version;
+static bool processed_sec[128];
+int map_fd[MAX_MAPS];
+int prog_fd[MAX_PROGS];
+int event_fd[MAX_PROGS];
+int prog_cnt;
+
+static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
+{
+ bool is_socket = strncmp(event, "socket", 6) == 0;
+ bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
+ bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
+ enum bpf_prog_type prog_type;
+ char buf[256];
+ int fd, efd, err, id;
+ struct perf_event_attr attr = {};
+
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+
+ if (is_socket) {
+ prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ } else if (is_kprobe || is_kretprobe) {
+ prog_type = BPF_PROG_TYPE_KPROBE;
+ } else {
+ printf("Unknown event '%s'\n", event);
+ return -1;
+ }
+
+ if (is_kprobe || is_kretprobe) {
+ if (is_kprobe)
+ event += 7;
+ else
+ event += 10;
+
+ snprintf(buf, sizeof(buf),
+ "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
+ is_kprobe ? 'p' : 'r', event, event);
+ err = system(buf);
+ if (err < 0) {
+ printf("failed to create kprobe '%s' error '%s'\n",
+ event, strerror(errno));
+ return -1;
+ }
+ }
+
+ fd = bpf_prog_load(prog_type, prog, size, license, kern_version);
+
+ if (fd < 0) {
+ printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
+ return -1;
+ }
+
+ prog_fd[prog_cnt++] = fd;
+
+ if (is_socket)
+ return 0;
+
+ strcpy(buf, DEBUGFS);
+ strcat(buf, "events/kprobes/");
+ strcat(buf, event);
+ strcat(buf, "/id");
+
+ efd = open(buf, O_RDONLY, 0);
+ if (efd < 0) {
+ printf("failed to open event %s\n", event);
+ return -1;
+ }
+
+ err = read(efd, buf, sizeof(buf));
+ if (err < 0 || err >= sizeof(buf)) {
+ printf("read from '%s' failed '%s'\n", event, strerror(errno));
+ return -1;
+ }
+
+ close(efd);
+
+ buf[err] = 0;
+ id = atoi(buf);
+ attr.config = id;
+
+ efd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
+ if (efd < 0) {
+ printf("event %d fd %d err %s\n", id, efd, strerror(errno));
+ return -1;
+ }
+ event_fd[prog_cnt - 1] = efd;
+ ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
+ ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
+
+ return 0;
+}
+
+static int load_maps(struct bpf_map_def *maps, int len)
+{
+ int i;
+
+ for (i = 0; i < len / sizeof(struct bpf_map_def); i++) {
+
+ map_fd[i] = bpf_create_map(maps[i].type,
+ maps[i].key_size,
+ maps[i].value_size,
+ maps[i].max_entries);
+ if (map_fd[i] < 0)
+ return 1;
+ }
+ return 0;
+}
+
+static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname,
+ GElf_Shdr *shdr, Elf_Data **data)
+{
+ Elf_Scn *scn;
+
+ scn = elf_getscn(elf, i);
+ if (!scn)
+ return 1;
+
+ if (gelf_getshdr(scn, shdr) != shdr)
+ return 2;
+
+ *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name);
+ if (!*shname || !shdr->sh_size)
+ return 3;
+
+ *data = elf_getdata(scn, 0);
+ if (!*data || elf_getdata(scn, *data) != NULL)
+ return 4;
+
+ return 0;
+}
+
+static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
+ GElf_Shdr *shdr, struct bpf_insn *insn)
+{
+ int i, nrels;
+
+ nrels = shdr->sh_size / shdr->sh_entsize;
+
+ for (i = 0; i < nrels; i++) {
+ GElf_Sym sym;
+ GElf_Rel rel;
+ unsigned int insn_idx;
+
+ gelf_getrel(data, i, &rel);
+
+ insn_idx = rel.r_offset / sizeof(struct bpf_insn);
+
+ gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym);
+
+ if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
+ printf("invalid relo for insn[%d].code 0x%x\n",
+ insn_idx, insn[insn_idx].code);
+ return 1;
+ }
+ insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
+ insn[insn_idx].imm = map_fd[sym.st_value / sizeof(struct bpf_map_def)];
+ }
+
+ return 0;
+}
+
+int load_bpf_file(char *path)
+{
+ int fd, i;
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr, shdr_prog;
+ Elf_Data *data, *data_prog, *symbols = NULL;
+ char *shname, *shname_prog;
+
+ if (elf_version(EV_CURRENT) == EV_NONE)
+ return 1;
+
+ fd = open(path, O_RDONLY, 0);
+ if (fd < 0)
+ return 1;
+
+ elf = elf_begin(fd, ELF_C_READ, NULL);
+
+ if (!elf)
+ return 1;
+
+ if (gelf_getehdr(elf, &ehdr) != &ehdr)
+ return 1;
+
+ /* clear all kprobes */
+ i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
+
+ /* scan over all elf sections to get license and map info */
+ for (i = 1; i < ehdr.e_shnum; i++) {
+
+ if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
+ continue;
+
+ if (0) /* helpful for llvm debugging */
+ printf("section %d:%s data %p size %zd link %d flags %d\n",
+ i, shname, data->d_buf, data->d_size,
+ shdr.sh_link, (int) shdr.sh_flags);
+
+ if (strcmp(shname, "license") == 0) {
+ processed_sec[i] = true;
+ memcpy(license, data->d_buf, data->d_size);
+ } else if (strcmp(shname, "version") == 0) {
+ processed_sec[i] = true;
+ if (data->d_size != sizeof(int)) {
+ printf("invalid size of version section %zd\n",
+ data->d_size);
+ return 1;
+ }
+ memcpy(&kern_version, data->d_buf, sizeof(int));
+ } else if (strcmp(shname, "maps") == 0) {
+ processed_sec[i] = true;
+ if (load_maps(data->d_buf, data->d_size))
+ return 1;
+ } else if (shdr.sh_type == SHT_SYMTAB) {
+ symbols = data;
+ }
+ }
+
+ /* load programs that need map fixup (relocations) */
+ for (i = 1; i < ehdr.e_shnum; i++) {
+
+ if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
+ continue;
+ if (shdr.sh_type == SHT_REL) {
+ struct bpf_insn *insns;
+
+ if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog,
+ &shdr_prog, &data_prog))
+ continue;
+
+ insns = (struct bpf_insn *) data_prog->d_buf;
+
+ processed_sec[shdr.sh_info] = true;
+ processed_sec[i] = true;
+
+ if (parse_relo_and_apply(data, symbols, &shdr, insns))
+ continue;
+
+ if (memcmp(shname_prog, "kprobe/", 7) == 0 ||
+ memcmp(shname_prog, "kretprobe/", 10) == 0 ||
+ memcmp(shname_prog, "socket", 6) == 0)
+ load_and_attach(shname_prog, insns, data_prog->d_size);
+ }
+ }
+
+ /* load programs that don't use maps */
+ for (i = 1; i < ehdr.e_shnum; i++) {
+
+ if (processed_sec[i])
+ continue;
+
+ if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
+ continue;
+
+ if (memcmp(shname, "kprobe/", 7) == 0 ||
+ memcmp(shname, "kretprobe/", 10) == 0 ||
+ memcmp(shname, "socket", 6) == 0)
+ load_and_attach(shname, data->d_buf, data->d_size);
+ }
+
+ close(fd);
+ return 0;
+}
+
+void read_trace_pipe(void)
+{
+ int trace_fd;
+
+ trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
+ if (trace_fd < 0)
+ return;
+
+ while (1) {
+ static char buf[4096];
+ ssize_t sz;
+
+ sz = read(trace_fd, buf, sizeof(buf));
+ if (sz > 0) {
+ buf[sz] = 0;
+ puts(buf);
+ }
+ }
+}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
new file mode 100644
index 000000000..cbd7c2b53
--- /dev/null
+++ b/samples/bpf/bpf_load.h
@@ -0,0 +1,27 @@
+#ifndef __BPF_LOAD_H
+#define __BPF_LOAD_H
+
+#define MAX_MAPS 32
+#define MAX_PROGS 32
+
+extern int map_fd[MAX_MAPS];
+extern int prog_fd[MAX_PROGS];
+extern int event_fd[MAX_PROGS];
+
+/* parses elf file compiled by llvm .c->.o
+ * . parses 'maps' section and creates maps via BPF syscall
+ * . parses 'license' section and passes it to syscall
+ * . parses elf relocations for BPF maps and adjusts BPF_LD_IMM64 insns by
+ * storing map_fd into insn->imm and marking such insns as BPF_PSEUDO_MAP_FD
+ * . loads eBPF programs via BPF syscall
+ *
+ * One ELF file can contain multiple BPF programs which will be loaded
+ * and their FDs stored stored in prog_fd array
+ *
+ * returns zero on success
+ */
+int load_bpf_file(char *path);
+
+void read_trace_pipe(void);
+
+#endif
diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c
new file mode 100644
index 000000000..7e1efa7e2
--- /dev/null
+++ b/samples/bpf/libbpf.c
@@ -0,0 +1,135 @@
+/* eBPF mini library */
+#include <stdlib.h>
+#include <stdio.h>
+#include <linux/unistd.h>
+#include <unistd.h>
+#include <string.h>
+#include <linux/netlink.h>
+#include <linux/bpf.h>
+#include <errno.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <linux/if_packet.h>
+#include <arpa/inet.h>
+#include "libbpf.h"
+
+static __u64 ptr_to_u64(void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
+ int max_entries)
+{
+ union bpf_attr attr = {
+ .map_type = map_type,
+ .key_size = key_size,
+ .value_size = value_size,
+ .max_entries = max_entries
+ };
+
+ return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags)
+{
+ union bpf_attr attr = {
+ .map_fd = fd,
+ .key = ptr_to_u64(key),
+ .value = ptr_to_u64(value),
+ .flags = flags,
+ };
+
+ return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_lookup_elem(int fd, void *key, void *value)
+{
+ union bpf_attr attr = {
+ .map_fd = fd,
+ .key = ptr_to_u64(key),
+ .value = ptr_to_u64(value),
+ };
+
+ return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_delete_elem(int fd, void *key)
+{
+ union bpf_attr attr = {
+ .map_fd = fd,
+ .key = ptr_to_u64(key),
+ };
+
+ return syscall(__NR_bpf, BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_get_next_key(int fd, void *key, void *next_key)
+{
+ union bpf_attr attr = {
+ .map_fd = fd,
+ .key = ptr_to_u64(key),
+ .next_key = ptr_to_u64(next_key),
+ };
+
+ return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+}
+
+#define ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u))
+
+char bpf_log_buf[LOG_BUF_SIZE];
+
+int bpf_prog_load(enum bpf_prog_type prog_type,
+ const struct bpf_insn *insns, int prog_len,
+ const char *license, int kern_version)
+{
+ union bpf_attr attr = {
+ .prog_type = prog_type,
+ .insns = ptr_to_u64((void *) insns),
+ .insn_cnt = prog_len / sizeof(struct bpf_insn),
+ .license = ptr_to_u64((void *) license),
+ .log_buf = ptr_to_u64(bpf_log_buf),
+ .log_size = LOG_BUF_SIZE,
+ .log_level = 1,
+ };
+
+ /* assign one field outside of struct init to make sure any
+ * padding is zero initialized
+ */
+ attr.kern_version = kern_version;
+
+ bpf_log_buf[0] = 0;
+
+ return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
+int open_raw_sock(const char *name)
+{
+ struct sockaddr_ll sll;
+ int sock;
+
+ sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL));
+ if (sock < 0) {
+ printf("cannot create raw socket\n");
+ return -1;
+ }
+
+ memset(&sll, 0, sizeof(sll));
+ sll.sll_family = AF_PACKET;
+ sll.sll_ifindex = if_nametoindex(name);
+ sll.sll_protocol = htons(ETH_P_ALL);
+ if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
+ printf("bind to %s: %s\n", name, strerror(errno));
+ close(sock);
+ return -1;
+ }
+
+ return sock;
+}
+
+int perf_event_open(struct perf_event_attr *attr, int pid, int cpu,
+ int group_fd, unsigned long flags)
+{
+ return syscall(__NR_perf_event_open, attr, pid, cpu,
+ group_fd, flags);
+}
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
new file mode 100644
index 000000000..7235e292a
--- /dev/null
+++ b/samples/bpf/libbpf.h
@@ -0,0 +1,190 @@
+/* eBPF mini library */
+#ifndef __LIBBPF_H
+#define __LIBBPF_H
+
+struct bpf_insn;
+
+int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
+ int max_entries);
+int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags);
+int bpf_lookup_elem(int fd, void *key, void *value);
+int bpf_delete_elem(int fd, void *key);
+int bpf_get_next_key(int fd, void *key, void *next_key);
+
+int bpf_prog_load(enum bpf_prog_type prog_type,
+ const struct bpf_insn *insns, int insn_len,
+ const char *license, int kern_version);
+
+#define LOG_BUF_SIZE 65536
+extern char bpf_log_buf[LOG_BUF_SIZE];
+
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
+
+#define BPF_ALU64_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_ALU32_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
+
+#define BPF_ALU64_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_ALU32_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Short form of mov, dst_reg = src_reg */
+
+#define BPF_MOV64_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* Short form of mov, dst_reg = imm32 */
+
+#define BPF_MOV64_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
+#define BPF_LD_IMM64(DST, IMM) \
+ BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = (__u32) (IMM) }), \
+ ((struct bpf_insn) { \
+ .code = 0, /* zero is reserved opcode */ \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((__u64) (IMM)) >> 32 })
+
+#ifndef BPF_PSEUDO_MAP_FD
+# define BPF_PSEUDO_MAP_FD 1
+#endif
+
+/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
+#define BPF_LD_MAP_FD(DST, MAP_FD) \
+ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+
+
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
+
+#define BPF_LD_ABS(SIZE, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Raw code statement block */
+
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = CODE, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Program exit */
+
+#define BPF_EXIT_INSN() \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_EXIT, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0 })
+
+/* create RAW socket and bind to interface 'name' */
+int open_raw_sock(const char *name);
+
+struct perf_event_attr;
+int perf_event_open(struct perf_event_attr *attr, int pid, int cpu,
+ int group_fd, unsigned long flags);
+#endif
diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c
new file mode 100644
index 000000000..a0ce251c5
--- /dev/null
+++ b/samples/bpf/sock_example.c
@@ -0,0 +1,101 @@
+/* eBPF example program:
+ * - creates arraymap in kernel with key 4 bytes and value 8 bytes
+ *
+ * - loads eBPF program:
+ * r0 = skb->data[ETH_HLEN + offsetof(struct iphdr, protocol)];
+ * *(u32*)(fp - 4) = r0;
+ * // assuming packet is IPv4, lookup ip->proto in a map
+ * value = bpf_map_lookup_elem(map_fd, fp - 4);
+ * if (value)
+ * (*(u64*)value) += 1;
+ *
+ * - attaches this program to eth0 raw socket
+ *
+ * - every second user space reads map[tcp], map[udp], map[icmp] to see
+ * how many packets of given protocol were seen on eth0
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <assert.h>
+#include <linux/bpf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <stddef.h>
+#include "libbpf.h"
+
+static int test_sock(void)
+{
+ int sock = -1, map_fd, prog_fd, i, key;
+ long long value = 0, tcp_cnt, udp_cnt, icmp_cnt;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
+ 256);
+ if (map_fd < 0) {
+ printf("failed to create map '%s'\n", strerror(errno));
+ goto cleanup;
+ }
+
+ struct bpf_insn prog[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LD_ABS(BPF_B, ETH_HLEN + offsetof(struct iphdr, protocol) /* R0 = ip->proto */),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+ BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(),
+ };
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog),
+ "GPL", 0);
+ if (prog_fd < 0) {
+ printf("failed to load prog '%s'\n", strerror(errno));
+ goto cleanup;
+ }
+
+ sock = open_raw_sock("lo");
+
+ if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd,
+ sizeof(prog_fd)) < 0) {
+ printf("setsockopt %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ for (i = 0; i < 10; i++) {
+ key = IPPROTO_TCP;
+ assert(bpf_lookup_elem(map_fd, &key, &tcp_cnt) == 0);
+
+ key = IPPROTO_UDP;
+ assert(bpf_lookup_elem(map_fd, &key, &udp_cnt) == 0);
+
+ key = IPPROTO_ICMP;
+ assert(bpf_lookup_elem(map_fd, &key, &icmp_cnt) == 0);
+
+ printf("TCP %lld UDP %lld ICMP %lld packets\n",
+ tcp_cnt, udp_cnt, icmp_cnt);
+ sleep(1);
+ }
+
+cleanup:
+ /* maps, programs, raw sockets will auto cleanup on process exit */
+ return 0;
+}
+
+int main(void)
+{
+ FILE *f;
+
+ f = popen("ping -c5 localhost", "r");
+ (void)f;
+
+ return test_sock();
+}
diff --git a/samples/bpf/sockex1_kern.c b/samples/bpf/sockex1_kern.c
new file mode 100644
index 000000000..ed18e9a49
--- /dev/null
+++ b/samples/bpf/sockex1_kern.c
@@ -0,0 +1,29 @@
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = 256,
+};
+
+SEC("socket1")
+int bpf_prog1(struct __sk_buff *skb)
+{
+ int index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
+ long *value;
+
+ if (skb->pkt_type != PACKET_OUTGOING)
+ return 0;
+
+ value = bpf_map_lookup_elem(&my_map, &index);
+ if (value)
+ __sync_fetch_and_add(value, skb->len);
+
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c
new file mode 100644
index 000000000..678ce4693
--- /dev/null
+++ b/samples/bpf/sockex1_user.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <assert.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <unistd.h>
+#include <arpa/inet.h>
+
+int main(int ac, char **argv)
+{
+ char filename[256];
+ FILE *f;
+ int i, sock;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ sock = open_raw_sock("lo");
+
+ assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd,
+ sizeof(prog_fd[0])) == 0);
+
+ f = popen("ping -c5 localhost", "r");
+ (void) f;
+
+ for (i = 0; i < 5; i++) {
+ long long tcp_cnt, udp_cnt, icmp_cnt;
+ int key;
+
+ key = IPPROTO_TCP;
+ assert(bpf_lookup_elem(map_fd[0], &key, &tcp_cnt) == 0);
+
+ key = IPPROTO_UDP;
+ assert(bpf_lookup_elem(map_fd[0], &key, &udp_cnt) == 0);
+
+ key = IPPROTO_ICMP;
+ assert(bpf_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0);
+
+ printf("TCP %lld UDP %lld ICMP %lld bytes\n",
+ tcp_cnt, udp_cnt, icmp_cnt);
+ sleep(1);
+ }
+
+ return 0;
+}
diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c
new file mode 100644
index 000000000..ba0e177ff
--- /dev/null
+++ b/samples/bpf/sockex2_kern.c
@@ -0,0 +1,221 @@
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+#include <uapi/linux/in.h>
+#include <uapi/linux/if.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/if_tunnel.h>
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1FFF
+
+struct vlan_hdr {
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+
+struct flow_keys {
+ __be32 src;
+ __be32 dst;
+ union {
+ __be32 ports;
+ __be16 port16[2];
+ };
+ __u16 thoff;
+ __u8 ip_proto;
+};
+
+static inline int proto_ports_offset(__u64 proto)
+{
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_DCCP:
+ case IPPROTO_ESP:
+ case IPPROTO_SCTP:
+ case IPPROTO_UDPLITE:
+ return 0;
+ case IPPROTO_AH:
+ return 4;
+ default:
+ return 0;
+ }
+}
+
+static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
+{
+ return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
+ & (IP_MF | IP_OFFSET);
+}
+
+static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
+{
+ __u64 w0 = load_word(ctx, off);
+ __u64 w1 = load_word(ctx, off + 4);
+ __u64 w2 = load_word(ctx, off + 8);
+ __u64 w3 = load_word(ctx, off + 12);
+
+ return (__u32)(w0 ^ w1 ^ w2 ^ w3);
+}
+
+static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
+ struct flow_keys *flow)
+{
+ __u64 verlen;
+
+ if (unlikely(ip_is_fragment(skb, nhoff)))
+ *ip_proto = 0;
+ else
+ *ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
+
+ if (*ip_proto != IPPROTO_GRE) {
+ flow->src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
+ flow->dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
+ }
+
+ verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
+ if (likely(verlen == 0x45))
+ nhoff += 20;
+ else
+ nhoff += (verlen & 0xF) << 2;
+
+ return nhoff;
+}
+
+static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
+ struct flow_keys *flow)
+{
+ *ip_proto = load_byte(skb,
+ nhoff + offsetof(struct ipv6hdr, nexthdr));
+ flow->src = ipv6_addr_hash(skb,
+ nhoff + offsetof(struct ipv6hdr, saddr));
+ flow->dst = ipv6_addr_hash(skb,
+ nhoff + offsetof(struct ipv6hdr, daddr));
+ nhoff += sizeof(struct ipv6hdr);
+
+ return nhoff;
+}
+
+static inline bool flow_dissector(struct __sk_buff *skb, struct flow_keys *flow)
+{
+ __u64 nhoff = ETH_HLEN;
+ __u64 ip_proto;
+ __u64 proto = load_half(skb, 12);
+ int poff;
+
+ if (proto == ETH_P_8021AD) {
+ proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
+ h_vlan_encapsulated_proto));
+ nhoff += sizeof(struct vlan_hdr);
+ }
+
+ if (proto == ETH_P_8021Q) {
+ proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
+ h_vlan_encapsulated_proto));
+ nhoff += sizeof(struct vlan_hdr);
+ }
+
+ if (likely(proto == ETH_P_IP))
+ nhoff = parse_ip(skb, nhoff, &ip_proto, flow);
+ else if (proto == ETH_P_IPV6)
+ nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow);
+ else
+ return false;
+
+ switch (ip_proto) {
+ case IPPROTO_GRE: {
+ struct gre_hdr {
+ __be16 flags;
+ __be16 proto;
+ };
+
+ __u64 gre_flags = load_half(skb,
+ nhoff + offsetof(struct gre_hdr, flags));
+ __u64 gre_proto = load_half(skb,
+ nhoff + offsetof(struct gre_hdr, proto));
+
+ if (gre_flags & (GRE_VERSION|GRE_ROUTING))
+ break;
+
+ proto = gre_proto;
+ nhoff += 4;
+ if (gre_flags & GRE_CSUM)
+ nhoff += 4;
+ if (gre_flags & GRE_KEY)
+ nhoff += 4;
+ if (gre_flags & GRE_SEQ)
+ nhoff += 4;
+
+ if (proto == ETH_P_8021Q) {
+ proto = load_half(skb,
+ nhoff + offsetof(struct vlan_hdr,
+ h_vlan_encapsulated_proto));
+ nhoff += sizeof(struct vlan_hdr);
+ }
+
+ if (proto == ETH_P_IP)
+ nhoff = parse_ip(skb, nhoff, &ip_proto, flow);
+ else if (proto == ETH_P_IPV6)
+ nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow);
+ else
+ return false;
+ break;
+ }
+ case IPPROTO_IPIP:
+ nhoff = parse_ip(skb, nhoff, &ip_proto, flow);
+ break;
+ case IPPROTO_IPV6:
+ nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow);
+ break;
+ default:
+ break;
+ }
+
+ flow->ip_proto = ip_proto;
+ poff = proto_ports_offset(ip_proto);
+ if (poff >= 0) {
+ nhoff += poff;
+ flow->ports = load_word(skb, nhoff);
+ }
+
+ flow->thoff = (__u16) nhoff;
+
+ return true;
+}
+
+struct pair {
+ long packets;
+ long bytes;
+};
+
+struct bpf_map_def SEC("maps") hash_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__be32),
+ .value_size = sizeof(struct pair),
+ .max_entries = 1024,
+};
+
+SEC("socket2")
+int bpf_prog2(struct __sk_buff *skb)
+{
+ struct flow_keys flow;
+ struct pair *value;
+ u32 key;
+
+ if (!flow_dissector(skb, &flow))
+ return 0;
+
+ key = flow.dst;
+ value = bpf_map_lookup_elem(&hash_map, &key);
+ if (value) {
+ __sync_fetch_and_add(&value->packets, 1);
+ __sync_fetch_and_add(&value->bytes, skb->len);
+ } else {
+ struct pair val = {1, skb->len};
+
+ bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
+ }
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c
new file mode 100644
index 000000000..29a276d76
--- /dev/null
+++ b/samples/bpf/sockex2_user.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <assert.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <unistd.h>
+#include <arpa/inet.h>
+
+struct pair {
+ __u64 packets;
+ __u64 bytes;
+};
+
+int main(int ac, char **argv)
+{
+ char filename[256];
+ FILE *f;
+ int i, sock;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ sock = open_raw_sock("lo");
+
+ assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd,
+ sizeof(prog_fd[0])) == 0);
+
+ f = popen("ping -c5 localhost", "r");
+ (void) f;
+
+ for (i = 0; i < 5; i++) {
+ int key = 0, next_key;
+ struct pair value;
+
+ while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) {
+ bpf_lookup_elem(map_fd[0], &next_key, &value);
+ printf("ip %s bytes %lld packets %lld\n",
+ inet_ntoa((struct in_addr){htonl(next_key)}),
+ value.bytes, value.packets);
+ key = next_key;
+ }
+ sleep(1);
+ }
+ return 0;
+}
diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c
new file mode 100644
index 000000000..7c27710f8
--- /dev/null
+++ b/samples/bpf/tcbpf1_kern.c
@@ -0,0 +1,67 @@
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/in.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/filter.h>
+
+#include "bpf_helpers.h"
+
+/* compiler workaround */
+#define _htonl __builtin_bswap32
+
+static inline void set_dst_mac(struct __sk_buff *skb, char *mac)
+{
+ bpf_skb_store_bytes(skb, 0, mac, ETH_ALEN, 1);
+}
+
+#define IP_CSUM_OFF (ETH_HLEN + offsetof(struct iphdr, check))
+#define TOS_OFF (ETH_HLEN + offsetof(struct iphdr, tos))
+
+static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
+{
+ __u8 old_tos = load_byte(skb, BPF_LL_OFF + TOS_OFF);
+
+ bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2);
+ bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0);
+}
+
+#define TCP_CSUM_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, check))
+#define IP_SRC_OFF (ETH_HLEN + offsetof(struct iphdr, saddr))
+
+#define IS_PSEUDO 0x10
+
+static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
+{
+ __u32 old_ip = _htonl(load_word(skb, BPF_LL_OFF + IP_SRC_OFF));
+
+ bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip));
+ bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
+ bpf_skb_store_bytes(skb, IP_SRC_OFF, &new_ip, sizeof(new_ip), 0);
+}
+
+#define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest))
+static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
+{
+ __u16 old_port = htons(load_half(skb, BPF_LL_OFF + TCP_DPORT_OFF));
+
+ bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port));
+ bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0);
+}
+
+SEC("classifier")
+int bpf_prog1(struct __sk_buff *skb)
+{
+ __u8 proto = load_byte(skb, BPF_LL_OFF + ETH_HLEN + offsetof(struct iphdr, protocol));
+ long *value;
+
+ if (proto == IPPROTO_TCP) {
+ set_ip_tos(skb, 8);
+ set_tcp_ip_src(skb, 0xA010101);
+ set_tcp_dest_port(skb, 5001);
+ }
+
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_maps.c b/samples/bpf/test_maps.c
new file mode 100644
index 000000000..6299ee95c
--- /dev/null
+++ b/samples/bpf/test_maps.c
@@ -0,0 +1,291 @@
+/*
+ * Testsuite for eBPF maps
+ *
+ * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include "libbpf.h"
+
+/* sanity tests for map API */
+static void test_hashmap_sanity(int i, void *data)
+{
+ long long key, next_key, value;
+ int map_fd;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), 2);
+ if (map_fd < 0) {
+ printf("failed to create hashmap '%s'\n", strerror(errno));
+ exit(1);
+ }
+
+ key = 1;
+ value = 1234;
+ /* insert key=1 element */
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
+
+ value = 0;
+ /* BPF_NOEXIST means: add new element if it doesn't exist */
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
+ /* key=1 already exists */
+ errno == EEXIST);
+
+ assert(bpf_update_elem(map_fd, &key, &value, -1) == -1 && errno == EINVAL);
+
+ /* check that key=1 can be found */
+ assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 1234);
+
+ key = 2;
+ /* check that key=2 is not found */
+ assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT);
+
+ /* BPF_EXIST means: update existing element */
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == -1 &&
+ /* key=2 is not there */
+ errno == ENOENT);
+
+ /* insert key=2 element */
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0);
+
+ /* key=1 and key=2 were inserted, check that key=0 cannot be inserted
+ * due to max_entries limit
+ */
+ key = 0;
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == E2BIG);
+
+ /* check that key = 0 doesn't exist */
+ assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT);
+
+ /* iterate over two elements */
+ assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 &&
+ (next_key == 1 || next_key == 2));
+ assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 &&
+ (next_key == 1 || next_key == 2));
+ assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 &&
+ errno == ENOENT);
+
+ /* delete both elements */
+ key = 1;
+ assert(bpf_delete_elem(map_fd, &key) == 0);
+ key = 2;
+ assert(bpf_delete_elem(map_fd, &key) == 0);
+ assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT);
+
+ key = 0;
+ /* check that map is empty */
+ assert(bpf_get_next_key(map_fd, &key, &next_key) == -1 &&
+ errno == ENOENT);
+ close(map_fd);
+}
+
+static void test_arraymap_sanity(int i, void *data)
+{
+ int key, next_key, map_fd;
+ long long value;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), 2);
+ if (map_fd < 0) {
+ printf("failed to create arraymap '%s'\n", strerror(errno));
+ exit(1);
+ }
+
+ key = 1;
+ value = 1234;
+ /* insert key=1 element */
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
+
+ value = 0;
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == EEXIST);
+
+ /* check that key=1 can be found */
+ assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 1234);
+
+ key = 0;
+ /* check that key=0 is also found and zero initialized */
+ assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 0);
+
+
+ /* key=0 and key=1 were inserted, check that key=2 cannot be inserted
+ * due to max_entries limit
+ */
+ key = 2;
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == -1 &&
+ errno == E2BIG);
+
+ /* check that key = 2 doesn't exist */
+ assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT);
+
+ /* iterate over two elements */
+ assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 &&
+ next_key == 0);
+ assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 &&
+ next_key == 1);
+ assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 &&
+ errno == ENOENT);
+
+ /* delete shouldn't succeed */
+ key = 1;
+ assert(bpf_delete_elem(map_fd, &key) == -1 && errno == EINVAL);
+
+ close(map_fd);
+}
+
+#define MAP_SIZE (32 * 1024)
+static void test_map_large(void)
+{
+ struct bigkey {
+ int a;
+ char b[116];
+ long long c;
+ } key;
+ int map_fd, i, value;
+
+ /* allocate 4Mbyte of memory */
+ map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ MAP_SIZE);
+ if (map_fd < 0) {
+ printf("failed to create large map '%s'\n", strerror(errno));
+ exit(1);
+ }
+
+ for (i = 0; i < MAP_SIZE; i++) {
+ key = (struct bigkey) {.c = i};
+ value = i;
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0);
+ }
+ key.c = -1;
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == E2BIG);
+
+ /* iterate through all elements */
+ for (i = 0; i < MAP_SIZE; i++)
+ assert(bpf_get_next_key(map_fd, &key, &key) == 0);
+ assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT);
+
+ key.c = 0;
+ assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 0);
+ key.a = 1;
+ assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT);
+
+ close(map_fd);
+}
+
+/* fork N children and wait for them to complete */
+static void run_parallel(int tasks, void (*fn)(int i, void *data), void *data)
+{
+ pid_t pid[tasks];
+ int i;
+
+ for (i = 0; i < tasks; i++) {
+ pid[i] = fork();
+ if (pid[i] == 0) {
+ fn(i, data);
+ exit(0);
+ } else if (pid[i] == -1) {
+ printf("couldn't spawn #%d process\n", i);
+ exit(1);
+ }
+ }
+ for (i = 0; i < tasks; i++) {
+ int status;
+
+ assert(waitpid(pid[i], &status, 0) == pid[i]);
+ assert(status == 0);
+ }
+}
+
+static void test_map_stress(void)
+{
+ run_parallel(100, test_hashmap_sanity, NULL);
+ run_parallel(100, test_arraymap_sanity, NULL);
+}
+
+#define TASKS 1024
+#define DO_UPDATE 1
+#define DO_DELETE 0
+static void do_work(int fn, void *data)
+{
+ int map_fd = ((int *)data)[0];
+ int do_update = ((int *)data)[1];
+ int i;
+ int key, value;
+
+ for (i = fn; i < MAP_SIZE; i += TASKS) {
+ key = value = i;
+ if (do_update)
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0);
+ else
+ assert(bpf_delete_elem(map_fd, &key) == 0);
+ }
+}
+
+static void test_map_parallel(void)
+{
+ int i, map_fd, key = 0, value = 0;
+ int data[2];
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ MAP_SIZE);
+ if (map_fd < 0) {
+ printf("failed to create map for parallel test '%s'\n",
+ strerror(errno));
+ exit(1);
+ }
+
+ data[0] = map_fd;
+ data[1] = DO_UPDATE;
+ /* use the same map_fd in children to add elements to this map
+ * child_0 adds key=0, key=1024, key=2048, ...
+ * child_1 adds key=1, key=1025, key=2049, ...
+ * child_1023 adds key=1023, ...
+ */
+ run_parallel(TASKS, do_work, data);
+
+ /* check that key=0 is already there */
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == EEXIST);
+
+ /* check that all elements were inserted */
+ key = -1;
+ for (i = 0; i < MAP_SIZE; i++)
+ assert(bpf_get_next_key(map_fd, &key, &key) == 0);
+ assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT);
+
+ /* another check for all elements */
+ for (i = 0; i < MAP_SIZE; i++) {
+ key = MAP_SIZE - i - 1;
+ assert(bpf_lookup_elem(map_fd, &key, &value) == 0 &&
+ value == key);
+ }
+
+ /* now let's delete all elemenets in parallel */
+ data[1] = DO_DELETE;
+ run_parallel(TASKS, do_work, data);
+
+ /* nothing should be left */
+ key = -1;
+ assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT);
+}
+
+int main(void)
+{
+ test_hashmap_sanity(0, NULL);
+ test_arraymap_sanity(0, NULL);
+ test_map_large();
+ test_map_parallel();
+ test_map_stress();
+ printf("test_maps: OK\n");
+ return 0;
+}
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
new file mode 100644
index 000000000..12f3780af
--- /dev/null
+++ b/samples/bpf/test_verifier.c
@@ -0,0 +1,835 @@
+/*
+ * Testsuite for eBPF verifier
+ *
+ * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <errno.h>
+#include <linux/unistd.h>
+#include <string.h>
+#include <linux/filter.h>
+#include <stddef.h>
+#include "libbpf.h"
+
+#define MAX_INSNS 512
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+
+struct bpf_test {
+ const char *descr;
+ struct bpf_insn insns[MAX_INSNS];
+ int fixup[32];
+ const char *errstr;
+ enum {
+ ACCEPT,
+ REJECT
+ } result;
+};
+
+static struct bpf_test tests[] = {
+ {
+ "add+sub+mul",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 2),
+ BPF_MOV64_IMM(BPF_REG_2, 3),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 3),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "unreachable",
+ .insns = {
+ BPF_EXIT_INSN(),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unreachable",
+ .result = REJECT,
+ },
+ {
+ "unreachable2",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unreachable",
+ .result = REJECT,
+ },
+ {
+ "out of range jump",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "out of range jump2",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, -2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "test1 ld_imm64",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid BPF_LD_IMM insn",
+ .result = REJECT,
+ },
+ {
+ "test2 ld_imm64",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid BPF_LD_IMM insn",
+ .result = REJECT,
+ },
+ {
+ "test3 ld_imm64",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "test4 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "test5 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "no bpf_exit",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2),
+ },
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "loop (back-edge)",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "loop2 (back-edge)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "conditional loop",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "read uninitialized register",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R2 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "read invalid register",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R15 is invalid",
+ .result = REJECT,
+ },
+ {
+ "program doesn't init R0 before exit",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "program doesn't init R0 before exit in all branches",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "stack out of bounds",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid stack",
+ .result = REJECT,
+ },
+ {
+ "invalid call insn1",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_CALL uses reserved",
+ .result = REJECT,
+ },
+ {
+ "invalid call insn2",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_CALL uses reserved",
+ .result = REJECT,
+ },
+ {
+ "invalid function call",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 1234567),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid func 1234567",
+ .result = REJECT,
+ },
+ {
+ "uninitialized stack1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {2},
+ .errstr = "invalid indirect read from stack",
+ .result = REJECT,
+ },
+ {
+ "uninitialized stack2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -8),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid read from stack",
+ .result = REJECT,
+ },
+ {
+ "check valid spill/fill",
+ .insns = {
+ /* spill R1(ctx) into stack */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+
+ /* fill it back into R2 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
+
+ /* should be able to access R0 = *(R2 + 8) */
+ /* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check corrupted spill/fill",
+ .insns = {
+ /* spill R1(ctx) into stack */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+
+ /* mess up with R1 pointer on stack */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23),
+
+ /* fill back into R0 should fail */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "corrupted spill",
+ .result = REJECT,
+ },
+ {
+ "invalid src register in STX",
+ .insns = {
+ BPF_STX_MEM(BPF_B, BPF_REG_10, -1, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R15 is invalid",
+ .result = REJECT,
+ },
+ {
+ "invalid dst register in STX",
+ .insns = {
+ BPF_STX_MEM(BPF_B, 14, BPF_REG_10, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R14 is invalid",
+ .result = REJECT,
+ },
+ {
+ "invalid dst register in ST",
+ .insns = {
+ BPF_ST_MEM(BPF_B, 14, -1, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R14 is invalid",
+ .result = REJECT,
+ },
+ {
+ "invalid src register in LDX",
+ .insns = {
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, 12, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R12 is invalid",
+ .result = REJECT,
+ },
+ {
+ "invalid dst register in LDX",
+ .insns = {
+ BPF_LDX_MEM(BPF_B, 11, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R11 is invalid",
+ .result = REJECT,
+ },
+ {
+ "junk insn",
+ .insns = {
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid BPF_LD_IMM",
+ .result = REJECT,
+ },
+ {
+ "junk insn2",
+ .insns = {
+ BPF_RAW_INSN(1, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_LDX uses reserved fields",
+ .result = REJECT,
+ },
+ {
+ "junk insn3",
+ .insns = {
+ BPF_RAW_INSN(-1, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid BPF_ALU opcode f0",
+ .result = REJECT,
+ },
+ {
+ "junk insn4",
+ .insns = {
+ BPF_RAW_INSN(-1, -1, -1, -1, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid BPF_ALU opcode f0",
+ .result = REJECT,
+ },
+ {
+ "junk insn5",
+ .insns = {
+ BPF_RAW_INSN(0x7f, -1, -1, -1, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_ALU uses reserved fields",
+ .result = REJECT,
+ },
+ {
+ "misaligned read from stack",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned access",
+ .result = REJECT,
+ },
+ {
+ "invalid map_fd for function call",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "fd 0 is not pointing to valid bpf_map",
+ .result = REJECT,
+ },
+ {
+ "don't check return value before access",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr = "R0 invalid mem access 'map_value_or_null'",
+ .result = REJECT,
+ },
+ {
+ "access memory with incorrect alignment",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr = "misaligned access",
+ .result = REJECT,
+ },
+ {
+ "sometimes access memory with incorrect alignment",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr = "R0 invalid mem access",
+ .result = REJECT,
+ },
+ {
+ "jump test 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "jump test 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 14),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 11),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 5),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "jump test 3",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 19),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 15),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -32),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 11),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -40),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 7),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -56),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {24},
+ .result = ACCEPT,
+ },
+ {
+ "jump test 4",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "jump test 5",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "access skb fields ok",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, queue_mapping)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, protocol)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, vlan_present)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, vlan_tci)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "access skb fields bad1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "access skb fields bad2",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {4},
+ .errstr = "different pointers",
+ .result = REJECT,
+ },
+ {
+ "access skb fields bad3",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -12),
+ },
+ .fixup = {6},
+ .errstr = "different pointers",
+ .result = REJECT,
+ },
+ {
+ "access skb fields bad4",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 3),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -13),
+ },
+ .fixup = {7},
+ .errstr = "different pointers",
+ .result = REJECT,
+ },
+};
+
+static int probe_filter_length(struct bpf_insn *fp)
+{
+ int len = 0;
+
+ for (len = MAX_INSNS - 1; len > 0; --len)
+ if (fp[len].code != 0 || fp[len].imm != 0)
+ break;
+
+ return len + 1;
+}
+
+static int create_map(void)
+{
+ long long key, value = 0;
+ int map_fd;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), 1024);
+ if (map_fd < 0) {
+ printf("failed to create map '%s'\n", strerror(errno));
+ }
+
+ return map_fd;
+}
+
+static int test(void)
+{
+ int prog_fd, i, pass_cnt = 0, err_cnt = 0;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ struct bpf_insn *prog = tests[i].insns;
+ int prog_len = probe_filter_length(prog);
+ int *fixup = tests[i].fixup;
+ int map_fd = -1;
+
+ if (*fixup) {
+ map_fd = create_map();
+
+ do {
+ prog[*fixup].imm = map_fd;
+ fixup++;
+ } while (*fixup);
+ }
+ printf("#%d %s ", i, tests[i].descr);
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+ prog_len * sizeof(struct bpf_insn),
+ "GPL", 0);
+
+ if (tests[i].result == ACCEPT) {
+ if (prog_fd < 0) {
+ printf("FAIL\nfailed to load prog '%s'\n",
+ strerror(errno));
+ printf("%s", bpf_log_buf);
+ err_cnt++;
+ goto fail;
+ }
+ } else {
+ if (prog_fd >= 0) {
+ printf("FAIL\nunexpected success to load\n");
+ printf("%s", bpf_log_buf);
+ err_cnt++;
+ goto fail;
+ }
+ if (strstr(bpf_log_buf, tests[i].errstr) == 0) {
+ printf("FAIL\nunexpected error message: %s",
+ bpf_log_buf);
+ err_cnt++;
+ goto fail;
+ }
+ }
+
+ pass_cnt++;
+ printf("OK\n");
+fail:
+ if (map_fd >= 0)
+ close(map_fd);
+ close(prog_fd);
+
+ }
+ printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, err_cnt);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test();
+}
diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c
new file mode 100644
index 000000000..316204637
--- /dev/null
+++ b/samples/bpf/tracex1_kern.c
@@ -0,0 +1,50 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <uapi/linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+
+/* kprobe is NOT a stable ABI
+ * kernel functions can be removed, renamed or completely change semantics.
+ * Number of arguments and their positions can change, etc.
+ * In such case this bpf+kprobe example will no longer be meaningful
+ */
+SEC("kprobe/__netif_receive_skb_core")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ /* attaches to kprobe netif_receive_skb,
+ * looks for packets on loobpack device and prints them
+ */
+ char devname[IFNAMSIZ] = {};
+ struct net_device *dev;
+ struct sk_buff *skb;
+ int len;
+
+ /* non-portable! works for the given kernel only */
+ skb = (struct sk_buff *) ctx->di;
+
+ dev = _(skb->dev);
+
+ len = _(skb->len);
+
+ bpf_probe_read(devname, sizeof(devname), dev->name);
+
+ if (devname[0] == 'l' && devname[1] == 'o') {
+ char fmt[] = "skb %p len %d\n";
+ /* using bpf_trace_printk() for DEBUG ONLY */
+ bpf_trace_printk(fmt, sizeof(fmt), skb, len);
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex1_user.c b/samples/bpf/tracex1_user.c
new file mode 100644
index 000000000..31a48183b
--- /dev/null
+++ b/samples/bpf/tracex1_user.c
@@ -0,0 +1,25 @@
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+int main(int ac, char **argv)
+{
+ FILE *f;
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ f = popen("taskset 1 ping -c5 localhost", "r");
+ (void) f;
+
+ read_trace_pipe();
+
+ return 0;
+}
diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c
new file mode 100644
index 000000000..19ec1cfc4
--- /dev/null
+++ b/samples/bpf/tracex2_kern.c
@@ -0,0 +1,86 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(long),
+ .value_size = sizeof(long),
+ .max_entries = 1024,
+};
+
+/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
+ * example will no longer be meaningful
+ */
+SEC("kprobe/kfree_skb")
+int bpf_prog2(struct pt_regs *ctx)
+{
+ long loc = 0;
+ long init_val = 1;
+ long *value;
+
+ /* x64 specific: read ip of kfree_skb caller.
+ * non-portable version of __builtin_return_address(0)
+ */
+ bpf_probe_read(&loc, sizeof(loc), (void *)ctx->sp);
+
+ value = bpf_map_lookup_elem(&my_map, &loc);
+ if (value)
+ *value += 1;
+ else
+ bpf_map_update_elem(&my_map, &loc, &init_val, BPF_ANY);
+ return 0;
+}
+
+static unsigned int log2(unsigned int v)
+{
+ unsigned int r;
+ unsigned int shift;
+
+ r = (v > 0xFFFF) << 4; v >>= r;
+ shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
+ shift = (v > 0xF) << 2; v >>= shift; r |= shift;
+ shift = (v > 0x3) << 1; v >>= shift; r |= shift;
+ r |= (v >> 1);
+ return r;
+}
+
+static unsigned int log2l(unsigned long v)
+{
+ unsigned int hi = v >> 32;
+ if (hi)
+ return log2(hi) + 32;
+ else
+ return log2(v);
+}
+
+struct bpf_map_def SEC("maps") my_hist_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = 64,
+};
+
+SEC("kprobe/sys_write")
+int bpf_prog3(struct pt_regs *ctx)
+{
+ long write_size = ctx->dx; /* arg3 */
+ long init_val = 1;
+ long *value;
+ u32 index = log2l(write_size);
+
+ value = bpf_map_lookup_elem(&my_hist_map, &index);
+ if (value)
+ __sync_fetch_and_add(value, 1);
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
new file mode 100644
index 000000000..91b8d0896
--- /dev/null
+++ b/samples/bpf/tracex2_user.c
@@ -0,0 +1,95 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_INDEX 64
+#define MAX_STARS 38
+
+static void stars(char *str, long val, long max, int width)
+{
+ int i;
+
+ for (i = 0; i < (width * val / max) - 1 && i < width - 1; i++)
+ str[i] = '*';
+ if (val > max)
+ str[i - 1] = '+';
+ str[i] = '\0';
+}
+
+static void print_hist(int fd)
+{
+ int key;
+ long value;
+ long data[MAX_INDEX] = {};
+ char starstr[MAX_STARS];
+ int i;
+ int max_ind = -1;
+ long max_value = 0;
+
+ for (key = 0; key < MAX_INDEX; key++) {
+ bpf_lookup_elem(fd, &key, &value);
+ data[key] = value;
+ if (value && key > max_ind)
+ max_ind = key;
+ if (value > max_value)
+ max_value = value;
+ }
+
+ printf(" syscall write() stats\n");
+ printf(" byte_size : count distribution\n");
+ for (i = 1; i <= max_ind + 1; i++) {
+ stars(starstr, data[i - 1], max_value, MAX_STARS);
+ printf("%8ld -> %-8ld : %-8ld |%-*s|\n",
+ (1l << i) >> 1, (1l << i) - 1, data[i - 1],
+ MAX_STARS, starstr);
+ }
+}
+static void int_exit(int sig)
+{
+ print_hist(map_fd[1]);
+ exit(0);
+}
+
+int main(int ac, char **argv)
+{
+ char filename[256];
+ long key, next_key, value;
+ FILE *f;
+ int i;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ signal(SIGINT, int_exit);
+
+ /* start 'ping' in the background to have some kfree_skb events */
+ f = popen("ping -c5 localhost", "r");
+ (void) f;
+
+ /* start 'dd' in the background to have plenty of 'write' syscalls */
+ f = popen("dd if=/dev/zero of=/dev/null count=5000000", "r");
+ (void) f;
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ for (i = 0; i < 5; i++) {
+ key = 0;
+ while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) {
+ bpf_lookup_elem(map_fd[0], &next_key, &value);
+ printf("location 0x%lx count %ld\n", next_key, value);
+ key = next_key;
+ }
+ if (key)
+ printf("\n");
+ sleep(1);
+ }
+ print_hist(map_fd[1]);
+
+ return 0;
+}
diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c
new file mode 100644
index 000000000..255ff2792
--- /dev/null
+++ b/samples/bpf/tracex3_kern.c
@@ -0,0 +1,89 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(long),
+ .value_size = sizeof(u64),
+ .max_entries = 4096,
+};
+
+/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
+ * example will no longer be meaningful
+ */
+SEC("kprobe/blk_mq_start_request")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ long rq = ctx->di;
+ u64 val = bpf_ktime_get_ns();
+
+ bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY);
+ return 0;
+}
+
+static unsigned int log2l(unsigned long long n)
+{
+#define S(k) if (n >= (1ull << k)) { i += k; n >>= k; }
+ int i = -(n == 0);
+ S(32); S(16); S(8); S(4); S(2); S(1);
+ return i;
+#undef S
+}
+
+#define SLOTS 100
+
+struct bpf_map_def SEC("maps") lat_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = SLOTS,
+};
+
+SEC("kprobe/blk_update_request")
+int bpf_prog2(struct pt_regs *ctx)
+{
+ long rq = ctx->di;
+ u64 *value, l, base;
+ u32 index;
+
+ value = bpf_map_lookup_elem(&my_map, &rq);
+ if (!value)
+ return 0;
+
+ u64 cur_time = bpf_ktime_get_ns();
+ u64 delta = cur_time - *value;
+
+ bpf_map_delete_elem(&my_map, &rq);
+
+ /* the lines below are computing index = log10(delta)*10
+ * using integer arithmetic
+ * index = 29 ~ 1 usec
+ * index = 59 ~ 1 msec
+ * index = 89 ~ 1 sec
+ * index = 99 ~ 10sec or more
+ * log10(x)*10 = log2(x)*10/log2(10) = log2(x)*3
+ */
+ l = log2l(delta);
+ base = 1ll << l;
+ index = (l * 64 + (delta - base) * 64 / base) * 3 / 64;
+
+ if (index >= SLOTS)
+ index = SLOTS - 1;
+
+ value = bpf_map_lookup_elem(&lat_map, &index);
+ if (value)
+ __sync_fetch_and_add((long *)value, 1);
+
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
new file mode 100644
index 000000000..0aaa933ab
--- /dev/null
+++ b/samples/bpf/tracex3_user.c
@@ -0,0 +1,150 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+
+#define SLOTS 100
+
+static void clear_stats(int fd)
+{
+ __u32 key;
+ __u64 value = 0;
+
+ for (key = 0; key < SLOTS; key++)
+ bpf_update_elem(fd, &key, &value, BPF_ANY);
+}
+
+const char *color[] = {
+ "\033[48;5;255m",
+ "\033[48;5;252m",
+ "\033[48;5;250m",
+ "\033[48;5;248m",
+ "\033[48;5;246m",
+ "\033[48;5;244m",
+ "\033[48;5;242m",
+ "\033[48;5;240m",
+ "\033[48;5;238m",
+ "\033[48;5;236m",
+ "\033[48;5;234m",
+ "\033[48;5;232m",
+};
+const int num_colors = ARRAY_SIZE(color);
+
+const char nocolor[] = "\033[00m";
+
+const char *sym[] = {
+ " ",
+ " ",
+ ".",
+ ".",
+ "*",
+ "*",
+ "o",
+ "o",
+ "O",
+ "O",
+ "#",
+ "#",
+};
+
+bool full_range = false;
+bool text_only = false;
+
+static void print_banner(void)
+{
+ if (full_range)
+ printf("|1ns |10ns |100ns |1us |10us |100us"
+ " |1ms |10ms |100ms |1s |10s\n");
+ else
+ printf("|1us |10us |100us |1ms |10ms "
+ "|100ms |1s |10s\n");
+}
+
+static void print_hist(int fd)
+{
+ __u32 key;
+ __u64 value;
+ __u64 cnt[SLOTS];
+ __u64 max_cnt = 0;
+ __u64 total_events = 0;
+
+ for (key = 0; key < SLOTS; key++) {
+ value = 0;
+ bpf_lookup_elem(fd, &key, &value);
+ cnt[key] = value;
+ total_events += value;
+ if (value > max_cnt)
+ max_cnt = value;
+ }
+ clear_stats(fd);
+ for (key = full_range ? 0 : 29; key < SLOTS; key++) {
+ int c = num_colors * cnt[key] / (max_cnt + 1);
+
+ if (text_only)
+ printf("%s", sym[c]);
+ else
+ printf("%s %s", color[c], nocolor);
+ }
+ printf(" # %lld\n", total_events);
+}
+
+int main(int ac, char **argv)
+{
+ char filename[256];
+ int i;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ for (i = 1; i < ac; i++) {
+ if (strcmp(argv[i], "-a") == 0) {
+ full_range = true;
+ } else if (strcmp(argv[i], "-t") == 0) {
+ text_only = true;
+ } else if (strcmp(argv[i], "-h") == 0) {
+ printf("Usage:\n"
+ " -a display wider latency range\n"
+ " -t text only\n");
+ return 1;
+ }
+ }
+
+ printf(" heatmap of IO latency\n");
+ if (text_only)
+ printf(" %s", sym[num_colors - 1]);
+ else
+ printf(" %s %s", color[num_colors - 1], nocolor);
+ printf(" - many events with this latency\n");
+
+ if (text_only)
+ printf(" %s", sym[0]);
+ else
+ printf(" %s %s", color[0], nocolor);
+ printf(" - few events\n");
+
+ for (i = 0; ; i++) {
+ if (i % 20 == 0)
+ print_banner();
+ print_hist(map_fd[1]);
+ sleep(2);
+ }
+
+ return 0;
+}
diff --git a/samples/bpf/tracex4_kern.c b/samples/bpf/tracex4_kern.c
new file mode 100644
index 000000000..126b80512
--- /dev/null
+++ b/samples/bpf/tracex4_kern.c
@@ -0,0 +1,54 @@
+/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct pair {
+ u64 val;
+ u64 ip;
+};
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(long),
+ .value_size = sizeof(struct pair),
+ .max_entries = 1000000,
+};
+
+/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
+ * example will no longer be meaningful
+ */
+SEC("kprobe/kmem_cache_free")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ long ptr = ctx->si;
+
+ bpf_map_delete_elem(&my_map, &ptr);
+ return 0;
+}
+
+SEC("kretprobe/kmem_cache_alloc_node")
+int bpf_prog2(struct pt_regs *ctx)
+{
+ long ptr = ctx->ax;
+ long ip = 0;
+
+ /* get ip address of kmem_cache_alloc_node() caller */
+ bpf_probe_read(&ip, sizeof(ip), (void *)(ctx->bp + sizeof(ip)));
+
+ struct pair v = {
+ .val = bpf_ktime_get_ns(),
+ .ip = ip,
+ };
+
+ bpf_map_update_elem(&my_map, &ptr, &v, BPF_ANY);
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c
new file mode 100644
index 000000000..bc4a3bdea
--- /dev/null
+++ b/samples/bpf/tracex4_user.c
@@ -0,0 +1,69 @@
+/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <time.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+struct pair {
+ long long val;
+ __u64 ip;
+};
+
+static __u64 time_get_ns(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return ts.tv_sec * 1000000000ull + ts.tv_nsec;
+}
+
+static void print_old_objects(int fd)
+{
+ long long val = time_get_ns();
+ __u64 key, next_key;
+ struct pair v;
+
+ key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */
+
+ key = -1;
+ while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) {
+ bpf_lookup_elem(map_fd[0], &next_key, &v);
+ key = next_key;
+ if (val - v.val < 1000000000ll)
+ /* object was allocated more then 1 sec ago */
+ continue;
+ printf("obj 0x%llx is %2lldsec old was allocated at ip %llx\n",
+ next_key, (val - v.val) / 1000000000ll, v.ip);
+ }
+}
+
+int main(int ac, char **argv)
+{
+ char filename[256];
+ int i;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ for (i = 0; ; i++) {
+ print_old_objects(map_fd[1]);
+ sleep(1);
+ }
+
+ return 0;
+}
diff --git a/samples/hidraw/.gitignore b/samples/hidraw/.gitignore
new file mode 100644
index 000000000..05e51a685
--- /dev/null
+++ b/samples/hidraw/.gitignore
@@ -0,0 +1 @@
+hid-example
diff --git a/samples/hidraw/Makefile b/samples/hidraw/Makefile
new file mode 100644
index 000000000..a9ab96188
--- /dev/null
+++ b/samples/hidraw/Makefile
@@ -0,0 +1,12 @@
+# kbuild trick to avoid linker error. Can be omitted if a module is built.
+obj- := dummy.o
+
+# List of programs to build
+hostprogs-y := hid-example
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+
+HOSTCFLAGS_hid-example.o += -I$(objtree)/usr/include
+
+all: hid-example
diff --git a/samples/hidraw/hid-example.c b/samples/hidraw/hid-example.c
new file mode 100644
index 000000000..92e6c1511
--- /dev/null
+++ b/samples/hidraw/hid-example.c
@@ -0,0 +1,181 @@
+/*
+ * Hidraw Userspace Example
+ *
+ * Copyright (c) 2010 Alan Ott <alan@signal11.us>
+ * Copyright (c) 2010 Signal 11 Software
+ *
+ * The code may be used by anyone for any purpose,
+ * and can serve as a starting point for developing
+ * applications using hidraw.
+ */
+
+/* Linux */
+#include <linux/types.h>
+#include <linux/input.h>
+#include <linux/hidraw.h>
+
+/*
+ * Ugly hack to work around failing compilation on systems that don't
+ * yet populate new version of hidraw.h to userspace.
+ */
+#ifndef HIDIOCSFEATURE
+#warning Please have your distro update the userspace kernel headers
+#define HIDIOCSFEATURE(len) _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x06, len)
+#define HIDIOCGFEATURE(len) _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x07, len)
+#endif
+
+/* Unix */
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/* C */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+
+const char *bus_str(int bus);
+
+int main(int argc, char **argv)
+{
+ int fd;
+ int i, res, desc_size = 0;
+ char buf[256];
+ struct hidraw_report_descriptor rpt_desc;
+ struct hidraw_devinfo info;
+ char *device = "/dev/hidraw0";
+
+ if (argc > 1)
+ device = argv[1];
+
+ /* Open the Device with non-blocking reads. In real life,
+ don't use a hard coded path; use libudev instead. */
+ fd = open(device, O_RDWR|O_NONBLOCK);
+
+ if (fd < 0) {
+ perror("Unable to open device");
+ return 1;
+ }
+
+ memset(&rpt_desc, 0x0, sizeof(rpt_desc));
+ memset(&info, 0x0, sizeof(info));
+ memset(buf, 0x0, sizeof(buf));
+
+ /* Get Report Descriptor Size */
+ res = ioctl(fd, HIDIOCGRDESCSIZE, &desc_size);
+ if (res < 0)
+ perror("HIDIOCGRDESCSIZE");
+ else
+ printf("Report Descriptor Size: %d\n", desc_size);
+
+ /* Get Report Descriptor */
+ rpt_desc.size = desc_size;
+ res = ioctl(fd, HIDIOCGRDESC, &rpt_desc);
+ if (res < 0) {
+ perror("HIDIOCGRDESC");
+ } else {
+ printf("Report Descriptor:\n");
+ for (i = 0; i < rpt_desc.size; i++)
+ printf("%hhx ", rpt_desc.value[i]);
+ puts("\n");
+ }
+
+ /* Get Raw Name */
+ res = ioctl(fd, HIDIOCGRAWNAME(256), buf);
+ if (res < 0)
+ perror("HIDIOCGRAWNAME");
+ else
+ printf("Raw Name: %s\n", buf);
+
+ /* Get Physical Location */
+ res = ioctl(fd, HIDIOCGRAWPHYS(256), buf);
+ if (res < 0)
+ perror("HIDIOCGRAWPHYS");
+ else
+ printf("Raw Phys: %s\n", buf);
+
+ /* Get Raw Info */
+ res = ioctl(fd, HIDIOCGRAWINFO, &info);
+ if (res < 0) {
+ perror("HIDIOCGRAWINFO");
+ } else {
+ printf("Raw Info:\n");
+ printf("\tbustype: %d (%s)\n",
+ info.bustype, bus_str(info.bustype));
+ printf("\tvendor: 0x%04hx\n", info.vendor);
+ printf("\tproduct: 0x%04hx\n", info.product);
+ }
+
+ /* Set Feature */
+ buf[0] = 0x9; /* Report Number */
+ buf[1] = 0xff;
+ buf[2] = 0xff;
+ buf[3] = 0xff;
+ res = ioctl(fd, HIDIOCSFEATURE(4), buf);
+ if (res < 0)
+ perror("HIDIOCSFEATURE");
+ else
+ printf("ioctl HIDIOCGFEATURE returned: %d\n", res);
+
+ /* Get Feature */
+ buf[0] = 0x9; /* Report Number */
+ res = ioctl(fd, HIDIOCGFEATURE(256), buf);
+ if (res < 0) {
+ perror("HIDIOCGFEATURE");
+ } else {
+ printf("ioctl HIDIOCGFEATURE returned: %d\n", res);
+ printf("Report data (not containing the report number):\n\t");
+ for (i = 0; i < res; i++)
+ printf("%hhx ", buf[i]);
+ puts("\n");
+ }
+
+ /* Send a Report to the Device */
+ buf[0] = 0x1; /* Report Number */
+ buf[1] = 0x77;
+ res = write(fd, buf, 2);
+ if (res < 0) {
+ printf("Error: %d\n", errno);
+ perror("write");
+ } else {
+ printf("write() wrote %d bytes\n", res);
+ }
+
+ /* Get a report from the device */
+ res = read(fd, buf, 16);
+ if (res < 0) {
+ perror("read");
+ } else {
+ printf("read() read %d bytes:\n\t", res);
+ for (i = 0; i < res; i++)
+ printf("%hhx ", buf[i]);
+ puts("\n");
+ }
+ close(fd);
+ return 0;
+}
+
+const char *
+bus_str(int bus)
+{
+ switch (bus) {
+ case BUS_USB:
+ return "USB";
+ break;
+ case BUS_HIL:
+ return "HIL";
+ break;
+ case BUS_BLUETOOTH:
+ return "Bluetooth";
+ break;
+ case BUS_VIRTUAL:
+ return "Virtual";
+ break;
+ default:
+ return "Other";
+ break;
+ }
+}
diff --git a/samples/hw_breakpoint/Makefile b/samples/hw_breakpoint/Makefile
new file mode 100644
index 000000000..0f5c31c2f
--- /dev/null
+++ b/samples/hw_breakpoint/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
new file mode 100644
index 000000000..ef7f32291
--- /dev/null
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -0,0 +1,90 @@
+/*
+ * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * usage: insmod data_breakpoint.ko ksym=<ksym_name>
+ *
+ * This file is a kernel module that places a breakpoint over ksym_name kernel
+ * variable using Hardware Breakpoint register. The corresponding handler which
+ * prints a backtrace is invoked every time a write operation is performed on
+ * that variable.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ *
+ * Author: K.Prasad <prasad@linux.vnet.ibm.com>
+ */
+#include <linux/module.h> /* Needed by all modules */
+#include <linux/kernel.h> /* Needed for KERN_INFO */
+#include <linux/init.h> /* Needed for the macros */
+#include <linux/kallsyms.h>
+
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+
+struct perf_event * __percpu *sample_hbp;
+
+static char ksym_name[KSYM_NAME_LEN] = "pid_max";
+module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
+MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
+ " write operations on the kernel symbol");
+
+static void sample_hbp_handler(struct perf_event *bp,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ printk(KERN_INFO "%s value is changed\n", ksym_name);
+ dump_stack();
+ printk(KERN_INFO "Dump stack from sample_hbp_handler\n");
+}
+
+static int __init hw_break_module_init(void)
+{
+ int ret;
+ struct perf_event_attr attr;
+
+ hw_breakpoint_init(&attr);
+ attr.bp_addr = kallsyms_lookup_name(ksym_name);
+ attr.bp_len = HW_BREAKPOINT_LEN_4;
+ attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
+
+ sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler, NULL);
+ if (IS_ERR((void __force *)sample_hbp)) {
+ ret = PTR_ERR((void __force *)sample_hbp);
+ goto fail;
+ }
+
+ printk(KERN_INFO "HW Breakpoint for %s write installed\n", ksym_name);
+
+ return 0;
+
+fail:
+ printk(KERN_INFO "Breakpoint registration failed\n");
+
+ return ret;
+}
+
+static void __exit hw_break_module_exit(void)
+{
+ unregister_wide_hw_breakpoint(sample_hbp);
+ printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name);
+}
+
+module_init(hw_break_module_init);
+module_exit(hw_break_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("K.Prasad");
+MODULE_DESCRIPTION("ksym breakpoint");
diff --git a/samples/kdb/Makefile b/samples/kdb/Makefile
new file mode 100644
index 000000000..fbedf39d9
--- /dev/null
+++ b/samples/kdb/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SAMPLE_KDB) += kdb_hello.o
diff --git a/samples/kdb/kdb_hello.c b/samples/kdb/kdb_hello.c
new file mode 100644
index 000000000..c1c2fa0f6
--- /dev/null
+++ b/samples/kdb/kdb_hello.c
@@ -0,0 +1,60 @@
+/*
+ * Created by: Jason Wessel <jason.wessel@windriver.com>
+ *
+ * Copyright (c) 2010 Wind River Systems, Inc. All Rights Reserved.
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/module.h>
+#include <linux/kdb.h>
+
+/*
+ * All kdb shell command call backs receive argc and argv, where
+ * argv[0] is the command the end user typed
+ */
+static int kdb_hello_cmd(int argc, const char **argv)
+{
+ if (argc > 1)
+ return KDB_ARGCOUNT;
+
+ if (argc)
+ kdb_printf("Hello %s.\n", argv[1]);
+ else
+ kdb_printf("Hello world!\n");
+
+ return 0;
+}
+
+
+static int __init kdb_hello_cmd_init(void)
+{
+ /*
+ * Registration of a dynamically added kdb command is done with
+ * kdb_register() with the arguments being:
+ * 1: The name of the shell command
+ * 2: The function that processes the command
+ * 3: Description of the usage of any arguments
+ * 4: Descriptive text when you run help
+ * 5: Number of characters to complete the command
+ * 0 == type the whole command
+ * 1 == match both "g" and "go" for example
+ */
+ kdb_register("hello", kdb_hello_cmd, "[string]",
+ "Say Hello World or Hello [string]", 0);
+ return 0;
+}
+
+static void __exit kdb_hello_cmd_exit(void)
+{
+ kdb_unregister("hello");
+}
+
+module_init(kdb_hello_cmd_init);
+module_exit(kdb_hello_cmd_exit);
+
+MODULE_AUTHOR("WindRiver");
+MODULE_DESCRIPTION("KDB example to add a hello command");
+MODULE_LICENSE("GPL");
diff --git a/samples/kdbus/.gitignore b/samples/kdbus/.gitignore
new file mode 100644
index 000000000..ee07d9857
--- /dev/null
+++ b/samples/kdbus/.gitignore
@@ -0,0 +1 @@
+kdbus-workers
diff --git a/samples/kdbus/Makefile b/samples/kdbus/Makefile
new file mode 100644
index 000000000..137f84272
--- /dev/null
+++ b/samples/kdbus/Makefile
@@ -0,0 +1,9 @@
+# kbuild trick to avoid linker error. Can be omitted if a module is built.
+obj- := dummy.o
+
+hostprogs-$(CONFIG_SAMPLE_KDBUS) += kdbus-workers
+
+always := $(hostprogs-y)
+
+HOSTCFLAGS_kdbus-workers.o += -I$(objtree)/usr/include
+HOSTLOADLIBES_kdbus-workers := -lrt
diff --git a/samples/kdbus/kdbus-api.h b/samples/kdbus/kdbus-api.h
new file mode 100644
index 000000000..7f3abae18
--- /dev/null
+++ b/samples/kdbus/kdbus-api.h
@@ -0,0 +1,114 @@
+#ifndef KDBUS_API_H
+#define KDBUS_API_H
+
+#include <sys/ioctl.h>
+#include <linux/kdbus.h>
+
+#define KDBUS_ALIGN8(l) (((l) + 7) & ~7)
+#define KDBUS_ITEM_HEADER_SIZE offsetof(struct kdbus_item, data)
+#define KDBUS_ITEM_SIZE(s) KDBUS_ALIGN8((s) + KDBUS_ITEM_HEADER_SIZE)
+#define KDBUS_ITEM_NEXT(item) \
+ (typeof(item))((uint8_t *)(item) + KDBUS_ALIGN8((item)->size))
+#define KDBUS_FOREACH(iter, first, _size) \
+ for ((iter) = (first); \
+ ((uint8_t *)(iter) < (uint8_t *)(first) + (_size)) && \
+ ((uint8_t *)(iter) >= (uint8_t *)(first)); \
+ (iter) = (void *)((uint8_t *)(iter) + KDBUS_ALIGN8((iter)->size)))
+
+static inline int kdbus_cmd_bus_make(int control_fd, struct kdbus_cmd *cmd)
+{
+ int ret = ioctl(control_fd, KDBUS_CMD_BUS_MAKE, cmd);
+ return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_endpoint_make(int bus_fd, struct kdbus_cmd *cmd)
+{
+ int ret = ioctl(bus_fd, KDBUS_CMD_ENDPOINT_MAKE, cmd);
+ return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_endpoint_update(int ep_fd, struct kdbus_cmd *cmd)
+{
+ int ret = ioctl(ep_fd, KDBUS_CMD_ENDPOINT_UPDATE, cmd);
+ return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_hello(int bus_fd, struct kdbus_cmd_hello *cmd)
+{
+ int ret = ioctl(bus_fd, KDBUS_CMD_HELLO, cmd);
+ return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_update(int fd, struct kdbus_cmd *cmd)
+{
+ int ret = ioctl(fd, KDBUS_CMD_UPDATE, cmd);
+ return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_byebye(int conn_fd, struct kdbus_cmd *cmd)
+{
+ int ret = ioctl(conn_fd, KDBUS_CMD_BYEBYE, cmd);
+ return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_free(int conn_fd, struct kdbus_cmd_free *cmd)
+{
+ int ret = ioctl(conn_fd, KDBUS_CMD_FREE, cmd);
+ return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_conn_info(int conn_fd, struct kdbus_cmd_info *cmd)
+{
+ int ret = ioctl(conn_fd, KDBUS_CMD_CONN_INFO, cmd);
+ return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_bus_creator_info(int conn_fd, struct kdbus_cmd_info *cmd)
+{
+ int ret = ioctl(conn_fd, KDBUS_CMD_BUS_CREATOR_INFO, cmd);
+ return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_list(int fd, struct kdbus_cmd_list *cmd)
+{
+ int ret = ioctl(fd, KDBUS_CMD_LIST, cmd);
+ return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_send(int conn_fd, struct kdbus_cmd_send *cmd)
+{
+ int ret = ioctl(conn_fd, KDBUS_CMD_SEND, cmd);
+ return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_recv(int conn_fd, struct kdbus_cmd_recv *cmd)
+{
+ int ret = ioctl(conn_fd, KDBUS_CMD_RECV, cmd);
+ return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_name_acquire(int conn_fd, struct kdbus_cmd *cmd)
+{
+ int ret = ioctl(conn_fd, KDBUS_CMD_NAME_ACQUIRE, cmd);
+ return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_name_release(int conn_fd, struct kdbus_cmd *cmd)
+{
+ int ret = ioctl(conn_fd, KDBUS_CMD_NAME_RELEASE, cmd);
+ return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_match_add(int conn_fd, struct kdbus_cmd_match *cmd)
+{
+ int ret = ioctl(conn_fd, KDBUS_CMD_MATCH_ADD, cmd);
+ return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+static inline int kdbus_cmd_match_remove(int conn_fd, struct kdbus_cmd_match *cmd)
+{
+ int ret = ioctl(conn_fd, KDBUS_CMD_MATCH_REMOVE, cmd);
+ return (ret < 0) ? (errno > 0 ? -errno : -EINVAL) : 0;
+}
+
+#endif /* KDBUS_API_H */
diff --git a/samples/kdbus/kdbus-workers.c b/samples/kdbus/kdbus-workers.c
new file mode 100644
index 000000000..c3ba95863
--- /dev/null
+++ b/samples/kdbus/kdbus-workers.c
@@ -0,0 +1,1345 @@
+/*
+ * Copyright (C) 2013-2015 David Herrmann <dh.herrmann@gmail.com>
+ *
+ * kdbus is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+/*
+ * Example: Workers
+ * This program computes prime-numbers based on the sieve of Eratosthenes. The
+ * master sets up a shared memory region and spawns workers which clear out the
+ * non-primes. The master reacts to keyboard input and to client-requests to
+ * control what each worker does. Note that this is in no way meant as efficient
+ * way to compute primes. It should only serve as example how a master/worker
+ * concept can be implemented with kdbus used as control messages.
+ *
+ * The main process is called the 'master'. It creates a new, private bus which
+ * will be used between the master and its workers to communicate. The master
+ * then spawns a fixed number of workers. Whenever a worker dies (detected via
+ * SIGCHLD), the master spawns a new worker. When done, the master waits for all
+ * workers to exit, prints a status report and exits itself.
+ *
+ * The master process does *not* keep track of its workers. Instead, this
+ * example implements a PULL model. That is, the master acquires a well-known
+ * name on the bus which each worker uses to request tasks from the master. If
+ * there are no more tasks, the master will return an empty task-list, which
+ * casues a worker to exit immediately.
+ *
+ * As tasks can be computationally expensive, we support cancellation. Whenever
+ * the master process is interrupted, it will drop its well-known name on the
+ * bus. This causes kdbus to broadcast a name-change notification. The workers
+ * check for broadcast messages regularly and will exit if they receive one.
+ *
+ * This example exists of 4 objects:
+ * * master: The master object contains the context of the master process. This
+ * process manages the prime-context, spawns workers and assigns
+ * prime-ranges to each worker to compute.
+ * The master itself does not do any prime-computations itself.
+ * * child: The child object contains the context of a worker. It inherits the
+ * prime context from its parent (the master) and then creates a new
+ * bus context to request prime-ranges to compute.
+ * * prime: The "prime" object is used to abstract how we compute primes. When
+ * allocated, it prepares a memory region to hold 1 bit for each
+ * natural number up to a fixed maximum ('MAX_PRIMES').
+ * The memory region is backed by a memfd which we share between
+ * processes. Each worker now gets assigned a range of natural
+ * numbers which it clears multiples of off the memory region. The
+ * master process is responsible of distributing all natural numbers
+ * up to the fixed maximum to its workers.
+ * * bus: The bus object is an abstraction of the kdbus API. It is pretty
+ * straightfoward and only manages the connection-fd plus the
+ * memory-mapped pool in a single object.
+ *
+ * This example is in reversed order, which should make it easier to read
+ * top-down, but requires some forward-declarations. Just ignore those.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* glibc < 2.7 does not ship sys/signalfd.h */
+#if __GLIBC__ >= 2 && __GLIBC_MINOR__ >= 7
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/memfd.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/poll.h>
+#include <sys/signalfd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+#include "kdbus-api.h"
+
+/* FORWARD DECLARATIONS */
+
+#define POOL_SIZE (16 * 1024 * 1024)
+#define MAX_PRIMES (2UL << 24)
+#define WORKER_COUNT (16)
+#define PRIME_STEPS (65536 * 4)
+
+static const char *arg_busname = "example-workers";
+static const char *arg_modname = "kdbus";
+static const char *arg_master = "org.freedesktop.master";
+
+static int err_assert(int r_errno, const char *msg, const char *func, int line,
+ const char *file)
+{
+ r_errno = (r_errno != 0) ? -abs(r_errno) : -EFAULT;
+ if (r_errno < 0) {
+ errno = -r_errno;
+ fprintf(stderr, "ERR: %s: %m (%s:%d in %s)\n",
+ msg, func, line, file);
+ }
+ return r_errno;
+}
+
+#define err_r(_r, _msg) err_assert((_r), (_msg), __func__, __LINE__, __FILE__)
+#define err(_msg) err_r(errno, (_msg))
+
+struct prime;
+struct bus;
+struct master;
+struct child;
+
+struct prime {
+ int fd;
+ uint8_t *area;
+ size_t max;
+ size_t done;
+ size_t status;
+};
+
+static int prime_new(struct prime **out);
+static void prime_free(struct prime *p);
+static bool prime_done(struct prime *p);
+static void prime_consume(struct prime *p, size_t amount);
+static int prime_run(struct prime *p, struct bus *cancel, size_t number);
+static void prime_print(struct prime *p);
+
+struct bus {
+ int fd;
+ uint8_t *pool;
+};
+
+static int bus_open_connection(struct bus **out, uid_t uid, const char *name,
+ uint64_t recv_flags);
+static void bus_close_connection(struct bus *b);
+static void bus_poool_free_slice(struct bus *b, uint64_t offset);
+static int bus_acquire_name(struct bus *b, const char *name);
+static int bus_install_name_loss_match(struct bus *b, const char *name);
+static int bus_poll(struct bus *b);
+static int bus_make(uid_t uid, const char *name);
+
+struct master {
+ size_t n_workers;
+ size_t max_workers;
+
+ int signal_fd;
+ int control_fd;
+
+ struct prime *prime;
+ struct bus *bus;
+};
+
+static int master_new(struct master **out);
+static void master_free(struct master *m);
+static int master_run(struct master *m);
+static int master_poll(struct master *m);
+static int master_handle_stdin(struct master *m);
+static int master_handle_signal(struct master *m);
+static int master_handle_bus(struct master *m);
+static int master_reply(struct master *m, const struct kdbus_msg *msg);
+static int master_waitpid(struct master *m);
+static int master_spawn(struct master *m);
+
+struct child {
+ struct bus *bus;
+ struct prime *prime;
+};
+
+static int child_new(struct child **out, struct prime *p);
+static void child_free(struct child *c);
+static int child_run(struct child *c);
+
+/* END OF FORWARD DECLARATIONS */
+
+/*
+ * This is the main entrypoint of this example. It is pretty straightforward. We
+ * create a master object, run the computation, print a status report and then
+ * exit. Nothing particularly interesting here, so lets look into the master
+ * object...
+ */
+int main(int argc, char **argv)
+{
+ struct master *m = NULL;
+ int r;
+
+ r = master_new(&m);
+ if (r < 0)
+ goto out;
+
+ r = master_run(m);
+ if (r < 0)
+ goto out;
+
+ if (0)
+ prime_print(m->prime);
+
+out:
+ master_free(m);
+ if (r < 0 && r != -EINTR)
+ fprintf(stderr, "failed\n");
+ else
+ fprintf(stderr, "done\n");
+ return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
+/*
+ * ...this will allocate a new master context. It keeps track of the current
+ * number of children/workers that are running, manages a signalfd to track
+ * SIGCHLD, and creates a private kdbus bus. Afterwards, it opens its connection
+ * to the bus and acquires a well known-name (arg_master).
+ */
+static int master_new(struct master **out)
+{
+ struct master *m;
+ sigset_t smask;
+ int r;
+
+ m = calloc(1, sizeof(*m));
+ if (!m)
+ return err("cannot allocate master");
+
+ m->max_workers = WORKER_COUNT;
+ m->signal_fd = -1;
+ m->control_fd = -1;
+
+ /* Block SIGINT and SIGCHLD signals */
+ sigemptyset(&smask);
+ sigaddset(&smask, SIGINT);
+ sigaddset(&smask, SIGCHLD);
+ sigprocmask(SIG_BLOCK, &smask, NULL);
+
+ m->signal_fd = signalfd(-1, &smask, SFD_CLOEXEC);
+ if (m->signal_fd < 0) {
+ r = err("cannot create signalfd");
+ goto error;
+ }
+
+ r = prime_new(&m->prime);
+ if (r < 0)
+ goto error;
+
+ m->control_fd = bus_make(getuid(), arg_busname);
+ if (m->control_fd < 0) {
+ r = m->control_fd;
+ goto error;
+ }
+
+ /*
+ * Open a bus connection for the master, and require each received
+ * message to have a metadata item of type KDBUS_ITEM_PIDS attached.
+ * The current UID is needed to compute the name of the bus node to
+ * connect to.
+ */
+ r = bus_open_connection(&m->bus, getuid(),
+ arg_busname, KDBUS_ATTACH_PIDS);
+ if (r < 0)
+ goto error;
+
+ /*
+ * Acquire a well-known name on the bus, so children can address
+ * messages to the master using KDBUS_DST_ID_NAME as destination-ID
+ * of messages.
+ */
+ r = bus_acquire_name(m->bus, arg_master);
+ if (r < 0)
+ goto error;
+
+ *out = m;
+ return 0;
+
+error:
+ master_free(m);
+ return r;
+}
+
+/* pretty straightforward destructor of a master object */
+static void master_free(struct master *m)
+{
+ if (!m)
+ return;
+
+ bus_close_connection(m->bus);
+ if (m->control_fd >= 0)
+ close(m->control_fd);
+ prime_free(m->prime);
+ if (m->signal_fd >= 0)
+ close(m->signal_fd);
+ free(m);
+}
+
+static int master_run(struct master *m)
+{
+ int res, r = 0;
+
+ while (!prime_done(m->prime)) {
+ while (m->n_workers < m->max_workers) {
+ r = master_spawn(m);
+ if (r < 0)
+ break;
+ }
+
+ r = master_poll(m);
+ if (r < 0)
+ break;
+ }
+
+ if (r < 0) {
+ bus_close_connection(m->bus);
+ m->bus = NULL;
+ }
+
+ while (m->n_workers > 0) {
+ res = master_poll(m);
+ if (res < 0) {
+ if (m->bus) {
+ bus_close_connection(m->bus);
+ m->bus = NULL;
+ }
+ r = res;
+ }
+ }
+
+ return r == -EINTR ? 0 : r;
+}
+
+static int master_poll(struct master *m)
+{
+ struct pollfd fds[3] = {};
+ int r = 0, n = 0;
+
+ /*
+ * Add stdin, the eventfd and the connection owner file descriptor to
+ * the pollfd table, and handle incoming traffic on the latter in
+ * master_handle_bus().
+ */
+ fds[n].fd = STDIN_FILENO;
+ fds[n++].events = POLLIN;
+ fds[n].fd = m->signal_fd;
+ fds[n++].events = POLLIN;
+ if (m->bus) {
+ fds[n].fd = m->bus->fd;
+ fds[n++].events = POLLIN;
+ }
+
+ r = poll(fds, n, -1);
+ if (r < 0)
+ return err("poll() failed");
+
+ if (fds[0].revents & POLLIN)
+ r = master_handle_stdin(m);
+ else if (fds[0].revents)
+ r = err("ERR/HUP on stdin");
+ if (r < 0)
+ return r;
+
+ if (fds[1].revents & POLLIN)
+ r = master_handle_signal(m);
+ else if (fds[1].revents)
+ r = err("ERR/HUP on signalfd");
+ if (r < 0)
+ return r;
+
+ if (fds[2].revents & POLLIN)
+ r = master_handle_bus(m);
+ else if (fds[2].revents)
+ r = err("ERR/HUP on bus");
+
+ return r;
+}
+
+static int master_handle_stdin(struct master *m)
+{
+ char buf[128];
+ ssize_t l;
+ int r = 0;
+
+ l = read(STDIN_FILENO, buf, sizeof(buf));
+ if (l < 0)
+ return err("cannot read stdin");
+ if (l == 0)
+ return err_r(-EINVAL, "EOF on stdin");
+
+ while (l-- > 0) {
+ switch (buf[l]) {
+ case 'q':
+ /* quit */
+ r = -EINTR;
+ break;
+ case '\n':
+ case ' ':
+ /* ignore */
+ break;
+ default:
+ if (isgraph(buf[l]))
+ fprintf(stderr, "invalid input '%c'\n", buf[l]);
+ else
+ fprintf(stderr, "invalid input 0x%x\n", buf[l]);
+ break;
+ }
+ }
+
+ return r;
+}
+
+static int master_handle_signal(struct master *m)
+{
+ struct signalfd_siginfo val;
+ ssize_t l;
+
+ l = read(m->signal_fd, &val, sizeof(val));
+ if (l < 0)
+ return err("cannot read signalfd");
+ if (l != sizeof(val))
+ return err_r(-EINVAL, "invalid data from signalfd");
+
+ switch (val.ssi_signo) {
+ case SIGCHLD:
+ return master_waitpid(m);
+ case SIGINT:
+ return err_r(-EINTR, "interrupted");
+ default:
+ return err_r(-EINVAL, "caught invalid signal");
+ }
+}
+
+static int master_handle_bus(struct master *m)
+{
+ struct kdbus_cmd_recv recv = { .size = sizeof(recv) };
+ const struct kdbus_msg *msg = NULL;
+ const struct kdbus_item *item;
+ const struct kdbus_vec *vec = NULL;
+ int r = 0;
+
+ /*
+ * To receive a message, the KDBUS_CMD_RECV ioctl is used.
+ * It takes an argument of type 'struct kdbus_cmd_recv', which
+ * will contain information on the received message when the call
+ * returns. See kdbus.message(7).
+ */
+ r = kdbus_cmd_recv(m->bus->fd, &recv);
+ /*
+ * EAGAIN is returned when there is no message waiting on this
+ * connection. This is not an error - simply bail out.
+ */
+ if (r == -EAGAIN)
+ return 0;
+ if (r < 0)
+ return err_r(r, "cannot receive message");
+
+ /*
+ * Messages received by a connection are stored inside the connection's
+ * pool, at an offset that has been returned in the 'recv' command
+ * struct above. The value describes the relative offset from the
+ * start address of the pool. A message is described with
+ * 'struct kdbus_msg'. See kdbus.message(7).
+ */
+ msg = (void *)(m->bus->pool + recv.msg.offset);
+
+ /*
+ * A messages describes its actual payload in an array of items.
+ * KDBUS_FOREACH() is a simple iterator that walks such an array.
+ * struct kdbus_msg has a field to denote its total size, which is
+ * needed to determine the number of items in the array.
+ */
+ KDBUS_FOREACH(item, msg->items,
+ msg->size - offsetof(struct kdbus_msg, items)) {
+ /*
+ * An item of type PAYLOAD_OFF describes in-line memory
+ * stored in the pool at a described offset. That offset is
+ * relative to the start address of the message header.
+ * This example program only expects one single item of that
+ * type, remembers the struct kdbus_vec member of the item
+ * when it sees it, and bails out if there is more than one
+ * of them.
+ */
+ if (item->type == KDBUS_ITEM_PAYLOAD_OFF) {
+ if (vec) {
+ r = err_r(-EEXIST,
+ "message with multiple vecs");
+ break;
+ }
+ vec = &item->vec;
+ if (vec->size != 1) {
+ r = err_r(-EINVAL, "invalid message size");
+ break;
+ }
+
+ /*
+ * MEMFDs are transported as items of type PAYLOAD_MEMFD.
+ * If such an item is attached, a new file descriptor was
+ * installed into the task when KDBUS_CMD_RECV was called, and
+ * its number is stored in item->memfd.fd.
+ * Implementers *must* handle this item type and close the
+ * file descriptor when no longer needed in order to prevent
+ * file descriptor exhaustion. This example program just bails
+ * out with an error in this case, as memfds are not expected
+ * in this context.
+ */
+ } else if (item->type == KDBUS_ITEM_PAYLOAD_MEMFD) {
+ r = err_r(-EINVAL, "message with memfd");
+ break;
+ }
+ }
+ if (r < 0)
+ goto exit;
+ if (!vec) {
+ r = err_r(-EINVAL, "empty message");
+ goto exit;
+ }
+
+ switch (*((const uint8_t *)msg + vec->offset)) {
+ case 'r': {
+ r = master_reply(m, msg);
+ break;
+ }
+ default:
+ r = err_r(-EINVAL, "invalid message type");
+ break;
+ }
+
+exit:
+ /*
+ * We are done with the memory slice that was given to us through
+ * recv.msg.offset. Tell the kernel it can use it for other content
+ * in the future. See kdbus.pool(7).
+ */
+ bus_poool_free_slice(m->bus, recv.msg.offset);
+ return r;
+}
+
+static int master_reply(struct master *m, const struct kdbus_msg *msg)
+{
+ struct kdbus_cmd_send cmd;
+ struct kdbus_item *item;
+ struct kdbus_msg *reply;
+ size_t size, status, p[2];
+ int r;
+
+ /*
+ * This functions sends a message over kdbus. To do this, it uses the
+ * KDBUS_CMD_SEND ioctl, which takes a command struct argument of type
+ * 'struct kdbus_cmd_send'. This struct stores a pointer to the actual
+ * message to send. See kdbus.message(7).
+ */
+ p[0] = m->prime->done;
+ p[1] = prime_done(m->prime) ? 0 : PRIME_STEPS;
+
+ size = sizeof(*reply);
+ size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_vec));
+
+ /* Prepare the message to send */
+ reply = alloca(size);
+ memset(reply, 0, size);
+ reply->size = size;
+
+ /* Each message has a cookie that can be used to send replies */
+ reply->cookie = 1;
+
+ /* The payload_type is arbitrary, but it must be non-zero */
+ reply->payload_type = 0xdeadbeef;
+
+ /*
+ * We are sending a reply. Let the kernel know the cookie of the
+ * message we are replying to.
+ */
+ reply->cookie_reply = msg->cookie;
+
+ /*
+ * Messages can either be directed to a well-known name (stored as
+ * string) or to a unique name (stored as number). This example does
+ * the latter. If the message would be directed to a well-known name
+ * instead, the message's dst_id field would be set to
+ * KDBUS_DST_ID_NAME, and the name would be attaches in an item of type
+ * KDBUS_ITEM_DST_NAME. See below for an example, and also refer to
+ * kdbus.message(7).
+ */
+ reply->dst_id = msg->src_id;
+
+ /* Our message has exactly one item to store its payload */
+ item = reply->items;
+ item->type = KDBUS_ITEM_PAYLOAD_VEC;
+ item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(struct kdbus_vec);
+ item->vec.address = (uintptr_t)p;
+ item->vec.size = sizeof(p);
+
+ /*
+ * Now prepare the command struct, and reference the message we want
+ * to send.
+ */
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.size = sizeof(cmd);
+ cmd.msg_address = (uintptr_t)reply;
+
+ /*
+ * Finally, employ the command on the connection owner
+ * file descriptor.
+ */
+ r = kdbus_cmd_send(m->bus->fd, &cmd);
+ if (r < 0)
+ return err_r(r, "cannot send reply");
+
+ if (p[1]) {
+ prime_consume(m->prime, p[1]);
+ status = m->prime->done * 10000 / m->prime->max;
+ if (status != m->prime->status) {
+ m->prime->status = status;
+ fprintf(stderr, "status: %7.3lf%%\n",
+ (double)status / 100);
+ }
+ }
+
+ return 0;
+}
+
+static int master_waitpid(struct master *m)
+{
+ pid_t pid;
+ int r;
+
+ while ((pid = waitpid(-1, &r, WNOHANG)) > 0) {
+ if (m->n_workers > 0)
+ --m->n_workers;
+ if (!WIFEXITED(r))
+ r = err_r(-EINVAL, "child died unexpectedly");
+ else if (WEXITSTATUS(r) != 0)
+ r = err_r(-WEXITSTATUS(r), "child failed");
+ }
+
+ return r;
+}
+
+static int master_spawn(struct master *m)
+{
+ struct child *c = NULL;
+ struct prime *p = NULL;
+ pid_t pid;
+ int r;
+
+ /* Spawn off one child and call child_run() inside it */
+
+ pid = fork();
+ if (pid < 0)
+ return err("cannot fork");
+ if (pid > 0) {
+ /* parent */
+ ++m->n_workers;
+ return 0;
+ }
+
+ /* child */
+
+ p = m->prime;
+ m->prime = NULL;
+ master_free(m);
+
+ r = child_new(&c, p);
+ if (r < 0)
+ goto exit;
+
+ r = child_run(c);
+
+exit:
+ child_free(c);
+ exit(abs(r));
+}
+
+static int child_new(struct child **out, struct prime *p)
+{
+ struct child *c;
+ int r;
+
+ c = calloc(1, sizeof(*c));
+ if (!c)
+ return err("cannot allocate child");
+
+ c->prime = p;
+
+ /*
+ * Open a connection to the bus and require each received message to
+ * carry a list of the well-known names the sendind connection currently
+ * owns. The current UID is needed in order to determine the name of the
+ * bus node to connect to.
+ */
+ r = bus_open_connection(&c->bus, getuid(),
+ arg_busname, KDBUS_ATTACH_NAMES);
+ if (r < 0)
+ goto error;
+
+ /*
+ * Install a kdbus match so the child's connection gets notified when
+ * the master loses its well-known name.
+ */
+ r = bus_install_name_loss_match(c->bus, arg_master);
+ if (r < 0)
+ goto error;
+
+ *out = c;
+ return 0;
+
+error:
+ child_free(c);
+ return r;
+}
+
+static void child_free(struct child *c)
+{
+ if (!c)
+ return;
+
+ bus_close_connection(c->bus);
+ prime_free(c->prime);
+ free(c);
+}
+
+static int child_run(struct child *c)
+{
+ struct kdbus_cmd_send cmd;
+ struct kdbus_item *item;
+ struct kdbus_vec *vec = NULL;
+ struct kdbus_msg *msg;
+ struct timespec spec;
+ size_t n, steps, size;
+ int r = 0;
+
+ /*
+ * Let's send a message to the master and ask for work. To do this,
+ * we use the KDBUS_CMD_SEND ioctl, which takes an argument of type
+ * 'struct kdbus_cmd_send'. This struct stores a pointer to the actual
+ * message to send. See kdbus.message(7).
+ */
+ size = sizeof(*msg);
+ size += KDBUS_ITEM_SIZE(strlen(arg_master) + 1);
+ size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_vec));
+
+ msg = alloca(size);
+ memset(msg, 0, size);
+ msg->size = size;
+
+ /*
+ * Tell the kernel that we expect a reply to this message. This means
+ * that
+ *
+ * a) The remote peer will gain temporary permission to talk to us
+ * even if it would not be allowed to normally.
+ *
+ * b) A timeout value is required.
+ *
+ * For asynchronous send commands, if no reply is received, we will
+ * get a kernel notification with an item of type
+ * KDBUS_ITEM_REPLY_TIMEOUT attached.
+ *
+ * For synchronous send commands (which this example does), the
+ * ioctl will block until a reply is received or the timeout is
+ * exceeded.
+ */
+ msg->flags = KDBUS_MSG_EXPECT_REPLY;
+
+ /* Set our cookie. Replies must use this cookie to send their reply. */
+ msg->cookie = 1;
+
+ /* The payload_type is arbitrary, but it must be non-zero */
+ msg->payload_type = 0xdeadbeef;
+
+ /*
+ * We are sending our message to the current owner of a well-known
+ * name. This makes an item of type KDBUS_ITEM_DST_NAME mandatory.
+ */
+ msg->dst_id = KDBUS_DST_ID_NAME;
+
+ /*
+ * Set the reply timeout to 5 seconds. Timeouts are always set in
+ * absolute timestamps, based con CLOCK_MONOTONIC. See kdbus.message(7).
+ */
+ clock_gettime(CLOCK_MONOTONIC_COARSE, &spec);
+ msg->timeout_ns += (5 + spec.tv_sec) * 1000ULL * 1000ULL * 1000ULL;
+ msg->timeout_ns += spec.tv_nsec;
+
+ /*
+ * Fill the appended items. First, set the well-known name of the
+ * destination we want to talk to.
+ */
+ item = msg->items;
+ item->type = KDBUS_ITEM_DST_NAME;
+ item->size = KDBUS_ITEM_HEADER_SIZE + strlen(arg_master) + 1;
+ strcpy(item->str, arg_master);
+
+ /*
+ * The 2nd item contains a vector to memory we want to send. It
+ * can be content of any type. In our case, we're sending a one-byte
+ * string only. The memory referenced by this item will be copied into
+ * the pool of the receiver connection, and does not need to be valid
+ * after the command is employed.
+ */
+ item = KDBUS_ITEM_NEXT(item);
+ item->type = KDBUS_ITEM_PAYLOAD_VEC;
+ item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(struct kdbus_vec);
+ item->vec.address = (uintptr_t)"r";
+ item->vec.size = 1;
+
+ /* Set up the command struct and reference the message we prepared */
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.size = sizeof(cmd);
+ cmd.msg_address = (uintptr_t)msg;
+
+ /*
+ * The send commands knows a mode in which it will block until a
+ * reply to a message is received. This example uses that mode.
+ * The pool offset to the received reply will be stored in the command
+ * struct after the send command returned. See below.
+ */
+ cmd.flags = KDBUS_SEND_SYNC_REPLY;
+
+ /*
+ * Finally, employ the command on the connection owner
+ * file descriptor.
+ */
+ r = kdbus_cmd_send(c->bus->fd, &cmd);
+ if (r == -ESRCH || r == -EPIPE || r == -ECONNRESET)
+ return 0;
+ if (r < 0)
+ return err_r(r, "cannot send request to master");
+
+ /*
+ * The command was sent with the KDBUS_SEND_SYNC_REPLY flag set,
+ * and returned successfully, which means that cmd.reply.offset now
+ * points to a message inside our connection's pool where the reply
+ * is found. This is equivalent to receiving the reply with
+ * KDBUS_CMD_RECV, but it doesn't require waiting for the reply with
+ * poll() and also saves the ioctl to receive the message.
+ */
+ msg = (void *)(c->bus->pool + cmd.reply.offset);
+
+ /*
+ * A messages describes its actual payload in an array of items.
+ * KDBUS_FOREACH() is a simple iterator that walks such an array.
+ * struct kdbus_msg has a field to denote its total size, which is
+ * needed to determine the number of items in the array.
+ */
+ KDBUS_FOREACH(item, msg->items,
+ msg->size - offsetof(struct kdbus_msg, items)) {
+ /*
+ * An item of type PAYLOAD_OFF describes in-line memory
+ * stored in the pool at a described offset. That offset is
+ * relative to the start address of the message header.
+ * This example program only expects one single item of that
+ * type, remembers the struct kdbus_vec member of the item
+ * when it sees it, and bails out if there is more than one
+ * of them.
+ */
+ if (item->type == KDBUS_ITEM_PAYLOAD_OFF) {
+ if (vec) {
+ r = err_r(-EEXIST,
+ "message with multiple vecs");
+ break;
+ }
+ vec = &item->vec;
+ if (vec->size != 2 * sizeof(size_t)) {
+ r = err_r(-EINVAL, "invalid message size");
+ break;
+ }
+ /*
+ * MEMFDs are transported as items of type PAYLOAD_MEMFD.
+ * If such an item is attached, a new file descriptor was
+ * installed into the task when KDBUS_CMD_RECV was called, and
+ * its number is stored in item->memfd.fd.
+ * Implementers *must* handle this item type close the
+ * file descriptor when no longer needed in order to prevent
+ * file descriptor exhaustion. This example program just bails
+ * out with an error in this case, as memfds are not expected
+ * in this context.
+ */
+ } else if (item->type == KDBUS_ITEM_PAYLOAD_MEMFD) {
+ r = err_r(-EINVAL, "message with memfd");
+ break;
+ }
+ }
+ if (r < 0)
+ goto exit;
+ if (!vec) {
+ r = err_r(-EINVAL, "empty message");
+ goto exit;
+ }
+
+ n = ((size_t *)((const uint8_t *)msg + vec->offset))[0];
+ steps = ((size_t *)((const uint8_t *)msg + vec->offset))[1];
+
+ while (steps-- > 0) {
+ ++n;
+ r = prime_run(c->prime, c->bus, n);
+ if (r < 0)
+ break;
+ r = bus_poll(c->bus);
+ if (r != 0) {
+ r = r < 0 ? r : -EINTR;
+ break;
+ }
+ }
+
+exit:
+ /*
+ * We are done with the memory slice that was given to us through
+ * cmd.reply.offset. Tell the kernel it can use it for other content
+ * in the future. See kdbus.pool(7).
+ */
+ bus_poool_free_slice(c->bus, cmd.reply.offset);
+ return r;
+}
+
+/*
+ * Prime Computation
+ *
+ */
+
+static int prime_new(struct prime **out)
+{
+ struct prime *p;
+ int r;
+
+ p = calloc(1, sizeof(*p));
+ if (!p)
+ return err("cannot allocate prime memory");
+
+ p->fd = -1;
+ p->area = MAP_FAILED;
+ p->max = MAX_PRIMES;
+
+ /*
+ * Prepare and map a memfd to store the bit-fields for the number
+ * ranges we want to perform the prime detection on.
+ */
+ p->fd = syscall(__NR_memfd_create, "prime-area", MFD_CLOEXEC);
+ if (p->fd < 0) {
+ r = err("cannot create memfd");
+ goto error;
+ }
+
+ r = ftruncate(p->fd, p->max / 8 + 1);
+ if (r < 0) {
+ r = err("cannot ftruncate area");
+ goto error;
+ }
+
+ p->area = mmap(NULL, p->max / 8 + 1, PROT_READ | PROT_WRITE,
+ MAP_SHARED, p->fd, 0);
+ if (p->area == MAP_FAILED) {
+ r = err("cannot mmap memfd");
+ goto error;
+ }
+
+ *out = p;
+ return 0;
+
+error:
+ prime_free(p);
+ return r;
+}
+
+static void prime_free(struct prime *p)
+{
+ if (!p)
+ return;
+
+ if (p->area != MAP_FAILED)
+ munmap(p->area, p->max / 8 + 1);
+ if (p->fd >= 0)
+ close(p->fd);
+ free(p);
+}
+
+static bool prime_done(struct prime *p)
+{
+ return p->done >= p->max;
+}
+
+static void prime_consume(struct prime *p, size_t amount)
+{
+ p->done += amount;
+}
+
+static int prime_run(struct prime *p, struct bus *cancel, size_t number)
+{
+ size_t i, n = 0;
+ int r;
+
+ if (number < 2 || number > 65535)
+ return 0;
+
+ for (i = number * number;
+ i < p->max && i > number;
+ i += number) {
+ p->area[i / 8] |= 1 << (i % 8);
+
+ if (!(++n % (1 << 20))) {
+ r = bus_poll(cancel);
+ if (r != 0)
+ return r < 0 ? r : -EINTR;
+ }
+ }
+
+ return 0;
+}
+
+static void prime_print(struct prime *p)
+{
+ size_t i, l = 0;
+
+ fprintf(stderr, "PRIMES:");
+ for (i = 0; i < p->max; ++i) {
+ if (!(p->area[i / 8] & (1 << (i % 8))))
+ fprintf(stderr, "%c%7zu", !(l++ % 16) ? '\n' : ' ', i);
+ }
+ fprintf(stderr, "\nEND\n");
+}
+
+static int bus_open_connection(struct bus **out, uid_t uid, const char *name,
+ uint64_t recv_flags)
+{
+ struct kdbus_cmd_hello hello;
+ char path[128];
+ struct bus *b;
+ int r;
+
+ /*
+ * The 'bus' object is our representation of a kdbus connection which
+ * stores two details: the connection owner file descriptor, and the
+ * mmap()ed memory of its associated pool. See kdbus.connection(7) and
+ * kdbus.pool(7).
+ */
+ b = calloc(1, sizeof(*b));
+ if (!b)
+ return err("cannot allocate bus memory");
+
+ b->fd = -1;
+ b->pool = MAP_FAILED;
+
+ /* Compute the name of the bus node to connect to. */
+ snprintf(path, sizeof(path), "/sys/fs/%s/%lu-%s/bus",
+ arg_modname, (unsigned long)uid, name);
+ b->fd = open(path, O_RDWR | O_CLOEXEC);
+ if (b->fd < 0) {
+ r = err("cannot open bus");
+ goto error;
+ }
+
+ /*
+ * To make a connection to the bus, the KDBUS_CMD_HELLO ioctl is used.
+ * It takes an argument of type 'struct kdbus_cmd_hello'.
+ */
+ memset(&hello, 0, sizeof(hello));
+ hello.size = sizeof(hello);
+
+ /*
+ * Specify a mask of metadata attach flags, describing metadata items
+ * that this new connection allows to be sent.
+ */
+ hello.attach_flags_send = _KDBUS_ATTACH_ALL;
+
+ /*
+ * Specify a mask of metadata attach flags, describing metadata items
+ * that this new connection wants to be receive along with each message.
+ */
+ hello.attach_flags_recv = recv_flags;
+
+ /*
+ * A connection may choose the size of its pool, but the number has to
+ * comply with two rules: a) it must be greater than 0, and b) it must
+ * be a mulitple of PAGE_SIZE. See kdbus.pool(7).
+ */
+ hello.pool_size = POOL_SIZE;
+
+ /*
+ * Now employ the command on the file descriptor opened above.
+ * This command will turn the file descriptor into a connection-owner
+ * file descriptor that controls the life-time of the connection; once
+ * it's closed, the connection is shut down.
+ */
+ r = kdbus_cmd_hello(b->fd, &hello);
+ if (r < 0) {
+ err_r(r, "HELLO failed");
+ goto error;
+ }
+
+ bus_poool_free_slice(b, hello.offset);
+
+ /*
+ * Map the pool of the connection. Its size has been set in the
+ * command struct above. See kdbus.pool(7).
+ */
+ b->pool = mmap(NULL, POOL_SIZE, PROT_READ, MAP_SHARED, b->fd, 0);
+ if (b->pool == MAP_FAILED) {
+ r = err("cannot mmap pool");
+ goto error;
+ }
+
+ *out = b;
+ return 0;
+
+error:
+ bus_close_connection(b);
+ return r;
+}
+
+static void bus_close_connection(struct bus *b)
+{
+ if (!b)
+ return;
+
+ /*
+ * A bus connection is closed by simply calling close() on the
+ * connection owner file descriptor. The unique name and all owned
+ * well-known names of the conneciton will disappear.
+ * See kdbus.connection(7).
+ */
+ if (b->pool != MAP_FAILED)
+ munmap(b->pool, POOL_SIZE);
+ if (b->fd >= 0)
+ close(b->fd);
+ free(b);
+}
+
+static void bus_poool_free_slice(struct bus *b, uint64_t offset)
+{
+ struct kdbus_cmd_free cmd = {
+ .size = sizeof(cmd),
+ .offset = offset,
+ };
+ int r;
+
+ /*
+ * Once we're done with a piece of pool memory that was returned
+ * by a command, we have to call the KDBUS_CMD_FREE ioctl on it so it
+ * can be reused. The command takes an argument of type
+ * 'struct kdbus_cmd_free', in which the pool offset of the slice to
+ * free is stored. The ioctl is employed on the connection owner
+ * file descriptor. See kdbus.pool(7),
+ */
+ r = kdbus_cmd_free(b->fd, &cmd);
+ if (r < 0)
+ err_r(r, "cannot free pool slice");
+}
+
+static int bus_acquire_name(struct bus *b, const char *name)
+{
+ struct kdbus_item *item;
+ struct kdbus_cmd *cmd;
+ size_t size;
+ int r;
+
+ /*
+ * This function acquires a well-known name on the bus through the
+ * KDBUS_CMD_NAME_ACQUIRE ioctl. This ioctl takes an argument of type
+ * 'struct kdbus_cmd', which is assembled below. See kdbus.name(7).
+ */
+ size = sizeof(*cmd);
+ size += KDBUS_ITEM_SIZE(strlen(name) + 1);
+
+ cmd = alloca(size);
+ memset(cmd, 0, size);
+ cmd->size = size;
+
+ /*
+ * The command requires an item of type KDBUS_ITEM_NAME, and its
+ * content must be a valid bus name.
+ */
+ item = cmd->items;
+ item->type = KDBUS_ITEM_NAME;
+ item->size = KDBUS_ITEM_HEADER_SIZE + strlen(name) + 1;
+ strcpy(item->str, name);
+
+ /*
+ * Employ the command on the connection owner file descriptor.
+ */
+ r = kdbus_cmd_name_acquire(b->fd, cmd);
+ if (r < 0)
+ return err_r(r, "cannot acquire name");
+
+ return 0;
+}
+
+static int bus_install_name_loss_match(struct bus *b, const char *name)
+{
+ struct kdbus_cmd_match *match;
+ struct kdbus_item *item;
+ size_t size;
+ int r;
+
+ /*
+ * In order to install a match for signal messages, we have to
+ * assemble a 'struct kdbus_cmd_match' and use it along with the
+ * KDBUS_CMD_MATCH_ADD ioctl. See kdbus.match(7).
+ */
+ size = sizeof(*match);
+ size += KDBUS_ITEM_SIZE(sizeof(item->name_change) + strlen(name) + 1);
+
+ match = alloca(size);
+ memset(match, 0, size);
+ match->size = size;
+
+ /*
+ * A match is comprised of many 'rules', each of which describes a
+ * mandatory detail of the message. All rules of a match must be
+ * satified in order to make a message pass.
+ */
+ item = match->items;
+
+ /*
+ * In this case, we're interested in notifications that inform us
+ * about a well-known name being removed from the bus.
+ */
+ item->type = KDBUS_ITEM_NAME_REMOVE;
+ item->size = KDBUS_ITEM_HEADER_SIZE +
+ sizeof(item->name_change) + strlen(name) + 1;
+
+ /*
+ * We could limit the match further and require a specific unique-ID
+ * to be the new or the old owner of the name. In this case, however,
+ * we don't, and allow 'any' id.
+ */
+ item->name_change.old_id.id = KDBUS_MATCH_ID_ANY;
+ item->name_change.new_id.id = KDBUS_MATCH_ID_ANY;
+
+ /* Copy in the well-known name we're interested in */
+ strcpy(item->name_change.name, name);
+
+ /*
+ * Add the match through the KDBUS_CMD_MATCH_ADD ioctl, employed on
+ * the connection owner fd.
+ */
+ r = kdbus_cmd_match_add(b->fd, match);
+ if (r < 0)
+ return err_r(r, "cannot add match");
+
+ return 0;
+}
+
+static int bus_poll(struct bus *b)
+{
+ struct pollfd fds[1] = {};
+ int r;
+
+ /*
+ * A connection endpoint supports poll() and will wake-up the
+ * task with POLLIN set once a message has arrived.
+ */
+ fds[0].fd = b->fd;
+ fds[0].events = POLLIN;
+ r = poll(fds, sizeof(fds) / sizeof(*fds), 0);
+ if (r < 0)
+ return err("cannot poll bus");
+ return !!(fds[0].revents & POLLIN);
+}
+
+static int bus_make(uid_t uid, const char *name)
+{
+ struct kdbus_item *item;
+ struct kdbus_cmd *make;
+ char path[128], busname[128];
+ size_t size;
+ int r, fd;
+
+ /*
+ * Compute the full path to the 'control' node. 'arg_modname' may be
+ * set to a different value than 'kdbus' for development purposes.
+ * The 'control' node is the primary entry point to kdbus that must be
+ * used in order to create a bus. See kdbus(7) and kdbus.bus(7).
+ */
+ snprintf(path, sizeof(path), "/sys/fs/%s/control", arg_modname);
+
+ /*
+ * Compute the bus name. A valid bus name must always be prefixed with
+ * the EUID of the currently running process in order to avoid name
+ * conflicts. See kdbus.bus(7).
+ */
+ snprintf(busname, sizeof(busname), "%lu-%s", (unsigned long)uid, name);
+
+ fd = open(path, O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ return err("cannot open control file");
+
+ /*
+ * The KDBUS_CMD_BUS_MAKE ioctl takes an argument of type
+ * 'struct kdbus_cmd', and expects at least two items attached to
+ * it: one to decribe the bloom parameters to be propagated to
+ * connections of the bus, and the name of the bus that was computed
+ * above. Assemble this struct now, and fill it with values.
+ */
+ size = sizeof(*make);
+ size += KDBUS_ITEM_SIZE(sizeof(struct kdbus_bloom_parameter));
+ size += KDBUS_ITEM_SIZE(strlen(busname) + 1);
+
+ make = alloca(size);
+ memset(make, 0, size);
+ make->size = size;
+
+ /*
+ * Each item has a 'type' and 'size' field, and must be stored at an
+ * 8-byte aligned address. The KDBUS_ITEM_NEXT macro is used to advance
+ * the pointer. See kdbus.item(7) for more details.
+ */
+ item = make->items;
+ item->type = KDBUS_ITEM_BLOOM_PARAMETER;
+ item->size = KDBUS_ITEM_HEADER_SIZE + sizeof(item->bloom_parameter);
+ item->bloom_parameter.size = 8;
+ item->bloom_parameter.n_hash = 1;
+
+ /* The name of the new bus is stored in the next item. */
+ item = KDBUS_ITEM_NEXT(item);
+ item->type = KDBUS_ITEM_MAKE_NAME;
+ item->size = KDBUS_ITEM_HEADER_SIZE + strlen(busname) + 1;
+ strcpy(item->str, busname);
+
+ /*
+ * Now create the bus via the KDBUS_CMD_BUS_MAKE ioctl and return the
+ * fd that was used back to the caller of this function. This fd is now
+ * called a 'bus owner file descriptor', and it controls the life-time
+ * of the newly created bus; once the file descriptor is closed, the
+ * bus goes away, and all connections are shut down. See kdbus.bus(7).
+ */
+ r = kdbus_cmd_bus_make(fd, make);
+ if (r < 0) {
+ err_r(r, "cannot make bus");
+ close(fd);
+ return r;
+ }
+
+ return fd;
+}
+
+#else
+
+#warning "Skipping compilation due to unsupported libc version"
+
+int main(int argc, char **argv)
+{
+ fprintf(stderr,
+ "Compilation of %s was skipped due to unsupported libc.\n",
+ argv[0]);
+
+ return EXIT_FAILURE;
+}
+
+#endif /* libc sanity check */
diff --git a/samples/kfifo/Makefile b/samples/kfifo/Makefile
new file mode 100644
index 000000000..bcc9484a1
--- /dev/null
+++ b/samples/kfifo/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SAMPLE_KFIFO) += bytestream-example.o dma-example.o inttype-example.o record-example.o
diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c
new file mode 100644
index 000000000..2fca916d9
--- /dev/null
+++ b/samples/kfifo/bytestream-example.c
@@ -0,0 +1,194 @@
+/*
+ * Sample kfifo byte stream implementation
+ *
+ * Copyright (C) 2010 Stefani Seibold <stefani@seibold.net>
+ *
+ * Released under the GPL version 2 only.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/mutex.h>
+#include <linux/kfifo.h>
+
+/*
+ * This module shows how to create a byte stream fifo.
+ */
+
+/* fifo size in elements (bytes) */
+#define FIFO_SIZE 32
+
+/* name of the proc entry */
+#define PROC_FIFO "bytestream-fifo"
+
+/* lock for procfs read access */
+static DEFINE_MUTEX(read_lock);
+
+/* lock for procfs write access */
+static DEFINE_MUTEX(write_lock);
+
+/*
+ * define DYNAMIC in this example for a dynamically allocated fifo.
+ *
+ * Otherwise the fifo storage will be a part of the fifo structure.
+ */
+#if 0
+#define DYNAMIC
+#endif
+
+#ifdef DYNAMIC
+static struct kfifo test;
+#else
+static DECLARE_KFIFO(test, unsigned char, FIFO_SIZE);
+#endif
+
+static const unsigned char expected_result[FIFO_SIZE] = {
+ 3, 4, 5, 6, 7, 8, 9, 0,
+ 1, 20, 21, 22, 23, 24, 25, 26,
+ 27, 28, 29, 30, 31, 32, 33, 34,
+ 35, 36, 37, 38, 39, 40, 41, 42,
+};
+
+static int __init testfunc(void)
+{
+ unsigned char buf[6];
+ unsigned char i, j;
+ unsigned int ret;
+
+ printk(KERN_INFO "byte stream fifo test start\n");
+
+ /* put string into the fifo */
+ kfifo_in(&test, "hello", 5);
+
+ /* put values into the fifo */
+ for (i = 0; i != 10; i++)
+ kfifo_put(&test, i);
+
+ /* show the number of used elements */
+ printk(KERN_INFO "fifo len: %u\n", kfifo_len(&test));
+
+ /* get max of 5 bytes from the fifo */
+ i = kfifo_out(&test, buf, 5);
+ printk(KERN_INFO "buf: %.*s\n", i, buf);
+
+ /* get max of 2 elements from the fifo */
+ ret = kfifo_out(&test, buf, 2);
+ printk(KERN_INFO "ret: %d\n", ret);
+ /* and put it back to the end of the fifo */
+ ret = kfifo_in(&test, buf, ret);
+ printk(KERN_INFO "ret: %d\n", ret);
+
+ /* skip first element of the fifo */
+ printk(KERN_INFO "skip 1st element\n");
+ kfifo_skip(&test);
+
+ /* put values into the fifo until is full */
+ for (i = 20; kfifo_put(&test, i); i++)
+ ;
+
+ printk(KERN_INFO "queue len: %u\n", kfifo_len(&test));
+
+ /* show the first value without removing from the fifo */
+ if (kfifo_peek(&test, &i))
+ printk(KERN_INFO "%d\n", i);
+
+ /* check the correctness of all values in the fifo */
+ j = 0;
+ while (kfifo_get(&test, &i)) {
+ printk(KERN_INFO "item = %d\n", i);
+ if (i != expected_result[j++]) {
+ printk(KERN_WARNING "value mismatch: test failed\n");
+ return -EIO;
+ }
+ }
+ if (j != ARRAY_SIZE(expected_result)) {
+ printk(KERN_WARNING "size mismatch: test failed\n");
+ return -EIO;
+ }
+ printk(KERN_INFO "test passed\n");
+
+ return 0;
+}
+
+static ssize_t fifo_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ int ret;
+ unsigned int copied;
+
+ if (mutex_lock_interruptible(&write_lock))
+ return -ERESTARTSYS;
+
+ ret = kfifo_from_user(&test, buf, count, &copied);
+
+ mutex_unlock(&write_lock);
+
+ return ret ? ret : copied;
+}
+
+static ssize_t fifo_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ int ret;
+ unsigned int copied;
+
+ if (mutex_lock_interruptible(&read_lock))
+ return -ERESTARTSYS;
+
+ ret = kfifo_to_user(&test, buf, count, &copied);
+
+ mutex_unlock(&read_lock);
+
+ return ret ? ret : copied;
+}
+
+static const struct file_operations fifo_fops = {
+ .owner = THIS_MODULE,
+ .read = fifo_read,
+ .write = fifo_write,
+ .llseek = noop_llseek,
+};
+
+static int __init example_init(void)
+{
+#ifdef DYNAMIC
+ int ret;
+
+ ret = kfifo_alloc(&test, FIFO_SIZE, GFP_KERNEL);
+ if (ret) {
+ printk(KERN_ERR "error kfifo_alloc\n");
+ return ret;
+ }
+#else
+ INIT_KFIFO(test);
+#endif
+ if (testfunc() < 0) {
+#ifdef DYNAMIC
+ kfifo_free(&test);
+#endif
+ return -EIO;
+ }
+
+ if (proc_create(PROC_FIFO, 0, NULL, &fifo_fops) == NULL) {
+#ifdef DYNAMIC
+ kfifo_free(&test);
+#endif
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void __exit example_exit(void)
+{
+ remove_proc_entry(PROC_FIFO, NULL);
+#ifdef DYNAMIC
+ kfifo_free(&test);
+#endif
+}
+
+module_init(example_init);
+module_exit(example_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Stefani Seibold <stefani@seibold.net>");
diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c
new file mode 100644
index 000000000..aa243db93
--- /dev/null
+++ b/samples/kfifo/dma-example.c
@@ -0,0 +1,143 @@
+/*
+ * Sample fifo dma implementation
+ *
+ * Copyright (C) 2010 Stefani Seibold <stefani@seibold.net>
+ *
+ * Released under the GPL version 2 only.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kfifo.h>
+
+/*
+ * This module shows how to handle fifo dma operations.
+ */
+
+/* fifo size in elements (bytes) */
+#define FIFO_SIZE 32
+
+static struct kfifo fifo;
+
+static int __init example_init(void)
+{
+ int i;
+ unsigned int ret;
+ unsigned int nents;
+ struct scatterlist sg[10];
+
+ printk(KERN_INFO "DMA fifo test start\n");
+
+ if (kfifo_alloc(&fifo, FIFO_SIZE, GFP_KERNEL)) {
+ printk(KERN_WARNING "error kfifo_alloc\n");
+ return -ENOMEM;
+ }
+
+ printk(KERN_INFO "queue size: %u\n", kfifo_size(&fifo));
+
+ kfifo_in(&fifo, "test", 4);
+
+ for (i = 0; i != 9; i++)
+ kfifo_put(&fifo, i);
+
+ /* kick away first byte */
+ kfifo_skip(&fifo);
+
+ printk(KERN_INFO "queue len: %u\n", kfifo_len(&fifo));
+
+ /*
+ * Configure the kfifo buffer to receive data from DMA input.
+ *
+ * .--------------------------------------.
+ * | 0 | 1 | 2 | ... | 12 | 13 | ... | 31 |
+ * |---|------------------|---------------|
+ * \_/ \________________/ \_____________/
+ * \ \ \
+ * \ \_allocated data \
+ * \_*free space* \_*free space*
+ *
+ * We need two different SG entries: one for the free space area at the
+ * end of the kfifo buffer (19 bytes) and another for the first free
+ * byte at the beginning, after the kfifo_skip().
+ */
+ sg_init_table(sg, ARRAY_SIZE(sg));
+ nents = kfifo_dma_in_prepare(&fifo, sg, ARRAY_SIZE(sg), FIFO_SIZE);
+ printk(KERN_INFO "DMA sgl entries: %d\n", nents);
+ if (!nents) {
+ /* fifo is full and no sgl was created */
+ printk(KERN_WARNING "error kfifo_dma_in_prepare\n");
+ return -EIO;
+ }
+
+ /* receive data */
+ printk(KERN_INFO "scatterlist for receive:\n");
+ for (i = 0; i < nents; i++) {
+ printk(KERN_INFO
+ "sg[%d] -> "
+ "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n",
+ i, sg[i].page_link, sg[i].offset, sg[i].length);
+
+ if (sg_is_last(&sg[i]))
+ break;
+ }
+
+ /* put here your code to setup and exectute the dma operation */
+ /* ... */
+
+ /* example: zero bytes received */
+ ret = 0;
+
+ /* finish the dma operation and update the received data */
+ kfifo_dma_in_finish(&fifo, ret);
+
+ /* Prepare to transmit data, example: 8 bytes */
+ nents = kfifo_dma_out_prepare(&fifo, sg, ARRAY_SIZE(sg), 8);
+ printk(KERN_INFO "DMA sgl entries: %d\n", nents);
+ if (!nents) {
+ /* no data was available and no sgl was created */
+ printk(KERN_WARNING "error kfifo_dma_out_prepare\n");
+ return -EIO;
+ }
+
+ printk(KERN_INFO "scatterlist for transmit:\n");
+ for (i = 0; i < nents; i++) {
+ printk(KERN_INFO
+ "sg[%d] -> "
+ "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n",
+ i, sg[i].page_link, sg[i].offset, sg[i].length);
+
+ if (sg_is_last(&sg[i]))
+ break;
+ }
+
+ /* put here your code to setup and exectute the dma operation */
+ /* ... */
+
+ /* example: 5 bytes transmitted */
+ ret = 5;
+
+ /* finish the dma operation and update the transmitted data */
+ kfifo_dma_out_finish(&fifo, ret);
+
+ ret = kfifo_len(&fifo);
+ printk(KERN_INFO "queue len: %u\n", kfifo_len(&fifo));
+
+ if (ret != 7) {
+ printk(KERN_WARNING "size mismatch: test failed");
+ return -EIO;
+ }
+ printk(KERN_INFO "test passed\n");
+
+ return 0;
+}
+
+static void __exit example_exit(void)
+{
+ kfifo_free(&fifo);
+}
+
+module_init(example_init);
+module_exit(example_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Stefani Seibold <stefani@seibold.net>");
diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c
new file mode 100644
index 000000000..8dc3c2e71
--- /dev/null
+++ b/samples/kfifo/inttype-example.c
@@ -0,0 +1,185 @@
+/*
+ * Sample kfifo int type implementation
+ *
+ * Copyright (C) 2010 Stefani Seibold <stefani@seibold.net>
+ *
+ * Released under the GPL version 2 only.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/mutex.h>
+#include <linux/kfifo.h>
+
+/*
+ * This module shows how to create a int type fifo.
+ */
+
+/* fifo size in elements (ints) */
+#define FIFO_SIZE 32
+
+/* name of the proc entry */
+#define PROC_FIFO "int-fifo"
+
+/* lock for procfs read access */
+static DEFINE_MUTEX(read_lock);
+
+/* lock for procfs write access */
+static DEFINE_MUTEX(write_lock);
+
+/*
+ * define DYNAMIC in this example for a dynamically allocated fifo.
+ *
+ * Otherwise the fifo storage will be a part of the fifo structure.
+ */
+#if 0
+#define DYNAMIC
+#endif
+
+#ifdef DYNAMIC
+static DECLARE_KFIFO_PTR(test, int);
+#else
+static DEFINE_KFIFO(test, int, FIFO_SIZE);
+#endif
+
+static const int expected_result[FIFO_SIZE] = {
+ 3, 4, 5, 6, 7, 8, 9, 0,
+ 1, 20, 21, 22, 23, 24, 25, 26,
+ 27, 28, 29, 30, 31, 32, 33, 34,
+ 35, 36, 37, 38, 39, 40, 41, 42,
+};
+
+static int __init testfunc(void)
+{
+ int buf[6];
+ int i, j;
+ unsigned int ret;
+
+ printk(KERN_INFO "int fifo test start\n");
+
+ /* put values into the fifo */
+ for (i = 0; i != 10; i++)
+ kfifo_put(&test, i);
+
+ /* show the number of used elements */
+ printk(KERN_INFO "fifo len: %u\n", kfifo_len(&test));
+
+ /* get max of 2 elements from the fifo */
+ ret = kfifo_out(&test, buf, 2);
+ printk(KERN_INFO "ret: %d\n", ret);
+ /* and put it back to the end of the fifo */
+ ret = kfifo_in(&test, buf, ret);
+ printk(KERN_INFO "ret: %d\n", ret);
+
+ /* skip first element of the fifo */
+ printk(KERN_INFO "skip 1st element\n");
+ kfifo_skip(&test);
+
+ /* put values into the fifo until is full */
+ for (i = 20; kfifo_put(&test, i); i++)
+ ;
+
+ printk(KERN_INFO "queue len: %u\n", kfifo_len(&test));
+
+ /* show the first value without removing from the fifo */
+ if (kfifo_peek(&test, &i))
+ printk(KERN_INFO "%d\n", i);
+
+ /* check the correctness of all values in the fifo */
+ j = 0;
+ while (kfifo_get(&test, &i)) {
+ printk(KERN_INFO "item = %d\n", i);
+ if (i != expected_result[j++]) {
+ printk(KERN_WARNING "value mismatch: test failed\n");
+ return -EIO;
+ }
+ }
+ if (j != ARRAY_SIZE(expected_result)) {
+ printk(KERN_WARNING "size mismatch: test failed\n");
+ return -EIO;
+ }
+ printk(KERN_INFO "test passed\n");
+
+ return 0;
+}
+
+static ssize_t fifo_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ int ret;
+ unsigned int copied;
+
+ if (mutex_lock_interruptible(&write_lock))
+ return -ERESTARTSYS;
+
+ ret = kfifo_from_user(&test, buf, count, &copied);
+
+ mutex_unlock(&write_lock);
+
+ return ret ? ret : copied;
+}
+
+static ssize_t fifo_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ int ret;
+ unsigned int copied;
+
+ if (mutex_lock_interruptible(&read_lock))
+ return -ERESTARTSYS;
+
+ ret = kfifo_to_user(&test, buf, count, &copied);
+
+ mutex_unlock(&read_lock);
+
+ return ret ? ret : copied;
+}
+
+static const struct file_operations fifo_fops = {
+ .owner = THIS_MODULE,
+ .read = fifo_read,
+ .write = fifo_write,
+ .llseek = noop_llseek,
+};
+
+static int __init example_init(void)
+{
+#ifdef DYNAMIC
+ int ret;
+
+ ret = kfifo_alloc(&test, FIFO_SIZE, GFP_KERNEL);
+ if (ret) {
+ printk(KERN_ERR "error kfifo_alloc\n");
+ return ret;
+ }
+#endif
+ if (testfunc() < 0) {
+#ifdef DYNAMIC
+ kfifo_free(&test);
+#endif
+ return -EIO;
+ }
+
+ if (proc_create(PROC_FIFO, 0, NULL, &fifo_fops) == NULL) {
+#ifdef DYNAMIC
+ kfifo_free(&test);
+#endif
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void __exit example_exit(void)
+{
+ remove_proc_entry(PROC_FIFO, NULL);
+#ifdef DYNAMIC
+ kfifo_free(&test);
+#endif
+}
+
+module_init(example_init);
+module_exit(example_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Stefani Seibold <stefani@seibold.net>");
diff --git a/samples/kfifo/record-example.c b/samples/kfifo/record-example.c
new file mode 100644
index 000000000..2d7529eeb
--- /dev/null
+++ b/samples/kfifo/record-example.c
@@ -0,0 +1,201 @@
+/*
+ * Sample dynamic sized record fifo implementation
+ *
+ * Copyright (C) 2010 Stefani Seibold <stefani@seibold.net>
+ *
+ * Released under the GPL version 2 only.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/mutex.h>
+#include <linux/kfifo.h>
+
+/*
+ * This module shows how to create a variable sized record fifo.
+ */
+
+/* fifo size in elements (bytes) */
+#define FIFO_SIZE 128
+
+/* name of the proc entry */
+#define PROC_FIFO "record-fifo"
+
+/* lock for procfs read access */
+static DEFINE_MUTEX(read_lock);
+
+/* lock for procfs write access */
+static DEFINE_MUTEX(write_lock);
+
+/*
+ * define DYNAMIC in this example for a dynamically allocated fifo.
+ *
+ * Otherwise the fifo storage will be a part of the fifo structure.
+ */
+#if 0
+#define DYNAMIC
+#endif
+
+/*
+ * struct kfifo_rec_ptr_1 and STRUCT_KFIFO_REC_1 can handle records of a
+ * length between 0 and 255 bytes.
+ *
+ * struct kfifo_rec_ptr_2 and STRUCT_KFIFO_REC_2 can handle records of a
+ * length between 0 and 65535 bytes.
+ */
+
+#ifdef DYNAMIC
+struct kfifo_rec_ptr_1 test;
+
+#else
+typedef STRUCT_KFIFO_REC_1(FIFO_SIZE) mytest;
+
+static mytest test;
+#endif
+
+static const char *expected_result[] = {
+ "a",
+ "bb",
+ "ccc",
+ "dddd",
+ "eeeee",
+ "ffffff",
+ "ggggggg",
+ "hhhhhhhh",
+ "iiiiiiiii",
+ "jjjjjjjjjj",
+};
+
+static int __init testfunc(void)
+{
+ char buf[100];
+ unsigned int i;
+ unsigned int ret;
+ struct { unsigned char buf[6]; } hello = { "hello" };
+
+ printk(KERN_INFO "record fifo test start\n");
+
+ kfifo_in(&test, &hello, sizeof(hello));
+
+ /* show the size of the next record in the fifo */
+ printk(KERN_INFO "fifo peek len: %u\n", kfifo_peek_len(&test));
+
+ /* put in variable length data */
+ for (i = 0; i < 10; i++) {
+ memset(buf, 'a' + i, i + 1);
+ kfifo_in(&test, buf, i + 1);
+ }
+
+ /* skip first element of the fifo */
+ printk(KERN_INFO "skip 1st element\n");
+ kfifo_skip(&test);
+
+ printk(KERN_INFO "fifo len: %u\n", kfifo_len(&test));
+
+ /* show the first record without removing from the fifo */
+ ret = kfifo_out_peek(&test, buf, sizeof(buf));
+ if (ret)
+ printk(KERN_INFO "%.*s\n", ret, buf);
+
+ /* check the correctness of all values in the fifo */
+ i = 0;
+ while (!kfifo_is_empty(&test)) {
+ ret = kfifo_out(&test, buf, sizeof(buf));
+ buf[ret] = '\0';
+ printk(KERN_INFO "item = %.*s\n", ret, buf);
+ if (strcmp(buf, expected_result[i++])) {
+ printk(KERN_WARNING "value mismatch: test failed\n");
+ return -EIO;
+ }
+ }
+ if (i != ARRAY_SIZE(expected_result)) {
+ printk(KERN_WARNING "size mismatch: test failed\n");
+ return -EIO;
+ }
+ printk(KERN_INFO "test passed\n");
+
+ return 0;
+}
+
+static ssize_t fifo_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ int ret;
+ unsigned int copied;
+
+ if (mutex_lock_interruptible(&write_lock))
+ return -ERESTARTSYS;
+
+ ret = kfifo_from_user(&test, buf, count, &copied);
+
+ mutex_unlock(&write_lock);
+
+ return ret ? ret : copied;
+}
+
+static ssize_t fifo_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ int ret;
+ unsigned int copied;
+
+ if (mutex_lock_interruptible(&read_lock))
+ return -ERESTARTSYS;
+
+ ret = kfifo_to_user(&test, buf, count, &copied);
+
+ mutex_unlock(&read_lock);
+
+ return ret ? ret : copied;
+}
+
+static const struct file_operations fifo_fops = {
+ .owner = THIS_MODULE,
+ .read = fifo_read,
+ .write = fifo_write,
+ .llseek = noop_llseek,
+};
+
+static int __init example_init(void)
+{
+#ifdef DYNAMIC
+ int ret;
+
+ ret = kfifo_alloc(&test, FIFO_SIZE, GFP_KERNEL);
+ if (ret) {
+ printk(KERN_ERR "error kfifo_alloc\n");
+ return ret;
+ }
+#else
+ INIT_KFIFO(test);
+#endif
+ if (testfunc() < 0) {
+#ifdef DYNAMIC
+ kfifo_free(&test);
+#endif
+ return -EIO;
+ }
+
+ if (proc_create(PROC_FIFO, 0, NULL, &fifo_fops) == NULL) {
+#ifdef DYNAMIC
+ kfifo_free(&test);
+#endif
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void __exit example_exit(void)
+{
+ remove_proc_entry(PROC_FIFO, NULL);
+#ifdef DYNAMIC
+ kfifo_free(&test);
+#endif
+}
+
+module_init(example_init);
+module_exit(example_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Stefani Seibold <stefani@seibold.net>");
diff --git a/samples/kobject/Makefile b/samples/kobject/Makefile
new file mode 100644
index 000000000..4a194203c
--- /dev/null
+++ b/samples/kobject/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SAMPLE_KOBJECT) += kobject-example.o kset-example.o
diff --git a/samples/kobject/kobject-example.c b/samples/kobject/kobject-example.c
new file mode 100644
index 000000000..2e0740f06
--- /dev/null
+++ b/samples/kobject/kobject-example.c
@@ -0,0 +1,146 @@
+/*
+ * Sample kobject implementation
+ *
+ * Copyright (C) 2004-2007 Greg Kroah-Hartman <greg@kroah.com>
+ * Copyright (C) 2007 Novell Inc.
+ *
+ * Released under the GPL version 2 only.
+ *
+ */
+#include <linux/kobject.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+/*
+ * This module shows how to create a simple subdirectory in sysfs called
+ * /sys/kernel/kobject-example In that directory, 3 files are created:
+ * "foo", "baz", and "bar". If an integer is written to these files, it can be
+ * later read out of it.
+ */
+
+static int foo;
+static int baz;
+static int bar;
+
+/*
+ * The "foo" file where a static variable is read from and written to.
+ */
+static ssize_t foo_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", foo);
+}
+
+static ssize_t foo_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ret;
+
+ ret = kstrtoint(buf, 10, &foo);
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+/* Sysfs attributes cannot be world-writable. */
+static struct kobj_attribute foo_attribute =
+ __ATTR(foo, 0664, foo_show, foo_store);
+
+/*
+ * More complex function where we determine which variable is being accessed by
+ * looking at the attribute for the "baz" and "bar" files.
+ */
+static ssize_t b_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ int var;
+
+ if (strcmp(attr->attr.name, "baz") == 0)
+ var = baz;
+ else
+ var = bar;
+ return sprintf(buf, "%d\n", var);
+}
+
+static ssize_t b_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int var, ret;
+
+ ret = kstrtoint(buf, 10, &var);
+ if (ret < 0)
+ return ret;
+
+ if (strcmp(attr->attr.name, "baz") == 0)
+ baz = var;
+ else
+ bar = var;
+ return count;
+}
+
+static struct kobj_attribute baz_attribute =
+ __ATTR(baz, 0664, b_show, b_store);
+static struct kobj_attribute bar_attribute =
+ __ATTR(bar, 0664, b_show, b_store);
+
+
+/*
+ * Create a group of attributes so that we can create and destroy them all
+ * at once.
+ */
+static struct attribute *attrs[] = {
+ &foo_attribute.attr,
+ &baz_attribute.attr,
+ &bar_attribute.attr,
+ NULL, /* need to NULL terminate the list of attributes */
+};
+
+/*
+ * An unnamed attribute group will put all of the attributes directly in
+ * the kobject directory. If we specify a name, a subdirectory will be
+ * created for the attributes with the directory being the name of the
+ * attribute group.
+ */
+static struct attribute_group attr_group = {
+ .attrs = attrs,
+};
+
+static struct kobject *example_kobj;
+
+static int __init example_init(void)
+{
+ int retval;
+
+ /*
+ * Create a simple kobject with the name of "kobject_example",
+ * located under /sys/kernel/
+ *
+ * As this is a simple directory, no uevent will be sent to
+ * userspace. That is why this function should not be used for
+ * any type of dynamic kobjects, where the name and number are
+ * not known ahead of time.
+ */
+ example_kobj = kobject_create_and_add("kobject_example", kernel_kobj);
+ if (!example_kobj)
+ return -ENOMEM;
+
+ /* Create the files associated with this kobject */
+ retval = sysfs_create_group(example_kobj, &attr_group);
+ if (retval)
+ kobject_put(example_kobj);
+
+ return retval;
+}
+
+static void __exit example_exit(void)
+{
+ kobject_put(example_kobj);
+}
+
+module_init(example_init);
+module_exit(example_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Greg Kroah-Hartman <greg@kroah.com>");
diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c
new file mode 100644
index 000000000..a55bff52b
--- /dev/null
+++ b/samples/kobject/kset-example.c
@@ -0,0 +1,289 @@
+/*
+ * Sample kset and ktype implementation
+ *
+ * Copyright (C) 2004-2007 Greg Kroah-Hartman <greg@kroah.com>
+ * Copyright (C) 2007 Novell Inc.
+ *
+ * Released under the GPL version 2 only.
+ *
+ */
+#include <linux/kobject.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+/*
+ * This module shows how to create a kset in sysfs called
+ * /sys/kernel/kset-example
+ * Then tree kobjects are created and assigned to this kset, "foo", "baz",
+ * and "bar". In those kobjects, attributes of the same name are also
+ * created and if an integer is written to these files, it can be later
+ * read out of it.
+ */
+
+
+/*
+ * This is our "object" that we will create a few of and register them with
+ * sysfs.
+ */
+struct foo_obj {
+ struct kobject kobj;
+ int foo;
+ int baz;
+ int bar;
+};
+#define to_foo_obj(x) container_of(x, struct foo_obj, kobj)
+
+/* a custom attribute that works just for a struct foo_obj. */
+struct foo_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct foo_obj *foo, struct foo_attribute *attr, char *buf);
+ ssize_t (*store)(struct foo_obj *foo, struct foo_attribute *attr, const char *buf, size_t count);
+};
+#define to_foo_attr(x) container_of(x, struct foo_attribute, attr)
+
+/*
+ * The default show function that must be passed to sysfs. This will be
+ * called by sysfs for whenever a show function is called by the user on a
+ * sysfs file associated with the kobjects we have registered. We need to
+ * transpose back from a "default" kobject to our custom struct foo_obj and
+ * then call the show function for that specific object.
+ */
+static ssize_t foo_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct foo_attribute *attribute;
+ struct foo_obj *foo;
+
+ attribute = to_foo_attr(attr);
+ foo = to_foo_obj(kobj);
+
+ if (!attribute->show)
+ return -EIO;
+
+ return attribute->show(foo, attribute, buf);
+}
+
+/*
+ * Just like the default show function above, but this one is for when the
+ * sysfs "store" is requested (when a value is written to a file.)
+ */
+static ssize_t foo_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct foo_attribute *attribute;
+ struct foo_obj *foo;
+
+ attribute = to_foo_attr(attr);
+ foo = to_foo_obj(kobj);
+
+ if (!attribute->store)
+ return -EIO;
+
+ return attribute->store(foo, attribute, buf, len);
+}
+
+/* Our custom sysfs_ops that we will associate with our ktype later on */
+static const struct sysfs_ops foo_sysfs_ops = {
+ .show = foo_attr_show,
+ .store = foo_attr_store,
+};
+
+/*
+ * The release function for our object. This is REQUIRED by the kernel to
+ * have. We free the memory held in our object here.
+ *
+ * NEVER try to get away with just a "blank" release function to try to be
+ * smarter than the kernel. Turns out, no one ever is...
+ */
+static void foo_release(struct kobject *kobj)
+{
+ struct foo_obj *foo;
+
+ foo = to_foo_obj(kobj);
+ kfree(foo);
+}
+
+/*
+ * The "foo" file where the .foo variable is read from and written to.
+ */
+static ssize_t foo_show(struct foo_obj *foo_obj, struct foo_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", foo_obj->foo);
+}
+
+static ssize_t foo_store(struct foo_obj *foo_obj, struct foo_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ret;
+
+ ret = kstrtoint(buf, 10, &foo_obj->foo);
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+/* Sysfs attributes cannot be world-writable. */
+static struct foo_attribute foo_attribute =
+ __ATTR(foo, 0664, foo_show, foo_store);
+
+/*
+ * More complex function where we determine which variable is being accessed by
+ * looking at the attribute for the "baz" and "bar" files.
+ */
+static ssize_t b_show(struct foo_obj *foo_obj, struct foo_attribute *attr,
+ char *buf)
+{
+ int var;
+
+ if (strcmp(attr->attr.name, "baz") == 0)
+ var = foo_obj->baz;
+ else
+ var = foo_obj->bar;
+ return sprintf(buf, "%d\n", var);
+}
+
+static ssize_t b_store(struct foo_obj *foo_obj, struct foo_attribute *attr,
+ const char *buf, size_t count)
+{
+ int var, ret;
+
+ ret = kstrtoint(buf, 10, &var);
+ if (ret < 0)
+ return ret;
+
+ if (strcmp(attr->attr.name, "baz") == 0)
+ foo_obj->baz = var;
+ else
+ foo_obj->bar = var;
+ return count;
+}
+
+static struct foo_attribute baz_attribute =
+ __ATTR(baz, 0664, b_show, b_store);
+static struct foo_attribute bar_attribute =
+ __ATTR(bar, 0664, b_show, b_store);
+
+/*
+ * Create a group of attributes so that we can create and destroy them all
+ * at once.
+ */
+static struct attribute *foo_default_attrs[] = {
+ &foo_attribute.attr,
+ &baz_attribute.attr,
+ &bar_attribute.attr,
+ NULL, /* need to NULL terminate the list of attributes */
+};
+
+/*
+ * Our own ktype for our kobjects. Here we specify our sysfs ops, the
+ * release function, and the set of default attributes we want created
+ * whenever a kobject of this type is registered with the kernel.
+ */
+static struct kobj_type foo_ktype = {
+ .sysfs_ops = &foo_sysfs_ops,
+ .release = foo_release,
+ .default_attrs = foo_default_attrs,
+};
+
+static struct kset *example_kset;
+static struct foo_obj *foo_obj;
+static struct foo_obj *bar_obj;
+static struct foo_obj *baz_obj;
+
+static struct foo_obj *create_foo_obj(const char *name)
+{
+ struct foo_obj *foo;
+ int retval;
+
+ /* allocate the memory for the whole object */
+ foo = kzalloc(sizeof(*foo), GFP_KERNEL);
+ if (!foo)
+ return NULL;
+
+ /*
+ * As we have a kset for this kobject, we need to set it before calling
+ * the kobject core.
+ */
+ foo->kobj.kset = example_kset;
+
+ /*
+ * Initialize and add the kobject to the kernel. All the default files
+ * will be created here. As we have already specified a kset for this
+ * kobject, we don't have to set a parent for the kobject, the kobject
+ * will be placed beneath that kset automatically.
+ */
+ retval = kobject_init_and_add(&foo->kobj, &foo_ktype, NULL, "%s", name);
+ if (retval) {
+ kobject_put(&foo->kobj);
+ return NULL;
+ }
+
+ /*
+ * We are always responsible for sending the uevent that the kobject
+ * was added to the system.
+ */
+ kobject_uevent(&foo->kobj, KOBJ_ADD);
+
+ return foo;
+}
+
+static void destroy_foo_obj(struct foo_obj *foo)
+{
+ kobject_put(&foo->kobj);
+}
+
+static int __init example_init(void)
+{
+ /*
+ * Create a kset with the name of "kset_example",
+ * located under /sys/kernel/
+ */
+ example_kset = kset_create_and_add("kset_example", NULL, kernel_kobj);
+ if (!example_kset)
+ return -ENOMEM;
+
+ /*
+ * Create three objects and register them with our kset
+ */
+ foo_obj = create_foo_obj("foo");
+ if (!foo_obj)
+ goto foo_error;
+
+ bar_obj = create_foo_obj("bar");
+ if (!bar_obj)
+ goto bar_error;
+
+ baz_obj = create_foo_obj("baz");
+ if (!baz_obj)
+ goto baz_error;
+
+ return 0;
+
+baz_error:
+ destroy_foo_obj(bar_obj);
+bar_error:
+ destroy_foo_obj(foo_obj);
+foo_error:
+ kset_unregister(example_kset);
+ return -EINVAL;
+}
+
+static void __exit example_exit(void)
+{
+ destroy_foo_obj(baz_obj);
+ destroy_foo_obj(bar_obj);
+ destroy_foo_obj(foo_obj);
+ kset_unregister(example_kset);
+}
+
+module_init(example_init);
+module_exit(example_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Greg Kroah-Hartman <greg@kroah.com>");
diff --git a/samples/kprobes/Makefile b/samples/kprobes/Makefile
new file mode 100644
index 000000000..68739bc4f
--- /dev/null
+++ b/samples/kprobes/Makefile
@@ -0,0 +1,5 @@
+# builds the kprobes example kernel modules;
+# then to use one (as root): insmod <module_name.ko>
+
+obj-$(CONFIG_SAMPLE_KPROBES) += kprobe_example.o jprobe_example.o
+obj-$(CONFIG_SAMPLE_KRETPROBES) += kretprobe_example.o
diff --git a/samples/kprobes/jprobe_example.c b/samples/kprobes/jprobe_example.c
new file mode 100644
index 000000000..9119ac6a8
--- /dev/null
+++ b/samples/kprobes/jprobe_example.c
@@ -0,0 +1,67 @@
+/*
+ * Here's a sample kernel module showing the use of jprobes to dump
+ * the arguments of do_fork().
+ *
+ * For more information on theory of operation of jprobes, see
+ * Documentation/kprobes.txt
+ *
+ * Build and insert the kernel module as done in the kprobe example.
+ * You will see the trace data in /var/log/messages and on the
+ * console whenever do_fork() is invoked to create a new process.
+ * (Some messages may be suppressed if syslogd is configured to
+ * eliminate duplicate messages.)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+
+/*
+ * Jumper probe for do_fork.
+ * Mirror principle enables access to arguments of the probed routine
+ * from the probe handler.
+ */
+
+/* Proxy routine having the same arguments as actual do_fork() routine */
+static long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
+ unsigned long stack_size, int __user *parent_tidptr,
+ int __user *child_tidptr)
+{
+ pr_info("jprobe: clone_flags = 0x%lx, stack_start = 0x%lx "
+ "stack_size = 0x%lx\n", clone_flags, stack_start, stack_size);
+
+ /* Always end with a call to jprobe_return(). */
+ jprobe_return();
+ return 0;
+}
+
+static struct jprobe my_jprobe = {
+ .entry = jdo_fork,
+ .kp = {
+ .symbol_name = "do_fork",
+ },
+};
+
+static int __init jprobe_init(void)
+{
+ int ret;
+
+ ret = register_jprobe(&my_jprobe);
+ if (ret < 0) {
+ printk(KERN_INFO "register_jprobe failed, returned %d\n", ret);
+ return -1;
+ }
+ printk(KERN_INFO "Planted jprobe at %p, handler addr %p\n",
+ my_jprobe.kp.addr, my_jprobe.entry);
+ return 0;
+}
+
+static void __exit jprobe_exit(void)
+{
+ unregister_jprobe(&my_jprobe);
+ printk(KERN_INFO "jprobe at %p unregistered\n", my_jprobe.kp.addr);
+}
+
+module_init(jprobe_init)
+module_exit(jprobe_exit)
+MODULE_LICENSE("GPL");
diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c
new file mode 100644
index 000000000..366db1a9f
--- /dev/null
+++ b/samples/kprobes/kprobe_example.c
@@ -0,0 +1,109 @@
+/*
+ * NOTE: This example is works on x86 and powerpc.
+ * Here's a sample kernel module showing the use of kprobes to dump a
+ * stack trace and selected registers when do_fork() is called.
+ *
+ * For more information on theory of operation of kprobes, see
+ * Documentation/kprobes.txt
+ *
+ * You will see the trace data in /var/log/messages and on the console
+ * whenever do_fork() is invoked to create a new process.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+
+/* For each probe you need to allocate a kprobe structure */
+static struct kprobe kp = {
+ .symbol_name = "do_fork",
+};
+
+/* kprobe pre_handler: called just before the probed instruction is executed */
+static int handler_pre(struct kprobe *p, struct pt_regs *regs)
+{
+#ifdef CONFIG_X86
+ printk(KERN_INFO "pre_handler: p->addr = 0x%p, ip = %lx,"
+ " flags = 0x%lx\n",
+ p->addr, regs->ip, regs->flags);
+#endif
+#ifdef CONFIG_PPC
+ printk(KERN_INFO "pre_handler: p->addr = 0x%p, nip = 0x%lx,"
+ " msr = 0x%lx\n",
+ p->addr, regs->nip, regs->msr);
+#endif
+#ifdef CONFIG_MIPS
+ printk(KERN_INFO "pre_handler: p->addr = 0x%p, epc = 0x%lx,"
+ " status = 0x%lx\n",
+ p->addr, regs->cp0_epc, regs->cp0_status);
+#endif
+#ifdef CONFIG_TILEGX
+ printk(KERN_INFO "pre_handler: p->addr = 0x%p, pc = 0x%lx,"
+ " ex1 = 0x%lx\n",
+ p->addr, regs->pc, regs->ex1);
+#endif
+
+ /* A dump_stack() here will give a stack backtrace */
+ return 0;
+}
+
+/* kprobe post_handler: called after the probed instruction is executed */
+static void handler_post(struct kprobe *p, struct pt_regs *regs,
+ unsigned long flags)
+{
+#ifdef CONFIG_X86
+ printk(KERN_INFO "post_handler: p->addr = 0x%p, flags = 0x%lx\n",
+ p->addr, regs->flags);
+#endif
+#ifdef CONFIG_PPC
+ printk(KERN_INFO "post_handler: p->addr = 0x%p, msr = 0x%lx\n",
+ p->addr, regs->msr);
+#endif
+#ifdef CONFIG_MIPS
+ printk(KERN_INFO "post_handler: p->addr = 0x%p, status = 0x%lx\n",
+ p->addr, regs->cp0_status);
+#endif
+#ifdef CONFIG_TILEGX
+ printk(KERN_INFO "post_handler: p->addr = 0x%p, ex1 = 0x%lx\n",
+ p->addr, regs->ex1);
+#endif
+}
+
+/*
+ * fault_handler: this is called if an exception is generated for any
+ * instruction within the pre- or post-handler, or when Kprobes
+ * single-steps the probed instruction.
+ */
+static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
+{
+ printk(KERN_INFO "fault_handler: p->addr = 0x%p, trap #%dn",
+ p->addr, trapnr);
+ /* Return 0 because we don't handle the fault. */
+ return 0;
+}
+
+static int __init kprobe_init(void)
+{
+ int ret;
+ kp.pre_handler = handler_pre;
+ kp.post_handler = handler_post;
+ kp.fault_handler = handler_fault;
+
+ ret = register_kprobe(&kp);
+ if (ret < 0) {
+ printk(KERN_INFO "register_kprobe failed, returned %d\n", ret);
+ return ret;
+ }
+ printk(KERN_INFO "Planted kprobe at %p\n", kp.addr);
+ return 0;
+}
+
+static void __exit kprobe_exit(void)
+{
+ unregister_kprobe(&kp);
+ printk(KERN_INFO "kprobe at %p unregistered\n", kp.addr);
+}
+
+module_init(kprobe_init)
+module_exit(kprobe_exit)
+MODULE_LICENSE("GPL");
diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c
new file mode 100644
index 000000000..1041b6731
--- /dev/null
+++ b/samples/kprobes/kretprobe_example.c
@@ -0,0 +1,107 @@
+/*
+ * kretprobe_example.c
+ *
+ * Here's a sample kernel module showing the use of return probes to
+ * report the return value and total time taken for probed function
+ * to run.
+ *
+ * usage: insmod kretprobe_example.ko func=<func_name>
+ *
+ * If no func_name is specified, do_fork is instrumented
+ *
+ * For more information on theory of operation of kretprobes, see
+ * Documentation/kprobes.txt
+ *
+ * Build and insert the kernel module as done in the kprobe example.
+ * You will see the trace data in /var/log/messages and on the console
+ * whenever the probed function returns. (Some messages may be suppressed
+ * if syslogd is configured to eliminate duplicate messages.)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+#include <linux/ktime.h>
+#include <linux/limits.h>
+#include <linux/sched.h>
+
+static char func_name[NAME_MAX] = "do_fork";
+module_param_string(func, func_name, NAME_MAX, S_IRUGO);
+MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the"
+ " function's execution time");
+
+/* per-instance private data */
+struct my_data {
+ ktime_t entry_stamp;
+};
+
+/* Here we use the entry_hanlder to timestamp function entry */
+static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+ struct my_data *data;
+
+ if (!current->mm)
+ return 1; /* Skip kernel threads */
+
+ data = (struct my_data *)ri->data;
+ data->entry_stamp = ktime_get();
+ return 0;
+}
+
+/*
+ * Return-probe handler: Log the return value and duration. Duration may turn
+ * out to be zero consistently, depending upon the granularity of time
+ * accounting on the platform.
+ */
+static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+ int retval = regs_return_value(regs);
+ struct my_data *data = (struct my_data *)ri->data;
+ s64 delta;
+ ktime_t now;
+
+ now = ktime_get();
+ delta = ktime_to_ns(ktime_sub(now, data->entry_stamp));
+ printk(KERN_INFO "%s returned %d and took %lld ns to execute\n",
+ func_name, retval, (long long)delta);
+ return 0;
+}
+
+static struct kretprobe my_kretprobe = {
+ .handler = ret_handler,
+ .entry_handler = entry_handler,
+ .data_size = sizeof(struct my_data),
+ /* Probe up to 20 instances concurrently. */
+ .maxactive = 20,
+};
+
+static int __init kretprobe_init(void)
+{
+ int ret;
+
+ my_kretprobe.kp.symbol_name = func_name;
+ ret = register_kretprobe(&my_kretprobe);
+ if (ret < 0) {
+ printk(KERN_INFO "register_kretprobe failed, returned %d\n",
+ ret);
+ return -1;
+ }
+ printk(KERN_INFO "Planted return probe at %s: %p\n",
+ my_kretprobe.kp.symbol_name, my_kretprobe.kp.addr);
+ return 0;
+}
+
+static void __exit kretprobe_exit(void)
+{
+ unregister_kretprobe(&my_kretprobe);
+ printk(KERN_INFO "kretprobe at %p unregistered\n",
+ my_kretprobe.kp.addr);
+
+ /* nmissed > 0 suggests that maxactive was set too low. */
+ printk(KERN_INFO "Missed probing %d instances of %s\n",
+ my_kretprobe.nmissed, my_kretprobe.kp.symbol_name);
+}
+
+module_init(kretprobe_init)
+module_exit(kretprobe_exit)
+MODULE_LICENSE("GPL");
diff --git a/samples/livepatch/Makefile b/samples/livepatch/Makefile
new file mode 100644
index 000000000..10319d7ea
--- /dev/null
+++ b/samples/livepatch/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SAMPLE_LIVEPATCH) += livepatch-sample.o
diff --git a/samples/livepatch/livepatch-sample.c b/samples/livepatch/livepatch-sample.c
new file mode 100644
index 000000000..fb8c8614e
--- /dev/null
+++ b/samples/livepatch/livepatch-sample.c
@@ -0,0 +1,91 @@
+/*
+ * livepatch-sample.c - Kernel Live Patching Sample Module
+ *
+ * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/livepatch.h>
+
+/*
+ * This (dumb) live patch overrides the function that prints the
+ * kernel boot cmdline when /proc/cmdline is read.
+ *
+ * Example:
+ *
+ * $ cat /proc/cmdline
+ * <your cmdline>
+ *
+ * $ insmod livepatch-sample.ko
+ * $ cat /proc/cmdline
+ * this has been live patched
+ *
+ * $ echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled
+ * $ cat /proc/cmdline
+ * <your cmdline>
+ */
+
+#include <linux/seq_file.h>
+static int livepatch_cmdline_proc_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "%s\n", "this has been live patched");
+ return 0;
+}
+
+static struct klp_func funcs[] = {
+ {
+ .old_name = "cmdline_proc_show",
+ .new_func = livepatch_cmdline_proc_show,
+ }, { }
+};
+
+static struct klp_object objs[] = {
+ {
+ /* name being NULL means vmlinux */
+ .funcs = funcs,
+ }, { }
+};
+
+static struct klp_patch patch = {
+ .mod = THIS_MODULE,
+ .objs = objs,
+};
+
+static int livepatch_init(void)
+{
+ int ret;
+
+ ret = klp_register_patch(&patch);
+ if (ret)
+ return ret;
+ ret = klp_enable_patch(&patch);
+ if (ret) {
+ WARN_ON(klp_unregister_patch(&patch));
+ return ret;
+ }
+ return 0;
+}
+
+static void livepatch_exit(void)
+{
+ WARN_ON(klp_disable_patch(&patch));
+ WARN_ON(klp_unregister_patch(&patch));
+}
+
+module_init(livepatch_init);
+module_exit(livepatch_exit);
+MODULE_LICENSE("GPL");
diff --git a/samples/pktgen/pktgen.conf-1-1 b/samples/pktgen/pktgen.conf-1-1
new file mode 100644
index 000000000..f91daad9e
--- /dev/null
+++ b/samples/pktgen/pktgen.conf-1-1
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+#modprobe pktgen
+
+
+function pgset() {
+ local result
+
+ echo $1 > $PGDEV
+
+ result=`cat $PGDEV | fgrep "Result: OK:"`
+ if [ "$result" = "" ]; then
+ cat $PGDEV | fgrep Result:
+ fi
+}
+
+# Config Start Here -----------------------------------------------------------
+
+
+# thread config
+# Each CPU has its own thread. One CPU example. We add eth1.
+
+PGDEV=/proc/net/pktgen/kpktgend_0
+ echo "Removing all devices"
+ pgset "rem_device_all"
+ echo "Adding eth1"
+ pgset "add_device eth1"
+
+
+# device config
+# delay 0 means maximum speed.
+
+CLONE_SKB="clone_skb 1000000"
+# NIC adds 4 bytes CRC
+PKT_SIZE="pkt_size 60"
+
+# COUNT 0 means forever
+#COUNT="count 0"
+COUNT="count 10000000"
+DELAY="delay 0"
+
+PGDEV=/proc/net/pktgen/eth1
+ echo "Configuring $PGDEV"
+ pgset "$COUNT"
+ pgset "$CLONE_SKB"
+ pgset "$PKT_SIZE"
+ pgset "$DELAY"
+ pgset "dst 10.10.11.2"
+ pgset "dst_mac 00:04:23:08:91:dc"
+
+
+# Time to run
+PGDEV=/proc/net/pktgen/pgctrl
+
+ echo "Running... ctrl^C to stop"
+ trap true INT
+ pgset "start"
+ echo "Done"
+ cat /proc/net/pktgen/eth1
diff --git a/samples/pktgen/pktgen.conf-1-1-flows b/samples/pktgen/pktgen.conf-1-1-flows
new file mode 100755
index 000000000..081749c97
--- /dev/null
+++ b/samples/pktgen/pktgen.conf-1-1-flows
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+#modprobe pktgen
+
+
+function pgset() {
+ local result
+
+ echo $1 > $PGDEV
+
+ result=`cat $PGDEV | fgrep "Result: OK:"`
+ if [ "$result" = "" ]; then
+ cat $PGDEV | fgrep Result:
+ fi
+}
+
+# Config Start Here -----------------------------------------------------------
+
+
+# thread config
+# Each CPU has its own thread. One CPU example. We add eth1.
+
+PGDEV=/proc/net/pktgen/kpktgend_0
+ echo "Removing all devices"
+ pgset "rem_device_all"
+ echo "Adding eth1"
+ pgset "add_device eth1"
+
+
+# device config
+# delay 0
+# We need to do alloc for every skb since we cannot clone here.
+
+CLONE_SKB="clone_skb 0"
+# NIC adds 4 bytes CRC
+PKT_SIZE="pkt_size 60"
+
+# COUNT 0 means forever
+#COUNT="count 0"
+COUNT="count 10000000"
+DELAY="delay 0"
+
+PGDEV=/proc/net/pktgen/eth1
+ echo "Configuring $PGDEV"
+ pgset "$COUNT"
+ pgset "$CLONE_SKB"
+ pgset "$PKT_SIZE"
+ pgset "$DELAY"
+ # Random address with in the min-max range
+ pgset "flag IPDST_RND"
+ pgset "dst_min 10.0.0.0"
+ pgset "dst_max 10.255.255.255"
+
+ # 8k Concurrent flows at 4 pkts
+ pgset "flows 8192"
+ pgset "flowlen 4"
+
+ pgset "dst_mac 00:04:23:08:91:dc"
+
+# Time to run
+PGDEV=/proc/net/pktgen/pgctrl
+
+ echo "Running... ctrl^C to stop"
+ trap true INT
+ pgset "start"
+ echo "Done"
+ cat /proc/net/pktgen/eth1
diff --git a/samples/pktgen/pktgen.conf-1-1-ip6 b/samples/pktgen/pktgen.conf-1-1-ip6
new file mode 100755
index 000000000..0b9ffd47f
--- /dev/null
+++ b/samples/pktgen/pktgen.conf-1-1-ip6
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+#modprobe pktgen
+
+
+function pgset() {
+ local result
+
+ echo $1 > $PGDEV
+
+ result=`cat $PGDEV | fgrep "Result: OK:"`
+ if [ "$result" = "" ]; then
+ cat $PGDEV | fgrep Result:
+ fi
+}
+
+# Config Start Here -----------------------------------------------------------
+
+
+# thread config
+# Each CPU has its own thread. One CPU example. We add eth1.
+# IPv6. Note increase in minimal packet length
+
+PGDEV=/proc/net/pktgen/kpktgend_0
+ echo "Removing all devices"
+ pgset "rem_device_all"
+ echo "Adding eth1"
+ pgset "add_device eth1"
+
+
+# device config
+# delay 0
+
+CLONE_SKB="clone_skb 1000000"
+# NIC adds 4 bytes CRC
+PKT_SIZE="pkt_size 66"
+
+# COUNT 0 means forever
+#COUNT="count 0"
+COUNT="count 10000000"
+DELAY="delay 0"
+
+PGDEV=/proc/net/pktgen/eth1
+ echo "Configuring $PGDEV"
+ pgset "$COUNT"
+ pgset "$CLONE_SKB"
+ pgset "$PKT_SIZE"
+ pgset "$DELAY"
+ pgset "dst6 fec0::1"
+ pgset "src6 fec0::2"
+ pgset "dst_mac 00:04:23:08:91:dc"
+
+# Time to run
+PGDEV=/proc/net/pktgen/pgctrl
+
+ echo "Running... ctrl^C to stop"
+ trap true INT
+ pgset "start"
+ echo "Done"
+ cat /proc/net/pktgen/eth1
diff --git a/samples/pktgen/pktgen.conf-1-1-ip6-rdos b/samples/pktgen/pktgen.conf-1-1-ip6-rdos
new file mode 100755
index 000000000..ad98e5f40
--- /dev/null
+++ b/samples/pktgen/pktgen.conf-1-1-ip6-rdos
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+#modprobe pktgen
+
+
+function pgset() {
+ local result
+
+ echo $1 > $PGDEV
+
+ result=`cat $PGDEV | fgrep "Result: OK:"`
+ if [ "$result" = "" ]; then
+ cat $PGDEV | fgrep Result:
+ fi
+}
+
+# Config Start Here -----------------------------------------------------------
+
+
+# thread config
+# Each CPU has its own thread. One CPU example. We add eth1.
+# IPv6. Note increase in minimal packet length
+
+PGDEV=/proc/net/pktgen/kpktgend_0
+ echo "Removing all devices"
+ pgset "rem_device_all"
+ echo "Adding eth1"
+ pgset "add_device eth1"
+
+
+# device config
+# delay 0 means maximum speed.
+
+# We need to do alloc for every skb since we cannot clone here.
+CLONE_SKB="clone_skb 0"
+
+# NIC adds 4 bytes CRC
+PKT_SIZE="pkt_size 66"
+
+# COUNT 0 means forever
+#COUNT="count 0"
+COUNT="count 10000000"
+DELAY="delay 0"
+
+PGDEV=/proc/net/pktgen/eth1
+ echo "Configuring $PGDEV"
+ pgset "$COUNT"
+ pgset "$CLONE_SKB"
+ pgset "$PKT_SIZE"
+ pgset "$DELAY"
+ pgset "dst6_min fec0::1"
+ pgset "dst6_max fec0::FFFF:FFFF"
+
+ pgset "dst_mac 00:04:23:08:91:dc"
+
+# Time to run
+PGDEV=/proc/net/pktgen/pgctrl
+
+ echo "Running... ctrl^C to stop"
+ trap true INT
+ pgset "start"
+ echo "Done"
+ cat /proc/net/pktgen/eth1
diff --git a/samples/pktgen/pktgen.conf-1-1-rdos b/samples/pktgen/pktgen.conf-1-1-rdos
new file mode 100755
index 000000000..c7553be49
--- /dev/null
+++ b/samples/pktgen/pktgen.conf-1-1-rdos
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+#modprobe pktgen
+
+
+function pgset() {
+ local result
+
+ echo $1 > $PGDEV
+
+ result=`cat $PGDEV | fgrep "Result: OK:"`
+ if [ "$result" = "" ]; then
+ cat $PGDEV | fgrep Result:
+ fi
+}
+
+# Config Start Here -----------------------------------------------------------
+
+
+# thread config
+# Each CPU has its own thread. One CPU example. We add eth1.
+
+PGDEV=/proc/net/pktgen/kpktgend_0
+ echo "Removing all devices"
+ pgset "rem_device_all"
+ echo "Adding eth1"
+ pgset "add_device eth1"
+
+
+# device config
+# delay 0
+
+# We need to do alloc for every skb since we cannot clone here.
+
+CLONE_SKB="clone_skb 0"
+# NIC adds 4 bytes CRC
+PKT_SIZE="pkt_size 60"
+
+# COUNT 0 means forever
+#COUNT="count 0"
+COUNT="count 10000000"
+DELAY="delay 0"
+
+PGDEV=/proc/net/pktgen/eth1
+ echo "Configuring $PGDEV"
+ pgset "$COUNT"
+ pgset "$CLONE_SKB"
+ pgset "$PKT_SIZE"
+ pgset "$DELAY"
+ # Random address with in the min-max range
+ pgset "flag IPDST_RND"
+ pgset "dst_min 10.0.0.0"
+ pgset "dst_max 10.255.255.255"
+
+ pgset "dst_mac 00:04:23:08:91:dc"
+
+# Time to run
+PGDEV=/proc/net/pktgen/pgctrl
+
+ echo "Running... ctrl^C to stop"
+ trap true INT
+ pgset "start"
+ echo "Done"
+ cat /proc/net/pktgen/eth1
diff --git a/samples/pktgen/pktgen.conf-1-2 b/samples/pktgen/pktgen.conf-1-2
new file mode 100755
index 000000000..ba4eb26e1
--- /dev/null
+++ b/samples/pktgen/pktgen.conf-1-2
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+#modprobe pktgen
+
+
+function pgset() {
+ local result
+
+ echo $1 > $PGDEV
+
+ result=`cat $PGDEV | fgrep "Result: OK:"`
+ if [ "$result" = "" ]; then
+ cat $PGDEV | fgrep Result:
+ fi
+}
+
+# Config Start Here -----------------------------------------------------------
+
+
+# thread config
+# One CPU means one thread. One CPU example. We add eth1, eth2 respectivly.
+
+PGDEV=/proc/net/pktgen/kpktgend_0
+ echo "Removing all devices"
+ pgset "rem_device_all"
+ echo "Adding eth1"
+ pgset "add_device eth1"
+ echo "Adding eth2"
+ pgset "add_device eth2"
+
+
+# device config
+# delay 0 means maximum speed.
+
+CLONE_SKB="clone_skb 1000000"
+# NIC adds 4 bytes CRC
+PKT_SIZE="pkt_size 60"
+
+# COUNT 0 means forever
+#COUNT="count 0"
+COUNT="count 10000000"
+DELAY="delay 0"
+
+PGDEV=/proc/net/pktgen/eth1
+ echo "Configuring $PGDEV"
+ pgset "$COUNT"
+ pgset "$CLONE_SKB"
+ pgset "$PKT_SIZE"
+ pgset "$DELAY"
+ pgset "dst 10.10.11.2"
+ pgset "dst_mac 00:04:23:08:91:dc"
+
+PGDEV=/proc/net/pktgen/eth2
+ echo "Configuring $PGDEV"
+ pgset "$COUNT"
+ pgset "$CLONE_SKB"
+ pgset "$PKT_SIZE"
+ pgset "$DELAY"
+ pgset "dst 192.168.2.2"
+ pgset "dst_mac 00:04:23:08:91:de"
+
+# Time to run
+PGDEV=/proc/net/pktgen/pgctrl
+
+ echo "Running... ctrl^C to stop"
+ trap true INT
+ pgset "start"
+ echo "Done"
+ cat /proc/net/pktgen/eth1 /proc/net/pktgen/eth2
diff --git a/samples/pktgen/pktgen.conf-2-1 b/samples/pktgen/pktgen.conf-2-1
new file mode 100644
index 000000000..e108e97d6
--- /dev/null
+++ b/samples/pktgen/pktgen.conf-2-1
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+#modprobe pktgen
+
+
+function pgset() {
+ local result
+
+ echo $1 > $PGDEV
+
+ result=`cat $PGDEV | fgrep "Result: OK:"`
+ if [ "$result" = "" ]; then
+ cat $PGDEV | fgrep Result:
+ fi
+}
+
+# Config Start Here -----------------------------------------------------------
+
+
+# thread config
+# Each CPU has its own thread. Two CPU example. We add eth1 to the first
+# and leave the second idle.
+
+PGDEV=/proc/net/pktgen/kpktgend_0
+ echo "Removing all devices"
+ pgset "rem_device_all"
+ echo "Adding eth1"
+ pgset "add_device eth1"
+
+# We need to remove old config since we dont use this thread. We can only
+# one NIC on one CPU due to affinity reasons.
+
+PGDEV=/proc/net/pktgen/kpktgend_1
+ echo "Removing all devices"
+ pgset "rem_device_all"
+
+# device config
+# delay 0 means maximum speed.
+
+CLONE_SKB="clone_skb 1000000"
+# NIC adds 4 bytes CRC
+PKT_SIZE="pkt_size 60"
+
+# COUNT 0 means forever
+#COUNT="count 0"
+COUNT="count 10000000"
+DELAY="delay 0"
+
+PGDEV=/proc/net/pktgen/eth1
+ echo "Configuring $PGDEV"
+ pgset "$COUNT"
+ pgset "$CLONE_SKB"
+ pgset "$PKT_SIZE"
+ pgset "$DELAY"
+ pgset "dst 10.10.11.2"
+ pgset "dst_mac 00:04:23:08:91:dc"
+
+
+# Time to run
+PGDEV=/proc/net/pktgen/pgctrl
+
+ echo "Running... ctrl^C to stop"
+ trap true INT
+ pgset "start"
+ echo "Done"
+ cat /proc/net/pktgen/eth1
diff --git a/samples/pktgen/pktgen.conf-2-2 b/samples/pktgen/pktgen.conf-2-2
new file mode 100644
index 000000000..acea15503
--- /dev/null
+++ b/samples/pktgen/pktgen.conf-2-2
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+#modprobe pktgen
+
+
+function pgset() {
+ local result
+
+ echo $1 > $PGDEV
+
+ result=`cat $PGDEV | fgrep "Result: OK:"`
+ if [ "$result" = "" ]; then
+ cat $PGDEV | fgrep Result:
+ fi
+}
+
+# Config Start Here -----------------------------------------------------------
+
+
+# thread config
+# Each CPU has its own thread. Two CPU example. We add eth1, eth2 respectively.
+
+PGDEV=/proc/net/pktgen/kpktgend_0
+ echo "Removing all devices"
+ pgset "rem_device_all"
+ echo "Adding eth1"
+ pgset "add_device eth1"
+
+PGDEV=/proc/net/pktgen/kpktgend_1
+ echo "Removing all devices"
+ pgset "rem_device_all"
+ echo "Adding eth2"
+ pgset "add_device eth2"
+
+
+# device config
+# delay 0 means maximum speed.
+
+CLONE_SKB="clone_skb 1000000"
+# NIC adds 4 bytes CRC
+PKT_SIZE="pkt_size 60"
+
+# COUNT 0 means forever
+#COUNT="count 0"
+COUNT="count 10000000"
+DELAY="delay 0"
+
+PGDEV=/proc/net/pktgen/eth1
+ echo "Configuring $PGDEV"
+ pgset "$COUNT"
+ pgset "$CLONE_SKB"
+ pgset "$PKT_SIZE"
+ pgset "$DELAY"
+ pgset "dst 10.10.11.2"
+ pgset "dst_mac 00:04:23:08:91:dc"
+
+PGDEV=/proc/net/pktgen/eth2
+ echo "Configuring $PGDEV"
+ pgset "$COUNT"
+ pgset "$CLONE_SKB"
+ pgset "$PKT_SIZE"
+ pgset "$DELAY"
+ pgset "dst 192.168.2.2"
+ pgset "dst_mac 00:04:23:08:91:de"
+
+# Time to run
+PGDEV=/proc/net/pktgen/pgctrl
+
+ echo "Running... ctrl^C to stop"
+ trap true INT
+ pgset "start"
+ echo "Done"
+ cat /proc/net/pktgen/eth1 /proc/net/pktgen/eth2
diff --git a/samples/rpmsg/Makefile b/samples/rpmsg/Makefile
new file mode 100644
index 000000000..2d4973c69
--- /dev/null
+++ b/samples/rpmsg/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SAMPLE_RPMSG_CLIENT) += rpmsg_client_sample.o
diff --git a/samples/rpmsg/rpmsg_client_sample.c b/samples/rpmsg/rpmsg_client_sample.c
new file mode 100644
index 000000000..59b134408
--- /dev/null
+++ b/samples/rpmsg/rpmsg_client_sample.c
@@ -0,0 +1,100 @@
+/*
+ * Remote processor messaging - sample client driver
+ *
+ * Copyright (C) 2011 Texas Instruments, Inc.
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * Ohad Ben-Cohen <ohad@wizery.com>
+ * Brian Swetland <swetland@google.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/rpmsg.h>
+
+#define MSG "hello world!"
+#define MSG_LIMIT 100
+
+static void rpmsg_sample_cb(struct rpmsg_channel *rpdev, void *data, int len,
+ void *priv, u32 src)
+{
+ int ret;
+ static int rx_count;
+
+ dev_info(&rpdev->dev, "incoming msg %d (src: 0x%x)\n", ++rx_count, src);
+
+ print_hex_dump(KERN_DEBUG, __func__, DUMP_PREFIX_NONE, 16, 1,
+ data, len, true);
+
+ /* samples should not live forever */
+ if (rx_count >= MSG_LIMIT) {
+ dev_info(&rpdev->dev, "goodbye!\n");
+ return;
+ }
+
+ /* send a new message now */
+ ret = rpmsg_send(rpdev, MSG, strlen(MSG));
+ if (ret)
+ dev_err(&rpdev->dev, "rpmsg_send failed: %d\n", ret);
+}
+
+static int rpmsg_sample_probe(struct rpmsg_channel *rpdev)
+{
+ int ret;
+
+ dev_info(&rpdev->dev, "new channel: 0x%x -> 0x%x!\n",
+ rpdev->src, rpdev->dst);
+
+ /* send a message to our remote processor */
+ ret = rpmsg_send(rpdev, MSG, strlen(MSG));
+ if (ret) {
+ dev_err(&rpdev->dev, "rpmsg_send failed: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void rpmsg_sample_remove(struct rpmsg_channel *rpdev)
+{
+ dev_info(&rpdev->dev, "rpmsg sample client driver is removed\n");
+}
+
+static struct rpmsg_device_id rpmsg_driver_sample_id_table[] = {
+ { .name = "rpmsg-client-sample" },
+ { },
+};
+MODULE_DEVICE_TABLE(rpmsg, rpmsg_driver_sample_id_table);
+
+static struct rpmsg_driver rpmsg_sample_client = {
+ .drv.name = KBUILD_MODNAME,
+ .drv.owner = THIS_MODULE,
+ .id_table = rpmsg_driver_sample_id_table,
+ .probe = rpmsg_sample_probe,
+ .callback = rpmsg_sample_cb,
+ .remove = rpmsg_sample_remove,
+};
+
+static int __init rpmsg_client_sample_init(void)
+{
+ return register_rpmsg_driver(&rpmsg_sample_client);
+}
+module_init(rpmsg_client_sample_init);
+
+static void __exit rpmsg_client_sample_fini(void)
+{
+ unregister_rpmsg_driver(&rpmsg_sample_client);
+}
+module_exit(rpmsg_client_sample_fini);
+
+MODULE_DESCRIPTION("Remote processor messaging sample client driver");
+MODULE_LICENSE("GPL v2");
diff --git a/samples/seccomp/.gitignore b/samples/seccomp/.gitignore
new file mode 100644
index 000000000..78fb78184
--- /dev/null
+++ b/samples/seccomp/.gitignore
@@ -0,0 +1,3 @@
+bpf-direct
+bpf-fancy
+dropper
diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile
new file mode 100644
index 000000000..1b4e4b8f5
--- /dev/null
+++ b/samples/seccomp/Makefile
@@ -0,0 +1,48 @@
+# kbuild trick to avoid linker error. Can be omitted if a module is built.
+obj- := dummy.o
+
+hostprogs-$(CONFIG_SECCOMP_FILTER) := bpf-fancy dropper bpf-direct
+
+HOSTCFLAGS_bpf-fancy.o += -I$(objtree)/usr/include
+HOSTCFLAGS_bpf-fancy.o += -idirafter $(objtree)/include
+HOSTCFLAGS_bpf-helper.o += -I$(objtree)/usr/include
+HOSTCFLAGS_bpf-helper.o += -idirafter $(objtree)/include
+bpf-fancy-objs := bpf-fancy.o bpf-helper.o
+
+HOSTCFLAGS_dropper.o += -I$(objtree)/usr/include
+HOSTCFLAGS_dropper.o += -idirafter $(objtree)/include
+dropper-objs := dropper.o
+
+HOSTCFLAGS_bpf-direct.o += -I$(objtree)/usr/include
+HOSTCFLAGS_bpf-direct.o += -idirafter $(objtree)/include
+bpf-direct-objs := bpf-direct.o
+
+# Try to match the kernel target.
+ifndef CROSS_COMPILE
+ifndef CONFIG_64BIT
+
+# s390 has -m31 flag to build 31 bit binaries
+ifndef CONFIG_S390
+MFLAG = -m32
+else
+MFLAG = -m31
+endif
+
+HOSTCFLAGS_bpf-direct.o += $(MFLAG)
+HOSTCFLAGS_dropper.o += $(MFLAG)
+HOSTCFLAGS_bpf-helper.o += $(MFLAG)
+HOSTCFLAGS_bpf-fancy.o += $(MFLAG)
+HOSTLOADLIBES_bpf-direct += $(MFLAG)
+HOSTLOADLIBES_bpf-fancy += $(MFLAG)
+HOSTLOADLIBES_dropper += $(MFLAG)
+endif
+always := $(hostprogs-y)
+else
+# MIPS system calls are defined based on the -mabi that is passed
+# to the toolchain which may or may not be a valid option
+# for the host toolchain. So disable tests if target architecture
+# is MIPS but the host isn't.
+ifndef CONFIG_MIPS
+always := $(hostprogs-y)
+endif
+endif
diff --git a/samples/seccomp/bpf-direct.c b/samples/seccomp/bpf-direct.c
new file mode 100644
index 000000000..151ec3f52
--- /dev/null
+++ b/samples/seccomp/bpf-direct.c
@@ -0,0 +1,190 @@
+/*
+ * Seccomp filter example for x86 (32-bit and 64-bit) with BPF macros
+ *
+ * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org>
+ * Author: Will Drewry <wad@chromium.org>
+ *
+ * The code may be used by anyone for any purpose,
+ * and can serve as a starting point for developing
+ * applications using prctl(PR_SET_SECCOMP, 2, ...).
+ */
+#if defined(__i386__) || defined(__x86_64__)
+#define SUPPORTED_ARCH 1
+#endif
+
+#if defined(SUPPORTED_ARCH)
+#define __USE_GNU 1
+#define _GNU_SOURCE 1
+
+#include <linux/types.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <linux/unistd.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
+#define syscall_nr (offsetof(struct seccomp_data, nr))
+
+#if defined(__i386__)
+#define REG_RESULT REG_EAX
+#define REG_SYSCALL REG_EAX
+#define REG_ARG0 REG_EBX
+#define REG_ARG1 REG_ECX
+#define REG_ARG2 REG_EDX
+#define REG_ARG3 REG_ESI
+#define REG_ARG4 REG_EDI
+#define REG_ARG5 REG_EBP
+#elif defined(__x86_64__)
+#define REG_RESULT REG_RAX
+#define REG_SYSCALL REG_RAX
+#define REG_ARG0 REG_RDI
+#define REG_ARG1 REG_RSI
+#define REG_ARG2 REG_RDX
+#define REG_ARG3 REG_R10
+#define REG_ARG4 REG_R8
+#define REG_ARG5 REG_R9
+#endif
+
+#ifndef PR_SET_NO_NEW_PRIVS
+#define PR_SET_NO_NEW_PRIVS 38
+#endif
+
+#ifndef SYS_SECCOMP
+#define SYS_SECCOMP 1
+#endif
+
+static void emulator(int nr, siginfo_t *info, void *void_context)
+{
+ ucontext_t *ctx = (ucontext_t *)(void_context);
+ int syscall;
+ char *buf;
+ ssize_t bytes;
+ size_t len;
+ if (info->si_code != SYS_SECCOMP)
+ return;
+ if (!ctx)
+ return;
+ syscall = ctx->uc_mcontext.gregs[REG_SYSCALL];
+ buf = (char *) ctx->uc_mcontext.gregs[REG_ARG1];
+ len = (size_t) ctx->uc_mcontext.gregs[REG_ARG2];
+
+ if (syscall != __NR_write)
+ return;
+ if (ctx->uc_mcontext.gregs[REG_ARG0] != STDERR_FILENO)
+ return;
+ /* Redirect stderr messages to stdout. Doesn't handle EINTR, etc */
+ ctx->uc_mcontext.gregs[REG_RESULT] = -1;
+ if (write(STDOUT_FILENO, "[ERR] ", 6) > 0) {
+ bytes = write(STDOUT_FILENO, buf, len);
+ ctx->uc_mcontext.gregs[REG_RESULT] = bytes;
+ }
+ return;
+}
+
+static int install_emulator(void)
+{
+ struct sigaction act;
+ sigset_t mask;
+ memset(&act, 0, sizeof(act));
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGSYS);
+
+ act.sa_sigaction = &emulator;
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGSYS, &act, NULL) < 0) {
+ perror("sigaction");
+ return -1;
+ }
+ if (sigprocmask(SIG_UNBLOCK, &mask, NULL)) {
+ perror("sigprocmask");
+ return -1;
+ }
+ return 0;
+}
+
+static int install_filter(void)
+{
+ struct sock_filter filter[] = {
+ /* Grab the system call number */
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_nr),
+ /* Jump table for the allowed syscalls */
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_rt_sigreturn, 0, 1),
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+#ifdef __NR_sigreturn
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_sigreturn, 0, 1),
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+#endif
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_exit_group, 0, 1),
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_exit, 0, 1),
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_read, 1, 0),
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_write, 3, 2),
+
+ /* Check that read is only using stdin. */
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)),
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, STDIN_FILENO, 4, 0),
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
+
+ /* Check that write is only using stdout */
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)),
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, STDOUT_FILENO, 1, 0),
+ /* Trap attempts to write to stderr */
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, STDERR_FILENO, 1, 2),
+
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP),
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
+ .filter = filter,
+ };
+
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ perror("prctl(NO_NEW_PRIVS)");
+ return 1;
+ }
+
+
+ if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
+ perror("prctl");
+ return 1;
+ }
+ return 0;
+}
+
+#define payload(_c) (_c), sizeof((_c))
+int main(int argc, char **argv)
+{
+ char buf[4096];
+ ssize_t bytes = 0;
+ if (install_emulator())
+ return 1;
+ if (install_filter())
+ return 1;
+ syscall(__NR_write, STDOUT_FILENO,
+ payload("OHAI! WHAT IS YOUR NAME? "));
+ bytes = syscall(__NR_read, STDIN_FILENO, buf, sizeof(buf));
+ syscall(__NR_write, STDOUT_FILENO, payload("HELLO, "));
+ syscall(__NR_write, STDOUT_FILENO, buf, bytes);
+ syscall(__NR_write, STDERR_FILENO,
+ payload("Error message going to STDERR\n"));
+ return 0;
+}
+#else /* SUPPORTED_ARCH */
+/*
+ * This sample is x86-only. Since kernel samples are compiled with the
+ * host toolchain, a non-x86 host will result in using only the main()
+ * below.
+ */
+int main(void)
+{
+ return 1;
+}
+#endif /* SUPPORTED_ARCH */
diff --git a/samples/seccomp/bpf-fancy.c b/samples/seccomp/bpf-fancy.c
new file mode 100644
index 000000000..e8b24f443
--- /dev/null
+++ b/samples/seccomp/bpf-fancy.c
@@ -0,0 +1,104 @@
+/*
+ * Seccomp BPF example using a macro-based generator.
+ *
+ * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org>
+ * Author: Will Drewry <wad@chromium.org>
+ *
+ * The code may be used by anyone for any purpose,
+ * and can serve as a starting point for developing
+ * applications using prctl(PR_ATTACH_SECCOMP_FILTER).
+ */
+
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <linux/unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "bpf-helper.h"
+
+#ifndef PR_SET_NO_NEW_PRIVS
+#define PR_SET_NO_NEW_PRIVS 38
+#endif
+
+int main(int argc, char **argv)
+{
+ struct bpf_labels l = {
+ .count = 0,
+ };
+ static const char msg1[] = "Please type something: ";
+ static const char msg2[] = "You typed: ";
+ char buf[256];
+ struct sock_filter filter[] = {
+ /* TODO: LOAD_SYSCALL_NR(arch) and enforce an arch */
+ LOAD_SYSCALL_NR,
+ SYSCALL(__NR_exit, ALLOW),
+ SYSCALL(__NR_exit_group, ALLOW),
+ SYSCALL(__NR_write, JUMP(&l, write_fd)),
+ SYSCALL(__NR_read, JUMP(&l, read)),
+ DENY, /* Don't passthrough into a label */
+
+ LABEL(&l, read),
+ ARG(0),
+ JNE(STDIN_FILENO, DENY),
+ ARG(1),
+ JNE((unsigned long)buf, DENY),
+ ARG(2),
+ JGE(sizeof(buf), DENY),
+ ALLOW,
+
+ LABEL(&l, write_fd),
+ ARG(0),
+ JEQ(STDOUT_FILENO, JUMP(&l, write_buf)),
+ JEQ(STDERR_FILENO, JUMP(&l, write_buf)),
+ DENY,
+
+ LABEL(&l, write_buf),
+ ARG(1),
+ JEQ((unsigned long)msg1, JUMP(&l, msg1_len)),
+ JEQ((unsigned long)msg2, JUMP(&l, msg2_len)),
+ JEQ((unsigned long)buf, JUMP(&l, buf_len)),
+ DENY,
+
+ LABEL(&l, msg1_len),
+ ARG(2),
+ JLT(sizeof(msg1), ALLOW),
+ DENY,
+
+ LABEL(&l, msg2_len),
+ ARG(2),
+ JLT(sizeof(msg2), ALLOW),
+ DENY,
+
+ LABEL(&l, buf_len),
+ ARG(2),
+ JLT(sizeof(buf), ALLOW),
+ DENY,
+ };
+ struct sock_fprog prog = {
+ .filter = filter,
+ .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
+ };
+ ssize_t bytes;
+ bpf_resolve_jumps(&l, filter, sizeof(filter)/sizeof(*filter));
+
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ perror("prctl(NO_NEW_PRIVS)");
+ return 1;
+ }
+
+ if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
+ perror("prctl(SECCOMP)");
+ return 1;
+ }
+ syscall(__NR_write, STDOUT_FILENO, msg1, strlen(msg1));
+ bytes = syscall(__NR_read, STDIN_FILENO, buf, sizeof(buf)-1);
+ bytes = (bytes > 0 ? bytes : 0);
+ syscall(__NR_write, STDERR_FILENO, msg2, strlen(msg2));
+ syscall(__NR_write, STDERR_FILENO, buf, bytes);
+ /* Now get killed */
+ syscall(__NR_write, STDERR_FILENO, msg2, strlen(msg2)+2);
+ return 0;
+}
diff --git a/samples/seccomp/bpf-helper.c b/samples/seccomp/bpf-helper.c
new file mode 100644
index 000000000..05cb4d5ff
--- /dev/null
+++ b/samples/seccomp/bpf-helper.c
@@ -0,0 +1,95 @@
+/*
+ * Seccomp BPF helper functions
+ *
+ * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org>
+ * Author: Will Drewry <wad@chromium.org>
+ *
+ * The code may be used by anyone for any purpose,
+ * and can serve as a starting point for developing
+ * applications using prctl(PR_ATTACH_SECCOMP_FILTER).
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "bpf-helper.h"
+
+int bpf_resolve_jumps(struct bpf_labels *labels,
+ struct sock_filter *filter, size_t count)
+{
+ struct sock_filter *begin = filter;
+ __u8 insn = count - 1;
+
+ if (count < 1)
+ return -1;
+ /*
+ * Walk it once, backwards, to build the label table and do fixups.
+ * Since backward jumps are disallowed by BPF, this is easy.
+ */
+ filter += insn;
+ for (; filter >= begin; --insn, --filter) {
+ if (filter->code != (BPF_JMP+BPF_JA))
+ continue;
+ switch ((filter->jt<<8)|filter->jf) {
+ case (JUMP_JT<<8)|JUMP_JF:
+ if (labels->labels[filter->k].location == 0xffffffff) {
+ fprintf(stderr, "Unresolved label: '%s'\n",
+ labels->labels[filter->k].label);
+ return 1;
+ }
+ filter->k = labels->labels[filter->k].location -
+ (insn + 1);
+ filter->jt = 0;
+ filter->jf = 0;
+ continue;
+ case (LABEL_JT<<8)|LABEL_JF:
+ if (labels->labels[filter->k].location != 0xffffffff) {
+ fprintf(stderr, "Duplicate label use: '%s'\n",
+ labels->labels[filter->k].label);
+ return 1;
+ }
+ labels->labels[filter->k].location = insn;
+ filter->k = 0; /* fall through */
+ filter->jt = 0;
+ filter->jf = 0;
+ continue;
+ }
+ }
+ return 0;
+}
+
+/* Simple lookup table for labels. */
+__u32 seccomp_bpf_label(struct bpf_labels *labels, const char *label)
+{
+ struct __bpf_label *begin = labels->labels, *end;
+ int id;
+
+ if (labels->count == BPF_LABELS_MAX) {
+ fprintf(stderr, "Too many labels\n");
+ exit(1);
+ }
+ if (labels->count == 0) {
+ begin->label = label;
+ begin->location = 0xffffffff;
+ labels->count++;
+ return 0;
+ }
+ end = begin + labels->count;
+ for (id = 0; begin < end; ++begin, ++id) {
+ if (!strcmp(label, begin->label))
+ return id;
+ }
+ begin->label = label;
+ begin->location = 0xffffffff;
+ labels->count++;
+ return id;
+}
+
+void seccomp_bpf_print(struct sock_filter *filter, size_t count)
+{
+ struct sock_filter *end = filter + count;
+ for ( ; filter < end; ++filter)
+ printf("{ code=%u,jt=%u,jf=%u,k=%u },\n",
+ filter->code, filter->jt, filter->jf, filter->k);
+}
diff --git a/samples/seccomp/bpf-helper.h b/samples/seccomp/bpf-helper.h
new file mode 100644
index 000000000..38ee70f3c
--- /dev/null
+++ b/samples/seccomp/bpf-helper.h
@@ -0,0 +1,243 @@
+/*
+ * Example wrapper around BPF macros.
+ *
+ * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org>
+ * Author: Will Drewry <wad@chromium.org>
+ *
+ * The code may be used by anyone for any purpose,
+ * and can serve as a starting point for developing
+ * applications using prctl(PR_SET_SECCOMP, 2, ...).
+ *
+ * No guarantees are provided with respect to the correctness
+ * or functionality of this code.
+ */
+#ifndef __BPF_HELPER_H__
+#define __BPF_HELPER_H__
+
+#include <asm/bitsperlong.h> /* for __BITS_PER_LONG */
+#include <endian.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h> /* for seccomp_data */
+#include <linux/types.h>
+#include <linux/unistd.h>
+#include <stddef.h>
+
+#define BPF_LABELS_MAX 256
+struct bpf_labels {
+ int count;
+ struct __bpf_label {
+ const char *label;
+ __u32 location;
+ } labels[BPF_LABELS_MAX];
+};
+
+int bpf_resolve_jumps(struct bpf_labels *labels,
+ struct sock_filter *filter, size_t count);
+__u32 seccomp_bpf_label(struct bpf_labels *labels, const char *label);
+void seccomp_bpf_print(struct sock_filter *filter, size_t count);
+
+#define JUMP_JT 0xff
+#define JUMP_JF 0xff
+#define LABEL_JT 0xfe
+#define LABEL_JF 0xfe
+
+#define ALLOW \
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
+#define DENY \
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
+#define JUMP(labels, label) \
+ BPF_JUMP(BPF_JMP+BPF_JA, FIND_LABEL((labels), (label)), \
+ JUMP_JT, JUMP_JF)
+#define LABEL(labels, label) \
+ BPF_JUMP(BPF_JMP+BPF_JA, FIND_LABEL((labels), (label)), \
+ LABEL_JT, LABEL_JF)
+#define SYSCALL(nr, jt) \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (nr), 0, 1), \
+ jt
+
+/* Lame, but just an example */
+#define FIND_LABEL(labels, label) seccomp_bpf_label((labels), #label)
+
+#define EXPAND(...) __VA_ARGS__
+
+/* Ensure that we load the logically correct offset. */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define LO_ARG(idx) offsetof(struct seccomp_data, args[(idx)])
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define LO_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) + sizeof(__u32)
+#else
+#error "Unknown endianness"
+#endif
+
+/* Map all width-sensitive operations */
+#if __BITS_PER_LONG == 32
+
+#define JEQ(x, jt) JEQ32(x, EXPAND(jt))
+#define JNE(x, jt) JNE32(x, EXPAND(jt))
+#define JGT(x, jt) JGT32(x, EXPAND(jt))
+#define JLT(x, jt) JLT32(x, EXPAND(jt))
+#define JGE(x, jt) JGE32(x, EXPAND(jt))
+#define JLE(x, jt) JLE32(x, EXPAND(jt))
+#define JA(x, jt) JA32(x, EXPAND(jt))
+#define ARG(i) ARG_32(i)
+
+#elif __BITS_PER_LONG == 64
+
+/* Ensure that we load the logically correct offset. */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define ENDIAN(_lo, _hi) _lo, _hi
+#define HI_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) + sizeof(__u32)
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define ENDIAN(_lo, _hi) _hi, _lo
+#define HI_ARG(idx) offsetof(struct seccomp_data, args[(idx)])
+#endif
+
+union arg64 {
+ struct {
+ __u32 ENDIAN(lo32, hi32);
+ };
+ __u64 u64;
+};
+
+#define JEQ(x, jt) \
+ JEQ64(((union arg64){.u64 = (x)}).lo32, \
+ ((union arg64){.u64 = (x)}).hi32, \
+ EXPAND(jt))
+#define JGT(x, jt) \
+ JGT64(((union arg64){.u64 = (x)}).lo32, \
+ ((union arg64){.u64 = (x)}).hi32, \
+ EXPAND(jt))
+#define JGE(x, jt) \
+ JGE64(((union arg64){.u64 = (x)}).lo32, \
+ ((union arg64){.u64 = (x)}).hi32, \
+ EXPAND(jt))
+#define JNE(x, jt) \
+ JNE64(((union arg64){.u64 = (x)}).lo32, \
+ ((union arg64){.u64 = (x)}).hi32, \
+ EXPAND(jt))
+#define JLT(x, jt) \
+ JLT64(((union arg64){.u64 = (x)}).lo32, \
+ ((union arg64){.u64 = (x)}).hi32, \
+ EXPAND(jt))
+#define JLE(x, jt) \
+ JLE64(((union arg64){.u64 = (x)}).lo32, \
+ ((union arg64){.u64 = (x)}).hi32, \
+ EXPAND(jt))
+
+#define JA(x, jt) \
+ JA64(((union arg64){.u64 = (x)}).lo32, \
+ ((union arg64){.u64 = (x)}).hi32, \
+ EXPAND(jt))
+#define ARG(i) ARG_64(i)
+
+#else
+#error __BITS_PER_LONG value unusable.
+#endif
+
+/* Loads the arg into A */
+#define ARG_32(idx) \
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, LO_ARG(idx))
+
+/* Loads hi into A and lo in X */
+#define ARG_64(idx) \
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, LO_ARG(idx)), \
+ BPF_STMT(BPF_ST, 0), /* lo -> M[0] */ \
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, HI_ARG(idx)), \
+ BPF_STMT(BPF_ST, 1) /* hi -> M[1] */
+
+#define JEQ32(value, jt) \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (value), 0, 1), \
+ jt
+
+#define JNE32(value, jt) \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (value), 1, 0), \
+ jt
+
+/* Checks the lo, then swaps to check the hi. A=lo,X=hi */
+#define JEQ64(lo, hi, jt) \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \
+ BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (lo), 0, 2), \
+ BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \
+ jt, \
+ BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */
+
+#define JNE64(lo, hi, jt) \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 5, 0), \
+ BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (lo), 2, 0), \
+ BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \
+ jt, \
+ BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */
+
+#define JA32(value, jt) \
+ BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (value), 0, 1), \
+ jt
+
+#define JA64(lo, hi, jt) \
+ BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (hi), 3, 0), \
+ BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \
+ BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (lo), 0, 2), \
+ BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \
+ jt, \
+ BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */
+
+#define JGE32(value, jt) \
+ BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (value), 0, 1), \
+ jt
+
+#define JLT32(value, jt) \
+ BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (value), 1, 0), \
+ jt
+
+/* Shortcut checking if hi > arg.hi. */
+#define JGE64(lo, hi, jt) \
+ BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 4, 0), \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \
+ BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \
+ BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (lo), 0, 2), \
+ BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \
+ jt, \
+ BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */
+
+#define JLT64(lo, hi, jt) \
+ BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (hi), 0, 4), \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \
+ BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \
+ BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (lo), 2, 0), \
+ BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \
+ jt, \
+ BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */
+
+#define JGT32(value, jt) \
+ BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (value), 0, 1), \
+ jt
+
+#define JLE32(value, jt) \
+ BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (value), 1, 0), \
+ jt
+
+/* Check hi > args.hi first, then do the GE checking */
+#define JGT64(lo, hi, jt) \
+ BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 4, 0), \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \
+ BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \
+ BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (lo), 0, 2), \
+ BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \
+ jt, \
+ BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */
+
+#define JLE64(lo, hi, jt) \
+ BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 6, 0), \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 3), \
+ BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \
+ BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (lo), 2, 0), \
+ BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \
+ jt, \
+ BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */
+
+#define LOAD_SYSCALL_NR \
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
+ offsetof(struct seccomp_data, nr))
+
+#endif /* __BPF_HELPER_H__ */
diff --git a/samples/seccomp/dropper.c b/samples/seccomp/dropper.c
new file mode 100644
index 000000000..c69c347c7
--- /dev/null
+++ b/samples/seccomp/dropper.c
@@ -0,0 +1,68 @@
+/*
+ * Naive system call dropper built on seccomp_filter.
+ *
+ * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org>
+ * Author: Will Drewry <wad@chromium.org>
+ *
+ * The code may be used by anyone for any purpose,
+ * and can serve as a starting point for developing
+ * applications using prctl(PR_SET_SECCOMP, 2, ...).
+ *
+ * When run, returns the specified errno for the specified
+ * system call number against the given architecture.
+ *
+ * Run this one as root as PR_SET_NO_NEW_PRIVS is not called.
+ */
+
+#include <errno.h>
+#include <linux/audit.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <linux/unistd.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+static int install_filter(int nr, int arch, int error)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
+ (offsetof(struct seccomp_data, arch))),
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, arch, 0, 3),
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
+ (offsetof(struct seccomp_data, nr))),
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
+ BPF_STMT(BPF_RET+BPF_K,
+ SECCOMP_RET_ERRNO|(error & SECCOMP_RET_DATA)),
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
+ .filter = filter,
+ };
+ if (prctl(PR_SET_SECCOMP, 2, &prog)) {
+ perror("prctl");
+ return 1;
+ }
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ if (argc < 5) {
+ fprintf(stderr, "Usage:\n"
+ "dropper <syscall_nr> <arch> <errno> <prog> [<args>]\n"
+ "Hint: AUDIT_ARCH_I386: 0x%X\n"
+ " AUDIT_ARCH_X86_64: 0x%X\n"
+ "\n", AUDIT_ARCH_I386, AUDIT_ARCH_X86_64);
+ return 1;
+ }
+ if (install_filter(strtol(argv[1], NULL, 0), strtol(argv[2], NULL, 0),
+ strtol(argv[3], NULL, 0)))
+ return 1;
+ execv(argv[4], &argv[4]);
+ printf("Failed to execv\n");
+ return 255;
+}
diff --git a/samples/trace_events/Makefile b/samples/trace_events/Makefile
new file mode 100644
index 000000000..0f8d92120
--- /dev/null
+++ b/samples/trace_events/Makefile
@@ -0,0 +1,14 @@
+# builds the trace events example kernel modules;
+# then to use one (as root): insmod <module_name.ko>
+
+# If you include a trace header outside of include/trace/events
+# then the file that does the #define CREATE_TRACE_POINTS must
+# have that tracer file in its main search path. This is because
+# define_trace.h will include it, and must be able to find it from
+# the include/trace directory.
+#
+# Here trace-events-sample.c does the CREATE_TRACE_POINTS.
+#
+CFLAGS_trace-events-sample.o := -I$(src)
+
+obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace-events-sample.o
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
new file mode 100644
index 000000000..880a7d1d2
--- /dev/null
+++ b/samples/trace_events/trace-events-sample.c
@@ -0,0 +1,130 @@
+#include <linux/module.h>
+#include <linux/kthread.h>
+
+/*
+ * Any file that uses trace points, must include the header.
+ * But only one file, must include the header by defining
+ * CREATE_TRACE_POINTS first. This will make the C code that
+ * creates the handles for the trace points.
+ */
+#define CREATE_TRACE_POINTS
+#include "trace-events-sample.h"
+
+static const char *random_strings[] = {
+ "Mother Goose",
+ "Snoopy",
+ "Gandalf",
+ "Frodo",
+ "One ring to rule them all"
+};
+
+static void simple_thread_func(int cnt)
+{
+ int array[6];
+ int len = cnt % 5;
+ int i;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ);
+
+ for (i = 0; i < len; i++)
+ array[i] = i + 1;
+ array[i] = 0;
+
+ /* Silly tracepoints */
+ trace_foo_bar("hello", cnt, array, random_strings[len],
+ tsk_cpus_allowed(current));
+
+ trace_foo_with_template_simple("HELLO", cnt);
+
+ trace_foo_bar_with_cond("Some times print", cnt);
+
+ trace_foo_with_template_cond("prints other times", cnt);
+
+ trace_foo_with_template_print("I have to be different", cnt);
+}
+
+static int simple_thread(void *arg)
+{
+ int cnt = 0;
+
+ while (!kthread_should_stop())
+ simple_thread_func(cnt++);
+
+ return 0;
+}
+
+static struct task_struct *simple_tsk;
+static struct task_struct *simple_tsk_fn;
+
+static void simple_thread_func_fn(int cnt)
+{
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ);
+
+ /* More silly tracepoints */
+ trace_foo_bar_with_fn("Look at me", cnt);
+ trace_foo_with_template_fn("Look at me too", cnt);
+}
+
+static int simple_thread_fn(void *arg)
+{
+ int cnt = 0;
+
+ while (!kthread_should_stop())
+ simple_thread_func_fn(cnt++);
+
+ return 0;
+}
+
+static DEFINE_MUTEX(thread_mutex);
+
+void foo_bar_reg(void)
+{
+ pr_info("Starting thread for foo_bar_fn\n");
+ /*
+ * We shouldn't be able to start a trace when the module is
+ * unloading (there's other locks to prevent that). But
+ * for consistency sake, we still take the thread_mutex.
+ */
+ mutex_lock(&thread_mutex);
+ simple_tsk_fn = kthread_run(simple_thread_fn, NULL, "event-sample-fn");
+ mutex_unlock(&thread_mutex);
+}
+
+void foo_bar_unreg(void)
+{
+ pr_info("Killing thread for foo_bar_fn\n");
+ /* protect against module unloading */
+ mutex_lock(&thread_mutex);
+ if (simple_tsk_fn)
+ kthread_stop(simple_tsk_fn);
+ simple_tsk_fn = NULL;
+ mutex_unlock(&thread_mutex);
+}
+
+static int __init trace_event_init(void)
+{
+ simple_tsk = kthread_run(simple_thread, NULL, "event-sample");
+ if (IS_ERR(simple_tsk))
+ return -1;
+
+ return 0;
+}
+
+static void __exit trace_event_exit(void)
+{
+ kthread_stop(simple_tsk);
+ mutex_lock(&thread_mutex);
+ if (simple_tsk_fn)
+ kthread_stop(simple_tsk_fn);
+ simple_tsk_fn = NULL;
+ mutex_unlock(&thread_mutex);
+}
+
+module_init(trace_event_init);
+module_exit(trace_event_exit);
+
+MODULE_AUTHOR("Steven Rostedt");
+MODULE_DESCRIPTION("trace-events-sample");
+MODULE_LICENSE("GPL");
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
new file mode 100644
index 000000000..125d6402f
--- /dev/null
+++ b/samples/trace_events/trace-events-sample.h
@@ -0,0 +1,523 @@
+/*
+ * If TRACE_SYSTEM is defined, that will be the directory created
+ * in the ftrace directory under /sys/kernel/tracing/events/<system>
+ *
+ * The define_trace.h below will also look for a file name of
+ * TRACE_SYSTEM.h where TRACE_SYSTEM is what is defined here.
+ * In this case, it would look for sample.h
+ *
+ * If the header name will be different than the system name
+ * (as in this case), then you can override the header name that
+ * define_trace.h will look up by defining TRACE_INCLUDE_FILE
+ *
+ * This file is called trace-events-sample.h but we want the system
+ * to be called "sample". Therefore we must define the name of this
+ * file:
+ *
+ * #define TRACE_INCLUDE_FILE trace-events-sample
+ *
+ * As we do an the bottom of this file.
+ *
+ * Notice that TRACE_SYSTEM should be defined outside of #if
+ * protection, just like TRACE_INCLUDE_FILE.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM sample-trace
+
+/*
+ * TRACE_SYSTEM is expected to be a C valid variable (alpha-numeric
+ * and underscore), although it may start with numbers. If for some
+ * reason it is not, you need to add the following lines:
+ */
+#undef TRACE_SYSTEM_VAR
+#define TRACE_SYSTEM_VAR sample_trace
+/*
+ * But the above is only needed if TRACE_SYSTEM is not alpha-numeric
+ * and underscored. By default, TRACE_SYSTEM_VAR will be equal to
+ * TRACE_SYSTEM. As TRACE_SYSTEM_VAR must be alpha-numeric, if
+ * TRACE_SYSTEM is not, then TRACE_SYSTEM_VAR must be defined with
+ * only alpha-numeric and underscores.
+ *
+ * The TRACE_SYSTEM_VAR is only used internally and not visible to
+ * user space.
+ */
+
+/*
+ * Notice that this file is not protected like a normal header.
+ * We also must allow for rereading of this file. The
+ *
+ * || defined(TRACE_HEADER_MULTI_READ)
+ *
+ * serves this purpose.
+ */
+#if !defined(_TRACE_EVENT_SAMPLE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_EVENT_SAMPLE_H
+
+/*
+ * All trace headers should include tracepoint.h, until we finally
+ * make it into a standard header.
+ */
+#include <linux/tracepoint.h>
+
+/*
+ * The TRACE_EVENT macro is broken up into 5 parts.
+ *
+ * name: name of the trace point. This is also how to enable the tracepoint.
+ * A function called trace_foo_bar() will be created.
+ *
+ * proto: the prototype of the function trace_foo_bar()
+ * Here it is trace_foo_bar(char *foo, int bar).
+ *
+ * args: must match the arguments in the prototype.
+ * Here it is simply "foo, bar".
+ *
+ * struct: This defines the way the data will be stored in the ring buffer.
+ * The items declared here become part of a special structure
+ * called "__entry", which can be used in the fast_assign part of the
+ * TRACE_EVENT macro.
+ *
+ * Here are the currently defined types you can use:
+ *
+ * __field : Is broken up into type and name. Where type can be any
+ * primitive type (integer, long or pointer).
+ *
+ * __field(int, foo)
+ *
+ * __entry->foo = 5;
+ *
+ * __field_struct : This can be any static complex data type (struct, union
+ * but not an array). Be careful using complex types, as each
+ * event is limited in size, and copying large amounts of data
+ * into the ring buffer can slow things down.
+ *
+ * __field_struct(struct bar, foo)
+ *
+ * __entry->bar.x = y;
+
+ * __array: There are three fields (type, name, size). The type is the
+ * type of elements in teh array, the name is the name of the array.
+ * size is the number of items in the array (not the total size).
+ *
+ * __array( char, foo, 10) is the same as saying: char foo[10];
+ *
+ * Assigning arrays can be done like any array:
+ *
+ * __entry->foo[0] = 'a';
+ *
+ * memcpy(__entry->foo, bar, 10);
+ *
+ * __dynamic_array: This is similar to array, but can vary is size from
+ * instance to instance of the tracepoint being called.
+ * Like __array, this too has three elements (type, name, size);
+ * type is the type of the element, name is the name of the array.
+ * The size is different than __array. It is not a static number,
+ * but the algorithm to figure out the length of the array for the
+ * specific instance of tracepoint. Again, size is the numebr of
+ * items in the array, not the total length in bytes.
+ *
+ * __dynamic_array( int, foo, bar) is similar to: int foo[bar];
+ *
+ * Note, unlike arrays, you must use the __get_dynamic_array() macro
+ * to access the array.
+ *
+ * memcpy(__get_dynamic_array(foo), bar, 10);
+ *
+ * Notice, that "__entry" is not needed here.
+ *
+ * __string: This is a special kind of __dynamic_array. It expects to
+ * have a nul terminated character array passed to it (it allows
+ * for NULL too, which would be converted into "(null)"). __string
+ * takes two paramenter (name, src), where name is the name of
+ * the string saved, and src is the string to copy into the
+ * ring buffer.
+ *
+ * __string(foo, bar) is similar to: strcpy(foo, bar)
+ *
+ * To assign a string, use the helper macro __assign_str().
+ *
+ * __assign_str(foo, bar);
+ *
+ * In most cases, the __assign_str() macro will take the same
+ * parameters as the __string() macro had to declare the string.
+ *
+ * __bitmask: This is another kind of __dynamic_array, but it expects
+ * an array of longs, and the number of bits to parse. It takes
+ * two parameters (name, nr_bits), where name is the name of the
+ * bitmask to save, and the nr_bits is the number of bits to record.
+ *
+ * __bitmask(target_cpu, nr_cpumask_bits)
+ *
+ * To assign a bitmask, use the __assign_bitmask() helper macro.
+ *
+ * __assign_bitmask(target_cpus, cpumask_bits(bar), nr_cpumask_bits);
+ *
+ *
+ * fast_assign: This is a C like function that is used to store the items
+ * into the ring buffer. A special variable called "__entry" will be the
+ * structure that points into the ring buffer and has the same fields as
+ * described by the struct part of TRACE_EVENT above.
+ *
+ * printk: This is a way to print out the data in pretty print. This is
+ * useful if the system crashes and you are logging via a serial line,
+ * the data can be printed to the console using this "printk" method.
+ * This is also used to print out the data from the trace files.
+ * Again, the __entry macro is used to access the data from the ring buffer.
+ *
+ * Note, __dynamic_array, __string, and __bitmask require special helpers
+ * to access the data.
+ *
+ * For __dynamic_array(int, foo, bar) use __get_dynamic_array(foo)
+ * Use __get_dynamic_array_len(foo) to get the length of the array
+ * saved. Note, __get_dynamic_array_len() returns the total allocated
+ * length of the dynamic array; __print_array() expects the second
+ * parameter to be the number of elements. To get that, the array length
+ * needs to be divided by the element size.
+ *
+ * For __string(foo, bar) use __get_str(foo)
+ *
+ * For __bitmask(target_cpus, nr_cpumask_bits) use __get_bitmask(target_cpus)
+ *
+ *
+ * Note, that for both the assign and the printk, __entry is the handler
+ * to the data structure in the ring buffer, and is defined by the
+ * TP_STRUCT__entry.
+ */
+
+/*
+ * It is OK to have helper functions in the file, but they need to be protected
+ * from being defined more than once. Remember, this file gets included more
+ * than once.
+ */
+#ifndef __TRACE_EVENT_SAMPLE_HELPER_FUNCTIONS
+#define __TRACE_EVENT_SAMPLE_HELPER_FUNCTIONS
+static inline int __length_of(const int *list)
+{
+ int i;
+
+ if (!list)
+ return 0;
+
+ for (i = 0; list[i]; i++)
+ ;
+ return i;
+}
+
+enum {
+ TRACE_SAMPLE_FOO = 2,
+ TRACE_SAMPLE_BAR = 4,
+ TRACE_SAMPLE_ZOO = 8,
+};
+#endif
+
+/*
+ * If enums are used in the TP_printk(), their names will be shown in
+ * format files and not their values. This can cause problems with user
+ * space programs that parse the format files to know how to translate
+ * the raw binary trace output into human readable text.
+ *
+ * To help out user space programs, any enum that is used in the TP_printk()
+ * should be defined by TRACE_DEFINE_ENUM() macro. All that is needed to
+ * be done is to add this macro with the enum within it in the trace
+ * header file, and it will be converted in the output.
+ */
+
+TRACE_DEFINE_ENUM(TRACE_SAMPLE_FOO);
+TRACE_DEFINE_ENUM(TRACE_SAMPLE_BAR);
+TRACE_DEFINE_ENUM(TRACE_SAMPLE_ZOO);
+
+TRACE_EVENT(foo_bar,
+
+ TP_PROTO(const char *foo, int bar, const int *lst,
+ const char *string, const struct cpumask *mask),
+
+ TP_ARGS(foo, bar, lst, string, mask),
+
+ TP_STRUCT__entry(
+ __array( char, foo, 10 )
+ __field( int, bar )
+ __dynamic_array(int, list, __length_of(lst))
+ __string( str, string )
+ __bitmask( cpus, num_possible_cpus() )
+ ),
+
+ TP_fast_assign(
+ strlcpy(__entry->foo, foo, 10);
+ __entry->bar = bar;
+ memcpy(__get_dynamic_array(list), lst,
+ __length_of(lst) * sizeof(int));
+ __assign_str(str, string);
+ __assign_bitmask(cpus, cpumask_bits(mask), num_possible_cpus());
+ ),
+
+ TP_printk("foo %s %d %s %s %s %s (%s)", __entry->foo, __entry->bar,
+
+/*
+ * Notice here the use of some helper functions. This includes:
+ *
+ * __print_symbolic( variable, { value, "string" }, ... ),
+ *
+ * The variable is tested against each value of the { } pair. If
+ * the variable matches one of the values, then it will print the
+ * string in that pair. If non are matched, it returns a string
+ * version of the number (if __entry->bar == 7 then "7" is returned).
+ */
+ __print_symbolic(__entry->bar,
+ { 0, "zero" },
+ { TRACE_SAMPLE_FOO, "TWO" },
+ { TRACE_SAMPLE_BAR, "FOUR" },
+ { TRACE_SAMPLE_ZOO, "EIGHT" },
+ { 10, "TEN" }
+ ),
+
+/*
+ * __print_flags( variable, "delim", { value, "flag" }, ... ),
+ *
+ * This is similar to __print_symbolic, except that it tests the bits
+ * of the value. If ((FLAG & variable) == FLAG) then the string is
+ * printed. If more than one flag matches, then each one that does is
+ * also printed with delim in between them.
+ * If not all bits are accounted for, then the not found bits will be
+ * added in hex format: 0x506 will show BIT2|BIT4|0x500
+ */
+ __print_flags(__entry->bar, "|",
+ { 1, "BIT1" },
+ { 2, "BIT2" },
+ { 4, "BIT3" },
+ { 8, "BIT4" }
+ ),
+/*
+ * __print_array( array, len, element_size )
+ *
+ * This prints out the array that is defined by __array in a nice format.
+ */
+ __print_array(__get_dynamic_array(list),
+ __get_dynamic_array_len(list) / sizeof(int),
+ sizeof(int)),
+ __get_str(str), __get_bitmask(cpus))
+);
+
+/*
+ * There may be a case where a tracepoint should only be called if
+ * some condition is set. Otherwise the tracepoint should not be called.
+ * But to do something like:
+ *
+ * if (cond)
+ * trace_foo();
+ *
+ * Would cause a little overhead when tracing is not enabled, and that
+ * overhead, even if small, is not something we want. As tracepoints
+ * use static branch (aka jump_labels), where no branch is taken to
+ * skip the tracepoint when not enabled, and a jmp is placed to jump
+ * to the tracepoint code when it is enabled, having a if statement
+ * nullifies that optimization. It would be nice to place that
+ * condition within the static branch. This is where TRACE_EVENT_CONDITION
+ * comes in.
+ *
+ * TRACE_EVENT_CONDITION() is just like TRACE_EVENT, except it adds another
+ * parameter just after args. Where TRACE_EVENT has:
+ *
+ * TRACE_EVENT(name, proto, args, struct, assign, printk)
+ *
+ * the CONDITION version has:
+ *
+ * TRACE_EVENT_CONDITION(name, proto, args, cond, struct, assign, printk)
+ *
+ * Everything is the same as TRACE_EVENT except for the new cond. Think
+ * of the cond variable as:
+ *
+ * if (cond)
+ * trace_foo_bar_with_cond();
+ *
+ * Except that the logic for the if branch is placed after the static branch.
+ * That is, the if statement that processes the condition will not be
+ * executed unless that traecpoint is enabled. Otherwise it still remains
+ * a nop.
+ */
+TRACE_EVENT_CONDITION(foo_bar_with_cond,
+
+ TP_PROTO(const char *foo, int bar),
+
+ TP_ARGS(foo, bar),
+
+ TP_CONDITION(!(bar % 10)),
+
+ TP_STRUCT__entry(
+ __string( foo, foo )
+ __field( int, bar )
+ ),
+
+ TP_fast_assign(
+ __assign_str(foo, foo);
+ __entry->bar = bar;
+ ),
+
+ TP_printk("foo %s %d", __get_str(foo), __entry->bar)
+);
+
+void foo_bar_reg(void);
+void foo_bar_unreg(void);
+
+/*
+ * Now in the case that some function needs to be called when the
+ * tracepoint is enabled and/or when it is disabled, the
+ * TRACE_EVENT_FN() serves this purpose. This is just like TRACE_EVENT()
+ * but adds two more parameters at the end:
+ *
+ * TRACE_EVENT_FN( name, proto, args, struct, assign, printk, reg, unreg)
+ *
+ * reg and unreg are functions with the prototype of:
+ *
+ * void reg(void)
+ *
+ * The reg function gets called before the tracepoint is enabled, and
+ * the unreg function gets called after the tracepoint is disabled.
+ *
+ * Note, reg and unreg are allowed to be NULL. If you only need to
+ * call a function before enabling, or after disabling, just set one
+ * function and pass in NULL for the other parameter.
+ */
+TRACE_EVENT_FN(foo_bar_with_fn,
+
+ TP_PROTO(const char *foo, int bar),
+
+ TP_ARGS(foo, bar),
+
+ TP_STRUCT__entry(
+ __string( foo, foo )
+ __field( int, bar )
+ ),
+
+ TP_fast_assign(
+ __assign_str(foo, foo);
+ __entry->bar = bar;
+ ),
+
+ TP_printk("foo %s %d", __get_str(foo), __entry->bar),
+
+ foo_bar_reg, foo_bar_unreg
+);
+
+/*
+ * Each TRACE_EVENT macro creates several helper functions to produce
+ * the code to add the tracepoint, create the files in the trace
+ * directory, hook it to perf, assign the values and to print out
+ * the raw data from the ring buffer. To prevent too much bloat,
+ * if there are more than one tracepoint that uses the same format
+ * for the proto, args, struct, assign and printk, and only the name
+ * is different, it is highly recommended to use the DECLARE_EVENT_CLASS
+ *
+ * DECLARE_EVENT_CLASS() macro creates most of the functions for the
+ * tracepoint. Then DEFINE_EVENT() is use to hook a tracepoint to those
+ * functions. This DEFINE_EVENT() is an instance of the class and can
+ * be enabled and disabled separately from other events (either TRACE_EVENT
+ * or other DEFINE_EVENT()s).
+ *
+ * Note, TRACE_EVENT() itself is simply defined as:
+ *
+ * #define TRACE_EVENT(name, proto, args, tstruct, assign, printk) \
+ * DEFINE_EVENT_CLASS(name, proto, args, tstruct, assign, printk); \
+ * DEFINE_EVENT(name, name, proto, args)
+ *
+ * The DEFINE_EVENT() also can be declared with conditions and reg functions:
+ *
+ * DEFINE_EVENT_CONDITION(template, name, proto, args, cond);
+ * DEFINE_EVENT_FN(template, name, proto, args, reg, unreg);
+ */
+DECLARE_EVENT_CLASS(foo_template,
+
+ TP_PROTO(const char *foo, int bar),
+
+ TP_ARGS(foo, bar),
+
+ TP_STRUCT__entry(
+ __string( foo, foo )
+ __field( int, bar )
+ ),
+
+ TP_fast_assign(
+ __assign_str(foo, foo);
+ __entry->bar = bar;
+ ),
+
+ TP_printk("foo %s %d", __get_str(foo), __entry->bar)
+);
+
+/*
+ * Here's a better way for the previous samples (except, the first
+ * exmaple had more fields and could not be used here).
+ */
+DEFINE_EVENT(foo_template, foo_with_template_simple,
+ TP_PROTO(const char *foo, int bar),
+ TP_ARGS(foo, bar));
+
+DEFINE_EVENT_CONDITION(foo_template, foo_with_template_cond,
+ TP_PROTO(const char *foo, int bar),
+ TP_ARGS(foo, bar),
+ TP_CONDITION(!(bar % 8)));
+
+
+DEFINE_EVENT_FN(foo_template, foo_with_template_fn,
+ TP_PROTO(const char *foo, int bar),
+ TP_ARGS(foo, bar),
+ foo_bar_reg, foo_bar_unreg);
+
+/*
+ * Anytime two events share basically the same values and have
+ * the same output, use the DECLARE_EVENT_CLASS() and DEFINE_EVENT()
+ * when ever possible.
+ */
+
+/*
+ * If the event is similar to the DECLARE_EVENT_CLASS, but you need
+ * to have a different output, then use DEFINE_EVENT_PRINT() which
+ * lets you override the TP_printk() of the class.
+ */
+
+DEFINE_EVENT_PRINT(foo_template, foo_with_template_print,
+ TP_PROTO(const char *foo, int bar),
+ TP_ARGS(foo, bar),
+ TP_printk("bar %s %d", __get_str(foo), __entry->bar));
+
+#endif
+
+/***** NOTICE! The #if protection ends here. *****/
+
+
+/*
+ * There are several ways I could have done this. If I left out the
+ * TRACE_INCLUDE_PATH, then it would default to the kernel source
+ * include/trace/events directory.
+ *
+ * I could specify a path from the define_trace.h file back to this
+ * file.
+ *
+ * #define TRACE_INCLUDE_PATH ../../samples/trace_events
+ *
+ * But the safest and easiest way to simply make it use the directory
+ * that the file is in is to add in the Makefile:
+ *
+ * CFLAGS_trace-events-sample.o := -I$(src)
+ *
+ * This will make sure the current path is part of the include
+ * structure for our file so that define_trace.h can find it.
+ *
+ * I could have made only the top level directory the include:
+ *
+ * CFLAGS_trace-events-sample.o := -I$(PWD)
+ *
+ * And then let the path to this directory be the TRACE_INCLUDE_PATH:
+ *
+ * #define TRACE_INCLUDE_PATH samples/trace_events
+ *
+ * But then if something defines "samples" or "trace_events" as a macro
+ * then we could risk that being converted too, and give us an unexpected
+ * result.
+ */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+/*
+ * TRACE_INCLUDE_FILE is not needed if the filename and TRACE_SYSTEM are equal
+ */
+#define TRACE_INCLUDE_FILE trace-events-sample
+#include <trace/define_trace.h>
diff --git a/samples/uhid/Makefile b/samples/uhid/Makefile
new file mode 100644
index 000000000..c95a69656
--- /dev/null
+++ b/samples/uhid/Makefile
@@ -0,0 +1,10 @@
+# kbuild trick to avoid linker error. Can be omitted if a module is built.
+obj- := dummy.o
+
+# List of programs to build
+hostprogs-y := uhid-example
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+
+HOSTCFLAGS_uhid-example.o += -I$(objtree)/usr/include
diff --git a/samples/uhid/uhid-example.c b/samples/uhid/uhid-example.c
new file mode 100644
index 000000000..7d58a4b8d
--- /dev/null
+++ b/samples/uhid/uhid-example.c
@@ -0,0 +1,464 @@
+/*
+ * UHID Example
+ *
+ * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com>
+ *
+ * The code may be used by anyone for any purpose,
+ * and can serve as a starting point for developing
+ * applications using uhid.
+ */
+
+/*
+ * UHID Example
+ * This example emulates a basic 3 buttons mouse with wheel over UHID. Run this
+ * program as root and then use the following keys to control the mouse:
+ * q: Quit the application
+ * 1: Toggle left button (down, up, ...)
+ * 2: Toggle right button
+ * 3: Toggle middle button
+ * a: Move mouse left
+ * d: Move mouse right
+ * w: Move mouse up
+ * s: Move mouse down
+ * r: Move wheel up
+ * f: Move wheel down
+ *
+ * Additionally to 3 button mouse, 3 keyboard LEDs are also supported (LED_NUML,
+ * LED_CAPSL and LED_SCROLLL). The device doesn't generate any related keyboard
+ * events, though. You need to manually write the EV_LED/LED_XY/1 activation
+ * input event to the evdev device to see it being sent to this device.
+ *
+ * If uhid is not available as /dev/uhid, then you can pass a different path as
+ * first argument.
+ * If <linux/uhid.h> is not installed in /usr, then compile this with:
+ * gcc -o ./uhid_test -Wall -I./include ./samples/uhid/uhid-example.c
+ * And ignore the warning about kernel headers. However, it is recommended to
+ * use the installed uhid.h if available.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <termios.h>
+#include <unistd.h>
+#include <linux/uhid.h>
+
+/*
+ * HID Report Desciptor
+ * We emulate a basic 3 button mouse with wheel and 3 keyboard LEDs. This is
+ * the report-descriptor as the kernel will parse it:
+ *
+ * INPUT(1)[INPUT]
+ * Field(0)
+ * Physical(GenericDesktop.Pointer)
+ * Application(GenericDesktop.Mouse)
+ * Usage(3)
+ * Button.0001
+ * Button.0002
+ * Button.0003
+ * Logical Minimum(0)
+ * Logical Maximum(1)
+ * Report Size(1)
+ * Report Count(3)
+ * Report Offset(0)
+ * Flags( Variable Absolute )
+ * Field(1)
+ * Physical(GenericDesktop.Pointer)
+ * Application(GenericDesktop.Mouse)
+ * Usage(3)
+ * GenericDesktop.X
+ * GenericDesktop.Y
+ * GenericDesktop.Wheel
+ * Logical Minimum(-128)
+ * Logical Maximum(127)
+ * Report Size(8)
+ * Report Count(3)
+ * Report Offset(8)
+ * Flags( Variable Relative )
+ * OUTPUT(2)[OUTPUT]
+ * Field(0)
+ * Application(GenericDesktop.Keyboard)
+ * Usage(3)
+ * LED.NumLock
+ * LED.CapsLock
+ * LED.ScrollLock
+ * Logical Minimum(0)
+ * Logical Maximum(1)
+ * Report Size(1)
+ * Report Count(3)
+ * Report Offset(0)
+ * Flags( Variable Absolute )
+ *
+ * This is the mapping that we expect:
+ * Button.0001 ---> Key.LeftBtn
+ * Button.0002 ---> Key.RightBtn
+ * Button.0003 ---> Key.MiddleBtn
+ * GenericDesktop.X ---> Relative.X
+ * GenericDesktop.Y ---> Relative.Y
+ * GenericDesktop.Wheel ---> Relative.Wheel
+ * LED.NumLock ---> LED.NumLock
+ * LED.CapsLock ---> LED.CapsLock
+ * LED.ScrollLock ---> LED.ScrollLock
+ *
+ * This information can be verified by reading /sys/kernel/debug/hid/<dev>/rdesc
+ * This file should print the same information as showed above.
+ */
+
+static unsigned char rdesc[] = {
+ 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */
+ 0x09, 0x02, /* USAGE (Mouse) */
+ 0xa1, 0x01, /* COLLECTION (Application) */
+ 0x09, 0x01, /* USAGE (Pointer) */
+ 0xa1, 0x00, /* COLLECTION (Physical) */
+ 0x85, 0x01, /* REPORT_ID (1) */
+ 0x05, 0x09, /* USAGE_PAGE (Button) */
+ 0x19, 0x01, /* USAGE_MINIMUM (Button 1) */
+ 0x29, 0x03, /* USAGE_MAXIMUM (Button 3) */
+ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */
+ 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */
+ 0x95, 0x03, /* REPORT_COUNT (3) */
+ 0x75, 0x01, /* REPORT_SIZE (1) */
+ 0x81, 0x02, /* INPUT (Data,Var,Abs) */
+ 0x95, 0x01, /* REPORT_COUNT (1) */
+ 0x75, 0x05, /* REPORT_SIZE (5) */
+ 0x81, 0x01, /* INPUT (Cnst,Var,Abs) */
+ 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */
+ 0x09, 0x30, /* USAGE (X) */
+ 0x09, 0x31, /* USAGE (Y) */
+ 0x09, 0x38, /* USAGE (WHEEL) */
+ 0x15, 0x81, /* LOGICAL_MINIMUM (-127) */
+ 0x25, 0x7f, /* LOGICAL_MAXIMUM (127) */
+ 0x75, 0x08, /* REPORT_SIZE (8) */
+ 0x95, 0x03, /* REPORT_COUNT (3) */
+ 0x81, 0x06, /* INPUT (Data,Var,Rel) */
+ 0xc0, /* END_COLLECTION */
+ 0xc0, /* END_COLLECTION */
+ 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */
+ 0x09, 0x06, /* USAGE (Keyboard) */
+ 0xa1, 0x01, /* COLLECTION (Application) */
+ 0x85, 0x02, /* REPORT_ID (2) */
+ 0x05, 0x08, /* USAGE_PAGE (Led) */
+ 0x19, 0x01, /* USAGE_MINIMUM (1) */
+ 0x29, 0x03, /* USAGE_MAXIMUM (3) */
+ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */
+ 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */
+ 0x95, 0x03, /* REPORT_COUNT (3) */
+ 0x75, 0x01, /* REPORT_SIZE (1) */
+ 0x91, 0x02, /* Output (Data,Var,Abs) */
+ 0x95, 0x01, /* REPORT_COUNT (1) */
+ 0x75, 0x05, /* REPORT_SIZE (5) */
+ 0x91, 0x01, /* Output (Cnst,Var,Abs) */
+ 0xc0, /* END_COLLECTION */
+};
+
+static int uhid_write(int fd, const struct uhid_event *ev)
+{
+ ssize_t ret;
+
+ ret = write(fd, ev, sizeof(*ev));
+ if (ret < 0) {
+ fprintf(stderr, "Cannot write to uhid: %m\n");
+ return -errno;
+ } else if (ret != sizeof(*ev)) {
+ fprintf(stderr, "Wrong size written to uhid: %ld != %lu\n",
+ ret, sizeof(ev));
+ return -EFAULT;
+ } else {
+ return 0;
+ }
+}
+
+static int create(int fd)
+{
+ struct uhid_event ev;
+
+ memset(&ev, 0, sizeof(ev));
+ ev.type = UHID_CREATE;
+ strcpy((char*)ev.u.create.name, "test-uhid-device");
+ ev.u.create.rd_data = rdesc;
+ ev.u.create.rd_size = sizeof(rdesc);
+ ev.u.create.bus = BUS_USB;
+ ev.u.create.vendor = 0x15d9;
+ ev.u.create.product = 0x0a37;
+ ev.u.create.version = 0;
+ ev.u.create.country = 0;
+
+ return uhid_write(fd, &ev);
+}
+
+static void destroy(int fd)
+{
+ struct uhid_event ev;
+
+ memset(&ev, 0, sizeof(ev));
+ ev.type = UHID_DESTROY;
+
+ uhid_write(fd, &ev);
+}
+
+/* This parses raw output reports sent by the kernel to the device. A normal
+ * uhid program shouldn't do this but instead just forward the raw report.
+ * However, for ducomentational purposes, we try to detect LED events here and
+ * print debug messages for it. */
+static void handle_output(struct uhid_event *ev)
+{
+ /* LED messages are adverised via OUTPUT reports; ignore the rest */
+ if (ev->u.output.rtype != UHID_OUTPUT_REPORT)
+ return;
+ /* LED reports have length 2 bytes */
+ if (ev->u.output.size != 2)
+ return;
+ /* first byte is report-id which is 0x02 for LEDs in our rdesc */
+ if (ev->u.output.data[0] != 0x2)
+ return;
+
+ /* print flags payload */
+ fprintf(stderr, "LED output report received with flags %x\n",
+ ev->u.output.data[1]);
+}
+
+static int event(int fd)
+{
+ struct uhid_event ev;
+ ssize_t ret;
+
+ memset(&ev, 0, sizeof(ev));
+ ret = read(fd, &ev, sizeof(ev));
+ if (ret == 0) {
+ fprintf(stderr, "Read HUP on uhid-cdev\n");
+ return -EFAULT;
+ } else if (ret < 0) {
+ fprintf(stderr, "Cannot read uhid-cdev: %m\n");
+ return -errno;
+ } else if (ret != sizeof(ev)) {
+ fprintf(stderr, "Invalid size read from uhid-dev: %ld != %lu\n",
+ ret, sizeof(ev));
+ return -EFAULT;
+ }
+
+ switch (ev.type) {
+ case UHID_START:
+ fprintf(stderr, "UHID_START from uhid-dev\n");
+ break;
+ case UHID_STOP:
+ fprintf(stderr, "UHID_STOP from uhid-dev\n");
+ break;
+ case UHID_OPEN:
+ fprintf(stderr, "UHID_OPEN from uhid-dev\n");
+ break;
+ case UHID_CLOSE:
+ fprintf(stderr, "UHID_CLOSE from uhid-dev\n");
+ break;
+ case UHID_OUTPUT:
+ fprintf(stderr, "UHID_OUTPUT from uhid-dev\n");
+ handle_output(&ev);
+ break;
+ case UHID_OUTPUT_EV:
+ fprintf(stderr, "UHID_OUTPUT_EV from uhid-dev\n");
+ break;
+ default:
+ fprintf(stderr, "Invalid event from uhid-dev: %u\n", ev.type);
+ }
+
+ return 0;
+}
+
+static bool btn1_down;
+static bool btn2_down;
+static bool btn3_down;
+static signed char abs_hor;
+static signed char abs_ver;
+static signed char wheel;
+
+static int send_event(int fd)
+{
+ struct uhid_event ev;
+
+ memset(&ev, 0, sizeof(ev));
+ ev.type = UHID_INPUT;
+ ev.u.input.size = 5;
+
+ ev.u.input.data[0] = 0x1;
+ if (btn1_down)
+ ev.u.input.data[1] |= 0x1;
+ if (btn2_down)
+ ev.u.input.data[1] |= 0x2;
+ if (btn3_down)
+ ev.u.input.data[1] |= 0x4;
+
+ ev.u.input.data[2] = abs_hor;
+ ev.u.input.data[3] = abs_ver;
+ ev.u.input.data[4] = wheel;
+
+ return uhid_write(fd, &ev);
+}
+
+static int keyboard(int fd)
+{
+ char buf[128];
+ ssize_t ret, i;
+
+ ret = read(STDIN_FILENO, buf, sizeof(buf));
+ if (ret == 0) {
+ fprintf(stderr, "Read HUP on stdin\n");
+ return -EFAULT;
+ } else if (ret < 0) {
+ fprintf(stderr, "Cannot read stdin: %m\n");
+ return -errno;
+ }
+
+ for (i = 0; i < ret; ++i) {
+ switch (buf[i]) {
+ case '1':
+ btn1_down = !btn1_down;
+ ret = send_event(fd);
+ if (ret)
+ return ret;
+ break;
+ case '2':
+ btn2_down = !btn2_down;
+ ret = send_event(fd);
+ if (ret)
+ return ret;
+ break;
+ case '3':
+ btn3_down = !btn3_down;
+ ret = send_event(fd);
+ if (ret)
+ return ret;
+ break;
+ case 'a':
+ abs_hor = -20;
+ ret = send_event(fd);
+ abs_hor = 0;
+ if (ret)
+ return ret;
+ break;
+ case 'd':
+ abs_hor = 20;
+ ret = send_event(fd);
+ abs_hor = 0;
+ if (ret)
+ return ret;
+ break;
+ case 'w':
+ abs_ver = -20;
+ ret = send_event(fd);
+ abs_ver = 0;
+ if (ret)
+ return ret;
+ break;
+ case 's':
+ abs_ver = 20;
+ ret = send_event(fd);
+ abs_ver = 0;
+ if (ret)
+ return ret;
+ break;
+ case 'r':
+ wheel = 1;
+ ret = send_event(fd);
+ wheel = 0;
+ if (ret)
+ return ret;
+ break;
+ case 'f':
+ wheel = -1;
+ ret = send_event(fd);
+ wheel = 0;
+ if (ret)
+ return ret;
+ break;
+ case 'q':
+ return -ECANCELED;
+ default:
+ fprintf(stderr, "Invalid input: %c\n", buf[i]);
+ }
+ }
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int fd;
+ const char *path = "/dev/uhid";
+ struct pollfd pfds[2];
+ int ret;
+ struct termios state;
+
+ ret = tcgetattr(STDIN_FILENO, &state);
+ if (ret) {
+ fprintf(stderr, "Cannot get tty state\n");
+ } else {
+ state.c_lflag &= ~ICANON;
+ state.c_cc[VMIN] = 1;
+ ret = tcsetattr(STDIN_FILENO, TCSANOW, &state);
+ if (ret)
+ fprintf(stderr, "Cannot set tty state\n");
+ }
+
+ if (argc >= 2) {
+ if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
+ fprintf(stderr, "Usage: %s [%s]\n", argv[0], path);
+ return EXIT_SUCCESS;
+ } else {
+ path = argv[1];
+ }
+ }
+
+ fprintf(stderr, "Open uhid-cdev %s\n", path);
+ fd = open(path, O_RDWR | O_CLOEXEC);
+ if (fd < 0) {
+ fprintf(stderr, "Cannot open uhid-cdev %s: %m\n", path);
+ return EXIT_FAILURE;
+ }
+
+ fprintf(stderr, "Create uhid device\n");
+ ret = create(fd);
+ if (ret) {
+ close(fd);
+ return EXIT_FAILURE;
+ }
+
+ pfds[0].fd = STDIN_FILENO;
+ pfds[0].events = POLLIN;
+ pfds[1].fd = fd;
+ pfds[1].events = POLLIN;
+
+ fprintf(stderr, "Press 'q' to quit...\n");
+ while (1) {
+ ret = poll(pfds, 2, -1);
+ if (ret < 0) {
+ fprintf(stderr, "Cannot poll for fds: %m\n");
+ break;
+ }
+ if (pfds[0].revents & POLLHUP) {
+ fprintf(stderr, "Received HUP on stdin\n");
+ break;
+ }
+ if (pfds[1].revents & POLLHUP) {
+ fprintf(stderr, "Received HUP on uhid-cdev\n");
+ break;
+ }
+
+ if (pfds[0].revents & POLLIN) {
+ ret = keyboard(fd);
+ if (ret)
+ break;
+ }
+ if (pfds[1].revents & POLLIN) {
+ ret = event(fd);
+ if (ret)
+ break;
+ }
+ }
+
+ fprintf(stderr, "Destroy uhid device\n");
+ destroy(fd);
+ return EXIT_SUCCESS;
+}