[RFC 2/2] samples: bpf: IO profiling example

From: He Kuang
Date: Fri Feb 06 2015 - 01:59:43 EST


This sample places several probe nodes along the buffered-write I/O
path. At the first node we sample and record a characteristic of the
data (e.g. the page address) in a global hash table. Each later node
is traced only when its context matches a characteristic recorded by
the previous node, so a single sampled I/O can be followed from the
page cache down to the block device.

Signed-off-by: He Kuang <hekuang@xxxxxxxxxx>
---
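Illustrative note (not part of the patch): every intermediate node
follows the same lookup-then-record pattern. The sketch below assumes
the same includes and helpers as tracex5_kern.c; prev_hash/next_hash
stand in for the per-stage trigger tables, and the section name and
argument layout are placeholders made up for illustration.

struct bpf_map_def SEC("maps") prev_hash = {
        .type = BPF_MAP_TYPE_HASH,
        .key_size = sizeof(u64),
        .value_size = sizeof(int),
        .max_entries = 512,
};

struct bpf_map_def SEC("maps") next_hash = {
        .type = BPF_MAP_TYPE_HASH,
        .key_size = sizeof(u64),
        .value_size = sizeof(int),
        .max_entries = 512,
};

SEC("events/subsys/some_tracepoint")    /* placeholder event */
int node_n(struct bpf_context *ctx)
{
        u64 prev_key = ctx->arg1;       /* characteristic set by the previous node */
        u64 next_key = ctx->arg2;       /* characteristic handed to the next node */
        int fill = 1;

        /* ignore events that do not belong to a sampled I/O */
        if (!bpf_map_lookup_elem(&prev_hash, &prev_key))
                return 0;

        /* record this stage's key so the next node can match on it */
        bpf_map_update_elem(&next_hash, &next_key, &fill, BPF_ANY);
        return 1;       /* non-zero: let this event through to the trace */
}
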
samples/bpf/Makefile | 4 +
samples/bpf/tracex5_kern.c | 193 +++++++++++++++++++++++++++++++++++++++++++++
samples/bpf/tracex5_user.c | 56 +++++++++++++
3 files changed, 253 insertions(+)
create mode 100644 samples/bpf/tracex5_kern.c
create mode 100644 samples/bpf/tracex5_user.c
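
Events that the nodes let through (return 1) can be read from the
trace pipe; tracex5_user.c forks a child that dumps them with
read_trace_pipe() from bpf_load.c, which is roughly the loop below (a
simplified sketch, not the exact helper; it assumes debugfs is mounted
at /sys/kernel/debug):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* sketch of read_trace_pipe(): dump whatever the probes let through */
static void dump_trace_pipe(void)
{
        char buf[4096];
        int fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);

        if (fd < 0)
                return;

        for (;;) {
                /* trace_pipe blocks until new events arrive */
                ssize_t n = read(fd, buf, sizeof(buf) - 1);

                if (n > 0) {
                        buf[n] = 0;
                        fputs(buf, stdout);
                }
        }
}

Run the sample as root from samples/bpf: it loads tracex5_kern.o,
starts the background dd writer, and prints the sample counters after
two minutes or on Ctrl-C.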

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index aec1f52..529deb7 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -11,6 +11,7 @@ hostprogs-y += tracex1
hostprogs-y += tracex2
hostprogs-y += tracex3
hostprogs-y += tracex4
+hostprogs-y += tracex5

dropmon-objs := dropmon.o libbpf.o
test_verifier-objs := test_verifier.o libbpf.o
@@ -22,6 +23,7 @@ tracex1-objs := bpf_load.o libbpf.o tracex1_user.o
tracex2-objs := bpf_load.o libbpf.o tracex2_user.o
tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
+tracex5-objs := bpf_load.o libbpf.o tracex5_user.o

# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -31,6 +33,7 @@ always += tracex1_kern.o
always += tracex2_kern.o
always += tracex3_kern.o
always += tracex4_kern.o
+always += tracex5_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(objtree)/include/uapi/
@@ -43,6 +46,7 @@ HOSTLOADLIBES_tracex1 += -lelf
HOSTLOADLIBES_tracex2 += -lelf
HOSTLOADLIBES_tracex3 += -lelf
HOSTLOADLIBES_tracex4 += -lelf
+HOSTLOADLIBES_tracex5 += -lelf

# point this to your LLVM backend with bpf support
#LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c
new file mode 100644
index 0000000..67c03b0
--- /dev/null
+++ b/samples/bpf/tracex5_kern.c
@@ -0,0 +1,193 @@
+/*
+ * Several probe nodes are placed along the buffered-write data path
+ * to follow data from the page cache down to the block device.
+ *
+ * The first node is sampled and its characteristic (a pointer value)
+ * is recorded in a global hash table; each later node is traced only
+ * when its context matches an entry recorded by the previous node.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <uapi/linux/bpf.h>
+#include <trace/bpf_trace.h>
+#include "bpf_helpers.h"
+
+#include <linux/writeback.h>
+#include <linux/blkdev.h>
+
+struct globals {
+ int num_samples; /* total node-1 hits seen */
+ int samples; /* hits actually sampled into the trigger tables */
+};
+
+struct bpf_map_def SEC("maps") global_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct globals),
+ .max_entries = 1,
+};
+
+/* trigger table: symbol(page) */
+struct bpf_map_def SEC("maps") trigger_page_hash = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(u64),
+ .value_size = sizeof(int),
+ .max_entries = 512,
+};
+
+/* trigger table: symbol(bio) */
+struct bpf_map_def SEC("maps") trigger_bio_hash = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(u64),
+ .value_size = sizeof(int),
+ .max_entries = 512,
+};
+
+
+/* trigger table: symbol(req) */
+struct bpf_map_def SEC("maps") trigger_req_hash = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(u64),
+ .value_size = sizeof(int),
+ .max_entries = 512,
+};
+
+/* node 1: iov -> page */
+SEC("events/filemap/iov_iter_copy_from_user_atomic")
+int node_1(struct bpf_context *ctx)
+{
+ int ind = 0;
+ struct globals *g = bpf_map_lookup_elem(&global_map, &ind);
+
+ if (!g)
+ return 0;
+
+ g->num_samples++;
+ if ((g->num_samples & 1023) == 0) { /* sample every 1024th hit */
+ struct page *page = (struct page *)ctx->arg1;
+ int fill = 1;
+
+ __sync_fetch_and_add(&g->samples, 1);
+
+ bpf_map_update_elem(&trigger_page_hash, &page, &fill, BPF_ANY);
+
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/*
+ * fs/ext4/ext4.h is not on the include path, so its struct fields
+ * cannot conveniently be fetched yet; duplicate the layout here.
+ *
+ * Ideally this would be avoided, but reaching io->io_bio (and, later,
+ * bio->bi_iter) requires the structure definition.
+ */
+struct ext4_io_submit {
+ int io_op;
+ struct bio *io_bio;
+};
+
+/* node 2: page -> bio */
+SEC("events/ext4/ext4_bio_write_page")
+int node_2(struct bpf_context *ctx)
+{
+ u64 page_addr = ctx->arg2;
+ struct ext4_io_submit *io = (struct ext4_io_submit *)ctx->arg1;
+ struct bio *bio = bpf_fetch_ptr(&io->io_bio);
+ void *value;
+ int fill = 1;
+
+ value = bpf_map_lookup_elem(&trigger_page_hash, &page_addr);
+ if (!value)
+ return 0;
+
+ bpf_map_update_elem(&trigger_bio_hash, &bio, &fill, BPF_ANY);
+ return 1;
+}
+
+/* node 3, case 1: bio -> request */
+SEC("events/block/blk_queue_bio")
+int node_3_1(struct bpf_context *ctx)
+{
+ struct request *request = (struct request *)ctx->arg1;
+ struct bio *bio = (struct bio *)ctx->arg2;
+ void *value;
+ int fill = 1;
+
+ value = bpf_map_lookup_elem(&trigger_bio_hash, &bio);
+ if (!value) {
+ return 0;
+ }
+
+ bpf_map_update_elem(&trigger_req_hash, &request, &fill, BPF_ANY);
+ return 1;
+}
+
+/* node 3, case 2: bio -> request (front merge) */
+SEC("events/block/bio_attempt_front_merge")
+int node_3_2(struct bpf_context *ctx)
+{
+ struct request *request = (struct request *)ctx->arg1;
+ struct bio *bio = (struct bio *)ctx->arg2;
+ void *value;
+ int fill = 1;
+
+ value = bpf_map_lookup_elem(&trigger_bio_hash, &bio);
+ if (!value) {
+ return 0;
+ }
+
+ bpf_map_update_elem(&trigger_req_hash, &request, &fill, BPF_ANY);
+ return 1;
+}
+
+/* node 3, case 3: bio -> request (back merge) */
+SEC("events/block/bio_attempt_back_merge")
+int node_3_3(struct bpf_context *ctx)
+{
+ struct request *request = (struct request *)ctx->arg1;
+ struct bio *bio = (struct bio *)ctx->arg2;
+ void *value;
+ int fill = 1;
+
+ value = bpf_map_lookup_elem(&trigger_bio_hash, &bio);
+ if (!value) {
+ return 0;
+ }
+
+ bpf_map_update_elem(&trigger_req_hash, &request, &fill, BPF_ANY);
+ return 1;
+}
+
+/* node 4: request -> virtqueue (virtio-scsi) */
+SEC("events/scsi/virtscsi_add_cmd")
+int node_4(struct bpf_context *ctx)
+{
+ struct request *request = (struct request *)ctx->arg2;
+ void *value;
+ int fill = 1;
+
+ value = bpf_map_lookup_elem(&trigger_req_hash, &request);
+ if (!value) {
+ return 0;
+ }
+ return 1;
+}
+
+/* end node: bio completion */
+SEC("events/block/bio_endio")
+int node_5(struct bpf_context *ctx)
+{
+ struct bio *bio = (struct bio *)ctx->arg1;
+ void *value;
+
+ value = bpf_map_lookup_elem(&trigger_bio_hash, &bio);
+ if (!value) {
+ return 0;
+ }
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c
new file mode 100644
index 0000000..814f926
--- /dev/null
+++ b/samples/bpf/tracex5_user.c
@@ -0,0 +1,56 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+struct globals {
+ int num_samples; /* total node-1 hits seen */
+ int samples; /* hits actually sampled into the trigger tables */
+};
+
+static void print_counts(int fd)
+{
+ struct globals g = {};
+ int key = 0;
+
+ bpf_lookup_elem(fd, &key, &g);
+
+ printf("Sampled %d out of %d node-1 hits\n", g.samples, g.num_samples);
+}
+
+static void int_exit(int sig)
+{
+ print_counts(map_fd[0]);
+ exit(0);
+}
+
+int main(int ac, char **argv)
+{
+ FILE *f;
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ /* run 'dd' in the background to generate buffered writes; assumes an ext4 fs mounted at /mnt/data */
+ f = popen("dd if=/dev/zero of=/mnt/data/testfile bs=4k count=5000", "r");
+ (void) f;
+
+ signal(SIGINT, int_exit);
+
+ if (fork() == 0) {
+ read_trace_pipe();
+ } else {
+ sleep(120);
+ print_counts(map_fd[0]);
+ }
+ return 0;
+}
--
2.2.0.33.gc18b867
