From 0cdf5640e4f6940bdbbefee4bb0adb7dffb185ec Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Fri, 2 Oct 2015 18:42:00 +0200
Subject: ebpf: include perf_event only where really needed

Commit ea317b267e9d ("bpf: Add new bpf map type to store the pointer to
struct perf_event") added perf_event.h to the main eBPF header, so it
gets included for all users. perf_event.h is actually only needed from
the array map side, so let's sanitize this a bit.

Signed-off-by: Daniel Borkmann
Cc: Kaixu Xia
Acked-by: Alexei Starovoitov
Signed-off-by: David S. Miller
---
 kernel/bpf/arraymap.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/bpf/arraymap.c')

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 29ace107f236..2fecc4aed119 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include <linux/perf_event.h>

 /* Called from syscall */
 static struct bpf_map *array_map_alloc(union bpf_attr *attr)
--
cgit v1.2.3

From aaac3ba95e4c8b496d22f68bd1bc01cfbf525eca Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov
Date: Wed, 7 Oct 2015 22:23:22 -0700
Subject: bpf: charge user for creation of BPF maps and programs

Since eBPF programs and maps use kernel memory, consider it 'locked'
memory from the user accounting point of view and charge it against the
RLIMIT_MEMLOCK limit. This limit is typically set to 64 Kbytes by
distros, so almost all bpf+tracing programs would need to increase it,
since they use maps, but the kernel charges the maximum map size
upfront. For example a hash map of 1024 elements will be charged as
64 Kbytes. It's inconvenient for current users and changes current
behavior for root, but probably worth doing to be consistent root vs
non-root. Similar accounting logic is done by mmap of perf_event.

Signed-off-by: Alexei Starovoitov
Signed-off-by: David S. Miller
---
 kernel/bpf/arraymap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/bpf/arraymap.c')

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 2fecc4aed119..f2d9e698c753 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -49,7 +49,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	array->map.key_size = attr->key_size;
 	array->map.value_size = attr->value_size;
 	array->map.max_entries = attr->max_entries;
-
+	array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
 	array->elem_size = elem_size;

 	return &array->map;
--
cgit v1.2.3
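With map and program memory now charged against RLIMIT_MEMLOCK, user-space
loaders usually raise that limit before the first bpf() syscall. A minimal
sketch of that loader side, not part of the patch above; the helper name and
the choice of RLIM_INFINITY are illustrative only:

/* Raise RLIMIT_MEMLOCK before creating maps or loading programs, since
 * their maximum size is now charged against this limit (distros often
 * default to only 64 KB). RLIM_INFINITY is one possible choice; a sized
 * budget works as well.
 */
#include <sys/resource.h>
#include <stdio.h>

static int bump_memlock_rlimit(void)
{
	struct rlimit r = {
		.rlim_cur = RLIM_INFINITY,
		.rlim_max = RLIM_INFINITY,
	};

	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
		perror("setrlimit(RLIMIT_MEMLOCK)");
		return -1;	/* BPF_MAP_CREATE may later fail once the charge exceeds the limit */
	}
	return 0;
}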
From a43eec304259a6c637f4014a6d4767159b6a3aa3 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov
Date: Tue, 20 Oct 2015 20:02:34 -0700
Subject: bpf: introduce bpf_perf_event_output() helper

This helper is used to send raw data from an eBPF program into a special
PERF_TYPE_SOFTWARE/PERF_COUNT_SW_BPF_OUTPUT perf_event. User space needs
to perf_event_open() it (either for one or all cpus) and store the FD
into a perf_event_array (similar to the bpf_perf_event_read() helper)
before the eBPF program can send data into it.

Today the programs triggered by kprobe collect the data and either store
it into the maps or print it via bpf_trace_printk(), where the latter is
a debug facility and not suitable for streaming data. This new helper
replaces such bpf_trace_printk() usage and allows programs to have a
dedicated channel into user space for post-processing of the raw data
collected.

Signed-off-by: Alexei Starovoitov
Signed-off-by: David S. Miller
---
 kernel/bpf/arraymap.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel/bpf/arraymap.c')

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index f2d9e698c753..e3cfe46b074f 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -295,6 +295,8 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
 		return (void *)attr;

 	if (attr->type != PERF_TYPE_RAW &&
+	    !(attr->type == PERF_TYPE_SOFTWARE &&
+	      attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
 	    attr->type != PERF_TYPE_HARDWARE) {
 		perf_event_release_kernel(event);
 		return ERR_PTR(-EINVAL);
--
cgit v1.2.3
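On the user-space side, the expected setup is to open a
PERF_TYPE_SOFTWARE/PERF_COUNT_SW_BPF_OUTPUT event per cpu and store its fd
into a BPF_MAP_TYPE_PERF_EVENT_ARRAY slot. A rough sketch of that setup
follows; the function name, the PERF_SAMPLE_RAW choice and the raw syscall
wrappers are assumptions for illustration, and reading the ring buffer on
the user side is omitted:

/* Open a PERF_COUNT_SW_BPF_OUTPUT software event on one cpu and store its
 * fd into a BPF_MAP_TYPE_PERF_EVENT_ARRAY slot keyed by that cpu, so the
 * eBPF program can later bpf_perf_event_output() into it.
 * Error handling is trimmed; 'events_map_fd' comes from the caller.
 */
#include <linux/perf_event.h>
#include <linux/bpf.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

static int open_bpf_output_event(int events_map_fd, int cpu)
{
	struct perf_event_attr attr;
	union bpf_attr map_attr;
	int key = cpu, event_fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
	attr.sample_type = PERF_SAMPLE_RAW;
	attr.size = sizeof(attr);

	/* pid = -1 (any task), group_fd = -1, flags = 0 */
	event_fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
	if (event_fd < 0)
		return -1;

	/* store the fd so the eBPF program can output into this event */
	memset(&map_attr, 0, sizeof(map_attr));
	map_attr.map_fd = events_map_fd;
	map_attr.key = (__u64)(unsigned long)&key;
	map_attr.value = (__u64)(unsigned long)&event_fd;
	if (syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &map_attr, sizeof(map_attr)) < 0) {
		close(event_fd);
		return -1;
	}
	return event_fd;
}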
From 62544ce8e01c1879d420ba309f7f319d24c0f4e6 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov
Date: Thu, 22 Oct 2015 17:10:14 -0700
Subject: bpf: fix bpf_perf_event_read() helper

Fix safety checks for bpf_perf_event_read():
- only non-inherited events can be added to the perf_event_array map
  (do this check statically at map insertion time)
- dynamically check that the event is local and !pmu->count

Otherwise a buggy bpf program can cause a kernel splat.

Also fix the error path after perf_event_attrs() and remove a redundant
'extern'.

Fixes: 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter")
Signed-off-by: Alexei Starovoitov
Tested-by: Wang Nan
Signed-off-by: David S. Miller
---
 kernel/bpf/arraymap.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

(limited to 'kernel/bpf/arraymap.c')

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index e3cfe46b074f..3f4c99e06c6b 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -292,16 +292,23 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)

 	attr = perf_event_attrs(event);
 	if (IS_ERR(attr))
-		return (void *)attr;
+		goto err;

-	if (attr->type != PERF_TYPE_RAW &&
-	    !(attr->type == PERF_TYPE_SOFTWARE &&
-	      attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
-	    attr->type != PERF_TYPE_HARDWARE) {
-		perf_event_release_kernel(event);
-		return ERR_PTR(-EINVAL);
-	}
-	return event;
+	if (attr->inherit)
+		goto err;
+
+	if (attr->type == PERF_TYPE_RAW)
+		return event;
+
+	if (attr->type == PERF_TYPE_HARDWARE)
+		return event;
+
+	if (attr->type == PERF_TYPE_SOFTWARE &&
+	    attr->config == PERF_COUNT_SW_BPF_OUTPUT)
+		return event;
+err:
+	perf_event_release_kernel(event);
+	return ERR_PTR(-EINVAL);
 }

 static void perf_event_fd_array_put_ptr(void *ptr)
--
cgit v1.2.3

From fbca9d2d35c6ef1b323fae75cc9545005ba25097 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Mon, 30 Nov 2015 13:02:56 +0100
Subject: bpf, array: fix heap out-of-bounds access when updating elements

During own review but also reported by Dmitry's syzkaller [1] it has
been noticed that we trigger a heap out-of-bounds access on eBPF array
maps when updating elements. This happens with each map whose
map->value_size (specified during map creation time) is not a multiple
of 8 bytes.

In array_map_alloc(), elem_size is round_up(attr->value_size, 8) and
used to align array map slots for faster access. However, in function
array_map_update_elem(), we update the element as ...

  memcpy(array->value + array->elem_size * index, value, array->elem_size);

... where we access 'value' out-of-bounds, since it was allocated from
map_update_elem() from the syscall side as kmalloc(map->value_size,
GFP_USER) and later on copied through copy_from_user(value, uvalue,
map->value_size). Thus, up to 7 bytes, we can access out-of-bounds.

Same could happen from within an eBPF program, where in the worst case
we access beyond an eBPF program's designated stack.

Since 1be7f75d1668 ("bpf: enable non-root eBPF programs") didn't hit an
official release yet, it only affects privileged users.

In case of array_map_lookup_elem(), the verifier prevents eBPF programs
from accessing beyond map->value_size through check_map_access(). Also
from the syscall side map_lookup_elem() only copies map->value_size back
to user, so nothing could leak.

  [1] http://github.com/google/syzkaller

Fixes: 28fbcfa08d8e ("bpf: add array type of eBPF maps")
Reported-by: Dmitry Vyukov
Signed-off-by: Daniel Borkmann
Acked-by: Alexei Starovoitov
Signed-off-by: David S. Miller
---
 kernel/bpf/arraymap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/bpf/arraymap.c')

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 3f4c99e06c6b..4c67ce39732e 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -105,7 +105,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 		/* all elements already exist */
 		return -EEXIST;

-	memcpy(array->value + array->elem_size * index, value, array->elem_size);
+	memcpy(array->value + array->elem_size * index, value, map->value_size);
 	return 0;
 }
--
cgit v1.2.3
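The over-read is easiest to see with concrete numbers: the map slot is padded
to a multiple of 8 bytes, while the buffer copied from user space is only
value_size bytes long. A stand-alone sketch of the arithmetic, ordinary
user-space C rather than kernel code:

/* Illustrate the mismatch fixed above: the array slot is
 * elem_size = round_up(value_size, 8) bytes, but the buffer filled by
 * copy_from_user() is only value_size bytes, so copying elem_size bytes
 * over-reads by up to 7 bytes.
 */
#include <stdio.h>

#define ROUND_UP(x, a)	(((x) + (a) - 1) / (a) * (a))

int main(void)
{
	unsigned int value_size = 13;				/* map->value_size from map creation */
	unsigned int elem_size = ROUND_UP(value_size, 8u);	/* 16: padded slot size in the array */

	printf("value_size=%u elem_size=%u over-read=%u bytes\n",
	       value_size, elem_size, elem_size - value_size);
	return 0;
}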
From 01b3f52157ff5a47d6d8d796f396a4b34a53c61d Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov
Date: Sun, 29 Nov 2015 16:59:35 -0800
Subject: bpf: fix allocation warnings in bpf maps and integer overflow

For large map->value_size the user space can trigger memory allocation
warnings like:

  WARNING: CPU: 2 PID: 11122 at mm/page_alloc.c:2989 __alloc_pages_nodemask+0x695/0x14e0()
  Call Trace:
   [< inline >] __dump_stack lib/dump_stack.c:15
   [] dump_stack+0x68/0x92 lib/dump_stack.c:50
   [] warn_slowpath_common+0xd9/0x140 kernel/panic.c:460
   [] warn_slowpath_null+0x29/0x30 kernel/panic.c:493
   [< inline >] __alloc_pages_slowpath mm/page_alloc.c:2989
   [] __alloc_pages_nodemask+0x695/0x14e0 mm/page_alloc.c:3235
   [] alloc_pages_current+0xee/0x340 mm/mempolicy.c:2055
   [< inline >] alloc_pages include/linux/gfp.h:451
   [] alloc_kmem_pages+0x16/0xf0 mm/page_alloc.c:3414
   [] kmalloc_order+0x19/0x60 mm/slab_common.c:1007
   [] kmalloc_order_trace+0x1f/0xa0 mm/slab_common.c:1018
   [< inline >] kmalloc_large include/linux/slab.h:390
   [] __kmalloc+0x234/0x250 mm/slub.c:3525
   [< inline >] kmalloc include/linux/slab.h:463
   [< inline >] map_update_elem kernel/bpf/syscall.c:288
   [< inline >] SYSC_bpf kernel/bpf/syscall.c:744

To avoid a never-succeeding kmalloc with order >= MAX_ORDER, check that
elem->value_size and the computed elem_size are within limits for both
hash and array type maps.

Also add __GFP_NOWARN to kmalloc(value_size | elem_size) to avoid OOM
warnings. Note kmalloc(key_size) is highly unlikely to trigger OOM,
since key_size <= 512, so keep those kmalloc-s as-is.

Large value_size can cause integer overflows in the elem_size and
map.pages formulas, so check for that as well.

Fixes: aaac3ba95e4c ("bpf: charge user for creation of BPF maps and programs")
Reported-by: Dmitry Vyukov
Signed-off-by: Alexei Starovoitov
Signed-off-by: David S. Miller
---
 kernel/bpf/arraymap.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'kernel/bpf/arraymap.c')

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 4c67ce39732e..b0799bced518 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -28,11 +28,17 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	    attr->value_size == 0)
 		return ERR_PTR(-EINVAL);

+	if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1))
+		/* if value_size is bigger, the user space won't be able to
+		 * access the elements.
+		 */
+		return ERR_PTR(-E2BIG);
+
 	elem_size = round_up(attr->value_size, 8);

 	/* check round_up into zero and u32 overflow */
 	if (elem_size == 0 ||
-	    attr->max_entries > (U32_MAX - sizeof(*array)) / elem_size)
+	    attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size)
 		return ERR_PTR(-ENOMEM);

 	array_size = sizeof(*array) + attr->max_entries * elem_size;
--
cgit v1.2.3
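The u32 overflow being guarded against is plain arithmetic on max_entries and
the rounded-up elem_size. A stand-alone user-space sketch mirroring the added
check; PAGE_SZ and HDR_SIZE are stand-in constants, not the kernel's values:

/* Mirror the overflow check added above: reject cases where
 * sizeof(*array) + max_entries * elem_size, later rounded up to the page
 * size for map.pages accounting, would wrap a u32.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SZ		4096u	/* stand-in for PAGE_SIZE */
#define HDR_SIZE	64u	/* stand-in for sizeof(struct bpf_array) */

static int array_size_fits(uint32_t max_entries, uint32_t value_size)
{
	uint32_t elem_size = (value_size + 7u) & ~7u;	/* round_up(value_size, 8) */

	if (elem_size == 0)	/* value_size wrapped around on round_up */
		return 0;
	/* mirrors: max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size */
	if (max_entries > (UINT32_MAX - PAGE_SZ - HDR_SIZE) / elem_size)
		return 0;
	return 1;
}

int main(void)
{
	printf("%d\n", array_size_fits(1u << 20, 8));		/* 1: fits */
	printf("%d\n", array_size_fits(UINT32_MAX, 4096));	/* 0: would overflow */
	return 0;
}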