diff options
Diffstat (limited to 'kernel/bpf')
-rw-r--r-- | kernel/bpf/Makefile | 1 | ||||
-rw-r--r-- | kernel/bpf/core.c | 77 | ||||
-rw-r--r-- | kernel/bpf/devmap.c | 3 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 36 |
4 files changed, 90 insertions, 27 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index af3ab6164ff5..be282c135a66 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-y := core.o
+CFLAGS_core.o += $(call cc-disable-warning, override-init)
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index e46106c6ac39..e7211b0fa27c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -290,6 +290,12 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 }
 
 #ifdef CONFIG_BPF_JIT
+/* All BPF JIT sysctl knobs here. */
+int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
+int bpf_jit_harden __read_mostly;
+int bpf_jit_kallsyms __read_mostly;
+long bpf_jit_limit __read_mostly;
+
 static __always_inline void
 bpf_get_prog_addr_region(const struct bpf_prog *prog,
 			 unsigned long *symbol_start,
@@ -358,8 +364,6 @@ static DEFINE_SPINLOCK(bpf_lock);
 static LIST_HEAD(bpf_kallsyms);
 static struct latch_tree_root bpf_tree __cacheline_aligned;
 
-int bpf_jit_kallsyms __read_mostly;
-
 static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
 {
 	WARN_ON_ONCE(!list_empty(&aux->ksym_lnode));
@@ -486,27 +490,75 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 	return ret;
 }
 
+static atomic_long_t bpf_jit_current;
+
+/* Can be overridden by an arch's JIT compiler if it has a custom,
+ * dedicated BPF backend memory area, or if neither of the two
+ * below apply.
+ */
+u64 __weak bpf_jit_alloc_exec_limit(void)
+{
+#if defined(MODULES_VADDR)
+	return MODULES_END - MODULES_VADDR;
+#else
+	return VMALLOC_END - VMALLOC_START;
+#endif
+}
+
+static int __init bpf_jit_charge_init(void)
+{
+	/* Only used as heuristic here to derive limit. */
+	bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2,
+					    PAGE_SIZE), LONG_MAX);
+	return 0;
+}
+pure_initcall(bpf_jit_charge_init);
+
+static int bpf_jit_charge_modmem(u32 pages)
+{
+	if (atomic_long_add_return(pages, &bpf_jit_current) >
+	    (bpf_jit_limit >> PAGE_SHIFT)) {
+		if (!capable(CAP_SYS_ADMIN)) {
+			atomic_long_sub(pages, &bpf_jit_current);
+			return -EPERM;
+		}
+	}
+
+	return 0;
+}
+
+static void bpf_jit_uncharge_modmem(u32 pages)
+{
+	atomic_long_sub(pages, &bpf_jit_current);
+}
+
 struct bpf_binary_header *
 bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 		     unsigned int alignment,
 		     bpf_jit_fill_hole_t bpf_fill_ill_insns)
 {
 	struct bpf_binary_header *hdr;
-	unsigned int size, hole, start;
+	u32 size, hole, start, pages;
 
 	/* Most of BPF filters are really small, but if some of them
 	 * fill a page, allow at least 128 extra bytes to insert a
 	 * random section of illegal instructions.
 	 */
 	size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
+	pages = size / PAGE_SIZE;
+
+	if (bpf_jit_charge_modmem(pages))
+		return NULL;
 	hdr = module_alloc(size);
-	if (hdr == NULL)
+	if (!hdr) {
+		bpf_jit_uncharge_modmem(pages);
 		return NULL;
+	}
 
 	/* Fill space with illegal/arch-dep instructions. */
 	bpf_fill_ill_insns(hdr, size);
 
-	hdr->pages = size / PAGE_SIZE;
+	hdr->pages = pages;
 	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
 		     PAGE_SIZE - sizeof(*hdr));
 	start = (get_random_int() % hole) & ~(alignment - 1);
@@ -519,7 +571,10 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 
 void bpf_jit_binary_free(struct bpf_binary_header *hdr)
 {
+	u32 pages = hdr->pages;
+
 	module_memfree(hdr);
+	bpf_jit_uncharge_modmem(pages);
 }
 
 /* This symbol is only overridden by archs that have different
@@ -540,8 +595,6 @@ void __weak bpf_jit_free(struct bpf_prog *fp)
 	bpf_prog_unlock_free(fp);
 }
 
-int bpf_jit_harden __read_mostly;
-
 static int bpf_jit_blind_insn(const struct bpf_insn *from,
 			      const struct bpf_insn *aux,
 			      struct bpf_insn *to_buff)
@@ -1327,9 +1380,13 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
 };
 
 #else
-static unsigned int __bpf_prog_ret0(const void *ctx,
-				    const struct bpf_insn *insn)
+static unsigned int __bpf_prog_ret0_warn(const void *ctx,
+					 const struct bpf_insn *insn)
 {
+	/* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON
+	 * is not working properly, so warn about it!
+	 */
+	WARN_ON_ONCE(1);
 	return 0;
 }
 #endif
@@ -1386,7 +1443,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 
 	fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
 #else
-	fp->bpf_func = __bpf_prog_ret0;
+	fp->bpf_func = __bpf_prog_ret0_warn;
 #endif
 
 	/* eBPF JITs can rewrite the program in case constant
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 482bf42e21a4..1060eee6c8d5 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -388,8 +388,7 @@ static int dev_map_notification(struct notifier_block *notifier,
 				struct bpf_dtab_netdev *dev, *odev;
 
 				dev = READ_ONCE(dtab->netdev_map[i]);
-				if (!dev ||
-				    dev->dev->ifindex != netdev->ifindex)
+				if (!dev || netdev != dev->dev)
 					continue;
 				odev = cmpxchg(&dtab->netdev_map[i], dev, NULL);
 				if (dev == odev)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2d828d346982..f5c1d5479ba3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -348,12 +348,12 @@ static int map_create(union bpf_attr *attr)
 	err = bpf_map_new_fd(map);
 	if (err < 0) {
 		/* failed to allocate fd.
-		 * bpf_map_put() is needed because the above
+		 * bpf_map_put_with_uref() is needed because the above
 		 * bpf_map_alloc_id() has published the map
 		 * to the userspace and the userspace may
 		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
 		 */
-		bpf_map_put(map);
+		bpf_map_put_with_uref(map);
 		return err;
 	}
 
@@ -1067,20 +1067,26 @@ static int bpf_prog_load(union bpf_attr *attr)
 	if (err)
 		goto free_used_maps;
 
-	err = bpf_prog_new_fd(prog);
-	if (err < 0) {
-		/* failed to allocate fd.
-		 * bpf_prog_put() is needed because the above
-		 * bpf_prog_alloc_id() has published the prog
-		 * to the userspace and the userspace may
-		 * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID.
-		 */
-		bpf_prog_put(prog);
-		return err;
-	}
-
+	/* Upon success of bpf_prog_alloc_id(), the BPF prog is
+	 * effectively publicly exposed. However, retrieving via
+	 * bpf_prog_get_fd_by_id() will take another reference,
+	 * therefore it cannot be gone underneath us.
+	 *
+	 * Only for the time /after/ successful bpf_prog_new_fd()
+	 * and before returning to userspace, we might just hold
+	 * one reference and any parallel close on that fd could
+	 * rip everything out. Hence, below notifications must
+	 * happen before bpf_prog_new_fd().
+	 *
+	 * Also, any failure handling from this point onwards must
+	 * be using bpf_prog_put() given the program is exposed.
+	 */
 	bpf_prog_kallsyms_add(prog);
 	trace_bpf_prog_load(prog, err);
+
+	err = bpf_prog_new_fd(prog);
+	if (err < 0)
+		bpf_prog_put(prog);
 	return err;
 
 free_used_maps:
@@ -1348,7 +1354,7 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
 
 	fd = bpf_map_new_fd(map);
 	if (fd < 0)
-		bpf_map_put(map);
+		bpf_map_put_with_uref(map);
 
 	return fd;
 }