From 7dd68b3279f1792103d12e69933db3128c6d416e Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 18 Dec 2019 23:44:35 -0800 Subject: bpf: Support replacing cgroup-bpf program in MULTI mode The common use-case in production is to have multiple cgroup-bpf programs per attach type that cover multiple use-cases. Such programs are attached with BPF_F_ALLOW_MULTI and can be maintained by different people. Order of programs usually matters, for example imagine two egress programs: the first one drops packets and the second one counts packets. If they're swapped the result of counting program will be different. It brings operational challenges with updating cgroup-bpf program(s) attached with BPF_F_ALLOW_MULTI since there is no way to replace a program: * One way to update is to detach all programs first and then attach the new version(s) again in the right order. This introduces an interruption in the work a program is doing and may not be acceptable (e.g. if it's egress firewall); * Another way is attach the new version of a program first and only then detach the old version. This introduces the time interval when two versions of same program are working, what may not be acceptable if a program is not idempotent. It also imposes additional burden on program developers to make sure that two versions of their program can co-exist. Solve the problem by introducing a "replace" mode in BPF_PROG_ATTACH command for cgroup-bpf programs being attached with BPF_F_ALLOW_MULTI flag. This mode is enabled by newly introduced BPF_F_REPLACE attach flag and bpf_attr.replace_bpf_fd attribute to pass fd of the old program to replace That way user can replace any program among those attached with BPF_F_ALLOW_MULTI flag without the problems described above. Details of the new API: * If BPF_F_REPLACE is set but replace_bpf_fd doesn't have valid descriptor of BPF program, BPF_PROG_ATTACH will return corresponding error (EINVAL or EBADF). * If replace_bpf_fd has valid descriptor of BPF program but such a program is not attached to specified cgroup, BPF_PROG_ATTACH will return ENOENT. BPF_F_REPLACE is introduced to make the user intent clear, since replace_bpf_fd alone can't be used for this (its default value, 0, is a valid fd). BPF_F_REPLACE also makes it possible to extend the API in the future (e.g. add BPF_F_BEFORE and BPF_F_AFTER if needed). Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Acked-by: Andrii Narkyiko Link: https://lore.kernel.org/bpf/30cd850044a0057bdfcaaf154b7d2f39850ba813.1576741281.git.rdna@fb.com --- include/linux/bpf-cgroup.h | 4 +++- include/uapi/linux/bpf.h | 10 ++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 169fd25f6bc2..18f6a6da7c3c 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -85,6 +85,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp); void cgroup_bpf_offline(struct cgroup *cgrp); int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + struct bpf_prog *replace_prog, enum bpf_attach_type type, u32 flags); int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type); @@ -93,7 +94,8 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 flags); + struct bpf_prog *replace_prog, enum bpf_attach_type type, + u32 flags); int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index dbbcf0b02970..7df436da542d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -231,6 +231,11 @@ enum bpf_attach_type { * When children program makes decision (like picking TCP CA or sock bind) * parent program has a chance to override it. * + * With BPF_F_ALLOW_MULTI a new program is added to the end of the list of + * programs for a cgroup. Though it's possible to replace an old program at + * any position by also specifying BPF_F_REPLACE flag and position itself in + * replace_bpf_fd attribute. Old program at this position will be released. + * * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. * A cgroup with NONE doesn't allow any programs in sub-cgroups. * Ex1: @@ -249,6 +254,7 @@ enum bpf_attach_type { */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) #define BPF_F_ALLOW_MULTI (1U << 1) +#define BPF_F_REPLACE (1U << 2) /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will perform strict alignment checking as if the kernel @@ -442,6 +448,10 @@ union bpf_attr { __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; __u32 attach_flags; + __u32 replace_bpf_fd; /* previously attached eBPF + * program to replace if + * BPF_F_REPLACE is used + */ }; struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ -- cgit v1.2.3-71-gd317