From a4393861a351f66fef1102e775743c86a276afce Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 17 Feb 2020 12:15:28 +0000 Subject: bpf, sk_msg: Let ULP restore sk_proto and write_space callback We don't need a fallback for when the socket is not using ULP. tcp_update_ulp handles this case exactly the same as we do in sk_psock_restore_proto. Get rid of the duplicated code. Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200217121530.754315-2-jakub@cloudflare.com --- include/linux/skmsg.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 14d61bba0b79..8605947d6c08 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -361,16 +361,7 @@ static inline void sk_psock_restore_proto(struct sock *sk, sk->sk_prot->unhash = psock->saved_unhash; if (psock->sk_proto) { - struct inet_connection_sock *icsk = inet_csk(sk); - bool has_ulp = !!icsk->icsk_ulp_data; - - if (has_ulp) { - tcp_update_ulp(sk, psock->sk_proto, - psock->saved_write_space); - } else { - sk->sk_prot = psock->sk_proto; - sk->sk_write_space = psock->saved_write_space; - } + tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); psock->sk_proto = NULL; } else { sk->sk_write_space = psock->saved_write_space; -- cgit v1.2.3-71-gd317 From a178b4585865a4c756c41bc5376f63416b7d9271 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 17 Feb 2020 12:15:29 +0000 Subject: bpf, sk_msg: Don't clear saved sock proto on restore There is no need to clear psock->sk_proto when restoring socket protocol callbacks in sk->sk_prot. The psock is about to get detached from the sock and eventually destroyed. At worst we will restore the protocol callbacks and the write callback twice. This makes reasoning about psock state easier. Once psock is initialized, we can count on psock->sk_proto always being set. Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200217121530.754315-3-jakub@cloudflare.com --- include/linux/skmsg.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 8605947d6c08..d90ef61712a1 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -359,13 +359,7 @@ static inline void sk_psock_restore_proto(struct sock *sk, struct sk_psock *psock) { sk->sk_prot->unhash = psock->saved_unhash; - - if (psock->sk_proto) { - tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); - psock->sk_proto = NULL; - } else { - sk->sk_write_space = psock->saved_write_space; - } + tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); } static inline void sk_psock_set_state(struct sk_psock *psock, -- cgit v1.2.3-71-gd317 From b8e202d1d1d0f182f01062804efb523ea9a9008c Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 18 Feb 2020 17:10:13 +0000 Subject: net, sk_msg: Annotate lockless access to sk_prot on clone sk_msg and ULP frameworks override protocol callbacks pointer in sk->sk_prot, while tcp accesses it locklessly when cloning the listening socket, that is with neither sk_lock nor sk_callback_lock held. Once we enable use of listening sockets with sockmap (and hence sk_msg), there will be shared access to sk->sk_prot if socket is getting cloned while being inserted/deleted to/from the sockmap from another CPU: Read side: tcp_v4_rcv sk = __inet_lookup_skb(...) tcp_check_req(sk) inet_csk(sk)->icsk_af_ops->syn_recv_sock tcp_v4_syn_recv_sock tcp_create_openreq_child inet_csk_clone_lock sk_clone_lock READ_ONCE(sk->sk_prot) Write side: sock_map_ops->map_update_elem sock_map_update_elem sock_map_update_common sock_map_link_no_progs tcp_bpf_init tcp_bpf_update_sk_prot sk_psock_update_proto WRITE_ONCE(sk->sk_prot, ops) sock_map_ops->map_delete_elem sock_map_delete_elem __sock_map_delete sock_map_unref sk_psock_put sk_psock_drop sk_psock_restore_proto tcp_update_ulp WRITE_ONCE(sk->sk_prot, proto) Mark the shared access with READ_ONCE/WRITE_ONCE annotations. Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200218171023.844439-2-jakub@cloudflare.com --- include/linux/skmsg.h | 3 ++- net/core/sock.c | 8 +++++--- net/ipv4/tcp_bpf.c | 4 +++- net/ipv4/tcp_ulp.c | 3 ++- net/tls/tls_main.c | 3 ++- 5 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index d90ef61712a1..112765bd146d 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -352,7 +352,8 @@ static inline void sk_psock_update_proto(struct sock *sk, psock->saved_write_space = sk->sk_write_space; psock->sk_proto = sk->sk_prot; - sk->sk_prot = ops; + /* Pairs with lockless read in sk_clone_lock() */ + WRITE_ONCE(sk->sk_prot, ops); } static inline void sk_psock_restore_proto(struct sock *sk, diff --git a/net/core/sock.c b/net/core/sock.c index a4c8fac781ff..bf1173b93eda 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1572,13 +1572,14 @@ static inline void sock_lock_init(struct sock *sk) */ static void sock_copy(struct sock *nsk, const struct sock *osk) { + const struct proto *prot = READ_ONCE(osk->sk_prot); #ifdef CONFIG_SECURITY_NETWORK void *sptr = nsk->sk_security; #endif memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin)); memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, - osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); + prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); #ifdef CONFIG_SECURITY_NETWORK nsk->sk_security = sptr; @@ -1792,16 +1793,17 @@ static void sk_init_common(struct sock *sk) */ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) { + struct proto *prot = READ_ONCE(sk->sk_prot); struct sock *newsk; bool is_charged = true; - newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family); + newsk = sk_prot_alloc(prot, priority, sk->sk_family); if (newsk != NULL) { struct sk_filter *filter; sock_copy(newsk, sk); - newsk->sk_prot_creator = sk->sk_prot; + newsk->sk_prot_creator = prot; /* SANITY */ if (likely(newsk->sk_net_refcnt)) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 8a01428f80c1..dd183b050642 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -645,8 +645,10 @@ static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock) /* Reinit occurs when program types change e.g. TCP_BPF_TX is removed * or added requiring sk_prot hook updates. We keep original saved * hooks in this case. + * + * Pairs with lockless read in sk_clone_lock(). */ - sk->sk_prot = &tcp_bpf_prots[family][config]; + WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]); } static int tcp_bpf_assert_proto_ops(struct proto *ops) diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 38d3ad141161..6c43fa189195 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -106,7 +106,8 @@ void tcp_update_ulp(struct sock *sk, struct proto *proto, if (!icsk->icsk_ulp_ops) { sk->sk_write_space = write_space; - sk->sk_prot = proto; + /* Pairs with lockless read in sk_clone_lock() */ + WRITE_ONCE(sk->sk_prot, proto); return; } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 94774c0e5ff3..82225bcc1117 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -742,7 +742,8 @@ static void tls_update(struct sock *sk, struct proto *p, ctx->sk_write_space = write_space; ctx->sk_proto = p; } else { - sk->sk_prot = p; + /* Pairs with lockless read in sk_clone_lock(). */ + WRITE_ONCE(sk->sk_prot, p); sk->sk_write_space = write_space; } } -- cgit v1.2.3-71-gd317