From 1b986589680a2a5b6fc1ac196ea69925a93d9dd9 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 12 Mar 2019 10:23:02 -0700 Subject: bpf: Fix bpf_tcp_sock and bpf_sk_fullsock issue related to bpf_sk_release Lorenz Bauer [thanks!] reported that a ptr returned by bpf_tcp_sock(sk) can still be accessed after bpf_sk_release(sk). Both bpf_tcp_sock() and bpf_sk_fullsock() have the same issue. This patch addresses them together. A simple reproducer looks like this: sk = bpf_sk_lookup_tcp(); /* if (!sk) ... */ tp = bpf_tcp_sock(sk); /* if (!tp) ... */ bpf_sk_release(sk); snd_cwnd = tp->snd_cwnd; /* oops! The verifier does not complain. */ The problem is the verifier did not scrub the register's states of the tcp_sock ptr (tp) after bpf_sk_release(sk). [ Note that when calling bpf_tcp_sock(sk), the sk is not always refcount-acquired. e.g. bpf_tcp_sock(skb->sk). The verifier works fine for this case. ] Currently, the verifier does not track if a helper's return ptr (in REG_0) is "carry"-ing one of its argument's refcount status. To carry this info, the reg1->id needs to be stored in reg0. One approach was tried, like "reg0->id = reg1->id", when calling "bpf_tcp_sock()". The main idea was to avoid adding another "ref_obj_id" for the same reg. However, overlapping the NULL marking and ref tracking purpose in one "id" does not work well: ref_sk = bpf_sk_lookup_tcp(); fullsock = bpf_sk_fullsock(ref_sk); tp = bpf_tcp_sock(ref_sk); if (!fullsock) { bpf_sk_release(ref_sk); return 0; } /* fullsock_reg->id is marked for NOT-NULL. * Same for tp_reg->id because they have the same id. */ /* oops. verifier did not complain about the missing !tp check */ snd_cwnd = tp->snd_cwnd; Hence, a new "ref_obj_id" is needed in "struct bpf_reg_state". With a new ref_obj_id, when bpf_sk_release(sk) is called, the verifier can scrub all reg states which has a ref_obj_id match. It is done with the changes in release_reg_references() in this patch. While fixing it, sk_to_full_sk() is removed from bpf_tcp_sock() and bpf_sk_fullsock() to avoid these helpers from returning another ptr. It will make bpf_sk_release(tp) possible: sk = bpf_sk_lookup_tcp(); /* if (!sk) ... */ tp = bpf_tcp_sock(sk); /* if (!tp) ... */ bpf_sk_release(tp); A separate helper "bpf_get_listener_sock()" will be added in a later patch to do sk_to_full_sk(). Misc change notes: - To allow bpf_sk_release(tp), the arg of bpf_sk_release() is changed from ARG_PTR_TO_SOCKET to ARG_PTR_TO_SOCK_COMMON. ARG_PTR_TO_SOCKET is removed from bpf.h since no helper is using it. - arg_type_is_refcounted() is renamed to arg_type_may_be_refcounted() because ARG_PTR_TO_SOCK_COMMON is the only one and skb->sk is not refcounted. All bpf_sk_release(), bpf_sk_fullsock() and bpf_tcp_sock() take ARG_PTR_TO_SOCK_COMMON. - check_refcount_ok() ensures is_acquire_function() cannot take arg_type_may_be_refcounted() as its argument. - The check_func_arg() can only allow one refcount-ed arg. It is guaranteed by check_refcount_ok() which ensures at most one arg can be refcounted. Hence, it is a verifier internal error if >1 refcount arg found in check_func_arg(). - In release_reference(), release_reference_state() is called first to ensure a match on "reg->ref_obj_id" can be found before scrubbing the reg states with release_reg_references(). - reg_is_refcounted() is no longer needed. 1. In mark_ptr_or_null_regs(), its usage is replaced by "ref_obj_id && ref_obj_id == id" because, when is_null == true, release_reference_state() should only be called on the ref_obj_id obtained by a acquire helper (i.e. is_acquire_function() == true). Otherwise, the following would happen: sk = bpf_sk_lookup_tcp(); /* if (!sk) { ... } */ fullsock = bpf_sk_fullsock(sk); if (!fullsock) { /* * release_reference_state(fullsock_reg->ref_obj_id) * where fullsock_reg->ref_obj_id == sk_reg->ref_obj_id. * * Hence, the following bpf_sk_release(sk) will fail * because the ref state has already been released in the * earlier release_reference_state(fullsock_reg->ref_obj_id). */ bpf_sk_release(sk); } 2. In release_reg_references(), the current reg_is_refcounted() call is unnecessary because the id check is enough. - The type_is_refcounted() and type_is_refcounted_or_null() are no longer needed also because reg_is_refcounted() is removed. Fixes: 655a51e536c0 ("bpf: Add struct bpf_tcp_sock and BPF_FUNC_tcp_sock") Reported-by: Lorenz Bauer Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 - include/linux/bpf_verifier.h | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a2132e09dc1c..f02367faa58d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -193,7 +193,6 @@ enum bpf_arg_type { ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ - ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ }; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 69f7a3449eda..7d8228d1c898 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -66,6 +66,46 @@ struct bpf_reg_state { * same reference to the socket, to determine proper reference freeing. */ u32 id; + /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned + * from a pointer-cast helper, bpf_sk_fullsock() and + * bpf_tcp_sock(). + * + * Consider the following where "sk" is a reference counted + * pointer returned from "sk = bpf_sk_lookup_tcp();": + * + * 1: sk = bpf_sk_lookup_tcp(); + * 2: if (!sk) { return 0; } + * 3: fullsock = bpf_sk_fullsock(sk); + * 4: if (!fullsock) { bpf_sk_release(sk); return 0; } + * 5: tp = bpf_tcp_sock(fullsock); + * 6: if (!tp) { bpf_sk_release(sk); return 0; } + * 7: bpf_sk_release(sk); + * 8: snd_cwnd = tp->snd_cwnd; // verifier will complain + * + * After bpf_sk_release(sk) at line 7, both "fullsock" ptr and + * "tp" ptr should be invalidated also. In order to do that, + * the reg holding "fullsock" and "sk" need to remember + * the original refcounted ptr id (i.e. sk_reg->id) in ref_obj_id + * such that the verifier can reset all regs which have + * ref_obj_id matching the sk_reg->id. + * + * sk_reg->ref_obj_id is set to sk_reg->id at line 1. + * sk_reg->id will stay as NULL-marking purpose only. + * After NULL-marking is done, sk_reg->id can be reset to 0. + * + * After "fullsock = bpf_sk_fullsock(sk);" at line 3, + * fullsock_reg->ref_obj_id is set to sk_reg->ref_obj_id. + * + * After "tp = bpf_tcp_sock(fullsock);" at line 5, + * tp_reg->ref_obj_id is set to fullsock_reg->ref_obj_id + * which is the same as sk_reg->ref_obj_id. + * + * From the verifier perspective, if sk, fullsock and tp + * are not NULL, they are the same ptr with different + * reg->type. In particular, bpf_sk_release(tp) is also + * allowed and has the same effect as bpf_sk_release(sk). + */ + u32 ref_obj_id; /* For scalar types (SCALAR_VALUE), this represents our knowledge of * the actual value. * For pointer types, this represents the variable part of the offset -- cgit v1.2.3-71-gd317 From 9804501fa1228048857910a6bf23e085aade37cc Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 14 Mar 2019 13:47:59 +0800 Subject: appletalk: Fix potential NULL pointer dereference in unregister_snap_client register_snap_client may return NULL, all the callers check it, but only print a warning. This will result in NULL pointer dereference in unregister_snap_client and other places. It has always been used like this since v2.6 Reported-by: Dan Carpenter Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- include/linux/atalk.h | 2 +- net/appletalk/aarp.c | 15 ++++++++++++--- net/appletalk/ddp.c | 20 ++++++++++++-------- 3 files changed, 25 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atalk.h b/include/linux/atalk.h index d5cfc0b15b76..f6034ba774be 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -108,7 +108,7 @@ static __inline__ struct elapaarp *aarp_hdr(struct sk_buff *skb) #define AARP_RESOLVE_TIME (10 * HZ) extern struct datalink_proto *ddp_dl, *aarp_dl; -extern void aarp_proto_init(void); +extern int aarp_proto_init(void); /* Inter module exports */ diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 49a16cee2aae..420a98bf79b5 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -879,15 +879,24 @@ static struct notifier_block aarp_notifier = { static unsigned char aarp_snap_id[] = { 0x00, 0x00, 0x00, 0x80, 0xF3 }; -void __init aarp_proto_init(void) +int __init aarp_proto_init(void) { + int rc; + aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv); - if (!aarp_dl) + if (!aarp_dl) { printk(KERN_CRIT "Unable to register AARP with SNAP.\n"); + return -ENOMEM; + } timer_setup(&aarp_timer, aarp_expire_timeout, 0); aarp_timer.expires = jiffies + sysctl_aarp_expiry_time; add_timer(&aarp_timer); - register_netdevice_notifier(&aarp_notifier); + rc = register_netdevice_notifier(&aarp_notifier); + if (rc) { + del_timer_sync(&aarp_timer); + unregister_snap_client(aarp_dl); + } + return rc; } /* Remove the AARP entries associated with a device. */ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 795fbc6c06aa..709d2542f729 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1904,9 +1904,6 @@ static unsigned char ddp_snap_id[] = { 0x08, 0x00, 0x07, 0x80, 0x9B }; EXPORT_SYMBOL(atrtr_get_dev); EXPORT_SYMBOL(atalk_find_dev_addr); -static const char atalk_err_snap[] __initconst = - KERN_CRIT "Unable to register DDP with SNAP.\n"; - /* Called by proto.c on kernel start up */ static int __init atalk_init(void) { @@ -1921,17 +1918,22 @@ static int __init atalk_init(void) goto out_proto; ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv); - if (!ddp_dl) - printk(atalk_err_snap); + if (!ddp_dl) { + pr_crit("Unable to register DDP with SNAP.\n"); + goto out_sock; + } dev_add_pack(<alk_packet_type); dev_add_pack(&ppptalk_packet_type); rc = register_netdevice_notifier(&ddp_notifier); if (rc) - goto out_sock; + goto out_snap; + + rc = aarp_proto_init(); + if (rc) + goto out_dev; - aarp_proto_init(); rc = atalk_proc_init(); if (rc) goto out_aarp; @@ -1945,11 +1947,13 @@ out_proc: atalk_proc_exit(); out_aarp: aarp_cleanup_module(); +out_dev: unregister_netdevice_notifier(&ddp_notifier); -out_sock: +out_snap: dev_remove_pack(&ppptalk_packet_type); dev_remove_pack(<alk_packet_type); unregister_snap_client(ddp_dl); +out_sock: sock_unregister(PF_APPLETALK); out_proto: proto_unregister(&ddp_proto); -- cgit v1.2.3-71-gd317 From 8a3c245c031944f2176118270e7bc5d4fd4a1075 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Thu, 14 Mar 2019 10:45:23 -0300 Subject: net: add documentation to socket.c Adds missing sphinx documentation to the socket.c's functions. Also fixes some whitespaces. I also changed the style of older documentation as an effort to have an uniform documentation style. Signed-off-by: Pedro Tammela Signed-off-by: David S. Miller --- include/linux/net.h | 6 ++ include/linux/socket.h | 12 +-- net/socket.c | 277 +++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 271 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 651fca72286c..c606c72311d0 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -83,6 +83,12 @@ enum sock_type { #endif /* ARCH_HAS_SOCKET_TYPES */ +/** + * enum sock_shutdown_cmd - Shutdown types + * @SHUT_RD: shutdown receptions + * @SHUT_WR: shutdown transmissions + * @SHUT_RDWR: shutdown receptions/transmissions + */ enum sock_shutdown_cmd { SHUT_RD, SHUT_WR, diff --git a/include/linux/socket.h b/include/linux/socket.h index 6016daeecee4..b57cd8bf96e2 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -26,7 +26,7 @@ typedef __kernel_sa_family_t sa_family_t; /* * 1003.1g requires sa_family_t and that sa_data is char. */ - + struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ char sa_data[14]; /* 14 bytes of protocol address */ @@ -44,7 +44,7 @@ struct linger { * system, not 4.3. Thus msg_accrights(len) are now missing. They * belong in an obscure libc emulation or the bin. */ - + struct msghdr { void *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ @@ -54,7 +54,7 @@ struct msghdr { unsigned int msg_flags; /* flags on received message */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ }; - + struct user_msghdr { void __user *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ @@ -122,7 +122,7 @@ struct cmsghdr { * inside range, given by msg->msg_controllen before using * ancillary object DATA. --ANK (980731) */ - + static inline struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size, struct cmsghdr *__cmsg) { @@ -264,10 +264,10 @@ struct ucred { /* Maximum queue length specifiable by listen. */ #define SOMAXCONN 128 -/* Flags we can use with send/ and recv. +/* Flags we can use with send/ and recv. Added those for 1003.1g not all are supported yet */ - + #define MSG_OOB 1 #define MSG_PEEK 2 #define MSG_DONTROUTE 4 diff --git a/net/socket.c b/net/socket.c index 3c176a12fe48..8255f5bda0aa 100644 --- a/net/socket.c +++ b/net/socket.c @@ -384,6 +384,18 @@ static struct file_system_type sock_fs_type = { * but we take care of internal coherence yet. */ +/** + * sock_alloc_file - Bind a &socket to a &file + * @sock: socket + * @flags: file status flags + * @dname: protocol name + * + * Returns the &file bound with @sock, implicitly storing it + * in sock->file. If dname is %NULL, sets to "". + * On failure the return is a ERR pointer (see linux/err.h). + * This function uses GFP_KERNEL internally. + */ + struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) { struct file *file; @@ -424,6 +436,14 @@ static int sock_map_fd(struct socket *sock, int flags) return PTR_ERR(newfile); } +/** + * sock_from_file - Return the &socket bounded to @file. + * @file: file + * @err: pointer to an error code return + * + * On failure returns %NULL and assigns -ENOTSOCK to @err. + */ + struct socket *sock_from_file(struct file *file, int *err) { if (file->f_op == &socket_file_ops) @@ -532,11 +552,11 @@ static const struct inode_operations sockfs_inode_ops = { }; /** - * sock_alloc - allocate a socket + * sock_alloc - allocate a socket * * Allocate a new inode and socket object. The two are bound together * and initialised. The socket is then returned. If we are out of inodes - * NULL is returned. + * NULL is returned. This functions uses GFP_KERNEL internally. */ struct socket *sock_alloc(void) @@ -561,7 +581,7 @@ struct socket *sock_alloc(void) EXPORT_SYMBOL(sock_alloc); /** - * sock_release - close a socket + * sock_release - close a socket * @sock: socket to close * * The socket is released from the protocol stack if it has a release @@ -617,6 +637,15 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) } EXPORT_SYMBOL(__sock_tx_timestamp); +/** + * sock_sendmsg - send a message through @sock + * @sock: socket + * @msg: message to send + * + * Sends @msg through @sock, passing through LSM. + * Returns the number of bytes sent, or an error code. + */ + static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) { int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg)); @@ -633,6 +662,18 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg) } EXPORT_SYMBOL(sock_sendmsg); +/** + * kernel_sendmsg - send a message through @sock (kernel-space) + * @sock: socket + * @msg: message header + * @vec: kernel vec + * @num: vec array length + * @size: total message data size + * + * Builds the message data with @vec and sends it through @sock. + * Returns the number of bytes sent, or an error code. + */ + int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { @@ -641,6 +682,19 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(kernel_sendmsg); +/** + * kernel_sendmsg_locked - send a message through @sock (kernel-space) + * @sk: sock + * @msg: message header + * @vec: output s/g array + * @num: output s/g array length + * @size: total message data size + * + * Builds the message data with @vec and sends it through @sock. + * Returns the number of bytes sent, or an error code. + * Caller must hold @sk. + */ + int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { @@ -811,6 +865,16 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, } EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); +/** + * sock_recvmsg - receive a message from @sock + * @sock: socket + * @msg: message to receive + * @flags: message flags + * + * Receives @msg from @sock, passing through LSM. Returns the total number + * of bytes received, or an error. + */ + static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, int flags) { @@ -826,20 +890,21 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags) EXPORT_SYMBOL(sock_recvmsg); /** - * kernel_recvmsg - Receive a message from a socket (kernel space) - * @sock: The socket to receive the message from - * @msg: Received message - * @vec: Input s/g array for message data - * @num: Size of input s/g array - * @size: Number of bytes to read - * @flags: Message flags (MSG_DONTWAIT, etc...) + * kernel_recvmsg - Receive a message from a socket (kernel space) + * @sock: The socket to receive the message from + * @msg: Received message + * @vec: Input s/g array for message data + * @num: Size of input s/g array + * @size: Number of bytes to read + * @flags: Message flags (MSG_DONTWAIT, etc...) * - * On return the msg structure contains the scatter/gather array passed in the - * vec argument. The array is modified so that it consists of the unfilled - * portion of the original array. + * On return the msg structure contains the scatter/gather array passed in the + * vec argument. The array is modified so that it consists of the unfilled + * portion of the original array. * - * The returned value is the total number of bytes received, or an error. + * The returned value is the total number of bytes received, or an error. */ + int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size, int flags) { @@ -1005,6 +1070,13 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, * what to do with it - that's up to the protocol still. */ +/** + * get_net_ns - increment the refcount of the network namespace + * @ns: common namespace (net) + * + * Returns the net's common namespace. + */ + struct ns_common *get_net_ns(struct ns_common *ns) { return &get_net(container_of(ns, struct net, ns))->ns; @@ -1099,6 +1171,19 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) return err; } +/** + * sock_create_lite - creates a socket + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * + * Creates a new socket and assigns it to @res, passing through LSM. + * The new socket initialization is not complete, see kernel_accept(). + * Returns 0 or an error. On failure @res is set to %NULL. + * This function internally uses GFP_KERNEL. + */ + int sock_create_lite(int family, int type, int protocol, struct socket **res) { int err; @@ -1224,6 +1309,21 @@ call_kill: } EXPORT_SYMBOL(sock_wake_async); +/** + * __sock_create - creates a socket + * @net: net namespace + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * @kern: boolean for kernel space sockets + * + * Creates a new socket and assigns it to @res, passing through LSM. + * Returns 0 or an error. On failure @res is set to %NULL. @kern must + * be set to true if the socket resides in kernel space. + * This function internally uses GFP_KERNEL. + */ + int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) { @@ -1333,12 +1433,35 @@ out_release: } EXPORT_SYMBOL(__sock_create); +/** + * sock_create - creates a socket + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * + * A wrapper around __sock_create(). + * Returns 0 or an error. This function internally uses GFP_KERNEL. + */ + int sock_create(int family, int type, int protocol, struct socket **res) { return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); } EXPORT_SYMBOL(sock_create); +/** + * sock_create_kern - creates a socket (kernel space) + * @net: net namespace + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * + * A wrapper around __sock_create(). + * Returns 0 or an error. This function internally uses GFP_KERNEL. + */ + int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res) { return __sock_create(net, family, type, protocol, res, 1); @@ -3322,18 +3445,46 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd, } #endif +/** + * kernel_bind - bind an address to a socket (kernel space) + * @sock: socket + * @addr: address + * @addrlen: length of address + * + * Returns 0 or an error. + */ + int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) { return sock->ops->bind(sock, addr, addrlen); } EXPORT_SYMBOL(kernel_bind); +/** + * kernel_listen - move socket to listening state (kernel space) + * @sock: socket + * @backlog: pending connections queue size + * + * Returns 0 or an error. + */ + int kernel_listen(struct socket *sock, int backlog) { return sock->ops->listen(sock, backlog); } EXPORT_SYMBOL(kernel_listen); +/** + * kernel_accept - accept a connection (kernel space) + * @sock: listening socket + * @newsock: new connected socket + * @flags: flags + * + * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0. + * If it fails, @newsock is guaranteed to be %NULL. + * Returns 0 or an error. + */ + int kernel_accept(struct socket *sock, struct socket **newsock, int flags) { struct sock *sk = sock->sk; @@ -3359,6 +3510,19 @@ done: } EXPORT_SYMBOL(kernel_accept); +/** + * kernel_connect - connect a socket (kernel space) + * @sock: socket + * @addr: address + * @addrlen: address length + * @flags: flags (O_NONBLOCK, ...) + * + * For datagram sockets, @addr is the addres to which datagrams are sent + * by default, and the only address from which datagrams are received. + * For stream sockets, attempts to connect to @addr. + * Returns 0 or an error code. + */ + int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags) { @@ -3366,18 +3530,48 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, } EXPORT_SYMBOL(kernel_connect); +/** + * kernel_getsockname - get the address which the socket is bound (kernel space) + * @sock: socket + * @addr: address holder + * + * Fills the @addr pointer with the address which the socket is bound. + * Returns 0 or an error code. + */ + int kernel_getsockname(struct socket *sock, struct sockaddr *addr) { return sock->ops->getname(sock, addr, 0); } EXPORT_SYMBOL(kernel_getsockname); +/** + * kernel_peername - get the address which the socket is connected (kernel space) + * @sock: socket + * @addr: address holder + * + * Fills the @addr pointer with the address which the socket is connected. + * Returns 0 or an error code. + */ + int kernel_getpeername(struct socket *sock, struct sockaddr *addr) { return sock->ops->getname(sock, addr, 1); } EXPORT_SYMBOL(kernel_getpeername); +/** + * kernel_getsockopt - get a socket option (kernel space) + * @sock: socket + * @level: API level (SOL_SOCKET, ...) + * @optname: option tag + * @optval: option value + * @optlen: option length + * + * Assigns the option length to @optlen. + * Returns 0 or an error. + */ + int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) { @@ -3400,6 +3594,17 @@ int kernel_getsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(kernel_getsockopt); +/** + * kernel_setsockopt - set a socket option (kernel space) + * @sock: socket + * @level: API level (SOL_SOCKET, ...) + * @optname: option tag + * @optval: option value + * @optlen: option length + * + * Returns 0 or an error. + */ + int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, unsigned int optlen) { @@ -3420,6 +3625,17 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(kernel_setsockopt); +/** + * kernel_sendpage - send a &page through a socket (kernel space) + * @sock: socket + * @page: page + * @offset: page offset + * @size: total size in bytes + * @flags: flags (MSG_DONTWAIT, ...) + * + * Returns the total amount sent in bytes or an error. + */ + int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { @@ -3430,6 +3646,18 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, } EXPORT_SYMBOL(kernel_sendpage); +/** + * kernel_sendpage_locked - send a &page through the locked sock (kernel space) + * @sk: sock + * @page: page + * @offset: page offset + * @size: total size in bytes + * @flags: flags (MSG_DONTWAIT, ...) + * + * Returns the total amount sent in bytes or an error. + * Caller must hold @sk. + */ + int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags) { @@ -3443,17 +3671,30 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, } EXPORT_SYMBOL(kernel_sendpage_locked); +/** + * kernel_shutdown - shut down part of a full-duplex connection (kernel space) + * @sock: socket + * @how: connection part + * + * Returns 0 or an error. + */ + int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) { return sock->ops->shutdown(sock, how); } EXPORT_SYMBOL(kernel_sock_shutdown); -/* This routine returns the IP overhead imposed by a socket i.e. - * the length of the underlying IP header, depending on whether - * this is an IPv4 or IPv6 socket and the length from IP options turned - * on at the socket. Assumes that the caller has a lock on the socket. +/** + * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket + * @sk: socket + * + * This routine returns the IP overhead imposed by a socket i.e. + * the length of the underlying IP header, depending on whether + * this is an IPv4 or IPv6 socket and the length from IP options turned + * on at the socket. Assumes that the caller has a lock on the socket. */ + u32 kernel_sock_ip_overhead(struct sock *sk) { struct inet_sock *inet; -- cgit v1.2.3-71-gd317 From cd1b772d4881d1cd15b90ec17aab9ac7950e8850 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 29 Oct 2018 16:32:31 +0100 Subject: driver core: remove BUS_ATTR() There are now no in-kernel users of BUS_ATTR() so drop it from device.h Everyone should use BUS_ATTR_RO/RW/WO() from now on. Cc: "Rafael J. Wysocki" Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index b425a7ee04ce..4e6987e11f68 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -49,8 +49,6 @@ struct bus_attribute { ssize_t (*store)(struct bus_type *bus, const char *buf, size_t count); }; -#define BUS_ATTR(_name, _mode, _show, _store) \ - struct bus_attribute bus_attr_##_name = __ATTR(_name, _mode, _show, _store) #define BUS_ATTR_RW(_name) \ struct bus_attribute bus_attr_##_name = __ATTR_RW(_name) #define BUS_ATTR_RO(_name) \ -- cgit v1.2.3-71-gd317 From b45a02e13ee74b6fde56df4d76786058821a3aba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 Mar 2019 15:54:16 +0100 Subject: gpio: amd-fch: Fix bogus SPDX identifier spdxcheck.py complains: include/linux/platform_data/gpio/gpio-amd-fch.h: 1:28 Invalid License ID: GPL+ which is correct because GPL+ is not a valid identifier. Of course this could have been caught by checkpatch.pl _before_ submitting or merging the patch. WARNING: 'SPDX-License-Identifier: GPL+ */' is not supported in LICENSES/... #271: FILE: include/linux/platform_data/gpio/gpio-amd-fch.h:1: +/* SPDX-License-Identifier: GPL+ */ Fix it under the assumption that the author meant GPL-2.0+, which makes sense as the corresponding C file is using that identifier. Fixes: e09d168f13f0 ("gpio: AMD G-Series PCH gpio driver") Signed-off-by: Thomas Gleixner Signed-off-by: Bartosz Golaszewski --- include/linux/platform_data/gpio/gpio-amd-fch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio/gpio-amd-fch.h b/include/linux/platform_data/gpio/gpio-amd-fch.h index a867637e172d..9e46678edb2a 100644 --- a/include/linux/platform_data/gpio/gpio-amd-fch.h +++ b/include/linux/platform_data/gpio/gpio-amd-fch.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL+ */ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * AMD FCH gpio driver platform-data -- cgit v1.2.3-71-gd317 From a3ac7917b73070010c05b4485b8582a6c9cd69b6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 25 Mar 2019 14:49:00 -0700 Subject: Revert "parport: daisy: use new parport device model" This reverts commit 1aec4211204d9463d1fd209eb50453de16254599. Steven Rostedt reports that it causes a hang at bootup and bisected it to this commit. The troigger is apparently a module alias for "parport_lowlevel" that points to "parport_pc", which causes a hang with modprobe -q -- parport_lowlevel blocking forever with a backtrace like this: wait_for_completion_killable+0x1c/0x28 call_usermodehelper_exec+0xa7/0x108 __request_module+0x351/0x3d8 get_lowlevel_driver+0x28/0x41 [parport] __parport_register_driver+0x39/0x1f4 [parport] daisy_drv_init+0x31/0x4f [parport] parport_bus_init+0x5d/0x7b [parport] parport_default_proc_register+0x26/0x1000 [parport] do_one_initcall+0xc2/0x1e0 do_init_module+0x50/0x1d4 load_module+0x1c2e/0x21b3 sys_init_module+0xef/0x117 Supid says: "Due to the new device model daisy driver will now try to find the parallel ports while trying to register its driver so that it can bind with them. Now, since daisy driver is loaded while parport bus is initialising the list of parport is still empty and it tries to load the lowlevel driver, which has an alias set to parport_pc, now causes a deadlock" But I don't think the daisy driver should be loaded by the parport initialization in the first place, so let's revert the whole change. If the daisy driver can just initialize separately on its own (like a driver should), instead of hooking into the parport init sequence directly, this issue probably would go away. Reported-and-bisected-by: Steven Rostedt (VMware) Reported-by: Michal Kubecek Acked-by: Greg Kroah-Hartman Cc: Sudip Mukherjee Signed-off-by: Linus Torvalds --- drivers/parport/daisy.c | 32 +------------------------------- drivers/parport/probe.c | 2 +- drivers/parport/share.c | 10 +--------- include/linux/parport.h | 13 ------------- 4 files changed, 3 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/drivers/parport/daisy.c b/drivers/parport/daisy.c index 56dd83a45e55..5484a46dafda 100644 --- a/drivers/parport/daisy.c +++ b/drivers/parport/daisy.c @@ -213,12 +213,10 @@ void parport_daisy_fini(struct parport *port) struct pardevice *parport_open(int devnum, const char *name) { struct daisydev *p = topology; - struct pardev_cb par_cb; struct parport *port; struct pardevice *dev; int daisy; - memset(&par_cb, 0, sizeof(par_cb)); spin_lock(&topology_lock); while (p && p->devnum != devnum) p = p->next; @@ -232,7 +230,7 @@ struct pardevice *parport_open(int devnum, const char *name) port = parport_get_port(p->port); spin_unlock(&topology_lock); - dev = parport_register_dev_model(port, name, &par_cb, devnum); + dev = parport_register_device(port, name, NULL, NULL, NULL, 0, NULL); parport_put_port(port); if (!dev) return NULL; @@ -482,31 +480,3 @@ static int assign_addrs(struct parport *port) kfree(deviceid); return detected; } - -static int daisy_drv_probe(struct pardevice *par_dev) -{ - struct device_driver *drv = par_dev->dev.driver; - - if (strcmp(drv->name, "daisy_drv")) - return -ENODEV; - if (strcmp(par_dev->name, daisy_dev_name)) - return -ENODEV; - - return 0; -} - -static struct parport_driver daisy_driver = { - .name = "daisy_drv", - .probe = daisy_drv_probe, - .devmodel = true, -}; - -int daisy_drv_init(void) -{ - return parport_register_driver(&daisy_driver); -} - -void daisy_drv_exit(void) -{ - parport_unregister_driver(&daisy_driver); -} diff --git a/drivers/parport/probe.c b/drivers/parport/probe.c index e5e6a463a941..e035174ba205 100644 --- a/drivers/parport/probe.c +++ b/drivers/parport/probe.c @@ -257,7 +257,7 @@ static ssize_t parport_read_device_id (struct parport *port, char *buffer, ssize_t parport_device_id (int devnum, char *buffer, size_t count) { ssize_t retval = -ENXIO; - struct pardevice *dev = parport_open(devnum, daisy_dev_name); + struct pardevice *dev = parport_open (devnum, "Device ID probe"); if (!dev) return -ENXIO; diff --git a/drivers/parport/share.c b/drivers/parport/share.c index 0171b8dbcdcd..5dc53d420ca8 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -137,19 +137,11 @@ static struct bus_type parport_bus_type = { int parport_bus_init(void) { - int retval; - - retval = bus_register(&parport_bus_type); - if (retval) - return retval; - daisy_drv_init(); - - return 0; + return bus_register(&parport_bus_type); } void parport_bus_exit(void) { - daisy_drv_exit(); bus_unregister(&parport_bus_type); } diff --git a/include/linux/parport.h b/include/linux/parport.h index f41f1d041e2c..397607a0c0eb 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -460,7 +460,6 @@ extern size_t parport_ieee1284_epp_read_addr (struct parport *, void *, size_t, int); /* IEEE1284.3 functions */ -#define daisy_dev_name "Device ID probe" extern int parport_daisy_init (struct parport *port); extern void parport_daisy_fini (struct parport *port); extern struct pardevice *parport_open (int devnum, const char *name); @@ -469,18 +468,6 @@ extern ssize_t parport_device_id (int devnum, char *buffer, size_t len); extern void parport_daisy_deselect_all (struct parport *port); extern int parport_daisy_select (struct parport *port, int daisy, int mode); -#ifdef CONFIG_PARPORT_1284 -extern int daisy_drv_init(void); -extern void daisy_drv_exit(void); -#else -static inline int daisy_drv_init(void) -{ - return 0; -} - -static inline void daisy_drv_exit(void) {} -#endif - /* Lowlevel drivers _can_ call this support function to handle irqs. */ static inline void parport_generic_irq(struct parport *port) { -- cgit v1.2.3-71-gd317 From db779ef67ffeadbb44e9e818eb64dbe528e2f48f Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Tue, 26 Mar 2019 12:20:28 +0530 Subject: proc/kcore: Remove unused kclist_add_remap() Commit bf904d2762ee ("x86/pti/64: Remove the SYSCALL64 entry trampoline") removed the sole usage of kclist_add_remap() but it did not remove the left-over definition from the include file. Fix the same. Signed-off-by: Bhupesh Sharma Signed-off-by: Borislav Petkov Cc: Adrian Hunter Cc: Andrew Morton Cc: Dave Anderson Cc: Dave Young Cc: "David S. Miller" Cc: Ingo Molnar Cc: James Morse Cc: Kairui Song Cc: kexec@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Cc: Michael Ellerman Cc: Omar Sandoval Cc: "Peter Zijlstra (Intel)" Cc: Rahul Lakkireddy Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/1553583028-17804-1-git-send-email-bhsharma@redhat.com --- include/linux/kcore.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kcore.h b/include/linux/kcore.h index 8c3f8c14eeaa..94b561df3877 100644 --- a/include/linux/kcore.h +++ b/include/linux/kcore.h @@ -38,22 +38,11 @@ struct vmcoredd_node { #ifdef CONFIG_PROC_KCORE void __init kclist_add(struct kcore_list *, void *, size_t, int type); -static inline -void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz) -{ - m->vaddr = (unsigned long)vaddr; - kclist_add(m, addr, sz, KCORE_REMAP); -} #else static inline void kclist_add(struct kcore_list *new, void *addr, size_t size, int type) { } - -static inline -void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz) -{ -} #endif #endif /* _LINUX_KCORE_H */ -- cgit v1.2.3-71-gd317 From 450895d04ba13a96886eddfeddb11556ae8624f1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 24 Mar 2019 00:18:46 +0200 Subject: net: phy: bcm54xx: Encode link speed and activity into LEDs Previously the green and amber LEDs on this quad PHY were solid, to indicate an encoding of the link speed (10/100/1000). This keeps the LEDs always on just as before, but now they flash on Rx/Tx activity. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 13 +++++++++++++ include/linux/brcmphy.h | 16 ++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 9605d4fe540b..cb86a3e90c7d 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -323,6 +323,19 @@ static int bcm54xx_config_init(struct phy_device *phydev) bcm54xx_phydsp_config(phydev); + /* Encode link speed into LED1 and LED3 pair (green/amber). + * Also flash these two LEDs on activity. This means configuring + * them for MULTICOLOR and encoding link/activity into them. + */ + val = BCM5482_SHD_LEDS1_LED1(BCM_LED_SRC_MULTICOLOR1) | + BCM5482_SHD_LEDS1_LED3(BCM_LED_SRC_MULTICOLOR1); + bcm_phy_write_shadow(phydev, BCM5482_SHD_LEDS1, val); + + val = BCM_LED_MULTICOLOR_IN_PHASE | + BCM5482_SHD_LEDS1_LED1(BCM_LED_MULTICOLOR_LINK_ACT) | + BCM5482_SHD_LEDS1_LED3(BCM_LED_MULTICOLOR_LINK_ACT); + bcm_phy_write_exp(phydev, BCM_EXP_MULTICOLOR, val); + return 0; } diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 9cd00a37b8d3..6db2d9a6e503 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -148,6 +148,22 @@ #define BCM_LED_SRC_OFF 0xe /* Tied high */ #define BCM_LED_SRC_ON 0xf /* Tied low */ +/* + * Broadcom Multicolor LED configurations (expansion register 4) + */ +#define BCM_EXP_MULTICOLOR (MII_BCM54XX_EXP_SEL_ER + 0x04) +#define BCM_LED_MULTICOLOR_IN_PHASE BIT(8) +#define BCM_LED_MULTICOLOR_LINK_ACT 0x0 +#define BCM_LED_MULTICOLOR_SPEED 0x1 +#define BCM_LED_MULTICOLOR_ACT_FLASH 0x2 +#define BCM_LED_MULTICOLOR_FDX 0x3 +#define BCM_LED_MULTICOLOR_OFF 0x4 +#define BCM_LED_MULTICOLOR_ON 0x5 +#define BCM_LED_MULTICOLOR_ALT 0x6 +#define BCM_LED_MULTICOLOR_FLASH 0x7 +#define BCM_LED_MULTICOLOR_LINK 0x8 +#define BCM_LED_MULTICOLOR_ACT 0x9 +#define BCM_LED_MULTICOLOR_PROGRAM 0xa /* * BCM5482: Shadow registers -- cgit v1.2.3-71-gd317 From 0532a1b0d045115521a93acf28f1270df89ad806 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 22 Mar 2019 09:19:34 +0100 Subject: virt: vbox: Implement passing requestor info to the host for VirtualBox 6.0.x VirtualBox 6.0.x has a new feature where the guest kernel driver passes info about the origin of the request (e.g. userspace or kernelspace) to the hypervisor. If we do not pass this information then when running the 6.0.x userspace guest-additions tools on a 6.0.x host, some requests will get denied with a VERR_VERSION_MISMATCH error, breaking vboxservice.service and the mounting of shared folders marked to be auto-mounted. This commit implements passing the requestor info to the host, fixing this. Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/virt/vboxguest/vboxguest_core.c | 106 ++++++++++++++++++++--------- drivers/virt/vboxguest/vboxguest_core.h | 15 ++-- drivers/virt/vboxguest/vboxguest_linux.c | 26 ++++++- drivers/virt/vboxguest/vboxguest_utils.c | 32 +++++---- drivers/virt/vboxguest/vboxguest_version.h | 9 ++- drivers/virt/vboxguest/vmmdev.h | 8 ++- include/linux/vbox_utils.h | 12 ++-- include/uapi/linux/vbox_vmmdev_types.h | 60 ++++++++++++++++ 8 files changed, 197 insertions(+), 71 deletions(-) (limited to 'include/linux') diff --git a/drivers/virt/vboxguest/vboxguest_core.c b/drivers/virt/vboxguest/vboxguest_core.c index df7d09409efe..8ca333f21292 100644 --- a/drivers/virt/vboxguest/vboxguest_core.c +++ b/drivers/virt/vboxguest/vboxguest_core.c @@ -27,6 +27,10 @@ #define GUEST_MAPPINGS_TRIES 5 +#define VBG_KERNEL_REQUEST \ + (VMMDEV_REQUESTOR_KERNEL | VMMDEV_REQUESTOR_USR_DRV | \ + VMMDEV_REQUESTOR_CON_DONT_KNOW | VMMDEV_REQUESTOR_TRUST_NOT_GIVEN) + /** * Reserves memory in which the VMM can relocate any guest mappings * that are floating around. @@ -48,7 +52,8 @@ static void vbg_guest_mappings_init(struct vbg_dev *gdev) int i, rc; /* Query the required space. */ - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_GET_HYPERVISOR_INFO); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_GET_HYPERVISOR_INFO, + VBG_KERNEL_REQUEST); if (!req) return; @@ -135,7 +140,8 @@ static void vbg_guest_mappings_exit(struct vbg_dev *gdev) * Tell the host that we're going to free the memory we reserved for * it, the free it up. (Leak the memory if anything goes wrong here.) */ - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_HYPERVISOR_INFO); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_HYPERVISOR_INFO, + VBG_KERNEL_REQUEST); if (!req) return; @@ -172,8 +178,10 @@ static int vbg_report_guest_info(struct vbg_dev *gdev) struct vmmdev_guest_info2 *req2 = NULL; int rc, ret = -ENOMEM; - req1 = vbg_req_alloc(sizeof(*req1), VMMDEVREQ_REPORT_GUEST_INFO); - req2 = vbg_req_alloc(sizeof(*req2), VMMDEVREQ_REPORT_GUEST_INFO2); + req1 = vbg_req_alloc(sizeof(*req1), VMMDEVREQ_REPORT_GUEST_INFO, + VBG_KERNEL_REQUEST); + req2 = vbg_req_alloc(sizeof(*req2), VMMDEVREQ_REPORT_GUEST_INFO2, + VBG_KERNEL_REQUEST); if (!req1 || !req2) goto out_free; @@ -187,8 +195,8 @@ static int vbg_report_guest_info(struct vbg_dev *gdev) req2->additions_minor = VBG_VERSION_MINOR; req2->additions_build = VBG_VERSION_BUILD; req2->additions_revision = VBG_SVN_REV; - /* (no features defined yet) */ - req2->additions_features = 0; + req2->additions_features = + VMMDEV_GUEST_INFO2_ADDITIONS_FEATURES_REQUESTOR_INFO; strlcpy(req2->name, VBG_VERSION_STRING, sizeof(req2->name)); @@ -230,7 +238,8 @@ static int vbg_report_driver_status(struct vbg_dev *gdev, bool active) struct vmmdev_guest_status *req; int rc; - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_REPORT_GUEST_STATUS); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_REPORT_GUEST_STATUS, + VBG_KERNEL_REQUEST); if (!req) return -ENOMEM; @@ -423,7 +432,8 @@ static int vbg_heartbeat_host_config(struct vbg_dev *gdev, bool enabled) struct vmmdev_heartbeat *req; int rc; - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_HEARTBEAT_CONFIGURE); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_HEARTBEAT_CONFIGURE, + VBG_KERNEL_REQUEST); if (!req) return -ENOMEM; @@ -457,7 +467,8 @@ static int vbg_heartbeat_init(struct vbg_dev *gdev) gdev->guest_heartbeat_req = vbg_req_alloc( sizeof(*gdev->guest_heartbeat_req), - VMMDEVREQ_GUEST_HEARTBEAT); + VMMDEVREQ_GUEST_HEARTBEAT, + VBG_KERNEL_REQUEST); if (!gdev->guest_heartbeat_req) return -ENOMEM; @@ -528,7 +539,8 @@ static int vbg_reset_host_event_filter(struct vbg_dev *gdev, struct vmmdev_mask *req; int rc; - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_CTL_GUEST_FILTER_MASK); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_CTL_GUEST_FILTER_MASK, + VBG_KERNEL_REQUEST); if (!req) return -ENOMEM; @@ -567,8 +579,14 @@ static int vbg_set_session_event_filter(struct vbg_dev *gdev, u32 changed, previous; int rc, ret = 0; - /* Allocate a request buffer before taking the spinlock */ - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_CTL_GUEST_FILTER_MASK); + /* + * Allocate a request buffer before taking the spinlock, when + * the session is being terminated the requestor is the kernel, + * as we're cleaning up. + */ + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_CTL_GUEST_FILTER_MASK, + session_termination ? VBG_KERNEL_REQUEST : + session->requestor); if (!req) { if (!session_termination) return -ENOMEM; @@ -627,7 +645,8 @@ static int vbg_reset_host_capabilities(struct vbg_dev *gdev) struct vmmdev_mask *req; int rc; - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_GUEST_CAPABILITIES); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_GUEST_CAPABILITIES, + VBG_KERNEL_REQUEST); if (!req) return -ENOMEM; @@ -662,8 +681,14 @@ static int vbg_set_session_capabilities(struct vbg_dev *gdev, u32 changed, previous; int rc, ret = 0; - /* Allocate a request buffer before taking the spinlock */ - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_GUEST_CAPABILITIES); + /* + * Allocate a request buffer before taking the spinlock, when + * the session is being terminated the requestor is the kernel, + * as we're cleaning up. + */ + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_GUEST_CAPABILITIES, + session_termination ? VBG_KERNEL_REQUEST : + session->requestor); if (!req) { if (!session_termination) return -ENOMEM; @@ -722,7 +747,8 @@ static int vbg_query_host_version(struct vbg_dev *gdev) struct vmmdev_host_version *req; int rc, ret; - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_GET_HOST_VERSION); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_GET_HOST_VERSION, + VBG_KERNEL_REQUEST); if (!req) return -ENOMEM; @@ -783,19 +809,24 @@ int vbg_core_init(struct vbg_dev *gdev, u32 fixed_events) gdev->mem_balloon.get_req = vbg_req_alloc(sizeof(*gdev->mem_balloon.get_req), - VMMDEVREQ_GET_MEMBALLOON_CHANGE_REQ); + VMMDEVREQ_GET_MEMBALLOON_CHANGE_REQ, + VBG_KERNEL_REQUEST); gdev->mem_balloon.change_req = vbg_req_alloc(sizeof(*gdev->mem_balloon.change_req), - VMMDEVREQ_CHANGE_MEMBALLOON); + VMMDEVREQ_CHANGE_MEMBALLOON, + VBG_KERNEL_REQUEST); gdev->cancel_req = vbg_req_alloc(sizeof(*(gdev->cancel_req)), - VMMDEVREQ_HGCM_CANCEL2); + VMMDEVREQ_HGCM_CANCEL2, + VBG_KERNEL_REQUEST); gdev->ack_events_req = vbg_req_alloc(sizeof(*gdev->ack_events_req), - VMMDEVREQ_ACKNOWLEDGE_EVENTS); + VMMDEVREQ_ACKNOWLEDGE_EVENTS, + VBG_KERNEL_REQUEST); gdev->mouse_status_req = vbg_req_alloc(sizeof(*gdev->mouse_status_req), - VMMDEVREQ_GET_MOUSE_STATUS); + VMMDEVREQ_GET_MOUSE_STATUS, + VBG_KERNEL_REQUEST); if (!gdev->mem_balloon.get_req || !gdev->mem_balloon.change_req || !gdev->cancel_req || !gdev->ack_events_req || @@ -892,9 +923,9 @@ void vbg_core_exit(struct vbg_dev *gdev) * vboxguest_linux.c calls this when userspace opens the char-device. * Return: A pointer to the new session or an ERR_PTR on error. * @gdev: The Guest extension device. - * @user: Set if this is a session for the vboxuser device. + * @requestor: VMMDEV_REQUESTOR_* flags */ -struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, bool user) +struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, u32 requestor) { struct vbg_session *session; @@ -903,7 +934,7 @@ struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, bool user) return ERR_PTR(-ENOMEM); session->gdev = gdev; - session->user_session = user; + session->requestor = requestor; return session; } @@ -924,7 +955,9 @@ void vbg_core_close_session(struct vbg_session *session) if (!session->hgcm_client_ids[i]) continue; - vbg_hgcm_disconnect(gdev, session->hgcm_client_ids[i], &rc); + /* requestor is kernel here, as we're cleaning up. */ + vbg_hgcm_disconnect(gdev, VBG_KERNEL_REQUEST, + session->hgcm_client_ids[i], &rc); } kfree(session); @@ -1152,7 +1185,8 @@ static int vbg_req_allowed(struct vbg_dev *gdev, struct vbg_session *session, return -EPERM; } - if (trusted_apps_only && session->user_session) { + if (trusted_apps_only && + (session->requestor & VMMDEV_REQUESTOR_USER_DEVICE)) { vbg_err("Denying userspace vmm call type %#08x through vboxuser device node\n", req->request_type); return -EPERM; @@ -1209,8 +1243,8 @@ static int vbg_ioctl_hgcm_connect(struct vbg_dev *gdev, if (i >= ARRAY_SIZE(session->hgcm_client_ids)) return -EMFILE; - ret = vbg_hgcm_connect(gdev, &conn->u.in.loc, &client_id, - &conn->hdr.rc); + ret = vbg_hgcm_connect(gdev, session->requestor, &conn->u.in.loc, + &client_id, &conn->hdr.rc); mutex_lock(&gdev->session_mutex); if (ret == 0 && conn->hdr.rc >= 0) { @@ -1251,7 +1285,8 @@ static int vbg_ioctl_hgcm_disconnect(struct vbg_dev *gdev, if (i >= ARRAY_SIZE(session->hgcm_client_ids)) return -EINVAL; - ret = vbg_hgcm_disconnect(gdev, client_id, &disconn->hdr.rc); + ret = vbg_hgcm_disconnect(gdev, session->requestor, client_id, + &disconn->hdr.rc); mutex_lock(&gdev->session_mutex); if (ret == 0 && disconn->hdr.rc >= 0) @@ -1313,12 +1348,12 @@ static int vbg_ioctl_hgcm_call(struct vbg_dev *gdev, } if (IS_ENABLED(CONFIG_COMPAT) && f32bit) - ret = vbg_hgcm_call32(gdev, client_id, + ret = vbg_hgcm_call32(gdev, session->requestor, client_id, call->function, call->timeout_ms, VBG_IOCTL_HGCM_CALL_PARMS32(call), call->parm_count, &call->hdr.rc); else - ret = vbg_hgcm_call(gdev, client_id, + ret = vbg_hgcm_call(gdev, session->requestor, client_id, call->function, call->timeout_ms, VBG_IOCTL_HGCM_CALL_PARMS(call), call->parm_count, &call->hdr.rc); @@ -1408,6 +1443,7 @@ static int vbg_ioctl_check_balloon(struct vbg_dev *gdev, } static int vbg_ioctl_write_core_dump(struct vbg_dev *gdev, + struct vbg_session *session, struct vbg_ioctl_write_coredump *dump) { struct vmmdev_write_core_dump *req; @@ -1415,7 +1451,8 @@ static int vbg_ioctl_write_core_dump(struct vbg_dev *gdev, if (vbg_ioctl_chk(&dump->hdr, sizeof(dump->u.in), 0)) return -EINVAL; - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_WRITE_COREDUMP); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_WRITE_COREDUMP, + session->requestor); if (!req) return -ENOMEM; @@ -1476,7 +1513,7 @@ int vbg_core_ioctl(struct vbg_session *session, unsigned int req, void *data) case VBG_IOCTL_CHECK_BALLOON: return vbg_ioctl_check_balloon(gdev, data); case VBG_IOCTL_WRITE_CORE_DUMP: - return vbg_ioctl_write_core_dump(gdev, data); + return vbg_ioctl_write_core_dump(gdev, session, data); } /* Variable sized requests. */ @@ -1508,7 +1545,8 @@ int vbg_core_set_mouse_status(struct vbg_dev *gdev, u32 features) struct vmmdev_mouse_status *req; int rc; - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_MOUSE_STATUS); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_MOUSE_STATUS, + VBG_KERNEL_REQUEST); if (!req) return -ENOMEM; diff --git a/drivers/virt/vboxguest/vboxguest_core.h b/drivers/virt/vboxguest/vboxguest_core.h index 7ad9ec45bfa9..4188c12b839f 100644 --- a/drivers/virt/vboxguest/vboxguest_core.h +++ b/drivers/virt/vboxguest/vboxguest_core.h @@ -154,15 +154,15 @@ struct vbg_session { * host. Protected by vbg_gdev.session_mutex. */ u32 guest_caps; - /** Does this session belong to a root process or a user one? */ - bool user_session; + /** VMMDEV_REQUESTOR_* flags */ + u32 requestor; /** Set on CANCEL_ALL_WAITEVENTS, protected by vbg_devevent_spinlock. */ bool cancel_waiters; }; int vbg_core_init(struct vbg_dev *gdev, u32 fixed_events); void vbg_core_exit(struct vbg_dev *gdev); -struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, bool user); +struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, u32 requestor); void vbg_core_close_session(struct vbg_session *session); int vbg_core_ioctl(struct vbg_session *session, unsigned int req, void *data); int vbg_core_set_mouse_status(struct vbg_dev *gdev, u32 features); @@ -172,12 +172,13 @@ irqreturn_t vbg_core_isr(int irq, void *dev_id); void vbg_linux_mouse_event(struct vbg_dev *gdev); /* Private (non exported) functions form vboxguest_utils.c */ -void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type); +void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type, + u32 requestor); void vbg_req_free(void *req, size_t len); int vbg_req_perform(struct vbg_dev *gdev, void *req); int vbg_hgcm_call32( - struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms, - struct vmmdev_hgcm_function_parameter32 *parm32, u32 parm_count, - int *vbox_status); + struct vbg_dev *gdev, u32 requestor, u32 client_id, u32 function, + u32 timeout_ms, struct vmmdev_hgcm_function_parameter32 *parm32, + u32 parm_count, int *vbox_status); #endif diff --git a/drivers/virt/vboxguest/vboxguest_linux.c b/drivers/virt/vboxguest/vboxguest_linux.c index 6e2a9619192d..6e8c0f1c1056 100644 --- a/drivers/virt/vboxguest/vboxguest_linux.c +++ b/drivers/virt/vboxguest/vboxguest_linux.c @@ -5,6 +5,7 @@ * Copyright (C) 2006-2016 Oracle Corporation */ +#include #include #include #include @@ -28,6 +29,23 @@ static DEFINE_MUTEX(vbg_gdev_mutex); /** Global vbg_gdev pointer used by vbg_get/put_gdev. */ static struct vbg_dev *vbg_gdev; +static u32 vbg_misc_device_requestor(struct inode *inode) +{ + u32 requestor = VMMDEV_REQUESTOR_USERMODE | + VMMDEV_REQUESTOR_CON_DONT_KNOW | + VMMDEV_REQUESTOR_TRUST_NOT_GIVEN; + + if (from_kuid(current_user_ns(), current->cred->uid) == 0) + requestor |= VMMDEV_REQUESTOR_USR_ROOT; + else + requestor |= VMMDEV_REQUESTOR_USR_USER; + + if (in_egroup_p(inode->i_gid)) + requestor |= VMMDEV_REQUESTOR_GRP_VBOX; + + return requestor; +} + static int vbg_misc_device_open(struct inode *inode, struct file *filp) { struct vbg_session *session; @@ -36,7 +54,7 @@ static int vbg_misc_device_open(struct inode *inode, struct file *filp) /* misc_open sets filp->private_data to our misc device */ gdev = container_of(filp->private_data, struct vbg_dev, misc_device); - session = vbg_core_open_session(gdev, false); + session = vbg_core_open_session(gdev, vbg_misc_device_requestor(inode)); if (IS_ERR(session)) return PTR_ERR(session); @@ -53,7 +71,8 @@ static int vbg_misc_device_user_open(struct inode *inode, struct file *filp) gdev = container_of(filp->private_data, struct vbg_dev, misc_device_user); - session = vbg_core_open_session(gdev, false); + session = vbg_core_open_session(gdev, vbg_misc_device_requestor(inode) | + VMMDEV_REQUESTOR_USER_DEVICE); if (IS_ERR(session)) return PTR_ERR(session); @@ -115,7 +134,8 @@ static long vbg_misc_device_ioctl(struct file *filp, unsigned int req, req == VBG_IOCTL_VMMDEV_REQUEST_BIG; if (is_vmmdev_req) - buf = vbg_req_alloc(size, VBG_IOCTL_HDR_TYPE_DEFAULT); + buf = vbg_req_alloc(size, VBG_IOCTL_HDR_TYPE_DEFAULT, + session->requestor); else buf = kmalloc(size, GFP_KERNEL); if (!buf) diff --git a/drivers/virt/vboxguest/vboxguest_utils.c b/drivers/virt/vboxguest/vboxguest_utils.c index bf4474214b4d..75fd140b02ff 100644 --- a/drivers/virt/vboxguest/vboxguest_utils.c +++ b/drivers/virt/vboxguest/vboxguest_utils.c @@ -62,7 +62,8 @@ VBG_LOG(vbg_err, pr_err); VBG_LOG(vbg_debug, pr_debug); #endif -void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type) +void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type, + u32 requestor) { struct vmmdev_request_header *req; int order = get_order(PAGE_ALIGN(len)); @@ -78,7 +79,7 @@ void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type) req->request_type = req_type; req->rc = VERR_GENERAL_FAILURE; req->reserved1 = 0; - req->reserved2 = 0; + req->requestor = requestor; return req; } @@ -119,7 +120,7 @@ static bool hgcm_req_done(struct vbg_dev *gdev, return done; } -int vbg_hgcm_connect(struct vbg_dev *gdev, +int vbg_hgcm_connect(struct vbg_dev *gdev, u32 requestor, struct vmmdev_hgcm_service_location *loc, u32 *client_id, int *vbox_status) { @@ -127,7 +128,7 @@ int vbg_hgcm_connect(struct vbg_dev *gdev, int rc; hgcm_connect = vbg_req_alloc(sizeof(*hgcm_connect), - VMMDEVREQ_HGCM_CONNECT); + VMMDEVREQ_HGCM_CONNECT, requestor); if (!hgcm_connect) return -ENOMEM; @@ -153,13 +154,15 @@ int vbg_hgcm_connect(struct vbg_dev *gdev, } EXPORT_SYMBOL(vbg_hgcm_connect); -int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 client_id, int *vbox_status) +int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 requestor, + u32 client_id, int *vbox_status) { struct vmmdev_hgcm_disconnect *hgcm_disconnect = NULL; int rc; hgcm_disconnect = vbg_req_alloc(sizeof(*hgcm_disconnect), - VMMDEVREQ_HGCM_DISCONNECT); + VMMDEVREQ_HGCM_DISCONNECT, + requestor); if (!hgcm_disconnect) return -ENOMEM; @@ -593,9 +596,10 @@ static int hgcm_call_copy_back_result( return 0; } -int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function, - u32 timeout_ms, struct vmmdev_hgcm_function_parameter *parms, - u32 parm_count, int *vbox_status) +int vbg_hgcm_call(struct vbg_dev *gdev, u32 requestor, u32 client_id, + u32 function, u32 timeout_ms, + struct vmmdev_hgcm_function_parameter *parms, u32 parm_count, + int *vbox_status) { struct vmmdev_hgcm_call *call; void **bounce_bufs = NULL; @@ -615,7 +619,7 @@ int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function, goto free_bounce_bufs; } - call = vbg_req_alloc(size, VMMDEVREQ_HGCM_CALL); + call = vbg_req_alloc(size, VMMDEVREQ_HGCM_CALL, requestor); if (!call) { ret = -ENOMEM; goto free_bounce_bufs; @@ -647,9 +651,9 @@ EXPORT_SYMBOL(vbg_hgcm_call); #ifdef CONFIG_COMPAT int vbg_hgcm_call32( - struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms, - struct vmmdev_hgcm_function_parameter32 *parm32, u32 parm_count, - int *vbox_status) + struct vbg_dev *gdev, u32 requestor, u32 client_id, u32 function, + u32 timeout_ms, struct vmmdev_hgcm_function_parameter32 *parm32, + u32 parm_count, int *vbox_status) { struct vmmdev_hgcm_function_parameter *parm64 = NULL; u32 i, size; @@ -689,7 +693,7 @@ int vbg_hgcm_call32( goto out_free; } - ret = vbg_hgcm_call(gdev, client_id, function, timeout_ms, + ret = vbg_hgcm_call(gdev, requestor, client_id, function, timeout_ms, parm64, parm_count, vbox_status); if (ret < 0) goto out_free; diff --git a/drivers/virt/vboxguest/vboxguest_version.h b/drivers/virt/vboxguest/vboxguest_version.h index 77f0c8f8a231..84834dad38d5 100644 --- a/drivers/virt/vboxguest/vboxguest_version.h +++ b/drivers/virt/vboxguest/vboxguest_version.h @@ -9,11 +9,10 @@ #ifndef __VBOX_VERSION_H__ #define __VBOX_VERSION_H__ -/* Last synced October 4th 2017 */ -#define VBG_VERSION_MAJOR 5 -#define VBG_VERSION_MINOR 2 +#define VBG_VERSION_MAJOR 6 +#define VBG_VERSION_MINOR 0 #define VBG_VERSION_BUILD 0 -#define VBG_SVN_REV 68940 -#define VBG_VERSION_STRING "5.2.0" +#define VBG_SVN_REV 127566 +#define VBG_VERSION_STRING "6.0.0" #endif diff --git a/drivers/virt/vboxguest/vmmdev.h b/drivers/virt/vboxguest/vmmdev.h index 5e2ae978935d..6337b8d75d96 100644 --- a/drivers/virt/vboxguest/vmmdev.h +++ b/drivers/virt/vboxguest/vmmdev.h @@ -98,8 +98,8 @@ struct vmmdev_request_header { s32 rc; /** Reserved field no.1. MBZ. */ u32 reserved1; - /** Reserved field no.2. MBZ. */ - u32 reserved2; + /** IN: Requestor information (VMMDEV_REQUESTOR_*) */ + u32 requestor; }; VMMDEV_ASSERT_SIZE(vmmdev_request_header, 24); @@ -247,6 +247,8 @@ struct vmmdev_guest_info { }; VMMDEV_ASSERT_SIZE(vmmdev_guest_info, 24 + 8); +#define VMMDEV_GUEST_INFO2_ADDITIONS_FEATURES_REQUESTOR_INFO BIT(0) + /** struct vmmdev_guestinfo2 - Guest information report, version 2. */ struct vmmdev_guest_info2 { /** Header. */ @@ -259,7 +261,7 @@ struct vmmdev_guest_info2 { u32 additions_build; /** SVN revision. */ u32 additions_revision; - /** Feature mask, currently unused. */ + /** Feature mask. */ u32 additions_features; /** * The intentional meaning of this field was: diff --git a/include/linux/vbox_utils.h b/include/linux/vbox_utils.h index a240ed2a0372..ff56c443180c 100644 --- a/include/linux/vbox_utils.h +++ b/include/linux/vbox_utils.h @@ -24,15 +24,17 @@ __printf(1, 2) void vbg_debug(const char *fmt, ...); #define vbg_debug pr_debug #endif -int vbg_hgcm_connect(struct vbg_dev *gdev, +int vbg_hgcm_connect(struct vbg_dev *gdev, u32 requestor, struct vmmdev_hgcm_service_location *loc, u32 *client_id, int *vbox_status); -int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 client_id, int *vbox_status); +int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 requestor, + u32 client_id, int *vbox_status); -int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function, - u32 timeout_ms, struct vmmdev_hgcm_function_parameter *parms, - u32 parm_count, int *vbox_status); +int vbg_hgcm_call(struct vbg_dev *gdev, u32 requestor, u32 client_id, + u32 function, u32 timeout_ms, + struct vmmdev_hgcm_function_parameter *parms, u32 parm_count, + int *vbox_status); /** * Convert a VirtualBox status code to a standard Linux kernel return value. diff --git a/include/uapi/linux/vbox_vmmdev_types.h b/include/uapi/linux/vbox_vmmdev_types.h index 0e68024f36c7..26f39816af14 100644 --- a/include/uapi/linux/vbox_vmmdev_types.h +++ b/include/uapi/linux/vbox_vmmdev_types.h @@ -102,6 +102,66 @@ enum vmmdev_request_type { #define VMMDEVREQ_HGCM_CALL VMMDEVREQ_HGCM_CALL32 #endif +/* vmmdev_request_header.requestor defines */ + +/* Requestor user not given. */ +#define VMMDEV_REQUESTOR_USR_NOT_GIVEN 0x00000000 +/* The kernel driver (vboxguest) is the requestor. */ +#define VMMDEV_REQUESTOR_USR_DRV 0x00000001 +/* Some other kernel driver is the requestor. */ +#define VMMDEV_REQUESTOR_USR_DRV_OTHER 0x00000002 +/* The root or a admin user is the requestor. */ +#define VMMDEV_REQUESTOR_USR_ROOT 0x00000003 +/* Regular joe user is making the request. */ +#define VMMDEV_REQUESTOR_USR_USER 0x00000006 +/* User classification mask. */ +#define VMMDEV_REQUESTOR_USR_MASK 0x00000007 + +/* Kernel mode request. Note this is 0, check for !USERMODE instead. */ +#define VMMDEV_REQUESTOR_KERNEL 0x00000000 +/* User mode request. */ +#define VMMDEV_REQUESTOR_USERMODE 0x00000008 +/* User or kernel mode classification mask. */ +#define VMMDEV_REQUESTOR_MODE_MASK 0x00000008 + +/* Don't know the physical console association of the requestor. */ +#define VMMDEV_REQUESTOR_CON_DONT_KNOW 0x00000000 +/* + * The request originates with a process that is NOT associated with the + * physical console. + */ +#define VMMDEV_REQUESTOR_CON_NO 0x00000010 +/* Requestor process is associated with the physical console. */ +#define VMMDEV_REQUESTOR_CON_YES 0x00000020 +/* Console classification mask. */ +#define VMMDEV_REQUESTOR_CON_MASK 0x00000030 + +/* Requestor is member of special VirtualBox user group. */ +#define VMMDEV_REQUESTOR_GRP_VBOX 0x00000080 + +/* Note: trust level is for windows guests only, linux always uses not-given */ +/* Requestor trust level: Unspecified */ +#define VMMDEV_REQUESTOR_TRUST_NOT_GIVEN 0x00000000 +/* Requestor trust level: Untrusted (SID S-1-16-0) */ +#define VMMDEV_REQUESTOR_TRUST_UNTRUSTED 0x00001000 +/* Requestor trust level: Untrusted (SID S-1-16-4096) */ +#define VMMDEV_REQUESTOR_TRUST_LOW 0x00002000 +/* Requestor trust level: Medium (SID S-1-16-8192) */ +#define VMMDEV_REQUESTOR_TRUST_MEDIUM 0x00003000 +/* Requestor trust level: Medium plus (SID S-1-16-8448) */ +#define VMMDEV_REQUESTOR_TRUST_MEDIUM_PLUS 0x00004000 +/* Requestor trust level: High (SID S-1-16-12288) */ +#define VMMDEV_REQUESTOR_TRUST_HIGH 0x00005000 +/* Requestor trust level: System (SID S-1-16-16384) */ +#define VMMDEV_REQUESTOR_TRUST_SYSTEM 0x00006000 +/* Requestor trust level >= Protected (SID S-1-16-20480, S-1-16-28672) */ +#define VMMDEV_REQUESTOR_TRUST_PROTECTED 0x00007000 +/* Requestor trust level mask */ +#define VMMDEV_REQUESTOR_TRUST_MASK 0x00007000 + +/* Requestor is using the less trusted user device node (/dev/vboxuser) */ +#define VMMDEV_REQUESTOR_USER_DEVICE 0x00008000 + /** HGCM service location types. */ enum vmmdev_hgcm_service_location_type { VMMDEV_HGCM_LOC_INVALID = 0, -- cgit v1.2.3-71-gd317 From 9b7ea46a82b31c74a37e6ff1c2a1df7d53e392ab Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 28 Mar 2019 20:43:34 -0700 Subject: mm/hotplug: fix offline undo_isolate_page_range() Commit f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") introduced move_pfn_range_to_zone() which calls memmap_init_zone() during onlining a memory block. memmap_init_zone() will reset pagetype flags and makes migrate type to be MOVABLE. However, in __offline_pages(), it also call undo_isolate_page_range() after offline_isolated_pages() to do the same thing. Due to commit 2ce13640b3f4 ("mm: __first_valid_page skip over offline pages") changed __first_valid_page() to skip offline pages, undo_isolate_page_range() here just waste CPU cycles looping around the offlining PFN range while doing nothing, because __first_valid_page() will return NULL as offline_isolated_pages() has already marked all memory sections within the pfn range as offline via offline_mem_sections(). Also, after calling the "useless" undo_isolate_page_range() here, it reaches the point of no returning by notifying MEM_OFFLINE. Those pages will be marked as MIGRATE_MOVABLE again once onlining. The only thing left to do is to decrease the number of isolated pageblocks zone counter which would make some paths of the page allocation slower that the above commit introduced. Even if alloc_contig_range() can be used to isolate 16GB-hugetlb pages on ppc64, an "int" should still be enough to represent the number of pageblocks there. Fix an incorrect comment along the way. [cai@lca.pw: v4] Link: http://lkml.kernel.org/r/20190314150641.59358-1-cai@lca.pw Link: http://lkml.kernel.org/r/20190313143133.46200-1-cai@lca.pw Fixes: 2ce13640b3f4 ("mm: __first_valid_page skip over offline pages") Signed-off-by: Qian Cai Acked-by: Michal Hocko Reviewed-by: Oscar Salvador Cc: Vlastimil Babka Cc: [4.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-isolation.h | 10 --------- mm/memory_hotplug.c | 17 +++++++++++---- mm/page_alloc.c | 2 +- mm/page_isolation.c | 48 ++++++++++++++++++++++++++---------------- mm/sparse.c | 2 +- 5 files changed, 45 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 4eb26d278046..280ae96dc4c3 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -41,16 +41,6 @@ int move_freepages_block(struct zone *zone, struct page *page, /* * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE. - * If specified range includes migrate types other than MOVABLE or CMA, - * this will fail with -EBUSY. - * - * For isolating all pages in the range finally, the caller have to - * free all pages in the range. test_page_isolated() can be used for - * test it. - * - * The following flags are allowed (they can be combined in a bit mask) - * SKIP_HWPOISON - ignore hwpoison pages - * REPORT_FAILURE - report details about the failure to isolate the range */ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index f767582af4f8..0e0a16021fd5 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1576,7 +1576,7 @@ static int __ref __offline_pages(unsigned long start_pfn, { unsigned long pfn, nr_pages; long offlined_pages; - int ret, node; + int ret, node, nr_isolate_pageblock; unsigned long flags; unsigned long valid_start, valid_end; struct zone *zone; @@ -1602,10 +1602,11 @@ static int __ref __offline_pages(unsigned long start_pfn, ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE, SKIP_HWPOISON | REPORT_FAILURE); - if (ret) { + if (ret < 0) { reason = "failure to isolate range"; goto failed_removal; } + nr_isolate_pageblock = ret; arg.start_pfn = start_pfn; arg.nr_pages = nr_pages; @@ -1657,8 +1658,16 @@ static int __ref __offline_pages(unsigned long start_pfn, /* Ok, all of our target is isolated. We cannot do rollback at this point. */ offline_isolated_pages(start_pfn, end_pfn); - /* reset pagetype flags and makes migrate type to be MOVABLE */ - undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); + + /* + * Onlining will reset pagetype flags and makes migrate type + * MOVABLE, so just need to decrease the number of isolated + * pageblocks zone counter here. + */ + spin_lock_irqsave(&zone->lock, flags); + zone->nr_isolate_pageblock -= nr_isolate_pageblock; + spin_unlock_irqrestore(&zone->lock, flags); + /* removal success */ adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages); zone->present_pages -= offlined_pages; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 03fcf73d47da..d96ca5bc555b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8233,7 +8233,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, ret = start_isolate_page_range(pfn_max_align_down(start), pfn_max_align_up(end), migratetype, 0); - if (ret) + if (ret < 0) return ret; /* diff --git a/mm/page_isolation.c b/mm/page_isolation.c index ce323e56b34d..bf4159d771c7 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -160,27 +160,36 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) return NULL; } -/* - * start_isolate_page_range() -- make page-allocation-type of range of pages - * to be MIGRATE_ISOLATE. - * @start_pfn: The lower PFN of the range to be isolated. - * @end_pfn: The upper PFN of the range to be isolated. - * @migratetype: migrate type to set in error recovery. +/** + * start_isolate_page_range() - make page-allocation-type of range of pages to + * be MIGRATE_ISOLATE. + * @start_pfn: The lower PFN of the range to be isolated. + * @end_pfn: The upper PFN of the range to be isolated. + * start_pfn/end_pfn must be aligned to pageblock_order. + * @migratetype: Migrate type to set in error recovery. + * @flags: The following flags are allowed (they can be combined in + * a bit mask) + * SKIP_HWPOISON - ignore hwpoison pages + * REPORT_FAILURE - report details about the failure to + * isolate the range * * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in * the range will never be allocated. Any free pages and pages freed in the - * future will not be allocated again. - * - * start_pfn/end_pfn must be aligned to pageblock_order. - * Return 0 on success and -EBUSY if any part of range cannot be isolated. + * future will not be allocated again. If specified range includes migrate types + * other than MOVABLE or CMA, this will fail with -EBUSY. For isolating all + * pages in the range finally, the caller have to free all pages in the range. + * test_page_isolated() can be used for test it. * * There is no high level synchronization mechanism that prevents two threads - * from trying to isolate overlapping ranges. If this happens, one thread + * from trying to isolate overlapping ranges. If this happens, one thread * will notice pageblocks in the overlapping range already set to isolate. * This happens in set_migratetype_isolate, and set_migratetype_isolate - * returns an error. We then clean up by restoring the migration type on - * pageblocks we may have modified and return -EBUSY to caller. This + * returns an error. We then clean up by restoring the migration type on + * pageblocks we may have modified and return -EBUSY to caller. This * prevents two threads from simultaneously working on overlapping ranges. + * + * Return: the number of isolated pageblocks on success and -EBUSY if any part + * of range cannot be isolated. */ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, unsigned migratetype, int flags) @@ -188,6 +197,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, unsigned long pfn; unsigned long undo_pfn; struct page *page; + int nr_isolate_pageblock = 0; BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages)); BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages)); @@ -196,13 +206,15 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, pfn < end_pfn; pfn += pageblock_nr_pages) { page = __first_valid_page(pfn, pageblock_nr_pages); - if (page && - set_migratetype_isolate(page, migratetype, flags)) { - undo_pfn = pfn; - goto undo; + if (page) { + if (set_migratetype_isolate(page, migratetype, flags)) { + undo_pfn = pfn; + goto undo; + } + nr_isolate_pageblock++; } } - return 0; + return nr_isolate_pageblock; undo: for (pfn = start_pfn; pfn < undo_pfn; diff --git a/mm/sparse.c b/mm/sparse.c index 69904aa6165b..56e057c432f9 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -567,7 +567,7 @@ void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn) } #ifdef CONFIG_MEMORY_HOTREMOVE -/* Mark all memory sections within the pfn range as online */ +/* Mark all memory sections within the pfn range as offline */ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn) { unsigned long pfn; -- cgit v1.2.3-71-gd317 From 6d6ea1e967a246f12cfe2f5fb743b70b2e608d4a Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Thu, 28 Mar 2019 20:43:42 -0700 Subject: mm: add support for kmem caches in DMA32 zone Patch series "iommu/io-pgtable-arm-v7s: Use DMA32 zone for page tables", v6. This is a followup to the discussion in [1], [2]. IOMMUs using ARMv7 short-descriptor format require page tables (level 1 and 2) to be allocated within the first 4GB of RAM, even on 64-bit systems. For L1 tables that are bigger than a page, we can just use __get_free_pages with GFP_DMA32 (on arm64 systems only, arm would still use GFP_DMA). For L2 tables that only take 1KB, it would be a waste to allocate a full page, so we considered 3 approaches: 1. This series, adding support for GFP_DMA32 slab caches. 2. genalloc, which requires pre-allocating the maximum number of L2 page tables (4096, so 4MB of memory). 3. page_frag, which is not very memory-efficient as it is unable to reuse freed fragments until the whole page is freed. [3] This series is the most memory-efficient approach. stable@ note: We confirmed that this is a regression, and IOMMU errors happen on 4.19 and linux-next/master on MT8173 (elm, Acer Chromebook R13). The issue most likely starts from commit ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32"), i.e. 4.15, and presumably breaks a number of Mediatek platforms (and maybe others?). [1] https://lists.linuxfoundation.org/pipermail/iommu/2018-November/030876.html [2] https://lists.linuxfoundation.org/pipermail/iommu/2018-December/031696.html [3] https://patchwork.codeaurora.org/patch/671639/ This patch (of 3): IOMMUs using ARMv7 short-descriptor format require page tables to be allocated within the first 4GB of RAM, even on 64-bit systems. On arm64, this is done by passing GFP_DMA32 flag to memory allocation functions. For IOMMU L2 tables that only take 1KB, it would be a waste to allocate a full page using get_free_pages, so we considered 3 approaches: 1. This patch, adding support for GFP_DMA32 slab caches. 2. genalloc, which requires pre-allocating the maximum number of L2 page tables (4096, so 4MB of memory). 3. page_frag, which is not very memory-efficient as it is unable to reuse freed fragments until the whole page is freed. This change makes it possible to create a custom cache in DMA32 zone using kmem_cache_create, then allocate memory using kmem_cache_alloc. We do not create a DMA32 kmalloc cache array, as there are currently no users of kmalloc(..., GFP_DMA32). These calls will continue to trigger a warning, as we keep GFP_DMA32 in GFP_SLAB_BUG_MASK. This implies that calls to kmem_cache_*alloc on a SLAB_CACHE_DMA32 kmem_cache must _not_ use GFP_DMA32 (it is anyway redundant and unnecessary). Link: http://lkml.kernel.org/r/20181210011504.122604-2-drinkcat@chromium.org Signed-off-by: Nicolas Boichat Acked-by: Vlastimil Babka Acked-by: Will Deacon Cc: Robin Murphy Cc: Joerg Roedel Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Michal Hocko Cc: Mel Gorman Cc: Sasha Levin Cc: Huaisheng Ye Cc: Mike Rapoport Cc: Yong Wu Cc: Matthias Brugger Cc: Tomasz Figa Cc: Yingjoe Chen Cc: Christoph Hellwig Cc: Matthew Wilcox Cc: Hsin-Yi Wang Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 ++ mm/slab.c | 2 ++ mm/slab.h | 3 ++- mm/slab_common.c | 2 +- mm/slub.c | 5 +++++ 5 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 11b45f7ae405..9449b19c5f10 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -32,6 +32,8 @@ #define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U) /* Use GFP_DMA memory */ #define SLAB_CACHE_DMA ((slab_flags_t __force)0x00004000U) +/* Use GFP_DMA32 memory */ +#define SLAB_CACHE_DMA32 ((slab_flags_t __force)0x00008000U) /* DEBUG: Store the last owner for bug hunting */ #define SLAB_STORE_USER ((slab_flags_t __force)0x00010000U) /* Panic if kmem_cache_create() fails */ diff --git a/mm/slab.c b/mm/slab.c index 28652e4218e0..329bfe67f2ca 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2115,6 +2115,8 @@ done: cachep->allocflags = __GFP_COMP; if (flags & SLAB_CACHE_DMA) cachep->allocflags |= GFP_DMA; + if (flags & SLAB_CACHE_DMA32) + cachep->allocflags |= GFP_DMA32; if (flags & SLAB_RECLAIM_ACCOUNT) cachep->allocflags |= __GFP_RECLAIMABLE; cachep->size = size; diff --git a/mm/slab.h b/mm/slab.h index e5e6658eeacc..43ac818b8592 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -127,7 +127,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size, /* Legal flag mask for kmem_cache_create(), for various configurations */ -#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \ +#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \ + SLAB_CACHE_DMA32 | SLAB_PANIC | \ SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS ) #if defined(CONFIG_DEBUG_SLAB) diff --git a/mm/slab_common.c b/mm/slab_common.c index 03eeb8b7b4b1..58251ba63e4a 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -53,7 +53,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work, SLAB_FAILSLAB | SLAB_KASAN) #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ - SLAB_ACCOUNT) + SLAB_CACHE_DMA32 | SLAB_ACCOUNT) /* * Merge control. If this is set then no merging of slab caches will occur. diff --git a/mm/slub.c b/mm/slub.c index 1b08fbcb7e61..d30ede89f4a6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3589,6 +3589,9 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) if (s->flags & SLAB_CACHE_DMA) s->allocflags |= GFP_DMA; + if (s->flags & SLAB_CACHE_DMA32) + s->allocflags |= GFP_DMA32; + if (s->flags & SLAB_RECLAIM_ACCOUNT) s->allocflags |= __GFP_RECLAIMABLE; @@ -5679,6 +5682,8 @@ static char *create_unique_id(struct kmem_cache *s) */ if (s->flags & SLAB_CACHE_DMA) *p++ = 'd'; + if (s->flags & SLAB_CACHE_DMA32) + *p++ = 'D'; if (s->flags & SLAB_RECLAIM_ACCOUNT) *p++ = 'a'; if (s->flags & SLAB_CONSISTENCY_CHECKS) -- cgit v1.2.3-71-gd317 From a953e7721fa9999fd628885ed451e16641a23d1e Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Thu, 28 Mar 2019 20:43:51 -0700 Subject: include/linux/hugetlb.h: convert to use vm_fault_t kbuild produces the below warning: tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master head: 5453a3df2a5eb49bc24615d4cf0d66b2aae05e5f commit 3d3539018d2c ("mm: create the new vm_fault_t type") reproduce: # apt-get install sparse git checkout 3d3539018d2cbd12e5af4a132636ee7fd8d43ef0 make ARCH=x86_64 allmodconfig make C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' >> mm/memory.c:3968:21: sparse: incorrect type in assignment (different >> base types) @@ expected restricted vm_fault_t [usertype] ret @@ >> got e] ret @@ mm/memory.c:3968:21: expected restricted vm_fault_t [usertype] ret mm/memory.c:3968:21: got int This patch converts to return vm_fault_t type for hugetlb_fault() when CONFIG_HUGETLB_PAGE=n. Regarding the sparse warning, Luc said: : This is the expected behaviour. The constant 0 is magic regarding bitwise : types but ({ ...; 0; }) is not, it is just an ordinary expression of type : 'int'. : : So, IMHO, Souptick's patch is the right thing to do. Link: http://lkml.kernel.org/r/20190318162604.GA31553@jordon-HP-15-Notebook-PC Signed-off-by: Souptick Joarder Reviewed-by: Mike Kravetz Cc: Matthew Wilcox Cc: Luc Van Oostenryck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ea35263eb76b..11943b60f208 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -203,7 +203,6 @@ static inline void hugetlb_show_meminfo(void) #define pud_huge(x) 0 #define is_hugepage_only_range(mm, addr, len) 0 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) -#define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; }) #define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \ src_addr, pagep) ({ BUG(); 0; }) #define huge_pte_offset(mm, address, sz) 0 @@ -234,6 +233,13 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb, { BUG(); } +static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long address, + unsigned int flags) +{ + BUG(); + return 0; +} #endif /* !CONFIG_HUGETLB_PAGE */ /* -- cgit v1.2.3-71-gd317 From b736523f0759d1debeb56f8e0c4c87a2bea0fb23 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 28 Mar 2019 20:44:05 -0700 Subject: include/linux/list.h: fix list_is_first() kernel-doc Fix typo of kernel-doc parameter notation (there should be no space between '@' and the parameter name). Also fixes bogus kernel-doc notation output formatting. Link: http://lkml.kernel.org/r/ddce8b80-9a8a-d52d-3546-87b2211c089a@infradead.org Fixes: 70b44595eafe9 ("mm, compaction: use free lists to quickly locate a migration source") Signed-off-by: Randy Dunlap Acked-by: Mel Gorman Reviewed-by: William Kucharski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 79626b5ab36c..58aa3adf94e6 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -207,7 +207,7 @@ static inline void list_bulk_move_tail(struct list_head *head, } /** - * list_is_first -- tests whether @ list is the first entry in list @head + * list_is_first -- tests whether @list is the first entry in list @head * @list: the entry to test * @head: the head of the list */ -- cgit v1.2.3-71-gd317 From fcfc2aa0185f4a731d05a21e9f359968fdfd02e7 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 28 Mar 2019 20:44:13 -0700 Subject: ptrace: take into account saved_sigmask in PTRACE{GET,SET}SIGMASK There are a few system calls (pselect, ppoll, etc) which replace a task sigmask while they are running in a kernel-space When a task calls one of these syscalls, the kernel saves a current sigmask in task->saved_sigmask and sets a syscall sigmask. On syscall-exit-stop, ptrace traps a task before restoring the saved_sigmask, so PTRACE_GETSIGMASK returns the syscall sigmask and PTRACE_SETSIGMASK does nothing, because its sigmask is replaced by saved_sigmask, when the task returns to user-space. This patch fixes this problem. PTRACE_GETSIGMASK returns saved_sigmask if it's set. PTRACE_SETSIGMASK drops the TIF_RESTORE_SIGMASK flag. Link: http://lkml.kernel.org/r/20181120060616.6043-1-avagin@gmail.com Fixes: 29000caecbe8 ("ptrace: add ability to get/set signal-blocked mask") Signed-off-by: Andrei Vagin Acked-by: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/signal.h | 18 ++++++++++++++++++ kernel/ptrace.c | 15 +++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index ae5655197698..e412c092c1e8 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -418,10 +418,20 @@ static inline void set_restore_sigmask(void) set_thread_flag(TIF_RESTORE_SIGMASK); WARN_ON(!test_thread_flag(TIF_SIGPENDING)); } + +static inline void clear_tsk_restore_sigmask(struct task_struct *tsk) +{ + clear_tsk_thread_flag(tsk, TIF_RESTORE_SIGMASK); +} + static inline void clear_restore_sigmask(void) { clear_thread_flag(TIF_RESTORE_SIGMASK); } +static inline bool test_tsk_restore_sigmask(struct task_struct *tsk) +{ + return test_tsk_thread_flag(tsk, TIF_RESTORE_SIGMASK); +} static inline bool test_restore_sigmask(void) { return test_thread_flag(TIF_RESTORE_SIGMASK); @@ -439,6 +449,10 @@ static inline void set_restore_sigmask(void) current->restore_sigmask = true; WARN_ON(!test_thread_flag(TIF_SIGPENDING)); } +static inline void clear_tsk_restore_sigmask(struct task_struct *tsk) +{ + tsk->restore_sigmask = false; +} static inline void clear_restore_sigmask(void) { current->restore_sigmask = false; @@ -447,6 +461,10 @@ static inline bool test_restore_sigmask(void) { return current->restore_sigmask; } +static inline bool test_tsk_restore_sigmask(struct task_struct *tsk) +{ + return tsk->restore_sigmask; +} static inline bool test_and_clear_restore_sigmask(void) { if (!current->restore_sigmask) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 771e93f9c43f..6f357f4fc859 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * Access another process' address space via ptrace. @@ -924,18 +925,26 @@ int ptrace_request(struct task_struct *child, long request, ret = ptrace_setsiginfo(child, &siginfo); break; - case PTRACE_GETSIGMASK: + case PTRACE_GETSIGMASK: { + sigset_t *mask; + if (addr != sizeof(sigset_t)) { ret = -EINVAL; break; } - if (copy_to_user(datavp, &child->blocked, sizeof(sigset_t))) + if (test_tsk_restore_sigmask(child)) + mask = &child->saved_sigmask; + else + mask = &child->blocked; + + if (copy_to_user(datavp, mask, sizeof(sigset_t))) ret = -EFAULT; else ret = 0; break; + } case PTRACE_SETSIGMASK: { sigset_t new_set; @@ -961,6 +970,8 @@ int ptrace_request(struct task_struct *child, long request, child->blocked = new_set; spin_unlock_irq(&child->sighand->siglock); + clear_tsk_restore_sigmask(child); + ret = 0; break; } -- cgit v1.2.3-71-gd317