From dc7019b7d0e188d4093b34bd0747ed0d668c63bf Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Mon, 14 Jun 2021 17:33:14 -0500 Subject: tee: add tee_shm_alloc_kernel_buf() Adds a new function tee_shm_alloc_kernel_buf() to allocate shared memory from a kernel driver. This function can later be made more lightweight by unnecessary dma-buf export. Cc: stable@vger.kernel.org Reviewed-by: Tyler Hicks Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 54269e47ac9a..8990f7628387 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -332,6 +332,7 @@ void *tee_get_drvdata(struct tee_device *teedev); * @returns a pointer to 'struct tee_shm' */ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags); +struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size); /** * tee_shm_register() - Register shared memory buffer -- cgit v1.2.3-71-gd317 From 376e4199e327a5cf29b8ec8fb0f64f3d8b429819 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Mon, 14 Jun 2021 17:33:15 -0500 Subject: tee: Correct inappropriate usage of TEE_SHM_DMA_BUF flag Currently TEE_SHM_DMA_BUF flag has been inappropriately used to not register shared memory allocated for private usage by underlying TEE driver: OP-TEE in this case. So rather add a new flag as TEE_SHM_PRIV that can be utilized by underlying TEE drivers for private allocation and usage of shared memory. With this corrected, allow tee_shm_alloc_kernel_buf() to allocate a shared memory region without the backing of dma-buf. Cc: stable@vger.kernel.org Signed-off-by: Sumit Garg Co-developed-by: Tyler Hicks Signed-off-by: Tyler Hicks Reviewed-by: Jens Wiklander Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/optee/call.c | 2 +- drivers/tee/optee/core.c | 3 ++- drivers/tee/optee/rpc.c | 5 +++-- drivers/tee/optee/shm_pool.c | 8 ++++++-- drivers/tee/tee_shm.c | 4 ++-- include/linux/tee_drv.h | 1 + 6 files changed, 15 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c index 387e94768182..945f03da0223 100644 --- a/drivers/tee/optee/call.c +++ b/drivers/tee/optee/call.c @@ -184,7 +184,7 @@ static struct tee_shm *get_msg_arg(struct tee_context *ctx, size_t num_params, struct optee_msg_arg *ma; shm = tee_shm_alloc(ctx, OPTEE_MSG_GET_ARG_SIZE(num_params), - TEE_SHM_MAPPED); + TEE_SHM_MAPPED | TEE_SHM_PRIV); if (IS_ERR(shm)) return shm; diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index 651d49b53d3b..5ce13b099d7d 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -278,7 +278,8 @@ static void optee_release(struct tee_context *ctx) if (!ctxdata) return; - shm = tee_shm_alloc(ctx, sizeof(struct optee_msg_arg), TEE_SHM_MAPPED); + shm = tee_shm_alloc(ctx, sizeof(struct optee_msg_arg), + TEE_SHM_MAPPED | TEE_SHM_PRIV); if (!IS_ERR(shm)) { arg = tee_shm_get_va(shm, 0); /* diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c index 1849180b0278..efbaff7ad7e5 100644 --- a/drivers/tee/optee/rpc.c +++ b/drivers/tee/optee/rpc.c @@ -314,7 +314,7 @@ static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx, shm = cmd_alloc_suppl(ctx, sz); break; case OPTEE_RPC_SHM_TYPE_KERNEL: - shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED); + shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED | TEE_SHM_PRIV); break; default: arg->ret = TEEC_ERROR_BAD_PARAMETERS; @@ -502,7 +502,8 @@ void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param, switch (OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0)) { case OPTEE_SMC_RPC_FUNC_ALLOC: - shm = tee_shm_alloc(ctx, param->a1, TEE_SHM_MAPPED); + shm = tee_shm_alloc(ctx, param->a1, + TEE_SHM_MAPPED | TEE_SHM_PRIV); if (!IS_ERR(shm) && !tee_shm_get_pa(shm, 0, &pa)) { reg_pair_from_64(¶m->a1, ¶m->a2, pa); reg_pair_from_64(¶m->a4, ¶m->a5, diff --git a/drivers/tee/optee/shm_pool.c b/drivers/tee/optee/shm_pool.c index da06ce9b9313..c41a9a501a6e 100644 --- a/drivers/tee/optee/shm_pool.c +++ b/drivers/tee/optee/shm_pool.c @@ -27,7 +27,11 @@ static int pool_op_alloc(struct tee_shm_pool_mgr *poolm, shm->paddr = page_to_phys(page); shm->size = PAGE_SIZE << order; - if (shm->flags & TEE_SHM_DMA_BUF) { + /* + * Shared memory private to the OP-TEE driver doesn't need + * to be registered with OP-TEE. + */ + if (!(shm->flags & TEE_SHM_PRIV)) { unsigned int nr_pages = 1 << order, i; struct page **pages; @@ -60,7 +64,7 @@ err: static void pool_op_free(struct tee_shm_pool_mgr *poolm, struct tee_shm *shm) { - if (shm->flags & TEE_SHM_DMA_BUF) + if (!(shm->flags & TEE_SHM_PRIV)) optee_shm_unregister(shm->ctx, shm); free_pages((unsigned long)shm->kaddr, get_order(shm->size)); diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index c65e44707cd6..8a9384a64f3e 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -117,7 +117,7 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) return ERR_PTR(-EINVAL); } - if ((flags & ~(TEE_SHM_MAPPED | TEE_SHM_DMA_BUF))) { + if ((flags & ~(TEE_SHM_MAPPED | TEE_SHM_DMA_BUF | TEE_SHM_PRIV))) { dev_err(teedev->dev.parent, "invalid shm flags 0x%x", flags); return ERR_PTR(-EINVAL); } @@ -207,7 +207,7 @@ EXPORT_SYMBOL_GPL(tee_shm_alloc); */ struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size) { - return tee_shm_alloc(ctx, size, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF); + return tee_shm_alloc(ctx, size, TEE_SHM_MAPPED); } EXPORT_SYMBOL_GPL(tee_shm_alloc_kernel_buf); diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 8990f7628387..3ebfea0781f1 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -27,6 +27,7 @@ #define TEE_SHM_USER_MAPPED BIT(4) /* Memory mapped in user space */ #define TEE_SHM_POOL BIT(5) /* Memory allocated from pool */ #define TEE_SHM_KERNEL_MAPPED BIT(6) /* Memory mapped in kernel space */ +#define TEE_SHM_PRIV BIT(7) /* Memory private to TEE driver */ struct device; struct tee_device; -- cgit v1.2.3-71-gd317 From 853a9ae29e978d37f5dfa72622a68c9ae3d7fa89 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 14 Jul 2021 10:04:27 +0200 Subject: serial: 8250: fix handle_irq locking The 8250 handle_irq callback is not just called from the interrupt handler but also from a timer callback when polling (e.g. for ports without an interrupt line). Consequently the callback must explicitly disable interrupts to avoid a potential deadlock with another interrupt in polled mode. Add back an irqrestore-version of the sysrq port-unlock helper and use it in the 8250 callbacks that need it. Fixes: 75f4e830fa9c ("serial: do not restore interrupt state in sysrq helper") Cc: stable@vger.kernel.org # 5.13 Cc: Joel Stanley Cc: Andrew Jeffery Reported-by: kernel test robot Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210714080427.28164-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_aspeed_vuart.c | 5 +++-- drivers/tty/serial/8250/8250_fsl.c | 5 +++-- drivers/tty/serial/8250/8250_port.c | 5 +++-- include/linux/serial_core.h | 24 ++++++++++++++++++++++++ 4 files changed, 33 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c index 4caab8714e2c..2350fb3bb5e4 100644 --- a/drivers/tty/serial/8250/8250_aspeed_vuart.c +++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c @@ -329,6 +329,7 @@ static int aspeed_vuart_handle_irq(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned int iir, lsr; + unsigned long flags; unsigned int space, count; iir = serial_port_in(port, UART_IIR); @@ -336,7 +337,7 @@ static int aspeed_vuart_handle_irq(struct uart_port *port) if (iir & UART_IIR_NO_INT) return 0; - spin_lock(&port->lock); + spin_lock_irqsave(&port->lock, flags); lsr = serial_port_in(port, UART_LSR); @@ -370,7 +371,7 @@ static int aspeed_vuart_handle_irq(struct uart_port *port) if (lsr & UART_LSR_THRE) serial8250_tx_chars(up); - uart_unlock_and_check_sysrq(port); + uart_unlock_and_check_sysrq_irqrestore(port, flags); return 1; } diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c index 4e75d2e4f87c..fc65a2293ce9 100644 --- a/drivers/tty/serial/8250/8250_fsl.c +++ b/drivers/tty/serial/8250/8250_fsl.c @@ -30,10 +30,11 @@ struct fsl8250_data { int fsl8250_handle_irq(struct uart_port *port) { unsigned char lsr, orig_lsr; + unsigned long flags; unsigned int iir; struct uart_8250_port *up = up_to_u8250p(port); - spin_lock(&up->port.lock); + spin_lock_irqsave(&up->port.lock, flags); iir = port->serial_in(port, UART_IIR); if (iir & UART_IIR_NO_INT) { @@ -82,7 +83,7 @@ int fsl8250_handle_irq(struct uart_port *port) up->lsr_saved_flags = orig_lsr; - uart_unlock_and_check_sysrq(&up->port); + uart_unlock_and_check_sysrq_irqrestore(&up->port, flags); return 1; } diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 2e7000f79b03..1da29a219842 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1899,11 +1899,12 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) unsigned char status; struct uart_8250_port *up = up_to_u8250p(port); bool skip_rx = false; + unsigned long flags; if (iir & UART_IIR_NO_INT) return 0; - spin_lock(&port->lock); + spin_lock_irqsave(&port->lock, flags); status = serial_port_in(port, UART_LSR); @@ -1929,7 +1930,7 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) (up->ier & UART_IER_THRI)) serial8250_tx_chars(up); - uart_unlock_and_check_sysrq(port); + uart_unlock_and_check_sysrq_irqrestore(port, flags); return 1; } diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 52d7fb92a69d..c58cc142d23f 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -518,6 +518,25 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port) if (sysrq_ch) handle_sysrq(sysrq_ch); } + +static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, + unsigned long flags) +{ + int sysrq_ch; + + if (!port->has_sysrq) { + spin_unlock_irqrestore(&port->lock, flags); + return; + } + + sysrq_ch = port->sysrq_ch; + port->sysrq_ch = 0; + + spin_unlock_irqrestore(&port->lock, flags); + + if (sysrq_ch) + handle_sysrq(sysrq_ch); +} #else /* CONFIG_MAGIC_SYSRQ_SERIAL */ static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) { @@ -531,6 +550,11 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { spin_unlock(&port->lock); } +static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, + unsigned long flags) +{ + spin_unlock_irqrestore(&port->lock, flags); +} #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ /* -- cgit v1.2.3-71-gd317 From bf88fef0b6f1488abeca594d377991171c00e52a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 17 Jul 2021 21:21:27 +0300 Subject: usb: otg-fsm: Fix hrtimer list corruption The HNP work can be re-scheduled while it's still in-fly. This results in re-initialization of the busy work, resetting the hrtimer's list node of the work and crashing kernel with null dereference within kernel/timer once work's timer is expired. It's very easy to trigger this problem by re-plugging USB cable quickly. Initialize HNP work only once to fix this trouble. Unable to handle kernel NULL pointer dereference at virtual address 00000126) ... PC is at __run_timers.part.0+0x150/0x228 LR is at __next_timer_interrupt+0x51/0x9c ... (__run_timers.part.0) from [] (run_timer_softirq+0x2f/0x50) (run_timer_softirq) from [] (__do_softirq+0xd5/0x2f0) (__do_softirq) from [] (irq_exit+0xab/0xb8) (irq_exit) from [] (handle_domain_irq+0x45/0x60) (handle_domain_irq) from [] (gic_handle_irq+0x6b/0x7c) (gic_handle_irq) from [] (__irq_svc+0x65/0xac) Cc: stable@vger.kernel.org Acked-by: Peter Chen Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20210717182134.30262-6-digetx@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/usb-otg-fsm.c | 6 +++++- include/linux/usb/otg-fsm.h | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/usb/common/usb-otg-fsm.c b/drivers/usb/common/usb-otg-fsm.c index 3740cf95560e..0697fde51d00 100644 --- a/drivers/usb/common/usb-otg-fsm.c +++ b/drivers/usb/common/usb-otg-fsm.c @@ -193,7 +193,11 @@ static void otg_start_hnp_polling(struct otg_fsm *fsm) if (!fsm->host_req_flag) return; - INIT_DELAYED_WORK(&fsm->hnp_polling_work, otg_hnp_polling_work); + if (!fsm->hnp_work_inited) { + INIT_DELAYED_WORK(&fsm->hnp_polling_work, otg_hnp_polling_work); + fsm->hnp_work_inited = true; + } + schedule_delayed_work(&fsm->hnp_polling_work, msecs_to_jiffies(T_HOST_REQ_POLL)); } diff --git a/include/linux/usb/otg-fsm.h b/include/linux/usb/otg-fsm.h index 3aee78dda16d..784659d4dc99 100644 --- a/include/linux/usb/otg-fsm.h +++ b/include/linux/usb/otg-fsm.h @@ -196,6 +196,7 @@ struct otg_fsm { struct mutex lock; u8 *host_req_flag; struct delayed_work hnp_polling_work; + bool hnp_work_inited; bool state_changed; }; -- cgit v1.2.3-71-gd317 From 5f7b51bf09baca8e4f80cbe879536842bafb5f31 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 28 Jul 2021 17:01:15 +0200 Subject: netfilter: ipset: Limit the maximal range of consecutive elements to add/delete The range size of consecutive elements were not limited. Thus one could define a huge range which may result soft lockup errors due to the long execution time. Now the range size is limited to 2^20 entries. Reported-by: Brad Spengler Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 3 +++ net/netfilter/ipset/ip_set_hash_ip.c | 9 ++++++++- net/netfilter/ipset/ip_set_hash_ipmark.c | 10 +++++++++- net/netfilter/ipset/ip_set_hash_ipport.c | 3 +++ net/netfilter/ipset/ip_set_hash_ipportip.c | 3 +++ net/netfilter/ipset/ip_set_hash_ipportnet.c | 3 +++ net/netfilter/ipset/ip_set_hash_net.c | 11 ++++++++++- net/netfilter/ipset/ip_set_hash_netiface.c | 10 +++++++++- net/netfilter/ipset/ip_set_hash_netnet.c | 16 +++++++++++++++- net/netfilter/ipset/ip_set_hash_netport.c | 11 ++++++++++- net/netfilter/ipset/ip_set_hash_netportnet.c | 16 +++++++++++++++- 11 files changed, 88 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 10279c4830ac..ada1296c87d5 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -196,6 +196,9 @@ struct ip_set_region { u32 elements; /* Number of elements vs timeout */ }; +/* Max range where every element is added/deleted in one step */ +#define IPSET_MAX_RANGE (1<<20) + /* The max revision number supported by any set type + 1 */ #define IPSET_REVISION_MAX 9 diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index d1bef23fd4f5..dd30c03d5a23 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -132,8 +132,11 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; - if (ip > ip_to) + if (ip > ip_to) { + if (ip_to == 0) + return -IPSET_ERR_HASH_ELEM; swap(ip, ip_to); + } } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); @@ -144,6 +147,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); + /* 64bit division is not allowed on 32bit */ + if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) { ip = ntohl(h->next.ip); e.ip = htonl(ip); diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index 18346d18aa16..153de3457423 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -121,6 +121,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK])); e.mark &= h->markmask; + if (e.mark == 0 && e.ip == 0) + return -IPSET_ERR_HASH_ELEM; if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) { @@ -133,8 +135,11 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; - if (ip > ip_to) + if (ip > ip_to) { + if (e.mark == 0 && ip_to == 0) + return -IPSET_ERR_HASH_ELEM; swap(ip, ip_to); + } } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); @@ -143,6 +148,9 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip, ip_to, cidr); } + if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index e1ca11196515..7303138e46be 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -173,6 +173,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } + if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index ab179e064597..334fb1ad0e86 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -180,6 +180,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } + if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 8f075b44cf64..7df94f437f60 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -253,6 +253,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } + if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; + ip2_to = ip2_from; if (tb[IPSET_ATTR_IP2_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index c1a11f041ac6..1422739d9aa2 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -140,7 +140,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0; + u32 ip = 0, ip_to = 0, ipn, n = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -188,6 +188,15 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); + n++; + } while (ipn++ < ip_to); + + if (n > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) ip = ntohl(h->next.ip); do { diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index ddd51c2e1cb3..9810f5bf63f5 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0; + u32 ip = 0, ip_to = 0, ipn, n = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -256,6 +256,14 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip, ip_to, e.cidr); } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); + n++; + } while (ipn++ < ip_to); + + if (n > IPSET_MAX_RANGE) + return -ERANGE; if (retried) ip = ntohl(h->next.ip); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 6532f0505e66..3d09eefe998a 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -168,7 +168,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0; - u32 ip2 = 0, ip2_from = 0, ip2_to = 0; + u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn; + u64 n = 0, m = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -244,6 +245,19 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); + n++; + } while (ipn++ < ip_to); + ipn = ip2_from; + do { + ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); + m++; + } while (ipn++ < ip2_to); + + if (n*m > IPSET_MAX_RANGE) + return -ERANGE; if (retried) { ip = ntohl(h->next.ip[0]); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index ec1564a1cb5a..09cf72eb37f8 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -158,7 +158,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 port, port_to, p = 0, ip = 0, ip_to = 0; + u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn; + u64 n = 0; bool with_ports = false; u8 cidr; int ret; @@ -235,6 +236,14 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip, ip_to, e.cidr + 1); } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr); + n++; + } while (ipn++ < ip_to); + + if (n*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; if (retried) { ip = ntohl(h->next.ip); diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 0e91d1e82f1c..19bcdb3141f6 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -182,7 +182,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, p = 0, port, port_to; - u32 ip2_from = 0, ip2_to = 0, ip2; + u32 ip2_from = 0, ip2_to = 0, ip2, ipn; + u64 n = 0, m = 0; bool with_ports = false; int ret; @@ -284,6 +285,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); + n++; + } while (ipn++ < ip_to); + ipn = ip2_from; + do { + ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); + m++; + } while (ipn++ < ip2_to); + + if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; if (retried) { ip = ntohl(h->next.ip[0]); -- cgit v1.2.3-71-gd317 From 1027b96ec9d34f9abab69bc1a4dc5b1ad8ab1349 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 6 Aug 2021 16:21:24 +0800 Subject: once: Fix panic when module unload DO_ONCE DEFINE_STATIC_KEY_TRUE(___once_key); __do_once_done once_disable_jump(once_key); INIT_WORK(&w->work, once_deferred); struct once_work *w; w->key = key; schedule_work(&w->work); module unload //*the key is destroy* process_one_work once_deferred BUG_ON(!static_key_enabled(work->key)); static_key_count((struct static_key *)x) //*access key, crash* When module uses DO_ONCE mechanism, it could crash due to the above concurrency problem, we could reproduce it with link[1]. Fix it by add/put module refcount in the once work process. [1] https://lore.kernel.org/netdev/eaa6c371-465e-57eb-6be9-f4b16b9d7cbf@huawei.com/ Cc: Hannes Frederic Sowa Cc: Daniel Borkmann Cc: David S. Miller Cc: Eric Dumazet Reported-by: Minmin chen Signed-off-by: Kefeng Wang Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/once.h | 4 ++-- lib/once.c | 11 ++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/once.h b/include/linux/once.h index 9225ee6d96c7..ae6f4eb41cbe 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -7,7 +7,7 @@ bool __do_once_start(bool *done, unsigned long *flags); void __do_once_done(bool *done, struct static_key_true *once_key, - unsigned long *flags); + unsigned long *flags, struct module *mod); /* Call a function exactly once. The idea of DO_ONCE() is to perform * a function call such as initialization of random seeds, etc, only @@ -46,7 +46,7 @@ void __do_once_done(bool *done, struct static_key_true *once_key, if (unlikely(___ret)) { \ func(__VA_ARGS__); \ __do_once_done(&___done, &___once_key, \ - &___flags); \ + &___flags, THIS_MODULE); \ } \ } \ ___ret; \ diff --git a/lib/once.c b/lib/once.c index 8b7d6235217e..59149bf3bfb4 100644 --- a/lib/once.c +++ b/lib/once.c @@ -3,10 +3,12 @@ #include #include #include +#include struct once_work { struct work_struct work; struct static_key_true *key; + struct module *module; }; static void once_deferred(struct work_struct *w) @@ -16,10 +18,11 @@ static void once_deferred(struct work_struct *w) work = container_of(w, struct once_work, work); BUG_ON(!static_key_enabled(work->key)); static_branch_disable(work->key); + module_put(work->module); kfree(work); } -static void once_disable_jump(struct static_key_true *key) +static void once_disable_jump(struct static_key_true *key, struct module *mod) { struct once_work *w; @@ -29,6 +32,8 @@ static void once_disable_jump(struct static_key_true *key) INIT_WORK(&w->work, once_deferred); w->key = key; + w->module = mod; + __module_get(mod); schedule_work(&w->work); } @@ -53,11 +58,11 @@ bool __do_once_start(bool *done, unsigned long *flags) EXPORT_SYMBOL(__do_once_start); void __do_once_done(bool *done, struct static_key_true *once_key, - unsigned long *flags) + unsigned long *flags, struct module *mod) __releases(once_lock) { *done = true; spin_unlock_irqrestore(&once_lock, *flags); - once_disable_jump(once_key); + once_disable_jump(once_key, mod); } EXPORT_SYMBOL(__do_once_done); -- cgit v1.2.3-71-gd317 From 71330842ff93ae67a066c1fa68d75672527312fa Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 9 Aug 2021 21:45:32 +0200 Subject: bpf: Add _kernel suffix to internal lockdown_bpf_read Rename LOCKDOWN_BPF_READ into LOCKDOWN_BPF_READ_KERNEL so we have naming more consistent with a LOCKDOWN_BPF_WRITE_USER option that we are adding. Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko --- include/linux/security.h | 2 +- kernel/bpf/helpers.c | 4 ++-- kernel/trace/bpf_trace.c | 8 ++++---- security/security.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 24eda04221e9..724d7a4a0c91 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -123,7 +123,7 @@ enum lockdown_reason { LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_KCORE, LOCKDOWN_KPROBES, - LOCKDOWN_BPF_READ, + LOCKDOWN_BPF_READ_KERNEL, LOCKDOWN_PERF, LOCKDOWN_TRACEFS, LOCKDOWN_XMON_RW, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 62cf00383910..0b04553e8c44 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1070,12 +1070,12 @@ bpf_base_func_proto(enum bpf_func_id func_id) case BPF_FUNC_probe_read_user: return &bpf_probe_read_user_proto; case BPF_FUNC_probe_read_kernel: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_kernel_proto; case BPF_FUNC_probe_read_user_str: return &bpf_probe_read_user_str_proto; case BPF_FUNC_probe_read_kernel_str: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_kernel_str_proto; case BPF_FUNC_snprintf_btf: return &bpf_snprintf_btf_proto; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b4916ef388ad..1836591197a5 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -999,19 +999,19 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_probe_read_user: return &bpf_probe_read_user_proto; case BPF_FUNC_probe_read_kernel: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_kernel_proto; case BPF_FUNC_probe_read_user_str: return &bpf_probe_read_user_str_proto; case BPF_FUNC_probe_read_kernel_str: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_kernel_str_proto; #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE case BPF_FUNC_probe_read: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_compat_proto; case BPF_FUNC_probe_read_str: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_compat_str_proto; #endif #ifdef CONFIG_CGROUPS diff --git a/security/security.c b/security/security.c index 09533cbb7221..6b83ab4e9d66 100644 --- a/security/security.c +++ b/security/security.c @@ -61,7 +61,7 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = { [LOCKDOWN_INTEGRITY_MAX] = "integrity", [LOCKDOWN_KCORE] = "/proc/kcore access", [LOCKDOWN_KPROBES] = "use of kprobes", - [LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM", + [LOCKDOWN_BPF_READ_KERNEL] = "use of bpf to read kernel RAM", [LOCKDOWN_PERF] = "unsafe use of perf", [LOCKDOWN_TRACEFS] = "use of tracefs", [LOCKDOWN_XMON_RW] = "xmon read and write access", -- cgit v1.2.3-71-gd317 From 563476ae0c5e48a028cbfa38fa9d2fc0418eb88f Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 11 Apr 2021 15:32:55 +0300 Subject: net/mlx5: Synchronize correct IRQ when destroying CQ The CQ destroy is performed based on the IRQ number that is stored in cq->irqn. That number wasn't set explicitly during CQ creation and as expected some of the API users of mlx5_core_create_cq() forgot to update it. This caused to wrong synchronization call of the wrong IRQ with a number 0 instead of the real one. As a fix, set the IRQ number directly in the mlx5_core_create_cq() and update all users accordingly. Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices") Fixes: ef1659ade359 ("IB/mlx5: Add DEVX support for CQ events") Signed-off-by: Shay Drory Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/cq.c | 4 +--- drivers/infiniband/hw/mlx5/devx.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 13 ++----------- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 20 ++++++++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c | 4 +--- drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 2 ++ .../ethernet/mellanox/mlx5/core/steering/dr_send.c | 4 +--- drivers/vdpa/mlx5/net/mlx5_vnet.c | 3 +-- include/linux/mlx5/driver.h | 3 +-- 10 files changed, 27 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 7abeb576b3c5..b8e5e371bb19 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -945,7 +945,6 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, u32 *cqb = NULL; void *cqc; int cqe_size; - unsigned int irqn; int eqn; int err; @@ -984,7 +983,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, INIT_WORK(&cq->notify_work, notify_soft_wc_handler); } - err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn); + err = mlx5_vector2eqn(dev->mdev, vector, &eqn); if (err) goto err_cqb; @@ -1007,7 +1006,6 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_cqb; mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); - cq->mcq.irqn = irqn; if (udata) cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; else diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index eb9b0a2707f8..c869b2a91a28 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -975,7 +975,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( struct mlx5_ib_dev *dev; int user_vector; int dev_eqn; - unsigned int irqn; int err; if (uverbs_copy_from(&user_vector, attrs, @@ -987,7 +986,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); - err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn); + err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn); if (err < 0) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index df3e4938ecdd..360e093874d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -134,6 +134,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->cqn); cq->uar = dev->priv.uar; + cq->irqn = eq->core.irqn; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index fd250f7bcd88..24f919ef9b8e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1535,15 +1535,9 @@ static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv, { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_core_cq *mcq = &cq->mcq; - int eqn_not_used; - unsigned int irqn; int err; u32 i; - err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); - if (err) - return err; - err = mlx5_cqwq_create(mdev, ¶m->wq, param->cqc, &cq->wq, &cq->wq_ctrl); if (err) @@ -1557,7 +1551,6 @@ static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv, mcq->vector = param->eq_ix; mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; - mcq->irqn = irqn; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); @@ -1605,11 +1598,10 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) void *in; void *cqc; int inlen; - unsigned int irqn_not_used; int eqn; int err; - err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used); + err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn); if (err) return err; @@ -1983,9 +1975,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_channel *c; unsigned int irq; int err; - int eqn; - err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); + err = mlx5_vector2irqn(priv->mdev, ix, &irq); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 6e074cc457de..605c8ecc3610 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -855,8 +855,8 @@ clean: return err; } -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, - unsigned int *irqn) +static int vector2eqnirqn(struct mlx5_core_dev *dev, int vector, int *eqn, + unsigned int *irqn) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq, *n; @@ -865,8 +865,10 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { if (i++ == vector) { - *eqn = eq->core.eqn; - *irqn = eq->core.irqn; + if (irqn) + *irqn = eq->core.irqn; + if (eqn) + *eqn = eq->core.eqn; err = 0; break; } @@ -874,8 +876,18 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, return err; } + +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn) +{ + return vector2eqnirqn(dev, vector, eqn, NULL); +} EXPORT_SYMBOL(mlx5_vector2eqn); +int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn) +{ + return vector2eqnirqn(dev, vector, NULL, irqn); +} + unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) { return dev->priv.eq_table->num_comp_eqs; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index bd66ab2af5b5..d5da4ab65766 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -417,7 +417,6 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) struct mlx5_wq_param wqp; struct mlx5_cqe64 *cqe; int inlen, err, eqn; - unsigned int irqn; void *cqc, *in; __be64 *pas; u32 i; @@ -446,7 +445,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) goto err_cqwq; } - err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn); + err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn); if (err) { kvfree(in); goto err_cqwq; @@ -476,7 +475,6 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) *conn->cq.mcq.arm_db = 0; conn->cq.mcq.vector = 0; conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; - conn->cq.mcq.irqn = irqn; conn->cq.mcq.uar = fdev->conn_res.uar; tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 624cedebb510..d3d628b862f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -104,4 +104,6 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); #endif +int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 12cf323a5943..9df0e73d1c35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -749,7 +749,6 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, struct mlx5_cqe64 *cqe; struct mlx5dr_cq *cq; int inlen, err, eqn; - unsigned int irqn; void *cqc, *in; __be64 *pas; int vector; @@ -782,7 +781,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, goto err_cqwq; vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev); - err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn); + err = mlx5_vector2eqn(mdev, vector, &eqn); if (err) { kvfree(in); goto err_cqwq; @@ -818,7 +817,6 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, *cq->mcq.arm_db = cpu_to_be32(2 << 28); cq->mcq.vector = 0; - cq->mcq.irqn = irqn; cq->mcq.uar = uar; return cq; diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 2a31467f7ac5..379a19144a25 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -526,7 +526,6 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) void __iomem *uar_page = ndev->mvdev.res.uar->map; u32 out[MLX5_ST_SZ_DW(create_cq_out)]; struct mlx5_vdpa_cq *vcq = &mvq->cq; - unsigned int irqn; __be64 *pas; int inlen; void *cqc; @@ -566,7 +565,7 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) /* Use vector 0 by default. Consider adding code to choose least used * vector. */ - err = mlx5_vector2eqn(mdev, 0, &eqn, &irqn); + err = mlx5_vector2eqn(mdev, 0, &eqn); if (err) goto err_vec; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1efe37466969..25a8be58d289 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1044,8 +1044,7 @@ void mlx5_unregister_debugfs(void); void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, - unsigned int *irqn); +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); -- cgit v1.2.3-71-gd317 From 51e1bb9eeaf7868db56e58f47848e364ab4c4129 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 9 Aug 2021 12:43:17 +0200 Subject: bpf: Add lockdown check for probe_write_user helper Back then, commit 96ae52279594 ("bpf: Add bpf_probe_write_user BPF helper to be called in tracers") added the bpf_probe_write_user() helper in order to allow to override user space memory. Its original goal was to have a facility to "debug, divert, and manipulate execution of semi-cooperative processes" under CAP_SYS_ADMIN. Write to kernel was explicitly disallowed since it would otherwise tamper with its integrity. One use case was shown in cf9b1199de27 ("samples/bpf: Add test/example of using bpf_probe_write_user bpf helper") where the program DNATs traffic at the time of connect(2) syscall, meaning, it rewrites the arguments to a syscall while they're still in userspace, and before the syscall has a chance to copy the argument into kernel space. These days we have better mechanisms in BPF for achieving the same (e.g. for load-balancers), but without having to write to userspace memory. Of course the bpf_probe_write_user() helper can also be used to abuse many other things for both good or bad purpose. Outside of BPF, there is a similar mechanism for ptrace(2) such as PTRACE_PEEK{TEXT,DATA} and PTRACE_POKE{TEXT,DATA}, but would likely require some more effort. Commit 96ae52279594 explicitly dedicated the helper for experimentation purpose only. Thus, move the helper's availability behind a newly added LOCKDOWN_BPF_WRITE_USER lockdown knob so that the helper is disabled under the "integrity" mode. More fine-grained control can be implemented also from LSM side with this change. Fixes: 96ae52279594 ("bpf: Add bpf_probe_write_user BPF helper to be called in tracers") Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko --- include/linux/security.h | 1 + kernel/trace/bpf_trace.c | 5 +++-- security/security.c | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 724d7a4a0c91..5b7288521300 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -120,6 +120,7 @@ enum lockdown_reason { LOCKDOWN_MMIOTRACE, LOCKDOWN_DEBUGFS, LOCKDOWN_XMON_WR, + LOCKDOWN_BPF_WRITE_USER, LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_KCORE, LOCKDOWN_KPROBES, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 1836591197a5..fdd14072fc3b 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -990,12 +990,13 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_numa_node_id_proto; case BPF_FUNC_perf_event_read: return &bpf_perf_event_read_proto; - case BPF_FUNC_probe_write_user: - return bpf_get_probe_write_proto(); case BPF_FUNC_current_task_under_cgroup: return &bpf_current_task_under_cgroup_proto; case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; + case BPF_FUNC_probe_write_user: + return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ? + NULL : bpf_get_probe_write_proto(); case BPF_FUNC_probe_read_user: return &bpf_probe_read_user_proto; case BPF_FUNC_probe_read_kernel: diff --git a/security/security.c b/security/security.c index 6b83ab4e9d66..9ffa9e9c5c55 100644 --- a/security/security.c +++ b/security/security.c @@ -58,6 +58,7 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = { [LOCKDOWN_MMIOTRACE] = "unsafe mmio", [LOCKDOWN_DEBUGFS] = "debugfs access", [LOCKDOWN_XMON_WR] = "xmon write access", + [LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM", [LOCKDOWN_INTEGRITY_MAX] = "integrity", [LOCKDOWN_KCORE] = "/proc/kcore access", [LOCKDOWN_KPROBES] = "use of kprobes", -- cgit v1.2.3-71-gd317 From a2baf4e8bb0f306fbed7b5e6197c02896a638ab5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 9 Aug 2021 18:04:13 -0700 Subject: bpf: Fix potentially incorrect results with bpf_get_local_storage() Commit b910eaaaa4b8 ("bpf: Fix NULL pointer dereference in bpf_get_local_storage() helper") fixed a bug for bpf_get_local_storage() helper so different tasks won't mess up with each other's percpu local storage. The percpu data contains 8 slots so it can hold up to 8 contexts (same or different tasks), for 8 different program runs, at the same time. This in general is sufficient. But our internal testing showed the following warning multiple times: [...] warning: WARNING: CPU: 13 PID: 41661 at include/linux/bpf-cgroup.h:193 __cgroup_bpf_run_filter_sock_ops+0x13e/0x180 RIP: 0010:__cgroup_bpf_run_filter_sock_ops+0x13e/0x180 tcp_call_bpf.constprop.99+0x93/0xc0 tcp_conn_request+0x41e/0xa50 ? tcp_rcv_state_process+0x203/0xe00 tcp_rcv_state_process+0x203/0xe00 ? sk_filter_trim_cap+0xbc/0x210 ? tcp_v6_inbound_md5_hash.constprop.41+0x44/0x160 tcp_v6_do_rcv+0x181/0x3e0 tcp_v6_rcv+0xc65/0xcb0 ip6_protocol_deliver_rcu+0xbd/0x450 ip6_input_finish+0x11/0x20 ip6_input+0xb5/0xc0 ip6_sublist_rcv_finish+0x37/0x50 ip6_sublist_rcv+0x1dc/0x270 ipv6_list_rcv+0x113/0x140 __netif_receive_skb_list_core+0x1a0/0x210 netif_receive_skb_list_internal+0x186/0x2a0 gro_normal_list.part.170+0x19/0x40 napi_complete_done+0x65/0x150 mlx5e_napi_poll+0x1ae/0x680 __napi_poll+0x25/0x120 net_rx_action+0x11e/0x280 __do_softirq+0xbb/0x271 irq_exit_rcu+0x97/0xa0 common_interrupt+0x7f/0xa0 asm_common_interrupt+0x1e/0x40 RIP: 0010:bpf_prog_1835a9241238291a_tw_egress+0x5/0xbac ? __cgroup_bpf_run_filter_skb+0x378/0x4e0 ? do_softirq+0x34/0x70 ? ip6_finish_output2+0x266/0x590 ? ip6_finish_output+0x66/0xa0 ? ip6_output+0x6c/0x130 ? ip6_xmit+0x279/0x550 ? ip6_dst_check+0x61/0xd0 [...] Using drgn [0] to dump the percpu buffer contents showed that on this CPU slot 0 is still available, but slots 1-7 are occupied and those tasks in slots 1-7 mostly don't exist any more. So we might have issues in bpf_cgroup_storage_unset(). Further debugging confirmed that there is a bug in bpf_cgroup_storage_unset(). Currently, it tries to unset "current" slot with searching from the start. So the following sequence is possible: 1. A task is running and claims slot 0 2. Running BPF program is done, and it checked slot 0 has the "task" and ready to reset it to NULL (not yet). 3. An interrupt happens, another BPF program runs and it claims slot 1 with the *same* task. 4. The unset() in interrupt context releases slot 0 since it matches "task". 5. Interrupt is done, the task in process context reset slot 0. At the end, slot 1 is not reset and the same process can continue to occupy slots 2-7 and finally, when the above step 1-5 is repeated again, step 3 BPF program won't be able to claim an empty slot and a warning will be issued. To fix the issue, for unset() function, we should traverse from the last slot to the first. This way, the above issue can be avoided. The same reverse traversal should also be done in bpf_get_local_storage() helper itself. Otherwise, incorrect local storage may be returned to BPF program. [0] https://github.com/osandov/drgn Fixes: b910eaaaa4b8 ("bpf: Fix NULL pointer dereference in bpf_get_local_storage() helper") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210810010413.1976277-1-yhs@fb.com --- include/linux/bpf-cgroup.h | 4 ++-- kernel/bpf/helpers.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 8b77d08d4b47..6c9b10d82c80 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -201,8 +201,8 @@ static inline void bpf_cgroup_storage_unset(void) { int i; - for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { - if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) + for (i = BPF_CGROUP_STORAGE_NEST_MAX - 1; i >= 0; i--) { + if (likely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) continue; this_cpu_write(bpf_cgroup_storage_info[i].task, NULL); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 0b04553e8c44..7a97b2f4747d 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -397,8 +397,8 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) void *ptr; int i; - for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { - if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) + for (i = BPF_CGROUP_STORAGE_NEST_MAX - 1; i >= 0; i--) { + if (likely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) continue; storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]); -- cgit v1.2.3-71-gd317 From b69dd5b3780a7298bd893816a09da751bc0636f7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Aug 2021 12:57:15 -0700 Subject: net: igmp: increase size of mr_ifc_count Some arches support cmpxchg() on 4-byte and 8-byte only. Increase mr_ifc_count width to 32bit to fix this problem. Fixes: 4a2b285e7e10 ("net: igmp: fix data-race in igmp_ifc_timer_expire()") Signed-off-by: Eric Dumazet Reported-by: Guenter Roeck Link: https://lore.kernel.org/r/20210811195715.3684218-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/inetdevice.h | 2 +- net/ipv4/igmp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 53aa0343bf69..aaf4f1b4c277 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -41,7 +41,7 @@ struct in_device { unsigned long mr_qri; /* Query Response Interval */ unsigned char mr_qrv; /* Query Robustness Variable */ unsigned char mr_gq_running; - unsigned char mr_ifc_count; + u32 mr_ifc_count; struct timer_list mr_gq_timer; /* general query timer */ struct timer_list mr_ifc_timer; /* interface change timer */ diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a51360087b19..00576bae183d 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -803,7 +803,7 @@ static void igmp_gq_timer_expire(struct timer_list *t) static void igmp_ifc_timer_expire(struct timer_list *t) { struct in_device *in_dev = from_timer(in_dev, t, mr_ifc_timer); - u8 mr_ifc_count; + u32 mr_ifc_count; igmpv3_send_cr(in_dev); restart: -- cgit v1.2.3-71-gd317