summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c32
-rw-r--r--net/core/devlink.c99
-rw-r--r--net/core/fib_rules.c4
-rw-r--r--net/core/gro_cells.c7
-rw-r--r--net/core/neighbour.c2
-rw-r--r--net/core/netpoll.c22
-rw-r--r--net/core/rtnetlink.c12
-rw-r--r--net/core/skbuff.c15
-rw-r--r--net/core/skmsg.c87
-rw-r--r--net/core/sock.c5
11 files changed, 201 insertions, 86 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 9fcaa544f11a..81809fa735a7 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -709,7 +709,7 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
EXPORT_SYMBOL(zerocopy_sg_from_iter);
/**
- * skb_copy_and_csum_datagram_iter - Copy datagram to an iovec iterator
+ * skb_copy_and_csum_datagram - Copy datagram to an iovec iterator
* and update a checksum.
* @skb: buffer to copy
* @offset: offset in the buffer to start copying from
diff --git a/net/core/dev.c b/net/core/dev.c
index 3b6b0e175fe7..ce8fea2e2788 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1069,19 +1069,6 @@ struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
}
EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
-struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
-{
- struct net_device *dev;
-
- ASSERT_RTNL();
- for_each_netdev(net, dev)
- if (dev->type == type)
- return dev;
-
- return NULL;
-}
-EXPORT_SYMBOL(__dev_getfirstbyhwtype);
-
struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
struct net_device *dev, *ret = NULL;
@@ -3495,6 +3482,11 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb,
if (gso_segs > dev->gso_max_segs)
return features & ~NETIF_F_GSO_MASK;
+ if (!skb_shinfo(skb)->gso_type) {
+ skb_warn_bad_offload(skb);
+ return features & ~NETIF_F_GSO_MASK;
+ }
+
/* Support for GSO partial features requires software
* intervention before we can actually process the packets
* so we need to strip support for any partial features now
@@ -3867,6 +3859,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
return skb;
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
+ qdisc_skb_cb(skb)->mru = 0;
mini_qdisc_bstats_cpu_update(miniq, skb);
switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
@@ -4180,7 +4173,7 @@ int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev)
}
EXPORT_SYMBOL(dev_queue_xmit_accel);
-int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
+int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
{
struct net_device *dev = skb->dev;
struct sk_buff *orig_skb = skb;
@@ -4210,17 +4203,13 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
dev_xmit_recursion_dec();
local_bh_enable();
-
- if (!dev_xmit_complete(ret))
- kfree_skb(skb);
-
return ret;
drop:
atomic_long_inc(&dev->tx_dropped);
kfree_skb_list(skb);
return NET_XMIT_DROP;
}
-EXPORT_SYMBOL(dev_direct_xmit);
+EXPORT_SYMBOL(__dev_direct_xmit);
/*************************************************************************
* Receiver routines
@@ -4954,6 +4943,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
}
qdisc_skb_cb(skb)->pkt_len = skb->len;
+ qdisc_skb_cb(skb)->mru = 0;
skb->tc_at_ingress = 1;
mini_qdisc_bstats_cpu_update(miniq, skb);
@@ -6966,7 +6956,7 @@ bool netdev_has_upper_dev(struct net_device *dev,
EXPORT_SYMBOL(netdev_has_upper_dev);
/**
- * netdev_has_upper_dev_all - Check if device is linked to an upper device
+ * netdev_has_upper_dev_all_rcu - Check if device is linked to an upper device
* @dev: device
* @upper_dev: upper device to check
*
@@ -8204,7 +8194,7 @@ EXPORT_SYMBOL(netdev_lower_dev_get_private);
/**
- * netdev_lower_change - Dispatch event about lower device state change
+ * netdev_lower_state_changed - Dispatch event about lower device state change
* @lower_dev: device
* @lower_state_info: state to dispatch
*
diff --git a/net/core/devlink.c b/net/core/devlink.c
index ab4b1368904f..88c0ac8ed444 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -517,7 +517,7 @@ devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_l
return test_bit(limit, &devlink->ops->reload_limits);
}
-static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_action action,
+static int devlink_reload_stat_put(struct sk_buff *msg,
enum devlink_reload_limit limit, u32 value)
{
struct nlattr *reload_stats_entry;
@@ -526,8 +526,7 @@ static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_acti
if (!reload_stats_entry)
return -EMSGSIZE;
- if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, action) ||
- nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
goto nla_put_failure;
nla_nest_end(msg, reload_stats_entry);
@@ -540,7 +539,7 @@ nla_put_failure:
static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote)
{
- struct nlattr *reload_stats_attr;
+ struct nlattr *reload_stats_attr, *act_info, *act_stats;
int i, j, stat_idx;
u32 value;
@@ -552,17 +551,29 @@ static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink
if (!reload_stats_attr)
return -EMSGSIZE;
- for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
- /* Remote stats are shown even if not locally supported. Stats
- * of actions with unspecified limit are shown though drivers
- * don't need to register unspecified limit.
- */
- if (!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
- !devlink_reload_limit_is_supported(devlink, j))
+ for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
+ if ((!is_remote &&
+ !devlink_reload_action_is_supported(devlink, i)) ||
+ i == DEVLINK_RELOAD_ACTION_UNSPEC)
continue;
- for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
- if ((!is_remote && !devlink_reload_action_is_supported(devlink, i)) ||
- i == DEVLINK_RELOAD_ACTION_UNSPEC ||
+ act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO);
+ if (!act_info)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i))
+ goto action_info_nest_cancel;
+ act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS);
+ if (!act_stats)
+ goto action_info_nest_cancel;
+
+ for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
+ /* Remote stats are shown even if not locally supported.
+ * Stats of actions with unspecified limit are shown
+ * though drivers don't need to register unspecified
+ * limit.
+ */
+ if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
+ !devlink_reload_limit_is_supported(devlink, j)) ||
devlink_reload_combination_is_invalid(i, j))
continue;
@@ -571,13 +582,19 @@ static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink
value = devlink->stats.reload_stats[stat_idx];
else
value = devlink->stats.remote_reload_stats[stat_idx];
- if (devlink_reload_stat_put(msg, i, j, value))
- goto nla_put_failure;
+ if (devlink_reload_stat_put(msg, j, value))
+ goto action_stats_nest_cancel;
}
+ nla_nest_end(msg, act_stats);
+ nla_nest_end(msg, act_info);
}
nla_nest_end(msg, reload_stats_attr);
return 0;
+action_stats_nest_cancel:
+ nla_nest_cancel(msg, act_stats);
+action_info_nest_cancel:
+ nla_nest_cancel(msg, act_info);
nla_put_failure:
nla_nest_cancel(msg, reload_stats_attr);
return -EMSGSIZE;
@@ -755,6 +772,8 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
goto nla_put_failure;
+ /* Hold rtnl lock while accessing port's netdev attributes. */
+ rtnl_lock();
spin_lock_bh(&devlink_port->type_lock);
if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
goto nla_put_failure_type_locked;
@@ -763,9 +782,10 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
devlink_port->desired_type))
goto nla_put_failure_type_locked;
if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) {
+ struct net *net = devlink_net(devlink_port->devlink);
struct net_device *netdev = devlink_port->type_dev;
- if (netdev &&
+ if (netdev && net_eq(net, dev_net(netdev)) &&
(nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX,
netdev->ifindex) ||
nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME,
@@ -781,6 +801,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
goto nla_put_failure_type_locked;
}
spin_unlock_bh(&devlink_port->type_lock);
+ rtnl_unlock();
if (devlink_nl_port_attrs_put(msg, devlink_port))
goto nla_put_failure;
if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack))
@@ -791,6 +812,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
nla_put_failure_type_locked:
spin_unlock_bh(&devlink_port->type_lock);
+ rtnl_unlock();
nla_put_failure:
genlmsg_cancel(msg, hdr);
return -EMSGSIZE;
@@ -1448,7 +1470,7 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index,
pool_index, &cur, &max);
if (err && err != -EOPNOTSUPP)
- return err;
+ goto sb_occ_get_failure;
if (!err) {
if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
goto nla_put_failure;
@@ -1461,8 +1483,10 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
return 0;
nla_put_failure:
+ err = -EMSGSIZE;
+sb_occ_get_failure:
genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
+ return err;
}
static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb,
@@ -3370,7 +3394,7 @@ out_free_msg:
nlmsg_free(msg);
}
-void devlink_flash_update_begin_notify(struct devlink *devlink)
+static void devlink_flash_update_begin_notify(struct devlink *devlink)
{
struct devlink_flash_notify params = { 0 };
@@ -3378,9 +3402,8 @@ void devlink_flash_update_begin_notify(struct devlink *devlink)
DEVLINK_CMD_FLASH_UPDATE,
&params);
}
-EXPORT_SYMBOL_GPL(devlink_flash_update_begin_notify);
-void devlink_flash_update_end_notify(struct devlink *devlink)
+static void devlink_flash_update_end_notify(struct devlink *devlink)
{
struct devlink_flash_notify params = { 0 };
@@ -3388,7 +3411,6 @@ void devlink_flash_update_end_notify(struct devlink *devlink)
DEVLINK_CMD_FLASH_UPDATE_END,
&params);
}
-EXPORT_SYMBOL_GPL(devlink_flash_update_end_notify);
void devlink_flash_update_status_notify(struct devlink *devlink,
const char *status_msg,
@@ -3429,10 +3451,12 @@ EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify);
static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
struct genl_info *info)
{
- struct nlattr *nla_component, *nla_overwrite_mask;
+ struct nlattr *nla_component, *nla_overwrite_mask, *nla_file_name;
struct devlink_flash_update_params params = {};
struct devlink *devlink = info->user_ptr[0];
+ const char *file_name;
u32 supported_params;
+ int ret;
if (!devlink->ops->flash_update)
return -EOPNOTSUPP;
@@ -3442,8 +3466,6 @@ static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
supported_params = devlink->ops->supported_flash_update_params;
- params.file_name = nla_data(info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]);
-
nla_component = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT];
if (nla_component) {
if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT)) {
@@ -3467,7 +3489,21 @@ static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
params.overwrite_mask = sections.value & sections.selector;
}
- return devlink->ops->flash_update(devlink, &params, info->extack);
+ nla_file_name = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME];
+ file_name = nla_data(nla_file_name);
+ ret = request_firmware(&params.fw, file_name, devlink->dev);
+ if (ret) {
+ NL_SET_ERR_MSG_ATTR(info->extack, nla_file_name, "failed to locate the requested firmware file");
+ return ret;
+ }
+
+ devlink_flash_update_begin_notify(devlink);
+ ret = devlink->ops->flash_update(devlink, &params, info->extack);
+ devlink_flash_update_end_notify(devlink);
+
+ release_firmware(params.fw);
+
+ return ret;
}
static const struct devlink_param devlink_param_generic[] = {
@@ -9476,6 +9512,7 @@ static const struct devlink_trap devlink_trap_generic[] = {
DEVLINK_TRAP(DCCP_PARSING, DROP),
DEVLINK_TRAP(GTP_PARSING, DROP),
DEVLINK_TRAP(ESP_PARSING, DROP),
+ DEVLINK_TRAP(BLACKHOLE_NEXTHOP, DROP),
};
#define DEVLINK_TRAP_GROUP(_id) \
@@ -10225,12 +10262,18 @@ int devlink_compat_flash_update(struct net_device *dev, const char *file_name)
goto out;
}
- params.file_name = file_name;
+ ret = request_firmware(&params.fw, file_name, devlink->dev);
+ if (ret)
+ goto out;
mutex_lock(&devlink->lock);
+ devlink_flash_update_begin_notify(devlink);
ret = devlink->ops->flash_update(devlink, &params, NULL);
+ devlink_flash_update_end_notify(devlink);
mutex_unlock(&devlink->lock);
+ release_firmware(params.fw);
+
out:
rtnl_lock();
dev_put(dev);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 7bcfb16854cb..cd80ffed6d26 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -563,7 +563,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net_device *dev;
nlrule->iifindex = -1;
- nla_strlcpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
+ nla_strscpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
dev = __dev_get_by_name(net, nlrule->iifname);
if (dev)
nlrule->iifindex = dev->ifindex;
@@ -573,7 +573,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net_device *dev;
nlrule->oifindex = -1;
- nla_strlcpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
+ nla_strscpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
dev = __dev_get_by_name(net, nlrule->oifname);
if (dev)
nlrule->oifindex = dev->ifindex;
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index e095fb871d91..6eb2e5ec2c50 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -99,9 +99,14 @@ void gro_cells_destroy(struct gro_cells *gcells)
struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
napi_disable(&cell->napi);
- netif_napi_del(&cell->napi);
+ __netif_napi_del(&cell->napi);
__skb_queue_purge(&cell->napi_skbs);
}
+ /* This barrier is needed because netpoll could access dev->napi_list
+ * under rcu protection.
+ */
+ synchronize_net();
+
free_percpu(gcells->cells);
gcells->cells = NULL;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8e39e28b0a8d..9500d28a43b0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -235,6 +235,8 @@ static int neigh_forced_gc(struct neigh_table *tbl)
write_lock(&n->lock);
if ((n->nud_state == NUD_FAILED) ||
+ (tbl->is_multicast &&
+ tbl->is_multicast(n->primary_key)) ||
time_after(tref, n->updated))
remove = true;
write_unlock(&n->lock);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c310c7c1cef7..960948290001 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -29,6 +29,7 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/if_vlan.h>
+#include <net/dsa.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/addrconf.h>
@@ -657,15 +658,15 @@ EXPORT_SYMBOL_GPL(__netpoll_setup);
int netpoll_setup(struct netpoll *np)
{
- struct net_device *ndev = NULL;
+ struct net_device *ndev = NULL, *dev = NULL;
+ struct net *net = current->nsproxy->net_ns;
struct in_device *in_dev;
int err;
rtnl_lock();
- if (np->dev_name[0]) {
- struct net *net = current->nsproxy->net_ns;
+ if (np->dev_name[0])
ndev = __dev_get_by_name(net, np->dev_name);
- }
+
if (!ndev) {
np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
err = -ENODEV;
@@ -673,6 +674,19 @@ int netpoll_setup(struct netpoll *np)
}
dev_hold(ndev);
+ /* bring up DSA management network devices up first */
+ for_each_netdev(net, dev) {
+ if (!netdev_uses_dsa(dev))
+ continue;
+
+ err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
+ if (err < 0) {
+ np_err(np, "%s failed to open %s\n",
+ np->dev_name, dev->name);
+ goto put;
+ }
+ }
+
if (netdev_master_upper_dev_get(ndev)) {
np_err(np, "%s is a slave device, aborting\n", np->dev_name);
err = -EBUSY;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 7d7223691783..60917ff4a00b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1939,7 +1939,7 @@ static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla
if (linfo[IFLA_INFO_KIND]) {
char kind[MODULE_NAME_LEN];
- nla_strlcpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind));
+ nla_strscpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind));
ops = rtnl_link_ops_get(kind);
}
@@ -2953,9 +2953,9 @@ static struct net_device *rtnl_dev_get(struct net *net,
if (!ifname) {
ifname = buffer;
if (ifname_attr)
- nla_strlcpy(ifname, ifname_attr, IFNAMSIZ);
+ nla_strscpy(ifname, ifname_attr, IFNAMSIZ);
else if (altifname_attr)
- nla_strlcpy(ifname, altifname_attr, ALTIFNAMSIZ);
+ nla_strscpy(ifname, altifname_attr, ALTIFNAMSIZ);
else
return NULL;
}
@@ -2983,7 +2983,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
if (tb[IFLA_IFNAME])
- nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+ nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
ifname[0] = '\0';
@@ -3264,7 +3264,7 @@ replay:
return err;
if (tb[IFLA_IFNAME])
- nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+ nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
ifname[0] = '\0';
@@ -3296,7 +3296,7 @@ replay:
memset(linkinfo, 0, sizeof(linkinfo));
if (linkinfo[IFLA_INFO_KIND]) {
- nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
+ nla_strscpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
ops = rtnl_link_ops_get(kind);
} else {
kind[0] = '\0';
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c9a5a3c262c8..bfa5c9969393 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -842,7 +842,7 @@ EXPORT_SYMBOL(consume_skb);
#endif
/**
- * consume_stateless_skb - free an skbuff, assuming it is stateless
+ * __consume_stateless_skb - free an skbuff, assuming it is stateless
* @skb: buffer to free
*
* Alike consume_skb(), but this variant assumes that this is the last
@@ -902,6 +902,8 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
return;
}
+ lockdep_assert_in_softirq();
+
if (!skb_unref(skb))
return;
@@ -4208,9 +4210,6 @@ static const u8 skb_ext_type_len[] = {
#if IS_ENABLED(CONFIG_MPTCP)
[SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext),
#endif
-#if IS_ENABLED(CONFIG_KCOV)
- [SKB_EXT_KCOV_HANDLE] = SKB_EXT_CHUNKSIZEOF(u64),
-#endif
};
static __always_inline unsigned int skb_ext_total_length(void)
@@ -4228,9 +4227,6 @@ static __always_inline unsigned int skb_ext_total_length(void)
#if IS_ENABLED(CONFIG_MPTCP)
skb_ext_type_len[SKB_EXT_MPTCP] +
#endif
-#if IS_ENABLED(CONFIG_KCOV)
- skb_ext_type_len[SKB_EXT_KCOV_HANDLE] +
-#endif
0;
}
@@ -4560,7 +4556,7 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
if (skb && (skb_next = skb_peek(q))) {
icmp_next = is_icmp_err_skb(skb_next);
if (icmp_next)
- sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin;
+ sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_errno;
}
spin_unlock_irqrestore(&q->lock, flags);
@@ -5798,6 +5794,9 @@ int skb_mpls_dec_ttl(struct sk_buff *skb)
if (unlikely(!eth_p_mpls(skb->protocol)))
return -EINVAL;
+ if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN))
+ return -ENOMEM;
+
lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry);
ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
if (!--ttl)
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 654182ecf87b..25cdbb20f3a0 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -170,10 +170,12 @@ static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i,
struct scatterlist *sge = sk_msg_elem(msg, i);
u32 len = sge->length;
- if (charge)
- sk_mem_uncharge(sk, len);
- if (!msg->skb)
+ /* When the skb owns the memory we free it from consume_skb path. */
+ if (!msg->skb) {
+ if (charge)
+ sk_mem_uncharge(sk, len);
put_page(sg_page(sge));
+ }
memset(sge, 0, sizeof(*sge));
return len;
}
@@ -397,28 +399,45 @@ out:
}
EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
-static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
+ struct sk_buff *skb)
{
- struct sock *sk = psock->sk;
- int copied = 0, num_sge;
struct sk_msg *msg;
+ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+ return NULL;
+
+ if (!sk_rmem_schedule(sk, skb, skb->truesize))
+ return NULL;
+
msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
if (unlikely(!msg))
- return -EAGAIN;
- if (!sk_rmem_schedule(sk, skb, skb->len)) {
- kfree(msg);
- return -EAGAIN;
- }
+ return NULL;
sk_msg_init(msg);
+ return msg;
+}
+
+static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
+ struct sk_psock *psock,
+ struct sock *sk,
+ struct sk_msg *msg)
+{
+ int num_sge, copied;
+
+ /* skb linearize may fail with ENOMEM, but lets simply try again
+ * later if this happens. Under memory pressure we don't want to
+ * drop the skb. We need to linearize the skb so that the mapping
+ * in skb_to_sgvec can not error.
+ */
+ if (skb_linearize(skb))
+ return -EAGAIN;
num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
if (unlikely(num_sge < 0)) {
kfree(msg);
return num_sge;
}
- sk_mem_charge(sk, skb->len);
copied = skb->len;
msg->sg.start = 0;
msg->sg.size = copied;
@@ -430,6 +449,48 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
return copied;
}
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb);
+
+static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+{
+ struct sock *sk = psock->sk;
+ struct sk_msg *msg;
+
+ /* If we are receiving on the same sock skb->sk is already assigned,
+ * skip memory accounting and owner transition seeing it already set
+ * correctly.
+ */
+ if (unlikely(skb->sk == sk))
+ return sk_psock_skb_ingress_self(psock, skb);
+ msg = sk_psock_create_ingress_msg(sk, skb);
+ if (!msg)
+ return -EAGAIN;
+
+ /* This will transition ownership of the data from the socket where
+ * the BPF program was run initiating the redirect to the socket
+ * we will eventually receive this data on. The data will be released
+ * from skb_consume found in __tcp_bpf_recvmsg() after its been copied
+ * into user buffers.
+ */
+ skb_set_owner_r(skb, sk);
+ return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+}
+
+/* Puts an skb on the ingress queue of the socket already assigned to the
+ * skb. In this case we do not need to check memory limits or skb_set_owner_r
+ * because the skb is already accounted for here.
+ */
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb)
+{
+ struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+ struct sock *sk = psock->sk;
+
+ if (unlikely(!msg))
+ return -EAGAIN;
+ sk_msg_init(msg);
+ return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+}
+
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len, bool ingress)
{
@@ -789,7 +850,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
* retrying later from workqueue.
*/
if (skb_queue_empty(&psock->ingress_skb)) {
- err = sk_psock_skb_ingress(psock, skb);
+ err = sk_psock_skb_ingress_self(psock, skb);
}
if (err < 0) {
skb_queue_tail(&psock->ingress_skb, skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index d422a6808405..4fd7e785f177 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2505,7 +2505,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
}
EXPORT_SYMBOL(sk_page_frag_refill);
-static void __lock_sock(struct sock *sk)
+void __lock_sock(struct sock *sk)
__releases(&sk->sk_lock.slock)
__acquires(&sk->sk_lock.slock)
{
@@ -3097,7 +3097,7 @@ EXPORT_SYMBOL(release_sock);
*
* sk_lock.slock unlocked, owned = 1, BH enabled
*/
-bool lock_sock_fast(struct sock *sk)
+bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
{
might_sleep();
spin_lock_bh(&sk->sk_lock.slock);
@@ -3115,6 +3115,7 @@ bool lock_sock_fast(struct sock *sk)
* The sk_lock has mutex_lock() semantics here:
*/
mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+ __acquire(&sk->sk_lock.slock);
local_bh_enable();
return true;
}