From 66be7d2bcd826344894be09dc385f9f805136b84 Mon Sep 17 00:00:00 2001 From: Henning Rogge Date: Fri, 12 Sep 2014 08:58:49 +0200 Subject: cfg80211: add ops to query mesh proxy path table Add two new cfg80211 operations for querying a table with proxied mesh paths. Signed-off-by: Henning Rogge Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 4b28dc07bcb1..846071b0cde9 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -738,6 +738,10 @@ * before removing a station entry entirely, or before disassociating * or similar, cleanup will happen in the driver/device in this case. * + * @NL80211_CMD_GET_MPP: Get mesh path attributes for mesh proxy path to + * destination %NL80211_ATTR_MAC on the interface identified by + * %NL80211_ATTR_IFINDEX. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -912,6 +916,8 @@ enum nl80211_commands { NL80211_CMD_ADD_TX_TS, NL80211_CMD_DEL_TX_TS, + NL80211_CMD_GET_MPP, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ -- cgit v1.2.3-71-gd317 From 988568669d171774b96e59fe35ef575df7f8cffd Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 20 Oct 2014 13:20:45 +0300 Subject: cfg80211: Specify frame and reason code for NL80211_CMD_DEL_STATION The optional NL80211_ATTR_MGMT_SUBTYPE and NL80211_ATTR_REASON_CODE attributes can now be included in NL80211_CMD_DEL_STATION to indicate to the driver which frame (Deauthentication/Disassociation) and reason code in that frame should be used to indicate removal to the specific station. This is used by drivers that implement AP SME and generate those frames internally. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 +++++ include/uapi/linux/nl80211.h | 6 +++++- net/wireless/nl80211.c | 21 +++++++++++++++++++++ net/wireless/trace.h | 10 ++++++++-- 4 files changed, 39 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ebb69f671979..ed896c0b5b8b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -804,9 +804,14 @@ struct station_parameters { * Used to delete a station entry (or all stations). * * @mac: MAC address of the station to remove or NULL to remove all stations + * @subtype: Management frame subtype to use for indicating removal + * (10 = Disassociation, 12 = Deauthentication) + * @reason_code: Reason code for the Disassociation/Deauthentication frame */ struct station_del_parameters { const u8 *mac; + u8 subtype; + u16 reason_code; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 846071b0cde9..b553c48404d3 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -227,7 +227,11 @@ * the interface identified by %NL80211_ATTR_IFINDEX. * @NL80211_CMD_DEL_STATION: Remove a station identified by %NL80211_ATTR_MAC * or, if no MAC address given, all stations, on the interface identified - * by %NL80211_ATTR_IFINDEX. + * by %NL80211_ATTR_IFINDEX. %NL80211_ATTR_MGMT_SUBTYPE and + * %NL80211_ATTR_REASON_CODE can optionally be used to specify which type + * of disconnection indication should be sent to the station + * (Deauthentication or Disassociation frame and reason code for that + * frame). * * @NL80211_CMD_GET_MPATH: Get mesh path attributes for mesh path to * destination %NL80211_ATTR_MAC on the interface identified by diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 40cf7b937926..0c0f2045e1f8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4414,6 +4414,27 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->del_station) return -EOPNOTSUPP; + if (info->attrs[NL80211_ATTR_MGMT_SUBTYPE]) { + params.subtype = + nla_get_u8(info->attrs[NL80211_ATTR_MGMT_SUBTYPE]); + if (params.subtype != IEEE80211_STYPE_DISASSOC >> 4 && + params.subtype != IEEE80211_STYPE_DEAUTH >> 4) + return -EINVAL; + } else { + /* Default to Deauthentication frame */ + params.subtype = IEEE80211_STYPE_DEAUTH >> 4; + } + + if (info->attrs[NL80211_ATTR_REASON_CODE]) { + params.reason_code = + nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]); + if (params.reason_code == 0) + return -EINVAL; /* 0 is reserved */ + } else { + /* Default to reason code 2 */ + params.reason_code = WLAN_REASON_PREV_AUTH_NOT_VALID; + } + return rdev_del_station(rdev, dev, ¶ms); } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index b1339400631d..cdb2c2ef1ae1 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -688,14 +688,20 @@ DECLARE_EVENT_CLASS(station_del, WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(sta_mac) + __field(u8, subtype) + __field(u16, reason_code) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(sta_mac, params->mac); + __entry->subtype = params->subtype; + __entry->reason_code = params->reason_code; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: " MAC_PR_FMT, - WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(sta_mac)) + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: " MAC_PR_FMT + ", subtype: %u, reason_code: %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(sta_mac), + __entry->subtype, __entry->reason_code) ); DEFINE_EVENT(station_del, rdev_del_station, -- cgit v1.2.3-71-gd317 From 723e73acd16d2ea08cdbd8b449b7bc69389b94d4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 22 Oct 2014 09:25:06 +0200 Subject: cfg80211: make WMM TSPEC support flag an nl80211 feature flag During the review of the corresponding wpa_supplicant patches we noticed that the only way for it to detect that this functionality is supported currently is to check for the command support. This can be misleading though, as the command was also designed to, in the future, support pure 802.11 TSPECs. Expose the WMM-TSPEC feature flag to nl80211 so later we can also expose an 802.11-TSPEC feature flag (if needed) to differentiate the two cases. Note: this change isn't needed in 3.18 as there's no driver there yet that supports the functionality at all. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 +----- include/uapi/linux/nl80211.h | 5 +++++ net/wireless/nl80211.c | 14 ++++++-------- 3 files changed, 12 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ed896c0b5b8b..77aa805d7e7c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2646,13 +2646,9 @@ struct cfg80211_ops { * @WIPHY_FLAG_SUPPORTS_5_10_MHZ: Device supports 5 MHz and 10 MHz channels. * @WIPHY_FLAG_HAS_CHANNEL_SWITCH: Device supports channel switch in * beaconing mode (AP, IBSS, Mesh, ...). - * @WIPHY_FLAG_SUPPORTS_WMM_ADMISSION: the device supports setting up WMM - * TSPEC sessions (TID aka TSID 0-7) with the NL80211_CMD_ADD_TX_TS - * command. Standard IEEE 802.11 TSPEC setup is not yet supported, it - * needs to be able to handle Block-Ack agreements and other things. */ enum wiphy_flags { - WIPHY_FLAG_SUPPORTS_WMM_ADMISSION = BIT(0), + /* use hole at 0 */ /* use hole at 1 */ /* use hole at 2 */ WIPHY_FLAG_NETNS_OK = BIT(3), diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b553c48404d3..be1d5def304d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4052,6 +4052,10 @@ enum nl80211_ap_sme_features { * multiplexing powersave, ie. can turn off all but one chain * and then wake the rest up as required after, for example, * rts/cts handshake. + * @NL80211_FEATURE_SUPPORTS_WMM_ADMISSION: the device supports setting up WMM + * TSPEC sessions (TID aka TSID 0-7) with the %NL80211_CMD_ADD_TX_TS + * command. Standard IEEE 802.11 TSPEC setup is not yet supported, it + * needs to be able to handle Block-Ack agreements and other things. */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -4080,6 +4084,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_ACKTO_ESTIMATION = 1 << 23, NL80211_FEATURE_STATIC_SMPS = 1 << 24, NL80211_FEATURE_DYNAMIC_SMPS = 1 << 25, + NL80211_FEATURE_SUPPORTS_WMM_ADMISSION = 1 << 26, }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d05fe6d6481d..d98d4ea27819 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1514,8 +1514,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH) CMD(channel_switch, CHANNEL_SWITCH); CMD(set_qos_map, SET_QOS_MAP); - if (rdev->wiphy.flags & - WIPHY_FLAG_SUPPORTS_WMM_ADMISSION) + if (rdev->wiphy.features & + NL80211_FEATURE_SUPPORTS_WMM_ADMISSION) CMD(add_tx_ts, ADD_TX_TS); } /* add into the if now */ @@ -9557,7 +9557,7 @@ static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info) u16 admitted_time = 0; int err; - if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_WMM_ADMISSION)) + if (!(rdev->wiphy.features & NL80211_FEATURE_SUPPORTS_WMM_ADMISSION)) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC] || @@ -9573,12 +9573,10 @@ static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info) return -EINVAL; /* WMM uses TIDs 0-7 even for TSPEC */ - if (tsid < IEEE80211_FIRST_TSPEC_TSID) { - if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_WMM_ADMISSION)) - return -EINVAL; - } else { + if (tsid >= IEEE80211_FIRST_TSPEC_TSID) { /* TODO: handle 802.11 TSPEC/admission control - * need more attributes for that (e.g. BA session requirement) + * need more attributes for that (e.g. BA session requirement); + * change the WMM adminssion test above to allow both then */ return -EINVAL; } -- cgit v1.2.3-71-gd317 From e8f479b11268af3f206d1580f6b0d572d6ecb4f7 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 22 Oct 2014 12:23:05 -0700 Subject: cfg80211: support configuring vif mac addr on create This is useful when creating virtual interfaces. Keeps udev from mucking with things it shouldn't, since the default MAC is never seen by udev when specified on the cmd-line during creation. Signed-off-by: Ben Greear [check for feature flag in nl80211 to force drivers to set it] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 9 ++++++--- include/uapi/linux/nl80211.h | 4 ++++ net/wireless/nl80211.c | 4 +++- 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 39d7996b0609..f67948e18600 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -319,9 +319,12 @@ struct ieee80211_supported_band { /** * struct vif_params - describes virtual interface parameters * @use_4addr: use 4-address frames - * @macaddr: address to use for this virtual interface. This will only - * be used for non-netdevice interfaces. If this parameter is set - * to zero address the driver may determine the address as needed. + * @macaddr: address to use for this virtual interface. + * If this parameter is set to zero address the driver may + * determine the address as needed. + * This feature is only fully supported by drivers that enable the + * %NL80211_FEATURE_MAC_ON_CREATE flag. Others may support creating + ** only p2p devices with specified MAC. */ struct vif_params { int use_4addr; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index be1d5def304d..f7daae59248e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4056,6 +4056,9 @@ enum nl80211_ap_sme_features { * TSPEC sessions (TID aka TSID 0-7) with the %NL80211_CMD_ADD_TX_TS * command. Standard IEEE 802.11 TSPEC setup is not yet supported, it * needs to be able to handle Block-Ack agreements and other things. + * @NL80211_FEATURE_MAC_ON_CREATE: Device supports configuring + * the vif's MAC address upon creation. + * See 'macaddr' field in the vif_params (cfg80211.h). */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -4085,6 +4088,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_STATIC_SMPS = 1 << 24, NL80211_FEATURE_DYNAMIC_SMPS = 1 << 25, NL80211_FEATURE_SUPPORTS_WMM_ADMISSION = 1 << 26, + NL80211_FEATURE_MAC_ON_CREATE = 1 << 27, }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d98d4ea27819..12736a7cd506 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2605,7 +2605,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) !(rdev->wiphy.interface_modes & (1 << type))) return -EOPNOTSUPP; - if (type == NL80211_IFTYPE_P2P_DEVICE && info->attrs[NL80211_ATTR_MAC]) { + if ((type == NL80211_IFTYPE_P2P_DEVICE || + rdev->wiphy.features & NL80211_FEATURE_MAC_ON_CREATE) && + info->attrs[NL80211_ATTR_MAC]) { nla_memcpy(params.macaddr, info->attrs[NL80211_ATTR_MAC], ETH_ALEN); if (!is_valid_ether_addr(params.macaddr)) -- cgit v1.2.3-71-gd317 From e9105f1bead4ec3f64904564c7c6268185d6b363 Mon Sep 17 00:00:00 2001 From: Arturo Borrero Date: Fri, 17 Oct 2014 12:39:09 +0200 Subject: netfilter: nf_tables: add new expression nft_redir This new expression provides NAT in the redirect flavour, which is to redirect packets to local machine. Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nft_redir.h | 21 +++++++ include/uapi/linux/netfilter/nf_tables.h | 16 ++++++ net/ipv4/netfilter/Kconfig | 9 +++ net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/nft_redir_ipv4.c | 77 +++++++++++++++++++++++++ net/ipv6/netfilter/Kconfig | 9 +++ net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/nft_redir_ipv6.c | 77 +++++++++++++++++++++++++ net/netfilter/Kconfig | 9 +++ net/netfilter/Makefile | 1 + net/netfilter/nft_redir.c | 98 ++++++++++++++++++++++++++++++++ 11 files changed, 319 insertions(+) create mode 100644 include/net/netfilter/nft_redir.h create mode 100644 net/ipv4/netfilter/nft_redir_ipv4.c create mode 100644 net/ipv6/netfilter/nft_redir_ipv6.c create mode 100644 net/netfilter/nft_redir.c (limited to 'include/uapi/linux') diff --git a/include/net/netfilter/nft_redir.h b/include/net/netfilter/nft_redir.h new file mode 100644 index 000000000000..a2d67546afab --- /dev/null +++ b/include/net/netfilter/nft_redir.h @@ -0,0 +1,21 @@ +#ifndef _NFT_REDIR_H_ +#define _NFT_REDIR_H_ + +struct nft_redir { + enum nft_registers sreg_proto_min:8; + enum nft_registers sreg_proto_max:8; + u16 flags; +}; + +extern const struct nla_policy nft_redir_policy[]; + +int nft_redir_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]); + +int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr); + +int nft_redir_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data); + +#endif /* _NFT_REDIR_H_ */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index f31fe7b660a5..16f62a5cf04d 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -837,6 +837,22 @@ enum nft_masq_attributes { }; #define NFTA_MASQ_MAX (__NFTA_MASQ_MAX - 1) +/** + * enum nft_redir_attributes - nf_tables redirect expression netlink attributes + * + * @NFTA_REDIR_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_REDIR_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) + * @NFTA_REDIR_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32) + */ +enum nft_redir_attributes { + NFTA_REDIR_UNSPEC, + NFTA_REDIR_REG_PROTO_MIN, + NFTA_REDIR_REG_PROTO_MAX, + NFTA_REDIR_FLAGS, + __NFTA_REDIR_MAX +}; +#define NFTA_REDIR_MAX (__NFTA_REDIR_MAX - 1) + /** * enum nft_gen_attributes - nf_tables ruleset generation attributes * diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index a300e2c32b26..8358b2da1549 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -119,6 +119,15 @@ config NFT_MASQ_IPV4 This is the expression that provides IPv4 masquerading support for nf_tables. +config NFT_REDIR_IPV4 + tristate "IPv4 redirect support for nf_tables" + depends on NF_TABLES_IPV4 + depends on NFT_REDIR + select NF_NAT_REDIRECT_IPV4 + help + This is the expression that provides IPv4 redirect support for + nf_tables. + config NF_NAT_SNMP_BASIC tristate "Basic SNMP-ALG support" depends on NF_CONNTRACK_SNMP diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 34e436c92015..902bcd1597bb 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o +obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o # generic IP tables diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c new file mode 100644 index 000000000000..643c5967aa27 --- /dev/null +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2014 Arturo Borrero Gonzalez + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void nft_redir_ipv4_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_redir *priv = nft_expr_priv(expr); + struct nf_nat_ipv4_multi_range_compat mr; + unsigned int verdict; + + memset(&mr, 0, sizeof(mr)); + if (priv->sreg_proto_min) { + mr.range[0].min.all = (__force __be16) + data[priv->sreg_proto_min].data[0]; + mr.range[0].max.all = (__force __be16) + data[priv->sreg_proto_max].data[0]; + mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + mr.range[0].flags |= priv->flags; + + verdict = nf_nat_redirect_ipv4(pkt->skb, &mr, pkt->ops->hooknum); + data[NFT_REG_VERDICT].verdict = verdict; +} + +static struct nft_expr_type nft_redir_ipv4_type; +static const struct nft_expr_ops nft_redir_ipv4_ops = { + .type = &nft_redir_ipv4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), + .eval = nft_redir_ipv4_eval, + .init = nft_redir_init, + .dump = nft_redir_dump, + .validate = nft_redir_validate, +}; + +static struct nft_expr_type nft_redir_ipv4_type __read_mostly = { + .family = NFPROTO_IPV4, + .name = "redir", + .ops = &nft_redir_ipv4_ops, + .policy = nft_redir_policy, + .maxattr = NFTA_REDIR_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_redir_ipv4_module_init(void) +{ + return nft_register_expr(&nft_redir_ipv4_type); +} + +static void __exit nft_redir_ipv4_module_exit(void) +{ + nft_unregister_expr(&nft_redir_ipv4_type); +} + +module_init(nft_redir_ipv4_module_init); +module_exit(nft_redir_ipv4_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arturo Borrero Gonzalez "); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 462eebbf4377..0dbe5c7953e5 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -97,6 +97,15 @@ config NFT_MASQ_IPV6 This is the expression that provides IPv4 masquerading support for nf_tables. +config NFT_REDIR_IPV6 + tristate "IPv6 redirect support for nf_tables" + depends on NF_TABLES_IPV6 + depends on NFT_REDIR + select NF_NAT_REDIRECT_IPV6 + help + This is the expression that provides IPv4 redirect support for + nf_tables. + endif # NF_NAT_IPV6 config IP6_NF_IPTABLES diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 6c2baab10f21..d2ac9f5f212c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o +obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c new file mode 100644 index 000000000000..83420eeaad1c --- /dev/null +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2014 Arturo Borrero Gonzalez + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void nft_redir_ipv6_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_redir *priv = nft_expr_priv(expr); + struct nf_nat_range range; + unsigned int verdict; + + memset(&range, 0, sizeof(range)); + if (priv->sreg_proto_min) { + range.min_proto.all = (__force __be16) + data[priv->sreg_proto_min].data[0]; + range.max_proto.all = (__force __be16) + data[priv->sreg_proto_max].data[0]; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + range.flags |= priv->flags; + + verdict = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->ops->hooknum); + data[NFT_REG_VERDICT].verdict = verdict; +} + +static struct nft_expr_type nft_redir_ipv6_type; +static const struct nft_expr_ops nft_redir_ipv6_ops = { + .type = &nft_redir_ipv6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), + .eval = nft_redir_ipv6_eval, + .init = nft_redir_init, + .dump = nft_redir_dump, + .validate = nft_redir_validate, +}; + +static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { + .family = NFPROTO_IPV6, + .name = "redir", + .ops = &nft_redir_ipv6_ops, + .policy = nft_redir_policy, + .maxattr = NFTA_REDIR_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_redir_ipv6_module_init(void) +{ + return nft_register_expr(&nft_redir_ipv6_type); +} + +static void __exit nft_redir_ipv6_module_exit(void) +{ + nft_unregister_expr(&nft_redir_ipv6_type); +} + +module_init(nft_redir_ipv6_module_init); +module_exit(nft_redir_ipv6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arturo Borrero Gonzalez "); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 49deb4edbac6..373486ae4159 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -505,6 +505,15 @@ config NFT_MASQ This option adds the "masquerade" expression that you can use to perform NAT in the masquerade flavour. +config NFT_REDIR + depends on NF_TABLES + depends on NF_CONNTRACK + depends on NF_NAT + tristate "Netfilter nf_tables redirect support" + help + This options adds the "redirect" expression that you can use + to perform NAT in the redirect flavour. + config NFT_NAT depends on NF_TABLES depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index a9571be3f791..f3eb4680f2ec 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -88,6 +88,7 @@ obj-$(CONFIG_NFT_HASH) += nft_hash.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o +obj-$(CONFIG_NFT_REDIR) += nft_redir.o # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c new file mode 100644 index 000000000000..e27b4e35718a --- /dev/null +++ b/net/netfilter/nft_redir.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2014 Arturo Borrero Gonzalez + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = { + [NFTA_REDIR_REG_PROTO_MIN] = { .type = NLA_U32 }, + [NFTA_REDIR_REG_PROTO_MAX] = { .type = NLA_U32 }, + [NFTA_REDIR_FLAGS] = { .type = NLA_U32 }, +}; +EXPORT_SYMBOL_GPL(nft_redir_policy); + +int nft_redir_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_redir *priv = nft_expr_priv(expr); + u32 nla_be32; + int err; + + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; + + if (tb[NFTA_REDIR_REG_PROTO_MIN]) { + nla_be32 = nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MIN]); + priv->sreg_proto_min = ntohl(nla_be32); + err = nft_validate_input_register(priv->sreg_proto_min); + if (err < 0) + return err; + + if (tb[NFTA_REDIR_REG_PROTO_MAX]) { + nla_be32 = nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MAX]); + priv->sreg_proto_max = ntohl(nla_be32); + err = nft_validate_input_register(priv->sreg_proto_max); + if (err < 0) + return err; + } else { + priv->sreg_proto_max = priv->sreg_proto_min; + } + } + + if (tb[NFTA_REDIR_FLAGS]) { + priv->flags = ntohl(nla_get_be32(tb[NFTA_REDIR_FLAGS])); + if (priv->flags & ~NF_NAT_RANGE_MASK) + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(nft_redir_init); + +int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_redir *priv = nft_expr_priv(expr); + + if (priv->sreg_proto_min) { + if (nla_put_be32(skb, NFTA_REDIR_REG_PROTO_MIN, + htonl(priv->sreg_proto_min))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_REDIR_REG_PROTO_MAX, + htonl(priv->sreg_proto_max))) + goto nla_put_failure; + } + + if (priv->flags != 0 && + nla_put_be32(skb, NFTA_REDIR_FLAGS, htonl(priv->flags))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} +EXPORT_SYMBOL_GPL(nft_redir_dump); + +int nft_redir_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); +} +EXPORT_SYMBOL_GPL(nft_redir_validate); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arturo Borrero Gonzalez "); -- cgit v1.2.3-71-gd317 From 958501163ddd6ea22a98f94fa0e7ce6d4734e5c4 Mon Sep 17 00:00:00 2001 From: Kyeyoon Park Date: Thu, 23 Oct 2014 14:49:17 -0700 Subject: bridge: Add support for IEEE 802.11 Proxy ARP This feature is defined in IEEE Std 802.11-2012, 10.23.13. It allows the AP devices to keep track of the hardware-address-to-IP-address mapping of the mobile devices within the WLAN network. The AP will learn this mapping via observing DHCP, ARP, and NS/NA frames. When a request for such information is made (i.e. ARP request, Neighbor Solicitation), the AP will respond on behalf of the associated mobile device. In the process of doing so, the AP will drop the multicast request frame that was intended to go out to the wireless medium. It was recommended at the LKS workshop to do this implementation in the bridge layer. vxlan.c is already doing something very similar. The DHCP snooping code will be added to the userspace application (hostapd) per the recommendation. This RFC commit is only for IPv4. A similar approach in the bridge layer will be taken for IPv6 as well. Signed-off-by: Kyeyoon Park Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + net/bridge/br_forward.c | 5 ++++ net/bridge/br_input.c | 60 ++++++++++++++++++++++++++++++++++++++++++++ net/bridge/br_netlink.c | 4 ++- net/bridge/br_private.h | 1 + net/bridge/br_sysfs_if.c | 2 ++ 6 files changed, 72 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 0bdb77e16875..7072d8325016 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -243,6 +243,7 @@ enum { IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */ IFLA_BRPORT_LEARNING, /* mac learning */ IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */ + IFLA_BRPORT_PROXYARP, /* proxy ARP */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 992ec49a96aa..1510b54e6a2e 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -183,6 +183,11 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, /* Do not flood unicast traffic to ports that turn it off */ if (unicast && !(p->flags & BR_FLOOD)) continue; + + /* Do not flood to ports that enable proxy ARP */ + if (p->flags & BR_PROXYARP) + continue; + prev = maybe_deliver(prev, p, skb, __packet_hook); if (IS_ERR(prev)) goto out; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 6fd5522df696..1f1de715197c 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include #include "br_private.h" @@ -57,6 +59,60 @@ static int br_pass_frame_up(struct sk_buff *skb) netif_receive_skb); } +static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, + u16 vid) +{ + struct net_device *dev = br->dev; + struct neighbour *n; + struct arphdr *parp; + u8 *arpptr, *sha; + __be32 sip, tip; + + if (dev->flags & IFF_NOARP) + return; + + if (!pskb_may_pull(skb, arp_hdr_len(dev))) { + dev->stats.tx_dropped++; + return; + } + parp = arp_hdr(skb); + + if (parp->ar_pro != htons(ETH_P_IP) || + parp->ar_op != htons(ARPOP_REQUEST) || + parp->ar_hln != dev->addr_len || + parp->ar_pln != 4) + return; + + arpptr = (u8 *)parp + sizeof(struct arphdr); + sha = arpptr; + arpptr += dev->addr_len; /* sha */ + memcpy(&sip, arpptr, sizeof(sip)); + arpptr += sizeof(sip); + arpptr += dev->addr_len; /* tha */ + memcpy(&tip, arpptr, sizeof(tip)); + + if (ipv4_is_loopback(tip) || + ipv4_is_multicast(tip)) + return; + + n = neigh_lookup(&arp_tbl, &tip, dev); + if (n) { + struct net_bridge_fdb_entry *f; + + if (!(n->nud_state & NUD_VALID)) { + neigh_release(n); + return; + } + + f = __br_fdb_get(br, n->ha, vid); + if (f) + arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip, + sha, n->ha, sha); + + neigh_release(n); + } +} + /* note: already called with rcu_read_lock */ int br_handle_frame_finish(struct sk_buff *skb) { @@ -98,6 +154,10 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; if (is_broadcast_ether_addr(dest)) { + if (p->flags & BR_PROXYARP && + skb->protocol == htons(ETH_P_ARP)) + br_do_proxy_arp(skb, br, vid); + skb2 = skb; unicast = false; } else if (is_multicast_ether_addr(dest)) { diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 2ff9706647f2..86c239b06f6e 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -60,7 +60,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) || nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) || - nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD))) + nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) || + nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP))) return -EMSGSIZE; return 0; @@ -332,6 +333,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); + br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 4d783d071305..8f3f08140258 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -172,6 +172,7 @@ struct net_bridge_port #define BR_FLOOD 0x00000040 #define BR_AUTO_MASK (BR_FLOOD | BR_LEARNING) #define BR_PROMISC 0x00000080 +#define BR_PROXYARP 0x00000100 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING struct bridge_mcast_own_query ip4_own_query; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index e561cd59b8a6..2de5d91199e8 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -170,6 +170,7 @@ BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); BRPORT_ATTR_FLAG(learning, BR_LEARNING); BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD); +BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -213,6 +214,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_multicast_router, &brport_attr_multicast_fast_leave, #endif + &brport_attr_proxyarp, NULL }; -- cgit v1.2.3-71-gd317 From 7202da8b7f7131d25411d81aa557e28cd941c5b6 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 27 Oct 2014 11:37:36 +0200 Subject: ethtool, net/mlx4_en: Cable info, get_module_info/eeprom ethtool support Added support for get_module_info/get_module_eeprom ethtool support for cable info reading. Added new cable types enum in include/uapi/linux/ethtool.h for ethtool use. +#define ETH_MODULE_SFF_8636 0x3 +#define ETH_MODULE_SFF_8636_LEN 256 +#define ETH_MODULE_SFF_8436 0x4 +#define ETH_MODULE_SFF_8436_LEN 256 Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 83 +++++++++++++++++++++++++ include/uapi/linux/ethtool.h | 4 ++ 2 files changed, 87 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index ae83da9cd18a..279f4233de59 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -1309,6 +1310,86 @@ static int mlx4_en_set_tunable(struct net_device *dev, return ret; } +static int mlx4_en_get_module_info(struct net_device *dev, + struct ethtool_modinfo *modinfo) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int ret; + u8 data[4]; + + /* Read first 2 bytes to get Module & REV ID */ + ret = mlx4_get_module_info(mdev->dev, priv->port, + 0/*offset*/, 2/*size*/, data); + if (ret < 2) + return -EIO; + + switch (data[0] /* identifier */) { + case MLX4_MODULE_ID_QSFP: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + break; + case MLX4_MODULE_ID_QSFP_PLUS: + if (data[1] >= 0x3) { /* revision id */ + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + } + break; + case MLX4_MODULE_ID_QSFP28: + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + break; + case MLX4_MODULE_ID_SFP: + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + break; + default: + return -ENOSYS; + } + + return 0; +} + +static int mlx4_en_get_module_eeprom(struct net_device *dev, + struct ethtool_eeprom *ee, + u8 *data) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int offset = ee->offset; + int i = 0, ret; + + if (ee->len == 0) + return -EINVAL; + + memset(data, 0, ee->len); + + while (i < ee->len) { + en_dbg(DRV, priv, + "mlx4_get_module_info i(%d) offset(%d) len(%d)\n", + i, offset, ee->len - i); + + ret = mlx4_get_module_info(mdev->dev, priv->port, + offset, ee->len - i, data + i); + + if (!ret) /* Done reading */ + return 0; + + if (ret < 0) { + en_err(priv, + "mlx4_get_module_info i(%d) offset(%d) bytes_to_read(%d) - FAILED (0x%x)\n", + i, offset, ee->len - i, ret); + return 0; + } + + i += ret; + offset += ret; + } + return 0; +} const struct ethtool_ops mlx4_en_ethtool_ops = { .get_drvinfo = mlx4_en_get_drvinfo, @@ -1341,6 +1422,8 @@ const struct ethtool_ops mlx4_en_ethtool_ops = { .get_priv_flags = mlx4_en_get_priv_flags, .get_tunable = mlx4_en_get_tunable, .set_tunable = mlx4_en_set_tunable, + .get_module_info = mlx4_en_get_module_info, + .get_module_eeprom = mlx4_en_get_module_eeprom }; diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 99b43056a6fe..b6acd78b821c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1343,6 +1343,10 @@ enum ethtool_sfeatures_retval_bits { #define ETH_MODULE_SFF_8079_LEN 256 #define ETH_MODULE_SFF_8472 0x2 #define ETH_MODULE_SFF_8472_LEN 512 +#define ETH_MODULE_SFF_8636 0x3 +#define ETH_MODULE_SFF_8636_LEN 256 +#define ETH_MODULE_SFF_8436 0x4 +#define ETH_MODULE_SFF_8436_LEN 256 /* Reset flags */ /* The reset() operation must clear the flags for the components which -- cgit v1.2.3-71-gd317 From dcf972a334dd76975bf144ca57350c1f3132c947 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 27 Oct 2014 11:37:39 +0200 Subject: ethtool, net/mlx4_en: Add 100M, 20G, 56G speeds ethtool reporting support Added 100M, 20G and 56G ethtool speed reporting support. Update mlx4_en_test_speed self test with the new speeds. Defined new link speeds in include/uapi/linux/ethtool.h: +#define SPEED_20000 20000 +#define SPEED_40000 40000 +#define SPEED_56000 56000 Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_port.c | 15 ++++++++++++--- drivers/net/ethernet/mellanox/mlx4/en_port.h | 9 ++++++--- drivers/net/ethernet/mellanox/mlx4/en_selftest.c | 12 ++++++++---- include/uapi/linux/ethtool.h | 14 +++++++++++++- 4 files changed, 39 insertions(+), 11 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index 0a0261d128b9..afd303662d17 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -91,15 +91,24 @@ int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port) * already synchronized, no need in locking */ state->link_state = !!(qport_context->link_up & MLX4_EN_LINK_UP_MASK); switch (qport_context->link_speed & MLX4_EN_SPEED_MASK) { + case MLX4_EN_100M_SPEED: + state->link_speed = SPEED_100; + break; case MLX4_EN_1G_SPEED: - state->link_speed = 1000; + state->link_speed = SPEED_1000; break; case MLX4_EN_10G_SPEED_XAUI: case MLX4_EN_10G_SPEED_XFI: - state->link_speed = 10000; + state->link_speed = SPEED_10000; + break; + case MLX4_EN_20G_SPEED: + state->link_speed = SPEED_20000; break; case MLX4_EN_40G_SPEED: - state->link_speed = 40000; + state->link_speed = SPEED_40000; + break; + case MLX4_EN_56G_SPEED: + state->link_speed = SPEED_56000; break; default: state->link_speed = -1; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.h b/drivers/net/ethernet/mellanox/mlx4/en_port.h index 745090b49d9e..a5fc93bfe5da 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.h +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.h @@ -54,10 +54,13 @@ enum { }; enum { - MLX4_EN_1G_SPEED = 0x02, - MLX4_EN_10G_SPEED_XFI = 0x01, + MLX4_EN_100M_SPEED = 0x04, MLX4_EN_10G_SPEED_XAUI = 0x00, + MLX4_EN_10G_SPEED_XFI = 0x01, + MLX4_EN_1G_SPEED = 0x02, + MLX4_EN_20G_SPEED = 0x08, MLX4_EN_40G_SPEED = 0x40, + MLX4_EN_56G_SPEED = 0x20, MLX4_EN_OTHER_SPEED = 0x0f, }; @@ -68,7 +71,7 @@ struct mlx4_en_query_port_context { __be16 mtu; u8 reserved2; u8 link_speed; -#define MLX4_EN_SPEED_MASK 0x43 +#define MLX4_EN_SPEED_MASK 0x6f u16 reserved3[5]; __be64 mac; u8 transceiver; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c index 49d5afc7cfb8..2d8ee66138e8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c @@ -129,11 +129,15 @@ static int mlx4_en_test_speed(struct mlx4_en_priv *priv) if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) return -ENOMEM; - /* The device supports 1G, 10G and 40G speeds */ - if (priv->port_state.link_speed != 1000 && - priv->port_state.link_speed != 10000 && - priv->port_state.link_speed != 40000) + /* The device supports 100M, 1G, 10G, 20G, 40G and 56G speed */ + if (priv->port_state.link_speed != SPEED_100 && + priv->port_state.link_speed != SPEED_1000 && + priv->port_state.link_speed != SPEED_10000 && + priv->port_state.link_speed != SPEED_20000 && + priv->port_state.link_speed != SPEED_40000 && + priv->port_state.link_speed != SPEED_56000) return priv->port_state.link_speed; + return 0; } diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index b6acd78b821c..eb2095b42fbb 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1213,6 +1213,10 @@ enum ethtool_sfeatures_retval_bits { #define SUPPORTED_40000baseCR4_Full (1 << 24) #define SUPPORTED_40000baseSR4_Full (1 << 25) #define SUPPORTED_40000baseLR4_Full (1 << 26) +#define SUPPORTED_56000baseKR4_Full (1 << 27) +#define SUPPORTED_56000baseCR4_Full (1 << 28) +#define SUPPORTED_56000baseSR4_Full (1 << 29) +#define SUPPORTED_56000baseLR4_Full (1 << 30) #define ADVERTISED_10baseT_Half (1 << 0) #define ADVERTISED_10baseT_Full (1 << 1) @@ -1241,6 +1245,10 @@ enum ethtool_sfeatures_retval_bits { #define ADVERTISED_40000baseCR4_Full (1 << 24) #define ADVERTISED_40000baseSR4_Full (1 << 25) #define ADVERTISED_40000baseLR4_Full (1 << 26) +#define ADVERTISED_56000baseKR4_Full (1 << 27) +#define ADVERTISED_56000baseCR4_Full (1 << 28) +#define ADVERTISED_56000baseSR4_Full (1 << 29) +#define ADVERTISED_56000baseLR4_Full (1 << 30) /* The following are all involved in forcing a particular link * mode for the device for setting things. When getting the @@ -1248,12 +1256,16 @@ enum ethtool_sfeatures_retval_bits { * it was forced up into this mode or autonegotiated. */ -/* The forced speed, 10Mb, 100Mb, gigabit, 2.5Gb, 10GbE. */ +/* The forced speed, 10Mb, 100Mb, gigabit, [2.5|10|20|40|56]GbE. */ #define SPEED_10 10 #define SPEED_100 100 #define SPEED_1000 1000 #define SPEED_2500 2500 #define SPEED_10000 10000 +#define SPEED_20000 20000 +#define SPEED_40000 40000 +#define SPEED_56000 56000 + #define SPEED_UNKNOWN -1 /* Duplex, half or full. */ -- cgit v1.2.3-71-gd317 From 7fd2561e4ebdd070ebba6d3326c4c5b13942323f Mon Sep 17 00:00:00 2001 From: Erik Kline Date: Tue, 28 Oct 2014 18:11:14 +0900 Subject: net: ipv6: Add a sysctl to make optimistic addresses useful candidates Add a sysctl that causes an interface's optimistic addresses to be considered equivalent to other non-deprecated addresses for source address selection purposes. Preferred addresses will still take precedence over optimistic addresses, subject to other ranking in the source address selection algorithm. This is useful where different interfaces are connected to different networks from different ISPs (e.g., a cell network and a home wifi network). The current behaviour complies with RFC 3484/6724, and it makes sense if the host has only one interface, or has multiple interfaces on the same network (same or cooperating administrative domain(s), but not in the multiple distinct networks case. For example, if a mobile device has an IPv6 address on an LTE network and then connects to IPv6-enabled wifi, while the wifi IPv6 address is undergoing DAD, IPv6 connections will try use the wifi default route with the LTE IPv6 address, and will get stuck until they time out. Also, because optimistic nodes can receive frames, issue an RTM_NEWADDR as soon as DAD starts (with the IFA_F_OPTIMSTIC flag appropriately set). A second RTM_NEWADDR is sent if DAD completes (the address flags have changed), otherwise an RTM_DELADDR is sent. Also: add an entry in ip-sysctl.txt for optimistic_dad. Signed-off-by: Erik Kline Acked-by: Lorenzo Colitti Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 13 ++++++++++ include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 46 ++++++++++++++++++++++++++++++++-- 4 files changed, 59 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 9028b879a97b..368e3251c553 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1460,6 +1460,19 @@ suppress_frag_ndisc - INTEGER 1 - (default) discard fragmented neighbor discovery packets 0 - allow fragmented neighbor discovery packets +optimistic_dad - BOOLEAN + Whether to perform Optimistic Duplicate Address Detection (RFC 4429). + 0: disabled (default) + 1: enabled + +use_optimistic - BOOLEAN + If enabled, do not classify optimistic addresses as deprecated during + source address selection. Preferred addresses will still be chosen + before optimistic addresses, subject to other ranking in the source + address selection algorithm. + 0: disabled (default) + 1: enabled + icmp/*: ratelimit - INTEGER Limit the maximal rates for sending ICMPv6 packets. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ff560537dd61..7121a2e97ce2 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -42,6 +42,7 @@ struct ipv6_devconf { __s32 accept_ra_from_local; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD __s32 optimistic_dad; + __s32 use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE __s32 mc_forwarding; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index efa2666f4b8a..e863d088b9a5 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -164,6 +164,7 @@ enum { DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL, DEVCONF_SUPPRESS_FRAG_NDISC, DEVCONF_ACCEPT_RA_FROM_LOCAL, + DEVCONF_USE_OPTIMISTIC, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 50b95b2db87c..8d12b7c7018f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1170,6 +1170,9 @@ enum { IPV6_SADDR_RULE_PRIVACY, IPV6_SADDR_RULE_ORCHID, IPV6_SADDR_RULE_PREFIX, +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + IPV6_SADDR_RULE_NOT_OPTIMISTIC, +#endif IPV6_SADDR_RULE_MAX }; @@ -1197,6 +1200,15 @@ static inline int ipv6_saddr_preferred(int type) return 0; } +static inline bool ipv6_use_optimistic_addr(struct inet6_dev *idev) +{ +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + return idev && idev->cnf.optimistic_dad && idev->cnf.use_optimistic; +#else + return false; +#endif +} + static int ipv6_get_saddr_eval(struct net *net, struct ipv6_saddr_score *score, struct ipv6_saddr_dst *dst, @@ -1257,10 +1269,16 @@ static int ipv6_get_saddr_eval(struct net *net, score->scopedist = ret; break; case IPV6_SADDR_RULE_PREFERRED: + { /* Rule 3: Avoid deprecated and optimistic addresses */ + u8 avoid = IFA_F_DEPRECATED; + + if (!ipv6_use_optimistic_addr(score->ifa->idev)) + avoid |= IFA_F_OPTIMISTIC; ret = ipv6_saddr_preferred(score->addr_type) || - !(score->ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)); + !(score->ifa->flags & avoid); break; + } #ifdef CONFIG_IPV6_MIP6 case IPV6_SADDR_RULE_HOA: { @@ -1306,6 +1324,14 @@ static int ipv6_get_saddr_eval(struct net *net, ret = score->ifa->prefix_len; score->matchlen = ret; break; +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + case IPV6_SADDR_RULE_NOT_OPTIMISTIC: + /* Optimistic addresses still have lower precedence than other + * preferred addresses. + */ + ret = !(score->ifa->flags & IFA_F_OPTIMISTIC); + break; +#endif default: ret = 0; } @@ -3222,8 +3248,15 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) * Optimistic nodes can start receiving * Frames right away */ - if (ifp->flags & IFA_F_OPTIMISTIC) + if (ifp->flags & IFA_F_OPTIMISTIC) { ip6_ins_rt(ifp->rt); + if (ipv6_use_optimistic_addr(idev)) { + /* Because optimistic nodes can use this address, + * notify listeners. If DAD fails, RTM_DELADDR is sent. + */ + ipv6_ifa_notify(RTM_NEWADDR, ifp); + } + } addrconf_dad_kick(ifp); out: @@ -4330,6 +4363,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad; + array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding; @@ -5155,6 +5189,14 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, + { + .procname = "use_optimistic", + .data = &ipv6_devconf.use_optimistic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + + }, #endif #ifdef CONFIG_IPV6_MROUTE { -- cgit v1.2.3-71-gd317 From 6e0bd6c35b021dc73a81ebd1ef79761233c48b50 Mon Sep 17 00:00:00 2001 From: Rostislav Lisovy Date: Mon, 3 Nov 2014 10:33:18 +0100 Subject: cfg80211: 802.11p OCB mode handling This patch adds new iface type (NL80211_IFTYPE_OCB) representing the OCB (Outside the Context of a BSS) mode. When establishing a connection to the network a cfg80211_join_ocb function is called (particular nl80211_command is added as well). A mandatory parameters during the ocb_join operation are 'center frequency' and 'channel width (5/10 MHz)'. Changes done in mac80211 are minimal possible required to avoid many warnings (warning: enumeration value 'NL80211_IFTYPE_OCB' not handled in switch) during compilation. Full functionality (where needed) is added in the following patch. Signed-off-by: Rostislav Lisovy Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 19 ++++++++++ include/uapi/linux/nl80211.h | 11 ++++++ net/mac80211/cfg.c | 1 + net/mac80211/chan.c | 2 + net/mac80211/iface.c | 5 +++ net/mac80211/util.c | 3 ++ net/wireless/Makefile | 2 +- net/wireless/chan.c | 8 ++++ net/wireless/core.c | 3 ++ net/wireless/core.h | 12 ++++++ net/wireless/nl80211.c | 39 ++++++++++++++++++++ net/wireless/ocb.c | 88 ++++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 21 +++++++++++ net/wireless/trace.h | 21 +++++++++++ net/wireless/util.c | 5 ++- 15 files changed, 238 insertions(+), 2 deletions(-) create mode 100644 net/wireless/ocb.c (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f67948e18600..5c3acd07acd9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1358,6 +1358,16 @@ struct mesh_setup { u32 basic_rates; }; +/** + * struct ocb_setup - 802.11p OCB mode setup configuration + * @chandef: defines the channel to use + * + * These parameters are fixed when connecting to the network + */ +struct ocb_setup { + struct cfg80211_chan_def chandef; +}; + /** * struct ieee80211_txq_params - TX queue parameters * @ac: AC identifier @@ -2352,6 +2362,11 @@ struct cfg80211_qos_map { * with the peer followed by immediate teardown when the addition is later * rejected) * @del_tx_ts: remove an existing TX TS + * + * @join_ocb: join the OCB network with the specified parameters + * (invoked with the wireless_dev mutex held) + * @leave_ocb: leave the current OCB network + * (invoked with the wireless_dev mutex held) */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2433,6 +2448,10 @@ struct cfg80211_ops { const struct mesh_setup *setup); int (*leave_mesh)(struct wiphy *wiphy, struct net_device *dev); + int (*join_ocb)(struct wiphy *wiphy, struct net_device *dev, + struct ocb_setup *setup); + int (*leave_ocb)(struct wiphy *wiphy, struct net_device *dev); + int (*change_bss)(struct wiphy *wiphy, struct net_device *dev, struct bss_parameters *params); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f7daae59248e..9b3025e4377a 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -746,6 +746,11 @@ * destination %NL80211_ATTR_MAC on the interface identified by * %NL80211_ATTR_IFINDEX. * + * @NL80211_CMD_JOIN_OCB: Join the OCB network. The center frequency and + * bandwidth of a channel must be given. + * @NL80211_CMD_LEAVE_OCB: Leave the OCB network -- no special arguments, the + * network is determined by the network interface. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -922,6 +927,9 @@ enum nl80211_commands { NL80211_CMD_GET_MPP, + NL80211_CMD_JOIN_OCB, + NL80211_CMD_LEAVE_OCB, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2074,6 +2082,8 @@ enum nl80211_attrs { * and therefore can't be created in the normal ways, use the * %NL80211_CMD_START_P2P_DEVICE and %NL80211_CMD_STOP_P2P_DEVICE * commands to create and destroy one + * @NL80211_IF_TYPE_OCB: Outside Context of a BSS + * This mode corresponds to the MIB variable dot11OCBActivated=true * @NL80211_IFTYPE_MAX: highest interface type number currently defined * @NUM_NL80211_IFTYPES: number of defined interface types * @@ -2093,6 +2103,7 @@ enum nl80211_iftype { NL80211_IFTYPE_P2P_CLIENT, NL80211_IFTYPE_P2P_GO, NL80211_IFTYPE_P2P_DEVICE, + NL80211_IFTYPE_OCB, /* keep last */ NUM_NL80211_IFTYPES, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b9659b8b70f8..1e2afc95ad09 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -230,6 +230,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_OCB: /* shouldn't happen */ WARN_ON_ONCE(1); break; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index ee71bb6f64f7..ff1f877e3b63 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -270,6 +270,7 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_OCB: width = vif->bss_conf.chandef.width; break; case NL80211_IFTYPE_UNSPECIFIED: @@ -909,6 +910,7 @@ ieee80211_vif_chanctx_reservation_complete(struct ieee80211_sub_if_data *sdata) case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_AP: case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_OCB: ieee80211_queue_work(&sdata->local->hw, &sdata->csa_finalize_work); break; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 1ffcc0701244..d69e7532095f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -521,6 +521,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_OCB: /* no special treatment */ break; case NL80211_IFTYPE_UNSPECIFIED: @@ -631,6 +632,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_AP: case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_OCB: netif_carrier_off(dev); break; case NL80211_IFTYPE_WDS: @@ -1351,6 +1353,9 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.bssid = sdata->u.mgd.bssid; ieee80211_sta_setup_sdata(sdata); break; + case NL80211_IFTYPE_OCB: + /* to be implemented in the future */ + break; case NL80211_IFTYPE_ADHOC: sdata->vif.bss_conf.bssid = sdata->u.ibss.bssid; ieee80211_ibss_setup_sdata(sdata); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 666aa1306c45..d7d69c89ff34 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1841,6 +1841,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) ieee80211_bss_info_change_notify(sdata, changed); sdata_unlock(sdata); break; + case NL80211_IFTYPE_OCB: + /* to be implemented in the future */ + break; case NL80211_IFTYPE_ADHOC: changed |= BSS_CHANGED_IBSS; /* fall through */ diff --git a/net/wireless/Makefile b/net/wireless/Makefile index a761670af31d..4c9e39f04ef8 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_WEXT_SPY) += wext-spy.o obj-$(CONFIG_WEXT_PRIV) += wext-priv.o cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o -cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o ap.o trace.o +cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o ap.o trace.o ocb.o cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o cfg80211-$(CONFIG_CFG80211_INTERNAL_REGDB) += regdb.o diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 8f39e33e71bb..85506f1d0789 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -366,6 +366,7 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, break; case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: @@ -892,6 +893,13 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, *radar_detect |= BIT(wdev->chandef.width); } return; + case NL80211_IFTYPE_OCB: + if (wdev->chandef.chan) { + *chan = wdev->chandef.chan; + *chanmode = CHAN_MODE_SHARED; + return; + } + break; case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_WDS: diff --git a/net/wireless/core.c b/net/wireless/core.c index da4dcb65ade4..a4d27927aba2 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -869,6 +869,9 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, case NL80211_IFTYPE_P2P_GO: __cfg80211_stop_ap(rdev, dev, true); break; + case NL80211_IFTYPE_OCB: + __cfg80211_leave_ocb(rdev, dev); + break; case NL80211_IFTYPE_WDS: /* must be handled by mac80211/driver, has no APIs */ break; diff --git a/net/wireless/core.h b/net/wireless/core.h index 7e3a3cef7df9..61ee664cf2bd 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -290,6 +290,18 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_chan_def *chandef); +/* OCB */ +int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ocb_setup *setup); +int cfg80211_join_ocb(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ocb_setup *setup); +int __cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, + struct net_device *dev); +int cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, + struct net_device *dev); + /* AP */ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, bool notify); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f7d918858d32..1a31736914e5 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -885,6 +885,7 @@ static int nl80211_key_allowed(struct wireless_dev *wdev) return -ENOLINK; break; case NL80211_IFTYPE_UNSPECIFIED: + case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_WDS: @@ -8275,6 +8276,28 @@ static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } +static int nl80211_join_ocb(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct ocb_setup setup = {}; + int err; + + err = nl80211_parse_chandef(rdev, info, &setup.chandef); + if (err) + return err; + + return cfg80211_join_ocb(rdev, dev, &setup); +} + +static int nl80211_leave_ocb(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + + return cfg80211_leave_ocb(rdev, dev); +} + static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -10218,6 +10241,22 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_JOIN_OCB, + .doit = nl80211_join_ocb, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_LEAVE_OCB, + .doit = nl80211_leave_ocb, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, #ifdef CONFIG_PM { .cmd = NL80211_CMD_GET_WOWLAN, diff --git a/net/wireless/ocb.c b/net/wireless/ocb.c new file mode 100644 index 000000000000..c00d4a792319 --- /dev/null +++ b/net/wireless/ocb.c @@ -0,0 +1,88 @@ +/* + * OCB mode implementation + * + * Copyright: (c) 2014 Czech Technical University in Prague + * (c) 2014 Volkswagen Group Research + * Author: Rostislav Lisovy + * Funded by: Volkswagen Group Research + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include "nl80211.h" +#include "core.h" +#include "rdev-ops.h" + +int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ocb_setup *setup) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + ASSERT_WDEV_LOCK(wdev); + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_OCB) + return -EOPNOTSUPP; + + if (WARN_ON(!setup->chandef.chan)) + return -EINVAL; + + err = rdev_join_ocb(rdev, dev, setup); + if (!err) + wdev->chandef = setup->chandef; + + return err; +} + +int cfg80211_join_ocb(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ocb_setup *setup) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + wdev_lock(wdev); + err = __cfg80211_join_ocb(rdev, dev, setup); + wdev_unlock(wdev); + + return err; +} + +int __cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, + struct net_device *dev) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + ASSERT_WDEV_LOCK(wdev); + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_OCB) + return -EOPNOTSUPP; + + if (!rdev->ops->leave_ocb) + return -EOPNOTSUPP; + + err = rdev_leave_ocb(rdev, dev); + if (!err) + memset(&wdev->chandef, 0, sizeof(wdev->chandef)); + + return err; +} + +int cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, + struct net_device *dev) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + wdev_lock(wdev); + err = __cfg80211_leave_ocb(rdev, dev); + wdev_unlock(wdev); + + return err; +} diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 71b1db3cc645..1b3864cd50ca 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -348,6 +348,27 @@ static inline int rdev_leave_mesh(struct cfg80211_registered_device *rdev, return ret; } +static inline int rdev_join_ocb(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ocb_setup *setup) +{ + int ret; + trace_rdev_join_ocb(&rdev->wiphy, dev, setup); + ret = rdev->ops->join_ocb(&rdev->wiphy, dev, setup); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline int rdev_leave_ocb(struct cfg80211_registered_device *rdev, + struct net_device *dev) +{ + int ret; + trace_rdev_leave_ocb(&rdev->wiphy, dev); + ret = rdev->ops->leave_ocb(&rdev->wiphy, dev); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline int rdev_change_bss(struct cfg80211_registered_device *rdev, struct net_device *dev, struct bss_parameters *params) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index cdb2c2ef1ae1..277a85df910e 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -600,6 +600,11 @@ DEFINE_EVENT(wiphy_netdev_evt, rdev_leave_ibss, TP_ARGS(wiphy, netdev) ); +DEFINE_EVENT(wiphy_netdev_evt, rdev_leave_ocb, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), + TP_ARGS(wiphy, netdev) +); + DEFINE_EVENT(wiphy_netdev_evt, rdev_flush_pmksa, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) @@ -1316,6 +1321,22 @@ TRACE_EVENT(rdev_join_ibss, WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(bssid), __entry->ssid) ); +TRACE_EVENT(rdev_join_ocb, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + const struct ocb_setup *setup), + TP_ARGS(wiphy, netdev, setup), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG) +); + TRACE_EVENT(rdev_set_wiphy_params, TP_PROTO(struct wiphy *wiphy, u32 changed), TP_ARGS(wiphy, changed), diff --git a/net/wireless/util.c b/net/wireless/util.c index 5e233a577d0f..d0ac795445b7 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -442,7 +442,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, break; case cpu_to_le16(0): if (iftype != NL80211_IFTYPE_ADHOC && - iftype != NL80211_IFTYPE_STATION) + iftype != NL80211_IFTYPE_STATION && + iftype != NL80211_IFTYPE_OCB) return -1; break; } @@ -519,6 +520,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, memcpy(hdr.addr3, skb->data, ETH_ALEN); hdrlen = 24; break; + case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_ADHOC: /* DA SA BSSID */ memcpy(hdr.addr1, skb->data, ETH_ALEN); @@ -937,6 +939,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, if (dev->ieee80211_ptr->use_4addr) break; /* fall through */ + case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_ADHOC: dev->priv_flags |= IFF_DONT_BRIDGE; -- cgit v1.2.3-71-gd317 From b17f709a24013fcbb257f6f89b4d81ac9fdf0d18 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 4 Nov 2014 09:06:56 -0800 Subject: gue: TX support for using remote checksum offload option Add if_tunnel flag TUNNEL_ENCAP_FLAG_REMCSUM to configure remote checksum offload on an IP tunnel. Add logic in gue_build_header to insert remote checksum offload option. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/fou.h | 14 +++++++++++++- include/uapi/linux/if_tunnel.h | 1 + net/ipv4/fou.c | 35 ++++++++++++++++++++++++++++++++--- 3 files changed, 46 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/fou.h b/include/net/fou.h index cf4ce8874f92..25b26ffcf1df 100644 --- a/include/net/fou.h +++ b/include/net/fou.h @@ -20,7 +20,19 @@ static size_t fou_encap_hlen(struct ip_tunnel_encap *e) static size_t gue_encap_hlen(struct ip_tunnel_encap *e) { - return sizeof(struct udphdr) + sizeof(struct guehdr); + size_t len; + bool need_priv = false; + + len = sizeof(struct udphdr) + sizeof(struct guehdr); + + if (e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) { + len += GUE_PLEN_REMCSUM; + need_priv = true; + } + + len += need_priv ? GUE_LEN_PRIV : 0; + + return len; } #endif diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 280d9e092283..bd3cc11a431f 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -69,6 +69,7 @@ enum tunnel_encap_types { #define TUNNEL_ENCAP_FLAG_CSUM (1<<0) #define TUNNEL_ENCAP_FLAG_CSUM6 (1<<1) +#define TUNNEL_ENCAP_FLAG_REMCSUM (1<<2) /* SIT-mode i_flags */ #define SIT_ISATAP 0x0001 diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index a3b8c5b36303..fb0db99adf9e 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -562,11 +562,19 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM); int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; struct guehdr *guehdr; - size_t optlen = 0; + size_t hdrlen, optlen = 0; __be16 sport; void *data; bool need_priv = false; + if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) && + skb->ip_summed == CHECKSUM_PARTIAL) { + csum = false; + optlen += GUE_PLEN_REMCSUM; + type |= SKB_GSO_TUNNEL_REMCSUM; + need_priv = true; + } + optlen += need_priv ? GUE_LEN_PRIV : 0; skb = iptunnel_handle_offloads(skb, csum, type); @@ -578,7 +586,9 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), skb, 0, 0, false); - skb_push(skb, sizeof(struct guehdr) + optlen); + hdrlen = sizeof(struct guehdr) + optlen; + + skb_push(skb, hdrlen); guehdr = (struct guehdr *)skb->data; @@ -597,7 +607,26 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, *flags = 0; data += GUE_LEN_PRIV; - /* Add private flags */ + if (type & SKB_GSO_TUNNEL_REMCSUM) { + u16 csum_start = skb_checksum_start_offset(skb); + __be16 *pd = data; + + if (csum_start < hdrlen) + return -EINVAL; + + csum_start -= hdrlen; + pd[0] = htons(csum_start); + pd[1] = htons(csum_start + skb->csum_offset); + + if (!skb_is_gso(skb)) { + skb->ip_summed = CHECKSUM_NONE; + skb->encapsulation = 0; + } + + *flags |= GUE_PFLAG_REMCSUM; + data += GUE_PLEN_REMCSUM; + } + } fou_build_udp(skb, e, fl4, protocol, sport); -- cgit v1.2.3-71-gd317 From 25cd9ba0abc0749e5cb78e6493c6f6b3311ec6c5 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 6 Oct 2014 05:05:13 -0700 Subject: openvswitch: Add basic MPLS support to kernel Allow datapath to recognize and extract MPLS labels into flow keys and execute actions which push, pop, and set labels on packets. Based heavily on work by Leo Alterman, Ravi K, Isaku Yamahata and Joe Stringer. Cc: Ravi K Cc: Leo Alterman Cc: Isaku Yamahata Cc: Joe Stringer Signed-off-by: Simon Horman Signed-off-by: Jesse Gross Signed-off-by: Pravin B Shelar --- include/net/mpls.h | 39 +++++++++++ include/uapi/linux/openvswitch.h | 32 +++++++++ net/core/dev.c | 3 +- net/openvswitch/Kconfig | 1 + net/openvswitch/actions.c | 106 ++++++++++++++++++++++++++++- net/openvswitch/datapath.c | 6 +- net/openvswitch/flow.c | 30 +++++++++ net/openvswitch/flow.h | 17 +++-- net/openvswitch/flow_netlink.c | 139 ++++++++++++++++++++++++++++++++++----- net/openvswitch/flow_netlink.h | 2 +- 10 files changed, 345 insertions(+), 30 deletions(-) create mode 100644 include/net/mpls.h (limited to 'include/uapi/linux') diff --git a/include/net/mpls.h b/include/net/mpls.h new file mode 100644 index 000000000000..5b3b5addfb08 --- /dev/null +++ b/include/net/mpls.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2014 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _NET_MPLS_H +#define _NET_MPLS_H 1 + +#include +#include + +#define MPLS_HLEN 4 + +static inline bool eth_p_mpls(__be16 eth_type) +{ + return eth_type == htons(ETH_P_MPLS_UC) || + eth_type == htons(ETH_P_MPLS_MC); +} + +/* + * For non-MPLS skbs this will correspond to the network header. + * For MPLS skbs it will be before the network_header as the MPLS + * label stack lies between the end of the mac header and the network + * header. That is, for MPLS skbs the end of the mac header + * is the top of the MPLS label stack. + */ +static inline unsigned char *skb_mpls_header(struct sk_buff *skb) +{ + return skb_mac_header(skb) + skb->mac_len; +} +#endif diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 435eabc5ffaa..631056b66f80 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -293,6 +293,9 @@ enum ovs_key_attr { OVS_KEY_ATTR_DP_HASH, /* u32 hash value. Value 0 indicates the hash is not computed by the datapath. */ OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */ + OVS_KEY_ATTR_MPLS, /* array of struct ovs_key_mpls. + * The implementation may restrict + * the accepted length of the array. */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ovs_tunnel_info */ @@ -340,6 +343,10 @@ struct ovs_key_ethernet { __u8 eth_dst[ETH_ALEN]; }; +struct ovs_key_mpls { + __be32 mpls_lse; +}; + struct ovs_key_ipv4 { __be32 ipv4_src; __be32 ipv4_dst; @@ -483,6 +490,19 @@ enum ovs_userspace_attr { #define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) +/** + * struct ovs_action_push_mpls - %OVS_ACTION_ATTR_PUSH_MPLS action argument. + * @mpls_lse: MPLS label stack entry to push. + * @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame. + * + * The only values @mpls_ethertype should ever be given are %ETH_P_MPLS_UC and + * %ETH_P_MPLS_MC, indicating MPLS unicast or multicast. Other are rejected. + */ +struct ovs_action_push_mpls { + __be32 mpls_lse; + __be16 mpls_ethertype; /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC */ +}; + /** * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument. * @vlan_tpid: Tag protocol identifier (TPID) to push. @@ -534,6 +554,15 @@ struct ovs_action_hash { * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in * the nested %OVS_SAMPLE_ATTR_* attributes. + * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the + * top of the packets MPLS label stack. Set the ethertype of the + * encapsulating frame to either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC to + * indicate the new packet contents. + * @OVS_ACTION_ATTR_POP_MPLS: Pop an MPLS label stack entry off of the + * packet's MPLS label stack. Set the encapsulating frame's ethertype to + * indicate the new packet contents. This could potentially still be + * %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there + * is no MPLS label stack, as determined by ethertype, no action is taken. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -550,6 +579,9 @@ enum ovs_action_attr { OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ OVS_ACTION_ATTR_RECIRC, /* u32 recirc_id. */ OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */ + OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */ + OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */ + __OVS_ACTION_ATTR_MAX }; diff --git a/net/core/dev.c b/net/core/dev.c index 40be481268de..70bb609c283d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -118,6 +118,7 @@ #include #include #include +#include #include #include #include @@ -2530,7 +2531,7 @@ static netdev_features_t net_mpls_features(struct sk_buff *skb, netdev_features_t features, __be16 type) { - if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC)) + if (eth_p_mpls(type)) features &= skb->dev->mpls_features; return features; diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 2a9673e39ca1..454ce12efbbf 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -30,6 +30,7 @@ config OPENVSWITCH config OPENVSWITCH_GRE tristate "Open vSwitch GRE tunneling support" + select NET_MPLS_GSO depends on INET depends on OPENVSWITCH depends on NET_IPGRE_DEMUX diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 922c133b1933..930b1b6e4cef 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -28,10 +28,12 @@ #include #include #include + #include #include #include #include +#include #include #include "datapath.h" @@ -118,6 +120,92 @@ static int make_writable(struct sk_buff *skb, int write_len) return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); } +static int push_mpls(struct sk_buff *skb, + const struct ovs_action_push_mpls *mpls) +{ + __be32 *new_mpls_lse; + struct ethhdr *hdr; + + /* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */ + if (skb->encapsulation) + return -ENOTSUPP; + + if (skb_cow_head(skb, MPLS_HLEN) < 0) + return -ENOMEM; + + skb_push(skb, MPLS_HLEN); + memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), + skb->mac_len); + skb_reset_mac_header(skb); + + new_mpls_lse = (__be32 *)skb_mpls_header(skb); + *new_mpls_lse = mpls->mpls_lse; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse, + MPLS_HLEN, 0)); + + hdr = eth_hdr(skb); + hdr->h_proto = mpls->mpls_ethertype; + + skb_set_inner_protocol(skb, skb->protocol); + skb->protocol = mpls->mpls_ethertype; + + return 0; +} + +static int pop_mpls(struct sk_buff *skb, const __be16 ethertype) +{ + struct ethhdr *hdr; + int err; + + err = make_writable(skb, skb->mac_len + MPLS_HLEN); + if (unlikely(err)) + return err; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_sub(skb->csum, + csum_partial(skb_mpls_header(skb), + MPLS_HLEN, 0)); + + memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), + skb->mac_len); + + __skb_pull(skb, MPLS_HLEN); + skb_reset_mac_header(skb); + + /* skb_mpls_header() is used to locate the ethertype + * field correctly in the presence of VLAN tags. + */ + hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN); + hdr->h_proto = ethertype; + if (eth_p_mpls(skb->protocol)) + skb->protocol = ethertype; + return 0; +} + +static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse) +{ + __be32 *stack; + int err; + + err = make_writable(skb, skb->mac_len + MPLS_HLEN); + if (unlikely(err)) + return err; + + stack = (__be32 *)skb_mpls_header(skb); + if (skb->ip_summed == CHECKSUM_COMPLETE) { + __be32 diff[] = { ~(*stack), *mpls_lse }; + + skb->csum = ~csum_partial((char *)diff, sizeof(diff), + ~skb->csum); + } + + *stack = *mpls_lse; + + return 0; +} + /* remove VLAN header from packet and update csum accordingly. */ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) { @@ -140,10 +228,12 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) vlan_set_encap_proto(skb, vhdr); skb->mac_header += VLAN_HLEN; + if (skb_network_offset(skb) < ETH_HLEN) skb_set_network_header(skb, ETH_HLEN); - skb_reset_mac_len(skb); + /* Update mac_len for subsequent MPLS actions */ + skb_reset_mac_len(skb); return 0; } @@ -186,6 +276,8 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag)) return -ENOMEM; + /* Update mac_len for subsequent MPLS actions */ + skb->mac_len += VLAN_HLEN; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(skb->csum, csum_partial(skb->data @@ -612,6 +704,10 @@ static int execute_set_action(struct sk_buff *skb, case OVS_KEY_ATTR_SCTP: err = set_sctp(skb, nla_data(nested_attr)); break; + + case OVS_KEY_ATTR_MPLS: + err = set_mpls(skb, nla_data(nested_attr)); + break; } return err; @@ -690,6 +786,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, execute_hash(skb, key, a); break; + case OVS_ACTION_ATTR_PUSH_MPLS: + err = push_mpls(skb, nla_data(a)); + break; + + case OVS_ACTION_ATTR_POP_MPLS: + err = pop_mpls(skb, nla_get_be16(a)); + break; + case OVS_ACTION_ATTR_PUSH_VLAN: err = push_vlan(skb, nla_data(a)); if (unlikely(err)) /* skb already freed. */ diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f18302f32049..688cb9bc0ef1 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -560,7 +560,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], - &flow->key, 0, &acts); + &flow->key, &acts); if (err) goto err_flow_free; @@ -846,7 +846,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_kfree_flow; error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, - 0, &acts); + &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); goto err_kfree_acts; @@ -953,7 +953,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, return acts; ovs_flow_mask_key(&masked_key, key, mask); - error = ovs_nla_copy_actions(a, &masked_key, 0, &acts); + error = ovs_nla_copy_actions(a, &masked_key, &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); kfree(acts); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 2b78789ea7c5..90a21010fc8f 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +43,7 @@ #include #include #include +#include #include #include "datapath.h" @@ -480,6 +482,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) return -ENOMEM; skb_reset_network_header(skb); + skb_reset_mac_len(skb); __skb_push(skb, skb->data - skb_mac_header(skb)); /* Network layer. */ @@ -584,6 +587,33 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) memset(&key->ip, 0, sizeof(key->ip)); memset(&key->ipv4, 0, sizeof(key->ipv4)); } + } else if (eth_p_mpls(key->eth.type)) { + size_t stack_len = MPLS_HLEN; + + /* In the presence of an MPLS label stack the end of the L2 + * header and the beginning of the L3 header differ. + * + * Advance network_header to the beginning of the L3 + * header. mac_len corresponds to the end of the L2 header. + */ + while (1) { + __be32 lse; + + error = check_header(skb, skb->mac_len + stack_len); + if (unlikely(error)) + return 0; + + memcpy(&lse, skb_network_header(skb), MPLS_HLEN); + + if (stack_len == MPLS_HLEN) + memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); + + skb_set_network_header(skb, skb->mac_len + stack_len); + if (lse & htonl(MPLS_LS_S_MASK)) + break; + + stack_len += MPLS_HLEN; + } } else if (key->eth.type == htons(ETH_P_IPV6)) { int nh_len; /* IPv6 Header + Extensions */ diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 71813318c8c7..4962bee81a11 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -102,12 +102,17 @@ struct sw_flow_key { __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ __be16 type; /* Ethernet frame type. */ } eth; - struct { - u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ - u8 tos; /* IP ToS. */ - u8 ttl; /* IP TTL/hop limit. */ - u8 frag; /* One of OVS_FRAG_TYPE_*. */ - } ip; + union { + struct { + __be32 top_lse; /* top label stack entry */ + } mpls; + struct { + u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ + u8 tos; /* IP ToS. */ + u8 ttl; /* IP TTL/hop limit. */ + u8 frag; /* One of OVS_FRAG_TYPE_*. */ + } ip; + }; struct { __be16 src; /* TCP/UDP/SCTP source port. */ __be16 dst; /* TCP/UDP/SCTP destination port. */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 939bcb32100f..569309c49cc0 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "flow_netlink.h" @@ -134,7 +135,8 @@ static bool match_validate(const struct sw_flow_match *match, | (1 << OVS_KEY_ATTR_ICMP) | (1 << OVS_KEY_ATTR_ICMPV6) | (1 << OVS_KEY_ATTR_ARP) - | (1 << OVS_KEY_ATTR_ND)); + | (1 << OVS_KEY_ATTR_ND) + | (1 << OVS_KEY_ATTR_MPLS)); /* Always allowed mask fields. */ mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) @@ -149,6 +151,12 @@ static bool match_validate(const struct sw_flow_match *match, mask_allowed |= 1 << OVS_KEY_ATTR_ARP; } + if (eth_p_mpls(match->key->eth.type)) { + key_expected |= 1 << OVS_KEY_ATTR_MPLS; + if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + mask_allowed |= 1 << OVS_KEY_ATTR_MPLS; + } + if (match->key->eth.type == htons(ETH_P_IP)) { key_expected |= 1 << OVS_KEY_ATTR_IPV4; if (match->mask && (match->mask->key.eth.type == htons(0xffff))) @@ -266,6 +274,7 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32), [OVS_KEY_ATTR_DP_HASH] = sizeof(u32), [OVS_KEY_ATTR_TUNNEL] = -1, + [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls), }; static bool is_all_zero(const u8 *fp, size_t size) @@ -735,6 +744,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, attrs &= ~(1 << OVS_KEY_ATTR_ARP); } + if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { + const struct ovs_key_mpls *mpls_key; + + mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); + SW_FLOW_KEY_PUT(match, mpls.top_lse, + mpls_key->mpls_lse, is_mask); + + attrs &= ~(1 << OVS_KEY_ATTR_MPLS); + } + if (attrs & (1 << OVS_KEY_ATTR_TCP)) { const struct ovs_key_tcp *tcp_key; @@ -1140,6 +1159,14 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, arp_key->arp_op = htons(output->ip.proto); ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); + } else if (eth_p_mpls(swkey->eth.type)) { + struct ovs_key_mpls *mpls_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); + if (!nla) + goto nla_put_failure; + mpls_key = nla_data(nla); + mpls_key->mpls_lse = output->mpls.top_lse; } if ((swkey->eth.type == htons(ETH_P_IP) || @@ -1336,9 +1363,15 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, a->nla_len = sfa->actions_len - st_offset; } +static int ovs_nla_copy_actions__(const struct nlattr *attr, + const struct sw_flow_key *key, + int depth, struct sw_flow_actions **sfa, + __be16 eth_type, __be16 vlan_tci); + static int validate_and_copy_sample(const struct nlattr *attr, const struct sw_flow_key *key, int depth, - struct sw_flow_actions **sfa) + struct sw_flow_actions **sfa, + __be16 eth_type, __be16 vlan_tci) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; @@ -1375,7 +1408,8 @@ static int validate_and_copy_sample(const struct nlattr *attr, if (st_acts < 0) return st_acts; - err = ovs_nla_copy_actions(actions, key, depth + 1, sfa); + err = ovs_nla_copy_actions__(actions, key, depth + 1, sfa, + eth_type, vlan_tci); if (err) return err; @@ -1385,10 +1419,10 @@ static int validate_and_copy_sample(const struct nlattr *attr, return 0; } -static int validate_tp_port(const struct sw_flow_key *flow_key) +static int validate_tp_port(const struct sw_flow_key *flow_key, + __be16 eth_type) { - if ((flow_key->eth.type == htons(ETH_P_IP) || - flow_key->eth.type == htons(ETH_P_IPV6)) && + if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) && (flow_key->tp.src || flow_key->tp.dst)) return 0; @@ -1483,7 +1517,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, static int validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key, struct sw_flow_actions **sfa, - bool *set_tun) + bool *set_tun, __be16 eth_type) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); @@ -1508,6 +1542,9 @@ static int validate_set(const struct nlattr *a, break; case OVS_KEY_ATTR_TUNNEL: + if (eth_p_mpls(eth_type)) + return -EINVAL; + *set_tun = true; err = validate_and_copy_set_tun(a, sfa); if (err) @@ -1515,7 +1552,7 @@ static int validate_set(const struct nlattr *a, break; case OVS_KEY_ATTR_IPV4: - if (flow_key->eth.type != htons(ETH_P_IP)) + if (eth_type != htons(ETH_P_IP)) return -EINVAL; if (!flow_key->ip.proto) @@ -1531,7 +1568,7 @@ static int validate_set(const struct nlattr *a, break; case OVS_KEY_ATTR_IPV6: - if (flow_key->eth.type != htons(ETH_P_IPV6)) + if (eth_type != htons(ETH_P_IPV6)) return -EINVAL; if (!flow_key->ip.proto) @@ -1553,19 +1590,24 @@ static int validate_set(const struct nlattr *a, if (flow_key->ip.proto != IPPROTO_TCP) return -EINVAL; - return validate_tp_port(flow_key); + return validate_tp_port(flow_key, eth_type); case OVS_KEY_ATTR_UDP: if (flow_key->ip.proto != IPPROTO_UDP) return -EINVAL; - return validate_tp_port(flow_key); + return validate_tp_port(flow_key, eth_type); + + case OVS_KEY_ATTR_MPLS: + if (!eth_p_mpls(eth_type)) + return -EINVAL; + break; case OVS_KEY_ATTR_SCTP: if (flow_key->ip.proto != IPPROTO_SCTP) return -EINVAL; - return validate_tp_port(flow_key); + return validate_tp_port(flow_key, eth_type); default: return -EINVAL; @@ -1609,12 +1651,13 @@ static int copy_action(const struct nlattr *from, return 0; } -int ovs_nla_copy_actions(const struct nlattr *attr, - const struct sw_flow_key *key, - int depth, - struct sw_flow_actions **sfa) +static int ovs_nla_copy_actions__(const struct nlattr *attr, + const struct sw_flow_key *key, + int depth, struct sw_flow_actions **sfa, + __be16 eth_type, __be16 vlan_tci) { const struct nlattr *a; + bool out_tnl_port = false; int rem, err; if (depth >= SAMPLE_ACTION_DEPTH) @@ -1626,6 +1669,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr, [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), [OVS_ACTION_ATTR_RECIRC] = sizeof(u32), [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, + [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls), + [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16), [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), [OVS_ACTION_ATTR_POP_VLAN] = 0, [OVS_ACTION_ATTR_SET] = (u32)-1, @@ -1655,6 +1700,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_OUTPUT: if (nla_get_u32(a) >= DP_MAX_PORTS) return -EINVAL; + out_tnl_port = false; + break; case OVS_ACTION_ATTR_HASH: { @@ -1671,6 +1718,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr, } case OVS_ACTION_ATTR_POP_VLAN: + vlan_tci = htons(0); break; case OVS_ACTION_ATTR_PUSH_VLAN: @@ -1679,19 +1727,66 @@ int ovs_nla_copy_actions(const struct nlattr *attr, return -EINVAL; if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) return -EINVAL; + vlan_tci = vlan->vlan_tci; break; case OVS_ACTION_ATTR_RECIRC: break; + case OVS_ACTION_ATTR_PUSH_MPLS: { + const struct ovs_action_push_mpls *mpls = nla_data(a); + + /* Networking stack do not allow simultaneous Tunnel + * and MPLS GSO. + */ + if (out_tnl_port) + return -EINVAL; + + if (!eth_p_mpls(mpls->mpls_ethertype)) + return -EINVAL; + /* Prohibit push MPLS other than to a white list + * for packets that have a known tag order. + */ + if (vlan_tci & htons(VLAN_TAG_PRESENT) || + (eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6) && + eth_type != htons(ETH_P_ARP) && + eth_type != htons(ETH_P_RARP) && + !eth_p_mpls(eth_type))) + return -EINVAL; + eth_type = mpls->mpls_ethertype; + break; + } + + case OVS_ACTION_ATTR_POP_MPLS: + if (vlan_tci & htons(VLAN_TAG_PRESENT) || + !eth_p_mpls(eth_type)) + return -EINVAL; + + /* Disallow subsequent L2.5+ set and mpls_pop actions + * as there is no check here to ensure that the new + * eth_type is valid and thus set actions could + * write off the end of the packet or otherwise + * corrupt it. + * + * Support for these actions is planned using packet + * recirculation. + */ + eth_type = htons(0); + break; + case OVS_ACTION_ATTR_SET: - err = validate_set(a, key, sfa, &skip_copy); + err = validate_set(a, key, sfa, + &out_tnl_port, eth_type); if (err) return err; + + skip_copy = out_tnl_port; break; case OVS_ACTION_ATTR_SAMPLE: - err = validate_and_copy_sample(a, key, depth, sfa); + err = validate_and_copy_sample(a, key, depth, sfa, + eth_type, vlan_tci); if (err) return err; skip_copy = true; @@ -1713,6 +1808,14 @@ int ovs_nla_copy_actions(const struct nlattr *attr, return 0; } +int ovs_nla_copy_actions(const struct nlattr *attr, + const struct sw_flow_key *key, + struct sw_flow_actions **sfa) +{ + return ovs_nla_copy_actions__(attr, key, 0, sfa, key->eth.type, + key->eth.tci); +} + static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) { const struct nlattr *a; diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 206e45add888..6355b1d01329 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -49,7 +49,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, const struct nlattr *); int ovs_nla_copy_actions(const struct nlattr *attr, - const struct sw_flow_key *key, int depth, + const struct sw_flow_key *key, struct sw_flow_actions **sfa); int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb); -- cgit v1.2.3-71-gd317 From 1a4e96a0e989200f7180264e27b22e9a85f3fcc8 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Tue, 30 Sep 2014 10:52:32 -0700 Subject: openvswitch: Fix the type of struct ovs_key_nd nd_target field. Should be the same as other IPv6 address fields. Current master produces sparse warnings without this change. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- include/uapi/linux/openvswitch.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 631056b66f80..26c36c4cf7e2 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -400,9 +400,9 @@ struct ovs_key_arp { }; struct ovs_key_nd { - __u32 nd_target[4]; - __u8 nd_sll[ETH_ALEN]; - __u8 nd_tll[ETH_ALEN]; + __be32 nd_target[4]; + __u8 nd_sll[ETH_ALEN]; + __u8 nd_tll[ETH_ALEN]; }; /** -- cgit v1.2.3-71-gd317 From df32dd2054b6edcbdfd3a31aec24e7fd0edba2ac Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 3 Nov 2014 12:42:34 -0800 Subject: uapi: resort Kbuild entries The entries in the Kbuild files are incorrectly sorted. Matters for aesthetics only. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/Kbuild | 88 +++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 44 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 4c94f31a8c99..72298b6887ac 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -37,27 +37,27 @@ header-y += aio_abi.h header-y += apm_bios.h header-y += arcfb.h header-y += atalk.h -header-y += atm.h -header-y += atm_eni.h -header-y += atm_he.h -header-y += atm_idt77105.h -header-y += atm_nicstar.h -header-y += atm_tcp.h -header-y += atm_zatm.h header-y += atmapi.h header-y += atmarp.h header-y += atmbr2684.h header-y += atmclip.h header-y += atmdev.h +header-y += atm_eni.h +header-y += atm.h +header-y += atm_he.h +header-y += atm_idt77105.h header-y += atmioc.h header-y += atmlec.h header-y += atmmpc.h +header-y += atm_nicstar.h header-y += atmppp.h header-y += atmsap.h header-y += atmsvc.h +header-y += atm_tcp.h +header-y += atm_zatm.h header-y += audit.h -header-y += auto_fs.h header-y += auto_fs4.h +header-y += auto_fs.h header-y += auxvec.h header-y += ax25.h header-y += b1lli.h @@ -67,8 +67,8 @@ header-y += bfs_fs.h header-y += binfmts.h header-y += blkpg.h header-y += blktrace_api.h -header-y += bpf.h header-y += bpf_common.h +header-y += bpf.h header-y += bpqether.h header-y += bsg.h header-y += btrfs.h @@ -93,21 +93,21 @@ header-y += cyclades.h header-y += cycx_cfm.h header-y += dcbnl.h header-y += dccp.h -header-y += dlm.h +header-y += dlmconstants.h header-y += dlm_device.h +header-y += dlm.h header-y += dlm_netlink.h header-y += dlm_plock.h -header-y += dlmconstants.h header-y += dm-ioctl.h header-y += dm-log-userspace.h header-y += dn.h header-y += dqblk_xfs.h header-y += edd.h header-y += efs_fs_sb.h +header-y += elfcore.h header-y += elf-em.h header-y += elf-fdpic.h header-y += elf.h -header-y += elfcore.h header-y += errno.h header-y += errqueue.h header-y += ethtool.h @@ -131,15 +131,15 @@ header-y += fsl_hypervisor.h header-y += fuse.h header-y += futex.h header-y += gameport.h -header-y += gen_stats.h header-y += genetlink.h +header-y += gen_stats.h header-y += gfs2_ondisk.h header-y += gigaset_dev.h -header-y += hdlc.h header-y += hdlcdrv.h +header-y += hdlc.h header-y += hdreg.h -header-y += hid.h header-y += hiddev.h +header-y += hid.h header-y += hidraw.h header-y += hpet.h header-y += hsr_netlink.h @@ -151,7 +151,6 @@ header-y += i2o-dev.h header-y += i8k.h header-y += icmp.h header-y += icmpv6.h -header-y += if.h header-y += if_addr.h header-y += if_addrlabel.h header-y += if_alg.h @@ -165,6 +164,7 @@ header-y += if_ether.h header-y += if_fc.h header-y += if_fddi.h header-y += if_frad.h +header-y += if.h header-y += if_hippi.h header-y += if_infiniband.h header-y += if_link.h @@ -182,40 +182,40 @@ header-y += if_tunnel.h header-y += if_vlan.h header-y += if_x25.h header-y += igmp.h -header-y += in.h header-y += in6.h -header-y += in_route.h header-y += inet_diag.h +header-y += in.h header-y += inotify.h header-y += input.h +header-y += in_route.h header-y += ioctl.h -header-y += ip.h header-y += ip6_tunnel.h -header-y += ip_vs.h header-y += ipc.h +header-y += ip.h header-y += ipmi.h header-y += ipmi_msgdefs.h header-y += ipsec.h header-y += ipv6.h header-y += ipv6_route.h +header-y += ip_vs.h header-y += ipx.h header-y += irda.h header-y += irqnr.h -header-y += isdn.h header-y += isdn_divertif.h -header-y += isdn_ppp.h +header-y += isdn.h header-y += isdnif.h +header-y += isdn_ppp.h header-y += iso_fs.h -header-y += ivtv.h header-y += ivtvfb.h +header-y += ivtv.h header-y += ixjuser.h header-y += jffs2.h header-y += joystick.h -header-y += kd.h header-y += kdev_t.h -header-y += kernel-page-flags.h -header-y += kernel.h +header-y += kd.h header-y += kernelcapi.h +header-y += kernel.h +header-y += kernel-page-flags.h header-y += kexec.h header-y += keyboard.h header-y += keyctl.h @@ -231,6 +231,7 @@ ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm_para.h \ header-y += kvm_para.h endif +header-y += hw_breakpoint.h header-y += l2tp.h header-y += libc-compat.h header-y += limits.h @@ -255,43 +256,43 @@ header-y += mman.h header-y += mmtimer.h header-y += mpls.h header-y += mqueue.h -header-y += mroute.h header-y += mroute6.h +header-y += mroute.h header-y += msdos_fs.h header-y += msg.h header-y += mtio.h -header-y += n_r3964.h header-y += nbd.h -header-y += ncp.h header-y += ncp_fs.h +header-y += ncp.h header-y += ncp_mount.h header-y += ncp_no.h header-y += neighbour.h -header-y += net.h -header-y += net_dropmon.h -header-y += net_tstamp.h header-y += netconf.h header-y += netdevice.h -header-y += netlink_diag.h -header-y += netfilter.h +header-y += net_dropmon.h header-y += netfilter_arp.h header-y += netfilter_bridge.h header-y += netfilter_decnet.h +header-y += netfilter.h header-y += netfilter_ipv4.h header-y += netfilter_ipv6.h +header-y += net.h +header-y += netlink_diag.h header-y += netlink.h header-y += netrom.h +header-y += net_tstamp.h header-y += nfc.h -header-y += nfs.h header-y += nfs2.h header-y += nfs3.h header-y += nfs4.h header-y += nfs4_mount.h +header-y += nfsacl.h header-y += nfs_fs.h +header-y += nfs.h header-y += nfs_idmap.h header-y += nfs_mount.h -header-y += nfsacl.h header-y += nl80211.h +header-y += n_r3964.h header-y += nubus.h header-y += nvme.h header-y += nvram.h @@ -311,16 +312,16 @@ header-y += pfkeyv2.h header-y += pg.h header-y += phantom.h header-y += phonet.h +header-y += pktcdvd.h header-y += pkt_cls.h header-y += pkt_sched.h -header-y += pktcdvd.h header-y += pmu.h header-y += poll.h header-y += posix_types.h header-y += ppdev.h header-y += ppp-comp.h -header-y += ppp-ioctl.h header-y += ppp_defs.h +header-y += ppp-ioctl.h header-y += pps.h header-y += prctl.h header-y += psci.h @@ -352,13 +353,13 @@ header-y += seccomp.h header-y += securebits.h header-y += selinux_netlink.h header-y += sem.h -header-y += serial.h header-y += serial_core.h +header-y += serial.h header-y += serial_reg.h header-y += serio.h header-y += shm.h -header-y += signal.h header-y += signalfd.h +header-y += signal.h header-y += smiapp.h header-y += snmp.h header-y += sock_diag.h @@ -367,8 +368,8 @@ header-y += sockios.h header-y += som.h header-y += sonet.h header-y += sonypi.h -header-y += sound.h header-y += soundcard.h +header-y += sound.h header-y += stat.h header-y += stddef.h header-y += string.h @@ -387,11 +388,11 @@ header-y += time.h header-y += times.h header-y += timex.h header-y += tiocl.h -header-y += tipc.h header-y += tipc_config.h +header-y += tipc.h header-y += toshiba.h -header-y += tty.h header-y += tty_flags.h +header-y += tty.h header-y += types.h header-y += udf_fs_i.h header-y += udp.h @@ -437,6 +438,5 @@ header-y += wireless.h header-y += x25.h header-y += xattr.h header-y += xfrm.h -header-y += hw_breakpoint.h header-y += zorro.h header-y += zorro_ids.h -- cgit v1.2.3-71-gd317 From 36cbb2452cbafca64dcdd3578047433787900cf0 Mon Sep 17 00:00:00 2001 From: Rick Jones Date: Thu, 6 Nov 2014 10:37:54 -0800 Subject: udp: Increment UDP_MIB_IGNOREDMULTI for arriving unmatched multicasts As NIC multicast filtering isn't perfect, and some platforms are quite content to spew broadcasts, we should not trigger an event for skb:kfree_skb when we do not have a match for such an incoming datagram. We do though want to avoid sweeping the matter under the rug entirely, so increment a suitable statistic. This incorporates feedback from David L. Stevens, Karl Neiss and Eric Dumazet. V3 - use bool per David Miller Signed-off-by: Rick Jones Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 1 + net/ipv4/proc.c | 1 + net/ipv4/udp.c | 12 +++++++++--- net/ipv6/proc.c | 1 + net/ipv6/udp.c | 11 ++++++++--- 5 files changed, 20 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index df40137f33dd..30f541b32895 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -156,6 +156,7 @@ enum UDP_MIB_RCVBUFERRORS, /* RcvbufErrors */ UDP_MIB_SNDBUFERRORS, /* SndbufErrors */ UDP_MIB_CSUMERRORS, /* InCsumErrors */ + UDP_MIB_IGNOREDMULTI, /* IgnoredMulti */ __UDP_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index f0d4eb8b99b9..6513ade8d6dc 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -181,6 +181,7 @@ static const struct snmp_mib snmp4_udp_list[] = { SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS), SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_ITEM("InCsumErrors", UDP_MIB_CSUMERRORS), + SNMP_MIB_ITEM("IgnoredMulti", UDP_MIB_IGNOREDMULTI), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index df19027f44f3..5d0fdca8e965 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1647,7 +1647,8 @@ static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct udphdr *uh, __be32 saddr, __be32 daddr, - struct udp_table *udptable) + struct udp_table *udptable, + int proto) { struct sock *sk, *stack[256 / sizeof(struct sock *)]; struct hlist_nulls_node *node; @@ -1656,6 +1657,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, int dif = skb->dev->ifindex; unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node); unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); + bool inner_flushed = false; if (use_hash2) { hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & @@ -1674,6 +1676,7 @@ start_lookup: dif, hnum)) { if (unlikely(count == ARRAY_SIZE(stack))) { flush_stack(stack, count, skb, ~0); + inner_flushed = true; count = 0; } stack[count++] = sk; @@ -1695,7 +1698,10 @@ start_lookup: if (count) { flush_stack(stack, count, skb, count - 1); } else { - kfree_skb(skb); + if (!inner_flushed) + UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); + consume_skb(skb); } return 0; } @@ -1781,7 +1787,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) return __udp4_lib_mcast_deliver(net, skb, uh, - saddr, daddr, udptable); + saddr, daddr, udptable, proto); sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk != NULL) { diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 1752cd0b4882..679253d0af84 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -136,6 +136,7 @@ static const struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS), SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS), + SNMP_MIB_ITEM("Udp6IgnoredMulti", UDP_MIB_IGNOREDMULTI), SNMP_MIB_SENTINEL }; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9b6809232b17..b756355e9739 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -771,7 +771,7 @@ static void udp6_csum_zero_error(struct sk_buff *skb) */ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, - struct udp_table *udptable) + struct udp_table *udptable, int proto) { struct sock *sk, *stack[256 / sizeof(struct sock *)]; const struct udphdr *uh = udp_hdr(skb); @@ -781,6 +781,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, int dif = inet6_iif(skb); unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node); unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); + bool inner_flushed = false; if (use_hash2) { hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) & @@ -803,6 +804,7 @@ start_lookup: (uh->check || udp_sk(sk)->no_check6_rx)) { if (unlikely(count == ARRAY_SIZE(stack))) { flush_stack(stack, count, skb, ~0); + inner_flushed = true; count = 0; } stack[count++] = sk; @@ -821,7 +823,10 @@ start_lookup: if (count) { flush_stack(stack, count, skb, count - 1); } else { - kfree_skb(skb); + if (!inner_flushed) + UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); + consume_skb(skb); } return 0; } @@ -873,7 +878,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, */ if (ipv6_addr_is_multicast(daddr)) return __udp6_lib_mcast_deliver(net, skb, - saddr, daddr, udptable); + saddr, daddr, udptable, proto); /* Unicast */ -- cgit v1.2.3-71-gd317 From ce674173e9f4ef7fd0dc04ea0773cdedfbf8e366 Mon Sep 17 00:00:00 2001 From: Ana Rey Date: Mon, 3 Nov 2014 18:10:50 +0100 Subject: netfilter: nft_meta: add cgroup support This allows you to filter traffic by process control group (cgroup). Signed-off-by: Ana Rey Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nft_meta.c | 7 +++++++ 2 files changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 16f62a5cf04d..832bc46db78b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -579,6 +579,7 @@ enum nft_exthdr_attributes { * @NFT_META_CPU: cpu id through smp_processor_id() * @NFT_META_IIFGROUP: packet input interface group * @NFT_META_OIFGROUP: packet output interface group + * @NFT_META_CGROUP: socket control group (skb->sk->sk_classid) */ enum nft_meta_keys { NFT_META_LEN, @@ -604,6 +605,7 @@ enum nft_meta_keys { NFT_META_CPU, NFT_META_IIFGROUP, NFT_META_OIFGROUP, + NFT_META_CGROUP, }; /** diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 1e7c076ca63a..e99911eda915 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -165,6 +165,12 @@ void nft_meta_get_eval(const struct nft_expr *expr, goto err; dest->data[0] = out->group; break; + case NFT_META_CGROUP: + if (skb->sk == NULL) + break; + + dest->data[0] = skb->sk->sk_classid; + break; default: WARN_ON(1); goto err; @@ -240,6 +246,7 @@ int nft_meta_get_init(const struct nft_ctx *ctx, case NFT_META_CPU: case NFT_META_IIFGROUP: case NFT_META_OIFGROUP: + case NFT_META_CGROUP: break; default: return -EOPNOTSUPP; -- cgit v1.2.3-71-gd317 From 8f0aad6f35f7e8b3118b7b8a65e8e76b135cc4cb Mon Sep 17 00:00:00 2001 From: Wenyu Zhang Date: Thu, 6 Nov 2014 06:51:24 -0800 Subject: openvswitch: Extend packet attribute for egress tunnel info OVS vswitch has extended IPFIX exporter to export tunnel headers to improve network visibility. To export this information userspace needs to know egress tunnel for given packet. By extending packet attributes datapath can export egress tunnel info for given packet. So that userspace can ask for egress tunnel info in userspace action. This information is used to build IPFIX data for given flow. Signed-off-by: Wenyu Zhang Acked-by: Romain Lenglet Acked-by: Ben Pfaff Signed-off-by: Pravin B Shelar --- include/uapi/linux/openvswitch.h | 13 +++++++++ net/openvswitch/actions.c | 19 ++++++++++++ net/openvswitch/datapath.c | 21 +++++++++++--- net/openvswitch/datapath.h | 2 ++ net/openvswitch/flow.h | 62 +++++++++++++++++++++++++++++++--------- net/openvswitch/flow_netlink.c | 54 +++++++++++++++++++++++++++------- net/openvswitch/flow_netlink.h | 3 ++ net/openvswitch/vport-geneve.c | 21 +++++++++++++- net/openvswitch/vport-gre.c | 12 +++++++- net/openvswitch/vport-vxlan.c | 24 +++++++++++++++- net/openvswitch/vport.c | 61 +++++++++++++++++++++++++++++++++++++++ net/openvswitch/vport.h | 14 +++++++++ 12 files changed, 275 insertions(+), 31 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 26c36c4cf7e2..cf8185661e52 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -157,6 +157,11 @@ enum ovs_packet_cmd { * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content * specified there. + * @OVS_PACKET_ATTR_EGRESS_TUN_KEY: Present for an %OVS_PACKET_CMD_ACTION + * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an + * %OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute, which is sent only if the + * output port is actually a tunnel port. Contains the output tunnel key + * extracted from the packet as nested %OVS_TUNNEL_KEY_ATTR_* attributes. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_PACKET_* commands. @@ -167,6 +172,8 @@ enum ovs_packet_attr { OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */ OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */ + OVS_PACKET_ATTR_EGRESS_TUN_KEY, /* Nested OVS_TUNNEL_KEY_ATTR_* + attributes. */ __OVS_PACKET_ATTR_MAX }; @@ -315,6 +322,8 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ OVS_TUNNEL_KEY_ATTR_OAM, /* No argument. OAM frame. */ OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */ + OVS_TUNNEL_KEY_ATTR_TP_SRC, /* be16 src Transport Port. */ + OVS_TUNNEL_KEY_ATTR_TP_DST, /* be16 dst Transport Port. */ __OVS_TUNNEL_KEY_ATTR_MAX }; @@ -480,11 +489,15 @@ enum ovs_sample_attr { * message should be sent. Required. * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA. + * @OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: If present, u32 output port to get + * tunnel info. */ enum ovs_userspace_attr { OVS_USERSPACE_ATTR_UNSPEC, OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */ OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */ + OVS_USERSPACE_ATTR_EGRESS_TUN_PORT, /* Optional, u32 output port + * to get tunnel info. */ __OVS_USERSPACE_ATTR_MAX }; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index f7e589159e4a..ceb618cf1292 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -564,6 +564,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port) static int output_userspace(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, const struct nlattr *attr) { + struct ovs_tunnel_info info; struct dp_upcall_info upcall; const struct nlattr *a; int rem; @@ -572,6 +573,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, upcall.key = key; upcall.userdata = NULL; upcall.portid = 0; + upcall.egress_tun_info = NULL; for (a = nla_data(attr), rem = nla_len(attr); rem > 0; a = nla_next(a, &rem)) { @@ -583,7 +585,24 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, case OVS_USERSPACE_ATTR_PID: upcall.portid = nla_get_u32(a); break; + + case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: { + /* Get out tunnel info. */ + struct vport *vport; + + vport = ovs_vport_rcu(dp, nla_get_u32(a)); + if (vport) { + int err; + + err = ovs_vport_get_egress_tun_info(vport, skb, + &info); + if (!err) + upcall.egress_tun_info = &info; + } + break; } + + } /* End of switch. */ } return ovs_dp_upcall(dp, skb, &upcall); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6cfb44f3a7f0..c2ac340e19fb 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -274,6 +274,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) upcall.key = key; upcall.userdata = NULL; upcall.portid = ovs_vport_find_upcall_portid(p, skb); + upcall.egress_tun_info = NULL; error = ovs_dp_upcall(dp, skb, &upcall); if (unlikely(error)) kfree_skb(skb); @@ -375,7 +376,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, return err; } -static size_t upcall_msg_size(const struct nlattr *userdata, +static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, unsigned int hdrlen) { size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) @@ -383,8 +384,12 @@ static size_t upcall_msg_size(const struct nlattr *userdata, + nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */ /* OVS_PACKET_ATTR_USERDATA */ - if (userdata) - size += NLA_ALIGN(userdata->nla_len); + if (upcall_info->userdata) + size += NLA_ALIGN(upcall_info->userdata->nla_len); + + /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */ + if (upcall_info->egress_tun_info) + size += nla_total_size(ovs_tun_key_attr_size()); return size; } @@ -440,7 +445,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, else hlen = skb->len; - len = upcall_msg_size(upcall_info->userdata, hlen); + len = upcall_msg_size(upcall_info, hlen); user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC); if (!user_skb) { err = -ENOMEM; @@ -461,6 +466,14 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, nla_len(upcall_info->userdata), nla_data(upcall_info->userdata)); + if (upcall_info->egress_tun_info) { + nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); + err = ovs_nla_put_egress_tunnel_key(user_skb, + upcall_info->egress_tun_info); + BUG_ON(err); + nla_nest_end(user_skb, nla); + } + /* Only reserve room for attribute header, packet data is added * in skb_zerocopy() */ if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 1c56a80d6677..2bc577bf9b31 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -114,12 +114,14 @@ struct ovs_skb_cb { * @pid: Netlink PID to which packet should be sent. If @pid is 0 then no * packet is sent and the packet is accounted in the datapath's @n_lost * counter. + * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY. */ struct dp_upcall_info { u8 cmd; const struct sw_flow_key *key; const struct nlattr *userdata; u32 portid; + const struct ovs_tunnel_info *egress_tun_info; }; /** diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 4962bee81a11..543b358ee57f 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -37,8 +37,8 @@ struct sk_buff; /* Used to memset ovs_key_ipv4_tunnel padding. */ #define OVS_TUNNEL_KEY_SIZE \ - (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + \ - FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl)) + (offsetof(struct ovs_key_ipv4_tunnel, tp_dst) + \ + FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, tp_dst)) struct ovs_key_ipv4_tunnel { __be64 tun_id; @@ -47,6 +47,8 @@ struct ovs_key_ipv4_tunnel { __be16 tun_flags; u8 ipv4_tos; u8 ipv4_ttl; + __be16 tp_src; + __be16 tp_dst; } __packed __aligned(4); /* Minimize padding. */ struct ovs_tunnel_info { @@ -64,27 +66,59 @@ struct ovs_tunnel_info { FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \ opt_len)) -static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, - const struct iphdr *iph, - __be64 tun_id, __be16 tun_flags, - struct geneve_opt *opts, - u8 opts_len) +static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, + __be32 saddr, __be32 daddr, + u8 tos, u8 ttl, + __be16 tp_src, + __be16 tp_dst, + __be64 tun_id, + __be16 tun_flags, + struct geneve_opt *opts, + u8 opts_len) { tun_info->tunnel.tun_id = tun_id; - tun_info->tunnel.ipv4_src = iph->saddr; - tun_info->tunnel.ipv4_dst = iph->daddr; - tun_info->tunnel.ipv4_tos = iph->tos; - tun_info->tunnel.ipv4_ttl = iph->ttl; + tun_info->tunnel.ipv4_src = saddr; + tun_info->tunnel.ipv4_dst = daddr; + tun_info->tunnel.ipv4_tos = tos; + tun_info->tunnel.ipv4_ttl = ttl; tun_info->tunnel.tun_flags = tun_flags; - /* clear struct padding. */ - memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0, - sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE); + /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of + * the upper tunnel are used. + * E.g: GRE over IPSEC, the tp_src and tp_port are zero. + */ + tun_info->tunnel.tp_src = tp_src; + tun_info->tunnel.tp_dst = tp_dst; + + /* Clear struct padding. */ + if (sizeof(tun_info->tunnel) != OVS_TUNNEL_KEY_SIZE) + memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, + 0, sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE); tun_info->options = opts; tun_info->options_len = opts_len; } +static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, + const struct iphdr *iph, + __be16 tp_src, + __be16 tp_dst, + __be64 tun_id, + __be16 tun_flags, + struct geneve_opt *opts, + u8 opts_len) +{ + __ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr, + iph->tos, iph->ttl, + tp_src, tp_dst, + tun_id, tun_flags, + opts, opts_len); +} + +#define OVS_SW_FLOW_KEY_METADATA_SIZE \ + (offsetof(struct sw_flow_key, recirc_id) + \ + FIELD_SIZEOF(struct sw_flow_key, recirc_id)) + struct sw_flow_key { u8 tun_opts[255]; u8 tun_opts_len; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ed3109761827..98a3e96b7d93 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -245,6 +245,24 @@ static bool match_validate(const struct sw_flow_match *match, return true; } +size_t ovs_tun_key_attr_size(void) +{ + /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider + * updating this function. + */ + return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ + + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ + + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ + + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ + + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ + + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ +} + size_t ovs_key_attr_size(void) { /* Whenever adding new OVS_KEY_ FIELDS, we should consider @@ -254,15 +272,7 @@ size_t ovs_key_attr_size(void) return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ - + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ - + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ - + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ - + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ - + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ - + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ - + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ - + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ - + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + + ovs_tun_key_attr_size() + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ @@ -393,6 +403,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, [OVS_TUNNEL_KEY_ATTR_TTL] = 1, [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, + [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16), + [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16), [OVS_TUNNEL_KEY_ATTR_OAM] = 0, [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1, }; @@ -440,6 +452,14 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, case OVS_TUNNEL_KEY_ATTR_CSUM: tun_flags |= TUNNEL_CSUM; break; + case OVS_TUNNEL_KEY_ATTR_TP_SRC: + SW_FLOW_KEY_PUT(match, tun_key.tp_src, + nla_get_be16(a), is_mask); + break; + case OVS_TUNNEL_KEY_ATTR_TP_DST: + SW_FLOW_KEY_PUT(match, tun_key.tp_dst, + nla_get_be16(a), is_mask); + break; case OVS_TUNNEL_KEY_ATTR_OAM: tun_flags |= TUNNEL_OAM; break; @@ -548,6 +568,12 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_CSUM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) return -EMSGSIZE; + if (output->tp_src && + nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src)) + return -EMSGSIZE; + if (output->tp_dst && + nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst)) + return -EMSGSIZE; if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; @@ -559,7 +585,6 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, return 0; } - static int ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *output, const struct geneve_opt *tun_opts, @@ -580,6 +605,14 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb, return 0; } +int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, + const struct ovs_tunnel_info *egress_tun_info) +{ + return __ipv4_tun_to_nlattr(skb, &egress_tun_info->tunnel, + egress_tun_info->options, + egress_tun_info->options_len); +} + static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, const struct nlattr **a, bool is_mask) { @@ -1653,6 +1686,7 @@ static int validate_userspace(const struct nlattr *attr) static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, + [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 }, }; struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; int error; diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index eb0b177300ad..90bbe3785504 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -37,6 +37,7 @@ #include "flow.h" +size_t ovs_tun_key_attr_size(void); size_t ovs_key_attr_size(void); void ovs_match_init(struct sw_flow_match *match, @@ -49,6 +50,8 @@ int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *); int ovs_nla_get_match(struct sw_flow_match *match, const struct nlattr *, const struct nlattr *); +int ovs_nla_put_egress_tunnel_key(struct sk_buff *, + const struct ovs_tunnel_info *); int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 70c9765011f4..e31f19c922e2 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -97,7 +97,9 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb) key = vni_to_tunnel_id(geneveh->vni); - ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, flags, + ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), + udp_hdr(skb)->source, udp_hdr(skb)->dest, + key, flags, geneveh->options, opts_len); ovs_vport_receive(vport, skb, &tun_info); @@ -228,6 +230,22 @@ static const char *geneve_get_name(const struct vport *vport) return geneve_port->name; } +static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ovs_tunnel_info *egress_tun_info) +{ + struct geneve_port *geneve_port = geneve_vport(vport); + struct net *net = ovs_dp_get_net(vport->dp); + __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport; + __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); + + /* Get tp_src and tp_dst, refert to geneve_build_header(). + */ + return ovs_tunnel_get_egress_info(egress_tun_info, + ovs_dp_get_net(vport->dp), + OVS_CB(skb)->egress_tun_info, + IPPROTO_UDP, skb->mark, sport, dport); +} + static struct vport_ops ovs_geneve_vport_ops = { .type = OVS_VPORT_TYPE_GENEVE, .create = geneve_tnl_create, @@ -236,6 +254,7 @@ static struct vport_ops ovs_geneve_vport_ops = { .get_options = geneve_get_options, .send = geneve_tnl_send, .owner = THIS_MODULE, + .get_egress_tun_info = geneve_get_egress_tun_info, }; static int __init ovs_geneve_tnl_init(void) diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 00270b608844..8e61a5c6ae7c 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -108,7 +108,7 @@ static int gre_rcv(struct sk_buff *skb, return PACKET_REJECT; key = key_to_tunnel_id(tpi->key, tpi->seq); - ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, + ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), 0, 0, key, filter_tnl_flags(tpi->flags), NULL, 0); ovs_vport_receive(vport, skb, &tun_info); @@ -284,12 +284,22 @@ static void gre_tnl_destroy(struct vport *vport) gre_exit(); } +static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ovs_tunnel_info *egress_tun_info) +{ + return ovs_tunnel_get_egress_info(egress_tun_info, + ovs_dp_get_net(vport->dp), + OVS_CB(skb)->egress_tun_info, + IPPROTO_GRE, skb->mark, 0, 0); +} + static struct vport_ops ovs_gre_vport_ops = { .type = OVS_VPORT_TYPE_GRE, .create = gre_create, .destroy = gre_tnl_destroy, .get_name = gre_get_name, .send = gre_tnl_send, + .get_egress_tun_info = gre_get_egress_tun_info, .owner = THIS_MODULE, }; diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 965e7500c5a6..38f95a52241b 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -69,7 +69,9 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) /* Save outer tunnel values */ iph = ip_hdr(skb); key = cpu_to_be64(ntohl(vx_vni) >> 8); - ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0); + ovs_flow_tun_info_init(&tun_info, iph, + udp_hdr(skb)->source, udp_hdr(skb)->dest, + key, TUNNEL_KEY, NULL, 0); ovs_vport_receive(vport, skb, &tun_info); } @@ -189,6 +191,25 @@ error: return err; } +static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ovs_tunnel_info *egress_tun_info) +{ + struct net *net = ovs_dp_get_net(vport->dp); + struct vxlan_port *vxlan_port = vxlan_vport(vport); + __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; + __be16 src_port; + int port_min; + int port_max; + + inet_get_local_port_range(net, &port_min, &port_max); + src_port = udp_flow_src_port(net, skb, 0, 0, true); + + return ovs_tunnel_get_egress_info(egress_tun_info, net, + OVS_CB(skb)->egress_tun_info, + IPPROTO_UDP, skb->mark, + src_port, dst_port); +} + static const char *vxlan_get_name(const struct vport *vport) { struct vxlan_port *vxlan_port = vxlan_vport(vport); @@ -202,6 +223,7 @@ static struct vport_ops ovs_vxlan_vport_ops = { .get_name = vxlan_get_name, .get_options = vxlan_get_options, .send = vxlan_tnl_send, + .get_egress_tun_info = vxlan_get_egress_tun_info, .owner = THIS_MODULE, }; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 4b5dd18953a6..630e81984b65 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -573,3 +573,64 @@ void ovs_vport_deferred_free(struct vport *vport) call_rcu(&vport->rcu, free_vport_rcu); } EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); + +int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info, + struct net *net, + const struct ovs_tunnel_info *tun_info, + u8 ipproto, + u32 skb_mark, + __be16 tp_src, + __be16 tp_dst) +{ + const struct ovs_key_ipv4_tunnel *tun_key; + struct rtable *rt; + struct flowi4 fl; + + if (unlikely(!tun_info)) + return -EINVAL; + + tun_key = &tun_info->tunnel; + + /* Route lookup to get srouce IP address. + * The process may need to be changed if the corresponding process + * in vports ops changed. + */ + memset(&fl, 0, sizeof(fl)); + fl.daddr = tun_key->ipv4_dst; + fl.saddr = tun_key->ipv4_src; + fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); + fl.flowi4_mark = skb_mark; + fl.flowi4_proto = IPPROTO_GRE; + + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + ip_rt_put(rt); + + /* Generate egress_tun_info based on tun_info, + * saddr, tp_src and tp_dst + */ + __ovs_flow_tun_info_init(egress_tun_info, + fl.saddr, tun_key->ipv4_dst, + tun_key->ipv4_tos, + tun_key->ipv4_ttl, + tp_src, tp_dst, + tun_key->tun_id, + tun_key->tun_flags, + tun_info->options, + tun_info->options_len); + + return 0; +} +EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info); + +int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ovs_tunnel_info *info) +{ + /* get_egress_tun_info() is only implemented on tunnel ports. */ + if (unlikely(!vport->ops->get_egress_tun_info)) + return -EINVAL; + + return vport->ops->get_egress_tun_info(vport, skb, info); +} diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index e41c3facf799..0635d1d761e9 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -58,6 +58,16 @@ u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *); int ovs_vport_send(struct vport *, struct sk_buff *); +int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info, + struct net *net, + const struct ovs_tunnel_info *tun_info, + u8 ipproto, + u32 skb_mark, + __be16 tp_src, + __be16 tp_dst); +int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ovs_tunnel_info *info); + /* The following definitions are for implementers of vport devices: */ struct vport_err_stats { @@ -146,6 +156,8 @@ struct vport_parms { * @get_name: Get the device's name. * @send: Send a packet on the device. Returns the length of the packet sent, * zero for dropped packets or negative for error. + * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for + * a packet. */ struct vport_ops { enum ovs_vport_type type; @@ -161,6 +173,8 @@ struct vport_ops { const char *(*get_name)(const struct vport *); int (*send)(struct vport *, struct sk_buff *); + int (*get_egress_tun_info)(struct vport *, struct sk_buff *, + struct ovs_tunnel_info *); struct module *owner; struct list_head list; -- cgit v1.2.3-71-gd317 From 05da5898a96c05e32aa9850c9cd89eef29471b13 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 6 Nov 2014 07:03:05 -0800 Subject: openvswitch: Add support for OVS_FLOW_ATTR_PROBE. This new flag is useful for suppressing error logging while probing for datapath features using flow commands. For backwards compatibility reasons the commands are executed normally, but error logging is suppressed. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- include/uapi/linux/openvswitch.h | 2 + net/openvswitch/datapath.c | 49 ++++--- net/openvswitch/datapath.h | 6 +- net/openvswitch/flow.c | 4 +- net/openvswitch/flow.h | 2 +- net/openvswitch/flow_netlink.c | 303 +++++++++++++++++++++------------------ net/openvswitch/flow_netlink.h | 10 +- 7 files changed, 208 insertions(+), 168 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index cf8185661e52..3a6dcaa359b7 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -457,6 +457,8 @@ enum ovs_flow_attr { OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */ OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */ OVS_FLOW_ATTR_MASK, /* Sequence of OVS_KEY_ATTR_* attributes. */ + OVS_FLOW_ATTR_PROBE, /* Flow operation is a feature probe, error + * logging should be suppressed. */ __OVS_FLOW_ATTR_MAX }; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 65561ebb489e..ab141d49bb9d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -526,6 +526,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct vport *input_vport; int len; int err; + bool log = !a[OVS_FLOW_ATTR_PROBE]; err = -EINVAL; if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || @@ -559,12 +560,12 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) goto err_kfree_skb; err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet, - &flow->key); + &flow->key, log); if (err) goto err_flow_free; err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], - &flow->key, &acts); + &flow->key, &acts, log); if (err) goto err_flow_free; @@ -855,15 +856,16 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) struct sw_flow_actions *acts; struct sw_flow_match match; int error; + bool log = !a[OVS_FLOW_ATTR_PROBE]; /* Must have key and actions. */ error = -EINVAL; if (!a[OVS_FLOW_ATTR_KEY]) { - OVS_NLERR("Flow key attribute not present in new flow.\n"); + OVS_NLERR(log, "Flow key attr not present in new flow."); goto error; } if (!a[OVS_FLOW_ATTR_ACTIONS]) { - OVS_NLERR("Flow actions attribute not present in new flow.\n"); + OVS_NLERR(log, "Flow actions attr not present in new flow."); goto error; } @@ -878,8 +880,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) /* Extract key. */ ovs_match_init(&match, &new_flow->unmasked_key, &mask); - error = ovs_nla_get_match(&match, - a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); + error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], + a[OVS_FLOW_ATTR_MASK], log); if (error) goto err_kfree_flow; @@ -887,9 +889,9 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) /* Validate actions. */ error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, - &acts); + &acts, log); if (error) { - OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); + OVS_NLERR(log, "Flow actions may not be safe on all matching packets."); goto err_kfree_flow; } @@ -942,6 +944,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* The unmasked key has to be the same for flow updates. */ if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { + /* Look for any overlapping flow. */ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (!flow) { error = -ENOENT; @@ -984,16 +987,18 @@ error: /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, const struct sw_flow_key *key, - const struct sw_flow_mask *mask) + const struct sw_flow_mask *mask, + bool log) { struct sw_flow_actions *acts; struct sw_flow_key masked_key; int error; ovs_flow_mask_key(&masked_key, key, mask); - error = ovs_nla_copy_actions(a, &masked_key, &acts); + error = ovs_nla_copy_actions(a, &masked_key, &acts, log); if (error) { - OVS_NLERR("Actions may not be safe on all matching packets.\n"); + OVS_NLERR(log, + "Actions may not be safe on all matching packets"); return ERR_PTR(error); } @@ -1012,23 +1017,25 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) struct sw_flow_actions *old_acts = NULL, *acts = NULL; struct sw_flow_match match; int error; + bool log = !a[OVS_FLOW_ATTR_PROBE]; /* Extract key. */ error = -EINVAL; if (!a[OVS_FLOW_ATTR_KEY]) { - OVS_NLERR("Flow key attribute not present in set flow.\n"); + OVS_NLERR(log, "Flow key attribute not present in set flow."); goto error; } ovs_match_init(&match, &key, &mask); - error = ovs_nla_get_match(&match, - a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); + error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], + a[OVS_FLOW_ATTR_MASK], log); if (error) goto error; /* Validate actions. */ if (a[OVS_FLOW_ATTR_ACTIONS]) { - acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask); + acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask, + log); if (IS_ERR(acts)) { error = PTR_ERR(acts); goto error; @@ -1109,14 +1116,16 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_match match; int err; + bool log = !a[OVS_FLOW_ATTR_PROBE]; if (!a[OVS_FLOW_ATTR_KEY]) { - OVS_NLERR("Flow get message rejected, Key attribute missing.\n"); + OVS_NLERR(log, + "Flow get message rejected, Key attribute missing."); return -EINVAL; } ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log); if (err) return err; @@ -1157,10 +1166,12 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_match match; int err; + bool log = !a[OVS_FLOW_ATTR_PROBE]; if (likely(a[OVS_FLOW_ATTR_KEY])) { ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, + log); if (unlikely(err)) return err; } @@ -1250,8 +1261,10 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, + [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, + [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG }, }; static const struct genl_ops dp_flow_genl_ops[] = { diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 8389c1d68e57..3ece94563079 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -199,9 +199,9 @@ void ovs_dp_notify_wq(struct work_struct *work); int action_fifos_init(void); void action_fifos_exit(void); -#define OVS_NLERR(fmt, ...) \ +#define OVS_NLERR(logging_allowed, fmt, ...) \ do { \ - if (net_ratelimit()) \ - pr_info("netlink: " fmt, ##__VA_ARGS__); \ + if (logging_allowed && net_ratelimit()) \ + pr_info("netlink: " fmt "\n", ##__VA_ARGS__); \ } while (0) #endif /* datapath.h */ diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 25e9abcd51f1..70bef2ab7f2b 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -712,12 +712,12 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, int ovs_flow_key_extract_userspace(const struct nlattr *attr, struct sk_buff *skb, - struct sw_flow_key *key) + struct sw_flow_key *key, bool log) { int err; /* Extract metadata from netlink attributes. */ - err = ovs_nla_get_flow_metadata(attr, key); + err = ovs_nla_get_flow_metadata(attr, key, log); if (err) return err; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 9e0a787c8627..a8b30f334388 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -257,6 +257,6 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, /* Extract key from packet coming from userspace. */ int ovs_flow_key_extract_userspace(const struct nlattr *attr, struct sk_buff *skb, - struct sw_flow_key *key); + struct sw_flow_key *key, bool log); #endif /* flow.h */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 98a3e96b7d93..c0d066def228 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -112,7 +112,7 @@ static void update_range(struct sw_flow_match *match, } while (0) static bool match_validate(const struct sw_flow_match *match, - u64 key_attrs, u64 mask_attrs) + u64 key_attrs, u64 mask_attrs, bool log) { u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; u64 mask_allowed = key_attrs; /* At most allow all key attributes */ @@ -230,15 +230,17 @@ static bool match_validate(const struct sw_flow_match *match, if ((key_attrs & key_expected) != key_expected) { /* Key attributes check failed. */ - OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", - (unsigned long long)key_attrs, (unsigned long long)key_expected); + OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)", + (unsigned long long)key_attrs, + (unsigned long long)key_expected); return false; } if ((mask_attrs & mask_allowed) != mask_attrs) { /* Mask attributes check failed. */ - OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", - (unsigned long long)mask_attrs, (unsigned long long)mask_allowed); + OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)", + (unsigned long long)mask_attrs, + (unsigned long long)mask_allowed); return false; } @@ -328,7 +330,7 @@ static bool is_all_zero(const u8 *fp, size_t size) static int __parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], - u64 *attrsp, bool nz) + u64 *attrsp, bool log, bool nz) { const struct nlattr *nla; u64 attrs; @@ -340,21 +342,20 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, int expected_len; if (type > OVS_KEY_ATTR_MAX) { - OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n", + OVS_NLERR(log, "Key type %d is out of range max %d", type, OVS_KEY_ATTR_MAX); return -EINVAL; } if (attrs & (1 << type)) { - OVS_NLERR("Duplicate key attribute (type %d).\n", type); + OVS_NLERR(log, "Duplicate key (type %d).", type); return -EINVAL; } expected_len = ovs_key_lens[type]; if (nla_len(nla) != expected_len && expected_len != -1) { - OVS_NLERR("Key attribute has unexpected length (type=%d" - ", length=%d, expected=%d).\n", type, - nla_len(nla), expected_len); + OVS_NLERR(log, "Key %d has unexpected len %d expected %d", + type, nla_len(nla), expected_len); return -EINVAL; } @@ -364,7 +365,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, } } if (rem) { - OVS_NLERR("Message has %d unknown bytes.\n", rem); + OVS_NLERR(log, "Message has %d unknown bytes.", rem); return -EINVAL; } @@ -373,28 +374,84 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, } static int parse_flow_mask_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], u64 *attrsp) + const struct nlattr *a[], u64 *attrsp, + bool log) { - return __parse_flow_nlattrs(attr, a, attrsp, true); + return __parse_flow_nlattrs(attr, a, attrsp, log, true); } static int parse_flow_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], u64 *attrsp) + const struct nlattr *a[], u64 *attrsp, + bool log) { - return __parse_flow_nlattrs(attr, a, attrsp, false); + return __parse_flow_nlattrs(attr, a, attrsp, log, false); +} + +static int genev_tun_opt_from_nlattr(const struct nlattr *a, + struct sw_flow_match *match, bool is_mask, + bool log) +{ + unsigned long opt_key_offset; + + if (nla_len(a) > sizeof(match->key->tun_opts)) { + OVS_NLERR(log, "Geneve option length err (len %d, max %zu).", + nla_len(a), sizeof(match->key->tun_opts)); + return -EINVAL; + } + + if (nla_len(a) % 4 != 0) { + OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.", + nla_len(a)); + return -EINVAL; + } + + /* We need to record the length of the options passed + * down, otherwise packets with the same format but + * additional options will be silently matched. + */ + if (!is_mask) { + SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a), + false); + } else { + /* This is somewhat unusual because it looks at + * both the key and mask while parsing the + * attributes (and by extension assumes the key + * is parsed first). Normally, we would verify + * that each is the correct length and that the + * attributes line up in the validate function. + * However, that is difficult because this is + * variable length and we won't have the + * information later. + */ + if (match->key->tun_opts_len != nla_len(a)) { + OVS_NLERR(log, "Geneve option len %d != mask len %d", + match->key->tun_opts_len, nla_len(a)); + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); + } + + opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0, + nla_len(a)); + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), + nla_len(a), is_mask); + return 0; } static int ipv4_tun_from_nlattr(const struct nlattr *attr, - struct sw_flow_match *match, bool is_mask) + struct sw_flow_match *match, bool is_mask, + bool log) { struct nlattr *a; int rem; bool ttl = false; __be16 tun_flags = 0; - unsigned long opt_key_offset; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); + int err; + static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), @@ -410,15 +467,14 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, }; if (type > OVS_TUNNEL_KEY_ATTR_MAX) { - OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n", - type, OVS_TUNNEL_KEY_ATTR_MAX); + OVS_NLERR(log, "Tunnel attr %d out of range max %d", + type, OVS_TUNNEL_KEY_ATTR_MAX); return -EINVAL; } if (ovs_tunnel_key_lens[type] != nla_len(a) && ovs_tunnel_key_lens[type] != -1) { - OVS_NLERR("IPv4 tunnel attribute type has unexpected " - " length (type=%d, length=%d, expected=%d).\n", + OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", type, nla_len(a), ovs_tunnel_key_lens[type]); return -EINVAL; } @@ -464,58 +520,14 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, tun_flags |= TUNNEL_OAM; break; case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: - tun_flags |= TUNNEL_OPTIONS_PRESENT; - if (nla_len(a) > sizeof(match->key->tun_opts)) { - OVS_NLERR("Geneve option length exceeds maximum size (len %d, max %zu).\n", - nla_len(a), - sizeof(match->key->tun_opts)); - return -EINVAL; - } - - if (nla_len(a) % 4 != 0) { - OVS_NLERR("Geneve option length is not a multiple of 4 (len %d).\n", - nla_len(a)); - return -EINVAL; - } - - /* We need to record the length of the options passed - * down, otherwise packets with the same format but - * additional options will be silently matched. - */ - if (!is_mask) { - SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a), - false); - } else { - /* This is somewhat unusual because it looks at - * both the key and mask while parsing the - * attributes (and by extension assumes the key - * is parsed first). Normally, we would verify - * that each is the correct length and that the - * attributes line up in the validate function. - * However, that is difficult because this is - * variable length and we won't have the - * information later. - */ - if (match->key->tun_opts_len != nla_len(a)) { - OVS_NLERR("Geneve option key length (%d) is different from mask length (%d).", - match->key->tun_opts_len, - nla_len(a)); - return -EINVAL; - } - - SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, - true); - } + err = genev_tun_opt_from_nlattr(a, match, is_mask, log); + if (err) + return err; - opt_key_offset = (unsigned long)GENEVE_OPTS( - (struct sw_flow_key *)0, - nla_len(a)); - SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, - nla_data(a), nla_len(a), - is_mask); + tun_flags |= TUNNEL_OPTIONS_PRESENT; break; default: - OVS_NLERR("Unknown IPv4 tunnel attribute (%d).\n", + OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", type); return -EINVAL; } @@ -524,18 +536,19 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); if (rem > 0) { - OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem); + OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.", + rem); return -EINVAL; } if (!is_mask) { if (!match->key->tun_key.ipv4_dst) { - OVS_NLERR("IPv4 tunnel destination address is zero.\n"); + OVS_NLERR(log, "IPv4 tunnel dst address is zero"); return -EINVAL; } if (!ttl) { - OVS_NLERR("IPv4 tunnel TTL not specified.\n"); + OVS_NLERR(log, "IPv4 tunnel TTL not specified."); return -EINVAL; } } @@ -614,7 +627,8 @@ int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, } static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, - const struct nlattr **a, bool is_mask) + const struct nlattr **a, bool is_mask, + bool log) { if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); @@ -642,7 +656,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, if (is_mask) { in_port = 0xffffffff; /* Always exact match in_port. */ } else if (in_port >= DP_MAX_PORTS) { - OVS_NLERR("Port (%d) exceeds maximum allowable (%d).\n", + OVS_NLERR(log, "Port %d exceeds max allowable %d", in_port, DP_MAX_PORTS); return -EINVAL; } @@ -661,7 +675,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, } if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, - is_mask)) + is_mask, log)) return -EINVAL; *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); } @@ -669,11 +683,12 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, } static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, - const struct nlattr **a, bool is_mask) + const struct nlattr **a, bool is_mask, + bool log) { int err; - err = metadata_from_nlattrs(match, &attrs, a, is_mask); + err = metadata_from_nlattrs(match, &attrs, a, is_mask, log); if (err) return err; @@ -694,9 +709,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); if (!(tci & htons(VLAN_TAG_PRESENT))) { if (is_mask) - OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n"); + OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit."); else - OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n"); + OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set."); return -EINVAL; } @@ -713,8 +728,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, /* Always exact match EtherType. */ eth_type = htons(0xffff); } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { - OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n", - ntohs(eth_type), ETH_P_802_3_MIN); + OVS_NLERR(log, "EtherType %x is less than min %x", + ntohs(eth_type), ETH_P_802_3_MIN); return -EINVAL; } @@ -729,8 +744,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { - OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n", - ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); + OVS_NLERR(log, "IPv4 frag type %d is out of range max %d", + ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); return -EINVAL; } SW_FLOW_KEY_PUT(match, ip.proto, @@ -753,8 +768,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { - OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n", - ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); + OVS_NLERR(log, "IPv6 frag type %d is out of range max %d", + ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); return -EINVAL; } SW_FLOW_KEY_PUT(match, ipv6.label, @@ -784,7 +799,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); if (!is_mask && (arp_key->arp_op & htons(0xff00))) { - OVS_NLERR("Unknown ARP opcode (opcode=%d).\n", + OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).", arp_key->arp_op); return -EINVAL; } @@ -885,7 +900,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, } if (attrs != 0) { - OVS_NLERR("Unknown key attributes (%llx).\n", + OVS_NLERR(log, "Unknown key attributes %llx", (unsigned long long)attrs); return -EINVAL; } @@ -926,10 +941,14 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val) * of this flow. * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink * attribute specifies the mask field of the wildcarded flow. + * @log: Boolean to allow kernel error logging. Normally true, but when + * probing for feature compatibility this should be passed in as false to + * suppress unnecessary error logging. */ int ovs_nla_get_match(struct sw_flow_match *match, const struct nlattr *nla_key, - const struct nlattr *nla_mask) + const struct nlattr *nla_mask, + bool log) { const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; const struct nlattr *encap; @@ -939,7 +958,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, bool encap_valid = false; int err; - err = parse_flow_nlattrs(nla_key, a, &key_attrs); + err = parse_flow_nlattrs(nla_key, a, &key_attrs, log); if (err) return err; @@ -950,7 +969,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { - OVS_NLERR("Invalid Vlan frame.\n"); + OVS_NLERR(log, "Invalid Vlan frame."); return -EINVAL; } @@ -961,22 +980,22 @@ int ovs_nla_get_match(struct sw_flow_match *match, encap_valid = true; if (tci & htons(VLAN_TAG_PRESENT)) { - err = parse_flow_nlattrs(encap, a, &key_attrs); + err = parse_flow_nlattrs(encap, a, &key_attrs, log); if (err) return err; } else if (!tci) { /* Corner case for truncated 802.1Q header. */ if (nla_len(encap)) { - OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n"); + OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute."); return -EINVAL; } } else { - OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n"); + OVS_NLERR(log, "Encap attr is set for non-VLAN frame"); return -EINVAL; } } - err = ovs_key_from_nlattrs(match, key_attrs, a, false); + err = ovs_key_from_nlattrs(match, key_attrs, a, false, log); if (err) return err; @@ -1010,7 +1029,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, nla_mask = newmask; } - err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs); + err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log); if (err) goto free_newmask; @@ -1022,7 +1041,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, __be16 tci = 0; if (!encap_valid) { - OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n"); + OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame."); err = -EINVAL; goto free_newmask; } @@ -1034,12 +1053,13 @@ int ovs_nla_get_match(struct sw_flow_match *match, if (eth_type == htons(0xffff)) { mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); encap = a[OVS_KEY_ATTR_ENCAP]; - err = parse_flow_mask_nlattrs(encap, a, &mask_attrs); + err = parse_flow_mask_nlattrs(encap, a, + &mask_attrs, log); if (err) goto free_newmask; } else { - OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n", - ntohs(eth_type)); + OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).", + ntohs(eth_type)); err = -EINVAL; goto free_newmask; } @@ -1048,18 +1068,19 @@ int ovs_nla_get_match(struct sw_flow_match *match, tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); if (!(tci & htons(VLAN_TAG_PRESENT))) { - OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci)); + OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).", + ntohs(tci)); err = -EINVAL; goto free_newmask; } } - err = ovs_key_from_nlattrs(match, mask_attrs, a, true); + err = ovs_key_from_nlattrs(match, mask_attrs, a, true, log); if (err) goto free_newmask; } - if (!match_validate(match, key_attrs, mask_attrs)) + if (!match_validate(match, key_attrs, mask_attrs, log)) err = -EINVAL; free_newmask: @@ -1072,6 +1093,9 @@ free_newmask: * @key: Receives extracted in_port, priority, tun_key and skb_mark. * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute * sequence. + * @log: Boolean to allow kernel error logging. Normally true, but when + * probing for feature compatibility this should be passed in as false to + * suppress unnecessary error logging. * * This parses a series of Netlink attributes that form a flow key, which must * take the same form accepted by flow_from_nlattrs(), but only enough of it to @@ -1080,14 +1104,15 @@ free_newmask: */ int ovs_nla_get_flow_metadata(const struct nlattr *attr, - struct sw_flow_key *key) + struct sw_flow_key *key, + bool log) { const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; struct sw_flow_match match; u64 attrs = 0; int err; - err = parse_flow_nlattrs(attr, a, &attrs); + err = parse_flow_nlattrs(attr, a, &attrs, log); if (err) return -EINVAL; @@ -1096,7 +1121,7 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr, key->phy.in_port = DP_MAX_PORTS; - return metadata_from_nlattrs(&match, &attrs, a, false); + return metadata_from_nlattrs(&match, &attrs, a, false, log); } int ovs_nla_put_flow(const struct sw_flow_key *swkey, @@ -1316,12 +1341,12 @@ nla_put_failure: #define MAX_ACTIONS_BUFSIZE (32 * 1024) -static struct sw_flow_actions *nla_alloc_flow_actions(int size) +static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) { struct sw_flow_actions *sfa; if (size > MAX_ACTIONS_BUFSIZE) { - OVS_NLERR("Flow action size (%u bytes) exceeds maximum", size); + OVS_NLERR(log, "Flow action size %u bytes exceeds max", size); return ERR_PTR(-EINVAL); } @@ -1341,7 +1366,7 @@ void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) } static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, - int attr_len) + int attr_len, bool log) { struct sw_flow_actions *acts; @@ -1361,7 +1386,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = MAX_ACTIONS_BUFSIZE; } - acts = nla_alloc_flow_actions(new_acts_size); + acts = nla_alloc_flow_actions(new_acts_size, log); if (IS_ERR(acts)) return (void *)acts; @@ -1376,11 +1401,11 @@ out: } static struct nlattr *__add_action(struct sw_flow_actions **sfa, - int attrtype, void *data, int len) + int attrtype, void *data, int len, bool log) { struct nlattr *a; - a = reserve_sfa_size(sfa, nla_attr_size(len)); + a = reserve_sfa_size(sfa, nla_attr_size(len), log); if (IS_ERR(a)) return a; @@ -1395,11 +1420,11 @@ static struct nlattr *__add_action(struct sw_flow_actions **sfa, } static int add_action(struct sw_flow_actions **sfa, int attrtype, - void *data, int len) + void *data, int len, bool log) { struct nlattr *a; - a = __add_action(sfa, attrtype, data, len); + a = __add_action(sfa, attrtype, data, len, log); if (IS_ERR(a)) return PTR_ERR(a); @@ -1407,12 +1432,12 @@ static int add_action(struct sw_flow_actions **sfa, int attrtype, } static inline int add_nested_action_start(struct sw_flow_actions **sfa, - int attrtype) + int attrtype, bool log) { int used = (*sfa)->actions_len; int err; - err = add_action(sfa, attrtype, NULL, 0); + err = add_action(sfa, attrtype, NULL, 0, log); if (err) return err; @@ -1431,12 +1456,12 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, static int __ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, - __be16 eth_type, __be16 vlan_tci); + __be16 eth_type, __be16 vlan_tci, bool log); static int validate_and_copy_sample(const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, - __be16 eth_type, __be16 vlan_tci) + __be16 eth_type, __be16 vlan_tci, bool log) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; @@ -1462,19 +1487,19 @@ static int validate_and_copy_sample(const struct nlattr *attr, return -EINVAL; /* validation done, copy sample action. */ - start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log); if (start < 0) return start; err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, - nla_data(probability), sizeof(u32)); + nla_data(probability), sizeof(u32), log); if (err) return err; - st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); + st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log); if (st_acts < 0) return st_acts; err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa, - eth_type, vlan_tci); + eth_type, vlan_tci, log); if (err) return err; @@ -1511,7 +1536,7 @@ void ovs_match_init(struct sw_flow_match *match, } static int validate_and_copy_set_tun(const struct nlattr *attr, - struct sw_flow_actions **sfa) + struct sw_flow_actions **sfa, bool log) { struct sw_flow_match match; struct sw_flow_key key; @@ -1520,7 +1545,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, int err, start; ovs_match_init(&match, &key, NULL); - err = ipv4_tun_from_nlattr(nla_data(attr), &match, false); + err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); if (err) return err; @@ -1549,12 +1574,12 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; }; - start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); if (start < 0) return start; a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, - sizeof(*tun_info) + key.tun_opts_len); + sizeof(*tun_info) + key.tun_opts_len, log); if (IS_ERR(a)) return PTR_ERR(a); @@ -1582,7 +1607,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, static int validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key, struct sw_flow_actions **sfa, - bool *set_tun, __be16 eth_type) + bool *set_tun, __be16 eth_type, bool log) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); @@ -1611,7 +1636,7 @@ static int validate_set(const struct nlattr *a, return -EINVAL; *set_tun = true; - err = validate_and_copy_set_tun(a, sfa); + err = validate_and_copy_set_tun(a, sfa, log); if (err) return err; break; @@ -1704,12 +1729,12 @@ static int validate_userspace(const struct nlattr *attr) } static int copy_action(const struct nlattr *from, - struct sw_flow_actions **sfa) + struct sw_flow_actions **sfa, bool log) { int totlen = NLA_ALIGN(from->nla_len); struct nlattr *to; - to = reserve_sfa_size(sfa, from->nla_len); + to = reserve_sfa_size(sfa, from->nla_len, log); if (IS_ERR(to)) return PTR_ERR(to); @@ -1720,7 +1745,7 @@ static int copy_action(const struct nlattr *from, static int __ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, - __be16 eth_type, __be16 vlan_tci) + __be16 eth_type, __be16 vlan_tci, bool log) { const struct nlattr *a; bool out_tnl_port = false; @@ -1843,7 +1868,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_SET: err = validate_set(a, key, sfa, - &out_tnl_port, eth_type); + &out_tnl_port, eth_type, log); if (err) return err; @@ -1852,18 +1877,18 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_SAMPLE: err = validate_and_copy_sample(a, key, depth, sfa, - eth_type, vlan_tci); + eth_type, vlan_tci, log); if (err) return err; skip_copy = true; break; default: - OVS_NLERR("Unknown tunnel attribute (%d).\n", type); + OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; } if (!skip_copy) { - err = copy_action(a, sfa); + err = copy_action(a, sfa, log); if (err) return err; } @@ -1877,16 +1902,16 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, - struct sw_flow_actions **sfa) + struct sw_flow_actions **sfa, bool log) { int err; - *sfa = nla_alloc_flow_actions(nla_len(attr)); + *sfa = nla_alloc_flow_actions(nla_len(attr), log); if (IS_ERR(*sfa)) return PTR_ERR(*sfa); err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type, - key->eth.tci); + key->eth.tci, log); if (err) kfree(*sfa); diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 90bbe3785504..577f12be3459 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -45,17 +45,17 @@ void ovs_match_init(struct sw_flow_match *match, int ovs_nla_put_flow(const struct sw_flow_key *, const struct sw_flow_key *, struct sk_buff *); -int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *); +int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *, + bool log); -int ovs_nla_get_match(struct sw_flow_match *match, - const struct nlattr *, - const struct nlattr *); +int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key, + const struct nlattr *mask, bool log); int ovs_nla_put_egress_tunnel_key(struct sk_buff *, const struct ovs_tunnel_info *); int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, - struct sw_flow_actions **sfa); + struct sw_flow_actions **sfa, bool log); int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb); -- cgit v1.2.3-71-gd317 From f8d7552e945d38bd8d2e9c23aebf98042ce12302 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Fri, 7 Nov 2014 14:31:35 +0200 Subject: cfg80211: add channel switch started notification Add a new NL80211_CH_SWITCH_STARTED_NOTIFY message that can be sent to the userspace when a channel switch process has started. This allows userspace to take action, for instance, by requesting other interfaces to switch channel as necessary. This patch introduces a function that allows the drivers to send this notification. It should be used when the driver starts processing a channel switch initiated by a remote device (eg. when a STA receives a CSA from the AP) and when it successfully starts a userspace-triggered channel switch (eg. when hostapd triggers a channel swith in the AP). Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 14 ++++++++++++++ include/uapi/linux/nl80211.h | 11 +++++++++++ net/wireless/nl80211.c | 28 +++++++++++++++++++++++++--- net/wireless/trace.h | 16 ++++++++++++++++ 4 files changed, 66 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5c3acd07acd9..220d5f5f1aca 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4719,6 +4719,20 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, void cfg80211_ch_switch_notify(struct net_device *dev, struct cfg80211_chan_def *chandef); +/* + * cfg80211_ch_switch_started_notify - notify channel switch start + * @dev: the device on which the channel switch started + * @chandef: the future channel definition + * @count: the number of TBTTs until the channel switch happens + * + * Inform the userspace about the channel switch that has just + * started, so that it can take appropriate actions (eg. starting + * channel switch on other vifs), if necessary. + */ +void cfg80211_ch_switch_started_notify(struct net_device *dev, + struct cfg80211_chan_def *chandef, + u8 count); + /** * ieee80211_operating_class_to_band - convert operating class to band * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9b3025e4377a..354163433352 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -645,6 +645,15 @@ * %NL80211_ATTR_IFINDEX is now on %NL80211_ATTR_WIPHY_FREQ and the * attributes determining channel width. * + * @NL80211_CMD_CH_SWITCH_STARTED_NOTIFY: Notify that a channel switch + * has been started on an interface, regardless of the initiator + * (ie. whether it was requested from a remote device or + * initiated on our own). It indicates that + * %NL80211_ATTR_IFINDEX will be on %NL80211_ATTR_WIPHY_FREQ + * after %NL80211_ATTR_CH_SWITCH_COUNT TBTT's. The userspace may + * decide to react to this indication by requesting other + * interfaces to change channel as well. + * * @NL80211_CMD_START_P2P_DEVICE: Start the given P2P Device, identified by * its %NL80211_ATTR_WDEV identifier. It must have been created with * %NL80211_CMD_NEW_INTERFACE previously. After it has been started, the @@ -930,6 +939,8 @@ enum nl80211_commands { NL80211_CMD_JOIN_OCB, NL80211_CMD_LEAVE_OCB, + NL80211_CMD_CH_SWITCH_STARTED_NOTIFY, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 24549cbe0b54..24fd2925b281 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11653,7 +11653,9 @@ EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify); static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, struct cfg80211_chan_def *chandef, - gfp_t gfp) + gfp_t gfp, + enum nl80211_commands notif, + u8 count) { struct sk_buff *msg; void *hdr; @@ -11662,7 +11664,7 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, if (!msg) return; - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CH_SWITCH_NOTIFY); + hdr = nl80211hdr_put(msg, 0, 0, 0, notif); if (!hdr) { nlmsg_free(msg); return; @@ -11674,6 +11676,10 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, if (nl80211_send_chandef(msg, chandef)) goto nla_put_failure; + if ((notif == NL80211_CMD_CH_SWITCH_STARTED_NOTIFY) && + (nla_put_u32(msg, NL80211_ATTR_CH_SWITCH_COUNT, count))) + goto nla_put_failure; + genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, @@ -11704,10 +11710,26 @@ void cfg80211_ch_switch_notify(struct net_device *dev, wdev->chandef = *chandef; wdev->preset_chandef = *chandef; - nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL); + nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL, + NL80211_CMD_CH_SWITCH_NOTIFY, 0); } EXPORT_SYMBOL(cfg80211_ch_switch_notify); +void cfg80211_ch_switch_started_notify(struct net_device *dev, + struct cfg80211_chan_def *chandef, + u8 count) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + + trace_cfg80211_ch_switch_started_notify(dev, chandef); + + nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL, + NL80211_CMD_CH_SWITCH_STARTED_NOTIFY, count); +} +EXPORT_SYMBOL(cfg80211_ch_switch_started_notify); + void cfg80211_cqm_txe_notify(struct net_device *dev, const u8 *peer, u32 num_packets, u32 rate, u32 intvl, gfp_t gfp) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 277a85df910e..6e25370d3ce7 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2355,6 +2355,22 @@ TRACE_EVENT(cfg80211_ch_switch_notify, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) ); +TRACE_EVENT(cfg80211_ch_switch_started_notify, + TP_PROTO(struct net_device *netdev, + struct cfg80211_chan_def *chandef), + TP_ARGS(netdev, chandef), + TP_STRUCT__entry( + NETDEV_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + NETDEV_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT, + NETDEV_PR_ARG, CHAN_DEF_PR_ARG) +); + TRACE_EVENT(cfg80211_radar_event, TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), TP_ARGS(wiphy, chandef), -- cgit v1.2.3-71-gd317 From d04b5ac9e70b2056a8a12f768f4b46773576025e Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Fri, 7 Nov 2014 14:31:37 +0200 Subject: cfg80211/mac80211: allow any interface to send channel switch notifications For multi-vif channel switches, we want to send NL80211_CMD_CH_SWITCH_NOTIFY to the userspace to let it decide whether other interfaces need to be moved as well. This is needed when we want a P2P GO interface to follow the channel of a station, for example. Modify the code so that all interfaces can send CSA notifications. Additionally, send notifications for STA CSA as well. Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 +++- net/mac80211/mlme.c | 2 ++ net/wireless/nl80211.c | 6 ------ 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 354163433352..a552736c3e59 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -643,7 +643,9 @@ * @NL80211_CMD_CH_SWITCH_NOTIFY: An AP or GO may decide to switch channels * independently of the userspace SME, send this event indicating * %NL80211_ATTR_IFINDEX is now on %NL80211_ATTR_WIPHY_FREQ and the - * attributes determining channel width. + * attributes determining channel width. This indication may also be + * sent when a remotely-initiated switch (e.g., when a STA receives a CSA + * from the remote AP) is completed; * * @NL80211_CMD_CH_SWITCH_STARTED_NOTIFY: Notify that a channel switch * has been started on an interface, regardless of the initiator diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 947250077615..243539878991 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1049,6 +1049,8 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_sub_if_data *sdata) sdata->csa_block_tx = false; } + cfg80211_ch_switch_notify(sdata->dev, &sdata->reserved_chandef); + sdata->vif.csa_active = false; ifmgd->csa_waiting_bcn = false; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 24fd2925b281..d0a8361b3395 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11702,12 +11702,6 @@ void cfg80211_ch_switch_notify(struct net_device *dev, trace_cfg80211_ch_switch_notify(dev, chandef); - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO && - wdev->iftype != NL80211_IFTYPE_ADHOC && - wdev->iftype != NL80211_IFTYPE_MESH_POINT)) - return; - wdev->chandef = *chandef; wdev->preset_chandef = *chandef; nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL, -- cgit v1.2.3-71-gd317 From a6d4a534e15f0e1b13b518c31219f9fb7166412a Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Thu, 23 Oct 2014 09:37:33 +0300 Subject: cfg80211: introduce regulatory flags controlling bw Allow setting bandwidth related regulatory flags. These flags are mapped to the corresponding channel flags in the specified range. Make sure the new flags are consulted when calculating the maximum bandwidth allowed by a regulatory-rule. Also allow propagating the GO_CONCURRENT modifier from a reg-rule to a channel. Signed-off-by: Arik Nemtsov Reviewed-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 12 ++++++++++++ net/wireless/reg.c | 36 ++++++++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a552736c3e59..442369f69b4f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2665,6 +2665,11 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_AUTO_BW: maximum available bandwidth should be calculated * base on contiguous rules and wider channels will be allowed to cross * multiple contiguous/overlapping frequency ranges. + * @NL80211_RRF_GO_CONCURRENT: See &NL80211_FREQUENCY_ATTR_GO_CONCURRENT + * @NL80211_RRF_NO_HT40MINUS: channels can't be used in HT40- operation + * @NL80211_RRF_NO_HT40PLUS: channels can't be used in HT40+ operation + * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed + * @NL80211_RRF_NO_160MHZ: 160MHz operation not allowed */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -2677,11 +2682,18 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_IR = 1<<7, __NL80211_RRF_NO_IBSS = 1<<8, NL80211_RRF_AUTO_BW = 1<<11, + NL80211_RRF_GO_CONCURRENT = 1<<12, + NL80211_RRF_NO_HT40MINUS = 1<<13, + NL80211_RRF_NO_HT40PLUS = 1<<14, + NL80211_RRF_NO_80MHZ = 1<<15, + NL80211_RRF_NO_160MHZ = 1<<16, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR #define NL80211_RRF_NO_IBSS NL80211_RRF_NO_IR #define NL80211_RRF_NO_IR NL80211_RRF_NO_IR +#define NL80211_RRF_NO_HT40 (NL80211_RRF_NO_HT40MINUS |\ + NL80211_RRF_NO_HT40PLUS) /* For backport compatibility with older userspace */ #define NL80211_RRF_NO_IR_ALL (NL80211_RRF_NO_IR | __NL80211_RRF_NO_IBSS) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index b725a31a4751..7449a8c0f9fd 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -573,8 +573,9 @@ static const struct ieee80211_regdomain *reg_get_regdomain(struct wiphy *wiphy) return get_cfg80211_regdom(); } -unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd, - const struct ieee80211_reg_rule *rule) +static unsigned int +reg_get_max_bandwidth_from_range(const struct ieee80211_regdomain *rd, + const struct ieee80211_reg_rule *rule) { const struct ieee80211_freq_range *freq_range = &rule->freq_range; const struct ieee80211_freq_range *freq_range_tmp; @@ -622,6 +623,27 @@ unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd, return end_freq - start_freq; } +unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd, + const struct ieee80211_reg_rule *rule) +{ + unsigned int bw = reg_get_max_bandwidth_from_range(rd, rule); + + if (rule->flags & NL80211_RRF_NO_160MHZ) + bw = min_t(unsigned int, bw, MHZ_TO_KHZ(80)); + if (rule->flags & NL80211_RRF_NO_80MHZ) + bw = min_t(unsigned int, bw, MHZ_TO_KHZ(40)); + + /* + * HT40+/HT40- limits are handled per-channel. Only limit BW if both + * are not allowed. + */ + if (rule->flags & NL80211_RRF_NO_HT40MINUS && + rule->flags & NL80211_RRF_NO_HT40PLUS) + bw = min_t(unsigned int, bw, MHZ_TO_KHZ(20)); + + return bw; +} + /* Sanity check on a regulatory rule */ static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule) { @@ -946,6 +968,16 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_NO_OFDM; if (rd_flags & NL80211_RRF_NO_OUTDOOR) channel_flags |= IEEE80211_CHAN_INDOOR_ONLY; + if (rd_flags & NL80211_RRF_GO_CONCURRENT) + channel_flags |= IEEE80211_CHAN_GO_CONCURRENT; + if (rd_flags & NL80211_RRF_NO_HT40MINUS) + channel_flags |= IEEE80211_CHAN_NO_HT40MINUS; + if (rd_flags & NL80211_RRF_NO_HT40PLUS) + channel_flags |= IEEE80211_CHAN_NO_HT40PLUS; + if (rd_flags & NL80211_RRF_NO_80MHZ) + channel_flags |= IEEE80211_CHAN_NO_80MHZ; + if (rd_flags & NL80211_RRF_NO_160MHZ) + channel_flags |= IEEE80211_CHAN_NO_160MHZ; return channel_flags; } -- cgit v1.2.3-71-gd317 From be955b2984822e2a66176bccb3e0dbaf9cd569b6 Mon Sep 17 00:00:00 2001 From: Dave Taht Date: Thu, 6 Nov 2014 08:10:14 -0800 Subject: rtnetlink: add babel protocol recognition Babel uses rt_proto 42. Add to userspace visible header file. Signed-off-by: Dave Taht Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index eb0f1a554d7b..9c9b8b4480cd 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -235,6 +235,7 @@ enum { #define RTPROT_NTK 15 /* Netsukuku */ #define RTPROT_DHCP 16 /* DHCP client */ #define RTPROT_MROUTED 17 /* Multicast daemon */ +#define RTPROT_BABEL 42 /* Babel daemon */ /* rtm_scope -- cgit v1.2.3-71-gd317 From 3274f52073d88b62f3c5ace82ae9d48546232e72 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 Nov 2014 17:36:44 -0800 Subject: bpf: add 'flags' attribute to BPF_MAP_UPDATE_ELEM command the current meaning of BPF_MAP_UPDATE_ELEM syscall command is: either update existing map element or create a new one. Initially the plan was to add a new command to handle the case of 'create new element if it didn't exist', but 'flags' style looks cleaner and overall diff is much smaller (more code reused), so add 'flags' attribute to BPF_MAP_UPDATE_ELEM command with the following meaning: #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ #define BPF_EXIST 2 /* update existing element */ bpf_update_elem(fd, key, value, BPF_NOEXIST) call can fail with EEXIST if element already exists. bpf_update_elem(fd, key, value, BPF_EXIST) can fail with ENOENT if element doesn't exist. Userspace will call it as: int bpf_update_elem(int fd, void *key, void *value, __u64 flags) { union bpf_attr attr = { .map_fd = fd, .key = ptr_to_u64(key), .value = ptr_to_u64(value), .flags = flags; }; return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); } First two bits of 'flags' are used to encode style of bpf_update_elem() command. Bits 2-63 are reserved for future use. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 +- include/uapi/linux/bpf.h | 8 +++++++- kernel/bpf/syscall.c | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3cf91754a957..51e9242e4803 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -22,7 +22,7 @@ struct bpf_map_ops { /* funcs callable from userspace and from eBPF programs */ void *(*map_lookup_elem)(struct bpf_map *map, void *key); - int (*map_update_elem)(struct bpf_map *map, void *key, void *value); + int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); int (*map_delete_elem)(struct bpf_map *map, void *key); }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d18316f9e9c4..3e9e1b77f29d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -82,7 +82,7 @@ enum bpf_cmd { /* create or update key/value pair in a given map * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size) - * Using attr->map_fd, attr->key, attr->value + * Using attr->map_fd, attr->key, attr->value, attr->flags * returns zero or negative error */ BPF_MAP_UPDATE_ELEM, @@ -117,6 +117,11 @@ enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, }; +/* flags for BPF_MAP_UPDATE_ELEM command */ +#define BPF_ANY 0 /* create new element or update existing */ +#define BPF_NOEXIST 1 /* create new element if it didn't exist */ +#define BPF_EXIST 2 /* update existing element */ + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -132,6 +137,7 @@ union bpf_attr { __aligned_u64 value; __aligned_u64 next_key; }; + __u64 flags; }; struct { /* anonymous struct used by BPF_PROG_LOAD command */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ba61c8c16032..c0d03bf317a2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -190,7 +190,7 @@ err_put: return err; } -#define BPF_MAP_UPDATE_ELEM_LAST_FIELD value +#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags static int map_update_elem(union bpf_attr *attr) { @@ -231,7 +231,7 @@ static int map_update_elem(union bpf_attr *attr) * therefore all map accessors rely on this fact, so do the same here */ rcu_read_lock(); - err = map->ops->map_update_elem(map, key, value); + err = map->ops->map_update_elem(map, key, value, attr->flags); rcu_read_unlock(); free_value: -- cgit v1.2.3-71-gd317 From 0f8e4bd8a1fc8c4185f1630061d0a1f2d197a475 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 Nov 2014 17:36:45 -0800 Subject: bpf: add hashtable type of eBPF maps add new map type BPF_MAP_TYPE_HASH and its implementation - maps are created/destroyed by userspace. Both userspace and eBPF programs can lookup/update/delete elements from the map - eBPF programs can be called in_irq(), so use spin_lock_irqsave() mechanism for concurrent updates - key/value are opaque range of bytes (aligned to 8 bytes) - user space provides 3 configuration attributes via BPF syscall: key_size, value_size, max_entries - map takes care of allocating/freeing key/value pairs - map_update_elem() must fail to insert new element when max_entries limit is reached to make sure that eBPF programs cannot exhaust memory - map_update_elem() replaces elements in an atomic way - optimized for speed of lookup() which can be called multiple times from eBPF program which itself is triggered by high volume of events . in the future JIT compiler may recognize lookup() call and optimize it further, since key_size is constant for life of eBPF program Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + kernel/bpf/Makefile | 2 +- kernel/bpf/hashtab.c | 362 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 364 insertions(+), 1 deletion(-) create mode 100644 kernel/bpf/hashtab.c (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3e9e1b77f29d..03a01fd609aa 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -111,6 +111,7 @@ enum bpf_cmd { enum bpf_map_type { BPF_MAP_TYPE_UNSPEC, + BPF_MAP_TYPE_HASH, }; enum bpf_prog_type { diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 0daf7f6ae7df..2c0ec7f9da78 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -1,5 +1,5 @@ obj-y := core.o -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o hashtab.o ifdef CONFIG_TEST_BPF obj-$(CONFIG_BPF_SYSCALL) += test_stub.o endif diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c new file mode 100644 index 000000000000..d234a012f046 --- /dev/null +++ b/kernel/bpf/hashtab.c @@ -0,0 +1,362 @@ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include + +struct bpf_htab { + struct bpf_map map; + struct hlist_head *buckets; + spinlock_t lock; + u32 count; /* number of elements in this hashtable */ + u32 n_buckets; /* number of hash buckets */ + u32 elem_size; /* size of each element in bytes */ +}; + +/* each htab element is struct htab_elem + key + value */ +struct htab_elem { + struct hlist_node hash_node; + struct rcu_head rcu; + u32 hash; + char key[0] __aligned(8); +}; + +/* Called from syscall */ +static struct bpf_map *htab_map_alloc(union bpf_attr *attr) +{ + struct bpf_htab *htab; + int err, i; + + htab = kzalloc(sizeof(*htab), GFP_USER); + if (!htab) + return ERR_PTR(-ENOMEM); + + /* mandatory map attributes */ + htab->map.key_size = attr->key_size; + htab->map.value_size = attr->value_size; + htab->map.max_entries = attr->max_entries; + + /* check sanity of attributes. + * value_size == 0 may be allowed in the future to use map as a set + */ + err = -EINVAL; + if (htab->map.max_entries == 0 || htab->map.key_size == 0 || + htab->map.value_size == 0) + goto free_htab; + + /* hash table size must be power of 2 */ + htab->n_buckets = roundup_pow_of_two(htab->map.max_entries); + + err = -E2BIG; + if (htab->map.key_size > MAX_BPF_STACK) + /* eBPF programs initialize keys on stack, so they cannot be + * larger than max stack size + */ + goto free_htab; + + err = -ENOMEM; + htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head), + GFP_USER | __GFP_NOWARN); + + if (!htab->buckets) { + htab->buckets = vmalloc(htab->n_buckets * sizeof(struct hlist_head)); + if (!htab->buckets) + goto free_htab; + } + + for (i = 0; i < htab->n_buckets; i++) + INIT_HLIST_HEAD(&htab->buckets[i]); + + spin_lock_init(&htab->lock); + htab->count = 0; + + htab->elem_size = sizeof(struct htab_elem) + + round_up(htab->map.key_size, 8) + + htab->map.value_size; + return &htab->map; + +free_htab: + kfree(htab); + return ERR_PTR(err); +} + +static inline u32 htab_map_hash(const void *key, u32 key_len) +{ + return jhash(key, key_len, 0); +} + +static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) +{ + return &htab->buckets[hash & (htab->n_buckets - 1)]; +} + +static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash, + void *key, u32 key_size) +{ + struct htab_elem *l; + + hlist_for_each_entry_rcu(l, head, hash_node) + if (l->hash == hash && !memcmp(&l->key, key, key_size)) + return l; + + return NULL; +} + +/* Called from syscall or from eBPF program */ +static void *htab_map_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct hlist_head *head; + struct htab_elem *l; + u32 hash, key_size; + + /* Must be called with rcu_read_lock. */ + WARN_ON_ONCE(!rcu_read_lock_held()); + + key_size = map->key_size; + + hash = htab_map_hash(key, key_size); + + head = select_bucket(htab, hash); + + l = lookup_elem_raw(head, hash, key, key_size); + + if (l) + return l->key + round_up(map->key_size, 8); + + return NULL; +} + +/* Called from syscall */ +static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct hlist_head *head; + struct htab_elem *l, *next_l; + u32 hash, key_size; + int i; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + key_size = map->key_size; + + hash = htab_map_hash(key, key_size); + + head = select_bucket(htab, hash); + + /* lookup the key */ + l = lookup_elem_raw(head, hash, key, key_size); + + if (!l) { + i = 0; + goto find_first_elem; + } + + /* key was found, get next key in the same bucket */ + next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)), + struct htab_elem, hash_node); + + if (next_l) { + /* if next elem in this hash list is non-zero, just return it */ + memcpy(next_key, next_l->key, key_size); + return 0; + } + + /* no more elements in this hash list, go to the next bucket */ + i = hash & (htab->n_buckets - 1); + i++; + +find_first_elem: + /* iterate over buckets */ + for (; i < htab->n_buckets; i++) { + head = select_bucket(htab, i); + + /* pick first element in the bucket */ + next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), + struct htab_elem, hash_node); + if (next_l) { + /* if it's not empty, just return it */ + memcpy(next_key, next_l->key, key_size); + return 0; + } + } + + /* itereated over all buckets and all elements */ + return -ENOENT; +} + +/* Called from syscall or from eBPF program */ +static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct htab_elem *l_new, *l_old; + struct hlist_head *head; + unsigned long flags; + u32 key_size; + int ret; + + if (map_flags > BPF_EXIST) + /* unknown flags */ + return -EINVAL; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + /* allocate new element outside of lock */ + l_new = kmalloc(htab->elem_size, GFP_ATOMIC); + if (!l_new) + return -ENOMEM; + + key_size = map->key_size; + + memcpy(l_new->key, key, key_size); + memcpy(l_new->key + round_up(key_size, 8), value, map->value_size); + + l_new->hash = htab_map_hash(l_new->key, key_size); + + /* bpf_map_update_elem() can be called in_irq() */ + spin_lock_irqsave(&htab->lock, flags); + + head = select_bucket(htab, l_new->hash); + + l_old = lookup_elem_raw(head, l_new->hash, key, key_size); + + if (!l_old && unlikely(htab->count >= map->max_entries)) { + /* if elem with this 'key' doesn't exist and we've reached + * max_entries limit, fail insertion of new elem + */ + ret = -E2BIG; + goto err; + } + + if (l_old && map_flags == BPF_NOEXIST) { + /* elem already exists */ + ret = -EEXIST; + goto err; + } + + if (!l_old && map_flags == BPF_EXIST) { + /* elem doesn't exist, cannot update it */ + ret = -ENOENT; + goto err; + } + + /* add new element to the head of the list, so that concurrent + * search will find it before old elem + */ + hlist_add_head_rcu(&l_new->hash_node, head); + if (l_old) { + hlist_del_rcu(&l_old->hash_node); + kfree_rcu(l_old, rcu); + } else { + htab->count++; + } + spin_unlock_irqrestore(&htab->lock, flags); + + return 0; +err: + spin_unlock_irqrestore(&htab->lock, flags); + kfree(l_new); + return ret; +} + +/* Called from syscall or from eBPF program */ +static int htab_map_delete_elem(struct bpf_map *map, void *key) +{ + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct hlist_head *head; + struct htab_elem *l; + unsigned long flags; + u32 hash, key_size; + int ret = -ENOENT; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + key_size = map->key_size; + + hash = htab_map_hash(key, key_size); + + spin_lock_irqsave(&htab->lock, flags); + + head = select_bucket(htab, hash); + + l = lookup_elem_raw(head, hash, key, key_size); + + if (l) { + hlist_del_rcu(&l->hash_node); + htab->count--; + kfree_rcu(l, rcu); + ret = 0; + } + + spin_unlock_irqrestore(&htab->lock, flags); + return ret; +} + +static void delete_all_elements(struct bpf_htab *htab) +{ + int i; + + for (i = 0; i < htab->n_buckets; i++) { + struct hlist_head *head = select_bucket(htab, i); + struct hlist_node *n; + struct htab_elem *l; + + hlist_for_each_entry_safe(l, n, head, hash_node) { + hlist_del_rcu(&l->hash_node); + htab->count--; + kfree(l); + } + } +} + +/* Called when map->refcnt goes to zero, either from workqueue or from syscall */ +static void htab_map_free(struct bpf_map *map) +{ + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + + /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, + * so the programs (can be more than one that used this map) were + * disconnected from events. Wait for outstanding critical sections in + * these programs to complete + */ + synchronize_rcu(); + + /* some of kfree_rcu() callbacks for elements of this map may not have + * executed. It's ok. Proceed to free residual elements and map itself + */ + delete_all_elements(htab); + kvfree(htab->buckets); + kfree(htab); +} + +static struct bpf_map_ops htab_ops = { + .map_alloc = htab_map_alloc, + .map_free = htab_map_free, + .map_get_next_key = htab_map_get_next_key, + .map_lookup_elem = htab_map_lookup_elem, + .map_update_elem = htab_map_update_elem, + .map_delete_elem = htab_map_delete_elem, +}; + +static struct bpf_map_type_list tl = { + .ops = &htab_ops, + .type = BPF_MAP_TYPE_HASH, +}; + +static int __init register_htab_map(void) +{ + bpf_register_map_type(&tl); + return 0; +} +late_initcall(register_htab_map); -- cgit v1.2.3-71-gd317 From 28fbcfa08d8ed7c5a50d41a0433aad222835e8e3 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 Nov 2014 17:36:46 -0800 Subject: bpf: add array type of eBPF maps add new map type BPF_MAP_TYPE_ARRAY and its implementation - optimized for fastest possible lookup() . in the future verifier/JIT may recognize lookup() with constant key and optimize it into constant pointer. Can optimize non-constant key into direct pointer arithmetic as well, since pointers and value_size are constant for the life of the eBPF program. In other words array_map_lookup_elem() may be 'inlined' by verifier/JIT while preserving concurrent access to this map from user space - two main use cases for array type: . 'global' eBPF variables: array of 1 element with key=0 and value is a collection of 'global' variables which programs can use to keep the state between events . aggregation of tracing events into fixed set of buckets - all array elements pre-allocated and zero initialized at init time - key as an index in array and can only be 4 byte - map_delete_elem() returns EINVAL, since elements cannot be deleted - map_update_elem() replaces elements in an non-atomic way (for atomic updates hashtable type should be used instead) Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + kernel/bpf/Makefile | 2 +- kernel/bpf/arraymap.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 153 insertions(+), 1 deletion(-) create mode 100644 kernel/bpf/arraymap.c (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 03a01fd609aa..0d662fe75df5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -112,6 +112,7 @@ enum bpf_cmd { enum bpf_map_type { BPF_MAP_TYPE_UNSPEC, BPF_MAP_TYPE_HASH, + BPF_MAP_TYPE_ARRAY, }; enum bpf_prog_type { diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 2c0ec7f9da78..72ec98ba2d42 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -1,5 +1,5 @@ obj-y := core.o -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o hashtab.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o hashtab.o arraymap.o ifdef CONFIG_TEST_BPF obj-$(CONFIG_BPF_SYSCALL) += test_stub.o endif diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c new file mode 100644 index 000000000000..58b80c137afd --- /dev/null +++ b/kernel/bpf/arraymap.c @@ -0,0 +1,151 @@ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include + +struct bpf_array { + struct bpf_map map; + u32 elem_size; + char value[0] __aligned(8); +}; + +/* Called from syscall */ +static struct bpf_map *array_map_alloc(union bpf_attr *attr) +{ + struct bpf_array *array; + u32 elem_size; + + /* check sanity of attributes */ + if (attr->max_entries == 0 || attr->key_size != 4 || + attr->value_size == 0) + return ERR_PTR(-EINVAL); + + elem_size = round_up(attr->value_size, 8); + + /* allocate all map elements and zero-initialize them */ + array = kzalloc(sizeof(*array) + attr->max_entries * elem_size, + GFP_USER | __GFP_NOWARN); + if (!array) { + array = vzalloc(array->map.max_entries * array->elem_size); + if (!array) + return ERR_PTR(-ENOMEM); + } + + /* copy mandatory map attributes */ + array->map.key_size = attr->key_size; + array->map.value_size = attr->value_size; + array->map.max_entries = attr->max_entries; + + array->elem_size = elem_size; + + return &array->map; + +} + +/* Called from syscall or from eBPF program */ +static void *array_map_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 index = *(u32 *)key; + + if (index >= array->map.max_entries) + return NULL; + + return array->value + array->elem_size * index; +} + +/* Called from syscall */ +static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 index = *(u32 *)key; + u32 *next = (u32 *)next_key; + + if (index >= array->map.max_entries) { + *next = 0; + return 0; + } + + if (index == array->map.max_entries - 1) + return -ENOENT; + + *next = index + 1; + return 0; +} + +/* Called from syscall or from eBPF program */ +static int array_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 index = *(u32 *)key; + + if (map_flags > BPF_EXIST) + /* unknown flags */ + return -EINVAL; + + if (index >= array->map.max_entries) + /* all elements were pre-allocated, cannot insert a new one */ + return -E2BIG; + + if (map_flags == BPF_NOEXIST) + /* all elemenets already exist */ + return -EEXIST; + + memcpy(array->value + array->elem_size * index, value, array->elem_size); + return 0; +} + +/* Called from syscall or from eBPF program */ +static int array_map_delete_elem(struct bpf_map *map, void *key) +{ + return -EINVAL; +} + +/* Called when map->refcnt goes to zero, either from workqueue or from syscall */ +static void array_map_free(struct bpf_map *map) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + + /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, + * so the programs (can be more than one that used this map) were + * disconnected from events. Wait for outstanding programs to complete + * and free the array + */ + synchronize_rcu(); + + kvfree(array); +} + +static struct bpf_map_ops array_ops = { + .map_alloc = array_map_alloc, + .map_free = array_map_free, + .map_get_next_key = array_map_get_next_key, + .map_lookup_elem = array_map_lookup_elem, + .map_update_elem = array_map_update_elem, + .map_delete_elem = array_map_delete_elem, +}; + +static struct bpf_map_type_list tl = { + .ops = &array_ops, + .type = BPF_MAP_TYPE_ARRAY, +}; + +static int __init register_array_map(void) +{ + bpf_register_map_type(&tl); + return 0; +} +late_initcall(register_array_map); -- cgit v1.2.3-71-gd317 From d0003ec01c667b731c139e23de3306a8b328ccf5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 Nov 2014 17:36:49 -0800 Subject: bpf: allow eBPF programs to use maps expose bpf_map_lookup_elem(), bpf_map_update_elem(), bpf_map_delete_elem() map accessors to eBPF programs Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 5 +++ include/uapi/linux/bpf.h | 3 ++ kernel/bpf/Makefile | 2 +- kernel/bpf/helpers.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 kernel/bpf/helpers.c (limited to 'include/uapi/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 51e9242e4803..75e94eaa228b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -133,4 +133,9 @@ struct bpf_prog *bpf_prog_get(u32 ufd); /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); +/* verifier prototypes for helper functions called from eBPF programs */ +extern struct bpf_func_proto bpf_map_lookup_elem_proto; +extern struct bpf_func_proto bpf_map_update_elem_proto; +extern struct bpf_func_proto bpf_map_delete_elem_proto; + #endif /* _LINUX_BPF_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0d662fe75df5..4a3d0f84f178 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -158,6 +158,9 @@ union bpf_attr { */ enum bpf_func_id { BPF_FUNC_unspec, + BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ + BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ + BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 72ec98ba2d42..a5ae60f0b0a2 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -1,5 +1,5 @@ obj-y := core.o -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o hashtab.o arraymap.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o hashtab.o arraymap.o helpers.o ifdef CONFIG_TEST_BPF obj-$(CONFIG_BPF_SYSCALL) += test_stub.o endif diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c new file mode 100644 index 000000000000..9e3414d85459 --- /dev/null +++ b/kernel/bpf/helpers.c @@ -0,0 +1,89 @@ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include + +/* If kernel subsystem is allowing eBPF programs to call this function, + * inside its own verifier_ops->get_func_proto() callback it should return + * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments + * + * Different map implementations will rely on rcu in map methods + * lookup/update/delete, therefore eBPF programs must run under rcu lock + * if program is allowed to access maps, so check rcu_read_lock_held in + * all three functions. + */ +static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + /* verifier checked that R1 contains a valid pointer to bpf_map + * and R2 points to a program stack and map->key_size bytes were + * initialized + */ + struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; + void *key = (void *) (unsigned long) r2; + void *value; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + value = map->ops->map_lookup_elem(map, key); + + /* lookup() returns either pointer to element value or NULL + * which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type + */ + return (unsigned long) value; +} + +struct bpf_func_proto bpf_map_lookup_elem_proto = { + .func = bpf_map_lookup_elem, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_MAP_KEY, +}; + +static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; + void *key = (void *) (unsigned long) r2; + void *value = (void *) (unsigned long) r3; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + return map->ops->map_update_elem(map, key, value, r4); +} + +struct bpf_func_proto bpf_map_update_elem_proto = { + .func = bpf_map_update_elem, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_MAP_KEY, + .arg3_type = ARG_PTR_TO_MAP_VALUE, + .arg4_type = ARG_ANYTHING, +}; + +static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; + void *key = (void *) (unsigned long) r2; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + return map->ops->map_delete_elem(map, key); +} + +struct bpf_func_proto bpf_map_delete_elem_proto = { + .func = bpf_map_delete_elem, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_MAP_KEY, +}; -- cgit v1.2.3-71-gd317 From 78632a17eaa7a5abdc22aac8ca5932d6cad59984 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Sun, 9 Nov 2014 18:50:14 +0200 Subject: cfg/mac80211: define TDLS channel switch feature bit Define some related TDLS protocol constants and advertise channel switch support in the extended-capabilities IE when the feature bit is defined. Actually supporting TDLS channel-switching also requires support for some new nl80211 commands, to be introduced by future patches. Signed-off-by: Arik Nemtsov Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 6 ++++++ include/uapi/linux/nl80211.h | 3 +++ net/mac80211/tdls.c | 9 ++++++--- 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index adac1be67387..fbb02d240658 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2019,6 +2019,11 @@ enum ieee80211_tdls_actioncode { */ #define WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING BIT(2) +/* TDLS capabilities in the the 4th byte of @WLAN_EID_EXT_CAPABILITY */ +#define WLAN_EXT_CAPA4_TDLS_BUFFER_STA BIT(4) +#define WLAN_EXT_CAPA4_TDLS_PEER_PSM BIT(5) +#define WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH BIT(6) + /* Interworking capabilities are set in 7th bit of 4th byte of the * @WLAN_EID_EXT_CAPABILITY information element */ @@ -2030,6 +2035,7 @@ enum ieee80211_tdls_actioncode { */ #define WLAN_EXT_CAPA5_TDLS_ENABLED BIT(5) #define WLAN_EXT_CAPA5_TDLS_PROHIBITED BIT(6) +#define WLAN_EXT_CAPA5_TDLS_CH_SW_PROHIBITED BIT(7) #define WLAN_EXT_CAPA8_OPMODE_NOTIF BIT(6) #define WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED BIT(7) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 442369f69b4f..ccdeef28d672 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4095,6 +4095,8 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_MAC_ON_CREATE: Device supports configuring * the vif's MAC address upon creation. * See 'macaddr' field in the vif_params (cfg80211.h). + * @NL80211_FEATURE_TDLS_CHANNEL_SWITCH: Driver supports channel switching when + * operating as a TDLS peer. */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -4125,6 +4127,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_DYNAMIC_SMPS = 1 << 25, NL80211_FEATURE_SUPPORTS_WMM_ADMISSION = 1 << 26, NL80211_FEATURE_MAC_ON_CREATE = 1 << 27, + NL80211_FEATURE_TDLS_CHANNEL_SWITCH = 1 << 28, }; /** diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 30a4c1004010..4554bdc72c91 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -35,16 +35,19 @@ void ieee80211_tdls_peer_del_work(struct work_struct *wk) mutex_unlock(&local->mtx); } -static void ieee80211_tdls_add_ext_capab(struct sk_buff *skb) +static void ieee80211_tdls_add_ext_capab(struct ieee80211_local *local, + struct sk_buff *skb) { u8 *pos = (void *)skb_put(skb, 7); + bool chan_switch = local->hw.wiphy->features & + NL80211_FEATURE_TDLS_CHANNEL_SWITCH; *pos++ = WLAN_EID_EXT_CAPABILITY; *pos++ = 5; /* len */ *pos++ = 0x0; *pos++ = 0x0; *pos++ = 0x0; - *pos++ = 0x0; + *pos++ = chan_switch ? WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH : 0; *pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED; } @@ -289,7 +292,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, offset = noffset; } - ieee80211_tdls_add_ext_capab(skb); + ieee80211_tdls_add_ext_capab(local, skb); /* add the QoS element if we support it */ if (local->hw.queues >= IEEE80211_NUM_ACS && -- cgit v1.2.3-71-gd317 From 1057d35ede5dbf7ed7842357564fb42c9b54ba50 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 19 Nov 2014 12:54:26 +0200 Subject: cfg80211: introduce TDLS channel switch commands Introduce commands to initiate and cancel TDLS channel-switching. Once TDLS channel-switching is started, the lower level driver is responsible for continually initiating channel-switch operations and returning to the base (AP) channel to listen for beacons from time to time. Upon cancellation of the channel-switch all communication between the relevant TDLS peers will continue on the base channel. Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 14 ++++++ include/uapi/linux/nl80211.h | 19 ++++++++ net/wireless/core.c | 4 ++ net/wireless/nl80211.c | 108 +++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 24 ++++++++++ net/wireless/trace.h | 42 +++++++++++++++++ 6 files changed, 211 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 220d5f5f1aca..8d04dfef32bf 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2367,6 +2367,12 @@ struct cfg80211_qos_map { * (invoked with the wireless_dev mutex held) * @leave_ocb: leave the current OCB network * (invoked with the wireless_dev mutex held) + * + * @tdls_channel_switch: Start channel-switching with a TDLS peer. The driver + * is responsible for continually initiating channel-switching operations + * and returning to the base channel for communication with the AP. + * @tdls_cancel_channel_switch: Stop channel-switching with a TDLS peer. Both + * peers must be on the base channel when the call completes. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2622,6 +2628,14 @@ struct cfg80211_ops { u16 admitted_time); int (*del_tx_ts)(struct wiphy *wiphy, struct net_device *dev, u8 tsid, const u8 *peer); + + int (*tdls_channel_switch)(struct wiphy *wiphy, + struct net_device *dev, + const u8 *addr, u8 oper_class, + struct cfg80211_chan_def *chandef); + void (*tdls_cancel_channel_switch)(struct wiphy *wiphy, + struct net_device *dev, + const u8 *addr); }; /* diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ccdeef28d672..365db67ca71d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -762,6 +762,18 @@ * @NL80211_CMD_LEAVE_OCB: Leave the OCB network -- no special arguments, the * network is determined by the network interface. * + * @NL80211_CMD_TDLS_CHANNEL_SWITCH: Start channel-switching with a TDLS peer, + * identified by the %NL80211_ATTR_MAC parameter. A target channel is + * provided via %NL80211_ATTR_WIPHY_FREQ and other attributes determining + * channel width/type. The target operating class is given via + * %NL80211_ATTR_OPER_CLASS. + * The driver is responsible for continually initiating channel-switching + * operations and returning to the base channel for communication with the + * AP. + * @NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH: Stop channel-switching with a TDLS + * peer given by %NL80211_ATTR_MAC. Both peers must be on the base channel + * when this command completes. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -943,6 +955,9 @@ enum nl80211_commands { NL80211_CMD_CH_SWITCH_STARTED_NOTIFY, + NL80211_CMD_TDLS_CHANNEL_SWITCH, + NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1669,6 +1684,8 @@ enum nl80211_commands { * @NL80211_ATTR_SMPS_MODE: SMPS mode to use (ap mode). see * &enum nl80211_smps_mode. * + * @NL80211_ATTR_OPER_CLASS: operating class + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -2021,6 +2038,8 @@ enum nl80211_attrs { NL80211_ATTR_SMPS_MODE, + NL80211_ATTR_OPER_CLASS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/core.c b/net/wireless/core.c index a4d27927aba2..4c2e501203d1 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -541,6 +541,10 @@ int wiphy_register(struct wiphy *wiphy) !wiphy->wowlan->tcp)) return -EINVAL; #endif + if (WARN_ON((wiphy->features & NL80211_FEATURE_TDLS_CHANNEL_SWITCH) && + (!rdev->ops->tdls_channel_switch || + !rdev->ops->tdls_cancel_channel_switch))) + return -EINVAL; if (WARN_ON(wiphy->coalesce && (!wiphy->coalesce->n_rules || diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d0a8361b3395..27666f5e5050 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9658,6 +9658,98 @@ static int nl80211_del_tx_ts(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_tdls_channel_switch(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_chan_def chandef = {}; + const u8 *addr; + u8 oper_class; + int err; + + if (!rdev->ops->tdls_channel_switch || + !(rdev->wiphy.features & NL80211_FEATURE_TDLS_CHANNEL_SWITCH)) + return -EOPNOTSUPP; + + switch (dev->ieee80211_ptr->iftype) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + break; + default: + return -EOPNOTSUPP; + } + + if (!info->attrs[NL80211_ATTR_MAC] || + !info->attrs[NL80211_ATTR_OPER_CLASS]) + return -EINVAL; + + err = nl80211_parse_chandef(rdev, info, &chandef); + if (err) + return err; + + /* + * Don't allow wide channels on the 2.4Ghz band, as per IEEE802.11-2012 + * section 10.22.6.2.1. Disallow 5/10Mhz channels as well for now, the + * specification is not defined for them. + */ + if (chandef.chan->band == IEEE80211_BAND_2GHZ && + chandef.width != NL80211_CHAN_WIDTH_20_NOHT && + chandef.width != NL80211_CHAN_WIDTH_20) + return -EINVAL; + + /* we will be active on the TDLS link */ + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, wdev->iftype)) + return -EINVAL; + + /* don't allow switching to DFS channels */ + if (cfg80211_chandef_dfs_required(wdev->wiphy, &chandef, wdev->iftype)) + return -EINVAL; + + addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + oper_class = nla_get_u8(info->attrs[NL80211_ATTR_OPER_CLASS]); + + wdev_lock(wdev); + err = rdev_tdls_channel_switch(rdev, dev, addr, oper_class, &chandef); + wdev_unlock(wdev); + + return err; +} + +static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + const u8 *addr; + + if (!rdev->ops->tdls_channel_switch || + !rdev->ops->tdls_cancel_channel_switch || + !(rdev->wiphy.features & NL80211_FEATURE_TDLS_CHANNEL_SWITCH)) + return -EOPNOTSUPP; + + switch (dev->ieee80211_ptr->iftype) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + break; + default: + return -EOPNOTSUPP; + } + + if (!info->attrs[NL80211_ATTR_MAC]) + return -EINVAL; + + addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + + wdev_lock(wdev); + rdev_tdls_cancel_channel_switch(rdev, dev, addr); + wdev_unlock(wdev); + + return 0; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -10456,6 +10548,22 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_TDLS_CHANNEL_SWITCH, + .doit = nl80211_tdls_channel_switch, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH, + .doit = nl80211_tdls_cancel_channel_switch, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; /* notification functions */ diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 1b3864cd50ca..35cfb7134bdb 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -993,4 +993,28 @@ rdev_del_tx_ts(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_tdls_channel_switch(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *addr, + u8 oper_class, struct cfg80211_chan_def *chandef) +{ + int ret; + + trace_rdev_tdls_channel_switch(&rdev->wiphy, dev, addr, oper_class, + chandef); + ret = rdev->ops->tdls_channel_switch(&rdev->wiphy, dev, addr, + oper_class, chandef); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline void +rdev_tdls_cancel_channel_switch(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *addr) +{ + trace_rdev_tdls_cancel_channel_switch(&rdev->wiphy, dev, addr); + rdev->ops->tdls_cancel_channel_switch(&rdev->wiphy, dev, addr); + trace_rdev_return_void(&rdev->wiphy); +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 6e25370d3ce7..ad38910f7036 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2032,6 +2032,48 @@ TRACE_EVENT(rdev_del_tx_ts, WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tsid) ); +TRACE_EVENT(rdev_tdls_channel_switch, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + const u8 *addr, u8 oper_class, + struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, netdev, addr, oper_class, chandef), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(addr) + __field(u8, oper_class) + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(addr, addr); + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT + " oper class %d, " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(addr), + __entry->oper_class, CHAN_DEF_PR_ARG) +); + +TRACE_EVENT(rdev_tdls_cancel_channel_switch, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + const u8 *addr), + TP_ARGS(wiphy, netdev, addr), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(addr) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(addr, addr); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(addr)) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ -- cgit v1.2.3-71-gd317 From 8cd4d4563ef0a518002c4a8f47dd950afe386ea8 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Wed, 17 Sep 2014 11:55:28 +0300 Subject: cfg80211: add wowlan net-detect support Add a new WoWLAN API to enable net-detect as a wake up trigger. Net-detect allows the device to scan in the background while the host is asleep to wake up the host system when a matching network is found. Reuse the scheduled scan attributes to specify how the scan is performed while suspended and the matches that will trigger a wake event. Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 41 ++++++++++++++++ include/uapi/linux/nl80211.h | 23 +++++++++ net/wireless/core.h | 1 + net/wireless/nl80211.c | 111 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 175 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8d04dfef32bf..05aae22e92a5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1940,6 +1940,7 @@ struct cfg80211_wowlan_tcp { * @rfkill_release: wake up when rfkill is released * @tcp: TCP connection establishment/wakeup parameters, see nl80211.h. * NULL if not configured. + * @nd_config: configuration for the scan to be used for net detect wake. */ struct cfg80211_wowlan { bool any, disconnect, magic_pkt, gtk_rekey_failure, @@ -1948,6 +1949,7 @@ struct cfg80211_wowlan { struct cfg80211_pkt_pattern *patterns; struct cfg80211_wowlan_tcp *tcp; int n_patterns; + struct cfg80211_sched_scan_request *nd_config; }; /** @@ -1979,6 +1981,35 @@ struct cfg80211_coalesce { int n_rules; }; +/** + * struct cfg80211_wowlan_nd_match - information about the match + * + * @ssid: SSID of the match that triggered the wake up + * @n_channels: Number of channels where the match occurred. This + * value may be zero if the driver can't report the channels. + * @channels: center frequencies of the channels where a match + * occurred (in MHz) + */ +struct cfg80211_wowlan_nd_match { + struct cfg80211_ssid ssid; + int n_channels; + u32 channels[]; +}; + +/** + * struct cfg80211_wowlan_nd_info - net detect wake up information + * + * @n_matches: Number of match information instances provided in + * @matches. This value may be zero if the driver can't provide + * match information. + * @matches: Array of pointers to matches containing information about + * the matches that triggered the wake up. + */ +struct cfg80211_wowlan_nd_info { + int n_matches; + struct cfg80211_wowlan_nd_match *matches[]; +}; + /** * struct cfg80211_wowlan_wakeup - wakeup report * @disconnect: woke up by getting disconnected @@ -1998,6 +2029,7 @@ struct cfg80211_coalesce { * @tcp_match: TCP wakeup packet received * @tcp_connlost: TCP connection lost or failed to establish * @tcp_nomoretokens: TCP data ran out of tokens + * @net_detect: if not %NULL, woke up because of net detect */ struct cfg80211_wowlan_wakeup { bool disconnect, magic_pkt, gtk_rekey_failure, @@ -2007,6 +2039,7 @@ struct cfg80211_wowlan_wakeup { s32 pattern_idx; u32 packet_present_len, packet_len; const void *packet; + struct cfg80211_wowlan_nd_info *net_detect; }; /** @@ -2810,6 +2843,7 @@ struct ieee80211_txrx_stypes { * @WIPHY_WOWLAN_EAP_IDENTITY_REQ: supports wakeup on EAP identity request * @WIPHY_WOWLAN_4WAY_HANDSHAKE: supports wakeup on 4-way handshake failure * @WIPHY_WOWLAN_RFKILL_RELEASE: supports wakeup on RF-kill release + * @WIPHY_WOWLAN_NET_DETECT: supports wakeup on network detection */ enum wiphy_wowlan_support_flags { WIPHY_WOWLAN_ANY = BIT(0), @@ -2820,6 +2854,7 @@ enum wiphy_wowlan_support_flags { WIPHY_WOWLAN_EAP_IDENTITY_REQ = BIT(5), WIPHY_WOWLAN_4WAY_HANDSHAKE = BIT(6), WIPHY_WOWLAN_RFKILL_RELEASE = BIT(7), + WIPHY_WOWLAN_NET_DETECT = BIT(8), }; struct wiphy_wowlan_tcp_support { @@ -2838,6 +2873,11 @@ struct wiphy_wowlan_tcp_support { * @pattern_max_len: maximum length of each pattern * @pattern_min_len: minimum length of each pattern * @max_pkt_offset: maximum Rx packet offset + * @max_nd_match_sets: maximum number of matchsets for net-detect, + * similar, but not necessarily identical, to max_match_sets for + * scheduled scans. + * See &struct cfg80211_sched_scan_request.@match_sets for more + * details. * @tcp: TCP wakeup support information */ struct wiphy_wowlan_support { @@ -2846,6 +2886,7 @@ struct wiphy_wowlan_support { int pattern_max_len; int pattern_min_len; int max_pkt_offset; + int max_nd_match_sets; const struct wiphy_wowlan_tcp_support *tcp; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 365db67ca71d..d23208194e3c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1686,6 +1686,7 @@ enum nl80211_commands { * * @NL80211_ATTR_OPER_CLASS: operating class * + * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -2043,6 +2044,7 @@ enum nl80211_attrs { /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, + NUM_NL80211_ATTR = __NL80211_ATTR_AFTER_LAST, NL80211_ATTR_MAX = __NL80211_ATTR_AFTER_LAST - 1 }; @@ -3610,6 +3612,25 @@ struct nl80211_pattern_support { * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS: For wakeup reporting only, * the TCP connection ran out of tokens to use for data to send to the * service + * @NL80211_WOWLAN_TRIG_NET_DETECT: wake up when a configured network + * is detected. This is a nested attribute that contains the + * same attributes used with @NL80211_CMD_START_SCHED_SCAN. It + * specifies how the scan is performed (e.g. the interval and the + * channels to scan) as well as the scan results that will + * trigger a wake (i.e. the matchsets). + * @NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS: nested attribute + * containing an array with information about what triggered the + * wake up. If no elements are present in the array, it means + * that the information is not available. If more than one + * element is present, it means that more than one match + * occurred. + * Each element in the array is a nested attribute that contains + * one optional %NL80211_ATTR_SSID attribute and one optional + * %NL80211_ATTR_SCAN_FREQUENCIES attribute. At least one of + * these attributes must be present. If + * %NL80211_ATTR_SCAN_FREQUENCIES contains more than one + * frequency, it means that the match occurred in more than one + * channel. * @NUM_NL80211_WOWLAN_TRIG: number of wake on wireless triggers * @MAX_NL80211_WOWLAN_TRIG: highest wowlan trigger attribute number * @@ -3635,6 +3656,8 @@ enum nl80211_wowlan_triggers { NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH, NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST, NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS, + NL80211_WOWLAN_TRIG_NET_DETECT, + NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS, /* keep last */ NUM_NL80211_WOWLAN_TRIG, diff --git a/net/wireless/core.h b/net/wireless/core.h index 61ee664cf2bd..faa5b1609aae 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -111,6 +111,7 @@ cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) rdev->wiphy.wowlan_config->tcp->sock) sock_release(rdev->wiphy.wowlan_config->tcp->sock); kfree(rdev->wiphy.wowlan_config->tcp); + kfree(rdev->wiphy.wowlan_config->nd_config); kfree(rdev->wiphy.wowlan_config); #endif } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 03a302b884fd..3ec7dc557960 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -209,7 +209,7 @@ cfg80211_get_dev_from_info(struct net *netns, struct genl_info *info) } /* policy for the attributes */ -static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { +static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING, .len = 20-1 }, @@ -428,6 +428,7 @@ nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = { [NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE] = { .type = NLA_FLAG }, [NL80211_WOWLAN_TRIG_RFKILL_RELEASE] = { .type = NLA_FLAG }, [NL80211_WOWLAN_TRIG_TCP_CONNECTION] = { .type = NLA_NESTED }, + [NL80211_WOWLAN_TRIG_NET_DETECT] = { .type = NLA_NESTED }, }; static const struct nla_policy @@ -1088,6 +1089,8 @@ static int nl80211_send_wowlan(struct sk_buff *msg, if (large && nl80211_send_wowlan_tcp_caps(rdev, msg)) return -ENOBUFS; + /* TODO: send wowlan net detect */ + nla_nest_end(msg, nl_wowlan); return 0; @@ -8695,6 +8698,39 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, return 0; } +static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev, + const struct wiphy_wowlan_support *wowlan, + struct nlattr *attr, + struct cfg80211_wowlan *trig) +{ + struct nlattr **tb; + int err; + + tb = kzalloc(NUM_NL80211_ATTR * sizeof(*tb), GFP_KERNEL); + if (!tb) + return -ENOMEM; + + if (!(wowlan->flags & WIPHY_WOWLAN_NET_DETECT)) { + err = -EOPNOTSUPP; + goto out; + } + + err = nla_parse(tb, NL80211_ATTR_MAX, + nla_data(attr), nla_len(attr), + nl80211_policy); + if (err) + goto out; + + trig->nd_config = nl80211_parse_sched_scan(&rdev->wiphy, tb); + err = PTR_ERR_OR_ZERO(trig->nd_config); + if (err) + trig->nd_config = NULL; + +out: + kfree(tb); + return err; +} + static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -8840,6 +8876,14 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) goto error; } + if (tb[NL80211_WOWLAN_TRIG_NET_DETECT]) { + err = nl80211_parse_wowlan_nd( + rdev, wowlan, tb[NL80211_WOWLAN_TRIG_NET_DETECT], + &new_triggers); + if (err) + goto error; + } + ntrig = kmemdup(&new_triggers, sizeof(new_triggers), GFP_KERNEL); if (!ntrig) { err = -ENOMEM; @@ -12082,6 +12126,67 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, EXPORT_SYMBOL(cfg80211_report_obss_beacon); #ifdef CONFIG_PM +static int cfg80211_net_detect_results(struct sk_buff *msg, + struct cfg80211_wowlan_wakeup *wakeup) +{ + struct cfg80211_wowlan_nd_info *nd = wakeup->net_detect; + struct nlattr *nl_results, *nl_match, *nl_freqs; + int i, j; + + nl_results = nla_nest_start( + msg, NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS); + if (!nl_results) + return -EMSGSIZE; + + for (i = 0; i < nd->n_matches; i++) { + struct cfg80211_wowlan_nd_match *match = nd->matches[i]; + + nl_match = nla_nest_start(msg, i); + if (!nl_match) + break; + + /* The SSID attribute is optional in nl80211, but for + * simplicity reasons it's always present in the + * cfg80211 structure. If a driver can't pass the + * SSID, that needs to be changed. A zero length SSID + * is still a valid SSID (wildcard), so it cannot be + * used for this purpose. + */ + if (nla_put(msg, NL80211_ATTR_SSID, match->ssid.ssid_len, + match->ssid.ssid)) { + nla_nest_cancel(msg, nl_match); + goto out; + } + + if (match->n_channels) { + nl_freqs = nla_nest_start( + msg, NL80211_ATTR_SCAN_FREQUENCIES); + if (!nl_freqs) { + nla_nest_cancel(msg, nl_match); + goto out; + } + + for (j = 0; j < match->n_channels; j++) { + if (nla_put_u32(msg, + NL80211_ATTR_WIPHY_FREQ, + match->channels[j])) { + nla_nest_cancel(msg, nl_freqs); + nla_nest_cancel(msg, nl_match); + goto out; + } + } + + nla_nest_end(msg, nl_freqs); + } + + nla_nest_end(msg, nl_match); + } + +out: + nla_nest_end(msg, nl_results); + return 0; +} + void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, struct cfg80211_wowlan_wakeup *wakeup, gfp_t gfp) @@ -12176,6 +12281,10 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, goto free_msg; } + if (wakeup->net_detect && + cfg80211_net_detect_results(msg, wakeup)) + goto free_msg; + nla_nest_end(msg, reasons); } -- cgit v1.2.3-71-gd317 From ad2b26abc157460ca6fac1a53a2bfeade283adfa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Jun 2014 21:39:05 +0200 Subject: cfg80211: allow drivers to support random MAC addresses for scan Add the necessary feature flags and a scan flag to support using random MAC addresses for scan while unassociated. The configuration for this supports an arbitrary MAC address value and mask, so that any kind of configuration (e.g. fixed OUI or full 46-bit random) can be requested. Full 46-bit random is the default when no other configuration is passed. Also add a small helper function to use the addr/mask correctly. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 25 +++++++++++++ include/uapi/linux/nl80211.h | 29 +++++++++++++++ net/wireless/nl80211.c | 86 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 137 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 05aae22e92a5..bb748c4da5af 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1437,6 +1437,10 @@ struct cfg80211_ssid { * @aborted: (internal) scan request was notified as aborted * @notified: (internal) scan request was notified as done or aborted * @no_cck: used to send probe requests at non CCK rate in 2GHz band + * @mac_addr: MAC address used with randomisation + * @mac_addr_mask: MAC address mask used with randomisation, bits that + * are 0 in the mask should be randomised, bits that are 1 should + * be taken from the @mac_addr */ struct cfg80211_scan_request { struct cfg80211_ssid *ssids; @@ -1451,6 +1455,9 @@ struct cfg80211_scan_request { struct wireless_dev *wdev; + u8 mac_addr[ETH_ALEN] __aligned(2); + u8 mac_addr_mask[ETH_ALEN] __aligned(2); + /* internal */ struct wiphy *wiphy; unsigned long scan_start; @@ -1461,6 +1468,17 @@ struct cfg80211_scan_request { struct ieee80211_channel *channels[0]; }; +static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask) +{ + int i; + + get_random_bytes(buf, ETH_ALEN); + for (i = 0; i < ETH_ALEN; i++) { + buf[i] &= ~mask[i]; + buf[i] |= addr[i] & mask[i]; + } +} + /** * struct cfg80211_match_set - sets of attributes to match * @@ -1494,6 +1512,10 @@ struct cfg80211_match_set { * @channels: channels to scan * @min_rssi_thold: for drivers only supporting a single threshold, this * contains the minimum over all matchsets + * @mac_addr: MAC address used with randomisation + * @mac_addr_mask: MAC address mask used with randomisation, bits that + * are 0 in the mask should be randomised, bits that are 1 should + * be taken from the @mac_addr */ struct cfg80211_sched_scan_request { struct cfg80211_ssid *ssids; @@ -1508,6 +1530,9 @@ struct cfg80211_sched_scan_request { int n_match_sets; s32 min_rssi_thold; + u8 mac_addr[ETH_ALEN] __aligned(2); + u8 mac_addr_mask[ETH_ALEN] __aligned(2); + /* internal */ struct wiphy *wiphy; struct net_device *dev; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d23208194e3c..a99081efc2d4 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1686,6 +1686,8 @@ enum nl80211_commands { * * @NL80211_ATTR_OPER_CLASS: operating class * + * @NL80211_ATTR_MAC_MASK: MAC address mask + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2041,6 +2043,8 @@ enum nl80211_attrs { NL80211_ATTR_OPER_CLASS, + NL80211_ATTR_MAC_MASK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -4139,6 +4143,18 @@ enum nl80211_ap_sme_features { * See 'macaddr' field in the vif_params (cfg80211.h). * @NL80211_FEATURE_TDLS_CHANNEL_SWITCH: Driver supports channel switching when * operating as a TDLS peer. + * @NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR: This device/driver supports using a + * random MAC address during scan (if the device is unassociated); the + * %NL80211_SCAN_FLAG_RANDOM_ADDR flag may be set for scans and the MAC + * address mask/value will be used. + * @NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR: This device/driver supports + * using a random MAC address for every scan iteration during scheduled + * scan (while not associated), the %NL80211_SCAN_FLAG_RANDOM_ADDR may + * be set for scheduled scan and the MAC address mask/value will be used. + * @NL80211_FEATURE_ND_RANDOM_MAC_ADDR: This device/driver supports using a + * random MAC address for every scan iteration during "net detect", i.e. + * scan in unassociated WoWLAN, the %NL80211_SCAN_FLAG_RANDOM_ADDR may + * be set for scheduled scan and the MAC address mask/value will be used. */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -4170,6 +4186,9 @@ enum nl80211_feature_flags { NL80211_FEATURE_SUPPORTS_WMM_ADMISSION = 1 << 26, NL80211_FEATURE_MAC_ON_CREATE = 1 << 27, NL80211_FEATURE_TDLS_CHANNEL_SWITCH = 1 << 28, + NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR = 1 << 29, + NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR = 1 << 30, + NL80211_FEATURE_ND_RANDOM_MAC_ADDR = 1 << 31, }; /** @@ -4218,11 +4237,21 @@ enum nl80211_connect_failed_reason { * dangerous because will destroy stations performance as a lot of frames * will be lost while scanning off-channel, therefore it must be used only * when really needed + * @NL80211_SCAN_FLAG_RANDOM_ADDR: use a random MAC address for this scan (or + * for scheduled scan: a different one for every scan iteration). When the + * flag is set, depending on device capabilities the @NL80211_ATTR_MAC and + * @NL80211_ATTR_MAC_MASK attributes may also be given in which case only + * the masked bits will be preserved from the MAC address and the remainder + * randomised. If the attributes are not given full randomisation (46 bits, + * locally administered 1, multicast 0) is assumed. + * This flag must not be requested when the feature isn't supported, check + * the nl80211 feature flags for the device. */ enum nl80211_scan_flags { NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, NL80211_SCAN_FLAG_FLUSH = 1<<1, NL80211_SCAN_FLAG_AP = 1<<2, + NL80211_SCAN_FLAG_RANDOM_ADDR = 1<<3, }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3ec7dc557960..dd5a827f9cb0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -395,6 +395,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_USER_PRIO] = { .type = NLA_U8 }, [NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 }, [NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 }, + [NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN }, }; /* policy for the key attributes */ @@ -5481,6 +5482,43 @@ static int validate_scan_freqs(struct nlattr *freqs) return n_channels; } +static int nl80211_parse_random_mac(struct nlattr **attrs, + u8 *mac_addr, u8 *mac_addr_mask) +{ + int i; + + if (!attrs[NL80211_ATTR_MAC] && !attrs[NL80211_ATTR_MAC_MASK]) { + memset(mac_addr, 0, ETH_ALEN); + memset(mac_addr_mask, 0, ETH_ALEN); + mac_addr[0] = 0x2; + mac_addr_mask[0] = 0x3; + + return 0; + } + + /* need both or none */ + if (!attrs[NL80211_ATTR_MAC] || !attrs[NL80211_ATTR_MAC_MASK]) + return -EINVAL; + + memcpy(mac_addr, nla_data(attrs[NL80211_ATTR_MAC]), ETH_ALEN); + memcpy(mac_addr_mask, nla_data(attrs[NL80211_ATTR_MAC_MASK]), ETH_ALEN); + + /* don't allow or configure an mcast address */ + if (!is_multicast_ether_addr(mac_addr_mask) || + is_multicast_ether_addr(mac_addr)) + return -EINVAL; + + /* + * allow users to pass a MAC address that has bits set outside + * of the mask, but don't bother drivers with having to deal + * with such bits + */ + for (i = 0; i < ETH_ALEN; i++) + mac_addr[i] &= mac_addr_mask[i]; + + return 0; +} + static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -5658,6 +5696,25 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) err = -EOPNOTSUPP; goto out_free; } + + if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { + if (!(wiphy->features & + NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR)) { + err = -EOPNOTSUPP; + goto out_free; + } + + if (wdev->current_bss) { + err = -EOPNOTSUPP; + goto out_free; + } + + err = nl80211_parse_random_mac(info->attrs, + request->mac_addr, + request->mac_addr_mask); + if (err) + goto out_free; + } } request->no_cck = @@ -5685,7 +5742,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) } static struct cfg80211_sched_scan_request * -nl80211_parse_sched_scan(struct wiphy *wiphy, +nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, struct nlattr **attrs) { struct cfg80211_sched_scan_request *request; @@ -5934,6 +5991,28 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, err = -EOPNOTSUPP; goto out_free; } + + if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { + u32 flg = NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR; + + if (!wdev) /* must be net-detect */ + flg = NL80211_FEATURE_ND_RANDOM_MAC_ADDR; + + if (!(wiphy->features & flg)) { + err = -EOPNOTSUPP; + goto out_free; + } + + if (wdev && wdev->current_bss) { + err = -EOPNOTSUPP; + goto out_free; + } + + err = nl80211_parse_random_mac(attrs, request->mac_addr, + request->mac_addr_mask); + if (err) + goto out_free; + } } request->interval = interval; @@ -5951,6 +6030,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; int err; if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || @@ -5960,7 +6040,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (rdev->sched_scan_req) return -EINPROGRESS; - rdev->sched_scan_req = nl80211_parse_sched_scan(&rdev->wiphy, + rdev->sched_scan_req = nl80211_parse_sched_scan(&rdev->wiphy, wdev, info->attrs); err = PTR_ERR_OR_ZERO(rdev->sched_scan_req); if (err) @@ -8721,7 +8801,7 @@ static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev, if (err) goto out; - trig->nd_config = nl80211_parse_sched_scan(&rdev->wiphy, tb); + trig->nd_config = nl80211_parse_sched_scan(&rdev->wiphy, NULL, tb); err = PTR_ERR_OR_ZERO(trig->nd_config); if (err) trig->nd_config = NULL; -- cgit v1.2.3-71-gd317 From 18e5ca65e55da4cacd9deb4e934eb5429bb4b79d Mon Sep 17 00:00:00 2001 From: Jukka Rissanen Date: Thu, 13 Nov 2014 17:25:14 +0200 Subject: nl80211: Replace interface socket owner attribute with more generic one Replace NL80211_ATTR_IFACE_SOCKET_OWNER attribute with more generic NL80211_ATTR_SOCKET_OWNER that can be used with other commands that interface creation. Signed-off-by: Jukka Rissanen Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 7 ++++--- net/wireless/nl80211.c | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a99081efc2d4..d77524510435 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1652,9 +1652,9 @@ enum nl80211_commands { * @NL80211_ATTR_TDLS_PEER_CAPABILITY: flags for TDLS peer capabilities, u32. * As specified in the &enum nl80211_tdls_peer_capability. * - * @NL80211_ATTR_IFACE_SOCKET_OWNER: flag attribute, if set during interface + * @NL80211_ATTR_SOCKET_OWNER: Flag attribute, if set during interface * creation then the new interface will be owned by the netlink socket - * that created it and will be destroyed when the socket is closed + * that created it and will be destroyed when the socket is closed. * * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is * the TDLS link initiator. @@ -2024,7 +2024,7 @@ enum nl80211_attrs { NL80211_ATTR_TDLS_PEER_CAPABILITY, - NL80211_ATTR_IFACE_SOCKET_OWNER, + NL80211_ATTR_SOCKET_OWNER, NL80211_ATTR_CSA_C_OFFSETS_TX, NL80211_ATTR_MAX_CSA_COUNTERS, @@ -2055,6 +2055,7 @@ enum nl80211_attrs { /* source-level API compatibility */ #define NL80211_ATTR_SCAN_GENERATION NL80211_ATTR_GENERATION #define NL80211_ATTR_MESH_PARAMS NL80211_ATTR_MESH_CONFIG +#define NL80211_ATTR_IFACE_SOCKET_OWNER NL80211_ATTR_SOCKET_OWNER /* * Allow user space programs to use #ifdef on new attributes by defining them diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5cfd75dfff67..c81491b1f737 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -388,7 +388,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MAC_HINT] = { .len = ETH_ALEN }, [NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 }, [NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 }, - [NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG }, + [NL80211_ATTR_SOCKET_OWNER] = { .type = NLA_FLAG }, [NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY }, [NL80211_ATTR_USE_RRM] = { .type = NLA_FLAG }, [NL80211_ATTR_TSID] = { .type = NLA_U8 }, @@ -2653,7 +2653,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(wdev); } - if (info->attrs[NL80211_ATTR_IFACE_SOCKET_OWNER]) + if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) wdev->owner_nlportid = info->snd_portid; switch (type) { -- cgit v1.2.3-71-gd317 From c7e2b9689ef81362a8091592da6cb6a7723f377a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 19 Nov 2014 14:05:03 +0100 Subject: sched: introduce vlan action This tc action allows to work with vlan tagged skbs. Two supported sub-actions are header pop and header push. Signed-off-by: Jiri Pirko Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_vlan.h | 27 +++++ include/uapi/linux/tc_act/tc_vlan.h | 35 ++++++ net/sched/Kconfig | 11 ++ net/sched/Makefile | 1 + net/sched/act_vlan.c | 207 ++++++++++++++++++++++++++++++++++++ 5 files changed, 281 insertions(+) create mode 100644 include/net/tc_act/tc_vlan.h create mode 100644 include/uapi/linux/tc_act/tc_vlan.h create mode 100644 net/sched/act_vlan.c (limited to 'include/uapi/linux') diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h new file mode 100644 index 000000000000..c809c1d2cea5 --- /dev/null +++ b/include/net/tc_act/tc_vlan.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2014 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __NET_TC_VLAN_H +#define __NET_TC_VLAN_H + +#include + +#define VLAN_F_POP 0x1 +#define VLAN_F_PUSH 0x2 + +struct tcf_vlan { + struct tcf_common common; + int tcfv_action; + __be16 tcfv_push_vid; + __be16 tcfv_push_proto; +}; +#define to_vlan(a) \ + container_of(a->priv, struct tcf_vlan, common) + +#endif /* __NET_TC_VLAN_H */ diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h new file mode 100644 index 000000000000..f7b8d448b960 --- /dev/null +++ b/include/uapi/linux/tc_act/tc_vlan.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2014 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __LINUX_TC_VLAN_H +#define __LINUX_TC_VLAN_H + +#include + +#define TCA_ACT_VLAN 12 + +#define TCA_VLAN_ACT_POP 1 +#define TCA_VLAN_ACT_PUSH 2 + +struct tc_vlan { + tc_gen; + int v_action; +}; + +enum { + TCA_VLAN_UNSPEC, + TCA_VLAN_TM, + TCA_VLAN_PARMS, + TCA_VLAN_PUSH_VLAN_ID, + TCA_VLAN_PUSH_VLAN_PROTOCOL, + __TCA_VLAN_MAX, +}; +#define TCA_VLAN_MAX (__TCA_VLAN_MAX - 1) + +#endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index a1a8e29e5fc9..88618f8b794c 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -686,6 +686,17 @@ config NET_ACT_CSUM To compile this code as a module, choose M here: the module will be called act_csum. +config NET_ACT_VLAN + tristate "Vlan manipulation" + depends on NET_CLS_ACT + ---help--- + Say Y here to push or pop vlan headers. + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_vlan. + config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index 0a869a11f3e6..679f24ae7f93 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o +obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c new file mode 100644 index 000000000000..d735ecf0b1a7 --- /dev/null +++ b/net/sched/act_vlan.c @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2014 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define VLAN_TAB_MASK 15 + +static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_vlan *v = a->priv; + int action; + int err; + + spin_lock(&v->tcf_lock); + v->tcf_tm.lastuse = jiffies; + bstats_update(&v->tcf_bstats, skb); + action = v->tcf_action; + + switch (v->tcfv_action) { + case TCA_VLAN_ACT_POP: + err = skb_vlan_pop(skb); + if (err) + goto drop; + break; + case TCA_VLAN_ACT_PUSH: + err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid); + if (err) + goto drop; + break; + default: + BUG(); + } + + goto unlock; + +drop: + action = TC_ACT_SHOT; + v->tcf_qstats.drops++; +unlock: + spin_unlock(&v->tcf_lock); + return action; +} + +static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = { + [TCA_VLAN_PARMS] = { .len = sizeof(struct tc_vlan) }, + [TCA_VLAN_PUSH_VLAN_ID] = { .type = NLA_U16 }, + [TCA_VLAN_PUSH_VLAN_PROTOCOL] = { .type = NLA_U16 }, +}; + +static int tcf_vlan_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) +{ + struct nlattr *tb[TCA_VLAN_MAX + 1]; + struct tc_vlan *parm; + struct tcf_vlan *v; + int action; + __be16 push_vid = 0; + __be16 push_proto = 0; + int ret = 0; + int err; + + if (!nla) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_VLAN_MAX, nla, vlan_policy); + if (err < 0) + return err; + + if (!tb[TCA_VLAN_PARMS]) + return -EINVAL; + parm = nla_data(tb[TCA_VLAN_PARMS]); + switch (parm->v_action) { + case TCA_VLAN_ACT_POP: + break; + case TCA_VLAN_ACT_PUSH: + if (!tb[TCA_VLAN_PUSH_VLAN_ID]) + return -EINVAL; + push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]); + if (push_vid >= VLAN_VID_MASK) + return -ERANGE; + + if (tb[TCA_VLAN_PUSH_VLAN_PROTOCOL]) { + push_proto = nla_get_be16(tb[TCA_VLAN_PUSH_VLAN_PROTOCOL]); + switch (push_proto) { + case htons(ETH_P_8021Q): + case htons(ETH_P_8021AD): + break; + default: + return -EPROTONOSUPPORT; + } + } else { + push_proto = htons(ETH_P_8021Q); + } + break; + default: + return -EINVAL; + } + action = parm->v_action; + + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*v), bind); + if (ret) + return ret; + + ret = ACT_P_CREATED; + } else { + if (bind) + return 0; + tcf_hash_release(a, bind); + if (!ovr) + return -EEXIST; + } + + v = to_vlan(a); + + spin_lock_bh(&v->tcf_lock); + + v->tcfv_action = action; + v->tcfv_push_vid = push_vid; + v->tcfv_push_proto = push_proto; + + v->tcf_action = parm->action; + + spin_unlock_bh(&v->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(a); + return ret; +} + +static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_vlan *v = a->priv; + struct tc_vlan opt = { + .index = v->tcf_index, + .refcnt = v->tcf_refcnt - ref, + .bindcnt = v->tcf_bindcnt - bind, + .action = v->tcf_action, + .v_action = v->tcfv_action, + }; + struct tcf_t t; + + if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + if (v->tcfv_action == TCA_VLAN_ACT_PUSH && + (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) || + nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, v->tcfv_push_proto))) + goto nla_put_failure; + + t.install = jiffies_to_clock_t(jiffies - v->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - v->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(v->tcf_tm.expires); + if (nla_put(skb, TCA_VLAN_TM, sizeof(t), &t)) + goto nla_put_failure; + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tc_action_ops act_vlan_ops = { + .kind = "vlan", + .type = TCA_ACT_VLAN, + .owner = THIS_MODULE, + .act = tcf_vlan, + .dump = tcf_vlan_dump, + .init = tcf_vlan_init, +}; + +static int __init vlan_init_module(void) +{ + return tcf_register_action(&act_vlan_ops, VLAN_TAB_MASK); +} + +static void __exit vlan_cleanup_module(void) +{ + tcf_unregister_action(&act_vlan_ops); +} + +module_init(vlan_init_module); +module_exit(vlan_cleanup_module); + +MODULE_AUTHOR("Jiri Pirko "); +MODULE_DESCRIPTION("vlan manipulation actions"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3-71-gd317 From 0655f6a8635b1b66f2434d5556b1044c14b1ccaf Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Thu, 20 Nov 2014 10:29:07 +0100 Subject: tipc: add bearer disable/enable to new netlink api A new netlink API for tipc that can disable or enable a tipc bearer. The new API is separated from the old API because of a bug in the user space client (tipc-config). The problem is that older versions of tipc-config has a very low receive limit and adding commands to the legacy genl_opts struct causes the ctrl_getfamily() response message to grow, subsequently breaking the tool. The new API utilizes netlink policies for input validation. Where the top-level netlink attributes are tipc-logical entities, like bearer. The top level entities then contain nested attributes. In this case a name, nested link properties and a domain. Netlink commands implemented in this patch: TIPC_NL_BEARER_ENABLE TIPC_NL_BEARER_DISABLE Netlink logical layout of bearer enable message: -> bearer -> name [ -> domain ] [ -> properties -> priority ] Netlink logical layout of bearer disable message: -> bearer -> name Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 83 ++++++++++++++++++++++++++++++++ net/tipc/bearer.c | 99 ++++++++++++++++++++++++++++++++++++++- net/tipc/bearer.h | 7 ++- net/tipc/core.h | 1 + net/tipc/link.c | 47 +++++++++++++++++++ net/tipc/link.h | 3 ++ net/tipc/netlink.c | 42 ++++++++++++++++- net/tipc/netlink.h | 41 ++++++++++++++++ 8 files changed, 320 insertions(+), 3 deletions(-) create mode 100644 include/uapi/linux/tipc_netlink.h create mode 100644 net/tipc/netlink.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h new file mode 100644 index 000000000000..b9b710faa229 --- /dev/null +++ b/include/uapi/linux/tipc_netlink.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2014, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LINUX_TIPC_NETLINK_H_ +#define _LINUX_TIPC_NETLINK_H_ + +#define TIPC_GENL_V2_NAME "TIPCv2" +#define TIPC_GENL_V2_VERSION 0x1 + +/* Netlink commands */ +enum { + TIPC_NL_UNSPEC, + TIPC_NL_LEGACY, + TIPC_NL_BEARER_DISABLE, + TIPC_NL_BEARER_ENABLE, + + __TIPC_NL_CMD_MAX, + TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 +}; + +/* Top level netlink attributes */ +enum { + TIPC_NLA_UNSPEC, + TIPC_NLA_BEARER, /* nest */ + + __TIPC_NLA_MAX, + TIPC_NLA_MAX = __TIPC_NLA_MAX - 1 +}; + +/* Bearer info */ +enum { + TIPC_NLA_BEARER_UNSPEC, + TIPC_NLA_BEARER_NAME, /* string */ + TIPC_NLA_BEARER_PROP, /* nest */ + TIPC_NLA_BEARER_DOMAIN, /* u32 */ + + __TIPC_NLA_BEARER_MAX, + TIPC_NLA_BEARER_MAX = __TIPC_NLA_BEARER_MAX - 1 +}; + +/* Nest, link propreties. Valid for link, media and bearer */ +enum { + TIPC_NLA_PROP_UNSPEC, + + TIPC_NLA_PROP_PRIO, /* u32 */ + TIPC_NLA_PROP_TOL, /* u32 */ + TIPC_NLA_PROP_WIN, /* u32 */ + + __TIPC_NLA_PROP_MAX, + TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1 +}; + +#endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 264474394f9f..59815be0ab98 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1,7 +1,7 @@ /* * net/tipc/bearer.c: TIPC bearer code * - * Copyright (c) 1996-2006, 2013, Ericsson AB + * Copyright (c) 1996-2006, 2013-2014, Ericsson AB * Copyright (c) 2004-2006, 2010-2013, Wind River Systems * All rights reserved. * @@ -37,6 +37,7 @@ #include "core.h" #include "config.h" #include "bearer.h" +#include "link.h" #include "discover.h" #define MAX_ADDR_STR 60 @@ -49,6 +50,17 @@ static struct tipc_media * const media_info_array[] = { NULL }; +static const struct nla_policy +tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = { + [TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_BEARER_NAME] = { + .type = NLA_STRING, + .len = TIPC_MAX_BEARER_NAME + }, + [TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED }, + [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 } +}; + struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down); @@ -627,3 +639,88 @@ void tipc_bearer_stop(void) } } } + +int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_bearer *bearer; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + rtnl_lock(); + bearer = tipc_bearer_find(name); + if (!bearer) { + rtnl_unlock(); + return -EINVAL; + } + + bearer_disable(bearer, false); + rtnl_unlock(); + + return 0; +} + +int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *bearer; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + u32 domain; + u32 prio; + + prio = TIPC_MEDIA_LINK_PRI; + domain = tipc_own_addr & TIPC_CLUSTER_MASK; + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + + bearer = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + if (attrs[TIPC_NLA_BEARER_DOMAIN]) + domain = nla_get_u32(attrs[TIPC_NLA_BEARER_DOMAIN]); + + if (attrs[TIPC_NLA_BEARER_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], + props); + if (err) + return err; + + if (props[TIPC_NLA_PROP_PRIO]) + prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + } + + rtnl_lock(); + err = tipc_enable_bearer(bearer, domain, prio); + if (err) { + rtnl_unlock(); + return err; + } + rtnl_unlock(); + + return 0; +} diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 78fccc49de23..a87e8c78d862 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -1,7 +1,7 @@ /* * net/tipc/bearer.h: Include file for TIPC bearer code * - * Copyright (c) 1996-2006, 2013, Ericsson AB + * Copyright (c) 1996-2006, 2013-2014, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -38,6 +38,8 @@ #define _TIPC_BEARER_H #include "bcast.h" +#include "netlink.h" +#include #define MAX_BEARERS 2 #define MAX_MEDIA 2 @@ -176,6 +178,9 @@ extern struct tipc_media eth_media_info; extern struct tipc_media ib_media_info; #endif +int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); + int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); diff --git a/net/tipc/core.h b/net/tipc/core.h index f773b148722f..b578b10feefa 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -41,6 +41,7 @@ #include #include +#include #include #include #include diff --git a/net/tipc/link.c b/net/tipc/link.c index 7cf8004577f1..e7f365012bd1 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -40,6 +40,7 @@ #include "name_distr.h" #include "discover.h" #include "config.h" +#include "netlink.h" #include @@ -50,6 +51,14 @@ static const char *link_co_err = "Link changeover error, "; static const char *link_rst_msg = "Resetting link "; static const char *link_unk_evt = "Unknown link event "; +/* Properties valid for media, bearar and link */ +static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { + [TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 } +}; + /* * Out-of-range value for link session numbers */ @@ -2376,3 +2385,41 @@ static void link_print(struct tipc_link *l_ptr, const char *str) else pr_cont("\n"); } + +/* Parse and validate nested (link) properties valid for media, bearer and link + */ +int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) +{ + int err; + + err = nla_parse_nested(props, TIPC_NLA_PROP_MAX, prop, + tipc_nl_prop_policy); + if (err) + return err; + + if (props[TIPC_NLA_PROP_PRIO]) { + u32 prio; + + prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + if (prio > TIPC_MAX_LINK_PRI) + return -EINVAL; + } + + if (props[TIPC_NLA_PROP_TOL]) { + u32 tol; + + tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + if ((tol < TIPC_MIN_LINK_TOL) || (tol > TIPC_MAX_LINK_TOL)) + return -EINVAL; + } + + if (props[TIPC_NLA_PROP_WIN]) { + u32 win; + + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if ((win < TIPC_MIN_LINK_WIN) || (win > TIPC_MAX_LINK_WIN)) + return -EINVAL; + } + + return 0; +} diff --git a/net/tipc/link.h b/net/tipc/link.h index b567a3427fda..4338294f20d4 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -37,6 +37,7 @@ #ifndef _TIPC_LINK_H #define _TIPC_LINK_H +#include #include "msg.h" #include "node.h" @@ -239,6 +240,8 @@ void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window); void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *start, u32 retransmits); +int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); + /* * Link sequence number manipulation routines (uses modulo 2**16 arithmetic) */ diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index ad844d365340..af506ae0a129 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -1,7 +1,7 @@ /* * net/tipc/netlink.c: TIPC configuration handling * - * Copyright (c) 2005-2006, Ericsson AB + * Copyright (c) 2005-2006, 2014, Ericsson AB * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * @@ -36,6 +36,7 @@ #include "core.h" #include "config.h" +#include "bearer.h" #include static int handle_cmd(struct sk_buff *skb, struct genl_info *info) @@ -68,6 +69,12 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) return 0; } +static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { + [TIPC_NLA_UNSPEC] = { .type = NLA_UNSPEC, }, + [TIPC_NLA_BEARER] = { .type = NLA_NESTED, }, +}; + +/* Legacy ASCII API */ static struct genl_family tipc_genl_family = { .id = GENL_ID_GENERATE, .name = TIPC_GENL_NAME, @@ -76,6 +83,7 @@ static struct genl_family tipc_genl_family = { .maxattr = 0, }; +/* Legacy ASCII API */ static struct genl_ops tipc_genl_ops[] = { { .cmd = TIPC_GENL_CMD, @@ -83,11 +91,42 @@ static struct genl_ops tipc_genl_ops[] = { }, }; +/* Users of the legacy API (tipc-config) can't handle that we add operations, + * so we have a separate genl handling for the new API. + */ +struct genl_family tipc_genl_v2_family = { + .id = GENL_ID_GENERATE, + .name = TIPC_GENL_V2_NAME, + .version = TIPC_GENL_V2_VERSION, + .hdrsize = 0, + .maxattr = TIPC_NLA_MAX, +}; + +static const struct genl_ops tipc_genl_v2_ops[] = { + { + .cmd = TIPC_NL_BEARER_DISABLE, + .doit = tipc_nl_bearer_disable, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_BEARER_ENABLE, + .doit = tipc_nl_bearer_enable, + .policy = tipc_nl_policy, + } +}; + int tipc_netlink_start(void) { int res; res = genl_register_family_with_ops(&tipc_genl_family, tipc_genl_ops); + if (res) { + pr_err("Failed to register legacy interface\n"); + return res; + } + + res = genl_register_family_with_ops(&tipc_genl_v2_family, + tipc_genl_v2_ops); if (res) { pr_err("Failed to register netlink interface\n"); return res; @@ -98,4 +137,5 @@ int tipc_netlink_start(void) void tipc_netlink_stop(void) { genl_unregister_family(&tipc_genl_family); + genl_unregister_family(&tipc_genl_v2_family); } diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h new file mode 100644 index 000000000000..e9980c0e2207 --- /dev/null +++ b/net/tipc/netlink.h @@ -0,0 +1,41 @@ +/* + * net/tipc/netlink.h: Include file for TIPC netlink code + * + * Copyright (c) 2014, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_NETLINK_H +#define _TIPC_NETLINK_H + +extern struct genl_family tipc_genl_v2_family; + +#endif -- cgit v1.2.3-71-gd317 From 35b9dd7607f049466a66427e58818b29aeae9ea7 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Thu, 20 Nov 2014 10:29:08 +0100 Subject: tipc: add bearer get/dump to new netlink api Add TIPC_NL_BEARER_GET command to the new tipc netlink API. This command supports dumping all data about all bearers or getting all information about a specific bearer. The information about a bearer includes name, link priorities and domain. Netlink logical layout of bearer get message: -> bearer -> name Netlink logical layout of returned bearer information: -> bearer -> name -> link properties -> priority -> tolerance -> window -> domain Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 1 + net/tipc/bearer.c | 125 ++++++++++++++++++++++++++++++++++++++ net/tipc/bearer.h | 2 + net/tipc/netlink.c | 6 ++ net/tipc/netlink.h | 6 ++ 5 files changed, 140 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index b9b710faa229..249ec7e9952c 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -43,6 +43,7 @@ enum { TIPC_NL_LEGACY, TIPC_NL_BEARER_DISABLE, TIPC_NL_BEARER_ENABLE, + TIPC_NL_BEARER_GET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 59815be0ab98..d49b8c25dd11 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -640,6 +640,131 @@ void tipc_bearer_stop(void) } } +/* Caller should hold rtnl_lock to protect the bearer */ +int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, struct tipc_bearer *bearer) +{ + void *hdr; + struct nlattr *attrs; + struct nlattr *prop; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + NLM_F_MULTI, TIPC_NL_BEARER_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_BEARER); + if (!attrs) + goto msg_full; + + if (nla_put_string(msg->skb, TIPC_NLA_BEARER_NAME, bearer->name)) + goto attr_msg_full; + + prop = nla_nest_start(msg->skb, TIPC_NLA_BEARER_PROP); + if (!prop) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, bearer->priority)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, bearer->tolerance)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->window)) + goto prop_msg_full; + + nla_nest_end(msg->skb, prop); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + int i = cb->args[0]; + struct tipc_bearer *bearer; + struct tipc_nl_msg msg; + + if (i == MAX_BEARERS) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rtnl_lock(); + for (i = 0; i < MAX_BEARERS; i++) { + bearer = rtnl_dereference(bearer_list[i]); + if (!bearer) + continue; + + err = __tipc_nl_add_bearer(&msg, bearer); + if (err) + break; + } + rtnl_unlock(); + + cb->args[0] = i; + return skb->len; +} + +int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct sk_buff *rep; + struct tipc_bearer *bearer; + struct tipc_nl_msg msg; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!rep) + return -ENOMEM; + + msg.skb = rep; + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + rtnl_lock(); + bearer = tipc_bearer_find(name); + if (!bearer) { + err = -EINVAL; + goto err_out; + } + + err = __tipc_nl_add_bearer(&msg, bearer); + if (err) + goto err_out; + rtnl_unlock(); + + return genlmsg_reply(rep, info); +err_out: + rtnl_unlock(); + nlmsg_free(rep); + + return err; +} + int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) { int err; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index a87e8c78d862..2d07e35f91fc 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -180,6 +180,8 @@ extern struct tipc_media ib_media_info; int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info); int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index af506ae0a129..0c00422512fa 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -112,6 +112,12 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .cmd = TIPC_NL_BEARER_ENABLE, .doit = tipc_nl_bearer_enable, .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_BEARER_GET, + .doit = tipc_nl_bearer_get, + .dumpit = tipc_nl_bearer_dump, + .policy = tipc_nl_policy, } }; diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h index e9980c0e2207..e34a3fca7d3d 100644 --- a/net/tipc/netlink.h +++ b/net/tipc/netlink.h @@ -38,4 +38,10 @@ extern struct genl_family tipc_genl_v2_family; +struct tipc_nl_msg { + struct sk_buff *skb; + u32 portid; + u32 seq; +}; + #endif -- cgit v1.2.3-71-gd317 From 315c00bc9f2bd17f7ad7ed8119ca49b1125af507 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Thu, 20 Nov 2014 10:29:09 +0100 Subject: tipc: add bearer set to new netlink api Add TIPC_NL_BEARER_SET command to the new tipc netlink API. This command can set one or more link properties for a particular bearer. Netlink logical layout of bearer set message: -> bearer -> name -> link properties [ -> tolerance ] [ -> priority ] [ -> window ] Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 1 + net/tipc/bearer.c | 49 +++++++++++++++++++++++++++++++++++++++ net/tipc/bearer.h | 1 + net/tipc/netlink.c | 5 ++++ 4 files changed, 56 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 249ec7e9952c..f446fba6a0c7 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -44,6 +44,7 @@ enum { TIPC_NL_BEARER_DISABLE, TIPC_NL_BEARER_ENABLE, TIPC_NL_BEARER_GET, + TIPC_NL_BEARER_SET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index d49b8c25dd11..6d547d0b3f31 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -849,3 +849,52 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) return 0; } + +int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_bearer *b; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + rtnl_lock(); + b = tipc_bearer_find(name); + if (!b) { + rtnl_unlock(); + return -EINVAL; + } + + if (attrs[TIPC_NLA_BEARER_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], + props); + if (err) { + rtnl_unlock(); + return err; + } + + if (props[TIPC_NLA_PROP_TOL]) + b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + if (props[TIPC_NLA_PROP_PRIO]) + b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + if (props[TIPC_NLA_PROP_WIN]) + b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + } + rtnl_unlock(); + + return 0; +} diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 2d07e35f91fc..c6cf9df31375 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -182,6 +182,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 0c00422512fa..a5291314a210 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -118,6 +118,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .doit = tipc_nl_bearer_get, .dumpit = tipc_nl_bearer_dump, .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_BEARER_SET, + .doit = tipc_nl_bearer_set, + .policy = tipc_nl_policy, } }; -- cgit v1.2.3-71-gd317 From 34b78a127c4fd57cf3d5c64031693d10a8e0fae1 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Thu, 20 Nov 2014 10:29:10 +0100 Subject: tipc: add sock dump to new netlink api Add TIPC_NL_SOCK_GET command to the new tipc netlink API. This command supports dumping of all available sockets with their associated connection or publication(s). It could be extended to reply with a single socket if the NLM_F_DUMP isn't set. The information about a socket includes reference, address, connection information / publication information. Netlink logical layout of response message: -> socket -> reference -> address [ -> connection -> node -> socket [ -> connected flag -> type -> instance ] ] [ -> publication flag ] Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 28 +++++++++++ net/tipc/netlink.c | 7 +++ net/tipc/socket.c | 101 ++++++++++++++++++++++++++++++++++++++ net/tipc/socket.h | 2 + 4 files changed, 138 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index f446fba6a0c7..8c87e2490bc7 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -45,6 +45,7 @@ enum { TIPC_NL_BEARER_ENABLE, TIPC_NL_BEARER_GET, TIPC_NL_BEARER_SET, + TIPC_NL_SOCK_GET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -54,6 +55,7 @@ enum { enum { TIPC_NLA_UNSPEC, TIPC_NLA_BEARER, /* nest */ + TIPC_NLA_SOCK, /* nest */ __TIPC_NLA_MAX, TIPC_NLA_MAX = __TIPC_NLA_MAX - 1 @@ -70,6 +72,32 @@ enum { TIPC_NLA_BEARER_MAX = __TIPC_NLA_BEARER_MAX - 1 }; +/* Socket info */ +enum { + TIPC_NLA_SOCK_UNSPEC, + TIPC_NLA_SOCK_ADDR, /* u32 */ + TIPC_NLA_SOCK_REF, /* u32 */ + TIPC_NLA_SOCK_CON, /* nest */ + TIPC_NLA_SOCK_HAS_PUBL, /* flag */ + + __TIPC_NLA_SOCK_MAX, + TIPC_NLA_SOCK_MAX = __TIPC_NLA_SOCK_MAX - 1 +}; + +/* Nest, connection info */ +enum { + TIPC_NLA_CON_UNSPEC, + + TIPC_NLA_CON_FLAG, /* flag */ + TIPC_NLA_CON_NODE, /* u32 */ + TIPC_NLA_CON_SOCK, /* u32 */ + TIPC_NLA_CON_TYPE, /* u32 */ + TIPC_NLA_CON_INST, /* u32 */ + + __TIPC_NLA_CON_MAX, + TIPC_NLA_CON_MAX = __TIPC_NLA_CON_MAX - 1 +}; + /* Nest, link propreties. Valid for link, media and bearer */ enum { TIPC_NLA_PROP_UNSPEC, diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index a5291314a210..951fabeec8b7 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -36,6 +36,7 @@ #include "core.h" #include "config.h" +#include "socket.h" #include "bearer.h" #include @@ -72,6 +73,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { [TIPC_NLA_UNSPEC] = { .type = NLA_UNSPEC, }, [TIPC_NLA_BEARER] = { .type = NLA_NESTED, }, + [TIPC_NLA_SOCK] = { .type = NLA_NESTED, }, }; /* Legacy ASCII API */ @@ -123,6 +125,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .cmd = TIPC_NL_BEARER_SET, .doit = tipc_nl_bearer_set, .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_SOCK_GET, + .dumpit = tipc_nl_sk_dump, + .policy = tipc_nl_policy, } }; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 591bbfa082a0..9e95c1ea5564 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2801,3 +2801,104 @@ void tipc_socket_stop(void) sock_unregister(tipc_family_ops.family); proto_unregister(&tipc_proto); } + +/* Caller should hold socket lock for the passed tipc socket. */ +int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk) +{ + u32 peer_node; + u32 peer_port; + struct nlattr *nest; + + peer_node = tsk_peer_node(tsk); + peer_port = tsk_peer_port(tsk); + + nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON); + + if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node)) + goto msg_full; + if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port)) + goto msg_full; + + if (tsk->conn_type != 0) { + if (nla_put_flag(skb, TIPC_NLA_CON_FLAG)) + goto msg_full; + if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type)) + goto msg_full; + if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance)) + goto msg_full; + } + nla_nest_end(skb, nest); + + return 0; + +msg_full: + nla_nest_cancel(skb, nest); + + return -EMSGSIZE; +} + +/* Caller should hold socket lock for the passed tipc socket. */ +int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, + struct tipc_sock *tsk) +{ + int err; + void *hdr; + struct nlattr *attrs; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); + if (!hdr) + goto msg_cancel; + + attrs = nla_nest_start(skb, TIPC_NLA_SOCK); + if (!attrs) + goto genlmsg_cancel; + if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->ref)) + goto attr_msg_cancel; + if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr)) + goto attr_msg_cancel; + + if (tsk->connected) { + err = __tipc_nl_add_sk_con(skb, tsk); + if (err) + goto attr_msg_cancel; + } else if (!list_empty(&tsk->publications)) { + if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL)) + goto attr_msg_cancel; + } + nla_nest_end(skb, attrs); + genlmsg_end(skb, hdr); + + return 0; + +attr_msg_cancel: + nla_nest_cancel(skb, attrs); +genlmsg_cancel: + genlmsg_cancel(skb, hdr); +msg_cancel: + return -EMSGSIZE; +} + +int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + struct tipc_sock *tsk; + u32 prev_ref = cb->args[0]; + u32 ref = prev_ref; + + tsk = tipc_sk_get_next(&ref); + for (; tsk; tsk = tipc_sk_get_next(&ref)) { + lock_sock(&tsk->sk); + err = __tipc_nl_add_sk(skb, cb, tsk); + release_sock(&tsk->sk); + tipc_sk_put(tsk); + if (err) + break; + + prev_ref = ref; + } + + cb->args[0] = prev_ref; + + return skb->len; +} diff --git a/net/tipc/socket.h b/net/tipc/socket.h index baa43d03901e..16dfd62983a8 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -36,6 +36,7 @@ #define _TIPC_SOCK_H #include +#include #define TIPC_CONNACK_INTV 256 #define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2) @@ -47,5 +48,6 @@ void tipc_sk_mcast_rcv(struct sk_buff *buf); void tipc_sk_reinit(void); int tipc_sk_ref_table_init(u32 requested_size, u32 start); void tipc_sk_ref_table_stop(void); +int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb); #endif -- cgit v1.2.3-71-gd317 From 1a1a143daf84db95dd7212086042004a3abb7bc2 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Thu, 20 Nov 2014 10:29:11 +0100 Subject: tipc: add publication dump to new netlink api Add TIPC_NL_PUBL_GET command to the new tipc netlink API. This command supports dumping of all publications for a specific socket. Netlink logical layout of request message: -> socket -> reference Netlink logical layout of response message: -> publication -> type -> lower -> upper Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 18 +++++ net/tipc/netlink.c | 17 +++++ net/tipc/netlink.h | 1 + net/tipc/socket.c | 135 ++++++++++++++++++++++++++++++++++++++ net/tipc/socket.h | 1 + 5 files changed, 172 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 8c87e2490bc7..7e51ff61dccd 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -46,6 +46,7 @@ enum { TIPC_NL_BEARER_GET, TIPC_NL_BEARER_SET, TIPC_NL_SOCK_GET, + TIPC_NL_PUBL_GET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -56,6 +57,7 @@ enum { TIPC_NLA_UNSPEC, TIPC_NLA_BEARER, /* nest */ TIPC_NLA_SOCK, /* nest */ + TIPC_NLA_PUBL, /* nest */ __TIPC_NLA_MAX, TIPC_NLA_MAX = __TIPC_NLA_MAX - 1 @@ -84,6 +86,22 @@ enum { TIPC_NLA_SOCK_MAX = __TIPC_NLA_SOCK_MAX - 1 }; +/* Publication info */ +enum { + TIPC_NLA_PUBL_UNSPEC, + + TIPC_NLA_PUBL_TYPE, /* u32 */ + TIPC_NLA_PUBL_LOWER, /* u32 */ + TIPC_NLA_PUBL_UPPER, /* u32 */ + TIPC_NLA_PUBL_SCOPE, /* u32 */ + TIPC_NLA_PUBL_NODE, /* u32 */ + TIPC_NLA_PUBL_REF, /* u32 */ + TIPC_NLA_PUBL_KEY, /* u32 */ + + __TIPC_NLA_PUBL_MAX, + TIPC_NLA_PUBL_MAX = __TIPC_NLA_PUBL_MAX - 1 +}; + /* Nest, connection info */ enum { TIPC_NLA_CON_UNSPEC, diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 951fabeec8b7..9bc64aaff466 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -74,6 +74,7 @@ static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { [TIPC_NLA_UNSPEC] = { .type = NLA_UNSPEC, }, [TIPC_NLA_BEARER] = { .type = NLA_NESTED, }, [TIPC_NLA_SOCK] = { .type = NLA_NESTED, }, + [TIPC_NLA_PUBL] = { .type = NLA_NESTED, } }; /* Legacy ASCII API */ @@ -130,9 +131,25 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .cmd = TIPC_NL_SOCK_GET, .dumpit = tipc_nl_sk_dump, .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_PUBL_GET, + .dumpit = tipc_nl_publ_dump, + .policy = tipc_nl_policy, } }; +int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) +{ + u32 maxattr = tipc_genl_v2_family.maxattr; + + *attr = tipc_genl_v2_family.attrbuf; + if (!*attr) + return -EOPNOTSUPP; + + return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy); +} + int tipc_netlink_start(void) { int res; diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h index e34a3fca7d3d..1425c6869de0 100644 --- a/net/tipc/netlink.h +++ b/net/tipc/netlink.h @@ -37,6 +37,7 @@ #define _TIPC_NETLINK_H extern struct genl_family tipc_genl_v2_family; +int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***buf); struct tipc_nl_msg { struct sk_buff *skb; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9e95c1ea5564..e91809182c28 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -121,6 +121,14 @@ static const struct proto_ops msg_ops; static struct proto tipc_proto; static struct proto tipc_proto_kern; +static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { + [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_SOCK_ADDR] = { .type = NLA_U32 }, + [TIPC_NLA_SOCK_REF] = { .type = NLA_U32 }, + [TIPC_NLA_SOCK_CON] = { .type = NLA_NESTED }, + [TIPC_NLA_SOCK_HAS_PUBL] = { .type = NLA_FLAG } +}; + /* * Revised TIPC socket locking policy: * @@ -2902,3 +2910,130 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } + +/* Caller should hold socket lock for the passed tipc socket. */ +int __tipc_nl_add_sk_publ(struct sk_buff *skb, struct netlink_callback *cb, + struct publication *publ) +{ + void *hdr; + struct nlattr *attrs; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); + if (!hdr) + goto msg_cancel; + + attrs = nla_nest_start(skb, TIPC_NLA_PUBL); + if (!attrs) + goto genlmsg_cancel; + + if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key)) + goto attr_msg_cancel; + if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type)) + goto attr_msg_cancel; + if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower)) + goto attr_msg_cancel; + if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper)) + goto attr_msg_cancel; + + nla_nest_end(skb, attrs); + genlmsg_end(skb, hdr); + + return 0; + +attr_msg_cancel: + nla_nest_cancel(skb, attrs); +genlmsg_cancel: + genlmsg_cancel(skb, hdr); +msg_cancel: + return -EMSGSIZE; +} + +/* Caller should hold socket lock for the passed tipc socket. */ +int __tipc_nl_list_sk_publ(struct sk_buff *skb, struct netlink_callback *cb, + struct tipc_sock *tsk, u32 *last_publ) +{ + int err; + struct publication *p; + + if (*last_publ) { + list_for_each_entry(p, &tsk->publications, pport_list) { + if (p->key == *last_publ) + break; + } + if (p->key != *last_publ) { + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the last NLMSG_DONE message + * having the NLM_F_DUMP_INTR flag set. + */ + cb->prev_seq = 1; + *last_publ = 0; + return -EPIPE; + } + } else { + p = list_first_entry(&tsk->publications, struct publication, + pport_list); + } + + list_for_each_entry_from(p, &tsk->publications, pport_list) { + err = __tipc_nl_add_sk_publ(skb, cb, p); + if (err) { + *last_publ = p->key; + return err; + } + } + *last_publ = 0; + + return 0; +} + +int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + u32 tsk_ref = cb->args[0]; + u32 last_publ = cb->args[1]; + u32 done = cb->args[2]; + struct tipc_sock *tsk; + + if (!tsk_ref) { + struct nlattr **attrs; + struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; + + err = tipc_nlmsg_parse(cb->nlh, &attrs); + if (err) + return err; + + err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, + attrs[TIPC_NLA_SOCK], + tipc_nl_sock_policy); + if (err) + return err; + + if (!sock[TIPC_NLA_SOCK_REF]) + return -EINVAL; + + tsk_ref = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); + } + + if (done) + return 0; + + tsk = tipc_sk_get(tsk_ref); + if (!tsk) + return -EINVAL; + + lock_sock(&tsk->sk); + err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ); + if (!err) + done = 1; + release_sock(&tsk->sk); + tipc_sk_put(tsk); + + cb->args[0] = tsk_ref; + cb->args[1] = last_publ; + cb->args[2] = done; + + return skb->len; +} diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 16dfd62983a8..d34089387006 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -49,5 +49,6 @@ void tipc_sk_reinit(void); int tipc_sk_ref_table_init(u32 requested_size, u32 start); void tipc_sk_ref_table_stop(void); int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb); #endif -- cgit v1.2.3-71-gd317 From 7be57fc6918470ecacd16b89c0d4f73d8fc265c4 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Thu, 20 Nov 2014 10:29:12 +0100 Subject: tipc: add link get/dump to new netlink api Add TIPC_NL_LINK_GET command to the new tipc netlink API. This command supports dumping all information about all links (including the broadcast link) or getting all information about a specific link (not the broadcast link). The information about a link includes name, transmission info, properties and link statistics. As the tipc broadcast link is special we unfortunately have to treat it specially. It is a deliberate decision not to abstract the broadcast link on this (API) level. Netlink logical layout of link response message: -> port -> name -> MTU -> RX -> TX -> up flag -> active flag -> properties -> priority -> tolerance -> window -> statistics -> rx_info -> rx_fragments -> rx_fragmented -> rx_bundles -> rx_bundled -> tx_info -> tx_fragments -> tx_fragmented -> tx_bundles -> tx_bundled -> msg_prof_tot -> msg_len_cnt -> msg_len_tot -> msg_len_p0 -> msg_len_p1 -> msg_len_p2 -> msg_len_p3 -> msg_len_p4 -> msg_len_p5 -> msg_len_p6 -> rx_states -> rx_probes -> rx_nacks -> rx_deferred -> tx_states -> tx_probes -> tx_nacks -> tx_acks -> retransmitted -> duplicates -> link_congs -> max_queue -> avg_queue Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 62 ++++++++ net/tipc/bcast.c | 111 +++++++++++++++ net/tipc/bcast.h | 4 + net/tipc/link.c | 287 ++++++++++++++++++++++++++++++++++++++ net/tipc/link.h | 2 + net/tipc/netlink.c | 10 +- 6 files changed, 475 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 7e51ff61dccd..1034fb43d32e 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -47,6 +47,7 @@ enum { TIPC_NL_BEARER_SET, TIPC_NL_SOCK_GET, TIPC_NL_PUBL_GET, + TIPC_NL_LINK_GET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -58,6 +59,7 @@ enum { TIPC_NLA_BEARER, /* nest */ TIPC_NLA_SOCK, /* nest */ TIPC_NLA_PUBL, /* nest */ + TIPC_NLA_LINK, /* nest */ __TIPC_NLA_MAX, TIPC_NLA_MAX = __TIPC_NLA_MAX - 1 @@ -86,6 +88,24 @@ enum { TIPC_NLA_SOCK_MAX = __TIPC_NLA_SOCK_MAX - 1 }; +/* Link info */ +enum { + TIPC_NLA_LINK_UNSPEC, + TIPC_NLA_LINK_NAME, /* string */ + TIPC_NLA_LINK_DEST, /* u32 */ + TIPC_NLA_LINK_MTU, /* u32 */ + TIPC_NLA_LINK_BROADCAST, /* flag */ + TIPC_NLA_LINK_UP, /* flag */ + TIPC_NLA_LINK_ACTIVE, /* flag */ + TIPC_NLA_LINK_PROP, /* nest */ + TIPC_NLA_LINK_STATS, /* nest */ + TIPC_NLA_LINK_RX, /* u32 */ + TIPC_NLA_LINK_TX, /* u32 */ + + __TIPC_NLA_LINK_MAX, + TIPC_NLA_LINK_MAX = __TIPC_NLA_LINK_MAX - 1 +}; + /* Publication info */ enum { TIPC_NLA_PUBL_UNSPEC, @@ -128,4 +148,46 @@ enum { TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1 }; +/* Nest, statistics info */ +enum { + TIPC_NLA_STATS_UNSPEC, + + TIPC_NLA_STATS_RX_INFO, /* u32 */ + TIPC_NLA_STATS_RX_FRAGMENTS, /* u32 */ + TIPC_NLA_STATS_RX_FRAGMENTED, /* u32 */ + TIPC_NLA_STATS_RX_BUNDLES, /* u32 */ + TIPC_NLA_STATS_RX_BUNDLED, /* u32 */ + TIPC_NLA_STATS_TX_INFO, /* u32 */ + TIPC_NLA_STATS_TX_FRAGMENTS, /* u32 */ + TIPC_NLA_STATS_TX_FRAGMENTED, /* u32 */ + TIPC_NLA_STATS_TX_BUNDLES, /* u32 */ + TIPC_NLA_STATS_TX_BUNDLED, /* u32 */ + TIPC_NLA_STATS_MSG_PROF_TOT, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_CNT, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_TOT, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_P0, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_P1, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_P2, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_P3, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_P4, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_P5, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_P6, /* u32 */ + TIPC_NLA_STATS_RX_STATES, /* u32 */ + TIPC_NLA_STATS_RX_PROBES, /* u32 */ + TIPC_NLA_STATS_RX_NACKS, /* u32 */ + TIPC_NLA_STATS_RX_DEFERRED, /* u32 */ + TIPC_NLA_STATS_TX_STATES, /* u32 */ + TIPC_NLA_STATS_TX_PROBES, /* u32 */ + TIPC_NLA_STATS_TX_NACKS, /* u32 */ + TIPC_NLA_STATS_TX_ACKS, /* u32 */ + TIPC_NLA_STATS_RETRANSMITTED, /* u32 */ + TIPC_NLA_STATS_DUPLICATES, /* u32 */ + TIPC_NLA_STATS_LINK_CONGS, /* u32 */ + TIPC_NLA_STATS_MAX_QUEUE, /* u32 */ + TIPC_NLA_STATS_AVG_QUEUE, /* u32 */ + + __TIPC_NLA_STATS_MAX, + TIPC_NLA_STATS_MAX = __TIPC_NLA_STATS_MAX - 1 +}; + #endif diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index b8670bf262e2..dcf3589e3cc5 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -767,6 +767,117 @@ void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) tipc_bclink_unlock(); } +int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, struct tipc_stats *stats) +{ + int i; + struct nlattr *nest; + + struct nla_map { + __u32 key; + __u32 val; + }; + + struct nla_map map[] = { + {TIPC_NLA_STATS_RX_INFO, stats->recv_info}, + {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments}, + {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented}, + {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles}, + {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled}, + {TIPC_NLA_STATS_TX_INFO, stats->sent_info}, + {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments}, + {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented}, + {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles}, + {TIPC_NLA_STATS_TX_BUNDLED, stats->sent_bundled}, + {TIPC_NLA_STATS_RX_NACKS, stats->recv_nacks}, + {TIPC_NLA_STATS_RX_DEFERRED, stats->deferred_recv}, + {TIPC_NLA_STATS_TX_NACKS, stats->sent_nacks}, + {TIPC_NLA_STATS_TX_ACKS, stats->sent_acks}, + {TIPC_NLA_STATS_RETRANSMITTED, stats->retransmitted}, + {TIPC_NLA_STATS_DUPLICATES, stats->duplicates}, + {TIPC_NLA_STATS_LINK_CONGS, stats->link_congs}, + {TIPC_NLA_STATS_MAX_QUEUE, stats->max_queue_sz}, + {TIPC_NLA_STATS_AVG_QUEUE, stats->queue_sz_counts ? + (stats->accu_queue_sz / stats->queue_sz_counts) : 0} + }; + + nest = nla_nest_start(skb, TIPC_NLA_LINK_STATS); + if (!nest) + return -EMSGSIZE; + + for (i = 0; i < ARRAY_SIZE(map); i++) + if (nla_put_u32(skb, map[i].key, map[i].val)) + goto msg_full; + + nla_nest_end(skb, nest); + + return 0; +msg_full: + nla_nest_cancel(skb, nest); + + return -EMSGSIZE; +} + +int tipc_nl_add_bc_link(struct tipc_nl_msg *msg) +{ + int err; + void *hdr; + struct nlattr *attrs; + struct nlattr *prop; + + if (!bcl) + return 0; + + tipc_bclink_lock(); + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + NLM_F_MULTI, TIPC_NL_LINK_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); + if (!attrs) + goto msg_full; + + /* The broadcast link is always up */ + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) + goto attr_msg_full; + + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_BROADCAST)) + goto attr_msg_full; + if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->next_in_no)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->next_out_no)) + goto attr_msg_full; + + prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); + if (!prop) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->queue_limit[0])) + goto prop_msg_full; + nla_nest_end(msg->skb, prop); + + err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); + if (err) + goto attr_msg_full; + + tipc_bclink_unlock(); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + tipc_bclink_unlock(); + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} int tipc_bclink_stats(char *buf, const u32 buf_size) { diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index e7b0f85a82bc..443de084d3e8 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -37,6 +37,8 @@ #ifndef _TIPC_BCAST_H #define _TIPC_BCAST_H +#include "netlink.h" + #define MAX_NODES 4096 #define WSIZE 32 #define TIPC_BCLINK_RESET 1 @@ -100,4 +102,6 @@ void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action); uint tipc_bclink_get_mtu(void); int tipc_bclink_xmit(struct sk_buff *buf); void tipc_bclink_wakeup_users(void); +int tipc_nl_add_bc_link(struct tipc_nl_msg *msg); + #endif diff --git a/net/tipc/link.c b/net/tipc/link.c index e7f365012bd1..bdc4b5e5bf56 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -36,6 +36,7 @@ #include "core.h" #include "link.h" +#include "bcast.h" #include "socket.h" #include "name_distr.h" #include "discover.h" @@ -51,6 +52,22 @@ static const char *link_co_err = "Link changeover error, "; static const char *link_rst_msg = "Resetting link "; static const char *link_unk_evt = "Unknown link event "; +static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { + [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_LINK_NAME] = { + .type = NLA_STRING, + .len = TIPC_MAX_LINK_NAME + }, + [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, + [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED }, + [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED }, + [TIPC_NLA_LINK_RX] = { .type = NLA_U32 }, + [TIPC_NLA_LINK_TX] = { .type = NLA_U32 } +}; + /* Properties valid for media, bearar and link */ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { [TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC }, @@ -2423,3 +2440,273 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) return 0; } + +int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) +{ + int i; + struct nlattr *stats; + + struct nla_map { + u32 key; + u32 val; + }; + + struct nla_map map[] = { + {TIPC_NLA_STATS_RX_INFO, s->recv_info}, + {TIPC_NLA_STATS_RX_FRAGMENTS, s->recv_fragments}, + {TIPC_NLA_STATS_RX_FRAGMENTED, s->recv_fragmented}, + {TIPC_NLA_STATS_RX_BUNDLES, s->recv_bundles}, + {TIPC_NLA_STATS_RX_BUNDLED, s->recv_bundled}, + {TIPC_NLA_STATS_TX_INFO, s->sent_info}, + {TIPC_NLA_STATS_TX_FRAGMENTS, s->sent_fragments}, + {TIPC_NLA_STATS_TX_FRAGMENTED, s->sent_fragmented}, + {TIPC_NLA_STATS_TX_BUNDLES, s->sent_bundles}, + {TIPC_NLA_STATS_TX_BUNDLED, s->sent_bundled}, + {TIPC_NLA_STATS_MSG_PROF_TOT, (s->msg_length_counts) ? + s->msg_length_counts : 1}, + {TIPC_NLA_STATS_MSG_LEN_CNT, s->msg_length_counts}, + {TIPC_NLA_STATS_MSG_LEN_TOT, s->msg_lengths_total}, + {TIPC_NLA_STATS_MSG_LEN_P0, s->msg_length_profile[0]}, + {TIPC_NLA_STATS_MSG_LEN_P1, s->msg_length_profile[1]}, + {TIPC_NLA_STATS_MSG_LEN_P2, s->msg_length_profile[2]}, + {TIPC_NLA_STATS_MSG_LEN_P3, s->msg_length_profile[3]}, + {TIPC_NLA_STATS_MSG_LEN_P4, s->msg_length_profile[4]}, + {TIPC_NLA_STATS_MSG_LEN_P5, s->msg_length_profile[5]}, + {TIPC_NLA_STATS_MSG_LEN_P6, s->msg_length_profile[6]}, + {TIPC_NLA_STATS_RX_STATES, s->recv_states}, + {TIPC_NLA_STATS_RX_PROBES, s->recv_probes}, + {TIPC_NLA_STATS_RX_NACKS, s->recv_nacks}, + {TIPC_NLA_STATS_RX_DEFERRED, s->deferred_recv}, + {TIPC_NLA_STATS_TX_STATES, s->sent_states}, + {TIPC_NLA_STATS_TX_PROBES, s->sent_probes}, + {TIPC_NLA_STATS_TX_NACKS, s->sent_nacks}, + {TIPC_NLA_STATS_TX_ACKS, s->sent_acks}, + {TIPC_NLA_STATS_RETRANSMITTED, s->retransmitted}, + {TIPC_NLA_STATS_DUPLICATES, s->duplicates}, + {TIPC_NLA_STATS_LINK_CONGS, s->link_congs}, + {TIPC_NLA_STATS_MAX_QUEUE, s->max_queue_sz}, + {TIPC_NLA_STATS_AVG_QUEUE, s->queue_sz_counts ? + (s->accu_queue_sz / s->queue_sz_counts) : 0} + }; + + stats = nla_nest_start(skb, TIPC_NLA_LINK_STATS); + if (!stats) + return -EMSGSIZE; + + for (i = 0; i < ARRAY_SIZE(map); i++) + if (nla_put_u32(skb, map[i].key, map[i].val)) + goto msg_full; + + nla_nest_end(skb, stats); + + return 0; +msg_full: + nla_nest_cancel(skb, stats); + + return -EMSGSIZE; +} + +/* Caller should hold appropriate locks to protect the link */ +int __tipc_nl_add_link(struct tipc_nl_msg *msg, struct tipc_link *link) +{ + int err; + void *hdr; + struct nlattr *attrs; + struct nlattr *prop; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + NLM_F_MULTI, TIPC_NL_LINK_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); + if (!attrs) + goto msg_full; + + if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, link->name)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, + tipc_cluster_mask(tipc_own_addr))) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->max_pkt)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->next_in_no)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->next_out_no)) + goto attr_msg_full; + + if (tipc_link_is_up(link)) + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) + goto attr_msg_full; + if (tipc_link_is_active(link)) + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE)) + goto attr_msg_full; + + prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); + if (!prop) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, link->tolerance)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, + link->queue_limit[TIPC_LOW_IMPORTANCE])) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) + goto prop_msg_full; + nla_nest_end(msg->skb, prop); + + err = __tipc_nl_add_stats(msg->skb, &link->stats); + if (err) + goto attr_msg_full; + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +/* Caller should hold node lock */ +int __tipc_nl_add_node_links(struct tipc_nl_msg *msg, struct tipc_node *node, + u32 *prev_link) +{ + u32 i; + int err; + + for (i = *prev_link; i < MAX_BEARERS; i++) { + *prev_link = i; + + if (!node->links[i]) + continue; + + err = __tipc_nl_add_link(msg, node->links[i]); + if (err) + return err; + } + *prev_link = 0; + + return 0; +} + +int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct tipc_node *node; + struct tipc_nl_msg msg; + u32 prev_node = cb->args[0]; + u32 prev_link = cb->args[1]; + int done = cb->args[2]; + int err; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + + if (prev_node) { + node = tipc_node_find(prev_node); + if (!node) { + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the last NLMSG_DONE message + * having the NLM_F_DUMP_INTR flag set. + */ + cb->prev_seq = 1; + goto out; + } + + list_for_each_entry_continue_rcu(node, &tipc_node_list, list) { + tipc_node_lock(node); + err = __tipc_nl_add_node_links(&msg, node, &prev_link); + tipc_node_unlock(node); + if (err) + goto out; + + prev_node = node->addr; + } + } else { + err = tipc_nl_add_bc_link(&msg); + if (err) + goto out; + + list_for_each_entry_rcu(node, &tipc_node_list, list) { + tipc_node_lock(node); + err = __tipc_nl_add_node_links(&msg, node, &prev_link); + tipc_node_unlock(node); + if (err) + goto out; + + prev_node = node->addr; + } + } + done = 1; +out: + rcu_read_unlock(); + + cb->args[0] = prev_node; + cb->args[1] = prev_link; + cb->args[2] = done; + + return skb->len; +} + +int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *ans_skb; + struct tipc_nl_msg msg; + struct tipc_link *link; + struct tipc_node *node; + char *name; + int bearer_id; + int err; + + if (!info->attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); + node = tipc_link_find_owner(name, &bearer_id); + if (!node) + return -EINVAL; + + ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!ans_skb) + return -ENOMEM; + + msg.skb = ans_skb; + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + tipc_node_lock(node); + link = node->links[bearer_id]; + if (!link) { + err = -EINVAL; + goto err_out; + } + + err = __tipc_nl_add_link(&msg, link); + if (err) + goto err_out; + + tipc_node_unlock(node); + + return genlmsg_reply(ans_skb, info); + +err_out: + tipc_node_unlock(node); + nlmsg_free(ans_skb); + + return err; +} diff --git a/net/tipc/link.h b/net/tipc/link.h index 4338294f20d4..8e3542a4a037 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -240,6 +240,8 @@ void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window); void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *start, u32 retransmits); +int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); /* diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 9bc64aaff466..ea168b889caa 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -38,6 +38,7 @@ #include "config.h" #include "socket.h" #include "bearer.h" +#include "link.h" #include static int handle_cmd(struct sk_buff *skb, struct genl_info *info) @@ -74,7 +75,8 @@ static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { [TIPC_NLA_UNSPEC] = { .type = NLA_UNSPEC, }, [TIPC_NLA_BEARER] = { .type = NLA_NESTED, }, [TIPC_NLA_SOCK] = { .type = NLA_NESTED, }, - [TIPC_NLA_PUBL] = { .type = NLA_NESTED, } + [TIPC_NLA_PUBL] = { .type = NLA_NESTED, }, + [TIPC_NLA_LINK] = { .type = NLA_NESTED, }, }; /* Legacy ASCII API */ @@ -136,6 +138,12 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .cmd = TIPC_NL_PUBL_GET, .dumpit = tipc_nl_publ_dump, .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_LINK_GET, + .doit = tipc_nl_link_get, + .dumpit = tipc_nl_link_dump, + .policy = tipc_nl_policy, } }; -- cgit v1.2.3-71-gd317 From f96ce7a20d6972a834202f3cdd6a53fd0ee26a8e Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Thu, 20 Nov 2014 10:29:13 +0100 Subject: tipc: add link set to new netlink api Add TIPC_NL_LINK_SET to the new tipc netlink API. This command can set one or more link properties for a particular link. Netlink logical layout of link set message: -> link -> name -> properties [ -> tolerance ] [ -> priority ] [ -> window ] Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 1 + net/tipc/link.c | 73 +++++++++++++++++++++++++++++++++++++++ net/tipc/link.h | 1 + net/tipc/netlink.c | 5 +++ 4 files changed, 80 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 1034fb43d32e..2deb7fd8d85d 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -48,6 +48,7 @@ enum { TIPC_NL_SOCK_GET, TIPC_NL_PUBL_GET, TIPC_NL_LINK_GET, + TIPC_NL_LINK_SET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 diff --git a/net/tipc/link.c b/net/tipc/link.c index bdc4b5e5bf56..6785dcfd2d23 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2441,6 +2441,79 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) return 0; } +int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + int res = 0; + int bearer_id; + char *name; + struct tipc_link *link; + struct tipc_node *node; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + + if (!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + + node = tipc_link_find_owner(name, &bearer_id); + if (!node) + return -EINVAL; + + tipc_node_lock(node); + + link = node->links[bearer_id]; + if (!link) { + res = -EINVAL; + goto out; + } + + if (attrs[TIPC_NLA_LINK_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], + props); + if (err) { + res = err; + goto out; + } + + if (props[TIPC_NLA_PROP_TOL]) { + u32 tol; + + tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + link_set_supervision_props(link, tol); + tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0, 0); + } + if (props[TIPC_NLA_PROP_PRIO]) { + u32 prio; + + prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + link->priority = prio; + tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio, 0); + } + if (props[TIPC_NLA_PROP_WIN]) { + u32 win; + + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + tipc_link_set_queue_limits(link, win); + } + } + +out: + tipc_node_unlock(node); + + return res; +} int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) { int i; diff --git a/net/tipc/link.h b/net/tipc/link.h index 8e3542a4a037..3738ba11ce00 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -242,6 +242,7 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); /* diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index ea168b889caa..d4444e59bf98 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -144,6 +144,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .doit = tipc_nl_link_get, .dumpit = tipc_nl_link_dump, .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_LINK_SET, + .doit = tipc_nl_link_set, + .policy = tipc_nl_policy, } }; -- cgit v1.2.3-71-gd317 From ae36342b50a91cff188e417201452dc075a8f444 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Thu, 20 Nov 2014 10:29:14 +0100 Subject: tipc: add link stat reset to new netlink api Add TIPC_NL_LINK_RESET_STATS command to the new netlink API. This command resets the link statistics for a particular link. Netlink logical layout of link reset message: -> link -> name Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 1 + net/tipc/link.c | 49 +++++++++++++++++++++++++++++++++++++++ net/tipc/link.h | 1 + net/tipc/netlink.c | 5 ++++ 4 files changed, 56 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 2deb7fd8d85d..574882a301bf 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -49,6 +49,7 @@ enum { TIPC_NL_PUBL_GET, TIPC_NL_LINK_GET, TIPC_NL_LINK_SET, + TIPC_NL_LINK_RESET_STATS, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 diff --git a/net/tipc/link.c b/net/tipc/link.c index 6785dcfd2d23..629e8cf393bf 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2783,3 +2783,52 @@ err_out: return err; } + +int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *link_name; + unsigned int bearer_id; + struct tipc_link *link; + struct tipc_node *node; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + + if (!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + + if (strcmp(link_name, tipc_bclink_name) == 0) { + err = tipc_bclink_reset_stats(); + if (err) + return err; + return 0; + } + + node = tipc_link_find_owner(link_name, &bearer_id); + if (!node) + return -EINVAL; + + tipc_node_lock(node); + + link = node->links[bearer_id]; + if (!link) { + tipc_node_unlock(node); + return -EINVAL; + } + + link_reset_statistics(link); + + tipc_node_unlock(node); + + return 0; +} diff --git a/net/tipc/link.h b/net/tipc/link.h index 3738ba11ce00..f463e7be801c 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -243,6 +243,7 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info); int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); /* diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index d4444e59bf98..68bfd11f232d 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -149,6 +149,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .cmd = TIPC_NL_LINK_SET, .doit = tipc_nl_link_set, .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_LINK_RESET_STATS, + .doit = tipc_nl_link_reset_stats, + .policy = tipc_nl_policy, } }; -- cgit v1.2.3-71-gd317 From 46f15c6794fb744bb7741d26143a85b9012c10d4 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Thu, 20 Nov 2014 10:29:15 +0100 Subject: tipc: add media get/dump to new netlink api Add TIPC_NL_MEDIA_GET command to the new tipc netlink API. This command supports dumping all information about all defined media as well as getting all information about a specific media. The information about a media includes name and link properties. Netlink logical layout of media get response message: -> media -> name -> link properties -> tolerance -> priority -> window Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 12 ++++ net/tipc/bearer.c | 125 ++++++++++++++++++++++++++++++++++++++ net/tipc/bearer.h | 3 + net/tipc/netlink.c | 7 +++ 4 files changed, 147 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 574882a301bf..4101b70991cc 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -50,6 +50,7 @@ enum { TIPC_NL_LINK_GET, TIPC_NL_LINK_SET, TIPC_NL_LINK_RESET_STATS, + TIPC_NL_MEDIA_GET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -62,6 +63,7 @@ enum { TIPC_NLA_SOCK, /* nest */ TIPC_NLA_PUBL, /* nest */ TIPC_NLA_LINK, /* nest */ + TIPC_NLA_MEDIA, /* nest */ __TIPC_NLA_MAX, TIPC_NLA_MAX = __TIPC_NLA_MAX - 1 @@ -108,6 +110,16 @@ enum { TIPC_NLA_LINK_MAX = __TIPC_NLA_LINK_MAX - 1 }; +/* Media info */ +enum { + TIPC_NLA_MEDIA_UNSPEC, + TIPC_NLA_MEDIA_NAME, /* string */ + TIPC_NLA_MEDIA_PROP, /* nest */ + + __TIPC_NLA_MEDIA_MAX, + TIPC_NLA_MEDIA_MAX = __TIPC_NLA_MEDIA_MAX - 1 +}; + /* Publication info */ enum { TIPC_NLA_PUBL_UNSPEC, diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 6d547d0b3f31..26630a51fcae 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -61,6 +61,12 @@ tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = { [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 } }; +static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = { + [TIPC_NLA_MEDIA_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_MEDIA_NAME] = { .type = NLA_STRING }, + [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED } +}; + struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down); @@ -898,3 +904,122 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) return 0; } + +int __tipc_nl_add_media(struct tipc_nl_msg *msg, struct tipc_media *media) +{ + void *hdr; + struct nlattr *attrs; + struct nlattr *prop; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + NLM_F_MULTI, TIPC_NL_MEDIA_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_MEDIA); + if (!attrs) + goto msg_full; + + if (nla_put_string(msg->skb, TIPC_NLA_MEDIA_NAME, media->name)) + goto attr_msg_full; + + prop = nla_nest_start(msg->skb, TIPC_NLA_MEDIA_PROP); + if (!prop) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, media->priority)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, media->tolerance)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->window)) + goto prop_msg_full; + + nla_nest_end(msg->skb, prop); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + int i = cb->args[0]; + struct tipc_nl_msg msg; + + if (i == MAX_MEDIA) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rtnl_lock(); + for (; media_info_array[i] != NULL; i++) { + err = __tipc_nl_add_media(&msg, media_info_array[i]); + if (err) + break; + } + rtnl_unlock(); + + cb->args[0] = i; + return skb->len; +} + +int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_nl_msg msg; + struct tipc_media *media; + struct sk_buff *rep; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + + if (!info->attrs[TIPC_NLA_MEDIA]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX, + info->attrs[TIPC_NLA_MEDIA], + tipc_nl_media_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_MEDIA_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); + + rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!rep) + return -ENOMEM; + + msg.skb = rep; + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + rtnl_lock(); + media = tipc_media_find(name); + if (!media) { + err = -EINVAL; + goto err_out; + } + + err = __tipc_nl_add_media(&msg, media); + if (err) + goto err_out; + rtnl_unlock(); + + return genlmsg_reply(rep, info); +err_out: + rtnl_unlock(); + nlmsg_free(rep); + + return err; +} diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index c6cf9df31375..bacd225bccfb 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -184,6 +184,9 @@ int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info); + int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 68bfd11f232d..742e51a0afbb 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -77,6 +77,7 @@ static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { [TIPC_NLA_SOCK] = { .type = NLA_NESTED, }, [TIPC_NLA_PUBL] = { .type = NLA_NESTED, }, [TIPC_NLA_LINK] = { .type = NLA_NESTED, }, + [TIPC_NLA_MEDIA] = { .type = NLA_NESTED, }, }; /* Legacy ASCII API */ @@ -154,6 +155,12 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .cmd = TIPC_NL_LINK_RESET_STATS, .doit = tipc_nl_link_reset_stats, .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_MEDIA_GET, + .doit = tipc_nl_media_get, + .dumpit = tipc_nl_media_dump, + .policy = tipc_nl_policy, } }; -- cgit v1.2.3-71-gd317 From 1e55417d8fc6f6d93b1cc6995b911d48ded2adfb Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Thu, 20 Nov 2014 10:29:16 +0100 Subject: tipc: add media set to new netlink api Add TIPC_NL_MEDIA_SET command to the new tipc netlink API. This command can set one or more link properties for a particular media. Netlink logical layout of bearer set message: -> media -> name -> link properties [ -> tolerance ] [ -> priority ] [ -> window ] Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 1 + net/tipc/bearer.c | 47 +++++++++++++++++++++++++++++++++++++++ net/tipc/bearer.h | 1 + net/tipc/netlink.c | 5 +++++ 4 files changed, 54 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 4101b70991cc..43beef28d67f 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -51,6 +51,7 @@ enum { TIPC_NL_LINK_SET, TIPC_NL_LINK_RESET_STATS, TIPC_NL_MEDIA_GET, + TIPC_NL_MEDIA_SET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 26630a51fcae..5f6f32369c16 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1023,3 +1023,50 @@ err_out: return err; } + +int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_media *m; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + + if (!info->attrs[TIPC_NLA_MEDIA]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX, + info->attrs[TIPC_NLA_MEDIA], + tipc_nl_media_policy); + + if (!attrs[TIPC_NLA_MEDIA_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); + + rtnl_lock(); + m = tipc_media_find(name); + if (!m) { + rtnl_unlock(); + return -EINVAL; + } + + if (attrs[TIPC_NLA_MEDIA_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_MEDIA_PROP], + props); + if (err) { + rtnl_unlock(); + return err; + } + + if (props[TIPC_NLA_PROP_TOL]) + m->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + if (props[TIPC_NLA_PROP_PRIO]) + m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + if (props[TIPC_NLA_PROP_WIN]) + m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + } + rtnl_unlock(); + + return 0; +} diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index bacd225bccfb..b1d905209e83 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -186,6 +186,7 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 742e51a0afbb..8673e370b7ed 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -161,6 +161,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .doit = tipc_nl_media_get, .dumpit = tipc_nl_media_dump, .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_MEDIA_SET, + .doit = tipc_nl_media_set, + .policy = tipc_nl_policy, } }; -- cgit v1.2.3-71-gd317 From 3e4b6ab58d614934e7ca99bdf448089695d34ffa Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Thu, 20 Nov 2014 10:29:17 +0100 Subject: tipc: add node get/dump to new netlink api Add TIPC_NL_NODE_GET to the new tipc netlink API. This command can dump the address and node status of all nodes in the tipc cluster. Netlink logical layout of returned node/address data: -> node -> address -> up flag Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 12 +++++ net/tipc/netlink.c | 7 +++ net/tipc/node.c | 96 +++++++++++++++++++++++++++++++++++++++ net/tipc/node.h | 4 +- 4 files changed, 118 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 43beef28d67f..08a793374e8c 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -52,6 +52,7 @@ enum { TIPC_NL_LINK_RESET_STATS, TIPC_NL_MEDIA_GET, TIPC_NL_MEDIA_SET, + TIPC_NL_NODE_GET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -65,6 +66,7 @@ enum { TIPC_NLA_PUBL, /* nest */ TIPC_NLA_LINK, /* nest */ TIPC_NLA_MEDIA, /* nest */ + TIPC_NLA_NODE, /* nest */ __TIPC_NLA_MAX, TIPC_NLA_MAX = __TIPC_NLA_MAX - 1 @@ -121,6 +123,16 @@ enum { TIPC_NLA_MEDIA_MAX = __TIPC_NLA_MEDIA_MAX - 1 }; +/* Node info */ +enum { + TIPC_NLA_NODE_UNSPEC, + TIPC_NLA_NODE_ADDR, /* u32 */ + TIPC_NLA_NODE_UP, /* flag */ + + __TIPC_NLA_NODE_MAX, + TIPC_NLA_NODE_MAX = __TIPC_NLA_NODE_MAX - 1 +}; + /* Publication info */ enum { TIPC_NLA_PUBL_UNSPEC, diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 8673e370b7ed..5b0e3c8457d2 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -39,6 +39,7 @@ #include "socket.h" #include "bearer.h" #include "link.h" +#include "node.h" #include static int handle_cmd(struct sk_buff *skb, struct genl_info *info) @@ -78,6 +79,7 @@ static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { [TIPC_NLA_PUBL] = { .type = NLA_NESTED, }, [TIPC_NLA_LINK] = { .type = NLA_NESTED, }, [TIPC_NLA_MEDIA] = { .type = NLA_NESTED, }, + [TIPC_NLA_NODE] = { .type = NLA_NESTED, } }; /* Legacy ASCII API */ @@ -166,6 +168,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .cmd = TIPC_NL_MEDIA_SET, .doit = tipc_nl_media_set, .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_NODE_GET, + .dumpit = tipc_nl_node_dump, + .policy = tipc_nl_policy, } }; diff --git a/net/tipc/node.c b/net/tipc/node.c index 5781634e957d..72a75d4838b2 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -58,6 +58,12 @@ struct tipc_sock_conn { struct list_head list; }; +static const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = { + [TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 }, + [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG } +}; + /* * A trivial power-of-two bitmask technique is used for speed, since this * operation is done for every incoming TIPC packet. The number of hash table @@ -601,3 +607,93 @@ void tipc_node_unlock(struct tipc_node *node) tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, link_id, addr); } + +/* Caller should hold node lock for the passed node */ +int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) +{ + void *hdr; + struct nlattr *attrs; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + NLM_F_MULTI, TIPC_NL_NODE_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_NODE); + if (!attrs) + goto msg_full; + + if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr)) + goto attr_msg_full; + if (tipc_node_is_up(node)) + if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP)) + goto attr_msg_full; + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + int done = cb->args[0]; + int last_addr = cb->args[1]; + struct tipc_node *node; + struct tipc_nl_msg msg; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + + if (last_addr && !tipc_node_find(last_addr)) { + rcu_read_unlock(); + /* We never set seq or call nl_dump_check_consistent() this + * means that setting prev_seq here will cause the consistence + * check to fail in the netlink callback handler. Resulting in + * the NLMSG_DONE message having the NLM_F_DUMP_INTR flag set if + * the node state changed while we released the lock. + */ + cb->prev_seq = 1; + return -EPIPE; + } + + list_for_each_entry_rcu(node, &tipc_node_list, list) { + if (last_addr) { + if (node->addr == last_addr) + last_addr = 0; + else + continue; + } + + tipc_node_lock(node); + err = __tipc_nl_add_node(&msg, node); + if (err) { + last_addr = node->addr; + tipc_node_unlock(node); + goto out; + } + + tipc_node_unlock(node); + } + done = 1; +out: + cb->args[0] = done; + cb->args[1] = last_addr; + rcu_read_unlock(); + + return skb->len; +} diff --git a/net/tipc/node.h b/net/tipc/node.h index 04e91458bb29..005fbcef3212 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -1,7 +1,7 @@ /* * net/tipc/node.h: Include file for TIPC node management routines * - * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2000-2006, 2014, Ericsson AB * Copyright (c) 2005, 2010-2014, Wind River Systems * All rights reserved. * @@ -145,6 +145,8 @@ void tipc_node_unlock(struct tipc_node *node); int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(u32 dnode, u32 port); +int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); + static inline void tipc_node_lock(struct tipc_node *node) { spin_lock_bh(&node->lock); -- cgit v1.2.3-71-gd317 From fd3cf2ad519f73c2f7a46460ebedf32ad246520c Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Thu, 20 Nov 2014 10:29:18 +0100 Subject: tipc: add net dump to new netlink api Add TIPC_NL_NET_GET command to the new tipc netlink API. This command dumps the network id of the node. Netlink logical layout of returned network data: -> net -> id Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 11 ++++++++ net/tipc/net.c | 59 +++++++++++++++++++++++++++++++++++++++ net/tipc/net.h | 7 ++++- net/tipc/netlink.c | 9 +++++- 4 files changed, 84 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 08a793374e8c..dcfd42002da7 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -53,6 +53,7 @@ enum { TIPC_NL_MEDIA_GET, TIPC_NL_MEDIA_SET, TIPC_NL_NODE_GET, + TIPC_NL_NET_GET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -67,6 +68,7 @@ enum { TIPC_NLA_LINK, /* nest */ TIPC_NLA_MEDIA, /* nest */ TIPC_NLA_NODE, /* nest */ + TIPC_NLA_NET, /* nest */ __TIPC_NLA_MAX, TIPC_NLA_MAX = __TIPC_NLA_MAX - 1 @@ -133,6 +135,15 @@ enum { TIPC_NLA_NODE_MAX = __TIPC_NLA_NODE_MAX - 1 }; +/* Net info */ +enum { + TIPC_NLA_NET_UNSPEC, + TIPC_NLA_NET_ID, /* u32 */ + + __TIPC_NLA_NET_MAX, + TIPC_NLA_NET_MAX = __TIPC_NLA_NET_MAX - 1 +}; + /* Publication info */ enum { TIPC_NLA_PUBL_UNSPEC, diff --git a/net/tipc/net.c b/net/tipc/net.c index 93b9944a6a8b..d9e666a1be9d 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -42,6 +42,11 @@ #include "node.h" #include "config.h" +static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { + [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_NET_ID] = { .type = NLA_U32 } +}; + /* * The TIPC locking policy is designed to ensure a very fine locking * granularity, permitting complete parallel access to individual @@ -138,3 +143,57 @@ void tipc_net_stop(void) pr_info("Left network mode\n"); } + +static int __tipc_nl_add_net(struct tipc_nl_msg *msg) +{ + void *hdr; + struct nlattr *attrs; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + NLM_F_MULTI, TIPC_NL_NET_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_NET); + if (!attrs) + goto msg_full; + + if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tipc_net_id)) + goto attr_msg_full; + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + int done = cb->args[0]; + struct tipc_nl_msg msg; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + err = __tipc_nl_add_net(&msg); + if (err) + goto out; + + done = 1; +out: + cb->args[0] = done; + + return skb->len; +} diff --git a/net/tipc/net.h b/net/tipc/net.h index 59ef3388be2c..60dc22fe9267 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -1,7 +1,7 @@ /* * net/tipc/net.h: Include file for TIPC network routing code * - * Copyright (c) 1995-2006, Ericsson AB + * Copyright (c) 1995-2006, 2014, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -37,7 +37,12 @@ #ifndef _TIPC_NET_H #define _TIPC_NET_H +#include + int tipc_net_start(u32 addr); + void tipc_net_stop(void); +int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); + #endif diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 5b0e3c8457d2..c143f9c20f61 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -40,6 +40,7 @@ #include "bearer.h" #include "link.h" #include "node.h" +#include "net.h" #include static int handle_cmd(struct sk_buff *skb, struct genl_info *info) @@ -79,7 +80,8 @@ static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { [TIPC_NLA_PUBL] = { .type = NLA_NESTED, }, [TIPC_NLA_LINK] = { .type = NLA_NESTED, }, [TIPC_NLA_MEDIA] = { .type = NLA_NESTED, }, - [TIPC_NLA_NODE] = { .type = NLA_NESTED, } + [TIPC_NLA_NODE] = { .type = NLA_NESTED, }, + [TIPC_NLA_NET] = { .type = NLA_NESTED, } }; /* Legacy ASCII API */ @@ -173,6 +175,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .cmd = TIPC_NL_NODE_GET, .dumpit = tipc_nl_node_dump, .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_NET_GET, + .dumpit = tipc_nl_net_dump, + .policy = tipc_nl_policy, } }; -- cgit v1.2.3-71-gd317 From 27c21416727af73df45051acb05331c0f10e50f6 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Thu, 20 Nov 2014 10:29:19 +0100 Subject: tipc: add net set to new netlink api Add TIPC_NL_NET_SET command to the new tipc netlink API. This command can set the network id and network (tipc) address. Netlink logical layout of network set message: -> net [ -> id ] [ -> address ] Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 2 ++ net/tipc/net.c | 47 +++++++++++++++++++++++++++++++++++++++ net/tipc/net.h | 1 + net/tipc/netlink.c | 5 +++++ 4 files changed, 55 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index dcfd42002da7..fb980e27f425 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -54,6 +54,7 @@ enum { TIPC_NL_MEDIA_SET, TIPC_NL_NODE_GET, TIPC_NL_NET_GET, + TIPC_NL_NET_SET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -139,6 +140,7 @@ enum { enum { TIPC_NLA_NET_UNSPEC, TIPC_NLA_NET_ID, /* u32 */ + TIPC_NLA_NET_ADDR, /* u32 */ __TIPC_NLA_NET_MAX, TIPC_NLA_NET_MAX = __TIPC_NLA_NET_MAX - 1 diff --git a/net/tipc/net.c b/net/tipc/net.c index d9e666a1be9d..cf13df3cde8f 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -197,3 +197,50 @@ out: return skb->len; } + +int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; + + if (!info->attrs[TIPC_NLA_NET]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, + info->attrs[TIPC_NLA_NET], + tipc_nl_net_policy); + if (err) + return err; + + if (attrs[TIPC_NLA_NET_ID]) { + u32 val; + + /* Can't change net id once TIPC has joined a network */ + if (tipc_own_addr) + return -EPERM; + + val = nla_get_u32(attrs[TIPC_NLA_NET_ID]); + if (val < 1 || val > 9999) + return -EINVAL; + + tipc_net_id = val; + } + + if (attrs[TIPC_NLA_NET_ADDR]) { + u32 addr; + + /* Can't change net addr once TIPC has joined a network */ + if (tipc_own_addr) + return -EPERM; + + addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); + if (!tipc_addr_node_valid(addr)) + return -EINVAL; + + rtnl_lock(); + tipc_net_start(addr); + rtnl_unlock(); + } + + return 0; +} diff --git a/net/tipc/net.h b/net/tipc/net.h index 60dc22fe9267..a81c1b9eb150 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -44,5 +44,6 @@ int tipc_net_start(u32 addr); void tipc_net_stop(void); int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); #endif diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index c143f9c20f61..cb37d30378a8 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -180,6 +180,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .cmd = TIPC_NL_NET_GET, .dumpit = tipc_nl_net_dump, .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_NET_SET, + .doit = tipc_nl_net_set, + .policy = tipc_nl_policy, } }; -- cgit v1.2.3-71-gd317 From 1593123a6a4914ccac4699d7f93cdf8057a7d822 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Thu, 20 Nov 2014 10:29:20 +0100 Subject: tipc: add name table dump to new netlink api Add TIPC_NL_NAME_TABLE_GET command to the new tipc netlink API. This command supports dumping the name table of all nodes. Netlink logical layout of name table response message: -> name table -> publication -> type -> lower -> upper -> scope -> node -> ref -> key Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 11 +++ net/tipc/name_table.c | 190 +++++++++++++++++++++++++++++++++++++- net/tipc/name_table.h | 4 +- net/tipc/netlink.c | 9 +- 4 files changed, 211 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index fb980e27f425..8d723824ad69 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -55,6 +55,7 @@ enum { TIPC_NL_NODE_GET, TIPC_NL_NET_GET, TIPC_NL_NET_SET, + TIPC_NL_NAME_TABLE_GET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -70,6 +71,7 @@ enum { TIPC_NLA_MEDIA, /* nest */ TIPC_NLA_NODE, /* nest */ TIPC_NLA_NET, /* nest */ + TIPC_NLA_NAME_TABLE, /* nest */ __TIPC_NLA_MAX, TIPC_NLA_MAX = __TIPC_NLA_MAX - 1 @@ -146,6 +148,15 @@ enum { TIPC_NLA_NET_MAX = __TIPC_NLA_NET_MAX - 1 }; +/* Name table info */ +enum { + TIPC_NLA_NAME_TABLE_UNSPEC, + TIPC_NLA_NAME_TABLE_PUBL, /* nest */ + + __TIPC_NLA_NAME_TABLE_MAX, + TIPC_NLA_NAME_TABLE_MAX = __TIPC_NLA_NAME_TABLE_MAX - 1 +}; + /* Publication info */ enum { TIPC_NLA_PUBL_UNSPEC, diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 3a6a0a7c0759..30ca8e0e0f84 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -1,7 +1,7 @@ /* * net/tipc/name_table.c: TIPC name table code * - * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2000-2006, 2014, Ericsson AB * Copyright (c) 2004-2008, 2010-2011, Wind River Systems * All rights reserved. * @@ -42,6 +42,12 @@ #define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ +static const struct nla_policy +tipc_nl_name_table_policy[TIPC_NLA_NAME_TABLE_MAX + 1] = { + [TIPC_NLA_NAME_TABLE_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_NAME_TABLE_PUBL] = { .type = NLA_NESTED } +}; + /** * struct name_info - name sequence publication info * @node_list: circular list of publications made by own node @@ -995,3 +1001,185 @@ void tipc_nametbl_stop(void) table.types = NULL; write_unlock_bh(&tipc_nametbl_lock); } + +int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, struct name_seq *seq, + struct sub_seq *sseq, u32 *last_publ) +{ + void *hdr; + struct nlattr *attrs; + struct nlattr *publ; + struct publication *p; + + if (*last_publ) { + list_for_each_entry(p, &sseq->info->zone_list, zone_list) + if (p->key == *last_publ) + break; + if (p->key != *last_publ) + return -EPIPE; + } else { + p = list_first_entry(&sseq->info->zone_list, struct publication, + zone_list); + } + + list_for_each_entry_from(p, &sseq->info->zone_list, zone_list) { + *last_publ = p->key; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, + &tipc_genl_v2_family, NLM_F_MULTI, + TIPC_NL_NAME_TABLE_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE); + if (!attrs) + goto msg_full; + + publ = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE_PUBL); + if (!publ) + goto attr_msg_full; + + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_TYPE, seq->type)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_LOWER, sseq->lower)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_UPPER, sseq->upper)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_SCOPE, p->scope)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->node)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->ref)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key)) + goto publ_msg_full; + + nla_nest_end(msg->skb, publ); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + } + *last_publ = 0; + + return 0; + +publ_msg_full: + nla_nest_cancel(msg->skb, publ); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int __tipc_nl_subseq_list(struct tipc_nl_msg *msg, struct name_seq *seq, + u32 *last_lower, u32 *last_publ) +{ + struct sub_seq *sseq; + struct sub_seq *sseq_start; + int err; + + if (*last_lower) { + sseq_start = nameseq_find_subseq(seq, *last_lower); + if (!sseq_start) + return -EPIPE; + } else { + sseq_start = seq->sseqs; + } + + for (sseq = sseq_start; sseq != &seq->sseqs[seq->first_free]; sseq++) { + err = __tipc_nl_add_nametable_publ(msg, seq, sseq, last_publ); + if (err) { + *last_lower = sseq->lower; + return err; + } + } + *last_lower = 0; + + return 0; +} + +int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type, u32 *last_lower, + u32 *last_publ) +{ + struct hlist_head *seq_head; + struct name_seq *seq; + int err; + int i; + + if (*last_type) + i = hash(*last_type); + else + i = 0; + + for (; i < TIPC_NAMETBL_SIZE; i++) { + seq_head = &table.types[i]; + + if (*last_type) { + seq = nametbl_find_seq(*last_type); + if (!seq) + return -EPIPE; + } else { + seq = hlist_entry_safe((seq_head)->first, + struct name_seq, ns_list); + if (!seq) + continue; + } + + hlist_for_each_entry_from(seq, ns_list) { + spin_lock_bh(&seq->lock); + + err = __tipc_nl_subseq_list(msg, seq, last_lower, + last_publ); + + if (err) { + *last_type = seq->type; + spin_unlock_bh(&seq->lock); + return err; + } + spin_unlock_bh(&seq->lock); + } + *last_type = 0; + } + return 0; +} + +int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + int done = cb->args[3]; + u32 last_type = cb->args[0]; + u32 last_lower = cb->args[1]; + u32 last_publ = cb->args[2]; + struct tipc_nl_msg msg; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + read_lock_bh(&tipc_nametbl_lock); + + err = __tipc_nl_seq_list(&msg, &last_type, &last_lower, &last_publ); + if (!err) { + done = 1; + } else if (err != -EMSGSIZE) { + /* We never set seq or call nl_dump_check_consistent() this + * means that setting prev_seq here will cause the consistence + * check to fail in the netlink callback handler. Resulting in + * the NLMSG_DONE message having the NLM_F_DUMP_INTR flag set if + * we got an error. + */ + cb->prev_seq = 1; + } + + read_unlock_bh(&tipc_nametbl_lock); + + cb->args[0] = last_type; + cb->args[1] = last_lower; + cb->args[2] = last_publ; + cb->args[3] = done; + + return skb->len; +} diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index f02f48b9a216..b38ebecac766 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -1,7 +1,7 @@ /* * net/tipc/name_table.h: Include file for TIPC name table code * - * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2000-2006, 2014, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -84,6 +84,8 @@ struct publication { extern rwlock_t tipc_nametbl_lock; +int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); + struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space); u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *node); int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index cb37d30378a8..b891e3905bc4 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -37,6 +37,7 @@ #include "core.h" #include "config.h" #include "socket.h" +#include "name_table.h" #include "bearer.h" #include "link.h" #include "node.h" @@ -81,7 +82,8 @@ static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { [TIPC_NLA_LINK] = { .type = NLA_NESTED, }, [TIPC_NLA_MEDIA] = { .type = NLA_NESTED, }, [TIPC_NLA_NODE] = { .type = NLA_NESTED, }, - [TIPC_NLA_NET] = { .type = NLA_NESTED, } + [TIPC_NLA_NET] = { .type = NLA_NESTED, }, + [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, } }; /* Legacy ASCII API */ @@ -185,6 +187,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .cmd = TIPC_NL_NET_SET, .doit = tipc_nl_net_set, .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_NAME_TABLE_GET, + .dumpit = tipc_nl_name_table_dump, + .policy = tipc_nl_policy, } }; -- cgit v1.2.3-71-gd317 From 2ad7bf3638411cb547f2823df08166c13ab04269 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Sun, 23 Nov 2014 23:07:46 -0800 Subject: ipvlan: Initial check-in of the IPVLAN driver. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver is very similar to the macvlan driver except that it uses L3 on the frame to determine the logical interface while functioning as packet dispatcher. It inherits L2 of the master device hence the packets on wire will have the same L2 for all the packets originating from all virtual devices off of the same master device. This driver was developed keeping the namespace use-case in mind. Hence most of the examples given here take that as the base setup where main-device belongs to the default-ns and virtual devices are assigned to the additional namespaces. The device operates in two different modes and the difference in these two modes in primarily in the TX side. (a) L2 mode : In this mode, the device behaves as a L2 device. TX processing upto L2 happens on the stack of the virtual device associated with (namespace). Packets are switched after that into the main device (default-ns) and queued for xmit. RX processing is simple and all multicast, broadcast (if applicable), and unicast belonging to the address(es) are delivered to the virtual devices. (b) L3 mode : In this mode, the device behaves like a L3 device. TX processing upto L3 happens on the stack of the virtual device associated with (namespace). Packets are switched to the main-device (default-ns) for the L2 processing. Hence the routing table of the default-ns will be used in this mode. RX processins is somewhat similar to the L2 mode except that in this mode only Unicast packets are delivered to the virtual device while main-dev will handle all other packets. The devices can be added using the "ip" command from the iproute2 package - ip link add link type ipvlan mode [ l2 | l3 ] Signed-off-by: Mahesh Bandewar Cc: Eric Dumazet Cc: Maciej Å»enczykowski Cc: Laurent Chavey Cc: Tim Hockin Cc: Brandon Philips Cc: Pavel Emelianov Signed-off-by: David S. Miller --- Documentation/networking/ipvlan.txt | 107 +++++ drivers/net/Kconfig | 18 + drivers/net/Makefile | 1 + drivers/net/ipvlan/Makefile | 7 + drivers/net/ipvlan/ipvlan.h | 130 ++++++ drivers/net/ipvlan/ipvlan_core.c | 607 +++++++++++++++++++++++++++ drivers/net/ipvlan/ipvlan_main.c | 789 ++++++++++++++++++++++++++++++++++++ include/linux/netdevice.h | 4 + include/uapi/linux/if_link.h | 15 + 9 files changed, 1678 insertions(+) create mode 100644 Documentation/networking/ipvlan.txt create mode 100644 drivers/net/ipvlan/Makefile create mode 100644 drivers/net/ipvlan/ipvlan.h create mode 100644 drivers/net/ipvlan/ipvlan_core.c create mode 100644 drivers/net/ipvlan/ipvlan_main.c (limited to 'include/uapi/linux') diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt new file mode 100644 index 000000000000..cf996394e466 --- /dev/null +++ b/Documentation/networking/ipvlan.txt @@ -0,0 +1,107 @@ + + IPVLAN Driver HOWTO + +Initial Release: + Mahesh Bandewar + +1. Introduction: + This is conceptually very similar to the macvlan driver with one major +exception of using L3 for mux-ing /demux-ing among slaves. This property makes +the master device share the L2 with it's slave devices. I have developed this +driver in conjuntion with network namespaces and not sure if there is use case +outside of it. + + +2. Building and Installation: + In order to build the driver, please select the config item CONFIG_IPVLAN. +The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module +(CONFIG_IPVLAN=m). + + +3. Configuration: + There are no module parameters for this driver and it can be configured +using IProute2/ip utility. + + ip link add link type ipvlan mode { l2 | L3 } + + e.g. ip link add link ipvl0 eth0 type ipvlan mode l2 + + +4. Operating modes: + IPvlan has two modes of operation - L2 and L3. For a given master device, +you can select one of these two modes and all slaves on that master will +operate in the same (selected) mode. The RX mode is almost identical except +that in L3 mode the slaves wont receive any multicast / broadcast traffic. +L3 mode is more restrictive since routing is controlled from the other (mostly) +default namespace. + +4.1 L2 mode: + In this mode TX processing happens on the stack instance attached to the +slave device and packets are switched and queued to the master device to send +out. In this mode the slaves will RX/TX multicast and broadcast (if applicable) +as well. + +4.2 L3 mode: + In this mode TX processing upto L3 happens on the stack instance attached +to the slave device and packets are switched to the stack instance of the +master device for the L2 processing and routing from that instance will be +used before packets are queued on the outbound device. In this mode the slaves +will not receive nor can send multicast / broadcast traffic. + + +5. What to choose (macvlan vs. ipvlan)? + These two devices are very similar in many regards and the specific use +case could very well define which device to choose. if one of the following +situations defines your use case then you can choose to use ipvlan - + (a) The Linux host that is connected to the external switch / router has +policy configured that allows only one mac per port. + (b) No of virtual devices created on a master exceed the mac capacity and +puts the NIC in promiscous mode and degraded performance is a concern. + (c) If the slave device is to be put into the hostile / untrusted network +namespace where L2 on the slave could be changed / misused. + + +6. Example configuration: + + +=============================================================+ + | Host: host1 | + | | + | +----------------------+ +----------------------+ | + | | NS:ns0 | | NS:ns1 | | + | | | | | | + | | | | | | + | | ipvl0 | | ipvl1 | | + | +----------#-----------+ +-----------#----------+ | + | # # | + | ################################ | + | # eth0 | + +==============================#==============================+ + + + (a) Create two network namespaces - ns0, ns1 + ip netns add ns0 + ip netns add ns1 + + (b) Create two ipvlan slaves on eth0 (master device) + ip link add link eth0 ipvl0 type ipvlan mode l2 + ip link add link eth0 ipvl1 type ipvlan mode l2 + + (c) Assign slaves to the respective network namespaces + ip link set dev ipvl0 netns ns0 + ip link set dev ipvl1 netns ns1 + + (d) Now switch to the namespace (ns0 or ns1) to configure the slave devices + - For ns0 + (1) ip netns exec ns0 bash + (2) ip link set dev ipvl0 up + (3) ip link set dev lo up + (4) ip -4 addr add 127.0.0.1 dev lo + (5) ip -4 addr add $IPADDR dev ipvl0 + (6) ip -4 route add default via $ROUTER dev ipvl0 + - For ns1 + (1) ip netns exec ns1 bash + (2) ip link set dev ipvl1 up + (3) ip link set dev lo up + (4) ip -4 addr add 127.0.0.1 dev lo + (5) ip -4 addr add $IPADDR dev ipvl1 + (6) ip -4 route add default via $ROUTER dev ipvl1 diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index f9009be3f307..b6d64f546574 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -145,6 +145,24 @@ config MACVTAP To compile this driver as a module, choose M here: the module will be called macvtap. + +config IPVLAN + tristate "IP-VLAN support" + ---help--- + This allows one to create virtual devices off of a main interface + and packets will be delivered based on the dest L3 (IPv6/IPv4 addr) + on packets. All interfaces (including the main interface) share L2 + making it transparent to the connected L2 switch. + + Ipvlan devices can be added using the "ip" command from the + iproute2 package starting with the iproute2-X.Y.ZZ release: + + "ip link add link [ NAME ] type ipvlan" + + To compile this driver as a module, choose M here: the module + will be called ipvlan. + + config VXLAN tristate "Virtual eXtensible Local Area Network (VXLAN)" depends on INET diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 61aefdd1e173..e25fdd7d905e 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -6,6 +6,7 @@ # Networking Core Drivers # obj-$(CONFIG_BONDING) += bonding/ +obj-$(CONFIG_IPVLAN) += ipvlan/ obj-$(CONFIG_DUMMY) += dummy.o obj-$(CONFIG_EQUALIZER) += eql.o obj-$(CONFIG_IFB) += ifb.o diff --git a/drivers/net/ipvlan/Makefile b/drivers/net/ipvlan/Makefile new file mode 100644 index 000000000000..df79910192d6 --- /dev/null +++ b/drivers/net/ipvlan/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the Ethernet Ipvlan driver +# + +obj-$(CONFIG_IPVLAN) += ipvlan.o + +ipvlan-objs := ipvlan_core.o ipvlan_main.o diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h new file mode 100644 index 000000000000..ab3e7614ed71 --- /dev/null +++ b/drivers/net/ipvlan/ipvlan.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2014 Mahesh Bandewar + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + */ +#ifndef __IPVLAN_H +#define __IPVLAN_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define IPVLAN_DRV "ipvlan" +#define IPV_DRV_VER "0.1" + +#define IPVLAN_HASH_SIZE (1 << BITS_PER_BYTE) +#define IPVLAN_HASH_MASK (IPVLAN_HASH_SIZE - 1) + +#define IPVLAN_MAC_FILTER_BITS 8 +#define IPVLAN_MAC_FILTER_SIZE (1 << IPVLAN_MAC_FILTER_BITS) +#define IPVLAN_MAC_FILTER_MASK (IPVLAN_MAC_FILTER_SIZE - 1) + +typedef enum { + IPVL_IPV6 = 0, + IPVL_ICMPV6, + IPVL_IPV4, + IPVL_ARP, +} ipvl_hdr_type; + +struct ipvl_pcpu_stats { + u64 rx_pkts; + u64 rx_bytes; + u64 rx_mcast; + u64 tx_pkts; + u64 tx_bytes; + struct u64_stats_sync syncp; + u32 rx_errs; + u32 tx_drps; +}; + +struct ipvl_port; + +struct ipvl_dev { + struct net_device *dev; + struct list_head pnode; + struct ipvl_port *port; + struct net_device *phy_dev; + struct list_head addrs; + int ipv4cnt; + int ipv6cnt; + struct ipvl_pcpu_stats *pcpu_stats; + DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE); + netdev_features_t sfeatures; + u32 msg_enable; + u16 mtu_adj; +}; + +struct ipvl_addr { + struct ipvl_dev *master; /* Back pointer to master */ + union { + struct in6_addr ip6; /* IPv6 address on logical interface */ + struct in_addr ip4; /* IPv4 address on logical interface */ + } ipu; +#define ip6addr ipu.ip6 +#define ip4addr ipu.ip4 + struct hlist_node hlnode; /* Hash-table linkage */ + struct list_head anode; /* logical-interface linkage */ + struct rcu_head rcu; + ipvl_hdr_type atype; +}; + +struct ipvl_port { + struct net_device *dev; + struct hlist_head hlhead[IPVLAN_HASH_SIZE]; + struct list_head ipvlans; + struct rcu_head rcu; + int count; + u16 mode; +}; + +static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d) +{ + return rcu_dereference(d->rx_handler_data); +} + +static inline struct ipvl_port *ipvlan_port_get_rtnl(const struct net_device *d) +{ + return rtnl_dereference(d->rx_handler_data); +} + +static inline bool ipvlan_dev_master(struct net_device *d) +{ + return d->priv_flags & IFF_IPVLAN_MASTER; +} + +static inline bool ipvlan_dev_slave(struct net_device *d) +{ + return d->priv_flags & IFF_IPVLAN_SLAVE; +} + +void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev); +void ipvlan_set_port_mode(struct ipvl_port *port, u32 nval); +void ipvlan_init_secret(void); +unsigned int ipvlan_mac_hash(const unsigned char *addr); +rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb); +int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev); +void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr); +bool ipvlan_addr_busy(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6); +struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, + const void *iaddr, bool is_v6); +void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync); +#endif /* __IPVLAN_H */ diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c new file mode 100644 index 000000000000..a14d87783245 --- /dev/null +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -0,0 +1,607 @@ +/* Copyright (c) 2014 Mahesh Bandewar + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + */ + +#include "ipvlan.h" + +static u32 ipvlan_jhash_secret; + +void ipvlan_init_secret(void) +{ + net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret)); +} + +static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, + unsigned int len, bool success, bool mcast) +{ + if (!ipvlan) + return; + + if (likely(success)) { + struct ipvl_pcpu_stats *pcptr; + + pcptr = this_cpu_ptr(ipvlan->pcpu_stats); + u64_stats_update_begin(&pcptr->syncp); + pcptr->rx_pkts++; + pcptr->rx_bytes += len; + if (mcast) + pcptr->rx_mcast++; + u64_stats_update_end(&pcptr->syncp); + } else { + this_cpu_inc(ipvlan->pcpu_stats->rx_errs); + } +} + +static u8 ipvlan_get_v6_hash(const void *iaddr) +{ + const struct in6_addr *ip6_addr = iaddr; + + return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) & + IPVLAN_HASH_MASK; +} + +static u8 ipvlan_get_v4_hash(const void *iaddr) +{ + const struct in_addr *ip4_addr = iaddr; + + return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) & + IPVLAN_HASH_MASK; +} + +struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, + const void *iaddr, bool is_v6) +{ + struct ipvl_addr *addr; + u8 hash; + + hash = is_v6 ? ipvlan_get_v6_hash(iaddr) : + ipvlan_get_v4_hash(iaddr); + hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) { + if (is_v6 && addr->atype == IPVL_IPV6 && + ipv6_addr_equal(&addr->ip6addr, iaddr)) + return addr; + else if (!is_v6 && addr->atype == IPVL_IPV4 && + addr->ip4addr.s_addr == + ((struct in_addr *)iaddr)->s_addr) + return addr; + } + return NULL; +} + +void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr) +{ + struct ipvl_port *port = ipvlan->port; + u8 hash; + + hash = (addr->atype == IPVL_IPV6) ? + ipvlan_get_v6_hash(&addr->ip6addr) : + ipvlan_get_v4_hash(&addr->ip4addr); + hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]); +} + +void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync) +{ + hlist_del_rcu(&addr->hlnode); + if (sync) + synchronize_rcu(); +} + +bool ipvlan_addr_busy(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) +{ + struct ipvl_port *port = ipvlan->port; + struct ipvl_addr *addr; + + list_for_each_entry(addr, &ipvlan->addrs, anode) { + if ((is_v6 && addr->atype == IPVL_IPV6 && + ipv6_addr_equal(&addr->ip6addr, iaddr)) || + (!is_v6 && addr->atype == IPVL_IPV4 && + addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr)) + return true; + } + + if (ipvlan_ht_addr_lookup(port, iaddr, is_v6)) + return true; + + return false; +} + +static void *ipvlan_get_L3_hdr(struct sk_buff *skb, int *type) +{ + void *lyr3h = NULL; + + switch (skb->protocol) { + case htons(ETH_P_ARP): { + struct arphdr *arph; + + if (unlikely(!pskb_may_pull(skb, sizeof(*arph)))) + return NULL; + + arph = arp_hdr(skb); + *type = IPVL_ARP; + lyr3h = arph; + break; + } + case htons(ETH_P_IP): { + u32 pktlen; + struct iphdr *ip4h; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h)))) + return NULL; + + ip4h = ip_hdr(skb); + pktlen = ntohs(ip4h->tot_len); + if (ip4h->ihl < 5 || ip4h->version != 4) + return NULL; + if (skb->len < pktlen || pktlen < (ip4h->ihl * 4)) + return NULL; + + *type = IPVL_IPV4; + lyr3h = ip4h; + break; + } + case htons(ETH_P_IPV6): { + struct ipv6hdr *ip6h; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h)))) + return NULL; + + ip6h = ipv6_hdr(skb); + if (ip6h->version != 6) + return NULL; + + *type = IPVL_IPV6; + lyr3h = ip6h; + /* Only Neighbour Solicitation pkts need different treatment */ + if (ipv6_addr_any(&ip6h->saddr) && + ip6h->nexthdr == NEXTHDR_ICMP) { + *type = IPVL_ICMPV6; + lyr3h = ip6h + 1; + } + break; + } + default: + return NULL; + } + + return lyr3h; +} + +unsigned int ipvlan_mac_hash(const unsigned char *addr) +{ + u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2), + ipvlan_jhash_secret); + + return hash & IPVLAN_MAC_FILTER_MASK; +} + +static void ipvlan_multicast_frame(struct ipvl_port *port, struct sk_buff *skb, + const struct ipvl_dev *in_dev, bool local) +{ + struct ethhdr *eth = eth_hdr(skb); + struct ipvl_dev *ipvlan; + struct sk_buff *nskb; + unsigned int len; + unsigned int mac_hash; + int ret; + + if (skb->protocol == htons(ETH_P_PAUSE)) + return; + + list_for_each_entry(ipvlan, &port->ipvlans, pnode) { + if (local && (ipvlan == in_dev)) + continue; + + mac_hash = ipvlan_mac_hash(eth->h_dest); + if (!test_bit(mac_hash, ipvlan->mac_filters)) + continue; + + ret = NET_RX_DROP; + len = skb->len + ETH_HLEN; + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + goto mcast_acct; + + if (ether_addr_equal(eth->h_dest, ipvlan->phy_dev->broadcast)) + nskb->pkt_type = PACKET_BROADCAST; + else + nskb->pkt_type = PACKET_MULTICAST; + + nskb->dev = ipvlan->dev; + if (local) + ret = dev_forward_skb(ipvlan->dev, nskb); + else + ret = netif_rx(nskb); +mcast_acct: + ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); + } + + /* Locally generated? ...Forward a copy to the main-device as + * well. On the RX side we'll ignore it (wont give it to any + * of the virtual devices. + */ + if (local) { + nskb = skb_clone(skb, GFP_ATOMIC); + if (nskb) { + if (ether_addr_equal(eth->h_dest, port->dev->broadcast)) + nskb->pkt_type = PACKET_BROADCAST; + else + nskb->pkt_type = PACKET_MULTICAST; + + dev_forward_skb(port->dev, nskb); + } + } +} + +static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb, + bool local) +{ + struct ipvl_dev *ipvlan = addr->master; + struct net_device *dev = ipvlan->dev; + unsigned int len; + rx_handler_result_t ret = RX_HANDLER_CONSUMED; + bool success = false; + + len = skb->len + ETH_HLEN; + if (unlikely(!(dev->flags & IFF_UP))) { + kfree_skb(skb); + goto out; + } + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto out; + + skb->dev = dev; + skb->pkt_type = PACKET_HOST; + + if (local) { + if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS) + success = true; + } else { + ret = RX_HANDLER_ANOTHER; + success = true; + } + +out: + ipvlan_count_rx(ipvlan, len, success, false); + return ret; +} + +static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, + void *lyr3h, int addr_type, + bool use_dest) +{ + struct ipvl_addr *addr = NULL; + + if (addr_type == IPVL_IPV6) { + struct ipv6hdr *ip6h; + struct in6_addr *i6addr; + + ip6h = (struct ipv6hdr *)lyr3h; + i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr; + addr = ipvlan_ht_addr_lookup(port, i6addr, true); + } else if (addr_type == IPVL_ICMPV6) { + struct nd_msg *ndmh; + struct in6_addr *i6addr; + + /* Make sure that the NeighborSolicitation ICMPv6 packets + * are handled to avoid DAD issue. + */ + ndmh = (struct nd_msg *)lyr3h; + if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) { + i6addr = &ndmh->target; + addr = ipvlan_ht_addr_lookup(port, i6addr, true); + } + } else if (addr_type == IPVL_IPV4) { + struct iphdr *ip4h; + __be32 *i4addr; + + ip4h = (struct iphdr *)lyr3h; + i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr; + addr = ipvlan_ht_addr_lookup(port, i4addr, false); + } else if (addr_type == IPVL_ARP) { + struct arphdr *arph; + unsigned char *arp_ptr; + __be32 dip; + + arph = (struct arphdr *)lyr3h; + arp_ptr = (unsigned char *)(arph + 1); + if (use_dest) + arp_ptr += (2 * port->dev->addr_len) + 4; + else + arp_ptr += port->dev->addr_len; + + memcpy(&dip, arp_ptr, 4); + addr = ipvlan_ht_addr_lookup(port, &dip, false); + } + + return addr; +} + +static int ipvlan_process_v4_outbound(struct sk_buff *skb) +{ + const struct iphdr *ip4h = ip_hdr(skb); + struct net_device *dev = skb->dev; + struct rtable *rt; + int err, ret = NET_XMIT_DROP; + struct flowi4 fl4 = { + .flowi4_oif = dev->iflink, + .flowi4_tos = RT_TOS(ip4h->tos), + .flowi4_flags = FLOWI_FLAG_ANYSRC, + .daddr = ip4h->daddr, + .saddr = ip4h->saddr, + }; + + rt = ip_route_output_flow(dev_net(dev), &fl4, NULL); + if (IS_ERR(rt)) + goto err; + + if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { + ip_rt_put(rt); + goto err; + } + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + err = ip_local_out(skb); + if (unlikely(net_xmit_eval(err))) + dev->stats.tx_errors++; + else + ret = NET_XMIT_SUCCESS; + goto out; +err: + dev->stats.tx_errors++; + kfree_skb(skb); +out: + return ret; +} + +static int ipvlan_process_v6_outbound(struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct net_device *dev = skb->dev; + struct dst_entry *dst; + int err, ret = NET_XMIT_DROP; + struct flowi6 fl6 = { + .flowi6_iif = skb->dev->ifindex, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + .flowi6_flags = FLOWI_FLAG_ANYSRC, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_mark = skb->mark, + .flowi6_proto = ip6h->nexthdr, + }; + + dst = ip6_route_output(dev_net(dev), NULL, &fl6); + if (IS_ERR(dst)) + goto err; + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + err = ip6_local_out(skb); + if (unlikely(net_xmit_eval(err))) + dev->stats.tx_errors++; + else + ret = NET_XMIT_SUCCESS; + goto out; +err: + dev->stats.tx_errors++; + kfree_skb(skb); +out: + return ret; +} + +static int ipvlan_process_outbound(struct sk_buff *skb, + const struct ipvl_dev *ipvlan) +{ + struct ethhdr *ethh = eth_hdr(skb); + int ret = NET_XMIT_DROP; + + /* In this mode we dont care about multicast and broadcast traffic */ + if (is_multicast_ether_addr(ethh->h_dest)) { + pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n", + ntohs(skb->protocol)); + kfree_skb(skb); + goto out; + } + + /* The ipvlan is a pseudo-L2 device, so the packets that we receive + * will have L2; which need to discarded and processed further + * in the net-ns of the main-device. + */ + if (skb_mac_header_was_set(skb)) { + skb_pull(skb, sizeof(*ethh)); + skb->mac_header = (typeof(skb->mac_header))~0U; + skb_reset_network_header(skb); + } + + if (skb->protocol == htons(ETH_P_IPV6)) + ret = ipvlan_process_v6_outbound(skb); + else if (skb->protocol == htons(ETH_P_IP)) + ret = ipvlan_process_v4_outbound(skb); + else { + pr_warn_ratelimited("Dropped outbound packet type=%x\n", + ntohs(skb->protocol)); + kfree_skb(skb); + } +out: + return ret; +} + +static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev) +{ + const struct ipvl_dev *ipvlan = netdev_priv(dev); + void *lyr3h; + struct ipvl_addr *addr; + int addr_type; + + lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); + if (!lyr3h) + goto out; + + addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); + if (addr) + return ipvlan_rcv_frame(addr, skb, true); + +out: + skb->dev = ipvlan->phy_dev; + return ipvlan_process_outbound(skb, ipvlan); +} + +static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) +{ + const struct ipvl_dev *ipvlan = netdev_priv(dev); + struct ethhdr *eth = eth_hdr(skb); + struct ipvl_addr *addr; + void *lyr3h; + int addr_type; + + if (ether_addr_equal(eth->h_dest, eth->h_source)) { + lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); + if (lyr3h) { + addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); + if (addr) + return ipvlan_rcv_frame(addr, skb, true); + } + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_XMIT_DROP; + + /* Packet definitely does not belong to any of the + * virtual devices, but the dest is local. So forward + * the skb for the main-dev. At the RX side we just return + * RX_PASS for it to be processed further on the stack. + */ + return dev_forward_skb(ipvlan->phy_dev, skb); + + } else if (is_multicast_ether_addr(eth->h_dest)) { + u8 ip_summed = skb->ip_summed; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + ipvlan_multicast_frame(ipvlan->port, skb, ipvlan, true); + skb->ip_summed = ip_summed; + } + + skb->dev = ipvlan->phy_dev; + return dev_queue_xmit(skb); +} + +int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct ipvl_port *port = ipvlan_port_get_rcu(ipvlan->phy_dev); + + if (!port) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) + goto out; + + switch(port->mode) { + case IPVLAN_MODE_L2: + return ipvlan_xmit_mode_l2(skb, dev); + case IPVLAN_MODE_L3: + return ipvlan_xmit_mode_l3(skb, dev); + } + + /* Should not reach here */ + WARN_ONCE(true, "ipvlan_queue_xmit() called for mode = [%hx]\n", + port->mode); +out: + kfree_skb(skb); + return NET_XMIT_DROP; +} + +static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port) +{ + struct ethhdr *eth = eth_hdr(skb); + struct ipvl_addr *addr; + void *lyr3h; + int addr_type; + + if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) { + lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); + if (!lyr3h) + return true; + + addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false); + if (addr) + return false; + } + + return true; +} + +static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb, + struct ipvl_port *port) +{ + void *lyr3h; + int addr_type; + struct ipvl_addr *addr; + struct sk_buff *skb = *pskb; + rx_handler_result_t ret = RX_HANDLER_PASS; + + lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); + if (!lyr3h) + goto out; + + addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); + if (addr) + ret = ipvlan_rcv_frame(addr, skb, false); + +out: + return ret; +} + +static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, + struct ipvl_port *port) +{ + struct sk_buff *skb = *pskb; + struct ethhdr *eth = eth_hdr(skb); + rx_handler_result_t ret = RX_HANDLER_PASS; + void *lyr3h; + int addr_type; + + if (is_multicast_ether_addr(eth->h_dest)) { + if (ipvlan_external_frame(skb, port)) + ipvlan_multicast_frame(port, skb, NULL, false); + } else { + struct ipvl_addr *addr; + + lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); + if (!lyr3h) + return ret; + + addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); + if (addr) + ret = ipvlan_rcv_frame(addr, skb, false); + } + + return ret; +} + +rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) +{ + struct sk_buff *skb = *pskb; + struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev); + + if (!port) + return RX_HANDLER_PASS; + + switch (port->mode) { + case IPVLAN_MODE_L2: + return ipvlan_handle_mode_l2(pskb, port); + case IPVLAN_MODE_L3: + return ipvlan_handle_mode_l3(pskb, port); + } + + /* Should not reach here */ + WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n", + port->mode); + kfree_skb(skb); + return NET_RX_DROP; +} diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c new file mode 100644 index 000000000000..c3df84bd2857 --- /dev/null +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -0,0 +1,789 @@ +/* Copyright (c) 2014 Mahesh Bandewar + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + */ + +#include "ipvlan.h" + +void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) +{ + ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj; +} + +void ipvlan_set_port_mode(struct ipvl_port *port, u32 nval) +{ + struct ipvl_dev *ipvlan; + + if (port->mode != nval) { + list_for_each_entry(ipvlan, &port->ipvlans, pnode) { + if (nval == IPVLAN_MODE_L3) + ipvlan->dev->flags |= IFF_NOARP; + else + ipvlan->dev->flags &= ~IFF_NOARP; + } + port->mode = nval; + } +} + +static int ipvlan_port_create(struct net_device *dev) +{ + struct ipvl_port *port; + int err, idx; + + if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK) { + netdev_err(dev, "Master is either lo or non-ether device\n"); + return -EINVAL; + } + port = kzalloc(sizeof(struct ipvl_port), GFP_KERNEL); + if (!port) + return -ENOMEM; + + port->dev = dev; + port->mode = IPVLAN_MODE_L3; + INIT_LIST_HEAD(&port->ipvlans); + for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++) + INIT_HLIST_HEAD(&port->hlhead[idx]); + + err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port); + if (err) + goto err; + + dev->priv_flags |= IFF_IPVLAN_MASTER; + return 0; + +err: + kfree_rcu(port, rcu); + return err; +} + +static void ipvlan_port_destroy(struct net_device *dev) +{ + struct ipvl_port *port = ipvlan_port_get_rtnl(dev); + + dev->priv_flags &= ~IFF_IPVLAN_MASTER; + netdev_rx_handler_unregister(dev); + kfree_rcu(port, rcu); +} + +/* ipvlan network devices have devices nesting below it and are a special + * "super class" of normal network devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key ipvlan_netdev_xmit_lock_key; +static struct lock_class_key ipvlan_netdev_addr_lock_key; + +#define IPVLAN_FEATURES \ + (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ + NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \ + NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) + +#define IPVLAN_STATE_MASK \ + ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT)) + +static void ipvlan_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, &ipvlan_netdev_xmit_lock_key); +} + +static void ipvlan_set_lockdep_class(struct net_device *dev) +{ + lockdep_set_class(&dev->addr_list_lock, &ipvlan_netdev_addr_lock_key); + netdev_for_each_tx_queue(dev, ipvlan_set_lockdep_class_one, NULL); +} + +static int ipvlan_init(struct net_device *dev) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + const struct net_device *phy_dev = ipvlan->phy_dev; + + dev->state = (dev->state & ~IPVLAN_STATE_MASK) | + (phy_dev->state & IPVLAN_STATE_MASK); + dev->features = phy_dev->features & IPVLAN_FEATURES; + dev->features |= NETIF_F_LLTX; + dev->gso_max_size = phy_dev->gso_max_size; + dev->iflink = phy_dev->ifindex; + dev->hard_header_len = phy_dev->hard_header_len; + + ipvlan_set_lockdep_class(dev); + + ipvlan->pcpu_stats = alloc_percpu(struct ipvl_pcpu_stats); + if (!ipvlan->pcpu_stats) + return -ENOMEM; + + return 0; +} + +static void ipvlan_uninit(struct net_device *dev) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct ipvl_port *port = ipvlan->port; + + if (ipvlan->pcpu_stats) + free_percpu(ipvlan->pcpu_stats); + + port->count -= 1; + if (!port->count) + ipvlan_port_destroy(port->dev); +} + +static int ipvlan_open(struct net_device *dev) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct net_device *phy_dev = ipvlan->phy_dev; + struct ipvl_addr *addr; + + if (ipvlan->port->mode == IPVLAN_MODE_L3) + dev->flags |= IFF_NOARP; + else + dev->flags &= ~IFF_NOARP; + + if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) { + list_for_each_entry(addr, &ipvlan->addrs, anode) + ipvlan_ht_addr_add(ipvlan, addr); + } + return dev_uc_add(phy_dev, phy_dev->dev_addr); +} + +static int ipvlan_stop(struct net_device *dev) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct net_device *phy_dev = ipvlan->phy_dev; + struct ipvl_addr *addr; + + dev_uc_unsync(phy_dev, dev); + dev_mc_unsync(phy_dev, dev); + + dev_uc_del(phy_dev, phy_dev->dev_addr); + + if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) { + list_for_each_entry(addr, &ipvlan->addrs, anode) + ipvlan_ht_addr_del(addr, !dev->dismantle); + } + return 0; +} + +netdev_tx_t ipvlan_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + const struct ipvl_dev *ipvlan = netdev_priv(dev); + int skblen = skb->len; + int ret; + + ret = ipvlan_queue_xmit(skb, dev); + if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { + struct ipvl_pcpu_stats *pcptr; + + pcptr = this_cpu_ptr(ipvlan->pcpu_stats); + + u64_stats_update_begin(&pcptr->syncp); + pcptr->tx_pkts++; + pcptr->tx_bytes += skblen; + u64_stats_update_end(&pcptr->syncp); + } else { + this_cpu_inc(ipvlan->pcpu_stats->tx_drps); + } + return ret; +} + +static netdev_features_t ipvlan_fix_features(struct net_device *dev, + netdev_features_t features) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + + return features & (ipvlan->sfeatures | ~IPVLAN_FEATURES); +} + +static void ipvlan_change_rx_flags(struct net_device *dev, int change) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct net_device *phy_dev = ipvlan->phy_dev; + + if (change & IFF_ALLMULTI) + dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1); +} + +static void ipvlan_set_broadcast_mac_filter(struct ipvl_dev *ipvlan, bool set) +{ + struct net_device *dev = ipvlan->dev; + unsigned int hashbit = ipvlan_mac_hash(dev->broadcast); + + if (set && !test_bit(hashbit, ipvlan->mac_filters)) + __set_bit(hashbit, ipvlan->mac_filters); + else if (!set && test_bit(hashbit, ipvlan->mac_filters)) + __clear_bit(hashbit, ipvlan->mac_filters); +} + +static void ipvlan_set_multicast_mac_filter(struct net_device *dev) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + + if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) { + bitmap_fill(ipvlan->mac_filters, IPVLAN_MAC_FILTER_SIZE); + } else { + struct netdev_hw_addr *ha; + DECLARE_BITMAP(mc_filters, IPVLAN_MAC_FILTER_SIZE); + + bitmap_zero(mc_filters, IPVLAN_MAC_FILTER_SIZE); + netdev_for_each_mc_addr(ha, dev) + __set_bit(ipvlan_mac_hash(ha->addr), mc_filters); + + bitmap_copy(ipvlan->mac_filters, mc_filters, + IPVLAN_MAC_FILTER_SIZE); + } + dev_uc_sync(ipvlan->phy_dev, dev); + dev_mc_sync(ipvlan->phy_dev, dev); +} + +static struct rtnl_link_stats64 *ipvlan_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *s) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + + if (ipvlan->pcpu_stats) { + struct ipvl_pcpu_stats *pcptr; + u64 rx_pkts, rx_bytes, rx_mcast, tx_pkts, tx_bytes; + u32 rx_errs = 0, tx_drps = 0; + u32 strt; + int idx; + + for_each_possible_cpu(idx) { + pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx); + do { + strt= u64_stats_fetch_begin_irq(&pcptr->syncp); + rx_pkts = pcptr->rx_pkts; + rx_bytes = pcptr->rx_bytes; + rx_mcast = pcptr->rx_mcast; + tx_pkts = pcptr->tx_pkts; + tx_bytes = pcptr->tx_bytes; + } while (u64_stats_fetch_retry_irq(&pcptr->syncp, + strt)); + + s->rx_packets += rx_pkts; + s->rx_bytes += rx_bytes; + s->multicast += rx_mcast; + s->tx_packets += tx_pkts; + s->tx_bytes += tx_bytes; + + /* u32 values are updated without syncp protection. */ + rx_errs += pcptr->rx_errs; + tx_drps += pcptr->tx_drps; + } + s->rx_errors = rx_errs; + s->rx_dropped = rx_errs; + s->tx_dropped = tx_drps; + } + return s; +} + +static int ipvlan_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct net_device *phy_dev = ipvlan->phy_dev; + + return vlan_vid_add(phy_dev, proto, vid); +} + +static int ipvlan_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, + u16 vid) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct net_device *phy_dev = ipvlan->phy_dev; + + vlan_vid_del(phy_dev, proto, vid); + return 0; +} + +static const struct net_device_ops ipvlan_netdev_ops = { + .ndo_init = ipvlan_init, + .ndo_uninit = ipvlan_uninit, + .ndo_open = ipvlan_open, + .ndo_stop = ipvlan_stop, + .ndo_start_xmit = ipvlan_start_xmit, + .ndo_fix_features = ipvlan_fix_features, + .ndo_change_rx_flags = ipvlan_change_rx_flags, + .ndo_set_rx_mode = ipvlan_set_multicast_mac_filter, + .ndo_get_stats64 = ipvlan_get_stats64, + .ndo_vlan_rx_add_vid = ipvlan_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = ipvlan_vlan_rx_kill_vid, +}; + +static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned len) +{ + const struct ipvl_dev *ipvlan = netdev_priv(dev); + struct net_device *phy_dev = ipvlan->phy_dev; + + /* TODO Probably use a different field than dev_addr so that the + * mac-address on the virtual device is portable and can be carried + * while the packets use the mac-addr on the physical device. + */ + return dev_hard_header(skb, phy_dev, type, daddr, + saddr ? : dev->dev_addr, len); +} + +static const struct header_ops ipvlan_header_ops = { + .create = ipvlan_hard_header, + .rebuild = eth_rebuild_header, + .parse = eth_header_parse, + .cache = eth_header_cache, + .cache_update = eth_header_cache_update, +}; + +static int ipvlan_ethtool_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd) +{ + const struct ipvl_dev *ipvlan = netdev_priv(dev); + + return __ethtool_get_settings(ipvlan->phy_dev, cmd); +} + +static void ipvlan_ethtool_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strlcpy(drvinfo->driver, IPVLAN_DRV, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, IPV_DRV_VER, sizeof(drvinfo->version)); +} + +static u32 ipvlan_ethtool_get_msglevel(struct net_device *dev) +{ + const struct ipvl_dev *ipvlan = netdev_priv(dev); + + return ipvlan->msg_enable; +} + +static void ipvlan_ethtool_set_msglevel(struct net_device *dev, u32 value) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + + ipvlan->msg_enable = value; +} + +static const struct ethtool_ops ipvlan_ethtool_ops = { + .get_link = ethtool_op_get_link, + .get_settings = ipvlan_ethtool_get_settings, + .get_drvinfo = ipvlan_ethtool_get_drvinfo, + .get_msglevel = ipvlan_ethtool_get_msglevel, + .set_msglevel = ipvlan_ethtool_set_msglevel, +}; + +static int ipvlan_nl_changelink(struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); + + if (data && data[IFLA_IPVLAN_MODE]) { + u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); + + ipvlan_set_port_mode(port, nmode); + } + return 0; +} + +static size_t ipvlan_nl_getsize(const struct net_device *dev) +{ + return (0 + + nla_total_size(2) /* IFLA_IPVLAN_MODE */ + ); +} + +static int ipvlan_nl_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (data && data[IFLA_IPVLAN_MODE]) { + u16 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]); + + if (mode < IPVLAN_MODE_L2 || mode >= IPVLAN_MODE_MAX) + return -EINVAL; + } + return 0; +} + +static int ipvlan_nl_fillinfo(struct sk_buff *skb, + const struct net_device *dev) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); + int ret = -EINVAL; + + if (!port) + goto err; + + ret = -EMSGSIZE; + if (nla_put_u16(skb, IFLA_IPVLAN_MODE, port->mode)) + goto err; + + return 0; + +err: + return ret; +} + +static int ipvlan_link_new(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct ipvl_port *port; + struct net_device *phy_dev; + int err; + + if (!tb[IFLA_LINK]) + return -EINVAL; + + phy_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); + if (!phy_dev) + return -ENODEV; + + if (ipvlan_dev_slave(phy_dev)) { + struct ipvl_dev *tmp = netdev_priv(phy_dev); + + phy_dev = tmp->phy_dev; + } else if (!ipvlan_dev_master(phy_dev)) { + err = ipvlan_port_create(phy_dev); + if (err < 0) + return err; + } + + port = ipvlan_port_get_rtnl(phy_dev); + if (data && data[IFLA_IPVLAN_MODE]) + port->mode = nla_get_u16(data[IFLA_IPVLAN_MODE]); + + ipvlan->phy_dev = phy_dev; + ipvlan->dev = dev; + ipvlan->port = port; + ipvlan->sfeatures = IPVLAN_FEATURES; + INIT_LIST_HEAD(&ipvlan->addrs); + ipvlan->ipv4cnt = 0; + ipvlan->ipv6cnt = 0; + + /* TODO Probably put random address here to be presented to the + * world but keep using the physical-dev address for the outgoing + * packets. + */ + memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN); + + dev->priv_flags |= IFF_IPVLAN_SLAVE; + + port->count += 1; + err = register_netdevice(dev); + if (err < 0) + goto ipvlan_destroy_port; + + err = netdev_upper_dev_link(phy_dev, dev); + if (err) + goto ipvlan_destroy_port; + + list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); + netif_stacked_transfer_operstate(phy_dev, dev); + return 0; + +ipvlan_destroy_port: + port->count -= 1; + if (!port->count) + ipvlan_port_destroy(phy_dev); + + return err; +} + +static void ipvlan_link_delete(struct net_device *dev, struct list_head *head) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct ipvl_addr *addr, *next; + + if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) { + list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { + ipvlan_ht_addr_del(addr, !dev->dismantle); + list_del_rcu(&addr->anode); + } + } + list_del_rcu(&ipvlan->pnode); + unregister_netdevice_queue(dev, head); + netdev_upper_dev_unlink(ipvlan->phy_dev, dev); +} + +static void ipvlan_link_setup(struct net_device *dev) +{ + ether_setup(dev); + + dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); + dev->priv_flags |= IFF_UNICAST_FLT; + dev->netdev_ops = &ipvlan_netdev_ops; + dev->destructor = free_netdev; + dev->header_ops = &ipvlan_header_ops; + dev->ethtool_ops = &ipvlan_ethtool_ops; + dev->tx_queue_len = 0; +} + +static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] = +{ + [IFLA_IPVLAN_MODE] = { .type = NLA_U16 }, +}; + +static struct rtnl_link_ops ipvlan_link_ops = { + .kind = "ipvlan", + .priv_size = sizeof(struct ipvl_dev), + + .get_size = ipvlan_nl_getsize, + .policy = ipvlan_nl_policy, + .validate = ipvlan_nl_validate, + .fill_info = ipvlan_nl_fillinfo, + .changelink = ipvlan_nl_changelink, + .maxtype = IFLA_IPVLAN_MAX, + + .setup = ipvlan_link_setup, + .newlink = ipvlan_link_new, + .dellink = ipvlan_link_delete, +}; + +int ipvlan_link_register(struct rtnl_link_ops *ops) +{ + return rtnl_link_register(ops); +} + +static int ipvlan_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct ipvl_dev *ipvlan, *next; + struct ipvl_port *port; + LIST_HEAD(lst_kill); + + if (!ipvlan_dev_master(dev)) + return NOTIFY_DONE; + + port = ipvlan_port_get_rtnl(dev); + + switch (event) { + case NETDEV_CHANGE: + list_for_each_entry(ipvlan, &port->ipvlans, pnode) + netif_stacked_transfer_operstate(ipvlan->phy_dev, + ipvlan->dev); + break; + + case NETDEV_UNREGISTER: + if (dev->reg_state != NETREG_UNREGISTERING) + break; + + list_for_each_entry_safe(ipvlan, next, &port->ipvlans, + pnode) + ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev, + &lst_kill); + unregister_netdevice_many(&lst_kill); + break; + + case NETDEV_FEAT_CHANGE: + list_for_each_entry(ipvlan, &port->ipvlans, pnode) { + ipvlan->dev->features = dev->features & IPVLAN_FEATURES; + ipvlan->dev->gso_max_size = dev->gso_max_size; + netdev_features_change(ipvlan->dev); + } + break; + + case NETDEV_CHANGEMTU: + list_for_each_entry(ipvlan, &port->ipvlans, pnode) + ipvlan_adjust_mtu(ipvlan, dev); + break; + + case NETDEV_PRE_TYPE_CHANGE: + /* Forbid underlying device to change its type. */ + return NOTIFY_BAD; + } + return NOTIFY_DONE; +} + +static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) +{ + struct ipvl_addr *addr; + + if (ipvlan_addr_busy(ipvlan, ip6_addr, true)) { + netif_err(ipvlan, ifup, ipvlan->dev, + "Failed to add IPv6=%pI6c addr for %s intf\n", + ip6_addr, ipvlan->dev->name); + return -EINVAL; + } + addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC); + if (!addr) + return -ENOMEM; + + addr->master = ipvlan; + memcpy(&addr->ip6addr, ip6_addr, sizeof(struct in6_addr)); + addr->atype = IPVL_IPV6; + list_add_tail_rcu(&addr->anode, &ipvlan->addrs); + ipvlan->ipv6cnt++; + ipvlan_ht_addr_add(ipvlan, addr); + + return 0; +} + +static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) +{ + struct ipvl_addr *addr; + + addr = ipvlan_ht_addr_lookup(ipvlan->port, ip6_addr, true); + if (!addr) + return; + + ipvlan_ht_addr_del(addr, true); + list_del_rcu(&addr->anode); + ipvlan->ipv6cnt--; + WARN_ON(ipvlan->ipv6cnt < 0); + kfree_rcu(addr, rcu); + + return; +} + +static int ipvlan_addr6_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *if6 = (struct inet6_ifaddr *)ptr; + struct net_device *dev = (struct net_device *)if6->idev->dev; + struct ipvl_dev *ipvlan = netdev_priv(dev); + + if (!ipvlan_dev_slave(dev)) + return NOTIFY_DONE; + + if (!ipvlan || !ipvlan->port) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UP: + if (ipvlan_add_addr6(ipvlan, &if6->addr)) + return NOTIFY_BAD; + break; + + case NETDEV_DOWN: + ipvlan_del_addr6(ipvlan, &if6->addr); + break; + } + + return NOTIFY_OK; +} + +static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) +{ + struct ipvl_addr *addr; + + if (ipvlan_addr_busy(ipvlan, ip4_addr, false)) { + netif_err(ipvlan, ifup, ipvlan->dev, + "Failed to add IPv4=%pI4 on %s intf.\n", + ip4_addr, ipvlan->dev->name); + return -EINVAL; + } + addr = kzalloc(sizeof(struct ipvl_addr), GFP_KERNEL); + if (!addr) + return -ENOMEM; + + addr->master = ipvlan; + memcpy(&addr->ip4addr, ip4_addr, sizeof(struct in_addr)); + addr->atype = IPVL_IPV4; + list_add_tail_rcu(&addr->anode, &ipvlan->addrs); + ipvlan->ipv4cnt++; + ipvlan_ht_addr_add(ipvlan, addr); + ipvlan_set_broadcast_mac_filter(ipvlan, true); + + return 0; +} + +static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) +{ + struct ipvl_addr *addr; + + addr = ipvlan_ht_addr_lookup(ipvlan->port, ip4_addr, false); + if (!addr) + return; + + ipvlan_ht_addr_del(addr, true); + list_del_rcu(&addr->anode); + ipvlan->ipv4cnt--; + WARN_ON(ipvlan->ipv4cnt < 0); + if (!ipvlan->ipv4cnt) + ipvlan_set_broadcast_mac_filter(ipvlan, false); + kfree_rcu(addr, rcu); + + return; +} + +static int ipvlan_addr4_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in_ifaddr *if4 = (struct in_ifaddr *)ptr; + struct net_device *dev = (struct net_device *)if4->ifa_dev->dev; + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct in_addr ip4_addr; + + if (!ipvlan_dev_slave(dev)) + return NOTIFY_DONE; + + if (!ipvlan || !ipvlan->port) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UP: + ip4_addr.s_addr = if4->ifa_address; + if (ipvlan_add_addr4(ipvlan, &ip4_addr)) + return NOTIFY_BAD; + break; + + case NETDEV_DOWN: + ip4_addr.s_addr = if4->ifa_address; + ipvlan_del_addr4(ipvlan, &ip4_addr); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = { + .notifier_call = ipvlan_addr4_event, +}; + +static struct notifier_block ipvlan_notifier_block __read_mostly = { + .notifier_call = ipvlan_device_event, +}; + +static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = { + .notifier_call = ipvlan_addr6_event, +}; + +static int __init ipvlan_init_module(void) +{ + int err; + + ipvlan_init_secret(); + register_netdevice_notifier(&ipvlan_notifier_block); + register_inet6addr_notifier(&ipvlan_addr6_notifier_block); + register_inetaddr_notifier(&ipvlan_addr4_notifier_block); + + err = ipvlan_link_register(&ipvlan_link_ops); + if (err < 0) + goto error; + + return 0; +error: + unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); + unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block); + unregister_netdevice_notifier(&ipvlan_notifier_block); + return err; +} + +static void __exit ipvlan_cleanup_module(void) +{ + rtnl_link_unregister(&ipvlan_link_ops); + unregister_netdevice_notifier(&ipvlan_notifier_block); + unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); + unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block); +} + +module_init(ipvlan_init_module); +module_exit(ipvlan_cleanup_module); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Mahesh Bandewar "); +MODULE_DESCRIPTION("Driver for L3 (IPv6/IPv4) based VLANs"); +MODULE_ALIAS_RTNL_LINK("ipvlan"); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5cd508787572..2cb772495f7a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1230,6 +1230,8 @@ enum netdev_priv_flags { IFF_LIVE_ADDR_CHANGE = 1<<20, IFF_MACVLAN = 1<<21, IFF_XMIT_DST_RELEASE_PERM = 1<<22, + IFF_IPVLAN_MASTER = 1<<23, + IFF_IPVLAN_SLAVE = 1<<24, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1255,6 +1257,8 @@ enum netdev_priv_flags { #define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE #define IFF_MACVLAN IFF_MACVLAN #define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM +#define IFF_IPVLAN_MASTER IFF_IPVLAN_MASTER +#define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE /** * struct net_device - The DEVICE structure. diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 7072d8325016..36bddc233633 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -330,6 +330,21 @@ enum macvlan_macaddr_mode { #define MACVLAN_FLAG_NOPROMISC 1 +/* IPVLAN section */ +enum { + IFLA_IPVLAN_UNSPEC, + IFLA_IPVLAN_MODE, + __IFLA_IPVLAN_MAX +}; + +#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1) + +enum ipvlan_mode { + IPVLAN_MODE_L2 = 0, + IPVLAN_MODE_L3, + IPVLAN_MODE_MAX +}; + /* VXLAN section */ enum { IFLA_VXLAN_UNSPEC, -- cgit v1.2.3-71-gd317 From 4f0372150b1fbf2167cfe21d2e6eac1933fb36ac Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 24 Nov 2014 14:24:54 +0100 Subject: tipc: add tipc_netlink.h to uapi Kbuild tipc_netlink.h is the user-space header for the new netlink api. It was accidentally left out of the uapi Kbuild list when the api was added. Signed-off-by: Richard Alpe Signed-off-by: David S. Miller --- include/uapi/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 72298b6887ac..a1e8175cc488 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -389,6 +389,7 @@ header-y += times.h header-y += timex.h header-y += tiocl.h header-y += tipc_config.h +header-y += tipc_netlink.h header-y += tipc.h header-y += toshiba.h header-y += tty_flags.h -- cgit v1.2.3-71-gd317 From 98f0334263f177dd22ca7c685cde04b47cc57b05 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Nov 2014 12:42:02 +0100 Subject: cfg80211: clean up beacon loss CQM event Having it as a sub-event for RSSI thresholds is very ugly, but luckily no userspace actually uses the events yet. Move the event to its own function call internally and to its own event attribute in nl80211. Signed-off-by: Johannes Berg --- drivers/net/wireless/ti/wlcore/event.c | 5 +---- include/net/cfg80211.h | 9 +++++++++ include/net/mac80211.h | 8 ++++++++ include/uapi/linux/nl80211.h | 7 ++++--- net/mac80211/mlme.c | 14 +++++++++++--- net/mac80211/trace.h | 6 ++++++ net/wireless/nl80211.c | 23 +++++++++++++++++++++++ 7 files changed, 62 insertions(+), 10 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/wireless/ti/wlcore/event.c b/drivers/net/wireless/ti/wlcore/event.c index 16d10281798d..5153640f4532 100644 --- a/drivers/net/wireless/ti/wlcore/event.c +++ b/drivers/net/wireless/ti/wlcore/event.c @@ -259,10 +259,7 @@ void wlcore_event_beacon_loss(struct wl1271 *wl, unsigned long roles_bitmap) &wlvif->connection_loss_work, msecs_to_jiffies(delay)); - ieee80211_cqm_rssi_notify( - vif, - NL80211_CQM_RSSI_BEACON_LOSS_EVENT, - GFP_KERNEL); + ieee80211_cqm_beacon_loss_notify(vif, GFP_KERNEL); } } EXPORT_SYMBOL_GPL(wlcore_event_beacon_loss); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1d15f1dfdaa7..4ebb816241fa 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4669,6 +4669,15 @@ void cfg80211_cqm_pktloss_notify(struct net_device *dev, void cfg80211_cqm_txe_notify(struct net_device *dev, const u8 *peer, u32 num_packets, u32 rate, u32 intvl, gfp_t gfp); +/** + * cfg80211_cqm_beacon_loss_notify - beacon loss event + * @dev: network device + * @gfp: context flags + * + * Notify userspace about beacon loss from the connected AP. + */ +void cfg80211_cqm_beacon_loss_notify(struct net_device *dev, gfp_t gfp); + /** * cfg80211_radar_event - radar detection event * @wiphy: the wiphy diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cff3a26a9dae..66cbfe46428d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4671,6 +4671,14 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); +/** + * ieee80211_cqm_beacon_loss_notify - inform CQM of beacon loss + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @gfp: context flags + */ +void ieee80211_cqm_beacon_loss_notify(struct ieee80211_vif *vif, gfp_t gfp); + /** * ieee80211_radar_detected - inform that a radar was detected * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d77524510435..b37bd5a1cb82 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3451,6 +3451,8 @@ enum nl80211_ps_state { * interval in which %NL80211_ATTR_CQM_TXE_PKTS and * %NL80211_ATTR_CQM_TXE_RATE must be satisfied before generating an * %NL80211_CMD_NOTIFY_CQM. Set to 0 to turn off TX error reporting. + * @NL80211_ATTR_CQM_BEACON_LOSS_EVENT: flag attribute that's set in a beacon + * loss event * @__NL80211_ATTR_CQM_AFTER_LAST: internal * @NL80211_ATTR_CQM_MAX: highest key attribute */ @@ -3463,6 +3465,7 @@ enum nl80211_attr_cqm { NL80211_ATTR_CQM_TXE_RATE, NL80211_ATTR_CQM_TXE_PKTS, NL80211_ATTR_CQM_TXE_INTVL, + NL80211_ATTR_CQM_BEACON_LOSS_EVENT, /* keep last */ __NL80211_ATTR_CQM_AFTER_LAST, @@ -3475,9 +3478,7 @@ enum nl80211_attr_cqm { * configured threshold * @NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH: The RSSI is higher than the * configured threshold - * @NL80211_CQM_RSSI_BEACON_LOSS_EVENT: The device experienced beacon loss. - * (Note that deauth/disassoc will still follow if the AP is not - * available. This event might get used as roaming event, etc.) + * @NL80211_CQM_RSSI_BEACON_LOSS_EVENT: (reserved, never sent) */ enum nl80211_cqm_rssi_threshold_event { NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 36e39ed4dfd9..8a23a64b9a61 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2266,9 +2266,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, "detected beacon loss from AP (missed %d beacons) - probing\n", beacon_loss_count); - ieee80211_cqm_rssi_notify(&sdata->vif, - NL80211_CQM_RSSI_BEACON_LOSS_EVENT, - GFP_KERNEL); + ieee80211_cqm_beacon_loss_notify(&sdata->vif, GFP_KERNEL); } /* @@ -4901,3 +4899,13 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, cfg80211_cqm_rssi_notify(sdata->dev, rssi_event, gfp); } EXPORT_SYMBOL(ieee80211_cqm_rssi_notify); + +void ieee80211_cqm_beacon_loss_notify(struct ieee80211_vif *vif, gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + trace_api_cqm_beacon_loss_notify(sdata->local, sdata); + + cfg80211_cqm_beacon_loss_notify(sdata->dev, gfp); +} +EXPORT_SYMBOL(ieee80211_cqm_beacon_loss_notify); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 85ccfbe863db..8e461a02c6a8 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1829,6 +1829,12 @@ TRACE_EVENT(api_cqm_rssi_notify, ) ); +DEFINE_EVENT(local_sdata_evt, api_cqm_beacon_loss_notify, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + TP_ARGS(local, sdata) +); + TRACE_EVENT(api_scan_completed, TP_PROTO(struct ieee80211_local *local, bool aborted), diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9c4d0102d34d..e11980e74a04 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11826,6 +11826,10 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, trace_cfg80211_cqm_rssi_notify(dev, rssi_event); + if (WARN_ON(rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW && + rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH)) + return; + msg = cfg80211_prepare_cqm(dev, NULL, gfp); if (!msg) return; @@ -11892,6 +11896,25 @@ void cfg80211_cqm_pktloss_notify(struct net_device *dev, } EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify); +void cfg80211_cqm_beacon_loss_notify(struct net_device *dev, gfp_t gfp) +{ + struct sk_buff *msg; + + msg = cfg80211_prepare_cqm(dev, NULL, gfp); + if (!msg) + return; + + if (nla_put_flag(msg, NL80211_ATTR_CQM_BEACON_LOSS_EVENT)) + goto nla_put_failure; + + cfg80211_send_cqm(msg, gfp); + return; + + nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_cqm_beacon_loss_notify); + static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, const u8 *replay_ctr, gfp_t gfp) -- cgit v1.2.3-71-gd317 From a99903ec4566eeeaaaf611499cae00abbe844938 Mon Sep 17 00:00:00 2001 From: Julien Lefrique Date: Tue, 21 Oct 2014 16:52:46 +0200 Subject: NFC: NCI: Handle Target mode activation Changes: * Extract the Listen mode activation parameters from RF_INTF_ACTIVATED_NTF. * Store the General Bytes of ATR_REQ. * Signal that Target mode is activated in case of an activation in NFC-DEP. * Update the NCI state accordingly. * Use the various constants defined in nfc.h. * Fix the ATR_REQ and ATR_RES maximum size. As per NCI 1.0 and NCI 1.1, the Activation Parameters for both Poll and Listen mode contain all the bytes of ATR_REQ/ATR_RES starting and including Byte 3 as defined in [DIGITAL]. In [DIGITAL], the maximum size of ATR_REQ/ATR_RES is 64 bytes and they are numbered starting from Byte 1. Signed-off-by: Julien Lefrique Signed-off-by: Samuel Ortiz --- include/net/nfc/nci.h | 26 +++++++-- include/net/nfc/nci_core.h | 3 + include/net/nfc/nfc.h | 2 + include/uapi/linux/nfc.h | 20 ++++--- net/nfc/nci/ntf.c | 133 +++++++++++++++++++++++++++++++++++++-------- 5 files changed, 146 insertions(+), 38 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index 36cf65386b86..fffadc706e06 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -28,6 +28,8 @@ #ifndef __NCI_H #define __NCI_H +#include + /* NCI constants */ #define NCI_MAX_NUM_MAPPING_CONFIGS 10 #define NCI_MAX_NUM_RF_CONFIGS 10 @@ -73,6 +75,8 @@ #define NCI_NFC_A_ACTIVE_LISTEN_MODE 0x83 #define NCI_NFC_F_ACTIVE_LISTEN_MODE 0x85 +#define NCI_RF_TECH_MODE_LISTEN_MASK 0x80 + /* NCI RF Technologies */ #define NCI_NFC_RF_TECHNOLOGY_A 0x00 #define NCI_NFC_RF_TECHNOLOGY_B 0x01 @@ -324,26 +328,31 @@ struct nci_core_intf_error_ntf { struct rf_tech_specific_params_nfca_poll { __u16 sens_res; __u8 nfcid1_len; /* 0, 4, 7, or 10 Bytes */ - __u8 nfcid1[10]; + __u8 nfcid1[NFC_NFCID1_MAXSIZE]; __u8 sel_res_len; /* 0 or 1 Bytes */ __u8 sel_res; } __packed; struct rf_tech_specific_params_nfcb_poll { __u8 sensb_res_len; - __u8 sensb_res[12]; /* 11 or 12 Bytes */ + __u8 sensb_res[NFC_SENSB_RES_MAXSIZE]; /* 11 or 12 Bytes */ } __packed; struct rf_tech_specific_params_nfcf_poll { __u8 bit_rate; __u8 sensf_res_len; - __u8 sensf_res[18]; /* 16 or 18 Bytes */ + __u8 sensf_res[NFC_SENSF_RES_MAXSIZE]; /* 16 or 18 Bytes */ } __packed; struct rf_tech_specific_params_nfcv_poll { __u8 res_flags; __u8 dsfid; - __u8 uid[8]; /* 8 Bytes */ + __u8 uid[NFC_ISO15693_UID_MAXSIZE]; /* 8 Bytes */ +} __packed; + +struct rf_tech_specific_params_nfcf_listen { + __u8 local_nfcid2_len; + __u8 local_nfcid2[NFC_NFCID2_MAXSIZE]; /* 0 or 8 Bytes */ } __packed; struct nci_rf_discover_ntf { @@ -375,7 +384,12 @@ struct activation_params_nfcb_poll_iso_dep { struct activation_params_poll_nfc_dep { __u8 atr_res_len; - __u8 atr_res[63]; + __u8 atr_res[NFC_ATR_RES_MAXSIZE - 2]; /* ATR_RES from byte 3 */ +}; + +struct activation_params_listen_nfc_dep { + __u8 atr_req_len; + __u8 atr_req[NFC_ATR_REQ_MAXSIZE - 2]; /* ATR_REQ from byte 3 */ }; struct nci_rf_intf_activated_ntf { @@ -392,6 +406,7 @@ struct nci_rf_intf_activated_ntf { struct rf_tech_specific_params_nfcb_poll nfcb_poll; struct rf_tech_specific_params_nfcf_poll nfcf_poll; struct rf_tech_specific_params_nfcv_poll nfcv_poll; + struct rf_tech_specific_params_nfcf_listen nfcf_listen; } rf_tech_specific_params; __u8 data_exch_rf_tech_and_mode; @@ -403,6 +418,7 @@ struct nci_rf_intf_activated_ntf { struct activation_params_nfca_poll_iso_dep nfca_poll_iso_dep; struct activation_params_nfcb_poll_iso_dep nfcb_poll_iso_dep; struct activation_params_poll_nfc_dep poll_nfc_dep; + struct activation_params_listen_nfc_dep listen_nfc_dep; } activation_params; } __packed; diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 75d10e625c49..cfea60748a39 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -4,6 +4,7 @@ * * Copyright (C) 2011 Texas Instruments, Inc. * Copyright (C) 2013 Intel Corporation. All rights reserved. + * Copyright (C) 2014 Marvell International Ltd. * * Written by Ilan Elias * @@ -49,6 +50,8 @@ enum nci_state { NCI_W4_ALL_DISCOVERIES, NCI_W4_HOST_SELECT, NCI_POLL_ACTIVE, + NCI_LISTEN_ACTIVE, + NCI_LISTEN_SLEEP, }; /* NCI timeouts */ diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 6c583e244de2..12adb817c27a 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -1,5 +1,6 @@ /* * Copyright (C) 2011 Instituto Nokia de Tecnologia + * Copyright (C) 2014 Marvell International Ltd. * * Authors: * Lauro Ramos Venancio @@ -87,6 +88,7 @@ struct nfc_ops { #define NFC_TARGET_IDX_ANY -1 #define NFC_MAX_GT_LEN 48 #define NFC_ATR_RES_GT_OFFSET 15 +#define NFC_ATR_REQ_GT_OFFSET 14 /** * struct nfc_target - NFC target descriptiom diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 9b19b4461928..19a75daac14c 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -196,15 +196,17 @@ enum nfc_sdp_attr { }; #define NFC_SDP_ATTR_MAX (__NFC_SDP_ATTR_AFTER_LAST - 1) -#define NFC_DEVICE_NAME_MAXSIZE 8 -#define NFC_NFCID1_MAXSIZE 10 -#define NFC_NFCID2_MAXSIZE 8 -#define NFC_NFCID3_MAXSIZE 10 -#define NFC_SENSB_RES_MAXSIZE 12 -#define NFC_SENSF_RES_MAXSIZE 18 -#define NFC_GB_MAXSIZE 48 -#define NFC_FIRMWARE_NAME_MAXSIZE 32 -#define NFC_ISO15693_UID_MAXSIZE 8 +#define NFC_DEVICE_NAME_MAXSIZE 8 +#define NFC_NFCID1_MAXSIZE 10 +#define NFC_NFCID2_MAXSIZE 8 +#define NFC_NFCID3_MAXSIZE 10 +#define NFC_SENSB_RES_MAXSIZE 12 +#define NFC_SENSF_RES_MAXSIZE 18 +#define NFC_ATR_REQ_MAXSIZE 64 +#define NFC_ATR_RES_MAXSIZE 64 +#define NFC_GB_MAXSIZE 48 +#define NFC_FIRMWARE_NAME_MAXSIZE 32 +#define NFC_ISO15693_UID_MAXSIZE 8 /* NFC protocols */ #define NFC_PROTO_JEWEL 1 diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 205b35f666db..46b2a90ac55a 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -167,6 +167,18 @@ static __u8 *nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev, return data; } +static __u8 *nci_extract_rf_params_nfcf_passive_listen(struct nci_dev *ndev, + struct rf_tech_specific_params_nfcf_listen *nfcf_listen, + __u8 *data) +{ + nfcf_listen->local_nfcid2_len = min_t(__u8, *data++, + NFC_NFCID2_MAXSIZE); + memcpy(nfcf_listen->local_nfcid2, data, nfcf_listen->local_nfcid2_len); + data += nfcf_listen->local_nfcid2_len; + + return data; +} + __u32 nci_get_prop_rf_protocol(struct nci_dev *ndev, __u8 rf_protocol) { if (ndev->ops->get_rfprotocol) @@ -401,17 +413,29 @@ static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev, struct nci_rf_intf_activated_ntf *ntf, __u8 *data) { struct activation_params_poll_nfc_dep *poll; + struct activation_params_listen_nfc_dep *listen; switch (ntf->activation_rf_tech_and_mode) { case NCI_NFC_A_PASSIVE_POLL_MODE: case NCI_NFC_F_PASSIVE_POLL_MODE: poll = &ntf->activation_params.poll_nfc_dep; - poll->atr_res_len = min_t(__u8, *data++, 63); + poll->atr_res_len = min_t(__u8, *data++, + NFC_ATR_RES_MAXSIZE - 2); pr_debug("atr_res_len %d\n", poll->atr_res_len); if (poll->atr_res_len > 0) memcpy(poll->atr_res, data, poll->atr_res_len); break; + case NCI_NFC_A_PASSIVE_LISTEN_MODE: + case NCI_NFC_F_PASSIVE_LISTEN_MODE: + listen = &ntf->activation_params.listen_nfc_dep; + listen->atr_req_len = min_t(__u8, *data++, + NFC_ATR_REQ_MAXSIZE - 2); + pr_debug("atr_req_len %d\n", listen->atr_req_len); + if (listen->atr_req_len > 0) + memcpy(listen->atr_req, data, listen->atr_req_len); + break; + default: pr_err("unsupported activation_rf_tech_and_mode 0x%x\n", ntf->activation_rf_tech_and_mode); @@ -444,6 +468,50 @@ static void nci_target_auto_activated(struct nci_dev *ndev, nfc_targets_found(ndev->nfc_dev, ndev->targets, ndev->n_targets); } +static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev, + struct nci_rf_intf_activated_ntf *ntf) +{ + ndev->remote_gb_len = 0; + + if (ntf->activation_params_len <= 0) + return NCI_STATUS_OK; + + switch (ntf->activation_rf_tech_and_mode) { + case NCI_NFC_A_PASSIVE_POLL_MODE: + case NCI_NFC_F_PASSIVE_POLL_MODE: + /* ATR_RES general bytes at offset 15 */ + ndev->remote_gb_len = min_t(__u8, + (ntf->activation_params.poll_nfc_dep.atr_res_len + - NFC_ATR_RES_GT_OFFSET), + NFC_MAX_GT_LEN); + memcpy(ndev->remote_gb, + (ntf->activation_params.poll_nfc_dep .atr_res + + NFC_ATR_RES_GT_OFFSET), + ndev->remote_gb_len); + break; + + case NCI_NFC_A_PASSIVE_LISTEN_MODE: + case NCI_NFC_F_PASSIVE_LISTEN_MODE: + /* ATR_REQ general bytes at offset 14 */ + ndev->remote_gb_len = min_t(__u8, + (ntf->activation_params.listen_nfc_dep.atr_req_len + - NFC_ATR_REQ_GT_OFFSET), + NFC_MAX_GT_LEN); + memcpy(ndev->remote_gb, + (ntf->activation_params.listen_nfc_dep.atr_req + + NFC_ATR_REQ_GT_OFFSET), + ndev->remote_gb_len); + break; + + default: + pr_err("unsupported activation_rf_tech_and_mode 0x%x\n", + ntf->activation_rf_tech_and_mode); + return NCI_STATUS_RF_PROTOCOL_ERROR; + } + + return NCI_STATUS_OK; +} + static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { @@ -493,6 +561,16 @@ static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, &(ntf.rf_tech_specific_params.nfcv_poll), data); break; + case NCI_NFC_A_PASSIVE_LISTEN_MODE: + /* no RF technology specific parameters */ + break; + + case NCI_NFC_F_PASSIVE_LISTEN_MODE: + data = nci_extract_rf_params_nfcf_passive_listen(ndev, + &(ntf.rf_tech_specific_params.nfcf_listen), + data); + break; + default: pr_err("unsupported activation_rf_tech_and_mode 0x%x\n", ntf.activation_rf_tech_and_mode); @@ -546,32 +624,39 @@ exit: /* store general bytes to be reported later in dep_link_up */ if (ntf.rf_interface == NCI_RF_INTERFACE_NFC_DEP) { - ndev->remote_gb_len = 0; - - if (ntf.activation_params_len > 0) { - /* ATR_RES general bytes at offset 15 */ - ndev->remote_gb_len = min_t(__u8, - (ntf.activation_params - .poll_nfc_dep.atr_res_len - - NFC_ATR_RES_GT_OFFSET), - NFC_MAX_GT_LEN); - memcpy(ndev->remote_gb, - (ntf.activation_params.poll_nfc_dep - .atr_res + NFC_ATR_RES_GT_OFFSET), - ndev->remote_gb_len); - } + err = nci_store_general_bytes_nfc_dep(ndev, &ntf); + if (err != NCI_STATUS_OK) + pr_err("unable to store general bytes\n"); } } - if (atomic_read(&ndev->state) == NCI_DISCOVERY) { - /* A single target was found and activated automatically */ - atomic_set(&ndev->state, NCI_POLL_ACTIVE); - if (err == NCI_STATUS_OK) - nci_target_auto_activated(ndev, &ntf); - } else { /* ndev->state == NCI_W4_HOST_SELECT */ - /* A selected target was activated, so complete the request */ - atomic_set(&ndev->state, NCI_POLL_ACTIVE); - nci_req_complete(ndev, err); + if (!(ntf.activation_rf_tech_and_mode & NCI_RF_TECH_MODE_LISTEN_MASK)) { + /* Poll mode */ + if (atomic_read(&ndev->state) == NCI_DISCOVERY) { + /* A single target was found and activated + * automatically */ + atomic_set(&ndev->state, NCI_POLL_ACTIVE); + if (err == NCI_STATUS_OK) + nci_target_auto_activated(ndev, &ntf); + } else { /* ndev->state == NCI_W4_HOST_SELECT */ + /* A selected target was activated, so complete the + * request */ + atomic_set(&ndev->state, NCI_POLL_ACTIVE); + nci_req_complete(ndev, err); + } + } else { + /* Listen mode */ + atomic_set(&ndev->state, NCI_LISTEN_ACTIVE); + if (err == NCI_STATUS_OK && + ntf.rf_protocol == NCI_RF_PROTOCOL_NFC_DEP) { + err = nfc_tm_activated(ndev->nfc_dev, + NFC_PROTO_NFC_DEP_MASK, + NFC_COMM_PASSIVE, + ndev->remote_gb, + ndev->remote_gb_len); + if (err != NCI_STATUS_OK) + pr_err("error when signaling tm activation\n"); + } } } -- cgit v1.2.3-71-gd317 From 3682f49f32051765ed6eb77fc882f0458f7d44c3 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 2 Dec 2014 21:27:50 +0100 Subject: NFC: netlink: Add new netlink command NFC_CMD_ACTIVATE_TARGET Some tag might get deactivated after some read or write tentative. This may happen for example with Mifare Ultralight C tag when trying to read the last 4 blocks (starting block 0x2c) configured as write only. NFC_CMD_ACTIVATE_TARGET will try to reselect the tag in order to detect if it got remove from the field or if it is still present. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/uapi/linux/nfc.h | 1 + net/nfc/netlink.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 19a75daac14c..3c5efb1bc393 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -116,6 +116,7 @@ enum nfc_commands { NFC_EVENT_SE_TRANSACTION, NFC_CMD_GET_SE, NFC_CMD_SE_IO, + NFC_CMD_ACTIVATE_TARGET, /* private: internal use only */ __NFC_CMD_AFTER_LAST }; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 43cb1c17e267..95818314aea6 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -810,6 +810,31 @@ out: return rc; } +static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + u32 device_idx, target_idx, protocol; + int rc; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + return -EINVAL; + + device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + + dev = nfc_get_device(device_idx); + if (!dev) + return -ENODEV; + + target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]); + protocol = nla_get_u32(info->attrs[NFC_ATTR_PROTOCOLS]); + + nfc_deactivate_target(dev, target_idx); + rc = nfc_activate_target(dev, target_idx, protocol); + + nfc_put_device(dev); + return 0; +} + static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; @@ -1455,6 +1480,11 @@ static const struct genl_ops nfc_genl_ops[] = { .doit = nfc_genl_se_io, .policy = nfc_genl_policy, }, + { + .cmd = NFC_CMD_ACTIVATE_TARGET, + .doit = nfc_genl_activate_target, + .policy = nfc_genl_policy, + }, }; -- cgit v1.2.3-71-gd317 From e479ce479743984a5d4581749f9aaa9c3bfd65e4 Mon Sep 17 00:00:00 2001 From: Julien Lefrique Date: Tue, 2 Dec 2014 16:25:01 +0100 Subject: NFC: NCI: Fix max length of General Bytes in ATR_RES The maximum size of ATR_REQ and ATR_RES is 64 bytes. The maximum number of General Bytes is calculated by the maximum number of data bytes in the ATR_REQ/ATR_RES, substracted by the number of mandatory data bytes. ATR_REQ: 16 mandatory data bytes, giving a maximum of 48 General Bytes. ATR_RES: 17 mandatory data bytes, giving a maximum of 47 General Bytes. Regression introduced in commit a99903ec. Signed-off-by: Julien Lefrique Signed-off-by: Samuel Ortiz --- include/uapi/linux/nfc.h | 2 ++ net/nfc/nci/ntf.c | 8 +++----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 3c5efb1bc393..8119255feae4 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -205,6 +205,8 @@ enum nfc_sdp_attr { #define NFC_SENSF_RES_MAXSIZE 18 #define NFC_ATR_REQ_MAXSIZE 64 #define NFC_ATR_RES_MAXSIZE 64 +#define NFC_ATR_REQ_GB_MAXSIZE 48 +#define NFC_ATR_RES_GB_MAXSIZE 47 #define NFC_GB_MAXSIZE 48 #define NFC_FIRMWARE_NAME_MAXSIZE 32 #define NFC_ISO15693_UID_MAXSIZE 8 diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 8dee73d0c4e1..22e453cb787d 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -479,24 +479,22 @@ static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev, switch (ntf->activation_rf_tech_and_mode) { case NCI_NFC_A_PASSIVE_POLL_MODE: case NCI_NFC_F_PASSIVE_POLL_MODE: - /* ATR_RES general bytes at offset 15 */ ndev->remote_gb_len = min_t(__u8, (ntf->activation_params.poll_nfc_dep.atr_res_len - NFC_ATR_RES_GT_OFFSET), - NFC_MAX_GT_LEN); + NFC_ATR_RES_GB_MAXSIZE); memcpy(ndev->remote_gb, - (ntf->activation_params.poll_nfc_dep .atr_res + (ntf->activation_params.poll_nfc_dep.atr_res + NFC_ATR_RES_GT_OFFSET), ndev->remote_gb_len); break; case NCI_NFC_A_PASSIVE_LISTEN_MODE: case NCI_NFC_F_PASSIVE_LISTEN_MODE: - /* ATR_REQ general bytes at offset 14 */ ndev->remote_gb_len = min_t(__u8, (ntf->activation_params.listen_nfc_dep.atr_req_len - NFC_ATR_REQ_GT_OFFSET), - NFC_MAX_GT_LEN); + NFC_ATR_REQ_GB_MAXSIZE); memcpy(ndev->remote_gb, (ntf->activation_params.listen_nfc_dep.atr_req + NFC_ATR_REQ_GT_OFFSET), -- cgit v1.2.3-71-gd317 From b7485f6b035a87685ce35e0e52deee6467811eb0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 28 Nov 2014 14:34:13 +0100 Subject: neigh: sort Neighbor Cache Entry Flags Suggested-by: Florian Fainelli Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 4a1d7e96dfe3..2f043af7fcd6 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -35,11 +35,10 @@ enum { */ #define NTF_USE 0x01 -#define NTF_PROXY 0x08 /* == ATF_PUBL */ -#define NTF_ROUTER 0x80 - #define NTF_SELF 0x02 #define NTF_MASTER 0x04 +#define NTF_PROXY 0x08 /* == ATF_PUBL */ +#define NTF_ROUTER 0x80 /* * Neighbor Cache Entry States. -- cgit v1.2.3-71-gd317 From 82f2841291cfaf4d225aa1766424280254d3e3b2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 28 Nov 2014 14:34:18 +0100 Subject: rtnl: expose physical switch id for particular device The netdevice represents a port in a switch, it will expose IFLA_PHYS_SWITCH_ID value via rtnl. Two netdevices with the same value belong to one physical switch. Signed-off-by: Jiri Pirko Reviewed-by: Thomas Graf Acked-by: John Fastabend Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + net/core/rtnetlink.c | 26 +++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 36bddc233633..623f1a7c7627 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -145,6 +145,7 @@ enum { IFLA_CARRIER, IFLA_PHYS_PORT_ID, IFLA_CARRIER_CHANGES, + IFLA_PHYS_SWITCH_ID, __IFLA_MAX }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0640f1fbe9dd..aa5cd2c53a56 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -44,6 +44,7 @@ #include #include +#include #include #include #include @@ -869,7 +870,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */ - + nla_total_size(MAX_PHYS_ITEM_ID_LEN); /* IFLA_PHYS_PORT_ID */ + + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ + + nla_total_size(MAX_PHYS_ITEM_ID_LEN); /* IFLA_PHYS_SWITCH_ID */ } static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) @@ -968,6 +970,24 @@ static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev) return 0; } +static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) +{ + int err; + struct netdev_phys_item_id psid; + + err = netdev_switch_parent_id_get(dev, &psid); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + if (nla_put(skb, IFLA_PHYS_SWITCH_ID, psid.id_len, psid.id)) + return -EMSGSIZE; + + return 0; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask) @@ -1040,6 +1060,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (rtnl_phys_port_id_fill(skb, dev)) goto nla_put_failure; + if (rtnl_phys_switch_id_fill(skb, dev)) + goto nla_put_failure; + attr = nla_reserve(skb, IFLA_STATS, sizeof(struct rtnl_link_stats)); if (attr == NULL) @@ -1199,6 +1222,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 }, [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, [IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */ + [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { -- cgit v1.2.3-71-gd317 From cf6b8e1eedffd9ef9a22c0c9453d752b07daf89a Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Fri, 28 Nov 2014 14:34:21 +0100 Subject: bridge: add API to notify bridge driver of learned FBD on offloaded device When the swdev device learns a new mac/vlan on a port, it sends some async notification to the driver and the driver installs an FDB in the device. To give a holistic system view, the learned mac/vlan should be reflected in the bridge's FBD table, so the user, using normal iproute2 cmds, can view what is currently learned by the device. This API on the bridge driver gives a way for the swdev driver to install an FBD entry in the bridge FBD table. (And remove one). This is equivalent to the device running these cmds: bridge fdb [add|del] dev vid master This patch needs some extra eyeballs for review, in paricular around the locking and contexts. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 18 +++++++++ include/uapi/linux/neighbour.h | 1 + net/bridge/br_fdb.c | 91 +++++++++++++++++++++++++++++++++++++++++- net/bridge/br_private.h | 3 +- 4 files changed, 111 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 808dcb8cc04f..fa2eca625129 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -37,6 +37,24 @@ extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __use typedef int br_should_route_hook_t(struct sk_buff *skb); extern br_should_route_hook_t __rcu *br_should_route_hook; +#if IS_ENABLED(CONFIG_BRIDGE) +int br_fdb_external_learn_add(struct net_device *dev, + const unsigned char *addr, u16 vid); +int br_fdb_external_learn_del(struct net_device *dev, + const unsigned char *addr, u16 vid); +#else +static inline int br_fdb_external_learn_add(struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + return 0; +} +static inline int br_fdb_external_learn_del(struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + return 0; +} +#endif + #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list); diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 2f043af7fcd6..f3d77f9f1e0b 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -38,6 +38,7 @@ enum { #define NTF_SELF 0x02 #define NTF_MASTER 0x04 #define NTF_PROXY 0x08 /* == ATF_PUBL */ +#define NTF_EXT_LEARNED 0x10 #define NTF_ROUTER 0x80 /* diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index b1be971eb06c..cc36e59db7d7 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -481,6 +481,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, fdb->is_local = 0; fdb->is_static = 0; fdb->added_by_user = 0; + fdb->added_by_external_learn = 0; fdb->updated = fdb->used = jiffies; hlist_add_head_rcu(&fdb->hlist, head); } @@ -613,7 +614,7 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, ndm->ndm_family = AF_BRIDGE; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; - ndm->ndm_flags = 0; + ndm->ndm_flags = fdb->added_by_external_learn ? NTF_EXT_LEARNED : 0; ndm->ndm_type = 0; ndm->ndm_ifindex = fdb->dst ? fdb->dst->dev->ifindex : br->dev->ifindex; ndm->ndm_state = fdb_to_nud(fdb); @@ -983,3 +984,91 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) } } } + +int br_fdb_external_learn_add(struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + struct net_bridge_port *p; + struct net_bridge *br; + struct hlist_head *head; + struct net_bridge_fdb_entry *fdb; + int err = 0; + + rtnl_lock(); + + p = br_port_get_rtnl(dev); + if (!p) { + pr_info("bridge: %s not a bridge port\n", dev->name); + err = -EINVAL; + goto err_rtnl_unlock; + } + + br = p->br; + + spin_lock_bh(&br->hash_lock); + + head = &br->hash[br_mac_hash(addr, vid)]; + fdb = fdb_find(head, addr, vid); + if (!fdb) { + fdb = fdb_create(head, p, addr, vid); + if (!fdb) { + err = -ENOMEM; + goto err_unlock; + } + fdb->added_by_external_learn = 1; + fdb_notify(br, fdb, RTM_NEWNEIGH); + } else if (fdb->added_by_external_learn) { + /* Refresh entry */ + fdb->updated = fdb->used = jiffies; + } else if (!fdb->added_by_user) { + /* Take over SW learned entry */ + fdb->added_by_external_learn = 1; + fdb->updated = jiffies; + fdb_notify(br, fdb, RTM_NEWNEIGH); + } + +err_unlock: + spin_unlock_bh(&br->hash_lock); +err_rtnl_unlock: + rtnl_unlock(); + + return err; +} +EXPORT_SYMBOL(br_fdb_external_learn_add); + +int br_fdb_external_learn_del(struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + struct net_bridge_port *p; + struct net_bridge *br; + struct hlist_head *head; + struct net_bridge_fdb_entry *fdb; + int err = 0; + + rtnl_lock(); + + p = br_port_get_rtnl(dev); + if (!p) { + pr_info("bridge: %s not a bridge port\n", dev->name); + err = -EINVAL; + goto err_rtnl_unlock; + } + + br = p->br; + + spin_lock_bh(&br->hash_lock); + + head = &br->hash[br_mac_hash(addr, vid)]; + fdb = fdb_find(head, addr, vid); + if (fdb && fdb->added_by_external_learn) + fdb_delete(br, fdb); + else + err = -ENOENT; + + spin_unlock_bh(&br->hash_lock); +err_rtnl_unlock: + rtnl_unlock(); + + return err; +} +EXPORT_SYMBOL(br_fdb_external_learn_del); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1b529da8234d..cc36fb3efbdd 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -100,7 +100,8 @@ struct net_bridge_fdb_entry mac_addr addr; unsigned char is_local:1, is_static:1, - added_by_user:1; + added_by_user:1, + added_by_external_learn:1; __u16 vlan_id; }; -- cgit v1.2.3-71-gd317 From efacacdaf7cb5a0592ed772e3731636b2742e34a Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Fri, 28 Nov 2014 14:34:23 +0100 Subject: bridge: add new brport flag LEARNING_SYNC This policy flag controls syncing of learned FDB entries to bridge's FDB. If on, FDB entries learned on bridge port device will be synced. If off, device may still learn new FDB entries but they will not be synced with bridge's FDB. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Acked-by: Roopa Prabhu Acked-by: Jamal Hadi Salim Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 1 + include/uapi/linux/if_link.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 2c81a8efd24d..0a8ce762a47f 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -43,6 +43,7 @@ struct br_ip_list { #define BR_AUTO_MASK (BR_FLOOD | BR_LEARNING) #define BR_PROMISC BIT(7) #define BR_PROXYARP BIT(8) +#define BR_LEARNING_SYNC BIT(9) extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 623f1a7c7627..f7d0d2d7173a 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -245,6 +245,7 @@ enum { IFLA_BRPORT_LEARNING, /* mac learning */ IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */ IFLA_BRPORT_PROXYARP, /* proxy ARP */ + IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) -- cgit v1.2.3-71-gd317 From 345cd494a324c149ef7293f4bedf2d7131a6de7c Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Fri, 28 Nov 2014 14:34:24 +0100 Subject: bridge: add new hwmode swdev Current hwmode settings are "vepa" or "veb". These are for NIC interfaces with basic bridging function offloaded to HW. Add new "swdev" for full switch device offloads. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index da17e456908d..296a556454e3 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -105,6 +105,7 @@ struct __fdb_entry { #define BRIDGE_MODE_VEB 0 /* Default loopback mode */ #define BRIDGE_MODE_VEPA 1 /* 802.1Qbg defined VEPA mode */ +#define BRIDGE_MODE_SWDEV 2 /* Full switch device offload */ /* Bridge management nested attributes * [IFLA_AF_SPEC] = { -- cgit v1.2.3-71-gd317 From a51b9199b1e092da5ee4a89852e84b4c52ae6044 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sun, 30 Nov 2014 19:56:53 +0100 Subject: netfilter: ipset: Alignment problem between 64bit kernel 32bit userspace Sven-Haegar Koch reported the issue: sims:~# iptables -A OUTPUT -m set --match-set testset src -j ACCEPT iptables: Invalid argument. Run `dmesg' for more information. In syslog: x_tables: ip_tables: set.3 match: invalid size 48 (kernel) != (user) 32 which was introduced by the counter extension in ipset. The patch fixes the alignment issue with introducing a new set match revision with the fixed underlying 'struct ip_set_counter_match' structure. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/ipset/ip_set.h | 8 +++- include/uapi/linux/netfilter/xt_set.h | 13 ++++- net/netfilter/xt_set.c | 73 +++++++++++++++++++++++++++-- 3 files changed, 88 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index ca03119111a2..5ab4e60894cf 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -256,11 +256,17 @@ enum { IPSET_COUNTER_GT, }; -struct ip_set_counter_match { +/* Backward compatibility for set match v3 */ +struct ip_set_counter_match0 { __u8 op; __u64 value; }; +struct ip_set_counter_match { + __aligned_u64 value; + __u8 op; +}; + /* Interface to iptables/ip6tables */ #define SO_IP_SET 83 diff --git a/include/uapi/linux/netfilter/xt_set.h b/include/uapi/linux/netfilter/xt_set.h index d6a1df1f2947..d4e02348384c 100644 --- a/include/uapi/linux/netfilter/xt_set.h +++ b/include/uapi/linux/netfilter/xt_set.h @@ -66,8 +66,8 @@ struct xt_set_info_target_v2 { struct xt_set_info_match_v3 { struct xt_set_info match_set; - struct ip_set_counter_match packets; - struct ip_set_counter_match bytes; + struct ip_set_counter_match0 packets; + struct ip_set_counter_match0 bytes; __u32 flags; }; @@ -81,4 +81,13 @@ struct xt_set_info_target_v3 { __u32 timeout; }; +/* Revision 4 match */ + +struct xt_set_info_match_v4 { + struct xt_set_info match_set; + struct ip_set_counter_match packets; + struct ip_set_counter_match bytes; + __u32 flags; +}; + #endif /*_XT_SET_H*/ diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 5732cd64acc0..0d47afea9682 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -157,7 +157,7 @@ set_match_v1_destroy(const struct xt_mtdtor_param *par) /* Revision 3 match */ static bool -match_counter(u64 counter, const struct ip_set_counter_match *info) +match_counter0(u64 counter, const struct ip_set_counter_match0 *info) { switch (info->op) { case IPSET_COUNTER_NONE: @@ -192,14 +192,60 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS)) return ret; - if (!match_counter(opt.ext.packets, &info->packets)) + if (!match_counter0(opt.ext.packets, &info->packets)) return 0; - return match_counter(opt.ext.bytes, &info->bytes); + return match_counter0(opt.ext.bytes, &info->bytes); } #define set_match_v3_checkentry set_match_v1_checkentry #define set_match_v3_destroy set_match_v1_destroy +/* Revision 4 match */ + +static bool +match_counter(u64 counter, const struct ip_set_counter_match *info) +{ + switch (info->op) { + case IPSET_COUNTER_NONE: + return true; + case IPSET_COUNTER_EQ: + return counter == info->value; + case IPSET_COUNTER_NE: + return counter != info->value; + case IPSET_COUNTER_LT: + return counter < info->value; + case IPSET_COUNTER_GT: + return counter > info->value; + } + return false; +} + +static bool +set_match_v4(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_set_info_match_v4 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, + info->match_set.flags, info->flags, UINT_MAX); + int ret; + + if (info->packets.op != IPSET_COUNTER_NONE || + info->bytes.op != IPSET_COUNTER_NONE) + opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS; + + ret = match_set(info->match_set.index, skb, par, &opt, + info->match_set.flags & IPSET_INV_MATCH); + + if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS)) + return ret; + + if (!match_counter(opt.ext.packets, &info->packets)) + return 0; + return match_counter(opt.ext.bytes, &info->bytes); +} + +#define set_match_v4_checkentry set_match_v1_checkentry +#define set_match_v4_destroy set_match_v1_destroy + /* Revision 0 interface: backward compatible with netfilter/iptables */ static unsigned int @@ -573,6 +619,27 @@ static struct xt_match set_matches[] __read_mostly = { .destroy = set_match_v3_destroy, .me = THIS_MODULE }, + /* new revision for counters support: update, match */ + { + .name = "set", + .family = NFPROTO_IPV4, + .revision = 4, + .match = set_match_v4, + .matchsize = sizeof(struct xt_set_info_match_v4), + .checkentry = set_match_v4_checkentry, + .destroy = set_match_v4_destroy, + .me = THIS_MODULE + }, + { + .name = "set", + .family = NFPROTO_IPV6, + .revision = 4, + .match = set_match_v4, + .matchsize = sizeof(struct xt_set_info_match_v4), + .checkentry = set_match_v4_checkentry, + .destroy = set_match_v4_destroy, + .me = THIS_MODULE + }, }; static struct xt_target set_targets[] __read_mostly = { -- cgit v1.2.3-71-gd317 From 4fc11e61f46dd66730a18e8136607386007ee8c9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 1 Dec 2014 10:16:17 +0900 Subject: uapi: fix to export linux/vm_sockets.h A typo "header=y" was introduced by commit 7071cf7fc435 (uapi: add missing network related headers to kbuild). Signed-off-by: Masahiro Yamada Cc: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/Kbuild | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 4c94f31a8c99..8523f9bb72f2 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -427,7 +427,7 @@ header-y += virtio_net.h header-y += virtio_pci.h header-y += virtio_ring.h header-y += virtio_rng.h -header=y += vm_sockets.h +header-y += vm_sockets.h header-y += vt.h header-y += wait.h header-y += wanrouter.h -- cgit v1.2.3-71-gd317 From ddd872bc3098f9d9abe1680a6b2013e59e3337f7 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Dec 2014 15:06:34 -0800 Subject: bpf: verifier: add checks for BPF_ABS | BPF_IND instructions introduce program type BPF_PROG_TYPE_SOCKET_FILTER that is used for attaching programs to sockets where ctx == skb. add verifier checks for ABS/IND instructions which can only be seen in socket filters, therefore the check: if (env->prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) verbose("BPF_LD_ABS|IND instructions are only allowed in socket filters\n"); Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + kernel/bpf/verifier.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4a3d0f84f178..45da7ec7d274 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -117,6 +117,7 @@ enum bpf_map_type { enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, + BPF_PROG_TYPE_SOCKET_FILTER, }; /* flags for BPF_MAP_UPDATE_ELEM command */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b6a1f7c14a67..a28e09c7825d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1172,6 +1172,70 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn) return 0; } +/* verify safety of LD_ABS|LD_IND instructions: + * - they can only appear in the programs where ctx == skb + * - since they are wrappers of function calls, they scratch R1-R5 registers, + * preserve R6-R9, and store return value into R0 + * + * Implicit input: + * ctx == skb == R6 == CTX + * + * Explicit input: + * SRC == any register + * IMM == 32-bit immediate + * + * Output: + * R0 - 8/16/32-bit skb data converted to cpu endianness + */ +static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn) +{ + struct reg_state *regs = env->cur_state.regs; + u8 mode = BPF_MODE(insn->code); + struct reg_state *reg; + int i, err; + + if (env->prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) { + verbose("BPF_LD_ABS|IND instructions are only allowed in socket filters\n"); + return -EINVAL; + } + + if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || + (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { + verbose("BPF_LD_ABS uses reserved fields\n"); + return -EINVAL; + } + + /* check whether implicit source operand (register R6) is readable */ + err = check_reg_arg(regs, BPF_REG_6, SRC_OP); + if (err) + return err; + + if (regs[BPF_REG_6].type != PTR_TO_CTX) { + verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); + return -EINVAL; + } + + if (mode == BPF_IND) { + /* check explicit source operand */ + err = check_reg_arg(regs, insn->src_reg, SRC_OP); + if (err) + return err; + } + + /* reset caller saved regs to unreadable */ + for (i = 0; i < CALLER_SAVED_REGS; i++) { + reg = regs + caller_saved[i]; + reg->type = NOT_INIT; + reg->imm = 0; + } + + /* mark destination R0 register as readable, since it contains + * the value fetched from the packet + */ + regs[BPF_REG_0].type = UNKNOWN_VALUE; + return 0; +} + /* non-recursive DFS pseudo code * 1 procedure DFS-iterative(G,v): * 2 label v as discovered @@ -1677,8 +1741,10 @@ process_bpf_exit: u8 mode = BPF_MODE(insn->code); if (mode == BPF_ABS || mode == BPF_IND) { - verbose("LD_ABS is not supported yet\n"); - return -EINVAL; + err = check_ld_abs(env, insn); + if (err) + return err; + } else if (mode == BPF_IMM) { err = check_ld_imm(env, insn); if (err) -- cgit v1.2.3-71-gd317 From bac78aabcfece0c493b2ad824c68fbdc20448cbc Mon Sep 17 00:00:00 2001 From: Andri Yngvason Date: Wed, 3 Dec 2014 17:54:13 +0000 Subject: can: dev: Consolidate and unify state change handling The handling of can error states is different between platforms. This is an attempt to correct that problem. I've moved this handling into a generic function for changing the error state. This ensures that error state changes are handled the same way everywhere (where this function is used). This new mechanism also adds reverse state transitioning in error frames, i.e. the user will be notified through the socket interface when the state goes down. Signed-off-by: Andri Yngvason Acked-by: Wolfgang Grandegger Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 78 ++++++++++++++++++++++++++++++++++++++++++ include/linux/can/dev.h | 3 ++ include/uapi/linux/can/error.h | 1 + 3 files changed, 82 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 2cfe5012e4e5..3ec8f6f25e5f 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -273,6 +273,84 @@ static int can_get_bittiming(struct net_device *dev, struct can_bittiming *bt, return err; } +static void can_update_state_error_stats(struct net_device *dev, + enum can_state new_state) +{ + struct can_priv *priv = netdev_priv(dev); + + if (new_state <= priv->state) + return; + + switch (new_state) { + case CAN_STATE_ERROR_WARNING: + priv->can_stats.error_warning++; + break; + case CAN_STATE_ERROR_PASSIVE: + priv->can_stats.error_passive++; + break; + case CAN_STATE_BUS_OFF: + default: + break; + }; +} + +static int can_tx_state_to_frame(struct net_device *dev, enum can_state state) +{ + switch (state) { + case CAN_STATE_ERROR_ACTIVE: + return CAN_ERR_CRTL_ACTIVE; + case CAN_STATE_ERROR_WARNING: + return CAN_ERR_CRTL_TX_WARNING; + case CAN_STATE_ERROR_PASSIVE: + return CAN_ERR_CRTL_TX_PASSIVE; + default: + return 0; + } +} + +static int can_rx_state_to_frame(struct net_device *dev, enum can_state state) +{ + switch (state) { + case CAN_STATE_ERROR_ACTIVE: + return CAN_ERR_CRTL_ACTIVE; + case CAN_STATE_ERROR_WARNING: + return CAN_ERR_CRTL_RX_WARNING; + case CAN_STATE_ERROR_PASSIVE: + return CAN_ERR_CRTL_RX_PASSIVE; + default: + return 0; + } +} + +void can_change_state(struct net_device *dev, struct can_frame *cf, + enum can_state tx_state, enum can_state rx_state) +{ + struct can_priv *priv = netdev_priv(dev); + enum can_state new_state = max(tx_state, rx_state); + + if (unlikely(new_state == priv->state)) { + netdev_warn(dev, "%s: oops, state did not change", __func__); + return; + } + + netdev_dbg(dev, "New error state: %d\n", new_state); + + can_update_state_error_stats(dev, new_state); + priv->state = new_state; + + if (unlikely(new_state == CAN_STATE_BUS_OFF)) { + cf->can_id |= CAN_ERR_BUSOFF; + return; + } + + cf->can_id |= CAN_ERR_CRTL; + cf->data[1] |= tx_state >= rx_state ? + can_tx_state_to_frame(dev, tx_state) : 0; + cf->data[1] |= tx_state <= rx_state ? + can_rx_state_to_frame(dev, rx_state) : 0; +} +EXPORT_SYMBOL_GPL(can_change_state); + /* * Local echo of CAN messages * diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index b37ea95bc348..c05ff0f9f9a5 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -127,6 +127,9 @@ void unregister_candev(struct net_device *dev); int can_restart_now(struct net_device *dev); void can_bus_off(struct net_device *dev); +void can_change_state(struct net_device *dev, struct can_frame *cf, + enum can_state tx_state, enum can_state rx_state); + void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, unsigned int idx); unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); diff --git a/include/uapi/linux/can/error.h b/include/uapi/linux/can/error.h index c247446ab25a..1c508be9687f 100644 --- a/include/uapi/linux/can/error.h +++ b/include/uapi/linux/can/error.h @@ -71,6 +71,7 @@ #define CAN_ERR_CRTL_TX_PASSIVE 0x20 /* reached error passive status TX */ /* (at least one error counter exceeds */ /* the protocol-defined level of 127) */ +#define CAN_ERR_CRTL_ACTIVE 0x40 /* recovered to error active state */ /* error in CAN protocol (type) / data[2] */ #define CAN_ERR_PROT_UNSPEC 0x00 /* unspecified */ -- cgit v1.2.3-71-gd317 From 829ae9d611651467fe6cd7be834bd33ca6b28dfe Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sun, 30 Nov 2014 22:22:34 -0500 Subject: net-timestamp: allow reading recv cmsg on errqueue with origin tstamp Allow reading of timestamps and cmsg at the same time on all relevant socket families. One use is to correlate timestamps with egress device, by asking for cmsg IP_PKTINFO. on AF_INET sockets, call the relevant function (ip_cmsg_recv). To avoid changing legacy expectations, only do so if the caller sets a new timestamping flag SOF_TIMESTAMPING_OPT_CMSG. on AF_INET6 sockets, IPV6_PKTINFO and all other recv cmsg are already returned for all origins. only change is to set ifindex, which is not initialized for all error origins. In both cases, only generate the pktinfo message if an ifindex is known. This is not the case for ACK timestamps. The difference between the protocol families is probably a historical accident as a result of the different conditions for generating cmsg in the relevant ip(v6)_recv_error function: ipv4: if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) { ipv6: if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { At one time, this was the same test bar for the ICMP/ICMP6 distinction. This is no longer true. Signed-off-by: Willem de Bruijn ---- Changes v1 -> v2 large rewrite - integrate with existing pktinfo cmsg generation code - on ipv4: only send with new flag, to maintain legacy behavior - on ipv6: send at most a single pktinfo cmsg - on ipv6: initialize fields if not yet initialized The recv cmsg interfaces are also relevant to the discussion of whether looping packet headers is problematic. For v6, cmsgs that identify many headers are already returned. This patch expands that to v4. If it sounds reasonable, I will follow with patches 1. request timestamps without payload with SOF_TIMESTAMPING_OPT_TSONLY (http://patchwork.ozlabs.org/patch/366967/) 2. sysctl to conditionally drop all timestamps that have payload or cmsg from users without CAP_NET_RAW. Signed-off-by: David S. Miller --- Documentation/networking/timestamping.txt | 12 +++++++++++- include/uapi/linux/net_tstamp.h | 3 ++- net/ipv4/ip_sockglue.c | 22 ++++++++++++++++++++-- net/ipv6/datagram.c | 21 +++++++++++++++++++-- 4 files changed, 52 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index 1d6d02d6ba52..b08e27261ff9 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -122,7 +122,7 @@ SOF_TIMESTAMPING_RAW_HARDWARE: 1.3.3 Timestamp Options -The interface supports one option +The interface supports the options SOF_TIMESTAMPING_OPT_ID: @@ -145,6 +145,16 @@ SOF_TIMESTAMPING_OPT_ID: stream sockets, it increments with every byte. +SOF_TIMESTAMPING_OPT_CMSG: + + Support recv() cmsg for all timestamped packets. Control messages + are already supported unconditionally on all packets with receive + timestamps and on IPv6 packets with transmit timestamp. This option + extends them to IPv4 packets with transmit timestamp. One use case + is to correlate packets with their egress device, by enabling socket + option IP_PKTINFO simultaneously. + + 1.4 Bytestream Timestamps The SO_TIMESTAMPING interface supports timestamping of bytes in a diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index ff354021bb69..edbc888ceb51 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -23,8 +23,9 @@ enum { SOF_TIMESTAMPING_OPT_ID = (1<<7), SOF_TIMESTAMPING_TX_SCHED = (1<<8), SOF_TIMESTAMPING_TX_ACK = (1<<9), + SOF_TIMESTAMPING_OPT_CMSG = (1<<10), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_ACK, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_CMSG, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 59eba6c7a512..640f26c6a9fe 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -399,6 +399,22 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf kfree_skb(skb); } +static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk, + const struct sk_buff *skb, + int ee_origin) +{ + struct in_pktinfo *info = PKTINFO_SKB_CB(skb); + + if ((ee_origin != SO_EE_ORIGIN_TIMESTAMPING) || + (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) || + (!skb->dev)) + return false; + + info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr; + info->ipi_ifindex = skb->dev->ifindex; + return true; +} + /* * Handle MSG_ERRQUEUE */ @@ -446,7 +462,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; sin->sin_family = AF_UNSPEC; - if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) { + + if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) { struct inet_sock *inet = inet_sk(sk); sin->sin_family = AF_INET; @@ -1051,7 +1069,7 @@ e_inval: } /** - * ipv4_pktinfo_prepare - transfert some info from rtable to skb + * ipv4_pktinfo_prepare - transfer some info from rtable to skb * @sk: socket * @skb: buffer * diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index cc1139687fd7..2464a00e36ab 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -325,6 +325,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) kfree_skb(skb); } +static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb) +{ + int ifindex = skb->dev ? skb->dev->ifindex : -1; + + if (skb->protocol == htons(ETH_P_IPV6)) + IP6CB(skb)->iif = ifindex; + else + PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex; +} + /* * Handle MSG_ERRQUEUE */ @@ -388,8 +398,12 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_port = 0; - if (np->rxopt.all) + if (np->rxopt.all) { + if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP && + serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6) + ip6_datagram_prepare_pktinfo_errqueue(skb); ip6_datagram_recv_common_ctl(sk, msg, skb); + } if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) @@ -491,7 +505,10 @@ void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg, ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, &src_info.ipi6_addr); } - put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); + + if (src_info.ipi6_ifindex >= 0) + put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, + sizeof(src_info), &src_info); } } -- cgit v1.2.3-71-gd317 From 892311f66f2411b813ca631009356891a0c2b0a1 Mon Sep 17 00:00:00 2001 From: Eyal Perry Date: Tue, 2 Dec 2014 18:12:10 +0200 Subject: ethtool: Support for configurable RSS hash function This patch extends the set/get_rxfh ethtool-options for getting or setting the RSS hash function. It modifies drivers implementation of set/get_rxfh accordingly. This change also delegates the responsibility of checking whether a modification to a certain RX flow hash parameter is supported to the driver implementation of set_rxfh. User-kernel API is done through the new hfunc bitmask field in the ethtool_rxfh struct. A bit set in the hfunc field is corresponding to an index in the new string-set ETH_SS_RSS_HASH_FUNCS. Got approval from most of the relevant driver maintainers that their driver is using Toeplitz, and for the few that didn't answered, also assumed it is Toeplitz. Cc: Tom Lendacky Cc: Ariel Elior Cc: Prashant Sreedharan Cc: Michael Chan Cc: Hariprasad S Cc: Sathya Perla Cc: Subbu Seetharaman Cc: Ajit Khaparde Cc: Jeff Kirsher Cc: Jesse Brandeburg Cc: Bruce Allan Cc: Carolyn Wyborny Cc: Don Skidmore Cc: Greg Rose Cc: Matthew Vick Cc: John Ronciak Cc: Mitch Williams Cc: Amir Vadai Cc: Solarflare linux maintainers Cc: Shradha Shah Cc: Shreyas Bhatewara Cc: "VMware, Inc." Cc: Ben Hutchings Signed-off-by: Eyal Perry Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c | 11 +++- .../net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 20 ++++++- drivers/net/ethernet/broadcom/tg3.c | 20 ++++++- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 18 +++++- drivers/net/ethernet/emulex/benet/be_ethtool.c | 12 +++- drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c | 12 +++- drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c | 17 +++++- drivers/net/ethernet/intel/igb/igb_ethtool.c | 16 ++++- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 13 +++- drivers/net/ethernet/sfc/ethtool.c | 18 ++++-- drivers/net/vmxnet3/vmxnet3_ethtool.c | 15 ++++- include/linux/ethtool.h | 42 +++++++++---- include/uapi/linux/ethtool.h | 10 +++- net/core/ethtool.c | 69 ++++++++++++---------- 14 files changed, 223 insertions(+), 70 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 95d44538357f..ebf489351555 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -511,7 +511,8 @@ static u32 xgbe_get_rxfh_indir_size(struct net_device *netdev) return ARRAY_SIZE(pdata->rss_table); } -static int xgbe_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key) +static int xgbe_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) { struct xgbe_prv_data *pdata = netdev_priv(netdev); unsigned int i; @@ -525,16 +526,22 @@ static int xgbe_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key) if (key) memcpy(key, pdata->rss_key, sizeof(pdata->rss_key)); + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + return 0; } static int xgbe_set_rxfh(struct net_device *netdev, const u32 *indir, - const u8 *key) + const u8 *key, const u8 hfunc) { struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_hw_if *hw_if = &pdata->hw_if; unsigned int ret; + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + if (indir) { ret = hw_if->set_rss_lookup_table(pdata, indir); if (ret) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 1edc931b1458..ffe4e003e636 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -3358,12 +3358,18 @@ static u32 bnx2x_get_rxfh_indir_size(struct net_device *dev) return T_ETH_INDIRECTION_TABLE_SIZE; } -static int bnx2x_get_rxfh(struct net_device *dev, u32 *indir, u8 *key) +static int bnx2x_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, + u8 *hfunc) { struct bnx2x *bp = netdev_priv(dev); u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0}; size_t i; + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + if (!indir) + return 0; + /* Get the current configuration of the RSS indirection table */ bnx2x_get_rss_ind_table(&bp->rss_conf_obj, ind_table); @@ -3383,11 +3389,21 @@ static int bnx2x_get_rxfh(struct net_device *dev, u32 *indir, u8 *key) } static int bnx2x_set_rxfh(struct net_device *dev, const u32 *indir, - const u8 *key) + const u8 *key, const u8 hfunc) { struct bnx2x *bp = netdev_priv(dev); size_t i; + /* We require at least one supported parameter to be changed and no + * change in any of the unsupported parameters + */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + + if (!indir) + return 0; + for (i = 0; i < T_ETH_INDIRECTION_TABLE_SIZE; i++) { /* * The same as in bnx2x_get_rxfh: we can't use a memcpy() diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 43fd1b72c1ea..bb48a610b72a 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12561,22 +12561,38 @@ static u32 tg3_get_rxfh_indir_size(struct net_device *dev) return size; } -static int tg3_get_rxfh(struct net_device *dev, u32 *indir, u8 *key) +static int tg3_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfunc) { struct tg3 *tp = netdev_priv(dev); int i; + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + if (!indir) + return 0; + for (i = 0; i < TG3_RSS_INDIR_TBL_SIZE; i++) indir[i] = tp->rss_ind_tbl[i]; return 0; } -static int tg3_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key) +static int tg3_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, + const u8 hfunc) { struct tg3 *tp = netdev_priv(dev); size_t i; + /* We require at least one supported parameter to be changed and no + * change in any of the unsupported parameters + */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + + if (!indir) + return 0; + for (i = 0; i < TG3_RSS_INDIR_TBL_SIZE; i++) tp->rss_ind_tbl[i] = indir[i]; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 3aea82bb9039..e7342bc85026 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2923,21 +2923,35 @@ static u32 get_rss_table_size(struct net_device *dev) return pi->rss_size; } -static int get_rss_table(struct net_device *dev, u32 *p, u8 *key) +static int get_rss_table(struct net_device *dev, u32 *p, u8 *key, u8 *hfunc) { const struct port_info *pi = netdev_priv(dev); unsigned int n = pi->rss_size; + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + if (!p) + return 0; while (n--) p[n] = pi->rss[n]; return 0; } -static int set_rss_table(struct net_device *dev, const u32 *p, const u8 *key) +static int set_rss_table(struct net_device *dev, const u32 *p, const u8 *key, + const u8 hfunc) { unsigned int i; struct port_info *pi = netdev_priv(dev); + /* We require at least one supported parameter to be changed and no + * change in any of the unsupported parameters + */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + if (!p) + return 0; + for (i = 0; i < pi->rss_size; i++) pi->rss[i] = p[i]; if (pi->adapter->flags & FULL_INIT_DONE) diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index e42a791c1835..73a500ccbf69 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -1171,7 +1171,8 @@ static u32 be_get_rxfh_key_size(struct net_device *netdev) return RSS_HASH_KEY_LEN; } -static int be_get_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey) +static int be_get_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey, + u8 *hfunc) { struct be_adapter *adapter = netdev_priv(netdev); int i; @@ -1185,16 +1186,23 @@ static int be_get_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey) if (hkey) memcpy(hkey, rss->rss_hkey, RSS_HASH_KEY_LEN); + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + return 0; } static int be_set_rxfh(struct net_device *netdev, const u32 *indir, - const u8 *hkey) + const u8 *hkey, const u8 hfunc) { int rc = 0, i, j; struct be_adapter *adapter = netdev_priv(netdev); u8 rsstable[RSS_INDIR_TABLE_LEN]; + /* We do not allow change in unsupported parameters */ + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + if (indir) { struct be_rx_obj *rxo; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c index 2d04464e6aa3..651f53bc7376 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c @@ -916,11 +916,15 @@ static u32 fm10k_get_rssrk_size(struct net_device *netdev) return FM10K_RSSRK_SIZE * FM10K_RSSRK_ENTRIES_PER_REG; } -static int fm10k_get_rssh(struct net_device *netdev, u32 *indir, u8 *key) +static int fm10k_get_rssh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) { struct fm10k_intfc *interface = netdev_priv(netdev); int i, err; + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + err = fm10k_get_reta(netdev, indir); if (err || !key) return err; @@ -932,12 +936,16 @@ static int fm10k_get_rssh(struct net_device *netdev, u32 *indir, u8 *key) } static int fm10k_set_rssh(struct net_device *netdev, const u32 *indir, - const u8 *key) + const u8 *key, const u8 hfunc) { struct fm10k_intfc *interface = netdev_priv(netdev); struct fm10k_hw *hw = &interface->hw; int i, err; + /* We do not allow change in unsupported parameters */ + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + err = fm10k_set_reta(netdev, indir); if (err || !key) return err; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index 69a269b23be6..69b97bac182c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -627,13 +627,19 @@ static u32 i40evf_get_rxfh_indir_size(struct net_device *netdev) * * Reads the indirection table directly from the hardware. Always returns 0. **/ -static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key) +static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) { struct i40evf_adapter *adapter = netdev_priv(netdev); struct i40e_hw *hw = &adapter->hw; u32 hlut_val; int i, j; + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + if (!indir) + return 0; + for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) { hlut_val = rd32(hw, I40E_VFQF_HLUT(i)); indir[j++] = hlut_val & 0xff; @@ -654,13 +660,20 @@ static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key) * returns 0 after programming the table. **/ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir, - const u8 *key) + const u8 *key, const u8 hfunc) { struct i40evf_adapter *adapter = netdev_priv(netdev); struct i40e_hw *hw = &adapter->hw; u32 hlut_val; int i, j; + /* We do not allow change in unsupported parameters */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + if (!indir) + return 0; + for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) { hlut_val = indir[j++]; hlut_val |= indir[j++] << 8; diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 02cfd3b14762..d5673eb90c54 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2842,11 +2842,16 @@ static u32 igb_get_rxfh_indir_size(struct net_device *netdev) return IGB_RETA_SIZE; } -static int igb_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key) +static int igb_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) { struct igb_adapter *adapter = netdev_priv(netdev); int i; + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + if (!indir) + return 0; for (i = 0; i < IGB_RETA_SIZE; i++) indir[i] = adapter->rss_indir_tbl[i]; @@ -2889,13 +2894,20 @@ void igb_write_rss_indir_tbl(struct igb_adapter *adapter) } static int igb_set_rxfh(struct net_device *netdev, const u32 *indir, - const u8 *key) + const u8 *key, const u8 hfunc) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; int i; u32 num_queues; + /* We do not allow change in unsupported parameters */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + if (!indir) + return 0; + num_queues = adapter->rss_queues; switch (hw->mac.type) { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index c45e06abc073..28c3fc5a0791 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -978,7 +978,8 @@ static u32 mlx4_en_get_rxfh_key_size(struct net_device *netdev) return MLX4_EN_RSS_KEY_SIZE; } -static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key) +static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key, + u8 *hfunc) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_rss_map *rss_map = &priv->rss_map; @@ -990,16 +991,20 @@ static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key) rss_rings = 1 << ilog2(rss_rings); while (n--) { + if (!ring_index) + break; ring_index[n] = rss_map->qps[n % rss_rings].qpn - rss_map->base_qpn; } if (key) memcpy(key, priv->rss_key, MLX4_EN_RSS_KEY_SIZE); + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; return err; } static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, - const u8 *key) + const u8 *key, const u8 hfunc) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; @@ -1008,6 +1013,10 @@ static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, int i; int rss_rings = 0; + /* We do not allow change in unsupported parameters */ + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + /* Calculate RSS table size and make sure flows are spread evenly * between rings */ diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index cad258a78708..4835bc0d0de8 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -1086,19 +1086,29 @@ static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev) 0 : ARRAY_SIZE(efx->rx_indir_table)); } -static int efx_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key) +static int efx_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key, + u8 *hfunc) { struct efx_nic *efx = netdev_priv(net_dev); - memcpy(indir, efx->rx_indir_table, sizeof(efx->rx_indir_table)); + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + if (indir) + memcpy(indir, efx->rx_indir_table, sizeof(efx->rx_indir_table)); return 0; } -static int efx_ethtool_set_rxfh(struct net_device *net_dev, - const u32 *indir, const u8 *key) +static int efx_ethtool_set_rxfh(struct net_device *net_dev, const u32 *indir, + const u8 *key, const u8 hfunc) { struct efx_nic *efx = netdev_priv(net_dev); + /* We do not allow change in unsupported parameters */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + if (!indir) + return 0; memcpy(efx->rx_indir_table, indir, sizeof(efx->rx_indir_table)); efx->type->rx_push_rss_config(efx); return 0; diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index b725fd9e7803..b7b53329d575 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -583,12 +583,16 @@ vmxnet3_get_rss_indir_size(struct net_device *netdev) } static int -vmxnet3_get_rss(struct net_device *netdev, u32 *p, u8 *key) +vmxnet3_get_rss(struct net_device *netdev, u32 *p, u8 *key, u8 *hfunc) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); struct UPT1_RSSConf *rssConf = adapter->rss_conf; unsigned int n = rssConf->indTableSize; + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + if (!p) + return 0; while (n--) p[n] = rssConf->indTable[n]; return 0; @@ -596,13 +600,20 @@ vmxnet3_get_rss(struct net_device *netdev, u32 *p, u8 *key) } static int -vmxnet3_set_rss(struct net_device *netdev, const u32 *p, const u8 *key) +vmxnet3_set_rss(struct net_device *netdev, const u32 *p, const u8 *key, + const u8 hfunc) { unsigned int i; unsigned long flags; struct vmxnet3_adapter *adapter = netdev_priv(netdev); struct UPT1_RSSConf *rssConf = adapter->rss_conf; + /* We do not allow change in unsupported parameters */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + if (!p) + return 0; for (i = 0; i < rssConf->indTableSize; i++) rssConf->indTable[i] = p[i]; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c1a2d60dfb82..653dc9c4ebac 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -59,6 +59,26 @@ enum ethtool_phys_id_state { ETHTOOL_ID_OFF }; +enum { + ETH_RSS_HASH_TOP_BIT, /* Configurable RSS hash function - Toeplitz */ + ETH_RSS_HASH_XOR_BIT, /* Configurable RSS hash function - Xor */ + + /* + * Add your fresh new hash function bits above and remember to update + * rss_hash_func_strings[] in ethtool.c + */ + ETH_RSS_HASH_FUNCS_COUNT +}; + +#define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) +#define __ETH_RSS_HASH(name) __ETH_RSS_HASH_BIT(ETH_RSS_HASH_##name##_BIT) + +#define ETH_RSS_HASH_TOP __ETH_RSS_HASH(TOP) +#define ETH_RSS_HASH_XOR __ETH_RSS_HASH(XOR) + +#define ETH_RSS_HASH_UNKNOWN 0 +#define ETH_RSS_HASH_NO_CHANGE 0 + struct net_device; /* Some generic methods drivers may use in their ethtool_ops */ @@ -158,17 +178,14 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * Returns zero if not supported for this specific device. * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table. * Returns zero if not supported for this specific device. - * @get_rxfh: Get the contents of the RX flow hash indirection table and hash - * key. - * Will only be called if one or both of @get_rxfh_indir_size and - * @get_rxfh_key_size are implemented and return non-zero. - * Returns a negative error code or zero. - * @set_rxfh: Set the contents of the RX flow hash indirection table and/or - * hash key. In case only the indirection table or hash key is to be - * changed, the other argument will be %NULL. - * Will only be called if one or both of @get_rxfh_indir_size and - * @get_rxfh_key_size are implemented and return non-zero. + * @get_rxfh: Get the contents of the RX flow hash indirection table, hash key + * and/or hash function. * Returns a negative error code or zero. + * @set_rxfh: Set the contents of the RX flow hash indirection table, hash + * key, and/or hash function. Arguments which are set to %NULL or zero + * will remain unchanged. + * Returns a negative error code or zero. An error code must be returned + * if at least one unsupported change was requested. * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or * zero. @@ -241,9 +258,10 @@ struct ethtool_ops { int (*reset)(struct net_device *, u32 *); u32 (*get_rxfh_key_size)(struct net_device *); u32 (*get_rxfh_indir_size)(struct net_device *); - int (*get_rxfh)(struct net_device *, u32 *indir, u8 *key); + int (*get_rxfh)(struct net_device *, u32 *indir, u8 *key, + u8 *hfunc); int (*set_rxfh)(struct net_device *, const u32 *indir, - const u8 *key); + const u8 *key, const u8 hfunc); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index eb2095b42fbb..5f66d9c2889d 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -534,6 +534,7 @@ struct ethtool_pauseparam { * @ETH_SS_NTUPLE_FILTERS: Previously used with %ETHTOOL_GRXNTUPLE; * now deprecated * @ETH_SS_FEATURES: Device feature names + * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names */ enum ethtool_stringset { ETH_SS_TEST = 0, @@ -541,6 +542,7 @@ enum ethtool_stringset { ETH_SS_PRIV_FLAGS, ETH_SS_NTUPLE_FILTERS, ETH_SS_FEATURES, + ETH_SS_RSS_HASH_FUNCS, }; /** @@ -884,6 +886,8 @@ struct ethtool_rxfh_indir { * @key_size: On entry, the array size of the user buffer for the hash key, * which may be zero. On return from %ETHTOOL_GRSSH, the size of the * hardware hash key. + * @hfunc: Defines the current RSS hash function used by HW (or to be set to). + * Valid values are one of the %ETH_RSS_HASH_*. * @rsvd: Reserved for future extensions. * @rss_config: RX ring/queue index for each hash value i.e., indirection table * of @indir_size __u32 elements, followed by hash key of @key_size @@ -893,14 +897,16 @@ struct ethtool_rxfh_indir { * size should be returned. For %ETHTOOL_SRSSH, an @indir_size of * %ETH_RXFH_INDIR_NO_CHANGE means that indir table setting is not requested * and a @indir_size of zero means the indir table should be reset to default - * values. + * values. An hfunc of zero means that hash function setting is not requested. */ struct ethtool_rxfh { __u32 cmd; __u32 rss_context; __u32 indir_size; __u32 key_size; - __u32 rsvd[2]; + __u8 hfunc; + __u8 rsvd8[3]; + __u32 rsvd32; __u32 rss_config[0]; }; #define ETH_RXFH_INDIR_NO_CHANGE 0xffffffff diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 715f51f321e9..550892cd6b3f 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -100,6 +100,12 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_BUSY_POLL_BIT] = "busy-poll", }; +static const char +rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = { + [ETH_RSS_HASH_TOP_BIT] = "toeplitz", + [ETH_RSS_HASH_XOR_BIT] = "xor", +}; + static int ethtool_get_features(struct net_device *dev, void __user *useraddr) { struct ethtool_gfeatures cmd = { @@ -185,6 +191,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset) if (sset == ETH_SS_FEATURES) return ARRAY_SIZE(netdev_features_strings); + if (sset == ETH_SS_RSS_HASH_FUNCS) + return ARRAY_SIZE(rss_hash_func_strings); + if (ops->get_sset_count && ops->get_strings) return ops->get_sset_count(dev, sset); else @@ -199,6 +208,9 @@ static void __ethtool_get_strings(struct net_device *dev, if (stringset == ETH_SS_FEATURES) memcpy(data, netdev_features_strings, sizeof(netdev_features_strings)); + else if (stringset == ETH_SS_RSS_HASH_FUNCS) + memcpy(data, rss_hash_func_strings, + sizeof(rss_hash_func_strings)); else /* ops->get_strings is valid because checked earlier */ ops->get_strings(dev, stringset, data); @@ -618,7 +630,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, if (!indir) return -ENOMEM; - ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL); + ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); if (ret) goto out; @@ -679,7 +691,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, goto out; } - ret = ops->set_rxfh(dev, indir, NULL); + ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE); out: kfree(indir); @@ -697,12 +709,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, u32 total_size; u32 indir_bytes; u32 *indir = NULL; + u8 dev_hfunc = 0; u8 *hkey = NULL; u8 *rss_config; - if (!(dev->ethtool_ops->get_rxfh_indir_size || - dev->ethtool_ops->get_rxfh_key_size) || - !dev->ethtool_ops->get_rxfh) + if (!ops->get_rxfh) return -EOPNOTSUPP; if (ops->get_rxfh_indir_size) @@ -710,16 +721,14 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, if (ops->get_rxfh_key_size) dev_key_size = ops->get_rxfh_key_size(dev); - if ((dev_key_size + dev_indir_size) == 0) - return -EOPNOTSUPP; - if (copy_from_user(&rxfh, useraddr, sizeof(rxfh))) return -EFAULT; user_indir_size = rxfh.indir_size; user_key_size = rxfh.key_size; /* Check that reserved fields are 0 for now */ - if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1]) + if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] || + rxfh.rsvd8[2] || rxfh.rsvd32) return -EINVAL; rxfh.indir_size = dev_indir_size; @@ -727,13 +736,6 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, if (copy_to_user(useraddr, &rxfh, sizeof(rxfh))) return -EFAULT; - /* If the user buffer size is 0, this is just a query for the - * device table size and key size. Otherwise, if the User size is - * not equal to device table size or key size it's an error. - */ - if (!user_indir_size && !user_key_size) - return 0; - if ((user_indir_size && (user_indir_size != dev_indir_size)) || (user_key_size && (user_key_size != dev_key_size))) return -EINVAL; @@ -750,14 +752,19 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, if (user_key_size) hkey = rss_config + indir_bytes; - ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey); - if (!ret) { - if (copy_to_user(useraddr + - offsetof(struct ethtool_rxfh, rss_config[0]), - rss_config, total_size)) - ret = -EFAULT; - } + ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc); + if (ret) + goto out; + if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, hfunc), + &dev_hfunc, sizeof(rxfh.hfunc))) { + ret = -EFAULT; + } else if (copy_to_user(useraddr + + offsetof(struct ethtool_rxfh, rss_config[0]), + rss_config, total_size)) { + ret = -EFAULT; + } +out: kfree(rss_config); return ret; @@ -776,33 +783,31 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, u8 *rss_config; u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); - if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size) || - !ops->get_rxnfc || !ops->set_rxfh) + if (!ops->get_rxnfc || !ops->set_rxfh) return -EOPNOTSUPP; if (ops->get_rxfh_indir_size) dev_indir_size = ops->get_rxfh_indir_size(dev); if (ops->get_rxfh_key_size) dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev); - if ((dev_key_size + dev_indir_size) == 0) - return -EOPNOTSUPP; if (copy_from_user(&rxfh, useraddr, sizeof(rxfh))) return -EFAULT; /* Check that reserved fields are 0 for now */ - if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1]) + if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] || + rxfh.rsvd8[2] || rxfh.rsvd32) return -EINVAL; - /* If either indir or hash key is valid, proceed further. - * It is not valid to request that both be unchanged. + /* If either indir, hash key or function is valid, proceed further. + * Must request at least one change: indir size, hash key or function. */ if ((rxfh.indir_size && rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE && rxfh.indir_size != dev_indir_size) || (rxfh.key_size && (rxfh.key_size != dev_key_size)) || (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE && - rxfh.key_size == 0)) + rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE)) return -EINVAL; if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) @@ -845,7 +850,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } } - ret = ops->set_rxfh(dev, indir, hkey); + ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); out: kfree(rss_config); -- cgit v1.2.3-71-gd317 From 5f4d8d97f5d979c6a62dcc45619b06d34311937c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 3 Dec 2014 09:33:17 -0800 Subject: tc_act: export uapi header file This file is used by iproute2 and should be exported. Signed-off-by: Stephen Hemminger Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild index 56f121605c99..b057da2b87a4 100644 --- a/include/uapi/linux/tc_act/Kbuild +++ b/include/uapi/linux/tc_act/Kbuild @@ -7,3 +7,4 @@ header-y += tc_mirred.h header-y += tc_nat.h header-y += tc_pedit.h header-y += tc_skbedit.h +header-y += tc_vlan.h -- cgit v1.2.3-71-gd317 From 6e3a8a937c2f86ee0b2d354808fc026a143b4518 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Dec 2014 16:13:23 -0800 Subject: tcp_cubic: add SNMP counters to track how effective is Hystart When deploying FQ pacing, one thing we noticed is that CUBIC Hystart triggers too soon. Having SNMP counters to have an idea of how often the various Hystart methods trigger is useful prior to any modifications. This patch adds SNMP counters tracking, how many time "ack train" or "Delay" based Hystart triggers, and cumulative sum of cwnd at the time Hystart decided to end SS (Slow Start) myhost:~# nstat -a | grep Hystart TcpExtTCPHystartTrainDetect 9 0.0 TcpExtTCPHystartTrainCwnd 20650 0.0 TcpExtTCPHystartDelayDetect 10 0.0 TcpExtTCPHystartDelayCwnd 360 0.0 -> Train detection was triggered 9 times, and average cwnd was 20650/9=2294, Delay detection was triggered 10 times and average cwnd was 36 Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 4 ++++ net/ipv4/proc.c | 4 ++++ net/ipv4/tcp_cubic.c | 31 ++++++++++++++++++++++--------- 3 files changed, 30 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 30f541b32895..b22224100011 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -266,6 +266,10 @@ enum LINUX_MIB_TCPWANTZEROWINDOWADV, /* TCPWantZeroWindowAdv */ LINUX_MIB_TCPSYNRETRANS, /* TCPSynRetrans */ LINUX_MIB_TCPORIGDATASENT, /* TCPOrigDataSent */ + LINUX_MIB_TCPHYSTARTTRAINDETECT, /* TCPHystartTrainDetect */ + LINUX_MIB_TCPHYSTARTTRAINCWND, /* TCPHystartTrainCwnd */ + LINUX_MIB_TCPHYSTARTDELAYDETECT, /* TCPHystartDelayDetect */ + LINUX_MIB_TCPHYSTARTDELAYCWND, /* TCPHystartDelayCwnd */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 6513ade8d6dc..8f9cd200ce20 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -288,6 +288,10 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPWantZeroWindowAdv", LINUX_MIB_TCPWANTZEROWINDOWADV), SNMP_MIB_ITEM("TCPSynRetrans", LINUX_MIB_TCPSYNRETRANS), SNMP_MIB_ITEM("TCPOrigDataSent", LINUX_MIB_TCPORIGDATASENT), + SNMP_MIB_ITEM("TCPHystartTrainDetect", LINUX_MIB_TCPHYSTARTTRAINDETECT), + SNMP_MIB_ITEM("TCPHystartTrainCwnd", LINUX_MIB_TCPHYSTARTTRAINCWND), + SNMP_MIB_ITEM("TCPHystartDelayDetect", LINUX_MIB_TCPHYSTARTDELAYDETECT), + SNMP_MIB_ITEM("TCPHystartDelayCwnd", LINUX_MIB_TCPHYSTARTDELAYCWND), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 20de0118c98e..c1d07c7ed03d 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -363,16 +363,28 @@ static void hystart_update(struct sock *sk, u32 delay) struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); - if (!(ca->found & hystart_detect)) { + if (ca->found & hystart_detect) + return; + + if (hystart_detect & HYSTART_ACK_TRAIN) { u32 now = bictcp_clock(); /* first detection parameter - ack-train detection */ if ((s32)(now - ca->last_ack) <= hystart_ack_delta) { ca->last_ack = now; - if ((s32)(now - ca->round_start) > ca->delay_min >> 4) + if ((s32)(now - ca->round_start) > ca->delay_min >> 4) { ca->found |= HYSTART_ACK_TRAIN; + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPHYSTARTTRAINDETECT); + NET_ADD_STATS_BH(sock_net(sk), + LINUX_MIB_TCPHYSTARTTRAINCWND, + tp->snd_cwnd); + tp->snd_ssthresh = tp->snd_cwnd; + } } + } + if (hystart_detect & HYSTART_DELAY) { /* obtain the minimum delay of more than sampling packets */ if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { if (ca->curr_rtt == 0 || ca->curr_rtt > delay) @@ -381,15 +393,16 @@ static void hystart_update(struct sock *sk, u32 delay) ca->sample_cnt++; } else { if (ca->curr_rtt > ca->delay_min + - HYSTART_DELAY_THRESH(ca->delay_min>>4)) + HYSTART_DELAY_THRESH(ca->delay_min>>4)) { ca->found |= HYSTART_DELAY; + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPHYSTARTDELAYDETECT); + NET_ADD_STATS_BH(sock_net(sk), + LINUX_MIB_TCPHYSTARTDELAYCWND, + tp->snd_cwnd); + tp->snd_ssthresh = tp->snd_cwnd; + } } - /* - * Either one of two conditions are met, - * we exit from slow start immediately. - */ - if (ca->found & hystart_detect) - tp->snd_ssthresh = tp->snd_cwnd; } } -- cgit v1.2.3-71-gd317 From fc0bdbbc67c9f7307d41f415abb1165778fe803f Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Mon, 8 Dec 2014 14:04:19 -0800 Subject: bridge: new mode flag to indicate mode 'undefined' This patch adds mode BRIDGE_MODE_UNDEF for cases where mode is not needed. Signed-off-by: Roopa Prabhu Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 296a556454e3..05f08695c6c9 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -106,6 +106,7 @@ struct __fdb_entry { #define BRIDGE_MODE_VEB 0 /* Default loopback mode */ #define BRIDGE_MODE_VEPA 1 /* 802.1Qbg defined VEPA mode */ #define BRIDGE_MODE_SWDEV 2 /* Full switch device offload */ +#define BRIDGE_MODE_UNDEF 0xFFFF /* mode undefined */ /* Bridge management nested attributes * [IFLA_AF_SPEC] = { -- cgit v1.2.3-71-gd317 From 4a5fdfe8b3669070112105573f2d7c487ec7ad0d Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Mon, 8 Dec 2014 14:04:21 -0800 Subject: bridge: remove mode BRIDGE_MODE_SWDEV This patch removes bridge mode swdev. Users can use BRIDGE_FLAGS_SELF to indicate swdev offload if needed. Signed-off-by: Roopa Prabhu Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 05f08695c6c9..b03ee8f62d3c 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -105,7 +105,6 @@ struct __fdb_entry { #define BRIDGE_MODE_VEB 0 /* Default loopback mode */ #define BRIDGE_MODE_VEPA 1 /* 802.1Qbg defined VEPA mode */ -#define BRIDGE_MODE_SWDEV 2 /* Full switch device offload */ #define BRIDGE_MODE_UNDEF 0xFFFF /* mode undefined */ /* Bridge management nested attributes -- cgit v1.2.3-71-gd317