From 9a57247f31e361f80508c40363366222dbbb6aa5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 27 Dec 2012 23:49:39 +0000 Subject: rtnl: expose carrier value with possibility to set it Signed-off-by: Jiri Pirko Acked-by: Flavio Leitner Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 60f3b6b90602..c4edfe11f1f7 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -142,6 +142,7 @@ enum { #define IFLA_PROMISCUITY IFLA_PROMISCUITY IFLA_NUM_TX_QUEUES, IFLA_NUM_RX_QUEUES, + IFLA_CARRIER, __IFLA_MAX }; -- cgit v1.2.3-71-gd317 From d582cffbcd04eae0bd8a83b05648bfd54bfd21c9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Oct 2012 17:53:44 +0200 Subject: nl80211/mac80211: support full station state in AP mode Today, stations are added already associated. That is inefficient if, for example, the driver has no room for stations any more because then the station will go through the entire auth/assoc handshake, only to be kicked out afterwards. To address this a bit better, at least with drivers using the new station state callback, allow hostapd to add stations in unauthenticated mode, just after receiving the AUTH frame, before even replying. Thus if there's no more space at that point, it can send a negative auth frame back. It still needs to handle later state transition errors though, of course. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 16 ++++++ net/mac80211/cfg.c | 115 ++++++++++++++++++++++++++----------------- net/mac80211/main.c | 3 +- net/wireless/nl80211.c | 24 +++++++++ 4 files changed, 113 insertions(+), 45 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e3e19f8b16f2..547017100a30 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1697,6 +1697,9 @@ enum nl80211_iftype { * flag can't be changed, it is only valid while adding a station, and * attempts to change it will silently be ignored (rather than rejected * as errors.) + * @NL80211_STA_FLAG_ASSOCIATED: station is associated; used with drivers + * that support %NL80211_FEATURE_FULL_AP_CLIENT_STATE to transition a + * previously added station into associated state * @NL80211_STA_FLAG_MAX: highest station flag number currently defined * @__NL80211_STA_FLAG_AFTER_LAST: internal use */ @@ -1708,6 +1711,7 @@ enum nl80211_sta_flags { NL80211_STA_FLAG_MFP, NL80211_STA_FLAG_AUTHENTICATED, NL80211_STA_FLAG_TDLS_PEER, + NL80211_STA_FLAG_ASSOCIATED, /* keep last */ __NL80211_STA_FLAG_AFTER_LAST, @@ -3140,6 +3144,17 @@ enum nl80211_ap_sme_features { * setting * @NL80211_FEATURE_P2P_GO_OPPPS: P2P GO implementation supports opportunistic * powersave + * @NL80211_FEATURE_FULL_AP_CLIENT_STATE: The driver supports full state + * transitions for AP clients. Without this flag (and if the driver + * doesn't have the AP SME in the device) the driver supports adding + * stations only when they're associated and adds them in associated + * state (to later be transitioned into authorized), with this flag + * they should be added before even sending the authentication reply + * and then transitioned into authenticated, associated and authorized + * states using station flags. 
+ * Note that even for drivers that support this, the default is to add + * stations in authenticated/associated state, so to add unauthenticated + * stations the authenticated/associated bits have to be set in the mask. */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3155,6 +3170,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_NEED_OBSS_SCAN = 1 << 10, NL80211_FEATURE_P2P_GO_CTWIN = 1 << 11, NL80211_FEATURE_P2P_GO_OPPPS = 1 << 12, + NL80211_FEATURE_FULL_AP_CLIENT_STATE = 1 << 13, }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 3e7d557fd481..f4d12c71928d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -510,6 +510,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP) | BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_TDLS_PEER); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHORIZED); @@ -521,6 +522,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_MFP); if (test_sta_flag(sta, WLAN_STA_AUTH)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHENTICATED); + if (test_sta_flag(sta, WLAN_STA_ASSOC)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); } @@ -1077,6 +1080,58 @@ static void ieee80211_send_layer2_update(struct sta_info *sta) netif_rx_ni(skb); } +static int sta_apply_auth_flags(struct ieee80211_local *local, + struct sta_info *sta, + u32 mask, u32 set) +{ + int ret; + + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + !test_sta_flag(sta, WLAN_STA_AUTH)) { + ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && + set & BIT(NL80211_STA_FLAG_ASSOCIATED) && + !test_sta_flag(sta, WLAN_STA_ASSOC)) { + ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { + if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) + ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); + else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + else + ret = 0; + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && + !(set & BIT(NL80211_STA_FLAG_ASSOCIATED)) && + test_sta_flag(sta, WLAN_STA_ASSOC)) { + ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + !(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && + test_sta_flag(sta, WLAN_STA_AUTH)) { + ret = sta_info_move_state(sta, IEEE80211_STA_NONE); + if (ret) + return ret; + } + + return 0; +} + static int sta_apply_parameters(struct ieee80211_local *local, struct sta_info *sta, struct station_parameters *params) @@ -1094,52 +1149,20 @@ static int sta_apply_parameters(struct ieee80211_local *local, mask = params->sta_flags_mask; set = params->sta_flags_set; - /* - * In mesh mode, we can clear AUTHENTICATED flag but must - * also make ASSOCIATED follow appropriately for the driver - * API. See also below, after AUTHORIZED changes. 
- */ - if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { - /* cfg80211 should not allow this in non-mesh modes */ - if (WARN_ON(!ieee80211_vif_is_mesh(&sdata->vif))) - return -EINVAL; - - if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && - !test_sta_flag(sta, WLAN_STA_AUTH)) { - ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); - if (ret) - return ret; - ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); - if (ret) - return ret; - } - } - - if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { - if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) - ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); - else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) - ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); - if (ret) - return ret; - } - - if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { - /* cfg80211 should not allow this in non-mesh modes */ - if (WARN_ON(!ieee80211_vif_is_mesh(&sdata->vif))) - return -EINVAL; - - if (!(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && - test_sta_flag(sta, WLAN_STA_AUTH)) { - ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); - if (ret) - return ret; - ret = sta_info_move_state(sta, IEEE80211_STA_NONE); - if (ret) - return ret; - } + if (ieee80211_vif_is_mesh(&sdata->vif)) { + /* + * In mesh mode, ASSOCIATED isn't part of the nl80211 + * API but must follow AUTHENTICATED for driver state. + */ + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) + mask |= BIT(NL80211_STA_FLAG_ASSOCIATED); + if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) + set |= BIT(NL80211_STA_FLAG_ASSOCIATED); } + ret = sta_apply_auth_flags(local, sta, mask, set); + if (ret) + return ret; if (mask & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) { if (set & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) @@ -1273,6 +1296,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (!sta) return -ENOMEM; + /* + * defaults -- if userspace wants something else we'll + * change it accordingly in sta_apply_parameters() + */ sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e6514f240fce..39cfe8f10ad2 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -541,7 +541,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, wiphy->features |= NL80211_FEATURE_SK_TX_STATUS | NL80211_FEATURE_SAE | NL80211_FEATURE_HT_IBSS | - NL80211_FEATURE_VIF_TXPOWER; + NL80211_FEATURE_VIF_TXPOWER | + NL80211_FEATURE_FULL_AP_CLIENT_STATE; if (!ops->hw_scan) wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b3cf7cc0d4a1..087f68ba6d7a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3231,11 +3231,21 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* accept only the listed bits */ if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP))) return -EINVAL; + /* but authenticated/associated only if driver handles it */ + if (!(rdev->wiphy.features & + NL80211_FEATURE_FULL_AP_CLIENT_STATE) && + params.sta_flags_mask & + (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED))) + return -EINVAL; + /* must be last in here for error handling */ params.vlan = get_vlan(info, rdev); if (IS_ERR(params.vlan)) @@ -3393,17 +3403,31 @@ static int nl80211_new_station(struct sk_buff *skb, struct 
genl_info *info) /* but don't bother the driver with it */ params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + /* allow authenticated/associated only if driver handles it */ + if (!(rdev->wiphy.features & + NL80211_FEATURE_FULL_AP_CLIENT_STATE) && + params.sta_flags_mask & + (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED))) + return -EINVAL; + /* must be last in here for error handling */ params.vlan = get_vlan(info, rdev); if (IS_ERR(params.vlan)) return PTR_ERR(params.vlan); break; case NL80211_IFTYPE_MESH_POINT: + /* associated is disallowed */ + if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) + return -EINVAL; /* TDLS peers cannot be added */ if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) return -EINVAL; break; case NL80211_IFTYPE_STATION: + /* associated is disallowed */ + if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) + return -EINVAL; /* Only TDLS peers can be added */ if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) return -EINVAL; -- cgit v1.2.3-71-gd317 From bb65a9cb953fdfe9c507e8dbb6c4ec2540484bd3 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 28 Dec 2012 16:06:28 +0800 Subject: xfrm: removes a superfluous check and add a statistic Remove the check if x->km.state equal to XFRM_STATE_VALID in xfrm_state_check_expire(), which will be done before call xfrm_state_check_expire(). add a LINUX_MIB_XFRMOUTSTATEINVALID statistic to record the outbound error due to invalid xfrm state. Signed-off-by: Li RongQing Signed-off-by: Steffen Klassert --- include/uapi/linux/snmp.h | 1 + net/xfrm/xfrm_output.c | 6 ++++++ net/xfrm/xfrm_proc.c | 1 + net/xfrm/xfrm_state.c | 3 --- 4 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index fdfba235f9f1..b49eab89c9fd 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -278,6 +278,7 @@ enum LINUX_MIB_XFRMOUTPOLDEAD, /* XfrmOutPolDead */ LINUX_MIB_XFRMOUTPOLERROR, /* XfrmOutPolError */ LINUX_MIB_XFRMFWDHDRERROR, /* XfrmFwdHdrError*/ + LINUX_MIB_XFRMOUTSTATEINVALID, /* XfrmOutStateInvalid */ __LINUX_MIB_XFRMMAX }; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 95a338c89f99..3670526e70b9 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -61,6 +61,12 @@ static int xfrm_output_one(struct sk_buff *skb, int err) } spin_lock_bh(&x->lock); + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID); + goto error_nolock; + } + err = xfrm_state_check_expire(x); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED); diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index d0a1af8ed584..603903853e89 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -43,6 +43,7 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD), SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR), SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), + SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), SNMP_MIB_SENTINEL }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 3459692092ec..05db2362a231 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1370,9 +1370,6 @@ int xfrm_state_check_expire(struct xfrm_state *x) if (!x->curlft.use_time) x->curlft.use_time = get_seconds(); - if (x->km.state != XFRM_STATE_VALID) - return -EINVAL; - if (x->curlft.bytes >= 
x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; -- cgit v1.2.3-71-gd317 From 390a1bd8538132186ddb679cafe9e75b7ef7e2d2 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 19 Dec 2012 19:11:32 +0100 Subject: NFC: Initial Secure Element API Each NFC adapter can have several links to different secure elements and that property needs to be exported by the drivers. A secure element link can be enabled and disabled, and card emulation will be handled by the currently active one. Otherwise card emulation will be host implemented. Signed-off-by: Samuel Ortiz --- drivers/nfc/nfcwilink.c | 1 + drivers/nfc/pn533.c | 1 + drivers/nfc/pn544/pn544.c | 6 ++++-- include/net/nfc/hci.h | 3 +++ include/net/nfc/nci_core.h | 1 + include/net/nfc/nfc.h | 6 ++++++ include/uapi/linux/nfc.h | 14 ++++++++++++++ net/nfc/core.c | 3 +++ net/nfc/hci/core.c | 3 ++- net/nfc/nci/core.c | 2 ++ net/nfc/netlink.c | 1 + 11 files changed, 38 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/nfc/nfcwilink.c b/drivers/nfc/nfcwilink.c index c7c182d2b7df..3b731acbc408 100644 --- a/drivers/nfc/nfcwilink.c +++ b/drivers/nfc/nfcwilink.c @@ -542,6 +542,7 @@ static int nfcwilink_probe(struct platform_device *pdev) drv->ndev = nci_allocate_device(&nfcwilink_ops, protocols, + NFC_SE_NONE, NFCWILINK_HDR_LEN, 0); if (!drv->ndev) { diff --git a/drivers/nfc/pn533.c b/drivers/nfc/pn533.c index e8c083203b33..31a5b3b53b2a 100644 --- a/drivers/nfc/pn533.c +++ b/drivers/nfc/pn533.c @@ -2525,6 +2525,7 @@ static int pn533_probe(struct usb_interface *interface, dev->nfc_dev = nfc_allocate_device(&pn533_nfc_ops, protocols, + NFC_SE_NONE, dev->ops->tx_header_len + PN533_CMD_DATAEXCH_HEAD_LEN, dev->ops->tx_tail_len); diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c index d108c794008d..9c5f16e7baef 100644 --- a/drivers/nfc/pn544/pn544.c +++ b/drivers/nfc/pn544/pn544.c @@ -801,7 +801,7 @@ int pn544_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, struct nfc_hci_dev **hdev) { struct pn544_hci_info *info; - u32 protocols; + u32 protocols, se; struct nfc_hci_init_data init_data; int r; @@ -834,8 +834,10 @@ int pn544_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, NFC_PROTO_ISO14443_B_MASK | NFC_PROTO_NFC_DEP_MASK; + se = NFC_SE_UICC | NFC_SE_EMBEDDED; + info->hdev = nfc_hci_allocate_device(&pn544_hci_ops, &init_data, 0, - protocols, llc_name, + protocols, se, llc_name, phy_headroom + PN544_CMDS_HEADROOM, phy_tailroom, phy_payload); if (!info->hdev) { diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 2ff71750c428..b87a1692b086 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -59,6 +59,8 @@ struct nfc_hci_ops { struct nfc_target *target); int (*event_received)(struct nfc_hci_dev *hdev, u8 gate, u8 event, struct sk_buff *skb); + int (*enable_se)(struct nfc_dev *dev, u32 secure_element); + int (*disable_se)(struct nfc_dev *dev, u32 secure_element); }; /* Pipes */ @@ -150,6 +152,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, unsigned long quirks, u32 protocols, + u32 supported_se, const char *llc_name, int tx_headroom, int tx_tailroom, diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index d705d8674949..5bc0c460edc0 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -147,6 +147,7 @@ struct nci_dev { /* ----- NCI Devices ----- */ struct nci_dev *nci_allocate_device(struct 
nci_ops *ops, __u32 supported_protocols, + __u32 supported_se, int tx_headroom, int tx_tailroom); void nci_free_device(struct nci_dev *ndev); diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 1665674e86b2..87a6417fc934 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -68,6 +68,8 @@ struct nfc_ops { void *cb_context); int (*tm_send)(struct nfc_dev *dev, struct sk_buff *skb); int (*check_presence)(struct nfc_dev *dev, struct nfc_target *target); + int (*enable_se)(struct nfc_dev *dev, u32 secure_element); + int (*disable_se)(struct nfc_dev *dev, u32 secure_element); }; #define NFC_TARGET_IDX_ANY -1 @@ -109,6 +111,9 @@ struct nfc_dev { struct nfc_genl_data genl_data; u32 supported_protocols; + u32 supported_se; + u32 active_se; + int tx_headroom; int tx_tailroom; @@ -125,6 +130,7 @@ extern struct class nfc_class; struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, u32 supported_protocols, + u32 supported_se, int tx_headroom, int tx_tailroom); diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 0e63cee8d810..80e4ecd8c04c 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -67,6 +67,11 @@ * subsequent CONNECT and CC messages. * If one of the passed parameters is wrong none is set and -EINVAL is * returned. + * @NFC_CMD_ENABLE_SE: Enable the physical link to a specific secure element. + * Once enabled a secure element will handle card emulation mode, i.e. + * starting a poll from a device which has a secure element enabled means + * we want to do SE based card emulation. + * @NFC_CMD_DISABLE_SE: Disable the physical link to a specific secure element. */ enum nfc_commands { NFC_CMD_UNSPEC, @@ -86,6 +91,8 @@ enum nfc_commands { NFC_EVENT_TM_DEACTIVATED, NFC_CMD_LLC_GET_PARAMS, NFC_CMD_LLC_SET_PARAMS, + NFC_CMD_ENABLE_SE, + NFC_CMD_DISABLE_SE, /* private: internal use only */ __NFC_CMD_AFTER_LAST }; @@ -114,6 +121,7 @@ enum nfc_commands { * @NFC_ATTR_LLC_PARAM_LTO: Link TimeOut parameter * @NFC_ATTR_LLC_PARAM_RW: Receive Window size parameter * @NFC_ATTR_LLC_PARAM_MIUX: MIU eXtension parameter + * @NFC_ATTR_SE: Available Secure Elements */ enum nfc_attrs { NFC_ATTR_UNSPEC, @@ -134,6 +142,7 @@ enum nfc_attrs { NFC_ATTR_LLC_PARAM_LTO, NFC_ATTR_LLC_PARAM_RW, NFC_ATTR_LLC_PARAM_MIUX, + NFC_ATTR_SE, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; @@ -172,6 +181,11 @@ enum nfc_attrs { #define NFC_PROTO_NFC_DEP_MASK (1 << NFC_PROTO_NFC_DEP) #define NFC_PROTO_ISO14443_B_MASK (1 << NFC_PROTO_ISO14443_B) +/* NFC Secure Elements */ +#define NFC_SE_NONE 0x0 +#define NFC_SE_UICC 0x1 +#define NFC_SE_EMBEDDED 0x2 + struct sockaddr_nfc { sa_family_t sa_family; __u32 dev_idx; diff --git a/net/nfc/core.c b/net/nfc/core.c index 7d7b4ee34015..25522e56d350 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -757,6 +757,7 @@ struct nfc_dev *nfc_get_device(unsigned int idx) */ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, u32 supported_protocols, + u32 supported_se, int tx_headroom, int tx_tailroom) { struct nfc_dev *dev; @@ -774,6 +775,8 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->ops = ops; dev->supported_protocols = supported_protocols; + dev->supported_se = supported_se; + dev->active_se = NFC_SE_NONE; dev->tx_headroom = tx_headroom; dev->tx_tailroom = tx_tailroom; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 755a6b9774ab..91020b210d87 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -797,6 +797,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, struct 
nfc_hci_init_data *init_data, unsigned long quirks, u32 protocols, + u32 supported_se, const char *llc_name, int tx_headroom, int tx_tailroom, @@ -822,7 +823,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, return NULL; } - hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, + hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, supported_se, tx_headroom + HCI_CMDS_HEADROOM, tx_tailroom); if (!hdev->ndev) { diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 5f98dc1bf039..48ada0ec749e 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -658,6 +658,7 @@ static struct nfc_ops nci_nfc_ops = { */ struct nci_dev *nci_allocate_device(struct nci_ops *ops, __u32 supported_protocols, + __u32 supported_se, int tx_headroom, int tx_tailroom) { struct nci_dev *ndev; @@ -680,6 +681,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, ndev->nfc_dev = nfc_allocate_device(&nci_nfc_ops, supported_protocols, + supported_se, tx_headroom + NCI_DATA_HDR_SIZE, tx_tailroom); if (!ndev->nfc_dev) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 3568ae16786d..504b883439f1 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -366,6 +366,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) || nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) || + nla_put_u32(msg, NFC_ATTR_SE, dev->supported_se) || nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) || nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) goto nla_put_failure; -- cgit v1.2.3-71-gd317 From dd3332bfcb2223458f553f341d3388cb84040e6a Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Sun, 13 Jan 2013 05:02:45 +0000 Subject: ipv6: Store Router Alert option in IP6CB directly. Router Alert option is very small and we can store the value itself in the skb. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 3 ++- include/uapi/linux/ipv6.h | 2 ++ net/ipv6/exthdrs.c | 3 ++- net/ipv6/ip6_input.c | 5 ++--- 4 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 304a9f46b578..e971e3742172 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -84,7 +84,7 @@ static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb) struct inet6_skb_parm { int iif; - __u16 ra; + __be16 ra; __u16 hop; __u16 dst0; __u16 srcrt; @@ -100,6 +100,7 @@ struct inet6_skb_parm { #define IP6SKB_XFRM_TRANSFORMED 1 #define IP6SKB_FORWARDED 2 #define IP6SKB_REROUTED 4 +#define IP6SKB_ROUTERALERT 8 }; #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 5a2991cf0251..4bda4cf5b0f5 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -63,6 +63,8 @@ struct ipv6_opt_hdr { #define ipv6_destopt_hdr ipv6_opt_hdr #define ipv6_hopopt_hdr ipv6_opt_hdr +/* Router Alert option values (RFC2711) */ +#define IPV6_OPT_ROUTERALERT_MLD 0x0000 /* MLD(RFC2710) */ /* * routing header type 0 (used in cmsghdr struct) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 473f628f9f20..07a7d65a7cb6 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -553,7 +553,8 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) const unsigned char *nh = skb_network_header(skb); if (nh[optoff + 1] == 2) { - IP6CB(skb)->ra = optoff; + IP6CB(skb)->flags |= IP6SKB_ROUTERALERT; + memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra)); return true; } LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 2ccd35ec3628..4ac5bf30e16a 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -280,9 +280,8 @@ int ip6_mc_input(struct sk_buff *skb) struct inet6_skb_parm *opt = IP6CB(skb); /* Check for MLD */ - if (unlikely(opt->ra)) { + if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { /* Check if this is a mld message */ - u8 *ptr = skb_network_header(skb) + opt->ra; u8 nexthdr = hdr->nexthdr; __be16 frag_off; int offset; @@ -290,7 +289,7 @@ int ip6_mc_input(struct sk_buff *skb) /* Check if the value of Router Alert * is for MLD (0x0000). */ - if ((ptr[2] | ptr[3]) == 0) { + if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) { deliver = false; if (!ipv6_ext_hdr(nexthdr)) { -- cgit v1.2.3-71-gd317 From 25d46f43a911b08c5aa8c8fd4fe7fa9b36445068 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Sun, 13 Jan 2013 16:02:06 +0000 Subject: ipv6: Move comment to right place. IN6ADDR_* and in6addr_* are not exported to userspace, and are defined in include/linux/in6.h. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- include/linux/in6.h | 4 ++++ include/uapi/linux/in6.h | 5 ----- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index 9e2ae26fb598..a16e19349ec0 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -22,6 +22,10 @@ #include +/* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553 + * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined + * in network byte order, not in host byte order as are the IPv4 equivalents + */ extern const struct in6_addr in6addr_any; #define IN6ADDR_ANY_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } } extern const struct in6_addr in6addr_loopback; diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index f79c3721da6e..5673b97dcf54 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -38,11 +38,6 @@ struct in6_addr { #define s6_addr32 in6_u.u6_addr32 }; -/* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553 - * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined - * in network byte order, not in host byte order as are the IPv4 equivalents - */ - struct sockaddr_in6 { unsigned short int sin6_family; /* AF_INET6 */ __be16 sin6_port; /* Transport layer port # */ -- cgit v1.2.3-71-gd317 From d3710e74cf329839dea8d13b1ad56e572b06b173 Mon Sep 17 00:00:00 2001 From: Lauro Ramos Venancio Date: Wed, 5 Dec 2012 21:12:25 -0300 Subject: NFC: Change nfc.h license nfc.h being GPL makes it quite controversial for non GPL applications to include it. Moreover, nfc.h only includes structures and API definitions that are hardly copyrightable. Signed-off-by: Lauro Ramos Venancio Signed-off-by: Samuel Ortiz --- include/uapi/linux/nfc.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 80e4ecd8c04c..7969f46f1bb3 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -5,20 +5,17 @@ * Lauro Ramos Venancio * Aloisio Almeida Jr * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #ifndef __LINUX_NFC_H -- cgit v1.2.3-71-gd317 From 3b1c5a5307fb5277f395efdcf330c064d79df07d Mon Sep 17 00:00:00 2001 From: Marco Porsch Date: Mon, 7 Jan 2013 16:04:52 +0100 Subject: {cfg,nl}80211: mesh power mode primitives and userspace access Add the nl80211_mesh_power_mode enumeration which holds possible values for the mesh power mode. These modes are unknown, active, light sleep and deep sleep. Add power_mode entry to the mesh config structure to hold the user-configured default mesh power mode. This value will be used for new peer links. Add the dot11MeshAwakeWindowDuration value to the mesh config. The awake window is a duration in TU describing how long the STA will stay awake after transmitting its beacon in PS mode. Add access routines to: - get/set local link-specific power mode (STA) - get remote STA's link-specific power mode (STA) - get remote STA's non-peer power mode (STA) - get/set default mesh power mode (mesh config) - get/set mesh awake window duration (mesh config) All config changes may be done at mesh runtime and take effect immediately. Signed-off-by: Marco Porsch Signed-off-by: Ivan Bezyazychnyy Signed-off-by: Mike Krinkin [fix commit message line length, error handling in set station] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 21 ++++++++++++++++++++ include/uapi/linux/nl80211.h | 47 ++++++++++++++++++++++++++++++++++++++++++++ net/wireless/mesh.c | 3 +++ net/wireless/nl80211.c | 43 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 113 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 516aded3697f..d9f08f65f7a5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -610,6 +610,8 @@ enum station_parameters_apply_mask { * @sta_modify_mask: bitmap indicating which parameters changed * (for those that don't have a natural "no change" value), * see &enum station_parameters_apply_mask + * @local_pm: local link-specific mesh power save mode (no change when set + * to unknown) */ struct station_parameters { u8 *supported_rates; @@ -625,6 +627,7 @@ struct station_parameters { struct ieee80211_vht_cap *vht_capa; u8 uapsd_queues; u8 max_sp; + enum nl80211_mesh_power_mode local_pm; }; /** @@ -655,6 +658,9 @@ struct station_parameters { * @STATION_INFO_STA_FLAGS: @sta_flags filled * @STATION_INFO_BEACON_LOSS_COUNT: @beacon_loss_count filled * @STATION_INFO_T_OFFSET: @t_offset filled + * @STATION_INFO_LOCAL_PM: @local_pm filled + * @STATION_INFO_PEER_PM: @peer_pm filled + * @STATION_INFO_NONPEER_PM: @nonpeer_pm filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -678,6 +684,9 @@ enum station_info_flags { STATION_INFO_STA_FLAGS = 1<<18, STATION_INFO_BEACON_LOSS_COUNT = 1<<19, STATION_INFO_T_OFFSET = 1<<20, + STATION_INFO_LOCAL_PM = 1<<21, + STATION_INFO_PEER_PM = 1<<22, + STATION_INFO_NONPEER_PM = 1<<23, }; /** @@ -791,6 +800,9 @@ struct sta_bss_parameters { * @sta_flags: station flags mask & values * @beacon_loss_count: Number of times beacon loss event has triggered. * @t_offset: Time offset of the station relative to this host. 
+ * @local_pm: local mesh STA power save mode + * @peer_pm: peer mesh STA power save mode + * @nonpeer_pm: non-peer mesh STA power save mode */ struct station_info { u32 filled; @@ -820,6 +832,9 @@ struct station_info { u32 beacon_loss_count; s64 t_offset; + enum nl80211_mesh_power_mode local_pm; + enum nl80211_mesh_power_mode peer_pm; + enum nl80211_mesh_power_mode nonpeer_pm; /* * Note: Add a new enum station_info_flags value for each new field and @@ -995,6 +1010,10 @@ struct bss_parameters { * @dot11MeshHWMPconfirmationInterval: The minimum interval of time (in TUs) * during which a mesh STA can send only one Action frame containing * a PREQ element for root path confirmation. + * @power_mode: The default mesh power save mode which will be the initial + * setting for new peer links. + * @dot11MeshAwakeWindowDuration: The duration in TUs the STA will remain awake + * after transmitting its beacon. */ struct mesh_config { u16 dot11MeshRetryTimeout; @@ -1022,6 +1041,8 @@ struct mesh_config { u32 dot11MeshHWMPactivePathToRootTimeout; u16 dot11MeshHWMProotInterval; u16 dot11MeshHWMPconfirmationInterval; + enum nl80211_mesh_power_mode power_mode; + u16 dot11MeshAwakeWindowDuration; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 547017100a30..6c4f703ae890 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1310,6 +1310,9 @@ enum nl80211_commands { * if not given in START_AP 0 is assumed, if not given in SET_BSS * no change is made. * + * @NL80211_ATTR_LOCAL_MESH_POWER_MODE: local mesh STA link-specific power mode + * defined in &enum nl80211_mesh_power_mode. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1580,6 +1583,8 @@ enum nl80211_attrs { NL80211_ATTR_P2P_CTWINDOW, NL80211_ATTR_P2P_OPPPS, + NL80211_ATTR_LOCAL_MESH_POWER_MODE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1838,6 +1843,10 @@ enum nl80211_sta_bss_param { * @NL80211_STA_INFO_STA_FLAGS: Contains a struct nl80211_sta_flag_update. * @NL80211_STA_INFO_BEACON_LOSS: count of times beacon loss was detected (u32) * @NL80211_STA_INFO_T_OFFSET: timing offset with respect to this STA (s64) + * @NL80211_STA_INFO_LOCAL_PM: local mesh STA link-specific power mode + * @NL80211_STA_INFO_PEER_PM: peer mesh STA link-specific power mode + * @NL80211_STA_INFO_NONPEER_PM: neighbor mesh STA power save mode towards + * non-peer STA * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -1862,6 +1871,9 @@ enum nl80211_sta_info { NL80211_STA_INFO_STA_FLAGS, NL80211_STA_INFO_BEACON_LOSS, NL80211_STA_INFO_T_OFFSET, + NL80211_STA_INFO_LOCAL_PM, + NL80211_STA_INFO_PEER_PM, + NL80211_STA_INFO_NONPEER_PM, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, @@ -2252,6 +2264,34 @@ enum nl80211_mntr_flags { NL80211_MNTR_FLAG_MAX = __NL80211_MNTR_FLAG_AFTER_LAST - 1 }; +/** + * enum nl80211_mesh_power_mode - mesh power save modes + * + * @NL80211_MESH_POWER_UNKNOWN: The mesh power mode of the mesh STA is + * not known or has not been set yet. + * @NL80211_MESH_POWER_ACTIVE: Active mesh power mode. The mesh STA is + * in Awake state all the time. + * @NL80211_MESH_POWER_LIGHT_SLEEP: Light sleep mode. The mesh STA will + * alternate between Active and Doze states, but will wake up for + * neighbor's beacons. + * @NL80211_MESH_POWER_DEEP_SLEEP: Deep sleep mode. 
The mesh STA will + * alternate between Active and Doze states, but may not wake up + * for neighbor's beacons. + * + * @__NL80211_MESH_POWER_AFTER_LAST - internal use + * @NL80211_MESH_POWER_MAX - highest possible power save level + */ + +enum nl80211_mesh_power_mode { + NL80211_MESH_POWER_UNKNOWN, + NL80211_MESH_POWER_ACTIVE, + NL80211_MESH_POWER_LIGHT_SLEEP, + NL80211_MESH_POWER_DEEP_SLEEP, + + __NL80211_MESH_POWER_AFTER_LAST, + NL80211_MESH_POWER_MAX = __NL80211_MESH_POWER_AFTER_LAST - 1 +}; + /** * enum nl80211_meshconf_params - mesh configuration parameters * @@ -2346,6 +2386,11 @@ enum nl80211_mntr_flags { * (in TUs) during which a mesh STA can send only one Action frame * containing a PREQ element for root path confirmation. * + * @NL80211_MESHCONF_POWER_MODE: Default mesh power mode for new peer links. + * type &enum nl80211_mesh_power_mode (u32) + * + * @NL80211_MESHCONF_AWAKE_WINDOW: awake window duration (in TUs) + * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ enum nl80211_meshconf_params { @@ -2375,6 +2420,8 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, + NL80211_MESHCONF_POWER_MODE, + NL80211_MESHCONF_AWAKE_WINDOW, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 0fe8ceb5444e..55957a284f6c 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -46,6 +46,7 @@ #define MESH_DEFAULT_BEACON_INTERVAL 1000 /* in 1024 us units (=TUs) */ #define MESH_DEFAULT_DTIM_PERIOD 2 +#define MESH_DEFAULT_AWAKE_WINDOW 10 /* in 1024 us units (=TUs) */ const struct mesh_config default_mesh_config = { .dot11MeshRetryTimeout = MESH_RET_T, @@ -72,6 +73,8 @@ const struct mesh_config default_mesh_config = { .dot11MeshHWMPactivePathToRootTimeout = MESH_PATH_TO_ROOT_TIMEOUT, .dot11MeshHWMProotInterval = MESH_ROOT_INTERVAL, .dot11MeshHWMPconfirmationInterval = MESH_ROOT_CONFIRMATION_INTERVAL, + .power_mode = NL80211_MESH_POWER_ACTIVE, + .dot11MeshAwakeWindowDuration = MESH_DEFAULT_AWAKE_WINDOW, }; const struct mesh_setup default_mesh_setup = { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d5842eb35aec..1a7a710fe9bf 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3001,6 +3001,18 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS, sinfo->beacon_loss_count)) goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_LOCAL_PM) && + nla_put_u32(msg, NL80211_STA_INFO_LOCAL_PM, + sinfo->local_pm)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_PEER_PM) && + nla_put_u32(msg, NL80211_STA_INFO_PEER_PM, + sinfo->peer_pm)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_NONPEER_PM) && + nla_put_u32(msg, NL80211_STA_INFO_NONPEER_PM, + sinfo->nonpeer_pm)) + goto nla_put_failure; if (sinfo->filled & STATION_INFO_BSS_PARAM) { bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); if (!bss_param) @@ -3206,6 +3218,17 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.plink_state = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); + if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) { + enum nl80211_mesh_power_mode pm = nla_get_u32( + info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]); + + if (pm <= NL80211_MESH_POWER_UNKNOWN || + pm > NL80211_MESH_POWER_MAX) + return -EINVAL; + + params.local_pm = pm; + } + switch (dev->ieee80211_ptr->iftype) { case 
NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: @@ -3213,6 +3236,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* disallow mesh-specific things */ if (params.plink_action) return -EINVAL; + if (params.local_pm) + return -EINVAL; /* TDLS can't be set, ... */ if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) @@ -3265,6 +3290,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* disallow things sta doesn't support */ if (params.plink_action) return -EINVAL; + if (params.local_pm) + return -EINVAL; /* reject any changes other than AUTHORIZED */ if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) return -EINVAL; @@ -3922,7 +3949,11 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_u16(msg, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, cur_params.dot11MeshHWMProotInterval) || nla_put_u16(msg, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, - cur_params.dot11MeshHWMPconfirmationInterval)) + cur_params.dot11MeshHWMPconfirmationInterval) || + nla_put_u32(msg, NL80211_MESHCONF_POWER_MODE, + cur_params.power_mode) || + nla_put_u16(msg, NL80211_MESHCONF_AWAKE_WINDOW, + cur_params.dot11MeshAwakeWindowDuration)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); @@ -3961,6 +3992,8 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT] = { .type = NLA_U32 }, [NL80211_MESHCONF_HWMP_ROOT_INTERVAL] = { .type = NLA_U16 }, [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = { .type = NLA_U16 }, + [NL80211_MESHCONF_POWER_MODE] = { .type = NLA_U32 }, + [NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 }, }; static const struct nla_policy @@ -4088,6 +4121,14 @@ do { \ 1, 65535, mask, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, power_mode, + NL80211_MESH_POWER_ACTIVE, + NL80211_MESH_POWER_MAX, + mask, NL80211_MESHCONF_POWER_MODE, + nla_get_u32); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, + 0, 65535, mask, + NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16); if (mask_out) *mask_out = mask; -- cgit v1.2.3-71-gd317 From cee00a959c0a86571e6f99cf42f0261d7e54d2ae Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 15 Jan 2013 17:15:57 +0200 Subject: cfg80211: Allow use_mfp to be specified with the connect command The NL80211_ATTR_USE_MFP attribute was originally added for NL80211_CMD_ASSOCIATE, but it is actually as useful (if not even more useful) with NL80211_CMD_CONNECT, so process that attribute with the connect command, too. 
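For illustration (this sketch is not part of the patch itself), a minimal userspace request using the attribute with CONNECT could look like the following libnl-3 fragment; the interface index and SSID are placeholders, most error handling is elided, and a real supplicant would also supply authentication and cipher attributes.

#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/nl80211.h>

/* Sketch: issue NL80211_CMD_CONNECT with management frame protection
 * required, which this patch makes valid for CONNECT (not only ASSOCIATE). */
static int connect_with_mfp(int ifindex, const void *ssid, int ssid_len)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int family, err = -1;

	if (!sk)
		return -1;
	if (genl_connect(sk))
		goto out_sock;
	family = genl_ctrl_resolve(sk, "nl80211");
	if (family < 0)
		goto out_sock;
	msg = nlmsg_alloc();
	if (!msg)
		goto out_sock;

	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    NL80211_CMD_CONNECT, 0);
	nla_put_u32(msg, NL80211_ATTR_IFINDEX, ifindex);
	nla_put(msg, NL80211_ATTR_SSID, ssid_len, ssid);
	/* the attribute this patch starts honouring for CONNECT */
	nla_put_u32(msg, NL80211_ATTR_USE_MFP, NL80211_MFP_REQUIRED);

	err = nl_send_auto(sk, msg) < 0 ? -1 : 0;
	nlmsg_free(msg);
out_sock:
	nl_socket_free(sk);
	return err;
}

When the attribute is omitted, the new nl80211_connect() code defaults to NL80211_MFP_NO, so existing userspace keeps its current behaviour.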
Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 6 +++--- net/wireless/nl80211.c | 9 +++++++++ net/wireless/sme.c | 3 ++- 4 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d81e730962cc..f1686d460e6b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1465,6 +1465,7 @@ struct cfg80211_ibss_params { * @ie: IEs for association request * @ie_len: Length of assoc_ie in octets * @privacy: indicates whether privacy-enabled APs should be used + * @mfp: indicate whether management frame protection is used * @crypto: crypto settings * @key_len: length of WEP key for shared key authentication * @key_idx: index of WEP key for shared key authentication @@ -1485,6 +1486,7 @@ struct cfg80211_connect_params { u8 *ie; size_t ie_len; bool privacy; + enum nl80211_mfp mfp; struct cfg80211_crypto_settings crypto; const u8 *key; u8 key_len, key_idx; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6c4f703ae890..d01c16220dc5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -374,8 +374,8 @@ * requests to connect to a specified network but without separating * auth and assoc steps. For this, you need to specify the SSID in a * %NL80211_ATTR_SSID attribute, and can optionally specify the association - * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_MAC, - * %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT, + * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_USE_MFP, + * %NL80211_ATTR_MAC, %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT, * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE and * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT. * Background scan period can optionally be @@ -958,7 +958,7 @@ enum nl80211_commands { * @NL80211_ATTR_USE_MFP: Whether management frame protection (IEEE 802.11w) is * used for the association (&enum nl80211_mfp, represented as a u32); * this attribute can be used - * with %NL80211_CMD_ASSOCIATE request + * with %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests * * @NL80211_ATTR_STA_FLAGS2: Attribute containing a * &struct nl80211_sta_flag_update. 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d543cf152100..df82a5c9faee 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5932,6 +5932,15 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } + if (info->attrs[NL80211_ATTR_USE_MFP]) { + connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); + if (connect.mfp != NL80211_MFP_REQUIRED && + connect.mfp != NL80211_MFP_NO) + return -EINVAL; + } else { + connect.mfp = NL80211_MFP_NO; + } + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { connect.channel = ieee80211_get_channel(wiphy, diff --git a/net/wireless/sme.c b/net/wireless/sme.c index d2d26518cdd7..a825dfe12cf7 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -192,7 +192,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) prev_bssid, params->ssid, params->ssid_len, params->ie, params->ie_len, - false, ¶ms->crypto, + params->mfp != NL80211_MFP_NO, + ¶ms->crypto, params->flags, ¶ms->ht_capa, ¶ms->ht_capa_mask); if (err) -- cgit v1.2.3-71-gd317 From 11c4a075db2f8774d37544342c8cb9752b4db9e1 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Tue, 8 Jan 2013 14:04:07 +0100 Subject: cfg80211: check radar interface combinations To ease further DFS development regarding interface combinations, use the interface combinations structure to test for radar capabilities. Drivers can specify which channel widths they support, and in which modes. Right now only a single AP interface is allowed, but as the DFS code evolves other combinations can be enabled. Signed-off-by: Simon Wunderlich Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 3 +++ net/wireless/core.c | 12 ++++++++++-- net/wireless/core.h | 7 ++++--- net/wireless/nl80211.c | 3 +++ net/wireless/util.c | 41 ++++++++++++++++++++++++++++++++++++++--- 6 files changed, 60 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f1686d460e6b..970da4420676 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2125,6 +2125,7 @@ struct ieee80211_iface_limit { * @beacon_int_infra_match: In this combination, the beacon intervals * between infrastructure and AP types must match. This is required * only in special cases. + * @radar_detect_widths: bitmap of channel widths supported for radar detection * * These examples can be expressed as follows: * @@ -2177,6 +2178,7 @@ struct ieee80211_iface_combination { u16 max_interfaces; u8 n_limits; bool beacon_int_infra_match; + u8 radar_detect_widths; }; struct mac_address { diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d01c16220dc5..e6eeb4ba5dc5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2984,6 +2984,8 @@ enum nl80211_iface_limit_attrs { * the infrastructure network's beacon interval. * @NL80211_IFACE_COMB_NUM_CHANNELS: u32 attribute specifying how many * different channels may be used within this group. + * @NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS: u32 attribute containing the bitmap + * of supported channel widths for radar detection. 
* @NUM_NL80211_IFACE_COMB: number of attributes * @MAX_NL80211_IFACE_COMB: highest attribute number * @@ -3016,6 +3018,7 @@ enum nl80211_if_combination_attrs { NL80211_IFACE_COMB_MAXNUM, NL80211_IFACE_COMB_STA_AP_BI_MATCH, NL80211_IFACE_COMB_NUM_CHANNELS, + NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, /* keep last */ NUM_NL80211_IFACE_COMB, diff --git a/net/wireless/core.c b/net/wireless/core.c index 747dd9365a44..0e702cdc6043 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -382,8 +382,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) c = &wiphy->iface_combinations[i]; - /* Combinations with just one interface aren't real */ - if (WARN_ON(c->max_interfaces < 2)) + /* + * Combinations with just one interface aren't real, + * however we make an exception for DFS. + */ + if (WARN_ON((c->max_interfaces < 2) && !c->radar_detect_widths)) return -EINVAL; /* Need at least one channel */ @@ -398,6 +401,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) CFG80211_MAX_NUM_DIFFERENT_CHANNELS)) return -EINVAL; + /* DFS only works on one channel. */ + if (WARN_ON(c->radar_detect_widths && + (c->num_different_channels > 1))) + return -EINVAL; + if (WARN_ON(!c->n_limits)) return -EINVAL; diff --git a/net/wireless/core.h b/net/wireless/core.h index f342267e3620..8396f7671c8d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -425,7 +425,8 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode); + enum cfg80211_chan_mode chanmode, + u8 radar_detect); static inline int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, @@ -433,7 +434,7 @@ cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype) { return cfg80211_can_use_iftype_chan(rdev, wdev, iftype, NULL, - CHAN_MODE_UNDEFINED); + CHAN_MODE_UNDEFINED, 0); } static inline int @@ -450,7 +451,7 @@ cfg80211_can_use_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chanmode) { return cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - chan, chanmode); + chan, chanmode, 0); } void diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index df82a5c9faee..33de80364c5c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -856,6 +856,9 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM, c->max_interfaces)) goto nla_put_failure; + if (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, + c->radar_detect_widths)) + goto nla_put_failure; nla_nest_end(msg, nl_combi); } diff --git a/net/wireless/util.c b/net/wireless/util.c index 16d76a807c2f..1c2795d52db0 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1184,7 +1184,8 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode) + enum cfg80211_chan_mode chanmode, + u8 radar_detect) { struct wireless_dev *wdev_iter; u32 used_iftypes = BIT(iftype); @@ -1195,14 +1196,45 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chmode; int num_different_channels = 0; int total = 1; + bool radar_required; int i, j; ASSERT_RTNL(); lockdep_assert_held(&rdev->devlist_mtx); + if (WARN_ON(hweight32(radar_detect) > 1)) + return -EINVAL; + + switch (iftype) { + case 
NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_WDS: + radar_required = !!(chan->flags & IEEE80211_CHAN_RADAR); + break; + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_MONITOR: + radar_required = false; + break; + case NL80211_IFTYPE_P2P_DEVICE: + case NUM_NL80211_IFTYPES: + case NL80211_IFTYPE_UNSPECIFIED: + default: + return -EINVAL; + } + + if (radar_required && !radar_detect) + return -EINVAL; + /* Always allow software iftypes */ - if (rdev->wiphy.software_iftypes & BIT(iftype)) + if (rdev->wiphy.software_iftypes & BIT(iftype)) { + if (radar_detect) + return -EINVAL; return 0; + } memset(num, 0, sizeof(num)); memset(used_channels, 0, sizeof(used_channels)); @@ -1275,7 +1307,7 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, used_iftypes |= BIT(wdev_iter->iftype); } - if (total == 1) + if (total == 1 && !radar_detect) return 0; for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { @@ -1308,6 +1340,9 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, } } + if (radar_detect && !(c->radar_detect_widths & radar_detect)) + goto cont; + /* * Finally check that all iftypes that we're currently * using are actually part of this combination. If they -- cgit v1.2.3-71-gd317 From c539f01717c239cfa0921dd43927afc976f1eedc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 11 Jan 2013 06:30:44 +0000 Subject: netfilter: add connlabel conntrack extension similar to connmarks, except labels are bit-based; i.e. all labels may be attached to a flow at the same time. Up to 128 labels are supported. Supporting more labels is possible, but requires increasing the ct offset delta from u8 to u16 type due to increased extension sizes. Mapping of bit-identifier to label name is done in userspace. The extension is enabled at run-time once "-m connlabel" netfilter rules are added. 
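As a sketch of the in-kernel API this series adds (mirroring what xt_connlabel.c below does, but not itself part of the patch), another netfilter module could tag and test a connection as follows; the bit number chosen here is arbitrary, since mapping bits to names is purely a userspace concern.

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_labels.h>

#define EXAMPLE_LABEL_BIT 5	/* meaning assigned by userspace policy */

/* Tag the connection behind @skb with EXAMPLE_LABEL_BIT.
 * Returns true if the bit is (now) set, false otherwise. */
static bool example_tag_connection(const struct sk_buff *skb)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

	if (ct == NULL || nf_ct_is_untracked(ct))
		return false;

	if (nf_connlabel_match(ct, EXAMPLE_LABEL_BIT))
		return true;	/* already labelled */

	/* returns -ENOSPC if no label space was reserved for this entry */
	return nf_connlabel_set(ct, EXAMPLE_LABEL_BIT) == 0;
}

Note that label space is only reserved at conntrack creation time, once net->ct.label_words has been sized by loading a connlabel rule, so nf_connlabel_set() can legitimately fail on connections created before that.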
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_extend.h | 4 ++ include/net/netfilter/nf_conntrack_labels.h | 55 ++++++++++++++++ include/net/netns/conntrack.h | 4 ++ include/uapi/linux/netfilter/xt_connlabel.h | 12 ++++ net/netfilter/Kconfig | 18 ++++++ net/netfilter/Makefile | 2 + net/netfilter/nf_conntrack_core.c | 12 ++++ net/netfilter/nf_conntrack_labels.c | 72 +++++++++++++++++++++ net/netfilter/nf_conntrack_netlink.c | 3 + net/netfilter/xt_connlabel.c | 99 +++++++++++++++++++++++++++++ 10 files changed, 281 insertions(+) create mode 100644 include/net/netfilter/nf_conntrack_labels.h create mode 100644 include/uapi/linux/netfilter/xt_connlabel.h create mode 100644 net/netfilter/nf_conntrack_labels.c create mode 100644 net/netfilter/xt_connlabel.c (limited to 'include/uapi/linux') diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 8b4d1fc29096..977bc8a46444 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -22,6 +22,9 @@ enum nf_ct_ext_id { #endif #ifdef CONFIG_NF_CONNTRACK_TIMEOUT NF_CT_EXT_TIMEOUT, +#endif +#ifdef CONFIG_NF_CONNTRACK_LABELS + NF_CT_EXT_LABELS, #endif NF_CT_EXT_NUM, }; @@ -33,6 +36,7 @@ enum nf_ct_ext_id { #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout +#define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h new file mode 100644 index 000000000000..b94fe31c7b39 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -0,0 +1,55 @@ +#include +#include +#include +#include +#include +#include + +#include + +struct nf_conn_labels { + u8 words; + unsigned long bits[]; +}; + +static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + return nf_ct_ext_find(ct, NF_CT_EXT_LABELS); +#else + return NULL; +#endif +} + +static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + struct nf_conn_labels *cl_ext; + struct net *net = nf_ct_net(ct); + u8 words; + + words = ACCESS_ONCE(net->ct.label_words); + if (words == 0 || WARN_ON_ONCE(words > 8)) + return NULL; + + cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, + words * sizeof(long), GFP_ATOMIC); + if (cl_ext != NULL) + cl_ext->words = words; + + return cl_ext; +#else + return NULL; +#endif +} + +bool nf_connlabel_match(const struct nf_conn *ct, u16 bit); +int nf_connlabel_set(struct nf_conn *ct, u16 bit); + +#ifdef CONFIG_NF_CONNTRACK_LABELS +int nf_conntrack_labels_init(struct net *net); +void nf_conntrack_labels_fini(struct net *net); +#else +static inline int nf_conntrack_labels_init(struct net *n) { return 0; } +static inline void nf_conntrack_labels_fini(struct net *net) {} +#endif diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 923cb20051ed..c9c0c538b68b 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -84,6 +84,10 @@ struct netns_ct { int sysctl_auto_assign_helper; bool auto_assign_helper_warned; struct nf_ip_net nf_ct_proto; +#if defined(CONFIG_NF_CONNTRACK_LABELS) + unsigned int labels_used; + u8 label_words; +#endif #ifdef CONFIG_NF_NAT_NEEDED struct 
hlist_head *nat_bysource; unsigned int nat_htable_size; diff --git a/include/uapi/linux/netfilter/xt_connlabel.h b/include/uapi/linux/netfilter/xt_connlabel.h new file mode 100644 index 000000000000..c4bc9ee9b330 --- /dev/null +++ b/include/uapi/linux/netfilter/xt_connlabel.h @@ -0,0 +1,12 @@ +#include + +#define XT_CONNLABEL_MAXBIT 127 +enum xt_connlabel_mtopts { + XT_CONNLABEL_OP_INVERT = 1 << 0, + XT_CONNLABEL_OP_SET = 1 << 1, +}; + +struct xt_connlabel_mtinfo { + __u16 bit; + __u16 options; +}; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 49e96df5fbc4..bb48607d4ee4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -124,6 +124,12 @@ config NF_CONNTRACK_TIMESTAMP If unsure, say `N'. +config NF_CONNTRACK_LABELS + bool + help + This option enables support for assigning user-defined flag bits + to connection tracking entries. It selected by the connlabel match. + config NF_CT_PROTO_DCCP tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' depends on EXPERIMENTAL @@ -842,6 +848,18 @@ config NETFILTER_XT_MATCH_CONNBYTES If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NETFILTER_XT_MATCH_CONNLABEL + tristate '"connlabel" match support' + select NF_CONNTRACK_LABELS + depends on NETFILTER_ADVANCED + ---help--- + This match allows you to test and assign userspace-defined labels names + to a connection. The kernel only stores bit values - mapping + names to bits is done by userspace. + + Unlike connmark, more than 32 flag bits may be assigned to a + connection simultaneously. + config NETFILTER_XT_MATCH_CONNLIMIT tristate '"connlimit" match support"' depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 32596978df1d..b3bbda60945e 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -4,6 +4,7 @@ nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_exp nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o +nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -101,6 +102,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLABEL) += xt_connlabel.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e4a0c4fb3a7c..85aa4b7149c5 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -763,6 +764,7 @@ void nf_conntrack_free(struct nf_conn *ct) } EXPORT_SYMBOL_GPL(nf_conntrack_free); + /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct nf_conntrack_tuple_hash * @@ -809,6 +811,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); + nf_ct_labels_ext_add(ct); ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; nf_ct_ecache_ext_add(ct, ecache ? 
ecache->ctmask : 0, @@ -1352,6 +1355,7 @@ static void nf_conntrack_cleanup_net(struct net *net) } nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); + nf_conntrack_labels_fini(net); nf_conntrack_helper_fini(net); nf_conntrack_timeout_fini(net); nf_conntrack_ecache_fini(net); @@ -1583,7 +1587,15 @@ static int nf_conntrack_init_net(struct net *net) ret = nf_conntrack_helper_init(net); if (ret < 0) goto err_helper; + + ret = nf_conntrack_labels_init(net); + if (ret < 0) + goto err_labels; + return 0; + +err_labels: + nf_conntrack_helper_fini(net); err_helper: nf_conntrack_timeout_fini(net); err_timeout: diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c new file mode 100644 index 000000000000..0c542f41f338 --- /dev/null +++ b/net/netfilter/nf_conntrack_labels.c @@ -0,0 +1,72 @@ +/* + * test/set flag bits stored in conntrack extension area. + * + * (C) 2013 Astaro GmbH & Co KG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +static unsigned int label_bits(const struct nf_conn_labels *l) +{ + unsigned int longs = l->words; + return longs * BITS_PER_LONG; +} + +bool nf_connlabel_match(const struct nf_conn *ct, u16 bit) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels) + return false; + + return bit < label_bits(labels) && test_bit(bit, labels->bits); +} +EXPORT_SYMBOL_GPL(nf_connlabel_match); + +int nf_connlabel_set(struct nf_conn *ct, u16 bit) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels || bit >= label_bits(labels)) + return -ENOSPC; + + if (test_bit(bit, labels->bits)) + return 0; + + if (test_and_set_bit(bit, labels->bits)) + return 0; + + return 0; +} +EXPORT_SYMBOL_GPL(nf_connlabel_set); + +static struct nf_ct_ext_type labels_extend __read_mostly = { + .len = sizeof(struct nf_conn_labels), + .align = __alignof__(struct nf_conn_labels), + .id = NF_CT_EXT_LABELS, +}; + +int nf_conntrack_labels_init(struct net *net) +{ + if (net_eq(net, &init_net)) + return nf_ct_extend_register(&labels_extend); + return 0; +} + +void nf_conntrack_labels_fini(struct net *net) +{ + if (net_eq(net, &init_net)) + nf_ct_extend_unregister(&labels_extend); +} diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 627b0e50b238..e0b10ee180ef 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef CONFIG_NF_NAT_NEEDED #include #include @@ -1598,6 +1599,8 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); + nf_ct_labels_ext_add(ct); + /* we must add conntrack extensions before confirmation. */ ct->status |= IPS_CONFIRMED; diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c new file mode 100644 index 000000000000..9f8719df2001 --- /dev/null +++ b/net/netfilter/xt_connlabel.c @@ -0,0 +1,99 @@ +/* + * (C) 2013 Astaro GmbH & Co KG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal "); +MODULE_DESCRIPTION("Xtables: add/match connection trackling labels"); +MODULE_ALIAS("ipt_connlabel"); +MODULE_ALIAS("ip6t_connlabel"); + +static bool +connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_connlabel_mtinfo *info = par->matchinfo; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + bool invert = info->options & XT_CONNLABEL_OP_INVERT; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL || nf_ct_is_untracked(ct)) + return invert; + + if (info->options & XT_CONNLABEL_OP_SET) + return (nf_connlabel_set(ct, info->bit) == 0) ^ invert; + + return nf_connlabel_match(ct, info->bit) ^ invert; +} + +static int connlabel_mt_check(const struct xt_mtchk_param *par) +{ + const int options = XT_CONNLABEL_OP_INVERT | + XT_CONNLABEL_OP_SET; + struct xt_connlabel_mtinfo *info = par->matchinfo; + int ret; + size_t words; + + if (info->bit > XT_CONNLABEL_MAXBIT) + return -ERANGE; + + if (info->options & ~options) { + pr_err("Unknown options in mask %x\n", info->options); + return -EINVAL; + } + + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) { + pr_info("cannot load conntrack support for proto=%u\n", + par->family); + return ret; + } + + par->net->ct.labels_used++; + words = BITS_TO_LONGS(info->bit+1); + if (words > par->net->ct.label_words) + par->net->ct.label_words = words; + + return ret; +} + +static void connlabel_mt_destroy(const struct xt_mtdtor_param *par) +{ + par->net->ct.labels_used--; + if (par->net->ct.labels_used == 0) + par->net->ct.label_words = 0; + nf_ct_l3proto_module_put(par->family); +} + +static struct xt_match connlabels_mt_reg __read_mostly = { + .name = "connlabel", + .family = NFPROTO_UNSPEC, + .checkentry = connlabel_mt_check, + .match = connlabel_mt, + .matchsize = sizeof(struct xt_connlabel_mtinfo), + .destroy = connlabel_mt_destroy, + .me = THIS_MODULE, +}; + +static int __init connlabel_mt_init(void) +{ + return xt_register_match(&connlabels_mt_reg); +} + +static void __exit connlabel_mt_exit(void) +{ + xt_unregister_match(&connlabels_mt_reg); +} + +module_init(connlabel_mt_init); +module_exit(connlabel_mt_exit); -- cgit v1.2.3-71-gd317 From 0ceabd83875b72a29f33db4ab703d6ba40ea4c58 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 11 Jan 2013 06:30:45 +0000 Subject: netfilter: ctnetlink: deliver labels to userspace Introduce CTA_LABELS attribute to send a bit-vector of currently active labels to userspace. Future patch will permit userspace to also set/delete active labels. 
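A minimal userspace-side sketch (illustrative only, not part of this patch): since CTA_LABELS carries the raw bit-vector copied from labels->bits, a listener that has extracted the attribute payload only needs plain bit tests; which name corresponds to which bit is purely a userspace convention. The helper below and its calling convention are made up for the example.

#include <limits.h>
#include <stdbool.h>
#include <stddef.h>

/* 'bits' points at the CTA_LABELS payload, 'len' is the payload length in
 * bytes (a multiple of sizeof(long), cf. ctnetlink_dump_labels()). */
static bool ct_label_test(const unsigned long *bits, size_t len, unsigned int bit)
{
	if (bit >= len * CHAR_BIT)
		return false;
	return bits[bit / (sizeof(long) * CHAR_BIT)] &
	       (1UL << (bit % (sizeof(long) * CHAR_BIT)));
}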
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 1 + include/uapi/linux/netfilter/nfnetlink_conntrack.h | 1 + net/netfilter/nf_conntrack_labels.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 41 ++++++++++++++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 1644cdd8be91..d69483fb3825 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -101,6 +101,7 @@ enum ip_conntrack_events { IPCT_MARK, /* new mark has been set */ IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ IPCT_SECMARK, /* new security mark has been set */ + IPCT_LABEL, /* new connlabel has been set */ }; enum ip_conntrack_expect_events { diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 86e930cf3dfb..9e71e0c081fd 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -49,6 +49,7 @@ enum ctattr_type { CTA_SECCTX, CTA_TIMESTAMP, CTA_MARK_MASK, + CTA_LABELS, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index 0c542f41f338..ac5d0807d681 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -46,7 +46,7 @@ int nf_connlabel_set(struct nf_conn *ct, u16 bit) return 0; if (test_and_set_bit(bit, labels->bits)) - return 0; + nf_conntrack_event_cache(IPCT_LABEL, ct); return 0; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index e0b10ee180ef..5f5386382f13 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -324,6 +324,40 @@ nla_put_failure: #define ctnetlink_dump_secctx(a, b) (0) #endif +#ifdef CONFIG_NF_CONNTRACK_LABELS +static int ctnetlink_label_size(const struct nf_conn *ct) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels) + return 0; + return nla_total_size(labels->words * sizeof(long)); +} + +static int +ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + unsigned int len, i; + + if (!labels) + return 0; + + len = labels->words * sizeof(long); + i = 0; + do { + if (labels->bits[i] != 0) + return nla_put(skb, CTA_LABELS, len, labels->bits); + i++; + } while (i < labels->words); + + return 0; +} +#else +#define ctnetlink_dump_labels(a, b) (0) +#define ctnetlink_label_size(a) (0) +#endif + #define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple) static inline int @@ -464,6 +498,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, ctnetlink_dump_helpinfo(skb, ct) < 0 || ctnetlink_dump_mark(skb, ct) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || + ctnetlink_dump_labels(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || ctnetlink_dump_master(skb, ct) < 0 || @@ -562,6 +597,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif + ctnetlink_proto_size(ct) + + ctnetlink_label_size(ct) ; } @@ -663,6 +699,9 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) && ctnetlink_dump_secctx(skb, ct) < 0) goto nla_put_failure; #endif + if 
(events & (1 << IPCT_LABEL) && + ctnetlink_dump_labels(skb, ct) < 0) + goto nla_put_failure; if (events & (1 << IPCT_RELATED) && ctnetlink_dump_master(skb, ct) < 0) @@ -1986,6 +2025,8 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif + if (ctnetlink_dump_labels(skb, ct) < 0) + goto nla_put_failure; rcu_read_unlock(); return 0; -- cgit v1.2.3-71-gd317 From 9b21f6a90924dfe8e5e686c314ddb441fb06501e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 11 Jan 2013 06:30:46 +0000 Subject: netfilter: ctnetlink: allow userspace to modify labels Add the ability to set/clear labels assigned to a conntrack via ctnetlink. To allow userspace to only alter specific bits, Pablo suggested to add a new CTA_LABELS_MASK attribute: The new set of active labels is then determined via active = (active & ~mask) ^ changeset i.e., the mask selects those bits in the existing set that should be changed. This follows the same method already used by MARK and CONNMARK targets. Omitting CTA_LABELS_MASK is the same as setting all bits in CTA_LABELS_MASK to 1: The existing set is replaced by the one from userspace. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_labels.h | 3 ++ include/uapi/linux/netfilter/nfnetlink_conntrack.h | 1 + net/netfilter/nf_conntrack_labels.c | 43 +++++++++++++++++++++ net/netfilter/nf_conntrack_netlink.c | 44 ++++++++++++++++++++++ 4 files changed, 91 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index b94fe31c7b39..a3ce5d076fca 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -46,6 +46,9 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) bool nf_connlabel_match(const struct nf_conn *ct, u16 bit); int nf_connlabel_set(struct nf_conn *ct, u16 bit); +int nf_connlabels_replace(struct nf_conn *ct, + const u32 *data, const u32 *mask, unsigned int words); + #ifdef CONFIG_NF_CONNTRACK_LABELS int nf_conntrack_labels_init(struct net *net); void nf_conntrack_labels_fini(struct net *net); diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 9e71e0c081fd..08fabc6c93f3 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -50,6 +50,7 @@ enum ctattr_type { CTA_TIMESTAMP, CTA_MARK_MASK, CTA_LABELS, + CTA_LABELS_MASK, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index ac5d0807d681..e1d1eb850e7f 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -52,6 +52,49 @@ int nf_connlabel_set(struct nf_conn *ct, u16 bit) } EXPORT_SYMBOL_GPL(nf_connlabel_set); +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) +static void replace_u32(u32 *address, u32 mask, u32 new) +{ + u32 old, tmp; + + do { + old = *address; + tmp = (old & mask) ^ new; + } while (cmpxchg(address, old, tmp) != old); +} + +int nf_connlabels_replace(struct nf_conn *ct, + const u32 *data, + const u32 *mask, unsigned int words32) +{ + struct nf_conn_labels *labels; + unsigned int size, i; + u32 *dst; + + labels = nf_ct_labels_find(ct); + if (!labels) + return -ENOSPC; + + size = labels->words * sizeof(long); + if (size < (words32 * sizeof(u32))) + words32 = 
size / sizeof(u32); + + dst = (u32 *) labels->bits; + if (words32) { + for (i = 0; i < words32; i++) + replace_u32(&dst[i], mask ? ~mask[i] : 0, data[i]); + } + + size /= sizeof(u32); + for (i = words32; i < size; i++) /* pad */ + replace_u32(&dst[i], 0, 0); + + nf_conntrack_event_cache(IPCT_LABEL, ct); + return 0; +} +EXPORT_SYMBOL_GPL(nf_connlabels_replace); +#endif + static struct nf_ct_ext_type labels_extend __read_mostly = { .len = sizeof(struct nf_conn_labels), .align = __alignof__(struct nf_conn_labels), diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 5f5386382f13..2334cc5d2b16 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -961,6 +961,7 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, return 0; } +#define __CTA_LABELS_MAX_LENGTH ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_TUPLE_ORIG] = { .type = NLA_NESTED }, [CTA_TUPLE_REPLY] = { .type = NLA_NESTED }, @@ -977,6 +978,10 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NLA_NESTED }, [CTA_ZONE] = { .type = NLA_U16 }, [CTA_MARK_MASK] = { .type = NLA_U32 }, + [CTA_LABELS] = { .type = NLA_BINARY, + .len = __CTA_LABELS_MAX_LENGTH }, + [CTA_LABELS_MASK] = { .type = NLA_BINARY, + .len = __CTA_LABELS_MAX_LENGTH }, }; static int @@ -1504,6 +1509,31 @@ ctnetlink_change_nat_seq_adj(struct nf_conn *ct, } #endif +static int +ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[]) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + size_t len = nla_len(cda[CTA_LABELS]); + const void *mask = cda[CTA_LABELS_MASK]; + + if (len & (sizeof(u32)-1)) /* must be multiple of u32 */ + return -EINVAL; + + if (mask) { + if (nla_len(cda[CTA_LABELS_MASK]) == 0 || + nla_len(cda[CTA_LABELS_MASK]) != len) + return -EINVAL; + mask = nla_data(cda[CTA_LABELS_MASK]); + } + + len /= sizeof(u32); + + return nf_connlabels_replace(ct, nla_data(cda[CTA_LABELS]), mask, len); +#else + return -EOPNOTSUPP; +#endif +} + static int ctnetlink_change_conntrack(struct nf_conn *ct, const struct nlattr * const cda[]) @@ -1550,6 +1580,11 @@ ctnetlink_change_conntrack(struct nf_conn *ct, return err; } #endif + if (cda[CTA_LABELS]) { + err = ctnetlink_attach_labels(ct, cda); + if (err < 0) + return err; + } return 0; } @@ -1758,6 +1793,10 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, else events = IPCT_NEW; + if (cda[CTA_LABELS] && + ctnetlink_attach_labels(ct, cda) == 0) + events |= (1 << IPCT_LABEL); + nf_conntrack_eventmask_report((1 << IPCT_REPLY) | (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | @@ -2055,6 +2094,11 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) if (err < 0) return err; } + if (cda[CTA_LABELS]) { + err = ctnetlink_attach_labels(ct, cda); + if (err < 0) + return err; + } #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); -- cgit v1.2.3-71-gd317 From e6f30c731718db45cec380964dfee210307cfc4a Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 18 Jan 2013 07:17:30 +0000 Subject: netfilter: x_tables: add xt_bpf match Support arbitrary linux socket filter (BPF) programs as x_tables match rules. This allows for very expressive filters, and on platforms with BPF JIT appears competitive with traditional hardcoded iptables rules using the u32 match. 
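For illustration only (not taken from the patch), the program handed to this match is an array of classic struct sock_filter instructions copied into xt_bpf_info.bpf_program, with bpf_program_num_elem giving their count. A filter matching IPv4 UDP packets could be written as follows, assuming that, as with other iptables matches, offset 0 of the data seen by the filter is the start of the IP header:

#include <netinet/in.h>
#include <linux/filter.h>

/* Classic BPF: load the IPv4 protocol byte, accept (non-zero return) only
 * if it is UDP.  Four instructions, well under the limit mentioned below. */
static const struct sock_filter match_udp[] = {
	BPF_STMT(BPF_LD  | BPF_B   | BPF_ABS, 9),               /* ip->protocol */
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, IPPROTO_UDP, 0, 1),
	BPF_STMT(BPF_RET | BPF_K, 0xffffffff),                  /* match */
	BPF_STMT(BPF_RET | BPF_K, 0),                           /* no match */
};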
The size of the filter has been artificially limited to 64 instructions maximum to avoid bloating the size of each rule using this new match. Signed-off-by: Willem de Bruijn Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_bpf.h | 17 ++++++++ net/netfilter/Kconfig | 9 +++++ net/netfilter/Makefile | 1 + net/netfilter/xt_bpf.c | 73 +++++++++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+) create mode 100644 include/uapi/linux/netfilter/xt_bpf.h create mode 100644 net/netfilter/xt_bpf.c (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h new file mode 100644 index 000000000000..5dda450eb55b --- /dev/null +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -0,0 +1,17 @@ +#ifndef _XT_BPF_H +#define _XT_BPF_H + +#include +#include + +#define XT_BPF_MAX_NUM_INSTR 64 + +struct xt_bpf_info { + __u16 bpf_program_num_elem; + struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; + + /* only used in the kernel */ + struct sk_filter *filter __attribute__((aligned(8))); +}; + +#endif /*_XT_BPF_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index bb48607d4ee4..eb2c8ebf6d99 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -811,6 +811,15 @@ config NETFILTER_XT_MATCH_ADDRTYPE If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NETFILTER_XT_MATCH_BPF + tristate '"bpf" match support' + depends on NETFILTER_ADVANCED + help + BPF matching applies a linux socket filter to each packet and + accepts those for which the filter returns non-zero. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index b3bbda60945e..a1abf87d43bf 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -99,6 +99,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o # matches obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o +obj-$(CONFIG_NETFILTER_XT_MATCH_BPF) += xt_bpf.o obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c new file mode 100644 index 000000000000..12d4da8e6c77 --- /dev/null +++ b/net/netfilter/xt_bpf.c @@ -0,0 +1,73 @@ +/* Xtables module to match packets using a BPF filter. + * Copyright 2013 Google Inc. + * Written by Willem de Bruijn + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Willem de Bruijn "); +MODULE_DESCRIPTION("Xtables: BPF filter match"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_bpf"); +MODULE_ALIAS("ip6t_bpf"); + +static int bpf_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_bpf_info *info = par->matchinfo; + struct sock_fprog program; + + program.len = info->bpf_program_num_elem; + program.filter = (struct sock_filter __user *) info->bpf_program; + if (sk_unattached_filter_create(&info->filter, &program)) { + pr_info("bpf: check failed: parse error\n"); + return -EINVAL; + } + + return 0; +} + +static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_bpf_info *info = par->matchinfo; + + return SK_RUN_FILTER(info->filter, skb); +} + +static void bpf_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_bpf_info *info = par->matchinfo; + sk_unattached_filter_destroy(info->filter); +} + +static struct xt_match bpf_mt_reg __read_mostly = { + .name = "bpf", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = bpf_mt_check, + .match = bpf_mt, + .destroy = bpf_mt_destroy, + .matchsize = sizeof(struct xt_bpf_info), + .me = THIS_MODULE, +}; + +static int __init bpf_mt_init(void) +{ + return xt_register_match(&bpf_mt_reg); +} + +static void __exit bpf_mt_exit(void) +{ + xt_unregister_match(&bpf_mt_reg); +} + +module_init(bpf_mt_init); +module_exit(bpf_mt_exit); -- cgit v1.2.3-71-gd317 From e7db3cbcd6508235d63ba4a31bbd1ce4fdece6e1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 21 Jan 2013 12:30:59 +0100 Subject: netfilter: add missing xt_bpf.h header in installation Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 08f555fef13f..8b4bd36a7a84 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -35,6 +35,7 @@ header-y += xt_TCPOPTSTRIP.h header-y += xt_TEE.h header-y += xt_TPROXY.h header-y += xt_addrtype.h +header-y += xt_bpf.h header-y += xt_cluster.h header-y += xt_comment.h header-y += xt_connbytes.h -- cgit v1.2.3-71-gd317 From 8a454ab95e5ccbffd04363e9c028f60739bc3fa4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 21 Jan 2013 13:02:19 +0100 Subject: netfilter: add missing xt_connlabel.h header in installation In (c539f01 netfilter: add connlabel conntrack extension), it was missing the change to the Kbuild file to install the header in the system. Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 8b4bd36a7a84..41115776d76f 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -39,6 +39,7 @@ header-y += xt_bpf.h header-y += xt_cluster.h header-y += xt_comment.h header-y += xt_connbytes.h +header-y += xt_connlabel.h header-y += xt_connlimit.h header-y += xt_connmark.h header-y += xt_conntrack.h -- cgit v1.2.3-71-gd317 From bbb923a4c2d17ebd5ec34755fe19a33914cbd86f Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 21 Jan 2013 06:00:25 +0000 Subject: mcast: define and use MRT[6]_MAX in ip[6]_mroute_opt() This will ease further addition of new MRT[6]_* values and avoid to update in6.h each time. 
Note that we reduce the maximum value from 210 to 209, but 210 does not match any known value in ip[6]_mroute_setsockopt(). Signed-off-by: Nicolas Dichtel Acked-by: David L Stevens Signed-off-by: David S. Miller --- include/linux/mroute.h | 2 +- include/linux/mroute6.h | 2 +- include/uapi/linux/in6.h | 15 ++++----------- include/uapi/linux/mroute.h | 1 + include/uapi/linux/mroute6.h | 1 + 5 files changed, 8 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index ea00d9162ee5..79aaa9fc1a15 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -9,7 +9,7 @@ #ifdef CONFIG_IP_MROUTE static inline int ip_mroute_opt(int opt) { - return (opt >= MRT_BASE) && (opt <= MRT_BASE + 10); + return (opt >= MRT_BASE) && (opt <= MRT_MAX); } #else static inline int ip_mroute_opt(int opt) diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index a223561ba12e..66982e764051 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -10,7 +10,7 @@ #ifdef CONFIG_IPV6_MROUTE static inline int ip6_mroute_opt(int opt) { - return (opt >= MRT6_BASE) && (opt <= MRT6_BASE + 10); + return (opt >= MRT6_BASE) && (opt <= MRT6_MAX); } #else static inline int ip6_mroute_opt(int opt) diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 5673b97dcf54..53b1d56a6e7f 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -259,17 +259,10 @@ struct in6_flowlabel_req { /* * Multicast Routing: - * see include/linux/mroute6.h. + * see include/uapi/linux/mroute6.h. * - * MRT6_INIT 200 - * MRT6_DONE 201 - * MRT6_ADD_MIF 202 - * MRT6_DEL_MIF 203 - * MRT6_ADD_MFC 204 - * MRT6_DEL_MFC 205 - * MRT6_VERSION 206 - * MRT6_ASSERT 207 - * MRT6_PIM 208 - * (reserved) 209 + * MRT6_BASE 200 + * ... + * MRT6_MAX */ #endif /* _UAPI_LINUX_IN6_H */ diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h index 16929993acc4..1c11004af5db 100644 --- a/include/uapi/linux/mroute.h +++ b/include/uapi/linux/mroute.h @@ -26,6 +26,7 @@ #define MRT_ASSERT (MRT_BASE+7) /* Activate PIM assert mode */ #define MRT_PIM (MRT_BASE+8) /* enable PIM code */ #define MRT_TABLE (MRT_BASE+9) /* Specify mroute table ID */ +#define MRT_MAX (MRT_BASE+9) #define SIOCGETVIFCNT SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT (SIOCPROTOPRIVATE+1) diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index 3e89b5e7f9e3..c206ae3a2327 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -26,6 +26,7 @@ #define MRT6_ASSERT (MRT6_BASE+7) /* Activate PIM assert mode */ #define MRT6_PIM (MRT6_BASE+8) /* enable PIM code */ #define MRT6_TABLE (MRT6_BASE+9) /* Specify mroute table ID */ +#define MRT6_MAX (MRT6_BASE+9) #define SIOCGETMIFCNT_IN6 SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT_IN6 (SIOCPROTOPRIVATE+1) -- cgit v1.2.3-71-gd317 From 660b26dc1a8aeb33c2a2246ebf1b3684449a74b7 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 21 Jan 2013 06:00:26 +0000 Subject: mcast: add multicast proxy support (IPv4 and IPv6) This patch add the support of proxy multicast, ie being able to build a static multicast tree. It adds the support of (*,*) and (*,G) entries. The user should define an (*,*) entry which is not used for real forwarding. This entry defines the upstream in iif and contains all interfaces from the static tree in its oifs. It will be used to forward packet upstream when they come from an interface belonging to the static tree. 
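On the IPv4 side, a routing daemon could install that (*,*) upstream entry with the new MRT_ADD_MFC_PROXY option roughly as sketched below (illustrative only, not part of the patch; the IPv6 variant uses MRT6_ADD_MFC_PROXY with struct mf6cctl in the same way). The socket is the usual multicast-routing control socket and the vif numbers are placeholders:

#include <string.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <linux/mroute.h>

static int add_star_star(int mrt_sock, vifi_t upstream_vif,
			 const unsigned char ttls[MAXVIFS])
{
	struct mfcctl mc;

	memset(&mc, 0, sizeof(mc));
	mc.mfcc_origin.s_addr   = INADDR_ANY;	/* (*,*) entry */
	mc.mfcc_mcastgrp.s_addr = INADDR_ANY;
	mc.mfcc_parent          = upstream_vif;	/* iif = upstream */
	/* oifs of the static tree; per the note below, this includes
	 * the upstream vif as well */
	memcpy(mc.mfcc_ttls, ttls, MAXVIFS);
	return setsockopt(mrt_sock, IPPROTO_IP, MRT_ADD_MFC_PROXY,
			  &mc, sizeof(mc));
}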
Hence, the user should define (*,G) entries to build its static tree. Note that upstream interface must be part of oifs: packets are sent to all oifs interfaces except the input interface. This ensures to always join the whole static tree, even if the packet is not coming from the upstream interface. Signed-off-by: Nicolas Dichtel Acked-by: David L Stevens Signed-off-by: David S. Miller --- include/uapi/linux/mroute.h | 4 +- include/uapi/linux/mroute6.h | 4 +- net/ipv4/ipmr.c | 119 +++++++++++++++++++++++++++++++++++----- net/ipv6/ip6mr.c | 126 ++++++++++++++++++++++++++++++++++++++----- 4 files changed, 225 insertions(+), 28 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h index 1c11004af5db..a382d2c04a42 100644 --- a/include/uapi/linux/mroute.h +++ b/include/uapi/linux/mroute.h @@ -26,7 +26,9 @@ #define MRT_ASSERT (MRT_BASE+7) /* Activate PIM assert mode */ #define MRT_PIM (MRT_BASE+8) /* enable PIM code */ #define MRT_TABLE (MRT_BASE+9) /* Specify mroute table ID */ -#define MRT_MAX (MRT_BASE+9) +#define MRT_ADD_MFC_PROXY (MRT_BASE+10) /* Add a (*,*|G) mfc entry */ +#define MRT_DEL_MFC_PROXY (MRT_BASE+11) /* Del a (*,*|G) mfc entry */ +#define MRT_MAX (MRT_BASE+11) #define SIOCGETVIFCNT SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT (SIOCPROTOPRIVATE+1) diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index c206ae3a2327..ce91215cf7e6 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -26,7 +26,9 @@ #define MRT6_ASSERT (MRT6_BASE+7) /* Activate PIM assert mode */ #define MRT6_PIM (MRT6_BASE+8) /* enable PIM code */ #define MRT6_TABLE (MRT6_BASE+9) /* Specify mroute table ID */ -#define MRT6_MAX (MRT6_BASE+9) +#define MRT6_ADD_MFC_PROXY (MRT6_BASE+10) /* Add a (*,*|G) mfc entry */ +#define MRT6_DEL_MFC_PROXY (MRT6_BASE+11) /* Del a (*,*|G) mfc entry */ +#define MRT6_MAX (MRT6_BASE+11) #define SIOCGETMIFCNT_IN6 SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT_IN6 (SIOCPROTOPRIVATE+1) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a9454cbd953c..4b5e22670d44 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -828,6 +828,49 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, return NULL; } +/* Look for a (*,*,oif) entry */ +static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt, + int vifi) +{ + int line = MFC_HASH(INADDR_ANY, INADDR_ANY); + struct mfc_cache *c; + + list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) + if (c->mfc_origin == INADDR_ANY && + c->mfc_mcastgrp == INADDR_ANY && + c->mfc_un.res.ttls[vifi] < 255) + return c; + + return NULL; +} + +/* Look for a (*,G) entry */ +static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt, + __be32 mcastgrp, int vifi) +{ + int line = MFC_HASH(mcastgrp, INADDR_ANY); + struct mfc_cache *c, *proxy; + + if (mcastgrp == INADDR_ANY) + goto skip; + + list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) + if (c->mfc_origin == INADDR_ANY && + c->mfc_mcastgrp == mcastgrp) { + if (c->mfc_un.res.ttls[vifi] < 255) + return c; + + /* It's ok if the vifi is part of the static tree */ + proxy = ipmr_cache_find_any_parent(mrt, + c->mfc_parent); + if (proxy && proxy->mfc_un.res.ttls[vifi] < 255) + return c; + } + +skip: + return ipmr_cache_find_any_parent(mrt, vifi); +} + /* * Allocate a multicast cache entry */ @@ -1053,7 +1096,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) * MFC cache manipulation by 
user space mroute daemon */ -static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) +static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) { int line; struct mfc_cache *c, *next; @@ -1062,7 +1105,8 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && - c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { + c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr && + (parent == -1 || parent == c->mfc_parent)) { list_del_rcu(&c->list); mroute_netlink_event(mrt, c, RTM_DELROUTE); ipmr_cache_free(c); @@ -1073,7 +1117,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) } static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, - struct mfcctl *mfc, int mrtsock) + struct mfcctl *mfc, int mrtsock, int parent) { bool found = false; int line; @@ -1086,7 +1130,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && - c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { + c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr && + (parent == -1 || parent == c->mfc_parent)) { found = true; break; } @@ -1103,7 +1148,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, return 0; } - if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) + if (mfc->mfcc_mcastgrp.s_addr != INADDR_ANY && + !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) return -EINVAL; c = ipmr_cache_alloc(); @@ -1218,7 +1264,7 @@ static void mrtsock_destruct(struct sock *sk) int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) { - int ret; + int ret, parent = 0; struct vifctl vif; struct mfcctl mfc; struct net *net = sock_net(sk); @@ -1287,16 +1333,22 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi */ case MRT_ADD_MFC: case MRT_DEL_MFC: + parent = -1; + case MRT_ADD_MFC_PROXY: + case MRT_DEL_MFC_PROXY: if (optlen != sizeof(mfc)) return -EINVAL; if (copy_from_user(&mfc, optval, sizeof(mfc))) return -EFAULT; + if (parent == 0) + parent = mfc.mfcc_parent; rtnl_lock(); - if (optname == MRT_DEL_MFC) - ret = ipmr_mfc_delete(mrt, &mfc); + if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) + ret = ipmr_mfc_delete(mrt, &mfc, parent); else ret = ipmr_mfc_add(net, mrt, &mfc, - sk == rtnl_dereference(mrt->mroute_sk)); + sk == rtnl_dereference(mrt->mroute_sk), + parent); rtnl_unlock(); return ret; /* @@ -1749,17 +1801,28 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, { int psend = -1; int vif, ct; + int true_vifi = ipmr_find_vif(mrt, skb->dev); vif = cache->mfc_parent; cache->mfc_un.res.pkt++; cache->mfc_un.res.bytes += skb->len; + if (cache->mfc_origin == INADDR_ANY && true_vifi >= 0) { + struct mfc_cache *cache_proxy; + + /* For an (*,G) entry, we only check that the incomming + * interface is part of the static tree. + */ + cache_proxy = ipmr_cache_find_any_parent(mrt, vif); + if (cache_proxy && + cache_proxy->mfc_un.res.ttls[true_vifi] < 255) + goto forward; + } + /* * Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif_table[vif].dev != skb->dev) { - int true_vifi; - if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. * Very complicated situation... 
@@ -1776,7 +1839,6 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, } cache->mfc_un.res.wrong_if++; - true_vifi = ipmr_find_vif(mrt, skb->dev); if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -1794,15 +1856,33 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, goto dont_forward; } +forward: mrt->vif_table[vif].pkt_in++; mrt->vif_table[vif].bytes_in += skb->len; /* * Forward the frame */ + if (cache->mfc_origin == INADDR_ANY && + cache->mfc_mcastgrp == INADDR_ANY) { + if (true_vifi >= 0 && + true_vifi != cache->mfc_parent && + ip_hdr(skb)->ttl > + cache->mfc_un.res.ttls[cache->mfc_parent]) { + /* It's an (*,*) entry and the packet is not coming from + * the upstream: forward the packet to the upstream + * only. + */ + psend = cache->mfc_parent; + goto last_forward; + } + goto dont_forward; + } for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { - if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { + /* For (*,G) entry, don't forward to the incoming interface */ + if ((cache->mfc_origin != INADDR_ANY || ct != true_vifi) && + ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -1813,6 +1893,7 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, psend = ct; } } +last_forward: if (psend != -1) { if (local) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -1902,6 +1983,13 @@ int ip_mr_input(struct sk_buff *skb) /* already under rcu_read_lock() */ cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); + if (cache == NULL) { + int vif = ipmr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, + vif); + } /* * No usable cache entry @@ -2107,7 +2195,12 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, rcu_read_lock(); cache = ipmr_cache_find(mrt, saddr, daddr); + if (cache == NULL && skb->dev) { + int vif = ipmr_find_vif(mrt, skb->dev); + if (vif >= 0) + cache = ipmr_cache_find_any(mrt, daddr, vif); + } if (cache == NULL) { struct sk_buff *skb2; struct iphdr *iph; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 26dcdec9e3a5..acc32494006a 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1017,6 +1017,50 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt, return NULL; } +/* Look for a (*,*,oif) entry */ +static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt, + mifi_t mifi) +{ + int line = MFC6_HASH(&in6addr_any, &in6addr_any); + struct mfc6_cache *c; + + list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_any(&c->mf6c_mcastgrp) && + (c->mfc_un.res.ttls[mifi] < 255)) + return c; + + return NULL; +} + +/* Look for a (*,G) entry */ +static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt, + struct in6_addr *mcastgrp, + mifi_t mifi) +{ + int line = MFC6_HASH(mcastgrp, &in6addr_any); + struct mfc6_cache *c, *proxy; + + if (ipv6_addr_any(mcastgrp)) + goto skip; + + list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) { + if (c->mfc_un.res.ttls[mifi] < 255) + return c; + + /* It's ok if the mifi is part of the static tree */ + proxy = ip6mr_cache_find_any_parent(mrt, + c->mf6c_parent); + if (proxy && proxy->mfc_un.res.ttls[mifi] < 255) + return c; + } + +skip: + return ip6mr_cache_find_any_parent(mrt, mifi); +} + /* 
* Allocate a multicast cache entry */ @@ -1247,7 +1291,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) * MFC6 cache manipulation by user space */ -static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) +static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc, + int parent) { int line; struct mfc6_cache *c, *next; @@ -1256,7 +1301,9 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { + ipv6_addr_equal(&c->mf6c_mcastgrp, + &mfc->mf6cc_mcastgrp.sin6_addr) && + (parent == -1 || parent == c->mf6c_parent)) { write_lock_bh(&mrt_lock); list_del(&c->list); write_unlock_bh(&mrt_lock); @@ -1391,7 +1438,7 @@ void ip6_mr_cleanup(void) } static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, - struct mf6cctl *mfc, int mrtsock) + struct mf6cctl *mfc, int mrtsock, int parent) { bool found = false; int line; @@ -1413,7 +1460,9 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { + ipv6_addr_equal(&c->mf6c_mcastgrp, + &mfc->mf6cc_mcastgrp.sin6_addr) && + (parent == -1 || parent == mfc->mf6cc_parent)) { found = true; break; } @@ -1430,7 +1479,8 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, return 0; } - if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) + if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) && + !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) return -EINVAL; c = ip6mr_cache_alloc(); @@ -1596,7 +1646,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) { - int ret; + int ret, parent = 0; struct mif6ctl vif; struct mf6cctl mfc; mifi_t mifi; @@ -1653,15 +1703,21 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns */ case MRT6_ADD_MFC: case MRT6_DEL_MFC: + parent = -1; + case MRT6_ADD_MFC_PROXY: + case MRT6_DEL_MFC_PROXY: if (optlen < sizeof(mfc)) return -EINVAL; if (copy_from_user(&mfc, optval, sizeof(mfc))) return -EFAULT; + if (parent == 0) + parent = mfc.mf6cc_parent; rtnl_lock(); - if (optname == MRT6_DEL_MFC) - ret = ip6mr_mfc_delete(mrt, &mfc); + if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY) + ret = ip6mr_mfc_delete(mrt, &mfc, parent); else - ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk); + ret = ip6mr_mfc_add(net, mrt, &mfc, + sk == mrt->mroute6_sk, parent); rtnl_unlock(); return ret; @@ -2015,19 +2071,29 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, { int psend = -1; int vif, ct; + int true_vifi = ip6mr_find_vif(mrt, skb->dev); vif = cache->mf6c_parent; cache->mfc_un.res.pkt++; cache->mfc_un.res.bytes += skb->len; + if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) { + struct mfc6_cache *cache_proxy; + + /* For an (*,G) entry, we only check that the incomming + * interface is part of the static tree. + */ + cache_proxy = ip6mr_cache_find_any_parent(mrt, vif); + if (cache_proxy && + cache_proxy->mfc_un.res.ttls[true_vifi] < 255) + goto forward; + } + /* * Wrong interface: drop packet and (maybe) send PIM assert. 
*/ if (mrt->vif6_table[vif].dev != skb->dev) { - int true_vifi; - cache->mfc_un.res.wrong_if++; - true_vifi = ip6mr_find_vif(mrt, skb->dev); if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -2045,14 +2111,32 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, goto dont_forward; } +forward: mrt->vif6_table[vif].pkt_in++; mrt->vif6_table[vif].bytes_in += skb->len; /* * Forward the frame */ + if (ipv6_addr_any(&cache->mf6c_origin) && + ipv6_addr_any(&cache->mf6c_mcastgrp)) { + if (true_vifi >= 0 && + true_vifi != cache->mf6c_parent && + ipv6_hdr(skb)->hop_limit > + cache->mfc_un.res.ttls[cache->mf6c_parent]) { + /* It's an (*,*) entry and the packet is not coming from + * the upstream: forward the packet to the upstream + * only. + */ + psend = cache->mf6c_parent; + goto last_forward; + } + goto dont_forward; + } for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { - if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { + /* For (*,G) entry, don't forward to the incoming interface */ + if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) && + ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) @@ -2061,6 +2145,7 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, psend = ct; } } +last_forward: if (psend != -1) { ip6mr_forward2(net, mrt, skb, cache, psend); return 0; @@ -2096,6 +2181,14 @@ int ip6_mr_input(struct sk_buff *skb) read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); + if (cache == NULL) { + int vif = ip6mr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ip6mr_cache_find_any(mrt, + &ipv6_hdr(skb)->daddr, + vif); + } /* * No usable cache entry @@ -2183,6 +2276,13 @@ int ip6mr_get_route(struct net *net, read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); + if (!cache && skb->dev) { + int vif = ip6mr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr, + vif); + } if (!cache) { struct sk_buff *skb2; -- cgit v1.2.3-71-gd317 From 7e58d5aea8abb993983a3f3088fd4a3f06180a1c Mon Sep 17 00:00:00 2001 From: Amos Kong Date: Mon, 21 Jan 2013 01:17:23 +0000 Subject: virtio-net: introduce a new control to set macaddr Currently we write MAC address to pci config space byte by byte, this means that we have an intermediate step where mac is wrong. This patch introduced a new control command to set MAC address, it's atomic. VIRTIO_NET_F_CTRL_MAC_ADDR is a new feature bit for compatibility. Signed-off-by: Amos Kong Signed-off-by: David S. 
Miller --- drivers/net/virtio_net.c | 21 ++++++++++++++++++--- include/uapi/linux/virtio_net.h | 8 +++++++- 2 files changed, 25 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 395ab4ff3e64..701408a1ded6 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -802,14 +802,28 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p) struct virtnet_info *vi = netdev_priv(dev); struct virtio_device *vdev = vi->vdev; int ret; + struct sockaddr *addr = p; + struct scatterlist sg; - ret = eth_mac_addr(dev, p); + ret = eth_prepare_mac_addr_change(dev, p); if (ret) return ret; - if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) + if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { + sg_init_one(&sg, addr->sa_data, dev->addr_len); + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, + VIRTIO_NET_CTRL_MAC_ADDR_SET, + &sg, 1, 0)) { + dev_warn(&vdev->dev, + "Failed to set mac address by vq command.\n"); + return -EINVAL; + } + } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { vdev->config->set(vdev, offsetof(struct virtio_net_config, mac), - dev->dev_addr, dev->addr_len); + addr->sa_data, dev->addr_len); + } + + eth_commit_mac_addr_change(dev, p); return 0; } @@ -1627,6 +1641,7 @@ static unsigned int features[] = { VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, + VIRTIO_NET_F_CTRL_MAC_ADDR, }; static struct virtio_driver virtio_net_driver = { diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 848e3584d7c8..a5a8c88753b9 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -53,6 +53,7 @@ * network */ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ +#define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ #define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ @@ -127,7 +128,7 @@ typedef __u8 virtio_net_ctrl_ack; #define VIRTIO_NET_CTRL_RX_NOBCAST 5 /* - * Control the MAC filter table. + * Control the MAC * * The MAC filter table is managed by the hypervisor, the guest should * assume the size is infinite. Filtering should be considered @@ -140,6 +141,10 @@ typedef __u8 virtio_net_ctrl_ack; * first sg list contains unicast addresses, the second is for multicast. * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature * is available. + * + * The ADDR_SET command requests one out scatterlist, it contains a + * 6 bytes MAC address. This functionality is present if the + * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available. */ struct virtio_net_ctrl_mac { __u32 entries; @@ -148,6 +153,7 @@ struct virtio_net_ctrl_mac { #define VIRTIO_NET_CTRL_MAC 1 #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 + #define VIRTIO_NET_CTRL_MAC_ADDR_SET 1 /* * Control VLAN filtering -- cgit v1.2.3-71-gd317 From 749cf76c5a363e1383108a914ea09530bfa0bd43 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 20 Jan 2013 18:28:06 -0500 Subject: KVM: ARM: Initial skeleton to compile KVM support Targets KVM support for Cortex A-15 processors. Contains all the framework components, make files, header files, some tracing functionality, and basic user space API. Only supported core is Cortex-A15 for now. Most functionality is in arch/arm/kvm/* or arch/arm/include/asm/kvm_*.h. 
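A minimal userspace sketch (not part of the patch) of the vcpu bring-up this adds: after KVM_CREATE_VCPU, userspace must issue KVM_ARM_VCPU_INIT before KVM_RUN, otherwise KVM_RUN fails with ENOEXEC (see the api.txt hunk below). The target constant name is an assumption based on the Cortex-A15-only support described above.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int arm_vcpu_init(int vcpu_fd)
{
	struct kvm_vcpu_init init;

	memset(&init, 0, sizeof(init));
	init.target = KVM_ARM_TARGET_CORTEX_A15;	/* assumed constant name */
	/* init.features[] left zeroed: no optional features requested */
	return ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);
}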
Reviewed-by: Will Deacon Reviewed-by: Marcelo Tosatti Signed-off-by: Rusty Russell Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 57 +++++- arch/arm/Kconfig | 2 + arch/arm/Makefile | 1 + arch/arm/include/asm/kvm_arm.h | 24 +++ arch/arm/include/asm/kvm_asm.h | 58 ++++++ arch/arm/include/asm/kvm_coproc.h | 24 +++ arch/arm/include/asm/kvm_emulate.h | 50 ++++++ arch/arm/include/asm/kvm_host.h | 114 ++++++++++++ arch/arm/include/uapi/asm/kvm.h | 106 +++++++++++ arch/arm/kvm/Kconfig | 55 ++++++ arch/arm/kvm/Makefile | 21 +++ arch/arm/kvm/arm.c | 350 +++++++++++++++++++++++++++++++++++++ arch/arm/kvm/coproc.c | 23 +++ arch/arm/kvm/emulate.c | 155 ++++++++++++++++ arch/arm/kvm/guest.c | 221 +++++++++++++++++++++++ arch/arm/kvm/init.S | 19 ++ arch/arm/kvm/interrupts.S | 19 ++ arch/arm/kvm/mmu.c | 17 ++ arch/arm/kvm/reset.c | 74 ++++++++ arch/arm/kvm/trace.h | 52 ++++++ include/uapi/linux/kvm.h | 7 + 21 files changed, 1445 insertions(+), 4 deletions(-) create mode 100644 arch/arm/include/asm/kvm_arm.h create mode 100644 arch/arm/include/asm/kvm_asm.h create mode 100644 arch/arm/include/asm/kvm_coproc.h create mode 100644 arch/arm/include/asm/kvm_emulate.h create mode 100644 arch/arm/include/asm/kvm_host.h create mode 100644 arch/arm/include/uapi/asm/kvm.h create mode 100644 arch/arm/kvm/Kconfig create mode 100644 arch/arm/kvm/Makefile create mode 100644 arch/arm/kvm/arm.c create mode 100644 arch/arm/kvm/coproc.c create mode 100644 arch/arm/kvm/emulate.c create mode 100644 arch/arm/kvm/guest.c create mode 100644 arch/arm/kvm/init.S create mode 100644 arch/arm/kvm/interrupts.S create mode 100644 arch/arm/kvm/mmu.c create mode 100644 arch/arm/kvm/reset.c create mode 100644 arch/arm/kvm/trace.h (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a4df5535996b..4237c27ea612 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -293,7 +293,7 @@ kvm_run' (see below). 4.11 KVM_GET_REGS Capability: basic -Architectures: all +Architectures: all except ARM Type: vcpu ioctl Parameters: struct kvm_regs (out) Returns: 0 on success, -1 on error @@ -314,7 +314,7 @@ struct kvm_regs { 4.12 KVM_SET_REGS Capability: basic -Architectures: all +Architectures: all except ARM Type: vcpu ioctl Parameters: struct kvm_regs (in) Returns: 0 on success, -1 on error @@ -600,7 +600,7 @@ struct kvm_fpu { 4.24 KVM_CREATE_IRQCHIP Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64 +Architectures: x86, ia64, ARM Type: vm ioctl Parameters: none Returns: 0 on success, -1 on error @@ -608,7 +608,8 @@ Returns: 0 on success, -1 on error Creates an interrupt controller model in the kernel. On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23 -only go to the IOAPIC. On ia64, a IOSAPIC is created. +only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM, a GIC is +created. 4.25 KVM_IRQ_LINE @@ -1775,6 +1776,14 @@ registers, find a list below: PPC | KVM_REG_PPC_VPA_DTL | 128 PPC | KVM_REG_PPC_EPCR | 32 +ARM registers are mapped using the lower 32 bits. 
The upper 16 of that +is the register group type, or coprocessor number: + +ARM core registers have the following id bit patterns: + 0x4002 0000 0010 + + + 4.69 KVM_GET_ONE_REG Capability: KVM_CAP_ONE_REG @@ -2127,6 +2136,46 @@ written, then `n_invalid' invalid entries, invalidating any previously valid entries found. +4.77 KVM_ARM_VCPU_INIT + +Capability: basic +Architectures: arm +Type: vcpu ioctl +Parameters: struct struct kvm_vcpu_init (in) +Returns: 0 on success; -1 on error +Errors: +  EINVAL:    the target is unknown, or the combination of features is invalid. +  ENOENT:    a features bit specified is unknown. + +This tells KVM what type of CPU to present to the guest, and what +optional features it should have.  This will cause a reset of the cpu +registers to their initial values.  If this is not called, KVM_RUN will +return ENOEXEC for that vcpu. + +Note that because some registers reflect machine topology, all vcpus +should be created before this ioctl is invoked. + + +4.78 KVM_GET_REG_LIST + +Capability: basic +Architectures: arm +Type: vcpu ioctl +Parameters: struct kvm_reg_list (in/out) +Returns: 0 on success; -1 on error +Errors: +  E2BIG:     the reg index list is too big to fit in the array specified by +             the user (the number required will be written into n). + +struct kvm_reg_list { + __u64 n; /* number of registers in reg[] */ + __u64 reg[0]; +}; + +This ioctl returns the guest registers that are supported for the +KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. + + 5. The kvm_run structure ------------------------ diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 67874b82a4ed..e0627cdbcda5 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2322,3 +2322,5 @@ source "security/Kconfig" source "crypto/Kconfig" source "lib/Kconfig" + +source "arch/arm/kvm/Kconfig" diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 30c443c406f3..4bcd2d6b0535 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -252,6 +252,7 @@ core-$(CONFIG_FPE_NWFPE) += arch/arm/nwfpe/ core-$(CONFIG_FPE_FASTFPE) += $(FASTFPE_OBJ) core-$(CONFIG_VFP) += arch/arm/vfp/ core-$(CONFIG_XEN) += arch/arm/xen/ +core-$(CONFIG_KVM_ARM_HOST) += arch/arm/kvm/ # If we have a machine-specific directory, then include it in the build. core-y += arch/arm/kernel/ arch/arm/mm/ arch/arm/common/ diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h new file mode 100644 index 000000000000..dc678e193417 --- /dev/null +++ b/arch/arm/include/asm/kvm_arm.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef __ARM_KVM_ARM_H__ +#define __ARM_KVM_ARM_H__ + +#include + +#endif /* __ARM_KVM_ARM_H__ */ diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h new file mode 100644 index 000000000000..f9993e5fb695 --- /dev/null +++ b/arch/arm/include/asm/kvm_asm.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __ARM_KVM_ASM_H__ +#define __ARM_KVM_ASM_H__ + +/* 0 is reserved as an invalid value. */ +#define c0_MPIDR 1 /* MultiProcessor ID Register */ +#define c0_CSSELR 2 /* Cache Size Selection Register */ +#define c1_SCTLR 3 /* System Control Register */ +#define c1_ACTLR 4 /* Auxilliary Control Register */ +#define c1_CPACR 5 /* Coprocessor Access Control */ +#define c2_TTBR0 6 /* Translation Table Base Register 0 */ +#define c2_TTBR0_high 7 /* TTBR0 top 32 bits */ +#define c2_TTBR1 8 /* Translation Table Base Register 1 */ +#define c2_TTBR1_high 9 /* TTBR1 top 32 bits */ +#define c2_TTBCR 10 /* Translation Table Base Control R. */ +#define c3_DACR 11 /* Domain Access Control Register */ +#define c5_DFSR 12 /* Data Fault Status Register */ +#define c5_IFSR 13 /* Instruction Fault Status Register */ +#define c5_ADFSR 14 /* Auxilary Data Fault Status R */ +#define c5_AIFSR 15 /* Auxilary Instrunction Fault Status R */ +#define c6_DFAR 16 /* Data Fault Address Register */ +#define c6_IFAR 17 /* Instruction Fault Address Register */ +#define c9_L2CTLR 18 /* Cortex A15 L2 Control Register */ +#define c10_PRRR 19 /* Primary Region Remap Register */ +#define c10_NMRR 20 /* Normal Memory Remap Register */ +#define c12_VBAR 21 /* Vector Base Address Register */ +#define c13_CID 22 /* Context ID Register */ +#define c13_TID_URW 23 /* Thread ID, User R/W */ +#define c13_TID_URO 24 /* Thread ID, User R/O */ +#define c13_TID_PRIV 25 /* Thread ID, Privileged */ +#define NR_CP15_REGS 26 /* Number of regs (incl. invalid) */ + +#define ARM_EXCEPTION_RESET 0 +#define ARM_EXCEPTION_UNDEFINED 1 +#define ARM_EXCEPTION_SOFTWARE 2 +#define ARM_EXCEPTION_PREF_ABORT 3 +#define ARM_EXCEPTION_DATA_ABORT 4 +#define ARM_EXCEPTION_IRQ 5 +#define ARM_EXCEPTION_FIQ 6 + +#endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm/include/asm/kvm_coproc.h b/arch/arm/include/asm/kvm_coproc.h new file mode 100644 index 000000000000..b6d023deb426 --- /dev/null +++ b/arch/arm/include/asm/kvm_coproc.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2012 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __ARM_KVM_COPROC_H__ +#define __ARM_KVM_COPROC_H__ +#include + +void kvm_reset_coprocs(struct kvm_vcpu *vcpu); + +#endif /* __ARM_KVM_COPROC_H__ */ diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h new file mode 100644 index 000000000000..17dad674b90f --- /dev/null +++ b/arch/arm/include/asm/kvm_emulate.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __ARM_KVM_EMULATE_H__ +#define __ARM_KVM_EMULATE_H__ + +#include +#include + +u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); +u32 *vcpu_spsr(struct kvm_vcpu *vcpu); + +static inline u32 *vcpu_pc(struct kvm_vcpu *vcpu) +{ + return (u32 *)&vcpu->arch.regs.usr_regs.ARM_pc; +} + +static inline u32 *vcpu_cpsr(struct kvm_vcpu *vcpu) +{ + return (u32 *)&vcpu->arch.regs.usr_regs.ARM_cpsr; +} + +static inline bool mode_has_spsr(struct kvm_vcpu *vcpu) +{ + unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK; + return (cpsr_mode > USR_MODE && cpsr_mode < SYSTEM_MODE); +} + +static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu) +{ + unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK; + return cpsr_mode > USR_MODE;; +} + +#endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h new file mode 100644 index 000000000000..0d9938a20751 --- /dev/null +++ b/arch/arm/include/asm/kvm_host.h @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __ARM_KVM_HOST_H__ +#define __ARM_KVM_HOST_H__ + +#include +#include + +#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS +#define KVM_MEMORY_SLOTS 32 +#define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + +#define KVM_VCPU_MAX_FEATURES 0 + +/* We don't currently support large pages. 
*/ +#define KVM_HPAGE_GFN_SHIFT(x) 0 +#define KVM_NR_PAGE_SIZES 1 +#define KVM_PAGES_PER_HPAGE(x) (1UL<<31) + +struct kvm_vcpu; +u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); +int kvm_target_cpu(void); +int kvm_reset_vcpu(struct kvm_vcpu *vcpu); +void kvm_reset_coprocs(struct kvm_vcpu *vcpu); + +struct kvm_arch { + /* VTTBR value associated with below pgd and vmid */ + u64 vttbr; + + /* + * Anything that is not used directly from assembly code goes + * here. + */ + + /* The VMID generation used for the virt. memory system */ + u64 vmid_gen; + u32 vmid; + + /* Stage-2 page table */ + pgd_t *pgd; +}; + +#define KVM_NR_MEM_OBJS 40 + +/* + * We don't want allocation failures within the mmu code, so we preallocate + * enough memory for a single page fault in a cache. + */ +struct kvm_mmu_memory_cache { + int nobjs; + void *objects[KVM_NR_MEM_OBJS]; +}; + +struct kvm_vcpu_arch { + struct kvm_regs regs; + + int target; /* Processor target */ + DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); + + /* System control coprocessor (cp15) */ + u32 cp15[NR_CP15_REGS]; + + /* The CPU type we expose to the VM */ + u32 midr; + + /* Exception Information */ + u32 hsr; /* Hyp Syndrome Register */ + u32 hxfar; /* Hyp Data/Inst Fault Address Register */ + u32 hpfar; /* Hyp IPA Fault Address Register */ + + /* Interrupt related fields */ + u32 irq_lines; /* IRQ and FIQ levels */ + + /* Hyp exception information */ + u32 hyp_pc; /* PC when exception was taken from Hyp mode */ + + /* Cache some mmu pages needed inside spinlock regions */ + struct kvm_mmu_memory_cache mmu_page_cache; +}; + +struct kvm_vm_stat { + u32 remote_tlb_flush; +}; + +struct kvm_vcpu_stat { + u32 halt_wakeup; +}; + +struct kvm_vcpu_init; +int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init); +unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); +int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); +struct kvm_one_reg; +int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +#endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h new file mode 100644 index 000000000000..1083327b5fcd --- /dev/null +++ b/arch/arm/include/uapi/asm/kvm.h @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef __ARM_KVM_H__ +#define __ARM_KVM_H__ + +#include +#include + +#define __KVM_HAVE_GUEST_DEBUG + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + +/* Valid for svc_regs, abt_regs, und_regs, irq_regs in struct kvm_regs */ +#define KVM_ARM_SVC_sp svc_regs[0] +#define KVM_ARM_SVC_lr svc_regs[1] +#define KVM_ARM_SVC_spsr svc_regs[2] +#define KVM_ARM_ABT_sp abt_regs[0] +#define KVM_ARM_ABT_lr abt_regs[1] +#define KVM_ARM_ABT_spsr abt_regs[2] +#define KVM_ARM_UND_sp und_regs[0] +#define KVM_ARM_UND_lr und_regs[1] +#define KVM_ARM_UND_spsr und_regs[2] +#define KVM_ARM_IRQ_sp irq_regs[0] +#define KVM_ARM_IRQ_lr irq_regs[1] +#define KVM_ARM_IRQ_spsr irq_regs[2] + +/* Valid only for fiq_regs in struct kvm_regs */ +#define KVM_ARM_FIQ_r8 fiq_regs[0] +#define KVM_ARM_FIQ_r9 fiq_regs[1] +#define KVM_ARM_FIQ_r10 fiq_regs[2] +#define KVM_ARM_FIQ_fp fiq_regs[3] +#define KVM_ARM_FIQ_ip fiq_regs[4] +#define KVM_ARM_FIQ_sp fiq_regs[5] +#define KVM_ARM_FIQ_lr fiq_regs[6] +#define KVM_ARM_FIQ_spsr fiq_regs[7] + +struct kvm_regs { + struct pt_regs usr_regs;/* R0_usr - R14_usr, PC, CPSR */ + __u32 svc_regs[3]; /* SP_svc, LR_svc, SPSR_svc */ + __u32 abt_regs[3]; /* SP_abt, LR_abt, SPSR_abt */ + __u32 und_regs[3]; /* SP_und, LR_und, SPSR_und */ + __u32 irq_regs[3]; /* SP_irq, LR_irq, SPSR_irq */ + __u32 fiq_regs[8]; /* R8_fiq - R14_fiq, SPSR_fiq */ +}; + +/* Supported Processor Types */ +#define KVM_ARM_TARGET_CORTEX_A15 0 +#define KVM_ARM_NUM_TARGETS 1 + +struct kvm_vcpu_init { + __u32 target; + __u32 features[7]; +}; + +struct kvm_sregs { +}; + +struct kvm_fpu { +}; + +struct kvm_guest_debug_arch { +}; + +struct kvm_debug_exit_arch { +}; + +struct kvm_sync_regs { +}; + +struct kvm_arch_memory_slot { +}; + +/* If you need to interpret the index values, here is the key: */ +#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 +#define KVM_REG_ARM_COPROC_SHIFT 16 +#define KVM_REG_ARM_32_OPC2_MASK 0x0000000000000007 +#define KVM_REG_ARM_32_OPC2_SHIFT 0 +#define KVM_REG_ARM_OPC1_MASK 0x0000000000000078 +#define KVM_REG_ARM_OPC1_SHIFT 3 +#define KVM_REG_ARM_CRM_MASK 0x0000000000000780 +#define KVM_REG_ARM_CRM_SHIFT 7 +#define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800 +#define KVM_REG_ARM_32_CRN_SHIFT 11 + +/* Normal registers are mapped as coprocessor 16. */ +#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4) + +#endif /* __ARM_KVM_H__ */ diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig new file mode 100644 index 000000000000..4a01b6fbf380 --- /dev/null +++ b/arch/arm/kvm/Kconfig @@ -0,0 +1,55 @@ +# +# KVM configuration +# + +source "virt/kvm/Kconfig" + +menuconfig VIRTUALIZATION + bool "Virtualization" + ---help--- + Say Y here to get to see options for using your Linux host to run + other operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and + disabled. + +if VIRTUALIZATION + +config KVM + bool "Kernel-based Virtual Machine (KVM) support" + select PREEMPT_NOTIFIERS + select ANON_INODES + select KVM_MMIO + select KVM_ARM_HOST + depends on ARM_VIRT_EXT && ARM_LPAE + ---help--- + Support hosting virtualized guest machines. You will also + need to select one or more of the processor modules below. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + If unsure, say N. 
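# Illustration only, assuming the dependencies and selects shown in this
# file: a host .config fragment enabling this port would be expected to
# look roughly like
#
#   CONFIG_VIRTUALIZATION=y
#   CONFIG_KVM=y
#   CONFIG_KVM_ARM_HOST=y
#   CONFIG_KVM_ARM_MAX_VCPUS=4
#
# where KVM_ARM_HOST is normally pulled in via "select KVM_ARM_HOST".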
+ +config KVM_ARM_HOST + bool "KVM host support for ARM cpus." + depends on KVM + depends on MMU + ---help--- + Provides host support for ARM processors. + +config KVM_ARM_MAX_VCPUS + int "Number maximum supported virtual CPUs per VM" + depends on KVM_ARM_HOST + default 4 + help + Static number of max supported virtual CPUs per VM. + + If you choose a high number, the vcpu structures will be quite + large, so only choose a reasonable number that you expect to + actually use. + +source drivers/virtio/Kconfig + +endif # VIRTUALIZATION diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile new file mode 100644 index 000000000000..dfc293f277b3 --- /dev/null +++ b/arch/arm/kvm/Makefile @@ -0,0 +1,21 @@ +# +# Makefile for Kernel-based Virtual Machine module +# + +plus_virt := $(call as-instr,.arch_extension virt,+virt) +ifeq ($(plus_virt),+virt) + plus_virt_def := -DREQUIRES_VIRT=1 +endif + +ccflags-y += -Ivirt/kvm -Iarch/arm/kvm +CFLAGS_arm.o := -I. $(plus_virt_def) +CFLAGS_mmu.o := -I. + +AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) +AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) + +kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) + +obj-y += kvm-arm.o init.o interrupts.o +obj-y += arm.o guest.o mmu.o emulate.o reset.o +obj-y += coproc.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c new file mode 100644 index 000000000000..d3506b4001aa --- /dev/null +++ b/arch/arm/kvm/arm.c @@ -0,0 +1,350 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include "trace.h" + +#include +#include +#include +#include +#include + +#ifdef REQUIRES_VIRT +__asm__(".arch_extension virt"); +#endif + +int kvm_arch_hardware_enable(void *garbage) +{ + return 0; +} + +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; +} + +void kvm_arch_hardware_disable(void *garbage) +{ +} + +int kvm_arch_hardware_setup(void) +{ + return 0; +} + +void kvm_arch_hardware_unsetup(void) +{ +} + +void kvm_arch_check_processor_compat(void *rtn) +{ + *(int *)rtn = 0; +} + +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +{ + if (type) + return -EINVAL; + + return 0; +} + +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +void kvm_arch_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +{ + return 0; +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + int i; + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm_arch_vcpu_free(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } + } +} + +int kvm_dev_ioctl_check_extension(long ext) +{ + int r; + switch (ext) { + case KVM_CAP_USER_MEMORY: + case KVM_CAP_SYNC_MMU: + case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: + case KVM_CAP_ONE_REG: + r = 1; + break; + case KVM_CAP_COALESCED_MMIO: + r = KVM_COALESCED_MMIO_PAGE_OFFSET; + break; + case KVM_CAP_NR_VCPUS: + r = num_online_cpus(); + break; + case KVM_CAP_MAX_VCPUS: + r = KVM_MAX_VCPUS; + break; + default: + r = 0; + break; + } + return r; +} + +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +int kvm_arch_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + return 0; +} + +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_memory_slot old, + struct kvm_userspace_memory_region *mem, + int user_alloc) +{ + return 0; +} + +void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ +} + +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ +} + +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) +{ + int err; + struct kvm_vcpu *vcpu; + + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu) { + err = -ENOMEM; + goto out; + } + + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + + return vcpu; +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu); +out: + return ERR_PTR(err); +} + +int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) +{ + return 0; +} + +void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) +{ +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + kvm_arch_vcpu_free(vcpu); +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return 0; +} + +int __attribute_const__ kvm_target_cpu(void) +{ + unsigned long implementor = read_cpuid_implementor(); + unsigned long part_number = read_cpuid_part_number(); + + if (implementor != ARM_CPU_IMP_ARM) + return -EINVAL; + + switch (part_number) { + case ARM_CPU_PART_CORTEX_A15: + return 
KVM_ARM_TARGET_CORTEX_A15; + default: + return -EINVAL; + } +} + +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) +{ + return 0; +} + +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ +} + +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + return -EINVAL; +} + + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) +{ + return 0; +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + return -EINVAL; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + + switch (ioctl) { + case KVM_ARM_VCPU_INIT: { + struct kvm_vcpu_init init; + + if (copy_from_user(&init, argp, sizeof(init))) + return -EFAULT; + + return kvm_vcpu_set_target(vcpu, &init); + + } + case KVM_SET_ONE_REG: + case KVM_GET_ONE_REG: { + struct kvm_one_reg reg; + if (copy_from_user(®, argp, sizeof(reg))) + return -EFAULT; + if (ioctl == KVM_SET_ONE_REG) + return kvm_arm_set_reg(vcpu, ®); + else + return kvm_arm_get_reg(vcpu, ®); + } + case KVM_GET_REG_LIST: { + struct kvm_reg_list __user *user_list = argp; + struct kvm_reg_list reg_list; + unsigned n; + + if (copy_from_user(®_list, user_list, sizeof(reg_list))) + return -EFAULT; + n = reg_list.n; + reg_list.n = kvm_arm_num_regs(vcpu); + if (copy_to_user(user_list, ®_list, sizeof(reg_list))) + return -EFAULT; + if (n < reg_list.n) + return -E2BIG; + return kvm_arm_copy_reg_indices(vcpu, user_list->reg); + } + default: + return -EINVAL; + } +} + +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +{ + return -EINVAL; +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +int kvm_arch_init(void *opaque) +{ + return 0; +} + +/* NOP: Compiling as a module not supported */ +void kvm_arch_exit(void) +{ +} + +static int arm_init(void) +{ + int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + return rc; +} + +module_init(arm_init); diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c new file mode 100644 index 000000000000..0c433558591c --- /dev/null +++ b/arch/arm/kvm/coproc.c @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Authors: Rusty Russell + * Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ +#include + +void kvm_reset_coprocs(struct kvm_vcpu *vcpu) +{ +} diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c new file mode 100644 index 000000000000..3eadc25e95de --- /dev/null +++ b/arch/arm/kvm/emulate.c @@ -0,0 +1,155 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include + +#define VCPU_NR_MODES 6 +#define VCPU_REG_OFFSET_USR 0 +#define VCPU_REG_OFFSET_FIQ 1 +#define VCPU_REG_OFFSET_IRQ 2 +#define VCPU_REG_OFFSET_SVC 3 +#define VCPU_REG_OFFSET_ABT 4 +#define VCPU_REG_OFFSET_UND 5 +#define REG_OFFSET(_reg) \ + (offsetof(struct kvm_regs, _reg) / sizeof(u32)) + +#define USR_REG_OFFSET(_num) REG_OFFSET(usr_regs.uregs[_num]) + +static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][15] = { + /* USR/SYS Registers */ + [VCPU_REG_OFFSET_USR] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14), + }, + + /* FIQ Registers */ + [VCPU_REG_OFFSET_FIQ] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), + REG_OFFSET(fiq_regs[0]), /* r8 */ + REG_OFFSET(fiq_regs[1]), /* r9 */ + REG_OFFSET(fiq_regs[2]), /* r10 */ + REG_OFFSET(fiq_regs[3]), /* r11 */ + REG_OFFSET(fiq_regs[4]), /* r12 */ + REG_OFFSET(fiq_regs[5]), /* r13 */ + REG_OFFSET(fiq_regs[6]), /* r14 */ + }, + + /* IRQ Registers */ + [VCPU_REG_OFFSET_IRQ] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(irq_regs[0]), /* r13 */ + REG_OFFSET(irq_regs[1]), /* r14 */ + }, + + /* SVC Registers */ + [VCPU_REG_OFFSET_SVC] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(svc_regs[0]), /* r13 */ + REG_OFFSET(svc_regs[1]), /* r14 */ + }, + + /* ABT Registers */ + [VCPU_REG_OFFSET_ABT] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(abt_regs[0]), /* r13 */ + REG_OFFSET(abt_regs[1]), /* r14 */ + }, + + /* UND Registers */ + [VCPU_REG_OFFSET_UND] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), 
USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(und_regs[0]), /* r13 */ + REG_OFFSET(und_regs[1]), /* r14 */ + }, +}; + +/* + * Return a pointer to the register number valid in the current mode of + * the virtual CPU. + */ +u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num) +{ + u32 *reg_array = (u32 *)&vcpu->arch.regs; + u32 mode = *vcpu_cpsr(vcpu) & MODE_MASK; + + switch (mode) { + case USR_MODE...SVC_MODE: + mode &= ~MODE32_BIT; /* 0 ... 3 */ + break; + + case ABT_MODE: + mode = VCPU_REG_OFFSET_ABT; + break; + + case UND_MODE: + mode = VCPU_REG_OFFSET_UND; + break; + + case SYSTEM_MODE: + mode = VCPU_REG_OFFSET_USR; + break; + + default: + BUG(); + } + + return reg_array + vcpu_reg_offsets[mode][reg_num]; +} + +/* + * Return the SPSR for the current mode of the virtual CPU. + */ +u32 *vcpu_spsr(struct kvm_vcpu *vcpu) +{ + u32 mode = *vcpu_cpsr(vcpu) & MODE_MASK; + switch (mode) { + case SVC_MODE: + return &vcpu->arch.regs.KVM_ARM_SVC_spsr; + case ABT_MODE: + return &vcpu->arch.regs.KVM_ARM_ABT_spsr; + case UND_MODE: + return &vcpu->arch.regs.KVM_ARM_UND_spsr; + case IRQ_MODE: + return &vcpu->arch.regs.KVM_ARM_IRQ_spsr; + case FIQ_MODE: + return &vcpu->arch.regs.KVM_ARM_FIQ_spsr; + default: + BUG(); + } +} diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c new file mode 100644 index 000000000000..a12eb229021d --- /dev/null +++ b/arch/arm/kvm/guest.c @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM } +#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { NULL } +}; + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static u64 core_reg_offset_from_id(u64 id) +{ + return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); +} + +static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + u32 __user *uaddr = (u32 __user *)(long)reg->addr; + struct kvm_regs *regs = &vcpu->arch.regs; + u64 off; + + if (KVM_REG_SIZE(reg->id) != 4) + return -ENOENT; + + /* Our ID is an index into the kvm_regs struct. 
*/ + off = core_reg_offset_from_id(reg->id); + if (off >= sizeof(*regs) / KVM_REG_SIZE(reg->id)) + return -ENOENT; + + return put_user(((u32 *)regs)[off], uaddr); +} + +static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + u32 __user *uaddr = (u32 __user *)(long)reg->addr; + struct kvm_regs *regs = &vcpu->arch.regs; + u64 off, val; + + if (KVM_REG_SIZE(reg->id) != 4) + return -ENOENT; + + /* Our ID is an index into the kvm_regs struct. */ + off = core_reg_offset_from_id(reg->id); + if (off >= sizeof(*regs) / KVM_REG_SIZE(reg->id)) + return -ENOENT; + + if (get_user(val, uaddr) != 0) + return -EFAULT; + + if (off == KVM_REG_ARM_CORE_REG(usr_regs.ARM_cpsr)) { + unsigned long mode = val & MODE_MASK; + switch (mode) { + case USR_MODE: + case FIQ_MODE: + case IRQ_MODE: + case SVC_MODE: + case ABT_MODE: + case UND_MODE: + break; + default: + return -EINVAL; + } + } + + ((u32 *)regs)[off] = val; + return 0; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +static unsigned long num_core_regs(void) +{ + return sizeof(struct kvm_regs) / sizeof(u32); +} + +/** + * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG + * + * This is for all registers. + */ +unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) +{ + return num_core_regs(); +} + +/** + * kvm_arm_copy_reg_indices - get indices of all registers. + * + * We do core registers right here, then we apppend coproc regs. + */ +int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + unsigned int i; + const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE; + + for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) { + if (put_user(core_reg | i, uindices)) + return -EFAULT; + uindices++; + } + + return 0; +} + +int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* We currently use nothing arch-specific in upper 32 bits */ + if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM >> 32) + return -EINVAL; + + /* Register group 16 means we want a core register. */ + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) + return get_core_reg(vcpu, reg); + + return -EINVAL; +} + +int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* We currently use nothing arch-specific in upper 32 bits */ + if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM >> 32) + return -EINVAL; + + /* Register group 16 means we set a core register. */ + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) + return set_core_reg(vcpu, reg); + + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init) +{ + unsigned int i; + + /* We can only do a cortex A15 for now. */ + if (init->target != kvm_target_cpu()) + return -EINVAL; + + vcpu->arch.target = init->target; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); + + /* -ENOENT for unknown features, -EINVAL for invalid combinations. 
*/ + for (i = 0; i < sizeof(init->features) * 8; i++) { + if (test_bit(i, (void *)init->features)) { + if (i >= KVM_VCPU_MAX_FEATURES) + return -ENOENT; + set_bit(i, vcpu->arch.features); + } + } + + /* Now we know what it is, we can reset it. */ + return kvm_reset_vcpu(vcpu); +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + return -EINVAL; +} diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S new file mode 100644 index 000000000000..1dc8926e26d2 --- /dev/null +++ b/arch/arm/kvm/init.S @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include +#include diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S new file mode 100644 index 000000000000..1dc8926e26d2 --- /dev/null +++ b/arch/arm/kvm/interrupts.S @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include +#include diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c new file mode 100644 index 000000000000..10ed4643269f --- /dev/null +++ b/arch/arm/kvm/mmu.c @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c new file mode 100644 index 000000000000..b80256b554cd --- /dev/null +++ b/arch/arm/kvm/reset.c @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/****************************************************************************** + * Cortex-A15 Reset Values + */ + +static const int a15_max_cpu_idx = 3; + +static struct kvm_regs a15_regs_reset = { + .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, +}; + + +/******************************************************************************* + * Exported reset function + */ + +/** + * kvm_reset_vcpu - sets core registers and cp15 registers to reset value + * @vcpu: The VCPU pointer + * + * This function finds the right table above and sets the registers on the + * virtual CPU struct to their architectually defined reset values. + */ +int kvm_reset_vcpu(struct kvm_vcpu *vcpu) +{ + struct kvm_regs *cpu_reset; + + switch (vcpu->arch.target) { + case KVM_ARM_TARGET_CORTEX_A15: + if (vcpu->vcpu_id > a15_max_cpu_idx) + return -EINVAL; + cpu_reset = &a15_regs_reset; + vcpu->arch.midr = read_cpuid_id(); + break; + default: + return -ENODEV; + } + + /* Reset core registers */ + memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs)); + + /* Reset CP15 registers */ + kvm_reset_coprocs(vcpu); + + return 0; +} diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h new file mode 100644 index 000000000000..f8869c19c0a3 --- /dev/null +++ b/arch/arm/kvm/trace.h @@ -0,0 +1,52 @@ +#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_H + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +/* + * Tracepoints for entry/exit to guest + */ +TRACE_EVENT(kvm_entry, + TP_PROTO(unsigned long vcpu_pc), + TP_ARGS(vcpu_pc), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + ), + + TP_printk("PC: 0x%08lx", __entry->vcpu_pc) +); + +TRACE_EVENT(kvm_exit, + TP_PROTO(unsigned long vcpu_pc), + TP_ARGS(vcpu_pc), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + ), + + TP_printk("PC: 0x%08lx", __entry->vcpu_pc) +); + + + +#endif /* _TRACE_KVM_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH arch/arm/kvm +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* This part must be outside protection */ +#include diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e6e5d4b13708..24978d525c6e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -764,6 +764,11 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_U512 0x0060000000000000ULL #define 
KVM_REG_SIZE_U1024 0x0070000000000000ULL +struct kvm_reg_list { + __u64 n; /* number of regs */ + __u64 reg[0]; +}; + struct kvm_one_reg { __u64 id; __u64 addr; @@ -932,6 +937,8 @@ struct kvm_s390_ucas_mapping { #define KVM_SET_ONE_REG _IOW(KVMIO, 0xac, struct kvm_one_reg) /* VM is being stopped by host */ #define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) +#define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) +#define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) -- cgit v1.2.3-71-gd317 From 86ce85352f0da7e1431ad8efcb04323819a620e7 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 20 Jan 2013 18:28:08 -0500 Subject: KVM: ARM: Inject IRQs and FIQs from userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All interrupt injection is now based on the VM ioctl KVM_IRQ_LINE. This works semantically well for the GIC as we in fact raise/lower a line on a machine component (the gic). The IOCTL uses the follwing struct. struct kvm_irq_level { union { __u32 irq; /* GSI */ __s32 status; /* not used for KVM_IRQ_LEVEL */ }; __u32 level; /* 0 or 1 */ }; ARM can signal an interrupt either at the CPU level, or at the in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for specific cpus. The irq field is interpreted like this:  bits: | 31 ... 24 | 23 ... 16 | 15 ... 0 | field: | irq_type | vcpu_index | irq_number | The irq_type field has the following values: - irq_type[0]: out-of-kernel GIC: irq_number 0 is IRQ, irq_number 1 is FIQ - irq_type[1]: in-kernel GIC: SPI, irq_number between 32 and 1019 (incl.) (the vcpu_index field is ignored) - irq_type[2]: in-kernel GIC: PPI, irq_number between 16 and 31 (incl.) The irq_number thus corresponds to the irq ID in as in the GICv2 specs. This is documented in Documentation/kvm/api.txt. Reviewed-by: Will Deacon Reviewed-by: Marcelo Tosatti Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 25 ++++++++++++--- arch/arm/include/asm/kvm_arm.h | 1 + arch/arm/include/uapi/asm/kvm.h | 21 +++++++++++++ arch/arm/kvm/arm.c | 65 +++++++++++++++++++++++++++++++++++++++ arch/arm/kvm/trace.h | 25 +++++++++++++++ include/uapi/linux/kvm.h | 1 + 6 files changed, 134 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 4237c27ea612..505049299298 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -615,15 +615,32 @@ created. 4.25 KVM_IRQ_LINE Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64 +Architectures: x86, ia64, arm Type: vm ioctl Parameters: struct kvm_irq_level Returns: 0 on success, -1 on error Sets the level of a GSI input to the interrupt controller model in the kernel. -Requires that an interrupt controller model has been previously created with -KVM_CREATE_IRQCHIP. Note that edge-triggered interrupts require the level -to be set to 1 and then back to 0. +On some architectures it is required that an interrupt controller model has +been previously created with KVM_CREATE_IRQCHIP. Note that edge-triggered +interrupts require the level to be set to 1 and then back to 0. + +ARM can signal an interrupt either at the CPU level, or at the in-kernel irqchip +(GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for +specific cpus. The irq field is interpreted like this: + +  bits: | 31 ... 24 | 23 ... 
16 | 15 ... 0 | + field: | irq_type | vcpu_index | irq_id | + +The irq_type field has the following values: +- irq_type[0]: out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ +- irq_type[1]: in-kernel GIC: SPI, irq_id between 32 and 1019 (incl.) + (the vcpu_index field is ignored) +- irq_type[2]: in-kernel GIC: PPI, irq_id between 16 and 31 (incl.) + +(The irq_id field thus corresponds nicely to the IRQ ID in the ARM GIC specs) + +In both cases, level is used to raise/lower the line. struct kvm_irq_level { union { diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 8875b3f605a7..d64b5250ad4e 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -68,6 +68,7 @@ #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ HCR_SWIO | HCR_TIDCP) +#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) /* Hyp System Control Register (HSCTLR) bits */ #define HSCTLR_TE (1 << 30) diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 1083327b5fcd..53f45f146875 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -23,6 +23,7 @@ #include #define __KVM_HAVE_GUEST_DEBUG +#define __KVM_HAVE_IRQ_LINE #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -103,4 +104,24 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) #define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4) +/* KVM_IRQ_LINE irq field index values */ +#define KVM_ARM_IRQ_TYPE_SHIFT 24 +#define KVM_ARM_IRQ_TYPE_MASK 0xff +#define KVM_ARM_IRQ_VCPU_SHIFT 16 +#define KVM_ARM_IRQ_VCPU_MASK 0xff +#define KVM_ARM_IRQ_NUM_SHIFT 0 +#define KVM_ARM_IRQ_NUM_MASK 0xffff + +/* irq_type field */ +#define KVM_ARM_IRQ_TYPE_CPU 0 +#define KVM_ARM_IRQ_TYPE_SPI 1 +#define KVM_ARM_IRQ_TYPE_PPI 2 + +/* out-of-kernel GIC cpu interrupt injection irq_number field */ +#define KVM_ARM_IRQ_CPU_IRQ 0 +#define KVM_ARM_IRQ_CPU_FIQ 1 + +/* Highest supported SPI, from VGIC_NR_IRQS */ +#define KVM_ARM_IRQ_GIC_MAX 127 + #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index d810afb6cb84..2101152c3a4b 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -284,6 +285,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + vcpu->cpu = cpu; } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -319,6 +321,69 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) return -EINVAL; } +static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) +{ + int bit_index; + bool set; + unsigned long *ptr; + + if (number == KVM_ARM_IRQ_CPU_IRQ) + bit_index = __ffs(HCR_VI); + else /* KVM_ARM_IRQ_CPU_FIQ */ + bit_index = __ffs(HCR_VF); + + ptr = (unsigned long *)&vcpu->arch.irq_lines; + if (level) + set = test_and_set_bit(bit_index, ptr); + else + set = test_and_clear_bit(bit_index, ptr); + + /* + * If we didn't change anything, no need to wake up or kick other CPUs + */ + if (set == level) + return 0; + + /* + * The vcpu irq_lines field was updated, wake up sleeping VCPUs and + * trigger a world-switch round on the running physical CPU to set the + * virtual IRQ/FIQ fields in the HCR appropriately. 
+ */ + kvm_vcpu_kick(vcpu); + + return 0; +} + +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level) +{ + u32 irq = irq_level->irq; + unsigned int irq_type, vcpu_idx, irq_num; + int nrcpus = atomic_read(&kvm->online_vcpus); + struct kvm_vcpu *vcpu = NULL; + bool level = irq_level->level; + + irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK; + vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK; + irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK; + + trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level); + + if (irq_type != KVM_ARM_IRQ_TYPE_CPU) + return -EINVAL; + + if (vcpu_idx >= nrcpus) + return -EINVAL; + + vcpu = kvm_get_vcpu(kvm, vcpu_idx); + if (!vcpu) + return -EINVAL; + + if (irq_num > KVM_ARM_IRQ_CPU_FIQ) + return -EINVAL; + + return vcpu_interrupt_line(vcpu, irq_num, level); +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index 862b2cc12fbe..105d1f79909a 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -39,6 +39,31 @@ TRACE_EVENT(kvm_exit, TP_printk("PC: 0x%08lx", __entry->vcpu_pc) ); +TRACE_EVENT(kvm_irq_line, + TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level), + TP_ARGS(type, vcpu_idx, irq_num, level), + + TP_STRUCT__entry( + __field( unsigned int, type ) + __field( int, vcpu_idx ) + __field( int, irq_num ) + __field( int, level ) + ), + + TP_fast_assign( + __entry->type = type; + __entry->vcpu_idx = vcpu_idx; + __entry->irq_num = irq_num; + __entry->level = level; + ), + + TP_printk("Inject %s interrupt (%d), vcpu->idx: %d, num: %d, level: %d", + (__entry->type == KVM_ARM_IRQ_TYPE_CPU) ? "CPU" : + (__entry->type == KVM_ARM_IRQ_TYPE_PPI) ? "VGIC PPI" : + (__entry->type == KVM_ARM_IRQ_TYPE_SPI) ? "VGIC SPI" : "UNKNOWN", + __entry->type, __entry->vcpu_idx, __entry->irq_num, __entry->level) +); + TRACE_EVENT(kvm_unmap_hva, TP_PROTO(unsigned long hva), TP_ARGS(hva), diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 24978d525c6e..dc63665e73ad 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -115,6 +115,7 @@ struct kvm_irq_level { * ACPI gsi notion of irq. * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. + * For ARM: See Documentation/virtual/kvm/api.txt */ union { __u32 irq; -- cgit v1.2.3-71-gd317 From aa024c2f35a07cc32e48c5f62a5807be01c09249 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 20 Jan 2013 18:28:13 -0500 Subject: KVM: ARM: Power State Coordination Interface implementation Implement the PSCI specification (ARM DEN 0022A) to control virtual CPUs being "powered" on or off. PSCI/KVM is detected using the KVM_CAP_ARM_PSCI capability. A virtual CPU can now be initialized in a "powered off" state, using the KVM_ARM_VCPU_POWER_OFF feature flag. The guest can use either SMC or HVC to execute a PSCI function. 
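For illustration, a minimal sketch of the guest-visible calling convention
implied by the handlers below (function ID in r0, target CPU index in r1,
entry point in r2, result returned in r0); the helper name is made up and
it assumes a 32-bit guest whose toolchain accepts the HVC instruction:

static unsigned long psci_cpu_on(unsigned long target_cpu,
				 unsigned long entry_point)
{
	/* KVM_PSCI_FN_CPU_ON == KVM_PSCI_FN_BASE + 2 (see uapi/asm/kvm.h) */
	register unsigned long r0 asm("r0") = 0x95c1ba5e + 2;
	register unsigned long r1 asm("r1") = target_cpu;
	register unsigned long r2 asm("r2") = entry_point;

	asm volatile("hvc #0"
		     : "+r" (r0)
		     : "r" (r1), "r" (r2)
		     : "memory");

	return r0;	/* KVM_PSCI_RET_SUCCESS, _INVAL, _NI, ... */
}

On success the target VCPU is reset, its PC is set to the requested entry
point (with a Thumb entry point handled gracefully), and it is woken up.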
Reviewed-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 4 ++ arch/arm/include/asm/kvm_emulate.h | 10 ++++ arch/arm/include/asm/kvm_host.h | 5 +- arch/arm/include/asm/kvm_psci.h | 23 ++++++++ arch/arm/include/uapi/asm/kvm.h | 16 ++++++ arch/arm/kvm/Makefile | 2 +- arch/arm/kvm/arm.c | 30 ++++++++++- arch/arm/kvm/psci.c | 108 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 1 + 9 files changed, 195 insertions(+), 4 deletions(-) create mode 100644 arch/arm/include/asm/kvm_psci.h create mode 100644 arch/arm/kvm/psci.c (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 38066a7a74e1..c25439a58274 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2185,6 +2185,10 @@ return ENOEXEC for that vcpu. Note that because some registers reflect machine topology, all vcpus should be created before this ioctl is invoked. +Possible features: + - KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state. + Depends on KVM_CAP_ARM_PSCI. + 4.78 KVM_GET_REG_LIST diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 4c1a073280be..fd611996bfb5 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -32,6 +32,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) +{ + return 1; +} + static inline u32 *vcpu_pc(struct kvm_vcpu *vcpu) { return (u32 *)&vcpu->arch.regs.usr_regs.ARM_pc; @@ -42,6 +47,11 @@ static inline u32 *vcpu_cpsr(struct kvm_vcpu *vcpu) return (u32 *)&vcpu->arch.regs.usr_regs.ARM_cpsr; } +static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) +{ + *vcpu_cpsr(vcpu) |= PSR_T_BIT; +} + static inline bool mode_has_spsr(struct kvm_vcpu *vcpu) { unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index e65fc967a71d..98b4d1a72923 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -30,7 +30,7 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HAVE_ONE_REG -#define KVM_VCPU_MAX_FEATURES 0 +#define KVM_VCPU_MAX_FEATURES 1 /* We don't currently support large pages. */ #define KVM_HPAGE_GFN_SHIFT(x) 0 @@ -100,6 +100,9 @@ struct kvm_vcpu_arch { int last_pcpu; cpumask_t require_dcache_flush; + /* Don't run the guest on this vcpu */ + bool pause; + /* IO related fields */ struct kvm_decode mmio_decode; diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h new file mode 100644 index 000000000000..9a83d98bf170 --- /dev/null +++ b/arch/arm/include/asm/kvm_psci.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef __ARM_KVM_PSCI_H__ +#define __ARM_KVM_PSCI_H__ + +bool kvm_psci_call(struct kvm_vcpu *vcpu); + +#endif /* __ARM_KVM_PSCI_H__ */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index bbb6b2328004..3303ff5adbf3 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -65,6 +65,8 @@ struct kvm_regs { #define KVM_ARM_TARGET_CORTEX_A15 0 #define KVM_ARM_NUM_TARGETS 1 +#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ + struct kvm_vcpu_init { __u32 target; __u32 features[7]; @@ -145,4 +147,18 @@ struct kvm_arch_memory_slot { /* Highest supported SPI, from VGIC_NR_IRQS */ #define KVM_ARM_IRQ_GIC_MAX 127 +/* PSCI interface */ +#define KVM_PSCI_FN_BASE 0x95c1ba5e +#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n)) + +#define KVM_PSCI_FN_CPU_SUSPEND KVM_PSCI_FN(0) +#define KVM_PSCI_FN_CPU_OFF KVM_PSCI_FN(1) +#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2) +#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3) + +#define KVM_PSCI_RET_SUCCESS 0 +#define KVM_PSCI_RET_NI ((unsigned long)-1) +#define KVM_PSCI_RET_INVAL ((unsigned long)-2) +#define KVM_PSCI_RET_DENIED ((unsigned long)-3) + #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 1e45cd97a7fc..ea27987bd07f 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -18,4 +18,4 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o guest.o mmu.o emulate.o reset.o -obj-y += coproc.o coproc_a15.o mmio.o +obj-y += coproc.o coproc_a15.o mmio.o psci.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 8680b9ffd2ae..2d30e3afdaf9 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #ifdef REQUIRES_VIRT @@ -160,6 +161,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_SYNC_MMU: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_ONE_REG: + case KVM_CAP_ARM_PSCI: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -443,14 +445,18 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), vcpu->arch.hsr & HSR_HVC_IMM_MASK); + if (kvm_psci_call(vcpu)) + return 1; + kvm_inject_undefined(vcpu); return 1; } static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - /* We don't support SMC; don't do that. */ - kvm_debug("smc: at %08x", *vcpu_pc(vcpu)); + if (kvm_psci_call(vcpu)) + return 1; + kvm_inject_undefined(vcpu); return 1; } @@ -589,9 +595,26 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) return 0; vcpu->arch.has_run_once = true; + + /* + * Handle the "start in power-off" case by calling into the + * PSCI code. 
+ */ + if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) { + *vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF; + kvm_psci_call(vcpu); + } + return 0; } +static void vcpu_pause(struct kvm_vcpu *vcpu) +{ + wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); + + wait_event_interruptible(*wq, !vcpu->arch.pause); +} + /** * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code * @vcpu: The VCPU pointer @@ -635,6 +658,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) update_vttbr(vcpu->kvm); + if (vcpu->arch.pause) + vcpu_pause(vcpu); + local_irq_disable(); /* diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c new file mode 100644 index 000000000000..7ee5bb7a3667 --- /dev/null +++ b/arch/arm/kvm/psci.c @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include +#include + +/* + * This is an implementation of the Power State Coordination Interface + * as described in ARM document number ARM DEN 0022A. + */ + +static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pause = true; +} + +static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) +{ + struct kvm *kvm = source_vcpu->kvm; + struct kvm_vcpu *vcpu; + wait_queue_head_t *wq; + unsigned long cpu_id; + phys_addr_t target_pc; + + cpu_id = *vcpu_reg(source_vcpu, 1); + if (vcpu_mode_is_32bit(source_vcpu)) + cpu_id &= ~((u32) 0); + + if (cpu_id >= atomic_read(&kvm->online_vcpus)) + return KVM_PSCI_RET_INVAL; + + target_pc = *vcpu_reg(source_vcpu, 2); + + vcpu = kvm_get_vcpu(kvm, cpu_id); + + wq = kvm_arch_vcpu_wq(vcpu); + if (!waitqueue_active(wq)) + return KVM_PSCI_RET_INVAL; + + kvm_reset_vcpu(vcpu); + + /* Gracefully handle Thumb2 entry point */ + if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) { + target_pc &= ~((phys_addr_t) 1); + vcpu_set_thumb(vcpu); + } + + *vcpu_pc(vcpu) = target_pc; + vcpu->arch.pause = false; + smp_mb(); /* Make sure the above is visible */ + + wake_up_interruptible(wq); + + return KVM_PSCI_RET_SUCCESS; +} + +/** + * kvm_psci_call - handle PSCI call if r0 value is in range + * @vcpu: Pointer to the VCPU struct + * + * Handle PSCI calls from guests through traps from HVC or SMC instructions. + * The calling convention is similar to SMC calls to the secure world where + * the function number is placed in r0 and this function returns true if the + * function number specified in r0 is withing the PSCI range, and false + * otherwise. 
+ */ +bool kvm_psci_call(struct kvm_vcpu *vcpu) +{ + unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); + unsigned long val; + + switch (psci_fn) { + case KVM_PSCI_FN_CPU_OFF: + kvm_psci_vcpu_off(vcpu); + val = KVM_PSCI_RET_SUCCESS; + break; + case KVM_PSCI_FN_CPU_ON: + val = kvm_psci_vcpu_on(vcpu); + break; + case KVM_PSCI_FN_CPU_SUSPEND: + case KVM_PSCI_FN_MIGRATE: + val = KVM_PSCI_RET_NI; + break; + + default: + return false; + } + + *vcpu_reg(vcpu, 0) = val; + return true; +} diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index dc63665e73ad..7f2360a46fc2 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -636,6 +636,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_IRQFD_RESAMPLE 82 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #define KVM_CAP_PPC_HTAB_FD 84 +#define KVM_CAP_ARM_PSCI 87 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-71-gd317 From b878e7fb22ea48b0585bbbbef249f7efc6d42748 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Tue, 8 Jan 2013 14:44:25 -0500 Subject: perf: Missing field in PERF_RECORD_SAMPLE documentation While trying to write a perf_event/mmap test for my perf_event test-suite I came across a missing field description in the PERF_RECORD_SAMPLE documentation in perf_event.h Signed-off-by: Vince Weaver Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1301081439300.24507@vincent-weaver-1.um.maine.edu Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 4f63c05d27c9..9fa9c622a7f4 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -579,7 +579,8 @@ enum perf_event_type { * { u32 size; * char data[size];}&& PERF_SAMPLE_RAW * - * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * { u64 nr; + * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER -- cgit v1.2.3-71-gd317 From 77765eaf5cfb6b8dd98ec8b54b411d74ff6095f1 Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Fri, 18 Jan 2013 11:18:45 +0530 Subject: cfg80211/nl80211: add API for MAC address ACLs Add API to enable drivers to implement MAC address based access control in AP/P2P GO mode. Capable drivers advertise this capability by setting the maximum number of MAC addresses in such a list in wiphy->max_acl_mac_addrs. An initial ACL may be given to the NL80211_CMD_START_AP command and/or changed later with NL80211_CMD_SET_MAC_ACL. Black- and whitelists are supported, but not simultaneously. 
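A driver opts in by advertising its limit in wiphy->max_acl_mac_addrs and
implementing the new set_mac_acl() operation. A minimal, hypothetical
sketch of the driver side (the drv_* names are placeholders, not a real
driver):

static int drv_set_mac_acl(struct wiphy *wiphy, struct net_device *dev,
			   const struct cfg80211_acl_data *acl)
{
	/* n_acl_entries == 0 means the existing ACL must be cleared. */
	return drv_fw_program_acl(dev, acl->acl_policy,
				  acl->mac_addrs, acl->n_acl_entries);
}

static const struct cfg80211_ops drv_cfg80211_ops = {
	/* ... */
	.set_mac_acl	= drv_set_mac_acl,
};

/* at wiphy setup time */
wiphy->max_acl_mac_addrs = 16;	/* whatever the hardware can filter */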
Signed-off-by: Vasanthakumar Thiagarajan [rewrite commit log, many cleanups] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 34 +++++++++++++ include/uapi/linux/nl80211.h | 51 ++++++++++++++++++- net/wireless/core.c | 5 ++ net/wireless/nl80211.c | 116 +++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 12 +++++ net/wireless/trace.h | 18 +++++++ 6 files changed, 234 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 183033789e69..36e076e374d2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -531,6 +531,22 @@ struct mac_address { u8 addr[ETH_ALEN]; }; +/** + * struct cfg80211_acl_data - Access control list data + * + * @acl_policy: ACL policy to be applied on the station's + entry specified by mac_addr + * @n_acl_entries: Number of MAC address entries passed + * @mac_addrs: List of MAC addresses of stations to be used for ACL + */ +struct cfg80211_acl_data { + enum nl80211_acl_policy acl_policy; + int n_acl_entries; + + /* Keep it last */ + struct mac_address mac_addrs[]; +}; + /** * struct cfg80211_ap_settings - AP configuration * @@ -550,6 +566,8 @@ struct mac_address { * @inactivity_timeout: time in seconds to determine station's inactivity. * @p2p_ctwindow: P2P CT Window * @p2p_opp_ps: P2P opportunistic PS + * @acl: ACL configuration used by the drivers which has support for + * MAC address based access control */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -566,6 +584,7 @@ struct cfg80211_ap_settings { int inactivity_timeout; u8 p2p_ctwindow; bool p2p_opp_ps; + const struct cfg80211_acl_data *acl; }; /** @@ -1800,6 +1819,13 @@ struct cfg80211_gtk_rekey_data { * * @start_p2p_device: Start the given P2P device. * @stop_p2p_device: Stop the given P2P device. + * + * @set_mac_acl: Sets MAC address control list in AP and P2P GO mode. + * Parameters include ACL policy, an array of MAC address of stations + * and the number of MAC addresses. If there is already a list in driver + * this new list replaces the existing one. Driver has to clear its ACL + * when number of MAC addresses entries is passed as 0. Drivers which + * advertise the support for MAC based ACL have to implement this callback. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2020,6 +2046,9 @@ struct cfg80211_ops { struct wireless_dev *wdev); void (*stop_p2p_device)(struct wiphy *wiphy, struct wireless_dev *wdev); + + int (*set_mac_acl)(struct wiphy *wiphy, struct net_device *dev, + const struct cfg80211_acl_data *params); }; /* @@ -2325,6 +2354,9 @@ struct wiphy_wowlan_support { * @ap_sme_capa: AP SME capabilities, flags from &enum nl80211_ap_sme_features. * @ht_capa_mod_mask: Specify what ht_cap values can be over-ridden. * If null, then none can be over-ridden. + * + * @max_acl_mac_addrs: Maximum number of MAC addresses that the device + * supports for ACL. */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -2346,6 +2378,8 @@ struct wiphy { /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) 
*/ u16 interface_modes; + u16 max_acl_mac_addrs; + u32 flags, features; u32 ap_sme_capa; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e6eeb4ba5dc5..5b7dbc1ea966 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -170,7 +170,8 @@ * %NL80211_ATTR_HIDDEN_SSID, %NL80211_ATTR_CIPHERS_PAIRWISE, * %NL80211_ATTR_CIPHER_GROUP, %NL80211_ATTR_WPA_VERSIONS, * %NL80211_ATTR_AKM_SUITES, %NL80211_ATTR_PRIVACY, - * %NL80211_ATTR_AUTH_TYPE and %NL80211_ATTR_INACTIVITY_TIMEOUT. + * %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_INACTIVITY_TIMEOUT, + * %NL80211_ATTR_ACL_POLICY and %NL80211_ATTR_MAC_ADDRS. * The channel to use can be set on the interface or be given using the * %NL80211_ATTR_WIPHY_FREQ and the attributes determining channel width. * @NL80211_CMD_NEW_BEACON: old alias for %NL80211_CMD_START_AP @@ -586,6 +587,16 @@ * @NL80211_CMD_SET_MCAST_RATE: Change the rate used to send multicast frames * for IBSS or MESH vif. * + * @NL80211_CMD_SET_MAC_ACL: sets ACL for MAC address based access control. + * This is to be used with the drivers advertising the support of MAC + * address based access control. List of MAC addresses is passed in + * %NL80211_ATTR_MAC_ADDRS and ACL policy is passed in + * %NL80211_ATTR_ACL_POLICY. Driver will enable ACL with this list, if it + * is not already done. The new list will replace any existing list. Driver + * will clear its ACL when the list of MAC addresses passed is empty. This + * command is used in AP/P2P GO mode. Driver has to make sure to clear its + * ACL list during %NL80211_CMD_STOP_AP. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -736,6 +747,8 @@ enum nl80211_commands { NL80211_CMD_SET_MCAST_RATE, + NL80211_CMD_SET_MAC_ACL, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1313,6 +1326,16 @@ enum nl80211_commands { * @NL80211_ATTR_LOCAL_MESH_POWER_MODE: local mesh STA link-specific power mode * defined in &enum nl80211_mesh_power_mode. * + * @NL80211_ATTR_ACL_POLICY: ACL policy, see &enum nl80211_acl_policy, + * carried in a u32 attribute + * + * @NL80211_ATTR_MAC_ADDRS: Array of nested MAC addresses, used for + * MAC ACL. + * + * @NL80211_ATTR_MAC_ACL_MAX: u32 attribute to advertise the maximum + * number of MAC addresses that a device can support for MAC + * ACL. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1585,6 +1608,12 @@ enum nl80211_attrs { NL80211_ATTR_LOCAL_MESH_POWER_MODE, + NL80211_ATTR_ACL_POLICY, + + NL80211_ATTR_MAC_ADDRS, + + NL80211_ATTR_MAC_ACL_MAX, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3248,7 +3277,7 @@ enum nl80211_probe_resp_offload_support_attr { * enum nl80211_connect_failed_reason - connection request failed reasons * @NL80211_CONN_FAIL_MAX_CLIENTS: Maximum number of clients that can be * handled by the AP is reached. - * @NL80211_CONN_FAIL_BLOCKED_CLIENT: Client's MAC is in the AP's blocklist. + * @NL80211_CONN_FAIL_BLOCKED_CLIENT: Connection request is rejected due to ACL. */ enum nl80211_connect_failed_reason { NL80211_CONN_FAIL_MAX_CLIENTS, @@ -3276,4 +3305,22 @@ enum nl80211_scan_flags { NL80211_SCAN_FLAG_AP = 1<<2, }; +/** + * enum nl80211_acl_policy - access control policy + * + * Access control policy is applied on a MAC list set by + * %NL80211_CMD_START_AP and %NL80211_CMD_SET_MAC_ACL, to + * be used with %NL80211_ATTR_ACL_POLICY. 
+ * + * @NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED: Deny stations which are + * listed in ACL, i.e. allow all the stations which are not listed + * in ACL to authenticate. + * @NL80211_ACL_POLICY_DENY_UNLESS_LISTED: Allow the stations which are listed + * in ACL, i.e. deny all the stations which are not listed in ACL. + */ +enum nl80211_acl_policy { + NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED, + NL80211_ACL_POLICY_DENY_UNLESS_LISTED, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 0e702cdc6043..ce827242f390 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -478,6 +478,11 @@ int wiphy_register(struct wiphy *wiphy) ETH_ALEN))) return -EINVAL; + if (WARN_ON(wiphy->max_acl_mac_addrs && + (!(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME) || + !rdev->ops->set_mac_acl))) + return -EINVAL; + if (wiphy->addresses) memcpy(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 33de80364c5c..b5978ab4ad7a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -365,6 +365,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 }, [NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 }, + [NL80211_ATTR_ACL_POLICY] = {. type = NLA_U32 }, + [NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED }, }; /* policy for the key attributes */ @@ -1268,6 +1270,12 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag dev->wiphy.ht_capa_mod_mask)) goto nla_put_failure; + if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && + dev->wiphy.max_acl_mac_addrs && + nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, + dev->wiphy.max_acl_mac_addrs)) + goto nla_put_failure; + return genlmsg_end(msg, hdr); nla_put_failure: @@ -2491,6 +2499,97 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) return err; } +/* This function returns an error or the number of nested attributes */ +static int validate_acl_mac_addrs(struct nlattr *nl_attr) +{ + struct nlattr *attr; + int n_entries = 0, tmp; + + nla_for_each_nested(attr, nl_attr, tmp) { + if (nla_len(attr) != ETH_ALEN) + return -EINVAL; + + n_entries++; + } + + return n_entries; +} + +/* + * This function parses ACL information and allocates memory for ACL data. + * On successful return, the calling function is responsible to free the + * ACL buffer returned by this function. 
+ */ +static struct cfg80211_acl_data *parse_acl_data(struct wiphy *wiphy, + struct genl_info *info) +{ + enum nl80211_acl_policy acl_policy; + struct nlattr *attr; + struct cfg80211_acl_data *acl; + int i = 0, n_entries, tmp; + + if (!wiphy->max_acl_mac_addrs) + return ERR_PTR(-EOPNOTSUPP); + + if (!info->attrs[NL80211_ATTR_ACL_POLICY]) + return ERR_PTR(-EINVAL); + + acl_policy = nla_get_u32(info->attrs[NL80211_ATTR_ACL_POLICY]); + if (acl_policy != NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED && + acl_policy != NL80211_ACL_POLICY_DENY_UNLESS_LISTED) + return ERR_PTR(-EINVAL); + + if (!info->attrs[NL80211_ATTR_MAC_ADDRS]) + return ERR_PTR(-EINVAL); + + n_entries = validate_acl_mac_addrs(info->attrs[NL80211_ATTR_MAC_ADDRS]); + if (n_entries < 0) + return ERR_PTR(n_entries); + + if (n_entries > wiphy->max_acl_mac_addrs) + return ERR_PTR(-ENOTSUPP); + + acl = kzalloc(sizeof(*acl) + (sizeof(struct mac_address) * n_entries), + GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + + nla_for_each_nested(attr, info->attrs[NL80211_ATTR_MAC_ADDRS], tmp) { + memcpy(acl->mac_addrs[i].addr, nla_data(attr), ETH_ALEN); + i++; + } + + acl->n_acl_entries = n_entries; + acl->acl_policy = acl_policy; + + return acl; +} + +static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct cfg80211_acl_data *acl; + int err; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EOPNOTSUPP; + + if (!dev->ieee80211_ptr->beacon_interval) + return -EINVAL; + + acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + err = rdev_set_mac_acl(rdev, dev, acl); + + kfree(acl); + + return err; +} + static int nl80211_parse_beacon(struct genl_info *info, struct cfg80211_beacon_data *bcn) { @@ -2734,6 +2833,12 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (err) return err; + if (info->attrs[NL80211_ATTR_ACL_POLICY]) { + params.acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(params.acl)) + return PTR_ERR(params.acl); + } + err = rdev_start_ap(rdev, dev, ¶ms); if (!err) { wdev->preset_chandef = params.chandef; @@ -2742,6 +2847,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->ssid_len = params.ssid_len; memcpy(wdev->ssid, params.ssid, wdev->ssid_len); } + + kfree(params.acl); + return err; } @@ -7876,6 +7984,14 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_SET_MAC_ACL, + .doit = nl80211_set_mac_acl, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 6c0c8191f837..422d38291d66 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -875,4 +875,16 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev, rdev->ops->stop_p2p_device(&rdev->wiphy, wdev); trace_rdev_return_void(&rdev->wiphy); } + +static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_acl_data *params) +{ + int ret; + + trace_rdev_set_mac_acl(&rdev->wiphy, dev, params); + ret = rdev->ops->set_mac_acl(&rdev->wiphy, dev, params); + trace_rdev_return_int(&rdev->wiphy, 
ret); + return ret; +} #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 2134576f426e..8bc553199686 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1767,6 +1767,24 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_p2p_device, TP_ARGS(wiphy, wdev) ); +TRACE_EVENT(rdev_set_mac_acl, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_acl_data *params), + TP_ARGS(wiphy, netdev, params), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u32, acl_policy) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WIPHY_ASSIGN; + __entry->acl_policy = params->acl_policy; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", acl policy: %d", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ -- cgit v1.2.3-71-gd317 From d904d3edcbb26efc86ea3575bb4265559801a94b Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 17 Jan 2013 18:43:41 +0100 Subject: can: gw: make routing to the incoming CAN interface configurable Introduce new configuration flag CGW_FLAGS_CAN_IIF_TX_OK to configure if a CAN sk_buff that has been routed with can-gw is allowed to be send back to the originating CAN interface. Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/gw.h | 1 + net/can/gw.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h index 8e1db18c3cb6..0505c7f86213 100644 --- a/include/uapi/linux/can/gw.h +++ b/include/uapi/linux/can/gw.h @@ -51,6 +51,7 @@ enum { #define CGW_FLAGS_CAN_ECHO 0x01 #define CGW_FLAGS_CAN_SRC_TSTAMP 0x02 +#define CGW_FLAGS_CAN_IIF_TX_OK 0x04 #define CGW_MOD_FUNCS 4 /* AND OR XOR SET */ diff --git a/net/can/gw.c b/net/can/gw.c index 574dda78eb0f..37a3efb7cc9d 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -347,6 +348,13 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) return; } + /* is sending the skb back to the incoming interface not allowed? */ + if (!(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK) && + skb_headroom(skb) == sizeof(struct can_skb_priv) && + (((struct can_skb_priv *)(skb->head))->ifindex == + gwj->dst.dev->ifindex)) + return; + /* * clone the given skb, which has not been done in can_rcv() * -- cgit v1.2.3-71-gd317 From e6afa00a1409bc3bceed9ccb33111519463dfe7b Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 17 Jan 2013 18:43:46 +0100 Subject: can: gw: indicate and count deleted frames due to misconfiguration Add a statistic counter to detect deleted frames due to misconfiguration with a new read-only CGW_DELETED netlink attribute for the CAN gateway. 
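
Purely as an illustration (not part of this patch), a monitoring tool that
dumps the gateway jobs over rtnetlink could pick the new counter out of each
reply roughly as below.  The parsing boilerplate uses the standard
NLMSG_*/RTA_* macros; "nlh" is assumed to be one RTM_NEWROUTE message from an
AF_CAN gateway dump, and only the CGW_DELETED handling is specific to this
change.

	#include <stdio.h>
	#include <linux/rtnetlink.h>
	#include <linux/can/gw.h>

	static void show_deleted(struct nlmsghdr *nlh)
	{
		struct rtcanmsg *r = NLMSG_DATA(nlh);
		struct rtattr *rta = (struct rtattr *)((char *)r +
						       NLMSG_ALIGN(sizeof(*r)));
		int len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r));

		/* walk the attributes and report the new counter, if present */
		for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len))
			if (rta->rta_type == CGW_DELETED)
				printf("frames deleted (max_hops exceeded): %u\n",
				       *(__u32 *)RTA_DATA(rta));
	}

A non-zero CGW_DELETED value therefore points at a routing loop or an
unintended max_hops setting, while the existing dropped_frames counter keeps
covering operational drops such as a downed destination interface.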
Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/gw.h | 1 + net/can/gw.c | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h index 0505c7f86213..ae07bec74f4b 100644 --- a/include/uapi/linux/can/gw.h +++ b/include/uapi/linux/can/gw.h @@ -44,6 +44,7 @@ enum { CGW_SRC_IF, /* ifindex of source network interface */ CGW_DST_IF, /* ifindex of destination network interface */ CGW_FILTER, /* specify struct can_filter on source CAN device */ + CGW_DELETED, /* number of deleted CAN frames (see max_hops param) */ __CGW_MAX }; diff --git a/net/can/gw.c b/net/can/gw.c index 4216a80618cb..acdd4656cc3b 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -131,6 +131,7 @@ struct cgw_job { struct rcu_head rcu; u32 handled_frames; u32 dropped_frames; + u32 deleted_frames; struct cf_mod mod; union { /* CAN frame data source */ @@ -367,8 +368,11 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) BUG_ON(skb->ip_summed != CHECKSUM_UNNECESSARY); - if (cgw_hops(skb) >= max_hops) + if (cgw_hops(skb) >= max_hops) { + /* indicate deleted frames due to misconfiguration */ + gwj->deleted_frames++; return; + } if (!(gwj->dst.dev->flags & IFF_UP)) { gwj->dropped_frames++; @@ -500,6 +504,11 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } + if (gwj->deleted_frames) { + if (nla_put_u32(skb, CGW_DELETED, gwj->deleted_frames) < 0) + goto cancel; + } + /* check non default settings of attributes */ if (gwj->mod.modtype.and) { @@ -799,6 +808,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, gwj->handled_frames = 0; gwj->dropped_frames = 0; + gwj->deleted_frames = 0; gwj->flags = r->flags; gwj->gwtype = r->gwtype; -- cgit v1.2.3-71-gd317 From cd8f7cb4e6dfa4ea08fc250a814240b883ef7911 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 22 Jan 2013 12:34:29 +0100 Subject: cfg80211/mac80211: support reporting wakeup reason When waking up from WoWLAN, it is useful to know what triggered the wakeup. Support reporting the wakeup reason(s) in cfg80211 (and a pass-through in mac80211) to allow userspace to know. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 41 +++++++++++++++++++ include/net/mac80211.h | 12 ++++++ include/uapi/linux/nl80211.h | 31 ++++++++++++++ net/mac80211/pm.c | 10 +++++ net/wireless/nl80211.c | 97 ++++++++++++++++++++++++++++++++++++++++++++ net/wireless/trace.h | 35 ++++++++++++++++ 6 files changed, 226 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 36e076e374d2..48add7e3ba1d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1596,6 +1596,32 @@ struct cfg80211_wowlan { int n_patterns; }; +/** + * struct cfg80211_wowlan_wakeup - wakeup report + * @disconnect: woke up by getting disconnected + * @magic_pkt: woke up by receiving magic packet + * @gtk_rekey_failure: woke up by GTK rekey failure + * @eap_identity_req: woke up by EAP identity request packet + * @four_way_handshake: woke up by 4-way handshake + * @rfkill_release: woke up by rfkill being released + * @pattern_idx: pattern that caused wakeup, -1 if not due to pattern + * @packet_present_len: copied wakeup packet data + * @packet_len: original wakeup packet length + * @packet: The packet causing the wakeup, if any. 
+ * @packet_80211: For pattern match, magic packet and other data + * frame triggers an 802.3 frame should be reported, for + * disconnect due to deauth 802.11 frame. This indicates which + * it is. + */ +struct cfg80211_wowlan_wakeup { + bool disconnect, magic_pkt, gtk_rekey_failure, + eap_identity_req, four_way_handshake, + rfkill_release, packet_80211; + s32 pattern_idx; + u32 packet_present_len, packet_len; + const void *packet; +}; + /** * struct cfg80211_gtk_rekey_data - rekey data * @kek: key encryption key @@ -3852,6 +3878,21 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len, enum ieee80211_p2p_attr_id attr, u8 *buf, unsigned int bufsize); +/** + * cfg80211_report_wowlan_wakeup - report wakeup from WoWLAN + * @wdev: the wireless device reporting the wakeup + * @wakeup: the wakeup report + * @gfp: allocation flags + * + * This function reports that the given device woke up. If it + * caused the wakeup, report the reason(s), otherwise you may + * pass %NULL as the @wakeup parameter to advertise that something + * else caused the wakeup. + */ +void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 21831ee57e3c..7a27e00c513a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4206,4 +4206,16 @@ void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif); */ int ieee80211_ave_rssi(struct ieee80211_vif *vif); +/** + * ieee80211_report_wowlan_wakeup - report WoWLAN wakeup + * @vif: virtual interface + * @wakeup: wakeup reason(s) + * @gfp: allocation flags + * + * See cfg80211_report_wowlan_wakeup(). + */ +void ieee80211_report_wowlan_wakeup(struct ieee80211_vif *vif, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp); + #endif /* MAC80211_H */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5b7dbc1ea966..225a65e72219 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -513,6 +513,12 @@ * command with the %NL80211_ATTR_WOWLAN_TRIGGERS attribute. For * more background information, see * http://wireless.kernel.org/en/users/Documentation/WoWLAN. + * The @NL80211_CMD_SET_WOWLAN command can also be used as a notification + * from the driver reporting the wakeup reason. In this case, the + * @NL80211_ATTR_WOWLAN_TRIGGERS attribute will contain the reason + * for the wakeup, if it was caused by wireless. If it is not present + * in the wakeup notification, the wireless device didn't cause the + * wakeup but reports that it was woken up. * * @NL80211_CMD_SET_REKEY_OFFLOAD: This command is used give the driver * the necessary information for supporting GTK rekey offload. This @@ -2947,6 +2953,10 @@ struct nl80211_wowlan_pattern_support { * * In %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, it is a binary attribute * carrying a &struct nl80211_wowlan_pattern_support. + * + * When reporting wakeup. it is a u32 attribute containing the 0-based + * index of the pattern that caused the wakeup, in the patterns passed + * to the kernel when configuring. 
* @NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED: Not a real trigger, and cannot be * used when setting, used only to indicate that GTK rekeying is supported * by the device (flag) @@ -2957,8 +2967,25 @@ struct nl80211_wowlan_pattern_support { * @NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE: wake up on 4-way handshake (flag) * @NL80211_WOWLAN_TRIG_RFKILL_RELEASE: wake up when rfkill is released * (on devices that have rfkill in the device) (flag) + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211: For wakeup reporting only, contains + * the 802.11 packet that caused the wakeup, e.g. a deauth frame. The frame + * may be truncated, the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN + * attribute contains the original length. + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN: Original length of the 802.11 + * packet, may be bigger than the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211 + * attribute if the packet was truncated somewhere. + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023: For wakeup reporting only, contains the + * 802.11 packet that caused the wakeup, e.g. a magic packet. The frame may + * be truncated, the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN attribute + * contains the original length. + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN: Original length of the 802.3 + * packet, may be bigger than the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023 + * attribute if the packet was truncated somewhere. * @NUM_NL80211_WOWLAN_TRIG: number of wake on wireless triggers * @MAX_NL80211_WOWLAN_TRIG: highest wowlan trigger attribute number + * + * These nested attributes are used to configure the wakeup triggers and + * to report the wakeup reason(s). */ enum nl80211_wowlan_triggers { __NL80211_WOWLAN_TRIG_INVALID, @@ -2971,6 +2998,10 @@ enum nl80211_wowlan_triggers { NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE, NL80211_WOWLAN_TRIG_RFKILL_RELEASE, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN, /* keep last */ NUM_NL80211_WOWLAN_TRIG, diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index e45b83610e85..53801d20176d 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -228,3 +228,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) * ieee80211_reconfig(), which is also needed for hardware * hang/firmware failure/etc. recovery. 
*/ + +void ieee80211_report_wowlan_wakeup(struct ieee80211_vif *vif, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + cfg80211_report_wowlan_wakeup(&sdata->wdev, wakeup, gfp); +} +EXPORT_SYMBOL(ieee80211_report_wowlan_wakeup); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b5978ab4ad7a..d359734b6972 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9323,6 +9323,103 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_report_obss_beacon); +#ifdef CONFIG_PM +void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct sk_buff *msg; + void *hdr; + int err, size = 200; + + trace_cfg80211_report_wowlan_wakeup(wdev->wiphy, wdev, wakeup); + + if (wakeup) + size += wakeup->packet_present_len; + + msg = nlmsg_new(size, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_SET_WOWLAN); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + goto free_msg; + + if (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) + goto free_msg; + + if (wakeup) { + struct nlattr *reasons; + + reasons = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); + + if (wakeup->disconnect && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) + goto free_msg; + if (wakeup->magic_pkt && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) + goto free_msg; + if (wakeup->gtk_rekey_failure && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) + goto free_msg; + if (wakeup->eap_identity_req && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) + goto free_msg; + if (wakeup->four_way_handshake && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) + goto free_msg; + if (wakeup->rfkill_release && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)) + goto free_msg; + + if (wakeup->pattern_idx >= 0 && + nla_put_u32(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, + wakeup->pattern_idx)) + goto free_msg; + + if (wakeup->packet) { + u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211; + u32 len_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN; + + if (!wakeup->packet_80211) { + pkt_attr = + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023; + len_attr = + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN; + } + + if (wakeup->packet_len && + nla_put_u32(msg, len_attr, wakeup->packet_len)) + goto free_msg; + + if (nla_put(msg, pkt_attr, wakeup->packet_present_len, + wakeup->packet)) + goto free_msg; + } + + nla_nest_end(msg, reasons); + } + + err = genlmsg_end(msg, hdr); + if (err < 0) + goto free_msg; + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + free_msg: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_report_wowlan_wakeup); +#endif + void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, enum nl80211_tdls_operation oper, u16 reason_code, gfp_t gfp) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 8bc553199686..c9cafb0ea95f 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2333,6 +2333,41 @@ TRACE_EVENT(cfg80211_return_u32, TP_printk("ret: %u", __entry->ret) ); +TRACE_EVENT(cfg80211_report_wowlan_wakeup, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_wowlan_wakeup *wakeup), + TP_ARGS(wiphy, 
wdev, wakeup), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(bool, disconnect) + __field(bool, magic_pkt) + __field(bool, gtk_rekey_failure) + __field(bool, eap_identity_req) + __field(bool, four_way_handshake) + __field(bool, rfkill_release) + __field(s32, pattern_idx) + __field(u32, packet_len) + __dynamic_array(u8, packet, wakeup->packet_present_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->disconnect = wakeup->disconnect; + __entry->magic_pkt = wakeup->magic_pkt; + __entry->gtk_rekey_failure = wakeup->gtk_rekey_failure; + __entry->eap_identity_req = wakeup->eap_identity_req; + __entry->four_way_handshake = wakeup->four_way_handshake; + __entry->rfkill_release = wakeup->rfkill_release; + __entry->pattern_idx = wakeup->pattern_idx; + __entry->packet_len = wakeup->packet_len; + if (wakeup->packet && wakeup->packet_present_len) + memcpy(__get_dynamic_array(packet), wakeup->packet, + wakeup->packet_present_len); + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3-71-gd317 From 6fcdf4facb85e7d54ff6195378dd2ba8e0baccc4 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 30 Jan 2013 21:50:08 -0500 Subject: wanrouter: delete now orphaned header content, files/drivers The wanrouter support was identified earlier as unused for years, and so the previous commit totally decoupled it from the kernel, leaving the related wanrouter files present, but totally inert. Here we take the final step in that cleanup, by doing a wholesale removal of these files. The two step process is used so that the large deletion is decoupled from the git history of files that we still care about. The drivers deleted here all were dependent on the Kconfig setting CONFIG_WAN_ROUTER_DRIVERS. A stub wanrouter.h header (kernel & uapi) are left behind so that drivers/isdn/i4l/isdn_x25iface.c continues to compile, and so that we don't accidentally break userspace that expected these defines. Cc: Joe Perches Cc: Dan Carpenter Cc: Arnaldo Carvalho de Melo Signed-off-by: Paul Gortmaker --- drivers/net/wan/cycx_drv.c | 569 -------------- drivers/net/wan/cycx_main.c | 346 --------- drivers/net/wan/cycx_x25.c | 1602 ---------------------------------------- include/linux/cyclomx.h | 77 -- include/linux/cycx_drv.h | 64 -- include/linux/wanrouter.h | 127 +--- include/uapi/linux/wanrouter.h | 443 +---------- net/wanrouter/Kconfig | 27 - net/wanrouter/Makefile | 7 - net/wanrouter/patchlevel | 1 - net/wanrouter/wanmain.c | 782 -------------------- net/wanrouter/wanproc.c | 380 ---------- 12 files changed, 8 insertions(+), 4417 deletions(-) delete mode 100644 drivers/net/wan/cycx_drv.c delete mode 100644 drivers/net/wan/cycx_main.c delete mode 100644 drivers/net/wan/cycx_x25.c delete mode 100644 include/linux/cyclomx.h delete mode 100644 include/linux/cycx_drv.h delete mode 100644 net/wanrouter/Kconfig delete mode 100644 net/wanrouter/Makefile delete mode 100644 net/wanrouter/patchlevel delete mode 100644 net/wanrouter/wanmain.c delete mode 100644 net/wanrouter/wanproc.c (limited to 'include/uapi/linux') diff --git a/drivers/net/wan/cycx_drv.c b/drivers/net/wan/cycx_drv.c deleted file mode 100644 index 2a3ecae67a90..000000000000 --- a/drivers/net/wan/cycx_drv.c +++ /dev/null @@ -1,569 +0,0 @@ -/* -* cycx_drv.c Cyclom 2X Support Module. -* -* This module is a library of common hardware specific -* functions used by the Cyclades Cyclom 2X sync card. 
-* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdladrv.c by Gene Kozin -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 1999/11/11 acme set_current_state(TASK_INTERRUPTIBLE), code -* cleanup -* 1999/11/08 acme init_cyc2x deleted, doing nothing -* 1999/11/06 acme back to read[bw], write[bw] and memcpy_to and -* fromio to use dpmbase ioremaped -* 1999/10/26 acme use isa_read[bw], isa_write[bw] & isa_memcpy_to -* & fromio -* 1999/10/23 acme cleanup to only supports cyclom2x: all the other -* boards are no longer manufactured by cyclades, -* if someone wants to support them... be my guest! -* 1999/05/28 acme cycx_intack & cycx_intde gone for good -* 1999/05/18 acme lots of unlogged work, submitting to Linus... -* 1999/01/03 acme more judicious use of data types -* 1999/01/03 acme judicious use of data types :> -* cycx_inten trying to reset pending interrupts -* from cyclom 2x - I think this isn't the way to -* go, but for now... -* 1999/01/02 acme cycx_intack ok, I think there's nothing to do -* to ack an int in cycx_drv.c, only handle it in -* cyx_isr (or in the other protocols: cyp_isr, -* cyf_isr, when they get implemented. -* Dec 31, 1998 acme cycx_data_boot & cycx_code_boot fixed, crossing -* fingers to see x25_configure in cycx_x25.c -* work... :) -* Dec 26, 1998 acme load implementation fixed, seems to work! :) -* cycx_2x_dpmbase_options with all the possible -* DPM addresses (20). -* cycx_intr implemented (test this!) -* general code cleanup -* Dec 8, 1998 Ivan Passos Cyclom-2X firmware load implementation. -* Aug 8, 1998 acme Initial version. -*/ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include /* __init */ -#include -#include /* printk(), and other useful stuff */ -#include /* offsetof(), etc. */ -#include /* return codes */ -#include /* API definitions */ -#include /* CYCX firmware module definitions */ -#include /* udelay, msleep_interruptible */ -#include /* read[wl], write[wl], ioremap, iounmap */ - -#define MOD_VERSION 0 -#define MOD_RELEASE 6 - -MODULE_AUTHOR("Arnaldo Carvalho de Melo"); -MODULE_DESCRIPTION("Cyclom 2x Sync Card Driver"); -MODULE_LICENSE("GPL"); - -/* Hardware-specific functions */ -static int load_cyc2x(struct cycx_hw *hw, struct cycx_firmware *cfm, u32 len); -static void cycx_bootcfg(struct cycx_hw *hw); - -static int reset_cyc2x(void __iomem *addr); -static int detect_cyc2x(void __iomem *addr); - -/* Miscellaneous functions */ -static int get_option_index(const long *optlist, long optval); -static u16 checksum(u8 *buf, u32 len); - -#define wait_cyc(addr) cycx_exec(addr + CMD_OFFSET) - -/* Global Data */ - -/* private data */ -static const char fullname[] = "Cyclom 2X Support Module"; -static const char copyright[] = - "(c) 1998-2003 Arnaldo Carvalho de Melo "; - -/* Hardware configuration options. - * These are arrays of configuration options used by verification routines. - * The first element of each array is its size (i.e. number of options). 
- */ -static const long cyc2x_dpmbase_options[] = { - 20, - 0xA0000, 0xA4000, 0xA8000, 0xAC000, 0xB0000, 0xB4000, 0xB8000, - 0xBC000, 0xC0000, 0xC4000, 0xC8000, 0xCC000, 0xD0000, 0xD4000, - 0xD8000, 0xDC000, 0xE0000, 0xE4000, 0xE8000, 0xEC000 -}; - -static const long cycx_2x_irq_options[] = { 7, 3, 5, 9, 10, 11, 12, 15 }; - -/* Kernel Loadable Module Entry Points */ -/* Module 'insert' entry point. - * o print announcement - * o initialize static data - * - * Return: 0 Ok - * < 0 error. - * Context: process */ - -static int __init cycx_drv_init(void) -{ - pr_info("%s v%u.%u %s\n", - fullname, MOD_VERSION, MOD_RELEASE, copyright); - - return 0; -} - -/* Module 'remove' entry point. - * o release all remaining system resources */ -static void cycx_drv_cleanup(void) -{ -} - -/* Kernel APIs */ -/* Set up adapter. - * o detect adapter type - * o verify hardware configuration options - * o check for hardware conflicts - * o set up adapter shared memory - * o test adapter memory - * o load firmware - * Return: 0 ok. - * < 0 error */ -EXPORT_SYMBOL(cycx_setup); -int cycx_setup(struct cycx_hw *hw, void *cfm, u32 len, unsigned long dpmbase) -{ - int err; - - /* Verify IRQ configuration options */ - if (!get_option_index(cycx_2x_irq_options, hw->irq)) { - pr_err("IRQ %d is invalid!\n", hw->irq); - return -EINVAL; - } - - /* Setup adapter dual-port memory window and test memory */ - if (!dpmbase) { - pr_err("you must specify the dpm address!\n"); - return -EINVAL; - } else if (!get_option_index(cyc2x_dpmbase_options, dpmbase)) { - pr_err("memory address 0x%lX is invalid!\n", dpmbase); - return -EINVAL; - } - - hw->dpmbase = ioremap(dpmbase, CYCX_WINDOWSIZE); - hw->dpmsize = CYCX_WINDOWSIZE; - - if (!detect_cyc2x(hw->dpmbase)) { - pr_err("adapter Cyclom 2X not found at address 0x%lX!\n", - dpmbase); - return -EINVAL; - } - - pr_info("found Cyclom 2X card at address 0x%lX\n", dpmbase); - - /* Load firmware. If loader fails then shut down adapter */ - err = load_cyc2x(hw, cfm, len); - - if (err) - cycx_down(hw); /* shutdown adapter */ - - return err; -} - -EXPORT_SYMBOL(cycx_down); -int cycx_down(struct cycx_hw *hw) -{ - iounmap(hw->dpmbase); - return 0; -} - -/* Enable interrupt generation. */ -static void cycx_inten(struct cycx_hw *hw) -{ - writeb(0, hw->dpmbase); -} - -/* Generate an interrupt to adapter's CPU. */ -EXPORT_SYMBOL(cycx_intr); -void cycx_intr(struct cycx_hw *hw) -{ - writew(0, hw->dpmbase + GEN_CYCX_INTR); -} - -/* Execute Adapter Command. - * o Set exec flag. - * o Busy-wait until flag is reset. */ -EXPORT_SYMBOL(cycx_exec); -int cycx_exec(void __iomem *addr) -{ - u16 i = 0; - /* wait till addr content is zeroed */ - - while (readw(addr)) { - udelay(1000); - - if (++i > 50) - return -1; - } - - return 0; -} - -/* Read absolute adapter memory. - * Transfer data from adapter's memory to data buffer. */ -EXPORT_SYMBOL(cycx_peek); -int cycx_peek(struct cycx_hw *hw, u32 addr, void *buf, u32 len) -{ - if (len == 1) - *(u8*)buf = readb(hw->dpmbase + addr); - else - memcpy_fromio(buf, hw->dpmbase + addr, len); - - return 0; -} - -/* Write Absolute Adapter Memory. - * Transfer data from data buffer to adapter's memory. */ -EXPORT_SYMBOL(cycx_poke); -int cycx_poke(struct cycx_hw *hw, u32 addr, void *buf, u32 len) -{ - if (len == 1) - writeb(*(u8*)buf, hw->dpmbase + addr); - else - memcpy_toio(hw->dpmbase + addr, buf, len); - - return 0; -} - -/* Hardware-Specific Functions */ - -/* Load Aux Routines */ -/* Reset board hardware. - return 1 if memory exists at addr and 0 if not. 
*/ -static int memory_exists(void __iomem *addr) -{ - int tries = 0; - - for (; tries < 3 ; tries++) { - writew(TEST_PATTERN, addr + 0x10); - - if (readw(addr + 0x10) == TEST_PATTERN) - if (readw(addr + 0x10) == TEST_PATTERN) - return 1; - - msleep_interruptible(1 * 1000); - } - - return 0; -} - -/* Load reset code. */ -static void reset_load(void __iomem *addr, u8 *buffer, u32 cnt) -{ - void __iomem *pt_code = addr + RESET_OFFSET; - u16 i; /*, j; */ - - for (i = 0 ; i < cnt ; i++) { -/* for (j = 0 ; j < 50 ; j++); Delay - FIXME busy waiting... */ - writeb(*buffer++, pt_code++); - } -} - -/* Load buffer using boot interface. - * o copy data from buffer to Cyclom-X memory - * o wait for reset code to copy it to right portion of memory */ -static int buffer_load(void __iomem *addr, u8 *buffer, u32 cnt) -{ - memcpy_toio(addr + DATA_OFFSET, buffer, cnt); - writew(GEN_BOOT_DAT, addr + CMD_OFFSET); - - return wait_cyc(addr); -} - -/* Set up entry point and kick start Cyclom-X CPU. */ -static void cycx_start(void __iomem *addr) -{ - /* put in 0x30 offset the jump instruction to the code entry point */ - writeb(0xea, addr + 0x30); - writeb(0x00, addr + 0x31); - writeb(0xc4, addr + 0x32); - writeb(0x00, addr + 0x33); - writeb(0x00, addr + 0x34); - - /* cmd to start executing code */ - writew(GEN_START, addr + CMD_OFFSET); -} - -/* Load and boot reset code. */ -static void cycx_reset_boot(void __iomem *addr, u8 *code, u32 len) -{ - void __iomem *pt_start = addr + START_OFFSET; - - writeb(0xea, pt_start++); /* jmp to f000:3f00 */ - writeb(0x00, pt_start++); - writeb(0xfc, pt_start++); - writeb(0x00, pt_start++); - writeb(0xf0, pt_start); - reset_load(addr, code, len); - - /* 80186 was in hold, go */ - writeb(0, addr + START_CPU); - msleep_interruptible(1 * 1000); -} - -/* Load data.bin file through boot (reset) interface. */ -static int cycx_data_boot(void __iomem *addr, u8 *code, u32 len) -{ - void __iomem *pt_boot_cmd = addr + CMD_OFFSET; - u32 i; - - /* boot buffer length */ - writew(CFM_LOAD_BUFSZ, pt_boot_cmd + sizeof(u16)); - writew(GEN_DEFPAR, pt_boot_cmd); - - if (wait_cyc(addr) < 0) - return -1; - - writew(0, pt_boot_cmd + sizeof(u16)); - writew(0x4000, pt_boot_cmd + 2 * sizeof(u16)); - writew(GEN_SET_SEG, pt_boot_cmd); - - if (wait_cyc(addr) < 0) - return -1; - - for (i = 0 ; i < len ; i += CFM_LOAD_BUFSZ) - if (buffer_load(addr, code + i, - min_t(u32, CFM_LOAD_BUFSZ, (len - i))) < 0) { - pr_err("Error !!\n"); - return -1; - } - - return 0; -} - - -/* Load code.bin file through boot (reset) interface. */ -static int cycx_code_boot(void __iomem *addr, u8 *code, u32 len) -{ - void __iomem *pt_boot_cmd = addr + CMD_OFFSET; - u32 i; - - /* boot buffer length */ - writew(CFM_LOAD_BUFSZ, pt_boot_cmd + sizeof(u16)); - writew(GEN_DEFPAR, pt_boot_cmd); - - if (wait_cyc(addr) < 0) - return -1; - - writew(0x0000, pt_boot_cmd + sizeof(u16)); - writew(0xc400, pt_boot_cmd + 2 * sizeof(u16)); - writew(GEN_SET_SEG, pt_boot_cmd); - - if (wait_cyc(addr) < 0) - return -1; - - for (i = 0 ; i < len ; i += CFM_LOAD_BUFSZ) - if (buffer_load(addr, code + i, - min_t(u32, CFM_LOAD_BUFSZ, (len - i)))) { - pr_err("Error !!\n"); - return -1; - } - - return 0; -} - -/* Load adapter from the memory image of the CYCX firmware module. 
- * o verify firmware integrity and compatibility - * o start adapter up */ -static int load_cyc2x(struct cycx_hw *hw, struct cycx_firmware *cfm, u32 len) -{ - int i, j; - struct cycx_fw_header *img_hdr; - u8 *reset_image, - *data_image, - *code_image; - void __iomem *pt_cycld = hw->dpmbase + 0x400; - u16 cksum; - - /* Announce */ - pr_info("firmware signature=\"%s\"\n", cfm->signature); - - /* Verify firmware signature */ - if (strcmp(cfm->signature, CFM_SIGNATURE)) { - pr_err("load_cyc2x: not Cyclom-2X firmware!\n"); - return -EINVAL; - } - - pr_info("firmware version=%u\n", cfm->version); - - /* Verify firmware module format version */ - if (cfm->version != CFM_VERSION) { - pr_err("%s: firmware format %u rejected! Expecting %u.\n", - __func__, cfm->version, CFM_VERSION); - return -EINVAL; - } - - /* Verify firmware module length and checksum */ - cksum = checksum((u8*)&cfm->info, sizeof(struct cycx_fw_info) + - cfm->info.codesize); -/* - FIXME cfm->info.codesize is off by 2 - if (((len - sizeof(struct cycx_firmware) - 1) != cfm->info.codesize) || -*/ - if (cksum != cfm->checksum) { - pr_err("%s: firmware corrupted!\n", __func__); - pr_err(" cdsize = 0x%x (expected 0x%lx)\n", - len - (int)sizeof(struct cycx_firmware) - 1, - cfm->info.codesize); - pr_err(" chksum = 0x%x (expected 0x%x)\n", - cksum, cfm->checksum); - return -EINVAL; - } - - /* If everything is ok, set reset, data and code pointers */ - img_hdr = (struct cycx_fw_header *)&cfm->image; -#ifdef FIRMWARE_DEBUG - pr_info("%s: image sizes\n", __func__); - pr_info(" reset=%lu\n", img_hdr->reset_size); - pr_info(" data=%lu\n", img_hdr->data_size); - pr_info(" code=%lu\n", img_hdr->code_size); -#endif - reset_image = ((u8 *)img_hdr) + sizeof(struct cycx_fw_header); - data_image = reset_image + img_hdr->reset_size; - code_image = data_image + img_hdr->data_size; - - /*---- Start load ----*/ - /* Announce */ - pr_info("loading firmware %s (ID=%u)...\n", - cfm->descr[0] ? cfm->descr : "unknown firmware", - cfm->info.codeid); - - for (i = 0 ; i < 5 ; i++) { - /* Reset Cyclom hardware */ - if (!reset_cyc2x(hw->dpmbase)) { - pr_err("dpm problem or board not found\n"); - return -EINVAL; - } - - /* Load reset.bin */ - cycx_reset_boot(hw->dpmbase, reset_image, img_hdr->reset_size); - /* reset is waiting for boot */ - writew(GEN_POWER_ON, pt_cycld); - msleep_interruptible(1 * 1000); - - for (j = 0 ; j < 3 ; j++) - if (!readw(pt_cycld)) - goto reset_loaded; - else - msleep_interruptible(1 * 1000); - } - - pr_err("reset not started\n"); - return -EINVAL; - -reset_loaded: - /* Load data.bin */ - if (cycx_data_boot(hw->dpmbase, data_image, img_hdr->data_size)) { - pr_err("cannot load data file\n"); - return -EINVAL; - } - - /* Load code.bin */ - if (cycx_code_boot(hw->dpmbase, code_image, img_hdr->code_size)) { - pr_err("cannot load code file\n"); - return -EINVAL; - } - - /* Prepare boot-time configuration data */ - cycx_bootcfg(hw); - - /* kick-off CPU */ - cycx_start(hw->dpmbase); - - /* Arthur Ganzert's tip: wait a while after the firmware loading... - seg abr 26 17:17:12 EST 1999 - acme */ - msleep_interruptible(7 * 1000); - pr_info("firmware loaded!\n"); - - /* enable interrupts */ - cycx_inten(hw); - - return 0; -} - -/* Prepare boot-time firmware configuration data. - * o initialize configuration data area - From async.doc - V_3.4.0 - 07/18/1994 - - As of now, only static buffers are available to the user. - So, the bit VD_RXDIRC must be set in 'valid'. That means that user - wants to use the static transmission and reception buffers. 
*/ -static void cycx_bootcfg(struct cycx_hw *hw) -{ - /* use fixed buffers */ - writeb(FIXED_BUFFERS, hw->dpmbase + CONF_OFFSET); -} - -/* Detect Cyclom 2x adapter. - * Following tests are used to detect Cyclom 2x adapter: - * to be completed based on the tests done below - * Return 1 if detected o.k. or 0 if failed. - * Note: This test is destructive! Adapter will be left in shutdown - * state after the test. */ -static int detect_cyc2x(void __iomem *addr) -{ - reset_cyc2x(addr); - - return memory_exists(addr); -} - -/* Miscellaneous */ -/* Get option's index into the options list. - * Return option's index (1 .. N) or zero if option is invalid. */ -static int get_option_index(const long *optlist, long optval) -{ - int i = 1; - - for (; i <= optlist[0]; ++i) - if (optlist[i] == optval) - return i; - - return 0; -} - -/* Reset adapter's CPU. */ -static int reset_cyc2x(void __iomem *addr) -{ - writeb(0, addr + RST_ENABLE); - msleep_interruptible(2 * 1000); - writeb(0, addr + RST_DISABLE); - msleep_interruptible(2 * 1000); - - return memory_exists(addr); -} - -/* Calculate 16-bit CRC using CCITT polynomial. */ -static u16 checksum(u8 *buf, u32 len) -{ - u16 crc = 0; - u16 mask, flag; - - for (; len; --len, ++buf) - for (mask = 0x80; mask; mask >>= 1) { - flag = (crc & 0x8000); - crc <<= 1; - crc |= ((*buf & mask) ? 1 : 0); - - if (flag) - crc ^= 0x1021; - } - - return crc; -} - -module_init(cycx_drv_init); -module_exit(cycx_drv_cleanup); - -/* End */ diff --git a/drivers/net/wan/cycx_main.c b/drivers/net/wan/cycx_main.c deleted file mode 100644 index 81fbbad406be..000000000000 --- a/drivers/net/wan/cycx_main.c +++ /dev/null @@ -1,346 +0,0 @@ -/* -* cycx_main.c Cyclades Cyclom 2X WAN Link Driver. Main module. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdlamain.c by Gene Kozin & -* Jaspreet Singh -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Please look at the bitkeeper changelog (or any other scm tool that ends up -* importing bitkeeper changelog or that replaces bitkeeper in the future as -* main tool for linux development). -* -* 2001/05/09 acme Fix MODULE_DESC for debug, .bss nitpicks, -* some cleanups -* 2000/07/13 acme remove useless #ifdef MODULE and crap -* #if KERNEL_VERSION > blah -* 2000/07/06 acme __exit at cyclomx_cleanup -* 2000/04/02 acme dprintk and cycx_debug -* module_init/module_exit -* 2000/01/21 acme rename cyclomx_open to cyclomx_mod_inc_use_count -* and cyclomx_close to cyclomx_mod_dec_use_count -* 2000/01/08 acme cleanup -* 1999/11/06 acme cycx_down back to life (it needs to be -* called to iounmap the dpmbase) -* 1999/08/09 acme removed references to enable_tx_int -* use spinlocks instead of cli/sti in -* cyclomx_set_state -* 1999/05/19 acme works directly linked into the kernel -* init_waitqueue_head for 2.3.* kernel -* 1999/05/18 acme major cleanup (polling not needed), etc -* 1998/08/28 acme minor cleanup (ioctls for firmware deleted) -* queue_task activated -* 1998/08/08 acme Initial version. -*/ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include /* offsetof(), etc. */ -#include /* return codes */ -#include /* inline memset(), etc. 
*/ -#include /* kmalloc(), kfree() */ -#include /* printk(), and other useful stuff */ -#include /* support for loadable modules */ -#include /* request_region(), release_region() */ -#include /* WAN router definitions */ -#include /* cyclomx common user API definitions */ -#include /* __init (when not using as a module) */ -#include - -unsigned int cycx_debug; - -MODULE_AUTHOR("Arnaldo Carvalho de Melo"); -MODULE_DESCRIPTION("Cyclom 2X Sync Card Driver."); -MODULE_LICENSE("GPL"); -module_param(cycx_debug, int, 0); -MODULE_PARM_DESC(cycx_debug, "cyclomx debug level"); - -/* Defines & Macros */ - -#define CYCX_DRV_VERSION 0 /* version number */ -#define CYCX_DRV_RELEASE 11 /* release (minor version) number */ -#define CYCX_MAX_CARDS 1 /* max number of adapters */ - -#define CONFIG_CYCX_CARDS 1 - -/* Function Prototypes */ - -/* WAN link driver entry points */ -static int cycx_wan_setup(struct wan_device *wandev, wandev_conf_t *conf); -static int cycx_wan_shutdown(struct wan_device *wandev); - -/* Miscellaneous functions */ -static irqreturn_t cycx_isr(int irq, void *dev_id); - -/* Global Data - * Note: All data must be explicitly initialized!!! - */ - -/* private data */ -static const char cycx_drvname[] = "cyclomx"; -static const char cycx_fullname[] = "CYCLOM 2X(tm) Sync Card Driver"; -static const char cycx_copyright[] = "(c) 1998-2003 Arnaldo Carvalho de Melo " - ""; -static int cycx_ncards = CONFIG_CYCX_CARDS; -static struct cycx_device *cycx_card_array; /* adapter data space */ - -/* Kernel Loadable Module Entry Points */ - -/* - * Module 'insert' entry point. - * o print announcement - * o allocate adapter data space - * o initialize static data - * o register all cards with WAN router - * o calibrate Cyclom 2X shared memory access delay. - * - * Return: 0 Ok - * < 0 error. - * Context: process - */ -static int __init cycx_init(void) -{ - int cnt, err = -ENOMEM; - - pr_info("%s v%u.%u %s\n", - cycx_fullname, CYCX_DRV_VERSION, CYCX_DRV_RELEASE, - cycx_copyright); - - /* Verify number of cards and allocate adapter data space */ - cycx_ncards = min_t(int, cycx_ncards, CYCX_MAX_CARDS); - cycx_ncards = max_t(int, cycx_ncards, 1); - cycx_card_array = kcalloc(cycx_ncards, sizeof(struct cycx_device), GFP_KERNEL); - if (!cycx_card_array) - goto out; - - - /* Register adapters with WAN router */ - for (cnt = 0; cnt < cycx_ncards; ++cnt) { - struct cycx_device *card = &cycx_card_array[cnt]; - struct wan_device *wandev = &card->wandev; - - sprintf(card->devname, "%s%d", cycx_drvname, cnt + 1); - wandev->magic = ROUTER_MAGIC; - wandev->name = card->devname; - wandev->private = card; - wandev->setup = cycx_wan_setup; - wandev->shutdown = cycx_wan_shutdown; - err = register_wan_device(wandev); - - if (err) { - pr_err("%s registration failed with error %d!\n", - card->devname, err); - break; - } - } - - err = -ENODEV; - if (!cnt) { - kfree(cycx_card_array); - goto out; - } - err = 0; - cycx_ncards = cnt; /* adjust actual number of cards */ -out: return err; -} - -/* - * Module 'remove' entry point. - * o unregister all adapters from the WAN router - * o release all remaining system resources - */ -static void __exit cycx_exit(void) -{ - int i = 0; - - for (; i < cycx_ncards; ++i) { - struct cycx_device *card = &cycx_card_array[i]; - unregister_wan_device(card->devname); - } - - kfree(cycx_card_array); -} - -/* WAN Device Driver Entry Points */ -/* - * Setup/configure WAN link driver. 
- * o check adapter state - * o make sure firmware is present in configuration - * o allocate interrupt vector - * o setup Cyclom 2X hardware - * o call appropriate routine to perform protocol-specific initialization - * - * This function is called when router handles ROUTER_SETUP IOCTL. The - * configuration structure is in kernel memory (including extended data, if - * any). - */ -static int cycx_wan_setup(struct wan_device *wandev, wandev_conf_t *conf) -{ - int rc = -EFAULT; - struct cycx_device *card; - int irq; - - /* Sanity checks */ - - if (!wandev || !wandev->private || !conf) - goto out; - - card = wandev->private; - rc = -EBUSY; - if (wandev->state != WAN_UNCONFIGURED) - goto out; - - rc = -EINVAL; - if (!conf->data_size || !conf->data) { - pr_err("%s: firmware not found in configuration data!\n", - wandev->name); - goto out; - } - - if (conf->irq <= 0) { - pr_err("%s: can't configure without IRQ!\n", wandev->name); - goto out; - } - - /* Allocate IRQ */ - irq = conf->irq == 2 ? 9 : conf->irq; /* IRQ2 -> IRQ9 */ - - if (request_irq(irq, cycx_isr, 0, wandev->name, card)) { - pr_err("%s: can't reserve IRQ %d!\n", wandev->name, irq); - goto out; - } - - /* Configure hardware, load firmware, etc. */ - memset(&card->hw, 0, sizeof(card->hw)); - card->hw.irq = irq; - card->hw.dpmsize = CYCX_WINDOWSIZE; - card->hw.fwid = CFID_X25_2X; - spin_lock_init(&card->lock); - init_waitqueue_head(&card->wait_stats); - - rc = cycx_setup(&card->hw, conf->data, conf->data_size, conf->maddr); - if (rc) - goto out_irq; - - /* Initialize WAN device data space */ - wandev->irq = irq; - wandev->dma = wandev->ioport = 0; - wandev->maddr = (unsigned long)card->hw.dpmbase; - wandev->msize = card->hw.dpmsize; - wandev->hw_opt[2] = 0; - wandev->hw_opt[3] = card->hw.fwid; - - /* Protocol-specific initialization */ - switch (card->hw.fwid) { -#ifdef CONFIG_CYCLOMX_X25 - case CFID_X25_2X: - rc = cycx_x25_wan_init(card, conf); - break; -#endif - default: - pr_err("%s: this firmware is not supported!\n", wandev->name); - rc = -EINVAL; - } - - if (rc) { - cycx_down(&card->hw); - goto out_irq; - } - - rc = 0; -out: - return rc; -out_irq: - free_irq(irq, card); - goto out; -} - -/* - * Shut down WAN link driver. - * o shut down adapter hardware - * o release system resources. - * - * This function is called by the router when device is being unregistered or - * when it handles ROUTER_DOWN IOCTL. - */ -static int cycx_wan_shutdown(struct wan_device *wandev) -{ - int ret = -EFAULT; - struct cycx_device *card; - - /* sanity checks */ - if (!wandev || !wandev->private) - goto out; - - ret = 0; - if (wandev->state == WAN_UNCONFIGURED) - goto out; - - card = wandev->private; - wandev->state = WAN_UNCONFIGURED; - cycx_down(&card->hw); - pr_info("%s: irq %d being freed!\n", wandev->name, wandev->irq); - free_irq(wandev->irq, card); -out: return ret; -} - -/* Miscellaneous */ -/* - * Cyclom 2X Interrupt Service Routine. - * o acknowledge Cyclom 2X hardware interrupt. - * o call protocol-specific interrupt service routine, if any. - */ -static irqreturn_t cycx_isr(int irq, void *dev_id) -{ - struct cycx_device *card = dev_id; - - if (card->wandev.state == WAN_UNCONFIGURED) - goto out; - - if (card->in_isr) { - pr_warn("%s: interrupt re-entrancy on IRQ %d!\n", - card->devname, card->wandev.irq); - goto out; - } - - if (card->isr) - card->isr(card); - return IRQ_HANDLED; -out: - return IRQ_NONE; -} - -/* Set WAN device state. 
*/ -void cycx_set_state(struct cycx_device *card, int state) -{ - unsigned long flags; - char *string_state = NULL; - - spin_lock_irqsave(&card->lock, flags); - - if (card->wandev.state != state) { - switch (state) { - case WAN_CONNECTED: - string_state = "connected!"; - break; - case WAN_DISCONNECTED: - string_state = "disconnected!"; - break; - } - pr_info("%s: link %s\n", card->devname, string_state); - card->wandev.state = state; - } - - card->state_tick = jiffies; - spin_unlock_irqrestore(&card->lock, flags); -} - -module_init(cycx_init); -module_exit(cycx_exit); diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c deleted file mode 100644 index 06f3f6309e4b..000000000000 --- a/drivers/net/wan/cycx_x25.c +++ /dev/null @@ -1,1602 +0,0 @@ -/* -* cycx_x25.c Cyclom 2X WAN Link Driver. X.25 module. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdla_x25.c by Gene Kozin -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 2001/01/12 acme use dev_kfree_skb_irq on interrupt context -* 2000/04/02 acme dprintk, cycx_debug -* fixed the bug introduced in get_dev_by_lcn and -* get_dev_by_dte_addr by the anonymous hacker -* that converted this driver to softnet -* 2000/01/08 acme cleanup -* 1999/10/27 acme use ARPHRD_HWX25 so that the X.25 stack know -* that we have a X.25 stack implemented in -* firmware onboard -* 1999/10/18 acme support for X.25 sockets in if_send, -* beware: socket(AF_X25...) IS WORK IN PROGRESS, -* TCP/IP over X.25 via wanrouter not affected, -* working. -* 1999/10/09 acme chan_disc renamed to chan_disconnect, -* began adding support for X.25 sockets: -* conf->protocol in new_if -* 1999/10/05 acme fixed return E... to return -E... -* 1999/08/10 acme serialized access to the card thru a spinlock -* in x25_exec -* 1999/08/09 acme removed per channel spinlocks -* removed references to enable_tx_int -* 1999/05/28 acme fixed nibble_to_byte, ackvc now properly treated -* if_send simplified -* 1999/05/25 acme fixed t1, t2, t21 & t23 configuration -* use spinlocks instead of cli/sti in some points -* 1999/05/24 acme finished the x25_get_stat function -* 1999/05/23 acme dev->type = ARPHRD_X25 (tcpdump only works, -* AFAIT, with ARPHRD_ETHER). This seems to be -* needed to use socket(AF_X25)... -* Now the config file must specify a peer media -* address for svc channels over a crossover cable. -* Removed hold_timeout from x25_channel_t, -* not used. -* A little enhancement in the DEBUG processing -* 1999/05/22 acme go to DISCONNECTED in disconnect_confirm_intr, -* instead of chan_disc. -* 1999/05/16 marcelo fixed timer initialization in SVCs -* 1999/01/05 acme x25_configure now get (most of) all -* parameters... -* 1999/01/05 acme pktlen now (correctly) uses log2 (value -* configured) -* 1999/01/03 acme judicious use of data types (u8, u16, u32, etc) -* 1999/01/03 acme cyx_isr: reset dpmbase to acknowledge -* indication (interrupt from cyclom 2x) -* 1999/01/02 acme cyx_isr: first hackings... -* 1999/01/0203 acme when initializing an array don't give less -* elements than declared... 
-* example: char send_cmd[6] = "?\xFF\x10"; -* you'll gonna lose a couple hours, 'cause your -* brain won't admit that there's an error in the -* above declaration... the side effect is that -* memset is put into the unresolved symbols -* instead of using the inline memset functions... -* 1999/01/02 acme began chan_connect, chan_send, x25_send -* 1998/12/31 acme x25_configure -* this code can be compiled as non module -* 1998/12/27 acme code cleanup -* IPX code wiped out! let's decrease code -* complexity for now, remember: I'm learning! :) -* bps_to_speed_code OK -* 1998/12/26 acme Minimal debug code cleanup -* 1998/08/08 acme Initial version. -*/ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#define CYCLOMX_X25_DEBUG 1 - -#include /* isdigit() */ -#include /* return codes */ -#include /* ARPHRD_HWX25 */ -#include /* printk(), and other useful stuff */ -#include -#include /* inline memset(), etc. */ -#include -#include /* kmalloc(), kfree() */ -#include /* offsetof(), etc. */ -#include /* WAN router definitions */ - -#include /* htons(), etc. */ - -#include /* Cyclom 2X common user API definitions */ -#include /* X.25 firmware API definitions */ - -#include - -/* Defines & Macros */ -#define CYCX_X25_MAX_CMD_RETRY 5 -#define CYCX_X25_CHAN_MTU 2048 /* unfragmented logical channel MTU */ - -/* Data Structures */ -/* This is an extension of the 'struct net_device' we create for each network - interface to keep the rest of X.25 channel-specific data. */ -struct cycx_x25_channel { - /* This member must be first. */ - struct net_device *slave; /* WAN slave */ - - char name[WAN_IFNAME_SZ+1]; /* interface name, ASCIIZ */ - char addr[WAN_ADDRESS_SZ+1]; /* media address, ASCIIZ */ - char *local_addr; /* local media address, ASCIIZ - - svc thru crossover cable */ - s16 lcn; /* logical channel number/conn.req.key*/ - u8 link; - struct timer_list timer; /* timer used for svc channel disc. */ - u16 protocol; /* ethertype, 0 - multiplexed */ - u8 svc; /* 0 - permanent, 1 - switched */ - u8 state; /* channel state */ - u8 drop_sequence; /* mark sequence for dropping */ - u32 idle_tmout; /* sec, before disconnecting */ - struct sk_buff *rx_skb; /* receive socket buffer */ - struct cycx_device *card; /* -> owner */ - struct net_device_stats ifstats;/* interface statistics */ -}; - -/* Function Prototypes */ -/* WAN link driver entry points. These are called by the WAN router module. 
*/ -static int cycx_wan_update(struct wan_device *wandev), - cycx_wan_new_if(struct wan_device *wandev, struct net_device *dev, - wanif_conf_t *conf), - cycx_wan_del_if(struct wan_device *wandev, struct net_device *dev); - -/* Network device interface */ -static int cycx_netdevice_init(struct net_device *dev); -static int cycx_netdevice_open(struct net_device *dev); -static int cycx_netdevice_stop(struct net_device *dev); -static int cycx_netdevice_hard_header(struct sk_buff *skb, - struct net_device *dev, u16 type, - const void *daddr, const void *saddr, - unsigned len); -static int cycx_netdevice_rebuild_header(struct sk_buff *skb); -static netdev_tx_t cycx_netdevice_hard_start_xmit(struct sk_buff *skb, - struct net_device *dev); - -static struct net_device_stats * - cycx_netdevice_get_stats(struct net_device *dev); - -/* Interrupt handlers */ -static void cycx_x25_irq_handler(struct cycx_device *card), - cycx_x25_irq_tx(struct cycx_device *card, struct cycx_x25_cmd *cmd), - cycx_x25_irq_rx(struct cycx_device *card, struct cycx_x25_cmd *cmd), - cycx_x25_irq_log(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_stat(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_connect_confirm(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_disconnect_confirm(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_connect(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_disconnect(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_spurious(struct cycx_device *card, - struct cycx_x25_cmd *cmd); - -/* X.25 firmware interface functions */ -static int cycx_x25_configure(struct cycx_device *card, - struct cycx_x25_config *conf), - cycx_x25_get_stats(struct cycx_device *card), - cycx_x25_send(struct cycx_device *card, u8 link, u8 lcn, u8 bitm, - int len, void *buf), - cycx_x25_connect_response(struct cycx_device *card, - struct cycx_x25_channel *chan), - cycx_x25_disconnect_response(struct cycx_device *card, u8 link, - u8 lcn); - -/* channel functions */ -static int cycx_x25_chan_connect(struct net_device *dev), - cycx_x25_chan_send(struct net_device *dev, struct sk_buff *skb); - -static void cycx_x25_chan_disconnect(struct net_device *dev), - cycx_x25_chan_send_event(struct net_device *dev, u8 event); - -/* Miscellaneous functions */ -static void cycx_x25_set_chan_state(struct net_device *dev, u8 state), - cycx_x25_chan_timer(unsigned long d); - -static void nibble_to_byte(u8 *s, u8 *d, u8 len, u8 nibble), - reset_timer(struct net_device *dev); - -static u8 bps_to_speed_code(u32 bps); -static u8 cycx_log2(u32 n); - -static unsigned dec_to_uint(u8 *str, int len); - -static struct net_device *cycx_x25_get_dev_by_lcn(struct wan_device *wandev, - s16 lcn); -static struct net_device * - cycx_x25_get_dev_by_dte_addr(struct wan_device *wandev, char *dte); - -static void cycx_x25_chan_setup(struct net_device *dev); - -#ifdef CYCLOMX_X25_DEBUG -static void hex_dump(char *msg, unsigned char *p, int len); -static void cycx_x25_dump_config(struct cycx_x25_config *conf); -static void cycx_x25_dump_stats(struct cycx_x25_stats *stats); -static void cycx_x25_dump_devs(struct wan_device *wandev); -#else -#define hex_dump(msg, p, len) -#define cycx_x25_dump_config(conf) -#define cycx_x25_dump_stats(stats) -#define cycx_x25_dump_devs(wandev) -#endif -/* Public Functions */ - -/* X.25 Protocol Initialization routine. - * - * This routine is called by the main Cyclom 2X module during setup. 
At this - * point adapter is completely initialized and X.25 firmware is running. - * o configure adapter - * o initialize protocol-specific fields of the adapter data space. - * - * Return: 0 o.k. - * < 0 failure. */ -int cycx_x25_wan_init(struct cycx_device *card, wandev_conf_t *conf) -{ - struct cycx_x25_config cfg; - - /* Verify configuration ID */ - if (conf->config_id != WANCONFIG_X25) { - pr_info("%s: invalid configuration ID %u!\n", - card->devname, conf->config_id); - return -EINVAL; - } - - /* Initialize protocol-specific fields */ - card->mbox = card->hw.dpmbase + X25_MBOX_OFFS; - card->u.x.connection_keys = 0; - spin_lock_init(&card->u.x.lock); - - /* Configure adapter. Here we set reasonable defaults, then parse - * device configuration structure and set configuration options. - * Most configuration options are verified and corrected (if - * necessary) since we can't rely on the adapter to do so and don't - * want it to fail either. */ - memset(&cfg, 0, sizeof(cfg)); - cfg.link = 0; - cfg.clock = conf->clocking == WANOPT_EXTERNAL ? 8 : 55; - cfg.speed = bps_to_speed_code(conf->bps); - cfg.n3win = 7; - cfg.n2win = 2; - cfg.n2 = 5; - cfg.nvc = 1; - cfg.npvc = 1; - cfg.flags = 0x02; /* default = V35 */ - cfg.t1 = 10; /* line carrier timeout */ - cfg.t2 = 29; /* tx timeout */ - cfg.t21 = 180; /* CALL timeout */ - cfg.t23 = 180; /* CLEAR timeout */ - - /* adjust MTU */ - if (!conf->mtu || conf->mtu >= 512) - card->wandev.mtu = 512; - else if (conf->mtu >= 256) - card->wandev.mtu = 256; - else if (conf->mtu >= 128) - card->wandev.mtu = 128; - else - card->wandev.mtu = 64; - - cfg.pktlen = cycx_log2(card->wandev.mtu); - - if (conf->station == WANOPT_DTE) { - cfg.locaddr = 3; /* DTE */ - cfg.remaddr = 1; /* DCE */ - } else { - cfg.locaddr = 1; /* DCE */ - cfg.remaddr = 3; /* DTE */ - } - - if (conf->interface == WANOPT_RS232) - cfg.flags = 0; /* FIXME just reset the 2nd bit */ - - if (conf->u.x25.hi_pvc) { - card->u.x.hi_pvc = min_t(unsigned int, conf->u.x25.hi_pvc, 4095); - card->u.x.lo_pvc = min_t(unsigned int, conf->u.x25.lo_pvc, card->u.x.hi_pvc); - } - - if (conf->u.x25.hi_svc) { - card->u.x.hi_svc = min_t(unsigned int, conf->u.x25.hi_svc, 4095); - card->u.x.lo_svc = min_t(unsigned int, conf->u.x25.lo_svc, card->u.x.hi_svc); - } - - if (card->u.x.lo_pvc == 255) - cfg.npvc = 0; - else - cfg.npvc = card->u.x.hi_pvc - card->u.x.lo_pvc + 1; - - cfg.nvc = card->u.x.hi_svc - card->u.x.lo_svc + 1 + cfg.npvc; - - if (conf->u.x25.hdlc_window) - cfg.n2win = min_t(unsigned int, conf->u.x25.hdlc_window, 7); - - if (conf->u.x25.pkt_window) - cfg.n3win = min_t(unsigned int, conf->u.x25.pkt_window, 7); - - if (conf->u.x25.t1) - cfg.t1 = min_t(unsigned int, conf->u.x25.t1, 30); - - if (conf->u.x25.t2) - cfg.t2 = min_t(unsigned int, conf->u.x25.t2, 30); - - if (conf->u.x25.t11_t21) - cfg.t21 = min_t(unsigned int, conf->u.x25.t11_t21, 30); - - if (conf->u.x25.t13_t23) - cfg.t23 = min_t(unsigned int, conf->u.x25.t13_t23, 30); - - if (conf->u.x25.n2) - cfg.n2 = min_t(unsigned int, conf->u.x25.n2, 30); - - /* initialize adapter */ - if (cycx_x25_configure(card, &cfg)) - return -EIO; - - /* Initialize protocol-specific fields of adapter data space */ - card->wandev.bps = conf->bps; - card->wandev.interface = conf->interface; - card->wandev.clocking = conf->clocking; - card->wandev.station = conf->station; - card->isr = cycx_x25_irq_handler; - card->exec = NULL; - card->wandev.update = cycx_wan_update; - card->wandev.new_if = cycx_wan_new_if; - card->wandev.del_if = cycx_wan_del_if; - 
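The MTU handling above quantizes the configured MTU down to one of the packet sizes the firmware accepts (512, 256, 128 or 64 bytes) and then stores only its base-2 logarithm in cfg.pktlen. A minimal standalone sketch of that encoding, in plain userspace C with hypothetical helper names rather than the driver's own functions:

#include <stdio.h>

/* Quantize a requested MTU down to a firmware-supported packet size. */
static unsigned int quantize_mtu(unsigned int mtu)
{
	if (!mtu || mtu >= 512)
		return 512;
	if (mtu >= 256)
		return 256;
	if (mtu >= 128)
		return 128;
	return 64;
}

/* Integer log2 of a power of two, the value placed in the pktlen field. */
static unsigned int ilog2_u32(unsigned int n)
{
	unsigned int log = 0;

	while (n > 1) {
		n >>= 1;
		++log;
	}
	return log;
}

int main(void)
{
	unsigned int mtu = quantize_mtu(300);			/* -> 256 */

	printf("mtu=%u pktlen=%u\n", mtu, ilog2_u32(mtu));	/* mtu=256 pktlen=8 */
	return 0;
}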
card->wandev.state = WAN_DISCONNECTED; - - return 0; -} - -/* WAN Device Driver Entry Points */ -/* Update device status & statistics. */ -static int cycx_wan_update(struct wan_device *wandev) -{ - /* sanity checks */ - if (!wandev || !wandev->private) - return -EFAULT; - - if (wandev->state == WAN_UNCONFIGURED) - return -ENODEV; - - cycx_x25_get_stats(wandev->private); - - return 0; -} - -/* Create new logical channel. - * This routine is called by the router when ROUTER_IFNEW IOCTL is being - * handled. - * o parse media- and hardware-specific configuration - * o make sure that a new channel can be created - * o allocate resources, if necessary - * o prepare network device structure for registration. - * - * Return: 0 o.k. - * < 0 failure (channel will not be created) */ -static int cycx_wan_new_if(struct wan_device *wandev, struct net_device *dev, - wanif_conf_t *conf) -{ - struct cycx_device *card = wandev->private; - struct cycx_x25_channel *chan; - int err = 0; - - if (!conf->name[0] || strlen(conf->name) > WAN_IFNAME_SZ) { - pr_info("%s: invalid interface name!\n", card->devname); - return -EINVAL; - } - - dev = alloc_netdev(sizeof(struct cycx_x25_channel), conf->name, - cycx_x25_chan_setup); - if (!dev) - return -ENOMEM; - - chan = netdev_priv(dev); - strcpy(chan->name, conf->name); - chan->card = card; - chan->link = conf->port; - chan->protocol = conf->protocol ? ETH_P_X25 : ETH_P_IP; - chan->rx_skb = NULL; - /* only used in svc connected thru crossover cable */ - chan->local_addr = NULL; - - if (conf->addr[0] == '@') { /* SVC */ - int len = strlen(conf->local_addr); - - if (len) { - if (len > WAN_ADDRESS_SZ) { - pr_err("%s: %s local addr too long!\n", - wandev->name, chan->name); - err = -EINVAL; - goto error; - } else { - chan->local_addr = kmalloc(len + 1, GFP_KERNEL); - - if (!chan->local_addr) { - err = -ENOMEM; - goto error; - } - } - - strncpy(chan->local_addr, conf->local_addr, - WAN_ADDRESS_SZ); - } - - chan->svc = 1; - strncpy(chan->addr, &conf->addr[1], WAN_ADDRESS_SZ); - init_timer(&chan->timer); - chan->timer.function = cycx_x25_chan_timer; - chan->timer.data = (unsigned long)dev; - - /* Set channel timeouts (default if not specified) */ - chan->idle_tmout = conf->idle_timeout ? conf->idle_timeout : 90; - } else if (isdigit(conf->addr[0])) { /* PVC */ - s16 lcn = dec_to_uint(conf->addr, 0); - - if (lcn >= card->u.x.lo_pvc && lcn <= card->u.x.hi_pvc) - chan->lcn = lcn; - else { - pr_err("%s: PVC %u is out of range on interface %s!\n", - wandev->name, lcn, chan->name); - err = -EINVAL; - goto error; - } - } else { - pr_err("%s: invalid media address on interface %s!\n", - wandev->name, chan->name); - err = -EINVAL; - goto error; - } - - return 0; - -error: - free_netdev(dev); - return err; -} - -/* Delete logical channel. 
*/ -static int cycx_wan_del_if(struct wan_device *wandev, struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - if (chan->svc) { - kfree(chan->local_addr); - if (chan->state == WAN_CONNECTED) - del_timer(&chan->timer); - } - - return 0; -} - - -/* Network Device Interface */ - -static const struct header_ops cycx_header_ops = { - .create = cycx_netdevice_hard_header, - .rebuild = cycx_netdevice_rebuild_header, -}; - -static const struct net_device_ops cycx_netdev_ops = { - .ndo_init = cycx_netdevice_init, - .ndo_open = cycx_netdevice_open, - .ndo_stop = cycx_netdevice_stop, - .ndo_start_xmit = cycx_netdevice_hard_start_xmit, - .ndo_get_stats = cycx_netdevice_get_stats, -}; - -static void cycx_x25_chan_setup(struct net_device *dev) -{ - /* Initialize device driver entry points */ - dev->netdev_ops = &cycx_netdev_ops; - dev->header_ops = &cycx_header_ops; - - /* Initialize media-specific parameters */ - dev->mtu = CYCX_X25_CHAN_MTU; - dev->type = ARPHRD_HWX25; /* ARP h/w type */ - dev->hard_header_len = 0; /* media header length */ - dev->addr_len = 0; /* hardware address length */ -} - -/* Initialize Linux network interface. - * - * This routine is called only once for each interface, during Linux network - * interface registration. Returning anything but zero will fail interface - * registration. */ -static int cycx_netdevice_init(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - struct wan_device *wandev = &card->wandev; - - if (!chan->svc) - *(__be16*)dev->dev_addr = htons(chan->lcn); - - /* Initialize hardware parameters (just for reference) */ - dev->irq = wandev->irq; - dev->dma = wandev->dma; - dev->base_addr = wandev->ioport; - dev->mem_start = (unsigned long)wandev->maddr; - dev->mem_end = (unsigned long)(wandev->maddr + - wandev->msize - 1); - dev->flags |= IFF_NOARP; - - /* Set transmit buffer queue length */ - dev->tx_queue_len = 10; - - /* Initialize socket buffers */ - cycx_x25_set_chan_state(dev, WAN_DISCONNECTED); - - return 0; -} - -/* Open network interface. - * o prevent module from unloading by incrementing use count - * o if link is disconnected then initiate connection - * - * Return 0 if O.k. or errno. */ -static int cycx_netdevice_open(struct net_device *dev) -{ - if (netif_running(dev)) - return -EBUSY; /* only one open is allowed */ - - netif_start_queue(dev); - return 0; -} - -/* Close network interface. - * o reset flags. - * o if there's no more open channels then disconnect physical link. */ -static int cycx_netdevice_stop(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - netif_stop_queue(dev); - - if (chan->state == WAN_CONNECTED || chan->state == WAN_CONNECTING) - cycx_x25_chan_disconnect(dev); - - return 0; -} - -/* Build media header. - * o encapsulate packet according to encapsulation type. - * - * The trick here is to put packet type (Ethertype) into 'protocol' field of - * the socket buffer, so that we don't forget it. If encapsulation fails, - * set skb->protocol to 0 and discard packet later. - * - * Return: media header length. */ -static int cycx_netdevice_hard_header(struct sk_buff *skb, - struct net_device *dev, u16 type, - const void *daddr, const void *saddr, - unsigned len) -{ - skb->protocol = htons(type); - - return dev->hard_header_len; -} - -/* * Re-build media header. - * Return: 1 physical address resolved. 
- * 0 physical address not resolved */ -static int cycx_netdevice_rebuild_header(struct sk_buff *skb) -{ - return 1; -} - -/* Send a packet on a network interface. - * o set busy flag (marks start of the transmission). - * o check link state. If link is not up, then drop the packet. - * o check channel status. If it's down then initiate a call. - * o pass a packet to corresponding WAN device. - * o free socket buffer - * - * Return: 0 complete (socket buffer must be freed) - * non-0 packet may be re-transmitted (tbusy must be set) - * - * Notes: - * 1. This routine is called either by the protocol stack or by the "net - * bottom half" (with interrupts enabled). - * 2. Setting tbusy flag will inhibit further transmit requests from the - * protocol stack and can be used for flow control with protocol layer. */ -static netdev_tx_t cycx_netdevice_hard_start_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - - if (!chan->svc) - chan->protocol = ntohs(skb->protocol); - - if (card->wandev.state != WAN_CONNECTED) - ++chan->ifstats.tx_dropped; - else if (chan->svc && chan->protocol && - chan->protocol != ntohs(skb->protocol)) { - pr_info("%s: unsupported Ethertype 0x%04X on interface %s!\n", - card->devname, ntohs(skb->protocol), dev->name); - ++chan->ifstats.tx_errors; - } else if (chan->protocol == ETH_P_IP) { - switch (chan->state) { - case WAN_DISCONNECTED: - if (cycx_x25_chan_connect(dev)) { - netif_stop_queue(dev); - return NETDEV_TX_BUSY; - } - /* fall thru */ - case WAN_CONNECTED: - reset_timer(dev); - dev->trans_start = jiffies; - netif_stop_queue(dev); - - if (cycx_x25_chan_send(dev, skb)) - return NETDEV_TX_BUSY; - - break; - default: - ++chan->ifstats.tx_dropped; - ++card->wandev.stats.tx_dropped; - } - } else { /* chan->protocol == ETH_P_X25 */ - switch (skb->data[0]) { - case X25_IFACE_DATA: - break; - case X25_IFACE_CONNECT: - cycx_x25_chan_connect(dev); - goto free_packet; - case X25_IFACE_DISCONNECT: - cycx_x25_chan_disconnect(dev); - goto free_packet; - default: - pr_info("%s: unknown %d x25-iface request on %s!\n", - card->devname, skb->data[0], dev->name); - ++chan->ifstats.tx_errors; - goto free_packet; - } - - skb_pull(skb, 1); /* Remove control byte */ - reset_timer(dev); - dev->trans_start = jiffies; - netif_stop_queue(dev); - - if (cycx_x25_chan_send(dev, skb)) { - /* prepare for future retransmissions */ - skb_push(skb, 1); - return NETDEV_TX_BUSY; - } - } - -free_packet: - dev_kfree_skb(skb); - - return NETDEV_TX_OK; -} - -/* Get Ethernet-style interface statistics. - * Return a pointer to struct net_device_stats */ -static struct net_device_stats *cycx_netdevice_get_stats(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - return chan ? &chan->ifstats : NULL; -} - -/* Interrupt Handlers */ -/* X.25 Interrupt Service Routine. 
*/ -static void cycx_x25_irq_handler(struct cycx_device *card) -{ - struct cycx_x25_cmd cmd; - u16 z = 0; - - card->in_isr = 1; - card->buff_int_mode_unbusy = 0; - cycx_peek(&card->hw, X25_RXMBOX_OFFS, &cmd, sizeof(cmd)); - - switch (cmd.command) { - case X25_DATA_INDICATION: - cycx_x25_irq_rx(card, &cmd); - break; - case X25_ACK_FROM_VC: - cycx_x25_irq_tx(card, &cmd); - break; - case X25_LOG: - cycx_x25_irq_log(card, &cmd); - break; - case X25_STATISTIC: - cycx_x25_irq_stat(card, &cmd); - break; - case X25_CONNECT_CONFIRM: - cycx_x25_irq_connect_confirm(card, &cmd); - break; - case X25_CONNECT_INDICATION: - cycx_x25_irq_connect(card, &cmd); - break; - case X25_DISCONNECT_INDICATION: - cycx_x25_irq_disconnect(card, &cmd); - break; - case X25_DISCONNECT_CONFIRM: - cycx_x25_irq_disconnect_confirm(card, &cmd); - break; - case X25_LINE_ON: - cycx_set_state(card, WAN_CONNECTED); - break; - case X25_LINE_OFF: - cycx_set_state(card, WAN_DISCONNECTED); - break; - default: - cycx_x25_irq_spurious(card, &cmd); - break; - } - - cycx_poke(&card->hw, 0, &z, sizeof(z)); - cycx_poke(&card->hw, X25_RXMBOX_OFFS, &z, sizeof(z)); - card->in_isr = 0; -} - -/* Transmit interrupt handler. - * o Release socket buffer - * o Clear 'tbusy' flag */ -static void cycx_x25_irq_tx(struct cycx_device *card, struct cycx_x25_cmd *cmd) -{ - struct net_device *dev; - struct wan_device *wandev = &card->wandev; - u8 lcn; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - - /* unbusy device and then dev_tint(); */ - dev = cycx_x25_get_dev_by_lcn(wandev, lcn); - if (dev) { - card->buff_int_mode_unbusy = 1; - netif_wake_queue(dev); - } else - pr_err("%s:ackvc for inexistent lcn %d\n", card->devname, lcn); -} - -/* Receive interrupt handler. - * This routine handles fragmented IP packets using M-bit according to the - * RFC1356. - * o map logical channel number to network interface. - * o allocate socket buffer or append received packet to the existing one. - * o if M-bit is reset (i.e. it's the last packet in a sequence) then - * decapsulate packet and pass socket buffer to the protocol stack. - * - * Notes: - * 1. When allocating a socket buffer, if M-bit is set then more data is - * coming and we have to allocate buffer for the maximum IP packet size - * expected on this channel. - * 2. If something goes wrong and X.25 packet has to be dropped (e.g. no - * socket buffers available) the whole packet sequence must be discarded. */ -static void cycx_x25_irq_rx(struct cycx_device *card, struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev; - struct cycx_x25_channel *chan; - struct sk_buff *skb; - u8 bitm, lcn; - int pktlen = cmd->len - 5; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - cycx_peek(&card->hw, cmd->buf + 4, &bitm, sizeof(bitm)); - bitm &= 0x10; - - dev = cycx_x25_get_dev_by_lcn(wandev, lcn); - if (!dev) { - /* Invalid channel, discard packet */ - pr_info("%s: receiving on orphaned LCN %d!\n", - card->devname, lcn); - return; - } - - chan = netdev_priv(dev); - reset_timer(dev); - - if (chan->drop_sequence) { - if (!bitm) - chan->drop_sequence = 0; - else - return; - } - - if ((skb = chan->rx_skb) == NULL) { - /* Allocate new socket buffer */ - int bufsize = bitm ? dev->mtu : pktlen; - - if ((skb = dev_alloc_skb((chan->protocol == ETH_P_X25 ? 
1 : 0) + - bufsize + - dev->hard_header_len)) == NULL) { - pr_info("%s: no socket buffers available!\n", - card->devname); - chan->drop_sequence = 1; - ++chan->ifstats.rx_dropped; - return; - } - - if (chan->protocol == ETH_P_X25) /* X.25 socket layer control */ - /* 0 = data packet (dev_alloc_skb zeroed skb->data) */ - skb_put(skb, 1); - - skb->dev = dev; - skb->protocol = htons(chan->protocol); - chan->rx_skb = skb; - } - - if (skb_tailroom(skb) < pktlen) { - /* No room for the packet. Call off the whole thing! */ - dev_kfree_skb_irq(skb); - chan->rx_skb = NULL; - - if (bitm) - chan->drop_sequence = 1; - - pr_info("%s: unexpectedly long packet sequence on interface %s!\n", - card->devname, dev->name); - ++chan->ifstats.rx_length_errors; - return; - } - - /* Append packet to the socket buffer */ - cycx_peek(&card->hw, cmd->buf + 5, skb_put(skb, pktlen), pktlen); - - if (bitm) - return; /* more data is coming */ - - chan->rx_skb = NULL; /* dequeue packet */ - - ++chan->ifstats.rx_packets; - chan->ifstats.rx_bytes += pktlen; - - skb_reset_mac_header(skb); - netif_rx(skb); -} - -/* Connect interrupt handler. */ -static void cycx_x25_irq_connect(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev = NULL; - struct cycx_x25_channel *chan; - u8 d[32], - loc[24], - rem[24]; - u8 lcn, sizeloc, sizerem; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - cycx_peek(&card->hw, cmd->buf + 5, &sizeloc, sizeof(sizeloc)); - cycx_peek(&card->hw, cmd->buf + 6, d, cmd->len - 6); - - sizerem = sizeloc >> 4; - sizeloc &= 0x0F; - - loc[0] = rem[0] = '\0'; - - if (sizeloc) - nibble_to_byte(d, loc, sizeloc, 0); - - if (sizerem) - nibble_to_byte(d + (sizeloc >> 1), rem, sizerem, sizeloc & 1); - - dprintk(1, KERN_INFO "%s:lcn=%d, local=%s, remote=%s\n", - __func__, lcn, loc, rem); - - dev = cycx_x25_get_dev_by_dte_addr(wandev, rem); - if (!dev) { - /* Invalid channel, discard packet */ - pr_info("%s: connect not expected: remote %s!\n", - card->devname, rem); - return; - } - - chan = netdev_priv(dev); - chan->lcn = lcn; - cycx_x25_connect_response(card, chan); - cycx_x25_set_chan_state(dev, WAN_CONNECTED); -} - -/* Connect confirm interrupt handler. */ -static void cycx_x25_irq_connect_confirm(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev; - struct cycx_x25_channel *chan; - u8 lcn, key; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - cycx_peek(&card->hw, cmd->buf + 1, &key, sizeof(key)); - dprintk(1, KERN_INFO "%s: %s:lcn=%d, key=%d\n", - card->devname, __func__, lcn, key); - - dev = cycx_x25_get_dev_by_lcn(wandev, -key); - if (!dev) { - /* Invalid channel, discard packet */ - clear_bit(--key, (void*)&card->u.x.connection_keys); - pr_info("%s: connect confirm not expected: lcn %d, key=%d!\n", - card->devname, lcn, key); - return; - } - - clear_bit(--key, (void*)&card->u.x.connection_keys); - chan = netdev_priv(dev); - chan->lcn = lcn; - cycx_x25_set_chan_state(dev, WAN_CONNECTED); -} - -/* Disconnect confirm interrupt handler. 
*/ -static void cycx_x25_irq_disconnect_confirm(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev; - u8 lcn; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - dprintk(1, KERN_INFO "%s: %s:lcn=%d\n", - card->devname, __func__, lcn); - dev = cycx_x25_get_dev_by_lcn(wandev, lcn); - if (!dev) { - /* Invalid channel, discard packet */ - pr_info("%s:disconnect confirm not expected!:lcn %d\n", - card->devname, lcn); - return; - } - - cycx_x25_set_chan_state(dev, WAN_DISCONNECTED); -} - -/* disconnect interrupt handler. */ -static void cycx_x25_irq_disconnect(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev; - u8 lcn; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - dprintk(1, KERN_INFO "%s:lcn=%d\n", __func__, lcn); - - dev = cycx_x25_get_dev_by_lcn(wandev, lcn); - if (dev) { - struct cycx_x25_channel *chan = netdev_priv(dev); - - cycx_x25_disconnect_response(card, chan->link, lcn); - cycx_x25_set_chan_state(dev, WAN_DISCONNECTED); - } else - cycx_x25_disconnect_response(card, 0, lcn); -} - -/* LOG interrupt handler. */ -static void cycx_x25_irq_log(struct cycx_device *card, struct cycx_x25_cmd *cmd) -{ -#if CYCLOMX_X25_DEBUG - char bf[20]; - u16 size, toread, link, msg_code; - u8 code, routine; - - cycx_peek(&card->hw, cmd->buf, &msg_code, sizeof(msg_code)); - cycx_peek(&card->hw, cmd->buf + 2, &link, sizeof(link)); - cycx_peek(&card->hw, cmd->buf + 4, &size, sizeof(size)); - /* at most 20 bytes are available... thanks to Daniela :) */ - toread = size < 20 ? size : 20; - cycx_peek(&card->hw, cmd->buf + 10, &bf, toread); - cycx_peek(&card->hw, cmd->buf + 10 + toread, &code, 1); - cycx_peek(&card->hw, cmd->buf + 10 + toread + 1, &routine, 1); - - pr_info("cycx_x25_irq_handler: X25_LOG (0x4500) indic.:\n"); - pr_info("cmd->buf=0x%X\n", cmd->buf); - pr_info("Log message code=0x%X\n", msg_code); - pr_info("Link=%d\n", link); - pr_info("log code=0x%X\n", code); - pr_info("log routine=0x%X\n", routine); - pr_info("Message size=%d\n", size); - hex_dump("Message", bf, toread); -#endif -} - -/* STATISTIC interrupt handler. */ -static void cycx_x25_irq_stat(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - cycx_peek(&card->hw, cmd->buf, &card->u.x.stats, - sizeof(card->u.x.stats)); - hex_dump("cycx_x25_irq_stat", (unsigned char*)&card->u.x.stats, - sizeof(card->u.x.stats)); - cycx_x25_dump_stats(&card->u.x.stats); - wake_up_interruptible(&card->wait_stats); -} - -/* Spurious interrupt handler. - * o print a warning - * If number of spurious interrupts exceeded some limit, then ??? */ -static void cycx_x25_irq_spurious(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - pr_info("%s: spurious interrupt (0x%X)!\n", - card->devname, cmd->command); -} -#ifdef CYCLOMX_X25_DEBUG -static void hex_dump(char *msg, unsigned char *p, int len) -{ - print_hex_dump(KERN_INFO, msg, DUMP_PREFIX_OFFSET, 16, 1, - p, len, true); -} -#endif - -/* Cyclom 2X Firmware-Specific Functions */ -/* Exec X.25 command. 
*/ -static int x25_exec(struct cycx_device *card, int command, int link, - void *d1, int len1, void *d2, int len2) -{ - struct cycx_x25_cmd c; - unsigned long flags; - u32 addr = 0x1200 + 0x2E0 * link + 0x1E2; - u8 retry = CYCX_X25_MAX_CMD_RETRY; - int err = 0; - - c.command = command; - c.link = link; - c.len = len1 + len2; - - spin_lock_irqsave(&card->u.x.lock, flags); - - /* write command */ - cycx_poke(&card->hw, X25_MBOX_OFFS, &c, sizeof(c) - sizeof(c.buf)); - - /* write X.25 data */ - if (d1) { - cycx_poke(&card->hw, addr, d1, len1); - - if (d2) { - if (len2 > 254) { - u32 addr1 = 0xA00 + 0x400 * link; - - cycx_poke(&card->hw, addr + len1, d2, 249); - cycx_poke(&card->hw, addr1, ((u8*)d2) + 249, - len2 - 249); - } else - cycx_poke(&card->hw, addr + len1, d2, len2); - } - } - - /* generate interruption, executing command */ - cycx_intr(&card->hw); - - /* wait till card->mbox == 0 */ - do { - err = cycx_exec(card->mbox); - } while (retry-- && err); - - spin_unlock_irqrestore(&card->u.x.lock, flags); - - return err; -} - -/* Configure adapter. */ -static int cycx_x25_configure(struct cycx_device *card, - struct cycx_x25_config *conf) -{ - struct { - u16 nlinks; - struct cycx_x25_config conf[2]; - } x25_cmd_conf; - - memset(&x25_cmd_conf, 0, sizeof(x25_cmd_conf)); - x25_cmd_conf.nlinks = 2; - x25_cmd_conf.conf[0] = *conf; - /* FIXME: we need to find a way in the wanrouter framework - to configure the second link, for now lets use it - with the same config from the first link, fixing - the interface type to RS232, the speed in 38400 and - the clock to external */ - x25_cmd_conf.conf[1] = *conf; - x25_cmd_conf.conf[1].link = 1; - x25_cmd_conf.conf[1].speed = 5; /* 38400 */ - x25_cmd_conf.conf[1].clock = 8; - x25_cmd_conf.conf[1].flags = 0; /* default = RS232 */ - - cycx_x25_dump_config(&x25_cmd_conf.conf[0]); - cycx_x25_dump_config(&x25_cmd_conf.conf[1]); - - return x25_exec(card, X25_CONFIG, 0, - &x25_cmd_conf, sizeof(x25_cmd_conf), NULL, 0); -} - -/* Get protocol statistics. */ -static int cycx_x25_get_stats(struct cycx_device *card) -{ - /* the firmware expects 20 in the size field!!! 
- thanks to Daniela */ - int err = x25_exec(card, X25_STATISTIC, 0, NULL, 20, NULL, 0); - - if (err) - return err; - - interruptible_sleep_on(&card->wait_stats); - - if (signal_pending(current)) - return -EINTR; - - card->wandev.stats.rx_packets = card->u.x.stats.n2_rx_frames; - card->wandev.stats.rx_over_errors = card->u.x.stats.rx_over_errors; - card->wandev.stats.rx_crc_errors = card->u.x.stats.rx_crc_errors; - card->wandev.stats.rx_length_errors = 0; /* not available from fw */ - card->wandev.stats.rx_frame_errors = 0; /* not available from fw */ - card->wandev.stats.rx_missed_errors = card->u.x.stats.rx_aborts; - card->wandev.stats.rx_dropped = 0; /* not available from fw */ - card->wandev.stats.rx_errors = 0; /* not available from fw */ - card->wandev.stats.tx_packets = card->u.x.stats.n2_tx_frames; - card->wandev.stats.tx_aborted_errors = card->u.x.stats.tx_aborts; - card->wandev.stats.tx_dropped = 0; /* not available from fw */ - card->wandev.stats.collisions = 0; /* not available from fw */ - card->wandev.stats.tx_errors = 0; /* not available from fw */ - - cycx_x25_dump_devs(&card->wandev); - - return 0; -} - -/* return the number of nibbles */ -static int byte_to_nibble(u8 *s, u8 *d, char *nibble) -{ - int i = 0; - - if (*nibble && *s) { - d[i] |= *s++ - '0'; - *nibble = 0; - ++i; - } - - while (*s) { - d[i] = (*s - '0') << 4; - if (*(s + 1)) - d[i] |= *(s + 1) - '0'; - else { - *nibble = 1; - break; - } - ++i; - s += 2; - } - - return i; -} - -static void nibble_to_byte(u8 *s, u8 *d, u8 len, u8 nibble) -{ - if (nibble) { - *d++ = '0' + (*s++ & 0x0F); - --len; - } - - while (len) { - *d++ = '0' + (*s >> 4); - - if (--len) { - *d++ = '0' + (*s & 0x0F); - --len; - } else break; - - ++s; - } - - *d = '\0'; -} - -/* Place X.25 call. */ -static int x25_place_call(struct cycx_device *card, - struct cycx_x25_channel *chan) -{ - int err = 0, - len; - char d[64], - nibble = 0, - mylen = chan->local_addr ? strlen(chan->local_addr) : 0, - remotelen = strlen(chan->addr); - u8 key; - - if (card->u.x.connection_keys == ~0U) { - pr_info("%s: too many simultaneous connection requests!\n", - card->devname); - return -EAGAIN; - } - - key = ffz(card->u.x.connection_keys); - set_bit(key, (void*)&card->u.x.connection_keys); - ++key; - dprintk(1, KERN_INFO "%s:x25_place_call:key=%d\n", card->devname, key); - memset(d, 0, sizeof(d)); - d[1] = key; /* user key */ - d[2] = 0x10; - d[4] = 0x0B; - - len = byte_to_nibble(chan->addr, d + 6, &nibble); - - if (chan->local_addr) - len += byte_to_nibble(chan->local_addr, d + 6 + len, &nibble); - - if (nibble) - ++len; - - d[5] = mylen << 4 | remotelen; - d[6 + len + 1] = 0xCC; /* TCP/IP over X.25, thanks to Daniela :) */ - - if ((err = x25_exec(card, X25_CONNECT_REQUEST, chan->link, - &d, 7 + len + 1, NULL, 0)) != 0) - clear_bit(--key, (void*)&card->u.x.connection_keys); - else - chan->lcn = -key; - - return err; -} - -/* Place X.25 CONNECT RESPONSE. */ -static int cycx_x25_connect_response(struct cycx_device *card, - struct cycx_x25_channel *chan) -{ - u8 d[8]; - - memset(d, 0, sizeof(d)); - d[0] = d[3] = chan->lcn; - d[2] = 0x10; - d[4] = 0x0F; - d[7] = 0xCC; /* TCP/IP over X.25, thanks Daniela */ - - return x25_exec(card, X25_CONNECT_RESPONSE, chan->link, &d, 8, NULL, 0); -} - -/* Place X.25 DISCONNECT RESPONSE. 
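The byte_to_nibble()/nibble_to_byte() pair above packs X.121 address digits two per byte (BCD style) when building CONNECT REQUEST packets: the called-address digits come first and the calling-address digits may start in the low half of a shared byte, which is what the nibble flag tracks. A self-contained sketch of the packing, with the hypothetical name pack_digits() standing in for the driver routine:

#include <stdio.h>

/* Pack ASCII decimal digits two per byte, high nibble first.  '*nibble'
 * records a half-filled byte so a second string can continue in its low
 * nibble.  Returns the number of completely filled bytes. */
static int pack_digits(const char *s, unsigned char *d, char *nibble)
{
	int i = 0;

	if (*nibble && *s) {
		d[i] |= *s++ - '0';
		*nibble = 0;
		++i;
	}

	while (*s) {
		d[i] = (*s - '0') << 4;
		if (*(s + 1))
			d[i] |= *(s + 1) - '0';
		else {
			*nibble = 1;
			break;
		}
		++i;
		s += 2;
	}
	return i;
}

int main(void)
{
	unsigned char buf[16] = { 0 };
	char nibble = 0;
	int len;

	len = pack_digits("12345", buf, &nibble);	/* remote address */
	len += pack_digits("678", buf + len, &nibble);	/* local address */
	if (nibble)
		++len;

	/* buf now holds 0x12 0x34 0x56 0x78, len == 4 */
	printf("packed %d bytes, buf[0]=0x%02x\n", len, buf[0]);
	return 0;
}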
*/ -static int cycx_x25_disconnect_response(struct cycx_device *card, u8 link, - u8 lcn) -{ - char d[5]; - - memset(d, 0, sizeof(d)); - d[0] = d[3] = lcn; - d[2] = 0x10; - d[4] = 0x17; - - return x25_exec(card, X25_DISCONNECT_RESPONSE, link, &d, 5, NULL, 0); -} - -/* Clear X.25 call. */ -static int x25_clear_call(struct cycx_device *card, u8 link, u8 lcn, u8 cause, - u8 diagn) -{ - u8 d[7]; - - memset(d, 0, sizeof(d)); - d[0] = d[3] = lcn; - d[2] = 0x10; - d[4] = 0x13; - d[5] = cause; - d[6] = diagn; - - return x25_exec(card, X25_DISCONNECT_REQUEST, link, d, 7, NULL, 0); -} - -/* Send X.25 data packet. */ -static int cycx_x25_send(struct cycx_device *card, u8 link, u8 lcn, u8 bitm, - int len, void *buf) -{ - u8 d[] = "?\xFF\x10??"; - - d[0] = d[3] = lcn; - d[4] = bitm; - - return x25_exec(card, X25_DATA_REQUEST, link, &d, 5, buf, len); -} - -/* Miscellaneous */ -/* Find network device by its channel number. */ -static struct net_device *cycx_x25_get_dev_by_lcn(struct wan_device *wandev, - s16 lcn) -{ - struct net_device *dev = wandev->dev; - struct cycx_x25_channel *chan; - - while (dev) { - chan = netdev_priv(dev); - - if (chan->lcn == lcn) - break; - dev = chan->slave; - } - return dev; -} - -/* Find network device by its remote dte address. */ -static struct net_device * - cycx_x25_get_dev_by_dte_addr(struct wan_device *wandev, char *dte) -{ - struct net_device *dev = wandev->dev; - struct cycx_x25_channel *chan; - - while (dev) { - chan = netdev_priv(dev); - - if (!strcmp(chan->addr, dte)) - break; - dev = chan->slave; - } - return dev; -} - -/* Initiate connection on the logical channel. - * o for PVC we just get channel configuration - * o for SVCs place an X.25 call - * - * Return: 0 connected - * >0 connection in progress - * <0 failure */ -static int cycx_x25_chan_connect(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - - if (chan->svc) { - if (!chan->addr[0]) - return -EINVAL; /* no destination address */ - - dprintk(1, KERN_INFO "%s: placing X.25 call to %s...\n", - card->devname, chan->addr); - - if (x25_place_call(card, chan)) - return -EIO; - - cycx_x25_set_chan_state(dev, WAN_CONNECTING); - return 1; - } else - cycx_x25_set_chan_state(dev, WAN_CONNECTED); - - return 0; -} - -/* Disconnect logical channel. - * o if SVC then clear X.25 call */ -static void cycx_x25_chan_disconnect(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - if (chan->svc) { - x25_clear_call(chan->card, chan->link, chan->lcn, 0, 0); - cycx_x25_set_chan_state(dev, WAN_DISCONNECTING); - } else - cycx_x25_set_chan_state(dev, WAN_DISCONNECTED); -} - -/* Called by kernel timer */ -static void cycx_x25_chan_timer(unsigned long d) -{ - struct net_device *dev = (struct net_device *)d; - struct cycx_x25_channel *chan = netdev_priv(dev); - - if (chan->state == WAN_CONNECTED) - cycx_x25_chan_disconnect(dev); - else - pr_err("%s: %s for svc (%s) not connected!\n", - chan->card->devname, __func__, dev->name); -} - -/* Set logical channel state. 
*/ -static void cycx_x25_set_chan_state(struct net_device *dev, u8 state) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - unsigned long flags; - char *string_state = NULL; - - spin_lock_irqsave(&card->lock, flags); - - if (chan->state != state) { - if (chan->svc && chan->state == WAN_CONNECTED) - del_timer(&chan->timer); - - switch (state) { - case WAN_CONNECTED: - string_state = "connected!"; - *(__be16*)dev->dev_addr = htons(chan->lcn); - netif_wake_queue(dev); - reset_timer(dev); - - if (chan->protocol == ETH_P_X25) - cycx_x25_chan_send_event(dev, - X25_IFACE_CONNECT); - - break; - case WAN_CONNECTING: - string_state = "connecting..."; - break; - case WAN_DISCONNECTING: - string_state = "disconnecting..."; - break; - case WAN_DISCONNECTED: - string_state = "disconnected!"; - - if (chan->svc) { - *(unsigned short*)dev->dev_addr = 0; - chan->lcn = 0; - } - - if (chan->protocol == ETH_P_X25) - cycx_x25_chan_send_event(dev, - X25_IFACE_DISCONNECT); - - netif_wake_queue(dev); - break; - } - - pr_info("%s: interface %s %s\n", - card->devname, dev->name, string_state); - chan->state = state; - } - - spin_unlock_irqrestore(&card->lock, flags); -} - -/* Send packet on a logical channel. - * When this function is called, tx_skb field of the channel data space - * points to the transmit socket buffer. When transmission is complete, - * release socket buffer and reset 'tbusy' flag. - * - * Return: 0 - transmission complete - * 1 - busy - * - * Notes: - * 1. If packet length is greater than MTU for this channel, we'll fragment - * the packet into 'complete sequence' using M-bit. - * 2. When transmission is complete, an event notification should be issued - * to the router. */ -static int cycx_x25_chan_send(struct net_device *dev, struct sk_buff *skb) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - int bitm = 0; /* final packet */ - unsigned len = skb->len; - - if (skb->len > card->wandev.mtu) { - len = card->wandev.mtu; - bitm = 0x10; /* set M-bit (more data) */ - } - - if (cycx_x25_send(card, chan->link, chan->lcn, bitm, len, skb->data)) - return 1; - - if (bitm) { - skb_pull(skb, len); - return 1; - } - - ++chan->ifstats.tx_packets; - chan->ifstats.tx_bytes += len; - - return 0; -} - -/* Send event (connection, disconnection, etc) to X.25 socket layer */ - -static void cycx_x25_chan_send_event(struct net_device *dev, u8 event) -{ - struct sk_buff *skb; - unsigned char *ptr; - - if ((skb = dev_alloc_skb(1)) == NULL) { - pr_err("%s: out of memory\n", __func__); - return; - } - - ptr = skb_put(skb, 1); - *ptr = event; - - skb->protocol = x25_type_trans(skb, dev); - netif_rx(skb); -} - -/* Convert line speed in bps to a number used by cyclom 2x code. */ -static u8 bps_to_speed_code(u32 bps) -{ - u8 number = 0; /* defaults to the lowest (1200) speed ;> */ - - if (bps >= 512000) number = 8; - else if (bps >= 256000) number = 7; - else if (bps >= 64000) number = 6; - else if (bps >= 38400) number = 5; - else if (bps >= 19200) number = 4; - else if (bps >= 9600) number = 3; - else if (bps >= 4800) number = 2; - else if (bps >= 2400) number = 1; - - return number; -} - -/* log base 2 */ -static u8 cycx_log2(u32 n) -{ - u8 log = 0; - - if (!n) - return 0; - - while (n > 1) { - n >>= 1; - ++log; - } - - return log; -} - -/* Convert decimal string to unsigned integer. - * If len != 0 then only 'len' characters of the string are converted. 
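The notes above cycx_x25_chan_send() describe how an oversized payload goes out as a "complete sequence": fragments of at most the channel MTU are sent with the M bit (0x10) set on all but the last one, per RFC 1356. A self-contained sketch of that idea as a simple loop (the driver itself instead trims the skb and relies on the stack retransmitting the remainder); send_packet() and send_sequence() are illustrative names only, not driver functions:

#include <stdio.h>
#include <string.h>

#define CHAN_MTU 128
#define M_BIT	 0x10	/* "more data follows" marker */

/* Stand-in for queueing one X.25 DATA REQUEST to the firmware. */
static void send_packet(const unsigned char *buf, size_t len, int bitm)
{
	printf("DATA_REQUEST len=%zu flags=0x%02x\n", len, bitm);
}

/* Emit a payload as a complete sequence: M bit on every fragment but
 * the last, so the receiver knows when to hand the packet up. */
static void send_sequence(const unsigned char *buf, size_t len)
{
	while (len) {
		size_t chunk = len > CHAN_MTU ? CHAN_MTU : len;
		int bitm = chunk < len ? M_BIT : 0;

		send_packet(buf, chunk, bitm);
		buf += chunk;
		len -= chunk;
	}
}

int main(void)
{
	unsigned char payload[300];

	memset(payload, 0xAB, sizeof(payload));
	send_sequence(payload, sizeof(payload));	/* 128+M, 128+M, 44 */
	return 0;
}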
*/ -static unsigned dec_to_uint(u8 *str, int len) -{ - unsigned val = 0; - - if (!len) - len = strlen(str); - - for (; len && isdigit(*str); ++str, --len) - val = (val * 10) + (*str - (unsigned) '0'); - - return val; -} - -static void reset_timer(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - if (chan->svc) - mod_timer(&chan->timer, jiffies+chan->idle_tmout*HZ); -} -#ifdef CYCLOMX_X25_DEBUG -static void cycx_x25_dump_config(struct cycx_x25_config *conf) -{ - pr_info("X.25 configuration\n"); - pr_info("-----------------\n"); - pr_info("link number=%d\n", conf->link); - pr_info("line speed=%d\n", conf->speed); - pr_info("clock=%sternal\n", conf->clock == 8 ? "Ex" : "In"); - pr_info("# level 2 retransm.=%d\n", conf->n2); - pr_info("level 2 window=%d\n", conf->n2win); - pr_info("level 3 window=%d\n", conf->n3win); - pr_info("# logical channels=%d\n", conf->nvc); - pr_info("level 3 pkt len=%d\n", conf->pktlen); - pr_info("my address=%d\n", conf->locaddr); - pr_info("remote address=%d\n", conf->remaddr); - pr_info("t1=%d seconds\n", conf->t1); - pr_info("t2=%d seconds\n", conf->t2); - pr_info("t21=%d seconds\n", conf->t21); - pr_info("# PVCs=%d\n", conf->npvc); - pr_info("t23=%d seconds\n", conf->t23); - pr_info("flags=0x%x\n", conf->flags); -} - -static void cycx_x25_dump_stats(struct cycx_x25_stats *stats) -{ - pr_info("X.25 statistics\n"); - pr_info("--------------\n"); - pr_info("rx_crc_errors=%d\n", stats->rx_crc_errors); - pr_info("rx_over_errors=%d\n", stats->rx_over_errors); - pr_info("n2_tx_frames=%d\n", stats->n2_tx_frames); - pr_info("n2_rx_frames=%d\n", stats->n2_rx_frames); - pr_info("tx_timeouts=%d\n", stats->tx_timeouts); - pr_info("rx_timeouts=%d\n", stats->rx_timeouts); - pr_info("n3_tx_packets=%d\n", stats->n3_tx_packets); - pr_info("n3_rx_packets=%d\n", stats->n3_rx_packets); - pr_info("tx_aborts=%d\n", stats->tx_aborts); - pr_info("rx_aborts=%d\n", stats->rx_aborts); -} - -static void cycx_x25_dump_devs(struct wan_device *wandev) -{ - struct net_device *dev = wandev->dev; - - pr_info("X.25 dev states\n"); - pr_info("name: addr: txoff: protocol:\n"); - pr_info("---------------------------------------\n"); - - while(dev) { - struct cycx_x25_channel *chan = netdev_priv(dev); - - pr_info("%-5.5s %-15.15s %d ETH_P_%s\n", - chan->name, chan->addr, netif_queue_stopped(dev), - chan->protocol == ETH_P_IP ? "IP" : "X25"); - dev = chan->slave; - } -} - -#endif /* CYCLOMX_X25_DEBUG */ -/* End */ diff --git a/include/linux/cyclomx.h b/include/linux/cyclomx.h deleted file mode 100644 index b88f7f428e58..000000000000 --- a/include/linux/cyclomx.h +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef _CYCLOMX_H -#define _CYCLOMX_H -/* -* cyclomx.h Cyclom 2X WAN Link Driver. -* User-level API definitions. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on wanpipe.h by Gene Kozin -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. 
-* ============================================================================ -* 2000/07/13 acme remove crap #if KERNEL_VERSION > blah -* 2000/01/21 acme rename cyclomx_open to cyclomx_mod_inc_use_count -* and cyclomx_close to cyclomx_mod_dec_use_count -* 1999/05/19 acme wait_queue_head_t wait_stats(support for 2.3.*) -* 1999/01/03 acme judicious use of data types -* 1998/12/27 acme cleanup: PACKED not needed -* 1998/08/08 acme Version 0.0.1 -*/ - -#include -#include - -#ifdef __KERNEL__ -/* Kernel Interface */ - -#include /* Cyclom 2X support module API definitions */ -#include /* Cyclom 2X firmware module definitions */ -#ifdef CONFIG_CYCLOMX_X25 -#include -#endif - -/* Adapter Data Space. - * This structure is needed because we handle multiple cards, otherwise - * static data would do it. - */ -struct cycx_device { - char devname[WAN_DRVNAME_SZ + 1];/* card name */ - struct cycx_hw hw; /* hardware configuration */ - struct wan_device wandev; /* WAN device data space */ - u32 state_tick; /* link state timestamp */ - spinlock_t lock; - char in_isr; /* interrupt-in-service flag */ - char buff_int_mode_unbusy; /* flag for carrying out dev_tint */ - wait_queue_head_t wait_stats; /* to wait for the STATS indication */ - void __iomem *mbox; /* -> mailbox */ - void (*isr)(struct cycx_device* card); /* interrupt service routine */ - int (*exec)(struct cycx_device* card, void* u_cmd, void* u_data); - union { -#ifdef CONFIG_CYCLOMX_X25 - struct { /* X.25 specific data */ - u32 lo_pvc; - u32 hi_pvc; - u32 lo_svc; - u32 hi_svc; - struct cycx_x25_stats stats; - spinlock_t lock; - u32 connection_keys; - } x; -#endif - } u; -}; - -/* Public Functions */ -void cycx_set_state(struct cycx_device *card, int state); - -#ifdef CONFIG_CYCLOMX_X25 -int cycx_x25_wan_init(struct cycx_device *card, wandev_conf_t *conf); -#endif -#endif /* __KERNEL__ */ -#endif /* _CYCLOMX_H */ diff --git a/include/linux/cycx_drv.h b/include/linux/cycx_drv.h deleted file mode 100644 index 12fe6b0bfcff..000000000000 --- a/include/linux/cycx_drv.h +++ /dev/null @@ -1,64 +0,0 @@ -/* -* cycx_drv.h CYCX Support Module. Kernel API Definitions. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdladrv.h by Gene Kozin -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 1999/10/23 acme cycxhw_t cleanup -* 1999/01/03 acme more judicious use of data types... -* uclong, ucchar, etc deleted, the u8, u16, u32 -* types are the portable way to go. -* 1999/01/03 acme judicious use of data types... u16, u32, etc -* 1998/12/26 acme FIXED_BUFFERS, CONF_OFFSET, -* removal of cy_read{bwl} -* 1998/08/08 acme Initial version. 
-*/ -#ifndef _CYCX_DRV_H -#define _CYCX_DRV_H - -#define CYCX_WINDOWSIZE 0x4000 /* default dual-port memory window size */ -#define GEN_CYCX_INTR 0x02 -#define RST_ENABLE 0x04 -#define START_CPU 0x06 -#define RST_DISABLE 0x08 -#define FIXED_BUFFERS 0x08 -#define TEST_PATTERN 0xaa55 -#define CMD_OFFSET 0x20 -#define CONF_OFFSET 0x0380 -#define RESET_OFFSET 0x3c00 /* For reset file load */ -#define DATA_OFFSET 0x0100 /* For code and data files load */ -#define START_OFFSET 0x3ff0 /* 80186 starts here */ - -/** - * struct cycx_hw - Adapter hardware configuration - * @fwid - firmware ID - * @irq - interrupt request level - * @dpmbase - dual-port memory base - * @dpmsize - dual-port memory size - * @reserved - reserved for future use - */ -struct cycx_hw { - u32 fwid; - int irq; - void __iomem *dpmbase; - u32 dpmsize; - u32 reserved[5]; -}; - -/* Function Prototypes */ -extern int cycx_setup(struct cycx_hw *hw, void *sfm, u32 len, unsigned long base); -extern int cycx_down(struct cycx_hw *hw); -extern int cycx_peek(struct cycx_hw *hw, u32 addr, void *buf, u32 len); -extern int cycx_poke(struct cycx_hw *hw, u32 addr, void *buf, u32 len); -extern int cycx_exec(void __iomem *addr); - -extern void cycx_intr(struct cycx_hw *hw); -#endif /* _CYCX_DRV_H */ diff --git a/include/linux/wanrouter.h b/include/linux/wanrouter.h index cec4b4159767..8198a63cf459 100644 --- a/include/linux/wanrouter.h +++ b/include/linux/wanrouter.h @@ -1,129 +1,10 @@ -/***************************************************************************** -* wanrouter.h Definitions for the WAN Multiprotocol Router Module. -* This module provides API and common services for WAN Link -* Drivers and is completely hardware-independent. -* -* Author: Nenad Corbic -* Gideon Hack -* Additions: Arnaldo Melo -* -* Copyright: (c) 1995-2000 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Jul 21, 2000 Nenad Corbic Added WAN_FT1_READY State -* Feb 24, 2000 Nenad Corbic Added support for socket based x25api -* Jan 28, 2000 Nenad Corbic Added support for the ASYNC protocol. -* Oct 04, 1999 Nenad Corbic Updated for 2.1.0 release -* Jun 02, 1999 Gideon Hack Added support for the S514 adapter. -* May 23, 1999 Arnaldo Melo Added local_addr to wanif_conf_t -* WAN_DISCONNECTING state added -* Jul 20, 1998 David Fong Added Inverse ARP options to 'wanif_conf_t' -* Jun 12, 1998 David Fong Added Cisco HDLC support. -* Dec 16, 1997 Jaspreet Singh Moved 'enable_IPX' and 'network_number' to -* 'wanif_conf_t' -* Dec 05, 1997 Jaspreet Singh Added 'pap', 'chap' to 'wanif_conf_t' -* Added 'authenticator' to 'wan_ppp_conf_t' -* Nov 06, 1997 Jaspreet Singh Changed Router Driver version to 1.1 from 1.0 -* Oct 20, 1997 Jaspreet Singh Added 'cir','bc','be' and 'mc' to 'wanif_conf_t' -* Added 'enable_IPX' and 'network_number' to -* 'wan_device_t'. Also added defines for -* UDP PACKET TYPE, Interrupt test, critical values -* for RACE conditions. 
-* Oct 05, 1997 Jaspreet Singh Added 'dlci_num' and 'dlci[100]' to -* 'wan_fr_conf_t' to configure a list of dlci(s) -* for a NODE -* Jul 07, 1997 Jaspreet Singh Added 'ttl' to 'wandev_conf_t' & 'wan_device_t' -* May 29, 1997 Jaspreet Singh Added 'tx_int_enabled' to 'wan_device_t' -* May 21, 1997 Jaspreet Singh Added 'udp_port' to 'wan_device_t' -* Apr 25, 1997 Farhan Thawar Added 'udp_port' to 'wandev_conf_t' -* Jan 16, 1997 Gene Kozin router_devlist made public -* Jan 02, 1997 Gene Kozin Initial version (based on wanpipe.h). -*****************************************************************************/ +/* + * wanrouter.h Legacy declarations kept around until X25 is removed + */ + #ifndef _ROUTER_H #define _ROUTER_H #include -/****** Kernel Interface ****************************************************/ - -#include /* support for device drivers */ -#include /* proc filesystem pragmatics */ -#include /* support for network drivers */ -#include /* Support for SMP Locking */ - -/*---------------------------------------------------------------------------- - * WAN device data space. - */ -struct wan_device { - unsigned magic; /* magic number */ - char* name; /* -> WAN device name (ASCIIZ) */ - void* private; /* -> driver private data */ - unsigned config_id; /* Configuration ID */ - /****** hardware configuration ******/ - unsigned ioport; /* adapter I/O port base #1 */ - char S514_cpu_no[1]; /* PCI CPU Number */ - unsigned char S514_slot_no; /* PCI Slot Number */ - unsigned long maddr; /* dual-port memory address */ - unsigned msize; /* dual-port memory size */ - int irq; /* interrupt request level */ - int dma; /* DMA request level */ - unsigned bps; /* data transfer rate */ - unsigned mtu; /* max physical transmit unit size */ - unsigned udp_port; /* UDP port for management */ - unsigned char ttl; /* Time To Live for UDP security */ - unsigned enable_tx_int; /* Transmit Interrupt enabled or not */ - char interface; /* RS-232/V.35, etc. */ - char clocking; /* external/internal */ - char line_coding; /* NRZ/NRZI/FM0/FM1, etc. */ - char station; /* DTE/DCE, primary/secondary, etc. 
*/ - char connection; /* permanent/switched/on-demand */ - char signalling; /* Signalling RS232 or V35 */ - char read_mode; /* read mode: Polling or interrupt */ - char new_if_cnt; /* Number of interfaces per wanpipe */ - char del_if_cnt; /* Number of times del_if() gets called */ - unsigned char piggyback; /* Piggibacking a port */ - unsigned hw_opt[4]; /* other hardware options */ - /****** status and statistics *******/ - char state; /* device state */ - char api_status; /* device api status */ - struct net_device_stats stats; /* interface statistics */ - unsigned reserved[16]; /* reserved for future use */ - unsigned long critical; /* critical section flag */ - spinlock_t lock; /* Support for SMP Locking */ - - /****** device management methods ***/ - int (*setup) (struct wan_device *wandev, wandev_conf_t *conf); - int (*shutdown) (struct wan_device *wandev); - int (*update) (struct wan_device *wandev); - int (*ioctl) (struct wan_device *wandev, unsigned cmd, - unsigned long arg); - int (*new_if)(struct wan_device *wandev, struct net_device *dev, - wanif_conf_t *conf); - int (*del_if)(struct wan_device *wandev, struct net_device *dev); - /****** maintained by the router ****/ - struct wan_device* next; /* -> next device */ - struct net_device* dev; /* list of network interfaces */ - unsigned ndev; /* number of interfaces */ - struct proc_dir_entry *dent; /* proc filesystem entry */ -}; - -/* Public functions available for device drivers */ -extern int register_wan_device(struct wan_device *wandev); -extern int unregister_wan_device(char *name); - -/* Proc interface functions. These must not be called by the drivers! */ -extern int wanrouter_proc_init(void); -extern void wanrouter_proc_cleanup(void); -extern int wanrouter_proc_add(struct wan_device *wandev); -extern int wanrouter_proc_delete(struct wan_device *wandev); -extern long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg); - -/* Public Data */ -/* list of registered devices */ -extern struct wan_device *wanrouter_router_devlist; - #endif /* _ROUTER_H */ diff --git a/include/uapi/linux/wanrouter.h b/include/uapi/linux/wanrouter.h index 7617df2833d5..498d6c12c666 100644 --- a/include/uapi/linux/wanrouter.h +++ b/include/uapi/linux/wanrouter.h @@ -1,363 +1,9 @@ -/***************************************************************************** -* wanrouter.h Definitions for the WAN Multiprotocol Router Module. -* This module provides API and common services for WAN Link -* Drivers and is completely hardware-independent. -* -* Author: Nenad Corbic -* Gideon Hack -* Additions: Arnaldo Melo -* -* Copyright: (c) 1995-2000 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Jul 21, 2000 Nenad Corbic Added WAN_FT1_READY State -* Feb 24, 2000 Nenad Corbic Added support for socket based x25api -* Jan 28, 2000 Nenad Corbic Added support for the ASYNC protocol. -* Oct 04, 1999 Nenad Corbic Updated for 2.1.0 release -* Jun 02, 1999 Gideon Hack Added support for the S514 adapter. -* May 23, 1999 Arnaldo Melo Added local_addr to wanif_conf_t -* WAN_DISCONNECTING state added -* Jul 20, 1998 David Fong Added Inverse ARP options to 'wanif_conf_t' -* Jun 12, 1998 David Fong Added Cisco HDLC support. 
-* Dec 16, 1997 Jaspreet Singh Moved 'enable_IPX' and 'network_number' to -* 'wanif_conf_t' -* Dec 05, 1997 Jaspreet Singh Added 'pap', 'chap' to 'wanif_conf_t' -* Added 'authenticator' to 'wan_ppp_conf_t' -* Nov 06, 1997 Jaspreet Singh Changed Router Driver version to 1.1 from 1.0 -* Oct 20, 1997 Jaspreet Singh Added 'cir','bc','be' and 'mc' to 'wanif_conf_t' -* Added 'enable_IPX' and 'network_number' to -* 'wan_device_t'. Also added defines for -* UDP PACKET TYPE, Interrupt test, critical values -* for RACE conditions. -* Oct 05, 1997 Jaspreet Singh Added 'dlci_num' and 'dlci[100]' to -* 'wan_fr_conf_t' to configure a list of dlci(s) -* for a NODE -* Jul 07, 1997 Jaspreet Singh Added 'ttl' to 'wandev_conf_t' & 'wan_device_t' -* May 29, 1997 Jaspreet Singh Added 'tx_int_enabled' to 'wan_device_t' -* May 21, 1997 Jaspreet Singh Added 'udp_port' to 'wan_device_t' -* Apr 25, 1997 Farhan Thawar Added 'udp_port' to 'wandev_conf_t' -* Jan 16, 1997 Gene Kozin router_devlist made public -* Jan 02, 1997 Gene Kozin Initial version (based on wanpipe.h). -*****************************************************************************/ - -#ifndef _UAPI_ROUTER_H -#define _UAPI_ROUTER_H - -#define ROUTER_NAME "wanrouter" /* in case we ever change it */ -#define ROUTER_VERSION 1 /* version number */ -#define ROUTER_RELEASE 1 /* release (minor version) number */ -#define ROUTER_IOCTL 'W' /* for IOCTL calls */ -#define ROUTER_MAGIC 0x524D4157L /* signature: 'WANR' reversed */ - -/* IOCTL codes for /proc/router/ entries (up to 255) */ -enum router_ioctls -{ - ROUTER_SETUP = ROUTER_IOCTL<<8, /* configure device */ - ROUTER_DOWN, /* shut down device */ - ROUTER_STAT, /* get device status */ - ROUTER_IFNEW, /* add interface */ - ROUTER_IFDEL, /* delete interface */ - ROUTER_IFSTAT, /* get interface status */ - ROUTER_USER = (ROUTER_IOCTL<<8)+16, /* driver-specific calls */ - ROUTER_USER_MAX = (ROUTER_IOCTL<<8)+31 -}; - -/* identifiers for displaying proc file data for dual port adapters */ -#define PROC_DATA_PORT_0 0x8000 /* the data is for port 0 */ -#define PROC_DATA_PORT_1 0x8001 /* the data is for port 1 */ - -/* NLPID for packet encapsulation (ISO/IEC TR 9577) */ -#define NLPID_IP 0xCC /* Internet Protocol Datagram */ -#define NLPID_SNAP 0x80 /* IEEE Subnetwork Access Protocol */ -#define NLPID_CLNP 0x81 /* ISO/IEC 8473 */ -#define NLPID_ESIS 0x82 /* ISO/IEC 9542 */ -#define NLPID_ISIS 0x83 /* ISO/IEC ISIS */ -#define NLPID_Q933 0x08 /* CCITT Q.933 */ - -/* Miscellaneous */ -#define WAN_IFNAME_SZ 15 /* max length of the interface name */ -#define WAN_DRVNAME_SZ 15 /* max length of the link driver name */ -#define WAN_ADDRESS_SZ 31 /* max length of the WAN media address */ -#define USED_BY_FIELD 8 /* max length of the used by field */ - -/* Defines for UDP PACKET TYPE */ -#define UDP_PTPIPE_TYPE 0x01 -#define UDP_FPIPE_TYPE 0x02 -#define UDP_CPIPE_TYPE 0x03 -#define UDP_DRVSTATS_TYPE 0x04 -#define UDP_INVALID_TYPE 0x05 - -/* Command return code */ -#define CMD_OK 0 /* normal firmware return code */ -#define CMD_TIMEOUT 0xFF /* firmware command timed out */ - -/* UDP Packet Management */ -#define UDP_PKT_FRM_STACK 0x00 -#define UDP_PKT_FRM_NETWORK 0x01 - -/* Maximum interrupt test counter */ -#define MAX_INTR_TEST_COUNTER 100 - -/* Critical Values for RACE conditions*/ -#define CRITICAL_IN_ISR 0xA1 -#define CRITICAL_INTR_HANDLED 0xB1 - -/****** Data Types **********************************************************/ - -/*---------------------------------------------------------------------------- - * 
X.25-specific link-level configuration. - */ -typedef struct wan_x25_conf -{ - unsigned lo_pvc; /* lowest permanent circuit number */ - unsigned hi_pvc; /* highest permanent circuit number */ - unsigned lo_svc; /* lowest switched circuit number */ - unsigned hi_svc; /* highest switched circuit number */ - unsigned hdlc_window; /* HDLC window size (1..7) */ - unsigned pkt_window; /* X.25 packet window size (1..7) */ - unsigned t1; /* HDLC timer T1, sec (1..30) */ - unsigned t2; /* HDLC timer T2, sec (0..29) */ - unsigned t4; /* HDLC supervisory frame timer = T4 * T1 */ - unsigned n2; /* HDLC retransmission limit (1..30) */ - unsigned t10_t20; /* X.25 RESTART timeout, sec (1..255) */ - unsigned t11_t21; /* X.25 CALL timeout, sec (1..255) */ - unsigned t12_t22; /* X.25 RESET timeout, sec (1..255) */ - unsigned t13_t23; /* X.25 CLEAR timeout, sec (1..255) */ - unsigned t16_t26; /* X.25 INTERRUPT timeout, sec (1..255) */ - unsigned t28; /* X.25 REGISTRATION timeout, sec (1..255) */ - unsigned r10_r20; /* RESTART retransmission limit (0..250) */ - unsigned r12_r22; /* RESET retransmission limit (0..250) */ - unsigned r13_r23; /* CLEAR retransmission limit (0..250) */ - unsigned ccitt_compat; /* compatibility mode: 1988/1984/1980 */ - unsigned x25_conf_opt; /* User defined x25 config optoins */ - unsigned char LAPB_hdlc_only; /* Run in HDLC only mode */ - unsigned char logging; /* Control connection logging */ - unsigned char oob_on_modem; /* Whether to send modem status to the user app */ -} wan_x25_conf_t; - -/*---------------------------------------------------------------------------- - * Frame relay specific link-level configuration. - */ -typedef struct wan_fr_conf -{ - unsigned signalling; /* local in-channel signalling type */ - unsigned t391; /* link integrity verification timer */ - unsigned t392; /* polling verification timer */ - unsigned n391; /* full status polling cycle counter */ - unsigned n392; /* error threshold counter */ - unsigned n393; /* monitored events counter */ - unsigned dlci_num; /* number of DLCs (access node) */ - unsigned dlci[100]; /* List of all DLCIs */ -} wan_fr_conf_t; - -/*---------------------------------------------------------------------------- - * PPP-specific link-level configuration. - */ -typedef struct wan_ppp_conf -{ - unsigned restart_tmr; /* restart timer */ - unsigned auth_rsrt_tmr; /* authentication timer */ - unsigned auth_wait_tmr; /* authentication timer */ - unsigned mdm_fail_tmr; /* modem failure timer */ - unsigned dtr_drop_tmr; /* DTR drop timer */ - unsigned connect_tmout; /* connection timeout */ - unsigned conf_retry; /* max. retry */ - unsigned term_retry; /* max. retry */ - unsigned fail_retry; /* max. retry */ - unsigned auth_retry; /* max. retry */ - unsigned auth_options; /* authentication opt. */ - unsigned ip_options; /* IP options */ - char authenticator; /* AUTHENTICATOR or not */ - char ip_mode; /* Static/Host/Peer */ -} wan_ppp_conf_t; - -/*---------------------------------------------------------------------------- - * CHDLC-specific link-level configuration. 
- */ -typedef struct wan_chdlc_conf -{ - unsigned char ignore_dcd; /* Protocol options: */ - unsigned char ignore_cts; /* Ignore these to determine */ - unsigned char ignore_keepalive; /* link status (Yes or No) */ - unsigned char hdlc_streaming; /* hdlc_streaming mode (Y/N) */ - unsigned char receive_only; /* no transmit buffering (Y/N) */ - unsigned keepalive_tx_tmr; /* transmit keepalive timer */ - unsigned keepalive_rx_tmr; /* receive keepalive timer */ - unsigned keepalive_err_margin; /* keepalive_error_tolerance */ - unsigned slarp_timer; /* SLARP request timer */ -} wan_chdlc_conf_t; - - -/*---------------------------------------------------------------------------- - * WAN device configuration. Passed to ROUTER_SETUP IOCTL. - */ -typedef struct wandev_conf -{ - unsigned magic; /* magic number (for verification) */ - unsigned config_id; /* configuration structure identifier */ - /****** hardware configuration ******/ - unsigned ioport; /* adapter I/O port base */ - unsigned long maddr; /* dual-port memory address */ - unsigned msize; /* dual-port memory size */ - int irq; /* interrupt request level */ - int dma; /* DMA request level */ - char S514_CPU_no[1]; /* S514 PCI adapter CPU number ('A' or 'B') */ - unsigned PCI_slot_no; /* S514 PCI adapter slot number */ - char auto_pci_cfg; /* S515 PCI automatic slot detection */ - char comm_port; /* Communication Port (PRI=0, SEC=1) */ - unsigned bps; /* data transfer rate */ - unsigned mtu; /* maximum transmit unit size */ - unsigned udp_port; /* UDP port for management */ - unsigned char ttl; /* Time To Live for UDP security */ - unsigned char ft1; /* FT1 Configurator Option */ - char interface; /* RS-232/V.35, etc. */ - char clocking; /* external/internal */ - char line_coding; /* NRZ/NRZI/FM0/FM1, etc. */ - char station; /* DTE/DCE, primary/secondary, etc. */ - char connection; /* permanent/switched/on-demand */ - char read_mode; /* read mode: Polling or interrupt */ - char receive_only; /* disable tx buffers */ - char tty; /* Create a fake tty device */ - unsigned tty_major; /* Major number for wanpipe tty device */ - unsigned tty_minor; /* Minor number for wanpipe tty device */ - unsigned tty_mode; /* TTY operation mode SYNC or ASYNC */ - char backup; /* Backup Mode */ - unsigned hw_opt[4]; /* other hardware options */ - unsigned reserved[4]; - /****** arbitrary data ***************/ - unsigned data_size; /* data buffer size */ - void* data; /* data buffer, e.g. firmware */ - union /****** protocol-specific ************/ - { - wan_x25_conf_t x25; /* X.25 configuration */ - wan_ppp_conf_t ppp; /* PPP configuration */ - wan_fr_conf_t fr; /* frame relay configuration */ - wan_chdlc_conf_t chdlc; /* Cisco HDLC configuration */ - } u; -} wandev_conf_t; - -/* 'config_id' definitions */ -#define WANCONFIG_X25 101 /* X.25 link */ -#define WANCONFIG_FR 102 /* frame relay link */ -#define WANCONFIG_PPP 103 /* synchronous PPP link */ -#define WANCONFIG_CHDLC 104 /* Cisco HDLC Link */ -#define WANCONFIG_BSC 105 /* BiSync Streaming */ -#define WANCONFIG_HDLC 106 /* HDLC Support */ -#define WANCONFIG_MPPP 107 /* Multi Port PPP over RAW CHDLC */ - /* - * Configuration options defines. 
+ * wanrouter.h Legacy declarations kept around until X25 is removed */ -/* general options */ -#define WANOPT_OFF 0 -#define WANOPT_ON 1 -#define WANOPT_NO 0 -#define WANOPT_YES 1 - -/* intercace options */ -#define WANOPT_RS232 0 -#define WANOPT_V35 1 - -/* data encoding options */ -#define WANOPT_NRZ 0 -#define WANOPT_NRZI 1 -#define WANOPT_FM0 2 -#define WANOPT_FM1 3 - -/* link type options */ -#define WANOPT_POINTTOPOINT 0 /* RTS always active */ -#define WANOPT_MULTIDROP 1 /* RTS is active when transmitting */ - -/* clocking options */ -#define WANOPT_EXTERNAL 0 -#define WANOPT_INTERNAL 1 - -/* station options */ -#define WANOPT_DTE 0 -#define WANOPT_DCE 1 -#define WANOPT_CPE 0 -#define WANOPT_NODE 1 -#define WANOPT_SECONDARY 0 -#define WANOPT_PRIMARY 1 - -/* connection options */ -#define WANOPT_PERMANENT 0 /* DTR always active */ -#define WANOPT_SWITCHED 1 /* use DTR to setup link (dial-up) */ -#define WANOPT_ONDEMAND 2 /* activate DTR only before sending */ - -/* frame relay in-channel signalling */ -#define WANOPT_FR_ANSI 1 /* ANSI T1.617 Annex D */ -#define WANOPT_FR_Q933 2 /* ITU Q.933A */ -#define WANOPT_FR_LMI 3 /* LMI */ - -/* PPP IP Mode Options */ -#define WANOPT_PPP_STATIC 0 -#define WANOPT_PPP_HOST 1 -#define WANOPT_PPP_PEER 2 - -/* ASY Mode Options */ -#define WANOPT_ONE 1 -#define WANOPT_TWO 2 -#define WANOPT_ONE_AND_HALF 3 - -#define WANOPT_NONE 0 -#define WANOPT_ODD 1 -#define WANOPT_EVEN 2 - -/* CHDLC Protocol Options */ -/* DF Commented out for now. - -#define WANOPT_CHDLC_NO_DCD IGNORE_DCD_FOR_LINK_STAT -#define WANOPT_CHDLC_NO_CTS IGNORE_CTS_FOR_LINK_STAT -#define WANOPT_CHDLC_NO_KEEPALIVE IGNORE_KPALV_FOR_LINK_STAT -*/ - -/* Port options */ -#define WANOPT_PRI 0 -#define WANOPT_SEC 1 -/* read mode */ -#define WANOPT_INTR 0 -#define WANOPT_POLL 1 - -#define WANOPT_TTY_SYNC 0 -#define WANOPT_TTY_ASYNC 1 -/*---------------------------------------------------------------------------- - * WAN Link Status Info (for ROUTER_STAT IOCTL). - */ -typedef struct wandev_stat -{ - unsigned state; /* link state */ - unsigned ndev; /* number of configured interfaces */ - - /* link/interface configuration */ - unsigned connection; /* permanent/switched/on-demand */ - unsigned media_type; /* Frame relay/PPP/X.25/SDLC, etc. */ - unsigned mtu; /* max. 
transmit unit for this device */ - - /* physical level statistics */ - unsigned modem_status; /* modem status */ - unsigned rx_frames; /* received frames count */ - unsigned rx_overruns; /* receiver overrun error count */ - unsigned rx_crc_err; /* receive CRC error count */ - unsigned rx_aborts; /* received aborted frames count */ - unsigned rx_bad_length; /* unexpetedly long/short frames count */ - unsigned rx_dropped; /* frames discarded at device level */ - unsigned tx_frames; /* transmitted frames count */ - unsigned tx_underruns; /* aborted transmissions (underruns) count */ - unsigned tx_timeouts; /* transmission timeouts */ - unsigned tx_rejects; /* other transmit errors */ - - /* media level statistics */ - unsigned rx_bad_format; /* frames with invalid format */ - unsigned rx_bad_addr; /* frames with invalid media address */ - unsigned tx_retries; /* frames re-transmitted */ - unsigned reserved[16]; /* reserved for future use */ -} wandev_stat_t; +#ifndef _UAPI_ROUTER_H +#define _UAPI_ROUTER_H /* 'state' defines */ enum wan_states @@ -365,88 +11,7 @@ enum wan_states WAN_UNCONFIGURED, /* link/channel is not configured */ WAN_DISCONNECTED, /* link/channel is disconnected */ WAN_CONNECTING, /* connection is in progress */ - WAN_CONNECTED, /* link/channel is operational */ - WAN_LIMIT, /* for verification only */ - WAN_DUALPORT, /* for Dual Port cards */ - WAN_DISCONNECTING, - WAN_FT1_READY /* FT1 Configurator Ready */ + WAN_CONNECTED /* link/channel is operational */ }; -enum { - WAN_LOCAL_IP, - WAN_POINTOPOINT_IP, - WAN_NETMASK_IP, - WAN_BROADCAST_IP -}; - -/* 'modem_status' masks */ -#define WAN_MODEM_CTS 0x0001 /* CTS line active */ -#define WAN_MODEM_DCD 0x0002 /* DCD line active */ -#define WAN_MODEM_DTR 0x0010 /* DTR line active */ -#define WAN_MODEM_RTS 0x0020 /* RTS line active */ - -/*---------------------------------------------------------------------------- - * WAN interface (logical channel) configuration (for ROUTER_IFNEW IOCTL). 
- */ -typedef struct wanif_conf -{ - unsigned magic; /* magic number */ - unsigned config_id; /* configuration identifier */ - char name[WAN_IFNAME_SZ+1]; /* interface name, ASCIIZ */ - char addr[WAN_ADDRESS_SZ+1]; /* media address, ASCIIZ */ - char usedby[USED_BY_FIELD]; /* used by API or WANPIPE */ - unsigned idle_timeout; /* sec, before disconnecting */ - unsigned hold_timeout; /* sec, before re-connecting */ - unsigned cir; /* Committed Information Rate fwd,bwd*/ - unsigned bc; /* Committed Burst Size fwd, bwd */ - unsigned be; /* Excess Burst Size fwd, bwd */ - unsigned char enable_IPX; /* Enable or Disable IPX */ - unsigned char inarp; /* Send Inverse ARP requests Y/N */ - unsigned inarp_interval; /* sec, between InARP requests */ - unsigned long network_number; /* Network Number for IPX */ - char mc; /* Multicast on or off */ - char local_addr[WAN_ADDRESS_SZ+1];/* local media address, ASCIIZ */ - unsigned char port; /* board port */ - unsigned char protocol; /* prococol used in this channel (TCPOX25 or X25) */ - char pap; /* PAP enabled or disabled */ - char chap; /* CHAP enabled or disabled */ - unsigned char userid[511]; /* List of User Id */ - unsigned char passwd[511]; /* List of passwords */ - unsigned char sysname[31]; /* Name of the system */ - unsigned char ignore_dcd; /* Protocol options: */ - unsigned char ignore_cts; /* Ignore these to determine */ - unsigned char ignore_keepalive; /* link status (Yes or No) */ - unsigned char hdlc_streaming; /* Hdlc streaming mode (Y/N) */ - unsigned keepalive_tx_tmr; /* transmit keepalive timer */ - unsigned keepalive_rx_tmr; /* receive keepalive timer */ - unsigned keepalive_err_margin; /* keepalive_error_tolerance */ - unsigned slarp_timer; /* SLARP request timer */ - unsigned char ttl; /* Time To Live for UDP security */ - char interface; /* RS-232/V.35, etc. */ - char clocking; /* external/internal */ - unsigned bps; /* data transfer rate */ - unsigned mtu; /* maximum transmit unit size */ - unsigned char if_down; /* brind down interface when disconnected */ - unsigned char gateway; /* Is this interface a gateway */ - unsigned char true_if_encoding; /* Set the dev->type to true board protocol */ - - unsigned char asy_data_trans; /* async API options */ - unsigned char rts_hs_for_receive; /* async Protocol options */ - unsigned char xon_xoff_hs_for_receive; - unsigned char xon_xoff_hs_for_transmit; - unsigned char dcd_hs_for_transmit; - unsigned char cts_hs_for_transmit; - unsigned char async_mode; - unsigned tx_bits_per_char; - unsigned rx_bits_per_char; - unsigned stop_bits; - unsigned char parity; - unsigned break_timer; - unsigned inter_char_timer; - unsigned rx_complete_length; - unsigned xon_char; - unsigned xoff_char; - unsigned char receive_only; /* no transmit buffering (Y/N) */ -} wanif_conf_t; - #endif /* _UAPI_ROUTER_H */ diff --git a/net/wanrouter/Kconfig b/net/wanrouter/Kconfig deleted file mode 100644 index a157a2e64e18..000000000000 --- a/net/wanrouter/Kconfig +++ /dev/null @@ -1,27 +0,0 @@ -# -# Configuration for WAN router -# - -config WAN_ROUTER - tristate "WAN router (DEPRECATED)" - depends on EXPERIMENTAL - ---help--- - Wide Area Networks (WANs), such as X.25, frame relay and leased - lines, are used to interconnect Local Area Networks (LANs) over vast - distances with data transfer rates significantly higher than those - achievable with commonly used asynchronous modem connections. - Usually, a quite expensive external device called a `WAN router' is - needed to connect to a WAN. 
- - As an alternative, WAN routing can be built into the Linux kernel. - With relatively inexpensive WAN interface cards available on the - market, a perfectly usable router can be built for less than half - the price of an external router. If you have one of those cards and - wish to use your Linux box as a WAN router, say Y here and also to - the WAN driver for your card, below. You will then need the - wan-tools package which is available from . - - To compile WAN routing support as a module, choose M here: the - module will be called wanrouter. - - If unsure, say N. diff --git a/net/wanrouter/Makefile b/net/wanrouter/Makefile deleted file mode 100644 index 4da14bc48078..000000000000 --- a/net/wanrouter/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the Linux WAN router layer. -# - -obj-$(CONFIG_WAN_ROUTER) += wanrouter.o - -wanrouter-y := wanproc.o wanmain.o diff --git a/net/wanrouter/patchlevel b/net/wanrouter/patchlevel deleted file mode 100644 index c043eea7767e..000000000000 --- a/net/wanrouter/patchlevel +++ /dev/null @@ -1 +0,0 @@ -2.2.1 diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c deleted file mode 100644 index 2ab785064b7e..000000000000 --- a/net/wanrouter/wanmain.c +++ /dev/null @@ -1,782 +0,0 @@ -/***************************************************************************** -* wanmain.c WAN Multiprotocol Router Module. Main code. -* -* This module is completely hardware-independent and provides -* the following common services for the WAN Link Drivers: -* o WAN device management (registering, unregistering) -* o Network interface management -* o Physical connection management (dial-up, incoming calls) -* o Logical connection management (switched virtual circuits) -* o Protocol encapsulation/decapsulation -* -* Author: Gideon Hack -* -* Copyright: (c) 1995-1999 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Nov 24, 2000 Nenad Corbic Updated for 2.4.X kernels -* Nov 07, 2000 Nenad Corbic Fixed the Mulit-Port PPP for kernels 2.2.16 and -* greater. -* Aug 2, 2000 Nenad Corbic Block the Multi-Port PPP from running on -* kernels 2.2.16 or greater. The SyncPPP -* has changed. -* Jul 13, 2000 Nenad Corbic Added SyncPPP support -* Added extra debugging in device_setup(). -* Oct 01, 1999 Gideon Hack Update for s514 PCI card -* Dec 27, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) -* Jan 16, 1997 Gene Kozin router_devlist made public -* Jan 31, 1997 Alan Cox Hacked it about a bit for 2.1 -* Jun 27, 1997 Alan Cox realigned with vendor code -* Oct 15, 1997 Farhan Thawar changed wan_encapsulate to add a pad byte of 0 -* Apr 20, 1998 Alan Cox Fixed 2.1 symbols -* May 17, 1998 K. Baranowski Fixed SNAP encapsulation in wan_encapsulate -* Dec 15, 1998 Arnaldo Melo support for firmwares of up to 128000 bytes -* check wandev->setup return value -* Dec 22, 1998 Arnaldo Melo vmalloc/vfree used in device_setup to allocate -* kernel memory and copy configuration data to -* kernel space (for big firmwares) -* Jun 02, 1999 Gideon Hack Updates for Linux 2.0.X and 2.2.X kernels. -*****************************************************************************/ - -#include /* offsetof(), etc. 
*/ -#include -#include /* return codes */ -#include -#include /* support for loadable modules */ -#include /* kmalloc(), kfree() */ -#include -#include -#include /* inline mem*, str* functions */ - -#include /* htons(), etc. */ -#include /* WAN router API definitions */ - -#include /* vmalloc, vfree */ -#include /* copy_to/from_user */ -#include /* __initfunc et al. */ - -#define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev))) - -/* - * Function Prototypes - */ - -/* - * WAN device IOCTL handlers - */ - -static DEFINE_MUTEX(wanrouter_mutex); -static int wanrouter_device_setup(struct wan_device *wandev, - wandev_conf_t __user *u_conf); -static int wanrouter_device_stat(struct wan_device *wandev, - wandev_stat_t __user *u_stat); -static int wanrouter_device_shutdown(struct wan_device *wandev); -static int wanrouter_device_new_if(struct wan_device *wandev, - wanif_conf_t __user *u_conf); -static int wanrouter_device_del_if(struct wan_device *wandev, - char __user *u_name); - -/* - * Miscellaneous - */ - -static struct wan_device *wanrouter_find_device(char *name); -static int wanrouter_delete_interface(struct wan_device *wandev, char *name); -static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __acquires(lock); -static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __releases(lock); - - - -/* - * Global Data - */ - -static char wanrouter_fullname[] = "Sangoma WANPIPE Router"; -static char wanrouter_copyright[] = "(c) 1995-2000 Sangoma Technologies Inc."; -static char wanrouter_modname[] = ROUTER_NAME; /* short module name */ -struct wan_device* wanrouter_router_devlist; /* list of registered devices */ - -/* - * Organize Unique Identifiers for encapsulation/decapsulation - */ - -#if 0 -static unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 }; -static unsigned char wanrouter_oui_802_2[] = { 0x00, 0x80, 0xC2 }; -#endif - -static int __init wanrouter_init(void) -{ - int err; - - printk(KERN_INFO "%s v%u.%u %s\n", - wanrouter_fullname, ROUTER_VERSION, ROUTER_RELEASE, - wanrouter_copyright); - - err = wanrouter_proc_init(); - if (err) - printk(KERN_INFO "%s: can't create entry in proc filesystem!\n", - wanrouter_modname); - - return err; -} - -static void __exit wanrouter_cleanup (void) -{ - wanrouter_proc_cleanup(); -} - -/* - * This is just plain dumb. We should move the bugger to drivers/net/wan, - * slap it first in directory and make it module_init(). The only reason - * for subsys_initcall() here is that net goes after drivers (why, BTW?) - */ -subsys_initcall(wanrouter_init); -module_exit(wanrouter_cleanup); - -/* - * Kernel APIs - */ - -/* - * Register WAN device. - * o verify device credentials - * o create an entry for the device in the /proc/net/router directory - * o initialize internally maintained fields of the wan_device structure - * o link device data space to a singly-linked list - * o if it's the first device, then start kernel 'thread' - * o increment module use count - * - * Return: - * 0 Ok - * < 0 error. 
- * - * Context: process - */ - - -int register_wan_device(struct wan_device *wandev) -{ - int err, namelen; - - if ((wandev == NULL) || (wandev->magic != ROUTER_MAGIC) || - (wandev->name == NULL)) - return -EINVAL; - - namelen = strlen(wandev->name); - if (!namelen || (namelen > WAN_DRVNAME_SZ)) - return -EINVAL; - - if (wanrouter_find_device(wandev->name)) - return -EEXIST; - -#ifdef WANDEBUG - printk(KERN_INFO "%s: registering WAN device %s\n", - wanrouter_modname, wandev->name); -#endif - - /* - * Register /proc directory entry - */ - err = wanrouter_proc_add(wandev); - if (err) { - printk(KERN_INFO - "%s: can't create /proc/net/router/%s entry!\n", - wanrouter_modname, wandev->name); - return err; - } - - /* - * Initialize fields of the wan_device structure maintained by the - * router and update local data. - */ - - wandev->ndev = 0; - wandev->dev = NULL; - wandev->next = wanrouter_router_devlist; - wanrouter_router_devlist = wandev; - return 0; -} - -/* - * Unregister WAN device. - * o shut down device - * o unlink device data space from the linked list - * o delete device entry in the /proc/net/router directory - * o decrement module use count - * - * Return: 0 Ok - * <0 error. - * Context: process - */ - - -int unregister_wan_device(char *name) -{ - struct wan_device *wandev, *prev; - - if (name == NULL) - return -EINVAL; - - for (wandev = wanrouter_router_devlist, prev = NULL; - wandev && strcmp(wandev->name, name); - prev = wandev, wandev = wandev->next) - ; - if (wandev == NULL) - return -ENODEV; - -#ifdef WANDEBUG - printk(KERN_INFO "%s: unregistering WAN device %s\n", - wanrouter_modname, name); -#endif - - if (wandev->state != WAN_UNCONFIGURED) - wanrouter_device_shutdown(wandev); - - if (prev) - prev->next = wandev->next; - else - wanrouter_router_devlist = wandev->next; - - wanrouter_proc_delete(wandev); - return 0; -} - -#if 0 - -/* - * Encapsulate packet. - * - * Return: encapsulation header size - * < 0 - unsupported Ethertype - * - * Notes: - * 1. This function may be called on interrupt context. - */ - - -int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev, - unsigned short type) -{ - int hdr_len = 0; - - switch (type) { - case ETH_P_IP: /* IP datagram encapsulation */ - hdr_len += 1; - skb_push(skb, 1); - skb->data[0] = NLPID_IP; - break; - - case ETH_P_IPX: /* SNAP encapsulation */ - case ETH_P_ARP: - hdr_len += 7; - skb_push(skb, 7); - skb->data[0] = 0; - skb->data[1] = NLPID_SNAP; - skb_copy_to_linear_data_offset(skb, 2, wanrouter_oui_ether, - sizeof(wanrouter_oui_ether)); - *((unsigned short*)&skb->data[5]) = htons(type); - break; - - default: /* Unknown packet type */ - printk(KERN_INFO - "%s: unsupported Ethertype 0x%04X on interface %s!\n", - wanrouter_modname, type, dev->name); - hdr_len = -EINVAL; - } - return hdr_len; -} - - -/* - * Decapsulate packet. - * - * Return: Ethertype (in network order) - * 0 unknown encapsulation - * - * Notes: - * 1. This function may be called on interrupt context. - */ - - -__be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) -{ - int cnt = skb->data[0] ? 
0 : 1; /* there may be a pad present */ - __be16 ethertype; - - switch (skb->data[cnt]) { - case NLPID_IP: /* IP datagramm */ - ethertype = htons(ETH_P_IP); - cnt += 1; - break; - - case NLPID_SNAP: /* SNAP encapsulation */ - if (memcmp(&skb->data[cnt + 1], wanrouter_oui_ether, - sizeof(wanrouter_oui_ether))){ - printk(KERN_INFO - "%s: unsupported SNAP OUI %02X-%02X-%02X " - "on interface %s!\n", wanrouter_modname, - skb->data[cnt+1], skb->data[cnt+2], - skb->data[cnt+3], dev->name); - return 0; - } - ethertype = *((__be16*)&skb->data[cnt+4]); - cnt += 6; - break; - - /* add other protocols, e.g. CLNP, ESIS, ISIS, if needed */ - - default: - printk(KERN_INFO - "%s: unsupported NLPID 0x%02X on interface %s!\n", - wanrouter_modname, skb->data[cnt], dev->name); - return 0; - } - skb->protocol = ethertype; - skb->pkt_type = PACKET_HOST; /* Physically point to point */ - skb_pull(skb, cnt); - skb_reset_mac_header(skb); - return ethertype; -} - -#endif /* 0 */ - -/* - * WAN device IOCTL. - * o find WAN device associated with this node - * o execute requested action or pass command to the device driver - */ - -long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = file->f_path.dentry->d_inode; - int err = 0; - struct proc_dir_entry *dent; - struct wan_device *wandev; - void __user *data = (void __user *)arg; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if ((cmd >> 8) != ROUTER_IOCTL) - return -EINVAL; - - dent = PDE(inode); - if ((dent == NULL) || (dent->data == NULL)) - return -EINVAL; - - wandev = dent->data; - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - - mutex_lock(&wanrouter_mutex); - switch (cmd) { - case ROUTER_SETUP: - err = wanrouter_device_setup(wandev, data); - break; - - case ROUTER_DOWN: - err = wanrouter_device_shutdown(wandev); - break; - - case ROUTER_STAT: - err = wanrouter_device_stat(wandev, data); - break; - - case ROUTER_IFNEW: - err = wanrouter_device_new_if(wandev, data); - break; - - case ROUTER_IFDEL: - err = wanrouter_device_del_if(wandev, data); - break; - - case ROUTER_IFSTAT: - break; - - default: - if ((cmd >= ROUTER_USER) && - (cmd <= ROUTER_USER_MAX) && - wandev->ioctl) - err = wandev->ioctl(wandev, cmd, arg); - else err = -EINVAL; - } - mutex_unlock(&wanrouter_mutex); - return err; -} - -/* - * WAN Driver IOCTL Handlers - */ - -/* - * Setup WAN link device. - * o verify user address space - * o allocate kernel memory and copy configuration data to kernel space - * o if configuration data includes extension, copy it to kernel space too - * o call driver's setup() entry point - */ - -static int wanrouter_device_setup(struct wan_device *wandev, - wandev_conf_t __user *u_conf) -{ - void *data = NULL; - wandev_conf_t *conf; - int err = -EINVAL; - - if (wandev->setup == NULL) { /* Nothing to do ? 
*/ - printk(KERN_INFO "%s: ERROR, No setup script: wandev->setup()\n", - wandev->name); - return 0; - } - - conf = kmalloc(sizeof(wandev_conf_t), GFP_KERNEL); - if (conf == NULL){ - printk(KERN_INFO "%s: ERROR, Failed to allocate kernel memory !\n", - wandev->name); - return -ENOBUFS; - } - - if (copy_from_user(conf, u_conf, sizeof(wandev_conf_t))) { - printk(KERN_INFO "%s: Failed to copy user config data to kernel space!\n", - wandev->name); - kfree(conf); - return -EFAULT; - } - - if (conf->magic != ROUTER_MAGIC) { - kfree(conf); - printk(KERN_INFO "%s: ERROR, Invalid MAGIC Number\n", - wandev->name); - return -EINVAL; - } - - if (conf->data_size && conf->data) { - if (conf->data_size > 128000) { - printk(KERN_INFO - "%s: ERROR, Invalid firmware data size %i !\n", - wandev->name, conf->data_size); - kfree(conf); - return -EINVAL; - } - - data = vmalloc(conf->data_size); - if (!data) { - printk(KERN_INFO - "%s: ERROR, Failed allocate kernel memory !\n", - wandev->name); - kfree(conf); - return -ENOBUFS; - } - if (!copy_from_user(data, conf->data, conf->data_size)) { - conf->data = data; - err = wandev->setup(wandev, conf); - } else { - printk(KERN_INFO - "%s: ERROR, Failed to copy from user data !\n", - wandev->name); - err = -EFAULT; - } - vfree(data); - } else { - printk(KERN_INFO - "%s: ERROR, No firmware found ! Firmware size = %i !\n", - wandev->name, conf->data_size); - } - - kfree(conf); - return err; -} - -/* - * Shutdown WAN device. - * o delete all not opened logical channels for this device - * o call driver's shutdown() entry point - */ - -static int wanrouter_device_shutdown(struct wan_device *wandev) -{ - struct net_device *dev; - int err=0; - - if (wandev->state == WAN_UNCONFIGURED) - return 0; - - printk(KERN_INFO "\n%s: Shutting Down!\n",wandev->name); - - for (dev = wandev->dev; dev;) { - err = wanrouter_delete_interface(wandev, dev->name); - if (err) - return err; - /* The above function deallocates the current dev - * structure. Therefore, we cannot use netdev_priv(dev) - * as the next element: wandev->dev points to the - * next element */ - dev = wandev->dev; - } - - if (wandev->ndev) - return -EBUSY; /* there are opened interfaces */ - - if (wandev->shutdown) - err=wandev->shutdown(wandev); - - return err; -} - -/* - * Get WAN device status & statistics. - */ - -static int wanrouter_device_stat(struct wan_device *wandev, - wandev_stat_t __user *u_stat) -{ - wandev_stat_t stat; - - memset(&stat, 0, sizeof(stat)); - - /* Ask device driver to update device statistics */ - if ((wandev->state != WAN_UNCONFIGURED) && wandev->update) - wandev->update(wandev); - - /* Fill out structure */ - stat.ndev = wandev->ndev; - stat.state = wandev->state; - - if (copy_to_user(u_stat, &stat, sizeof(stat))) - return -EFAULT; - - return 0; -} - -/* - * Create new WAN interface. 
- * o verify user address space - * o copy configuration data to kernel address space - * o allocate network interface data space - * o call driver's new_if() entry point - * o make sure there is no interface name conflict - * o register network interface - */ - -static int wanrouter_device_new_if(struct wan_device *wandev, - wanif_conf_t __user *u_conf) -{ - wanif_conf_t *cnf; - struct net_device *dev = NULL; - int err; - - if ((wandev->state == WAN_UNCONFIGURED) || (wandev->new_if == NULL)) - return -ENODEV; - - cnf = kmalloc(sizeof(wanif_conf_t), GFP_KERNEL); - if (!cnf) - return -ENOBUFS; - - err = -EFAULT; - if (copy_from_user(cnf, u_conf, sizeof(wanif_conf_t))) - goto out; - - err = -EINVAL; - if (cnf->magic != ROUTER_MAGIC) - goto out; - - if (cnf->config_id == WANCONFIG_MPPP) { - printk(KERN_INFO "%s: Wanpipe Mulit-Port PPP support has not been compiled in!\n", - wandev->name); - err = -EPROTONOSUPPORT; - goto out; - } else { - err = wandev->new_if(wandev, dev, cnf); - } - - if (!err) { - /* Register network interface. This will invoke init() - * function supplied by the driver. If device registered - * successfully, add it to the interface list. - */ - -#ifdef WANDEBUG - printk(KERN_INFO "%s: registering interface %s...\n", - wanrouter_modname, dev->name); -#endif - - err = register_netdev(dev); - if (!err) { - struct net_device *slave = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - - if (wandev->dev == NULL) { - wandev->dev = dev; - } else { - for (slave=wandev->dev; - DEV_TO_SLAVE(slave); - slave = DEV_TO_SLAVE(slave)) - DEV_TO_SLAVE(slave) = dev; - } - ++wandev->ndev; - - unlock_adapter_irq(&wandev->lock, &smp_flags); - err = 0; /* done !!! */ - goto out; - } - if (wandev->del_if) - wandev->del_if(wandev, dev); - free_netdev(dev); - } - -out: - kfree(cnf); - return err; -} - - -/* - * Delete WAN logical channel. - * o verify user address space - * o copy configuration data to kernel address space - */ - -static int wanrouter_device_del_if(struct wan_device *wandev, char __user *u_name) -{ - char name[WAN_IFNAME_SZ + 1]; - int err = 0; - - if (wandev->state == WAN_UNCONFIGURED) - return -ENODEV; - - memset(name, 0, sizeof(name)); - - if (copy_from_user(name, u_name, WAN_IFNAME_SZ)) - return -EFAULT; - - err = wanrouter_delete_interface(wandev, name); - if (err) - return err; - - /* If last interface being deleted, shutdown card - * This helps with administration at leaf nodes - * (You can tell if the person at the other end of the phone - * has an interface configured) and avoids DoS vulnerabilities - * in binary driver files - this fixes a problem with the current - * Sangoma driver going into strange states when all the network - * interfaces are deleted and the link irrecoverably disconnected. - */ - - if (!wandev->ndev && wandev->shutdown) - err = wandev->shutdown(wandev); - - return err; -} - -/* - * Miscellaneous Functions - */ - -/* - * Find WAN device by name. - * Return pointer to the WAN device data space or NULL if device not found. - */ - -static struct wan_device *wanrouter_find_device(char *name) -{ - struct wan_device *wandev; - - for (wandev = wanrouter_router_devlist; - wandev && strcmp(wandev->name, name); - wandev = wandev->next); - return wandev; -} - -/* - * Delete WAN logical channel identified by its name. 
- * o find logical channel by its name - * o call driver's del_if() entry point - * o unregister network interface - * o unlink channel data space from linked list of channels - * o release channel data space - * - * Return: 0 success - * -ENODEV channel not found. - * -EBUSY interface is open - * - * Note: If (force != 0), then device will be destroyed even if interface - * associated with it is open. It's caller's responsibility to make - * sure that opened interfaces are not removed! - */ - -static int wanrouter_delete_interface(struct wan_device *wandev, char *name) -{ - struct net_device *dev = NULL, *prev = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - dev = wandev->dev; - prev = NULL; - while (dev && strcmp(name, dev->name)) { - struct net_device **slave = netdev_priv(dev); - prev = dev; - dev = *slave; - } - unlock_adapter_irq(&wandev->lock, &smp_flags); - - if (dev == NULL) - return -ENODEV; /* interface not found */ - - if (netif_running(dev)) - return -EBUSY; /* interface in use */ - - if (wandev->del_if) - wandev->del_if(wandev, dev); - - lock_adapter_irq(&wandev->lock, &smp_flags); - if (prev) { - struct net_device **prev_slave = netdev_priv(prev); - struct net_device **slave = netdev_priv(dev); - - *prev_slave = *slave; - } else { - struct net_device **slave = netdev_priv(dev); - wandev->dev = *slave; - } - --wandev->ndev; - unlock_adapter_irq(&wandev->lock, &smp_flags); - - printk(KERN_INFO "%s: unregistering '%s'\n", wandev->name, dev->name); - - unregister_netdev(dev); - - free_netdev(dev); - - return 0; -} - -static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __acquires(lock) -{ - spin_lock_irqsave(lock, *smp_flags); -} - - -static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __releases(lock) -{ - spin_unlock_irqrestore(lock, *smp_flags); -} - -EXPORT_SYMBOL(register_wan_device); -EXPORT_SYMBOL(unregister_wan_device); - -MODULE_LICENSE("GPL"); - -/* - * End - */ diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c deleted file mode 100644 index c43612ee96bb..000000000000 --- a/net/wanrouter/wanproc.c +++ /dev/null @@ -1,380 +0,0 @@ -/***************************************************************************** -* wanproc.c WAN Router Module. /proc filesystem interface. -* -* This module is completely hardware-independent and provides -* access to the router using Linux /proc filesystem. -* -* Author: Gideon Hack -* -* Copyright: (c) 1995-1999 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Jun 02, 1999 Gideon Hack Updates for Linux 2.2.X kernels. -* Jun 29, 1997 Alan Cox Merged with 1.0.3 vendor code -* Jan 29, 1997 Gene Kozin v1.0.1. Implemented /proc read routines -* Jan 30, 1997 Alan Cox Hacked around for 2.1 -* Dec 13, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) -*****************************************************************************/ - -#include /* __initfunc et al. */ -#include /* offsetof(), etc. 
*/ -#include /* return codes */ -#include -#include -#include /* WAN router API definitions */ -#include -#include - -#include -#include - -#define PROC_STATS_FORMAT "%30s: %12lu\n" - -/****** Defines and Macros **************************************************/ - -#define PROT_DECODE(prot) ((prot == WANCONFIG_FR) ? " FR" :\ - (prot == WANCONFIG_X25) ? " X25" : \ - (prot == WANCONFIG_PPP) ? " PPP" : \ - (prot == WANCONFIG_CHDLC) ? " CHDLC": \ - (prot == WANCONFIG_MPPP) ? " MPPP" : \ - " Unknown" ) - -/****** Function Prototypes *************************************************/ - -#ifdef CONFIG_PROC_FS - -/* Miscellaneous */ - -/* - * Structures for interfacing with the /proc filesystem. - * Router creates its own directory /proc/net/router with the following - * entries: - * config device configuration - * status global device statistics - * entry for each WAN device - */ - -/* - * Generic /proc/net/router/ file and inode operations - */ - -/* - * /proc/net/router - */ - -static DEFINE_MUTEX(config_mutex); -static struct proc_dir_entry *proc_router; - -/* Strings */ - -/* - * Interface functions - */ - -/****** Proc filesystem entry points ****************************************/ - -/* - * Iterator - */ -static void *r_start(struct seq_file *m, loff_t *pos) -{ - struct wan_device *wandev; - loff_t l = *pos; - - mutex_lock(&config_mutex); - if (!l--) - return SEQ_START_TOKEN; - for (wandev = wanrouter_router_devlist; l-- && wandev; - wandev = wandev->next) - ; - return wandev; -} - -static void *r_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct wan_device *wandev = v; - (*pos)++; - return (v == SEQ_START_TOKEN) ? wanrouter_router_devlist : wandev->next; -} - -static void r_stop(struct seq_file *m, void *v) -{ - mutex_unlock(&config_mutex); -} - -static int config_show(struct seq_file *m, void *v) -{ - struct wan_device *p = v; - if (v == SEQ_START_TOKEN) { - seq_puts(m, "Device name | port |IRQ|DMA| mem.addr |" - "mem.size|option1|option2|option3|option4\n"); - return 0; - } - if (!p->state) - return 0; - seq_printf(m, "%-15s|0x%-4X|%3u|%3u| 0x%-8lX |0x%-6X|%7u|%7u|%7u|%7u\n", - p->name, p->ioport, p->irq, p->dma, p->maddr, p->msize, - p->hw_opt[0], p->hw_opt[1], p->hw_opt[2], p->hw_opt[3]); - return 0; -} - -static int status_show(struct seq_file *m, void *v) -{ - struct wan_device *p = v; - if (v == SEQ_START_TOKEN) { - seq_puts(m, "Device name |protocol|station|interface|" - "clocking|baud rate| MTU |ndev|link state\n"); - return 0; - } - if (!p->state) - return 0; - seq_printf(m, "%-15s|%-8s| %-7s| %-9s|%-8s|%9u|%5u|%3u |", - p->name, - PROT_DECODE(p->config_id), - p->config_id == WANCONFIG_FR ? - (p->station ? "Node" : "CPE") : - (p->config_id == WANCONFIG_X25 ? - (p->station ? "DCE" : "DTE") : - ("N/A")), - p->interface ? "V.35" : "RS-232", - p->clocking ? 
"internal" : "external", - p->bps, - p->mtu, - p->ndev); - - switch (p->state) { - case WAN_UNCONFIGURED: - seq_printf(m, "%-12s\n", "unconfigured"); - break; - case WAN_DISCONNECTED: - seq_printf(m, "%-12s\n", "disconnected"); - break; - case WAN_CONNECTING: - seq_printf(m, "%-12s\n", "connecting"); - break; - case WAN_CONNECTED: - seq_printf(m, "%-12s\n", "connected"); - break; - default: - seq_printf(m, "%-12s\n", "invalid"); - break; - } - return 0; -} - -static const struct seq_operations config_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = config_show, -}; - -static const struct seq_operations status_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = status_show, -}; - -static int config_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &config_op); -} - -static int status_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &status_op); -} - -static const struct file_operations config_fops = { - .owner = THIS_MODULE, - .open = config_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations status_fops = { - .owner = THIS_MODULE, - .open = status_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int wandev_show(struct seq_file *m, void *v) -{ - struct wan_device *wandev = m->private; - - if (wandev->magic != ROUTER_MAGIC) - return 0; - - if (!wandev->state) { - seq_puts(m, "device is not configured!\n"); - return 0; - } - - /* Update device statistics */ - if (wandev->update) { - int err = wandev->update(wandev); - if (err == -EAGAIN) { - seq_puts(m, "Device is busy!\n"); - return 0; - } - if (err) { - seq_puts(m, "Device is not configured!\n"); - return 0; - } - } - - seq_printf(m, PROC_STATS_FORMAT, - "total packets received", wandev->stats.rx_packets); - seq_printf(m, PROC_STATS_FORMAT, - "total packets transmitted", wandev->stats.tx_packets); - seq_printf(m, PROC_STATS_FORMAT, - "total bytes received", wandev->stats.rx_bytes); - seq_printf(m, PROC_STATS_FORMAT, - "total bytes transmitted", wandev->stats.tx_bytes); - seq_printf(m, PROC_STATS_FORMAT, - "bad packets received", wandev->stats.rx_errors); - seq_printf(m, PROC_STATS_FORMAT, - "packet transmit problems", wandev->stats.tx_errors); - seq_printf(m, PROC_STATS_FORMAT, - "received frames dropped", wandev->stats.rx_dropped); - seq_printf(m, PROC_STATS_FORMAT, - "transmit frames dropped", wandev->stats.tx_dropped); - seq_printf(m, PROC_STATS_FORMAT, - "multicast packets received", wandev->stats.multicast); - seq_printf(m, PROC_STATS_FORMAT, - "transmit collisions", wandev->stats.collisions); - seq_printf(m, PROC_STATS_FORMAT, - "receive length errors", wandev->stats.rx_length_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver overrun errors", wandev->stats.rx_over_errors); - seq_printf(m, PROC_STATS_FORMAT, - "CRC errors", wandev->stats.rx_crc_errors); - seq_printf(m, PROC_STATS_FORMAT, - "frame format errors (aborts)", wandev->stats.rx_frame_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver fifo overrun", wandev->stats.rx_fifo_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver missed packet", wandev->stats.rx_missed_errors); - seq_printf(m, PROC_STATS_FORMAT, - "aborted frames transmitted", wandev->stats.tx_aborted_errors); - return 0; -} - -static int wandev_open(struct inode *inode, struct file *file) -{ - return single_open(file, wandev_show, PDE(inode)->data); -} - -static const struct file_operations wandev_fops = { - 
.owner = THIS_MODULE, - .open = wandev_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .unlocked_ioctl = wanrouter_ioctl, -}; - -/* - * Initialize router proc interface. - */ - -int __init wanrouter_proc_init(void) -{ - struct proc_dir_entry *p; - proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net); - if (!proc_router) - goto fail; - - p = proc_create("config", S_IRUGO, proc_router, &config_fops); - if (!p) - goto fail_config; - p = proc_create("status", S_IRUGO, proc_router, &status_fops); - if (!p) - goto fail_stat; - return 0; -fail_stat: - remove_proc_entry("config", proc_router); -fail_config: - remove_proc_entry(ROUTER_NAME, init_net.proc_net); -fail: - return -ENOMEM; -} - -/* - * Clean up router proc interface. - */ - -void wanrouter_proc_cleanup(void) -{ - remove_proc_entry("config", proc_router); - remove_proc_entry("status", proc_router); - remove_proc_entry(ROUTER_NAME, init_net.proc_net); -} - -/* - * Add directory entry for WAN device. - */ - -int wanrouter_proc_add(struct wan_device* wandev) -{ - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - - wandev->dent = proc_create(wandev->name, S_IRUGO, - proc_router, &wandev_fops); - if (!wandev->dent) - return -ENOMEM; - wandev->dent->data = wandev; - return 0; -} - -/* - * Delete directory entry for WAN device. - */ -int wanrouter_proc_delete(struct wan_device* wandev) -{ - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - remove_proc_entry(wandev->name, proc_router); - return 0; -} - -#else - -/* - * No /proc - output stubs - */ - -int __init wanrouter_proc_init(void) -{ - return 0; -} - -void wanrouter_proc_cleanup(void) -{ -} - -int wanrouter_proc_add(struct wan_device *wandev) -{ - return 0; -} - -int wanrouter_proc_delete(struct wan_device *wandev) -{ - return 0; -} - -#endif - -/* - * End - */ - -- cgit v1.2.3-71-gd317 From 42745e039312ab4672c60ec584651f0c74e8264f Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Mon, 4 Feb 2013 13:53:11 +0200 Subject: cfg80211: expand per-station byte counters to 64bit In per-station statistics, present 32bit counters are too small for practical purposes - with gigabit speeds, it get overlapped every few seconds. Expand counters in the struct station_info to be 64-bit. Driver can still fill only 32-bit and indicate in @filled only bits like STATION_INFO_[TR]X_BYTES; in case driver provides full 64-bit counter, it should also set in @filled bit STATION_INFO_[TR]RX_BYTES64 Netlink sends both 32-bit and 64-bit counters, if present, to not break userspace. 
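For illustration only (not part of this commit): a driver that keeps its own 64-bit
byte accumulators could report them roughly as follows. The helper name and the
hw_rx_bytes/hw_tx_bytes parameters are hypothetical; only struct station_info, its
u64 rx_bytes/tx_bytes fields and the STATION_INFO_*_BYTES64 flags come from this
patch.

#include <net/cfg80211.h>

/* Hypothetical driver helper: publish full 64-bit per-station byte
 * counters.  Setting the *_BYTES64 bits tells nl80211 that the whole
 * 64-bit value is valid; the legacy 32-bit attributes are still sent
 * (truncated) so existing userspace keeps working.
 */
static void drv_fill_sta_byte_counters(struct station_info *sinfo,
				       u64 hw_rx_bytes, u64 hw_tx_bytes)
{
	sinfo->rx_bytes = hw_rx_bytes;
	sinfo->tx_bytes = hw_tx_bytes;
	sinfo->filled |= STATION_INFO_RX_BYTES64 |
			 STATION_INFO_TX_BYTES64;
}
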
Signed-off-by: Vladimir Kondratiev [change to also have 32-bit counters if driver advertises 64-bit] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 ++++++++---- include/uapi/linux/nl80211.h | 4 ++++ net/wireless/nl80211.c | 16 +++++++++++++--- 3 files changed, 25 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 63599ab6005b..94a0810ef68e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -672,8 +672,10 @@ struct station_parameters { * @STATION_INFO_SIGNAL: @signal filled * @STATION_INFO_TX_BITRATE: @txrate fields are filled * (tx_bitrate, tx_bitrate_flags and tx_bitrate_mcs) - * @STATION_INFO_RX_PACKETS: @rx_packets filled - * @STATION_INFO_TX_PACKETS: @tx_packets filled + * @STATION_INFO_RX_PACKETS: @rx_packets filled with 32-bit value + * @STATION_INFO_TX_PACKETS: @tx_packets filled with 32-bit value + * @STATION_INFO_RX_PACKETS64: @rx_packets filled with 64-bit value + * @STATION_INFO_TX_PACKETS64: @tx_packets filled with 64-bit value * @STATION_INFO_TX_RETRIES: @tx_retries filled * @STATION_INFO_TX_FAILED: @tx_failed filled * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled @@ -714,6 +716,8 @@ enum station_info_flags { STATION_INFO_LOCAL_PM = 1<<21, STATION_INFO_PEER_PM = 1<<22, STATION_INFO_NONPEER_PM = 1<<23, + STATION_INFO_RX_BYTES64 = 1<<24, + STATION_INFO_TX_BYTES64 = 1<<25, }; /** @@ -835,8 +839,8 @@ struct station_info { u32 filled; u32 connected_time; u32 inactive_time; - u32 rx_bytes; - u32 tx_bytes; + u64 rx_bytes; + u64 tx_bytes; u16 llid; u16 plid; u8 plink_state; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 225a65e72219..9a2ecdc4136c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1857,6 +1857,8 @@ enum nl80211_sta_bss_param { * @NL80211_STA_INFO_INACTIVE_TIME: time since last activity (u32, msecs) * @NL80211_STA_INFO_RX_BYTES: total received bytes (u32, from this station) * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (u32, to this station) + * @NL80211_STA_INFO_RX_BYTES64: total received bytes (u64, from this station) + * @NL80211_STA_INFO_TX_BYTES64: total transmitted bytes (u64, to this station) * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute * containing info as possible, see &enum nl80211_rate_info @@ -1909,6 +1911,8 @@ enum nl80211_sta_info { NL80211_STA_INFO_LOCAL_PM, NL80211_STA_INFO_PEER_PM, NL80211_STA_INFO_NONPEER_PM, + NL80211_STA_INFO_RX_BYTES64, + NL80211_STA_INFO_TX_BYTES64, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d359734b6972..807d448e702e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3057,12 +3057,22 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_STA_INFO_INACTIVE_TIME, sinfo->inactive_time)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_RX_BYTES) && + if ((sinfo->filled & (STATION_INFO_RX_BYTES | + STATION_INFO_RX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES, - sinfo->rx_bytes)) + (u32)sinfo->rx_bytes)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_BYTES) && + if ((sinfo->filled & (STATION_INFO_TX_BYTES | + NL80211_STA_INFO_TX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES, + (u32)sinfo->tx_bytes)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_RX_BYTES64) && 
+ nla_put_u64(msg, NL80211_STA_INFO_RX_BYTES64, + sinfo->rx_bytes)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_TX_BYTES64) && + nla_put_u64(msg, NL80211_STA_INFO_TX_BYTES64, sinfo->tx_bytes)) goto nla_put_failure; if ((sinfo->filled & STATION_INFO_LLID) && -- cgit v1.2.3-71-gd317 From d1beadd1cb649404bfa2c3d92f77dbcb15b712e5 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 28 Jan 2013 10:44:48 +0000 Subject: netfilter: xt_conntrack: Add flag to support aliases The patch adds the flag to denote the "state" alias as of the subset of the "conntrack" match. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_conntrack.h b/include/uapi/linux/netfilter/xt_conntrack.h index e3c041d54020..e5bd3083a843 100644 --- a/include/uapi/linux/netfilter/xt_conntrack.h +++ b/include/uapi/linux/netfilter/xt_conntrack.h @@ -31,6 +31,7 @@ enum { XT_CONNTRACK_REPLSRC_PORT = 1 << 10, XT_CONNTRACK_REPLDST_PORT = 1 << 11, XT_CONNTRACK_DIRECTION = 1 << 12, + XT_CONNTRACK_STATE_ALIAS = 1 << 13, }; struct xt_conntrack_mtinfo1 { -- cgit v1.2.3-71-gd317 From 5474f57f7d686ac918355419cb71496f835aaf5d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 30 Jan 2013 20:24:22 +0100 Subject: netfilter: xt_CT: add alias flag This patch adds the alias flag to support full NOTRACK target aliasing. Based on initial patch from Jozsef Kadlecsik. Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_CT.h | 6 +++++- net/netfilter/xt_CT.c | 32 +++++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_CT.h b/include/uapi/linux/netfilter/xt_CT.h index a064b8af360c..5a688c1ca4d7 100644 --- a/include/uapi/linux/netfilter/xt_CT.h +++ b/include/uapi/linux/netfilter/xt_CT.h @@ -3,7 +3,11 @@ #include -#define XT_CT_NOTRACK 0x1 +enum { + XT_CT_NOTRACK = 1 << 0, + XT_CT_NOTRACK_ALIAS = 1 << 1, + XT_CT_MASK = XT_CT_NOTRACK | XT_CT_NOTRACK_ALIAS, +}; struct xt_ct_target_info { __u16 flags; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index d69f1c7532f7..a60261cb0e80 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -185,9 +185,6 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, struct nf_conn *ct; int ret = -EOPNOTSUPP; - if (info->flags & ~XT_CT_NOTRACK) - return -EINVAL; - if (info->flags & XT_CT_NOTRACK) { ct = nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); @@ -256,6 +253,9 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) }; int ret; + if (info->flags & ~XT_CT_NOTRACK) + return -EINVAL; + memcpy(info_v1.helper, info->helper, sizeof(info->helper)); ret = xt_ct_tg_check(par, &info_v1); @@ -269,6 +269,21 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) { + struct xt_ct_target_info_v1 *info = par->targinfo; + + if (info->flags & ~XT_CT_NOTRACK) + return -EINVAL; + + return xt_ct_tg_check(par, par->targinfo); +} + +static int xt_ct_tg_check_v2(const struct xt_tgchk_param *par) +{ + struct xt_ct_target_info_v1 *info = par->targinfo; + + if (info->flags & ~XT_CT_MASK) + return -EINVAL; + return xt_ct_tg_check(par, par->targinfo); } @@ -350,6 +365,17 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .table = "raw", .me = THIS_MODULE, }, + { + 
.name = "CT", + .family = NFPROTO_UNSPEC, + .revision = 2, + .targetsize = sizeof(struct xt_ct_target_info_v1), + .checkentry = xt_ct_tg_check_v2, + .destroy = xt_ct_tg_destroy_v1, + .target = xt_ct_target_v1, + .table = "raw", + .me = THIS_MODULE, + }, }; static unsigned int -- cgit v1.2.3-71-gd317 From 4f4ffc3a5398ef9bdbb32db04756d7d34e356fcf Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 4 Feb 2013 19:39:52 +0000 Subject: unbreak automounter support on 64-bit kernel with 32-bit userspace (v2) automount-support is broken on the parisc architecture, because the existing #if list does not include a check for defined(__hppa__). The HPPA (parisc) architecture is similiar to other 64bit Linux targets where we have to define autofs_wqt_t (which is passed back and forth to user space) as int type which has a size of 32bit across 32 and 64bit kernels. During the discussion on the mailing list, H. Peter Anvin suggested to invert the #if list since only specific platforms (specifically those who do not have a 32bit userspace, like IA64 and Alpha) should have autofs_wqt_t as unsigned long type. This suggestion is probably the best way to go, since Arm64 (and maybe others?) seems to have a non-working automounter. So in the long run even for other new upcoming architectures this inverted check seem to be the best solution, since it will not require them to change this #if again (unless they are 64bit only). Signed-off-by: Helge Deller Acked-by: H. Peter Anvin Acked-by: Ian Kent Acked-by: Catalin Marinas CC: James Bottomley CC: Rolf Eike Beer --- include/uapi/linux/auto_fs.h | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index 77cdba9df274..bb991dfe134f 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h @@ -28,25 +28,16 @@ #define AUTOFS_MIN_PROTO_VERSION AUTOFS_PROTO_VERSION /* - * Architectures where both 32- and 64-bit binaries can be executed - * on 64-bit kernels need this. This keeps the structure format - * uniform, and makes sure the wait_queue_token isn't too big to be - * passed back down to the kernel. - * - * This assumes that on these architectures: - * mode 32 bit 64 bit - * ------------------------- - * int 32 bit 32 bit - * long 32 bit 64 bit - * - * If so, 32-bit user-space code should be backwards compatible. + * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed + * back to the kernel via ioctl from userspace. On architectures where 32- and + * 64-bit userspace binaries can be executed it's important that the size of + * autofs_wqt_t stays constant between 32- and 64-bit Linux kernels so that we + * do not break the binary ABI interface by changing the structure size. */ - -#if defined(__sparc__) || defined(__mips__) || defined(__x86_64__) \ - || defined(__powerpc__) || defined(__s390__) -typedef unsigned int autofs_wqt_t; -#else +#if defined(__ia64__) || defined(__alpha__) /* pure 64bit architectures */ typedef unsigned long autofs_wqt_t; +#else +typedef unsigned int autofs_wqt_t; #endif /* Packet types */ -- cgit v1.2.3-71-gd317 From d021c344051af91f42c5ba9fdedc176740cbd238 Mon Sep 17 00:00:00 2001 From: Andy King Date: Wed, 6 Feb 2013 14:23:56 +0000 Subject: VSOCK: Introduce VM Sockets VM Sockets allows communication between virtual machines and the hypervisor. 
User level applications both in a virtual machine and on the host can use the VM Sockets API, which facilitates fast and efficient communication between guest virtual machines and their host. A socket address family, designed to be compatible with UDP and TCP at the interface level, is provided. Today, VM Sockets is used by various VMware Tools components inside the guest for zero-config, network-less access to VMware host services. In addition to this, VMware's users are using VM Sockets for various applications, where network access of the virtual machine is restricted or non-existent. Examples of this are VMs communicating with device proxies for proprietary hardware running as host applications and automated testing of applications running within virtual machines. The VMware VM Sockets are similar to other socket types, like Berkeley UNIX socket interface. The VM Sockets module supports both connection-oriented stream sockets like TCP, and connectionless datagram sockets like UDP. The VM Sockets protocol family is defined as "AF_VSOCK" and the socket operations split for SOCK_DGRAM and SOCK_STREAM. For additional information about the use of VM Sockets, please refer to the VM Sockets Programming Guide available at: https://www.vmware.com/support/developer/vmci-sdk/ Signed-off-by: George Zhang Signed-off-by: Dmitry Torokhov Signed-off-by: Andy king Signed-off-by: David S. Miller --- include/linux/socket.h | 4 +- include/uapi/linux/vm_sockets.h | 171 ++ net/Kconfig | 1 + net/Makefile | 1 + net/vmw_vsock/Kconfig | 28 + net/vmw_vsock/Makefile | 7 + net/vmw_vsock/af_vsock.c | 2015 ++++++++++++++++++++++++ net/vmw_vsock/af_vsock.h | 175 +++ net/vmw_vsock/vmci_transport.c | 2157 ++++++++++++++++++++++++++ net/vmw_vsock/vmci_transport.h | 139 ++ net/vmw_vsock/vmci_transport_notify.c | 680 ++++++++ net/vmw_vsock/vmci_transport_notify.h | 83 + net/vmw_vsock/vmci_transport_notify_qstate.c | 438 ++++++ net/vmw_vsock/vsock_addr.c | 86 + net/vmw_vsock/vsock_addr.h | 32 + net/vmw_vsock/vsock_version.h | 22 + 16 files changed, 6038 insertions(+), 1 deletion(-) create mode 100644 include/uapi/linux/vm_sockets.h create mode 100644 net/vmw_vsock/Kconfig create mode 100644 net/vmw_vsock/Makefile create mode 100644 net/vmw_vsock/af_vsock.c create mode 100644 net/vmw_vsock/af_vsock.h create mode 100644 net/vmw_vsock/vmci_transport.c create mode 100644 net/vmw_vsock/vmci_transport.h create mode 100644 net/vmw_vsock/vmci_transport_notify.c create mode 100644 net/vmw_vsock/vmci_transport_notify.h create mode 100644 net/vmw_vsock/vmci_transport_notify_qstate.c create mode 100644 net/vmw_vsock/vsock_addr.c create mode 100644 net/vmw_vsock/vsock_addr.h create mode 100644 net/vmw_vsock/vsock_version.h (limited to 'include/uapi/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 9a546ff853dc..2b9f74b0ffea 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -178,7 +178,8 @@ struct ucred { #define AF_CAIF 37 /* CAIF sockets */ #define AF_ALG 38 /* Algorithm sockets */ #define AF_NFC 39 /* NFC sockets */ -#define AF_MAX 40 /* For now.. */ +#define AF_VSOCK 40 /* vSockets */ +#define AF_MAX 41 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -221,6 +222,7 @@ struct ucred { #define PF_CAIF AF_CAIF #define PF_ALG AF_ALG #define PF_NFC AF_NFC +#define PF_VSOCK AF_VSOCK #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. 
*/ diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h new file mode 100644 index 000000000000..f7f2e99dec84 --- /dev/null +++ b/include/uapi/linux/vm_sockets.h @@ -0,0 +1,171 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VM_SOCKETS_H_ +#define _VM_SOCKETS_H_ + +#if !defined(__KERNEL__) +#include +#endif + +/* Option name for STREAM socket buffer size. Use as the option name in + * setsockopt(3) or getsockopt(3) to set or get an unsigned long long that + * specifies the size of the buffer underlying a vSockets STREAM socket. + * Value is clamped to the MIN and MAX. + */ + +#define SO_VM_SOCKETS_BUFFER_SIZE 0 + +/* Option name for STREAM socket minimum buffer size. Use as the option name + * in setsockopt(3) or getsockopt(3) to set or get an unsigned long long that + * specifies the minimum size allowed for the buffer underlying a vSockets + * STREAM socket. + */ + +#define SO_VM_SOCKETS_BUFFER_MIN_SIZE 1 + +/* Option name for STREAM socket maximum buffer size. Use as the option name + * in setsockopt(3) or getsockopt(3) to set or get an unsigned long long + * that specifies the maximum size allowed for the buffer underlying a + * vSockets STREAM socket. + */ + +#define SO_VM_SOCKETS_BUFFER_MAX_SIZE 2 + +/* Option name for socket peer's host-specific VM ID. Use as the option name + * in getsockopt(3) to get a host-specific identifier for the peer endpoint's + * VM. The identifier is a signed integer. + * Only available for hypervisor endpoints. + */ + +#define SO_VM_SOCKETS_PEER_HOST_VM_ID 3 + +/* Option name for socket's service label. Use as the option name in + * setsockopt(3) or getsockopt(3) to set or get the service label for a socket. + * The service label is a C-style NUL-terminated string. Only available for + * hypervisor endpoints. + */ + +#define SO_VM_SOCKETS_SERVICE_LABEL 4 + +/* Option name for determining if a socket is trusted. Use as the option name + * in getsockopt(3) to determine if a socket is trusted. The value is a + * signed integer. + */ + +#define SO_VM_SOCKETS_TRUSTED 5 + +/* Option name for STREAM socket connection timeout. Use as the option name + * in setsockopt(3) or getsockopt(3) to set or get the connection + * timeout for a STREAM socket. + */ + +#define SO_VM_SOCKETS_CONNECT_TIMEOUT 6 + +/* Option name for using non-blocking send/receive. Use as the option name + * for setsockopt(3) or getsockopt(3) to set or get the non-blocking + * transmit/receive flag for a STREAM socket. This flag determines whether + * send() and recv() can be called in non-blocking contexts for the given + * socket. The value is a signed integer. + * + * This option is only relevant to kernel endpoints, where descheduling the + * thread of execution is not allowed, for example, while holding a spinlock. + * It is not to be confused with conventional non-blocking socket operations. + * + * Only available for hypervisor endpoints. + */ + +#define SO_VM_SOCKETS_NONBLOCK_TXRX 7 + +/* The vSocket equivalent of INADDR_ANY. 
This works for the svm_cid field of + * sockaddr_vm and indicates the context ID of the current endpoint. + */ + +#define VMADDR_CID_ANY -1U + +/* Bind to any available port. Works for the svm_port field of + * sockaddr_vm. + */ + +#define VMADDR_PORT_ANY -1U + +/* Use this as the destination CID in an address when referring to the + * hypervisor. VMCI relies on it being 0, but this would be useful for other + * transports too. + */ + +#define VMADDR_CID_HYPERVISOR 0 + +/* This CID is specific to VMCI and can be considered reserved (even VMCI + * doesn't use it anymore, it's a legacy value from an older release). + */ + +#define VMADDR_CID_RESERVED 1 + +/* Use this as the destination CID in an address when referring to the host + * (any process other than the hypervisor). VMCI relies on it being 2, but + * this would be useful for other transports too. + */ + +#define VMADDR_CID_HOST 2 + +/* Invalid vSockets version. */ + +#define VM_SOCKETS_INVALID_VERSION -1U + +/* The epoch (first) component of the vSockets version. A single byte + * representing the epoch component of the vSockets version. + */ + +#define VM_SOCKETS_VERSION_EPOCH(_v) (((_v) & 0xFF000000) >> 24) + +/* The major (second) component of the vSockets version. A single byte + * representing the major component of the vSockets version. Typically + * changes for every major release of a product. + */ + +#define VM_SOCKETS_VERSION_MAJOR(_v) (((_v) & 0x00FF0000) >> 16) + +/* The minor (third) component of the vSockets version. Two bytes representing + * the minor component of the vSockets version. + */ + +#define VM_SOCKETS_VERSION_MINOR(_v) (((_v) & 0x0000FFFF)) + +/* Address structure for vSockets. The address family should be set to + * whatever vmci_sock_get_af_value_fd() returns. The structure members should + * all align on their natural boundaries without resorting to compiler packing + * directives. The total size of this structure should be exactly the same as + * that of struct sockaddr. + */ + +struct sockaddr_vm { + sa_family_t svm_family; + unsigned short svm_reserved1; + unsigned int svm_port; + unsigned int svm_cid; + unsigned char svm_zero[sizeof(struct sockaddr) - + sizeof(sa_family_t) - + sizeof(unsigned short) - + sizeof(unsigned int) - sizeof(unsigned int)]; +}; + +#define IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9) + +#if defined(__KERNEL__) +int vm_sockets_get_local_cid(void); +#endif + +#endif diff --git a/net/Kconfig b/net/Kconfig index c31348e70aad..5a1888bb036d 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -217,6 +217,7 @@ source "net/dcb/Kconfig" source "net/dns_resolver/Kconfig" source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" +source "net/vmw_vsock/Kconfig" config RPS boolean diff --git a/net/Makefile b/net/Makefile index c5aa8b3b49dc..091e7b04f301 100644 --- a/net/Makefile +++ b/net/Makefile @@ -69,3 +69,4 @@ obj-$(CONFIG_CEPH_LIB) += ceph/ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ +obj-$(CONFIG_VSOCKETS) += vmw_vsock/ diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig new file mode 100644 index 000000000000..b5fa7e40cdcb --- /dev/null +++ b/net/vmw_vsock/Kconfig @@ -0,0 +1,28 @@ +# +# Vsock protocol +# + +config VSOCKETS + tristate "Virtual Socket protocol" + help + Virtual Socket Protocol is a socket protocol similar to TCP/IP + allowing comunication between Virtual Machines and hypervisor + or host. + + You should also select one or more hypervisor-specific transports + below. 
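Taken together, the sockaddr_vm layout, the VMADDR_* constants and the IOCTL_VM_SOCKETS_GET_LOCAL_CID ioctl introduced above are enough to write a guest-side client. The following is only a minimal sketch: it assumes AF_VSOCK and <linux/vm_sockets.h> are visible to userspace, that udev has created the "vsock" misc device as /dev/vsock, and it uses an invented service port (1234); error handling is abbreviated.

/* Hypothetical userspace sketch: query the local context ID through the
 * vsock misc device, then connect a stream socket to a service on the host.
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

int main(void)
{
	struct sockaddr_vm addr;
	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
	unsigned int cid;
	int dev_fd, fd;

	dev_fd = open("/dev/vsock", O_RDONLY);
	if (dev_fd < 0 || ioctl(dev_fd, IOCTL_VM_SOCKETS_GET_LOCAL_CID, &cid) < 0)
		return 1;
	printf("local CID: %u\n", cid);

	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
	if (fd < 0)
		return 1;

	/* Socket options for this family use AF_VSOCK as the option level. */
	setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_CONNECT_TIMEOUT, &tv, sizeof(tv));

	memset(&addr, 0, sizeof(addr));
	addr.svm_family = AF_VSOCK;
	addr.svm_cid = VMADDR_CID_HOST;		/* CID 2: a process on the host */
	addr.svm_port = 1234;			/* hypothetical service port */

	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) == 0)
		write(fd, "hello", 5);

	close(fd);
	close(dev_fd);
	return 0;
}

Passing AF_VSOCK as the setsockopt level mirrors the level check performed by the stream setsockopt/getsockopt handlers added later in af_vsock.c.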
+ + To compile this driver as a module, choose M here: the module + will be called vsock. If unsure, say N. + +config VMWARE_VMCI_VSOCKETS + tristate "VMware VMCI transport for Virtual Sockets" + depends on VSOCKETS && VMWARE_VMCI + help + This module implements a VMCI transport for Virtual Sockets. + + Enable this transport if your Virtual Machine runs on a VMware + hypervisor. + + To compile this driver as a module, choose M here: the module + will be called vmw_vsock_vmci_transport. If unsure, say N. diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile new file mode 100644 index 000000000000..2ce52d70f224 --- /dev/null +++ b/net/vmw_vsock/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_VSOCKETS) += vsock.o +obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o + +vsock-y += af_vsock.o vsock_addr.o + +vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ + vmci_transport_notify_qstate.o diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c new file mode 100644 index 000000000000..54bb7bdf92d3 --- /dev/null +++ b/net/vmw_vsock/af_vsock.c @@ -0,0 +1,2015 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +/* Implementation notes: + * + * - There are two kinds of sockets: those created by user action (such as + * calling socket(2)) and those created by incoming connection request packets. + * + * - There are two "global" tables, one for bound sockets (sockets that have + * specified an address that they are responsible for) and one for connected + * sockets (sockets that have established a connection with another socket). + * These tables are "global" in that all sockets on the system are placed + * within them. - Note, though, that the bound table contains an extra entry + * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in + * that list. The bound table is used solely for lookup of sockets when packets + * are received and that's not necessary for SOCK_DGRAM sockets since we create + * a datagram handle for each and need not perform a lookup. Keeping SOCK_DGRAM + * sockets out of the bound hash buckets will reduce the chance of collisions + * when looking for SOCK_STREAM sockets and prevents us from having to check the + * socket type in the hash table lookups. + * + * - Sockets created by user action will either be "client" sockets that + * initiate a connection or "server" sockets that listen for connections; we do + * not support simultaneous connects (two "client" sockets connecting). + * + * - "Server" sockets are referred to as listener sockets throughout this + * implementation because they are in the SS_LISTEN state. When a connection + * request is received (the second kind of socket mentioned above), we create a + * new socket and refer to it as a pending socket. These pending sockets are + * placed on the pending connection list of the listener socket. 
When future + * packets are received for the address the listener socket is bound to, we + * check if the source of the packet is from one that has an existing pending + * connection. If it does, we process the packet for the pending socket. When + * that socket reaches the connected state, it is removed from the listener + * socket's pending list and enqueued in the listener socket's accept queue. + * Callers of accept(2) will accept connected sockets from the listener socket's + * accept queue. If the socket cannot be accepted for some reason then it is + * marked rejected. Once the connection is accepted, it is owned by the user + * process and the responsibility for cleanup falls with that user process. + * + * - It is possible that these pending sockets will never reach the connected + * state; in fact, we may never receive another packet after the connection + * request. Because of this, we must schedule a cleanup function to run in the + * future, after some amount of time passes where a connection should have been + * established. This function ensures that the socket is off all lists so it + * cannot be retrieved, then drops all references to the socket so it is cleaned + * up (sock_put() -> sk_free() -> our sk_destruct implementation). Note this + * function will also cleanup rejected sockets, those that reach the connected + * state but leave it before they have been accepted. + * + * - Sockets created by user action will be cleaned up when the user process + * calls close(2), causing our release implementation to be called. Our release + * implementation will perform some cleanup then drop the last reference so our + * sk_destruct implementation is invoked. Our sk_destruct implementation will + * perform additional cleanup that's common for both types of sockets. + * + * - A socket's reference count is what ensures that the structure won't be + * freed. Each entry in a list (such as the "global" bound and connected tables + * and the listener socket's pending list and connected queue) ensures a + * reference. When we defer work until process context and pass a socket as our + * argument, we must ensure the reference count is increased to ensure the + * socket isn't freed before the function is run; the deferred function will + * then drop the reference. + */ + +#include + +#define EXPORT_SYMTAB +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "af_vsock.h" +#include "vsock_version.h" + +static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); +static void vsock_sk_destruct(struct sock *sk); +static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); + +/* Protocol family. */ +static struct proto vsock_proto = { + .name = "AF_VSOCK", + .owner = THIS_MODULE, + .obj_size = sizeof(struct vsock_sock), +}; + +/* The default peer timeout indicates how long we will wait for a peer response + * to a control message. + */ +#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) + +#define SS_LISTEN 255 + +static const struct vsock_transport *transport; +static DEFINE_MUTEX(vsock_register_mutex); + +/**** EXPORTS ****/ + +/* Get the ID of the local context. This is transport dependent. 
*/ + +int vm_sockets_get_local_cid(void) +{ + return transport->get_local_cid(); +} +EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid); + +/**** UTILS ****/ + +/* Each bound VSocket is stored in the bind hash table and each connected + * VSocket is stored in the connected hash table. + * + * Unbound sockets are all put on the same list attached to the end of the hash + * table (vsock_unbound_sockets). Bound sockets are added to the hash table in + * the bucket that their local address hashes to (vsock_bound_sockets(addr) + * represents the list that addr hashes to). + * + * Specifically, we initialize the vsock_bind_table array to a size of + * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through + * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and + * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function + * mods with VSOCK_HASH_SIZE - 1 to ensure this. + */ +#define VSOCK_HASH_SIZE 251 +#define MAX_PORT_RETRIES 24 + +#define VSOCK_HASH(addr) ((addr)->svm_port % (VSOCK_HASH_SIZE - 1)) +#define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)]) +#define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE]) + +/* XXX This can probably be implemented in a better way. */ +#define VSOCK_CONN_HASH(src, dst) \ + (((src)->svm_cid ^ (dst)->svm_port) % (VSOCK_HASH_SIZE - 1)) +#define vsock_connected_sockets(src, dst) \ + (&vsock_connected_table[VSOCK_CONN_HASH(src, dst)]) +#define vsock_connected_sockets_vsk(vsk) \ + vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr) + +static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; +static struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; +static DEFINE_SPINLOCK(vsock_table_lock); + +static __init void vsock_init_tables(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vsock_bind_table); i++) + INIT_LIST_HEAD(&vsock_bind_table[i]); + + for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) + INIT_LIST_HEAD(&vsock_connected_table[i]); +} + +static void __vsock_insert_bound(struct list_head *list, + struct vsock_sock *vsk) +{ + sock_hold(&vsk->sk); + list_add(&vsk->bound_table, list); +} + +static void __vsock_insert_connected(struct list_head *list, + struct vsock_sock *vsk) +{ + sock_hold(&vsk->sk); + list_add(&vsk->connected_table, list); +} + +static void __vsock_remove_bound(struct vsock_sock *vsk) +{ + list_del_init(&vsk->bound_table); + sock_put(&vsk->sk); +} + +static void __vsock_remove_connected(struct vsock_sock *vsk) +{ + list_del_init(&vsk->connected_table); + sock_put(&vsk->sk); +} + +static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) + if (vsock_addr_equals_addr_any(addr, &vsk->local_addr)) + return sk_vsock(vsk); + + return NULL; +} + +static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_connected_sockets(src, dst), + connected_table) { + if (vsock_addr_equals_addr(src, &vsk->remote_addr) + && vsock_addr_equals_addr(dst, &vsk->local_addr)) { + return sk_vsock(vsk); + } + } + + return NULL; +} + +static bool __vsock_in_bound_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->bound_table); +} + +static bool __vsock_in_connected_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->connected_table); +} + +static void vsock_insert_unbound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + 
__vsock_insert_bound(vsock_unbound_sockets, vsk); + spin_unlock_bh(&vsock_table_lock); +} + +void vsock_insert_connected(struct vsock_sock *vsk) +{ + struct list_head *list = vsock_connected_sockets( + &vsk->remote_addr, &vsk->local_addr); + + spin_lock_bh(&vsock_table_lock); + __vsock_insert_connected(list, vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_insert_connected); + +void vsock_remove_bound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_remove_bound(vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_remove_bound); + +void vsock_remove_connected(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_remove_connected(vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_remove_connected); + +struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_bound_socket(addr); + if (sk) + sock_hold(sk); + + spin_unlock_bh(&vsock_table_lock); + + return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_bound_socket); + +struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_connected_socket(src, dst); + if (sk) + sock_hold(sk); + + spin_unlock_bh(&vsock_table_lock); + + return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_connected_socket); + +static bool vsock_in_bound_table(struct vsock_sock *vsk) +{ + bool ret; + + spin_lock_bh(&vsock_table_lock); + ret = __vsock_in_bound_table(vsk); + spin_unlock_bh(&vsock_table_lock); + + return ret; +} + +static bool vsock_in_connected_table(struct vsock_sock *vsk) +{ + bool ret; + + spin_lock_bh(&vsock_table_lock); + ret = __vsock_in_connected_table(vsk); + spin_unlock_bh(&vsock_table_lock); + + return ret; +} + +void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) +{ + int i; + + spin_lock_bh(&vsock_table_lock); + + for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { + struct vsock_sock *vsk; + list_for_each_entry(vsk, &vsock_connected_table[i], + connected_table); + fn(sk_vsock(vsk)); + } + + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_for_each_connected_socket); + +void vsock_add_pending(struct sock *listener, struct sock *pending) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + + vlistener = vsock_sk(listener); + vpending = vsock_sk(pending); + + sock_hold(pending); + sock_hold(listener); + list_add_tail(&vpending->pending_links, &vlistener->pending_links); +} +EXPORT_SYMBOL_GPL(vsock_add_pending); + +void vsock_remove_pending(struct sock *listener, struct sock *pending) +{ + struct vsock_sock *vpending = vsock_sk(pending); + + list_del_init(&vpending->pending_links); + sock_put(listener); + sock_put(pending); +} +EXPORT_SYMBOL_GPL(vsock_remove_pending); + +void vsock_enqueue_accept(struct sock *listener, struct sock *connected) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vconnected; + + vlistener = vsock_sk(listener); + vconnected = vsock_sk(connected); + + sock_hold(connected); + sock_hold(listener); + list_add_tail(&vconnected->accept_queue, &vlistener->accept_queue); +} +EXPORT_SYMBOL_GPL(vsock_enqueue_accept); + +static struct sock *vsock_dequeue_accept(struct sock *listener) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vconnected; + + vlistener = vsock_sk(listener); + + if (list_empty(&vlistener->accept_queue)) + return NULL; + + vconnected = list_entry(vlistener->accept_queue.next, + 
struct vsock_sock, accept_queue); + + list_del_init(&vconnected->accept_queue); + sock_put(listener); + /* The caller will need a reference on the connected socket so we let + * it call sock_put(). + */ + + return sk_vsock(vconnected); +} + +static bool vsock_is_accept_queue_empty(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + return list_empty(&vsk->accept_queue); +} + +static bool vsock_is_pending(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + return !list_empty(&vsk->pending_links); +} + +static int vsock_send_shutdown(struct sock *sk, int mode) +{ + return transport->shutdown(vsock_sk(sk), mode); +} + +void vsock_pending_work(struct work_struct *work) +{ + struct sock *sk; + struct sock *listener; + struct vsock_sock *vsk; + bool cleanup; + + vsk = container_of(work, struct vsock_sock, dwork.work); + sk = sk_vsock(vsk); + listener = vsk->listener; + cleanup = true; + + lock_sock(listener); + lock_sock(sk); + + if (vsock_is_pending(sk)) { + vsock_remove_pending(listener, sk); + } else if (!vsk->rejected) { + /* We are not on the pending list and accept() did not reject + * us, so we must have been accepted by our user process. We + * just need to drop our references to the sockets and be on + * our way. + */ + cleanup = false; + goto out; + } + + listener->sk_ack_backlog--; + + /* We need to remove ourself from the global connected sockets list so + * incoming packets can't find this socket, and to reduce the reference + * count. + */ + if (vsock_in_connected_table(vsk)) + vsock_remove_connected(vsk); + + sk->sk_state = SS_FREE; + +out: + release_sock(sk); + release_sock(listener); + if (cleanup) + sock_put(sk); + + sock_put(sk); + sock_put(listener); +} +EXPORT_SYMBOL_GPL(vsock_pending_work); + +/**** SOCKET OPERATIONS ****/ + +static int __vsock_bind_stream(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + static u32 port = LAST_RESERVED_PORT + 1; + struct sockaddr_vm new_addr; + + vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); + + if (addr->svm_port == VMADDR_PORT_ANY) { + bool found = false; + unsigned int i; + + for (i = 0; i < MAX_PORT_RETRIES; i++) { + if (port <= LAST_RESERVED_PORT) + port = LAST_RESERVED_PORT + 1; + + new_addr.svm_port = port++; + + if (!__vsock_find_bound_socket(&new_addr)) { + found = true; + break; + } + } + + if (!found) + return -EADDRNOTAVAIL; + } else { + /* If port is in reserved range, ensure caller + * has necessary privileges. + */ + if (addr->svm_port <= LAST_RESERVED_PORT && + !capable(CAP_NET_BIND_SERVICE)) { + return -EACCES; + } + + if (__vsock_find_bound_socket(&new_addr)) + return -EADDRINUSE; + } + + vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port); + + /* Remove stream sockets from the unbound list and add them to the hash + * table for easy lookup by its address. The unbound list is simply an + * extra entry at the end of the hash table, a trick used by AF_UNIX. + */ + __vsock_remove_bound(vsk); + __vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk); + + return 0; +} + +static int __vsock_bind_dgram(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + return transport->dgram_bind(vsk, addr); +} + +static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk = vsock_sk(sk); + u32 cid; + int retval; + + /* First ensure this socket isn't already bound. 
*/ + if (vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + /* Now bind to the provided address or select appropriate values if + * none are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY). Note that + * like AF_INET prevents binding to a non-local IP address (in most + * cases), we only allow binding to the local CID. + */ + cid = transport->get_local_cid(); + if (addr->svm_cid != cid && addr->svm_cid != VMADDR_CID_ANY) + return -EADDRNOTAVAIL; + + switch (sk->sk_socket->type) { + case SOCK_STREAM: + spin_lock_bh(&vsock_table_lock); + retval = __vsock_bind_stream(vsk, addr); + spin_unlock_bh(&vsock_table_lock); + break; + + case SOCK_DGRAM: + retval = __vsock_bind_dgram(vsk, addr); + break; + + default: + retval = -EINVAL; + break; + } + + return retval; +} + +struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, + gfp_t priority, + unsigned short type) +{ + struct sock *sk; + struct vsock_sock *psk; + struct vsock_sock *vsk; + + sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto); + if (!sk) + return NULL; + + sock_init_data(sock, sk); + + /* sk->sk_type is normally set in sock_init_data, but only if sock is + * non-NULL. We make sure that our sockets always have a type by + * setting it here if needed. + */ + if (!sock) + sk->sk_type = type; + + vsk = vsock_sk(sk); + vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + + sk->sk_destruct = vsock_sk_destruct; + sk->sk_backlog_rcv = vsock_queue_rcv_skb; + sk->sk_state = 0; + sock_reset_flag(sk, SOCK_DONE); + + INIT_LIST_HEAD(&vsk->bound_table); + INIT_LIST_HEAD(&vsk->connected_table); + vsk->listener = NULL; + INIT_LIST_HEAD(&vsk->pending_links); + INIT_LIST_HEAD(&vsk->accept_queue); + vsk->rejected = false; + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + vsk->peer_shutdown = 0; + + psk = parent ? vsock_sk(parent) : NULL; + if (parent) { + vsk->trusted = psk->trusted; + vsk->owner = get_cred(psk->owner); + vsk->connect_timeout = psk->connect_timeout; + } else { + vsk->trusted = capable(CAP_NET_ADMIN); + vsk->owner = get_current_cred(); + vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; + } + + if (transport->init(vsk, psk) < 0) { + sk_free(sk); + return NULL; + } + + if (sock) + vsock_insert_unbound(vsk); + + return sk; +} +EXPORT_SYMBOL_GPL(__vsock_create); + +static void __vsock_release(struct sock *sk) +{ + if (sk) { + struct sk_buff *skb; + struct sock *pending; + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + pending = NULL; /* Compiler warning. */ + + if (vsock_in_bound_table(vsk)) + vsock_remove_bound(vsk); + + if (vsock_in_connected_table(vsk)) + vsock_remove_connected(vsk); + + transport->release(vsk); + + lock_sock(sk); + sock_orphan(sk); + sk->sk_shutdown = SHUTDOWN_MASK; + + while ((skb = skb_dequeue(&sk->sk_receive_queue))) + kfree_skb(skb); + + /* Clean up any sockets that never were accepted. */ + while ((pending = vsock_dequeue_accept(sk)) != NULL) { + __vsock_release(pending); + sock_put(pending); + } + + release_sock(sk); + sock_put(sk); + } +} + +static void vsock_sk_destruct(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + transport->destruct(vsk); + + /* When clearing these addresses, there's no need to set the family and + * possibly register the address family with the kernel. 
+ */ + vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + + put_cred(vsk->owner); +} + +static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int err; + + err = sock_queue_rcv_skb(sk, skb); + if (err) + kfree_skb(skb); + + return err; +} + +s64 vsock_stream_has_data(struct vsock_sock *vsk) +{ + return transport->stream_has_data(vsk); +} +EXPORT_SYMBOL_GPL(vsock_stream_has_data); + +s64 vsock_stream_has_space(struct vsock_sock *vsk) +{ + return transport->stream_has_space(vsk); +} +EXPORT_SYMBOL_GPL(vsock_stream_has_space); + +static int vsock_release(struct socket *sock) +{ + __vsock_release(sock->sk); + sock->sk = NULL; + sock->state = SS_FREE; + + return 0; +} + +static int +vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +{ + int err; + struct sock *sk; + struct sockaddr_vm *vm_addr; + + sk = sock->sk; + + if (vsock_addr_cast(addr, addr_len, &vm_addr) != 0) + return -EINVAL; + + lock_sock(sk); + err = __vsock_bind(sk, vm_addr); + release_sock(sk); + + return err; +} + +static int vsock_getname(struct socket *sock, + struct sockaddr *addr, int *addr_len, int peer) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *vm_addr; + + sk = sock->sk; + vsk = vsock_sk(sk); + err = 0; + + lock_sock(sk); + + if (peer) { + if (sock->state != SS_CONNECTED) { + err = -ENOTCONN; + goto out; + } + vm_addr = &vsk->remote_addr; + } else { + vm_addr = &vsk->local_addr; + } + + if (!vm_addr) { + err = -EINVAL; + goto out; + } + + /* sys_getsockname() and sys_getpeername() pass us a + * MAX_SOCK_ADDR-sized buffer and don't set addr_len. Unfortunately + * that macro is defined in socket.c instead of .h, so we hardcode its + * value here. + */ + BUILD_BUG_ON(sizeof(*vm_addr) > 128); + memcpy(addr, vm_addr, sizeof(*vm_addr)); + *addr_len = sizeof(*vm_addr); + +out: + release_sock(sk); + return err; +} + +static int vsock_shutdown(struct socket *sock, int mode) +{ + int err; + struct sock *sk; + + /* User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses + * RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode + * here like the other address families do. Note also that the + * increment makes SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3), + * which is what we want. + */ + mode++; + + if ((mode & ~SHUTDOWN_MASK) || !mode) + return -EINVAL; + + /* If this is a STREAM socket and it is not connected then bail out + * immediately. If it is a DGRAM socket then we must first kick the + * socket so that it wakes up from any sleeping calls, for example + * recv(), and then afterwards return the error. + */ + + sk = sock->sk; + if (sock->state == SS_UNCONNECTED) { + err = -ENOTCONN; + if (sk->sk_type == SOCK_STREAM) + return err; + } else { + sock->state = SS_DISCONNECTING; + err = 0; + } + + /* Receive and send shutdowns are treated alike. 
*/ + mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN); + if (mode) { + lock_sock(sk); + sk->sk_shutdown |= mode; + sk->sk_state_change(sk); + release_sock(sk); + + if (sk->sk_type == SOCK_STREAM) { + sock_reset_flag(sk, SOCK_DONE); + vsock_send_shutdown(sk, mode); + } + } + + return err; +} + +static unsigned int vsock_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk; + unsigned int mask; + struct vsock_sock *vsk; + + sk = sock->sk; + vsk = vsock_sk(sk); + + poll_wait(file, sk_sleep(sk), wait); + mask = 0; + + if (sk->sk_err) + /* Signify that there has been an error on this socket. */ + mask |= POLLERR; + + /* INET sockets treat local write shutdown and peer write shutdown as a + * case of POLLHUP set. + */ + if ((sk->sk_shutdown == SHUTDOWN_MASK) || + ((sk->sk_shutdown & SEND_SHUTDOWN) && + (vsk->peer_shutdown & SEND_SHUTDOWN))) { + mask |= POLLHUP; + } + + if (sk->sk_shutdown & RCV_SHUTDOWN || + vsk->peer_shutdown & SEND_SHUTDOWN) { + mask |= POLLRDHUP; + } + + if (sock->type == SOCK_DGRAM) { + /* For datagram sockets we can read if there is something in + * the queue and write as long as the socket isn't shutdown for + * sending. + */ + if (!skb_queue_empty(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) { + mask |= POLLIN | POLLRDNORM; + } + + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + } else if (sock->type == SOCK_STREAM) { + lock_sock(sk); + + /* Listening sockets that have connections in their accept + * queue can be read. + */ + if (sk->sk_state == SS_LISTEN + && !vsock_is_accept_queue_empty(sk)) + mask |= POLLIN | POLLRDNORM; + + /* If there is something in the queue then we can read. */ + if (transport->stream_is_active(vsk) && + !(sk->sk_shutdown & RCV_SHUTDOWN)) { + bool data_ready_now = false; + int ret = transport->notify_poll_in( + vsk, 1, &data_ready_now); + if (ret < 0) { + mask |= POLLERR; + } else { + if (data_ready_now) + mask |= POLLIN | POLLRDNORM; + + } + } + + /* Sockets whose connections have been closed, reset, or + * terminated should also be considered read, and we check the + * shutdown flag for that. + */ + if (sk->sk_shutdown & RCV_SHUTDOWN || + vsk->peer_shutdown & SEND_SHUTDOWN) { + mask |= POLLIN | POLLRDNORM; + } + + /* Connected sockets that can produce data can be written. */ + if (sk->sk_state == SS_CONNECTED) { + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { + bool space_avail_now = false; + int ret = transport->notify_poll_out( + vsk, 1, &space_avail_now); + if (ret < 0) { + mask |= POLLERR; + } else { + if (space_avail_now) + /* Remove POLLWRBAND since INET + * sockets are not setting it. + */ + mask |= POLLOUT | POLLWRNORM; + + } + } + } + + /* Simulate INET socket poll behaviors, which sets + * POLLOUT|POLLWRNORM when peer is closed and nothing to read, + * but local send is not shutdown. + */ + if (sk->sk_state == SS_UNCONNECTED) { + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + mask |= POLLOUT | POLLWRNORM; + + } + + release_sock(sk); + } + + return mask; +} + +static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + /* For now, MSG_DONTWAIT is always assumed... 
*/ + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + /* If the provided message contains an address, use that. Otherwise + * fall back on the socket's remote handle (if it has been connected). + */ + if (msg->msg_name && + vsock_addr_cast(msg->msg_name, msg->msg_namelen, + &remote_addr) == 0) { + /* Ensure this address is of the right type and is a valid + * destination. + */ + + if (remote_addr->svm_cid == VMADDR_CID_ANY) + remote_addr->svm_cid = transport->get_local_cid(); + + if (!vsock_addr_bound(remote_addr)) { + err = -EINVAL; + goto out; + } + } else if (sock->state == SS_CONNECTED) { + remote_addr = &vsk->remote_addr; + + if (remote_addr->svm_cid == VMADDR_CID_ANY) + remote_addr->svm_cid = transport->get_local_cid(); + + /* XXX Should connect() or this function ensure remote_addr is + * bound? + */ + if (!vsock_addr_bound(&vsk->remote_addr)) { + err = -EINVAL; + goto out; + } + } else { + err = -EINVAL; + goto out; + } + + if (!transport->dgram_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -EINVAL; + goto out; + } + + err = transport->dgram_enqueue(vsk, remote_addr, msg->msg_iov, len); + +out: + release_sock(sk); + return err; +} + +static int vsock_dgram_connect(struct socket *sock, + struct sockaddr *addr, int addr_len, int flags) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + + sk = sock->sk; + vsk = vsock_sk(sk); + + err = vsock_addr_cast(addr, addr_len, &remote_addr); + if (err == -EAFNOSUPPORT && remote_addr->svm_family == AF_UNSPEC) { + lock_sock(sk); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, + VMADDR_PORT_ANY); + sock->state = SS_UNCONNECTED; + release_sock(sk); + return 0; + } else if (err != 0) + return -EINVAL; + + lock_sock(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + if (!transport->dgram_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -EINVAL; + goto out; + } + + memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr)); + sock->state = SS_CONNECTED; + +out: + release_sock(sk); + return err; +} + +static int vsock_dgram_recvmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + return transport->dgram_dequeue(kiocb, vsock_sk(sock->sk), msg, len, + flags); +} + +static const struct proto_ops vsock_dgram_ops = { + .family = PF_VSOCK, + .owner = THIS_MODULE, + .release = vsock_release, + .bind = vsock_bind, + .connect = vsock_dgram_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = vsock_getname, + .poll = vsock_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = vsock_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = vsock_dgram_sendmsg, + .recvmsg = vsock_dgram_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static void vsock_connect_timeout(struct work_struct *work) +{ + struct sock *sk; + struct vsock_sock *vsk; + + vsk = container_of(work, struct vsock_sock, dwork.work); + sk = sk_vsock(vsk); + + lock_sock(sk); + if (sk->sk_state == SS_CONNECTING && + (sk->sk_shutdown != 
SHUTDOWN_MASK)) { + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ETIMEDOUT; + sk->sk_error_report(sk); + } + release_sock(sk); + + sock_put(sk); +} + +static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, + int addr_len, int flags) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + long timeout; + DEFINE_WAIT(wait); + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + /* XXX AF_UNSPEC should make us disconnect like AF_INET. */ + switch (sock->state) { + case SS_CONNECTED: + err = -EISCONN; + goto out; + case SS_DISCONNECTING: + err = -EINVAL; + goto out; + case SS_CONNECTING: + /* This continues on so we can move sock into the SS_CONNECTED + * state once the connection has completed (at which point err + * will be set to zero also). Otherwise, we will either wait + * for the connection or return -EALREADY should this be a + * non-blocking call. + */ + err = -EALREADY; + break; + default: + if ((sk->sk_state == SS_LISTEN) || + vsock_addr_cast(addr, addr_len, &remote_addr) != 0) { + err = -EINVAL; + goto out; + } + + /* The hypervisor and well-known contexts do not have socket + * endpoints. + */ + if (!transport->stream_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -ENETUNREACH; + goto out; + } + + /* Set the remote address that we are connecting to. */ + memcpy(&vsk->remote_addr, remote_addr, + sizeof(vsk->remote_addr)); + + /* Autobind this socket to the local address if necessary. */ + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, + VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + sk->sk_state = SS_CONNECTING; + + err = transport->connect(vsk); + if (err < 0) + goto out; + + /* Mark sock as connecting and set the error code to in + * progress in case this is a non-blocking connect. + */ + sock->state = SS_CONNECTING; + err = -EINPROGRESS; + } + + /* The receive path will handle all communication until we are able to + * enter the connected state. Here we wait for the connection to be + * completed or a notification of an error. + */ + timeout = vsk->connect_timeout; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) { + if (flags & O_NONBLOCK) { + /* If we're not going to block, we schedule a timeout + * function to generate a timeout on the connection + * attempt, in case the peer doesn't respond in a + * timely manner. We hold on to the socket until the + * timeout fires. + */ + sock_hold(sk); + INIT_DELAYED_WORK(&vsk->dwork, + vsock_connect_timeout); + schedule_delayed_work(&vsk->dwork, timeout); + + /* Skip ahead to preserve error code set above. 
*/ + goto out_wait; + } + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait_error; + } else if (timeout == 0) { + err = -ETIMEDOUT; + goto out_wait_error; + } + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + } + + if (sk->sk_err) { + err = -sk->sk_err; + goto out_wait_error; + } else + err = 0; + +out_wait: + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; + +out_wait_error: + sk->sk_state = SS_UNCONNECTED; + sock->state = SS_UNCONNECTED; + goto out_wait; +} + +static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) +{ + struct sock *listener; + int err; + struct sock *connected; + struct vsock_sock *vconnected; + long timeout; + DEFINE_WAIT(wait); + + err = 0; + listener = sock->sk; + + lock_sock(listener); + + if (sock->type != SOCK_STREAM) { + err = -EOPNOTSUPP; + goto out; + } + + if (listener->sk_state != SS_LISTEN) { + err = -EINVAL; + goto out; + } + + /* Wait for children sockets to appear; these are the new sockets + * created upon connection establishment. + */ + timeout = sock_sndtimeo(listener, flags & O_NONBLOCK); + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + + while ((connected = vsock_dequeue_accept(listener)) == NULL && + listener->sk_err == 0) { + release_sock(listener); + timeout = schedule_timeout(timeout); + lock_sock(listener); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait; + } else if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + } + + if (listener->sk_err) + err = -listener->sk_err; + + if (connected) { + listener->sk_ack_backlog--; + + lock_sock(connected); + vconnected = vsock_sk(connected); + + /* If the listener socket has received an error, then we should + * reject this socket and return. Note that we simply mark the + * socket rejected, drop our reference, and let the cleanup + * function handle the cleanup; the fact that we found it in + * the listener's accept queue guarantees that the cleanup + * function hasn't run yet. 
+ */ + if (err) { + vconnected->rejected = true; + release_sock(connected); + sock_put(connected); + goto out_wait; + } + + newsock->state = SS_CONNECTED; + sock_graft(connected, newsock); + release_sock(connected); + sock_put(connected); + } + +out_wait: + finish_wait(sk_sleep(listener), &wait); +out: + release_sock(listener); + return err; +} + +static int vsock_listen(struct socket *sock, int backlog) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + + sk = sock->sk; + + lock_sock(sk); + + if (sock->type != SOCK_STREAM) { + err = -EOPNOTSUPP; + goto out; + } + + if (sock->state != SS_UNCONNECTED) { + err = -EINVAL; + goto out; + } + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + err = -EINVAL; + goto out; + } + + sk->sk_max_ack_backlog = backlog; + sk->sk_state = SS_LISTEN; + + err = 0; + +out: + release_sock(sk); + return err; +} + +static int vsock_stream_setsockopt(struct socket *sock, + int level, + int optname, + char __user *optval, + unsigned int optlen) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + u64 val; + + if (level != AF_VSOCK) + return -ENOPROTOOPT; + +#define COPY_IN(_v) \ + do { \ + if (optlen < sizeof(_v)) { \ + err = -EINVAL; \ + goto exit; \ + } \ + if (copy_from_user(&_v, optval, sizeof(_v)) != 0) { \ + err = -EFAULT; \ + goto exit; \ + } \ + } while (0) + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + switch (optname) { + case SO_VM_SOCKETS_BUFFER_SIZE: + COPY_IN(val); + transport->set_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_BUFFER_MAX_SIZE: + COPY_IN(val); + transport->set_max_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_BUFFER_MIN_SIZE: + COPY_IN(val); + transport->set_min_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct timeval tv; + COPY_IN(tv); + if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC && + tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) { + vsk->connect_timeout = tv.tv_sec * HZ + + DIV_ROUND_UP(tv.tv_usec, (1000000 / HZ)); + if (vsk->connect_timeout == 0) + vsk->connect_timeout = + VSOCK_DEFAULT_CONNECT_TIMEOUT; + + } else { + err = -ERANGE; + } + break; + } + + default: + err = -ENOPROTOOPT; + break; + } + +#undef COPY_IN + +exit: + release_sock(sk); + return err; +} + +static int vsock_stream_getsockopt(struct socket *sock, + int level, int optname, + char __user *optval, + int __user *optlen) +{ + int err; + int len; + struct sock *sk; + struct vsock_sock *vsk; + u64 val; + + if (level != AF_VSOCK) + return -ENOPROTOOPT; + + err = get_user(len, optlen); + if (err != 0) + return err; + +#define COPY_OUT(_v) \ + do { \ + if (len < sizeof(_v)) \ + return -EINVAL; \ + \ + len = sizeof(_v); \ + if (copy_to_user(optval, &_v, len) != 0) \ + return -EFAULT; \ + \ + } while (0) + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + switch (optname) { + case SO_VM_SOCKETS_BUFFER_SIZE: + val = transport->get_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_BUFFER_MAX_SIZE: + val = transport->get_max_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_BUFFER_MIN_SIZE: + val = transport->get_min_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct timeval tv; + tv.tv_sec = vsk->connect_timeout / HZ; + tv.tv_usec = + (vsk->connect_timeout - + tv.tv_sec * HZ) * (1000000 / HZ); + COPY_OUT(tv); + break; + } + default: + return -ENOPROTOOPT; + } + + err = put_user(len, optlen); + if (err != 0) + return -EFAULT; + +#undef COPY_OUT + + return 0; +} + +static int 
vsock_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk; + struct vsock_sock *vsk; + ssize_t total_written; + long timeout; + int err; + struct vsock_transport_send_notify_data send_data; + + DEFINE_WAIT(wait); + + sk = sock->sk; + vsk = vsock_sk(sk); + total_written = 0; + err = 0; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + lock_sock(sk); + + /* Callers should not provide a destination with stream sockets. */ + if (msg->msg_namelen) { + err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP; + goto out; + } + + /* Send data only if both sides are not shutdown in the direction. */ + if (sk->sk_shutdown & SEND_SHUTDOWN || + vsk->peer_shutdown & RCV_SHUTDOWN) { + err = -EPIPE; + goto out; + } + + if (sk->sk_state != SS_CONNECTED || + !vsock_addr_bound(&vsk->local_addr)) { + err = -ENOTCONN; + goto out; + } + + if (!vsock_addr_bound(&vsk->remote_addr)) { + err = -EDESTADDRREQ; + goto out; + } + + /* Wait for room in the produce queue to enqueue our user's data. */ + timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + + err = transport->notify_send_init(vsk, &send_data); + if (err < 0) + goto out; + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (total_written < len) { + ssize_t written; + + while (vsock_stream_has_space(vsk) == 0 && + sk->sk_err == 0 && + !(sk->sk_shutdown & SEND_SHUTDOWN) && + !(vsk->peer_shutdown & RCV_SHUTDOWN)) { + + /* Don't wait for non-blocking sockets. */ + if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + err = transport->notify_send_pre_block(vsk, &send_data); + if (err < 0) + goto out_wait; + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait; + } else if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + } + + /* These checks occur both as part of and after the loop + * conditional since we need to check before and after + * sleeping. + */ + if (sk->sk_err) { + err = -sk->sk_err; + goto out_wait; + } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || + (vsk->peer_shutdown & RCV_SHUTDOWN)) { + err = -EPIPE; + goto out_wait; + } + + err = transport->notify_send_pre_enqueue(vsk, &send_data); + if (err < 0) + goto out_wait; + + /* Note that enqueue will only write as many bytes as are free + * in the produce queue, so we don't need to ensure len is + * smaller than the queue size. It is the caller's + * responsibility to check how many bytes we were able to send. 
+ */ + + written = transport->stream_enqueue( + vsk, msg->msg_iov, + len - total_written); + if (written < 0) { + err = -ENOMEM; + goto out_wait; + } + + total_written += written; + + err = transport->notify_send_post_enqueue( + vsk, written, &send_data); + if (err < 0) + goto out_wait; + + } + +out_wait: + if (total_written > 0) + err = total_written; + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; +} + + +static int +vsock_stream_recvmsg(struct kiocb *kiocb, + struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + struct sock *sk; + struct vsock_sock *vsk; + int err; + size_t target; + ssize_t copied; + long timeout; + struct vsock_transport_recv_notify_data recv_data; + + DEFINE_WAIT(wait); + + sk = sock->sk; + vsk = vsock_sk(sk); + err = 0; + + lock_sock(sk); + + if (sk->sk_state != SS_CONNECTED) { + /* Recvmsg is supposed to return 0 if a peer performs an + * orderly shutdown. Differentiate between that case and when a + * peer has not connected or a local shutdown occured with the + * SOCK_DONE flag. + */ + if (sock_flag(sk, SOCK_DONE)) + err = 0; + else + err = -ENOTCONN; + + goto out; + } + + if (flags & MSG_OOB) { + err = -EOPNOTSUPP; + goto out; + } + + /* We don't check peer_shutdown flag here since peer may actually shut + * down, but there can be data in the queue that a local socket can + * receive. + */ + if (sk->sk_shutdown & RCV_SHUTDOWN) { + err = 0; + goto out; + } + + /* It is valid on Linux to pass in a zero-length receive buffer. This + * is not an error. We may as well bail out now. + */ + if (!len) { + err = 0; + goto out; + } + + /* We must not copy less than target bytes into the user's buffer + * before returning successfully, so we wait for the consume queue to + * have that much data to consume before dequeueing. Note that this + * makes it impossible to handle cases where target is greater than the + * queue size. + */ + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + if (target >= transport->stream_rcvhiwat(vsk)) { + err = -ENOMEM; + goto out; + } + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + copied = 0; + + err = transport->notify_recv_init(vsk, target, &recv_data); + if (err < 0) + goto out; + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (1) { + s64 ready = vsock_stream_has_data(vsk); + + if (ready < 0) { + /* Invalid queue pair content. XXX This should be + * changed to a connection reset in a later change. + */ + + err = -ENOMEM; + goto out_wait; + } else if (ready > 0) { + ssize_t read; + + err = transport->notify_recv_pre_dequeue( + vsk, target, &recv_data); + if (err < 0) + break; + + read = transport->stream_dequeue( + vsk, msg->msg_iov, + len - copied, flags); + if (read < 0) { + err = -ENOMEM; + break; + } + + copied += read; + + err = transport->notify_recv_post_dequeue( + vsk, target, read, + !(flags & MSG_PEEK), &recv_data); + if (err < 0) + goto out_wait; + + if (read >= target || flags & MSG_PEEK) + break; + + target -= read; + } else { + if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) + || (vsk->peer_shutdown & SEND_SHUTDOWN)) { + break; + } + /* Don't wait for non-blocking sockets. 
*/ + if (timeout == 0) { + err = -EAGAIN; + break; + } + + err = transport->notify_recv_pre_block( + vsk, target, &recv_data); + if (err < 0) + break; + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + break; + } else if (timeout == 0) { + err = -EAGAIN; + break; + } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + } + } + + if (sk->sk_err) + err = -sk->sk_err; + else if (sk->sk_shutdown & RCV_SHUTDOWN) + err = 0; + + if (copied > 0) { + /* We only do these additional bookkeeping/notification steps + * if we actually copied something out of the queue pair + * instead of just peeking ahead. + */ + + if (!(flags & MSG_PEEK)) { + /* If the other side has shutdown for sending and there + * is nothing more to read, then modify the socket + * state. + */ + if (vsk->peer_shutdown & SEND_SHUTDOWN) { + if (vsock_stream_has_data(vsk) <= 0) { + sk->sk_state = SS_UNCONNECTED; + sock_set_flag(sk, SOCK_DONE); + sk->sk_state_change(sk); + } + } + } + err = copied; + } + +out_wait: + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; +} + +static const struct proto_ops vsock_stream_ops = { + .family = PF_VSOCK, + .owner = THIS_MODULE, + .release = vsock_release, + .bind = vsock_bind, + .connect = vsock_stream_connect, + .socketpair = sock_no_socketpair, + .accept = vsock_accept, + .getname = vsock_getname, + .poll = vsock_poll, + .ioctl = sock_no_ioctl, + .listen = vsock_listen, + .shutdown = vsock_shutdown, + .setsockopt = vsock_stream_setsockopt, + .getsockopt = vsock_stream_getsockopt, + .sendmsg = vsock_stream_sendmsg, + .recvmsg = vsock_stream_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static int vsock_create(struct net *net, struct socket *sock, + int protocol, int kern) +{ + if (!sock) + return -EINVAL; + + if (protocol) + return -EPROTONOSUPPORT; + + switch (sock->type) { + case SOCK_DGRAM: + sock->ops = &vsock_dgram_ops; + break; + case SOCK_STREAM: + sock->ops = &vsock_stream_ops; + break; + default: + return -ESOCKTNOSUPPORT; + } + + sock->state = SS_UNCONNECTED; + + return __vsock_create(net, sock, NULL, GFP_KERNEL, 0) ? 
0 : -ENOMEM; +} + +static const struct net_proto_family vsock_family_ops = { + .family = AF_VSOCK, + .create = vsock_create, + .owner = THIS_MODULE, +}; + +static long vsock_dev_do_ioctl(struct file *filp, + unsigned int cmd, void __user *ptr) +{ + u32 __user *p = ptr; + int retval = 0; + + switch (cmd) { + case IOCTL_VM_SOCKETS_GET_LOCAL_CID: + if (put_user(transport->get_local_cid(), p) != 0) + retval = -EFAULT; + break; + + default: + pr_err("Unknown ioctl %d\n", cmd); + retval = -EINVAL; + } + + return retval; +} + +static long vsock_dev_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return vsock_dev_do_ioctl(filp, cmd, (void __user *)arg); +} + +#ifdef CONFIG_COMPAT +static long vsock_dev_compat_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return vsock_dev_do_ioctl(filp, cmd, compat_ptr(arg)); +} +#endif + +static const struct file_operations vsock_device_ops = { + .owner = THIS_MODULE, + .unlocked_ioctl = vsock_dev_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vsock_dev_compat_ioctl, +#endif + .open = nonseekable_open, +}; + +static struct miscdevice vsock_device = { + .name = "vsock", + .minor = MISC_DYNAMIC_MINOR, + .fops = &vsock_device_ops, +}; + +static int __vsock_core_init(void) +{ + int err; + + vsock_init_tables(); + + err = misc_register(&vsock_device); + if (err) { + pr_err("Failed to register misc device\n"); + return -ENOENT; + } + + err = proto_register(&vsock_proto, 1); /* we want our slab */ + if (err) { + pr_err("Cannot register vsock protocol\n"); + goto err_misc_deregister; + } + + err = sock_register(&vsock_family_ops); + if (err) { + pr_err("could not register af_vsock (%d) address family: %d\n", + AF_VSOCK, err); + goto err_unregister_proto; + } + + return 0; + +err_unregister_proto: + proto_unregister(&vsock_proto); +err_misc_deregister: + misc_deregister(&vsock_device); + return err; +} + +int vsock_core_init(const struct vsock_transport *t) +{ + int retval = mutex_lock_interruptible(&vsock_register_mutex); + if (retval) + return retval; + + if (transport) { + retval = -EBUSY; + goto out; + } + + transport = t; + retval = __vsock_core_init(); + if (retval) + transport = NULL; + +out: + mutex_unlock(&vsock_register_mutex); + return retval; +} +EXPORT_SYMBOL_GPL(vsock_core_init); + +void vsock_core_exit(void) +{ + mutex_lock(&vsock_register_mutex); + + misc_deregister(&vsock_device); + sock_unregister(AF_VSOCK); + proto_unregister(&vsock_proto); + + /* We do not want the assignment below re-ordered. */ + mb(); + transport = NULL; + + mutex_unlock(&vsock_register_mutex); +} +EXPORT_SYMBOL_GPL(vsock_core_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMware Virtual Socket Family"); +MODULE_VERSION(VSOCK_DRIVER_VERSION_STRING); +MODULE_LICENSE("GPL v2"); diff --git a/net/vmw_vsock/af_vsock.h b/net/vmw_vsock/af_vsock.h new file mode 100644 index 000000000000..7d64d3609ec9 --- /dev/null +++ b/net/vmw_vsock/af_vsock.h @@ -0,0 +1,175 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef __AF_VSOCK_H__ +#define __AF_VSOCK_H__ + +#include +#include +#include + +#include "vsock_addr.h" + +#define LAST_RESERVED_PORT 1023 + +#define vsock_sk(__sk) ((struct vsock_sock *)__sk) +#define sk_vsock(__vsk) (&(__vsk)->sk) + +struct vsock_sock { + /* sk must be the first member. */ + struct sock sk; + struct sockaddr_vm local_addr; + struct sockaddr_vm remote_addr; + /* Links for the global tables of bound and connected sockets. */ + struct list_head bound_table; + struct list_head connected_table; + /* Accessed without the socket lock held. This means it can never be + * modified outsided of socket create or destruct. + */ + bool trusted; + bool cached_peer_allow_dgram; /* Dgram communication allowed to + * cached peer? + */ + u32 cached_peer; /* Context ID of last dgram destination check. */ + const struct cred *owner; + /* Rest are SOCK_STREAM only. */ + long connect_timeout; + /* Listening socket that this came from. */ + struct sock *listener; + /* Used for pending list and accept queue during connection handshake. + * The listening socket is the head for both lists. Sockets created + * for connection requests are placed in the pending list until they + * are connected, at which point they are put in the accept queue list + * so they can be accepted in accept(). If accept() cannot accept the + * connection, it is marked as rejected so the cleanup function knows + * to clean up the socket. + */ + struct list_head pending_links; + struct list_head accept_queue; + bool rejected; + struct delayed_work dwork; + u32 peer_shutdown; + bool sent_request; + bool ignore_connecting_rst; + + /* Private to transport. */ + void *trans; +}; + +s64 vsock_stream_has_data(struct vsock_sock *vsk); +s64 vsock_stream_has_space(struct vsock_sock *vsk); +void vsock_pending_work(struct work_struct *work); +struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, + gfp_t priority, unsigned short type); + +/**** TRANSPORT ****/ + +struct vsock_transport_recv_notify_data { + u64 data1; /* Transport-defined. */ + u64 data2; /* Transport-defined. */ + bool notify_on_block; +}; + +struct vsock_transport_send_notify_data { + u64 data1; /* Transport-defined. */ + u64 data2; /* Transport-defined. */ +}; + +struct vsock_transport { + /* Initialize/tear-down socket. */ + int (*init)(struct vsock_sock *, struct vsock_sock *); + void (*destruct)(struct vsock_sock *); + void (*release)(struct vsock_sock *); + + /* Connections. */ + int (*connect)(struct vsock_sock *); + + /* DGRAM. */ + int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *); + int (*dgram_dequeue)(struct kiocb *kiocb, struct vsock_sock *vsk, + struct msghdr *msg, size_t len, int flags); + int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *, + struct iovec *, size_t len); + bool (*dgram_allow)(u32 cid, u32 port); + + /* STREAM. */ + /* TODO: stream_bind() */ + ssize_t (*stream_dequeue)(struct vsock_sock *, struct iovec *, + size_t len, int flags); + ssize_t (*stream_enqueue)(struct vsock_sock *, struct iovec *, + size_t len); + s64 (*stream_has_data)(struct vsock_sock *); + s64 (*stream_has_space)(struct vsock_sock *); + u64 (*stream_rcvhiwat)(struct vsock_sock *); + bool (*stream_is_active)(struct vsock_sock *); + bool (*stream_allow)(u32 cid, u32 port); + + /* Notification. 
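+ * The core socket layer calls these hooks at the matching points in the
+ * stream receive and send paths (on init, before blocking, and around
+ * each dequeue or enqueue) as well as from poll(); the data1/data2
+ * fields above carry whatever per-call state the transport needs.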
*/ + int (*notify_poll_in)(struct vsock_sock *, size_t, bool *); + int (*notify_poll_out)(struct vsock_sock *, size_t, bool *); + int (*notify_recv_init)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_pre_block)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t, + ssize_t, bool, struct vsock_transport_recv_notify_data *); + int (*notify_send_init)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_pre_block)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_pre_enqueue)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t, + struct vsock_transport_send_notify_data *); + + /* Shutdown. */ + int (*shutdown)(struct vsock_sock *, int); + + /* Buffer sizes. */ + void (*set_buffer_size)(struct vsock_sock *, u64); + void (*set_min_buffer_size)(struct vsock_sock *, u64); + void (*set_max_buffer_size)(struct vsock_sock *, u64); + u64 (*get_buffer_size)(struct vsock_sock *); + u64 (*get_min_buffer_size)(struct vsock_sock *); + u64 (*get_max_buffer_size)(struct vsock_sock *); + + /* Addressing. */ + u32 (*get_local_cid)(void); +}; + +/**** CORE ****/ + +int vsock_core_init(const struct vsock_transport *t); +void vsock_core_exit(void); + +/**** UTILS ****/ + +void vsock_release_pending(struct sock *pending); +void vsock_add_pending(struct sock *listener, struct sock *pending); +void vsock_remove_pending(struct sock *listener, struct sock *pending); +void vsock_enqueue_accept(struct sock *listener, struct sock *connected); +void vsock_insert_connected(struct vsock_sock *vsk); +void vsock_remove_bound(struct vsock_sock *vsk); +void vsock_remove_connected(struct vsock_sock *vsk); +struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); +struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst); +void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); + +#endif /* __AF_VSOCK_H__ */ diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c new file mode 100644 index 000000000000..e8a87cf37072 --- /dev/null +++ b/net/vmw_vsock/vmci_transport.c @@ -0,0 +1,2157 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include + +#define EXPORT_SYMTAB +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "af_vsock.h" +#include "vmci_transport_notify.h" + +static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg); +static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg); +static void vmci_transport_peer_attach_cb(u32 sub_id, + const struct vmci_event_data *ed, + void *client_data); +static void vmci_transport_peer_detach_cb(u32 sub_id, + const struct vmci_event_data *ed, + void *client_data); +static void vmci_transport_recv_pkt_work(struct work_struct *work); +static int vmci_transport_recv_listen(struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_server( + struct sock *sk, + struct sock *pending, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client_negotiate( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client_invalid( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connected(struct sock *sk, + struct vmci_transport_packet *pkt); +static bool vmci_transport_old_proto_override(bool *old_pkt_proto); +static u16 vmci_transport_new_proto_supported_versions(void); +static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto, + bool old_pkt_proto); + +struct vmci_transport_recv_pkt_info { + struct work_struct work; + struct sock *sk; + struct vmci_transport_packet pkt; +}; + +static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID, + VMCI_INVALID_ID }; +static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + +static int PROTOCOL_OVERRIDE = -1; + +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN 128 +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE 262144 +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX 262144 + +/* The default peer timeout indicates how long we will wait for a peer response + * to a control message. + */ +#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) + +#define SS_LISTEN 255 + +/* Helper function to convert from a VMCI error code to a VSock error code. */ + +static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) +{ + int err; + + switch (vmci_error) { + case VMCI_ERROR_NO_MEM: + err = ENOMEM; + break; + case VMCI_ERROR_DUPLICATE_ENTRY: + case VMCI_ERROR_ALREADY_EXISTS: + err = EADDRINUSE; + break; + case VMCI_ERROR_NO_ACCESS: + err = EPERM; + break; + case VMCI_ERROR_NO_RESOURCES: + err = ENOBUFS; + break; + case VMCI_ERROR_INVALID_RESOURCE: + err = EHOSTUNREACH; + break; + case VMCI_ERROR_INVALID_ARGS: + default: + err = EINVAL; + } + + return err > 0 ? 
-err : err; +} + +static inline void +vmci_transport_packet_init(struct vmci_transport_packet *pkt, + struct sockaddr_vm *src, + struct sockaddr_vm *dst, + u8 type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + /* We register the stream control handler as an any cid handle so we + * must always send from a source address of VMADDR_CID_ANY + */ + pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY, + VMCI_TRANSPORT_PACKET_RID); + pkt->dg.dst = vmci_make_handle(dst->svm_cid, + VMCI_TRANSPORT_PACKET_RID); + pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg); + pkt->version = VMCI_TRANSPORT_PACKET_VERSION; + pkt->type = type; + pkt->src_port = src->svm_port; + pkt->dst_port = dst->svm_port; + memset(&pkt->proto, 0, sizeof(pkt->proto)); + memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2)); + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_INVALID: + pkt->u.size = 0; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_REQUEST: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE: + pkt->u.size = size; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_OFFER: + case VMCI_TRANSPORT_PACKET_TYPE_ATTACH: + pkt->u.handle = handle; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + case VMCI_TRANSPORT_PACKET_TYPE_READ: + case VMCI_TRANSPORT_PACKET_TYPE_RST: + pkt->u.size = 0; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN: + pkt->u.mode = mode; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ: + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE: + memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait)); + break; + + case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2: + pkt->u.size = size; + pkt->proto = proto; + break; + } +} + +static inline void +vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt, + struct sockaddr_vm *local, + struct sockaddr_vm *remote) +{ + vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port); + vsock_addr_init(remote, pkt->dg.src.context, pkt->src_port); +} + +static int +__vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt, + struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle, + bool convert_error) +{ + int err; + + vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait, + proto, handle); + err = vmci_datagram_send(&pkt->dg); + if (convert_error && (err < 0)) + return vmci_transport_error_to_vsock_error(err); + + return err; +} + +static int +vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + struct vmci_handle handle) +{ + struct vmci_transport_packet reply; + struct sockaddr_vm src, dst; + + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) { + return 0; + } else { + vmci_transport_packet_get_addresses(pkt, &src, &dst); + return __vmci_transport_send_control_pkt(&reply, &src, &dst, + type, + size, mode, wait, + VSOCK_PROTO_INVALID, + handle, true); + } +} + +static int +vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + struct vmci_handle handle) +{ + /* Note that it is safe to use a single packet across all CPUs since + * two tasklets of the same type are guaranteed to not ever run + * simultaneously. 
If that ever changes, or VMCI stops using tasklets, + * we can use per-cpu packets. + */ + static struct vmci_transport_packet pkt; + + return __vmci_transport_send_control_pkt(&pkt, src, dst, type, + size, mode, wait, + VSOCK_PROTO_INVALID, handle, + false); +} + +static int +vmci_transport_send_control_pkt(struct sock *sk, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + struct vmci_transport_packet *pkt; + struct vsock_sock *vsk; + int err; + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + if (!vsock_addr_bound(&vsk->remote_addr)) + return -EINVAL; + + pkt = kmalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr, + &vsk->remote_addr, type, size, + mode, wait, proto, handle, + true); + kfree(pkt); + + return err; +} + +static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src, + struct vmci_transport_packet *pkt) +{ + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) + return 0; + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_RST, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_reset(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) + return 0; + return vmci_transport_send_control_pkt(sk, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_negotiate(struct sock *sk, size_t size) +{ + return vmci_transport_send_control_pkt( + sk, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE, + size, 0, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_negotiate2(struct sock *sk, size_t size, + u16 version) +{ + return vmci_transport_send_control_pkt( + sk, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2, + size, 0, NULL, version, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_qp_offer(struct sock *sk, + struct vmci_handle handle) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0, + 0, NULL, + VSOCK_PROTO_INVALID, handle); +} + +static int vmci_transport_send_attach(struct sock *sk, + struct vmci_handle handle) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH, + 0, 0, NULL, VSOCK_PROTO_INVALID, + handle); +} + +static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt) +{ + return vmci_transport_reply_control_pkt_fast( + pkt, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_INVALID, + 0, 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_read_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_READ, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_wrote(struct sock *sk) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0, + 0, NULL, VSOCK_PROTO_INVALID, + 
VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_read(struct sock *sk) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0, + 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_waiting_write(struct sock *sk, + struct vmci_transport_waiting_info *wait) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE, + 0, 0, wait, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_waiting_read(struct sock *sk, + struct vmci_transport_waiting_info *wait) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ, + 0, 0, wait, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode) +{ + return vmci_transport_send_control_pkt( + &vsk->sk, + VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN, + 0, mode, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_conn_request(struct sock *sk, size_t size) +{ + return vmci_transport_send_control_pkt(sk, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST, + size, 0, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_conn_request2(struct sock *sk, size_t size, + u16 version) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_REQUEST2, + size, 0, NULL, version, + VMCI_INVALID_HANDLE); +} + +static struct sock *vmci_transport_get_pending( + struct sock *listener, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + struct sock *pending; + + vlistener = vsock_sk(listener); + + list_for_each_entry(vpending, &vlistener->pending_links, + pending_links) { + struct sockaddr_vm src; + struct sockaddr_vm dst; + + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); + vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); + + if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && + vsock_addr_equals_addr(&dst, &vpending->local_addr)) { + pending = sk_vsock(vpending); + sock_hold(pending); + goto found; + } + } + + pending = NULL; +found: + return pending; + +} + +static void vmci_transport_release_pending(struct sock *pending) +{ + sock_put(pending); +} + +/* We allow two kinds of sockets to communicate with a restricted VM: 1) + * trusted sockets 2) sockets from applications running as the same user as the + * VM (this is only true for the host side and only when using hosted products) + */ + +static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid) +{ + return vsock->trusted || + vmci_is_context_owner(peer_cid, vsock->owner->uid); +} + +/* We allow sending datagrams to and receiving datagrams from a restricted VM + * only if it is trusted as described in vmci_transport_is_trusted. + */ + +static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid) +{ + if (vsock->cached_peer != peer_cid) { + vsock->cached_peer = peer_cid; + if (!vmci_transport_is_trusted(vsock, peer_cid) && + (vmci_context_get_priv_flags(peer_cid) & + VMCI_PRIVILEGE_FLAG_RESTRICTED)) { + vsock->cached_peer_allow_dgram = false; + } else { + vsock->cached_peer_allow_dgram = true; + } + } + + return vsock->cached_peer_allow_dgram; +} + +static int +vmci_transport_queue_pair_alloc(struct vmci_qp **qpair, + struct vmci_handle *handle, + u64 produce_size, + u64 consume_size, + u32 peer, u32 flags, bool trusted) +{ + int err = 0; + + if (trusted) { + /* Try to allocate our queue pair as trusted. 
This will only + * work if vsock is running in the host. + */ + + err = vmci_qpair_alloc(qpair, handle, produce_size, + consume_size, + peer, flags, + VMCI_PRIVILEGE_FLAG_TRUSTED); + if (err != VMCI_ERROR_NO_ACCESS) + goto out; + + } + + err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size, + peer, flags, VMCI_NO_PRIVILEGE_FLAGS); +out: + if (err < 0) { + pr_err("Could not attach to queue pair with %d\n", + err); + err = vmci_transport_error_to_vsock_error(err); + } + + return err; +} + +static int +vmci_transport_datagram_create_hnd(u32 resource_id, + u32 flags, + vmci_datagram_recv_cb recv_cb, + void *client_data, + struct vmci_handle *out_handle) +{ + int err = 0; + + /* Try to allocate our datagram handler as trusted. This will only work + * if vsock is running in the host. + */ + + err = vmci_datagram_create_handle_priv(resource_id, flags, + VMCI_PRIVILEGE_FLAG_TRUSTED, + recv_cb, + client_data, out_handle); + + if (err == VMCI_ERROR_NO_ACCESS) + err = vmci_datagram_create_handle(resource_id, flags, + recv_cb, client_data, + out_handle); + + return err; +} + +/* This is invoked as part of a tasklet that's scheduled when the VMCI + * interrupt fires. This is run in bottom-half context and if it ever needs to + * sleep it should defer that work to a work queue. + */ + +static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg) +{ + struct sock *sk; + size_t size; + struct sk_buff *skb; + struct vsock_sock *vsk; + + sk = (struct sock *)data; + + /* This handler is privileged when this module is running on the host. + * We will get datagrams from all endpoints (even VMs that are in a + * restricted context). If we get one from a restricted context then + * the destination socket must be trusted. + * + * NOTE: We access the socket struct without holding the lock here. + * This is ok because the field we are interested is never modified + * outside of the create and destruct socket functions. + */ + vsk = vsock_sk(sk); + if (!vmci_transport_allow_dgram(vsk, dg->src.context)) + return VMCI_ERROR_NO_ACCESS; + + size = VMCI_DG_SIZE(dg); + + /* Attach the packet to the socket's receive queue as an sk_buff. */ + skb = alloc_skb(size, GFP_ATOMIC); + if (skb) { + /* sk_receive_skb() will do a sock_put(), so hold here. */ + sock_hold(sk); + skb_put(skb, size); + memcpy(skb->data, dg, size); + sk_receive_skb(sk, skb, 0); + } + + return VMCI_SUCCESS; +} + +static bool vmci_transport_stream_allow(u32 cid, u32 port) +{ + static const u32 non_socket_contexts[] = { + VMADDR_CID_HYPERVISOR, + VMADDR_CID_RESERVED, + }; + int i; + + BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts)); + + for (i = 0; i < ARRAY_SIZE(non_socket_contexts); i++) { + if (cid == non_socket_contexts[i]) + return false; + } + + return true; +} + +/* This is invoked as part of a tasklet that's scheduled when the VMCI + * interrupt fires. This is run in bottom-half context but it defers most of + * its work to the packet handling work queue. + */ + +static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg) +{ + struct sock *sk; + struct sockaddr_vm dst; + struct sockaddr_vm src; + struct vmci_transport_packet *pkt; + struct vsock_sock *vsk; + bool bh_process_pkt; + int err; + + sk = NULL; + err = VMCI_SUCCESS; + bh_process_pkt = false; + + /* Ignore incoming packets from contexts without sockets, or resources + * that aren't vsock implementations. 
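+ * Concretely: the source context must pass vmci_transport_stream_allow()
+ * and the source resource must be VMCI_TRANSPORT_PACKET_RID.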
+ */ + + if (!vmci_transport_stream_allow(dg->src.context, -1) + || VMCI_TRANSPORT_PACKET_RID != dg->src.resource) + return VMCI_ERROR_NO_ACCESS; + + if (VMCI_DG_SIZE(dg) < sizeof(*pkt)) + /* Drop datagrams that do not contain full VSock packets. */ + return VMCI_ERROR_INVALID_ARGS; + + pkt = (struct vmci_transport_packet *)dg; + + /* Find the socket that should handle this packet. First we look for a + * connected socket and if there is none we look for a socket bound to + * the destintation address. + */ + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); + vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); + + sk = vsock_find_connected_socket(&src, &dst); + if (!sk) { + sk = vsock_find_bound_socket(&dst); + if (!sk) { + /* We could not find a socket for this specified + * address. If this packet is a RST, we just drop it. + * If it is another packet, we send a RST. Note that + * we do not send a RST reply to RSTs so that we do not + * continually send RSTs between two endpoints. + * + * Note that since this is a reply, dst is src and src + * is dst. + */ + if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0) + pr_err("unable to send reset\n"); + + err = VMCI_ERROR_NOT_FOUND; + goto out; + } + } + + /* If the received packet type is beyond all types known to this + * implementation, reply with an invalid message. Hopefully this will + * help when implementing backwards compatibility in the future. + */ + if (pkt->type >= VMCI_TRANSPORT_PACKET_TYPE_MAX) { + vmci_transport_send_invalid_bh(&dst, &src); + err = VMCI_ERROR_INVALID_ARGS; + goto out; + } + + /* This handler is privileged when this module is running on the host. + * We will get datagram connect requests from all endpoints (even VMs + * that are in a restricted context). If we get one from a restricted + * context then the destination socket must be trusted. + * + * NOTE: We access the socket struct without holding the lock here. + * This is ok because the field we are interested is never modified + * outside of the create and destruct socket functions. + */ + vsk = vsock_sk(sk); + if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) { + err = VMCI_ERROR_NO_ACCESS; + goto out; + } + + /* We do most everything in a work queue, but let's fast path the + * notification of reads and writes to help data transfer performance. + * We can only do this if there is no process context code executing + * for this socket since that may change the state. + */ + bh_lock_sock(sk); + + if (!sock_owned_by_user(sk) && sk->sk_state == SS_CONNECTED) + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, true, &dst, &src, + &bh_process_pkt); + + bh_unlock_sock(sk); + + if (!bh_process_pkt) { + struct vmci_transport_recv_pkt_info *recv_pkt_info; + + recv_pkt_info = kmalloc(sizeof(*recv_pkt_info), GFP_ATOMIC); + if (!recv_pkt_info) { + if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0) + pr_err("unable to send reset\n"); + + err = VMCI_ERROR_NO_MEM; + goto out; + } + + recv_pkt_info->sk = sk; + memcpy(&recv_pkt_info->pkt, pkt, sizeof(recv_pkt_info->pkt)); + INIT_WORK(&recv_pkt_info->work, vmci_transport_recv_pkt_work); + + schedule_work(&recv_pkt_info->work); + /* Clear sk so that the reference count incremented by one of + * the Find functions above is not decremented below. We need + * that reference count for the packet handler we've scheduled + * to run. 
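+ * vmci_transport_recv_pkt_work() drops that reference with sock_put()
+ * once it has processed the packet.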
+ */ + sk = NULL; + } + +out: + if (sk) + sock_put(sk); + + return err; +} + +static void vmci_transport_peer_attach_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + struct sock *sk = client_data; + const struct vmci_event_payload_qp *e_payload; + struct vsock_sock *vsk; + + e_payload = vmci_event_data_const_payload(e_data); + + vsk = vsock_sk(sk); + + /* We don't ask for delayed CBs when we subscribe to this event (we + * pass 0 as flags to vmci_event_subscribe()). VMCI makes no + * guarantees in that case about what context we might be running in, + * so it could be BH or process, blockable or non-blockable. So we + * need to account for all possible contexts here. + */ + local_bh_disable(); + bh_lock_sock(sk); + + /* XXX This is lame, we should provide a way to lookup sockets by + * qp_handle. + */ + if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, + e_payload->handle)) { + /* XXX This doesn't do anything, but in the future we may want + * to set a flag here to verify the attach really did occur and + * we weren't just sent a datagram claiming it was. + */ + goto out; + } + +out: + bh_unlock_sock(sk); + local_bh_enable(); +} + +static void vmci_transport_handle_detach(struct sock *sk) +{ + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { + sock_set_flag(sk, SOCK_DONE); + + /* On a detach the peer will not be sending or receiving + * anymore. + */ + vsk->peer_shutdown = SHUTDOWN_MASK; + + /* We should not be sending anymore since the peer won't be + * there to receive, but we can still receive if there is data + * left in our consume queue. + */ + if (vsock_stream_has_data(vsk) <= 0) { + if (sk->sk_state == SS_CONNECTING) { + /* The peer may detach from a queue pair while + * we are still in the connecting state, i.e., + * if the peer VM is killed after attaching to + * a queue pair, but before we complete the + * handshake. In that case, we treat the detach + * event like a reset. + */ + + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ECONNRESET; + sk->sk_error_report(sk); + return; + } + sk->sk_state = SS_UNCONNECTED; + } + sk->sk_state_change(sk); + } +} + +static void vmci_transport_peer_detach_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + struct sock *sk = client_data; + const struct vmci_event_payload_qp *e_payload; + struct vsock_sock *vsk; + + e_payload = vmci_event_data_const_payload(e_data); + vsk = vsock_sk(sk); + if (vmci_handle_is_invalid(e_payload->handle)) + return; + + /* Same rules for locking as for peer_attach_cb(). */ + local_bh_disable(); + bh_lock_sock(sk); + + /* XXX This is lame, we should provide a way to lookup sockets by + * qp_handle. 
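+ * Until then, client_data is the socket itself, so only act on the
+ * detach if the detached handle matches this socket's queue pair.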
+ */ + if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, + e_payload->handle)) + vmci_transport_handle_detach(sk); + + bh_unlock_sock(sk); + local_bh_enable(); +} + +static void vmci_transport_qp_resumed_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + vsock_for_each_connected_socket(vmci_transport_handle_detach); +} + +static void vmci_transport_recv_pkt_work(struct work_struct *work) +{ + struct vmci_transport_recv_pkt_info *recv_pkt_info; + struct vmci_transport_packet *pkt; + struct sock *sk; + + recv_pkt_info = + container_of(work, struct vmci_transport_recv_pkt_info, work); + sk = recv_pkt_info->sk; + pkt = &recv_pkt_info->pkt; + + lock_sock(sk); + + switch (sk->sk_state) { + case SS_LISTEN: + vmci_transport_recv_listen(sk, pkt); + break; + case SS_CONNECTING: + /* Processing of pending connections for servers goes through + * the listening socket, so see vmci_transport_recv_listen() + * for that path. + */ + vmci_transport_recv_connecting_client(sk, pkt); + break; + case SS_CONNECTED: + vmci_transport_recv_connected(sk, pkt); + break; + default: + /* Because this function does not run in the same context as + * vmci_transport_recv_stream_cb it is possible that the + * socket has closed. We need to let the other side know or it + * could be sitting in a connect and hang forever. Send a + * reset to prevent that. + */ + vmci_transport_send_reset(sk, pkt); + goto out; + } + +out: + release_sock(sk); + kfree(recv_pkt_info); + /* Release reference obtained in the stream callback when we fetched + * this socket out of the bound or connected list. + */ + sock_put(sk); +} + +static int vmci_transport_recv_listen(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct sock *pending; + struct vsock_sock *vpending; + int err; + u64 qp_size; + bool old_request = false; + bool old_pkt_proto = false; + + err = 0; + + /* Because we are in the listen state, we could be receiving a packet + * for ourself or any previous connection requests that we received. + * If it's the latter, we try to find a socket in our list of pending + * connections and, if we do, call the appropriate handler for the + * state that that socket is in. Otherwise we try to service the + * connection request. + */ + pending = vmci_transport_get_pending(sk, pkt); + if (pending) { + lock_sock(pending); + switch (pending->sk_state) { + case SS_CONNECTING: + err = vmci_transport_recv_connecting_server(sk, + pending, + pkt); + break; + default: + vmci_transport_send_reset(pending, pkt); + err = -EINVAL; + } + + if (err < 0) + vsock_remove_pending(sk, pending); + + release_sock(pending); + vmci_transport_release_pending(pending); + + return err; + } + + /* The listen state only accepts connection requests. Reply with a + * reset unless we received a reset. + */ + + if (!(pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST || + pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)) { + vmci_transport_reply_reset(pkt); + return -EINVAL; + } + + if (pkt->u.size == 0) { + vmci_transport_reply_reset(pkt); + return -EINVAL; + } + + /* If this socket can't accommodate this connection request, we send a + * reset. Otherwise we create and initialize a child socket and reply + * with a connection negotiation. 
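+ * "Accommodate" means the listen backlog still has room. The negotiation
+ * reply carries the queue pair size we will use (the peer's proposal if
+ * it fits our min/max, otherwise our own default) and is sent as
+ * NEGOTIATE or NEGOTIATE2 to match the REQUEST/REQUEST2 that arrived,
+ * unless a protocol override is in effect.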
+ */ + if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) { + vmci_transport_reply_reset(pkt); + return -ECONNREFUSED; + } + + pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, + sk->sk_type); + if (!pending) { + vmci_transport_send_reset(sk, pkt); + return -ENOMEM; + } + + vpending = vsock_sk(pending); + + vsock_addr_init(&vpending->local_addr, pkt->dg.dst.context, + pkt->dst_port); + vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context, + pkt->src_port); + + /* If the proposed size fits within our min/max, accept it. Otherwise + * propose our own size. + */ + if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size && + pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) { + qp_size = pkt->u.size; + } else { + qp_size = vmci_trans(vpending)->queue_pair_size; + } + + /* Figure out if we are using old or new requests based on the + * overrides pkt types sent by our peer. + */ + if (vmci_transport_old_proto_override(&old_pkt_proto)) { + old_request = old_pkt_proto; + } else { + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST) + old_request = true; + else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2) + old_request = false; + + } + + if (old_request) { + /* Handle a REQUEST (or override) */ + u16 version = VSOCK_PROTO_INVALID; + if (vmci_transport_proto_to_notify_struct( + pending, &version, true)) + err = vmci_transport_send_negotiate(pending, qp_size); + else + err = -EINVAL; + + } else { + /* Handle a REQUEST2 (or override) */ + int proto_int = pkt->proto; + int pos; + u16 active_proto_version = 0; + + /* The list of possible protocols is the intersection of all + * protocols the client supports ... plus all the protocols we + * support. + */ + proto_int &= vmci_transport_new_proto_supported_versions(); + + /* We choose the highest possible protocol version and use that + * one. + */ + pos = fls(proto_int); + if (pos) { + active_proto_version = (1 << (pos - 1)); + if (vmci_transport_proto_to_notify_struct( + pending, &active_proto_version, false)) + err = vmci_transport_send_negotiate2(pending, + qp_size, + active_proto_version); + else + err = -EINVAL; + + } else { + err = -EINVAL; + } + } + + if (err < 0) { + vmci_transport_send_reset(sk, pkt); + sock_put(pending); + err = vmci_transport_error_to_vsock_error(err); + goto out; + } + + vsock_add_pending(sk, pending); + sk->sk_ack_backlog++; + + pending->sk_state = SS_CONNECTING; + vmci_trans(vpending)->produce_size = + vmci_trans(vpending)->consume_size = qp_size; + vmci_trans(vpending)->queue_pair_size = qp_size; + + vmci_trans(vpending)->notify_ops->process_request(pending); + + /* We might never receive another message for this socket and it's not + * connected to any process, so we have to ensure it gets cleaned up + * ourself. Our delayed work function will take care of that. Note + * that we do not ever cancel this function since we have few + * guarantees about its state when calling cancel_delayed_work(). + * Instead we hold a reference on the socket for that function and make + * it capable of handling cases where it needs to do nothing but + * release that reference. 
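+ * The work is scheduled HZ jiffies (one second) out, and references are
+ * taken on both the listener and the pending socket before it is queued.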
+ */ + vpending->listener = sk; + sock_hold(sk); + sock_hold(pending); + INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work); + schedule_delayed_work(&vpending->dwork, HZ); + +out: + return err; +} + +static int +vmci_transport_recv_connecting_server(struct sock *listener, + struct sock *pending, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vpending; + struct vmci_handle handle; + struct vmci_qp *qpair; + bool is_local; + u32 flags; + u32 detach_sub_id; + int err; + int skerr; + + vpending = vsock_sk(pending); + detach_sub_id = VMCI_INVALID_ID; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_OFFER: + if (vmci_handle_is_invalid(pkt->u.handle)) { + vmci_transport_send_reset(pending, pkt); + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + break; + default: + /* Close and cleanup the connection. */ + vmci_transport_send_reset(pending, pkt); + skerr = EPROTO; + err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL; + goto destroy; + } + + /* In order to complete the connection we need to attach to the offered + * queue pair and send an attach notification. We also subscribe to the + * detach event so we know when our peer goes away, and we do that + * before attaching so we don't miss an event. If all this succeeds, + * we update our state and wakeup anything waiting in accept() for a + * connection. + */ + + /* We don't care about attach since we ensure the other side has + * attached by specifying the ATTACH_ONLY flag below. + */ + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, + vmci_transport_peer_detach_cb, + pending, &detach_sub_id); + if (err < VMCI_SUCCESS) { + vmci_transport_send_reset(pending, pkt); + err = vmci_transport_error_to_vsock_error(err); + skerr = -err; + goto destroy; + } + + vmci_trans(vpending)->detach_sub_id = detach_sub_id; + + /* Now attach to the queue pair the client created. */ + handle = pkt->u.handle; + + /* vpending->local_addr always has a context id so we do not need to + * worry about VMADDR_CID_ANY in this case. + */ + is_local = + vpending->remote_addr.svm_cid == vpending->local_addr.svm_cid; + flags = VMCI_QPFLAG_ATTACH_ONLY; + flags |= is_local ? VMCI_QPFLAG_LOCAL : 0; + + err = vmci_transport_queue_pair_alloc( + &qpair, + &handle, + vmci_trans(vpending)->produce_size, + vmci_trans(vpending)->consume_size, + pkt->dg.src.context, + flags, + vmci_transport_is_trusted( + vpending, + vpending->remote_addr.svm_cid)); + if (err < 0) { + vmci_transport_send_reset(pending, pkt); + skerr = -err; + goto destroy; + } + + vmci_trans(vpending)->qp_handle = handle; + vmci_trans(vpending)->qpair = qpair; + + /* When we send the attach message, we must be ready to handle incoming + * control messages on the newly connected socket. So we move the + * pending socket to the connected state before sending the attach + * message. Otherwise, an incoming packet triggered by the attach being + * received by the peer may be processed concurrently with what happens + * below after sending the attach message, and that incoming packet + * will find the listening socket instead of the (currently) pending + * socket. Note that enqueueing the socket increments the reference + * count, so even if a reset comes before the connection is accepted, + * the socket will be valid until it is removed from the queue. 
+ * + * If we fail sending the attach below, we remove the socket from the + * connected list and move the socket to SS_UNCONNECTED before + * releasing the lock, so a pending slow path processing of an incoming + * packet will not see the socket in the connected state in that case. + */ + pending->sk_state = SS_CONNECTED; + + vsock_insert_connected(vpending); + + /* Notify our peer of our attach. */ + err = vmci_transport_send_attach(pending, handle); + if (err < 0) { + vsock_remove_connected(vpending); + pr_err("Could not send attach\n"); + vmci_transport_send_reset(pending, pkt); + err = vmci_transport_error_to_vsock_error(err); + skerr = -err; + goto destroy; + } + + /* We have a connection. Move the now connected socket from the + * listener's pending list to the accept queue so callers of accept() + * can find it. + */ + vsock_remove_pending(listener, pending); + vsock_enqueue_accept(listener, pending); + + /* Callers of accept() will be be waiting on the listening socket, not + * the pending socket. + */ + listener->sk_state_change(listener); + + return 0; + +destroy: + pending->sk_err = skerr; + pending->sk_state = SS_UNCONNECTED; + /* As long as we drop our reference, all necessary cleanup will handle + * when the cleanup function drops its reference and our destruct + * implementation is called. Note that since the listen handler will + * remove pending from the pending list upon our failure, the cleanup + * function won't drop the additional reference, which is why we do it + * here. + */ + sock_put(pending); + + return err; +} + +static int +vmci_transport_recv_connecting_client(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vsk; + int err; + int skerr; + + vsk = vsock_sk(sk); + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_ATTACH: + if (vmci_handle_is_invalid(pkt->u.handle) || + !vmci_handle_is_equal(pkt->u.handle, + vmci_trans(vsk)->qp_handle)) { + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + + /* Signify the socket is connected and wakeup the waiter in + * connect(). Also place the socket in the connected table for + * accounting (it can already be found since it's in the bound + * table). + */ + sk->sk_state = SS_CONNECTED; + sk->sk_socket->state = SS_CONNECTED; + vsock_insert_connected(vsk); + sk->sk_state_change(sk); + + break; + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2: + if (pkt->u.size == 0 + || pkt->dg.src.context != vsk->remote_addr.svm_cid + || pkt->src_port != vsk->remote_addr.svm_port + || !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle) + || vmci_trans(vsk)->qpair + || vmci_trans(vsk)->produce_size != 0 + || vmci_trans(vsk)->consume_size != 0 + || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID + || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { + skerr = EPROTO; + err = -EINVAL; + + goto destroy; + } + + err = vmci_transport_recv_connecting_client_negotiate(sk, pkt); + if (err) { + skerr = -err; + goto destroy; + } + + break; + case VMCI_TRANSPORT_PACKET_TYPE_INVALID: + err = vmci_transport_recv_connecting_client_invalid(sk, pkt); + if (err) { + skerr = -err; + goto destroy; + } + + break; + case VMCI_TRANSPORT_PACKET_TYPE_RST: + /* Older versions of the linux code (WS 6.5 / ESX 4.0) used to + * continue processing here after they sent an INVALID packet. + * This meant that we got a RST after the INVALID. We ignore a + * RST after an INVALID. The common code doesn't send the RST + * ... 
so we can hang if an old version of the common code + * fails between getting a REQUEST and sending an OFFER back. + * Not much we can do about it... except hope that it doesn't + * happen. + */ + if (vsk->ignore_connecting_rst) { + vsk->ignore_connecting_rst = false; + } else { + skerr = ECONNRESET; + err = 0; + goto destroy; + } + + break; + default: + /* Close and cleanup the connection. */ + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + + return 0; + +destroy: + vmci_transport_send_reset(sk, pkt); + + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = skerr; + sk->sk_error_report(sk); + return err; +} + +static int vmci_transport_recv_connecting_client_negotiate( + struct sock *sk, + struct vmci_transport_packet *pkt) +{ + int err; + struct vsock_sock *vsk; + struct vmci_handle handle; + struct vmci_qp *qpair; + u32 attach_sub_id; + u32 detach_sub_id; + bool is_local; + u32 flags; + bool old_proto = true; + bool old_pkt_proto; + u16 version; + + vsk = vsock_sk(sk); + handle = VMCI_INVALID_HANDLE; + attach_sub_id = VMCI_INVALID_ID; + detach_sub_id = VMCI_INVALID_ID; + + /* If we have gotten here then we should be past the point where old + * linux vsock could have sent the bogus rst. + */ + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + + /* Verify that we're OK with the proposed queue pair size */ + if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size || + pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) { + err = -EINVAL; + goto destroy; + } + + /* At this point we know the CID the peer is using to talk to us. */ + + if (vsk->local_addr.svm_cid == VMADDR_CID_ANY) + vsk->local_addr.svm_cid = pkt->dg.dst.context; + + /* Setup the notify ops to be the highest supported version that both + * the server and the client support. + */ + + if (vmci_transport_old_proto_override(&old_pkt_proto)) { + old_proto = old_pkt_proto; + } else { + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE) + old_proto = true; + else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2) + old_proto = false; + + } + + if (old_proto) + version = VSOCK_PROTO_INVALID; + else + version = pkt->proto; + + if (!vmci_transport_proto_to_notify_struct(sk, &version, old_proto)) { + err = -EINVAL; + goto destroy; + } + + /* Subscribe to attach and detach events first. + * + * XXX We attach once for each queue pair created for now so it is easy + * to find the socket (it's provided), but later we should only + * subscribe once and add a way to lookup sockets by queue pair handle. + */ + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH, + vmci_transport_peer_attach_cb, + sk, &attach_sub_id); + if (err < VMCI_SUCCESS) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, + vmci_transport_peer_detach_cb, + sk, &detach_sub_id); + if (err < VMCI_SUCCESS) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + /* Make VMCI select the handle for us. */ + handle = VMCI_INVALID_HANDLE; + is_local = vsk->remote_addr.svm_cid == vsk->local_addr.svm_cid; + flags = is_local ? 
VMCI_QPFLAG_LOCAL : 0; + + err = vmci_transport_queue_pair_alloc(&qpair, + &handle, + pkt->u.size, + pkt->u.size, + vsk->remote_addr.svm_cid, + flags, + vmci_transport_is_trusted( + vsk, + vsk-> + remote_addr.svm_cid)); + if (err < 0) + goto destroy; + + err = vmci_transport_send_qp_offer(sk, handle); + if (err < 0) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + vmci_trans(vsk)->qp_handle = handle; + vmci_trans(vsk)->qpair = qpair; + + vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = + pkt->u.size; + + vmci_trans(vsk)->attach_sub_id = attach_sub_id; + vmci_trans(vsk)->detach_sub_id = detach_sub_id; + + vmci_trans(vsk)->notify_ops->process_negotiate(sk); + + return 0; + +destroy: + if (attach_sub_id != VMCI_INVALID_ID) + vmci_event_unsubscribe(attach_sub_id); + + if (detach_sub_id != VMCI_INVALID_ID) + vmci_event_unsubscribe(detach_sub_id); + + if (!vmci_handle_is_invalid(handle)) + vmci_qpair_detach(&qpair); + + return err; +} + +static int +vmci_transport_recv_connecting_client_invalid(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + int err = 0; + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsk->sent_request) { + vsk->sent_request = false; + vsk->ignore_connecting_rst = true; + + err = vmci_transport_send_conn_request( + sk, vmci_trans(vsk)->queue_pair_size); + if (err < 0) + err = vmci_transport_error_to_vsock_error(err); + else + err = 0; + + } + + return err; +} + +static int vmci_transport_recv_connected(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vsk; + bool pkt_processed = false; + + /* In cases where we are closing the connection, it's sufficient to + * mark the state change (and maybe error) and wake up any waiting + * threads. Since this is a connected socket, it's owned by a user + * process and will be cleaned up when the failure is passed back on + * the current or next system call. Our system call implementations + * must therefore check for error and state changes on entry and when + * being awoken. + */ + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN: + if (pkt->u.mode) { + vsk = vsock_sk(sk); + + vsk->peer_shutdown |= pkt->u.mode; + sk->sk_state_change(sk); + } + break; + + case VMCI_TRANSPORT_PACKET_TYPE_RST: + vsk = vsock_sk(sk); + /* It is possible that we sent our peer a message (e.g a + * WAITING_READ) right before we got notified that the peer had + * detached. If that happens then we can get a RST pkt back + * from our peer even though there is data available for us to + * read. In that case, don't shutdown the socket completely but + * instead allow the local client to finish reading data off + * the queuepair. Always treat a RST pkt in connected mode like + * a clean shutdown. 
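+ * That is: mark the socket SOCK_DONE, record a full peer shutdown, and
+ * move to SS_DISCONNECTING only if there is no data left to read.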
+ */ + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = SS_DISCONNECTING; + + sk->sk_state_change(sk); + break; + + default: + vsk = vsock_sk(sk); + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, false, NULL, NULL, + &pkt_processed); + if (!pkt_processed) + return -EINVAL; + + break; + } + + return 0; +} + +static int vmci_transport_socket_init(struct vsock_sock *vsk, + struct vsock_sock *psk) +{ + vsk->trans = kmalloc(sizeof(struct vmci_transport), GFP_KERNEL); + if (!vsk->trans) + return -ENOMEM; + + vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->qpair = NULL; + vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0; + vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id = + VMCI_INVALID_ID; + vmci_trans(vsk)->notify_ops = NULL; + if (psk) { + vmci_trans(vsk)->queue_pair_size = + vmci_trans(psk)->queue_pair_size; + vmci_trans(vsk)->queue_pair_min_size = + vmci_trans(psk)->queue_pair_min_size; + vmci_trans(vsk)->queue_pair_max_size = + vmci_trans(psk)->queue_pair_max_size; + } else { + vmci_trans(vsk)->queue_pair_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE; + vmci_trans(vsk)->queue_pair_min_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN; + vmci_trans(vsk)->queue_pair_max_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX; + } + + return 0; +} + +static void vmci_transport_destruct(struct vsock_sock *vsk) +{ + if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id); + vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID; + } + + if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id); + vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID; + } + + if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { + vmci_qpair_detach(&vmci_trans(vsk)->qpair); + vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->produce_size = 0; + vmci_trans(vsk)->consume_size = 0; + } + + if (vmci_trans(vsk)->notify_ops) + vmci_trans(vsk)->notify_ops->socket_destruct(vsk); + + kfree(vsk->trans); + vsk->trans = NULL; +} + +static void vmci_transport_release(struct vsock_sock *vsk) +{ + if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) { + vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle); + vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; + } +} + +static int vmci_transport_dgram_bind(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + u32 port; + u32 flags; + int err; + + /* VMCI will select a resource ID for us if we provide + * VMCI_INVALID_ID. + */ + port = addr->svm_port == VMADDR_PORT_ANY ? + VMCI_INVALID_ID : addr->svm_port; + + if (port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE)) + return -EACCES; + + flags = addr->svm_cid == VMADDR_CID_ANY ? 
+ VMCI_FLAG_ANYCID_DG_HND : 0; + + err = vmci_transport_datagram_create_hnd(port, flags, + vmci_transport_recv_dgram_cb, + &vsk->sk, + &vmci_trans(vsk)->dg_handle); + if (err < VMCI_SUCCESS) + return vmci_transport_error_to_vsock_error(err); + vsock_addr_init(&vsk->local_addr, addr->svm_cid, + vmci_trans(vsk)->dg_handle.resource); + + return 0; +} + +static int vmci_transport_dgram_enqueue( + struct vsock_sock *vsk, + struct sockaddr_vm *remote_addr, + struct iovec *iov, + size_t len) +{ + int err; + struct vmci_datagram *dg; + + if (len > VMCI_MAX_DG_PAYLOAD_SIZE) + return -EMSGSIZE; + + if (!vmci_transport_allow_dgram(vsk, remote_addr->svm_cid)) + return -EPERM; + + /* Allocate a buffer for the user's message and our packet header. */ + dg = kmalloc(len + sizeof(*dg), GFP_KERNEL); + if (!dg) + return -ENOMEM; + + memcpy_fromiovec(VMCI_DG_PAYLOAD(dg), iov, len); + + dg->dst = vmci_make_handle(remote_addr->svm_cid, + remote_addr->svm_port); + dg->src = vmci_make_handle(vsk->local_addr.svm_cid, + vsk->local_addr.svm_port); + dg->payload_size = len; + + err = vmci_datagram_send(dg); + kfree(dg); + if (err < 0) + return vmci_transport_error_to_vsock_error(err); + + return err - sizeof(*dg); +} + +static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, + struct vsock_sock *vsk, + struct msghdr *msg, size_t len, + int flags) +{ + int err; + int noblock; + struct vmci_datagram *dg; + size_t payload_len; + struct sk_buff *skb; + + noblock = flags & MSG_DONTWAIT; + + if (flags & MSG_OOB || flags & MSG_ERRQUEUE) + return -EOPNOTSUPP; + + /* Retrieve the head sk_buff from the socket's receive queue. */ + err = 0; + skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); + if (err) + return err; + + if (!skb) + return -EAGAIN; + + dg = (struct vmci_datagram *)skb->data; + if (!dg) + /* err is 0, meaning we read zero bytes. */ + goto out; + + payload_len = dg->payload_size; + /* Ensure the sk_buff matches the payload size claimed in the packet. */ + if (payload_len != skb->len - sizeof(*dg)) { + err = -EINVAL; + goto out; + } + + if (payload_len > len) { + payload_len = len; + msg->msg_flags |= MSG_TRUNC; + } + + /* Place the datagram payload in the user's iovec. */ + err = skb_copy_datagram_iovec(skb, sizeof(*dg), msg->msg_iov, + payload_len); + if (err) + goto out; + + msg->msg_namelen = 0; + if (msg->msg_name) { + struct sockaddr_vm *vm_addr; + + /* Provide the address of the sender. */ + vm_addr = (struct sockaddr_vm *)msg->msg_name; + vsock_addr_init(vm_addr, dg->src.context, dg->src.resource); + msg->msg_namelen = sizeof(*vm_addr); + } + err = payload_len; + +out: + skb_free_datagram(&vsk->sk, skb); + return err; +} + +static bool vmci_transport_dgram_allow(u32 cid, u32 port) +{ + if (cid == VMADDR_CID_HYPERVISOR) { + /* Registrations of PBRPC Servers do not modify VMX/Hypervisor + * state and are allowed. 
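+ * Any other datagram port on the hypervisor context is refused;
+ * datagrams to all other contexts are always allowed here.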
+ */ + return port == VMCI_UNITY_PBRPC_REGISTER; + } + + return true; +} + +static int vmci_transport_connect(struct vsock_sock *vsk) +{ + int err; + bool old_pkt_proto = false; + struct sock *sk = &vsk->sk; + + if (vmci_transport_old_proto_override(&old_pkt_proto) && + old_pkt_proto) { + err = vmci_transport_send_conn_request( + sk, vmci_trans(vsk)->queue_pair_size); + if (err < 0) { + sk->sk_state = SS_UNCONNECTED; + return err; + } + } else { + int supported_proto_versions = + vmci_transport_new_proto_supported_versions(); + err = vmci_transport_send_conn_request2( + sk, vmci_trans(vsk)->queue_pair_size, + supported_proto_versions); + if (err < 0) { + sk->sk_state = SS_UNCONNECTED; + return err; + } + + vsk->sent_request = true; + } + + return err; +} + +static ssize_t vmci_transport_stream_dequeue( + struct vsock_sock *vsk, + struct iovec *iov, + size_t len, + int flags) +{ + if (flags & MSG_PEEK) + return vmci_qpair_peekv(vmci_trans(vsk)->qpair, iov, len, 0); + else + return vmci_qpair_dequev(vmci_trans(vsk)->qpair, iov, len, 0); +} + +static ssize_t vmci_transport_stream_enqueue( + struct vsock_sock *vsk, + struct iovec *iov, + size_t len) +{ + return vmci_qpair_enquev(vmci_trans(vsk)->qpair, iov, len, 0); +} + +static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk) +{ + return vmci_qpair_consume_buf_ready(vmci_trans(vsk)->qpair); +} + +static s64 vmci_transport_stream_has_space(struct vsock_sock *vsk) +{ + return vmci_qpair_produce_free_space(vmci_trans(vsk)->qpair); +} + +static u64 vmci_transport_stream_rcvhiwat(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->consume_size; +} + +static bool vmci_transport_stream_is_active(struct vsock_sock *vsk) +{ + return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle); +} + +static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_size; +} + +static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_min_size; +} + +static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_max_size; +} + +static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) +{ + if (val < vmci_trans(vsk)->queue_pair_min_size) + vmci_trans(vsk)->queue_pair_min_size = val; + if (val > vmci_trans(vsk)->queue_pair_max_size) + vmci_trans(vsk)->queue_pair_max_size = val; + vmci_trans(vsk)->queue_pair_size = val; +} + +static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk, + u64 val) +{ + if (val > vmci_trans(vsk)->queue_pair_size) + vmci_trans(vsk)->queue_pair_size = val; + vmci_trans(vsk)->queue_pair_min_size = val; +} + +static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk, + u64 val) +{ + if (val < vmci_trans(vsk)->queue_pair_size) + vmci_trans(vsk)->queue_pair_size = val; + vmci_trans(vsk)->queue_pair_max_size = val; +} + +static int vmci_transport_notify_poll_in( + struct vsock_sock *vsk, + size_t target, + bool *data_ready_now) +{ + return vmci_trans(vsk)->notify_ops->poll_in( + &vsk->sk, target, data_ready_now); +} + +static int vmci_transport_notify_poll_out( + struct vsock_sock *vsk, + size_t target, + bool *space_available_now) +{ + return vmci_trans(vsk)->notify_ops->poll_out( + &vsk->sk, target, space_available_now); +} + +static int vmci_transport_notify_recv_init( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_init( + &vsk->sk, target, + (struct 
vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_pre_block( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_pre_block( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_pre_dequeue( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_pre_dequeue( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_post_dequeue( + struct vsock_sock *vsk, + size_t target, + ssize_t copied, + bool data_read, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_post_dequeue( + &vsk->sk, target, copied, data_read, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_send_init( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_init( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_pre_block( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_pre_block( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_pre_enqueue( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_pre_enqueue( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_post_enqueue( + struct vsock_sock *vsk, + ssize_t written, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_post_enqueue( + &vsk->sk, written, + (struct vmci_transport_send_notify_data *)data); +} + +static bool vmci_transport_old_proto_override(bool *old_pkt_proto) +{ + if (PROTOCOL_OVERRIDE != -1) { + if (PROTOCOL_OVERRIDE == 0) + *old_pkt_proto = true; + else + *old_pkt_proto = false; + + pr_info("Proto override in use\n"); + return true; + } + + return false; +} + +static bool vmci_transport_proto_to_notify_struct(struct sock *sk, + u16 *proto, + bool old_pkt_proto) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (old_pkt_proto) { + if (*proto != VSOCK_PROTO_INVALID) { + pr_err("Can't set both an old and new protocol\n"); + return false; + } + vmci_trans(vsk)->notify_ops = &vmci_transport_notify_pkt_ops; + goto exit; + } + + switch (*proto) { + case VSOCK_PROTO_PKT_ON_NOTIFY: + vmci_trans(vsk)->notify_ops = + &vmci_transport_notify_pkt_q_state_ops; + break; + default: + pr_err("Unknown notify protocol version\n"); + return false; + } + +exit: + vmci_trans(vsk)->notify_ops->socket_init(sk); + return true; +} + +static u16 vmci_transport_new_proto_supported_versions(void) +{ + if (PROTOCOL_OVERRIDE != -1) + return PROTOCOL_OVERRIDE; + + return VSOCK_PROTO_ALL_SUPPORTED; +} + +static u32 vmci_transport_get_local_cid(void) +{ + return vmci_get_context_id(); +} + +static struct vsock_transport vmci_transport = { + .init = vmci_transport_socket_init, + .destruct = vmci_transport_destruct, + .release = vmci_transport_release, + .connect = vmci_transport_connect, + .dgram_bind = vmci_transport_dgram_bind, + .dgram_dequeue = vmci_transport_dgram_dequeue, + .dgram_enqueue = vmci_transport_dgram_enqueue, + .dgram_allow = vmci_transport_dgram_allow, + 
.stream_dequeue = vmci_transport_stream_dequeue, + .stream_enqueue = vmci_transport_stream_enqueue, + .stream_has_data = vmci_transport_stream_has_data, + .stream_has_space = vmci_transport_stream_has_space, + .stream_rcvhiwat = vmci_transport_stream_rcvhiwat, + .stream_is_active = vmci_transport_stream_is_active, + .stream_allow = vmci_transport_stream_allow, + .notify_poll_in = vmci_transport_notify_poll_in, + .notify_poll_out = vmci_transport_notify_poll_out, + .notify_recv_init = vmci_transport_notify_recv_init, + .notify_recv_pre_block = vmci_transport_notify_recv_pre_block, + .notify_recv_pre_dequeue = vmci_transport_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = vmci_transport_notify_recv_post_dequeue, + .notify_send_init = vmci_transport_notify_send_init, + .notify_send_pre_block = vmci_transport_notify_send_pre_block, + .notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue, + .notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue, + .shutdown = vmci_transport_shutdown, + .set_buffer_size = vmci_transport_set_buffer_size, + .set_min_buffer_size = vmci_transport_set_min_buffer_size, + .set_max_buffer_size = vmci_transport_set_max_buffer_size, + .get_buffer_size = vmci_transport_get_buffer_size, + .get_min_buffer_size = vmci_transport_get_min_buffer_size, + .get_max_buffer_size = vmci_transport_get_max_buffer_size, + .get_local_cid = vmci_transport_get_local_cid, +}; + +static int __init vmci_transport_init(void) +{ + int err; + + /* Create the datagram handle that we will use to send and receive all + * VSocket control messages for this context. + */ + err = vmci_transport_datagram_create_hnd(VMCI_TRANSPORT_PACKET_RID, + VMCI_FLAG_ANYCID_DG_HND, + vmci_transport_recv_stream_cb, + NULL, + &vmci_transport_stream_handle); + if (err < VMCI_SUCCESS) { + pr_err("Unable to create datagram handle. (%d)\n", err); + return vmci_transport_error_to_vsock_error(err); + } + + err = vmci_event_subscribe(VMCI_EVENT_QP_RESUMED, + vmci_transport_qp_resumed_cb, + NULL, &vmci_transport_qp_resumed_sub_id); + if (err < VMCI_SUCCESS) { + pr_err("Unable to subscribe to resumed event. 
(%d)\n", err); + err = vmci_transport_error_to_vsock_error(err); + vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + goto err_destroy_stream_handle; + } + + err = vsock_core_init(&vmci_transport); + if (err < 0) + goto err_unsubscribe; + + return 0; + +err_unsubscribe: + vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id); +err_destroy_stream_handle: + vmci_datagram_destroy_handle(vmci_transport_stream_handle); + return err; +} +module_init(vmci_transport_init); + +static void __exit vmci_transport_exit(void) +{ + if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) { + if (vmci_datagram_destroy_handle( + vmci_transport_stream_handle) != VMCI_SUCCESS) + pr_err("Couldn't destroy datagram handle\n"); + vmci_transport_stream_handle = VMCI_INVALID_HANDLE; + } + + if (vmci_transport_qp_resumed_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id); + vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + } + + vsock_core_exit(); +} +module_exit(vmci_transport_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("vmware_vsock"); +MODULE_ALIAS_NETPROTO(PF_VSOCK); diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h new file mode 100644 index 000000000000..1bf991803ec0 --- /dev/null +++ b/net/vmw_vsock/vmci_transport.h @@ -0,0 +1,139 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VMCI_TRANSPORT_H_ +#define _VMCI_TRANSPORT_H_ + +#include +#include + +#include "vsock_addr.h" +#include "af_vsock.h" + +/* If the packet format changes in a release then this should change too. */ +#define VMCI_TRANSPORT_PACKET_VERSION 1 + +/* The resource ID on which control packets are sent. */ +#define VMCI_TRANSPORT_PACKET_RID 1 + +#define VSOCK_PROTO_INVALID 0 +#define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0) +#define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY) + +#define vmci_trans(_vsk) ((struct vmci_transport *)((_vsk)->trans)) + +enum vmci_transport_packet_type { + VMCI_TRANSPORT_PACKET_TYPE_INVALID = 0, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE, + VMCI_TRANSPORT_PACKET_TYPE_OFFER, + VMCI_TRANSPORT_PACKET_TYPE_ATTACH, + VMCI_TRANSPORT_PACKET_TYPE_WROTE, + VMCI_TRANSPORT_PACKET_TYPE_READ, + VMCI_TRANSPORT_PACKET_TYPE_RST, + VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN, + VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE, + VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST2, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2, + VMCI_TRANSPORT_PACKET_TYPE_MAX +}; + +struct vmci_transport_waiting_info { + u64 generation; + u64 offset; +}; + +/* Control packet type for STREAM sockets. DGRAMs have no control packets nor + * special packet header for data packets, they are just raw VMCI DGRAM + * messages. For STREAMs, control packets are sent over the control channel + * while data is written and read directly from queue pairs with no packet + * format. 
+ */ +struct vmci_transport_packet { + struct vmci_datagram dg; + u8 version; + u8 type; + u16 proto; + u32 src_port; + u32 dst_port; + u32 _reserved2; + union { + u64 size; + u64 mode; + struct vmci_handle handle; + struct vmci_transport_waiting_info wait; + } u; +}; + +struct vmci_transport_notify_pkt { + u64 write_notify_window; + u64 write_notify_min_window; + bool peer_waiting_read; + bool peer_waiting_write; + bool peer_waiting_write_detected; + bool sent_waiting_read; + bool sent_waiting_write; + struct vmci_transport_waiting_info peer_waiting_read_info; + struct vmci_transport_waiting_info peer_waiting_write_info; + u64 produce_q_generation; + u64 consume_q_generation; +}; + +struct vmci_transport_notify_pkt_q_state { + u64 write_notify_window; + u64 write_notify_min_window; + bool peer_waiting_write; + bool peer_waiting_write_detected; +}; + +union vmci_transport_notify { + struct vmci_transport_notify_pkt pkt; + struct vmci_transport_notify_pkt_q_state pkt_q_state; +}; + +/* Our transport-specific data. */ +struct vmci_transport { + /* For DGRAMs. */ + struct vmci_handle dg_handle; + /* For STREAMs. */ + struct vmci_handle qp_handle; + struct vmci_qp *qpair; + u64 produce_size; + u64 consume_size; + u64 queue_pair_size; + u64 queue_pair_min_size; + u64 queue_pair_max_size; + u32 attach_sub_id; + u32 detach_sub_id; + union vmci_transport_notify notify; + struct vmci_transport_notify_ops *notify_ops; +}; + +int vmci_transport_register(void); +void vmci_transport_unregister(void); + +int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src); +int vmci_transport_send_read_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src); +int vmci_transport_send_wrote(struct sock *sk); +int vmci_transport_send_read(struct sock *sk); +int vmci_transport_send_waiting_write(struct sock *sk, + struct vmci_transport_waiting_info *wait); +int vmci_transport_send_waiting_read(struct sock *sk, + struct vmci_transport_waiting_info *wait); + +#endif diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c new file mode 100644 index 000000000000..9a730744e7bc --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -0,0 +1,680 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include + +#include "vmci_transport_notify.h" + +#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name) + +static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + bool retval; + u64 notify_limit; + + if (!PKT_FIELD(vsk, peer_waiting_write)) + return false; + +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + /* When the sender blocks, we take that as a sign that the sender is + * faster than the receiver. To reduce the transmit rate of the sender, + * we delay the sending of the read notification by decreasing the + * write_notify_window. 
The notification is delayed until the number of + * bytes used in the queue drops below the write_notify_window. + */ + + if (!PKT_FIELD(vsk, peer_waiting_write_detected)) { + PKT_FIELD(vsk, peer_waiting_write_detected) = true; + if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) { + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + } else { + PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + + } + } + notify_limit = vmci_trans(vsk)->consume_size - + PKT_FIELD(vsk, write_notify_window); +#else + notify_limit = 0; +#endif + + /* For now we ignore the wait information and just see if the free + * space exceeds the notify limit. Note that improving this function + * to be more intelligent will not require a protocol change and will + * retain compatibility between endpoints with mixed versions of this + * function. + * + * The notify_limit is used to delay notifications in the case where + * flow control is enabled. Below the test is expressed in terms of + * free space in the queue: if free_space > ConsumeSize - + * write_notify_window then notify An alternate way of expressing this + * is to rewrite the expression to use the data ready in the receive + * queue: if write_notify_window > bufferReady then notify as + * free_space == ConsumeSize - bufferReady. + */ + retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) > + notify_limit; +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + if (retval) { + /* + * Once we notify the peer, we reset the detected flag so the + * next wait will again cause a decrease in the window size. + */ + + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + } +#endif + return retval; +#else + return true; +#endif +} + +static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + if (!PKT_FIELD(vsk, peer_waiting_read)) + return false; + + /* For now we ignore the wait information and just see if there is any + * data for our peer to read. Note that improving this function to be + * more intelligent will not require a protocol change and will retain + * compatibility between endpoints with mixed versions of this + * function. 
+ */ + return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0; +#else + return true; +#endif +} + +static void +vmci_transport_handle_waiting_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + PKT_FIELD(vsk, peer_waiting_read) = true; + memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait, + sizeof(PKT_FIELD(vsk, peer_waiting_read_info))); + + if (vmci_transport_notify_waiting_read(vsk)) { + bool sent; + + if (bottom_half) + sent = vmci_transport_send_wrote_bh(dst, src) > 0; + else + sent = vmci_transport_send_wrote(sk) > 0; + + if (sent) + PKT_FIELD(vsk, peer_waiting_read) = false; + } +#endif +} + +static void +vmci_transport_handle_waiting_write(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + PKT_FIELD(vsk, peer_waiting_write) = true; + memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait, + sizeof(PKT_FIELD(vsk, peer_waiting_write_info))); + + if (vmci_transport_notify_waiting_write(vsk)) { + bool sent; + + if (bottom_half) + sent = vmci_transport_send_read_bh(dst, src) > 0; + else + sent = vmci_transport_send_read(sk) > 0; + + if (sent) + PKT_FIELD(vsk, peer_waiting_write) = false; + } +#endif +} + +static void +vmci_transport_handle_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + PKT_FIELD(vsk, sent_waiting_write) = false; +#endif + + sk->sk_write_space(sk); +} + +static bool send_waiting_read(struct sock *sk, u64 room_needed) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + struct vmci_transport_waiting_info waiting_info; + u64 tail; + u64 head; + u64 room_left; + bool ret; + + vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, sent_waiting_read)) + return true; + + if (PKT_FIELD(vsk, write_notify_window) < + vmci_trans(vsk)->consume_size) + PKT_FIELD(vsk, write_notify_window) = + min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE, + vmci_trans(vsk)->consume_size); + + vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head); + room_left = vmci_trans(vsk)->consume_size - head; + if (room_needed >= room_left) { + waiting_info.offset = room_needed - room_left; + waiting_info.generation = + PKT_FIELD(vsk, consume_q_generation) + 1; + } else { + waiting_info.offset = head + room_needed; + waiting_info.generation = PKT_FIELD(vsk, consume_q_generation); + } + + ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0; + if (ret) + PKT_FIELD(vsk, sent_waiting_read) = true; + + return ret; +#else + return true; +#endif +} + +static bool send_waiting_write(struct sock *sk, u64 room_needed) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + struct vmci_transport_waiting_info waiting_info; + u64 tail; + u64 head; + u64 room_left; + bool ret; + + vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, sent_waiting_write)) + return true; + + vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head); + room_left = vmci_trans(vsk)->produce_size - tail; + if (room_needed + 1 >= room_left) { + /* Wraps around to current generation. 
*/ + waiting_info.offset = room_needed + 1 - room_left; + waiting_info.generation = PKT_FIELD(vsk, produce_q_generation); + } else { + waiting_info.offset = tail + room_needed + 1; + waiting_info.generation = + PKT_FIELD(vsk, produce_q_generation) - 1; + } + + ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0; + if (ret) + PKT_FIELD(vsk, sent_waiting_write) = true; + + return ret; +#else + return true; +#endif +} + +static int vmci_transport_send_read_notification(struct sock *sk) +{ + struct vsock_sock *vsk; + bool sent_read; + unsigned int retries; + int err; + + vsk = vsock_sk(sk); + sent_read = false; + retries = 0; + err = 0; + + if (vmci_transport_notify_waiting_write(vsk)) { + /* Notify the peer that we have read, retrying the send on + * failure up to our maximum value. XXX For now we just log + * the failure, but later we should schedule a work item to + * handle the resend until it succeeds. That would require + * keeping track of work items in the vsk and cleaning them up + * upon socket close. + */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_read && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_read(sk); + if (err >= 0) + sent_read = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) + pr_err("%p unable to send read notify to peer\n", sk); + else +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + PKT_FIELD(vsk, peer_waiting_write) = false; +#endif + + } + return err; +} + +static void +vmci_transport_handle_wrote(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk = vsock_sk(sk); + PKT_FIELD(vsk, sent_waiting_read) = false; +#endif + sk->sk_data_ready(sk, 0); +} + +static void vmci_transport_notify_pkt_socket_init(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_read) = false; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + PKT_FIELD(vsk, sent_waiting_read) = false; + PKT_FIELD(vsk, sent_waiting_write) = false; + PKT_FIELD(vsk, produce_q_generation) = 0; + PKT_FIELD(vsk, consume_q_generation) = 0; + + memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0, + sizeof(PKT_FIELD(vsk, peer_waiting_read_info))); + memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0, + sizeof(PKT_FIELD(vsk, peer_waiting_write_info))); +} + +static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk) +{ +} + +static int +vmci_transport_notify_pkt_poll_in(struct sock *sk, + size_t target, bool *data_ready_now) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk)) { + *data_ready_now = true; + } else { + /* We can't read right now because there is nothing in the + * queue. Ask for notifications when there is something to + * read. 
+ */ + if (sk->sk_state == SS_CONNECTED) { + if (!send_waiting_read(sk, 1)) + return -1; + + } + *data_ready_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_poll_out(struct sock *sk, + size_t target, bool *space_avail_now) +{ + s64 produce_q_free_space; + struct vsock_sock *vsk = vsock_sk(sk); + + produce_q_free_space = vsock_stream_has_space(vsk); + if (produce_q_free_space > 0) { + *space_avail_now = true; + return 0; + } else if (produce_q_free_space == 0) { + /* This is a connected socket but we can't currently send data. + * Notify the peer that we are waiting if the queue is full. We + * only send a waiting write if the queue is full because + * otherwise we end up in an infinite WAITING_WRITE, READ, + * WAITING_WRITE, READ, etc. loop. Treat failing to send the + * notification as a socket error, passing that back through + * the mask. + */ + if (!send_waiting_write(sk, 1)) + return -1; + + *space_avail_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_init( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + +#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY + data->consume_head = 0; + data->produce_tail = 0; +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + data->notify_on_block = false; + + if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) { + PKT_FIELD(vsk, write_notify_min_window) = target + 1; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) { + /* If the current window is smaller than the new + * minimal window size, we need to reevaluate whether + * we need to notify the sender. If the number of ready + * bytes are smaller than the new window, we need to + * send a notification to the sender before we block. + */ + + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + data->notify_on_block = true; + } + } +#endif +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_pre_block( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + int err = 0; + + /* Notify our peer that we are waiting for data to read. */ + if (!send_waiting_read(sk, target)) { + err = -EHOSTUNREACH; + return err; + } +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + if (data->notify_on_block) { + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + data->notify_on_block = false; + } +#endif + + return err; +} + +static int +vmci_transport_notify_pkt_recv_pre_dequeue( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + /* Now consume up to len bytes from the queue. Note that since we have + * the socket locked we should copy at least ready bytes. + */ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, + &data->produce_tail, + &data->consume_head); +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_post_dequeue( + struct sock *sk, + size_t target, + ssize_t copied, + bool data_read, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk; + int err; + + vsk = vsock_sk(sk); + err = 0; + + if (data_read) { +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + /* Detect a wrap-around to maintain queue generation. Note + * that this is safe since we hold the socket lock across the + * two queue pair operations. 
+ */ + if (copied >= + vmci_trans(vsk)->consume_size - data->consume_head) + PKT_FIELD(vsk, consume_q_generation)++; +#endif + + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + } + return err; +} + +static int +vmci_transport_notify_pkt_send_init( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ +#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY + data->consume_head = 0; + data->produce_tail = 0; +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_send_pre_block( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + /* Notify our peer that we are waiting for room to write. */ + if (!send_waiting_write(sk, 1)) + return -EHOSTUNREACH; + + return 0; +} + +static int +vmci_transport_notify_pkt_send_pre_enqueue( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, + &data->produce_tail, + &data->consume_head); +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_send_post_enqueue( + struct sock *sk, + ssize_t written, + struct vmci_transport_send_notify_data *data) +{ + int err = 0; + struct vsock_sock *vsk; + bool sent_wrote = false; + int retries = 0; + + vsk = vsock_sk(sk); + +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + /* Detect a wrap-around to maintain queue generation. Note that this + * is safe since we hold the socket lock across the two queue pair + * operations. + */ + if (written >= vmci_trans(vsk)->produce_size - data->produce_tail) + PKT_FIELD(vsk, produce_q_generation)++; + +#endif + + if (vmci_transport_notify_waiting_read(vsk)) { + /* Notify the peer that we have written, retrying the send on + * failure up to our maximum value. See the XXX comment for the + * corresponding piece of code in StreamRecvmsg() for potential + * improvements. 
+ */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_wrote && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_wrote(sk); + if (err >= 0) + sent_wrote = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + pr_err("%p unable to send wrote notify to peer\n", sk); + return err; + } else { +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + PKT_FIELD(vsk, peer_waiting_read) = false; +#endif + } + } + return err; +} + +static void +vmci_transport_notify_pkt_handle_pkt( + struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src, bool *pkt_processed) +{ + bool processed = false; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_READ: + vmci_transport_handle_read(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE: + vmci_transport_handle_waiting_write(sk, pkt, bottom_half, + dst, src); + processed = true; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ: + vmci_transport_handle_waiting_read(sk, pkt, bottom_half, + dst, src); + processed = true; + break; + } + + if (pkt_processed) + *pkt_processed = processed; +} + +static void vmci_transport_notify_pkt_process_request(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +/* Socket control packet based operations. */ +struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, + vmci_transport_notify_pkt_poll_out, + vmci_transport_notify_pkt_handle_pkt, + vmci_transport_notify_pkt_recv_init, + vmci_transport_notify_pkt_recv_pre_block, + vmci_transport_notify_pkt_recv_pre_dequeue, + vmci_transport_notify_pkt_recv_post_dequeue, + vmci_transport_notify_pkt_send_init, + vmci_transport_notify_pkt_send_pre_block, + vmci_transport_notify_pkt_send_pre_enqueue, + vmci_transport_notify_pkt_send_post_enqueue, + vmci_transport_notify_pkt_process_request, + vmci_transport_notify_pkt_process_negotiate, +}; diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h new file mode 100644 index 000000000000..7df793249b6c --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify.h @@ -0,0 +1,83 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __VMCI_TRANSPORT_NOTIFY_H__ +#define __VMCI_TRANSPORT_NOTIFY_H__ + +#include +#include +#include +#include + +#include "vmci_transport.h" + +/* Comment this out to compare with old protocol. */ +#define VSOCK_OPTIMIZATION_WAITING_NOTIFY 1 +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) +/* Comment this out to remove flow control for "new" protocol */ +#define VSOCK_OPTIMIZATION_FLOW_CONTROL 1 +#endif + +#define VMCI_TRANSPORT_MAX_DGRAM_RESENDS 10 + +struct vmci_transport_recv_notify_data { + u64 consume_head; + u64 produce_tail; + bool notify_on_block; +}; + +struct vmci_transport_send_notify_data { + u64 consume_head; + u64 produce_tail; +}; + +/* Socket notification callbacks. */ +struct vmci_transport_notify_ops { + void (*socket_init) (struct sock *sk); + void (*socket_destruct) (struct vsock_sock *vsk); + int (*poll_in) (struct sock *sk, size_t target, + bool *data_ready_now); + int (*poll_out) (struct sock *sk, size_t target, + bool *space_avail_now); + void (*handle_notify_pkt) (struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, struct sockaddr_vm *dst, + struct sockaddr_vm *src, + bool *pkt_processed); + int (*recv_init) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_pre_block) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_pre_dequeue) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_post_dequeue) (struct sock *sk, size_t target, + ssize_t copied, bool data_read, + struct vmci_transport_recv_notify_data *data); + int (*send_init) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_pre_block) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_pre_enqueue) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_post_enqueue) (struct sock *sk, ssize_t written, + struct vmci_transport_send_notify_data *data); + void (*process_request) (struct sock *sk); + void (*process_negotiate) (struct sock *sk); +}; + +extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; +extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; + +#endif /* __VMCI_TRANSPORT_NOTIFY_H__ */ diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c new file mode 100644 index 000000000000..622bd7aa1016 --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -0,0 +1,438 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include +#include +#include +#include + +#include "vmci_transport_notify.h" + +#define PKT_FIELD(vsk, field_name) \ + (vmci_trans(vsk)->notify.pkt_q_state.field_name) + +static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk) +{ + bool retval; + u64 notify_limit; + + if (!PKT_FIELD(vsk, peer_waiting_write)) + return false; + + /* When the sender blocks, we take that as a sign that the sender is + * faster than the receiver. To reduce the transmit rate of the sender, + * we delay the sending of the read notification by decreasing the + * write_notify_window. The notification is delayed until the number of + * bytes used in the queue drops below the write_notify_window. + */ + + if (!PKT_FIELD(vsk, peer_waiting_write_detected)) { + PKT_FIELD(vsk, peer_waiting_write_detected) = true; + if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) { + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + } else { + PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + + } + } + notify_limit = vmci_trans(vsk)->consume_size - + PKT_FIELD(vsk, write_notify_window); + + /* The notify_limit is used to delay notifications in the case where + * flow control is enabled. Below the test is expressed in terms of + * free space in the queue: if free_space > ConsumeSize - + * write_notify_window then notify An alternate way of expressing this + * is to rewrite the expression to use the data ready in the receive + * queue: if write_notify_window > bufferReady then notify as + * free_space == ConsumeSize - bufferReady. + */ + + retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) > + notify_limit; + + if (retval) { + /* Once we notify the peer, we reset the detected flag so the + * next wait will again cause a decrease in the window size. + */ + + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + } + return retval; +} + +static void +vmci_transport_handle_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ + sk->sk_write_space(sk); +} + +static void +vmci_transport_handle_wrote(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ + sk->sk_data_ready(sk, 0); +} + +static void vsock_block_update_write_window(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, write_notify_window) < vmci_trans(vsk)->consume_size) + PKT_FIELD(vsk, write_notify_window) = + min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE, + vmci_trans(vsk)->consume_size); +} + +static int vmci_transport_send_read_notification(struct sock *sk) +{ + struct vsock_sock *vsk; + bool sent_read; + unsigned int retries; + int err; + + vsk = vsock_sk(sk); + sent_read = false; + retries = 0; + err = 0; + + if (vmci_transport_notify_waiting_write(vsk)) { + /* Notify the peer that we have read, retrying the send on + * failure up to our maximum value. XXX For now we just log + * the failure, but later we should schedule a work item to + * handle the resend until it succeeds. That would require + * keeping track of work items in the vsk and cleaning them up + * upon socket close. 
+ */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_read && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_read(sk); + if (err >= 0) + sent_read = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_read) + pr_err("%p unable to send read notification to peer\n", + sk); + else + PKT_FIELD(vsk, peer_waiting_write) = false; + + } + return err; +} + +static void vmci_transport_notify_pkt_socket_init(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; +} + +static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk) +{ + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; +} + +static int +vmci_transport_notify_pkt_poll_in(struct sock *sk, + size_t target, bool *data_ready_now) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk)) { + *data_ready_now = true; + } else { + /* We can't read right now because there is nothing in the + * queue. Ask for notifications when there is something to + * read. + */ + if (sk->sk_state == SS_CONNECTED) + vsock_block_update_write_window(sk); + *data_ready_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_poll_out(struct sock *sk, + size_t target, bool *space_avail_now) +{ + s64 produce_q_free_space; + struct vsock_sock *vsk = vsock_sk(sk); + + produce_q_free_space = vsock_stream_has_space(vsk); + if (produce_q_free_space > 0) { + *space_avail_now = true; + return 0; + } else if (produce_q_free_space == 0) { + /* This is a connected socket but we can't currently send data. + * Nothing else to do. + */ + *space_avail_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_init( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + data->consume_head = 0; + data->produce_tail = 0; + data->notify_on_block = false; + + if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) { + PKT_FIELD(vsk, write_notify_min_window) = target + 1; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) { + /* If the current window is smaller than the new + * minimal window size, we need to reevaluate whether + * we need to notify the sender. If the number of ready + * bytes are smaller than the new window, we need to + * send a notification to the sender before we block. 
+ */ + + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + data->notify_on_block = true; + } + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_pre_block( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + int err = 0; + + vsock_block_update_write_window(sk); + + if (data->notify_on_block) { + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + data->notify_on_block = false; + } + + return err; +} + +static int +vmci_transport_notify_pkt_recv_post_dequeue( + struct sock *sk, + size_t target, + ssize_t copied, + bool data_read, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk; + int err; + bool was_full = false; + u64 free_space; + + vsk = vsock_sk(sk); + err = 0; + + if (data_read) { + smp_mb(); + + free_space = + vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair); + was_full = free_space == copied; + + if (was_full) + PKT_FIELD(vsk, peer_waiting_write) = true; + + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + /* See the comment in + * vmci_transport_notify_pkt_send_post_enqueue(). + */ + sk->sk_data_ready(sk, 0); + } + + return err; +} + +static int +vmci_transport_notify_pkt_send_init( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + data->consume_head = 0; + data->produce_tail = 0; + + return 0; +} + +static int +vmci_transport_notify_pkt_send_post_enqueue( + struct sock *sk, + ssize_t written, + struct vmci_transport_send_notify_data *data) +{ + int err = 0; + struct vsock_sock *vsk; + bool sent_wrote = false; + bool was_empty; + int retries = 0; + + vsk = vsock_sk(sk); + + smp_mb(); + + was_empty = + vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) == written; + if (was_empty) { + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_wrote && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_wrote(sk); + if (err >= 0) + sent_wrote = true; + + retries++; + } + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_wrote) { + pr_err("%p unable to send wrote notification to peer\n", + sk); + return err; + } + + return err; +} + +static void +vmci_transport_notify_pkt_handle_pkt( + struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src, bool *pkt_processed) +{ + bool processed = false; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_READ: + vmci_transport_handle_read(sk, pkt, bottom_half, dst, src); + processed = true; + break; + } + + if (pkt_processed) + *pkt_processed = processed; +} + +static void vmci_transport_notify_pkt_process_request(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static int +vmci_transport_notify_pkt_recv_pre_dequeue( + 
struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +static int +vmci_transport_notify_pkt_send_pre_block( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +static int +vmci_transport_notify_pkt_send_pre_enqueue( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +/* Socket always on control packet based operations. */ +struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, + vmci_transport_notify_pkt_poll_out, + vmci_transport_notify_pkt_handle_pkt, + vmci_transport_notify_pkt_recv_init, + vmci_transport_notify_pkt_recv_pre_block, + vmci_transport_notify_pkt_recv_pre_dequeue, + vmci_transport_notify_pkt_recv_post_dequeue, + vmci_transport_notify_pkt_send_init, + vmci_transport_notify_pkt_send_pre_block, + vmci_transport_notify_pkt_send_pre_enqueue, + vmci_transport_notify_pkt_send_post_enqueue, + vmci_transport_notify_pkt_process_request, + vmci_transport_notify_pkt_process_negotiate, +}; diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c new file mode 100644 index 000000000000..b7df1aea7c59 --- /dev/null +++ b/net/vmw_vsock/vsock_addr.c @@ -0,0 +1,86 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include +#include +#include +#include + +#include "vsock_addr.h" + +void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port) +{ + memset(addr, 0, sizeof(*addr)); + addr->svm_family = AF_VSOCK; + addr->svm_cid = cid; + addr->svm_port = port; +} +EXPORT_SYMBOL_GPL(vsock_addr_init); + +int vsock_addr_validate(const struct sockaddr_vm *addr) +{ + if (!addr) + return -EFAULT; + + if (addr->svm_family != AF_VSOCK) + return -EAFNOSUPPORT; + + if (addr->svm_zero[0] != 0) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(vsock_addr_validate); + +bool vsock_addr_bound(const struct sockaddr_vm *addr) +{ + return addr->svm_port != VMADDR_PORT_ANY; +} +EXPORT_SYMBOL_GPL(vsock_addr_bound); + +void vsock_addr_unbind(struct sockaddr_vm *addr) +{ + vsock_addr_init(addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); +} +EXPORT_SYMBOL_GPL(vsock_addr_unbind); + +bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other) +{ + return addr->svm_cid == other->svm_cid && + addr->svm_port == other->svm_port; +} +EXPORT_SYMBOL_GPL(vsock_addr_equals_addr); + +bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other) +{ + return (addr->svm_cid == VMADDR_CID_ANY || + other->svm_cid == VMADDR_CID_ANY || + addr->svm_cid == other->svm_cid) && + addr->svm_port == other->svm_port; +} +EXPORT_SYMBOL_GPL(vsock_addr_equals_addr_any); + +int vsock_addr_cast(const struct sockaddr *addr, + size_t len, struct sockaddr_vm **out_addr) +{ + if (len < sizeof(**out_addr)) + return -EFAULT; + + *out_addr = (struct sockaddr_vm *)addr; + return vsock_addr_validate(*out_addr); +} +EXPORT_SYMBOL_GPL(vsock_addr_cast); diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h new file mode 100644 index 000000000000..cdfbcefdf843 --- /dev/null +++ b/net/vmw_vsock/vsock_addr.h @@ -0,0 +1,32 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VSOCK_ADDR_H_ +#define _VSOCK_ADDR_H_ + +#include + +void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port); +int vsock_addr_validate(const struct sockaddr_vm *addr); +bool vsock_addr_bound(const struct sockaddr_vm *addr); +void vsock_addr_unbind(struct sockaddr_vm *addr); +bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other); +bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other); +int vsock_addr_cast(const struct sockaddr *addr, size_t len, + struct sockaddr_vm **out_addr); + +#endif diff --git a/net/vmw_vsock/vsock_version.h b/net/vmw_vsock/vsock_version.h new file mode 100644 index 000000000000..4df7f5e2151c --- /dev/null +++ b/net/vmw_vsock/vsock_version.h @@ -0,0 +1,22 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2011-2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VSOCK_VERSION_H_ +#define _VSOCK_VERSION_H_ + +#define VSOCK_DRIVER_VERSION_PARTS { 1, 0, 0, 0 } +#define VSOCK_DRIVER_VERSION_STRING "1.0.0.0-k" + +#endif /* _VSOCK_VERSION_H_ */ -- cgit v1.2.3-71-gd317 From 86fbe9bb599fcaf7e92e38dbfdad0414a2d68f7d Mon Sep 17 00:00:00 2001 From: David Ward Date: Fri, 8 Feb 2013 17:17:07 +0000 Subject: net/8021q: Implement Multiple VLAN Registration Protocol (MVRP) Initial implementation of the Multiple VLAN Registration Protocol (MVRP) from IEEE 802.1Q-2011, based on the existing implementation of the GARP VLAN Registration Protocol (GVRP). Signed-off-by: David Ward Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + include/uapi/linux/if_vlan.h | 1 + net/8021q/Kconfig | 11 +++++++ net/8021q/Makefile | 1 + net/8021q/vlan.c | 27 +++++++++++++--- net/8021q/vlan.h | 16 ++++++++++ net/8021q/vlan_dev.c | 12 +++++++- net/8021q/vlan_mvrp.c | 72 +++++++++++++++++++++++++++++++++++++++++++ net/8021q/vlan_netlink.c | 2 +- 9 files changed, 136 insertions(+), 7 deletions(-) create mode 100644 net/8021q/vlan_mvrp.c (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 67fb87ca1094..798032d01112 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -83,6 +83,7 @@ #define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */ #define ETH_P_TIPC 0x88CA /* TIPC */ #define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */ +#define ETH_P_MVRP 0x88F5 /* 802.1Q MVRP */ #define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */ #define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */ #define ETH_P_TDLS 0x890D /* TDLS */ diff --git a/include/uapi/linux/if_vlan.h b/include/uapi/linux/if_vlan.h index 0744f8e65d15..7e5e6b397332 100644 --- a/include/uapi/linux/if_vlan.h +++ b/include/uapi/linux/if_vlan.h @@ -34,6 +34,7 @@ enum vlan_flags { VLAN_FLAG_REORDER_HDR = 0x1, VLAN_FLAG_GVRP = 0x2, VLAN_FLAG_LOOSE_BINDING = 0x4, + VLAN_FLAG_MVRP = 0x8, }; enum vlan_name_types { diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig index fa073a54963e..8f7517df41a5 100644 --- a/net/8021q/Kconfig +++ b/net/8021q/Kconfig @@ -27,3 +27,14 @@ config VLAN_8021Q_GVRP automatic propagation of registered VLANs to switches. If unsure, say N. + +config VLAN_8021Q_MVRP + bool "MVRP (Multiple VLAN Registration Protocol) support" + depends on VLAN_8021Q + select MRP + help + Select this to enable MVRP end-system support. MVRP is used for + automatic propagation of registered VLANs to switches; it + supersedes GVRP and is not backwards-compatible. + + If unsure, say N. 
diff --git a/net/8021q/Makefile b/net/8021q/Makefile index 9f4f174ead1c..7bc8db08d7ef 100644 --- a/net/8021q/Makefile +++ b/net/8021q/Makefile @@ -6,5 +6,6 @@ obj-$(CONFIG_VLAN_8021Q) += 8021q.o 8021q-y := vlan.o vlan_dev.o vlan_netlink.o 8021q-$(CONFIG_VLAN_8021Q_GVRP) += vlan_gvrp.o +8021q-$(CONFIG_VLAN_8021Q_MVRP) += vlan_mvrp.o 8021q-$(CONFIG_PROC_FS) += vlanproc.o diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index addc578d5443..a18714469bf7 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -95,6 +95,8 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp->nr_vlan_devs--; + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_leave(dev); if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_leave(dev); @@ -107,8 +109,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) netdev_upper_dev_unlink(real_dev, dev); - if (grp->nr_vlan_devs == 0) + if (grp->nr_vlan_devs == 0) { + vlan_mvrp_uninit_applicant(real_dev); vlan_gvrp_uninit_applicant(real_dev); + } /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); @@ -151,15 +155,18 @@ int register_vlan_dev(struct net_device *dev) err = vlan_gvrp_init_applicant(real_dev); if (err < 0) goto out_vid_del; + err = vlan_mvrp_init_applicant(real_dev); + if (err < 0) + goto out_uninit_gvrp; } err = vlan_group_prealloc_vid(grp, vlan_id); if (err < 0) - goto out_uninit_applicant; + goto out_uninit_mvrp; err = netdev_upper_dev_link(real_dev, dev); if (err) - goto out_uninit_applicant; + goto out_uninit_mvrp; err = register_netdevice(dev); if (err < 0) @@ -181,7 +188,10 @@ int register_vlan_dev(struct net_device *dev) out_upper_dev_unlink: netdev_upper_dev_unlink(real_dev, dev); -out_uninit_applicant: +out_uninit_mvrp: + if (grp->nr_vlan_devs == 0) + vlan_mvrp_uninit_applicant(real_dev); +out_uninit_gvrp: if (grp->nr_vlan_devs == 0) vlan_gvrp_uninit_applicant(real_dev); out_vid_del: @@ -655,13 +665,19 @@ static int __init vlan_proto_init(void) if (err < 0) goto err3; - err = vlan_netlink_init(); + err = vlan_mvrp_init(); if (err < 0) goto err4; + err = vlan_netlink_init(); + if (err < 0) + goto err5; + vlan_ioctl_set(vlan_ioctl_handler); return 0; +err5: + vlan_mvrp_uninit(); err4: vlan_gvrp_uninit(); err3: @@ -682,6 +698,7 @@ static void __exit vlan_cleanup_module(void) unregister_pernet_subsys(&vlan_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ + vlan_mvrp_uninit(); vlan_gvrp_uninit(); } diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index a4886d94c40c..670f1e8cfc0f 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -171,6 +171,22 @@ static inline int vlan_gvrp_init(void) { return 0; } static inline void vlan_gvrp_uninit(void) {} #endif +#ifdef CONFIG_VLAN_8021Q_MVRP +extern int vlan_mvrp_request_join(const struct net_device *dev); +extern void vlan_mvrp_request_leave(const struct net_device *dev); +extern int vlan_mvrp_init_applicant(struct net_device *dev); +extern void vlan_mvrp_uninit_applicant(struct net_device *dev); +extern int vlan_mvrp_init(void); +extern void vlan_mvrp_uninit(void); +#else +static inline int vlan_mvrp_request_join(const struct net_device *dev) { return 0; } +static inline void vlan_mvrp_request_leave(const struct net_device *dev) {} +static inline int vlan_mvrp_init_applicant(struct net_device *dev) { return 0; } +static inline void vlan_mvrp_uninit_applicant(struct net_device *dev) {} +static inline int vlan_mvrp_init(void) { return 0; } +static inline void vlan_mvrp_uninit(void) {} +#endif + extern const char vlan_fullname[]; 
extern const char vlan_version[]; extern int vlan_netlink_init(void); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 09f9108d4688..34df5b3c9b75 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -261,7 +261,7 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) u32 old_flags = vlan->flags; if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | - VLAN_FLAG_LOOSE_BINDING)) + VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) return -EINVAL; vlan->flags = (old_flags & ~mask) | (flags & mask); @@ -272,6 +272,13 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) else vlan_gvrp_request_leave(dev); } + + if (netif_running(dev) && (vlan->flags ^ old_flags) & VLAN_FLAG_MVRP) { + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_join(dev); + else + vlan_mvrp_request_leave(dev); + } return 0; } @@ -312,6 +319,9 @@ static int vlan_dev_open(struct net_device *dev) if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_join(dev); + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_join(dev); + if (netif_carrier_ok(real_dev)) netif_carrier_on(dev); return 0; diff --git a/net/8021q/vlan_mvrp.c b/net/8021q/vlan_mvrp.c new file mode 100644 index 000000000000..d9ec1d5964aa --- /dev/null +++ b/net/8021q/vlan_mvrp.c @@ -0,0 +1,72 @@ +/* + * IEEE 802.1Q Multiple VLAN Registration Protocol (MVRP) + * + * Copyright (c) 2012 Massachusetts Institute of Technology + * + * Adapted from code in net/8021q/vlan_gvrp.c + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include "vlan.h" + +#define MRP_MVRP_ADDRESS { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 } + +enum mvrp_attributes { + MVRP_ATTR_INVALID, + MVRP_ATTR_VID, + __MVRP_ATTR_MAX +}; +#define MVRP_ATTR_MAX (__MVRP_ATTR_MAX - 1) + +static struct mrp_application vlan_mrp_app __read_mostly = { + .type = MRP_APPLICATION_MVRP, + .maxattr = MVRP_ATTR_MAX, + .pkttype.type = htons(ETH_P_MVRP), + .group_address = MRP_MVRP_ADDRESS, + .version = 0, +}; + +int vlan_mvrp_request_join(const struct net_device *dev) +{ + const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + return mrp_request_join(vlan->real_dev, &vlan_mrp_app, + &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); +} + +void vlan_mvrp_request_leave(const struct net_device *dev) +{ + const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + mrp_request_leave(vlan->real_dev, &vlan_mrp_app, + &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); +} + +int vlan_mvrp_init_applicant(struct net_device *dev) +{ + return mrp_init_applicant(dev, &vlan_mrp_app); +} + +void vlan_mvrp_uninit_applicant(struct net_device *dev) +{ + mrp_uninit_applicant(dev, &vlan_mrp_app); +} + +int __init vlan_mvrp_init(void) +{ + return mrp_register_application(&vlan_mrp_app); +} + +void vlan_mvrp_uninit(void) +{ + mrp_unregister_application(&vlan_mrp_app); +} diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 708c80ea1874..1789658b7cd7 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -62,7 +62,7 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) flags = nla_data(data[IFLA_VLAN_FLAGS]); if ((flags->flags & flags->mask) & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | - VLAN_FLAG_LOOSE_BINDING)) + 
VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) return -EINVAL; } -- cgit v1.2.3-71-gd317 From b6a7bceb3b9315478657bc55884dfdcd104c9864 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Mon, 11 Feb 2013 23:56:40 -0800 Subject: nl80211: minor correction in sample wowlan mask calculation The match 00:xx:00:00:xx:00:00:00:00:xx:xx:xx (where xx indicates "don't care") should be represented by a pattern of twelve zero bytes, and a mask of "0xed,0x01", not "0xed,0x07". mask_len = (pat_len + 7) / 8 = (12 + 7) / 8 = 2 Hence the mask will be of 2 bytes. Replace each valid byte in pattern by 1 and don't care byte by 0: 10110111 1000 (0000) 1st byte of pattern corresponds to lower order bit in first byte of mask. And 9th byte of pattern corresponds to lower order bit in second byte of mask. With this logic the mask will be 11101101 00000001 = 0xed 0x01 Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9a2ecdc4136c..b23321154e8b 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2906,7 +2906,7 @@ enum nl80211_tx_power_setting { * corresponds to the lowest-order bit in the second byte of the mask. * For example: The match 00:xx:00:00:xx:00:00:00:00:xx:xx:xx (where * xx indicates "don't care") would be represented by a pattern of - * twelve zero bytes, and a mask of "0xed,0x07". + * twelve zero bytes, and a mask of "0xed,0x01". * Note that the pattern matching is done as though frames were not * 802.11 frames but 802.3 frames, i.e. the frame is fully unpacked * first (including SNAP header unpacking) and then matched. -- cgit v1.2.3-71-gd317 From bb92d19983a4b54be3e3b83441a8076d92cd04bc Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 12 Feb 2013 12:16:26 -0800 Subject: nl80211: add packet offset information for wowlan pattern If user knows the location of a wowlan pattern to be matched in Rx packet, he can provide an offset with the pattern. This will help drivers to ignore initial bytes and match the pattern efficiently. Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao [refactor pattern sending] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 +++ include/uapi/linux/nl80211.h | 12 +++++-- net/wireless/nl80211.c | 74 ++++++++++++++++++++++++++++---------------- 3 files changed, 60 insertions(+), 30 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 77686ca28948..d3a73818e44c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1576,6 +1576,7 @@ struct cfg80211_pmksa { * one bit per byte, in same format as nl80211 * @pattern: bytes to match where bitmask is 1 * @pattern_len: length of pattern (in bytes) + * @pkt_offset: packet offset (in bytes) * * Internal note: @mask and @pattern are allocated in one chunk of * memory, free @mask only! 
@@ -1583,6 +1584,7 @@ struct cfg80211_pmksa { struct cfg80211_wowlan_trig_pkt_pattern { u8 *mask, *pattern; int pattern_len; + int pkt_offset; }; /** @@ -2290,12 +2292,14 @@ enum wiphy_wowlan_support_flags { * (see nl80211.h for the pattern definition) * @pattern_max_len: maximum length of each pattern * @pattern_min_len: minimum length of each pattern + * @max_pkt_offset: maximum Rx packet offset */ struct wiphy_wowlan_support { u32 flags; int n_patterns; int pattern_max_len; int pattern_min_len; + int max_pkt_offset; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b23321154e8b..eb7b32247ec5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2910,6 +2910,8 @@ enum nl80211_tx_power_setting { * Note that the pattern matching is done as though frames were not * 802.11 frames but 802.3 frames, i.e. the frame is fully unpacked * first (including SNAP header unpacking) and then matched. + * @NL80211_WOWLAN_PKTPAT_OFFSET: packet offset, pattern is matched after + * these fixed number of bytes of received packet * @NUM_NL80211_WOWLAN_PKTPAT: number of attributes * @MAX_NL80211_WOWLAN_PKTPAT: max attribute number */ @@ -2917,6 +2919,7 @@ enum nl80211_wowlan_packet_pattern_attr { __NL80211_WOWLAN_PKTPAT_INVALID, NL80211_WOWLAN_PKTPAT_MASK, NL80211_WOWLAN_PKTPAT_PATTERN, + NL80211_WOWLAN_PKTPAT_OFFSET, NUM_NL80211_WOWLAN_PKTPAT, MAX_NL80211_WOWLAN_PKTPAT = NUM_NL80211_WOWLAN_PKTPAT - 1, @@ -2927,6 +2930,7 @@ enum nl80211_wowlan_packet_pattern_attr { * @max_patterns: maximum number of patterns supported * @min_pattern_len: minimum length of each pattern * @max_pattern_len: maximum length of each pattern + * @max_pkt_offset: maximum Rx packet offset * * This struct is carried in %NL80211_WOWLAN_TRIG_PKT_PATTERN when * that is part of %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED in the @@ -2936,6 +2940,7 @@ struct nl80211_wowlan_pattern_support { __u32 max_patterns; __u32 min_pattern_len; __u32 max_pattern_len; + __u32 max_pkt_offset; } __attribute__((packed)); /** @@ -2951,9 +2956,10 @@ struct nl80211_wowlan_pattern_support { * @NL80211_WOWLAN_TRIG_PKT_PATTERN: wake up on the specified packet patterns * which are passed in an array of nested attributes, each nested attribute * defining a with attributes from &struct nl80211_wowlan_trig_pkt_pattern. - * Each pattern defines a wakeup packet. The matching is done on the MSDU, - * i.e. as though the packet was an 802.3 packet, so the pattern matching - * is done after the packet is converted to the MSDU. + * Each pattern defines a wakeup packet. Packet offset is associated with + * each pattern which is used while matching the pattern. The matching is + * done on the MSDU, i.e. as though the packet was an 802.3 packet, so the + * pattern matching is done after the packet is converted to the MSDU. * * In %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, it is a binary attribute * carrying a &struct nl80211_wowlan_pattern_support. 
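[Editorial aside, not part of either patch above: a minimal userspace-style sketch of the pattern/mask/offset triple the two wowlan changes describe. build_mask() and care[] are illustrative names, not kernel or nl80211 API; the bit layout (one mask bit per pattern byte, lowest-order bit of each mask byte first) and the 0xed 0x01 result follow the corrected documentation.]

#include <stdint.h>
#include <string.h>

/* One mask bit per pattern byte; bit i lives in mask[i / 8] at bit
 * position i % 8, so pattern byte 0 maps to the lowest-order bit of
 * mask byte 0 and pattern byte 8 to the lowest-order bit of mask byte 1. */
static void build_mask(const int care[], int pat_len, uint8_t *mask)
{
	memset(mask, 0, (pat_len + 7) / 8);
	for (int i = 0; i < pat_len; i++)
		if (care[i])
			mask[i / 8] |= 1 << (i % 8);
}

/* For the sample match 00:xx:00:00:xx:00:00:00:00:xx:xx:xx,
 * care[] = {1,0,1,1,0,1,1,1,1,0,0,0} gives mask[0] = 0xed and
 * mask[1] = 0x01, matching the corrected comment; a nonzero
 * NL80211_WOWLAN_PKTPAT_OFFSET would only shift where this 12-byte
 * window is applied in the received packet. */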
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 93bc63eae076..cc0fad30b8c9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1238,6 +1238,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag dev->wiphy.wowlan.pattern_min_len, .max_pattern_len = dev->wiphy.wowlan.pattern_max_len, + .max_pkt_offset = + dev->wiphy.wowlan.max_pkt_offset, }; if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, sizeof(pat), &pat)) @@ -6895,6 +6897,39 @@ static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) } #ifdef CONFIG_PM +static int nl80211_send_wowlan_patterns(struct sk_buff *msg, + struct cfg80211_registered_device *rdev) +{ + struct nlattr *nl_pats, *nl_pat; + int i, pat_len; + + if (!rdev->wowlan->n_patterns) + return 0; + + nl_pats = nla_nest_start(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN); + if (!nl_pats) + return -ENOBUFS; + + for (i = 0; i < rdev->wowlan->n_patterns; i++) { + nl_pat = nla_nest_start(msg, i + 1); + if (!nl_pat) + return -ENOBUFS; + pat_len = rdev->wowlan->patterns[i].pattern_len; + if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK, + DIV_ROUND_UP(pat_len, 8), + rdev->wowlan->patterns[i].mask) || + nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN, + pat_len, rdev->wowlan->patterns[i].pattern) || + nla_put_u32(msg, NL80211_WOWLAN_PKTPAT_OFFSET, + rdev->wowlan->patterns[i].pkt_offset)) + return -ENOBUFS; + nla_nest_end(msg, nl_pat); + } + nla_nest_end(msg, nl_pats); + + return 0; +} + static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -6935,32 +6970,8 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) (rdev->wowlan->rfkill_release && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) goto nla_put_failure; - if (rdev->wowlan->n_patterns) { - struct nlattr *nl_pats, *nl_pat; - int i, pat_len; - - nl_pats = nla_nest_start(msg, - NL80211_WOWLAN_TRIG_PKT_PATTERN); - if (!nl_pats) - goto nla_put_failure; - - for (i = 0; i < rdev->wowlan->n_patterns; i++) { - nl_pat = nla_nest_start(msg, i + 1); - if (!nl_pat) - goto nla_put_failure; - pat_len = rdev->wowlan->patterns[i].pattern_len; - if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK, - DIV_ROUND_UP(pat_len, 8), - rdev->wowlan->patterns[i].mask) || - nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN, - pat_len, - rdev->wowlan->patterns[i].pattern)) - goto nla_put_failure; - nla_nest_end(msg, nl_pat); - } - nla_nest_end(msg, nl_pats); - } - + if (nl80211_send_wowlan_patterns(msg, rdev)) + goto nla_put_failure; nla_nest_end(msg, nl_wowlan); } @@ -7046,7 +7057,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) if (tb[NL80211_WOWLAN_TRIG_PKT_PATTERN]) { struct nlattr *pat; int n_patterns = 0; - int rem, pat_len, mask_len; + int rem, pat_len, mask_len, pkt_offset; struct nlattr *pat_tb[NUM_NL80211_WOWLAN_PKTPAT]; nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], @@ -7081,6 +7092,15 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) pat_len < wowlan->pattern_min_len) goto error; + if (!pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET]) + pkt_offset = 0; + else + pkt_offset = nla_get_u32( + pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET]); + if (pkt_offset > wowlan->max_pkt_offset) + goto error; + new_triggers.patterns[i].pkt_offset = pkt_offset; + new_triggers.patterns[i].mask = kmalloc(mask_len + pat_len, GFP_KERNEL); if (!new_triggers.patterns[i].mask) { -- cgit v1.2.3-71-gd317 From 2a0e047ed62f20664005881b8e7f9328f910316a 
Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Jan 2013 22:57:40 +0100 Subject: cfg80211: configuration for WoWLAN over TCP Intel Wireless devices are able to make a TCP connection after suspending, sending some data and waking up when the connection receives wakeup data (or breaks). Add the WoWLAN configuration and feature advertising API for it. Acked-by: David S. Miller Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 55 +++++++- include/uapi/linux/nl80211.h | 125 ++++++++++++++++++ net/wireless/core.h | 3 + net/wireless/nl80211.c | 295 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 474 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d3a73818e44c..7e6569e1f16f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -19,6 +19,7 @@ #include #include #include +#include #include /** @@ -1587,6 +1588,41 @@ struct cfg80211_wowlan_trig_pkt_pattern { int pkt_offset; }; +/** + * struct cfg80211_wowlan_tcp - TCP connection parameters + * + * @sock: (internal) socket for source port allocation + * @src: source IP address + * @dst: destination IP address + * @dst_mac: destination MAC address + * @src_port: source port + * @dst_port: destination port + * @payload_len: data payload length + * @payload: data payload buffer + * @payload_seq: payload sequence stamping configuration + * @data_interval: interval at which to send data packets + * @wake_len: wakeup payload match length + * @wake_data: wakeup payload match data + * @wake_mask: wakeup payload match mask + * @tokens_size: length of the tokens buffer + * @payload_tok: payload token usage configuration + */ +struct cfg80211_wowlan_tcp { + struct socket *sock; + __be32 src, dst; + u16 src_port, dst_port; + u8 dst_mac[ETH_ALEN]; + int payload_len; + const u8 *payload; + struct nl80211_wowlan_tcp_data_seq payload_seq; + u32 data_interval; + u32 wake_len; + const u8 *wake_data, *wake_mask; + u32 tokens_size; + /* must be last, variable member */ + struct nl80211_wowlan_tcp_data_token payload_tok; +}; + /** * struct cfg80211_wowlan - Wake on Wireless-LAN support info * @@ -1601,12 +1637,15 @@ struct cfg80211_wowlan_trig_pkt_pattern { * @eap_identity_req: wake up on EAP identity request packet * @four_way_handshake: wake up on 4-way handshake * @rfkill_release: wake up when rfkill is released + * @tcp: TCP connection establishment/wakeup parameters, see nl80211.h. + * NULL if not configured. */ struct cfg80211_wowlan { bool any, disconnect, magic_pkt, gtk_rekey_failure, eap_identity_req, four_way_handshake, rfkill_release; struct cfg80211_wowlan_trig_pkt_pattern *patterns; + struct cfg80211_wowlan_tcp *tcp; int n_patterns; }; @@ -1626,11 +1665,15 @@ struct cfg80211_wowlan { * frame triggers an 802.3 frame should be reported, for * disconnect due to deauth 802.11 frame. This indicates which * it is. 
+ * @tcp_match: TCP wakeup packet received + * @tcp_connlost: TCP connection lost or failed to establish + * @tcp_nomoretokens: TCP data ran out of tokens */ struct cfg80211_wowlan_wakeup { bool disconnect, magic_pkt, gtk_rekey_failure, eap_identity_req, four_way_handshake, - rfkill_release, packet_80211; + rfkill_release, packet_80211, + tcp_match, tcp_connlost, tcp_nomoretokens; s32 pattern_idx; u32 packet_present_len, packet_len; const void *packet; @@ -2285,6 +2328,14 @@ enum wiphy_wowlan_support_flags { WIPHY_WOWLAN_RFKILL_RELEASE = BIT(7), }; +struct wiphy_wowlan_tcp_support { + const struct nl80211_wowlan_tcp_data_token_feature *tok; + u32 data_payload_max; + u32 data_interval_max; + u32 wake_payload_max; + bool seq; +}; + /** * struct wiphy_wowlan_support - WoWLAN support data * @flags: see &enum wiphy_wowlan_support_flags @@ -2293,6 +2344,7 @@ enum wiphy_wowlan_support_flags { * @pattern_max_len: maximum length of each pattern * @pattern_min_len: minimum length of each pattern * @max_pkt_offset: maximum Rx packet offset + * @tcp: TCP wakeup support information */ struct wiphy_wowlan_support { u32 flags; @@ -2300,6 +2352,7 @@ struct wiphy_wowlan_support { int pattern_max_len; int pattern_min_len; int max_pkt_offset; + const struct wiphy_wowlan_tcp_support *tcp; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index eb7b32247ec5..5309b34930ea 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2991,6 +2991,17 @@ struct nl80211_wowlan_pattern_support { * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN: Original length of the 802.3 * packet, may be bigger than the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023 * attribute if the packet was truncated somewhere. + * @NL80211_WOWLAN_TRIG_TCP_CONNECTION: TCP connection wake, see DOC section + * "TCP connection wakeup" for more details. This is a nested attribute + * containing the exact information for establishing and keeping alive + * the TCP connection. + * @NL80211_WOWLAN_TRIG_TCP_WAKEUP_MATCH: For wakeup reporting only, the + * wakeup packet was received on the TCP connection + * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST: For wakeup reporting only, the + * TCP connection was lost or failed to be established + * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS: For wakeup reporting only, + * the TCP connection ran out of tokens to use for data to send to the + * service * @NUM_NL80211_WOWLAN_TRIG: number of wake on wireless triggers * @MAX_NL80211_WOWLAN_TRIG: highest wowlan trigger attribute number * @@ -3012,12 +3023,126 @@ enum nl80211_wowlan_triggers { NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN, NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023, NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN, + NL80211_WOWLAN_TRIG_TCP_CONNECTION, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS, /* keep last */ NUM_NL80211_WOWLAN_TRIG, MAX_NL80211_WOWLAN_TRIG = NUM_NL80211_WOWLAN_TRIG - 1 }; +/** + * DOC: TCP connection wakeup + * + * Some devices can establish a TCP connection in order to be woken up by a + * packet coming in from outside their network segment, or behind NAT. If + * configured, the device will establish a TCP connection to the given + * service, and periodically send data to that service. The first data + * packet is usually transmitted after SYN/ACK, also ACKing the SYN/ACK. + * The data packets can optionally include a (little endian) sequence + * number (in the TCP payload!) 
that is generated by the device, and, also + * optionally, a token from a list of tokens. This serves as a keep-alive + * with the service, and for NATed connections, etc. + * + * During this keep-alive period, the server doesn't send any data to the + * client. When receiving data, it is compared against the wakeup pattern + * (and mask) and if it matches, the host is woken up. Similarly, if the + * connection breaks or cannot be established to start with, the host is + * also woken up. + * + * Developer's note: ARP offload is required for this, otherwise TCP + * response packets might not go through correctly. + */ + +/** + * struct nl80211_wowlan_tcp_data_seq - WoWLAN TCP data sequence + * @start: starting value + * @offset: offset of sequence number in packet + * @len: length of the sequence value to write, 1 through 4 + * + * Note: don't confuse with the TCP sequence number(s), this is for the + * keepalive packet payload. The actual value is written into the packet + * in little endian. + */ +struct nl80211_wowlan_tcp_data_seq { + __u32 start, offset, len; +}; + +/** + * struct nl80211_wowlan_tcp_data_token - WoWLAN TCP data token config + * @offset: offset of token in packet + * @len: length of each token + * @token_stream: stream of data to be used for the tokens, the length must + * be a multiple of @len for this to make sense + */ +struct nl80211_wowlan_tcp_data_token { + __u32 offset, len; + __u8 token_stream[]; +}; + +/** + * struct nl80211_wowlan_tcp_data_token_feature - data token features + * @min_len: minimum token length + * @max_len: maximum token length + * @bufsize: total available token buffer size (max size of @token_stream) + */ +struct nl80211_wowlan_tcp_data_token_feature { + __u32 min_len, max_len, bufsize; +}; + +/** + * enum nl80211_wowlan_tcp_attrs - WoWLAN TCP connection parameters + * @__NL80211_WOWLAN_TCP_INVALID: invalid number for nested attributes + * @NL80211_WOWLAN_TCP_SRC_IPV4: source IPv4 address (in network byte order) + * @NL80211_WOWLAN_TCP_DST_IPV4: destination IPv4 address + * (in network byte order) + * @NL80211_WOWLAN_TCP_DST_MAC: destination MAC address, this is given because + * route lookup when configured might be invalid by the time we suspend, + * and doing a route lookup when suspending is no longer possible as it + * might require ARP querying. + * @NL80211_WOWLAN_TCP_SRC_PORT: source port (u16); optional, if not given a + * socket and port will be allocated + * @NL80211_WOWLAN_TCP_DST_PORT: destination port (u16) + * @NL80211_WOWLAN_TCP_DATA_PAYLOAD: data packet payload, at least one byte. + * For feature advertising, a u32 attribute holding the maximum length + * of the data payload. + * @NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ: data packet sequence configuration + * (if desired), a &struct nl80211_wowlan_tcp_data_seq. For feature + * advertising it is just a flag + * @NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN: data packet token configuration, + * see &struct nl80211_wowlan_tcp_data_token and for advertising see + * &struct nl80211_wowlan_tcp_data_token_feature. + * @NL80211_WOWLAN_TCP_DATA_INTERVAL: data interval in seconds, maximum + * interval in feature advertising (u32) + * @NL80211_WOWLAN_TCP_WAKE_PAYLOAD: wake packet payload, for advertising a + * u32 attribute holding the maximum length + * @NL80211_WOWLAN_TCP_WAKE_MASK: Wake packet payload mask, not used for + * feature advertising. The mask works like @NL80211_WOWLAN_PKTPAT_MASK + * but on the TCP payload only. 
+ * @NUM_NL80211_WOWLAN_TCP: number of TCP attributes + * @MAX_NL80211_WOWLAN_TCP: highest attribute number + */ +enum nl80211_wowlan_tcp_attrs { + __NL80211_WOWLAN_TCP_INVALID, + NL80211_WOWLAN_TCP_SRC_IPV4, + NL80211_WOWLAN_TCP_DST_IPV4, + NL80211_WOWLAN_TCP_DST_MAC, + NL80211_WOWLAN_TCP_SRC_PORT, + NL80211_WOWLAN_TCP_DST_PORT, + NL80211_WOWLAN_TCP_DATA_PAYLOAD, + NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ, + NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, + NL80211_WOWLAN_TCP_DATA_INTERVAL, + NL80211_WOWLAN_TCP_WAKE_PAYLOAD, + NL80211_WOWLAN_TCP_WAKE_MASK, + + /* keep last */ + NUM_NL80211_WOWLAN_TCP, + MAX_NL80211_WOWLAN_TCP = NUM_NL80211_WOWLAN_TCP - 1 +}; + /** * enum nl80211_iface_limit_attrs - limit attributes * @NL80211_IFACE_LIMIT_UNSPEC: (reserved) diff --git a/net/wireless/core.h b/net/wireless/core.h index 37d70dc2fe82..949c9573d8d7 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -108,6 +108,9 @@ cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) for (i = 0; i < rdev->wowlan->n_patterns; i++) kfree(rdev->wowlan->patterns[i].mask); kfree(rdev->wowlan->patterns); + if (rdev->wowlan->tcp && rdev->wowlan->tcp->sock) + sock_release(rdev->wowlan->tcp->sock); + kfree(rdev->wowlan->tcp); kfree(rdev->wowlan); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index cc0fad30b8c9..d29a461b4981 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "core.h" #include "nl80211.h" #include "reg.h" @@ -399,6 +400,26 @@ nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = { [NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST] = { .type = NLA_FLAG }, [NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE] = { .type = NLA_FLAG }, [NL80211_WOWLAN_TRIG_RFKILL_RELEASE] = { .type = NLA_FLAG }, + [NL80211_WOWLAN_TRIG_TCP_CONNECTION] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = { + [NL80211_WOWLAN_TCP_SRC_IPV4] = { .type = NLA_U32 }, + [NL80211_WOWLAN_TCP_DST_IPV4] = { .type = NLA_U32 }, + [NL80211_WOWLAN_TCP_DST_MAC] = { .len = ETH_ALEN }, + [NL80211_WOWLAN_TCP_SRC_PORT] = { .type = NLA_U16 }, + [NL80211_WOWLAN_TCP_DST_PORT] = { .type = NLA_U16 }, + [NL80211_WOWLAN_TCP_DATA_PAYLOAD] = { .len = 1 }, + [NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ] = { + .len = sizeof(struct nl80211_wowlan_tcp_data_seq) + }, + [NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN] = { + .len = sizeof(struct nl80211_wowlan_tcp_data_token) + }, + [NL80211_WOWLAN_TCP_DATA_INTERVAL] = { .type = NLA_U32 }, + [NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = { .len = 1 }, + [NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 }, }; /* policy for GTK rekey offload attributes */ @@ -872,6 +893,48 @@ nla_put_failure: return -ENOBUFS; } +#ifdef CONFIG_PM +static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, + struct sk_buff *msg) +{ + const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan.tcp; + struct nlattr *nl_tcp; + + if (!tcp) + return 0; + + nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); + if (!nl_tcp) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->data_payload_max)) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->data_payload_max)) + return -ENOBUFS; + + if (tcp->seq && nla_put_flag(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ)) + return -ENOBUFS; + + if (tcp->tok && nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, + sizeof(*tcp->tok), tcp->tok)) + return -ENOBUFS; + + if (nla_put_u32(msg, 
NL80211_WOWLAN_TCP_DATA_INTERVAL, + tcp->data_interval_max)) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, + tcp->wake_payload_max)) + return -ENOBUFS; + + nla_nest_end(msg, nl_tcp); + return 0; +} +#endif + static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *dev) { @@ -1246,6 +1309,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag goto nla_put_failure; } + if (nl80211_send_wowlan_tcp_caps(dev, msg)) + goto nla_put_failure; + nla_nest_end(msg, nl_wowlan); } #endif @@ -6930,16 +6996,67 @@ static int nl80211_send_wowlan_patterns(struct sk_buff *msg, return 0; } +static int nl80211_send_wowlan_tcp(struct sk_buff *msg, + struct cfg80211_wowlan_tcp *tcp) +{ + struct nlattr *nl_tcp; + + if (!tcp) + return 0; + + nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); + if (!nl_tcp) + return -ENOBUFS; + + if (nla_put_be32(msg, NL80211_WOWLAN_TCP_SRC_IPV4, tcp->src) || + nla_put_be32(msg, NL80211_WOWLAN_TCP_DST_IPV4, tcp->dst) || + nla_put(msg, NL80211_WOWLAN_TCP_DST_MAC, ETH_ALEN, tcp->dst_mac) || + nla_put_u16(msg, NL80211_WOWLAN_TCP_SRC_PORT, tcp->src_port) || + nla_put_u16(msg, NL80211_WOWLAN_TCP_DST_PORT, tcp->dst_port) || + nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->payload_len, tcp->payload) || + nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL, + tcp->data_interval) || + nla_put(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, + tcp->wake_len, tcp->wake_data) || + nla_put(msg, NL80211_WOWLAN_TCP_WAKE_MASK, + DIV_ROUND_UP(tcp->wake_len, 8), tcp->wake_mask)) + return -ENOBUFS; + + if (tcp->payload_seq.len && + nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ, + sizeof(tcp->payload_seq), &tcp->payload_seq)) + return -ENOBUFS; + + if (tcp->payload_tok.len && + nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, + sizeof(tcp->payload_tok) + tcp->tokens_size, + &tcp->payload_tok)) + return -ENOBUFS; + + return 0; +} + static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct sk_buff *msg; void *hdr; + u32 size = NLMSG_DEFAULT_SIZE; - if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns) + if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && + !rdev->wiphy.wowlan.tcp) return -EOPNOTSUPP; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (rdev->wowlan && rdev->wowlan->tcp) { + /* adjust size to have room for all the data */ + size += rdev->wowlan->tcp->tokens_size + + rdev->wowlan->tcp->payload_len + + rdev->wowlan->tcp->wake_len + + rdev->wowlan->tcp->wake_len / 8; + } + + msg = nlmsg_new(size, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -6970,8 +7087,13 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) (rdev->wowlan->rfkill_release && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) goto nla_put_failure; + if (nl80211_send_wowlan_patterns(msg, rdev)) goto nla_put_failure; + + if (nl80211_send_wowlan_tcp(msg, rdev->wowlan->tcp)) + goto nla_put_failure; + nla_nest_end(msg, nl_wowlan); } @@ -6983,6 +7105,150 @@ nla_put_failure: return -ENOBUFS; } +static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, + struct nlattr *attr, + struct cfg80211_wowlan *trig) +{ + struct nlattr *tb[NUM_NL80211_WOWLAN_TCP]; + struct cfg80211_wowlan_tcp *cfg; + struct nl80211_wowlan_tcp_data_token *tok = NULL; + struct nl80211_wowlan_tcp_data_seq *seq = NULL; + u32 size; + u32 data_size, wake_size, tokens_size 
= 0, wake_mask_size; + int err, port; + + if (!rdev->wiphy.wowlan.tcp) + return -EINVAL; + + err = nla_parse(tb, MAX_NL80211_WOWLAN_TCP, + nla_data(attr), nla_len(attr), + nl80211_wowlan_tcp_policy); + if (err) + return err; + + if (!tb[NL80211_WOWLAN_TCP_SRC_IPV4] || + !tb[NL80211_WOWLAN_TCP_DST_IPV4] || + !tb[NL80211_WOWLAN_TCP_DST_MAC] || + !tb[NL80211_WOWLAN_TCP_DST_PORT] || + !tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD] || + !tb[NL80211_WOWLAN_TCP_DATA_INTERVAL] || + !tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD] || + !tb[NL80211_WOWLAN_TCP_WAKE_MASK]) + return -EINVAL; + + data_size = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]); + if (data_size > rdev->wiphy.wowlan.tcp->data_payload_max) + return -EINVAL; + + if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) > + rdev->wiphy.wowlan.tcp->data_interval_max) + return -EINVAL; + + wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]); + if (wake_size > rdev->wiphy.wowlan.tcp->wake_payload_max) + return -EINVAL; + + wake_mask_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_MASK]); + if (wake_mask_size != DIV_ROUND_UP(wake_size, 8)) + return -EINVAL; + + if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]) { + u32 tokln = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]); + + tok = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]); + tokens_size = tokln - sizeof(*tok); + + if (!tok->len || tokens_size % tok->len) + return -EINVAL; + if (!rdev->wiphy.wowlan.tcp->tok) + return -EINVAL; + if (tok->len > rdev->wiphy.wowlan.tcp->tok->max_len) + return -EINVAL; + if (tok->len < rdev->wiphy.wowlan.tcp->tok->min_len) + return -EINVAL; + if (tokens_size > rdev->wiphy.wowlan.tcp->tok->bufsize) + return -EINVAL; + if (tok->offset + tok->len > data_size) + return -EINVAL; + } + + if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]) { + seq = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]); + if (!rdev->wiphy.wowlan.tcp->seq) + return -EINVAL; + if (seq->len == 0 || seq->len > 4) + return -EINVAL; + if (seq->len + seq->offset > data_size) + return -EINVAL; + } + + size = sizeof(*cfg); + size += data_size; + size += wake_size + wake_mask_size; + size += tokens_size; + + cfg = kzalloc(size, GFP_KERNEL); + if (!cfg) + return -ENOMEM; + cfg->src = nla_get_be32(tb[NL80211_WOWLAN_TCP_SRC_IPV4]); + cfg->dst = nla_get_be32(tb[NL80211_WOWLAN_TCP_DST_IPV4]); + memcpy(cfg->dst_mac, nla_data(tb[NL80211_WOWLAN_TCP_DST_MAC]), + ETH_ALEN); + if (tb[NL80211_WOWLAN_TCP_SRC_PORT]) + port = nla_get_u16(tb[NL80211_WOWLAN_TCP_SRC_PORT]); + else + port = 0; +#ifdef CONFIG_INET + /* allocate a socket and port for it and use it */ + err = __sock_create(wiphy_net(&rdev->wiphy), PF_INET, SOCK_STREAM, + IPPROTO_TCP, &cfg->sock, 1); + if (err) { + kfree(cfg); + return err; + } + if (inet_csk_get_port(cfg->sock->sk, port)) { + sock_release(cfg->sock); + kfree(cfg); + return -EADDRINUSE; + } + cfg->src_port = inet_sk(cfg->sock->sk)->inet_num; +#else + if (!port) { + kfree(cfg); + return -EINVAL; + } + cfg->src_port = port; +#endif + + cfg->dst_port = nla_get_u16(tb[NL80211_WOWLAN_TCP_DST_PORT]); + cfg->payload_len = data_size; + cfg->payload = (u8 *)cfg + sizeof(*cfg) + tokens_size; + memcpy((void *)cfg->payload, + nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]), + data_size); + if (seq) + cfg->payload_seq = *seq; + cfg->data_interval = nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]); + cfg->wake_len = wake_size; + cfg->wake_data = (u8 *)cfg + sizeof(*cfg) + tokens_size + data_size; + memcpy((void *)cfg->wake_data, + nla_data(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]), + wake_size); + cfg->wake_mask = (u8 *)cfg + 
sizeof(*cfg) + tokens_size + + data_size + wake_size; + memcpy((void *)cfg->wake_mask, + nla_data(tb[NL80211_WOWLAN_TCP_WAKE_MASK]), + wake_mask_size); + if (tok) { + cfg->tokens_size = tokens_size; + memcpy(&cfg->payload_tok, tok, sizeof(*tok) + tokens_size); + } + + trig->tcp = cfg; + + return 0; +} + static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -6993,7 +7259,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) int err, i; bool prev_enabled = rdev->wowlan; - if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns) + if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && + !rdev->wiphy.wowlan.tcp) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]) { @@ -7120,6 +7387,14 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) } } + if (tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION]) { + err = nl80211_parse_wowlan_tcp( + rdev, tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION], + &new_triggers); + if (err) + goto error; + } + ntrig = kmemdup(&new_triggers, sizeof(new_triggers), GFP_KERNEL); if (!ntrig) { err = -ENOMEM; @@ -7137,6 +7412,9 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) for (i = 0; i < new_triggers.n_patterns; i++) kfree(new_triggers.patterns[i].mask); kfree(new_triggers.patterns); + if (new_triggers.tcp && new_triggers.tcp->sock) + sock_release(new_triggers.tcp->sock); + kfree(new_triggers.tcp); return err; } #endif @@ -9418,6 +9696,17 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, wakeup->pattern_idx)) goto free_msg; + if (wakeup->tcp_match) + nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH); + + if (wakeup->tcp_connlost) + nla_put_flag(msg, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST); + + if (wakeup->tcp_nomoretokens) + nla_put_flag(msg, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS); + if (wakeup->packet) { u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211; u32 len_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN; -- cgit v1.2.3-71-gd317 From 93be6ce0e91b6a94783e012b1857a347a5e6e9f2 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 11 Feb 2013 05:50:18 +0000 Subject: tcp: set and get per-socket timestamp A timestamp can be set, only if a socket is in the repair mode. This patch adds a new socket option TCP_TIMESTAMP, which allows to get and set current tcp times stamp. Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: Eric Dumazet Cc: Pavel Emelyanov Signed-off-by: Andrey Vagin Signed-off-by: David S. 
Miller --- include/uapi/linux/tcp.h | 1 + net/ipv4/tcp.c | 9 +++++++++ 2 files changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index e962faa5ab0d..6b1ead0b0c9d 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -111,6 +111,7 @@ enum { #define TCP_QUEUE_SEQ 21 #define TCP_REPAIR_OPTIONS 22 #define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ +#define TCP_TIMESTAMP 24 struct tcp_repair_opt { __u32 opt_code; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8a90bda96038..801b07b796f0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2714,6 +2714,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, else err = -EINVAL; break; + case TCP_TIMESTAMP: + if (!tp->repair) + err = -EPERM; + else + tp->tsoffset = val - tcp_time_stamp; + break; default: err = -ENOPROTOOPT; break; @@ -2962,6 +2968,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_USER_TIMEOUT: val = jiffies_to_msecs(icsk->icsk_user_timeout); break; + case TCP_TIMESTAMP: + val = tcp_time_stamp + tp->tsoffset; + break; default: return -ENOPROTOOPT; } -- cgit v1.2.3-71-gd317 From 407af3299ef1ac7e87ce3fb530e32a009d1a9efd Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:12 +0000 Subject: bridge: Add netlink interface to configure vlans on bridge ports Add a netlink interface to add and remove vlan configuration on bridge port. The interface uses the RTM_SETLINK message and encodes the vlan configuration inside the IFLA_AF_SPEC. It is possble to include multiple vlans to either add or remove in a single message. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 + include/uapi/linux/if_bridge.h | 9 +++ net/bridge/br_device.c | 1 + net/bridge/br_if.c | 1 + net/bridge/br_netlink.c | 139 +++++++++++++++++++++++++++++++++++------ net/bridge/br_private.h | 1 + net/core/rtnetlink.c | 72 +++++++++++++++++++++ 7 files changed, 207 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 25bd46f52877..1b90f9401000 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1020,6 +1020,8 @@ struct net_device_ops { int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev); + int (*ndo_bridge_dellink)(struct net_device *dev, + struct nlmsghdr *nlh); int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); }; diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 5db297514aec..3ca9817ca7e8 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -108,15 +108,24 @@ struct __fdb_entry { * [IFLA_AF_SPEC] = { * [IFLA_BRIDGE_FLAGS] * [IFLA_BRIDGE_MODE] + * [IFLA_BRIDGE_VLAN_INFO] * } */ enum { IFLA_BRIDGE_FLAGS, IFLA_BRIDGE_MODE, + IFLA_BRIDGE_VLAN_INFO, __IFLA_BRIDGE_MAX, }; #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) +#define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */ + +struct bridge_vlan_info { + u16 flags; + u16 vid; +}; + /* Bridge multicast database attributes * [MDBA_MDB] = { * [MDBA_MDB_ENTRY] = { diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 35a2c2c84f33..091bedf266a0 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -316,6 +316,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_dump = br_fdb_dump, .ndo_bridge_getlink = br_getlink, .ndo_bridge_setlink = br_setlink, + 
.ndo_bridge_dellink = br_dellink, }; static void br_dev_free(struct net_device *dev) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index af9d65ab4001..335c60cebfd1 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "br_private.h" diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 39ca9796f3f7..534a9f4587a9 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "br_private.h" #include "br_private_stp.h" @@ -119,10 +120,14 @@ nla_put_failure: */ void br_ifinfo_notify(int event, struct net_bridge_port *port) { - struct net *net = dev_net(port->dev); + struct net *net; struct sk_buff *skb; int err = -ENOBUFS; + if (!port) + return; + + net = dev_net(port->dev); br_debug(port->br, "port %u(%s) event %d\n", (unsigned int)port->port_no, port->dev->name, event); @@ -144,6 +149,7 @@ errout: rtnl_set_sk_err(net, RTNLGRP_LINK, err); } + /* * Dump information about all ports, in response to GETLINK */ @@ -162,6 +168,64 @@ out: return err; } +const struct nla_policy ifla_br_policy[IFLA_MAX+1] = { + [IFLA_BRIDGE_FLAGS] = { .type = NLA_U16 }, + [IFLA_BRIDGE_MODE] = { .type = NLA_U16 }, + [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY, + .len = sizeof(struct bridge_vlan_info), }, +}; + +static int br_afspec(struct net_bridge *br, + struct net_bridge_port *p, + struct nlattr *af_spec, + int cmd) +{ + struct nlattr *tb[IFLA_BRIDGE_MAX+1]; + int err = 0; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy); + if (err) + return err; + + if (tb[IFLA_BRIDGE_VLAN_INFO]) { + struct bridge_vlan_info *vinfo; + + vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); + + if (vinfo->vid >= VLAN_N_VID) + return -EINVAL; + + switch (cmd) { + case RTM_SETLINK: + if (p) { + err = nbp_vlan_add(p, vinfo->vid); + if (err) + break; + + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + err = br_vlan_add(p->br, vinfo->vid); + } else + err = br_vlan_add(br, vinfo->vid); + + if (err) + break; + + break; + + case RTM_DELLINK: + if (p) { + nbp_vlan_delete(p, vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + br_vlan_delete(p->br, vinfo->vid); + } else + br_vlan_delete(br, vinfo->vid); + break; + } + } + + return err; +} + static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, [IFLA_BRPORT_COST] = { .type = NLA_U32 }, @@ -241,6 +305,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) { struct ifinfomsg *ifm; struct nlattr *protinfo; + struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; int err; @@ -248,38 +313,76 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) ifm = nlmsg_data(nlh); protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); - if (!protinfo) + afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + if (!protinfo && !afspec) return 0; p = br_port_get_rtnl(dev); - if (!p) + /* We want to accept dev as bridge itself if the AF_SPEC + * is set to see if someone is setting vlan info on the brigde + */ + if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec)) return -EINVAL; - if (protinfo->nla_type & NLA_F_NESTED) { - err = nla_parse_nested(tb, IFLA_BRPORT_MAX, - protinfo, ifla_brport_policy); + if (p && protinfo) { + if (protinfo->nla_type & NLA_F_NESTED) { + err = nla_parse_nested(tb, IFLA_BRPORT_MAX, + protinfo, ifla_brport_policy); + if (err) + return err; + + spin_lock_bh(&p->br->lock); + err = 
br_setport(p, tb); + spin_unlock_bh(&p->br->lock); + } else { + /* Binary compatability with old RSTP */ + if (nla_len(protinfo) < sizeof(u8)) + return -EINVAL; + + spin_lock_bh(&p->br->lock); + err = br_set_port_state(p, nla_get_u8(protinfo)); + spin_unlock_bh(&p->br->lock); + } if (err) - return err; - - spin_lock_bh(&p->br->lock); - err = br_setport(p, tb); - spin_unlock_bh(&p->br->lock); - } else { - /* Binary compatability with old RSTP */ - if (nla_len(protinfo) < sizeof(u8)) - return -EINVAL; + goto out; + } - spin_lock_bh(&p->br->lock); - err = br_set_port_state(p, nla_get_u8(protinfo)); - spin_unlock_bh(&p->br->lock); + if (afspec) { + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_SETLINK); } if (err == 0) br_ifinfo_notify(RTM_NEWLINK, p); +out: return err; } +/* Delete port information */ +int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) +{ + struct ifinfomsg *ifm; + struct nlattr *afspec; + struct net_bridge_port *p; + int err; + + ifm = nlmsg_data(nlh); + + afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + if (!afspec) + return 0; + + p = br_port_get_rtnl(dev); + /* We want to accept dev as bridge itself as well */ + if (!p && !(dev->priv_flags & IFF_EBRIDGE)) + return -EINVAL; + + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_DELLINK); + + return err; +} static int br_validate(struct nlattr *tb[], struct nlattr *data[]) { if (tb[IFLA_ADDRESS]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f0f24610d111..a42f9d49a64e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -713,6 +713,7 @@ extern int br_netlink_init(void); extern void br_netlink_fini(void); extern void br_ifinfo_notify(int event, struct net_bridge_port *port); extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); +extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c1e4db60eeca..2c9ccbfbd93c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2464,6 +2464,77 @@ out: return err; } +static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(skb->sk); + struct ifinfomsg *ifm; + struct net_device *dev; + struct nlattr *br_spec, *attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 oflags, flags = 0; + bool have_flags = false; + + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_family != AF_BRIDGE) + return -EPFNOSUPPORT; + + dev = __dev_get_by_index(net, ifm->ifi_index); + if (!dev) { + pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + return -ENODEV; + } + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + have_flags = true; + flags = nla_get_u16(attr); + break; + } + } + } + + oflags = flags; + + if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + + if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) { + err = -EOPNOTSUPP; + goto out; + } + + err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + if (err) + goto out; + + flags &= ~BRIDGE_FLAGS_MASTER; + } + + if ((flags & BRIDGE_FLAGS_SELF)) { + if (!dev->netdev_ops->ndo_bridge_dellink) + err = -EOPNOTSUPP; + else + err = 
dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + + if (!err) + flags &= ~BRIDGE_FLAGS_SELF; + } + + if (have_flags) + memcpy(nla_data(attr), &flags, sizeof(flags)); + /* Generate event to notify upper layer of bridge change */ + if (!err) + err = rtnl_bridge_notify(dev, oflags); +out: + return err; +} + /* Protected by RTNL sempahore. */ static struct rtattr **rta_buf; static int rtattr_max; @@ -2647,6 +2718,7 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); + rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); } -- cgit v1.2.3-71-gd317 From 6cbdceeb1cb12c7d620161925a8c3e81daadb2e4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:13 +0000 Subject: bridge: Dump vlan information from a bridge port Using the RTM_GETLINK dump the vlan filter list of a given bridge port. The information depends on setting the filter flag similar to how nic VF info is dumped. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +- include/linux/netdevice.h | 3 +- include/uapi/linux/rtnetlink.h | 1 + net/bridge/br_netlink.c | 94 +++++++++++++++++++++++---- net/bridge/br_private.h | 3 +- net/bridge/br_vlan.c | 2 + net/core/rtnetlink.c | 16 +++-- 7 files changed, 104 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 6999269b3a4a..4e2aa47193cb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7079,7 +7079,8 @@ static int ixgbe_ndo_bridge_setlink(struct net_device *dev, } static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev) + struct net_device *dev, + u32 filter_mask) { struct ixgbe_adapter *adapter = netdev_priv(dev); u16 mode; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1b90f9401000..1964ca66df56 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1019,7 +1019,8 @@ struct net_device_ops { struct nlmsghdr *nlh); int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev); + struct net_device *dev, + u32 filter_mask); int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh); int (*ndo_change_carrier)(struct net_device *dev, diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 7a5eb196ade9..7a2144e1afae 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -630,6 +630,7 @@ struct tcamsg { /* New extended info filters for IFLA_EXT_MASK */ #define RTEXT_FILTER_VF (1 << 0) +#define RTEXT_FILTER_BRVLAN (1 << 1) /* End of information exported to user level */ diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 534a9f4587a9..fe1980d5a7e4 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -65,15 +65,21 @@ static int br_port_fill_attrs(struct sk_buff *skb, * Create one netlink message for one interface * Contains port and master info as well as carrier and bridge state. 
*/ -static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *port, - u32 pid, u32 seq, int event, unsigned int flags) +static int br_fill_ifinfo(struct sk_buff *skb, + const struct net_bridge_port *port, + u32 pid, u32 seq, int event, unsigned int flags, + u32 filter_mask, const struct net_device *dev) { - const struct net_bridge *br = port->br; - const struct net_device *dev = port->dev; + const struct net_bridge *br; struct ifinfomsg *hdr; struct nlmsghdr *nlh; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + if (port) + br = port->br; + else + br = netdev_priv(dev); + br_debug(br, "br_fill_info event %d port %s master %s\n", event, dev->name, br->dev->name); @@ -99,7 +105,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_put_u32(skb, IFLA_LINK, dev->iflink))) goto nla_put_failure; - if (event == RTM_NEWLINK) { + if (event == RTM_NEWLINK && port) { struct nlattr *nest = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); @@ -108,6 +114,40 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_nest_end(skb, nest); } + /* Check if the VID information is requested */ + if (filter_mask & RTEXT_FILTER_BRVLAN) { + struct nlattr *af; + const struct net_port_vlans *pv; + struct bridge_vlan_info vinfo; + u16 vid; + + if (port) + pv = nbp_get_vlan_info(port); + else + pv = br_get_vlan_info(br); + + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) + goto done; + + af = nla_nest_start(skb, IFLA_AF_SPEC); + if (!af) + goto nla_put_failure; + + for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + vid < BR_VLAN_BITMAP_LEN; + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1)) { + vinfo.vid = vid; + vinfo.flags = 0; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + nla_nest_end(skb, af); + } + +done: return nlmsg_end(skb, nlh); nla_put_failure: @@ -135,7 +175,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) if (skb == NULL) goto errout; - err = br_fill_ifinfo(skb, port, 0, 0, event, 0); + err = br_fill_ifinfo(skb, port, 0, 0, event, 0, 0, port->dev); if (err < 0) { /* -EMSGSIZE implies BUG in br_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -154,16 +194,17 @@ errout: * Dump information about all ports, in response to GETLINK */ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev) + struct net_device *dev, u32 filter_mask) { int err = 0; struct net_bridge_port *port = br_port_get_rcu(dev); - /* not a bridge port */ - if (!port) + /* not a bridge port and */ + if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN)) goto out; - err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI); + err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + filter_mask, dev); out: return err; } @@ -395,6 +436,29 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } +static size_t br_get_link_af_size(const struct net_device *dev) +{ + struct net_port_vlans *pv; + + if (br_port_exists(dev)) + pv = nbp_get_vlan_info(br_port_get_rcu(dev)); + else if (dev->priv_flags & IFF_EBRIDGE) + pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); + else + return 0; + + if (!pv) + return 0; + + /* Each VLAN is returned in bridge_vlan_info along with flags */ + return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); +} + +struct rtnl_af_ops br_af_ops = { + .family = AF_BRIDGE, + .get_link_af_size = br_get_link_af_size, +}; 
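[Editorial aside, not part of the patch: on the userspace side, the IFLA_AF_SPEC payload that br_afspec() consumes on RTM_SETLINK/RTM_DELLINK, and that br_fill_ifinfo() now emits on dumps, is simply nested bridge_vlan_info entries. A rough sketch assuming libmnl-style attribute helpers and an already prepared AF_BRIDGE ifinfomsg message; names and values beyond the uapi ones are illustrative.]

#include <libmnl/libmnl.h>
#include <linux/if_bridge.h>
#include <linux/if_link.h>

/* Append one VLAN entry to a prepared RTM_SETLINK/RTM_DELLINK message. */
static void append_vlan(struct nlmsghdr *nlh, uint16_t vid, uint16_t flags)
{
	struct bridge_vlan_info vinfo = { .flags = flags, .vid = vid };
	struct nlattr *af = mnl_attr_nest_start(nlh, IFLA_AF_SPEC);

	mnl_attr_put(nlh, IFLA_BRIDGE_VLAN_INFO, sizeof(vinfo), &vinfo);
	mnl_attr_nest_end(nlh, af);
}

/* e.g. append_vlan(nlh, 100, BRIDGE_VLAN_INFO_MASTER) asks br_afspec()
 * to add VLAN 100 on the port and, because of the MASTER flag, on the
 * bridge device itself. */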
+ struct rtnl_link_ops br_link_ops __read_mostly = { .kind = "bridge", .priv_size = sizeof(struct net_bridge), @@ -408,11 +472,18 @@ int __init br_netlink_init(void) int err; br_mdb_init(); - err = rtnl_link_register(&br_link_ops); + err = rtnl_af_register(&br_af_ops); if (err) goto out; + err = rtnl_link_register(&br_link_ops); + if (err) + goto out_af; + return 0; + +out_af: + rtnl_af_unregister(&br_af_ops); out: br_mdb_uninit(); return err; @@ -421,5 +492,6 @@ out: void __exit br_netlink_fini(void) { br_mdb_uninit(); + rtnl_af_unregister(&br_af_ops); rtnl_link_unregister(&br_link_ops); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index a42f9d49a64e..ce2235255c2f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -73,6 +73,7 @@ struct net_port_vlans { } parent; struct rcu_head rcu; unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN]; + u16 num_vlans; }; struct net_bridge_fdb_entry @@ -715,7 +716,7 @@ extern void br_ifinfo_notify(int event, struct net_bridge_port *port); extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev); + struct net_device *dev, u32 filter_mask); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index d8690bfe63d4..f2bf5a197ea3 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -28,6 +28,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid) } set_bit(vid, v->vlan_bitmap); + v->num_vlans++; return 0; } @@ -44,6 +45,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) } clear_bit(vid, v->vlan_bitmap); + v->num_vlans--; if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { if (v->port_idx) rcu_assign_pointer(v->parent.port->vlan_info, NULL); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2c9ccbfbd93c..f3a112ec86d5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2315,6 +2315,13 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0; u32 portid = NETLINK_CB(cb->skb).portid; u32 seq = cb->nlh->nlmsg_seq; + struct nlattr *extfilt; + u32 filter_mask = 0; + + extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg), + IFLA_EXT_MASK); + if (extfilt) + filter_mask = nla_get_u32(extfilt); rcu_read_lock(); for_each_netdev_rcu(net, dev) { @@ -2324,14 +2331,15 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && br_dev->netdev_ops->ndo_bridge_getlink( - skb, portid, seq, dev) < 0) + skb, portid, seq, dev, filter_mask) < 0) break; idx++; } if (ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && - ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0) + ops->ndo_bridge_getlink(skb, portid, seq, dev, + filter_mask) < 0) break; idx++; } @@ -2372,14 +2380,14 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags) if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { - err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } if ((flags & BRIDGE_FLAGS_SELF) && dev->netdev_ops->ndo_bridge_getlink) { - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } -- cgit 
v1.2.3-71-gd317 From 552406c488ec2cf1aaf8b5bd24d1750c9fd6d8cc Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:15 +0000 Subject: bridge: Add the ability to configure pvid A user may designate a certain vlan as PVID. This means that any ingress frame that does not contain a vlan tag is assigned to this vlan and any forwarding decisions are made with this vlan in mind. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + net/bridge/br_netlink.c | 11 +++++++--- net/bridge/br_private.h | 8 +++---- net/bridge/br_vlan.c | 47 +++++++++++++++++++++++++++++++++--------- 4 files changed, 50 insertions(+), 17 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 3ca9817ca7e8..c6c30e28f396 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -120,6 +120,7 @@ enum { #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) #define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */ +#define BRIDGE_VLAN_INFO_PVID (1<<1) /* VLAN is PVID, ingress untagged */ struct bridge_vlan_info { u16 flags; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index fe1980d5a7e4..e044cc0b5650 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -120,6 +120,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_port_vlans *pv; struct bridge_vlan_info vinfo; u16 vid; + u16 pvid; if (port) pv = nbp_get_vlan_info(port); @@ -133,12 +134,15 @@ static int br_fill_ifinfo(struct sk_buff *skb, if (!af) goto nla_put_failure; + pvid = br_get_pvid(pv); for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); vid < BR_VLAN_BITMAP_LEN; vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid+1)) { vinfo.vid = vid; vinfo.flags = 0; + if (vid == pvid) + vinfo.flags |= BRIDGE_VLAN_INFO_PVID; if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, sizeof(vinfo), &vinfo)) goto nla_put_failure; @@ -239,14 +243,15 @@ static int br_afspec(struct net_bridge *br, switch (cmd) { case RTM_SETLINK: if (p) { - err = nbp_vlan_add(p, vinfo->vid); + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); if (err) break; if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - err = br_vlan_add(p->br, vinfo->vid); + err = br_vlan_add(p->br, vinfo->vid, + vinfo->flags); } else - err = br_vlan_add(br, vinfo->vid); + err = br_vlan_add(br, vinfo->vid, vinfo->flags); if (err) break; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index ea8e7efd9137..1ae6395a0369 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -562,11 +562,11 @@ extern bool br_allowed_egress(struct net_bridge *br, extern struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_port_vlans *v, struct sk_buff *skb); -extern int br_vlan_add(struct net_bridge *br, u16 vid); +extern int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); extern int br_vlan_delete(struct net_bridge *br, u16 vid); extern void br_vlan_flush(struct net_bridge *br); extern int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); -extern int nbp_vlan_add(struct net_bridge_port *port, u16 vid); +extern int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); extern int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); extern void nbp_vlan_flush(struct net_bridge_port *port); @@ -633,7 +633,7 @@ static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, return skb; } -static inline int br_vlan_add(struct net_bridge *br, u16 
vid) +static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) { return -EOPNOTSUPP; } @@ -647,7 +647,7 @@ static inline void br_vlan_flush(struct net_bridge *br) { } -static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid) +static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) { return -EOPNOTSUPP; } diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 20057de56db0..c79940cff3a1 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -5,12 +5,33 @@ #include "br_private.h" -static int __vlan_add(struct net_port_vlans *v, u16 vid) +static void __vlan_add_pvid(struct net_port_vlans *v, u16 vid) +{ + if (v->pvid == vid) + return; + + smp_wmb(); + v->pvid = vid; +} + +static void __vlan_delete_pvid(struct net_port_vlans *v, u16 vid) +{ + if (v->pvid != vid) + return; + + smp_wmb(); + v->pvid = 0; +} + +static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) { int err; - if (test_bit(vid, v->vlan_bitmap)) - return -EEXIST; + if (test_bit(vid, v->vlan_bitmap)) { + if (flags & BRIDGE_VLAN_INFO_PVID) + __vlan_add_pvid(v, vid); + return 0; + } if (v->port_idx && vid) { struct net_device *dev = v->parent.port->dev; @@ -29,6 +50,9 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid) set_bit(vid, v->vlan_bitmap); v->num_vlans++; + if (flags & BRIDGE_VLAN_INFO_PVID) + __vlan_add_pvid(v, vid); + return 0; } @@ -37,6 +61,8 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) if (!test_bit(vid, v->vlan_bitmap)) return -EINVAL; + __vlan_delete_pvid(v, vid); + if (v->port_idx && vid) { struct net_device *dev = v->parent.port->dev; @@ -58,6 +84,8 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) static void __vlan_flush(struct net_port_vlans *v) { + smp_wmb(); + v->pvid = 0; bitmap_zero(v->vlan_bitmap, BR_VLAN_BITMAP_LEN); if (v->port_idx) rcu_assign_pointer(v->parent.port->vlan_info, NULL); @@ -185,7 +213,7 @@ bool br_allowed_egress(struct net_bridge *br, } /* Must be protected by RTNL */ -int br_vlan_add(struct net_bridge *br, u16 vid) +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) { struct net_port_vlans *pv = NULL; int err; @@ -194,7 +222,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid) pv = rtnl_dereference(br->vlan_info); if (pv) - return __vlan_add(pv, vid); + return __vlan_add(pv, vid, flags); /* Create port vlan infomration */ @@ -203,7 +231,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid) return -ENOMEM; pv->parent.br = br; - err = __vlan_add(pv, vid); + err = __vlan_add(pv, vid, flags); if (err) goto out; @@ -234,7 +262,6 @@ void br_vlan_flush(struct net_bridge *br) struct net_port_vlans *pv; ASSERT_RTNL(); - pv = rtnl_dereference(br->vlan_info); if (!pv) return; @@ -258,7 +285,7 @@ unlock: } /* Must be protected by RTNL */ -int nbp_vlan_add(struct net_bridge_port *port, u16 vid) +int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) { struct net_port_vlans *pv = NULL; int err; @@ -267,7 +294,7 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid) pv = rtnl_dereference(port->vlan_info); if (pv) - return __vlan_add(pv, vid); + return __vlan_add(pv, vid, flags); /* Create port vlan infomration */ @@ -279,7 +306,7 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid) pv->port_idx = port->port_no; pv->parent.port = port; - err = __vlan_add(pv, vid); + err = __vlan_add(pv, vid, flags); if (err) goto clean_up; -- cgit v1.2.3-71-gd317 From 1690be63a27b20ae65c792729a44f5970561ffa4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 
12:00:18 +0000 Subject: bridge: Add vlan support to static neighbors When a user adds bridge neighbors, allow him to specify VLAN id. If the VLAN id is not specified, the neighbor will be added for VLANs currently in the ports filter list. If no VLANs are configured on the port, we use vlan 0 and only add 1 entry. Signed-off-by: Vlad Yasevich Acked-by: Jitendra Kalsaria Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 + drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 4 +- drivers/net/macvlan.c | 2 +- drivers/net/vxlan.c | 3 +- include/linux/netdevice.h | 4 +- include/uapi/linux/neighbour.h | 1 + net/bridge/br_fdb.c | 148 ++++++++++++++++++++--- net/bridge/br_private.h | 6 +- net/core/rtnetlink.c | 26 ++-- 10 files changed, 162 insertions(+), 35 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 4e2aa47193cb..1c0efcb7920f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7002,7 +7002,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return err; } -static int ixgbe_ndo_fdb_del(struct ndmsg *ndm, +static int ixgbe_ndo_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr) { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 937bcc3d3212..5088dc5c3d1a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1959,6 +1959,7 @@ static int mlx4_en_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } static int mlx4_en_fdb_del(struct ndmsg *ndm, + struct nlattr *tb[], struct net_device *dev, const unsigned char *addr) { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index b745194391a1..b95316831587 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -247,8 +247,8 @@ static int qlcnic_set_mac(struct net_device *netdev, void *p) return 0; } -static int qlcnic_fdb_del(struct ndmsg *ndm, struct net_device *netdev, - const unsigned char *addr) +static int qlcnic_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *netdev, const unsigned char *addr) { struct qlcnic_adapter *adapter = netdev_priv(netdev); int err = -EOPNOTSUPP; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index e4b8078e88a9..defcd8a85744 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -599,7 +599,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return err; } -static int macvlan_fdb_del(struct ndmsg *ndm, +static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr) { diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 72485b9b9005..9d70421cf3a0 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -393,7 +393,8 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } /* Delete entry (via netlink) */ -static int vxlan_fdb_delete(struct ndmsg *ndm, struct net_device *dev, +static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr) { struct vxlan_dev *vxlan = netdev_priv(dev); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h 
index 1964ca66df56..9deb672d999f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -884,7 +884,8 @@ struct netdev_fcoe_hbainfo { * struct net_device *dev, * const unsigned char *addr, u16 flags) * Adds an FDB entry to dev for addr. - * int (*ndo_fdb_del)(struct ndmsg *ndm, struct net_device *dev, + * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], + * struct net_device *dev, * const unsigned char *addr) * Deletes the FDB entry from dev coresponding to addr. * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, @@ -1008,6 +1009,7 @@ struct net_device_ops { const unsigned char *addr, u16 flags); int (*ndo_fdb_del)(struct ndmsg *ndm, + struct nlattr *tb[], struct net_device *dev, const unsigned char *addr); int (*ndo_fdb_dump)(struct sk_buff *skb, diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 275e5d65dcb2..adb068c53c4e 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -20,6 +20,7 @@ enum { NDA_LLADDR, NDA_CACHEINFO, NDA_PROBES, + NDA_VLAN, __NDA_MAX }; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 276a52254606..4b75ad43aa85 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -505,6 +505,10 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, ci.ndm_refcnt = 0; if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; + + if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -516,6 +520,7 @@ static inline size_t fdb_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(u16)) /* NDA_VLAN */ + nla_total_size(sizeof(struct nda_cacheinfo)); } @@ -617,6 +622,25 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, return 0; } +static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p, + const unsigned char *addr, u16 nlh_flags, u16 vid) +{ + int err = 0; + + if (ndm->ndm_flags & NTF_USE) { + rcu_read_lock(); + br_fdb_update(p->br, p, addr, vid); + rcu_read_unlock(); + } else { + spin_lock_bh(&p->br->hash_lock); + err = fdb_add_entry(p, addr, ndm->ndm_state, + nlh_flags, vid); + spin_unlock_bh(&p->br->hash_lock); + } + + return err; +} + /* Add new permanent fdb entry with RTM_NEWNEIGH */ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, @@ -624,12 +648,29 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], { struct net_bridge_port *p; int err = 0; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); return -EINVAL; } + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } + p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", @@ -637,41 +678,90 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - if (ndm->ndm_flags & NTF_USE) { - rcu_read_lock(); - br_fdb_update(p->br, p, addr, 0); - rcu_read_unlock(); + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: 
RTM_NEWNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + + /* VID was specified, so use it. */ + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { - spin_lock_bh(&p->br->hash_lock); - err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags, - 0); - spin_unlock_bh(&p->br->hash_lock); + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + goto out; + } + + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. + */ + vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + while (vid < BR_VLAN_BITMAP_LEN) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); + if (err) + goto out; + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1); + } } +out: return err; } -static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr) +static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, + u16 vlan) { - struct net_bridge *br = p->br; - struct hlist_head *head = &br->hash[br_mac_hash(addr, 0)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)]; struct net_bridge_fdb_entry *fdb; - fdb = fdb_find(head, addr, 0); + fdb = fdb_find(head, addr, vlan); if (!fdb) return -ENOENT; - fdb_delete(p->br, fdb); + fdb_delete(br, fdb); return 0; } +static int __br_fdb_delete(struct net_bridge_port *p, + const unsigned char *addr, u16 vid) +{ + int err; + + spin_lock_bh(&p->br->hash_lock); + err = fdb_delete_by_addr(p->br, addr, vid); + spin_unlock_bh(&p->br->hash_lock); + + return err; +} + /* Remove neighbor entry with RTM_DELNEIGH */ -int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, +int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr) { struct net_bridge_port *p; int err; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", @@ -679,9 +769,33 @@ int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, return -EINVAL; } - spin_lock_bh(&p->br->hash_lock); - err = fdb_delete_by_addr(p, addr); - spin_unlock_bh(&p->br->hash_lock); + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: RTM_DELNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + err = __br_fdb_delete(p, addr, vid); + } else { + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_delete(p, addr, 0); + goto out; + } + + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. 
+ */ + err = -ENOENT; + vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + while (vid < BR_VLAN_BITMAP_LEN) { + err &= __br_fdb_delete(p, addr, vid); + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1); + } + } +out: return err; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 22915c8e9961..799dbb37e5a2 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -388,7 +388,7 @@ extern void br_fdb_update(struct net_bridge *br, const unsigned char *addr, u16 vid); -extern int br_fdb_delete(struct ndmsg *ndm, +extern int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr); extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], @@ -577,13 +577,13 @@ extern void nbp_vlan_flush(struct net_bridge_port *port); static inline struct net_port_vlans *br_get_vlan_info( const struct net_bridge *br) { - return rcu_dereference(br->vlan_info); + return rcu_dereference_rtnl(br->vlan_info); } static inline struct net_port_vlans *nbp_get_vlan_info( const struct net_bridge_port *p) { - return rcu_dereference(p->vlan_info); + return rcu_dereference_rtnl(p->vlan_info); } /* Since bridge now depends on 8021Q module, but the time bridge sees the diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f3a112ec86d5..d8aa20f6a46e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2119,13 +2119,17 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; - struct nlattr *llattr; + struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; int err = -EINVAL; __u8 *addr; - if (nlmsg_len(nlh) < sizeof(*ndm)) - return -EINVAL; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + if (err < 0) + return err; ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { @@ -2139,13 +2143,17 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -ENODEV; } - llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR); - if (llattr == NULL || nla_len(llattr) != ETH_ALEN) { - pr_info("PF_BRIGDE: RTM_DELNEIGH with invalid address\n"); + if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n"); + return -EINVAL; + } + + addr = nla_data(tb[NDA_LLADDR]); + if (!is_valid_ether_addr(addr)) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n"); return -EINVAL; } - addr = nla_data(llattr); err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ @@ -2155,7 +2163,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) const struct net_device_ops *ops = br_dev->netdev_ops; if (ops->ndo_fdb_del) - err = ops->ndo_fdb_del(ndm, dev, addr); + err = ops->ndo_fdb_del(ndm, tb, dev, addr); if (err) goto out; @@ -2165,7 +2173,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) /* Embedded bridge, macvlan, and any other device support */ if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { - err = dev->netdev_ops->ndo_fdb_del(ndm, dev, addr); + err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); if (!err) { rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); -- cgit v1.2.3-71-gd317 From 35e03f3a0275a1ba57e432d7c948cf6f70fbb37a Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:20 +0000 Subject: bridge: Separate egress policy bitmap Add an ability to configure a separate 
"untagged" egress policy to the VLAN information of the bridge. This superseeds PVID policy and makes PVID ingress-only. The policy is configured with a new flag and is represented as a port bitmap per vlan. Egress frames with a VLAN id in "untagged" policy bitmap would egress the port without VLAN header. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + net/bridge/br_netlink.c | 4 ++++ net/bridge/br_private.h | 1 + net/bridge/br_vlan.c | 20 ++++++++++++++------ 4 files changed, 20 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index c6c30e28f396..f1bf8d34ac9f 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -121,6 +121,7 @@ enum { #define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */ #define BRIDGE_VLAN_INFO_PVID (1<<1) /* VLAN is PVID, ingress untagged */ +#define BRIDGE_VLAN_INFO_UNTAGGED (1<<2) /* VLAN egresses untagged */ struct bridge_vlan_info { u16 flags; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e044cc0b5650..d1dda476d743 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -143,6 +143,10 @@ static int br_fill_ifinfo(struct sk_buff *skb, vinfo.flags = 0; if (vid == pvid) vinfo.flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, sizeof(vinfo), &vinfo)) goto nla_put_failure; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 32ecfa4ef47f..6d314c4e6bcb 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -75,6 +75,7 @@ struct net_port_vlans { } parent; struct rcu_head rcu; unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN]; + unsigned long untagged_bitmap[BR_VLAN_BITMAP_LEN]; u16 num_vlans; }; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 9ea358fbbf78..93dde75923f0 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -23,6 +23,15 @@ static void __vlan_delete_pvid(struct net_port_vlans *v, u16 vid) v->pvid = 0; } +static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags) +{ + if (flags & BRIDGE_VLAN_INFO_PVID) + __vlan_add_pvid(v, vid); + + if (flags & BRIDGE_VLAN_INFO_UNTAGGED) + set_bit(vid, v->untagged_bitmap); +} + static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) { struct net_bridge_port *p = NULL; @@ -31,8 +40,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) int err; if (test_bit(vid, v->vlan_bitmap)) { - if (flags & BRIDGE_VLAN_INFO_PVID) - __vlan_add_pvid(v, vid); + __vlan_add_flags(v, vid, flags); return 0; } @@ -69,8 +77,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) set_bit(vid, v->vlan_bitmap); v->num_vlans++; - if (flags & BRIDGE_VLAN_INFO_PVID) - __vlan_add_pvid(v, vid); + __vlan_add_flags(v, vid, flags); return 0; @@ -86,6 +93,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) return -EINVAL; __vlan_delete_pvid(v, vid); + clear_bit(vid, v->untagged_bitmap); if (v->port_idx && vid) { struct net_device *dev = v->parent.port->dev; @@ -144,11 +152,11 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, goto out; /* At this point, we know that the frame was filtered and contains - * a valid vlan id. If the vlan id matches the pvid of current port + * a valid vlan id. If the vlan id is set in the untagged bitmap, * send untagged; otherwise, send taged. 
*/ br_vlan_get_tag(skb, &vid); - if (vid == br_get_pvid(pv)) + if (test_bit(vid, pv->untagged_bitmap)) skb = br_vlan_untag(skb); else { /* Egress policy says "send tagged". If output device -- cgit v1.2.3-71-gd317 From 9f89ec82521957de807dc0d56264ee226bbe9b98 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 14 Feb 2013 13:32:31 +0800 Subject: bridge: use __u16 in if_bridge.h We should use "__u16" instead of "u16" in the user-space visable header. Cc: Vlad Yasevich Cc: Stephen Hemminger Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index f1bf8d34ac9f..2d70d79ce2fd 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -124,8 +124,8 @@ enum { #define BRIDGE_VLAN_INFO_UNTAGGED (1<<2) /* VLAN egresses untagged */ struct bridge_vlan_info { - u16 flags; - u16 vid; + __u16 flags; + __u16 vid; }; /* Bridge multicast database attributes -- cgit v1.2.3-71-gd317 From 04f39047af2a6df64b763ea5a271db24879d0391 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Fri, 8 Feb 2013 18:16:19 +0100 Subject: nl80211/cfg80211: add radar detection command/event Add new NL80211_CMD_RADAR_DETECT, which starts the Channel Availability Check (CAC). This command will also notify the usermode about events (CAC finished, CAC aborted, radar detected, NOP finished). Once radar detection has started it should continuously monitor for radars as long as the channel is active. This patch enables DFS for AP mode in nl80211/cfg80211. Based on original patch by Victor Goldenshtein Signed-off-by: Simon Wunderlich [remove WIPHY_FLAG_HAS_RADAR_DETECT again -- my mistake] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 46 +++++++++++++++ include/uapi/linux/nl80211.h | 61 +++++++++++++++++++ net/wireless/chan.c | 129 ++++++++++++++++++++++++++++++++++++++++- net/wireless/core.c | 3 + net/wireless/core.h | 28 +++++++++ net/wireless/mlme.c | 120 ++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.c | 135 +++++++++++++++++++++++++++++++++++++++++-- net/wireless/nl80211.h | 7 +++ net/wireless/reg.c | 3 + net/wireless/scan.c | 10 ---- net/wireless/trace.h | 45 +++++++++++++++ 11 files changed, 570 insertions(+), 17 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7e6569e1f16f..ee11a3db730b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -114,6 +114,9 @@ enum ieee80211_channel_flags { #define IEEE80211_CHAN_NO_HT40 \ (IEEE80211_CHAN_NO_HT40PLUS | IEEE80211_CHAN_NO_HT40MINUS) +#define IEEE80211_DFS_MIN_CAC_TIME_MS 60000 +#define IEEE80211_DFS_MIN_NOP_TIME_MS (30 * 60 * 1000) + /** * struct ieee80211_channel - channel definition * @@ -134,6 +137,9 @@ enum ieee80211_channel_flags { * to enable this, this is useful only on 5 GHz band. * @orig_mag: internal use * @orig_mpwr: internal use + * @dfs_state: current state of this channel. Only relevant if radar is required + * on this channel. + * @dfs_state_entered: timestamp (jiffies) when the dfs state was entered. 
*/ struct ieee80211_channel { enum ieee80211_band band; @@ -146,6 +152,8 @@ struct ieee80211_channel { bool beacon_found; u32 orig_flags; int orig_mag, orig_mpwr; + enum nl80211_dfs_state dfs_state; + unsigned long dfs_state_entered; }; /** @@ -569,6 +577,7 @@ struct cfg80211_acl_data { * @p2p_opp_ps: P2P opportunistic PS * @acl: ACL configuration used by the drivers which has support for * MAC address based access control + * @radar_required: set if radar detection is required */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -586,6 +595,7 @@ struct cfg80211_ap_settings { u8 p2p_ctwindow; bool p2p_opp_ps; const struct cfg80211_acl_data *acl; + bool radar_required; }; /** @@ -1909,6 +1919,8 @@ struct cfg80211_gtk_rekey_data { * this new list replaces the existing one. Driver has to clear its ACL * when number of MAC addresses entries is passed as 0. Drivers which * advertise the support for MAC based ACL have to implement this callback. + * + * @start_radar_detection: Start radar detection in the driver. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2132,6 +2144,10 @@ struct cfg80211_ops { int (*set_mac_acl)(struct wiphy *wiphy, struct net_device *dev, const struct cfg80211_acl_data *params); + + int (*start_radar_detection)(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_chan_def *chandef); }; /* @@ -2715,6 +2731,8 @@ struct cfg80211_cached_keys; * beacons, 0 when not valid * @address: The address for this device, valid only if @netdev is %NULL * @p2p_started: true if this is a P2P Device that has been started + * @cac_started: true if DFS channel availability check has been started + * @cac_start_time: timestamp (jiffies) when the dfs state was entered. */ struct wireless_dev { struct wiphy *wiphy; @@ -2766,6 +2784,9 @@ struct wireless_dev { u32 ap_unexpected_nlportid; + bool cac_started; + unsigned long cac_start_time; + #ifdef CONFIG_CFG80211_WEXT /* wext data */ struct { @@ -3754,6 +3775,31 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); +/** + * cfg80211_radar_event - radar detection event + * @wiphy: the wiphy + * @chandef: chandef for the current channel + * @gfp: context flags + * + * This function is called when a radar is detected on the current chanenl. + */ +void cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, gfp_t gfp); + +/** + * cfg80211_cac_event - Channel availability check (CAC) event + * @netdev: network device + * @event: type of event + * @gfp: context flags + * + * This function is called when a Channel availability check (CAC) is finished + * or aborted. This must be called to notify the completion of a CAC process, + * also by full-MAC drivers. + */ +void cfg80211_cac_event(struct net_device *netdev, + enum nl80211_radar_event event, gfp_t gfp); + + /** * cfg80211_cqm_pktloss_notify - notify userspace about packetloss to peer * @dev: network device diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5309b34930ea..90b7af86f392 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -603,6 +603,14 @@ * command is used in AP/P2P GO mode. Driver has to make sure to clear its * ACL list during %NL80211_CMD_STOP_AP. * + * @NL80211_CMD_RADAR_DETECT: Start a Channel availability check (CAC). 
Once + * a radar is detected or the channel availability scan (CAC) has finished + * or was aborted, or a radar was detected, usermode will be notified with + * this event. This command is also used to notify userspace about radars + * while operating on this channel. + * %NL80211_ATTR_RADAR_EVENT is used to inform about the type of the + * event. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -755,6 +763,8 @@ enum nl80211_commands { NL80211_CMD_SET_MAC_ACL, + NL80211_CMD_RADAR_DETECT, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1342,6 +1352,9 @@ enum nl80211_commands { * number of MAC addresses that a device can support for MAC * ACL. * + * @NL80211_ATTR_RADAR_EVENT: Type of radar event for notification to userspace, + * contains a value of enum nl80211_radar_event (u32). + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1620,6 +1633,8 @@ enum nl80211_attrs { NL80211_ATTR_MAC_ACL_MAX, + NL80211_ATTR_RADAR_EVENT, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2022,6 +2037,10 @@ enum nl80211_band_attr { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm * (100 * dBm). + * @NL80211_FREQUENCY_ATTR_DFS_STATE: current state for DFS + * (enum nl80211_dfs_state) + * @NL80211_FREQUENCY_ATTR_DFS_TIME: time in miliseconds for how long + * this channel is in this DFS state. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -2034,6 +2053,8 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_IBSS, NL80211_FREQUENCY_ATTR_RADAR, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, + NL80211_FREQUENCY_ATTR_DFS_STATE, + NL80211_FREQUENCY_ATTR_DFS_TIME, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -3489,4 +3510,44 @@ enum nl80211_acl_policy { NL80211_ACL_POLICY_DENY_UNLESS_LISTED, }; +/** + * enum nl80211_radar_event - type of radar event for DFS operation + * + * Type of event to be used with NL80211_ATTR_RADAR_EVENT to inform userspace + * about detected radars or success of the channel available check (CAC) + * + * @NL80211_RADAR_DETECTED: A radar pattern has been detected. The channel is + * now unusable. + * @NL80211_RADAR_CAC_FINISHED: Channel Availability Check has been finished, + * the channel is now available. + * @NL80211_RADAR_CAC_ABORTED: Channel Availability Check has been aborted, no + * change to the channel status. + * @NL80211_RADAR_NOP_FINISHED: The Non-Occupancy Period for this channel is + * over, channel becomes usable. + */ +enum nl80211_radar_event { + NL80211_RADAR_DETECTED, + NL80211_RADAR_CAC_FINISHED, + NL80211_RADAR_CAC_ABORTED, + NL80211_RADAR_NOP_FINISHED, +}; + +/** + * enum nl80211_dfs_state - DFS states for channels + * + * Channel states used by the DFS code. + * + * @IEEE80211_DFS_USABLE: The channel can be used, but channel availability + * check (CAC) must be performed before using it for AP or IBSS. + * @IEEE80211_DFS_UNAVAILABLE: A radar has been detected on this channel, it + * is therefore marked as not available. + * @IEEE80211_DFS_AVAILABLE: The channel has been CAC checked and is available. 
+ */ + +enum nl80211_dfs_state { + NL80211_DFS_USABLE, + NL80211_DFS_UNAVAILABLE, + NL80211_DFS_AVAILABLE, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 396373f3ec26..810c23cfb894 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -147,6 +147,32 @@ static void chandef_primary_freqs(const struct cfg80211_chan_def *c, } } +static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c) +{ + int width; + + switch (c->width) { + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_20_NOHT: + width = 20; + break; + case NL80211_CHAN_WIDTH_40: + width = 40; + break; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_80: + width = 80; + break; + case NL80211_CHAN_WIDTH_160: + width = 160; + break; + default: + WARN_ON_ONCE(1); + return -1; + } + return width; +} + const struct cfg80211_chan_def * cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, const struct cfg80211_chan_def *c2) @@ -192,6 +218,93 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, } EXPORT_SYMBOL(cfg80211_chandef_compatible); +static void cfg80211_set_chans_dfs_state(struct wiphy *wiphy, u32 center_freq, + u32 bandwidth, + enum nl80211_dfs_state dfs_state) +{ + struct ieee80211_channel *c; + u32 freq; + + for (freq = center_freq - bandwidth/2 + 10; + freq <= center_freq + bandwidth/2 - 10; + freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c || !(c->flags & IEEE80211_CHAN_RADAR)) + continue; + + c->dfs_state = dfs_state; + c->dfs_state_entered = jiffies; + } +} + +void cfg80211_set_dfs_state(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + enum nl80211_dfs_state dfs_state) +{ + int width; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return; + + cfg80211_set_chans_dfs_state(wiphy, chandef->center_freq1, + width, dfs_state); + + if (!chandef->center_freq2) + return; + cfg80211_set_chans_dfs_state(wiphy, chandef->center_freq2, + width, dfs_state); +} + +static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, + u32 center_freq, + u32 bandwidth) +{ + struct ieee80211_channel *c; + u32 freq; + + for (freq = center_freq - bandwidth/2 + 10; + freq <= center_freq + bandwidth/2 - 10; + freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c) + return -EINVAL; + + if (c->flags & IEEE80211_CHAN_RADAR) + return 1; + } + return 0; +} + + +int cfg80211_chandef_dfs_required(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef) +{ + int width; + int r; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return -EINVAL; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return -EINVAL; + + r = cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq1, + width); + if (r) + return r; + + if (!chandef->center_freq2) + return 0; + + return cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq2, + width); +} + static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, u32 center_freq, u32 bandwidth, u32 prohibited_flags) @@ -203,7 +316,16 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, freq <= center_freq + bandwidth/2 - 10; freq += 20) { c = ieee80211_get_channel(wiphy, freq); - if (!c || c->flags & prohibited_flags) + if (!c) + return false; + + /* check for radar flags */ + if ((prohibited_flags & c->flags & IEEE80211_CHAN_RADAR) && + (c->dfs_state != NL80211_DFS_AVAILABLE)) + return false; + + /* check for the other flags */ + if (c->flags & 
prohibited_flags & ~IEEE80211_CHAN_RADAR) return false; } @@ -344,7 +466,10 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - if (wdev->beacon_interval) { + if (wdev->cac_started) { + *chan = wdev->channel; + *chanmode = CHAN_MODE_SHARED; + } else if (wdev->beacon_interval) { *chan = wdev->channel; *chanmode = CHAN_MODE_SHARED; } diff --git a/net/wireless/core.c b/net/wireless/core.c index f0a1bbe95cff..922002105062 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -324,6 +324,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) INIT_LIST_HEAD(&rdev->bss_list); INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); INIT_WORK(&rdev->sched_scan_results_wk, __cfg80211_sched_scan_results); + INIT_DELAYED_WORK(&rdev->dfs_update_channels_wk, + cfg80211_dfs_channels_update_work); #ifdef CONFIG_CFG80211_WEXT rdev->wiphy.wext = &cfg80211_wext_handler; #endif @@ -695,6 +697,7 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->scan_done_wk); cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); + cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); if (rdev->wowlan && rdev->ops->set_wakeup) rdev_set_wakeup(rdev, false); diff --git a/net/wireless/core.h b/net/wireless/core.h index 949c9573d8d7..3aec0e429d8a 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -86,6 +86,8 @@ struct cfg80211_registered_device { struct cfg80211_wowlan *wowlan; + struct delayed_work dfs_update_channels_wk; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); @@ -431,6 +433,22 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chanmode, u8 radar_detect); +/** + * cfg80211_chandef_dfs_required - checks if radar detection is required + * @wiphy: the wiphy to validate against + * @chandef: the channel definition to check + * Return: 1 if radar detection is required, 0 if it is not, < 0 on error + */ +int cfg80211_chandef_dfs_required(struct wiphy *wiphy, + const struct cfg80211_chan_def *c); + +void cfg80211_set_dfs_state(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + enum nl80211_dfs_state dfs_state); + +void cfg80211_dfs_channels_update_work(struct work_struct *work); + + static inline int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, @@ -457,6 +475,16 @@ cfg80211_can_use_chan(struct cfg80211_registered_device *rdev, chan, chanmode, 0); } +static inline unsigned int elapsed_jiffies_msecs(unsigned long start) +{ + unsigned long end = jiffies; + + if (end >= start) + return jiffies_to_msecs(end - start); + + return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1); +} + void cfg80211_get_chan_state(struct wireless_dev *wdev, struct ieee80211_channel **chan, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 8e6920728c43..caddca35d686 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -987,3 +987,123 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp); } EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify); + +void cfg80211_dfs_channels_update_work(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct cfg80211_registered_device *rdev; + struct cfg80211_chan_def chandef; + struct ieee80211_supported_band *sband; + struct ieee80211_channel 
*c; + struct wiphy *wiphy; + bool check_again = false; + unsigned long timeout, next_time = 0; + int bandid, i; + + delayed_work = container_of(work, struct delayed_work, work); + rdev = container_of(delayed_work, struct cfg80211_registered_device, + dfs_update_channels_wk); + wiphy = &rdev->wiphy; + + mutex_lock(&cfg80211_mutex); + for (bandid = 0; bandid < IEEE80211_NUM_BANDS; bandid++) { + sband = wiphy->bands[bandid]; + if (!sband) + continue; + + for (i = 0; i < sband->n_channels; i++) { + c = &sband->channels[i]; + + if (c->dfs_state != NL80211_DFS_UNAVAILABLE) + continue; + + timeout = c->dfs_state_entered + + IEEE80211_DFS_MIN_NOP_TIME_MS; + + if (time_after_eq(jiffies, timeout)) { + c->dfs_state = NL80211_DFS_USABLE; + cfg80211_chandef_create(&chandef, c, + NL80211_CHAN_NO_HT); + + nl80211_radar_notify(rdev, &chandef, + NL80211_RADAR_NOP_FINISHED, + NULL, GFP_ATOMIC); + continue; + } + + if (!check_again) + next_time = timeout - jiffies; + else + next_time = min(next_time, timeout - jiffies); + check_again = true; + } + } + mutex_unlock(&cfg80211_mutex); + + /* reschedule if there are other channels waiting to be cleared again */ + if (check_again) + queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, + next_time); +} + + +void cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + unsigned long timeout; + + trace_cfg80211_radar_event(wiphy, chandef); + + /* only set the chandef supplied channel to unavailable, in + * case the radar is detected on only one of multiple channels + * spanned by the chandef. + */ + cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_UNAVAILABLE); + + timeout = msecs_to_jiffies(IEEE80211_DFS_MIN_NOP_TIME_MS); + queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, + timeout); + + nl80211_radar_notify(rdev, chandef, NL80211_RADAR_DETECTED, NULL, gfp); +} +EXPORT_SYMBOL(cfg80211_radar_event); + +void cfg80211_cac_event(struct net_device *netdev, + enum nl80211_radar_event event, gfp_t gfp) +{ + struct wireless_dev *wdev = netdev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_chan_def chandef; + unsigned long timeout; + + trace_cfg80211_cac_event(netdev, event); + + if (WARN_ON(!wdev->cac_started)) + return; + + if (WARN_ON(!wdev->channel)) + return; + + cfg80211_chandef_create(&chandef, wdev->channel, NL80211_CHAN_NO_HT); + + switch (event) { + case NL80211_RADAR_CAC_FINISHED: + timeout = wdev->cac_start_time + + msecs_to_jiffies(IEEE80211_DFS_MIN_CAC_TIME_MS); + WARN_ON(!time_after_eq(jiffies, timeout)); + cfg80211_set_dfs_state(wiphy, &chandef, NL80211_DFS_AVAILABLE); + break; + case NL80211_RADAR_CAC_ABORTED: + break; + default: + WARN_ON(1); + return; + } + wdev->cac_started = false; + + nl80211_radar_notify(rdev, &chandef, event, netdev, gfp); +} +EXPORT_SYMBOL(cfg80211_cac_event); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d29a461b4981..c1e18ccf4049 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -552,9 +552,16 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_NO_IBSS) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IBSS)) goto nla_put_failure; - if ((chan->flags & IEEE80211_CHAN_RADAR) && - nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) - goto nla_put_failure; + if (chan->flags & IEEE80211_CHAN_RADAR) { + u32 time = 
elapsed_jiffies_msecs(chan->dfs_state_entered); + if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) + goto nla_put_failure; + if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_STATE, + chan->dfs_state)) + goto nla_put_failure; + if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME, time)) + goto nla_put_failure; + } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, DBM_TO_MBM(chan->max_power))) @@ -2775,6 +2782,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ap_settings params; int err; + u8 radar_detect_width = 0; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) @@ -2893,9 +2901,19 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef)) return -EINVAL; + err = cfg80211_chandef_dfs_required(wdev->wiphy, ¶ms.chandef); + if (err < 0) + return err; + if (err) { + radar_detect_width = BIT(params.chandef.width); + params.radar_required = true; + } + mutex_lock(&rdev->devlist_mtx); - err = cfg80211_can_use_chan(rdev, wdev, params.chandef.chan, - CHAN_MODE_SHARED); + err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, + params.chandef.chan, + CHAN_MODE_SHARED, + radar_detect_width); mutex_unlock(&rdev->devlist_mtx); if (err) @@ -5055,6 +5073,54 @@ static int nl80211_stop_sched_scan(struct sk_buff *skb, return err; } +static int nl80211_start_radar_detection(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_chan_def chandef; + int err; + + err = nl80211_parse_chandef(rdev, info, &chandef); + if (err) + return err; + + if (wdev->cac_started) + return -EBUSY; + + err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef); + if (err < 0) + return err; + + if (err == 0) + return -EINVAL; + + if (chandef.chan->dfs_state != NL80211_DFS_USABLE) + return -EINVAL; + + if (!rdev->ops->start_radar_detection) + return -EOPNOTSUPP; + + mutex_lock(&rdev->devlist_mtx); + err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, + chandef.chan, CHAN_MODE_SHARED, + BIT(chandef.width)); + if (err) + goto err_locked; + + err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef); + if (!err) { + wdev->channel = chandef.chan; + wdev->cac_started = true; + wdev->cac_start_time = jiffies; + } +err_locked: + mutex_unlock(&rdev->devlist_mtx); + + return err; +} + static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, u32 seq, int flags, struct cfg80211_registered_device *rdev, @@ -8305,6 +8371,14 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_RADAR_DETECT, + .doit = nl80211_start_radar_detection, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -9501,6 +9575,57 @@ nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void +nl80211_radar_notify(struct cfg80211_registered_device *rdev, + struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event, + struct net_device *netdev, gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + 
return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_RADAR_DETECT); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx)) + goto nla_put_failure; + + /* NOP and radar events don't need a netdev parameter */ + if (netdev) { + struct wireless_dev *wdev = netdev->ieee80211_ptr; + + if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + goto nla_put_failure; + } + + if (nla_put_u32(msg, NL80211_ATTR_RADAR_EVENT, event)) + goto nla_put_failure; + + if (nl80211_send_chandef(msg, chandef)) + goto nla_put_failure; + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + void nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *peer, diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 2acba8477e9d..b061da4919e1 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -108,6 +108,13 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); + +void +nl80211_radar_notify(struct cfg80211_registered_device *rdev, + struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event, + struct net_device *netdev, gfp_t gfp); + void nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *peer, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 08d3da2c70ab..e97d5b071ab6 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -884,6 +884,9 @@ static void handle_channel(struct wiphy *wiphy, return; } + chan->dfs_state = NL80211_DFS_USABLE; + chan->dfs_state_entered = jiffies; + chan->beacon_found = false; chan->flags = flags | bw_flags | map_regdom_flags(reg_rule->flags); chan->max_antenna_gain = diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d0fc6da2d097..f0d9b5154bab 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1210,16 +1210,6 @@ static void ieee80211_scan_add_ies(struct iw_request_info *info, } } -static inline unsigned int elapsed_jiffies_msecs(unsigned long start) -{ - unsigned long end = jiffies; - - if (end >= start) - return jiffies_to_msecs(end - start); - - return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1); -} - static char * ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, struct cfg80211_internal_bss *bss, char *current_ev, diff --git a/net/wireless/trace.h b/net/wireless/trace.h index c9cafb0ea95f..b7a531380e19 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2051,6 +2051,21 @@ TRACE_EVENT(cfg80211_reg_can_beacon, WIPHY_PR_ARG, CHAN_DEF_PR_ARG) ); +TRACE_EVENT(cfg80211_chandef_dfs_required, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, chandef), + TP_STRUCT__entry( + WIPHY_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, CHAN_DEF_PR_ARG) +); + TRACE_EVENT(cfg80211_ch_switch_notify, TP_PROTO(struct net_device *netdev, struct cfg80211_chan_def *chandef), @@ -2067,6 +2082,36 @@ TRACE_EVENT(cfg80211_ch_switch_notify, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) ); +TRACE_EVENT(cfg80211_radar_event, + TP_PROTO(struct wiphy *wiphy, struct 
cfg80211_chan_def *chandef), + TP_ARGS(wiphy, chandef), + TP_STRUCT__entry( + WIPHY_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, CHAN_DEF_PR_ARG) +); + +TRACE_EVENT(cfg80211_cac_event, + TP_PROTO(struct net_device *netdev, enum nl80211_radar_event evt), + TP_ARGS(netdev, evt), + TP_STRUCT__entry( + NETDEV_ENTRY + __field(enum nl80211_radar_event, evt) + ), + TP_fast_assign( + NETDEV_ASSIGN; + __entry->evt = evt; + ), + TP_printk(NETDEV_PR_FMT ", event: %d", + NETDEV_PR_ARG, __entry->evt) +); + DECLARE_EVENT_CLASS(cfg80211_rx_evt, TP_PROTO(struct net_device *netdev, const u8 *addr), TP_ARGS(netdev, addr), -- cgit v1.2.3-71-gd317 From 50640f169372b9977487a328dedf13a8debedff7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Dec 2012 17:59:39 +0100 Subject: nl80211: advertise HT/VHT channel limitations When drivers or regulatory have limitations on 40, 80 or 160 MHz channels, advertise these to userspace via nl80211. Also add a new feature flag to let userspace know this is supported. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 17 +++++++++++++++++ net/wireless/core.c | 3 ++- net/wireless/nl80211.c | 12 ++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 90b7af86f392..3880f6ad7ed1 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2041,6 +2041,16 @@ enum nl80211_band_attr { * (enum nl80211_dfs_state) * @NL80211_FREQUENCY_ATTR_DFS_TIME: time in miliseconds for how long * this channel is in this DFS state. + * @NL80211_FREQUENCY_ATTR_NO_HT40_MINUS: HT40- isn't possible with this + * channel as the control channel + * @NL80211_FREQUENCY_ATTR_NO_HT40_PLUS: HT40+ isn't possible with this + * channel as the control channel + * @NL80211_FREQUENCY_ATTR_NO_80MHZ: any 80 MHz channel using this channel + * as the primary or any of the secondary channels isn't possible, + * this includes 80+80 channels + * @NL80211_FREQUENCY_ATTR_NO_160MHZ: any 160 MHz (but not 80+80) channel + * using this channel as the primary or any of the secondary channels + * isn't possible * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -2055,6 +2065,10 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_MAX_TX_POWER, NL80211_FREQUENCY_ATTR_DFS_STATE, NL80211_FREQUENCY_ATTR_DFS_TIME, + NL80211_FREQUENCY_ATTR_NO_HT40_MINUS, + NL80211_FREQUENCY_ATTR_NO_HT40_PLUS, + NL80211_FREQUENCY_ATTR_NO_80MHZ, + NL80211_FREQUENCY_ATTR_NO_160MHZ, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -3421,6 +3435,8 @@ enum nl80211_ap_sme_features { * Note that even for drivers that support this, the default is to add * stations in authenticated/associated state, so to add unauthenticated * stations the authenticated/associated bits have to be set in the mask. 
+ * @NL80211_FEATURE_ADVERTISE_CHAN_LIMITS: cfg80211 advertises channel limits + * (HT40, VHT 80/160 MHz) if this flag is set */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3437,6 +3453,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_P2P_GO_CTWIN = 1 << 11, NL80211_FEATURE_P2P_GO_OPPPS = 1 << 12, NL80211_FEATURE_FULL_AP_CLIENT_STATE = 1 << 13, + NL80211_FEATURE_ADVERTISE_CHAN_LIMITS = 1 << 14, }; /** diff --git a/net/wireless/core.c b/net/wireless/core.c index 922002105062..33b75b9b8efa 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -367,7 +367,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.rts_threshold = (u32) -1; rdev->wiphy.coverage_class = 0; - rdev->wiphy.features = NL80211_FEATURE_SCAN_FLUSH; + rdev->wiphy.features = NL80211_FEATURE_SCAN_FLUSH | + NL80211_FEATURE_ADVERTISE_CHAN_LIMITS; return &rdev->wiphy; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c1e18ccf4049..7e40b9e82b45 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -562,6 +562,18 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME, time)) goto nla_put_failure; } + if ((chan->flags & IEEE80211_CHAN_NO_HT40MINUS) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_MINUS)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_HT40PLUS) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_PLUS)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_80MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_80MHZ)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ)) + goto nla_put_failure; if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, DBM_TO_MBM(chan->max_power))) -- cgit v1.2.3-71-gd317 From a50df0c4c0d97170a6c43573612acacc43e62fe7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Feb 2013 14:20:05 +0100 Subject: cfg80211: advertise extended capabilities to userspace In many cases, userspace may need to know which of the 802.11 extended capabilities ("Extended Capabilities element") are implemented in the driver or device, to include them e.g. in beacons, assoc request/response or other frames. Add a new nl80211 attribute to hold the extended capabilities bitmap for this. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 +++++++++++ include/uapi/linux/nl80211.h | 9 +++++++++ net/wireless/nl80211.c | 9 +++++++++ 3 files changed, 29 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8a9200f2f4a4..a229046d86d4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2480,6 +2480,14 @@ struct wiphy_wowlan_support { * * @max_acl_mac_addrs: Maximum number of MAC addresses that the device * supports for ACL. + * + * @extended_capabilities: extended capabilities supported by the driver, + * additional capabilities might be supported by userspace; these are + * the 802.11 extended capabilities ("Extended Capabilities element") + * and are in the same format as in the information element. See + * 802.11-2012 8.4.2.29 for the defined fields. 
+ * @extended_capabilities_mask: mask of the valid values + * @extended_capabilities_len: length of the extended capabilities */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -2546,6 +2554,9 @@ struct wiphy { */ u32 probe_resp_offload; + const u8 *extended_capabilities, *extended_capabilities_mask; + u8 extended_capabilities_len; + /* If multiple wiphys are registered and you're handed e.g. * a regular netdev with assigned ieee80211_ptr, you won't * know whether it points to a wiphy your driver has registered diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 3880f6ad7ed1..1fd6e5611896 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1355,6 +1355,12 @@ enum nl80211_commands { * @NL80211_ATTR_RADAR_EVENT: Type of radar event for notification to userspace, * contains a value of enum nl80211_radar_event (u32). * + * @NL80211_ATTR_EXT_CAPA: 802.11 extended capabilities that the kernel driver + * has and handles. The format is the same as the IE contents. See + * 802.11-2012 8.4.2.29 for more information. + * @NL80211_ATTR_EXT_CAPA_MASK: Extended capabilities that the kernel driver + * has set in the %NL80211_ATTR_EXT_CAPA value, for multibit fields. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1635,6 +1641,9 @@ enum nl80211_attrs { NL80211_ATTR_RADAR_EVENT, + NL80211_ATTR_EXT_CAPA, + NL80211_ATTR_EXT_CAPA_MASK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7e40b9e82b45..1237431c3efa 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1363,6 +1363,15 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag dev->wiphy.max_acl_mac_addrs)) goto nla_put_failure; + if (dev->wiphy.extended_capabilities && + (nla_put(msg, NL80211_ATTR_EXT_CAPA, + dev->wiphy.extended_capabilities_len, + dev->wiphy.extended_capabilities) || + nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK, + dev->wiphy.extended_capabilities_len, + dev->wiphy.extended_capabilities_mask))) + goto nla_put_failure; + return genlmsg_end(msg, hdr); nla_put_failure: -- cgit v1.2.3-71-gd317 From 9d62a98617298c1da288f50e84c5dd67732e79b7 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 14 Feb 2013 21:10:13 +0200 Subject: cfg80211: Pass station (extended) capability info to kernel The information of the peer's capabilities and extended capabilities are required for the driver to perform TDLS Peer UAPSD operations and off channel operations. This information of the peer is passed from user space using NL80211_CMD_SET_STATION command. This commit enhances the function nl80211_set_station to pass the capability information of the peer to the driver. Similarly, there may be need for capability information for other modes, so allow this to be provided with both add_station and change_station. 
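As an illustration, a driver-side change_station handler could pick up the new fields roughly as sketched below. This is a minimal, hypothetical example: the callback name and the pr_debug reporting are made up for illustration, and only the struct station_parameters members (capability, ext_capab, ext_capab_len, sta_modify_mask) and the STATION_PARAM_APPLY_CAPABILITY bit come from this patch.

	#include <net/cfg80211.h>

	/* Hypothetical cfg80211 change_station callback; not part of this patch. */
	static int example_change_station(struct wiphy *wiphy, struct net_device *dev,
					  u8 *mac, struct station_parameters *params)
	{
		/* There is no in-band "no change" value for the capability
		 * field, so the apply mask is the only indication that
		 * userspace actually sent NL80211_ATTR_STA_CAPABILITY.
		 */
		if (params->sta_modify_mask & STATION_PARAM_APPLY_CAPABILITY)
			pr_debug("peer %pM capability 0x%04x\n",
				 mac, params->capability);

		/* Extended capabilities arrive as the raw element payload;
		 * ext_capab_len == 0 means the attribute was absent.
		 */
		if (params->ext_capab_len)
			pr_debug("peer %pM sent %u extended capability octets\n",
				 mac, params->ext_capab_len);

		return 0;
	}
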

Signed-off-by: Jouni Malinen
Signed-off-by: Johannes Berg
---
 include/net/cfg80211.h       |  8 ++++++++
 include/uapi/linux/nl80211.h | 10 ++++++++++
 net/wireless/nl80211.c       | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index a229046d86d4..fa2612952c19 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -626,12 +626,14 @@ enum plink_actions {
 /**
  * enum station_parameters_apply_mask - station parameter values to apply
  * @STATION_PARAM_APPLY_UAPSD: apply new uAPSD parameters (uapsd_queues, max_sp)
+ * @STATION_PARAM_APPLY_CAPABILITY: apply new capability
  *
  * Not all station parameters have in-band "no change" signalling,
  * for those that don't these flags will are used.
  */
 enum station_parameters_apply_mask {
 	STATION_PARAM_APPLY_UAPSD = BIT(0),
+	STATION_PARAM_APPLY_CAPABILITY = BIT(1),
 };
 
 /**
@@ -662,6 +664,9 @@ enum station_parameters_apply_mask {
  *	see &enum station_parameters_apply_mask
  * @local_pm: local link-specific mesh power save mode (no change when set
  *	to unknown)
+ * @capability: station capability
+ * @ext_capab: extended capabilities of the station
+ * @ext_capab_len: number of extended capabilities
  */
 struct station_parameters {
 	u8 *supported_rates;
@@ -678,6 +683,9 @@ struct station_parameters {
 	u8 uapsd_queues;
 	u8 max_sp;
 	enum nl80211_mesh_power_mode local_pm;
+	u16 capability;
+	u8 *ext_capab;
+	u8 ext_capab_len;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 1fd6e5611896..f7c35ca01efc 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1361,6 +1361,13 @@ enum nl80211_commands {
  * @NL80211_ATTR_EXT_CAPA_MASK: Extended capabilities that the kernel driver
  *	has set in the %NL80211_ATTR_EXT_CAPA value, for multibit fields.
  *
+ * @NL80211_ATTR_STA_CAPABILITY: Station capabilities (u16) are advertised to
+ *	the driver, e.g., to enable TDLS power save (PU-APSD).
+ *
+ * @NL80211_ATTR_STA_EXT_CAPABILITY: Station extended capabilities are
+ *	advertised to the driver, e.g., to enable TDLS off channel operations
+ *	and PU-APSD.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1644,6 +1651,9 @@ enum nl80211_attrs {
 	NL80211_ATTR_EXT_CAPA,
 	NL80211_ATTR_EXT_CAPA_MASK,
 
+	NL80211_ATTR_STA_CAPABILITY,
+	NL80211_ATTR_STA_EXT_CAPABILITY,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1237431c3efa..be9f2b5a403f 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -368,6 +368,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 },
 	[NL80211_ATTR_ACL_POLICY] = {. type = NLA_U32 },
 	[NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED },
+	[NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 },
+	[NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, },
 };
 
 /* policy for the key attributes */
@@ -3435,6 +3437,19 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 			nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
 	}
 
+	if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) {
+		params.capability =
+			nla_get_u16(info->attrs[NL80211_ATTR_STA_CAPABILITY]);
+		params.sta_modify_mask |= STATION_PARAM_APPLY_CAPABILITY;
+	}
+
+	if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) {
+		params.ext_capab =
+			nla_data(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]);
+		params.ext_capab_len =
+			nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]);
+	}
+
 	if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL] ||
 	    info->attrs[NL80211_ATTR_HT_CAPABILITY])
 		return -EINVAL;
@@ -3505,6 +3520,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 		/* reject other things that can't change */
 		if (params.supported_rates)
 			return -EINVAL;
+		if (info->attrs[NL80211_ATTR_STA_CAPABILITY])
+			return -EINVAL;
+		if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY])
+			return -EINVAL;
 
 		/* must be last in here for error handling */
 		params.vlan = get_vlan(info, rdev);
@@ -3537,6 +3556,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 			return -EINVAL;
 		if (params.supported_rates)
 			return -EINVAL;
+		if (info->attrs[NL80211_ATTR_STA_CAPABILITY])
+			return -EINVAL;
+		if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY])
+			return -EINVAL;
 
 		/*
 		 * No special handling for TDLS here -- the userspace
 		 * mesh code doesn't have this bug.
@@ -3601,6 +3624,19 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 	if (!params.aid || params.aid > IEEE80211_MAX_AID)
 		return -EINVAL;
 
+	if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) {
+		params.capability =
+			nla_get_u16(info->attrs[NL80211_ATTR_STA_CAPABILITY]);
+		params.sta_modify_mask |= STATION_PARAM_APPLY_CAPABILITY;
+	}
+
+	if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) {
+		params.ext_capab =
+			nla_data(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]);
+		params.ext_capab_len =
+			nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]);
+	}
+
 	if (info->attrs[NL80211_ATTR_HT_CAPABILITY])
 		params.ht_capa =
 			nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
-- cgit v1.2.3-71-gd317

From 932dd97c5fef091dd6f605fb1d40143d67d91e09 Mon Sep 17 00:00:00 2001
From: Johannes Berg
Date: Thu, 14 Feb 2013 11:56:13 +0100
Subject: nl80211: renumber NL80211_FEATURE_FULL_AP_CLIENT_STATE

Adding the flag to mac80211 already without testing was clearly a
mistake, one that we now pay for by having to reserve bit 13 forever.
The problem is cfg80211 doesn't allow capability/rate changes for
station entries that were added unassociated, so the station entries
cannot be set up properly when marked associated.

Change the NL80211_FEATURE_FULL_AP_CLIENT_STATE value to make it clear
to userspace implementations that all current kernels don't actually
support it, even though the previous bit is set, and of course also
remove the flag from mac80211 until we test and fix the issues.

Signed-off-by: Johannes Berg
---
 include/uapi/linux/nl80211.h | 3 ++-
 net/mac80211/main.c          | 3 +--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index f7c35ca01efc..c46bb016f4e4 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3471,8 +3471,9 @@ enum nl80211_feature_flags {
 	NL80211_FEATURE_NEED_OBSS_SCAN			= 1 << 10,
 	NL80211_FEATURE_P2P_GO_CTWIN			= 1 << 11,
 	NL80211_FEATURE_P2P_GO_OPPPS			= 1 << 12,
-	NL80211_FEATURE_FULL_AP_CLIENT_STATE		= 1 << 13,
+	/* bit 13 is reserved */
 	NL80211_FEATURE_ADVERTISE_CHAN_LIMITS		= 1 << 14,
+	NL80211_FEATURE_FULL_AP_CLIENT_STATE		= 1 << 15,
 };
 
 /**
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 035344bc6b9c..f9747689d604 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -572,8 +572,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	wiphy->features |= NL80211_FEATURE_SK_TX_STATUS |
 			   NL80211_FEATURE_SAE |
 			   NL80211_FEATURE_HT_IBSS |
-			   NL80211_FEATURE_VIF_TXPOWER |
-			   NL80211_FEATURE_FULL_AP_CLIENT_STATE;
+			   NL80211_FEATURE_VIF_TXPOWER;
 
 	if (!ops->hw_scan)
 		wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
-- cgit v1.2.3-71-gd317

From 5b8ca5344f82e594e21c9fbbdf3b13507ecdb5a2 Mon Sep 17 00:00:00 2001
From: Andy King
Date: Mon, 18 Feb 2013 06:04:12 +0000
Subject: VSOCK: Remove hypervisor-only socket option

Remove hypervisor-only socket option.

Reported-by: Gerd Hoffmann
Acked-by: Dmitry Torokhov
Signed-off-by: Andy King
Signed-off-by: David S. Miller
---
 include/uapi/linux/vm_sockets.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h
index f7f2e99dec84..df91301847ec 100644
--- a/include/uapi/linux/vm_sockets.h
+++ b/include/uapi/linux/vm_sockets.h
@@ -52,14 +52,6 @@
 
 #define SO_VM_SOCKETS_PEER_HOST_VM_ID 3
 
-/* Option name for socket's service label. Use as the option name in
- * setsockopt(3) or getsockopt(3) to set or get the service label for a socket.
- * The service label is a C-style NUL-terminated string. Only available for
- * hypervisor endpoints.
- */
-
-#define SO_VM_SOCKETS_SERVICE_LABEL 4
-
 /* Option name for determining if a socket is trusted. Use as the option name
  * in getsockopt(3) to determine if a socket is trusted. The value is a
  * signed integer.
-- cgit v1.2.3-71-gd317
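
For userspace authors affected by the NL80211_FEATURE_FULL_AP_CLIENT_STATE
renumbering above, the following is a minimal, hypothetical sketch (not part of
any patch in this series) of how an AP implementation might test the wiphy
feature bitmap with libnl. Only the nl80211 attribute and flag names come from
the patches; the helper name and the assumption that the NL80211_CMD_GET_WIPHY
response has already been parsed into tb[] are illustrative only.

/* Hypothetical userspace-side check: after the renumbering, only the new
 * bit position (1 << 15) may be trusted; old kernels that set bit 13 do
 * not actually support full AP client state. */
#include <stdbool.h>
#include <stdint.h>
#include <linux/nl80211.h>
#include <netlink/attr.h>

static bool supports_full_ap_client_state(struct nlattr *tb[])
{
	uint32_t features;

	if (!tb[NL80211_ATTR_FEATURE_FLAGS])
		return false;

	features = nla_get_u32(tb[NL80211_ATTR_FEATURE_FLAGS]);
	return features & NL80211_FEATURE_FULL_AP_CLIENT_STATE;
}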