From cb2518ca9f06dfcfa3d175773631bfb1e461bdc7 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Wed, 15 Jan 2014 09:50:13 +0100 Subject: can: add ability to allocate CANFD frame in skb data This patch adds the ability of allocating a CANFD frame data structure in the skb data area. Signed-off-by: Stephane Grosjean Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index fb0ab651a041..dc5f9026b67f 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -124,6 +124,8 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); void can_free_echo_skb(struct net_device *dev, unsigned int idx); struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf); +struct sk_buff *alloc_canfd_skb(struct net_device *dev, + struct canfd_frame **cfd); struct sk_buff *alloc_can_err_skb(struct net_device *dev, struct can_frame **cf); -- cgit v1.2.3-71-gd317 From d8ca16db6bb23d03fcb794df44bae64ae976f27c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 Jan 2014 16:20:29 +0100 Subject: mac80211: add length check in ieee80211_is_robust_mgmt_frame() A few places weren't checking that the frame passed to the function actually has enough data even though the function clearly documents it must have a payload byte. Make this safer by changing the function to take an skb and checking the length inside. The old version is preserved for now as the rtl* drivers use it and don't have a correct skb. Signed-off-by: Johannes Berg --- drivers/net/wireless/rtlwifi/rtl8188ee/trx.c | 2 +- drivers/net/wireless/rtlwifi/rtl8192ce/trx.c | 2 +- drivers/net/wireless/rtlwifi/rtl8192se/trx.c | 2 +- drivers/net/wireless/rtlwifi/rtl8723ae/trx.c | 2 +- include/linux/ieee80211.h | 15 +++++++++++++-- net/mac80211/rx.c | 13 ++++++------- net/mac80211/tx.c | 9 ++++----- net/mac80211/wpa.c | 2 +- 8 files changed, 28 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/rtlwifi/rtl8188ee/trx.c b/drivers/net/wireless/rtlwifi/rtl8188ee/trx.c index aece6c9cccf1..27ace3054d56 100644 --- a/drivers/net/wireless/rtlwifi/rtl8188ee/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8188ee/trx.c @@ -452,7 +452,7 @@ bool rtl88ee_rx_query_desc(struct ieee80211_hw *hw, /* During testing, hdr was NULL */ return false; } - if ((ieee80211_is_robust_mgmt_frame(hdr)) && + if ((_ieee80211_is_robust_mgmt_frame(hdr)) && (ieee80211_has_protected(hdr->frame_control))) rx_status->flag &= ~RX_FLAG_DECRYPTED; else diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c b/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c index 52abf0a862fa..114858d46158 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c @@ -393,7 +393,7 @@ bool rtl92ce_rx_query_desc(struct ieee80211_hw *hw, /* In testing, hdr was NULL here */ return false; } - if ((ieee80211_is_robust_mgmt_frame(hdr)) && + if ((_ieee80211_is_robust_mgmt_frame(hdr)) && (ieee80211_has_protected(hdr->frame_control))) rx_status->flag &= ~RX_FLAG_DECRYPTED; else diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/trx.c b/drivers/net/wireless/rtlwifi/rtl8192se/trx.c index 27efbcdac6a9..163a681962c6 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/trx.c @@ -310,7 +310,7 @@ bool rtl92se_rx_query_desc(struct ieee80211_hw *hw, struct rtl_stats *stats, /* during testing, hdr was NULL here */ return false; } - if ((ieee80211_is_robust_mgmt_frame(hdr)) && + if ((_ieee80211_is_robust_mgmt_frame(hdr)) && (ieee80211_has_protected(hdr->frame_control))) rx_status->flag &= ~RX_FLAG_DECRYPTED; else diff --git a/drivers/net/wireless/rtlwifi/rtl8723ae/trx.c b/drivers/net/wireless/rtlwifi/rtl8723ae/trx.c index 50b7be3f3a60..721162cacc3a 100644 --- a/drivers/net/wireless/rtlwifi/rtl8723ae/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8723ae/trx.c @@ -334,7 +334,7 @@ bool rtl8723ae_rx_query_desc(struct ieee80211_hw *hw, /* during testing, hdr could be NULL here */ return false; } - if ((ieee80211_is_robust_mgmt_frame(hdr)) && + if ((_ieee80211_is_robust_mgmt_frame(hdr)) && (ieee80211_has_protected(hdr->frame_control))) rx_status->flag &= ~RX_FLAG_DECRYPTED; else diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e526a8cecb70..923c478030a3 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2192,10 +2192,10 @@ static inline u8 *ieee80211_get_DA(struct ieee80211_hdr *hdr) } /** - * ieee80211_is_robust_mgmt_frame - check if frame is a robust management frame + * _ieee80211_is_robust_mgmt_frame - check if frame is a robust management frame * @hdr: the frame (buffer must include at least the first octet of payload) */ -static inline bool ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) +static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) { if (ieee80211_is_disassoc(hdr->frame_control) || ieee80211_is_deauth(hdr->frame_control)) @@ -2223,6 +2223,17 @@ static inline bool ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) return false; } +/** + * ieee80211_is_robust_mgmt_frame - check if skb contains a robust mgmt frame + * @skb: the skb containing the frame, length will be checked + */ +static inline bool ieee80211_is_robust_mgmt_frame(struct sk_buff *skb) +{ + if (skb->len < 25) + return false; + return _ieee80211_is_robust_mgmt_frame((void *)skb->data); +} + /** * ieee80211_is_public_action - check if frame is a public action frame * @hdr: the frame diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c24ca0d0f469..3b7a750ebc70 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -599,10 +599,10 @@ static int ieee80211_is_unicast_robust_mgmt_frame(struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - if (skb->len < 24 || is_multicast_ether_addr(hdr->addr1)) + if (is_multicast_ether_addr(hdr->addr1)) return 0; - return ieee80211_is_robust_mgmt_frame(hdr); + return ieee80211_is_robust_mgmt_frame(skb); } @@ -610,10 +610,10 @@ static int ieee80211_is_multicast_robust_mgmt_frame(struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - if (skb->len < 24 || !is_multicast_ether_addr(hdr->addr1)) + if (!is_multicast_ether_addr(hdr->addr1)) return 0; - return ieee80211_is_robust_mgmt_frame(hdr); + return ieee80211_is_robust_mgmt_frame(skb); } @@ -626,7 +626,7 @@ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb) if (skb->len < 24 + sizeof(*mmie) || !is_multicast_ether_addr(hdr->da)) return -1; - if (!ieee80211_is_robust_mgmt_frame((struct ieee80211_hdr *) hdr)) + if (!ieee80211_is_robust_mgmt_frame(skb)) return -1; /* not a robust management frame */ mmie = (struct ieee80211_mmie *) @@ -1845,8 +1845,7 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) * having configured keys. */ if (unlikely(ieee80211_is_action(fc) && !rx->key && - ieee80211_is_robust_mgmt_frame( - (struct ieee80211_hdr *) rx->skb->data))) + ieee80211_is_robust_mgmt_frame(rx->skb))) return -EACCES; } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index bb990ecfa655..07a7f38dc348 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -452,8 +452,7 @@ static int ieee80211_use_mfp(__le16 fc, struct sta_info *sta, if (sta == NULL || !test_sta_flag(sta, WLAN_STA_MFP)) return 0; - if (!ieee80211_is_robust_mgmt_frame((struct ieee80211_hdr *) - skb->data)) + if (!ieee80211_is_robust_mgmt_frame(skb)) return 0; return 1; @@ -567,7 +566,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->key = key; else if (ieee80211_is_mgmt(hdr->frame_control) && is_multicast_ether_addr(hdr->addr1) && - ieee80211_is_robust_mgmt_frame(hdr) && + ieee80211_is_robust_mgmt_frame(tx->skb) && (key = rcu_dereference(tx->sdata->default_mgmt_key))) tx->key = key; else if (is_multicast_ether_addr(hdr->addr1) && @@ -582,12 +581,12 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->key = NULL; else if (tx->skb->protocol == tx->sdata->control_port_protocol) tx->key = NULL; - else if (ieee80211_is_robust_mgmt_frame(hdr) && + else if (ieee80211_is_robust_mgmt_frame(tx->skb) && !(ieee80211_is_action(hdr->frame_control) && tx->sta && test_sta_flag(tx->sta, WLAN_STA_MFP))) tx->key = NULL; else if (ieee80211_is_mgmt(hdr->frame_control) && - !ieee80211_is_robust_mgmt_frame(hdr)) + !ieee80211_is_robust_mgmt_frame(tx->skb)) tx->key = NULL; else { I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 4aed45c8ee3b..b8600e3c29c8 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -494,7 +494,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) hdrlen = ieee80211_hdrlen(hdr->frame_control); if (!ieee80211_is_data(hdr->frame_control) && - !ieee80211_is_robust_mgmt_frame(hdr)) + !ieee80211_is_robust_mgmt_frame(skb)) return RX_CONTINUE; data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - -- cgit v1.2.3-71-gd317 From b4ba544c8c1349afd44e10aebec03c90e9b71d98 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Jan 2014 14:41:44 +0100 Subject: mac80211: fix bufferable MMPDU RX handling Action, disassoc and deauth frames are bufferable, and as such don't have the PM bit in the frame control field reserved which means we need to react to the bit when receiving in such a frame. Fix this by introducing a new helper ieee80211_is_bufferable_mmpdu() and using it for the RX path that currently ignores the PM bit in any non-data frames for doze->wake transitions, but listens to it in all frames for wake->doze transitions, both of which are wrong. Also use the new helper in the TX path to clean up the code. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 14 ++++++++++++++ net/mac80211/rx.c | 19 ++++++++----------- net/mac80211/tx.c | 5 +---- 3 files changed, 23 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 923c478030a3..1e3912d1b029 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -596,6 +596,20 @@ static inline int ieee80211_is_qos_nullfunc(__le16 fc) cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); } +/** + * ieee80211_is_bufferable_mmpdu - check if frame is bufferable MMPDU + * @fc: frame control field in little-endian byteorder + */ +static inline bool ieee80211_is_bufferable_mmpdu(__le16 fc) +{ + /* IEEE 802.11-2012, definition of "bufferable management frame"; + * note that this ignores the IBSS special case. */ + return ieee80211_is_mgmt(fc) && + (ieee80211_is_action(fc) || + ieee80211_is_disassoc(fc) || + ieee80211_is_deauth(fc)); +} + /** * ieee80211_is_first_frag - check if IEEE80211_SCTL_FRAG is not set * @seq_ctrl: frame sequence control bytes in little-endian byteorder diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3b7a750ebc70..79a89fe9d616 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1311,18 +1311,15 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) !ieee80211_has_morefrags(hdr->frame_control) && !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) && (rx->sdata->vif.type == NL80211_IFTYPE_AP || - rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) { + rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && + /* PM bit is only checked in frames where it isn't reserved, + * in AP mode it's reserved in non-bufferable management frames + * (cf. IEEE 802.11-2012 8.2.4.1.7 Power Management field) + */ + (!ieee80211_is_mgmt(hdr->frame_control) || + ieee80211_is_bufferable_mmpdu(hdr->frame_control))) { if (test_sta_flag(sta, WLAN_STA_PS_STA)) { - /* - * Ignore doze->wake transitions that are - * indicated by non-data frames, the standard - * is unclear here, but for example going to - * PS mode and then scanning would cause a - * doze->wake transition for the probe request, - * and that is clearly undesirable. - */ - if (ieee80211_is_data(hdr->frame_control) && - !ieee80211_has_pm(hdr->frame_control)) + if (!ieee80211_has_pm(hdr->frame_control)) sta_ps_end(sta); } else { if (ieee80211_has_pm(hdr->frame_control)) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 07a7f38dc348..5476a69b45c9 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -522,11 +522,8 @@ ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx) if (unlikely(tx->flags & IEEE80211_TX_PS_BUFFERED)) return TX_CONTINUE; - /* only deauth, disassoc and action are bufferable MMPDUs */ if (ieee80211_is_mgmt(hdr->frame_control) && - !ieee80211_is_deauth(hdr->frame_control) && - !ieee80211_is_disassoc(hdr->frame_control) && - !ieee80211_is_action(hdr->frame_control)) { + !ieee80211_is_bufferable_mmpdu(hdr->frame_control)) { if (tx->flags & IEEE80211_TX_UNICAST) info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; return TX_CONTINUE; -- cgit v1.2.3-71-gd317 From 8c78e38025060a00155a73bf722152c156242490 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 4 Feb 2014 09:41:04 +0100 Subject: wireless: sort and extend element ID list The element ID list is currently almost sorted by amendment or similar topic, but the order is difficult to maintain and not very transparent. Sort the list by ID instead, and add a lot of missing IDs. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 170 +++++++++++++++++++++++++++++----------------- 1 file changed, 106 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 1e3912d1b029..5f349355ee54 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1650,51 +1650,22 @@ enum ieee80211_reasoncode { enum ieee80211_eid { WLAN_EID_SSID = 0, WLAN_EID_SUPP_RATES = 1, - WLAN_EID_FH_PARAMS = 2, + WLAN_EID_FH_PARAMS = 2, /* reserved now */ WLAN_EID_DS_PARAMS = 3, WLAN_EID_CF_PARAMS = 4, WLAN_EID_TIM = 5, WLAN_EID_IBSS_PARAMS = 6, - WLAN_EID_CHALLENGE = 16, - WLAN_EID_COUNTRY = 7, WLAN_EID_HP_PARAMS = 8, WLAN_EID_HP_TABLE = 9, WLAN_EID_REQUEST = 10, - WLAN_EID_QBSS_LOAD = 11, WLAN_EID_EDCA_PARAM_SET = 12, WLAN_EID_TSPEC = 13, WLAN_EID_TCLAS = 14, WLAN_EID_SCHEDULE = 15, - WLAN_EID_TS_DELAY = 43, - WLAN_EID_TCLAS_PROCESSING = 44, - WLAN_EID_QOS_CAPA = 46, - /* 802.11z */ - WLAN_EID_LINK_ID = 101, - /* 802.11s */ - WLAN_EID_MESH_CONFIG = 113, - WLAN_EID_MESH_ID = 114, - WLAN_EID_LINK_METRIC_REPORT = 115, - WLAN_EID_CONGESTION_NOTIFICATION = 116, - WLAN_EID_PEER_MGMT = 117, - WLAN_EID_CHAN_SWITCH_PARAM = 118, - WLAN_EID_MESH_AWAKE_WINDOW = 119, - WLAN_EID_BEACON_TIMING = 120, - WLAN_EID_MCCAOP_SETUP_REQ = 121, - WLAN_EID_MCCAOP_SETUP_RESP = 122, - WLAN_EID_MCCAOP_ADVERT = 123, - WLAN_EID_MCCAOP_TEARDOWN = 124, - WLAN_EID_GANN = 125, - WLAN_EID_RANN = 126, - WLAN_EID_PREQ = 130, - WLAN_EID_PREP = 131, - WLAN_EID_PERR = 132, - WLAN_EID_PXU = 137, - WLAN_EID_PXUC = 138, - WLAN_EID_AUTH_MESH_PEER_EXCH = 139, - WLAN_EID_MIC = 140, - + WLAN_EID_CHALLENGE = 16, + /* 17-31 reserved for challenge text extension */ WLAN_EID_PWR_CONSTRAINT = 32, WLAN_EID_PWR_CAPABILITY = 33, WLAN_EID_TPC_REQUEST = 34, @@ -1705,66 +1676,114 @@ enum ieee80211_eid { WLAN_EID_MEASURE_REPORT = 39, WLAN_EID_QUIET = 40, WLAN_EID_IBSS_DFS = 41, - WLAN_EID_ERP_INFO = 42, - WLAN_EID_EXT_SUPP_RATES = 50, - + WLAN_EID_TS_DELAY = 43, + WLAN_EID_TCLAS_PROCESSING = 44, WLAN_EID_HT_CAPABILITY = 45, - WLAN_EID_HT_OPERATION = 61, - WLAN_EID_SECONDARY_CHANNEL_OFFSET = 62, - + WLAN_EID_QOS_CAPA = 46, + /* 47 reserved for Broadcom */ WLAN_EID_RSN = 48, - WLAN_EID_MMIE = 76, - WLAN_EID_VENDOR_SPECIFIC = 221, - WLAN_EID_QOS_PARAMETER = 222, - + WLAN_EID_802_15_COEX = 49, + WLAN_EID_EXT_SUPP_RATES = 50, WLAN_EID_AP_CHAN_REPORT = 51, WLAN_EID_NEIGHBOR_REPORT = 52, WLAN_EID_RCPI = 53, + WLAN_EID_MOBILITY_DOMAIN = 54, + WLAN_EID_FAST_BSS_TRANSITION = 55, + WLAN_EID_TIMEOUT_INTERVAL = 56, + WLAN_EID_RIC_DATA = 57, + WLAN_EID_DSE_REGISTERED_LOCATION = 58, + WLAN_EID_SUPPORTED_REGULATORY_CLASSES = 59, + WLAN_EID_EXT_CHANSWITCH_ANN = 60, + WLAN_EID_HT_OPERATION = 61, + WLAN_EID_SECONDARY_CHANNEL_OFFSET = 62, WLAN_EID_BSS_AVG_ACCESS_DELAY = 63, WLAN_EID_ANTENNA_INFO = 64, WLAN_EID_RSNI = 65, WLAN_EID_MEASUREMENT_PILOT_TX_INFO = 66, WLAN_EID_BSS_AVAILABLE_CAPACITY = 67, WLAN_EID_BSS_AC_ACCESS_DELAY = 68, + WLAN_EID_TIME_ADVERTISEMENT = 69, WLAN_EID_RRM_ENABLED_CAPABILITIES = 70, WLAN_EID_MULTIPLE_BSSID = 71, WLAN_EID_BSS_COEX_2040 = 72, WLAN_EID_OVERLAP_BSS_SCAN_PARAM = 74, - WLAN_EID_EXT_CAPABILITY = 127, - - WLAN_EID_MOBILITY_DOMAIN = 54, - WLAN_EID_FAST_BSS_TRANSITION = 55, - WLAN_EID_TIMEOUT_INTERVAL = 56, - WLAN_EID_RIC_DATA = 57, WLAN_EID_RIC_DESCRIPTOR = 75, - - WLAN_EID_DSE_REGISTERED_LOCATION = 58, - WLAN_EID_SUPPORTED_REGULATORY_CLASSES = 59, - WLAN_EID_EXT_CHANSWITCH_ANN = 60, - - WLAN_EID_VHT_CAPABILITY = 191, - WLAN_EID_VHT_OPERATION = 192, - WLAN_EID_OPMODE_NOTIF = 199, - WLAN_EID_WIDE_BW_CHANNEL_SWITCH = 194, - WLAN_EID_CHANNEL_SWITCH_WRAPPER = 196, - WLAN_EID_EXTENDED_BSS_LOAD = 193, - WLAN_EID_VHT_TX_POWER_ENVELOPE = 195, - WLAN_EID_AID = 197, - WLAN_EID_QUIET_CHANNEL = 198, - - /* 802.11ad */ + WLAN_EID_MMIE = 76, + WLAN_EID_ASSOC_COMEBACK_TIME = 77, + WLAN_EID_EVENT_REQUEST = 78, + WLAN_EID_EVENT_REPORT = 79, + WLAN_EID_DIAGNOSTIC_REQUEST = 80, + WLAN_EID_DIAGNOSTIC_REPORT = 81, + WLAN_EID_LOCATION_PARAMS = 82, WLAN_EID_NON_TX_BSSID_CAP = 83, + WLAN_EID_SSID_LIST = 84, + WLAN_EID_MULTI_BSSID_IDX = 85, + WLAN_EID_FMS_DESCRIPTOR = 86, + WLAN_EID_FMS_REQUEST = 87, + WLAN_EID_FMS_RESPONSE = 88, + WLAN_EID_QOS_TRAFFIC_CAPA = 89, + WLAN_EID_BSS_MAX_IDLE_PERIOD = 90, + WLAN_EID_TSF_REQUEST = 91, + WLAN_EID_TSF_RESPOSNE = 92, + WLAN_EID_WNM_SLEEP_MODE = 93, + WLAN_EID_TIM_BCAST_REQ = 94, + WLAN_EID_TIM_BCAST_RESP = 95, + WLAN_EID_COLL_IF_REPORT = 96, + WLAN_EID_CHANNEL_USAGE = 97, + WLAN_EID_TIME_ZONE = 98, + WLAN_EID_DMS_REQUEST = 99, + WLAN_EID_DMS_RESPONSE = 100, + WLAN_EID_LINK_ID = 101, + WLAN_EID_WAKEUP_SCHEDUL = 102, + /* 103 reserved */ + WLAN_EID_CHAN_SWITCH_TIMING = 104, + WLAN_EID_PTI_CONTROL = 105, + WLAN_EID_PU_BUFFER_STATUS = 106, + WLAN_EID_INTERWORKING = 107, + WLAN_EID_ADVERTISEMENT_PROTOCOL = 108, + WLAN_EID_EXPEDITED_BW_REQ = 109, + WLAN_EID_QOS_MAP_SET = 110, + WLAN_EID_ROAMING_CONSORTIUM = 111, + WLAN_EID_EMERGENCY_ALERT = 112, + WLAN_EID_MESH_CONFIG = 113, + WLAN_EID_MESH_ID = 114, + WLAN_EID_LINK_METRIC_REPORT = 115, + WLAN_EID_CONGESTION_NOTIFICATION = 116, + WLAN_EID_PEER_MGMT = 117, + WLAN_EID_CHAN_SWITCH_PARAM = 118, + WLAN_EID_MESH_AWAKE_WINDOW = 119, + WLAN_EID_BEACON_TIMING = 120, + WLAN_EID_MCCAOP_SETUP_REQ = 121, + WLAN_EID_MCCAOP_SETUP_RESP = 122, + WLAN_EID_MCCAOP_ADVERT = 123, + WLAN_EID_MCCAOP_TEARDOWN = 124, + WLAN_EID_GANN = 125, + WLAN_EID_RANN = 126, + WLAN_EID_EXT_CAPABILITY = 127, + /* 128, 129 reserved for Agere */ + WLAN_EID_PREQ = 130, + WLAN_EID_PREP = 131, + WLAN_EID_PERR = 132, + /* 133-136 reserved for Cisco */ + WLAN_EID_PXU = 137, + WLAN_EID_PXUC = 138, + WLAN_EID_AUTH_MESH_PEER_EXCH = 139, + WLAN_EID_MIC = 140, + WLAN_EID_DESTINATION_URI = 141, + WLAN_EID_UAPSD_COEX = 142, WLAN_EID_WAKEUP_SCHEDULE = 143, WLAN_EID_EXT_SCHEDULE = 144, WLAN_EID_STA_AVAILABILITY = 145, WLAN_EID_DMG_TSPEC = 146, WLAN_EID_DMG_AT = 147, WLAN_EID_DMG_CAP = 148, + /* 149-150 reserved for Cisco */ WLAN_EID_DMG_OPERATION = 151, WLAN_EID_DMG_BSS_PARAM_CHANGE = 152, WLAN_EID_DMG_BEAM_REFINEMENT = 153, WLAN_EID_CHANNEL_MEASURE_FEEDBACK = 154, + /* 155-156 reserved for Cisco */ WLAN_EID_AWAKE_WINDOW = 157, WLAN_EID_MULTI_BAND = 158, WLAN_EID_ADDBA_EXT = 159, @@ -1781,11 +1800,34 @@ enum ieee80211_eid { WLAN_EID_MULTIPLE_MAC_ADDR = 170, WLAN_EID_U_PID = 171, WLAN_EID_DMG_LINK_ADAPT_ACK = 172, + /* 173 reserved for Symbol */ + WLAN_EID_MCCAOP_ADV_OVERVIEW = 174, WLAN_EID_QUIET_PERIOD_REQ = 175, + /* 176 reserved for Symbol */ WLAN_EID_QUIET_PERIOD_RESP = 177, + /* 178-179 reserved for Symbol */ + /* 180 reserved for ISO/IEC 20011 */ WLAN_EID_EPAC_POLICY = 182, WLAN_EID_CLISTER_TIME_OFF = 183, + WLAN_EID_INTER_AC_PRIO = 184, + WLAN_EID_SCS_DESCRIPTOR = 185, + WLAN_EID_QLOAD_REPORT = 186, + WLAN_EID_HCCA_TXOP_UPDATE_COUNT = 187, + WLAN_EID_HL_STREAM_ID = 188, + WLAN_EID_GCR_GROUP_ADDR = 189, WLAN_EID_ANTENNA_SECTOR_ID_PATTERN = 190, + WLAN_EID_VHT_CAPABILITY = 191, + WLAN_EID_VHT_OPERATION = 192, + WLAN_EID_EXTENDED_BSS_LOAD = 193, + WLAN_EID_WIDE_BW_CHANNEL_SWITCH = 194, + WLAN_EID_VHT_TX_POWER_ENVELOPE = 195, + WLAN_EID_CHANNEL_SWITCH_WRAPPER = 196, + WLAN_EID_AID = 197, + WLAN_EID_QUIET_CHANNEL = 198, + WLAN_EID_OPMODE_NOTIF = 199, + + WLAN_EID_VENDOR_SPECIFIC = 221, + WLAN_EID_QOS_PARAMETER = 222, }; /* Action category code */ -- cgit v1.2.3-71-gd317 From 25a91d8d91911f84a03a039339b29537e7f1970d Mon Sep 17 00:00:00 2001 From: Fan Du Date: Sat, 18 Jan 2014 09:54:23 +0800 Subject: skbuff: Introduce skb_to_sgvec_nomark to map skb without mark new end As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given sglist without mark the sg which contain last skb data as the end. So the caller can mannipulate sg list as will when padding new data after the first call without calling sg_unmark_end to expend sg list. Signed-off-by: Fan Du Signed-off-by: Steffen Klassert --- include/linux/skbuff.h | 2 ++ net/core/skbuff.c | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f589c9af8cbf..c574bf3bd6f6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -691,6 +691,8 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, gfp_t priority); +int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, + int offset, int len); int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len); int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5976ef0846bd..f28c37996aad 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3281,6 +3281,32 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) return elt; } +/* As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given + * sglist without mark the sg which contain last skb data as the end. + * So the caller can mannipulate sg list as will when padding new data after + * the first call without calling sg_unmark_end to expend sg list. + * + * Scenario to use skb_to_sgvec_nomark: + * 1. sg_init_table + * 2. skb_to_sgvec_nomark(payload1) + * 3. skb_to_sgvec_nomark(payload2) + * + * This is equivalent to: + * 1. sg_init_table + * 2. skb_to_sgvec(payload1) + * 3. sg_unmark_end + * 4. skb_to_sgvec(payload2) + * + * When mapping mutilple payload conditionally, skb_to_sgvec_nomark + * is more preferable. + */ +int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, + int offset, int len) +{ + return __skb_to_sgvec(skb, sg, offset, len); +} +EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark); + int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) { int nsg = __skb_to_sgvec(skb, sg, offset, len); -- cgit v1.2.3-71-gd317 From 6e201c857b68ea994c9ac85718eb3d50dcf40d92 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 12 Feb 2014 22:14:53 +0000 Subject: ethtool: Document the general convention for VLAs in kernel space Various ethtool command structures are declared with zero-length array at the end which are intended to be variable-length in userland (relying on lack of compiler bounds checking). However, in the kernel the structure and array are always allocated and passed to driver operations separately. Make that explicit. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c8e3e7e39c6b..0a114d05f68d 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -183,6 +183,9 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * hold the RTNL lock. * * See the structures used by these operations for further documentation. + * Note that for all operations using a structure ending with a zero- + * length array, the array is allocated separately in the kernel and + * is passed to the driver as an additional parameter. * * See &struct net_device and &struct net_device_ops for documentation * of the generic netdev features interface. -- cgit v1.2.3-71-gd317 From a9fa6e6ac29709e7a623b60695c172da675df045 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 11 Feb 2014 17:27:36 -0800 Subject: net: phy: add genphy_aneg_done() In preparation for allowing PHY drivers to potentially override their auto-negotiation done callback, move the contents of phy_aneg_done() to genphy_aneg_done() since that function really is the generic implementation based on the BMSR_ANEGCOMPLETE status. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 4 +--- drivers/net/phy/phy_device.c | 16 ++++++++++++++++ include/linux/phy.h | 1 + 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 36fc6e16b569..db9c543bd2af 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -120,9 +120,7 @@ static int phy_config_interrupt(struct phy_device *phydev, u32 interrupts) */ static inline int phy_aneg_done(struct phy_device *phydev) { - int retval = phy_read(phydev, MII_BMSR); - - return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE); + return genphy_aneg_done(phydev); } /* A structure for mapping a particular speed and duplex diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 82514e72b3d8..4e7db726028f 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -865,6 +865,22 @@ int genphy_config_aneg(struct phy_device *phydev) } EXPORT_SYMBOL(genphy_config_aneg); +/** + * genphy_aneg_done - return auto-negotiation status + * @phydev: target phy_device struct + * + * Description: Reads the status register and returns 0 either if + * auto-negotiation is incomplete, or if there was an error. + * Returns BMSR_ANEGCOMPLETE if auto-negotiation is done. + */ +int genphy_aneg_done(struct phy_device *phydev) +{ + int retval = phy_read(phydev, MII_BMSR); + + return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE); +} +EXPORT_SYMBOL(genphy_aneg_done); + static int gen10g_config_aneg(struct phy_device *phydev) { return 0; diff --git a/include/linux/phy.h b/include/linux/phy.h index 565188ca328f..c572842c9029 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -612,6 +612,7 @@ static inline int phy_read_status(struct phy_device *phydev) int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); int genphy_config_aneg(struct phy_device *phydev); +int genphy_aneg_done(struct phy_device *phydev); int genphy_update_link(struct phy_device *phydev); int genphy_read_status(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); -- cgit v1.2.3-71-gd317 From 76a423a3f8f16bfc7fb86360a620be18c775b94d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 11 Feb 2014 17:27:37 -0800 Subject: net: phy: allow driver to implement their own aneg_done Some PHYs out there can be very quirky with respect to how they would report the auto-negotiation is completed. Allow drivers to override the generic aneg_done() implementation by providing their own. Since not all drivers have been updated yet to use genphy_aneg_done() as aneg_done() callback, we explicitely check that this callback is valid before calling into it. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 9 ++++++--- drivers/net/phy/phy_device.c | 1 + include/linux/phy.h | 3 +++ 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index db9c543bd2af..2fa4611709a4 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -114,12 +114,15 @@ static int phy_config_interrupt(struct phy_device *phydev, u32 interrupts) * phy_aneg_done - return auto-negotiation status * @phydev: target phy_device struct * - * Description: Reads the status register and returns 0 either if - * auto-negotiation is incomplete, or if there was an error. - * Returns BMSR_ANEGCOMPLETE if auto-negotiation is done. + * Description: Return the auto-negotiation status from this @phydev + * Returns > 0 on success or < 0 on error. 0 means that auto-negotiation + * is still pending. */ static inline int phy_aneg_done(struct phy_device *phydev) { + if (phydev->drv->aneg_done) + return phydev->drv->aneg_done(phydev); + return genphy_aneg_done(phydev); } diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 4e7db726028f..7c184208ed4d 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1267,6 +1267,7 @@ static struct phy_driver genphy_driver[] = { .config_init = genphy_config_init, .features = 0, .config_aneg = genphy_config_aneg, + .aneg_done = genphy_aneg_done, .read_status = genphy_read_status, .suspend = genphy_suspend, .resume = genphy_resume, diff --git a/include/linux/phy.h b/include/linux/phy.h index c572842c9029..eede6579cae7 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -417,6 +417,9 @@ struct phy_driver { */ int (*config_aneg)(struct phy_device *phydev); + /* Determines the auto negotiation result */ + int (*aneg_done)(struct phy_device *phydev); + /* Determines the negotiated speed and duplex */ int (*read_status)(struct phy_device *phydev); -- cgit v1.2.3-71-gd317 From 8a2fe56e8827f2b1eb1766702f0215074dd2767e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 11 Feb 2014 17:27:39 -0800 Subject: net: phy: re-design phy_modes to be self-contained of_get_phy_mode() uses a local array to map phy_interface_t values from include/linux/net/phy.h to a string which is read from the 'phy-mode' or 'phy-connection-type' property. In preparation for exposing the PHY interface mode through sysfs, perform the following: - mode phy_modes from drivers/of/of_net.c to include/linux/phy.h such that it is right below the phy_interface_t enum - make it a static inline function returning the string such that we can use it by just including include/linux/net/phy.h - add a PHY_INTERFACE_MODE_MAX enum value to guard the iteration in of_get_phy_mode() Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/of/of_net.c | 26 ++------------------------ include/linux/phy.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c index a208a457558c..84215c1929c4 100644 --- a/drivers/of/of_net.c +++ b/drivers/of/of_net.c @@ -11,28 +11,6 @@ #include #include -/** - * It maps 'enum phy_interface_t' found in include/linux/phy.h - * into the device tree binding of 'phy-mode', so that Ethernet - * device driver can get phy interface from device tree. - */ -static const char *phy_modes[] = { - [PHY_INTERFACE_MODE_NA] = "", - [PHY_INTERFACE_MODE_MII] = "mii", - [PHY_INTERFACE_MODE_GMII] = "gmii", - [PHY_INTERFACE_MODE_SGMII] = "sgmii", - [PHY_INTERFACE_MODE_TBI] = "tbi", - [PHY_INTERFACE_MODE_REVMII] = "rev-mii", - [PHY_INTERFACE_MODE_RMII] = "rmii", - [PHY_INTERFACE_MODE_RGMII] = "rgmii", - [PHY_INTERFACE_MODE_RGMII_ID] = "rgmii-id", - [PHY_INTERFACE_MODE_RGMII_RXID] = "rgmii-rxid", - [PHY_INTERFACE_MODE_RGMII_TXID] = "rgmii-txid", - [PHY_INTERFACE_MODE_RTBI] = "rtbi", - [PHY_INTERFACE_MODE_SMII] = "smii", - [PHY_INTERFACE_MODE_XGMII] = "xgmii", -}; - /** * of_get_phy_mode - Get phy mode for given device_node * @np: Pointer to the given device_node @@ -49,8 +27,8 @@ int of_get_phy_mode(struct device_node *np) if (err < 0) return err; - for (i = 0; i < ARRAY_SIZE(phy_modes); i++) - if (!strcasecmp(pm, phy_modes[i])) + for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++) + if (!strcasecmp(pm, phy_modes(i))) return i; return -ENODEV; diff --git a/include/linux/phy.h b/include/linux/phy.h index eede6579cae7..ef7fa1131145 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -74,8 +74,50 @@ typedef enum { PHY_INTERFACE_MODE_RTBI, PHY_INTERFACE_MODE_SMII, PHY_INTERFACE_MODE_XGMII, + PHY_INTERFACE_MODE_MAX, } phy_interface_t; +/** + * It maps 'enum phy_interface_t' found in include/linux/phy.h + * into the device tree binding of 'phy-mode', so that Ethernet + * device driver can get phy interface from device tree. + */ +static inline const char *phy_modes(phy_interface_t interface) +{ + switch (interface) { + case PHY_INTERFACE_MODE_NA: + return ""; + case PHY_INTERFACE_MODE_MII: + return "mii"; + case PHY_INTERFACE_MODE_GMII: + return "gmii"; + case PHY_INTERFACE_MODE_SGMII: + return "sgmii"; + case PHY_INTERFACE_MODE_TBI: + return "tbi"; + case PHY_INTERFACE_MODE_REVMII: + return "rev-mii"; + case PHY_INTERFACE_MODE_RMII: + return "rmii"; + case PHY_INTERFACE_MODE_RGMII: + return "rgmii"; + case PHY_INTERFACE_MODE_RGMII_ID: + return "rgmii-id"; + case PHY_INTERFACE_MODE_RGMII_RXID: + return "rgmii-rxid"; + case PHY_INTERFACE_MODE_RGMII_TXID: + return "rgmii-txid"; + case PHY_INTERFACE_MODE_RTBI: + return "rtbi"; + case PHY_INTERFACE_MODE_SMII: + return "smii"; + case PHY_INTERFACE_MODE_XGMII: + return "xgmii"; + default: + return "unknown"; + } +} + #define PHY_INIT_TIMEOUT 100000 #define PHY_STATE_TIME 1 -- cgit v1.2.3-71-gd317 From b0ae009f3dc14643e56972cfc08c060dd72cc24d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 11 Feb 2014 17:27:41 -0800 Subject: net: phy: add "has_fixups" boolean property Add a boolean property which indicates if the PHY has had any fixup routine ran on it. We are later going to use that boolean to expose it as a sysfs property to help troubleshooting. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 1 + include/linux/phy.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 7c184208ed4d..c2d778d06405 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -139,6 +139,7 @@ static int phy_scan_fixups(struct phy_device *phydev) mutex_unlock(&phy_fixup_lock); return err; } + phydev->has_fixups = true; } } mutex_unlock(&phy_fixup_lock); diff --git a/include/linux/phy.h b/include/linux/phy.h index ef7fa1131145..42f1bc7eaeb0 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -350,6 +350,7 @@ struct phy_device { struct phy_c45_device_ids c45_ids; bool is_c45; bool is_internal; + bool has_fixups; enum phy_state state; -- cgit v1.2.3-71-gd317 From fd70f72c66eeada848abaed9eb6bbdbaf57764e9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 13 Feb 2014 16:08:42 -0800 Subject: net: phy: add MoCA PHY type Some Ethernet MACs are connected to a MoCA PHY which will handle the low-level job of sending Ethernet frames on the coaxial cable, these Ethernet MACs need to know about it to be properly configured. Add a new PHY mode "moca" and update the Device Tree parsing logic to look for it. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 42f1bc7eaeb0..f7fe54628424 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -74,6 +74,7 @@ typedef enum { PHY_INTERFACE_MODE_RTBI, PHY_INTERFACE_MODE_SMII, PHY_INTERFACE_MODE_XGMII, + PHY_INTERFACE_MODE_MOCA, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -113,6 +114,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "smii"; case PHY_INTERFACE_MODE_XGMII: return "xgmii"; + case PHY_INTERFACE_MODE_MOCA: + return "moca"; default: return "unknown"; } -- cgit v1.2.3-71-gd317 From 439d39a9ac8fbbba9c04581361188f33f21ced50 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 13 Feb 2014 16:08:44 -0800 Subject: net: phy: broadcom: extract register definitions The Broadcom BCM54xx register definitions are shared between BCM54xx and BCM7xx internal PHYs for which we are adding support. Extract these register definitions and put them in include/linux/brcmphy.h for use by the BCM7xxx internal PHY driver. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 52 ---------------------------------------------- include/linux/brcmphy.h | 51 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index f8c90ea75108..34088d60da74 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -25,58 +25,6 @@ #define BRCM_PHY_REV(phydev) \ ((phydev)->drv->phy_id & ~((phydev)->drv->phy_id_mask)) - -#define MII_BCM54XX_ECR 0x10 /* BCM54xx extended control register */ -#define MII_BCM54XX_ECR_IM 0x1000 /* Interrupt mask */ -#define MII_BCM54XX_ECR_IF 0x0800 /* Interrupt force */ - -#define MII_BCM54XX_ESR 0x11 /* BCM54xx extended status register */ -#define MII_BCM54XX_ESR_IS 0x1000 /* Interrupt status */ - -#define MII_BCM54XX_EXP_DATA 0x15 /* Expansion register data */ -#define MII_BCM54XX_EXP_SEL 0x17 /* Expansion register select */ -#define MII_BCM54XX_EXP_SEL_SSD 0x0e00 /* Secondary SerDes select */ -#define MII_BCM54XX_EXP_SEL_ER 0x0f00 /* Expansion register select */ - -#define MII_BCM54XX_AUX_CTL 0x18 /* Auxiliary control register */ -#define MII_BCM54XX_ISR 0x1a /* BCM54xx interrupt status register */ -#define MII_BCM54XX_IMR 0x1b /* BCM54xx interrupt mask register */ -#define MII_BCM54XX_INT_CRCERR 0x0001 /* CRC error */ -#define MII_BCM54XX_INT_LINK 0x0002 /* Link status changed */ -#define MII_BCM54XX_INT_SPEED 0x0004 /* Link speed change */ -#define MII_BCM54XX_INT_DUPLEX 0x0008 /* Duplex mode changed */ -#define MII_BCM54XX_INT_LRS 0x0010 /* Local receiver status changed */ -#define MII_BCM54XX_INT_RRS 0x0020 /* Remote receiver status changed */ -#define MII_BCM54XX_INT_SSERR 0x0040 /* Scrambler synchronization error */ -#define MII_BCM54XX_INT_UHCD 0x0080 /* Unsupported HCD negotiated */ -#define MII_BCM54XX_INT_NHCD 0x0100 /* No HCD */ -#define MII_BCM54XX_INT_NHCDL 0x0200 /* No HCD link */ -#define MII_BCM54XX_INT_ANPR 0x0400 /* Auto-negotiation page received */ -#define MII_BCM54XX_INT_LC 0x0800 /* All counters below 128 */ -#define MII_BCM54XX_INT_HC 0x1000 /* Counter above 32768 */ -#define MII_BCM54XX_INT_MDIX 0x2000 /* MDIX status change */ -#define MII_BCM54XX_INT_PSERR 0x4000 /* Pair swap error */ - -#define MII_BCM54XX_SHD 0x1c /* 0x1c shadow registers */ -#define MII_BCM54XX_SHD_WRITE 0x8000 -#define MII_BCM54XX_SHD_VAL(x) ((x & 0x1f) << 10) -#define MII_BCM54XX_SHD_DATA(x) ((x & 0x3ff) << 0) - -/* - * AUXILIARY CONTROL SHADOW ACCESS REGISTERS. (PHY REG 0x18) - */ -#define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL 0x0000 -#define MII_BCM54XX_AUXCTL_ACTL_TX_6DB 0x0400 -#define MII_BCM54XX_AUXCTL_ACTL_SMDSP_ENA 0x0800 - -#define MII_BCM54XX_AUXCTL_MISC_WREN 0x8000 -#define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 -#define MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC 0x7000 -#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x0007 - -#define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL 0x0000 - - /* * Broadcom LED source encodings. These are used in BCM5461, BCM5481, * BCM5482, and possibly some others. diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 677b4f01b2d0..104e3efe46af 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -33,4 +33,55 @@ #define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00008000 #define PHY_BCM_FLAGS_VALID 0x80000000 +/* Broadcom BCM54XX register definitions, common to most Broadcom PHYs */ +#define MII_BCM54XX_ECR 0x10 /* BCM54xx extended control register */ +#define MII_BCM54XX_ECR_IM 0x1000 /* Interrupt mask */ +#define MII_BCM54XX_ECR_IF 0x0800 /* Interrupt force */ + +#define MII_BCM54XX_ESR 0x11 /* BCM54xx extended status register */ +#define MII_BCM54XX_ESR_IS 0x1000 /* Interrupt status */ + +#define MII_BCM54XX_EXP_DATA 0x15 /* Expansion register data */ +#define MII_BCM54XX_EXP_SEL 0x17 /* Expansion register select */ +#define MII_BCM54XX_EXP_SEL_SSD 0x0e00 /* Secondary SerDes select */ +#define MII_BCM54XX_EXP_SEL_ER 0x0f00 /* Expansion register select */ + +#define MII_BCM54XX_AUX_CTL 0x18 /* Auxiliary control register */ +#define MII_BCM54XX_ISR 0x1a /* BCM54xx interrupt status register */ +#define MII_BCM54XX_IMR 0x1b /* BCM54xx interrupt mask register */ +#define MII_BCM54XX_INT_CRCERR 0x0001 /* CRC error */ +#define MII_BCM54XX_INT_LINK 0x0002 /* Link status changed */ +#define MII_BCM54XX_INT_SPEED 0x0004 /* Link speed change */ +#define MII_BCM54XX_INT_DUPLEX 0x0008 /* Duplex mode changed */ +#define MII_BCM54XX_INT_LRS 0x0010 /* Local receiver status changed */ +#define MII_BCM54XX_INT_RRS 0x0020 /* Remote receiver status changed */ +#define MII_BCM54XX_INT_SSERR 0x0040 /* Scrambler synchronization error */ +#define MII_BCM54XX_INT_UHCD 0x0080 /* Unsupported HCD negotiated */ +#define MII_BCM54XX_INT_NHCD 0x0100 /* No HCD */ +#define MII_BCM54XX_INT_NHCDL 0x0200 /* No HCD link */ +#define MII_BCM54XX_INT_ANPR 0x0400 /* Auto-negotiation page received */ +#define MII_BCM54XX_INT_LC 0x0800 /* All counters below 128 */ +#define MII_BCM54XX_INT_HC 0x1000 /* Counter above 32768 */ +#define MII_BCM54XX_INT_MDIX 0x2000 /* MDIX status change */ +#define MII_BCM54XX_INT_PSERR 0x4000 /* Pair swap error */ + +#define MII_BCM54XX_SHD 0x1c /* 0x1c shadow registers */ +#define MII_BCM54XX_SHD_WRITE 0x8000 +#define MII_BCM54XX_SHD_VAL(x) ((x & 0x1f) << 10) +#define MII_BCM54XX_SHD_DATA(x) ((x & 0x3ff) << 0) + +/* + * AUXILIARY CONTROL SHADOW ACCESS REGISTERS. (PHY REG 0x18) + */ +#define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL 0x0000 +#define MII_BCM54XX_AUXCTL_ACTL_TX_6DB 0x0400 +#define MII_BCM54XX_AUXCTL_ACTL_SMDSP_ENA 0x0800 + +#define MII_BCM54XX_AUXCTL_MISC_WREN 0x8000 +#define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 +#define MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC 0x7000 +#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x0007 + +#define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL 0x0000 + #endif /* _LINUX_BRCMPHY_H */ -- cgit v1.2.3-71-gd317 From b560a58c45c66f68936127040e86b7f02e4c5332 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 13 Feb 2014 16:08:45 -0800 Subject: net: phy: add Broadcom BCM7xxx internal PHY driver This patch adds support for the Broadcom BCM7xxx Set Top Box SoCs internal PHYs. This driver supports the following generation of SoCs: - BCM7366, BCM7439, BCM7445 (28nm process) - all 40nm and 65nm (older MIPS-based SoCs) The PHYs on these SoCs require a bunch of workarounds to operate correctly, both during configuration time and at suspend/resume time, the driver handles that for us. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 6 + drivers/net/phy/Makefile | 1 + drivers/net/phy/bcm7xxx.c | 343 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/brcmphy.h | 9 ++ 4 files changed, 359 insertions(+) create mode 100644 drivers/net/phy/bcm7xxx.c (limited to 'include/linux') diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 9b5d46c03eed..6a17f92153b3 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -71,6 +71,12 @@ config BCM63XX_PHY ---help--- Currently supports the 6348 and 6358 PHYs. +config BCM7XXX_PHY + tristate "Drivers for Broadcom 7xxx SOCs internal PHYs" + ---help--- + Currently supports the BCM7366, BCM7439, BCM7445, and + 40nm and 65nm generation of BCM7xxx Set Top Box SoCs. + config BCM87XX_PHY tristate "Driver for Broadcom BCM8706 and BCM8727 PHYs" help diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 9013dfa12aa3..07d24024863e 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_SMSC_PHY) += smsc.o obj-$(CONFIG_VITESSE_PHY) += vitesse.o obj-$(CONFIG_BROADCOM_PHY) += broadcom.o obj-$(CONFIG_BCM63XX_PHY) += bcm63xx.o +obj-$(CONFIG_BCM7XXX_PHY) += bcm7xxx.o obj-$(CONFIG_BCM87XX_PHY) += bcm87xx.o obj-$(CONFIG_ICPLUS_PHY) += icplus.o obj-$(CONFIG_REALTEK_PHY) += realtek.o diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c new file mode 100644 index 000000000000..697337220016 --- /dev/null +++ b/drivers/net/phy/bcm7xxx.c @@ -0,0 +1,343 @@ +/* + * Broadcom BCM7xxx internal transceivers support. + * + * Copyright (C) 2014, Broadcom Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include + +/* Broadcom BCM7xxx internal PHY registers */ +#define MII_BCM7XXX_CHANNEL_WIDTH 0x2000 + +/* 40nm only register definitions */ +#define MII_BCM7XXX_100TX_AUX_CTL 0x10 +#define MII_BCM7XXX_100TX_FALSE_CAR 0x13 +#define MII_BCM7XXX_100TX_DISC 0x14 +#define MII_BCM7XXX_AUX_MODE 0x1d +#define MII_BCM7XX_64CLK_MDIO BIT(12) +#define MII_BCM7XXX_CORE_BASE1E 0x1e +#define MII_BCM7XXX_TEST 0x1f +#define MII_BCM7XXX_SHD_MODE_2 BIT(2) + +static int bcm7445_config_init(struct phy_device *phydev) +{ + int ret; + const struct bcm7445_regs { + int reg; + u16 value; + } bcm7445_regs_cfg[] = { + /* increases ADC latency by 24ns */ + { MII_BCM54XX_EXP_SEL, 0x0038 }, + { MII_BCM54XX_EXP_DATA, 0xAB95 }, + /* increases internal 1V LDO voltage by 5% */ + { MII_BCM54XX_EXP_SEL, 0x2038 }, + { MII_BCM54XX_EXP_DATA, 0xBB22 }, + /* reduce RX low pass filter corner frequency */ + { MII_BCM54XX_EXP_SEL, 0x6038 }, + { MII_BCM54XX_EXP_DATA, 0xFFC5 }, + /* reduce RX high pass filter corner frequency */ + { MII_BCM54XX_EXP_SEL, 0x003a }, + { MII_BCM54XX_EXP_DATA, 0x2002 }, + }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(bcm7445_regs_cfg); i++) { + ret = phy_write(phydev, + bcm7445_regs_cfg[i].reg, + bcm7445_regs_cfg[i].value); + if (ret) + return ret; + } + + return 0; +} + +static void phy_write_exp(struct phy_device *phydev, + u16 reg, u16 value) +{ + phy_write(phydev, MII_BCM54XX_EXP_SEL, MII_BCM54XX_EXP_SEL_ER | reg); + phy_write(phydev, MII_BCM54XX_EXP_DATA, value); +} + +static void phy_write_misc(struct phy_device *phydev, + u16 reg, u16 chl, u16 value) +{ + int tmp; + + phy_write(phydev, MII_BCM54XX_AUX_CTL, MII_BCM54XX_AUXCTL_SHDWSEL_MISC); + + tmp = phy_read(phydev, MII_BCM54XX_AUX_CTL); + tmp |= MII_BCM54XX_AUXCTL_ACTL_SMDSP_ENA; + phy_write(phydev, MII_BCM54XX_AUX_CTL, tmp); + + tmp = (chl * MII_BCM7XXX_CHANNEL_WIDTH) | reg; + phy_write(phydev, MII_BCM54XX_EXP_SEL, tmp); + + phy_write(phydev, MII_BCM54XX_EXP_DATA, value); +} + +static int bcm7xxx_28nm_afe_config_init(struct phy_device *phydev) +{ + /* write AFE_RXCONFIG_0 */ + phy_write_misc(phydev, 0x38, 0x0000, 0xeb19); + + /* write AFE_RXCONFIG_1 */ + phy_write_misc(phydev, 0x38, 0x0001, 0x9a3f); + + /* write AFE_RX_LP_COUNTER */ + phy_write_misc(phydev, 0x38, 0x0003, 0x7fc7); + + /* write AFE_HPF_TRIM_OTHERS */ + phy_write_misc(phydev, 0x3A, 0x0000, 0x000b); + + /* write AFTE_TX_CONFIG */ + phy_write_misc(phydev, 0x39, 0x0000, 0x0800); + + /* Increase VCO range to prevent unlocking problem of PLL at low + * temp + */ + phy_write_misc(phydev, 0x0032, 0x0001, 0x0048); + + /* Change Ki to 011 */ + phy_write_misc(phydev, 0x0032, 0x0002, 0x021b); + + /* Disable loading of TVCO buffer to bandgap, set bandgap trim + * to 111 + */ + phy_write_misc(phydev, 0x0033, 0x0000, 0x0e20); + + /* Adjust bias current trim by -3 */ + phy_write_misc(phydev, 0x000a, 0x0000, 0x690b); + + /* Switch to CORE_BASE1E */ + phy_write(phydev, MII_BCM7XXX_CORE_BASE1E, 0xd); + + /* Reset R_CAL/RC_CAL Engine */ + phy_write_exp(phydev, 0x00b0, 0x0010); + + /* Disable Reset R_CAL/RC_CAL Engine */ + phy_write_exp(phydev, 0x00b0, 0x0000); + + return 0; +} + +static int bcm7xxx_28nm_config_init(struct phy_device *phydev) +{ + int ret; + + ret = bcm7445_config_init(phydev); + if (ret) + return ret; + + return bcm7xxx_28nm_afe_config_init(phydev); +} + +static int phy_set_clr_bits(struct phy_device *dev, int location, + int set_mask, int clr_mask) +{ + int v, ret; + + v = phy_read(dev, location); + if (v < 0) + return v; + + v &= ~clr_mask; + v |= set_mask; + + ret = phy_write(dev, location, v); + if (ret < 0) + return ret; + + return v; +} + +static int bcm7xxx_config_init(struct phy_device *phydev) +{ + int ret; + + /* Enable 64 clock MDIO */ + phy_write(phydev, MII_BCM7XXX_AUX_MODE, MII_BCM7XX_64CLK_MDIO); + phy_read(phydev, MII_BCM7XXX_AUX_MODE); + + /* Workaround only required for 100Mbits/sec */ + if (!(phydev->dev_flags & PHY_BRCM_100MBPS_WAR)) + return 0; + + /* set shadow mode 2 */ + ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, + MII_BCM7XXX_SHD_MODE_2, MII_BCM7XXX_SHD_MODE_2); + if (ret < 0) + return ret; + + /* set iddq_clkbias */ + phy_write(phydev, MII_BCM7XXX_100TX_DISC, 0x0F00); + udelay(10); + + /* reset iddq_clkbias */ + phy_write(phydev, MII_BCM7XXX_100TX_DISC, 0x0C00); + + phy_write(phydev, MII_BCM7XXX_100TX_FALSE_CAR, 0x7555); + + /* reset shadow mode 2 */ + ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, MII_BCM7XXX_SHD_MODE_2, 0); + if (ret < 0) + return ret; + + return 0; +} + +/* Workaround for putting the PHY in IDDQ mode, required + * for all BCM7XXX PHYs + */ +static int bcm7xxx_suspend(struct phy_device *phydev) +{ + int ret; + const struct bcm7xxx_regs { + int reg; + u16 value; + } bcm7xxx_suspend_cfg[] = { + { MII_BCM7XXX_TEST, 0x008b }, + { MII_BCM7XXX_100TX_AUX_CTL, 0x01c0 }, + { MII_BCM7XXX_100TX_DISC, 0x7000 }, + { MII_BCM7XXX_TEST, 0x000f }, + { MII_BCM7XXX_100TX_AUX_CTL, 0x20d0 }, + { MII_BCM7XXX_TEST, 0x000b }, + }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(bcm7xxx_suspend_cfg); i++) { + ret = phy_write(phydev, + bcm7xxx_suspend_cfg[i].reg, + bcm7xxx_suspend_cfg[i].value); + if (ret) + return ret; + } + + return 0; +} + +static int bcm7xxx_dummy_config_init(struct phy_device *phydev) +{ + return 0; +} + +static struct phy_driver bcm7xxx_driver[] = { +{ + .phy_id = PHY_ID_BCM7366, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM7366", + .features = PHY_GBIT_FEATURES | + SUPPORTED_Pause | SUPPORTED_Asym_Pause, + .flags = PHY_IS_INTERNAL, + .config_init = bcm7xxx_28nm_afe_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .suspend = bcm7xxx_suspend, + .resume = bcm7xxx_28nm_afe_config_init, + .driver = { .owner = THIS_MODULE }, +}, { + .phy_id = PHY_ID_BCM7439, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM7439", + .features = PHY_GBIT_FEATURES | + SUPPORTED_Pause | SUPPORTED_Asym_Pause, + .flags = PHY_IS_INTERNAL, + .config_init = bcm7xxx_28nm_afe_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .suspend = bcm7xxx_suspend, + .resume = bcm7xxx_28nm_afe_config_init, + .driver = { .owner = THIS_MODULE }, +}, { + .phy_id = PHY_ID_BCM7445, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM7445", + .features = PHY_GBIT_FEATURES | + SUPPORTED_Pause | SUPPORTED_Asym_Pause, + .flags = PHY_IS_INTERNAL, + .config_init = bcm7xxx_28nm_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .suspend = bcm7xxx_suspend, + .resume = bcm7xxx_28nm_config_init, + .driver = { .owner = THIS_MODULE }, +}, { + .name = "Broadcom BCM7XXX 28nm", + .phy_id = PHY_ID_BCM7XXX_28, + .phy_id_mask = PHY_BCM_OUI_MASK, + .features = PHY_GBIT_FEATURES | + SUPPORTED_Pause | SUPPORTED_Asym_Pause, + .flags = PHY_IS_INTERNAL, + .config_init = bcm7xxx_28nm_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .suspend = bcm7xxx_suspend, + .resume = bcm7xxx_28nm_config_init, + .driver = { .owner = THIS_MODULE }, +}, { + .phy_id = PHY_BCM_OUI_4, + .phy_id_mask = 0xffff0000, + .name = "Broadcom BCM7XXX 40nm", + .features = PHY_GBIT_FEATURES | + SUPPORTED_Pause | SUPPORTED_Asym_Pause, + .flags = PHY_IS_INTERNAL, + .config_init = bcm7xxx_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .suspend = bcm7xxx_suspend, + .resume = bcm7xxx_config_init, + .driver = { .owner = THIS_MODULE }, +}, { + .phy_id = PHY_BCM_OUI_5, + .phy_id_mask = 0xffffff00, + .name = "Broadcom BCM7XXX 65nm", + .features = PHY_BASIC_FEATURES | + SUPPORTED_Pause | SUPPORTED_Asym_Pause, + .flags = PHY_IS_INTERNAL, + .config_init = bcm7xxx_dummy_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .suspend = bcm7xxx_suspend, + .resume = bcm7xxx_config_init, + .driver = { .owner = THIS_MODULE }, +} }; + +static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = { + { PHY_ID_BCM7366, 0xfffffff0, }, + { PHY_ID_BCM7439, 0xfffffff0, }, + { PHY_ID_BCM7445, 0xfffffff0, }, + { PHY_ID_BCM7XXX_28, 0xfffffc00 }, + { PHY_BCM_OUI_4, 0xffff0000 }, + { PHY_BCM_OUI_5, 0xffffff00 }, + { } +}; + +static int __init bcm7xxx_phy_init(void) +{ + return phy_drivers_register(bcm7xxx_driver, + ARRAY_SIZE(bcm7xxx_driver)); +} + +static void __exit bcm7xxx_phy_exit(void) +{ + phy_drivers_unregister(bcm7xxx_driver, + ARRAY_SIZE(bcm7xxx_driver)); +} + +module_init(bcm7xxx_phy_init); +module_exit(bcm7xxx_phy_exit); + +MODULE_DEVICE_TABLE(mdio, bcm7xxx_tbl); + +MODULE_DESCRIPTION("Broadcom BCM7xxx internal PHY driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Broadcom Corporation"); diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 104e3efe46af..6f76277baf39 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -13,10 +13,17 @@ #define PHY_ID_BCM5461 0x002060c0 #define PHY_ID_BCM57780 0x03625d90 +#define PHY_ID_BCM7366 0x600d8490 +#define PHY_ID_BCM7439 0x600d8480 +#define PHY_ID_BCM7445 0x600d8510 +#define PHY_ID_BCM7XXX_28 0x600d8400 + #define PHY_BCM_OUI_MASK 0xfffffc00 #define PHY_BCM_OUI_1 0x00206000 #define PHY_BCM_OUI_2 0x0143bc00 #define PHY_BCM_OUI_3 0x03625c00 +#define PHY_BCM_OUI_4 0x600d0000 +#define PHY_BCM_OUI_5 0x03625e00 #define PHY_BCM_FLAGS_MODE_COPPER 0x00000001 @@ -31,6 +38,8 @@ #define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00002000 #define PHY_BRCM_CLEAR_RGMII_MODE 0x00004000 #define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00008000 +/* Broadcom BCM7xxx specific workarounds */ +#define PHY_BRCM_100MBPS_WAR 0x00010000 #define PHY_BCM_FLAGS_VALID 0x80000000 /* Broadcom BCM54XX register definitions, common to most Broadcom PHYs */ -- cgit v1.2.3-71-gd317 From 1c213bd24ad04f4430031d20d740d7783162b099 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 13 Feb 2014 11:46:28 -0800 Subject: net: introduce netdev_alloc_pcpu_stats() for drivers There are many drivers calling alloc_percpu() to allocate pcpu stats and then initializing ->syncp. So just introduce a helper function for them. Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- drivers/net/dummy.c | 8 +------- drivers/net/ethernet/marvell/mvneta.c | 9 +-------- drivers/net/loopback.c | 9 +-------- drivers/net/macvlan.c | 9 +-------- drivers/net/nlmon.c | 11 +---------- drivers/net/team/team.c | 8 +------- drivers/net/veth.c | 11 +---------- drivers/net/vxlan.c | 10 +--------- drivers/net/xen-netfront.c | 8 +------- include/linux/netdevice.h | 14 ++++++++++++++ net/8021q/vlan_dev.c | 11 ++--------- net/bridge/br_device.c | 9 +-------- net/ipv4/ip_tunnel.c | 10 ++-------- net/ipv6/ip6_gre.c | 9 +-------- net/ipv6/ip6_tunnel.c | 9 +-------- net/ipv6/ip6_vti.c | 8 +------- net/ipv6/sit.c | 18 ++---------------- net/openvswitch/datapath.c | 8 +------- net/openvswitch/vport.c | 10 +--------- 19 files changed, 35 insertions(+), 154 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index bd8f84b0b894..1656317c96f8 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -88,16 +88,10 @@ static netdev_tx_t dummy_xmit(struct sk_buff *skb, struct net_device *dev) static int dummy_dev_init(struct net_device *dev) { - int i; - dev->dstats = alloc_percpu(struct pcpu_dstats); + dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats); if (!dev->dstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_dstats *dstats; - dstats = per_cpu_ptr(dev->dstats, i); - u64_stats_init(&dstats->syncp); - } return 0; } diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index f418f4f20f94..12c6a66e54d1 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2784,7 +2784,6 @@ static int mvneta_probe(struct platform_device *pdev) const char *mac_from; int phy_mode; int err; - int cpu; /* Our multiqueue support is not complete, so for now, only * allow the usage of the first RX queue @@ -2845,18 +2844,12 @@ static int mvneta_probe(struct platform_device *pdev) } /* Alloc per-cpu stats */ - pp->stats = alloc_percpu(struct mvneta_pcpu_stats); + pp->stats = netdev_alloc_pcpu_stats(struct mvneta_pcpu_stats); if (!pp->stats) { err = -ENOMEM; goto err_unmap; } - for_each_possible_cpu(cpu) { - struct mvneta_pcpu_stats *stats; - stats = per_cpu_ptr(pp->stats, cpu); - u64_stats_init(&stats->syncp); - } - dt_mac_addr = of_get_mac_address(dn); if (dt_mac_addr) { mac_from = "device tree"; diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index e7c1d5f8ab51..771c9bfa7d31 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -136,16 +136,9 @@ static const struct ethtool_ops loopback_ethtool_ops = { static int loopback_dev_init(struct net_device *dev) { - int i; - dev->lstats = alloc_percpu(struct pcpu_lstats); + dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); if (!dev->lstats) return -ENOMEM; - - for_each_possible_cpu(i) { - struct pcpu_lstats *lb_stats; - lb_stats = per_cpu_ptr(dev->lstats, i); - u64_stats_init(&lb_stats->syncp); - } return 0; } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 8433de4509c7..25685e3eb472 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -534,7 +534,6 @@ static int macvlan_init(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); const struct net_device *lowerdev = vlan->lowerdev; - int i; dev->state = (dev->state & ~MACVLAN_STATE_MASK) | (lowerdev->state & MACVLAN_STATE_MASK); @@ -546,16 +545,10 @@ static int macvlan_init(struct net_device *dev) macvlan_set_lockdep_class(dev); - vlan->pcpu_stats = alloc_percpu(struct vlan_pcpu_stats); + vlan->pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan->pcpu_stats) return -ENOMEM; - for_each_possible_cpu(i) { - struct vlan_pcpu_stats *mvlstats; - mvlstats = per_cpu_ptr(vlan->pcpu_stats, i); - u64_stats_init(&mvlstats->syncp); - } - return 0; } diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c index d2bb12bfabd5..14ce7de6a933 100644 --- a/drivers/net/nlmon.c +++ b/drivers/net/nlmon.c @@ -47,16 +47,7 @@ static int nlmon_change_mtu(struct net_device *dev, int new_mtu) static int nlmon_dev_init(struct net_device *dev) { - int i; - - dev->lstats = alloc_percpu(struct pcpu_lstats); - - for_each_possible_cpu(i) { - struct pcpu_lstats *nlmstats; - nlmstats = per_cpu_ptr(dev->lstats, i); - u64_stats_init(&nlmstats->syncp); - } - + dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); return dev->lstats == NULL ? -ENOMEM : 0; } diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 28407426fd6f..adb46de7c90d 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1540,16 +1540,10 @@ static int team_init(struct net_device *dev) mutex_init(&team->lock); team_set_no_mode(team); - team->pcpu_stats = alloc_percpu(struct team_pcpu_stats); + team->pcpu_stats = netdev_alloc_pcpu_stats(struct team_pcpu_stats); if (!team->pcpu_stats) return -ENOMEM; - for_each_possible_cpu(i) { - struct team_pcpu_stats *team_stats; - team_stats = per_cpu_ptr(team->pcpu_stats, i); - u64_stats_init(&team_stats->syncp); - } - for (i = 0; i < TEAM_PORT_HASHENTRIES; i++) INIT_HLIST_HEAD(&team->en_port_hlist[i]); INIT_LIST_HEAD(&team->port_list); diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 2ec2041b62d4..91c33c1d3c9c 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -235,18 +235,9 @@ static int veth_change_mtu(struct net_device *dev, int new_mtu) static int veth_dev_init(struct net_device *dev) { - int i; - - dev->vstats = alloc_percpu(struct pcpu_vstats); + dev->vstats = netdev_alloc_pcpu_stats(struct pcpu_vstats); if (!dev->vstats) return -ENOMEM; - - for_each_possible_cpu(i) { - struct pcpu_vstats *veth_stats; - veth_stats = per_cpu_ptr(dev->vstats, i); - u64_stats_init(&veth_stats->syncp); - } - return 0; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index b0f705c2378f..dec9820bc182 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1978,19 +1978,11 @@ static int vxlan_init(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); struct vxlan_sock *vs; - int i; - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *vxlan_stats; - vxlan_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&vxlan_stats->syncp); - } - - spin_lock(&vn->sock_lock); vs = vxlan_find_sock(dev_net(dev), vxlan->dst_port); if (vs) { diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index f9daa9e183f2..2b62d799bfd8 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1281,16 +1281,10 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) np->rx_refill_timer.function = rx_refill_timeout; err = -ENOMEM; - np->stats = alloc_percpu(struct netfront_stats); + np->stats = netdev_alloc_pcpu_stats(struct netfront_stats); if (np->stats == NULL) goto exit; - for_each_possible_cpu(i) { - struct netfront_stats *xen_nf_stats; - xen_nf_stats = per_cpu_ptr(np->stats, i); - u64_stats_init(&xen_nf_stats->syncp); - } - /* Initialise tx_skbs as a free chain containing every entry. */ np->tx_skb_freelist = 0; for (i = 0; i < NET_TX_RING_SIZE; i++) { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 440a02ee6f92..430c51aed6a4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1726,6 +1726,20 @@ struct pcpu_sw_netstats { struct u64_stats_sync syncp; }; +#define netdev_alloc_pcpu_stats(type) \ +({ \ + typeof(type) *pcpu_stats = alloc_percpu(type); \ + if (pcpu_stats) { \ + int i; \ + for_each_possible_cpu(i) { \ + typeof(type) *stat; \ + stat = per_cpu_ptr(pcpu_stats, i); \ + u64_stats_init(&stat->syncp); \ + } \ + } \ + pcpu_stats; \ +}) + #include /* netdevice notifier chain. Please remember to update the rtnetlink diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index de51c48c4393..566adbf5c506 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -556,7 +556,7 @@ static const struct net_device_ops vlan_netdev_ops; static int vlan_dev_init(struct net_device *dev) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - int subclass = 0, i; + int subclass = 0; netif_carrier_off(dev); @@ -606,17 +606,10 @@ static int vlan_dev_init(struct net_device *dev) vlan_dev_set_lockdep_class(dev, subclass); - vlan_dev_priv(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats); + vlan_dev_priv(dev)->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan_dev_priv(dev)->vlan_pcpu_stats) return -ENOMEM; - for_each_possible_cpu(i) { - struct vlan_pcpu_stats *vlan_stat; - vlan_stat = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); - u64_stats_init(&vlan_stat->syncp); - } - - return 0; } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 63f0455c0bc3..bf34451743a1 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -88,18 +88,11 @@ out: static int br_dev_init(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); - int i; - br->stats = alloc_percpu(struct pcpu_sw_netstats); + br->stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!br->stats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *br_dev_stats; - br_dev_stats = per_cpu_ptr(br->stats, i); - u64_stats_init(&br_dev_stats->syncp); - } - return 0; } diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 50228be5c17b..3400d737adc6 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1041,19 +1041,13 @@ int ip_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; - int i, err; + int err; dev->destructor = ip_tunnel_dev_free; - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ipt_stats; - ipt_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ipt_stats->syncp); - } - tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); if (!tunnel->dst_cache) { free_percpu(dev->tstats); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index f3ffb43f59c0..c98338b81d30 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1454,7 +1454,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[], static int ip6gre_tap_init(struct net_device *dev) { struct ip6_tnl *tunnel; - int i; tunnel = netdev_priv(dev); @@ -1464,16 +1463,10 @@ static int ip6gre_tap_init(struct net_device *dev) ip6gre_tnl_link_config(tunnel, 1); - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ip6gre_tap_stats; - ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ip6gre_tap_stats->syncp); - } - return 0; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5db8d310f9c0..8ad59f4811df 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1502,19 +1502,12 @@ static inline int ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - int i; t->dev = dev; t->net = dev_net(dev); - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ip6_tnl_stats; - ip6_tnl_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ip6_tnl_stats->syncp); - } return 0; } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 2d19272b8cee..864914399391 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -731,18 +731,12 @@ static void vti6_dev_setup(struct net_device *dev) static inline int vti6_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - int i; t->dev = dev; t->net = dev_net(dev); - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *stats; - stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&stats->syncp); - } return 0; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3dfbcf1dcb1c..958027be0e94 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1356,7 +1356,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) static int ipip6_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1365,16 +1364,10 @@ static int ipip6_tunnel_init(struct net_device *dev) memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); ipip6_tunnel_bind_dev(dev); - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ipip6_tunnel_stats; - ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ipip6_tunnel_stats->syncp); - } - return 0; } @@ -1384,7 +1377,6 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); - int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1395,16 +1387,10 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; iph->ttl = 64; - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ipip6_fb_stats; - ipip6_fb_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ipip6_fb_stats->syncp); - } - dev_hold(dev); rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); return 0; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index e9a48baf8551..3a954067b6a4 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1215,18 +1215,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (err) goto err_free_dp; - dp->stats_percpu = alloc_percpu(struct dp_stats_percpu); + dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu); if (!dp->stats_percpu) { err = -ENOMEM; goto err_destroy_table; } - for_each_possible_cpu(i) { - struct dp_stats_percpu *dpath_stats; - dpath_stats = per_cpu_ptr(dp->stats_percpu, i); - u64_stats_init(&dpath_stats->sync); - } - dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), GFP_KERNEL); if (!dp->ports) { diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 208dd9a26dd1..3b4db3220456 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -121,7 +121,6 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, { struct vport *vport; size_t alloc_size; - int i; alloc_size = sizeof(struct vport); if (priv_size) { @@ -139,19 +138,12 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); - vport->percpu_stats = alloc_percpu(struct pcpu_sw_netstats); + vport->percpu_stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!vport->percpu_stats) { kfree(vport); return ERR_PTR(-ENOMEM); } - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *vport_stats; - vport_stats = per_cpu_ptr(vport->percpu_stats, i); - u64_stats_init(&vport_stats->syncp); - } - - spin_lock_init(&vport->stats_lock); return vport; -- cgit v1.2.3-71-gd317 From c0fdfb80382e4901473ce0e31d1e7833c1d297be Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 9 Feb 2014 20:59:04 -0500 Subject: tty: Fix ref counting for port krefs The tty core supports two models for handling tty_port lifetimes; the tty_port can use the kref supplied by tty_port (which will automatically destruct the tty_port when the ref count drops to zero) or it can destruct the tty_port manually. For tty drivers that choose to use the port kref to manage the tty_port lifetime, it is not possible to safely acquire a port reference conditionally. If the last reference is released after evaluating the condition but before acquiring the reference, a bogus reference will be held while the tty_port destruction commences. Rather, only acquire a port reference if the ref count is non-zero and allow the caller to distinguish if a reference has successfully been acquired. Cc: Jiri Slaby Signed-off-by: Peter Hurley Acked-by: Greg Kroah-Hartman Tested-By: Alexander Holler Signed-off-by: Marcel Holtmann --- include/linux/tty.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 90b4fdc8a61f..4781d7b27dd3 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -518,9 +518,9 @@ extern void tty_port_put(struct tty_port *port); static inline struct tty_port *tty_port_get(struct tty_port *port) { - if (port) - kref_get(&port->kref); - return port; + if (port && kref_get_unless_zero(&port->kref)) + return port; + return NULL; } /* If the cts flow control is enabled, return true. */ -- cgit v1.2.3-71-gd317 From 797ac07137d9ae8572008e21e6123a9ae17dae50 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 17 Feb 2014 13:34:02 -0800 Subject: net: phy: move PHY software reset to genphy_soft_reset As pointed out by Shaohui, this function is generic for 10/100/1000 PHYs, but 10G PHYs might have a slightly different reset sequence which prevents most of them from using this function. Move the BMCR_RESET based software resent sequence to genphy_soft_reset() in preparation for allowing PHY drivers to implement a soft_reset() callback. Reported-by: Shaohui Xie Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 27 ++++++++++++++++++++++----- include/linux/phy.h | 1 + 2 files changed, 23 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index c2d778d06405..7c21b8214bb9 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -540,11 +540,7 @@ int phy_init_hw(struct phy_device *phydev) if (!phydev->drv || !phydev->drv->config_init) return 0; - ret = phy_write(phydev, MII_BMCR, BMCR_RESET); - if (ret < 0) - return ret; - - ret = phy_poll_reset(phydev); + ret = genphy_soft_reset(phydev); if (ret < 0) return ret; @@ -1045,6 +1041,27 @@ static int gen10g_read_status(struct phy_device *phydev) return 0; } +/** + * genphy_soft_reset - software reset the PHY via BMCR_RESET bit + * @phydev: target phy_device struct + * + * Description: Perform a software PHY reset using the standard + * BMCR_RESET bit and poll for the reset bit to be cleared. + * + * Returns: 0 on success, < 0 on failure + */ +int genphy_soft_reset(struct phy_device *phydev) +{ + int ret; + + ret = phy_write(phydev, MII_BMCR, BMCR_RESET); + if (ret < 0) + return ret; + + return phy_poll_reset(phydev); +} +EXPORT_SYMBOL(genphy_soft_reset); + static int genphy_config_init(struct phy_device *phydev) { int val; diff --git a/include/linux/phy.h b/include/linux/phy.h index f7fe54628424..bffe0ec1604f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -666,6 +666,7 @@ int genphy_update_link(struct phy_device *phydev); int genphy_read_status(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); +int genphy_soft_reset(struct phy_device *phydev); void phy_driver_unregister(struct phy_driver *drv); void phy_drivers_unregister(struct phy_driver *drv, int n); int phy_driver_register(struct phy_driver *new_driver); -- cgit v1.2.3-71-gd317 From 9df81dd7583d14862d0cfb673a941b261f3b2112 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 17 Feb 2014 13:34:03 -0800 Subject: net: phy: allow PHY drivers to implement their own software reset As pointed out by Shaohui, most 10G PHYs out there have a non-standard compliant software reset sequence, eventually something much more complex than just toggling the BMCR_RESET bit. Allow PHY driver to implement their own soft_reset() callback to deal with that. If no callback is provided, call into genphy_soft_reset() which makes sure the existing behavior is kept intact. Reported-by: Shaohui Xie Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 16 ++++++++++++++-- include/linux/phy.h | 5 +++++ 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 7c21b8214bb9..a70b604ac644 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -535,12 +535,16 @@ static int phy_poll_reset(struct phy_device *phydev) int phy_init_hw(struct phy_device *phydev) { - int ret; + int ret = 0; if (!phydev->drv || !phydev->drv->config_init) return 0; - ret = genphy_soft_reset(phydev); + if (phydev->drv->soft_reset) + ret = phydev->drv->soft_reset(phydev); + else + ret = genphy_soft_reset(phydev); + if (ret < 0) return ret; @@ -1108,6 +1112,12 @@ static int genphy_config_init(struct phy_device *phydev) return 0; } +static int gen10g_soft_reset(struct phy_device *phydev) +{ + /* Do nothing for now */ + return 0; +} + static int gen10g_config_init(struct phy_device *phydev) { /* Temporarily just say we support everything */ @@ -1282,6 +1292,7 @@ static struct phy_driver genphy_driver[] = { .phy_id = 0xffffffff, .phy_id_mask = 0xffffffff, .name = "Generic PHY", + .soft_reset = genphy_soft_reset, .config_init = genphy_config_init, .features = 0, .config_aneg = genphy_config_aneg, @@ -1294,6 +1305,7 @@ static struct phy_driver genphy_driver[] = { .phy_id = 0xffffffff, .phy_id_mask = 0xffffffff, .name = "Generic 10G PHY", + .soft_reset = gen10g_soft_reset, .config_init = gen10g_config_init, .features = 0, .config_aneg = gen10g_config_aneg, diff --git a/include/linux/phy.h b/include/linux/phy.h index bffe0ec1604f..24126c4b27b5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -439,6 +439,11 @@ struct phy_driver { u32 features; u32 flags; + /* + * Called to issue a PHY software reset + */ + int (*soft_reset)(struct phy_device *phydev); + /* * Called to initialize the PHY, * including after a reset -- cgit v1.2.3-71-gd317 From 9b2777d6089bcd7fb035847f907280560fe233c8 Mon Sep 17 00:00:00 2001 From: Phoebe Buckheister Date: Mon, 17 Feb 2014 11:34:08 +0100 Subject: ieee802154: add TX power control to wpan_phy Replace the current u8 transmit_power in wpan_phy with s8 transmit_power. The u8 field contained the actual tx power and a tolerance field, which no physical radio every used. Adjust sysfs entries to keep compatibility with userspace, give tolerances of +-1dB statically there. This patch only adds support for this in the at86rf230 driver and the RF212 chip. Configuration calculation for RF212 is also somewhat basic, but does the job - the RF212 datasheet gives a large table with suggested values for combinations of TX power and page/channel, if this does not work well, we might have to copy the whole table. Signed-off-by: Phoebe Buckheister Signed-off-by: David S. Miller --- drivers/net/ieee802154/at86rf230.c | 25 +++++++++++++++++++ include/linux/nl802154.h | 4 ++++ include/net/mac802154.h | 5 ++++ include/net/wpan-phy.h | 6 +++-- net/ieee802154/ieee802154.h | 1 + net/ieee802154/netlink.c | 1 + net/ieee802154/nl-phy.c | 49 +++++++++++++++++++++++++++++++++++++- net/ieee802154/nl_policy.c | 2 ++ net/ieee802154/wpan-class.c | 4 +--- net/mac802154/ieee802154_dev.c | 11 +++++++++ 10 files changed, 102 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index bd1ef0b3bee9..9afb4b9d7c93 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -753,6 +753,30 @@ at86rf230_set_hw_addr_filt(struct ieee802154_dev *dev, return 0; } +static int +at86rf212_set_txpower(struct ieee802154_dev *dev, int db) +{ + struct at86rf230_local *lp = dev->priv; + int rc; + + /* typical maximum output is 5dBm with RG_PHY_TX_PWR 0x60, lower five + * bits decrease power in 1dB steps. 0x60 represents extra PA gain of + * 0dB. + * thus, supported values for db range from -26 to 5, for 31dB of + * reduction to 0dB of reduction. + */ + if (db > 5 || db < -26) + return -EINVAL; + + db = -(db - 5); + + rc = __at86rf230_write(lp, RG_PHY_TX_PWR, 0x60 | db); + if (rc) + return rc; + + return 0; +} + static struct ieee802154_ops at86rf230_ops = { .owner = THIS_MODULE, .xmit = at86rf230_xmit, @@ -771,6 +795,7 @@ static struct ieee802154_ops at86rf212_ops = { .start = at86rf230_start, .stop = at86rf230_stop, .set_hw_addr_filt = at86rf230_set_hw_addr_filt, + .set_txpower = at86rf212_set_txpower, }; static void at86rf230_irqwork(struct work_struct *work) diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index fd4f2d1cdf6c..625d19e0a1de 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -70,6 +70,8 @@ enum { IEEE802154_ATTR_PHY_NAME, IEEE802154_ATTR_DEV_TYPE, + IEEE802154_ATTR_TXPOWER, + __IEEE802154_ATTR_MAX, }; @@ -122,6 +124,8 @@ enum { IEEE802154_ADD_IFACE, IEEE802154_DEL_IFACE, + IEEE802154_SET_PHYPARAMS, + __IEEE802154_CMD_MAX, }; diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 807d6b7a943f..8bd2785a663c 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -113,6 +113,10 @@ struct ieee802154_dev { * Set radio for listening on specific address. * Set the device for listening on specified address. * Returns either zero, or negative errno. + * + * set_txpower: + * Set radio transmit power in dB. Called with pib_lock held. + * Returns either zero, or negative errno. */ struct ieee802154_ops { struct module *owner; @@ -129,6 +133,7 @@ struct ieee802154_ops { unsigned long changed); int (*ieee_addr)(struct ieee802154_dev *dev, u8 addr[IEEE802154_ADDR_LEN]); + int (*set_txpower)(struct ieee802154_dev *dev, int db); }; /* Basic interface to register ieee802154 device */ diff --git a/include/net/wpan-phy.h b/include/net/wpan-phy.h index b52bda8d13b1..47fc0c1bc3c7 100644 --- a/include/net/wpan-phy.h +++ b/include/net/wpan-phy.h @@ -37,14 +37,14 @@ struct wpan_phy { struct mutex pib_lock; /* - * This is a PIB according to 802.15.4-2006. + * This is a PIB according to 802.15.4-2011. * We do not provide timing-related variables, as they * aren't used outside of driver */ u8 current_channel; u8 current_page; u32 channels_supported[32]; - u8 transmit_power; + s8 transmit_power; u8 cca_mode; struct device dev; @@ -54,6 +54,8 @@ struct wpan_phy { const char *name, int type); void (*del_iface)(struct wpan_phy *phy, struct net_device *dev); + int (*set_txpower)(struct wpan_phy *phy, int db); + char priv[0] __attribute__((__aligned__(NETDEV_ALIGN))); }; diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h index cee4425b9956..6cbc8965be91 100644 --- a/net/ieee802154/ieee802154.h +++ b/net/ieee802154/ieee802154.h @@ -53,6 +53,7 @@ int ieee802154_list_phy(struct sk_buff *skb, struct genl_info *info); int ieee802154_dump_phy(struct sk_buff *skb, struct netlink_callback *cb); int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info); int ieee802154_del_iface(struct sk_buff *skb, struct genl_info *info); +int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info); enum ieee802154_mcgrp_ids { IEEE802154_COORD_MCGRP, diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 43f1b2bf469f..67c151bf4b91 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -115,6 +115,7 @@ static const struct genl_ops ieee8021154_ops[] = { ieee802154_dump_phy), IEEE802154_OP(IEEE802154_ADD_IFACE, ieee802154_add_iface), IEEE802154_OP(IEEE802154_DEL_IFACE, ieee802154_del_iface), + IEEE802154_OP(IEEE802154_SET_PHYPARAMS, ieee802154_set_phyparams), /* see nl-mac.c */ IEEE802154_OP(IEEE802154_ASSOCIATE_REQ, ieee802154_associate_req), IEEE802154_OP(IEEE802154_ASSOCIATE_RESP, ieee802154_associate_resp), diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 89b265aea151..d3ee62fbae99 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -55,7 +55,8 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, mutex_lock(&phy->pib_lock); if (nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) || nla_put_u8(msg, IEEE802154_ATTR_PAGE, phy->current_page) || - nla_put_u8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel)) + nla_put_u8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel) || + nla_put_s8(msg, IEEE802154_ATTR_TXPOWER, phy->transmit_power)) goto nla_put_failure; for (i = 0; i < 32; i++) { if (phy->channels_supported[i]) @@ -354,3 +355,49 @@ out_dev: return rc; } + +int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info) +{ + struct wpan_phy *phy; + const char *name; + int txpower; + int rc = -EINVAL; + + pr_debug("%s\n", __func__); + + if (!info->attrs[IEEE802154_ATTR_PHY_NAME]) + return -EINVAL; + + name = nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]); + if (name[nla_len(info->attrs[IEEE802154_ATTR_PHY_NAME]) - 1] != '\0') + return -EINVAL; /* phy name should be null-terminated */ + + txpower = nla_get_s8(info->attrs[IEEE802154_ATTR_TXPOWER]); + + phy = wpan_phy_find(name); + if (!phy) + return -ENODEV; + + if (!phy->set_txpower) + goto out; + + mutex_lock(&phy->pib_lock); + + rc = phy->set_txpower(phy, txpower); + if (rc < 0) { + mutex_unlock(&phy->pib_lock); + goto out; + } + + phy->transmit_power = txpower; + + mutex_unlock(&phy->pib_lock); + + wpan_phy_put(phy); + + return 0; + +out: + wpan_phy_put(phy); + return rc; +} diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index 6adda4d46f95..90b1d0d2c14e 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -52,5 +52,7 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_DURATION] = { .type = NLA_U8, }, [IEEE802154_ATTR_ED_LIST] = { .len = 27 }, [IEEE802154_ATTR_CHANNEL_PAGE_LIST] = { .len = 32 * 4, }, + + [IEEE802154_ATTR_TXPOWER] = { .type = NLA_S8, }, }; diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index 4dd37615a749..8d6f6704da84 100644 --- a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -44,9 +44,7 @@ static DEVICE_ATTR_RO(name); MASTER_SHOW(current_channel, "%d"); MASTER_SHOW(current_page, "%d"); -MASTER_SHOW_COMPLEX(transmit_power, "%d +- %d dB", - ((signed char) (phy->transmit_power << 2)) >> 2, - (phy->transmit_power >> 6) ? (phy->transmit_power >> 6) * 3 : 1); +MASTER_SHOW(transmit_power, "%d +- 1 dB"); MASTER_SHOW(cca_mode, "%d"); static ssize_t channels_supported_show(struct device *dev, diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c index 52ae6646a411..9eb49e0886a4 100644 --- a/net/mac802154/ieee802154_dev.c +++ b/net/mac802154/ieee802154_dev.c @@ -165,6 +165,16 @@ err: return ERR_PTR(err); } +static int mac802154_set_txpower(struct wpan_phy *phy, int db) +{ + struct mac802154_priv *priv = wpan_phy_priv(phy); + + if (!priv->ops->set_txpower) + return -ENOTSUPP; + + return priv->ops->set_txpower(&priv->hw, db); +} + struct ieee802154_dev * ieee802154_alloc_device(size_t priv_data_len, struct ieee802154_ops *ops) { @@ -242,6 +252,7 @@ int ieee802154_register_device(struct ieee802154_dev *dev) priv->phy->add_iface = mac802154_add_iface; priv->phy->del_iface = mac802154_del_iface; + priv->phy->set_txpower = mac802154_set_txpower; rc = wpan_phy_register(priv->phy); if (rc < 0) -- cgit v1.2.3-71-gd317 From 84dda3c648fd55898064d76366b14f964cdc9d16 Mon Sep 17 00:00:00 2001 From: Phoebe Buckheister Date: Mon, 17 Feb 2014 11:34:10 +0100 Subject: ieee802154: add support for listen-before-talk in wpan_phy Listen-before-talk is an alternative to CSMA in uncoordinated networks and prescribed by european regulations if one wants to have a device with radio duty cycles above 10% (or less in some bands). Add a phy property to enable/disable LBT in the phy, including support in the at86rf230 driver for RF212 chips. Signed-off-by: Phoebe Buckheister Signed-off-by: David S. Miller --- drivers/net/ieee802154/at86rf230.c | 11 +++++++- include/linux/nl802154.h | 1 + include/net/mac802154.h | 6 ++++ include/net/wpan-phy.h | 3 ++ net/ieee802154/nl-phy.c | 58 ++++++++++++++++++++++++++++++-------- net/ieee802154/nl_policy.c | 1 + net/mac802154/ieee802154_dev.c | 11 ++++++++ 7 files changed, 78 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 04a995dad593..3d40c2350261 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -152,7 +152,7 @@ static inline int is_rf212(struct at86rf230_local *local) #define SR_RESERVED_17_5 0x17, 0x08, 3 #define SR_AACK_UPLD_RES_FT 0x17, 0x10, 4 #define SR_AACK_FLTR_RES_FT 0x17, 0x20, 5 -#define SR_RESERVED_17_2 0x17, 0x40, 6 +#define SR_CSMA_LBT_MODE 0x17, 0x40, 6 #define SR_RESERVED_17_1 0x17, 0x80, 7 #define RG_FTN_CTRL (0x18) #define SR_RESERVED_18_2 0x18, 0x7f, 0 @@ -786,6 +786,14 @@ at86rf212_set_txpower(struct ieee802154_dev *dev, int db) return 0; } +static int +at86rf212_set_lbt(struct ieee802154_dev *dev, bool on) +{ + struct at86rf230_local *lp = dev->priv; + + return at86rf230_write_subreg(lp, SR_CSMA_LBT_MODE, on); +} + static struct ieee802154_ops at86rf230_ops = { .owner = THIS_MODULE, .xmit = at86rf230_xmit, @@ -805,6 +813,7 @@ static struct ieee802154_ops at86rf212_ops = { .stop = at86rf230_stop, .set_hw_addr_filt = at86rf230_set_hw_addr_filt, .set_txpower = at86rf212_set_txpower, + .set_lbt = at86rf212_set_lbt, }; static void at86rf230_irqwork(struct work_struct *work) diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index 625d19e0a1de..326baee227f7 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -71,6 +71,7 @@ enum { IEEE802154_ATTR_DEV_TYPE, IEEE802154_ATTR_TXPOWER, + IEEE802154_ATTR_LBT_ENABLED, __IEEE802154_ATTR_MAX, }; diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 8bd2785a663c..521edcb0e586 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -117,6 +117,11 @@ struct ieee802154_dev { * set_txpower: * Set radio transmit power in dB. Called with pib_lock held. * Returns either zero, or negative errno. + * + * set_lbt + * Enables or disables listen before talk on the device. Called with + * pib_lock held. + * Returns either zero, or negative errno. */ struct ieee802154_ops { struct module *owner; @@ -134,6 +139,7 @@ struct ieee802154_ops { int (*ieee_addr)(struct ieee802154_dev *dev, u8 addr[IEEE802154_ADDR_LEN]); int (*set_txpower)(struct ieee802154_dev *dev, int db); + int (*set_lbt)(struct ieee802154_dev *dev, bool on); }; /* Basic interface to register ieee802154 device */ diff --git a/include/net/wpan-phy.h b/include/net/wpan-phy.h index 47fc0c1bc3c7..804e6c4f5f8a 100644 --- a/include/net/wpan-phy.h +++ b/include/net/wpan-phy.h @@ -47,6 +47,8 @@ struct wpan_phy { s8 transmit_power; u8 cca_mode; + bool lbt; + struct device dev; int idx; @@ -55,6 +57,7 @@ struct wpan_phy { void (*del_iface)(struct wpan_phy *phy, struct net_device *dev); int (*set_txpower)(struct wpan_phy *phy, int db); + int (*set_lbt)(struct wpan_phy *phy, bool on); char priv[0] __attribute__((__aligned__(NETDEV_ALIGN))); }; diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index d3ee62fbae99..f029310b0662 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -56,7 +56,8 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, if (nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) || nla_put_u8(msg, IEEE802154_ATTR_PAGE, phy->current_page) || nla_put_u8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel) || - nla_put_s8(msg, IEEE802154_ATTR_TXPOWER, phy->transmit_power)) + nla_put_s8(msg, IEEE802154_ATTR_TXPOWER, phy->transmit_power) || + nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, phy->lbt)) goto nla_put_failure; for (i = 0; i < 32; i++) { if (phy->channels_supported[i]) @@ -356,40 +357,71 @@ out_dev: return rc; } +static int phy_set_txpower(struct wpan_phy *phy, struct genl_info *info) +{ + int txpower = nla_get_s8(info->attrs[IEEE802154_ATTR_TXPOWER]); + int rc; + + rc = phy->set_txpower(phy, txpower); + if (rc < 0) + return rc; + + phy->transmit_power = txpower; + + return 0; +} + +static int phy_set_lbt(struct wpan_phy *phy, struct genl_info *info) +{ + u8 on = !!nla_get_u8(info->attrs[IEEE802154_ATTR_LBT_ENABLED]); + int rc; + + rc = phy->set_lbt(phy, on); + if (rc < 0) + return rc; + + phy->lbt = on; + + return 0; +} + int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info) { struct wpan_phy *phy; const char *name; - int txpower; - int rc = -EINVAL; + int rc = -ENOTSUPP; pr_debug("%s\n", __func__); - if (!info->attrs[IEEE802154_ATTR_PHY_NAME]) + if (!info->attrs[IEEE802154_ATTR_PHY_NAME] && + !info->attrs[IEEE802154_ATTR_LBT_ENABLED]) return -EINVAL; name = nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]); if (name[nla_len(info->attrs[IEEE802154_ATTR_PHY_NAME]) - 1] != '\0') return -EINVAL; /* phy name should be null-terminated */ - txpower = nla_get_s8(info->attrs[IEEE802154_ATTR_TXPOWER]); - phy = wpan_phy_find(name); if (!phy) return -ENODEV; - if (!phy->set_txpower) + if ((!phy->set_txpower && info->attrs[IEEE802154_ATTR_TXPOWER]) || + (!phy->set_lbt && info->attrs[IEEE802154_ATTR_LBT_ENABLED])) goto out; mutex_lock(&phy->pib_lock); - rc = phy->set_txpower(phy, txpower); - if (rc < 0) { - mutex_unlock(&phy->pib_lock); - goto out; + if (info->attrs[IEEE802154_ATTR_TXPOWER]) { + rc = phy_set_txpower(phy, info); + if (rc < 0) + goto error; } - phy->transmit_power = txpower; + if (info->attrs[IEEE802154_ATTR_LBT_ENABLED]) { + rc = phy_set_lbt(phy, info); + if (rc < 0) + goto error; + } mutex_unlock(&phy->pib_lock); @@ -397,6 +429,8 @@ int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info) return 0; +error: + mutex_unlock(&phy->pib_lock); out: wpan_phy_put(phy); return rc; diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index 90b1d0d2c14e..a09f6423a6e9 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -54,5 +54,6 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_CHANNEL_PAGE_LIST] = { .len = 32 * 4, }, [IEEE802154_ATTR_TXPOWER] = { .type = NLA_S8, }, + [IEEE802154_ATTR_LBT_ENABLED] = { .type = NLA_U8, }, }; diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c index 9eb49e0886a4..56338c8cfc33 100644 --- a/net/mac802154/ieee802154_dev.c +++ b/net/mac802154/ieee802154_dev.c @@ -175,6 +175,16 @@ static int mac802154_set_txpower(struct wpan_phy *phy, int db) return priv->ops->set_txpower(&priv->hw, db); } +static int mac802154_set_lbt(struct wpan_phy *phy, bool on) +{ + struct mac802154_priv *priv = wpan_phy_priv(phy); + + if (!priv->ops->set_lbt) + return -ENOTSUPP; + + return priv->ops->set_lbt(&priv->hw, on); +} + struct ieee802154_dev * ieee802154_alloc_device(size_t priv_data_len, struct ieee802154_ops *ops) { @@ -253,6 +263,7 @@ int ieee802154_register_device(struct ieee802154_dev *dev) priv->phy->add_iface = mac802154_add_iface; priv->phy->del_iface = mac802154_del_iface; priv->phy->set_txpower = mac802154_set_txpower; + priv->phy->set_lbt = mac802154_set_lbt; rc = wpan_phy_register(priv->phy); if (rc < 0) -- cgit v1.2.3-71-gd317 From ba08fea53a43e02b590d89224afdad976dece841 Mon Sep 17 00:00:00 2001 From: Phoebe Buckheister Date: Mon, 17 Feb 2014 11:34:11 +0100 Subject: ieee802154: add support for CCA mode in wpan phys The standard describes four modes of clear channel assesment: "energy above threshold", "carrier found", and the logical and/or of these two. Support for CCA mode setting is included in the at86rf230 driver, predicated for RF212 chips. Signed-off-by: Phoebe Buckheister Signed-off-by: David S. Miller --- drivers/net/ieee802154/at86rf230.c | 9 +++++++++ include/linux/nl802154.h | 1 + include/net/mac802154.h | 5 +++++ include/net/wpan-phy.h | 1 + net/ieee802154/nl-phy.c | 32 +++++++++++++++++++++++++++++--- net/ieee802154/nl_policy.c | 1 + net/mac802154/ieee802154_dev.c | 11 +++++++++++ 7 files changed, 57 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 3d40c2350261..c60871aff333 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -794,6 +794,14 @@ at86rf212_set_lbt(struct ieee802154_dev *dev, bool on) return at86rf230_write_subreg(lp, SR_CSMA_LBT_MODE, on); } +static int +at86rf212_set_cca_mode(struct ieee802154_dev *dev, u8 mode) +{ + struct at86rf230_local *lp = dev->priv; + + return at86rf230_write_subreg(lp, SR_CCA_MODE, mode); +} + static struct ieee802154_ops at86rf230_ops = { .owner = THIS_MODULE, .xmit = at86rf230_xmit, @@ -814,6 +822,7 @@ static struct ieee802154_ops at86rf212_ops = { .set_hw_addr_filt = at86rf230_set_hw_addr_filt, .set_txpower = at86rf212_set_txpower, .set_lbt = at86rf212_set_lbt, + .set_cca_mode = at86rf212_set_cca_mode, }; static void at86rf230_irqwork(struct work_struct *work) diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index 326baee227f7..5edefc14bd83 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -72,6 +72,7 @@ enum { IEEE802154_ATTR_TXPOWER, IEEE802154_ATTR_LBT_ENABLED, + IEEE802154_ATTR_CCA_MODE, __IEEE802154_ATTR_MAX, }; diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 521edcb0e586..1a98e5014e66 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -122,6 +122,10 @@ struct ieee802154_dev { * Enables or disables listen before talk on the device. Called with * pib_lock held. * Returns either zero, or negative errno. + * + * set_cca_mode + * Sets the CCA mode used by the device. Called with pib_lock held. + * Returns either zero, or negative errno. */ struct ieee802154_ops { struct module *owner; @@ -140,6 +144,7 @@ struct ieee802154_ops { u8 addr[IEEE802154_ADDR_LEN]); int (*set_txpower)(struct ieee802154_dev *dev, int db); int (*set_lbt)(struct ieee802154_dev *dev, bool on); + int (*set_cca_mode)(struct ieee802154_dev *dev, u8 mode); }; /* Basic interface to register ieee802154 device */ diff --git a/include/net/wpan-phy.h b/include/net/wpan-phy.h index 804e6c4f5f8a..03b59051972d 100644 --- a/include/net/wpan-phy.h +++ b/include/net/wpan-phy.h @@ -58,6 +58,7 @@ struct wpan_phy { int (*set_txpower)(struct wpan_phy *phy, int db); int (*set_lbt)(struct wpan_phy *phy, bool on); + int (*set_cca_mode)(struct wpan_phy *phy, u8 cca_mode); char priv[0] __attribute__((__aligned__(NETDEV_ALIGN))); }; diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index f029310b0662..36f58d633868 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -57,7 +57,8 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, nla_put_u8(msg, IEEE802154_ATTR_PAGE, phy->current_page) || nla_put_u8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel) || nla_put_s8(msg, IEEE802154_ATTR_TXPOWER, phy->transmit_power) || - nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, phy->lbt)) + nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, phy->lbt) || + nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE, phy->cca_mode)) goto nla_put_failure; for (i = 0; i < 32; i++) { if (phy->channels_supported[i]) @@ -385,6 +386,23 @@ static int phy_set_lbt(struct wpan_phy *phy, struct genl_info *info) return 0; } +static int phy_set_cca_mode(struct wpan_phy *phy, struct genl_info *info) +{ + u8 mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]); + int rc; + + if (mode > 3) + return -EINVAL; + + rc = phy->set_cca_mode(phy, mode); + if (rc < 0) + return rc; + + phy->cca_mode = mode; + + return 0; +} + int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info) { struct wpan_phy *phy; @@ -394,7 +412,8 @@ int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info) pr_debug("%s\n", __func__); if (!info->attrs[IEEE802154_ATTR_PHY_NAME] && - !info->attrs[IEEE802154_ATTR_LBT_ENABLED]) + !info->attrs[IEEE802154_ATTR_LBT_ENABLED] && + !info->attrs[IEEE802154_ATTR_CCA_MODE]) return -EINVAL; name = nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]); @@ -406,7 +425,8 @@ int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info) return -ENODEV; if ((!phy->set_txpower && info->attrs[IEEE802154_ATTR_TXPOWER]) || - (!phy->set_lbt && info->attrs[IEEE802154_ATTR_LBT_ENABLED])) + (!phy->set_lbt && info->attrs[IEEE802154_ATTR_LBT_ENABLED]) || + (!phy->set_cca_mode && info->attrs[IEEE802154_ATTR_CCA_MODE])) goto out; mutex_lock(&phy->pib_lock); @@ -423,6 +443,12 @@ int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info) goto error; } + if (info->attrs[IEEE802154_ATTR_CCA_MODE]) { + rc = phy_set_cca_mode(phy, info); + if (rc < 0) + goto error; + } + mutex_unlock(&phy->pib_lock); wpan_phy_put(phy); diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index a09f6423a6e9..d87c2c904110 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -55,5 +55,6 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_TXPOWER] = { .type = NLA_S8, }, [IEEE802154_ATTR_LBT_ENABLED] = { .type = NLA_U8, }, + [IEEE802154_ATTR_CCA_MODE] = { .type = NLA_U8, }, }; diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c index 56338c8cfc33..4965e4ce6b5b 100644 --- a/net/mac802154/ieee802154_dev.c +++ b/net/mac802154/ieee802154_dev.c @@ -185,6 +185,16 @@ static int mac802154_set_lbt(struct wpan_phy *phy, bool on) return priv->ops->set_lbt(&priv->hw, on); } +static int mac802154_set_cca_mode(struct wpan_phy *phy, u8 mode) +{ + struct mac802154_priv *priv = wpan_phy_priv(phy); + + if (!priv->ops->set_cca_mode) + return -ENOTSUPP; + + return priv->ops->set_cca_mode(&priv->hw, mode); +} + struct ieee802154_dev * ieee802154_alloc_device(size_t priv_data_len, struct ieee802154_ops *ops) { @@ -264,6 +274,7 @@ int ieee802154_register_device(struct ieee802154_dev *dev) priv->phy->del_iface = mac802154_del_iface; priv->phy->set_txpower = mac802154_set_txpower; priv->phy->set_lbt = mac802154_set_lbt; + priv->phy->set_cca_mode = mac802154_set_cca_mode; rc = wpan_phy_register(priv->phy); if (rc < 0) -- cgit v1.2.3-71-gd317 From 6ca001978dce0d50ebac01a38d6287f241a520c6 Mon Sep 17 00:00:00 2001 From: Phoebe Buckheister Date: Mon, 17 Feb 2014 11:34:12 +0100 Subject: ieee802154: add support for setting CCA energy detection levels Since three of the four clear channel assesment modes make use of energy detection, provide an API to set the energy detection threshold. Driver support for this is available in at86rf230 for the RF212 chips. Since for these chips the minimal energy detection threshold depends on page and channel used, add a field to struct at86rf230_local that stores the minimal threshold. Actual ED thresholds are configured as offsets from this value. For RF212, setting the ED threshold will not work before a channel/page has been set due to the dependency of energy detection in the chip and the actual channel/page selected. Signed-off-by: Phoebe Buckheister Signed-off-by: David S. Miller --- drivers/net/ieee802154/at86rf230.c | 26 ++++++++++++++++++++++++-- include/linux/nl802154.h | 1 + include/net/mac802154.h | 7 +++++++ include/net/wpan-phy.h | 2 ++ net/ieee802154/nl-phy.c | 30 +++++++++++++++++++++++++++--- net/ieee802154/nl_policy.c | 1 + net/mac802154/ieee802154_dev.c | 11 +++++++++++ 7 files changed, 73 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index c60871aff333..20596be61028 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -52,6 +52,8 @@ struct at86rf230_local { spinlock_t lock; bool irq_busy; bool is_tx; + + int rssi_base_val; }; static inline int is_rf212(struct at86rf230_local *local) @@ -580,6 +582,8 @@ at86rf230_stop(struct ieee802154_dev *dev) static int at86rf230_set_channel(struct at86rf230_local *lp, int page, int channel) { + lp->rssi_base_val = -91; + return at86rf230_write_subreg(lp, SR_CHANNEL, channel); } @@ -595,10 +599,13 @@ at86rf212_set_channel(struct at86rf230_local *lp, int page, int channel) if (rc < 0) return rc; - if (page == 0) + if (page == 0) { rc = at86rf230_write_subreg(lp, SR_BPSK_QPSK, 0); - else + lp->rssi_base_val = -100; + } else { rc = at86rf230_write_subreg(lp, SR_BPSK_QPSK, 1); + lp->rssi_base_val = -98; + } if (rc < 0) return rc; @@ -802,6 +809,20 @@ at86rf212_set_cca_mode(struct ieee802154_dev *dev, u8 mode) return at86rf230_write_subreg(lp, SR_CCA_MODE, mode); } +static int +at86rf212_set_cca_ed_level(struct ieee802154_dev *dev, s32 level) +{ + struct at86rf230_local *lp = dev->priv; + int desens_steps; + + if (level < lp->rssi_base_val || level > 30) + return -EINVAL; + + desens_steps = (level - lp->rssi_base_val) * 100 / 207; + + return at86rf230_write_subreg(lp, SR_CCA_ED_THRES, desens_steps); +} + static struct ieee802154_ops at86rf230_ops = { .owner = THIS_MODULE, .xmit = at86rf230_xmit, @@ -823,6 +844,7 @@ static struct ieee802154_ops at86rf212_ops = { .set_txpower = at86rf212_set_txpower, .set_lbt = at86rf212_set_lbt, .set_cca_mode = at86rf212_set_cca_mode, + .set_cca_ed_level = at86rf212_set_cca_ed_level, }; static void at86rf230_irqwork(struct work_struct *work) diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index 5edefc14bd83..0594a0ae71ba 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -73,6 +73,7 @@ enum { IEEE802154_ATTR_TXPOWER, IEEE802154_ATTR_LBT_ENABLED, IEEE802154_ATTR_CCA_MODE, + IEEE802154_ATTR_CCA_ED_LEVEL, __IEEE802154_ATTR_MAX, }; diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 1a98e5014e66..15fe6bca80f0 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -126,6 +126,11 @@ struct ieee802154_dev { * set_cca_mode * Sets the CCA mode used by the device. Called with pib_lock held. * Returns either zero, or negative errno. + * + * set_cca_ed_level + * Sets the CCA energy detection threshold in dBm. Called with pib_lock + * held. + * Returns either zero, or negative errno. */ struct ieee802154_ops { struct module *owner; @@ -145,6 +150,8 @@ struct ieee802154_ops { int (*set_txpower)(struct ieee802154_dev *dev, int db); int (*set_lbt)(struct ieee802154_dev *dev, bool on); int (*set_cca_mode)(struct ieee802154_dev *dev, u8 mode); + int (*set_cca_ed_level)(struct ieee802154_dev *dev, + s32 level); }; /* Basic interface to register ieee802154 device */ diff --git a/include/net/wpan-phy.h b/include/net/wpan-phy.h index 03b59051972d..0b570ad5e5fa 100644 --- a/include/net/wpan-phy.h +++ b/include/net/wpan-phy.h @@ -48,6 +48,7 @@ struct wpan_phy { u8 cca_mode; bool lbt; + s32 cca_ed_level; struct device dev; int idx; @@ -59,6 +60,7 @@ struct wpan_phy { int (*set_txpower)(struct wpan_phy *phy, int db); int (*set_lbt)(struct wpan_phy *phy, bool on); int (*set_cca_mode)(struct wpan_phy *phy, u8 cca_mode); + int (*set_cca_ed_level)(struct wpan_phy *phy, int level); char priv[0] __attribute__((__aligned__(NETDEV_ALIGN))); }; diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 36f58d633868..0af0d424dee0 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -58,7 +58,8 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, nla_put_u8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel) || nla_put_s8(msg, IEEE802154_ATTR_TXPOWER, phy->transmit_power) || nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, phy->lbt) || - nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE, phy->cca_mode)) + nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE, phy->cca_mode) || + nla_put_s32(msg, IEEE802154_ATTR_CCA_ED_LEVEL, phy->cca_ed_level)) goto nla_put_failure; for (i = 0; i < 32; i++) { if (phy->channels_supported[i]) @@ -403,6 +404,20 @@ static int phy_set_cca_mode(struct wpan_phy *phy, struct genl_info *info) return 0; } +static int phy_set_cca_ed_level(struct wpan_phy *phy, struct genl_info *info) +{ + s32 level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]); + int rc; + + rc = phy->set_cca_ed_level(phy, level); + if (rc < 0) + return rc; + + phy->cca_ed_level = level; + + return 0; +} + int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info) { struct wpan_phy *phy; @@ -413,7 +428,8 @@ int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[IEEE802154_ATTR_PHY_NAME] && !info->attrs[IEEE802154_ATTR_LBT_ENABLED] && - !info->attrs[IEEE802154_ATTR_CCA_MODE]) + !info->attrs[IEEE802154_ATTR_CCA_MODE] && + !info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) return -EINVAL; name = nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]); @@ -426,7 +442,9 @@ int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info) if ((!phy->set_txpower && info->attrs[IEEE802154_ATTR_TXPOWER]) || (!phy->set_lbt && info->attrs[IEEE802154_ATTR_LBT_ENABLED]) || - (!phy->set_cca_mode && info->attrs[IEEE802154_ATTR_CCA_MODE])) + (!phy->set_cca_mode && info->attrs[IEEE802154_ATTR_CCA_MODE]) || + (!phy->set_cca_ed_level && + info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL])) goto out; mutex_lock(&phy->pib_lock); @@ -449,6 +467,12 @@ int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info) goto error; } + if (info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) { + rc = phy_set_cca_ed_level(phy, info); + if (rc < 0) + goto error; + } + mutex_unlock(&phy->pib_lock); wpan_phy_put(phy); diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index d87c2c904110..55b5616295ff 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -56,5 +56,6 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_TXPOWER] = { .type = NLA_S8, }, [IEEE802154_ATTR_LBT_ENABLED] = { .type = NLA_U8, }, [IEEE802154_ATTR_CCA_MODE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_CCA_ED_LEVEL] = { .type = NLA_S32, }, }; diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c index 4965e4ce6b5b..4707f36546d3 100644 --- a/net/mac802154/ieee802154_dev.c +++ b/net/mac802154/ieee802154_dev.c @@ -195,6 +195,16 @@ static int mac802154_set_cca_mode(struct wpan_phy *phy, u8 mode) return priv->ops->set_cca_mode(&priv->hw, mode); } +static int mac802154_set_cca_ed_level(struct wpan_phy *phy, s32 level) +{ + struct mac802154_priv *priv = wpan_phy_priv(phy); + + if (!priv->ops->set_cca_ed_level) + return -ENOTSUPP; + + return priv->ops->set_cca_ed_level(&priv->hw, level); +} + struct ieee802154_dev * ieee802154_alloc_device(size_t priv_data_len, struct ieee802154_ops *ops) { @@ -275,6 +285,7 @@ int ieee802154_register_device(struct ieee802154_dev *dev) priv->phy->set_txpower = mac802154_set_txpower; priv->phy->set_lbt = mac802154_set_lbt; priv->phy->set_cca_mode = mac802154_set_cca_mode; + priv->phy->set_cca_ed_level = mac802154_set_cca_ed_level; rc = wpan_phy_register(priv->phy); if (rc < 0) -- cgit v1.2.3-71-gd317 From 4244db1b0b7bc9ab7b67d8c1c38de6cf15bc87a8 Mon Sep 17 00:00:00 2001 From: Phoebe Buckheister Date: Mon, 17 Feb 2014 11:34:14 +0100 Subject: ieee802154: add netlink APIs for smartMAC configuration Introduce new netlink attributes for SET_PHY_ATTRS: * CSMA minimal backoff exponent * CSMA maximal backoff exponent * CSMA retry limit * frame retransmission limit The CSMA attributes shall correspond to minBE, maxBE and maxCSMABackoffs of 802.15.4, respectively. The frame retransmission shall correspond to maxFrameRetries of 802.15.4, unless given as -1: then the old behaviour of the stack shall apply. For RF2xy, the old behaviour is to not do channel sensing at all and simply send *right now*, which is not intended behaviour for most applications and actually prohibited for some channel/page combinations. For all values except frame retransmission limit, the defaults of 802.15.4 apply. Frame retransmission limits are set to -1 to indicate backward-compatible behaviour. Signed-off-by: Phoebe Buckheister Signed-off-by: David S. Miller --- include/linux/nl802154.h | 5 +++ include/net/mac802154.h | 12 ++++++++ include/net/wpan-phy.h | 7 +++++ net/ieee802154/nl-phy.c | 69 ++++++++++++++++++++++++++++++++++++++++-- net/ieee802154/nl_policy.c | 5 +++ net/ieee802154/wpan-class.c | 6 ++++ net/mac802154/ieee802154_dev.c | 23 ++++++++++++++ 7 files changed, 125 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index 0594a0ae71ba..e110b8c266f5 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -74,6 +74,11 @@ enum { IEEE802154_ATTR_LBT_ENABLED, IEEE802154_ATTR_CCA_MODE, IEEE802154_ATTR_CCA_ED_LEVEL, + IEEE802154_ATTR_CSMA_RETRIES, + IEEE802154_ATTR_CSMA_MIN_BE, + IEEE802154_ATTR_CSMA_MAX_BE, + + IEEE802154_ATTR_FRAME_RETRIES, __IEEE802154_ATTR_MAX, }; diff --git a/include/net/mac802154.h b/include/net/mac802154.h index 15fe6bca80f0..8ca3d04e7558 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -131,6 +131,14 @@ struct ieee802154_dev { * Sets the CCA energy detection threshold in dBm. Called with pib_lock * held. * Returns either zero, or negative errno. + * + * set_csma_params + * Sets the CSMA parameter set for the PHY. Called with pib_lock held. + * Returns either zero, or negative errno. + * + * set_frame_retries + * Sets the retransmission attempt limit. Called with pib_lock held. + * Returns either zero, or negative errno. */ struct ieee802154_ops { struct module *owner; @@ -152,6 +160,10 @@ struct ieee802154_ops { int (*set_cca_mode)(struct ieee802154_dev *dev, u8 mode); int (*set_cca_ed_level)(struct ieee802154_dev *dev, s32 level); + int (*set_csma_params)(struct ieee802154_dev *dev, + u8 min_be, u8 max_be, u8 retries); + int (*set_frame_retries)(struct ieee802154_dev *dev, + s8 retries); }; /* Basic interface to register ieee802154 device */ diff --git a/include/net/wpan-phy.h b/include/net/wpan-phy.h index 0b570ad5e5fa..10ab0fc6d4f7 100644 --- a/include/net/wpan-phy.h +++ b/include/net/wpan-phy.h @@ -46,6 +46,10 @@ struct wpan_phy { u32 channels_supported[32]; s8 transmit_power; u8 cca_mode; + u8 min_be; + u8 max_be; + u8 csma_retries; + s8 frame_retries; bool lbt; s32 cca_ed_level; @@ -61,6 +65,9 @@ struct wpan_phy { int (*set_lbt)(struct wpan_phy *phy, bool on); int (*set_cca_mode)(struct wpan_phy *phy, u8 cca_mode); int (*set_cca_ed_level)(struct wpan_phy *phy, int level); + int (*set_csma_params)(struct wpan_phy *phy, u8 min_be, u8 max_be, + u8 retries); + int (*set_frame_retries)(struct wpan_phy *phy, s8 retries); char priv[0] __attribute__((__aligned__(NETDEV_ALIGN))); }; diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 0af0d424dee0..c9dfd6f59e34 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -59,7 +59,11 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, nla_put_s8(msg, IEEE802154_ATTR_TXPOWER, phy->transmit_power) || nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, phy->lbt) || nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE, phy->cca_mode) || - nla_put_s32(msg, IEEE802154_ATTR_CCA_ED_LEVEL, phy->cca_ed_level)) + nla_put_s32(msg, IEEE802154_ATTR_CCA_ED_LEVEL, phy->cca_ed_level) || + nla_put_u8(msg, IEEE802154_ATTR_CSMA_RETRIES, phy->csma_retries) || + nla_put_u8(msg, IEEE802154_ATTR_CSMA_MIN_BE, phy->min_be) || + nla_put_u8(msg, IEEE802154_ATTR_CSMA_MAX_BE, phy->max_be) || + nla_put_s8(msg, IEEE802154_ATTR_FRAME_RETRIES, phy->frame_retries)) goto nla_put_failure; for (i = 0; i < 32; i++) { if (phy->channels_supported[i]) @@ -418,6 +422,49 @@ static int phy_set_cca_ed_level(struct wpan_phy *phy, struct genl_info *info) return 0; } +static int phy_set_csma_params(struct wpan_phy *phy, struct genl_info *info) +{ + int rc; + u8 min_be = phy->min_be; + u8 max_be = phy->max_be; + u8 retries = phy->csma_retries; + + if (info->attrs[IEEE802154_ATTR_CSMA_RETRIES]) + retries = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_RETRIES]); + if (info->attrs[IEEE802154_ATTR_CSMA_MIN_BE]) + min_be = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_MIN_BE]); + if (info->attrs[IEEE802154_ATTR_CSMA_MAX_BE]) + max_be = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_MAX_BE]); + + if (retries > 5 || max_be > 8 || min_be > max_be || + retries < -1 || retries > 7) + return -EINVAL; + + rc = phy->set_csma_params(phy, min_be, max_be, retries); + if (rc < 0) + return rc; + + phy->min_be = min_be; + phy->max_be = max_be; + phy->csma_retries = retries; + + return 0; +} + +static int phy_set_frame_retries(struct wpan_phy *phy, struct genl_info *info) +{ + s8 retries = nla_get_s8(info->attrs[IEEE802154_ATTR_FRAME_RETRIES]); + int rc; + + rc = phy->set_frame_retries(phy, retries); + if (rc < 0) + return rc; + + phy->frame_retries = retries; + + return 0; +} + int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info) { struct wpan_phy *phy; @@ -429,7 +476,11 @@ int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[IEEE802154_ATTR_PHY_NAME] && !info->attrs[IEEE802154_ATTR_LBT_ENABLED] && !info->attrs[IEEE802154_ATTR_CCA_MODE] && - !info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) + !info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL] && + !info->attrs[IEEE802154_ATTR_CSMA_RETRIES] && + !info->attrs[IEEE802154_ATTR_CSMA_MIN_BE] && + !info->attrs[IEEE802154_ATTR_CSMA_MAX_BE] && + !info->attrs[IEEE802154_ATTR_FRAME_RETRIES]) return -EINVAL; name = nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]); @@ -473,6 +524,20 @@ int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info) goto error; } + if (info->attrs[IEEE802154_ATTR_CSMA_RETRIES] || + info->attrs[IEEE802154_ATTR_CSMA_MIN_BE] || + info->attrs[IEEE802154_ATTR_CSMA_MAX_BE]) { + rc = phy_set_csma_params(phy, info); + if (rc < 0) + goto error; + } + + if (info->attrs[IEEE802154_ATTR_FRAME_RETRIES]) { + rc = phy_set_frame_retries(phy, info); + if (rc < 0) + goto error; + } + mutex_unlock(&phy->pib_lock); wpan_phy_put(phy); diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index 55b5616295ff..fd7be5e45cef 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -57,5 +57,10 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_LBT_ENABLED] = { .type = NLA_U8, }, [IEEE802154_ATTR_CCA_MODE] = { .type = NLA_U8, }, [IEEE802154_ATTR_CCA_ED_LEVEL] = { .type = NLA_S32, }, + [IEEE802154_ATTR_CSMA_RETRIES] = { .type = NLA_U8, }, + [IEEE802154_ATTR_CSMA_MIN_BE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_CSMA_MAX_BE] = { .type = NLA_U8, }, + + [IEEE802154_ATTR_FRAME_RETRIES] = { .type = NLA_S8, }, }; diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index 8d6f6704da84..edd0962d55f9 100644 --- a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -169,6 +169,12 @@ struct wpan_phy *wpan_phy_alloc(size_t priv_size) phy->current_channel = -1; /* not initialised */ phy->current_page = 0; /* for compatibility */ + /* defaults per 802.15.4-2011 */ + phy->min_be = 3; + phy->max_be = 5; + phy->csma_retries = 4; + phy->frame_retries = -1; /* for compatibility, actual default is 3 */ + return phy; out: diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c index 4707f36546d3..b75bb01e5c6b 100644 --- a/net/mac802154/ieee802154_dev.c +++ b/net/mac802154/ieee802154_dev.c @@ -205,6 +205,27 @@ static int mac802154_set_cca_ed_level(struct wpan_phy *phy, s32 level) return priv->ops->set_cca_ed_level(&priv->hw, level); } +static int mac802154_set_csma_params(struct wpan_phy *phy, u8 min_be, + u8 max_be, u8 retries) +{ + struct mac802154_priv *priv = wpan_phy_priv(phy); + + if (!priv->ops->set_csma_params) + return -ENOTSUPP; + + return priv->ops->set_csma_params(&priv->hw, min_be, max_be, retries); +} + +static int mac802154_set_frame_retries(struct wpan_phy *phy, s8 retries) +{ + struct mac802154_priv *priv = wpan_phy_priv(phy); + + if (!priv->ops->set_frame_retries) + return -ENOTSUPP; + + return priv->ops->set_frame_retries(&priv->hw, retries); +} + struct ieee802154_dev * ieee802154_alloc_device(size_t priv_data_len, struct ieee802154_ops *ops) { @@ -286,6 +307,8 @@ int ieee802154_register_device(struct ieee802154_dev *dev) priv->phy->set_lbt = mac802154_set_lbt; priv->phy->set_cca_mode = mac802154_set_cca_mode; priv->phy->set_cca_ed_level = mac802154_set_cca_ed_level; + priv->phy->set_csma_params = mac802154_set_csma_params; + priv->phy->set_frame_retries = mac802154_set_frame_retries; rc = wpan_phy_register(priv->phy); if (rc < 0) -- cgit v1.2.3-71-gd317 From b075dd40c95d11c2c8690f6c4d6232fc0d9e7f56 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 18 Feb 2014 02:19:26 -0300 Subject: Bluetooth: allocate static minor for vhci Commit bfacbb9 (Bluetooth: Use devname:vhci module alias for virtual HCI driver) added the module alias to hci_vhci module so it's possible to create the /dev/vhci node. However creating an alias without specifying the minor doesn't allow us to create the node ahead, triggerring module auto-load when it's first accessed. Starting with depmod from kmod 16 we started to warn if there's a devname alias without specifying the major and minor. Let's do the same done for uhid, kvm, fuse and others, specifying a fixed minor. In systems with systemd as the init the following will happen: on early boot systemd will call "kmod static-nodes" to read /lib/modules/$(uname -r)/modules.devname and then create the nodes. When first accessed these "dead" nodes will trigger the module loading. Signed-off-by: Lucas De Marchi Acked-by: Greg Kroah-Hartman Signed-off-by: Marcel Holtmann --- Documentation/devices.txt | 1 + drivers/bluetooth/hci_vhci.c | 3 ++- include/linux/miscdevice.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/devices.txt b/Documentation/devices.txt index 10378cc48374..04356f5bc3af 100644 --- a/Documentation/devices.txt +++ b/Documentation/devices.txt @@ -353,6 +353,7 @@ Your cooperation is appreciated. 133 = /dev/exttrp External device trap 134 = /dev/apm_bios Advanced Power Management BIOS 135 = /dev/rtc Real Time Clock + 137 = /dev/vhci Bluetooth virtual HCI driver 139 = /dev/openprom SPARC OpenBoot PROM 140 = /dev/relay8 Berkshire Products Octal relay card 141 = /dev/relay16 Berkshire Products ISO-16 relay card diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index 1ef6990a5c7e..add1c6a72063 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -359,7 +359,7 @@ static const struct file_operations vhci_fops = { static struct miscdevice vhci_miscdev= { .name = "vhci", .fops = &vhci_fops, - .minor = MISC_DYNAMIC_MINOR, + .minor = VHCI_MINOR, }; static int __init vhci_init(void) @@ -385,3 +385,4 @@ MODULE_DESCRIPTION("Bluetooth virtual HCI driver ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS("devname:vhci"); +MODULE_ALIAS_MISCDEV(VHCI_MINOR); diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 3737f7218f51..7bb6148d990f 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -23,6 +23,7 @@ #define TEMP_MINOR 131 /* Temperature Sensor */ #define RTC_MINOR 135 #define EFI_RTC_MINOR 136 /* EFI Time services */ +#define VHCI_MINOR 137 #define SUN_OPENPROM_MINOR 139 #define DMAPI_MINOR 140 /* DMAPI */ #define NVRAM_MINOR 144 -- cgit v1.2.3-71-gd317 From f8ff080dacecc5d0ce3579941b664bac3a4405c2 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Tue, 18 Feb 2014 07:48:45 +0100 Subject: bonding: remove useless updating of slave->dev->last_rx Now that all the logic is handled via last_arp_rx, we don't need to use last_rx. CC: Jay Vosburgh CC: Andy Gospodarek CC: "David S. Miller" Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 3 --- include/linux/netdevice.h | 8 +------- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e7aab9a6a670..14e023de09d6 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1115,9 +1115,6 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) slave = bond_slave_get_rcu(skb->dev); bond = slave->bond; - if (bond->params.arp_interval) - slave->dev->last_rx = jiffies; - recv_probe = ACCESS_ONCE(bond->recv_probe); if (recv_probe) { ret = recv_probe(skb, bond, slave); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 430c51aed6a4..891432a994c0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1312,13 +1312,7 @@ struct net_device { /* * Cache lines mostly used on receive path (including eth_type_trans()) */ - unsigned long last_rx; /* Time of last Rx - * This should not be set in - * drivers, unless really needed, - * because network stack (bonding) - * use it if/when necessary, to - * avoid dirtying this cache line. - */ + unsigned long last_rx; /* Time of last Rx */ /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; /* hw address, (before bcast -- cgit v1.2.3-71-gd317 From 0eb5db7ad302a24fe6f0eb4bfd235357047a28db Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 18 Feb 2014 18:06:48 +0000 Subject: netfilter: nfnetlink: add rcu_dereference_protected() helpers Add a lockdep_nfnl_is_held() function and a nfnl_dereference() macro for RCU dereferences protected by a NFNL subsystem mutex. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 21 +++++++++++++++++++++ net/netfilter/nfnetlink.c | 8 ++++++++ 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 28c74367e900..e955d4730625 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -44,6 +44,27 @@ int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, void nfnl_lock(__u8 subsys_id); void nfnl_unlock(__u8 subsys_id); +#ifdef CONFIG_PROVE_LOCKING +int lockdep_nfnl_is_held(__u8 subsys_id); +#else +static inline int lockdep_nfnl_is_held(__u8 subsys_id) +{ + return 1; +} +#endif /* CONFIG_PROVE_LOCKING */ + +/* + * nfnl_dereference - fetch RCU pointer when updates are prevented by subsys mutex + * + * @p: The pointer to read, prior to dereferencing + * @ss: The nfnetlink subsystem ID + * + * Return the value of the specified RCU-protected pointer, but omit + * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because + * caller holds the NFNL subsystem mutex. + */ +#define nfnl_dereference(p, ss) \ + rcu_dereference_protected(p, lockdep_nfnl_is_held(ss)) #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \ MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys)) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 046aa13b4fea..e8138da4c14f 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -61,6 +61,14 @@ void nfnl_unlock(__u8 subsys_id) } EXPORT_SYMBOL_GPL(nfnl_unlock); +#ifdef CONFIG_PROVE_LOCKING +int lockdep_nfnl_is_held(u8 subsys_id) +{ + return lockdep_is_held(&table[subsys_id].mutex); +} +EXPORT_SYMBOL_GPL(lockdep_nfnl_is_held); +#endif + int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) { nfnl_lock(n->subsys_id); -- cgit v1.2.3-71-gd317 From 3f85944fe207d0225ef21a2c0951d4946fc9a95d Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Tue, 25 Feb 2014 18:17:50 +0200 Subject: net: Add sysfs file for port number Add a sysfs file to enable user space to query the device port number used by a netdevice instance. This is needed for devices that have multiple ports on the same PCI function. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ net/core/net-sysfs.c | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5e4756553c18..5e84483c0650 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1279,6 +1279,10 @@ struct net_device { * that share the same link * layer address */ + unsigned short dev_port; /* Used to differentiate + * devices that share the same + * function + */ spinlock_t addr_list_lock; struct netdev_hw_addr_list uc; /* Unicast mac addresses */ struct netdev_hw_addr_list mc; /* Multicast mac addresses */ diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 73aa594674ef..daed9a64c6f6 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -104,6 +104,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, } NETDEVICE_SHOW_RO(dev_id, fmt_hex); +NETDEVICE_SHOW_RO(dev_port, fmt_dec); NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec); NETDEVICE_SHOW_RO(addr_len, fmt_dec); NETDEVICE_SHOW_RO(iflink, fmt_dec); @@ -373,6 +374,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_netdev_group.attr, &dev_attr_type.attr, &dev_attr_dev_id.attr, + &dev_attr_dev_port.attr, &dev_attr_iflink.attr, &dev_attr_ifindex.attr, &dev_attr_addr_assign_type.attr, -- cgit v1.2.3-71-gd317 From 363ec392352e55c61ce2799c3f15f89f9429bba7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Feb 2014 14:02:11 -0800 Subject: net: add skb_mstamp infrastructure ktime_get() is too expensive on some cases, and we'd like to get usec resolution timestamps in TCP stack. This patch adds a light weight facility using a combination of local_clock() and jiffies samples. Instead of : u64 t0, t1; t0 = ktime_get(); // stuff t1 = ktime_get(); delta_us = ktime_us_delta(t1, t0); use : struct skb_mstamp t0, t1; skb_mstamp_get(&t0); // stuff skb_mstamp_get(&t1); delta_us = skb_mstamp_us_delta(&t1, &t0); Note : local_clock() might have a (bounded) drift between cpus. Do not use this infra in place of ktime_get() without understanding the issues. Signed-off-by: Eric Dumazet Cc: Stephen Hemminger Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Larry Brakmo Cc: Julian Anastasov Signed-off-by: David S. Miller --- include/linux/skbuff.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 11b6925f0e96..6d8ed22accb4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -32,6 +32,7 @@ #include #include #include +#include #include /* A. Checksumming of received packets by device. @@ -356,11 +357,62 @@ typedef unsigned int sk_buff_data_t; typedef unsigned char *sk_buff_data_t; #endif +/** + * struct skb_mstamp - multi resolution time stamps + * @stamp_us: timestamp in us resolution + * @stamp_jiffies: timestamp in jiffies + */ +struct skb_mstamp { + union { + u64 v64; + struct { + u32 stamp_us; + u32 stamp_jiffies; + }; + }; +}; + +/** + * skb_mstamp_get - get current timestamp + * @cl: place to store timestamps + */ +static inline void skb_mstamp_get(struct skb_mstamp *cl) +{ + u64 val = local_clock(); + + do_div(val, NSEC_PER_USEC); + cl->stamp_us = (u32)val; + cl->stamp_jiffies = (u32)jiffies; +} + +/** + * skb_mstamp_delta - compute the difference in usec between two skb_mstamp + * @t1: pointer to newest sample + * @t0: pointer to oldest sample + */ +static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, + const struct skb_mstamp *t0) +{ + s32 delta_us = t1->stamp_us - t0->stamp_us; + u32 delta_jiffies = t1->stamp_jiffies - t0->stamp_jiffies; + + /* If delta_us is negative, this might be because interval is too big, + * or local_clock() drift is too big : fallback using jiffies. + */ + if (delta_us <= 0 || + delta_jiffies >= (INT_MAX / (USEC_PER_SEC / HZ))) + + delta_us = jiffies_to_usecs(delta_jiffies); + + return delta_us; +} + + /** * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list - * @tstamp: Time we arrived + * @tstamp: Time we arrived/left * @sk: Socket we are owned by * @dev: Device we arrived on/are leaving by * @cb: Control buffer. Free for use by every layer. Put private vars here @@ -429,7 +481,10 @@ struct sk_buff { struct sk_buff *next; struct sk_buff *prev; - ktime_t tstamp; + union { + ktime_t tstamp; + struct skb_mstamp skb_mstamp; + }; struct sock *sk; struct net_device *dev; -- cgit v1.2.3-71-gd317 From 740b0f1841f6e39085b711d41db9ffb07198682b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Feb 2014 14:02:48 -0800 Subject: tcp: switch rtt estimations to usec resolution Upcoming congestion controls for TCP require usec resolution for RTT estimations. Millisecond resolution is simply not enough these days. FQ/pacing in DC environments also require this change for finer control and removal of bimodal behavior due to the current hack in tcp_update_pacing_rate() for 'small rtt' TCP_CONG_RTT_STAMP is no longer needed. As Julian Anastasov pointed out, we need to keep user compatibility : tcp_metrics used to export RTT and RTTVAR in msec resolution, so we added RTT_US and RTTVAR_US. An iproute2 patch is needed to use the new attributes if provided by the kernel. In this example ss command displays a srtt of 32 usecs (10Gbit link) lpk51:~# ./ss -i dst lpk52 Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port tcp ESTAB 0 1 10.246.11.51:42959 10.246.11.52:64614 cubic wscale:6,6 rto:201 rtt:0.032/0.001 ato:40 mss:1448 cwnd:10 send 3620.0Mbps pacing_rate 7240.0Mbps unacked:1 rcv_rtt:993 rcv_space:29559 Updated iproute2 ip command displays : lpk51:~# ./ip tcp_metrics | grep 10.246.11.52 10.246.11.52 age 561.914sec cwnd 10 rtt 274us rttvar 213us source 10.246.11.51 Old binary displays : lpk51:~# ip tcp_metrics | grep 10.246.11.52 10.246.11.52 age 561.914sec cwnd 10 rtt 250us rttvar 125us source 10.246.11.51 With help from Julian Anastasov, Stephen Hemminger and Yuchung Cheng Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Cc: Stephen Hemminger Cc: Yuchung Cheng Cc: Larry Brakmo Cc: Julian Anastasov Signed-off-by: David S. Miller --- include/linux/tcp.h | 8 +- include/net/tcp.h | 10 ++- include/uapi/linux/tcp_metrics.h | 7 +- net/ipv4/tcp.c | 8 +- net/ipv4/tcp_cubic.c | 4 - net/ipv4/tcp_hybla.c | 12 +-- net/ipv4/tcp_illinois.c | 1 - net/ipv4/tcp_input.c | 183 +++++++++++++++++++-------------------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_lp.c | 1 - net/ipv4/tcp_metrics.c | 83 ++++++++++-------- net/ipv4/tcp_minisocks.c | 4 +- net/ipv4/tcp_output.c | 15 ++-- net/ipv4/tcp_probe.c | 2 +- net/ipv4/tcp_vegas.c | 1 - net/ipv4/tcp_veno.c | 1 - net/ipv4/tcp_yeah.c | 1 - 17 files changed, 174 insertions(+), 169 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4ad0706d40eb..239946868142 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -201,10 +201,10 @@ struct tcp_sock { u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ /* RTT measurement */ - u32 srtt; /* smoothed round trip time << 3 */ - u32 mdev; /* medium deviation */ - u32 mdev_max; /* maximal mdev for the last rtt period */ - u32 rttvar; /* smoothed mdev_max */ + u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + u32 mdev_us; /* medium deviation */ + u32 mdev_max_us; /* maximal mdev for the last rtt period */ + u32 rttvar_us; /* smoothed mdev_max */ u32 rtt_seq; /* sequence number to update rttvar */ u32 packets_out; /* Packets which are "in flight" */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 1f820537741a..93eab0b9da60 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -478,7 +479,6 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt); #ifdef CONFIG_SYN_COOKIES -#include /* Syncookies use a monotonic timer which increments every 64 seconds. * This counter is used both as a hash input and partially encoded into @@ -619,7 +619,7 @@ static inline void tcp_bound_rto(const struct sock *sk) static inline u32 __tcp_set_rto(const struct tcp_sock *tp) { - return (tp->srtt >> 3) + tp->rttvar; + return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); } static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) @@ -656,6 +656,11 @@ static inline u32 tcp_rto_min(struct sock *sk) return rto_min; } +static inline u32 tcp_rto_min_us(struct sock *sk) +{ + return jiffies_to_usecs(tcp_rto_min(sk)); +} + /* Compute the actual receive window we are currently advertising. * Rcv_nxt can be after the window if our peer push more data * than the offered window. @@ -778,7 +783,6 @@ enum tcp_ca_event { #define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX) #define TCP_CONG_NON_RESTRICTED 0x1 -#define TCP_CONG_RTT_STAMP 0x2 struct tcp_congestion_ops { struct list_head list; diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h index 54a37b13f2c4..93533926035c 100644 --- a/include/uapi/linux/tcp_metrics.h +++ b/include/uapi/linux/tcp_metrics.h @@ -11,12 +11,15 @@ #define TCP_METRICS_GENL_VERSION 0x1 enum tcp_metric_index { - TCP_METRIC_RTT, - TCP_METRIC_RTTVAR, + TCP_METRIC_RTT, /* in ms units */ + TCP_METRIC_RTTVAR, /* in ms units */ TCP_METRIC_SSTHRESH, TCP_METRIC_CWND, TCP_METRIC_REORDERING, + TCP_METRIC_RTT_US, /* in usec units */ + TCP_METRIC_RTTVAR_US, /* in usec units */ + /* Always last. */ __TCP_METRIC_MAX, }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bed379c7abcd..7374905b3701 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -387,7 +387,7 @@ void tcp_init_sock(struct sock *sk) INIT_LIST_HEAD(&tp->tsq_node); icsk->icsk_rto = TCP_TIMEOUT_INIT; - tp->mdev = TCP_TIMEOUT_INIT; + tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control @@ -2339,7 +2339,7 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); - tp->srtt = 0; + tp->srtt_us = 0; if ((tp->write_seq += tp->max_window + 2) == 0) tp->write_seq = 1; icsk->icsk_backoff = 0; @@ -2783,8 +2783,8 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info) info->tcpi_pmtu = icsk->icsk_pmtu_cookie; info->tcpi_rcv_ssthresh = tp->rcv_ssthresh; - info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3; - info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2; + info->tcpi_rtt = tp->srtt_us >> 3; + info->tcpi_rttvar = tp->mdev_us >> 2; info->tcpi_snd_ssthresh = tp->snd_ssthresh; info->tcpi_snd_cwnd = tp->snd_cwnd; info->tcpi_advmss = tp->advmss; diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 828e4c3ffbaf..8bf224516ba2 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -476,10 +476,6 @@ static int __init cubictcp_register(void) /* divide by bic_scale and by constant Srtt (100ms) */ do_div(cube_factor, bic_scale * 10); - /* hystart needs ms clock resolution */ - if (hystart && HZ < 1000) - cubictcp.flags |= TCP_CONG_RTT_STAMP; - return tcp_register_congestion_control(&cubictcp); } diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 2a1a9e2a4e51..a15a799bf768 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -21,7 +21,7 @@ struct hybla { u32 rho2; /* Rho * Rho, integer part */ u32 rho_3ls; /* Rho parameter, <<3 */ u32 rho2_7ls; /* Rho^2, <<7 */ - u32 minrtt; /* Minimum smoothed round trip time value seen */ + u32 minrtt_us; /* Minimum smoothed round trip time value seen */ }; /* Hybla reference round trip time (default= 1/40 sec = 25 ms), in ms */ @@ -35,7 +35,9 @@ static inline void hybla_recalc_param (struct sock *sk) { struct hybla *ca = inet_csk_ca(sk); - ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8); + ca->rho_3ls = max_t(u32, + tcp_sk(sk)->srtt_us / (rtt0 * USEC_PER_MSEC), + 8U); ca->rho = ca->rho_3ls >> 3; ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1; ca->rho2 = ca->rho2_7ls >> 7; @@ -59,7 +61,7 @@ static void hybla_init(struct sock *sk) hybla_recalc_param(sk); /* set minimum rtt as this is the 1st ever seen */ - ca->minrtt = tp->srtt; + ca->minrtt_us = tp->srtt_us; tp->snd_cwnd = ca->rho; } @@ -94,9 +96,9 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked, int is_slowstart = 0; /* Recalculate rho only if this srtt is the lowest */ - if (tp->srtt < ca->minrtt){ + if (tp->srtt_us < ca->minrtt_us) { hybla_recalc_param(sk); - ca->minrtt = tp->srtt; + ca->minrtt_us = tp->srtt_us; } if (!tcp_is_cwnd_limited(sk, in_flight)) diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index be047c63ca10..863d105e3015 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -325,7 +325,6 @@ static void tcp_illinois_info(struct sock *sk, u32 ext, } static struct tcp_congestion_ops tcp_illinois __read_mostly = { - .flags = TCP_CONG_RTT_STAMP, .init = tcp_illinois_init, .ssthresh = tcp_illinois_ssthresh, .cong_avoid = tcp_illinois_cong_avoid, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 227cba79fa6b..23a41d978fad 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -667,11 +667,11 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) * To save cycles in the RFC 1323 implementation it was better to break * it up into three procedures. -- erics */ -static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) +static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) { struct tcp_sock *tp = tcp_sk(sk); - long m = mrtt; /* RTT */ - u32 srtt = tp->srtt; + long m = mrtt_us; /* RTT */ + u32 srtt = tp->srtt_us; /* The following amusing code comes from Jacobson's * article in SIGCOMM '88. Note that rtt and mdev @@ -694,7 +694,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) srtt += m; /* rtt = 7/8 rtt + 1/8 new */ if (m < 0) { m = -m; /* m is now abs(error) */ - m -= (tp->mdev >> 2); /* similar update on mdev */ + m -= (tp->mdev_us >> 2); /* similar update on mdev */ /* This is similar to one of Eifel findings. * Eifel blocks mdev updates when rtt decreases. * This solution is a bit different: we use finer gain @@ -706,28 +706,29 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) if (m > 0) m >>= 3; } else { - m -= (tp->mdev >> 2); /* similar update on mdev */ + m -= (tp->mdev_us >> 2); /* similar update on mdev */ } - tp->mdev += m; /* mdev = 3/4 mdev + 1/4 new */ - if (tp->mdev > tp->mdev_max) { - tp->mdev_max = tp->mdev; - if (tp->mdev_max > tp->rttvar) - tp->rttvar = tp->mdev_max; + tp->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ + if (tp->mdev_us > tp->mdev_max_us) { + tp->mdev_max_us = tp->mdev_us; + if (tp->mdev_max_us > tp->rttvar_us) + tp->rttvar_us = tp->mdev_max_us; } if (after(tp->snd_una, tp->rtt_seq)) { - if (tp->mdev_max < tp->rttvar) - tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2; + if (tp->mdev_max_us < tp->rttvar_us) + tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2; tp->rtt_seq = tp->snd_nxt; - tp->mdev_max = tcp_rto_min(sk); + tp->mdev_max_us = tcp_rto_min_us(sk); } } else { /* no previous measure. */ srtt = m << 3; /* take the measured time to be rtt */ - tp->mdev = m << 1; /* make sure rto = 3*rtt */ - tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); + tp->mdev_us = m << 1; /* make sure rto = 3*rtt */ + tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk)); + tp->mdev_max_us = tp->rttvar_us; tp->rtt_seq = tp->snd_nxt; } - tp->srtt = max(1U, srtt); + tp->srtt_us = max(1U, srtt); } /* Set the sk_pacing_rate to allow proper sizing of TSO packets. @@ -742,20 +743,12 @@ static void tcp_update_pacing_rate(struct sock *sk) u64 rate; /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */ - rate = (u64)tp->mss_cache * 2 * (HZ << 3); + rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3); rate *= max(tp->snd_cwnd, tp->packets_out); - /* Correction for small srtt and scheduling constraints. - * For small rtt, consider noise is too high, and use - * the minimal value (srtt = 1 -> 125 us for HZ=1000) - * - * We probably need usec resolution in the future. - * Note: This also takes care of possible srtt=0 case, - * when tcp_rtt_estimator() was not yet called. - */ - if (tp->srtt > 8 + 2) - do_div(rate, tp->srtt); + if (likely(tp->srtt_us)) + do_div(rate, tp->srtt_us); /* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate * without any lock. We want to make sure compiler wont store @@ -1122,10 +1115,10 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, } struct tcp_sacktag_state { - int reord; - int fack_count; - int flag; - s32 rtt; /* RTT measured by SACKing never-retransmitted data */ + int reord; + int fack_count; + long rtt_us; /* RTT measured by SACKing never-retransmitted data */ + int flag; }; /* Check if skb is fully within the SACK block. In presence of GSO skbs, @@ -1186,7 +1179,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, static u8 tcp_sacktag_one(struct sock *sk, struct tcp_sacktag_state *state, u8 sacked, u32 start_seq, u32 end_seq, - int dup_sack, int pcount, u32 xmit_time) + int dup_sack, int pcount, + const struct skb_mstamp *xmit_time) { struct tcp_sock *tp = tcp_sk(sk); int fack_count = state->fack_count; @@ -1227,8 +1221,13 @@ static u8 tcp_sacktag_one(struct sock *sk, if (!after(end_seq, tp->high_seq)) state->flag |= FLAG_ORIG_SACK_ACKED; /* Pick the earliest sequence sacked for RTT */ - if (state->rtt < 0) - state->rtt = tcp_time_stamp - xmit_time; + if (state->rtt_us < 0) { + struct skb_mstamp now; + + skb_mstamp_get(&now); + state->rtt_us = skb_mstamp_us_delta(&now, + xmit_time); + } } if (sacked & TCPCB_LOST) { @@ -1287,7 +1286,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, */ tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, start_seq, end_seq, dup_sack, pcount, - TCP_SKB_CB(skb)->when); + &skb->skb_mstamp); if (skb == tp->lost_skb_hint) tp->lost_cnt_hint += pcount; @@ -1565,7 +1564,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, TCP_SKB_CB(skb)->end_seq, dup_sack, tcp_skb_pcount(skb), - TCP_SKB_CB(skb)->when); + &skb->skb_mstamp); if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) @@ -1622,7 +1621,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl static int tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, - u32 prior_snd_una, s32 *sack_rtt) + u32 prior_snd_una, long *sack_rtt_us) { struct tcp_sock *tp = tcp_sk(sk); const unsigned char *ptr = (skb_transport_header(ack_skb) + @@ -1640,7 +1639,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, state.flag = 0; state.reord = tp->packets_out; - state.rtt = -1; + state.rtt_us = -1L; if (!tp->sacked_out) { if (WARN_ON(tp->fackets_out)) @@ -1824,7 +1823,7 @@ out: WARN_ON((int)tp->retrans_out < 0); WARN_ON((int)tcp_packets_in_flight(tp) < 0); #endif - *sack_rtt = state.rtt; + *sack_rtt_us = state.rtt_us; return state.flag; } @@ -2034,10 +2033,12 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag) * available, or RTO is scheduled to fire first. */ if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 || - (flag & FLAG_ECE) || !tp->srtt) + (flag & FLAG_ECE) || !tp->srtt_us) return false; - delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2)); + delay = max(usecs_to_jiffies(tp->srtt_us >> 5), + msecs_to_jiffies(2)); + if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) return false; @@ -2884,7 +2885,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, } static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, - s32 seq_rtt, s32 sack_rtt) + long seq_rtt_us, long sack_rtt_us) { const struct tcp_sock *tp = tcp_sk(sk); @@ -2894,10 +2895,10 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, * is acked (RFC6298). */ if (flag & FLAG_RETRANS_DATA_ACKED) - seq_rtt = -1; + seq_rtt_us = -1L; - if (seq_rtt < 0) - seq_rtt = sack_rtt; + if (seq_rtt_us < 0) + seq_rtt_us = sack_rtt_us; /* RTTM Rule: A TSecr value received in a segment is used to * update the averaged RTT measurement only if the segment @@ -2905,14 +2906,14 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, * left edge of the send window. * See draft-ietf-tcplw-high-performance-00, section 3.3. */ - if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && + if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED) - seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; + seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - tp->rx_opt.rcv_tsecr); - if (seq_rtt < 0) + if (seq_rtt_us < 0) return false; - tcp_rtt_estimator(sk, seq_rtt); + tcp_rtt_estimator(sk, seq_rtt_us); tcp_set_rto(sk); /* RFC6298: only reset backoff on valid RTT measurement. */ @@ -2924,16 +2925,16 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp) { struct tcp_sock *tp = tcp_sk(sk); - s32 seq_rtt = -1; + long seq_rtt_us = -1L; if (synack_stamp && !tp->total_retrans) - seq_rtt = tcp_time_stamp - synack_stamp; + seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp); /* If the ACK acks both the SYNACK and the (Fast Open'd) data packets * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack() */ - if (!tp->srtt) - tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1); + if (!tp->srtt_us) + tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L); } static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) @@ -3022,26 +3023,27 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) * arrived at the other end. */ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, - u32 prior_snd_una, s32 sack_rtt) + u32 prior_snd_una, long sack_rtt_us) { - struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - struct sk_buff *skb; - u32 now = tcp_time_stamp; + struct skb_mstamp first_ackt, last_ackt, now; + struct tcp_sock *tp = tcp_sk(sk); + u32 prior_sacked = tp->sacked_out; + u32 reord = tp->packets_out; bool fully_acked = true; - int flag = 0; + long ca_seq_rtt_us = -1L; + long seq_rtt_us = -1L; + struct sk_buff *skb; u32 pkts_acked = 0; - u32 reord = tp->packets_out; - u32 prior_sacked = tp->sacked_out; - s32 seq_rtt = -1; - s32 ca_seq_rtt = -1; - ktime_t last_ackt = net_invalid_timestamp(); bool rtt_update; + int flag = 0; + + first_ackt.v64 = 0; while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); - u32 acked_pcount; u8 sacked = scb->sacked; + u32 acked_pcount; /* Determine how many packets and what bytes were acked, tso and else */ if (after(scb->end_seq, tp->snd_una)) { @@ -3063,11 +3065,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->retrans_out -= acked_pcount; flag |= FLAG_RETRANS_DATA_ACKED; } else { - ca_seq_rtt = now - scb->when; - last_ackt = skb->tstamp; - if (seq_rtt < 0) { - seq_rtt = ca_seq_rtt; - } + last_ackt = skb->skb_mstamp; + if (!first_ackt.v64) + first_ackt = last_ackt; + if (!(sacked & TCPCB_SACKED_ACKED)) reord = min(pkts_acked, reord); if (!after(scb->end_seq, tp->high_seq)) @@ -3113,7 +3114,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; - rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt); + skb_mstamp_get(&now); + if (first_ackt.v64) { + seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt); + ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt); + } + + rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us); if (flag & FLAG_ACKED) { const struct tcp_congestion_ops *ca_ops @@ -3141,25 +3148,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->fackets_out -= min(pkts_acked, tp->fackets_out); - if (ca_ops->pkts_acked) { - s32 rtt_us = -1; - - /* Is the ACK triggering packet unambiguous? */ - if (!(flag & FLAG_RETRANS_DATA_ACKED)) { - /* High resolution needed and available? */ - if (ca_ops->flags & TCP_CONG_RTT_STAMP && - !ktime_equal(last_ackt, - net_invalid_timestamp())) - rtt_us = ktime_us_delta(ktime_get_real(), - last_ackt); - else if (ca_seq_rtt >= 0) - rtt_us = jiffies_to_usecs(ca_seq_rtt); - } + if (ca_ops->pkts_acked) + ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us); - ca_ops->pkts_acked(sk, pkts_acked, rtt_us); - } - } else if (skb && rtt_update && sack_rtt >= 0 && - sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) { + } else if (skb && rtt_update && sack_rtt_us >= 0 && + sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { /* Do not re-arm RTO if the sack RTT is measured from data sent * after when the head was last (re)transmitted. Otherwise the * timeout may continue to extend in loss recovery. @@ -3369,12 +3362,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; - u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt; + u32 prior_in_flight; u32 prior_fackets; int prior_packets = tp->packets_out; const int prior_unsacked = tp->packets_out - tp->sacked_out; int acked = 0; /* Number of packets newly acked */ - s32 sack_rtt = -1; + long sack_rtt_us = -1L; /* If the ack is older than previous acks * then we can probably ignore it. @@ -3432,7 +3425,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (TCP_SKB_CB(skb)->sacked) flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, - &sack_rtt); + &sack_rtt_us); if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb))) flag |= FLAG_ECE; @@ -3451,7 +3444,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ acked = tp->packets_out; - flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt); + flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, + sack_rtt_us); acked -= tp->packets_out; /* Advance cwnd if state allows */ @@ -3474,8 +3468,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); - if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd) - tcp_update_pacing_rate(sk); + tcp_update_pacing_rate(sk); return 1; no_queue: @@ -3504,7 +3497,7 @@ old_ack: */ if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, - &sack_rtt); + &sack_rtt_us); tcp_fastretrans_alert(sk, acked, prior_unsacked, is_dupack, flag); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3cf976510497..17c0fb172fba 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -435,7 +435,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) break; icsk->icsk_backoff--; - inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) : + inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) : TCP_TIMEOUT_INIT) << icsk->icsk_backoff; tcp_bound_rto(sk); diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 503798f2fcd6..c9aecae31327 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -315,7 +315,6 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us) } static struct tcp_congestion_ops tcp_lp __read_mostly = { - .flags = TCP_CONG_RTT_STAMP, .init = tcp_lp_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_lp_cong_avoid, diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index d547075d8300..dcaf72f10216 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -33,6 +33,11 @@ struct tcp_fastopen_metrics { struct tcp_fastopen_cookie cookie; }; +/* TCP_METRIC_MAX includes 2 extra fields for userspace compatibility + * Kernel only stores RTT and RTTVAR in usec resolution + */ +#define TCP_METRIC_MAX_KERNEL (TCP_METRIC_MAX - 2) + struct tcp_metrics_block { struct tcp_metrics_block __rcu *tcpm_next; struct inetpeer_addr tcpm_saddr; @@ -41,7 +46,7 @@ struct tcp_metrics_block { u32 tcpm_ts; u32 tcpm_ts_stamp; u32 tcpm_lock; - u32 tcpm_vals[TCP_METRIC_MAX + 1]; + u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1]; struct tcp_fastopen_metrics tcpm_fastopen; struct rcu_head rcu_head; @@ -59,12 +64,6 @@ static u32 tcp_metric_get(struct tcp_metrics_block *tm, return tm->tcpm_vals[idx]; } -static u32 tcp_metric_get_jiffies(struct tcp_metrics_block *tm, - enum tcp_metric_index idx) -{ - return msecs_to_jiffies(tm->tcpm_vals[idx]); -} - static void tcp_metric_set(struct tcp_metrics_block *tm, enum tcp_metric_index idx, u32 val) @@ -72,13 +71,6 @@ static void tcp_metric_set(struct tcp_metrics_block *tm, tm->tcpm_vals[idx] = val; } -static void tcp_metric_set_msecs(struct tcp_metrics_block *tm, - enum tcp_metric_index idx, - u32 val) -{ - tm->tcpm_vals[idx] = jiffies_to_msecs(val); -} - static bool addr_same(const struct inetpeer_addr *a, const struct inetpeer_addr *b) { @@ -101,9 +93,11 @@ struct tcpm_hash_bucket { static DEFINE_SPINLOCK(tcp_metrics_lock); -static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst, +static void tcpm_suck_dst(struct tcp_metrics_block *tm, + const struct dst_entry *dst, bool fastopen_clear) { + u32 msval; u32 val; tm->tcpm_stamp = jiffies; @@ -121,8 +115,11 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst, val |= 1 << TCP_METRIC_REORDERING; tm->tcpm_lock = val; - tm->tcpm_vals[TCP_METRIC_RTT] = dst_metric_raw(dst, RTAX_RTT); - tm->tcpm_vals[TCP_METRIC_RTTVAR] = dst_metric_raw(dst, RTAX_RTTVAR); + msval = dst_metric_raw(dst, RTAX_RTT); + tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC; + + msval = dst_metric_raw(dst, RTAX_RTTVAR); + tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC; tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); @@ -384,7 +381,7 @@ void tcp_update_metrics(struct sock *sk) dst_confirm(dst); rcu_read_lock(); - if (icsk->icsk_backoff || !tp->srtt) { + if (icsk->icsk_backoff || !tp->srtt_us) { /* This session failed to estimate rtt. Why? * Probably, no packets returned in time. Reset our * results. @@ -399,8 +396,8 @@ void tcp_update_metrics(struct sock *sk) if (!tm) goto out_unlock; - rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT); - m = rtt - tp->srtt; + rtt = tcp_metric_get(tm, TCP_METRIC_RTT); + m = rtt - tp->srtt_us; /* If newly calculated rtt larger than stored one, store new * one. Otherwise, use EWMA. Remember, rtt overestimation is @@ -408,10 +405,10 @@ void tcp_update_metrics(struct sock *sk) */ if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) { if (m <= 0) - rtt = tp->srtt; + rtt = tp->srtt_us; else rtt -= (m >> 3); - tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt); + tcp_metric_set(tm, TCP_METRIC_RTT, rtt); } if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) { @@ -422,16 +419,16 @@ void tcp_update_metrics(struct sock *sk) /* Scale deviation to rttvar fixed point */ m >>= 1; - if (m < tp->mdev) - m = tp->mdev; + if (m < tp->mdev_us) + m = tp->mdev_us; - var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR); + var = tcp_metric_get(tm, TCP_METRIC_RTTVAR); if (m >= var) var = m; else var -= (var - m) >> 2; - tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var); + tcp_metric_set(tm, TCP_METRIC_RTTVAR, var); } if (tcp_in_initial_slowstart(tp)) { @@ -528,7 +525,7 @@ void tcp_init_metrics(struct sock *sk) tp->reordering = val; } - crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT); + crtt = tcp_metric_get(tm, TCP_METRIC_RTT); rcu_read_unlock(); reset: /* The initial RTT measurement from the SYN/SYN-ACK is not ideal @@ -551,18 +548,20 @@ reset: * to low value, and then abruptly stops to do it and starts to delay * ACKs, wait for troubles. */ - if (crtt > tp->srtt) { + if (crtt > tp->srtt_us) { /* Set RTO like tcp_rtt_estimator(), but from cached RTT. */ - crtt >>= 3; + crtt /= 8 * USEC_PER_MSEC; inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk)); - } else if (tp->srtt == 0) { + } else if (tp->srtt_us == 0) { /* RFC6298: 5.7 We've failed to get a valid RTT sample from * 3WHS. This is most likely due to retransmission, * including spurious one. Reset the RTO back to 3secs * from the more aggressive 1sec to avoid more spurious * retransmission. */ - tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; + tp->rttvar_us = jiffies_to_usecs(TCP_TIMEOUT_FALLBACK); + tp->mdev_us = tp->mdev_max_us = tp->rttvar_us; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; } /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been @@ -809,10 +808,26 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS); if (!nest) goto nla_put_failure; - for (i = 0; i < TCP_METRIC_MAX + 1; i++) { - if (!tm->tcpm_vals[i]) + for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) { + u32 val = tm->tcpm_vals[i]; + + if (!val) continue; - if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0) + if (i == TCP_METRIC_RTT) { + if (nla_put_u32(msg, TCP_METRIC_RTT_US + 1, + val) < 0) + goto nla_put_failure; + n++; + val = max(val / 1000, 1U); + } + if (i == TCP_METRIC_RTTVAR) { + if (nla_put_u32(msg, TCP_METRIC_RTTVAR_US + 1, + val) < 0) + goto nla_put_failure; + n++; + val = max(val / 1000, 1U); + } + if (nla_put_u32(msg, i + 1, val) < 0) goto nla_put_failure; n++; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 7a436c517e44..ca788ada5bd3 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -398,8 +398,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_init_wl(newtp, treq->rcv_isn); - newtp->srtt = 0; - newtp->mdev = TCP_TIMEOUT_INIT; + newtp->srtt_us = 0; + newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); newicsk->icsk_rto = TCP_TIMEOUT_INIT; newtp->packets_out = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c5eadec001c1..bf38b1fb63ab 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -866,11 +866,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (clone_it) { const struct sk_buff *fclone = skb + 1; - /* If congestion control is doing timestamping, we must - * take such a timestamp before we potentially clone/copy. - */ - if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP) - __net_timestamp(skb); + skb_mstamp_get(&skb->skb_mstamp); if (unlikely(skb->fclone == SKB_FCLONE_ORIG && fclone->fclone == SKB_FCLONE_CLONE)) @@ -1974,7 +1970,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); u32 timeout, tlp_time_stamp, rto_time_stamp; - u32 rtt = tp->srtt >> 3; + u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3); if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS)) return false; @@ -1996,7 +1992,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) /* Schedule a loss probe in 2*RTT for SACK capable connections * in Open state, that are either limited by cwnd or application. */ - if (sysctl_tcp_early_retrans < 3 || !tp->srtt || !tp->packets_out || + if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out || !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) return false; @@ -3050,8 +3046,9 @@ void tcp_send_delayed_ack(struct sock *sk) * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements * directly. */ - if (tp->srtt) { - int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN); + if (tp->srtt_us) { + int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3), + TCP_DELACK_MIN); if (rtt < max_ato) max_ato = rtt; diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 1f2d37613c9e..3b66610d4156 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -154,7 +154,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, p->snd_wnd = tp->snd_wnd; p->rcv_wnd = tp->rcv_wnd; p->ssthresh = tcp_current_ssthresh(sk); - p->srtt = tp->srtt >> 3; + p->srtt = tp->srtt_us >> 3; tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1); } diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index a022c17c9cf1..48539fff6357 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -306,7 +306,6 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) EXPORT_SYMBOL_GPL(tcp_vegas_get_info); static struct tcp_congestion_ops tcp_vegas __read_mostly = { - .flags = TCP_CONG_RTT_STAMP, .init = tcp_vegas_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_vegas_cong_avoid, diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 326475a94865..1b8e28fcd7e1 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -203,7 +203,6 @@ static u32 tcp_veno_ssthresh(struct sock *sk) } static struct tcp_congestion_ops tcp_veno __read_mostly = { - .flags = TCP_CONG_RTT_STAMP, .init = tcp_veno_init, .ssthresh = tcp_veno_ssthresh, .cong_avoid = tcp_veno_cong_avoid, diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 8eab02030ed0..5ede0e727945 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -227,7 +227,6 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) { } static struct tcp_congestion_ops tcp_yeah __read_mostly = { - .flags = TCP_CONG_RTT_STAMP, .init = tcp_yeah_init, .ssthresh = tcp_yeah_ssthresh, .cong_avoid = tcp_yeah_cong_avoid, -- cgit v1.2.3-71-gd317 From 7aa98047df95d7caf0678e939cdd936dfb99cd06 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 25 Feb 2014 17:15:13 -0800 Subject: net: move net_device priv_flags out from UAPI These are private to userspace, and they're unstable anyway and can be shuffled at will (see 080e4130b1fb) so any userspace application relying on them is on crack. Test compiled with allyesconfig. mcgrof@drvbp1 /pub/mem/mcgrof/net-next (git::master)$ make allyesconfig mcgrof@drvbp1 /pub/mem/mcgrof/net-next (git::master)$ time make -j 20 ... BUILD arch/x86/boot/bzImage Setup is 16992 bytes (padded to 17408 bytes). System is 56153 kB CRC 721d2751 Kernel: arch/x86/boot/bzImage is ready (#1) real 19m35.744s user 280m37.984s sys 27m54.104s Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Ben Hutchings Cc: Florian Fainelli Cc: David Miller Signed-off-by: Luis R. Rodriguez Signed-off-by: David S. Miller --- include/linux/netdevice.h | 83 +++++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/if.h | 83 ----------------------------------------------- 2 files changed, 83 insertions(+), 83 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5e84483c0650..1a869488b8ae 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1147,6 +1147,89 @@ struct net_device_ops { void *priv); }; +/** + * enum net_device_priv_flags - &struct net_device priv_flags + * + * These are the &struct net_device, they are only set internally + * by drivers and used in the kernel. These flags are invisible to + * userspace, this means that the order of these flags can change + * during any kernel release. + * + * You should have a pretty good reason to be extending these flags. + * + * @IFF_802_1Q_VLAN: 802.1Q VLAN device + * @IFF_EBRIDGE: Ethernet bridging device + * @IFF_SLAVE_INACTIVE: bonding slave not the curr. active + * @IFF_MASTER_8023AD: bonding master, 802.3ad + * @IFF_MASTER_ALB: bonding master, balance-alb + * @IFF_BONDING: bonding master or slave + * @IFF_SLAVE_NEEDARP: need ARPs for validation + * @IFF_ISATAP: ISATAP interface (RFC4214) + * @IFF_MASTER_ARPMON: bonding master, ARP mon in use + * @IFF_WAN_HDLC: WAN HDLC device + * @IFF_XMIT_DST_RELEASE: dev_hard_start_xmit() is allowed to + * release skb->dst + * @IFF_DONT_BRIDGE: disallow bridging this ether dev + * @IFF_DISABLE_NETPOLL: disable netpoll at run-time + * @IFF_MACVLAN_PORT: device used as macvlan port + * @IFF_BRIDGE_PORT: device used as bridge port + * @IFF_OVS_DATAPATH: device used as Open vSwitch datapath port + * @IFF_TX_SKB_SHARING: The interface supports sharing skbs on transmit + * @IFF_UNICAST_FLT: Supports unicast filtering + * @IFF_TEAM_PORT: device used as team port + * @IFF_SUPP_NOFCS: device supports sending custom FCS + * @IFF_LIVE_ADDR_CHANGE: device supports hardware address + * change when it's running + * @IFF_MACVLAN: Macvlan device + */ +enum netdev_priv_flags { + IFF_802_1Q_VLAN = 1<<0, + IFF_EBRIDGE = 1<<1, + IFF_SLAVE_INACTIVE = 1<<2, + IFF_MASTER_8023AD = 1<<3, + IFF_MASTER_ALB = 1<<4, + IFF_BONDING = 1<<5, + IFF_SLAVE_NEEDARP = 1<<6, + IFF_ISATAP = 1<<7, + IFF_MASTER_ARPMON = 1<<8, + IFF_WAN_HDLC = 1<<9, + IFF_XMIT_DST_RELEASE = 1<<10, + IFF_DONT_BRIDGE = 1<<11, + IFF_DISABLE_NETPOLL = 1<<12, + IFF_MACVLAN_PORT = 1<<13, + IFF_BRIDGE_PORT = 1<<14, + IFF_OVS_DATAPATH = 1<<15, + IFF_TX_SKB_SHARING = 1<<16, + IFF_UNICAST_FLT = 1<<17, + IFF_TEAM_PORT = 1<<18, + IFF_SUPP_NOFCS = 1<<19, + IFF_LIVE_ADDR_CHANGE = 1<<20, + IFF_MACVLAN = 1<<21, +}; + +#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN +#define IFF_EBRIDGE IFF_EBRIDGE +#define IFF_SLAVE_INACTIVE IFF_SLAVE_INACTIVE +#define IFF_MASTER_8023AD IFF_MASTER_8023AD +#define IFF_MASTER_ALB IFF_MASTER_ALB +#define IFF_BONDING IFF_BONDING +#define IFF_SLAVE_NEEDARP IFF_SLAVE_NEEDARP +#define IFF_ISATAP IFF_ISATAP +#define IFF_MASTER_ARPMON IFF_MASTER_ARPMON +#define IFF_WAN_HDLC IFF_WAN_HDLC +#define IFF_XMIT_DST_RELEASE IFF_XMIT_DST_RELEASE +#define IFF_DONT_BRIDGE IFF_DONT_BRIDGE +#define IFF_DISABLE_NETPOLL IFF_DISABLE_NETPOLL +#define IFF_MACVLAN_PORT IFF_MACVLAN_PORT +#define IFF_BRIDGE_PORT IFF_BRIDGE_PORT +#define IFF_OVS_DATAPATH IFF_OVS_DATAPATH +#define IFF_TX_SKB_SHARING IFF_TX_SKB_SHARING +#define IFF_UNICAST_FLT IFF_UNICAST_FLT +#define IFF_TEAM_PORT IFF_TEAM_PORT +#define IFF_SUPP_NOFCS IFF_SUPP_NOFCS +#define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE +#define IFF_MACVLAN IFF_MACVLAN + /* * The DEVICE structure. * Actually, this whole structure is a big mistake. It mixes I/O diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h index 42ec87994cf6..9cf2394f0bcf 100644 --- a/include/uapi/linux/if.h +++ b/include/uapi/linux/if.h @@ -112,89 +112,6 @@ enum net_device_flags { #define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\ IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT) -/** - * enum net_device_priv_flags - &struct net_device priv_flags - * - * These are the &struct net_device, they are only set internally - * by drivers and used in the kernel. These flags are invisible to - * userspace, this means that the order of these flags can change - * during any kernel release. - * - * You should have a pretty good reason to be extending these flags. - * - * @IFF_802_1Q_VLAN: 802.1Q VLAN device - * @IFF_EBRIDGE: Ethernet bridging device - * @IFF_SLAVE_INACTIVE: bonding slave not the curr. active - * @IFF_MASTER_8023AD: bonding master, 802.3ad - * @IFF_MASTER_ALB: bonding master, balance-alb - * @IFF_BONDING: bonding master or slave - * @IFF_SLAVE_NEEDARP: need ARPs for validation - * @IFF_ISATAP: ISATAP interface (RFC4214) - * @IFF_MASTER_ARPMON: bonding master, ARP mon in use - * @IFF_WAN_HDLC: WAN HDLC device - * @IFF_XMIT_DST_RELEASE: dev_hard_start_xmit() is allowed to - * release skb->dst - * @IFF_DONT_BRIDGE: disallow bridging this ether dev - * @IFF_DISABLE_NETPOLL: disable netpoll at run-time - * @IFF_MACVLAN_PORT: device used as macvlan port - * @IFF_BRIDGE_PORT: device used as bridge port - * @IFF_OVS_DATAPATH: device used as Open vSwitch datapath port - * @IFF_TX_SKB_SHARING: The interface supports sharing skbs on transmit - * @IFF_UNICAST_FLT: Supports unicast filtering - * @IFF_TEAM_PORT: device used as team port - * @IFF_SUPP_NOFCS: device supports sending custom FCS - * @IFF_LIVE_ADDR_CHANGE: device supports hardware address - * change when it's running - * @IFF_MACVLAN: Macvlan device - */ -enum netdev_priv_flags { - IFF_802_1Q_VLAN = 1<<0, - IFF_EBRIDGE = 1<<1, - IFF_SLAVE_INACTIVE = 1<<2, - IFF_MASTER_8023AD = 1<<3, - IFF_MASTER_ALB = 1<<4, - IFF_BONDING = 1<<5, - IFF_SLAVE_NEEDARP = 1<<6, - IFF_ISATAP = 1<<7, - IFF_MASTER_ARPMON = 1<<8, - IFF_WAN_HDLC = 1<<9, - IFF_XMIT_DST_RELEASE = 1<<10, - IFF_DONT_BRIDGE = 1<<11, - IFF_DISABLE_NETPOLL = 1<<12, - IFF_MACVLAN_PORT = 1<<13, - IFF_BRIDGE_PORT = 1<<14, - IFF_OVS_DATAPATH = 1<<15, - IFF_TX_SKB_SHARING = 1<<16, - IFF_UNICAST_FLT = 1<<17, - IFF_TEAM_PORT = 1<<18, - IFF_SUPP_NOFCS = 1<<19, - IFF_LIVE_ADDR_CHANGE = 1<<20, - IFF_MACVLAN = 1<<21, -}; - -#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN -#define IFF_EBRIDGE IFF_EBRIDGE -#define IFF_SLAVE_INACTIVE IFF_SLAVE_INACTIVE -#define IFF_MASTER_8023AD IFF_MASTER_8023AD -#define IFF_MASTER_ALB IFF_MASTER_ALB -#define IFF_BONDING IFF_BONDING -#define IFF_SLAVE_NEEDARP IFF_SLAVE_NEEDARP -#define IFF_ISATAP IFF_ISATAP -#define IFF_MASTER_ARPMON IFF_MASTER_ARPMON -#define IFF_WAN_HDLC IFF_WAN_HDLC -#define IFF_XMIT_DST_RELEASE IFF_XMIT_DST_RELEASE -#define IFF_DONT_BRIDGE IFF_DONT_BRIDGE -#define IFF_DISABLE_NETPOLL IFF_DISABLE_NETPOLL -#define IFF_MACVLAN_PORT IFF_MACVLAN_PORT -#define IFF_BRIDGE_PORT IFF_BRIDGE_PORT -#define IFF_OVS_DATAPATH IFF_OVS_DATAPATH -#define IFF_TX_SKB_SHARING IFF_TX_SKB_SHARING -#define IFF_UNICAST_FLT IFF_UNICAST_FLT -#define IFF_TEAM_PORT IFF_TEAM_PORT -#define IFF_SUPP_NOFCS IFF_SUPP_NOFCS -#define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE -#define IFF_MACVLAN IFF_MACVLAN - #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 -- cgit v1.2.3-71-gd317 From 946651cba26779864bcdbd7e12502f5a36c2de37 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Sat, 15 Feb 2014 00:05:52 +0100 Subject: wl1251: split wl251 platform data to a separate structure Move the wl1251 part of the wl12xx platform data structure into a new structure specifically for wl1251. Change the platform data built-in block and board files accordingly. Signed-off-by: Luciano Coelho Acked-by: Tony Lindgren Reviewed-by: Felipe Balbi Reviewed-by: Sebastian Reichel Reviewed-by: Pavel Machek Signed-off-by: John W. Linville --- arch/arm/mach-omap2/board-omap3pandora.c | 4 +-- arch/arm/mach-omap2/board-rx51-peripherals.c | 2 +- drivers/net/wireless/ti/wilink_platform_data.c | 37 +++++++++++++++++++++----- drivers/net/wireless/ti/wl1251/sdio.c | 12 ++++----- drivers/net/wireless/ti/wl1251/spi.c | 2 +- include/linux/wl12xx.h | 22 ++++++++++++++- 6 files changed, 62 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c index de1bc6bbe585..24f3c1be69a5 100644 --- a/arch/arm/mach-omap2/board-omap3pandora.c +++ b/arch/arm/mach-omap2/board-omap3pandora.c @@ -536,7 +536,7 @@ static struct spi_board_info omap3pandora_spi_board_info[] __initdata = { static void __init pandora_wl1251_init(void) { - struct wl12xx_platform_data pandora_wl1251_pdata; + struct wl1251_platform_data pandora_wl1251_pdata; int ret; memset(&pandora_wl1251_pdata, 0, sizeof(pandora_wl1251_pdata)); @@ -550,7 +550,7 @@ static void __init pandora_wl1251_init(void) goto fail_irq; pandora_wl1251_pdata.use_eeprom = true; - ret = wl12xx_set_platform_data(&pandora_wl1251_pdata); + ret = wl1251_set_platform_data(&pandora_wl1251_pdata); if (ret < 0) goto fail_irq; diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c index 8760bbe3baab..e05e740a4426 100644 --- a/arch/arm/mach-omap2/board-rx51-peripherals.c +++ b/arch/arm/mach-omap2/board-rx51-peripherals.c @@ -84,7 +84,7 @@ enum { RX51_SPI_MIPID, /* LCD panel */ }; -static struct wl12xx_platform_data wl1251_pdata; +static struct wl1251_platform_data wl1251_pdata; static struct tsc2005_platform_data tsc2005_pdata; #if defined(CONFIG_SENSORS_LIS3_I2C) || defined(CONFIG_SENSORS_LIS3_I2C_MODULE) diff --git a/drivers/net/wireless/ti/wilink_platform_data.c b/drivers/net/wireless/ti/wilink_platform_data.c index 998e95895f9d..a92bd3e89796 100644 --- a/drivers/net/wireless/ti/wilink_platform_data.c +++ b/drivers/net/wireless/ti/wilink_platform_data.c @@ -23,17 +23,17 @@ #include #include -static struct wl12xx_platform_data *platform_data; +static struct wl12xx_platform_data *wl12xx_platform_data; int __init wl12xx_set_platform_data(const struct wl12xx_platform_data *data) { - if (platform_data) + if (wl12xx_platform_data) return -EBUSY; if (!data) return -EINVAL; - platform_data = kmemdup(data, sizeof(*data), GFP_KERNEL); - if (!platform_data) + wl12xx_platform_data = kmemdup(data, sizeof(*data), GFP_KERNEL); + if (!wl12xx_platform_data) return -ENOMEM; return 0; @@ -41,9 +41,34 @@ int __init wl12xx_set_platform_data(const struct wl12xx_platform_data *data) struct wl12xx_platform_data *wl12xx_get_platform_data(void) { - if (!platform_data) + if (!wl12xx_platform_data) return ERR_PTR(-ENODEV); - return platform_data; + return wl12xx_platform_data; } EXPORT_SYMBOL(wl12xx_get_platform_data); + +static struct wl1251_platform_data *wl1251_platform_data; + +int __init wl1251_set_platform_data(const struct wl1251_platform_data *data) +{ + if (wl1251_platform_data) + return -EBUSY; + if (!data) + return -EINVAL; + + wl1251_platform_data = kmemdup(data, sizeof(*data), GFP_KERNEL); + if (!wl1251_platform_data) + return -ENOMEM; + + return 0; +} + +struct wl1251_platform_data *wl1251_get_platform_data(void) +{ + if (!wl1251_platform_data) + return ERR_PTR(-ENODEV); + + return wl1251_platform_data; +} +EXPORT_SYMBOL(wl1251_get_platform_data); diff --git a/drivers/net/wireless/ti/wl1251/sdio.c b/drivers/net/wireless/ti/wl1251/sdio.c index e2b3d9c541e8..b75a37a58313 100644 --- a/drivers/net/wireless/ti/wl1251/sdio.c +++ b/drivers/net/wireless/ti/wl1251/sdio.c @@ -227,7 +227,7 @@ static int wl1251_sdio_probe(struct sdio_func *func, struct wl1251 *wl; struct ieee80211_hw *hw; struct wl1251_sdio *wl_sdio; - const struct wl12xx_platform_data *wl12xx_board_data; + const struct wl1251_platform_data *wl1251_board_data; hw = wl1251_alloc_hw(); if (IS_ERR(hw)) @@ -254,11 +254,11 @@ static int wl1251_sdio_probe(struct sdio_func *func, wl->if_priv = wl_sdio; wl->if_ops = &wl1251_sdio_ops; - wl12xx_board_data = wl12xx_get_platform_data(); - if (!IS_ERR(wl12xx_board_data)) { - wl->set_power = wl12xx_board_data->set_power; - wl->irq = wl12xx_board_data->irq; - wl->use_eeprom = wl12xx_board_data->use_eeprom; + wl1251_board_data = wl1251_get_platform_data(); + if (!IS_ERR(wl1251_board_data)) { + wl->set_power = wl1251_board_data->set_power; + wl->irq = wl1251_board_data->irq; + wl->use_eeprom = wl1251_board_data->use_eeprom; } if (wl->irq) { diff --git a/drivers/net/wireless/ti/wl1251/spi.c b/drivers/net/wireless/ti/wl1251/spi.c index 1342f81e683d..62403a147592 100644 --- a/drivers/net/wireless/ti/wl1251/spi.c +++ b/drivers/net/wireless/ti/wl1251/spi.c @@ -238,7 +238,7 @@ static const struct wl1251_if_operations wl1251_spi_ops = { static int wl1251_spi_probe(struct spi_device *spi) { - struct wl12xx_platform_data *pdata; + struct wl1251_platform_data *pdata; struct ieee80211_hw *hw; struct wl1251 *wl; int ret; diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index a54fe82e704b..b516b4fa22de 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -48,11 +48,15 @@ enum { WL12XX_TCXOCLOCK_33_6 = 7, /* 33.6 MHz */ }; -struct wl12xx_platform_data { +struct wl1251_platform_data { void (*set_power)(bool enable); /* SDIO only: IRQ number if WLAN_IRQ line is used, 0 for SDIO IRQs */ int irq; bool use_eeprom; +}; + +struct wl12xx_platform_data { + int irq; int board_ref_clock; int board_tcxo_clock; unsigned long platform_quirks; @@ -68,6 +72,10 @@ int wl12xx_set_platform_data(const struct wl12xx_platform_data *data); struct wl12xx_platform_data *wl12xx_get_platform_data(void); +int wl1251_set_platform_data(const struct wl1251_platform_data *data); + +struct wl1251_platform_data *wl1251_get_platform_data(void); + #else static inline @@ -82,6 +90,18 @@ struct wl12xx_platform_data *wl12xx_get_platform_data(void) return ERR_PTR(-ENODATA); } +static inline +int wl1251_set_platform_data(const struct wl1251_platform_data *data) +{ + return -ENOSYS; +} + +static inline +struct wl1251_platform_data *wl1251_get_platform_data(void) +{ + return ERR_PTR(-ENODATA); +} + #endif #endif -- cgit v1.2.3-71-gd317 From 1d207cd30b65fdd60d952cb9e100b6f776564f06 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Sat, 15 Feb 2014 00:05:53 +0100 Subject: wl1251: move power GPIO handling into the driver Move the power GPIO handling from the board code into the driver. This is a dependency for device tree support. Signed-off-by: Sebastian Reichel Reviewed-by: Pavel Machek Acked-by: Tony Lindgren Signed-off-by: John W. Linville --- arch/arm/mach-omap2/board-omap3pandora.c | 2 ++ arch/arm/mach-omap2/board-rx51-peripherals.c | 11 ++-------- drivers/net/wireless/ti/wl1251/sdio.c | 21 +++++++++++++----- drivers/net/wireless/ti/wl1251/spi.c | 33 ++++++++++++++++++---------- drivers/net/wireless/ti/wl1251/wl1251.h | 2 +- include/linux/wl12xx.h | 2 +- 6 files changed, 43 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c index 24f3c1be69a5..cf18340eb3bb 100644 --- a/arch/arm/mach-omap2/board-omap3pandora.c +++ b/arch/arm/mach-omap2/board-omap3pandora.c @@ -541,6 +541,8 @@ static void __init pandora_wl1251_init(void) memset(&pandora_wl1251_pdata, 0, sizeof(pandora_wl1251_pdata)); + pandora_wl1251_pdata.power_gpio = -1; + ret = gpio_request_one(PANDORA_WIFI_IRQ_GPIO, GPIOF_IN, "wl1251 irq"); if (ret < 0) goto fail; diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c index e05e740a4426..ddfc8df83c6a 100644 --- a/arch/arm/mach-omap2/board-rx51-peripherals.c +++ b/arch/arm/mach-omap2/board-rx51-peripherals.c @@ -1173,13 +1173,7 @@ static inline void board_smc91x_init(void) #endif -static void rx51_wl1251_set_power(bool enable) -{ - gpio_set_value(RX51_WL1251_POWER_GPIO, enable); -} - static struct gpio rx51_wl1251_gpios[] __initdata = { - { RX51_WL1251_POWER_GPIO, GPIOF_OUT_INIT_LOW, "wl1251 power" }, { RX51_WL1251_IRQ_GPIO, GPIOF_IN, "wl1251 irq" }, }; @@ -1196,17 +1190,16 @@ static void __init rx51_init_wl1251(void) if (irq < 0) goto err_irq; - wl1251_pdata.set_power = rx51_wl1251_set_power; + wl1251_pdata.power_gpio = RX51_WL1251_POWER_GPIO; rx51_peripherals_spi_board_info[RX51_SPI_WL1251].irq = irq; return; err_irq: gpio_free(RX51_WL1251_IRQ_GPIO); - gpio_free(RX51_WL1251_POWER_GPIO); error: printk(KERN_ERR "wl1251 board initialisation failed\n"); - wl1251_pdata.set_power = NULL; + wl1251_pdata.power_gpio = -1; /* * Now rx51_peripherals_spi_board_info[1].irq is zero and diff --git a/drivers/net/wireless/ti/wl1251/sdio.c b/drivers/net/wireless/ti/wl1251/sdio.c index b75a37a58313..b661f896e9fe 100644 --- a/drivers/net/wireless/ti/wl1251/sdio.c +++ b/drivers/net/wireless/ti/wl1251/sdio.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "wl1251.h" @@ -182,8 +183,9 @@ static int wl1251_sdio_set_power(struct wl1251 *wl, bool enable) * callback in case it wants to do any additional setup, * for example enabling clock buffer for the module. */ - if (wl->set_power) - wl->set_power(true); + if (gpio_is_valid(wl->power_gpio)) + gpio_set_value(wl->power_gpio, true); + ret = pm_runtime_get_sync(&func->dev); if (ret < 0) { @@ -203,8 +205,8 @@ static int wl1251_sdio_set_power(struct wl1251 *wl, bool enable) if (ret < 0) goto out; - if (wl->set_power) - wl->set_power(false); + if (gpio_is_valid(wl->power_gpio)) + gpio_set_value(wl->power_gpio, false); } out: @@ -256,11 +258,20 @@ static int wl1251_sdio_probe(struct sdio_func *func, wl1251_board_data = wl1251_get_platform_data(); if (!IS_ERR(wl1251_board_data)) { - wl->set_power = wl1251_board_data->set_power; + wl->power_gpio = wl1251_board_data->power_gpio; wl->irq = wl1251_board_data->irq; wl->use_eeprom = wl1251_board_data->use_eeprom; } + if (gpio_is_valid(wl->power_gpio)) { + ret = devm_gpio_request(&func->dev, wl->power_gpio, + "wl1251 power"); + if (ret) { + wl1251_error("Failed to request gpio: %d\n", ret); + goto disable; + } + } + if (wl->irq) { irq_set_status_flags(wl->irq, IRQ_NOAUTOEN); ret = request_irq(wl->irq, wl1251_line_irq, 0, "wl1251", wl); diff --git a/drivers/net/wireless/ti/wl1251/spi.c b/drivers/net/wireless/ti/wl1251/spi.c index 62403a147592..6abcbc3f7fc7 100644 --- a/drivers/net/wireless/ti/wl1251/spi.c +++ b/drivers/net/wireless/ti/wl1251/spi.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "wl1251.h" #include "reg.h" @@ -221,8 +222,8 @@ static void wl1251_spi_disable_irq(struct wl1251 *wl) static int wl1251_spi_set_power(struct wl1251 *wl, bool enable) { - if (wl->set_power) - wl->set_power(enable); + if (gpio_is_valid(wl->power_gpio)) + gpio_set_value(wl->power_gpio, enable); return 0; } @@ -271,22 +272,33 @@ static int wl1251_spi_probe(struct spi_device *spi) goto out_free; } - wl->set_power = pdata->set_power; - if (!wl->set_power) { - wl1251_error("set power function missing in platform data"); - return -ENODEV; + wl->power_gpio = pdata->power_gpio; + + if (gpio_is_valid(wl->power_gpio)) { + ret = devm_gpio_request_one(&spi->dev, wl->power_gpio, + GPIOF_OUT_INIT_LOW, "wl1251 power"); + if (ret) { + wl1251_error("Failed to request gpio: %d\n", ret); + goto out_free; + } + } else { + wl1251_error("set power gpio missing in platform data"); + ret = -ENODEV; + goto out_free; } wl->irq = spi->irq; if (wl->irq < 0) { wl1251_error("irq missing in platform data"); - return -ENODEV; + ret = -ENODEV; + goto out_free; } wl->use_eeprom = pdata->use_eeprom; irq_set_status_flags(wl->irq, IRQ_NOAUTOEN); - ret = request_irq(wl->irq, wl1251_irq, 0, DRIVER_NAME, wl); + ret = devm_request_irq(&spi->dev, wl->irq, wl1251_irq, 0, + DRIVER_NAME, wl); if (ret < 0) { wl1251_error("request_irq() failed: %d", ret); goto out_free; @@ -296,13 +308,10 @@ static int wl1251_spi_probe(struct spi_device *spi) ret = wl1251_init_ieee80211(wl); if (ret) - goto out_irq; + goto out_free; return 0; - out_irq: - free_irq(wl->irq, wl); - out_free: ieee80211_free_hw(hw); diff --git a/drivers/net/wireless/ti/wl1251/wl1251.h b/drivers/net/wireless/ti/wl1251/wl1251.h index 235617a7716d..389fe25af1b6 100644 --- a/drivers/net/wireless/ti/wl1251/wl1251.h +++ b/drivers/net/wireless/ti/wl1251/wl1251.h @@ -276,7 +276,7 @@ struct wl1251 { void *if_priv; const struct wl1251_if_operations *if_ops; - void (*set_power)(bool enable); + int power_gpio; int irq; bool use_eeprom; diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index b516b4fa22de..a9c723be1acf 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -49,7 +49,7 @@ enum { }; struct wl1251_platform_data { - void (*set_power)(bool enable); + int power_gpio; /* SDIO only: IRQ number if WLAN_IRQ line is used, 0 for SDIO IRQs */ int irq; bool use_eeprom; -- cgit v1.2.3-71-gd317 From 3ebe8e257307a87c33460aa7d2b75dadd374ed9c Mon Sep 17 00:00:00 2001 From: "Zhao, Gang" Date: Tue, 18 Feb 2014 21:36:03 +0800 Subject: ieee80211: remove function ieee80211_{dsss_chan_to_freq, freq_to_dsss_chan} Function ieee80211_{dsss_chan_to_freq, freq_to_dsss_chan} have been replaced with ieee80211_{channel_to_frequency, frequency_to_channel}. There should be no users of the two functions now. So remove them. Cc: Johannes Berg Signed-off-by: Zhao, Gang Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 36 ------------------------------------ 1 file changed, 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 5f349355ee54..06299048c4f4 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2307,42 +2307,6 @@ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, return mgmt->u.action.category == WLAN_CATEGORY_PUBLIC; } -/** - * ieee80211_dsss_chan_to_freq - get channel center frequency - * @channel: the DSSS channel - * - * Convert IEEE802.11 DSSS channel to the center frequency (MHz). - * Ref IEEE 802.11-2007 section 15.6 - */ -static inline int ieee80211_dsss_chan_to_freq(int channel) -{ - if ((channel > 0) && (channel < 14)) - return 2407 + (channel * 5); - else if (channel == 14) - return 2484; - else - return -1; -} - -/** - * ieee80211_freq_to_dsss_chan - get channel - * @freq: the frequency - * - * Convert frequency (MHz) to IEEE802.11 DSSS channel - * Ref IEEE 802.11-2007 section 15.6 - * - * This routine selects the channel with the closest center frequency. - */ -static inline int ieee80211_freq_to_dsss_chan(int freq) -{ - if ((freq >= 2410) && (freq < 2475)) - return (freq - 2405) / 5; - else if ((freq >= 2482) && (freq < 2487)) - return 14; - else - return -1; -} - /** * ieee80211_tu_to_usec - convert time units (TU) to microseconds * @tu: the TUs -- cgit v1.2.3-71-gd317 From 9813337a4b16ea5b1701b1d00f7e410f5decdfa5 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Sun, 2 Mar 2014 10:25:01 +0200 Subject: net/mlx4: Replace mlx4_en_mac_to_u64() with mlx4_mac_to_u64() Currently, the EN driver uses a private static function mlx4_en_mac_to_u64(). Move it to a common include file (driver.h) for mlx4_en and mlx4_ib for further use. Signed-off-by: Eugenia Emantayev Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 32 ++++++++------------------ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 - include/linux/mlx4/driver.h | 12 ++++++++++ 3 files changed, 22 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 2c0823bf3e05..3db594614fd3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -603,7 +603,7 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv) int err = 0; u64 reg_id; int *qpn = &priv->base_qpn; - u64 mac = mlx4_en_mac_to_u64(priv->dev->dev_addr); + u64 mac = mlx4_mac_to_u64(priv->dev->dev_addr); en_dbg(DRV, priv, "Registering MAC: %pM for adding\n", priv->dev->dev_addr); @@ -672,7 +672,7 @@ static void mlx4_en_put_qp(struct mlx4_en_priv *priv) u64 mac; if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { - mac = mlx4_en_mac_to_u64(priv->dev->dev_addr); + mac = mlx4_mac_to_u64(priv->dev->dev_addr); en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", priv->dev->dev_addr); mlx4_unregister_mac(dev, priv->port, mac); @@ -685,7 +685,7 @@ static void mlx4_en_put_qp(struct mlx4_en_priv *priv) for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) { bucket = &priv->mac_hash[i]; hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { - mac = mlx4_en_mac_to_u64(entry->mac); + mac = mlx4_mac_to_u64(entry->mac); en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", entry->mac); mlx4_en_uc_steer_release(priv, entry->mac, @@ -715,14 +715,14 @@ static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn, struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; int err = 0; - u64 new_mac_u64 = mlx4_en_mac_to_u64(new_mac); + u64 new_mac_u64 = mlx4_mac_to_u64(new_mac); if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) { struct hlist_head *bucket; unsigned int mac_hash; struct mlx4_mac_entry *entry; struct hlist_node *tmp; - u64 prev_mac_u64 = mlx4_en_mac_to_u64(prev_mac); + u64 prev_mac_u64 = mlx4_mac_to_u64(prev_mac); bucket = &priv->mac_hash[prev_mac[MLX4_EN_MAC_HASH_IDX]]; hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { @@ -751,18 +751,6 @@ static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn, return __mlx4_replace_mac(dev, priv->port, qpn, new_mac_u64); } -u64 mlx4_en_mac_to_u64(u8 *addr) -{ - u64 mac = 0; - int i; - - for (i = 0; i < ETH_ALEN; i++) { - mac <<= 8; - mac |= addr[i]; - } - return mac; -} - static int mlx4_en_do_set_mac(struct mlx4_en_priv *priv) { int err = 0; @@ -1081,7 +1069,7 @@ static void mlx4_en_do_multicast(struct mlx4_en_priv *priv, mlx4_en_cache_mclist(dev); netif_addr_unlock_bh(dev); list_for_each_entry(mclist, &priv->mc_list, list) { - mcast_addr = mlx4_en_mac_to_u64(mclist->addr); + mcast_addr = mlx4_mac_to_u64(mclist->addr); mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, mcast_addr, 0, MLX4_MCAST_CONFIG); } @@ -1173,7 +1161,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv, found = true; if (!found) { - mac = mlx4_en_mac_to_u64(entry->mac); + mac = mlx4_mac_to_u64(entry->mac); mlx4_en_uc_steer_release(priv, entry->mac, priv->base_qpn, entry->reg_id); @@ -1216,7 +1204,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv, priv->flags |= MLX4_EN_FLAG_FORCE_PROMISC; break; } - mac = mlx4_en_mac_to_u64(ha->addr); + mac = mlx4_mac_to_u64(ha->addr); memcpy(entry->mac, ha->addr, ETH_ALEN); err = mlx4_register_mac(mdev->dev, priv->port, mac); if (err < 0) { @@ -2206,7 +2194,7 @@ static int mlx4_en_set_vf_mac(struct net_device *dev, int queue, u8 *mac) { struct mlx4_en_priv *en_priv = netdev_priv(dev); struct mlx4_en_dev *mdev = en_priv->mdev; - u64 mac_u64 = mlx4_en_mac_to_u64(mac); + u64 mac_u64 = mlx4_mac_to_u64(mac); if (!is_valid_ether_addr(mac)) return -EINVAL; @@ -2407,7 +2395,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, if (mlx4_is_slave(priv->mdev->dev)) { eth_hw_addr_random(dev); en_warn(priv, "Assigned random MAC address %pM\n", dev->dev_addr); - mac_u64 = mlx4_en_mac_to_u64(dev->dev_addr); + mac_u64 = mlx4_mac_to_u64(dev->dev_addr); mdev->dev->caps.def_mac[priv->port] = mac_u64; } else { en_err(priv, "Port: %d, invalid mac burned: %pM, quiting\n", diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index c59011d4e830..4ff7da83c4b3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -797,7 +797,6 @@ void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv); #define MLX4_EN_NUM_SELF_TEST 5 void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf); -u64 mlx4_en_mac_to_u64(u8 *addr); void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev); /* diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index c257e1b211be..022055c8fb26 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -64,4 +64,16 @@ void mlx4_unregister_interface(struct mlx4_interface *intf); void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port); +static inline u64 mlx4_mac_to_u64(u8 *addr) +{ + u64 mac = 0; + int i; + + for (i = 0; i < ETH_ALEN; i++) { + mac <<= 8; + mac |= addr[i]; + } + return mac; +} + #endif /* MLX4_DRIVER_H */ -- cgit v1.2.3-71-gd317 From ec5709403e6893acb4f7ca40514ebd29c3116836 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Sun, 2 Mar 2014 10:25:04 +0200 Subject: net/mlx4_en: Use union for BlueFlame WQE When BlueFlame is turned on, control segment of the TX WQE is changed, and the second line of it is used for QPN. Changed code to use a union in the mlx4_wqe_ctrl_seg instead of casting. This makes the code clearer and solves the static checker warning: drivers/net/ethernet/mellanox/mlx4/en_tx.c:839 mlx4_en_xmit() warn: potential memory corrupting cast 4 vs 2 bytes CC: Dan Carpenter Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 3 ++- include/linux/mlx4/qp.h | 11 ++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 56e8fbc128f8..69c2fcef9d4c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -876,7 +876,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) skb_tx_timestamp(skb); if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && !vlan_tx_tag_present(skb)) { - *(__be32 *) (&tx_desc->ctrl.vlan_tag) |= cpu_to_be32(ring->doorbell_qpn); + tx_desc->ctrl.bf_qpn |= cpu_to_be32(ring->doorbell_qpn); + op_own |= htonl((bf_index & 0xffff) << 8); /* Ensure new descirptor hits memory * before setting ownership of this descriptor to HW */ diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 59f8ba84568b..b66e7610d4ee 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -270,9 +270,14 @@ enum { struct mlx4_wqe_ctrl_seg { __be32 owner_opcode; - __be16 vlan_tag; - u8 ins_vlan; - u8 fence_size; + union { + struct { + __be16 vlan_tag; + u8 ins_vlan; + u8 fence_size; + }; + __be32 bf_qpn; + }; /* * High 24 bits are SRC remote buffer; low 8 bits are flags: * [7] SO (strong ordering) -- cgit v1.2.3-71-gd317 From f3baa393ffc9a7aefc0bf767729382085e81f606 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Mon, 3 Mar 2014 17:23:11 +0100 Subject: UAPI: add MPLS label stack definition Labels for the Multiprotocol Label Switching are defined in RFC 3032 which was superseded by RFC 5462. Add the definition to UAPI and a stub header for include/linux. Signed-off-by: Simon Wunderlich Signed-off-by: Mathias Kretschmer Signed-off-by: David S. Miller --- include/linux/mpls.h | 6 ++++++ include/uapi/linux/mpls.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 include/linux/mpls.h create mode 100644 include/uapi/linux/mpls.h (limited to 'include/linux') diff --git a/include/linux/mpls.h b/include/linux/mpls.h new file mode 100644 index 000000000000..9999145bc190 --- /dev/null +++ b/include/linux/mpls.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_MPLS_H +#define _LINUX_MPLS_H + +#include + +#endif /* _LINUX_MPLS_H */ diff --git a/include/uapi/linux/mpls.h b/include/uapi/linux/mpls.h new file mode 100644 index 000000000000..bc9abfe88c9a --- /dev/null +++ b/include/uapi/linux/mpls.h @@ -0,0 +1,34 @@ +#ifndef _UAPI_MPLS_H +#define _UAPI_MPLS_H + +#include +#include + +/* Reference: RFC 5462, RFC 3032 + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Label | TC |S| TTL | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Label: Label Value, 20 bits + * TC: Traffic Class field, 3 bits + * S: Bottom of Stack, 1 bit + * TTL: Time to Live, 8 bits + */ + +struct mpls_label { + __be32 entry; +}; + +#define MPLS_LS_LABEL_MASK 0xFFFFF000 +#define MPLS_LS_LABEL_SHIFT 12 +#define MPLS_LS_TC_MASK 0x00000E00 +#define MPLS_LS_TC_SHIFT 9 +#define MPLS_LS_S_MASK 0x00000100 +#define MPLS_LS_S_SHIFT 8 +#define MPLS_LS_TTL_MASK 0x000000FF +#define MPLS_LS_TTL_SHIFT 0 + +#endif /* _UAPI_MPLS_H */ -- cgit v1.2.3-71-gd317 From 3b02b56cd5988d569731f6c0c26992296e46b758 Mon Sep 17 00:00:00 2001 From: Vytas Dauksa Date: Tue, 17 Dec 2013 14:01:43 +0000 Subject: netfilter: ipset: add hash:ip,mark data type to ipset Introduce packet mark support with new ip,mark hash set. This includes userspace and kernelspace code, hash:ip,mark set tests and man page updates. The intended use of ip,mark set is similar to the ip:port type, but for protocols which don't use a predictable port number. Instead of port number it matches a firewall mark determined by a layer 7 filtering program like opendpi. As well as allowing or blocking traffic it will also be used for accounting packets and bytes sent for each protocol. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 10 +- include/uapi/linux/netfilter/ipset/ip_set.h | 1 + net/netfilter/ipset/Kconfig | 9 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_hash_ipmark.c | 312 ++++++++++++++++++++++++++++ 5 files changed, 329 insertions(+), 4 deletions(-) create mode 100644 net/netfilter/ipset/ip_set_hash_ipmark.c (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 0c7d01eae56c..4ac00d4aa87e 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -39,11 +39,13 @@ enum ip_set_feature { IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG), IPSET_TYPE_IFACE_FLAG = 5, IPSET_TYPE_IFACE = (1 << IPSET_TYPE_IFACE_FLAG), - IPSET_TYPE_NOMATCH_FLAG = 6, + IPSET_TYPE_MARK_FLAG = 6, + IPSET_TYPE_MARK = (1 << IPSET_TYPE_MARK_FLAG), + IPSET_TYPE_NOMATCH_FLAG = 7, IPSET_TYPE_NOMATCH = (1 << IPSET_TYPE_NOMATCH_FLAG), /* Strictly speaking not a feature, but a flag for dumping: * this settype must be dumped last */ - IPSET_DUMP_LAST_FLAG = 7, + IPSET_DUMP_LAST_FLAG = 8, IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG), }; @@ -171,8 +173,6 @@ struct ip_set_type { char name[IPSET_MAXNAMELEN]; /* Protocol version */ u8 protocol; - /* Set features to control swapping */ - u8 features; /* Set type dimension */ u8 dimension; /* @@ -182,6 +182,8 @@ struct ip_set_type { u8 family; /* Type revisions */ u8 revision_min, revision_max; + /* Set features to control swapping */ + u16 features; /* Create set */ int (*create)(struct net *net, struct ip_set *set, diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 25d3b2f79c02..5368f8275774 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -82,6 +82,7 @@ enum { IPSET_ATTR_PROTO, /* 7 */ IPSET_ATTR_CADT_FLAGS, /* 8 */ IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, /* 9 */ + IPSET_ATTR_MARK, /* 10 */ /* Reserve empty slots */ IPSET_ATTR_CADT_MAX = 16, /* Create-only specific attributes */ diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 44cd4f58adf0..2f7f5c32c6f9 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -61,6 +61,15 @@ config IP_SET_HASH_IP To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_IPMARK + tristate "hash:ip,mark set support" + depends on IP_SET + help + This option adds the hash:ip,mark set type support, by which one + can store IPv4/IPv6 address and mark pairs. + + To compile it as a module, choose M here. If unsure, say N. + config IP_SET_HASH_IPPORT tristate "hash:ip,port set support" depends on IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 44b2d38476fa..231f10196cb9 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o # hash types obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o +obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c new file mode 100644 index 000000000000..e56c0d916fac --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -0,0 +1,312 @@ +/* Copyright (C) 2003-2013 Jozsef Kadlecsik + * Copyright (C) 2013 Smoothwall Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:ip,mark type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 0 + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Vytas Dauksa "); +IP_SET_MODULE_DESC("hash:ip,mark", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); +MODULE_ALIAS("ip_set_hash:ip,mark"); + +/* Type specific function prefix */ +#define HTYPE hash_ipmark + +/* IPv4 variant */ + +/* Member elements */ +struct hash_ipmark4_elem { + __be32 ip; + __u32 mark; +}; + +/* Common functions */ + +static inline bool +hash_ipmark4_data_equal(const struct hash_ipmark4_elem *ip1, + const struct hash_ipmark4_elem *ip2, + u32 *multi) +{ + return ip1->ip == ip2->ip && + ip1->mark == ip2->mark; +} + +static bool +hash_ipmark4_data_list(struct sk_buff *skb, + const struct hash_ipmark4_elem *data) +{ + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || + nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_ipmark4_data_next(struct hash_ipmark4_elem *next, + const struct hash_ipmark4_elem *d) +{ + next->ip = d->ip; +} + +#define MTYPE hash_ipmark4 +#define PF 4 +#define HOST_MASK 32 +#define HKEY_DATALEN sizeof(struct hash_ipmark4_elem) +#include "ip_set_hash_gen.h" + +static int +hash_ipmark4_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmark4_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + e.mark = skb->mark; + + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + const struct hash_ipmark *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmark4_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip, ip_to = 0; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK])); + + if (adt == IPSET_TEST || + !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) { + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + ip_to = ip = ntohl(e.ip); + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip > ip_to) + swap(ip, ip_to); + } else if (tb[IPSET_ATTR_CIDR]) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (!cidr || cidr > 32) + return -IPSET_ERR_INVALID_CIDR; + ip_set_mask_from_to(ip, ip_to, cidr); + } + + if (retried) + ip = ntohl(h->next.ip); + for (; !before(ip_to, ip); ip++) { + e.ip = htonl(ip); + ret = adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +/* IPv6 variant */ + +struct hash_ipmark6_elem { + union nf_inet_addr ip; + __u32 mark; +}; + +/* Common functions */ + +static inline bool +hash_ipmark6_data_equal(const struct hash_ipmark6_elem *ip1, + const struct hash_ipmark6_elem *ip2, + u32 *multi) +{ + return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && + ip1->mark == ip2->mark; +} + +static bool +hash_ipmark6_data_list(struct sk_buff *skb, + const struct hash_ipmark6_elem *data) +{ + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || + nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_ipmark6_data_next(struct hash_ipmark4_elem *next, + const struct hash_ipmark6_elem *d) +{ +} + +#undef MTYPE +#undef PF +#undef HOST_MASK +#undef HKEY_DATALEN + +#define MTYPE hash_ipmark6 +#define PF 6 +#define HOST_MASK 128 +#define HKEY_DATALEN sizeof(struct hash_ipmark6_elem) +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" + + +static int +hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmark6_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + e.mark = skb->mark; + + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmark6_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + tb[IPSET_ATTR_IP_TO] || + tb[IPSET_ATTR_CIDR])) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK])); + + if (adt == IPSET_TEST) { + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + + return ret; +} + +static struct ip_set_type hash_ipmark_type __read_mostly = { + .name = "hash:ip,mark", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_MARK, + .dimension = IPSET_DIM_TWO, + .family = NFPROTO_UNSPEC, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, + .create = hash_ipmark_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_MARK] = { .type = NLA_U32 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_ipmark_init(void) +{ + return ip_set_type_register(&hash_ipmark_type); +} + +static void __exit +hash_ipmark_fini(void) +{ + ip_set_type_unregister(&hash_ipmark_type); +} + +module_init(hash_ipmark_init); +module_exit(hash_ipmark_fini); -- cgit v1.2.3-71-gd317 From af284ece87365f3a69723f5bcc1bcdb505b5eb5d Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 13 Feb 2014 12:19:56 +0100 Subject: netfilter: ipset: Prepare the kernel for create option flags when no extension is needed Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 2 ++ include/uapi/linux/netfilter/ipset/ip_set.h | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 4ac00d4aa87e..f476bcec25ea 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -219,6 +219,8 @@ struct ip_set { u8 revision; /* Extensions */ u8 extensions; + /* Create flags */ + u8 flags; /* Default timeout value, if enabled */ u32 timeout; /* Element data size */ diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index f636f282b142..a29a378701d2 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -188,6 +188,12 @@ enum ipset_cadt_flags { IPSET_FLAG_CADT_MAX = 15, }; +/* The flag bits which correspond to the non-extension create flags */ +enum ipset_create_flags { + IPSET_CREATE_FLAG_NONE = 0, + IPSET_CREATE_FLAG_MAX = 7, +}; + /* Commands with settype-specific attributes */ enum ipset_adt { IPSET_ADD, -- cgit v1.2.3-71-gd317 From 07cf8f5ae2657ac495b906c68ff3441ff8ba80ba Mon Sep 17 00:00:00 2001 From: Josh Hunt Date: Fri, 28 Feb 2014 22:14:57 -0500 Subject: netfilter: ipset: add forceadd kernel support for hash set types Adds a new property for hash set types, where if a set is created with the 'forceadd' option and the set becomes full the next addition to the set may succeed and evict a random entry from the set. To keep overhead low eviction is done very simply. It checks to see which bucket the new entry would be added. If the bucket's pos value is non-zero (meaning there's at least one entry in the bucket) it replaces the first entry in the bucket. If pos is zero, then it continues down the normal add process. This property is useful if you have a set for 'ban' lists where it may not matter if you release some entries from the set early. Signed-off-by: Josh Hunt Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 3 +++ include/uapi/linux/netfilter/ipset/ip_set.h | 7 +++++-- net/netfilter/ipset/ip_set_core.c | 2 ++ net/netfilter/ipset/ip_set_hash_gen.h | 12 ++++++++++++ net/netfilter/ipset/ip_set_hash_ip.c | 3 ++- net/netfilter/ipset/ip_set_hash_ipmark.c | 2 +- net/netfilter/ipset/ip_set_hash_ipport.c | 3 ++- net/netfilter/ipset/ip_set_hash_ipportip.c | 3 ++- net/netfilter/ipset/ip_set_hash_ipportnet.c | 3 ++- net/netfilter/ipset/ip_set_hash_net.c | 3 ++- net/netfilter/ipset/ip_set_hash_netiface.c | 3 ++- net/netfilter/ipset/ip_set_hash_netnet.c | 2 +- net/netfilter/ipset/ip_set_hash_netport.c | 3 ++- net/netfilter/ipset/ip_set_hash_netportnet.c | 3 ++- 14 files changed, 40 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index f476bcec25ea..96afc29184be 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -65,6 +65,7 @@ enum ip_set_extension { #define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT) #define SET_WITH_COUNTER(s) ((s)->extensions & IPSET_EXT_COUNTER) #define SET_WITH_COMMENT(s) ((s)->extensions & IPSET_EXT_COMMENT) +#define SET_WITH_FORCEADD(s) ((s)->flags & IPSET_CREATE_FLAG_FORCEADD) /* Extension id, in size order */ enum ip_set_ext_id { @@ -255,6 +256,8 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set) cadt_flags |= IPSET_FLAG_WITH_COUNTERS; if (SET_WITH_COMMENT(set)) cadt_flags |= IPSET_FLAG_WITH_COMMENT; + if (SET_WITH_FORCEADD(set)) + cadt_flags |= IPSET_FLAG_WITH_FORCEADD; if (!cadt_flags) return 0; diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index a1ca24408206..78c2f2e79920 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -185,13 +185,16 @@ enum ipset_cadt_flags { IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS), IPSET_FLAG_BIT_WITH_COMMENT = 4, IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT), + IPSET_FLAG_BIT_WITH_FORCEADD = 5, + IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD), IPSET_FLAG_CADT_MAX = 15, }; /* The flag bits which correspond to the non-extension create flags */ enum ipset_create_flags { - IPSET_CREATE_FLAG_NONE = 0, - IPSET_CREATE_FLAG_MAX = 7, + IPSET_CREATE_FLAG_BIT_FORCEADD = 0, + IPSET_CREATE_FLAG_FORCEADD = (1 << IPSET_CREATE_FLAG_BIT_FORCEADD), + IPSET_CREATE_FLAG_BIT_MAX = 7, }; /* Commands with settype-specific attributes */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 636cb8df5354..117208321f16 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -368,6 +368,8 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len) if (tb[IPSET_ATTR_CADT_FLAGS]) cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_WITH_FORCEADD) + set->flags |= IPSET_CREATE_FLAG_FORCEADD; for (id = 0; id < IPSET_EXT_ID_MAX; id++) { if (!add_extension(id, cadt_flags, tb)) continue; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index b1eed81e24c5..61c7fb052802 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -633,6 +633,18 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, bool flag_exist = flags & IPSET_FLAG_EXIST; u32 key, multi = 0; + if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set)) { + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); + key = HKEY(value, h->initval, t->htable_bits); + n = hbucket(t,key); + if (n->pos) { + /* Choosing the first entry in the array to replace */ + j = 0; + goto reuse_slot; + } + rcu_read_unlock_bh(); + } if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) /* FIXME: when set is full, we slow down here */ mtype_expire(set, h, NLEN(set->family), set->dsize); diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index e65fc2423d56..dd40607f878e 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -25,7 +25,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 Counters support */ -#define IPSET_TYPE_REV_MAX 2 /* Comments support */ +/* 2 Comments support */ +#define IPSET_TYPE_REV_MAX 3 /* Forceadd support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index 1bf8e8524218..4eff0a297254 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -25,7 +25,7 @@ #include #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 0 +#define IPSET_TYPE_REV_MAX 1 /* Forceadd support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Vytas Dauksa "); diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 525a595dd1fe..7597b82a8b03 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -27,7 +27,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 SCTP and UDPLITE support added */ /* 2 Counters support added */ -#define IPSET_TYPE_REV_MAX 3 /* Comments support added */ +/* 3 Comments support added */ +#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index f5636631466e..672655ffd573 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -27,7 +27,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 SCTP and UDPLITE support added */ /* 2 Counters support added */ -#define IPSET_TYPE_REV_MAX 3 /* Comments support added */ +/* 3 Comments support added */ +#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 5d87fe8a41ff..7308d84f9277 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -29,7 +29,8 @@ /* 2 Range as input support for IPv4 added */ /* 3 nomatch flag support added */ /* 4 Counters support added */ -#define IPSET_TYPE_REV_MAX 5 /* Comments support added */ +/* 5 Comments support added */ +#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 8295cf4f9fdc..4c7d495783a3 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -26,7 +26,8 @@ /* 1 Range as input support for IPv4 added */ /* 2 nomatch flag support added */ /* 3 Counters support added */ -#define IPSET_TYPE_REV_MAX 4 /* Comments support added */ +/* 4 Comments support added */ +#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index b827a0f1f351..db2606805b35 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -27,7 +27,8 @@ /* 1 nomatch flag support added */ /* 2 /0 support added */ /* 3 Counters support added */ -#define IPSET_TYPE_REV_MAX 4 /* Comments support added */ +/* 4 Comments support added */ +#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 4e7261df8961..3e99987e4bf2 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -24,7 +24,7 @@ #include #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 0 +#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Oliver Smith "); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 7097fb0141bf..1c645fbd09c7 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -28,7 +28,8 @@ /* 2 Range as input support for IPv4 added */ /* 3 nomatch flag support added */ /* 4 Counters support added */ -#define IPSET_TYPE_REV_MAX 5 /* Comments support added */ +/* 5 Comments support added */ +#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 703d1192a6a2..c0d2ba73f8b2 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -25,7 +25,8 @@ #include #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 0 /* Comments support added */ +/* 0 Comments support added */ +#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Oliver Smith "); -- cgit v1.2.3-71-gd317 From 9859ccd2c8be63ce939522e63e265f2b0caa1109 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 28 Feb 2014 16:36:23 +0100 Subject: can: introduce the data bitrate configuration for CAN FD As CAN FD offers a second bitrate for the data section of the CAN frame the infrastructure for storing and configuring this second bitrate is introduced. Improved the readability of the if-statement by inserting some newlines. Signed-off-by: Oliver Hartkopp Acked-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 45 +++++++++++++++++++++++++++++++++++++++- include/linux/can/dev.h | 6 ++++-- include/uapi/linux/can/netlink.h | 2 ++ 3 files changed, 50 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 8141290e4c18..8ebe112458c4 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -647,6 +647,10 @@ static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = { = { .len = sizeof(struct can_bittiming_const) }, [IFLA_CAN_CLOCK] = { .len = sizeof(struct can_clock) }, [IFLA_CAN_BERR_COUNTER] = { .len = sizeof(struct can_berr_counter) }, + [IFLA_CAN_DATA_BITTIMING] + = { .len = sizeof(struct can_bittiming) }, + [IFLA_CAN_DATA_BITTIMING_CONST] + = { .len = sizeof(struct can_bittiming_const) }, }; static int can_changelink(struct net_device *dev, @@ -707,6 +711,27 @@ static int can_changelink(struct net_device *dev, return err; } + if (data[IFLA_CAN_DATA_BITTIMING]) { + struct can_bittiming dbt; + + /* Do not allow changing bittiming while running */ + if (dev->flags & IFF_UP) + return -EBUSY; + memcpy(&dbt, nla_data(data[IFLA_CAN_DATA_BITTIMING]), + sizeof(dbt)); + err = can_get_bittiming(dev, &dbt, priv->data_bittiming_const); + if (err) + return err; + memcpy(&priv->data_bittiming, &dbt, sizeof(dbt)); + + if (priv->do_set_data_bittiming) { + /* Finally, set the bit-timing registers */ + err = priv->do_set_data_bittiming(dev); + if (err) + return err; + } + } + return 0; } @@ -725,6 +750,10 @@ static size_t can_get_size(const struct net_device *dev) size += nla_total_size(sizeof(u32)); /* IFLA_CAN_RESTART_MS */ if (priv->do_get_berr_counter) /* IFLA_CAN_BERR_COUNTER */ size += nla_total_size(sizeof(struct can_berr_counter)); + if (priv->data_bittiming.bitrate) /* IFLA_CAN_DATA_BITTIMING */ + size += nla_total_size(sizeof(struct can_bittiming)); + if (priv->data_bittiming_const) /* IFLA_CAN_DATA_BITTIMING_CONST */ + size += nla_total_size(sizeof(struct can_bittiming_const)); return size; } @@ -738,20 +767,34 @@ static int can_fill_info(struct sk_buff *skb, const struct net_device *dev) if (priv->do_get_state) priv->do_get_state(dev, &state); + if ((priv->bittiming.bitrate && nla_put(skb, IFLA_CAN_BITTIMING, sizeof(priv->bittiming), &priv->bittiming)) || + (priv->bittiming_const && nla_put(skb, IFLA_CAN_BITTIMING_CONST, sizeof(*priv->bittiming_const), priv->bittiming_const)) || + nla_put(skb, IFLA_CAN_CLOCK, sizeof(cm), &priv->clock) || nla_put_u32(skb, IFLA_CAN_STATE, state) || nla_put(skb, IFLA_CAN_CTRLMODE, sizeof(cm), &cm) || nla_put_u32(skb, IFLA_CAN_RESTART_MS, priv->restart_ms) || + (priv->do_get_berr_counter && !priv->do_get_berr_counter(dev, &bec) && - nla_put(skb, IFLA_CAN_BERR_COUNTER, sizeof(bec), &bec))) + nla_put(skb, IFLA_CAN_BERR_COUNTER, sizeof(bec), &bec)) || + + (priv->data_bittiming.bitrate && + nla_put(skb, IFLA_CAN_DATA_BITTIMING, + sizeof(priv->data_bittiming), &priv->data_bittiming)) || + + (priv->data_bittiming_const && + nla_put(skb, IFLA_CAN_DATA_BITTIMING_CONST, + sizeof(*priv->data_bittiming_const), + priv->data_bittiming_const))) return -EMSGSIZE; + return 0; } diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index dc5f9026b67f..8adaee96f292 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -33,8 +33,9 @@ enum can_mode { struct can_priv { struct can_device_stats can_stats; - struct can_bittiming bittiming; - const struct can_bittiming_const *bittiming_const; + struct can_bittiming bittiming, data_bittiming; + const struct can_bittiming_const *bittiming_const, + *data_bittiming_const; struct can_clock clock; enum can_state state; @@ -45,6 +46,7 @@ struct can_priv { struct timer_list restart_timer; int (*do_set_bittiming)(struct net_device *dev); + int (*do_set_data_bittiming)(struct net_device *dev); int (*do_set_mode)(struct net_device *dev, enum can_mode mode); int (*do_get_state)(const struct net_device *dev, enum can_state *state); diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index df944ed206a8..b41933d6bdcd 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -122,6 +122,8 @@ enum { IFLA_CAN_RESTART_MS, IFLA_CAN_RESTART, IFLA_CAN_BERR_COUNTER, + IFLA_CAN_DATA_BITTIMING, + IFLA_CAN_DATA_BITTIMING_CONST, __IFLA_CAN_MAX }; -- cgit v1.2.3-71-gd317 From bc05a8944a344acdb81a65de055ca6febbf9657c Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 28 Feb 2014 16:36:24 +0100 Subject: can: allow to change the device mtu for CAN FD capable devices The configuration for CAN FD depends on CAN_CTRLMODE_FD enabled in the driver specific ctrlmode_supported capabilities. The configuration can be done either with the 'fd { on | off }' option in the 'ip' tool from iproute2 or by setting the CAN netdevice MTU to CAN_MTU (16) or to CANFD_MTU (72). Signed-off-by: Oliver Hartkopp Acked-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 39 +++++++++++++++++++++++++++++++++++++++ include/linux/can/dev.h | 1 + include/uapi/linux/can/netlink.h | 1 + 3 files changed, 41 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 8ebe112458c4..4e20d82b799e 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -594,6 +594,39 @@ void free_candev(struct net_device *dev) } EXPORT_SYMBOL_GPL(free_candev); +/* + * changing MTU and control mode for CAN/CANFD devices + */ +int can_change_mtu(struct net_device *dev, int new_mtu) +{ + struct can_priv *priv = netdev_priv(dev); + + /* Do not allow changing the MTU while running */ + if (dev->flags & IFF_UP) + return -EBUSY; + + /* allow change of MTU according to the CANFD ability of the device */ + switch (new_mtu) { + case CAN_MTU: + priv->ctrlmode &= ~CAN_CTRLMODE_FD; + break; + + case CANFD_MTU: + if (!(priv->ctrlmode_supported & CAN_CTRLMODE_FD)) + return -EINVAL; + + priv->ctrlmode |= CAN_CTRLMODE_FD; + break; + + default: + return -EINVAL; + } + + dev->mtu = new_mtu; + return 0; +} +EXPORT_SYMBOL_GPL(can_change_mtu); + /* * Common open function when the device gets opened. * @@ -693,6 +726,12 @@ static int can_changelink(struct net_device *dev, return -EOPNOTSUPP; priv->ctrlmode &= ~cm->mask; priv->ctrlmode |= cm->flags; + + /* CAN_CTRLMODE_FD can only be set when driver supports FD */ + if (priv->ctrlmode & CAN_CTRLMODE_FD) + dev->mtu = CANFD_MTU; + else + dev->mtu = CAN_MTU; } if (data[IFLA_CAN_RESTART_MS]) { diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 8adaee96f292..3ce5e526525f 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -113,6 +113,7 @@ struct can_priv *safe_candev_priv(struct net_device *dev); int open_candev(struct net_device *dev); void close_candev(struct net_device *dev); +int can_change_mtu(struct net_device *dev, int new_mtu); int register_candev(struct net_device *dev); void unregister_candev(struct net_device *dev); diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index b41933d6bdcd..7e2e1863db16 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -96,6 +96,7 @@ struct can_ctrlmode { #define CAN_CTRLMODE_3_SAMPLES 0x04 /* Triple sampling mode */ #define CAN_CTRLMODE_ONE_SHOT 0x08 /* One-Shot mode */ #define CAN_CTRLMODE_BERR_REPORTING 0x10 /* Bus-error reporting */ +#define CAN_CTRLMODE_FD 0x20 /* CAN FD mode */ /* * CAN device statistics -- cgit v1.2.3-71-gd317 From 693350c2ffe42a21185070b1e8ab77959b45d61e Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 10 Mar 2014 09:41:46 -0700 Subject: netdev: set __percpu attribute on netdev_alloc_pcpu_stats This patch fixes sparse warnings in vlan driver. It propagates the sparse __percpu attribute from alloc_percpu into netdev_alloc_pcpu_stats. I expect it may trigger additional sparse warnings from other drivers that are missing the __percpu attribute. Signed-off-by: Stephen Hemminger Acked-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1a869488b8ae..b8d8c805fd75 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1812,7 +1812,7 @@ struct pcpu_sw_netstats { #define netdev_alloc_pcpu_stats(type) \ ({ \ - typeof(type) *pcpu_stats = alloc_percpu(type); \ + typeof(type) __percpu *pcpu_stats = alloc_percpu(type); \ if (pcpu_stats) { \ int i; \ for_each_possible_cpu(i) { \ -- cgit v1.2.3-71-gd317 From 6ee51a4e866bbb0921180b457ed16cd172859346 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 12 Mar 2014 12:00:37 +0200 Subject: mlx4: Adjust QP1 multiplexing for RoCE/SRIOV This requires the following modifications: 1. Fix build_mlx4_header to properly fill in the ETH fields 2. Adjust mux and demux QP1 flow to support RoCE. This commit still assumes only one GID per slave for RoCE. The commit enabling multiple GIDs is a subsequent commit, and is done separately because of its complexity. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/cm.c | 8 +++-- drivers/infiniband/hw/mlx4/mad.c | 50 +++++++++++++++++++++++++++---- drivers/infiniband/hw/mlx4/qp.c | 29 ++++++++++-------- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 5 ++++ drivers/net/ethernet/mellanox/mlx4/port.c | 34 +++++++++++++++++++++ include/linux/mlx4/device.h | 4 +++ 6 files changed, 110 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index d1f5f1dd77b0..b8d911543783 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -315,7 +315,7 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id } int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, - struct ib_mad *mad) + struct ib_mad *mad) { u32 pv_cm_id; struct id_map_entry *id; @@ -323,6 +323,9 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) { union ib_gid gid; + if (!slave) + return 0; + gid = gid_from_req_msg(ibdev, mad); *slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id); if (*slave < 0) { @@ -341,7 +344,8 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, return -ENOENT; } - *slave = id->slave_id; + if (slave) + *slave = id->slave_id; set_remote_comm_id(mad, id->sl_cm_id); if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index f2a3f48107e7..c2e9879a5a34 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -467,6 +467,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, int ret = 0; u16 tun_pkey_ix; u16 cached_pkey; + u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; if (dest_qpt > IB_QPT_GSI) return -EINVAL; @@ -509,6 +510,12 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, * The driver will set the force loopback bit in post_send */ memset(&attr, 0, sizeof attr); attr.port_num = port; + if (is_eth) { + ret = mlx4_get_roce_gid_from_slave(dev->dev, port, slave, attr.grh.dgid.raw); + if (ret) + return ret; + attr.ah_flags = IB_AH_GRH; + } ah = ib_create_ah(tun_ctx->pd, &attr); if (IS_ERR(ah)) return -ENOMEM; @@ -580,6 +587,41 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, int err; int slave; u8 *slave_id; + int is_eth = 0; + + if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) + is_eth = 0; + else + is_eth = 1; + + if (is_eth) { + if (!(wc->wc_flags & IB_WC_GRH)) { + mlx4_ib_warn(ibdev, "RoCE grh not present.\n"); + return -EINVAL; + } + if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) { + mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n"); + return -EINVAL; + } + if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) { + mlx4_ib_warn(ibdev, "failed matching grh\n"); + return -ENOENT; + } + if (slave >= dev->dev->caps.sqp_demux) { + mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n", + slave, dev->dev->caps.sqp_demux); + return -ENOENT; + } + + if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad)) + return 0; + + err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad); + if (err) + pr_debug("failed sending to slave %d via tunnel qp (%d)\n", + slave, err); + return 0; + } /* Initially assume that this mad is for us */ slave = mlx4_master_func_num(dev->dev); @@ -1260,12 +1302,8 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av)); ah.ibah.device = ctx->ib_dev; mlx4_ib_query_ah(&ah.ibah, &ah_attr); - if ((ah_attr.ah_flags & IB_AH_GRH) && - (ah_attr.grh.sgid_index != slave)) { - mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n", - slave, ah_attr.grh.sgid_index); - return; - } + if (ah_attr.ah_flags & IB_AH_GRH) + ah_attr.grh.sgid_index = slave; mlx4_ib_send_to_wire(dev, slave, ctx->port, is_proxy_qp0(dev, wc->src_qp, slave) ? diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index d8f4d1fe8494..c6ef2e7e3045 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1842,9 +1842,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, { struct ib_device *ib_dev = sqp->qp.ibqp.device; struct mlx4_wqe_mlx_seg *mlx = wqe; + struct mlx4_wqe_ctrl_seg *ctrl = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); - struct net_device *ndev; union ib_gid sgid; u16 pkey; int send_size; @@ -1868,12 +1868,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, /* When multi-function is enabled, the ib_core gid * indexes don't necessarily match the hw ones, so * we must use our own cache */ - sgid.global.subnet_prefix = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - subnet_prefix; - sgid.global.interface_id = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - guid_cache[ah->av.ib.gid_index]; + err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev, + be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sgid.raw[0]); + if (err) + return err; } else { err = ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, @@ -1902,6 +1901,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, sqp->ud_header.grh.flow_label = ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; + if (is_eth) + memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16); + else { if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { /* When multi-function is enabled, the ib_core gid * indexes don't necessarily match the hw ones, so @@ -1917,6 +1919,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, be32_to_cpu(ah->av.ib.port_pd) >> 24, ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid); + } memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16); } @@ -1948,17 +1951,19 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, } if (is_eth) { - u8 *smac; + u8 smac[6]; + struct in6_addr in6; + u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; mlx->sched_prio = cpu_to_be16(pcp); memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); /* FIXME: cache smac value? */ - ndev = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]; - if (!ndev) - return -ENODEV; - smac = ndev->dev_addr; + memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2); + memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4); + memcpy(&in6, sgid.raw, sizeof(in6)); + rdma_get_ll_mac(&in6, smac); memcpy(sqp->ud_header.eth.smac_h, smac, 6); if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 7aec6c833973..da829f4ef938 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -788,6 +788,10 @@ enum { MLX4_USE_RR = 1, }; +struct mlx4_roce_gid_entry { + u8 raw[16]; +}; + struct mlx4_priv { struct mlx4_dev dev; @@ -834,6 +838,7 @@ struct mlx4_priv { int fs_hash_mode; u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; __be64 slave_node_guids[MLX4_MFUNC_MAX]; + struct mlx4_roce_gid_entry roce_gids[MLX4_MAX_PORTS][MLX4_ROCE_MAX_GIDS]; atomic_t opreq_count; struct work_struct opreq_task; diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index a58bcbf1b806..9c063d6122b3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -927,3 +927,37 @@ void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap) *stats_bitmap |= MLX4_STATS_ERROR_COUNTERS_MASK; } EXPORT_SYMBOL(mlx4_set_stats_bitmap); + +int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *slave_id) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int i, found_ix = -1; + + if (!mlx4_is_mfunc(dev)) + return -EINVAL; + + for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { + if (!memcmp(priv->roce_gids[port - 1][i].raw, gid, 16)) { + found_ix = i; + break; + } + } + + if (found_ix >= 0) + *slave_id = found_ix; + + return (found_ix >= 0) ? 0 : -EINVAL; +} +EXPORT_SYMBOL(mlx4_get_slave_from_roce_gid); + +int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, u8 *gid) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (!mlx4_is_master(dev)) + return -EINVAL; + + memcpy(gid, priv->roce_gids[port - 1][slave_id].raw, 16); + return 0; +} +EXPORT_SYMBOL(mlx4_get_roce_gid_from_slave); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 5edd2c68274d..fbe6cda00ba7 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -48,6 +48,8 @@ #define MSIX_LEGACY_SZ 4 #define MIN_MSIX_P_PORT 5 +#define MLX4_ROCE_MAX_GIDS 128 + enum { MLX4_FLAG_MSI_X = 1 << 0, MLX4_FLAG_OLD_PORT_CMDS = 1 << 1, @@ -1182,6 +1184,8 @@ int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid); __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave); +int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *slave_id); +int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, u8 *gid); int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, u32 max_range_qpn); -- cgit v1.2.3-71-gd317 From 9cd593529c8652785bc9962acc79b6b176741f99 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 12 Mar 2014 12:00:38 +0200 Subject: mlx4_core: For RoCE, allow slaves to set the GID entry at that slave's index For IB transport, the host determines the slave GIDs. For ETH (RoCE), however, the slave's GID is determined by the IP address that the slave itself assigns to the ETH device used by RoCE. In this case, the slave must be able to write its GIDs to the HCA gid table (at the GID indices that slave "owns"). This commit adds processing for the SET_PORT_GID_TABLE opcode modifier for the SET_PORT command wrapper (so that slaves may modify their GIDS for RoCE). Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/port.c | 33 ++++++++++++++++++++++++++++--- include/linux/mlx4/device.h | 7 +++++-- 2 files changed, 35 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 9c063d6122b3..591740b06043 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -505,6 +505,7 @@ int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps) mlx4_free_cmd_mailbox(dev, outmailbox); return err; } +static struct mlx4_roce_gid_entry zgid_entry; static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, u8 op_mod, struct mlx4_cmd_mailbox *inbox) @@ -515,6 +516,7 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, struct mlx4_slave_state *slave_st = &master->slave_state[slave]; struct mlx4_set_port_rqp_calc_context *qpn_context; struct mlx4_set_port_general_context *gen_context; + struct mlx4_roce_gid_entry *gid_entry; int reset_qkey_viols; int port; int is_eth; @@ -535,7 +537,8 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, /* Slaves cannot perform SET_PORT operations except changing MTU */ if (is_eth) { if (slave != dev->caps.function && - in_modifier != MLX4_SET_PORT_GENERAL) { + in_modifier != MLX4_SET_PORT_GENERAL && + in_modifier != MLX4_SET_PORT_GID_TABLE) { mlx4_warn(dev, "denying SET_PORT for slave:%d\n", slave); return -EINVAL; @@ -581,6 +584,28 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, gen_context->mtu = cpu_to_be16(master->max_mtu[port]); break; + case MLX4_SET_PORT_GID_TABLE: + gid_entry = (struct mlx4_roce_gid_entry *)(inbox->buf); + /* check that do not have duplicates */ + if (memcmp(gid_entry->raw, zgid_entry.raw, 16)) { + for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { + if (slave != i && + !memcmp(gid_entry->raw, priv->roce_gids[port - 1][i].raw, 16)) { + mlx4_warn(dev, "requested gid entry for slave:%d " + "is a duplicate of slave %d\n", + slave, i); + return -EEXIST; + } + } + } + /* insert slave GID at proper index */ + memcpy(priv->roce_gids[port - 1][slave].raw, gid_entry->raw, 16); + /* rewrite roce port gids table to FW */ + for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { + memcpy(gid_entry->raw, priv->roce_gids[port - 1][i].raw, 16); + gid_entry++; + } + break; } return mlx4_cmd(dev, inbox->dma, in_mod, op_mod, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, @@ -928,7 +953,8 @@ void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap) } EXPORT_SYMBOL(mlx4_set_stats_bitmap); -int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *slave_id) +int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, + int *slave_id) { struct mlx4_priv *priv = mlx4_priv(dev); int i, found_ix = -1; @@ -950,7 +976,8 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *s } EXPORT_SYMBOL(mlx4_get_slave_from_roce_gid); -int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, u8 *gid) +int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, + u8 *gid) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index fbe6cda00ba7..e4853650679d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1184,8 +1184,11 @@ int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid); __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave); -int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *slave_id); -int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, u8 *gid); + +int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, + int *slave_id); +int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, + u8 *gid); int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, u32 max_range_qpn); -- cgit v1.2.3-71-gd317 From b6ffaeffaea4d92f05f5ba1ef54df407cb7c8517 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 12 Mar 2014 12:00:39 +0200 Subject: mlx4: In RoCE allow guests to have multiple GIDS The GIDs are statically distributed, as follows: PF: gets 16 GIDs VFs: Remaining GIDS are divided evenly between VFs activated by the driver. If the division is not even, lower-numbered VFs get an extra GID. For an IB interface, the number of gids per guest remains as before: one gid per guest. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/mad.c | 34 +++++- drivers/net/ethernet/mellanox/mlx4/fw.c | 5 +- drivers/net/ethernet/mellanox/mlx4/main.c | 6 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 3 + drivers/net/ethernet/mellanox/mlx4/port.c | 117 +++++++++++++++++---- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 64 ++++++++--- include/linux/mlx4/device.h | 1 + 7 files changed, 192 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index c2e9879a5a34..c5bca0f0da4a 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -511,9 +511,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, memset(&attr, 0, sizeof attr); attr.port_num = port; if (is_eth) { - ret = mlx4_get_roce_gid_from_slave(dev->dev, port, slave, attr.grh.dgid.raw); - if (ret) - return ret; + memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16); attr.ah_flags = IB_AH_GRH; } ah = ib_create_ah(tun_ctx->pd, &attr); @@ -1216,6 +1214,34 @@ out: return ret; } +static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port) +{ + int gids; + int vfs; + + if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) + return slave; + + gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; + vfs = dev->dev->num_vfs; + + if (slave == 0) + return 0; + if (slave <= gids % vfs) + return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1); + + return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1)); +} + +static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port, + struct ib_ah_attr *ah_attr) +{ + if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) + ah_attr->grh.sgid_index = slave; + else + ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port); +} + static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc) { struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); @@ -1303,7 +1329,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc ah.ibah.device = ctx->ib_dev; mlx4_ib_query_ah(&ah.ibah, &ah_attr); if (ah_attr.ah_flags & IB_AH_GRH) - ah_attr.grh.sgid_index = slave; + fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr); mlx4_ib_send_to_wire(dev, slave, ctx->port, is_proxy_qp0(dev, wc->src_qp, slave) ? diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 9cdf452140da..6e1ee2170a39 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -934,7 +934,10 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, port_type, QUERY_PORT_SUPPORTED_TYPE_OFFSET); - short_field = 1; /* slave max gids */ + if (dev->caps.port_type[vhcr->in_modifier] == MLX4_PORT_TYPE_ETH) + short_field = mlx4_get_slave_num_gids(dev, slave); + else + short_field = 1; /* slave max gids */ MLX4_PUT(outbox->buf, short_field, QUERY_PORT_CUR_MAX_GID_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 979ea4364efb..4c441aa83016 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1462,7 +1462,11 @@ static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) int i; for (i = 1; i <= dev->caps.num_ports; i++) { - dev->caps.gid_table_len[i] = 1; + if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) + dev->caps.gid_table_len[i] = + mlx4_get_slave_num_gids(dev, 0); + else + dev->caps.gid_table_len[i] = 1; dev->caps.pkey_table_len[i] = dev->phys_caps.pkey_phys_table_len[i] - 1; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index da829f4ef938..6ba38c98c492 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1287,4 +1287,7 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work); void mlx4_init_quotas(struct mlx4_dev *dev); +int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave); +int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave); + #endif /* MLX4_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 591740b06043..ece328166e94 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -507,6 +507,31 @@ int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps) } static struct mlx4_roce_gid_entry zgid_entry; +int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave) +{ + if (slave == 0) + return MLX4_ROCE_PF_GIDS; + if (slave <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % dev->num_vfs)) + return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs) + 1; + return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs; +} + +int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave) +{ + int gids; + int vfs; + + gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; + vfs = dev->num_vfs; + + if (slave == 0) + return 0; + if (slave <= gids % vfs) + return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1); + + return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1)); +} + static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, u8 op_mod, struct mlx4_cmd_mailbox *inbox) { @@ -516,15 +541,18 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, struct mlx4_slave_state *slave_st = &master->slave_state[slave]; struct mlx4_set_port_rqp_calc_context *qpn_context; struct mlx4_set_port_general_context *gen_context; - struct mlx4_roce_gid_entry *gid_entry; + struct mlx4_roce_gid_entry *gid_entry_tbl, *gid_entry_mbox, *gid_entry_mb1; int reset_qkey_viols; int port; int is_eth; + int num_gids; + int base; u32 in_modifier; u32 promisc; u16 mtu, prev_mtu; int err; - int i; + int i, j; + int offset; __be32 agg_cap_mask; __be32 slave_cap_mask; __be32 new_cap_mask; @@ -585,26 +613,65 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, gen_context->mtu = cpu_to_be16(master->max_mtu[port]); break; case MLX4_SET_PORT_GID_TABLE: - gid_entry = (struct mlx4_roce_gid_entry *)(inbox->buf); - /* check that do not have duplicates */ - if (memcmp(gid_entry->raw, zgid_entry.raw, 16)) { - for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { - if (slave != i && - !memcmp(gid_entry->raw, priv->roce_gids[port - 1][i].raw, 16)) { - mlx4_warn(dev, "requested gid entry for slave:%d " - "is a duplicate of slave %d\n", - slave, i); - return -EEXIST; + /* change to MULTIPLE entries: number of guest's gids + * need a FOR-loop here over number of gids the guest has. + * 1. Check no duplicates in gids passed by slave + */ + num_gids = mlx4_get_slave_num_gids(dev, slave); + base = mlx4_get_base_gid_ix(dev, slave); + gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); + for (i = 0; i < num_gids; gid_entry_mbox++, i++) { + if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw, + sizeof(zgid_entry))) + continue; + gid_entry_mb1 = gid_entry_mbox + 1; + for (j = i + 1; j < num_gids; gid_entry_mb1++, j++) { + if (!memcmp(gid_entry_mb1->raw, + zgid_entry.raw, sizeof(zgid_entry))) + continue; + if (!memcmp(gid_entry_mb1->raw, gid_entry_mbox->raw, + sizeof(gid_entry_mbox->raw))) { + /* found duplicate */ + return -EINVAL; } } } - /* insert slave GID at proper index */ - memcpy(priv->roce_gids[port - 1][slave].raw, gid_entry->raw, 16); - /* rewrite roce port gids table to FW */ + + /* 2. Check that do not have duplicates in OTHER + * entries in the port GID table + */ for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { - memcpy(gid_entry->raw, priv->roce_gids[port - 1][i].raw, 16); - gid_entry++; + if (i >= base && i < base + num_gids) + continue; /* don't compare to slave's current gids */ + gid_entry_tbl = &priv->roce_gids[port - 1][i]; + if (!memcmp(gid_entry_tbl->raw, zgid_entry.raw, sizeof(zgid_entry))) + continue; + gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); + for (j = 0; j < num_gids; gid_entry_mbox++, j++) { + if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw, + sizeof(zgid_entry))) + continue; + if (!memcmp(gid_entry_mbox->raw, gid_entry_tbl->raw, + sizeof(gid_entry_tbl->raw))) { + /* found duplicate */ + mlx4_warn(dev, "requested gid entry for slave:%d " + "is a duplicate of gid at index %d\n", + slave, i); + return -EINVAL; + } + } } + + /* insert slave GIDs with memcpy, starting at slave's base index */ + gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); + for (i = 0, offset = base; i < num_gids; gid_entry_mbox++, offset++, i++) + memcpy(priv->roce_gids[port - 1][offset].raw, gid_entry_mbox->raw, 16); + + /* Now, copy roce port gids table to current mailbox for passing to FW */ + gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); + for (i = 0; i < MLX4_ROCE_MAX_GIDS; gid_entry_mbox++, i++) + memcpy(gid_entry_mbox->raw, priv->roce_gids[port - 1][i].raw, 16); + break; } return mlx4_cmd(dev, inbox->dma, in_mod, op_mod, @@ -958,6 +1025,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, { struct mlx4_priv *priv = mlx4_priv(dev); int i, found_ix = -1; + int vf_gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; if (!mlx4_is_mfunc(dev)) return -EINVAL; @@ -969,8 +1037,19 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, } } - if (found_ix >= 0) - *slave_id = found_ix; + if (found_ix >= 0) { + if (found_ix < MLX4_ROCE_PF_GIDS) + *slave_id = 0; + else if (found_ix < MLX4_ROCE_PF_GIDS + (vf_gids % dev->num_vfs) * + (vf_gids / dev->num_vfs + 1)) + *slave_id = ((found_ix - MLX4_ROCE_PF_GIDS) / + (vf_gids / dev->num_vfs + 1)) + 1; + else + *slave_id = + ((found_ix - MLX4_ROCE_PF_GIDS - + ((vf_gids % dev->num_vfs) * ((vf_gids / dev->num_vfs + 1)))) / + (vf_gids / dev->num_vfs)) + vf_gids % dev->num_vfs + 1; + } return (found_ix >= 0) ? 0 : -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 57428a0cb9dd..1c3634eab5e1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -219,6 +219,11 @@ struct res_fs_rule { int qpn; }; +static int mlx4_is_eth(struct mlx4_dev *dev, int port) +{ + return dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB ? 0 : 1; +} + static void *res_tracker_lookup(struct rb_root *root, u64 res_id) { struct rb_node *node = root->rb_node; @@ -600,15 +605,34 @@ static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, struct mlx4_qp_context *qp_ctx = inbox->buf + 8; enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *) inbox->buf); u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; + int port; - if (MLX4_QP_ST_UD == ts) - qp_ctx->pri_path.mgid_index = 0x80 | slave; - - if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_UC == ts) { - if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) - qp_ctx->pri_path.mgid_index = slave & 0x7F; - if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) - qp_ctx->alt_path.mgid_index = slave & 0x7F; + if (MLX4_QP_ST_UD == ts) { + port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; + if (mlx4_is_eth(dev, port)) + qp_ctx->pri_path.mgid_index = mlx4_get_base_gid_ix(dev, slave) | 0x80; + else + qp_ctx->pri_path.mgid_index = slave | 0x80; + + } else if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_XRC == ts || MLX4_QP_ST_UC == ts) { + if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { + port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; + if (mlx4_is_eth(dev, port)) { + qp_ctx->pri_path.mgid_index += mlx4_get_base_gid_ix(dev, slave); + qp_ctx->pri_path.mgid_index &= 0x7f; + } else { + qp_ctx->pri_path.mgid_index = slave & 0x7F; + } + } + if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { + port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1; + if (mlx4_is_eth(dev, port)) { + qp_ctx->alt_path.mgid_index += mlx4_get_base_gid_ix(dev, slave); + qp_ctx->alt_path.mgid_index &= 0x7f; + } else { + qp_ctx->alt_path.mgid_index = slave & 0x7F; + } + } } } @@ -2734,6 +2758,8 @@ static int verify_qp_parameters(struct mlx4_dev *dev, u32 qp_type; struct mlx4_qp_context *qp_ctx; enum mlx4_qp_optpar optpar; + int port; + int num_gids; qp_ctx = inbox->buf + 8; qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; @@ -2741,6 +2767,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev, switch (qp_type) { case MLX4_QP_ST_RC: + case MLX4_QP_ST_XRC: case MLX4_QP_ST_UC: switch (transition) { case QP_TRANS_INIT2RTR: @@ -2749,13 +2776,24 @@ static int verify_qp_parameters(struct mlx4_dev *dev, case QP_TRANS_SQD2SQD: case QP_TRANS_SQD2RTS: if (slave != mlx4_master_func_num(dev)) - /* slaves have only gid index 0 */ - if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) - if (qp_ctx->pri_path.mgid_index) + if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { + port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; + if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) + num_gids = mlx4_get_slave_num_gids(dev, slave); + else + num_gids = 1; + if (qp_ctx->pri_path.mgid_index >= num_gids) return -EINVAL; - if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) - if (qp_ctx->alt_path.mgid_index) + } + if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { + port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1; + if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) + num_gids = mlx4_get_slave_num_gids(dev, slave); + else + num_gids = 1; + if (qp_ctx->alt_path.mgid_index >= num_gids) return -EINVAL; + } break; default: break; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e4853650679d..86e02e5c2c77 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -49,6 +49,7 @@ #define MIN_MSIX_P_PORT 5 #define MLX4_ROCE_MAX_GIDS 128 +#define MLX4_ROCE_PF_GIDS 16 enum { MLX4_FLAG_MSI_X = 1 << 0, -- cgit v1.2.3-71-gd317 From 5ea8bbfc49291b7e23161fe4de0bf3e4a4e34b18 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 12 Mar 2014 12:00:41 +0200 Subject: mlx4: Implement IP based gids support for RoCE/SRIOV Since there is no connection between the MAC/VLAN and the GID when using IP-based addressing, the proxy QP1 (running on the slave) must pass the source-mac, destination-mac, and vlan_id information separately from the GID. Additionally, the Host must pass the remote source-mac and vlan_id back to the slave, This is achieved as follows: Outgoing MADs: 1. Source MAC: obtained from the CQ completion structure (struct ib_wc, smac field). 2. Destination MAC: obtained from the tunnel header 3. vlan_id: obtained from the tunnel header. Incoming MADs 1. The source (i.e., remote) MAC and vlan_id are passed in the tunnel header to the proxy QP1. VST mode support: For outgoing MADs, the vlan_id obtained from the header is discarded, and the vlan_id specified by the Hypervisor is used instead. For incoming MADs, the incoming vlan_id (in the wc) is discarded, and the "invalid" vlan (0xffff) is substituted when forwarding to the slave. Signed-off-by: Moni Shoua Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/cq.c | 42 +++++++++++++++++++---------- drivers/infiniband/hw/mlx4/mad.c | 45 ++++++++++++++++++++++++++++---- drivers/infiniband/hw/mlx4/mcg.c | 5 ++-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 5 +++- drivers/infiniband/hw/mlx4/qp.c | 11 ++++++-- drivers/net/ethernet/mellanox/mlx4/cmd.c | 24 +++++++++++++++++ include/linux/mlx4/cmd.h | 7 +++++ include/linux/mlx4/device.h | 3 ++- 8 files changed, 117 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index cc40f08ca8f1..5f640814cc81 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -564,7 +564,7 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum) } static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, - unsigned tail, struct mlx4_cqe *cqe) + unsigned tail, struct mlx4_cqe *cqe, int is_eth) { struct mlx4_ib_proxy_sqp_hdr *hdr; @@ -574,12 +574,20 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct DMA_FROM_DEVICE); hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr); wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index); - wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); - wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF; wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0; wc->dlid_path_bits = 0; + if (is_eth) { + wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid); + memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4); + memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2); + wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC); + } else { + wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); + wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); + } + return 0; } @@ -594,6 +602,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, struct mlx4_srq *msrq = NULL; int is_send; int is_error; + int is_eth; u32 g_mlpath_rqpn; u16 wqe_ctr; unsigned tail = 0; @@ -778,11 +787,15 @@ repoll: break; } + is_eth = (rdma_port_get_link_layer(wc->qp->device, + (*cur_qp)->port) == + IB_LINK_LAYER_ETHERNET); if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) { if ((*cur_qp)->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) - return use_tunnel_data(*cur_qp, cq, wc, tail, cqe); + return use_tunnel_data(*cur_qp, cq, wc, tail, + cqe, is_eth); } wc->slid = be16_to_cpu(cqe->rlid); @@ -793,20 +806,21 @@ repoll: wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum) ? IB_WC_IP_CSUM_OK : 0; - if (rdma_port_get_link_layer(wc->qp->device, - (*cur_qp)->port) == IB_LINK_LAYER_ETHERNET) + if (is_eth) { wc->sl = be16_to_cpu(cqe->sl_vid) >> 13; - else - wc->sl = be16_to_cpu(cqe->sl_vid) >> 12; - if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) { - wc->vlan_id = be16_to_cpu(cqe->sl_vid) & - MLX4_CQE_VID_MASK; + if (be32_to_cpu(cqe->vlan_my_qpn) & + MLX4_CQE_VLAN_PRESENT_MASK) { + wc->vlan_id = be16_to_cpu(cqe->sl_vid) & + MLX4_CQE_VID_MASK; + } else { + wc->vlan_id = 0xffff; + } + memcpy(wc->smac, cqe->smac, ETH_ALEN); + wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC); } else { + wc->sl = be16_to_cpu(cqe->sl_vid) >> 12; wc->vlan_id = 0xffff; } - wc->wc_flags |= IB_WC_WITH_VLAN; - memcpy(wc->smac, cqe->smac, ETH_ALEN); - wc->wc_flags |= IB_WC_WITH_SMAC; } return 0; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index c5bca0f0da4a..2c572aed3f6f 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -545,11 +545,36 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, /* adjust tunnel data */ tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix); - tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); - tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid); tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF); tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0; + if (is_eth) { + u16 vlan = 0; + if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan, + NULL)) { + /* VST mode */ + if (vlan != wc->vlan_id) + /* Packet vlan is not the VST-assigned vlan. + * Drop the packet. + */ + goto out; + else + /* Remove the vlan tag before forwarding + * the packet to the VF. + */ + vlan = 0xffff; + } else { + vlan = wc->vlan_id; + } + + tun_mad->hdr.sl_vid = cpu_to_be16(vlan); + memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4); + memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2); + } else { + tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); + tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid); + } + ib_dma_sync_single_for_device(&dev->ib_dev, tun_qp->tx_ring[tun_tx_ix].buf.map, sizeof (struct mlx4_rcv_tunnel_mad), @@ -1116,8 +1141,9 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, - enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, - u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad) + enum ib_qp_type dest_qpt, u16 pkey_index, + u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr, + u8 *s_mac, struct ib_mad *mad) { struct ib_sge list; struct ib_send_wr wr, *bad_wr; @@ -1206,6 +1232,9 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, wr.num_sge = 1; wr.opcode = IB_WR_SEND; wr.send_flags = IB_SEND_SIGNALED; + if (s_mac) + memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6); + ret = ib_post_send(send_qp, &wr, &bad_wr); out: @@ -1331,13 +1360,19 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc if (ah_attr.ah_flags & IB_AH_GRH) fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr); + memcpy(ah_attr.dmac, tunnel->hdr.mac, 6); + ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan); + /* if slave have default vlan use it */ + mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave, + &ah_attr.vlan_id, &ah_attr.sl); + mlx4_ib_send_to_wire(dev, slave, ctx->port, is_proxy_qp0(dev, wc->src_qp, slave) ? IB_QPT_SMI : IB_QPT_GSI, be16_to_cpu(tunnel->hdr.pkey_index), be32_to_cpu(tunnel->hdr.remote_qpn), be32_to_cpu(tunnel->hdr.qkey), - &ah_attr, &tunnel->mad); + &ah_attr, wc->smac, &tunnel->mad); } static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 25b2cdff00f8..ed327e6c8fdc 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -215,8 +215,9 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad) } mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr); spin_unlock(&dev->sm_lock); - return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port, - IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, mad); + return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), + ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY, + &ah_attr, NULL, mad); } static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx, diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index febc8f9bc59a..f589522fddfd 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -737,9 +737,12 @@ void mlx4_ib_tunnels_update_work(struct work_struct *work); int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type qpt, struct ib_wc *wc, struct ib_grh *grh, struct ib_mad *mad); + int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, - u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad); + u32 qkey, struct ib_ah_attr *attr, u8 *s_mac, + struct ib_mad *mad); + __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx); int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 11332f074023..aadf7f82e1f3 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2152,7 +2152,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, } if (is_eth) { - u8 smac[6]; + u8 *smac; struct in6_addr in6; u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; @@ -2164,7 +2164,12 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2); memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4); memcpy(&in6, sgid.raw, sizeof(in6)); - rdma_get_ll_mac(&in6, smac); + + if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) + smac = to_mdev(sqp->qp.ibqp.device)-> + iboe.netdevs[sqp->qp.port - 1]->dev_addr; + else /* use the src mac of the tunnel */ + smac = ah->av.eth.s_mac; memcpy(sqp->ud_header.eth.smac_h, smac, 6); if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); @@ -2396,6 +2401,8 @@ static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_ hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index); hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + memcpy(hdr.mac, ah->av.eth.mac, 6); + hdr.vlan = ah->av.eth.vlan; spc = MLX4_INLINE_ALIGN - ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 0d02fba94536..2b0b45ece14b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2289,6 +2289,30 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) } EXPORT_SYMBOL_GPL(mlx4_set_vf_vlan); + /* mlx4_get_slave_default_vlan - + * return true if VST ( default vlan) + * if VST, will return vlan & qos (if not NULL) + */ +bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave, + u16 *vlan, u8 *qos) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_priv *priv; + + priv = mlx4_priv(dev); + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + + if (MLX4_VGT != vp_oper->state.default_vlan) { + if (vlan) + *vlan = vp_oper->state.default_vlan; + if (qos) + *qos = vp_oper->state.default_qos; + return true; + } + return false; +} +EXPORT_SYMBOL_GPL(mlx4_get_slave_default_vlan); + int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 79a347238168..009985628257 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -240,6 +240,13 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting); int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_info *ivf); int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state); +/* + * mlx4_get_slave_default_vlan - + * return true if VST ( default vlan) + * if VST, will return vlan & qos (if not NULL) + */ +bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave, + u16 *vlan, u8 *qos); #define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 86e02e5c2c77..f211b51dc726 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -632,7 +632,8 @@ struct mlx4_eth_av { u8 hop_limit; __be32 sl_tclass_flowlabel; u8 dgid[16]; - u32 reserved4[2]; + u8 s_mac[6]; + u8 reserved4[2]; __be16 vlan; u8 mac[ETH_ALEN]; }; -- cgit v1.2.3-71-gd317 From 57a7744e09867ebcfa0ccf1d6d529caa7728d552 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 13 Mar 2014 21:26:42 -0700 Subject: net: Replace u64_stats_fetch_begin_bh to u64_stats_fetch_begin_irq Replace the bh safe variant with the hard irq safe variant. We need a hard irq safe variant to deal with netpoll transmitting packets from hard irq context, and we need it in most if not all of the places using the bh safe variant. Except on 32bit uni-processor the code is exactly the same so don't bother with a bh variant, just have a hard irq safe variant that everyone can use. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- block/blk-cgroup.h | 8 ++++---- drivers/net/dummy.c | 4 ++-- drivers/net/ethernet/broadcom/b44.c | 8 ++++---- drivers/net/ethernet/emulex/benet/be_ethtool.c | 12 ++++++------ drivers/net/ethernet/emulex/benet/be_main.c | 16 ++++++++-------- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 8 ++++---- drivers/net/ethernet/intel/i40e/i40e_main.c | 16 ++++++++-------- drivers/net/ethernet/intel/igb/igb_ethtool.c | 12 ++++++------ drivers/net/ethernet/intel/igb/igb_main.c | 8 ++++---- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 8 ++++---- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 8 ++++---- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 8 ++++---- drivers/net/ethernet/marvell/mvneta.c | 4 ++-- drivers/net/ethernet/marvell/sky2.c | 8 ++++---- drivers/net/ethernet/neterion/vxge/vxge-main.c | 8 ++++---- drivers/net/ethernet/nvidia/forcedeth.c | 8 ++++---- drivers/net/ethernet/realtek/8139too.c | 8 ++++---- drivers/net/ethernet/realtek/r8169.c | 8 ++++---- drivers/net/ethernet/tile/tilepro.c | 4 ++-- drivers/net/ethernet/via/via-rhine.c | 8 ++++---- drivers/net/ifb.c | 8 ++++---- drivers/net/loopback.c | 4 ++-- drivers/net/macvlan.c | 4 ++-- drivers/net/nlmon.c | 4 ++-- drivers/net/team/team.c | 4 ++-- drivers/net/team/team_mode_loadbalance.c | 4 ++-- drivers/net/veth.c | 4 ++-- drivers/net/virtio_net.c | 8 ++++---- drivers/net/xen-netfront.c | 4 ++-- include/linux/u64_stats_sync.h | 16 ++++++++-------- net/8021q/vlan_dev.c | 4 ++-- net/bridge/br_device.c | 4 ++-- net/ipv4/af_inet.c | 4 ++-- net/ipv4/ip_tunnel_core.c | 4 ++-- net/ipv6/ip6_tunnel.c | 4 ++-- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- net/openvswitch/datapath.c | 4 ++-- net/openvswitch/vport.c | 4 ++-- 38 files changed, 132 insertions(+), 132 deletions(-) (limited to 'include/linux') diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 86154eab9523..604f6d99ab92 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -435,9 +435,9 @@ static inline uint64_t blkg_stat_read(struct blkg_stat *stat) uint64_t v; do { - start = u64_stats_fetch_begin_bh(&stat->syncp); + start = u64_stats_fetch_begin_irq(&stat->syncp); v = stat->cnt; - } while (u64_stats_fetch_retry_bh(&stat->syncp, start)); + } while (u64_stats_fetch_retry_irq(&stat->syncp, start)); return v; } @@ -508,9 +508,9 @@ static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) struct blkg_rwstat tmp; do { - start = u64_stats_fetch_begin_bh(&rwstat->syncp); + start = u64_stats_fetch_begin_irq(&rwstat->syncp); tmp = *rwstat; - } while (u64_stats_fetch_retry_bh(&rwstat->syncp, start)); + } while (u64_stats_fetch_retry_irq(&rwstat->syncp, start)); return tmp; } diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index 1656317c96f8..0932ffbf381b 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -63,10 +63,10 @@ static struct rtnl_link_stats64 *dummy_get_stats64(struct net_device *dev, dstats = per_cpu_ptr(dev->dstats, i); do { - start = u64_stats_fetch_begin_bh(&dstats->syncp); + start = u64_stats_fetch_begin_irq(&dstats->syncp); tbytes = dstats->tx_bytes; tpackets = dstats->tx_packets; - } while (u64_stats_fetch_retry_bh(&dstats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&dstats->syncp, start)); stats->tx_bytes += tbytes; stats->tx_packets += tpackets; } diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 8a7bf7dad898..05ba62589017 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1685,7 +1685,7 @@ static struct rtnl_link_stats64 *b44_get_stats64(struct net_device *dev, unsigned int start; do { - start = u64_stats_fetch_begin_bh(&hwstat->syncp); + start = u64_stats_fetch_begin_irq(&hwstat->syncp); /* Convert HW stats into rtnl_link_stats64 stats. */ nstat->rx_packets = hwstat->rx_pkts; @@ -1719,7 +1719,7 @@ static struct rtnl_link_stats64 *b44_get_stats64(struct net_device *dev, /* Carrier lost counter seems to be broken for some devices */ nstat->tx_carrier_errors = hwstat->tx_carrier_lost; #endif - } while (u64_stats_fetch_retry_bh(&hwstat->syncp, start)); + } while (u64_stats_fetch_retry_irq(&hwstat->syncp, start)); return nstat; } @@ -2073,12 +2073,12 @@ static void b44_get_ethtool_stats(struct net_device *dev, do { data_src = &hwstat->tx_good_octets; data_dst = data; - start = u64_stats_fetch_begin_bh(&hwstat->syncp); + start = u64_stats_fetch_begin_irq(&hwstat->syncp); for (i = 0; i < ARRAY_SIZE(b44_gstrings); i++) *data_dst++ = *data_src++; - } while (u64_stats_fetch_retry_bh(&hwstat->syncp, start)); + } while (u64_stats_fetch_retry_irq(&hwstat->syncp, start)); } static void b44_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 66759b6ce373..15ba96cba65d 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -357,10 +357,10 @@ be_get_ethtool_stats(struct net_device *netdev, struct be_rx_stats *stats = rx_stats(rxo); do { - start = u64_stats_fetch_begin_bh(&stats->sync); + start = u64_stats_fetch_begin_irq(&stats->sync); data[base] = stats->rx_bytes; data[base + 1] = stats->rx_pkts; - } while (u64_stats_fetch_retry_bh(&stats->sync, start)); + } while (u64_stats_fetch_retry_irq(&stats->sync, start)); for (i = 2; i < ETHTOOL_RXSTATS_NUM; i++) { p = (u8 *)stats + et_rx_stats[i].offset; @@ -373,19 +373,19 @@ be_get_ethtool_stats(struct net_device *netdev, struct be_tx_stats *stats = tx_stats(txo); do { - start = u64_stats_fetch_begin_bh(&stats->sync_compl); + start = u64_stats_fetch_begin_irq(&stats->sync_compl); data[base] = stats->tx_compl; - } while (u64_stats_fetch_retry_bh(&stats->sync_compl, start)); + } while (u64_stats_fetch_retry_irq(&stats->sync_compl, start)); do { - start = u64_stats_fetch_begin_bh(&stats->sync); + start = u64_stats_fetch_begin_irq(&stats->sync); for (i = 1; i < ETHTOOL_TXSTATS_NUM; i++) { p = (u8 *)stats + et_tx_stats[i].offset; data[base + i] = (et_tx_stats[i].size == sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } - } while (u64_stats_fetch_retry_bh(&stats->sync, start)); + } while (u64_stats_fetch_retry_irq(&stats->sync, start)); base += ETHTOOL_TXSTATS_NUM; } } diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 239273b7b881..a61f967f9ca1 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -591,10 +591,10 @@ static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev, for_all_rx_queues(adapter, rxo, i) { const struct be_rx_stats *rx_stats = rx_stats(rxo); do { - start = u64_stats_fetch_begin_bh(&rx_stats->sync); + start = u64_stats_fetch_begin_irq(&rx_stats->sync); pkts = rx_stats(rxo)->rx_pkts; bytes = rx_stats(rxo)->rx_bytes; - } while (u64_stats_fetch_retry_bh(&rx_stats->sync, start)); + } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start)); stats->rx_packets += pkts; stats->rx_bytes += bytes; stats->multicast += rx_stats(rxo)->rx_mcast_pkts; @@ -605,10 +605,10 @@ static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev, for_all_tx_queues(adapter, txo, i) { const struct be_tx_stats *tx_stats = tx_stats(txo); do { - start = u64_stats_fetch_begin_bh(&tx_stats->sync); + start = u64_stats_fetch_begin_irq(&tx_stats->sync); pkts = tx_stats(txo)->tx_pkts; bytes = tx_stats(txo)->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tx_stats->sync, start)); + } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start)); stats->tx_packets += pkts; stats->tx_bytes += bytes; } @@ -1408,15 +1408,15 @@ static void be_eqd_update(struct be_adapter *adapter) rxo = &adapter->rx_obj[eqo->idx]; do { - start = u64_stats_fetch_begin_bh(&rxo->stats.sync); + start = u64_stats_fetch_begin_irq(&rxo->stats.sync); rx_pkts = rxo->stats.rx_pkts; - } while (u64_stats_fetch_retry_bh(&rxo->stats.sync, start)); + } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start)); txo = &adapter->tx_obj[eqo->idx]; do { - start = u64_stats_fetch_begin_bh(&txo->stats.sync); + start = u64_stats_fetch_begin_irq(&txo->stats.sync); tx_pkts = txo->stats.tx_reqs; - } while (u64_stats_fetch_retry_bh(&txo->stats.sync, start)); + } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start)); /* Skip, if wrapped around or first calculation */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 8ee224fdc1d1..6049e63a826d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -653,18 +653,18 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, /* process Tx ring statistics */ do { - start = u64_stats_fetch_begin_bh(&tx_ring->syncp); + start = u64_stats_fetch_begin_irq(&tx_ring->syncp); data[i] = tx_ring->stats.packets; data[i + 1] = tx_ring->stats.bytes; - } while (u64_stats_fetch_retry_bh(&tx_ring->syncp, start)); + } while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start)); /* Rx ring is the 2nd half of the queue pair */ rx_ring = &tx_ring[1]; do { - start = u64_stats_fetch_begin_bh(&rx_ring->syncp); + start = u64_stats_fetch_begin_irq(&rx_ring->syncp); data[i + 2] = rx_ring->stats.packets; data[i + 3] = rx_ring->stats.bytes; - } while (u64_stats_fetch_retry_bh(&rx_ring->syncp, start)); + } while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start)); } rcu_read_unlock(); if (vsi == pf->vsi[pf->lan_vsi]) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 7379e5a9126b..c66a11e31e6f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -376,20 +376,20 @@ static struct rtnl_link_stats64 *i40e_get_netdev_stats_struct( continue; do { - start = u64_stats_fetch_begin_bh(&tx_ring->syncp); + start = u64_stats_fetch_begin_irq(&tx_ring->syncp); packets = tx_ring->stats.packets; bytes = tx_ring->stats.bytes; - } while (u64_stats_fetch_retry_bh(&tx_ring->syncp, start)); + } while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start)); stats->tx_packets += packets; stats->tx_bytes += bytes; rx_ring = &tx_ring[1]; do { - start = u64_stats_fetch_begin_bh(&rx_ring->syncp); + start = u64_stats_fetch_begin_irq(&rx_ring->syncp); packets = rx_ring->stats.packets; bytes = rx_ring->stats.bytes; - } while (u64_stats_fetch_retry_bh(&rx_ring->syncp, start)); + } while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start)); stats->rx_packets += packets; stats->rx_bytes += bytes; @@ -770,10 +770,10 @@ void i40e_update_stats(struct i40e_vsi *vsi) p = ACCESS_ONCE(vsi->tx_rings[q]); do { - start = u64_stats_fetch_begin_bh(&p->syncp); + start = u64_stats_fetch_begin_irq(&p->syncp); packets = p->stats.packets; bytes = p->stats.bytes; - } while (u64_stats_fetch_retry_bh(&p->syncp, start)); + } while (u64_stats_fetch_retry_irq(&p->syncp, start)); tx_b += bytes; tx_p += packets; tx_restart += p->tx_stats.restart_queue; @@ -782,10 +782,10 @@ void i40e_update_stats(struct i40e_vsi *vsi) /* Rx queue is part of the same block as Tx queue */ p = &p[1]; do { - start = u64_stats_fetch_begin_bh(&p->syncp); + start = u64_stats_fetch_begin_irq(&p->syncp); packets = p->stats.packets; bytes = p->stats.bytes; - } while (u64_stats_fetch_retry_bh(&p->syncp, start)); + } while (u64_stats_fetch_retry_irq(&p->syncp, start)); rx_b += bytes; rx_p += packets; rx_buf += p->rx_stats.alloc_buff_failed; diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 170e4dbddc11..e35bc1faa452 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2273,15 +2273,15 @@ static void igb_get_ethtool_stats(struct net_device *netdev, ring = adapter->tx_ring[j]; do { - start = u64_stats_fetch_begin_bh(&ring->tx_syncp); + start = u64_stats_fetch_begin_irq(&ring->tx_syncp); data[i] = ring->tx_stats.packets; data[i+1] = ring->tx_stats.bytes; data[i+2] = ring->tx_stats.restart_queue; - } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start)); + } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); do { - start = u64_stats_fetch_begin_bh(&ring->tx_syncp2); + start = u64_stats_fetch_begin_irq(&ring->tx_syncp2); restart2 = ring->tx_stats.restart_queue2; - } while (u64_stats_fetch_retry_bh(&ring->tx_syncp2, start)); + } while (u64_stats_fetch_retry_irq(&ring->tx_syncp2, start)); data[i+2] += restart2; i += IGB_TX_QUEUE_STATS_LEN; @@ -2289,13 +2289,13 @@ static void igb_get_ethtool_stats(struct net_device *netdev, for (j = 0; j < adapter->num_rx_queues; j++) { ring = adapter->rx_ring[j]; do { - start = u64_stats_fetch_begin_bh(&ring->rx_syncp); + start = u64_stats_fetch_begin_irq(&ring->rx_syncp); data[i] = ring->rx_stats.packets; data[i+1] = ring->rx_stats.bytes; data[i+2] = ring->rx_stats.drops; data[i+3] = ring->rx_stats.csum_err; data[i+4] = ring->rx_stats.alloc_failed; - } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start)); + } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); i += IGB_RX_QUEUE_STATS_LEN; } spin_unlock(&adapter->stats64_lock); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index ea8b9c41cf9f..f81d87cfcc8d 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5168,10 +5168,10 @@ void igb_update_stats(struct igb_adapter *adapter, } do { - start = u64_stats_fetch_begin_bh(&ring->rx_syncp); + start = u64_stats_fetch_begin_irq(&ring->rx_syncp); _bytes = ring->rx_stats.bytes; _packets = ring->rx_stats.packets; - } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start)); + } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); bytes += _bytes; packets += _packets; } @@ -5184,10 +5184,10 @@ void igb_update_stats(struct igb_adapter *adapter, for (i = 0; i < adapter->num_tx_queues; i++) { struct igb_ring *ring = adapter->tx_ring[i]; do { - start = u64_stats_fetch_begin_bh(&ring->tx_syncp); + start = u64_stats_fetch_begin_irq(&ring->tx_syncp); _bytes = ring->tx_stats.bytes; _packets = ring->tx_stats.packets; - } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start)); + } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); bytes += _bytes; packets += _packets; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 24dd6f0233f3..6c55c14d082a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -1128,10 +1128,10 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, } do { - start = u64_stats_fetch_begin_bh(&ring->syncp); + start = u64_stats_fetch_begin_irq(&ring->syncp); data[i] = ring->stats.packets; data[i+1] = ring->stats.bytes; - } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); i += 2; #ifdef BP_EXTENDED_STATS data[i] = ring->stats.yields; @@ -1156,10 +1156,10 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, } do { - start = u64_stats_fetch_begin_bh(&ring->syncp); + start = u64_stats_fetch_begin_irq(&ring->syncp); data[i] = ring->stats.packets; data[i+1] = ring->stats.bytes; - } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); i += 2; #ifdef BP_EXTENDED_STATS data[i] = ring->stats.yields; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 851c41377b47..5d314fe873bb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7293,10 +7293,10 @@ static struct rtnl_link_stats64 *ixgbe_get_stats64(struct net_device *netdev, if (ring) { do { - start = u64_stats_fetch_begin_bh(&ring->syncp); + start = u64_stats_fetch_begin_irq(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); stats->rx_packets += packets; stats->rx_bytes += bytes; } @@ -7309,10 +7309,10 @@ static struct rtnl_link_stats64 *ixgbe_get_stats64(struct net_device *netdev, if (ring) { do { - start = u64_stats_fetch_begin_bh(&ring->syncp); + start = u64_stats_fetch_begin_irq(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); stats->tx_packets += packets; stats->tx_bytes += bytes; } diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 475341d0ce7e..8581079791fe 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3337,10 +3337,10 @@ static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev, for (i = 0; i < adapter->num_rx_queues; i++) { ring = adapter->rx_ring[i]; do { - start = u64_stats_fetch_begin_bh(&ring->syncp); + start = u64_stats_fetch_begin_irq(&ring->syncp); bytes = ring->stats.bytes; packets = ring->stats.packets; - } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); stats->rx_bytes += bytes; stats->rx_packets += packets; } @@ -3348,10 +3348,10 @@ static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev, for (i = 0; i < adapter->num_tx_queues; i++) { ring = adapter->tx_ring[i]; do { - start = u64_stats_fetch_begin_bh(&ring->syncp); + start = u64_stats_fetch_begin_irq(&ring->syncp); bytes = ring->stats.bytes; packets = ring->stats.packets; - } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); stats->tx_bytes += bytes; stats->tx_packets += packets; } diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 12c6a66e54d1..f3afcbdbb725 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -508,12 +508,12 @@ struct rtnl_link_stats64 *mvneta_get_stats64(struct net_device *dev, cpu_stats = per_cpu_ptr(pp->stats, cpu); do { - start = u64_stats_fetch_begin_bh(&cpu_stats->syncp); + start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); rx_packets = cpu_stats->rx_packets; rx_bytes = cpu_stats->rx_bytes; tx_packets = cpu_stats->tx_packets; tx_bytes = cpu_stats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&cpu_stats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); stats->rx_packets += rx_packets; stats->rx_bytes += rx_bytes; diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 2434611d1b4e..5a5b23741179 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -3908,19 +3908,19 @@ static struct rtnl_link_stats64 *sky2_get_stats(struct net_device *dev, u64 _bytes, _packets; do { - start = u64_stats_fetch_begin_bh(&sky2->rx_stats.syncp); + start = u64_stats_fetch_begin_irq(&sky2->rx_stats.syncp); _bytes = sky2->rx_stats.bytes; _packets = sky2->rx_stats.packets; - } while (u64_stats_fetch_retry_bh(&sky2->rx_stats.syncp, start)); + } while (u64_stats_fetch_retry_irq(&sky2->rx_stats.syncp, start)); stats->rx_packets = _packets; stats->rx_bytes = _bytes; do { - start = u64_stats_fetch_begin_bh(&sky2->tx_stats.syncp); + start = u64_stats_fetch_begin_irq(&sky2->tx_stats.syncp); _bytes = sky2->tx_stats.bytes; _packets = sky2->tx_stats.packets; - } while (u64_stats_fetch_retry_bh(&sky2->tx_stats.syncp, start)); + } while (u64_stats_fetch_retry_irq(&sky2->tx_stats.syncp, start)); stats->tx_packets = _packets; stats->tx_bytes = _bytes; diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index c83cedd26dec..c5bb1ace4a74 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3134,12 +3134,12 @@ vxge_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *net_stats) u64 packets, bytes, multicast; do { - start = u64_stats_fetch_begin_bh(&rxstats->syncp); + start = u64_stats_fetch_begin_irq(&rxstats->syncp); packets = rxstats->rx_frms; multicast = rxstats->rx_mcast; bytes = rxstats->rx_bytes; - } while (u64_stats_fetch_retry_bh(&rxstats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start)); net_stats->rx_packets += packets; net_stats->rx_bytes += bytes; @@ -3149,11 +3149,11 @@ vxge_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *net_stats) net_stats->rx_dropped += rxstats->rx_dropped; do { - start = u64_stats_fetch_begin_bh(&txstats->syncp); + start = u64_stats_fetch_begin_irq(&txstats->syncp); packets = txstats->tx_frms; bytes = txstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&txstats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&txstats->syncp, start)); net_stats->tx_packets += packets; net_stats->tx_bytes += bytes; diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index bad3c057ee8a..811be0bccd14 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -1753,19 +1753,19 @@ nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage) /* software stats */ do { - syncp_start = u64_stats_fetch_begin_bh(&np->swstats_rx_syncp); + syncp_start = u64_stats_fetch_begin_irq(&np->swstats_rx_syncp); storage->rx_packets = np->stat_rx_packets; storage->rx_bytes = np->stat_rx_bytes; storage->rx_dropped = np->stat_rx_dropped; storage->rx_missed_errors = np->stat_rx_missed_errors; - } while (u64_stats_fetch_retry_bh(&np->swstats_rx_syncp, syncp_start)); + } while (u64_stats_fetch_retry_irq(&np->swstats_rx_syncp, syncp_start)); do { - syncp_start = u64_stats_fetch_begin_bh(&np->swstats_tx_syncp); + syncp_start = u64_stats_fetch_begin_irq(&np->swstats_tx_syncp); storage->tx_packets = np->stat_tx_packets; storage->tx_bytes = np->stat_tx_bytes; storage->tx_dropped = np->stat_tx_dropped; - } while (u64_stats_fetch_retry_bh(&np->swstats_tx_syncp, syncp_start)); + } while (u64_stats_fetch_retry_irq(&np->swstats_tx_syncp, syncp_start)); /* If the nic supports hw counters then retrieve latest values */ if (np->driver_data & DEV_HAS_STATISTICS_V123) { diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 8cb2f357026e..2e5df148af4c 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -2522,16 +2522,16 @@ rtl8139_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) netdev_stats_to_stats64(stats, &dev->stats); do { - start = u64_stats_fetch_begin_bh(&tp->rx_stats.syncp); + start = u64_stats_fetch_begin_irq(&tp->rx_stats.syncp); stats->rx_packets = tp->rx_stats.packets; stats->rx_bytes = tp->rx_stats.bytes; - } while (u64_stats_fetch_retry_bh(&tp->rx_stats.syncp, start)); + } while (u64_stats_fetch_retry_irq(&tp->rx_stats.syncp, start)); do { - start = u64_stats_fetch_begin_bh(&tp->tx_stats.syncp); + start = u64_stats_fetch_begin_irq(&tp->tx_stats.syncp); stats->tx_packets = tp->tx_stats.packets; stats->tx_bytes = tp->tx_stats.bytes; - } while (u64_stats_fetch_retry_bh(&tp->tx_stats.syncp, start)); + } while (u64_stats_fetch_retry_irq(&tp->tx_stats.syncp, start)); return stats; } diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 90c14d16f261..aa1c079f231d 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6590,17 +6590,17 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) rtl8169_rx_missed(dev, ioaddr); do { - start = u64_stats_fetch_begin_bh(&tp->rx_stats.syncp); + start = u64_stats_fetch_begin_irq(&tp->rx_stats.syncp); stats->rx_packets = tp->rx_stats.packets; stats->rx_bytes = tp->rx_stats.bytes; - } while (u64_stats_fetch_retry_bh(&tp->rx_stats.syncp, start)); + } while (u64_stats_fetch_retry_irq(&tp->rx_stats.syncp, start)); do { - start = u64_stats_fetch_begin_bh(&tp->tx_stats.syncp); + start = u64_stats_fetch_begin_irq(&tp->tx_stats.syncp); stats->tx_packets = tp->tx_stats.packets; stats->tx_bytes = tp->tx_stats.bytes; - } while (u64_stats_fetch_retry_bh(&tp->tx_stats.syncp, start)); + } while (u64_stats_fetch_retry_irq(&tp->tx_stats.syncp, start)); stats->rx_dropped = dev->stats.rx_dropped; stats->tx_dropped = dev->stats.tx_dropped; diff --git a/drivers/net/ethernet/tile/tilepro.c b/drivers/net/ethernet/tile/tilepro.c index edb2e12a0fe2..7e33973487ee 100644 --- a/drivers/net/ethernet/tile/tilepro.c +++ b/drivers/net/ethernet/tile/tilepro.c @@ -2068,14 +2068,14 @@ static struct rtnl_link_stats64 *tile_net_get_stats64(struct net_device *dev, cpu_stats = &priv->cpu[i]->stats; do { - start = u64_stats_fetch_begin_bh(&cpu_stats->syncp); + start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); trx_packets = cpu_stats->rx_packets; ttx_packets = cpu_stats->tx_packets; trx_bytes = cpu_stats->rx_bytes; ttx_bytes = cpu_stats->tx_bytes; trx_errors = cpu_stats->rx_errors; trx_dropped = cpu_stats->rx_dropped; - } while (u64_stats_fetch_retry_bh(&cpu_stats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); rx_packets += trx_packets; tx_packets += ttx_packets; diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index ef312bc6b865..5bc1a2d02dc1 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -2070,16 +2070,16 @@ rhine_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) netdev_stats_to_stats64(stats, &dev->stats); do { - start = u64_stats_fetch_begin_bh(&rp->rx_stats.syncp); + start = u64_stats_fetch_begin_irq(&rp->rx_stats.syncp); stats->rx_packets = rp->rx_stats.packets; stats->rx_bytes = rp->rx_stats.bytes; - } while (u64_stats_fetch_retry_bh(&rp->rx_stats.syncp, start)); + } while (u64_stats_fetch_retry_irq(&rp->rx_stats.syncp, start)); do { - start = u64_stats_fetch_begin_bh(&rp->tx_stats.syncp); + start = u64_stats_fetch_begin_irq(&rp->tx_stats.syncp); stats->tx_packets = rp->tx_stats.packets; stats->tx_bytes = rp->tx_stats.bytes; - } while (u64_stats_fetch_retry_bh(&rp->tx_stats.syncp, start)); + } while (u64_stats_fetch_retry_irq(&rp->tx_stats.syncp, start)); return stats; } diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index c14d39bf32d0..1da36764b1a4 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -136,18 +136,18 @@ static struct rtnl_link_stats64 *ifb_stats64(struct net_device *dev, unsigned int start; do { - start = u64_stats_fetch_begin_bh(&dp->rsync); + start = u64_stats_fetch_begin_irq(&dp->rsync); stats->rx_packets = dp->rx_packets; stats->rx_bytes = dp->rx_bytes; - } while (u64_stats_fetch_retry_bh(&dp->rsync, start)); + } while (u64_stats_fetch_retry_irq(&dp->rsync, start)); do { - start = u64_stats_fetch_begin_bh(&dp->tsync); + start = u64_stats_fetch_begin_irq(&dp->tsync); stats->tx_packets = dp->tx_packets; stats->tx_bytes = dp->tx_bytes; - } while (u64_stats_fetch_retry_bh(&dp->tsync, start)); + } while (u64_stats_fetch_retry_irq(&dp->tsync, start)); stats->rx_dropped = dev->stats.rx_dropped; stats->tx_dropped = dev->stats.tx_dropped; diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 282effee7e1c..bb96409f8c05 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -111,10 +111,10 @@ static struct rtnl_link_stats64 *loopback_get_stats64(struct net_device *dev, lb_stats = per_cpu_ptr(dev->lstats, i); do { - start = u64_stats_fetch_begin_bh(&lb_stats->syncp); + start = u64_stats_fetch_begin_irq(&lb_stats->syncp); tbytes = lb_stats->bytes; tpackets = lb_stats->packets; - } while (u64_stats_fetch_retry_bh(&lb_stats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&lb_stats->syncp, start)); bytes += tbytes; packets += tpackets; } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index c683ac2c8c94..753a8c23d15d 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -582,13 +582,13 @@ static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev, for_each_possible_cpu(i) { p = per_cpu_ptr(vlan->pcpu_stats, i); do { - start = u64_stats_fetch_begin_bh(&p->syncp); + start = u64_stats_fetch_begin_irq(&p->syncp); rx_packets = p->rx_packets; rx_bytes = p->rx_bytes; rx_multicast = p->rx_multicast; tx_packets = p->tx_packets; tx_bytes = p->tx_bytes; - } while (u64_stats_fetch_retry_bh(&p->syncp, start)); + } while (u64_stats_fetch_retry_irq(&p->syncp, start)); stats->rx_packets += rx_packets; stats->rx_bytes += rx_bytes; diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c index 14ce7de6a933..6929b03ec638 100644 --- a/drivers/net/nlmon.c +++ b/drivers/net/nlmon.c @@ -90,10 +90,10 @@ nlmon_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) nl_stats = per_cpu_ptr(dev->lstats, i); do { - start = u64_stats_fetch_begin_bh(&nl_stats->syncp); + start = u64_stats_fetch_begin_irq(&nl_stats->syncp); tbytes = nl_stats->bytes; tpackets = nl_stats->packets; - } while (u64_stats_fetch_retry_bh(&nl_stats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&nl_stats->syncp, start)); packets += tpackets; bytes += tbytes; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index aea92f02401b..2b1a1d61072c 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1761,13 +1761,13 @@ team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) for_each_possible_cpu(i) { p = per_cpu_ptr(team->pcpu_stats, i); do { - start = u64_stats_fetch_begin_bh(&p->syncp); + start = u64_stats_fetch_begin_irq(&p->syncp); rx_packets = p->rx_packets; rx_bytes = p->rx_bytes; rx_multicast = p->rx_multicast; tx_packets = p->tx_packets; tx_bytes = p->tx_bytes; - } while (u64_stats_fetch_retry_bh(&p->syncp, start)); + } while (u64_stats_fetch_retry_irq(&p->syncp, start)); stats->rx_packets += rx_packets; stats->rx_bytes += rx_bytes; diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index d671fc3ac5ac..dbde3412ee5e 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -432,9 +432,9 @@ static void __lb_one_cpu_stats_add(struct lb_stats *acc_stats, struct lb_stats tmp; do { - start = u64_stats_fetch_begin_bh(syncp); + start = u64_stats_fetch_begin_irq(syncp); tmp.tx_bytes = cpu_stats->tx_bytes; - } while (u64_stats_fetch_retry_bh(syncp, start)); + } while (u64_stats_fetch_retry_irq(syncp, start)); acc_stats->tx_bytes += tmp.tx_bytes; } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 3aca92e80e1e..e1c77d4b80e4 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -156,10 +156,10 @@ static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev) unsigned int start; do { - start = u64_stats_fetch_begin_bh(&stats->syncp); + start = u64_stats_fetch_begin_irq(&stats->syncp); packets = stats->packets; bytes = stats->bytes; - } while (u64_stats_fetch_retry_bh(&stats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); result->packets += packets; result->bytes += bytes; } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 5632a99cbbd2..80d84c446962 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1000,16 +1000,16 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev, u64 tpackets, tbytes, rpackets, rbytes; do { - start = u64_stats_fetch_begin_bh(&stats->tx_syncp); + start = u64_stats_fetch_begin_irq(&stats->tx_syncp); tpackets = stats->tx_packets; tbytes = stats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&stats->tx_syncp, start)); + } while (u64_stats_fetch_retry_irq(&stats->tx_syncp, start)); do { - start = u64_stats_fetch_begin_bh(&stats->rx_syncp); + start = u64_stats_fetch_begin_irq(&stats->rx_syncp); rpackets = stats->rx_packets; rbytes = stats->rx_bytes; - } while (u64_stats_fetch_retry_bh(&stats->rx_syncp, start)); + } while (u64_stats_fetch_retry_irq(&stats->rx_syncp, start)); tot->rx_packets += rpackets; tot->tx_packets += tpackets; diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index a38f03ded5a4..49f3b3dbbed8 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1060,13 +1060,13 @@ static struct rtnl_link_stats64 *xennet_get_stats64(struct net_device *dev, unsigned int start; do { - start = u64_stats_fetch_begin_bh(&stats->syncp); + start = u64_stats_fetch_begin_irq(&stats->syncp); rx_packets = stats->rx_packets; tx_packets = stats->tx_packets; rx_bytes = stats->rx_bytes; tx_bytes = stats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&stats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); tot->rx_packets += rx_packets; tot->tx_packets += tx_packets; diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 7bfabd20204c..4b4439e75f45 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -27,8 +27,8 @@ * (On UP, there is no seqcount_t protection, a reader allowing interrupts could * read partial values) * - * 7) For softirq uses, readers can use u64_stats_fetch_begin_bh() and - * u64_stats_fetch_retry_bh() helpers + * 7) For irq and softirq uses, readers can use u64_stats_fetch_begin_irq() and + * u64_stats_fetch_retry_irq() helpers * * Usage : * @@ -114,31 +114,31 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, } /* - * In case softirq handlers can update u64 counters, readers can use following helpers + * In case irq handlers can update u64 counters, readers can use following helpers * - SMP 32bit arches use seqcount protection, irq safe. - * - UP 32bit must disable BH. + * - UP 32bit must disable irqs. * - 64bit have no problem atomically reading u64 values, irq safe. */ -static inline unsigned int u64_stats_fetch_begin_bh(const struct u64_stats_sync *syncp) +static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) return read_seqcount_begin(&syncp->seq); #else #if BITS_PER_LONG==32 - local_bh_disable(); + local_irq_disable(); #endif return 0; #endif } -static inline bool u64_stats_fetch_retry_bh(const struct u64_stats_sync *syncp, +static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, unsigned int start) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) return read_seqcount_retry(&syncp->seq, start); #else #if BITS_PER_LONG==32 - local_bh_enable(); + local_irq_enable(); #endif return false; #endif diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index b38e715b1cd4..4f3e9073cb49 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -678,13 +678,13 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); do { - start = u64_stats_fetch_begin_bh(&p->syncp); + start = u64_stats_fetch_begin_irq(&p->syncp); rxpackets = p->rx_packets; rxbytes = p->rx_bytes; rxmulticast = p->rx_multicast; txpackets = p->tx_packets; txbytes = p->tx_bytes; - } while (u64_stats_fetch_retry_bh(&p->syncp, start)); + } while (u64_stats_fetch_retry_irq(&p->syncp, start)); stats->rx_packets += rxpackets; stats->rx_bytes += rxbytes; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index b063050b63e2..f2a08477e0f5 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -136,9 +136,9 @@ static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev, const struct pcpu_sw_netstats *bstats = per_cpu_ptr(br->stats, cpu); do { - start = u64_stats_fetch_begin_bh(&bstats->syncp); + start = u64_stats_fetch_begin_irq(&bstats->syncp); memcpy(&tmp, bstats, sizeof(tmp)); - } while (u64_stats_fetch_retry_bh(&bstats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&bstats->syncp, start)); sum.tx_bytes += tmp.tx_bytes; sum.tx_packets += tmp.tx_packets; sum.rx_bytes += tmp.rx_bytes; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 19ab78aca547..8c54870db792 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1505,9 +1505,9 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) bhptr = per_cpu_ptr(mib[0], cpu); syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); do { - start = u64_stats_fetch_begin_bh(syncp); + start = u64_stats_fetch_begin_irq(syncp); v = *(((u64 *) bhptr) + offt); - } while (u64_stats_fetch_retry_bh(syncp, start)); + } while (u64_stats_fetch_retry_irq(syncp, start)); res += v; } diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 6f847dd56dbc..b86f0a37fa7c 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -161,12 +161,12 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, unsigned int start; do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); + start = u64_stats_fetch_begin_irq(&tstats->syncp); rx_packets = tstats->rx_packets; tx_packets = tstats->tx_packets; rx_bytes = tstats->rx_bytes; tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&tstats->syncp, start)); tot->rx_packets += rx_packets; tot->tx_packets += tx_packets; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 8ad59f4811df..e1df691d78be 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -108,12 +108,12 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) per_cpu_ptr(dev->tstats, i); do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); + start = u64_stats_fetch_begin_irq(&tstats->syncp); tmp.rx_packets = tstats->rx_packets; tmp.rx_bytes = tstats->rx_bytes; tmp.tx_packets = tstats->tx_packets; tmp.tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&tstats->syncp, start)); sum.rx_packets += tmp.rx_packets; sum.rx_bytes += tmp.rx_bytes; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 35be035ee0ce..d6d75841352a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2177,10 +2177,10 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) __u64 inbytes, outbytes; do { - start = u64_stats_fetch_begin_bh(&u->syncp); + start = u64_stats_fetch_begin_irq(&u->syncp); inbytes = u->ustats.inbytes; outbytes = u->ustats.outbytes; - } while (u64_stats_fetch_retry_bh(&u->syncp, start)); + } while (u64_stats_fetch_retry_irq(&u->syncp, start)); seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n", i, u->ustats.conns, u->ustats.inpkts, diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 36f8872cb072..c53fe0c9697c 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -606,9 +606,9 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, percpu_stats = per_cpu_ptr(dp->stats_percpu, i); do { - start = u64_stats_fetch_begin_bh(&percpu_stats->syncp); + start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); local_stats = *percpu_stats; - } while (u64_stats_fetch_retry_bh(&percpu_stats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); stats->n_hit += local_stats.n_hit; stats->n_missed += local_stats.n_missed; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 3b4db3220456..42c0f4a0b78c 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -277,9 +277,9 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) percpu_stats = per_cpu_ptr(vport->percpu_stats, i); do { - start = u64_stats_fetch_begin_bh(&percpu_stats->syncp); + start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); local_stats = *percpu_stats; - } while (u64_stats_fetch_retry_bh(&percpu_stats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); stats->rx_bytes += local_stats.rx_bytes; stats->rx_packets += local_stats.rx_packets; -- cgit v1.2.3-71-gd317 From a797ca1eadeef7f4fdba2ab5d143d56cc3ec5da3 Mon Sep 17 00:00:00 2001 From: Franky Lin Date: Sat, 15 Mar 2014 17:18:17 +0100 Subject: brcmfmac: add BCM4354 SDIO interface support BCM4354 is an a/b/g/n/ac 2x2 WiFi chip. This patch adds support for it through SDIO interface. Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Arend Van Spriel Signed-off-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c | 1 + drivers/net/wireless/brcm80211/brcmfmac/chip.c | 5 +++++ drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c | 7 ++++++- drivers/net/wireless/brcm80211/include/brcm_hw_ids.h | 1 + include/linux/mmc/sdio_ids.h | 1 + 5 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c index d737cf78469a..6e8718bf6920 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c @@ -988,6 +988,7 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = { {SDIO_DEVICE(SDIO_VENDOR_ID_BROADCOM, SDIO_DEVICE_ID_BROADCOM_43362)}, {SDIO_DEVICE(SDIO_VENDOR_ID_BROADCOM, SDIO_DEVICE_ID_BROADCOM_4335_4339)}, + {SDIO_DEVICE(SDIO_VENDOR_ID_BROADCOM, SDIO_DEVICE_ID_BROADCOM_4354)}, { /* end: all zeroes */ }, }; MODULE_DEVICE_TABLE(sdio, brcmf_sdmmc_ids); diff --git a/drivers/net/wireless/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/brcm80211/brcmfmac/chip.c index a07b95ef9e70..df130ef53d1c 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/chip.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/chip.c @@ -504,6 +504,7 @@ static void brcmf_chip_get_raminfo(struct brcmf_chip_priv *ci) ci->pub.ramsize = 0x3c000; break; case BCM4339_CHIP_ID: + case BCM4354_CHIP_ID: ci->pub.ramsize = 0xc0000; ci->pub.rambase = 0x180000; break; @@ -1006,6 +1007,10 @@ bool brcmf_chip_sr_capable(struct brcmf_chip *pub) chip = container_of(pub, struct brcmf_chip_priv, pub); switch (pub->chip) { + case BCM4354_CHIP_ID: + /* explicitly check SR engine enable bit */ + pmu_cc3_mask = BIT(2); + /* fall-through */ case BCM43241_CHIP_ID: case BCM4335_CHIP_ID: case BCM4339_CHIP_ID: diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c index a111b6fbbeba..4aa8678590d5 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c @@ -578,6 +578,8 @@ static const struct sdiod_drive_str sdiod_drvstr_tab2_3v3[] = { #define BCM43362_NVRAM_NAME "brcm/brcmfmac43362-sdio.txt" #define BCM4339_FIRMWARE_NAME "brcm/brcmfmac4339-sdio.bin" #define BCM4339_NVRAM_NAME "brcm/brcmfmac4339-sdio.txt" +#define BCM4354_FIRMWARE_NAME "brcm/brcmfmac4354-sdio.bin" +#define BCM4354_NVRAM_NAME "brcm/brcmfmac4354-sdio.txt" MODULE_FIRMWARE(BCM43143_FIRMWARE_NAME); MODULE_FIRMWARE(BCM43143_NVRAM_NAME); @@ -597,6 +599,8 @@ MODULE_FIRMWARE(BCM43362_FIRMWARE_NAME); MODULE_FIRMWARE(BCM43362_NVRAM_NAME); MODULE_FIRMWARE(BCM4339_FIRMWARE_NAME); MODULE_FIRMWARE(BCM4339_NVRAM_NAME); +MODULE_FIRMWARE(BCM4354_FIRMWARE_NAME); +MODULE_FIRMWARE(BCM4354_NVRAM_NAME); struct brcmf_firmware_names { u32 chipid; @@ -622,7 +626,8 @@ static const struct brcmf_firmware_names brcmf_fwname_data[] = { { BCM4334_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4334) }, { BCM4335_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4335) }, { BCM43362_CHIP_ID, 0xFFFFFFFE, BRCMF_FIRMWARE_NVRAM(BCM43362) }, - { BCM4339_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4339) } + { BCM4339_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4339) }, + { BCM4354_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4354) } }; diff --git a/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h b/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h index 6fa5d4863782..d816270db3be 100644 --- a/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h +++ b/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h @@ -43,5 +43,6 @@ #define BCM4335_CHIP_ID 0x4335 #define BCM43362_CHIP_ID 43362 #define BCM4339_CHIP_ID 0x4339 +#define BCM4354_CHIP_ID 0x4354 #endif /* _BRCM_HW_IDS_H_ */ diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index d8836623f36a..0f01fe065424 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -31,6 +31,7 @@ #define SDIO_DEVICE_ID_BROADCOM_4334 0x4334 #define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335 #define SDIO_DEVICE_ID_BROADCOM_43362 43362 +#define SDIO_DEVICE_ID_BROADCOM_4354 0x4354 #define SDIO_VENDOR_ID_INTEL 0x0089 #define SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX 0x1402 -- cgit v1.2.3-71-gd317 From ff6076314339e079806d9d2f3de9c9b768e94db1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 14 Mar 2014 20:47:49 -0700 Subject: netpoll: Add netpoll_rx_processing Add a helper netpoll_rx_processing that reports when netpoll has receive side processing to perform. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netpoll.h | 18 ++++++++++++++---- net/core/netpoll.c | 4 ++-- 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index fbfdb9d8d3a7..479d15c97770 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -82,14 +82,24 @@ static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) local_irq_restore(flags); } - +#ifdef CONFIG_NETPOLL_TRAP +static inline bool netpoll_rx_processing(struct netpoll_info *npinfo) +{ + return !list_empty(&npinfo->rx_np); +} +#else +static inline bool netpoll_rx_processing(struct netpoll_info *npinfo) +{ + return false; +} +#endif #ifdef CONFIG_NETPOLL static inline bool netpoll_rx_on(struct sk_buff *skb) { struct netpoll_info *npinfo = rcu_dereference_bh(skb->dev->npinfo); - return npinfo && (!list_empty(&npinfo->rx_np) || npinfo->rx_flags); + return npinfo && (netpoll_rx_processing(npinfo) || npinfo->rx_flags); } static inline bool netpoll_rx(struct sk_buff *skb) @@ -105,8 +115,8 @@ static inline bool netpoll_rx(struct sk_buff *skb) npinfo = rcu_dereference_bh(skb->dev->npinfo); spin_lock(&npinfo->rx_lock); - /* check rx_flags again with the lock held */ - if (npinfo->rx_flags && __netpoll_rx(skb, npinfo)) + /* check rx_processing again with the lock held */ + if (netpoll_rx_processing(npinfo) && __netpoll_rx(skb, npinfo)) ret = true; spin_unlock(&npinfo->rx_lock); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 2ad330e02967..ef83a2530e98 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -538,7 +538,7 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo int hlen, tlen; int hits = 0, proto; - if (list_empty(&npinfo->rx_np)) + if (!netpoll_rx_processing(npinfo)) return; /* Before checking the packet, we do some early @@ -770,7 +770,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) struct netpoll *np, *tmp; uint16_t source; - if (list_empty(&npinfo->rx_np)) + if (!netpoll_rx_processing(npinfo)) goto out; if (skb->dev->type != ARPHRD_ETHER) -- cgit v1.2.3-71-gd317 From b6bacd550c33124ea76291bd84ac42c8d30767eb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 14 Mar 2014 20:48:28 -0700 Subject: netpoll: Don't drop all received packets. Change the strategy of netpoll from dropping all packets received during netpoll_poll_dev to calling napi poll with a budget of 0 (to avoid processing drivers rx queue), and to ignore packets received with netif_rx (those will safely be placed on the backlog queue). All of the netpoll supporting drivers have been reviewed to ensure either thay use netif_rx or that a budget of 0 is supported by their napi poll routine and that a budget of 0 will not process the drivers rx queues. Not dropping packets makes NETPOLL_RX_DROP unnecesary so it is removed. npinfo->rx_flags is removed as rx_flags with just the NETPOLL_RX_ENABLED flag becomes just a redundant mirror of list_empty(&npinfo->rx_np). Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netpoll.h | 3 +-- net/core/netpoll.c | 17 ++++++----------- 2 files changed, 7 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 479d15c97770..154f9776056c 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -39,7 +39,6 @@ struct netpoll { struct netpoll_info { atomic_t refcnt; - unsigned long rx_flags; spinlock_t rx_lock; struct semaphore dev_lock; struct list_head rx_np; /* netpolls that registered an rx_skb_hook */ @@ -99,7 +98,7 @@ static inline bool netpoll_rx_on(struct sk_buff *skb) { struct netpoll_info *npinfo = rcu_dereference_bh(skb->dev->npinfo); - return npinfo && (netpoll_rx_processing(npinfo) || npinfo->rx_flags); + return npinfo && netpoll_rx_processing(npinfo); } static inline bool netpoll_rx(struct sk_buff *skb) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index ef83a2530e98..793dc04d2f19 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -51,8 +51,6 @@ static atomic_t trapped; DEFINE_STATIC_SRCU(netpoll_srcu); #define USEC_PER_POLL 50 -#define NETPOLL_RX_ENABLED 1 -#define NETPOLL_RX_DROP 2 #define MAX_SKB_SIZE \ (sizeof(struct ethhdr) + \ @@ -193,7 +191,8 @@ static void netpoll_poll_dev(struct net_device *dev) { const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); - int budget = 16; + bool rx_processing = netpoll_rx_processing(ni); + int budget = rx_processing? 16 : 0; /* Don't do any rx activity if the dev_lock mutex is held * the dev_open/close paths use this to block netpoll activity @@ -207,8 +206,8 @@ static void netpoll_poll_dev(struct net_device *dev) return; } - ni->rx_flags |= NETPOLL_RX_DROP; - atomic_inc(&trapped); + if (rx_processing) + atomic_inc(&trapped); ops = dev->netdev_ops; if (!ops->ndo_poll_controller) { @@ -221,8 +220,8 @@ static void netpoll_poll_dev(struct net_device *dev) poll_napi(dev, budget); - atomic_dec(&trapped); - ni->rx_flags &= ~NETPOLL_RX_DROP; + if (rx_processing) + atomic_dec(&trapped); up(&ni->dev_lock); @@ -1050,7 +1049,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) goto out; } - npinfo->rx_flags = 0; INIT_LIST_HEAD(&npinfo->rx_np); spin_lock_init(&npinfo->rx_lock); @@ -1076,7 +1074,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) if (np->rx_skb_hook) { spin_lock_irqsave(&npinfo->rx_lock, flags); - npinfo->rx_flags |= NETPOLL_RX_ENABLED; list_add_tail(&np->rx, &npinfo->rx_np); spin_unlock_irqrestore(&npinfo->rx_lock, flags); } @@ -1258,8 +1255,6 @@ void __netpoll_cleanup(struct netpoll *np) if (!list_empty(&npinfo->rx_np)) { spin_lock_irqsave(&npinfo->rx_lock, flags); list_del(&np->rx); - if (list_empty(&npinfo->rx_np)) - npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; spin_unlock_irqrestore(&npinfo->rx_lock, flags); } -- cgit v1.2.3-71-gd317 From ad8d475244b4112a0f5331e78d043d3a4c9eb37e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 14 Mar 2014 20:49:43 -0700 Subject: netpoll: Move netpoll_trap under CONFIG_NETPOLL_TRAP Now that we no longer need to receive packets to safely drain the network drivers receive queue move netpoll_trap and netpoll_set_trap under CONFIG_NETPOLL_TRAP Making netpoll_trap and netpoll_set_trap noop inline functions when CONFIG_NETPOLL_TRAP is not set. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netpoll.h | 11 +++++++++-- net/core/netpoll.c | 14 +++++++++----- 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 154f9776056c..ab9aaaff8d04 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -65,8 +65,6 @@ void netpoll_print_options(struct netpoll *np); int netpoll_parse_options(struct netpoll *np, char *opt); int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp); int netpoll_setup(struct netpoll *np); -int netpoll_trap(void); -void netpoll_set_trap(int trap); void __netpoll_cleanup(struct netpoll *np); void __netpoll_free_async(struct netpoll *np); void netpoll_cleanup(struct netpoll *np); @@ -82,11 +80,20 @@ static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) } #ifdef CONFIG_NETPOLL_TRAP +int netpoll_trap(void); +void netpoll_set_trap(int trap); static inline bool netpoll_rx_processing(struct netpoll_info *npinfo) { return !list_empty(&npinfo->rx_np); } #else +static inline int netpoll_trap(void) +{ + return 0; +} +static inline void netpoll_set_trap(int trap) +{ +} static inline bool netpoll_rx_processing(struct netpoll_info *npinfo) { return false; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 793dc04d2f19..0e45835f1737 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -46,7 +46,9 @@ static struct sk_buff_head skb_pool; +#ifdef CONFIG_NETPOLL_TRAP static atomic_t trapped; +#endif DEFINE_STATIC_SRCU(netpoll_srcu); @@ -207,7 +209,7 @@ static void netpoll_poll_dev(struct net_device *dev) } if (rx_processing) - atomic_inc(&trapped); + netpoll_set_trap(1); ops = dev->netdev_ops; if (!ops->ndo_poll_controller) { @@ -221,7 +223,7 @@ static void netpoll_poll_dev(struct net_device *dev) poll_napi(dev, budget); if (rx_processing) - atomic_dec(&trapped); + netpoll_set_trap(0); up(&ni->dev_lock); @@ -776,10 +778,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) goto out; /* check if netpoll clients need ARP */ - if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) { + if (skb->protocol == htons(ETH_P_ARP) && netpoll_trap()) { skb_queue_tail(&npinfo->neigh_tx, skb); return 1; - } else if (pkt_is_ns(skb) && atomic_read(&trapped)) { + } else if (pkt_is_ns(skb) && netpoll_trap()) { skb_queue_tail(&npinfo->neigh_tx, skb); return 1; } @@ -896,7 +898,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) return 1; out: - if (atomic_read(&trapped)) { + if (netpoll_trap()) { kfree_skb(skb); return 1; } @@ -1302,6 +1304,7 @@ out: } EXPORT_SYMBOL(netpoll_cleanup); +#ifdef CONFIG_NETPOLL_TRAP int netpoll_trap(void) { return atomic_read(&trapped); @@ -1316,3 +1319,4 @@ void netpoll_set_trap(int trap) atomic_dec(&trapped); } EXPORT_SYMBOL(netpoll_set_trap); +#endif /* CONFIG_NETPOLL_TRAP */ -- cgit v1.2.3-71-gd317 From e1bd4d3d7dd2a4a0e731ffe07c439927c23f16ea Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 14 Mar 2014 20:50:58 -0700 Subject: netpoll: Move all receive processing under CONFIG_NETPOLL_TRAP Make rx_skb_hook, and rx in struct netpoll depend on CONFIG_NETPOLL_TRAP Make rx_lock, rx_np, and neigh_tx in struct netpoll_info depend on CONFIG_NETPOLL_TRAP Make the functions netpoll_rx_on, netpoll_rx, and netpoll_receive_skb no-ops when CONFIG_NETPOLL_TRAP is not set. Only build netpoll_neigh_reply, checksum_udp service_neigh_queue, pkt_is_ns, and __netpoll_rx when CONFIG_NETPOLL_TRAP is defined. Add helper functions netpoll_trap_setup, netpoll_trap_setup_info, netpoll_trap_cleanup, and netpoll_trap_cleanup_info that initialize and cleanup the struct netpoll and struct netpoll_info receive specific fields when CONFIG_NETPOLL_TRAP is enabled and do nothing otherwise. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netpoll.h | 73 ++++++++++++++++++++++++-------------------- net/core/netpoll.c | 81 ++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 104 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index ab9aaaff8d04..a0632af88d8b 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -24,32 +24,38 @@ struct netpoll { struct net_device *dev; char dev_name[IFNAMSIZ]; const char *name; - void (*rx_skb_hook)(struct netpoll *np, int source, struct sk_buff *skb, - int offset, int len); union inet_addr local_ip, remote_ip; bool ipv6; u16 local_port, remote_port; u8 remote_mac[ETH_ALEN]; - struct list_head rx; /* rx_np list element */ struct work_struct cleanup_work; + +#ifdef CONFIG_NETPOLL_TRAP + void (*rx_skb_hook)(struct netpoll *np, int source, struct sk_buff *skb, + int offset, int len); + struct list_head rx; /* rx_np list element */ +#endif }; struct netpoll_info { atomic_t refcnt; - spinlock_t rx_lock; struct semaphore dev_lock; - struct list_head rx_np; /* netpolls that registered an rx_skb_hook */ - struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */ struct sk_buff_head txq; struct delayed_work tx_work; struct netpoll *netpoll; struct rcu_head rcu; + +#ifdef CONFIG_NETPOLL_TRAP + spinlock_t rx_lock; + struct list_head rx_np; /* netpolls that registered an rx_skb_hook */ + struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */ +#endif }; #ifdef CONFIG_NETPOLL @@ -68,7 +74,6 @@ int netpoll_setup(struct netpoll *np); void __netpoll_cleanup(struct netpoll *np); void __netpoll_free_async(struct netpoll *np); void netpoll_cleanup(struct netpoll *np); -int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo); void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, struct net_device *dev); static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) @@ -82,25 +87,12 @@ static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) #ifdef CONFIG_NETPOLL_TRAP int netpoll_trap(void); void netpoll_set_trap(int trap); +int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo); static inline bool netpoll_rx_processing(struct netpoll_info *npinfo) { return !list_empty(&npinfo->rx_np); } -#else -static inline int netpoll_trap(void) -{ - return 0; -} -static inline void netpoll_set_trap(int trap) -{ -} -static inline bool netpoll_rx_processing(struct netpoll_info *npinfo) -{ - return false; -} -#endif -#ifdef CONFIG_NETPOLL static inline bool netpoll_rx_on(struct sk_buff *skb) { struct netpoll_info *npinfo = rcu_dereference_bh(skb->dev->npinfo); @@ -138,6 +130,33 @@ static inline int netpoll_receive_skb(struct sk_buff *skb) return 0; } +#else +static inline int netpoll_trap(void) +{ + return 0; +} +static inline void netpoll_set_trap(int trap) +{ +} +static inline bool netpoll_rx_processing(struct netpoll_info *npinfo) +{ + return false; +} +static inline bool netpoll_rx(struct sk_buff *skb) +{ + return false; +} +static inline bool netpoll_rx_on(struct sk_buff *skb) +{ + return false; +} +static inline int netpoll_receive_skb(struct sk_buff *skb) +{ + return 0; +} +#endif + +#ifdef CONFIG_NETPOLL static inline void *netpoll_poll_lock(struct napi_struct *napi) { struct net_device *dev = napi->dev; @@ -166,18 +185,6 @@ static inline bool netpoll_tx_running(struct net_device *dev) } #else -static inline bool netpoll_rx(struct sk_buff *skb) -{ - return false; -} -static inline bool netpoll_rx_on(struct sk_buff *skb) -{ - return false; -} -static inline int netpoll_receive_skb(struct sk_buff *skb) -{ - return 0; -} static inline void *netpoll_poll_lock(struct napi_struct *napi) { return NULL; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b69bb3f1ba3f..eed8b1d2d302 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -48,6 +48,7 @@ static struct sk_buff_head skb_pool; #ifdef CONFIG_NETPOLL_TRAP static atomic_t trapped; +static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo); #endif DEFINE_STATIC_SRCU(netpoll_srcu); @@ -61,7 +62,6 @@ DEFINE_STATIC_SRCU(netpoll_srcu); MAX_UDP_CHUNK) static void zap_completion_queue(void); -static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo); static void netpoll_async_cleanup(struct work_struct *work); static unsigned int carrier_timeout = 4; @@ -109,6 +109,7 @@ static void queue_process(struct work_struct *work) } } +#ifdef CONFIG_NETPOLL_TRAP static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, unsigned short ulen, __be32 saddr, __be32 daddr) { @@ -127,6 +128,7 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, return __skb_checksum_complete(skb); } +#endif /* CONFIG_NETPOLL_TRAP */ /* * Check whether delayed processing was scheduled for our NIC. If so, @@ -179,6 +181,7 @@ static void poll_napi(struct net_device *dev, int budget) } } +#ifdef CONFIG_NETPOLL_TRAP static void service_neigh_queue(struct net_device *dev, struct netpoll_info *npi) { @@ -197,6 +200,12 @@ static void service_neigh_queue(struct net_device *dev, while ((skb = skb_dequeue(&npi->neigh_tx))) netpoll_neigh_reply(skb, npi); } +#else /* !CONFIG_NETPOLL_TRAP */ +static inline void service_neigh_queue(struct net_device *dev, + struct netpoll_info *npi) +{ +} +#endif /* CONFIG_NETPOLL_TRAP */ static void netpoll_poll_dev(struct net_device *dev) { @@ -522,6 +531,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) } EXPORT_SYMBOL(netpoll_send_udp); +#ifdef CONFIG_NETPOLL_TRAP static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo) { int size, type = ARPOP_REPLY; @@ -900,6 +910,55 @@ out: return 0; } +static void netpoll_trap_setup_info(struct netpoll_info *npinfo) +{ + INIT_LIST_HEAD(&npinfo->rx_np); + spin_lock_init(&npinfo->rx_lock); + skb_queue_head_init(&npinfo->neigh_tx); +} + +static void netpoll_trap_cleanup_info(struct netpoll_info *npinfo) +{ + skb_queue_purge(&npinfo->neigh_tx); +} + +static void netpoll_trap_setup(struct netpoll *np, struct netpoll_info *npinfo) +{ + unsigned long flags; + if (np->rx_skb_hook) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_add_tail(&np->rx, &npinfo->rx_np); + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } +} + +static void netpoll_trap_cleanup(struct netpoll *np, struct netpoll_info *npinfo) +{ + unsigned long flags; + if (!list_empty(&npinfo->rx_np)) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_del(&np->rx); + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } +} + +#else /* !CONFIG_NETPOLL_TRAP */ +static inline void netpoll_trap_setup_info(struct netpoll_info *npinfo) +{ +} +static inline void netpoll_trap_cleanup_info(struct netpoll_info *npinfo) +{ +} +static inline +void netpoll_trap_setup(struct netpoll *np, struct netpoll_info *npinfo) +{ +} +static inline +void netpoll_trap_cleanup(struct netpoll *np, struct netpoll_info *npinfo) +{ +} +#endif /* CONFIG_NETPOLL_TRAP */ + void netpoll_print_options(struct netpoll *np) { np_info(np, "local port %d\n", np->local_port); @@ -1023,7 +1082,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) { struct netpoll_info *npinfo; const struct net_device_ops *ops; - unsigned long flags; int err; np->dev = ndev; @@ -1045,11 +1103,9 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) goto out; } - INIT_LIST_HEAD(&npinfo->rx_np); + netpoll_trap_setup_info(npinfo); - spin_lock_init(&npinfo->rx_lock); sema_init(&npinfo->dev_lock, 1); - skb_queue_head_init(&npinfo->neigh_tx); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); @@ -1068,11 +1124,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) npinfo->netpoll = np; - if (np->rx_skb_hook) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_add_tail(&np->rx, &npinfo->rx_np); - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } + netpoll_trap_setup(np, npinfo); /* last thing to do is link it to the net device structure */ rcu_assign_pointer(ndev->npinfo, npinfo); @@ -1222,7 +1274,7 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) struct netpoll_info *npinfo = container_of(rcu_head, struct netpoll_info, rcu); - skb_queue_purge(&npinfo->neigh_tx); + netpoll_trap_cleanup_info(npinfo); skb_queue_purge(&npinfo->txq); /* we can't call cancel_delayed_work_sync here, as we are in softirq */ @@ -1238,7 +1290,6 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) void __netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; - unsigned long flags; /* rtnl_dereference would be preferable here but * rcu_cleanup_netpoll path can put us in here safely without @@ -1248,11 +1299,7 @@ void __netpoll_cleanup(struct netpoll *np) if (!npinfo) return; - if (!list_empty(&npinfo->rx_np)) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_del(&np->rx); - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } + netpoll_trap_cleanup(np, npinfo); synchronize_srcu(&netpoll_srcu); -- cgit v1.2.3-71-gd317 From 9c62a68d13119a1ca9718381d97b0cb415ff4e9d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 14 Mar 2014 20:51:52 -0700 Subject: netpoll: Remove dead packet receive code (CONFIG_NETPOLL_TRAP) The netpoll packet receive code only becomes active if the netpoll rx_skb_hook is implemented, and there is not a single implementation of the netpoll rx_skb_hook in the kernel. All of the out of tree implementations I have found all call netpoll_poll which was removed from the kernel in 2011, so this change should not add any additional breakage. There are problems with the netpoll packet receive code. __netpoll_rx does not call dev_kfree_skb_irq or dev_kfree_skb_any in hard irq context. netpoll_neigh_reply leaks every skb it receives. Reception of packets does not work successfully on stacked devices (aka bonding, team, bridge, and vlans). Given that the netpoll packet receive code is buggy, there are no out of tree users that will be merged soon, and the code has not been used for in tree for a decade let's just remove it. Reverting this commit can server as a starting point for anyone who wants to resurrect netpoll packet reception support. Acked-by: Eric Dumazet Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- drivers/net/Kconfig | 5 - include/linux/netdevice.h | 17 -- include/linux/netpoll.h | 84 -------- net/core/dev.c | 11 +- net/core/netpoll.c | 520 +--------------------------------------------- 5 files changed, 2 insertions(+), 635 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 494b888a6568..89402c3b64f8 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -177,11 +177,6 @@ config NETCONSOLE_DYNAMIC config NETPOLL def_bool NETCONSOLE -config NETPOLL_TRAP - bool "Netpoll traffic trapping" - default n - depends on NETPOLL - config NET_POLL_CONTROLLER def_bool NETPOLL diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b8d8c805fd75..4b6d12c7b803 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1979,9 +1979,6 @@ struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); int netdev_get_name(struct net *net, char *name, int ifindex); int dev_restart(struct net_device *dev); -#ifdef CONFIG_NETPOLL_TRAP -int netpoll_trap(void); -#endif int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb); static inline unsigned int skb_gro_offset(const struct sk_buff *skb) @@ -2186,12 +2183,6 @@ static inline void netif_tx_start_all_queues(struct net_device *dev) static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) { -#ifdef CONFIG_NETPOLL_TRAP - if (netpoll_trap()) { - netif_tx_start_queue(dev_queue); - return; - } -#endif if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) __netif_schedule(dev_queue->qdisc); } @@ -2435,10 +2426,6 @@ static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); -#ifdef CONFIG_NETPOLL_TRAP - if (netpoll_trap()) - return; -#endif netif_tx_stop_queue(txq); } @@ -2473,10 +2460,6 @@ static inline bool netif_subqueue_stopped(const struct net_device *dev, static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); -#ifdef CONFIG_NETPOLL_TRAP - if (netpoll_trap()) - return; -#endif if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) __netif_schedule(txq->qdisc); } diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index a0632af88d8b..1b475a5a7239 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -31,12 +31,6 @@ struct netpoll { u8 remote_mac[ETH_ALEN]; struct work_struct cleanup_work; - -#ifdef CONFIG_NETPOLL_TRAP - void (*rx_skb_hook)(struct netpoll *np, int source, struct sk_buff *skb, - int offset, int len); - struct list_head rx; /* rx_np list element */ -#endif }; struct netpoll_info { @@ -50,12 +44,6 @@ struct netpoll_info { struct netpoll *netpoll; struct rcu_head rcu; - -#ifdef CONFIG_NETPOLL_TRAP - spinlock_t rx_lock; - struct list_head rx_np; /* netpolls that registered an rx_skb_hook */ - struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */ -#endif }; #ifdef CONFIG_NETPOLL @@ -84,78 +72,6 @@ static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) local_irq_restore(flags); } -#ifdef CONFIG_NETPOLL_TRAP -int netpoll_trap(void); -void netpoll_set_trap(int trap); -int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo); -static inline bool netpoll_rx_processing(struct netpoll_info *npinfo) -{ - return !list_empty(&npinfo->rx_np); -} - -static inline bool netpoll_rx_on(struct sk_buff *skb) -{ - struct netpoll_info *npinfo = rcu_dereference_bh(skb->dev->npinfo); - - return npinfo && netpoll_rx_processing(npinfo); -} - -static inline bool netpoll_rx(struct sk_buff *skb) -{ - struct netpoll_info *npinfo; - unsigned long flags; - bool ret = false; - - local_irq_save(flags); - - if (!netpoll_rx_on(skb)) - goto out; - - npinfo = rcu_dereference_bh(skb->dev->npinfo); - spin_lock(&npinfo->rx_lock); - /* check rx_processing again with the lock held */ - if (netpoll_rx_processing(npinfo) && __netpoll_rx(skb, npinfo)) - ret = true; - spin_unlock(&npinfo->rx_lock); - -out: - local_irq_restore(flags); - return ret; -} - -static inline int netpoll_receive_skb(struct sk_buff *skb) -{ - if (!list_empty(&skb->dev->napi_list)) - return netpoll_rx(skb); - return 0; -} - -#else -static inline int netpoll_trap(void) -{ - return 0; -} -static inline void netpoll_set_trap(int trap) -{ -} -static inline bool netpoll_rx_processing(struct netpoll_info *npinfo) -{ - return false; -} -static inline bool netpoll_rx(struct sk_buff *skb) -{ - return false; -} -static inline bool netpoll_rx_on(struct sk_buff *skb) -{ - return false; -} -static inline int netpoll_receive_skb(struct sk_buff *skb) -{ - return 0; -} -#endif - #ifdef CONFIG_NETPOLL static inline void *netpoll_poll_lock(struct napi_struct *napi) { diff --git a/net/core/dev.c b/net/core/dev.c index 587f9fb85d73..55f8e64c03a2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3231,10 +3231,6 @@ static int netif_rx_internal(struct sk_buff *skb) { int ret; - /* if netpoll wants it, pretend we never saw it */ - if (netpoll_rx(skb)) - return NET_RX_DROP; - net_timestamp_check(netdev_tstamp_prequeue, skb); trace_netif_rx(skb); @@ -3520,10 +3516,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) trace_netif_receive_skb(skb); - /* if we've gotten here through NAPI, check netpoll */ - if (netpoll_receive_skb(skb)) - goto out; - orig_dev = skb->dev; skb_reset_network_header(skb); @@ -3650,7 +3642,6 @@ drop: unlock: rcu_read_unlock(); -out: return ret; } @@ -3875,7 +3866,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff int same_flow; enum gro_result ret; - if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) + if (!(skb->dev->features & NETIF_F_GRO)) goto normal; if (skb_is_gso(skb) || skb_has_frag_list(skb)) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index eed8b1d2d302..7291dde93469 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -46,11 +46,6 @@ static struct sk_buff_head skb_pool; -#ifdef CONFIG_NETPOLL_TRAP -static atomic_t trapped; -static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo); -#endif - DEFINE_STATIC_SRCU(netpoll_srcu); #define USEC_PER_POLL 50 @@ -109,27 +104,6 @@ static void queue_process(struct work_struct *work) } } -#ifdef CONFIG_NETPOLL_TRAP -static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, - unsigned short ulen, __be32 saddr, __be32 daddr) -{ - __wsum psum; - - if (uh->check == 0 || skb_csum_unnecessary(skb)) - return 0; - - psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); - - if (skb->ip_summed == CHECKSUM_COMPLETE && - !csum_fold(csum_add(psum, skb->csum))) - return 0; - - skb->csum = psum; - - return __skb_checksum_complete(skb); -} -#endif /* CONFIG_NETPOLL_TRAP */ - /* * Check whether delayed processing was scheduled for our NIC. If so, * we attempt to grab the poll lock and use ->poll() to pump the card. @@ -140,11 +114,6 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, * trylock here and interrupts are already disabled in the softirq * case. Further, we test the poll_owner to avoid recursion on UP * systems where the lock doesn't exist. - * - * In cases where there is bi-directional communications, reading only - * one message at a time can lead to packets being dropped by the - * network adapter, forcing superfluous retries and possibly timeouts. - * Thus, we set our budget to greater than 1. */ static int poll_one_napi(struct napi_struct *napi, int budget) { @@ -181,38 +150,11 @@ static void poll_napi(struct net_device *dev, int budget) } } -#ifdef CONFIG_NETPOLL_TRAP -static void service_neigh_queue(struct net_device *dev, - struct netpoll_info *npi) -{ - struct sk_buff *skb; - if (dev->flags & IFF_SLAVE) { - struct net_device *bond_dev; - struct netpoll_info *bond_ni; - - bond_dev = netdev_master_upper_dev_get_rcu(dev); - bond_ni = rcu_dereference_bh(bond_dev->npinfo); - while ((skb = skb_dequeue(&npi->neigh_tx))) { - skb->dev = bond_dev; - skb_queue_tail(&bond_ni->neigh_tx, skb); - } - } - while ((skb = skb_dequeue(&npi->neigh_tx))) - netpoll_neigh_reply(skb, npi); -} -#else /* !CONFIG_NETPOLL_TRAP */ -static inline void service_neigh_queue(struct net_device *dev, - struct netpoll_info *npi) -{ -} -#endif /* CONFIG_NETPOLL_TRAP */ - static void netpoll_poll_dev(struct net_device *dev) { const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); - bool rx_processing = netpoll_rx_processing(ni); - int budget = rx_processing? 16 : 0; + int budget = 0; /* Don't do any rx activity if the dev_lock mutex is held * the dev_open/close paths use this to block netpoll activity @@ -226,9 +168,6 @@ static void netpoll_poll_dev(struct net_device *dev) return; } - if (rx_processing) - netpoll_set_trap(1); - ops = dev->netdev_ops; if (!ops->ndo_poll_controller) { up(&ni->dev_lock); @@ -240,13 +179,8 @@ static void netpoll_poll_dev(struct net_device *dev) poll_napi(dev, budget); - if (rx_processing) - netpoll_set_trap(0); - up(&ni->dev_lock); - service_neigh_queue(dev, ni); - zap_completion_queue(); } @@ -531,434 +465,6 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) } EXPORT_SYMBOL(netpoll_send_udp); -#ifdef CONFIG_NETPOLL_TRAP -static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo) -{ - int size, type = ARPOP_REPLY; - __be32 sip, tip; - unsigned char *sha; - struct sk_buff *send_skb; - struct netpoll *np, *tmp; - unsigned long flags; - int hlen, tlen; - int hits = 0, proto; - - if (!netpoll_rx_processing(npinfo)) - return; - - /* Before checking the packet, we do some early - inspection whether this is interesting at all */ - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (np->dev == skb->dev) - hits++; - } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - - /* No netpoll struct is using this dev */ - if (!hits) - return; - - proto = ntohs(eth_hdr(skb)->h_proto); - if (proto == ETH_P_ARP) { - struct arphdr *arp; - unsigned char *arp_ptr; - /* No arp on this interface */ - if (skb->dev->flags & IFF_NOARP) - return; - - if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) - return; - - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - arp = arp_hdr(skb); - - if ((arp->ar_hrd != htons(ARPHRD_ETHER) && - arp->ar_hrd != htons(ARPHRD_IEEE802)) || - arp->ar_pro != htons(ETH_P_IP) || - arp->ar_op != htons(ARPOP_REQUEST)) - return; - - arp_ptr = (unsigned char *)(arp+1); - /* save the location of the src hw addr */ - sha = arp_ptr; - arp_ptr += skb->dev->addr_len; - memcpy(&sip, arp_ptr, 4); - arp_ptr += 4; - /* If we actually cared about dst hw addr, - it would get copied here */ - arp_ptr += skb->dev->addr_len; - memcpy(&tip, arp_ptr, 4); - - /* Should we ignore arp? */ - if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) - return; - - size = arp_hdr_len(skb->dev); - - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (tip != np->local_ip.ip) - continue; - - hlen = LL_RESERVED_SPACE(np->dev); - tlen = np->dev->needed_tailroom; - send_skb = find_skb(np, size + hlen + tlen, hlen); - if (!send_skb) - continue; - - skb_reset_network_header(send_skb); - arp = (struct arphdr *) skb_put(send_skb, size); - send_skb->dev = skb->dev; - send_skb->protocol = htons(ETH_P_ARP); - - /* Fill the device header for the ARP frame */ - if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP, - sha, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - continue; - } - - /* - * Fill out the arp protocol part. - * - * we only support ethernet device type, - * which (according to RFC 1390) should - * always equal 1 (Ethernet). - */ - - arp->ar_hrd = htons(np->dev->type); - arp->ar_pro = htons(ETH_P_IP); - arp->ar_hln = np->dev->addr_len; - arp->ar_pln = 4; - arp->ar_op = htons(type); - - arp_ptr = (unsigned char *)(arp + 1); - memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &tip, 4); - arp_ptr += 4; - memcpy(arp_ptr, sha, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &sip, 4); - - netpoll_send_skb(np, send_skb); - - /* If there are several rx_skb_hooks for the same - * address we're fine by sending a single reply - */ - break; - } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } else if( proto == ETH_P_IPV6) { -#if IS_ENABLED(CONFIG_IPV6) - struct nd_msg *msg; - u8 *lladdr = NULL; - struct ipv6hdr *hdr; - struct icmp6hdr *icmp6h; - const struct in6_addr *saddr; - const struct in6_addr *daddr; - struct inet6_dev *in6_dev = NULL; - struct in6_addr *target; - - in6_dev = in6_dev_get(skb->dev); - if (!in6_dev || !in6_dev->cnf.accept_ra) - return; - - if (!pskb_may_pull(skb, skb->len)) - return; - - msg = (struct nd_msg *)skb_transport_header(skb); - - __skb_push(skb, skb->data - skb_transport_header(skb)); - - if (ipv6_hdr(skb)->hop_limit != 255) - return; - if (msg->icmph.icmp6_code != 0) - return; - if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) - return; - - saddr = &ipv6_hdr(skb)->saddr; - daddr = &ipv6_hdr(skb)->daddr; - - size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr); - - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (!ipv6_addr_equal(daddr, &np->local_ip.in6)) - continue; - - hlen = LL_RESERVED_SPACE(np->dev); - tlen = np->dev->needed_tailroom; - send_skb = find_skb(np, size + hlen + tlen, hlen); - if (!send_skb) - continue; - - send_skb->protocol = htons(ETH_P_IPV6); - send_skb->dev = skb->dev; - - skb_reset_network_header(send_skb); - hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr)); - *(__be32*)hdr = htonl(0x60000000); - hdr->payload_len = htons(size); - hdr->nexthdr = IPPROTO_ICMPV6; - hdr->hop_limit = 255; - hdr->saddr = *saddr; - hdr->daddr = *daddr; - - icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr)); - icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; - icmp6h->icmp6_router = 0; - icmp6h->icmp6_solicited = 1; - - target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr)); - *target = msg->target; - icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size, - IPPROTO_ICMPV6, - csum_partial(icmp6h, - size, 0)); - - if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6, - lladdr, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - continue; - } - - netpoll_send_skb(np, send_skb); - - /* If there are several rx_skb_hooks for the same - * address, we're fine by sending a single reply - */ - break; - } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); -#endif - } -} - -static bool pkt_is_ns(struct sk_buff *skb) -{ - struct nd_msg *msg; - struct ipv6hdr *hdr; - - if (skb->protocol != htons(ETH_P_ARP)) - return false; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg))) - return false; - - msg = (struct nd_msg *)skb_transport_header(skb); - __skb_push(skb, skb->data - skb_transport_header(skb)); - hdr = ipv6_hdr(skb); - - if (hdr->nexthdr != IPPROTO_ICMPV6) - return false; - if (hdr->hop_limit != 255) - return false; - if (msg->icmph.icmp6_code != 0) - return false; - if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) - return false; - - return true; -} - -int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) -{ - int proto, len, ulen, data_len; - int hits = 0, offset; - const struct iphdr *iph; - struct udphdr *uh; - struct netpoll *np, *tmp; - uint16_t source; - - if (!netpoll_rx_processing(npinfo)) - goto out; - - if (skb->dev->type != ARPHRD_ETHER) - goto out; - - /* check if netpoll clients need ARP */ - if (skb->protocol == htons(ETH_P_ARP) && netpoll_trap()) { - skb_queue_tail(&npinfo->neigh_tx, skb); - return 1; - } else if (pkt_is_ns(skb) && netpoll_trap()) { - skb_queue_tail(&npinfo->neigh_tx, skb); - return 1; - } - - if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { - skb = vlan_untag(skb); - if (unlikely(!skb)) - goto out; - } - - proto = ntohs(eth_hdr(skb)->h_proto); - if (proto != ETH_P_IP && proto != ETH_P_IPV6) - goto out; - if (skb->pkt_type == PACKET_OTHERHOST) - goto out; - if (skb_shared(skb)) - goto out; - - if (proto == ETH_P_IP) { - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto out; - iph = (struct iphdr *)skb->data; - if (iph->ihl < 5 || iph->version != 4) - goto out; - if (!pskb_may_pull(skb, iph->ihl*4)) - goto out; - iph = (struct iphdr *)skb->data; - if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) - goto out; - - len = ntohs(iph->tot_len); - if (skb->len < len || len < iph->ihl*4) - goto out; - - /* - * Our transport medium may have padded the buffer out. - * Now We trim to the true length of the frame. - */ - if (pskb_trim_rcsum(skb, len)) - goto out; - - iph = (struct iphdr *)skb->data; - if (iph->protocol != IPPROTO_UDP) - goto out; - - len -= iph->ihl*4; - uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); - offset = (unsigned char *)(uh + 1) - skb->data; - ulen = ntohs(uh->len); - data_len = skb->len - offset; - source = ntohs(uh->source); - - if (ulen != len) - goto out; - if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) - goto out; - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (np->local_ip.ip && np->local_ip.ip != iph->daddr) - continue; - if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr) - continue; - if (np->local_port && np->local_port != ntohs(uh->dest)) - continue; - - np->rx_skb_hook(np, source, skb, offset, data_len); - hits++; - } - } else { -#if IS_ENABLED(CONFIG_IPV6) - const struct ipv6hdr *ip6h; - - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - goto out; - ip6h = (struct ipv6hdr *)skb->data; - if (ip6h->version != 6) - goto out; - len = ntohs(ip6h->payload_len); - if (!len) - goto out; - if (len + sizeof(struct ipv6hdr) > skb->len) - goto out; - if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr))) - goto out; - ip6h = ipv6_hdr(skb); - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto out; - uh = udp_hdr(skb); - offset = (unsigned char *)(uh + 1) - skb->data; - ulen = ntohs(uh->len); - data_len = skb->len - offset; - source = ntohs(uh->source); - if (ulen != skb->len) - goto out; - if (udp6_csum_init(skb, uh, IPPROTO_UDP)) - goto out; - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr)) - continue; - if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr)) - continue; - if (np->local_port && np->local_port != ntohs(uh->dest)) - continue; - - np->rx_skb_hook(np, source, skb, offset, data_len); - hits++; - } -#endif - } - - if (!hits) - goto out; - - kfree_skb(skb); - return 1; - -out: - if (netpoll_trap()) { - kfree_skb(skb); - return 1; - } - - return 0; -} - -static void netpoll_trap_setup_info(struct netpoll_info *npinfo) -{ - INIT_LIST_HEAD(&npinfo->rx_np); - spin_lock_init(&npinfo->rx_lock); - skb_queue_head_init(&npinfo->neigh_tx); -} - -static void netpoll_trap_cleanup_info(struct netpoll_info *npinfo) -{ - skb_queue_purge(&npinfo->neigh_tx); -} - -static void netpoll_trap_setup(struct netpoll *np, struct netpoll_info *npinfo) -{ - unsigned long flags; - if (np->rx_skb_hook) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_add_tail(&np->rx, &npinfo->rx_np); - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } -} - -static void netpoll_trap_cleanup(struct netpoll *np, struct netpoll_info *npinfo) -{ - unsigned long flags; - if (!list_empty(&npinfo->rx_np)) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_del(&np->rx); - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } -} - -#else /* !CONFIG_NETPOLL_TRAP */ -static inline void netpoll_trap_setup_info(struct netpoll_info *npinfo) -{ -} -static inline void netpoll_trap_cleanup_info(struct netpoll_info *npinfo) -{ -} -static inline -void netpoll_trap_setup(struct netpoll *np, struct netpoll_info *npinfo) -{ -} -static inline -void netpoll_trap_cleanup(struct netpoll *np, struct netpoll_info *npinfo) -{ -} -#endif /* CONFIG_NETPOLL_TRAP */ - void netpoll_print_options(struct netpoll *np) { np_info(np, "local port %d\n", np->local_port); @@ -1103,8 +609,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) goto out; } - netpoll_trap_setup_info(npinfo); - sema_init(&npinfo->dev_lock, 1); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); @@ -1124,8 +628,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) npinfo->netpoll = np; - netpoll_trap_setup(np, npinfo); - /* last thing to do is link it to the net device structure */ rcu_assign_pointer(ndev->npinfo, npinfo); @@ -1274,7 +776,6 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) struct netpoll_info *npinfo = container_of(rcu_head, struct netpoll_info, rcu); - netpoll_trap_cleanup_info(npinfo); skb_queue_purge(&npinfo->txq); /* we can't call cancel_delayed_work_sync here, as we are in softirq */ @@ -1299,8 +800,6 @@ void __netpoll_cleanup(struct netpoll *np) if (!npinfo) return; - netpoll_trap_cleanup(np, npinfo); - synchronize_srcu(&netpoll_srcu); if (atomic_dec_and_test(&npinfo->refcnt)) { @@ -1344,20 +843,3 @@ out: rtnl_unlock(); } EXPORT_SYMBOL(netpoll_cleanup); - -#ifdef CONFIG_NETPOLL_TRAP -int netpoll_trap(void) -{ - return atomic_read(&trapped); -} -EXPORT_SYMBOL(netpoll_trap); - -void netpoll_set_trap(int trap) -{ - if (trap) - atomic_inc(&trapped); - else - atomic_dec(&trapped); -} -EXPORT_SYMBOL(netpoll_set_trap); -#endif /* CONFIG_NETPOLL_TRAP */ -- cgit v1.2.3-71-gd317 From aa475b0ef34a8230b8cc3a298bf67c668540d689 Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Wed, 19 Mar 2014 13:14:40 +0200 Subject: wireless: max MSDU size for DMG networks In the 802.11ad, aka DMG (Dynamic Multi-Gigabit), aka 60Ghz spec, maximum MSDU size extended to 7920 bytes. add #define for this. Signed-off-by: Vladimir Kondratiev Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 5f349355ee54..cde7ede8e554 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -154,6 +154,10 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) 802.11e clarifies the figure in section 7.1.2. The frame body is up to 2304 octets long (maximum MSDU size) plus any crypt overhead. */ #define IEEE80211_MAX_DATA_LEN 2304 +/* 802.11ad extends maximum MSDU size for DMG (freq > 40Ghz) networks + * to 7920 bytes, see 8.2.3 General frame format + */ +#define IEEE80211_MAX_DATA_LEN_DMG 7920 /* 30 byte 4 addr hdr, 2 byte QoS, 2304 byte MSDU, 12 byte crypt, 4 byte FCS */ #define IEEE80211_MAX_FRAME_LEN 2352 -- cgit v1.2.3-71-gd317 From 1ab95d37bcc3ff2d69e3871e4f056bab7aed0b85 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Wed, 19 Mar 2014 18:11:50 +0200 Subject: net/mlx4: Add data structures to support N-Ports per VF Adds the required data structures to support VFs with N (1 or 2) ports instead of always using the number of physical ports. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 109 +++++++++++++++++++++++------- include/linux/mlx4/device.h | 7 ++ 2 files changed, 92 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index abd0b1d277aa..e1a55857af71 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2193,6 +2193,10 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) struct mlx4_dev *dev; int err; int port; + int nvfs[MLX4_MAX_PORTS + 1], prb_vf[MLX4_MAX_PORTS + 1]; + unsigned total_vfs = 0; + int sriov_initialized = 0; + unsigned int i; pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev)); @@ -2207,17 +2211,39 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) * per port, we must limit the number of VFs to 63 (since their are * 128 MACs) */ - if (num_vfs >= MLX4_MAX_NUM_VF) { + for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]); + total_vfs += nvfs[i], i++) { + nvfs[i] = i == MLX4_MAX_PORTS ? num_vfs : 0; + if (nvfs[i] < 0) { + dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n"); + return -EINVAL; + } + } + for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]); i++) { + prb_vf[i] = i == MLX4_MAX_PORTS ? probe_vf : 0; + if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) { + dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n"); + return -EINVAL; + } + } + if (total_vfs >= MLX4_MAX_NUM_VF) { dev_err(&pdev->dev, "Requested more VF's (%d) than allowed (%d)\n", - num_vfs, MLX4_MAX_NUM_VF - 1); + total_vfs, MLX4_MAX_NUM_VF - 1); return -EINVAL; } - if (num_vfs < 0) { - pr_err("num_vfs module parameter cannot be negative\n"); - return -EINVAL; + for (i = 0; i < MLX4_MAX_PORTS; i++) { + if (nvfs[i] + nvfs[2] >= MLX4_MAX_NUM_VF_P_PORT) { + dev_err(&pdev->dev, + "Requested more VF's (%d) for port (%d) than allowed (%d)\n", + nvfs[i] + nvfs[2], i + 1, + MLX4_MAX_NUM_VF_P_PORT - 1); + return -EINVAL; + } } + + /* * Check for BARs. */ @@ -2292,11 +2318,23 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { /* When acting as pf, we normally skip vfs unless explicitly * requested to probe them. */ - if (num_vfs && extended_func_num(pdev) > probe_vf) { - mlx4_warn(dev, "Skipping virtual function:%d\n", - extended_func_num(pdev)); - err = -ENODEV; - goto err_free_dev; + if (total_vfs) { + unsigned vfs_offset = 0; + for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && + vfs_offset + nvfs[i] < extended_func_num(pdev); + vfs_offset += nvfs[i], i++) + ; + if (i == sizeof(nvfs)/sizeof(nvfs[0])) { + err = -ENODEV; + goto err_free_dev; + } + if ((extended_func_num(pdev) - vfs_offset) + > prb_vf[i]) { + mlx4_warn(dev, "Skipping virtual function:%d\n", + extended_func_num(pdev)); + err = -ENODEV; + goto err_free_dev; + } } mlx4_warn(dev, "Detected virtual function - running in slave mode\n"); dev->flags |= MLX4_FLAG_SLAVE; @@ -2316,22 +2354,30 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) } } - if (num_vfs) { - mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", num_vfs); - - atomic_inc(&pf_loading); - err = pci_enable_sriov(pdev, num_vfs); - atomic_dec(&pf_loading); - - if (err) { - mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n", - err); + if (total_vfs) { + mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", + total_vfs); + dev->dev_vfs = kzalloc( + total_vfs * sizeof(*dev->dev_vfs), + GFP_KERNEL); + if (NULL == dev->dev_vfs) { + mlx4_err(dev, "Failed to allocate memory for VFs\n"); err = 0; } else { - mlx4_warn(dev, "Running in master mode\n"); - dev->flags |= MLX4_FLAG_SRIOV | - MLX4_FLAG_MASTER; - dev->num_vfs = num_vfs; + atomic_inc(&pf_loading); + err = pci_enable_sriov(pdev, total_vfs); + atomic_dec(&pf_loading); + if (err) { + mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n", + err); + err = 0; + } else { + mlx4_warn(dev, "Running in master mode\n"); + dev->flags |= MLX4_FLAG_SRIOV | + MLX4_FLAG_MASTER; + dev->num_vfs = total_vfs; + sriov_initialized = 1; + } } } @@ -2396,12 +2442,24 @@ slave_start: /* In master functions, the communication channel must be initialized * after obtaining its address from fw */ if (mlx4_is_master(dev)) { + unsigned sum = 0; err = mlx4_multi_func_init(dev); if (err) { mlx4_err(dev, "Failed to init master mfunc" "interface, aborting.\n"); goto err_close; } + if (sriov_initialized) { + for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]); i++) { + unsigned j; + for (j = 0; j < nvfs[i]; ++sum, ++j) { + dev->dev_vfs[sum].min_port = + i < 2 ? i + 1 : 1; + dev->dev_vfs[sum].n_ports = i < 2 ? 1 : + dev->caps.num_ports; + } + } + } } err = mlx4_alloc_eq_table(dev); @@ -2509,6 +2567,8 @@ err_rel_own: if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); + kfree(priv->dev.dev_vfs); + err_free_dev: kfree(priv); @@ -2595,6 +2655,7 @@ static void mlx4_remove_one(struct pci_dev *pdev) kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); + kfree(dev->dev_vfs); kfree(priv); pci_release_regions(pdev); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index f211b51dc726..b3044f32aef1 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -84,6 +84,7 @@ enum { enum { MLX4_MAX_NUM_PF = 16, MLX4_MAX_NUM_VF = 64, + MLX4_MAX_NUM_VF_P_PORT = 64, MLX4_MFUNC_MAX = 80, MLX4_MAX_EQ_NUM = 1024, MLX4_MFUNC_EQ_NUM = 4, @@ -664,6 +665,11 @@ struct mlx4_quotas { int xrcd; }; +struct mlx4_vf_dev { + u8 min_port; + u8 n_ports; +}; + struct mlx4_dev { struct pci_dev *pdev; unsigned long flags; @@ -679,6 +685,7 @@ struct mlx4_dev { int oper_log_mgm_entry_size; u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; + struct mlx4_vf_dev *dev_vfs; }; struct mlx4_eqe { -- cgit v1.2.3-71-gd317 From f74462acf8f390528c8b7937f227c6c90d017f3b Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Wed, 19 Mar 2014 18:11:51 +0200 Subject: net/mlx4: Add utils for N-Port VFs This patch adds the following utils: 1. Convert slave_id -> VF 2. Get the active ports by slave_id 3. Convert slave's port to real port 4. Get the slave's port from real port 5. Get all slaves that uses the i'th real port 6. Get all slaves that uses the i'th real port exclusively Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 106 ++++++++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 + include/linux/mlx4/device.h | 27 ++++++++ 3 files changed, 135 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 2b0b45ece14b..59a1b2703281 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2234,6 +2234,112 @@ static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf) return vf+1; } +int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave) +{ + if (slave < 1 || slave > dev->num_vfs) { + mlx4_err(dev, + "Bad slave number:%d (number of activated slaves: %lu)\n", + slave, dev->num_slaves); + return -EINVAL; + } + return slave - 1; +} + +struct mlx4_active_ports mlx4_get_active_ports(struct mlx4_dev *dev, int slave) +{ + struct mlx4_active_ports actv_ports; + int vf; + + bitmap_zero(actv_ports.ports, MLX4_MAX_PORTS); + + if (slave == 0) { + bitmap_fill(actv_ports.ports, dev->caps.num_ports); + return actv_ports; + } + + vf = mlx4_get_vf_indx(dev, slave); + if (vf < 0) + return actv_ports; + + bitmap_set(actv_ports.ports, dev->dev_vfs[vf].min_port - 1, + min((int)dev->dev_vfs[mlx4_get_vf_indx(dev, slave)].n_ports, + dev->caps.num_ports)); + + return actv_ports; +} +EXPORT_SYMBOL_GPL(mlx4_get_active_ports); + +int mlx4_slave_convert_port(struct mlx4_dev *dev, int slave, int port) +{ + unsigned n; + struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); + unsigned m = bitmap_weight(actv_ports.ports, dev->caps.num_ports); + + if (port <= 0 || port > m) + return -EINVAL; + + n = find_first_bit(actv_ports.ports, dev->caps.num_ports); + if (port <= n) + port = n + 1; + + return port; +} +EXPORT_SYMBOL_GPL(mlx4_slave_convert_port); + +int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port) +{ + struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); + if (test_bit(port - 1, actv_ports.ports)) + return port - + find_first_bit(actv_ports.ports, dev->caps.num_ports); + + return -1; +} +EXPORT_SYMBOL_GPL(mlx4_phys_to_slave_port); + +struct mlx4_slaves_pport mlx4_phys_to_slaves_pport(struct mlx4_dev *dev, + int port) +{ + unsigned i; + struct mlx4_slaves_pport slaves_pport; + + bitmap_zero(slaves_pport.slaves, MLX4_MFUNC_MAX); + + if (port <= 0 || port > dev->caps.num_ports) + return slaves_pport; + + for (i = 0; i < dev->num_vfs + 1; i++) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev, i); + if (test_bit(port - 1, actv_ports.ports)) + set_bit(i, slaves_pport.slaves); + } + + return slaves_pport; +} +EXPORT_SYMBOL_GPL(mlx4_phys_to_slaves_pport); + +struct mlx4_slaves_pport mlx4_phys_to_slaves_pport_actv( + struct mlx4_dev *dev, + const struct mlx4_active_ports *crit_ports) +{ + unsigned i; + struct mlx4_slaves_pport slaves_pport; + + bitmap_zero(slaves_pport.slaves, MLX4_MFUNC_MAX); + + for (i = 0; i < dev->num_vfs + 1; i++) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev, i); + if (bitmap_equal(crit_ports->ports, actv_ports.ports, + dev->caps.num_ports)) + set_bit(i, slaves_pport.slaves); + } + + return slaves_pport; +} +EXPORT_SYMBOL_GPL(mlx4_phys_to_slaves_pport_actv); + int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 6ba38c98c492..fe8715e35afa 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1289,5 +1289,7 @@ void mlx4_init_quotas(struct mlx4_dev *dev); int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave); int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave); +/* Returns the VF index of slave */ +int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); #endif /* MLX4_H */ diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b3044f32aef1..122c7cabaee7 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1204,4 +1204,31 @@ int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, cycle_t mlx4_read_clock(struct mlx4_dev *dev); +struct mlx4_active_ports { + DECLARE_BITMAP(ports, MLX4_MAX_PORTS); +}; +/* Returns a bitmap of the physical ports which are assigned to slave */ +struct mlx4_active_ports mlx4_get_active_ports(struct mlx4_dev *dev, int slave); + +/* Returns the physical port that represents the virtual port of the slave, */ +/* or a value < 0 in case of an error. If a slave has 2 ports, the identity */ +/* mapping is returned. */ +int mlx4_slave_convert_port(struct mlx4_dev *dev, int slave, int port); + +struct mlx4_slaves_pport { + DECLARE_BITMAP(slaves, MLX4_MFUNC_MAX); +}; +/* Returns a bitmap of all slaves that are assigned to port. */ +struct mlx4_slaves_pport mlx4_phys_to_slaves_pport(struct mlx4_dev *dev, + int port); + +/* Returns a bitmap of all slaves that are assigned exactly to all the */ +/* the ports that are set in crit_ports. */ +struct mlx4_slaves_pport mlx4_phys_to_slaves_pport_actv( + struct mlx4_dev *dev, + const struct mlx4_active_ports *crit_ports); + +/* Returns the slave's virtual port that represents the physical port. */ +int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port); + #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3-71-gd317 From 449fc48866f7d84b0d9a19201de18a4dd4d3488c Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Wed, 19 Mar 2014 18:11:52 +0200 Subject: net/mlx4: Adapt code for N-Port VF Adds support for N-Port VFs, this includes: 1. Adding support in the wrapped FW command In wrapped commands, we need to verify and convert the slave's port into the real physical port. Furthermore, when sending the response back to the slave, a reverse conversion should be made. 2. Adjusting sqpn for QP1 para-virtualization The slave assumes that sqpn is used for QP1 communication. If the slave is assigned to a port != (first port), we need to adjust the sqpn that will direct its QP1 packets into the correct endpoint. 3. Adjusting gid[5] to modify the port for raw ethernet In B0 steering, gid[5] contains the port. It needs to be adjusted into the physical port. 4. Adjusting number of ports in the query / ports caps in the FW commands When a slave queries the hardware, it needs to view only the physical ports it's assigned to. 5. Adjusting the sched_qp according to the port number The QP port is encoded in the sched_qp, thus in modify_qp we need to encode the correct port in sched_qp. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/mad.c | 29 ++-- drivers/infiniband/hw/mlx4/main.c | 15 +- drivers/infiniband/hw/mlx4/sysfs.c | 5 + drivers/net/ethernet/mellanox/mlx4/cmd.c | 23 ++- drivers/net/ethernet/mellanox/mlx4/eq.c | 48 +++++-- drivers/net/ethernet/mellanox/mlx4/fw.c | 62 +++++++- drivers/net/ethernet/mellanox/mlx4/main.c | 4 +- drivers/net/ethernet/mellanox/mlx4/mcg.c | 5 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 3 +- drivers/net/ethernet/mellanox/mlx4/port.c | 158 ++++++++++++++++++--- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 143 ++++++++++++++++--- include/linux/mlx4/device.h | 1 + 12 files changed, 417 insertions(+), 79 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 2c572aed3f6f..fd36ec672632 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1245,21 +1245,9 @@ out: static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port) { - int gids; - int vfs; - if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) return slave; - - gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; - vfs = dev->dev->num_vfs; - - if (slave == 0) - return 0; - if (slave <= gids % vfs) - return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1); - - return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1)); + return mlx4_get_base_gid_ix(dev->dev, slave, port); } static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port, @@ -1281,6 +1269,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc struct ib_ah_attr ah_attr; u8 *slave_id; int slave; + int port; /* Get slave that sent this packet */ if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn || @@ -1360,6 +1349,10 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc if (ah_attr.ah_flags & IB_AH_GRH) fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr); + port = mlx4_slave_convert_port(dev->dev, slave, ah_attr.port_num); + if (port < 0) + return; + ah_attr.port_num = port; memcpy(ah_attr.dmac, tunnel->hdr.mac, 6); ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan); /* if slave have default vlan use it */ @@ -1949,7 +1942,15 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, ctx->port = port; ctx->ib_dev = &dev->ib_dev; - for (i = 0; i < dev->dev->caps.sqp_demux; i++) { + for (i = 0; + i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1)); + i++) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev->dev, i); + + if (!test_bit(port - 1, actv_ports.ports)) + continue; + ret = alloc_pv_object(dev, i, port, &ctx->tun[i]); if (ret) { ret = -ENOMEM; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 2ff428b17088..6cb85467dde7 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2323,17 +2323,24 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) struct mlx4_dev *dev = ibdev->dev; int i; unsigned long flags; + struct mlx4_active_ports actv_ports; + unsigned int ports; + unsigned int first_port; if (!mlx4_is_master(dev)) return; - dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC); + actv_ports = mlx4_get_active_ports(dev, slave); + ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports); + first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports); + + dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC); if (!dm) { pr_err("failed to allocate memory for tunneling qp update\n"); goto out; } - for (i = 0; i < dev->caps.num_ports; i++) { + for (i = 0; i < ports; i++) { dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC); if (!dm[i]) { pr_err("failed to allocate memory for tunneling qp update work struct\n"); @@ -2345,9 +2352,9 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) } } /* initialize or tear down tunnel QPs for the slave */ - for (i = 0; i < dev->caps.num_ports; i++) { + for (i = 0; i < ports; i++) { INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work); - dm[i]->port = i + 1; + dm[i]->port = first_port + i + 1; dm[i]->slave = slave; dm[i]->do_init = do_init; dm[i]->dev = ibdev; diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index db2ea31df832..5a38e43eca65 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -627,6 +627,7 @@ static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave) int port; struct kobject *p, *t; struct mlx4_port *mport; + struct mlx4_active_ports actv_ports; get_name(dev, name, slave, sizeof name); @@ -649,7 +650,11 @@ static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave) goto err_ports; } + actv_ports = mlx4_get_active_ports(dev->dev, slave); + for (port = 1; port <= dev->dev->caps.num_ports; ++port) { + if (!test_bit(port - 1, actv_ports.ports)) + continue; err = add_port(dev, port, slave); if (err) goto err_add; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 59a1b2703281..516c1dd4963b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1643,8 +1643,16 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) int port, err; struct mlx4_vport_state *vp_admin; struct mlx4_vport_oper_state *vp_oper; - - for (port = 1; port <= MLX4_MAX_PORTS; port++) { + struct mlx4_active_ports actv_ports = mlx4_get_active_ports( + &priv->dev, slave); + int min_port = find_first_bit(actv_ports.ports, + priv->dev.caps.num_ports) + 1; + int max_port = min_port - 1 + + bitmap_weight(actv_ports.ports, priv->dev.caps.num_ports); + + for (port = min_port; port <= max_port; port++) { + if (!test_bit(port - 1, actv_ports.ports)) + continue; vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; vp_oper->state = *vp_admin; @@ -1685,8 +1693,17 @@ static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave { int port; struct mlx4_vport_oper_state *vp_oper; + struct mlx4_active_ports actv_ports = mlx4_get_active_ports( + &priv->dev, slave); + int min_port = find_first_bit(actv_ports.ports, + priv->dev.caps.num_ports) + 1; + int max_port = min_port - 1 + + bitmap_weight(actv_ports.ports, priv->dev.caps.num_ports); + - for (port = 1; port <= MLX4_MAX_PORTS; port++) { + for (port = min_port; port <= max_port; port++) { + if (!test_bit(port - 1, actv_ports.ports)) + continue; vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if (NO_INDX != vp_oper->vlan_idx) { __mlx4_unregister_vlan(&priv->dev, diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 8992b38578d5..d501a2b0fb79 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -271,7 +271,10 @@ enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state; - if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS) { + struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); + + if (slave >= dev->num_slaves || port > dev->caps.num_ports || + port <= 0 || !test_bit(port - 1, actv_ports.ports)) { pr_err("%s: Error: asking for slave:%d, port:%d\n", __func__, slave, port); return SLAVE_PORT_DOWN; @@ -285,8 +288,10 @@ static int mlx4_set_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state; + struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); - if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) { + if (slave >= dev->num_slaves || port > dev->caps.num_ports || + port <= 0 || !test_bit(port - 1, actv_ports.ports)) { pr_err("%s: Error: asking for slave:%d, port:%d\n", __func__, slave, port); return -1; @@ -300,9 +305,13 @@ static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event) { int i; enum slave_port_gen_event gen_event; + struct mlx4_slaves_pport slaves_pport = mlx4_phys_to_slaves_pport(dev, + port); - for (i = 0; i < dev->num_slaves; i++) - set_and_calc_slave_port_state(dev, i, port, event, &gen_event); + for (i = 0; i < dev->num_vfs + 1; i++) + if (test_bit(i, slaves_pport.slaves)) + set_and_calc_slave_port_state(dev, i, port, + event, &gen_event); } /************************************************************************** The function get as input the new event to that port, @@ -321,12 +330,14 @@ int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, struct mlx4_slave_state *ctx = NULL; unsigned long flags; int ret = -1; + struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); enum slave_port_state cur_state = mlx4_get_slave_port_state(dev, slave, port); *gen_event = SLAVE_PORT_GEN_EVENT_NONE; - if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) { + if (slave >= dev->num_slaves || port > dev->caps.num_ports || + port <= 0 || !test_bit(port - 1, actv_ports.ports)) { pr_err("%s: Error: asking for slave:%d, port:%d\n", __func__, slave, port); return ret; @@ -542,15 +553,19 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) be64_to_cpu(eqe->event.cmd.out_param)); break; - case MLX4_EVENT_TYPE_PORT_CHANGE: + case MLX4_EVENT_TYPE_PORT_CHANGE: { + struct mlx4_slaves_pport slaves_port; port = be32_to_cpu(eqe->event.port_change.port) >> 28; + slaves_port = mlx4_phys_to_slaves_pport(dev, port); if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) { mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN, port); mlx4_priv(dev)->sense.do_sense_port[port] = 1; if (!mlx4_is_master(dev)) break; - for (i = 0; i < dev->num_slaves; i++) { + for (i = 0; i < dev->num_vfs + 1; i++) { + if (!test_bit(i, slaves_port.slaves)) + continue; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) { if (i == mlx4_master_func_num(dev)) continue; @@ -558,8 +573,13 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) " to slave: %d, port:%d\n", __func__, i, port); s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; - if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) + if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) { + eqe->event.port_change.port = + cpu_to_be32( + (be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF) + | (mlx4_phys_to_slave_port(dev, i, port) << 28)); mlx4_slave_event(dev, i, eqe); + } } else { /* IB port */ set_and_calc_slave_port_state(dev, i, port, MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, @@ -580,12 +600,19 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) if (!mlx4_is_master(dev)) break; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) - for (i = 0; i < dev->num_slaves; i++) { + for (i = 0; i < dev->num_vfs + 1; i++) { + if (!test_bit(i, slaves_port.slaves)) + continue; if (i == mlx4_master_func_num(dev)) continue; s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; - if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) + if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) { + eqe->event.port_change.port = + cpu_to_be32( + (be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF) + | (mlx4_phys_to_slave_port(dev, i, port) << 28)); mlx4_slave_event(dev, i, eqe); + } } else /* IB port */ /* port-up event will be sent to a slave when the @@ -594,6 +621,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) set_all_slave_state(dev, port, MLX4_DEV_EVENT_PORT_UP); } break; + } case MLX4_EVENT_TYPE_CQ_ERROR: mlx4_warn(dev, "CQ %s on CQN %06x\n", diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index d0d8dd832557..6bd33e2fc17c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -225,13 +225,25 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 if (vhcr->op_modifier == 1) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev, slave); + int converted_port = mlx4_slave_convert_port( + dev, slave, vhcr->in_modifier); + + if (converted_port < 0) + return -EINVAL; + + vhcr->in_modifier = converted_port; /* Set nic_info bit to mark new fields support */ field = QUERY_FUNC_CAP_FLAGS1_NIC_INFO; MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET); - field = vhcr->in_modifier; /* phys-port = logical-port */ + /* phys-port = logical-port */ + field = vhcr->in_modifier - + find_first_bit(actv_ports.ports, dev->caps.num_ports); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); + field = vhcr->in_modifier; /* size is now the QP number */ size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL); @@ -249,12 +261,16 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, QUERY_FUNC_CAP_PHYS_PORT_ID); } else if (vhcr->op_modifier == 0) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev, slave); /* enable rdma and ethernet interfaces, and new quota locations */ field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA | QUERY_FUNC_CAP_FLAG_QUOTAS); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS_OFFSET); - field = dev->caps.num_ports; + field = min( + bitmap_weight(actv_ports.ports, dev->caps.num_ports), + dev->caps.num_ports); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); size = dev->caps.function_caps; /* set PF behaviours */ @@ -840,6 +856,10 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, int err = 0; u8 field; u32 bmme_flags; + int real_port; + int slave_port; + int first_port; + struct mlx4_active_ports actv_ports; err = mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); @@ -852,8 +872,26 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_GET(flags, outbox->buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); flags |= MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV; flags &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW; + actv_ports = mlx4_get_active_ports(dev, slave); + first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports); + for (slave_port = 0, real_port = first_port; + real_port < first_port + + bitmap_weight(actv_ports.ports, dev->caps.num_ports); + ++real_port, ++slave_port) { + if (flags & (MLX4_DEV_CAP_FLAG_WOL_PORT1 << real_port)) + flags |= MLX4_DEV_CAP_FLAG_WOL_PORT1 << slave_port; + else + flags &= ~(MLX4_DEV_CAP_FLAG_WOL_PORT1 << slave_port); + } + for (; slave_port < dev->caps.num_ports; ++slave_port) + flags &= ~(MLX4_DEV_CAP_FLAG_WOL_PORT1 << slave_port); MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_VL_PORT_OFFSET); + field &= ~0x0F; + field |= bitmap_weight(actv_ports.ports, dev->caps.num_ports) & 0x0F; + MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_VL_PORT_OFFSET); + /* For guests, disable timestamp */ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET); field &= 0x7f; @@ -903,12 +941,20 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, u16 short_field; int err; int admin_link_state; + int port = mlx4_slave_convert_port(dev, slave, + vhcr->in_modifier & 0xFF); #define MLX4_VF_PORT_NO_LINK_SENSE_MASK 0xE0 #define MLX4_PORT_LINK_UP_MASK 0x80 #define QUERY_PORT_CUR_MAX_PKEY_OFFSET 0x0c #define QUERY_PORT_CUR_MAX_GID_OFFSET 0x0e + if (port < 0) + return -EINVAL; + + vhcr->in_modifier = (vhcr->in_modifier & ~0xFF) | + (port & 0xFF); + err = mlx4_cmd_box(dev, 0, outbox->dma, vhcr->in_modifier, 0, MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); @@ -936,7 +982,7 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, QUERY_PORT_SUPPORTED_TYPE_OFFSET); if (dev->caps.port_type[vhcr->in_modifier] == MLX4_PORT_TYPE_ETH) - short_field = mlx4_get_slave_num_gids(dev, slave); + short_field = mlx4_get_slave_num_gids(dev, slave, port); else short_field = 1; /* slave max gids */ MLX4_PUT(outbox->buf, short_field, @@ -1588,9 +1634,12 @@ int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); - int port = vhcr->in_modifier; + int port = mlx4_slave_convert_port(dev, slave, vhcr->in_modifier); int err; + if (port < 0) + return -EINVAL; + if (priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port)) return 0; @@ -1680,9 +1729,12 @@ int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); - int port = vhcr->in_modifier; + int port = mlx4_slave_convert_port(dev, slave, vhcr->in_modifier); int err; + if (port < 0) + return -EINVAL; + if (!(priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port))) return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index e1a55857af71..472925428de7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1471,7 +1471,7 @@ static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) for (i = 1; i <= dev->caps.num_ports; i++) { if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) dev->caps.gid_table_len[i] = - mlx4_get_slave_num_gids(dev, 0); + mlx4_get_slave_num_gids(dev, 0, i); else dev->caps.gid_table_len[i] = 1; dev->caps.pkey_table_len[i] = @@ -1498,7 +1498,7 @@ static void choose_steering_mode(struct mlx4_dev *dev, if (mlx4_log_num_mgm_entry_size == -1 && dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && (!mlx4_is_mfunc(dev) || - (dev_cap->fs_max_num_qp_per_entry >= (num_vfs + 1))) && + (dev_cap->fs_max_num_qp_per_entry >= (dev->num_vfs + 1))) && choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= MLX4_MIN_MGM_LOG_ENTRY_SIZE) { dev->oper_log_mgm_entry_size = diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index db7dc0b6667d..80ccb4edf825 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1387,9 +1387,12 @@ int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd) { u32 qpn = (u32) vhcr->in_param & 0xffffffff; - u8 port = vhcr->in_param >> 62; + int port = mlx4_slave_convert_port(dev, slave, vhcr->in_param >> 62); enum mlx4_steer_type steer = vhcr->in_modifier; + if (port < 0) + return -EINVAL; + /* Promiscuous unicast is not allowed in mfunc */ if (mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER) return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index fe8715e35afa..9fca6c150de3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1287,8 +1287,7 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work); void mlx4_init_quotas(struct mlx4_dev *dev); -int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave); -int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave); +int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); /* Returns the VF index of slave */ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index ece328166e94..2705b9ab9463 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -507,30 +507,82 @@ int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps) } static struct mlx4_roce_gid_entry zgid_entry; -int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave) +int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port) { + int vfs; + int slave_gid = slave; + unsigned i; + struct mlx4_slaves_pport slaves_pport; + struct mlx4_active_ports actv_ports; + unsigned max_port_p_one; + if (slave == 0) return MLX4_ROCE_PF_GIDS; - if (slave <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % dev->num_vfs)) - return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs) + 1; - return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs; + + /* Slave is a VF */ + slaves_pport = mlx4_phys_to_slaves_pport(dev, port); + actv_ports = mlx4_get_active_ports(dev, slave); + max_port_p_one = find_first_bit(actv_ports.ports, dev->caps.num_ports) + + bitmap_weight(actv_ports.ports, dev->caps.num_ports) + 1; + + for (i = 1; i < max_port_p_one; i++) { + struct mlx4_active_ports exclusive_ports; + struct mlx4_slaves_pport slaves_pport_actv; + bitmap_zero(exclusive_ports.ports, dev->caps.num_ports); + set_bit(i - 1, exclusive_ports.ports); + if (i == port) + continue; + slaves_pport_actv = mlx4_phys_to_slaves_pport_actv( + dev, &exclusive_ports); + slave_gid -= bitmap_weight(slaves_pport_actv.slaves, + dev->num_vfs + 1); + } + vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + if (slave_gid <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % vfs)) + return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs) + 1; + return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs; } -int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave) +int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port) { int gids; + unsigned i; + int slave_gid = slave; int vfs; - gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; - vfs = dev->num_vfs; + struct mlx4_slaves_pport slaves_pport; + struct mlx4_active_ports actv_ports; + unsigned max_port_p_one; if (slave == 0) return 0; - if (slave <= gids % vfs) - return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1); - return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1)); + slaves_pport = mlx4_phys_to_slaves_pport(dev, port); + actv_ports = mlx4_get_active_ports(dev, slave); + max_port_p_one = find_first_bit(actv_ports.ports, dev->caps.num_ports) + + bitmap_weight(actv_ports.ports, dev->caps.num_ports) + 1; + + for (i = 1; i < max_port_p_one; i++) { + struct mlx4_active_ports exclusive_ports; + struct mlx4_slaves_pport slaves_pport_actv; + bitmap_zero(exclusive_ports.ports, dev->caps.num_ports); + set_bit(i - 1, exclusive_ports.ports); + if (i == port) + continue; + slaves_pport_actv = mlx4_phys_to_slaves_pport_actv( + dev, &exclusive_ports); + slave_gid -= bitmap_weight(slaves_pport_actv.slaves, + dev->num_vfs + 1); + } + gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; + vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + if (slave_gid <= gids % vfs) + return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave_gid - 1); + + return MLX4_ROCE_PF_GIDS + (gids % vfs) + + ((gids / vfs) * (slave_gid - 1)); } +EXPORT_SYMBOL_GPL(mlx4_get_base_gid_ix); static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, u8 op_mod, struct mlx4_cmd_mailbox *inbox) @@ -617,8 +669,8 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, * need a FOR-loop here over number of gids the guest has. * 1. Check no duplicates in gids passed by slave */ - num_gids = mlx4_get_slave_num_gids(dev, slave); - base = mlx4_get_base_gid_ix(dev, slave); + num_gids = mlx4_get_slave_num_gids(dev, slave, port); + base = mlx4_get_base_gid_ix(dev, slave, port); gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); for (i = 0; i < num_gids; gid_entry_mbox++, i++) { if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw, @@ -738,6 +790,15 @@ int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { + int port = mlx4_slave_convert_port( + dev, slave, vhcr->in_modifier & 0xFF); + + if (port < 0) + return -EINVAL; + + vhcr->in_modifier = (vhcr->in_modifier & ~0xFF) | + (port & 0xFF); + return mlx4_common_set_port(dev, slave, vhcr->in_modifier, vhcr->op_modifier, inbox); } @@ -1026,10 +1087,16 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, struct mlx4_priv *priv = mlx4_priv(dev); int i, found_ix = -1; int vf_gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; + struct mlx4_slaves_pport slaves_pport; + unsigned num_vfs; + int slave_gid; if (!mlx4_is_mfunc(dev)) return -EINVAL; + slaves_pport = mlx4_phys_to_slaves_pport(dev, port); + num_vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { if (!memcmp(priv->roce_gids[port - 1][i].raw, gid, 16)) { found_ix = i; @@ -1039,16 +1106,67 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, if (found_ix >= 0) { if (found_ix < MLX4_ROCE_PF_GIDS) - *slave_id = 0; - else if (found_ix < MLX4_ROCE_PF_GIDS + (vf_gids % dev->num_vfs) * - (vf_gids / dev->num_vfs + 1)) - *slave_id = ((found_ix - MLX4_ROCE_PF_GIDS) / - (vf_gids / dev->num_vfs + 1)) + 1; + slave_gid = 0; + else if (found_ix < MLX4_ROCE_PF_GIDS + (vf_gids % num_vfs) * + (vf_gids / num_vfs + 1)) + slave_gid = ((found_ix - MLX4_ROCE_PF_GIDS) / + (vf_gids / num_vfs + 1)) + 1; else - *slave_id = + slave_gid = ((found_ix - MLX4_ROCE_PF_GIDS - - ((vf_gids % dev->num_vfs) * ((vf_gids / dev->num_vfs + 1)))) / - (vf_gids / dev->num_vfs)) + vf_gids % dev->num_vfs + 1; + ((vf_gids % num_vfs) * ((vf_gids / num_vfs + 1)))) / + (vf_gids / num_vfs)) + vf_gids % num_vfs + 1; + + if (slave_gid) { + struct mlx4_active_ports exclusive_ports; + struct mlx4_active_ports actv_ports; + struct mlx4_slaves_pport slaves_pport_actv; + unsigned max_port_p_one; + int num_slaves_before = 1; + + for (i = 1; i < port; i++) { + bitmap_zero(exclusive_ports.ports, dev->caps.num_ports); + set_bit(i, exclusive_ports.ports); + slaves_pport_actv = + mlx4_phys_to_slaves_pport_actv( + dev, &exclusive_ports); + num_slaves_before += bitmap_weight( + slaves_pport_actv.slaves, + dev->num_vfs + 1); + } + + if (slave_gid < num_slaves_before) { + bitmap_zero(exclusive_ports.ports, dev->caps.num_ports); + set_bit(port - 1, exclusive_ports.ports); + slaves_pport_actv = + mlx4_phys_to_slaves_pport_actv( + dev, &exclusive_ports); + slave_gid += bitmap_weight( + slaves_pport_actv.slaves, + dev->num_vfs + 1) - + num_slaves_before; + } + actv_ports = mlx4_get_active_ports(dev, slave_gid); + max_port_p_one = find_first_bit( + actv_ports.ports, dev->caps.num_ports) + + bitmap_weight(actv_ports.ports, + dev->caps.num_ports) + 1; + + for (i = 1; i < max_port_p_one; i++) { + if (i == port) + continue; + bitmap_zero(exclusive_ports.ports, + dev->caps.num_ports); + set_bit(i - 1, exclusive_ports.ports); + slaves_pport_actv = + mlx4_phys_to_slaves_pport_actv( + dev, &exclusive_ports); + slave_gid += bitmap_weight( + slaves_pport_actv.slaves, + dev->num_vfs + 1); + } + } + *slave_id = slave_gid; } return (found_ix >= 0) ? 0 : -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 74e490d70184..2a33513a0e31 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -468,6 +468,8 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) spin_lock_init(&res_alloc->alloc_lock); for (t = 0; t < dev->num_vfs + 1; t++) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev, t); switch (i) { case RES_QP: initialize_res_quotas(dev, res_alloc, RES_QP, @@ -497,10 +499,27 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) break; case RES_MAC: if (t == mlx4_master_func_num(dev)) { - res_alloc->quota[t] = MLX4_MAX_MAC_NUM; + int max_vfs_pport = 0; + /* Calculate the max vfs per port for */ + /* both ports. */ + for (j = 0; j < dev->caps.num_ports; + j++) { + struct mlx4_slaves_pport slaves_pport = + mlx4_phys_to_slaves_pport(dev, j + 1); + unsigned current_slaves = + bitmap_weight(slaves_pport.slaves, + dev->caps.num_ports) - 1; + if (max_vfs_pport < current_slaves) + max_vfs_pport = + current_slaves; + } + res_alloc->quota[t] = + MLX4_MAX_MAC_NUM - + 2 * max_vfs_pport; res_alloc->guaranteed[t] = 2; for (j = 0; j < MLX4_MAX_PORTS; j++) - res_alloc->res_port_free[j] = MLX4_MAX_MAC_NUM; + res_alloc->res_port_free[j] = + MLX4_MAX_MAC_NUM; } else { res_alloc->quota[t] = MLX4_MAX_MAC_NUM; res_alloc->guaranteed[t] = 2; @@ -528,9 +547,10 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) break; } if (i == RES_MAC || i == RES_VLAN) { - for (j = 0; j < MLX4_MAX_PORTS; j++) - res_alloc->res_port_rsvd[j] += - res_alloc->guaranteed[t]; + for (j = 0; j < dev->caps.num_ports; j++) + if (test_bit(j, actv_ports.ports)) + res_alloc->res_port_rsvd[j] += + res_alloc->guaranteed[t]; } else { res_alloc->res_reserved += res_alloc->guaranteed[t]; } @@ -612,7 +632,8 @@ static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, if (MLX4_QP_ST_UD == ts) { port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; if (mlx4_is_eth(dev, port)) - qp_ctx->pri_path.mgid_index = mlx4_get_base_gid_ix(dev, slave) | 0x80; + qp_ctx->pri_path.mgid_index = + mlx4_get_base_gid_ix(dev, slave, port) | 0x80; else qp_ctx->pri_path.mgid_index = slave | 0x80; @@ -620,7 +641,8 @@ static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; if (mlx4_is_eth(dev, port)) { - qp_ctx->pri_path.mgid_index += mlx4_get_base_gid_ix(dev, slave); + qp_ctx->pri_path.mgid_index += + mlx4_get_base_gid_ix(dev, slave, port); qp_ctx->pri_path.mgid_index &= 0x7f; } else { qp_ctx->pri_path.mgid_index = slave & 0x7F; @@ -629,7 +651,8 @@ static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1; if (mlx4_is_eth(dev, port)) { - qp_ctx->alt_path.mgid_index += mlx4_get_base_gid_ix(dev, slave); + qp_ctx->alt_path.mgid_index += + mlx4_get_base_gid_ix(dev, slave, port); qp_ctx->alt_path.mgid_index &= 0x7f; } else { qp_ctx->alt_path.mgid_index = slave & 0x7F; @@ -1780,6 +1803,11 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, return err; port = !in_port ? get_param_l(out_param) : in_port; + port = mlx4_slave_convert_port( + dev, slave, port); + + if (port < 0) + return -EINVAL; mac = in_param; err = __mlx4_register_mac(dev, port, mac); @@ -1887,6 +1915,11 @@ static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (!port || op != RES_OP_RESERVE_AND_MAP) return -EINVAL; + port = mlx4_slave_convert_port( + dev, slave, port); + + if (port < 0) + return -EINVAL; /* upstream kernels had NOP for reg/unreg vlan. Continue this. */ if (!in_port && port > 0 && port <= dev->caps.num_ports) { slave_state[slave].old_vlan_api = true; @@ -2184,6 +2217,11 @@ static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, switch (op) { case RES_OP_RESERVE_AND_MAP: port = !in_port ? get_param_l(out_param) : in_port; + port = mlx4_slave_convert_port( + dev, slave, port); + + if (port < 0) + return -EINVAL; mac_del_from_slave(dev, slave, in_param, port); __mlx4_unregister_mac(dev, port, in_param); break; @@ -2203,6 +2241,11 @@ static int vlan_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; int err = 0; + port = mlx4_slave_convert_port( + dev, slave, port); + + if (port < 0) + return -EINVAL; switch (op) { case RES_OP_RESERVE_AND_MAP: if (slave_state[slave].old_vlan_api) @@ -2811,7 +2854,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev, if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) - num_gids = mlx4_get_slave_num_gids(dev, slave); + num_gids = mlx4_get_slave_num_gids(dev, slave, port); else num_gids = 1; if (qp_ctx->pri_path.mgid_index >= num_gids) @@ -2820,7 +2863,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev, if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1; if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) - num_gids = mlx4_get_slave_num_gids(dev, slave); + num_gids = mlx4_get_slave_num_gids(dev, slave, port); else num_gids = 1; if (qp_ctx->alt_path.mgid_index >= num_gids) @@ -3338,6 +3381,39 @@ int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } +static int adjust_qp_sched_queue(struct mlx4_dev *dev, int slave, + struct mlx4_qp_context *qpc, + struct mlx4_cmd_mailbox *inbox) +{ + enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *)inbox->buf); + u8 pri_sched_queue; + int port = mlx4_slave_convert_port( + dev, slave, (qpc->pri_path.sched_queue >> 6 & 1) + 1) - 1; + + if (port < 0) + return -EINVAL; + + pri_sched_queue = (qpc->pri_path.sched_queue & ~(1 << 6)) | + ((port & 1) << 6); + + if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH || + mlx4_is_eth(dev, port + 1)) { + qpc->pri_path.sched_queue = pri_sched_queue; + } + + if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { + port = mlx4_slave_convert_port( + dev, slave, (qpc->alt_path.sched_queue >> 6 & 1) + + 1) - 1; + if (port < 0) + return -EINVAL; + qpc->alt_path.sched_queue = + (qpc->alt_path.sched_queue & ~(1 << 6)) | + (port & 1) << 6; + } + return 0; +} + static int roce_verify_mac(struct mlx4_dev *dev, int slave, struct mlx4_qp_context *qpc, struct mlx4_cmd_mailbox *inbox) @@ -3375,6 +3451,9 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, u8 orig_vlan_index = qpc->pri_path.vlan_index; u8 orig_feup = qpc->pri_path.feup; + err = adjust_qp_sched_queue(dev, slave, qpc, inbox); + if (err) + return err; err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave); if (err) return err; @@ -3426,6 +3505,9 @@ int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, int err; struct mlx4_qp_context *context = inbox->buf + 8; + err = adjust_qp_sched_queue(dev, slave, context, inbox); + if (err) + return err; err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave); if (err) return err; @@ -3445,6 +3527,9 @@ int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, int err; struct mlx4_qp_context *context = inbox->buf + 8; + err = adjust_qp_sched_queue(dev, slave, context, inbox); + if (err) + return err; err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave); if (err) return err; @@ -3463,6 +3548,9 @@ int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd) { struct mlx4_qp_context *context = inbox->buf + 8; + int err = adjust_qp_sched_queue(dev, slave, context, inbox); + if (err) + return err; adjust_proxy_tun_qkey(dev, vhcr, context); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } @@ -3476,6 +3564,9 @@ int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, int err; struct mlx4_qp_context *context = inbox->buf + 8; + err = adjust_qp_sched_queue(dev, slave, context, inbox); + if (err) + return err; err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave); if (err) return err; @@ -3495,6 +3586,9 @@ int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, int err; struct mlx4_qp_context *context = inbox->buf + 8; + err = adjust_qp_sched_queue(dev, slave, context, inbox); + if (err) + return err; err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave); if (err) return err; @@ -3598,16 +3692,26 @@ static int rem_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, return err; } -static int qp_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], - int block_loopback, enum mlx4_protocol prot, +static int qp_attach(struct mlx4_dev *dev, int slave, struct mlx4_qp *qp, + u8 gid[16], int block_loopback, enum mlx4_protocol prot, enum mlx4_steer_type type, u64 *reg_id) { switch (dev->caps.steering_mode) { - case MLX4_STEERING_MODE_DEVICE_MANAGED: - return mlx4_trans_to_dmfs_attach(dev, qp, gid, gid[5], + case MLX4_STEERING_MODE_DEVICE_MANAGED: { + int port = mlx4_slave_convert_port(dev, slave, gid[5]); + if (port < 0) + return port; + return mlx4_trans_to_dmfs_attach(dev, qp, gid, port, block_loopback, prot, reg_id); + } case MLX4_STEERING_MODE_B0: + if (prot == MLX4_PROT_ETH) { + int port = mlx4_slave_convert_port(dev, slave, gid[5]); + if (port < 0) + return port; + gid[5] = port; + } return mlx4_qp_attach_common(dev, qp, gid, block_loopback, prot, type); default: @@ -3615,9 +3719,9 @@ static int qp_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], } } -static int qp_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], - enum mlx4_protocol prot, enum mlx4_steer_type type, - u64 reg_id) +static int qp_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, + u8 gid[16], enum mlx4_protocol prot, + enum mlx4_steer_type type, u64 reg_id) { switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: @@ -3654,7 +3758,7 @@ int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave, qp.qpn = qpn; if (attach) { - err = qp_attach(dev, &qp, gid, block_loopback, prot, + err = qp_attach(dev, slave, &qp, gid, block_loopback, prot, type, ®_id); if (err) { pr_err("Fail to attach rule to qp 0x%x\n", qpn); @@ -3790,6 +3894,9 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, return -EOPNOTSUPP; ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf; + ctrl->port = mlx4_slave_convert_port(dev, slave, ctrl->port); + if (ctrl->port <= 0) + return -EINVAL; qpn = be32_to_cpu(ctrl->qpn) & 0xffffff; err = get_res(dev, slave, qpn, RES_QP, &rqp); if (err) { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 122c7cabaee7..6b3998396b99 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1231,4 +1231,5 @@ struct mlx4_slaves_pport mlx4_phys_to_slaves_pport_actv( /* Returns the slave's virtual port that represents the physical port. */ int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port); +int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3-71-gd317 From 259fef033ffe4e70bf7f358c53400a09f1b5384e Mon Sep 17 00:00:00 2001 From: Ben Chan Date: Wed, 19 Mar 2014 14:00:06 -0700 Subject: net: cdc_ncm: respect operator preferred MTU reported by MBIM According to "Universal Serial Bus Communications Class Subclass Specification for Mobile Broadband Interface Model, Revision 1.0, Errata-1" published by USB-IF, the wMTU field of the MBIM extended functional descriptor indicates the operator preferred MTU for IP data streams. This patch modifies cdc_ncm_setup to ensure that the MTU value set on the usbnet device does not exceed the operator preferred MTU indicated by wMTU if the MBIM device exposes a MBIM extended functional descriptor. Signed-off-by: Ben Chan Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 17 +++++++++++++++++ include/linux/usb/cdc_ncm.h | 1 + 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index dbff290ed0e4..e8711a8cfa01 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -74,6 +74,7 @@ static int cdc_ncm_setup(struct usbnet *dev) u8 iface_no; int err; int eth_hlen; + u16 mbim_mtu; u16 ntb_fmt_supported; __le16 max_datagram_size; @@ -261,6 +262,14 @@ out: /* set MTU to max supported by the device if necessary */ if (dev->net->mtu > ctx->max_datagram_size - eth_hlen) dev->net->mtu = ctx->max_datagram_size - eth_hlen; + + /* do not exceed operater preferred MTU */ + if (ctx->mbim_extended_desc) { + mbim_mtu = le16_to_cpu(ctx->mbim_extended_desc->wMTU); + if (mbim_mtu != 0 && mbim_mtu < dev->net->mtu) + dev->net->mtu = mbim_mtu; + } + return 0; } @@ -399,6 +408,14 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ ctx->mbim_desc = (const struct usb_cdc_mbim_desc *)buf; break; + case USB_CDC_MBIM_EXTENDED_TYPE: + if (buf[0] < sizeof(*(ctx->mbim_extended_desc))) + break; + + ctx->mbim_extended_desc = + (const struct usb_cdc_mbim_extended_desc *)buf; + break; + default: break; } diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index c3fa80745996..bdf05fb36729 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -93,6 +93,7 @@ struct cdc_ncm_ctx { const struct usb_cdc_ncm_desc *func_desc; const struct usb_cdc_mbim_desc *mbim_desc; + const struct usb_cdc_mbim_extended_desc *mbim_extended_desc; const struct usb_cdc_ether_desc *ether_desc; struct usb_interface *control; -- cgit v1.2.3-71-gd317 From 6092315dfdec5185881605d15a0e200d6e90eb66 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Thu, 20 Mar 2014 22:21:52 +0100 Subject: ptp: introduce programmable pins. This patch adds a pair of new ioctls to the PTP Hardware Clock device interface. Using the ioctls, user space programs can query each pin to find out its current function and also reprogram a different function if desired. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/ptp/ptp_chardev.c | 128 ++++++++++++++++++++++++++++++++++++++- drivers/ptp/ptp_clock.c | 23 +++++++ drivers/ptp/ptp_private.h | 5 ++ include/linux/ptp_clock_kernel.h | 33 ++++++++++ include/uapi/linux/ptp_clock.h | 39 +++++++++++- 5 files changed, 226 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 34a0c607318e..419056d7887e 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -25,6 +25,96 @@ #include "ptp_private.h" +static int ptp_disable_pinfunc(struct ptp_clock_info *ops, + enum ptp_pin_function func, unsigned int chan) +{ + struct ptp_clock_request rq; + int err = 0; + + memset(&rq, 0, sizeof(rq)); + + switch (func) { + case PTP_PF_NONE: + break; + case PTP_PF_EXTTS: + rq.type = PTP_CLK_REQ_EXTTS; + rq.extts.index = chan; + err = ops->enable(ops, &rq, 0); + break; + case PTP_PF_PEROUT: + rq.type = PTP_CLK_REQ_PEROUT; + rq.perout.index = chan; + err = ops->enable(ops, &rq, 0); + break; + case PTP_PF_PHYSYNC: + break; + default: + return -EINVAL; + } + + return err; +} + +int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + struct ptp_clock_info *info = ptp->info; + struct ptp_pin_desc *pin1 = NULL, *pin2 = &info->pin_config[pin]; + unsigned int i; + + /* Check to see if any other pin previously had this function. */ + for (i = 0; i < info->n_pins; i++) { + if (info->pin_config[i].func == func && + info->pin_config[i].chan == chan) { + pin1 = &info->pin_config[i]; + break; + } + } + if (pin1 && i == pin) + return 0; + + /* Check the desired function and channel. */ + switch (func) { + case PTP_PF_NONE: + break; + case PTP_PF_EXTTS: + if (chan >= info->n_ext_ts) + return -EINVAL; + break; + case PTP_PF_PEROUT: + if (chan >= info->n_per_out) + return -EINVAL; + break; + case PTP_PF_PHYSYNC: + pr_err("sorry, cannot reassign the calibration pin\n"); + return -EINVAL; + default: + return -EINVAL; + } + + if (pin2->func == PTP_PF_PHYSYNC) { + pr_err("sorry, cannot reprogram the calibration pin\n"); + return -EINVAL; + } + + if (info->verify(info, pin, func, chan)) { + pr_err("driver cannot use function %u on pin %u\n", func, chan); + return -EOPNOTSUPP; + } + + /* Disable whatever function was previously assigned. */ + if (pin1) { + ptp_disable_pinfunc(info, func, chan); + pin1->func = PTP_PF_NONE; + pin1->chan = 0; + } + ptp_disable_pinfunc(info, pin2->func, pin2->chan); + pin2->func = func; + pin2->chan = chan; + + return 0; +} + int ptp_open(struct posix_clock *pc, fmode_t fmode) { return 0; @@ -35,12 +125,13 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) struct ptp_clock_caps caps; struct ptp_clock_request req; struct ptp_sys_offset *sysoff = NULL; + struct ptp_pin_desc pd; struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); struct ptp_clock_info *ops = ptp->info; struct ptp_clock_time *pct; struct timespec ts; int enable, err = 0; - unsigned int i; + unsigned int i, pin_index; switch (cmd) { @@ -51,6 +142,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) caps.n_ext_ts = ptp->info->n_ext_ts; caps.n_per_out = ptp->info->n_per_out; caps.pps = ptp->info->pps; + caps.n_pins = ptp->info->n_pins; if (copy_to_user((void __user *)arg, &caps, sizeof(caps))) err = -EFAULT; break; @@ -126,6 +218,40 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = -EFAULT; break; + case PTP_PIN_GETFUNC: + if (copy_from_user(&pd, (void __user *)arg, sizeof(pd))) { + err = -EFAULT; + break; + } + pin_index = pd.index; + if (pin_index >= ops->n_pins) { + err = -EINVAL; + break; + } + if (mutex_lock_interruptible(&ptp->pincfg_mux)) + return -ERESTARTSYS; + pd = ops->pin_config[pin_index]; + mutex_unlock(&ptp->pincfg_mux); + if (!err && copy_to_user((void __user *)arg, &pd, sizeof(pd))) + err = -EFAULT; + break; + + case PTP_PIN_SETFUNC: + if (copy_from_user(&pd, (void __user *)arg, sizeof(pd))) { + err = -EFAULT; + break; + } + pin_index = pd.index; + if (pin_index >= ops->n_pins) { + err = -EINVAL; + break; + } + if (mutex_lock_interruptible(&ptp->pincfg_mux)) + return -ERESTARTSYS; + err = ptp_set_pinfunc(ptp, pin_index, pd.func, pd.chan); + mutex_unlock(&ptp->pincfg_mux); + break; + default: err = -ENOTTY; break; diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index a8319b266643..e25d2bc898e5 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -169,6 +169,7 @@ static void delete_ptp_clock(struct posix_clock *pc) struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); mutex_destroy(&ptp->tsevq_mux); + mutex_destroy(&ptp->pincfg_mux); ida_simple_remove(&ptp_clocks_map, ptp->index); kfree(ptp); } @@ -203,6 +204,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, ptp->index = index; spin_lock_init(&ptp->tsevq.lock); mutex_init(&ptp->tsevq_mux); + mutex_init(&ptp->pincfg_mux); init_waitqueue_head(&ptp->tsev_wq); /* Create a new device in our class. */ @@ -249,6 +251,7 @@ no_sysfs: device_destroy(ptp_class, ptp->devid); no_device: mutex_destroy(&ptp->tsevq_mux); + mutex_destroy(&ptp->pincfg_mux); no_slot: kfree(ptp); no_memory: @@ -305,6 +308,26 @@ int ptp_clock_index(struct ptp_clock *ptp) } EXPORT_SYMBOL(ptp_clock_index); +int ptp_find_pin(struct ptp_clock *ptp, + enum ptp_pin_function func, unsigned int chan) +{ + struct ptp_pin_desc *pin = NULL; + int i; + + mutex_lock(&ptp->pincfg_mux); + for (i = 0; i < ptp->info->n_pins; i++) { + if (ptp->info->pin_config[i].func == func && + ptp->info->pin_config[i].chan == chan) { + pin = &ptp->info->pin_config[i]; + break; + } + } + mutex_unlock(&ptp->pincfg_mux); + + return pin ? i : -1; +} +EXPORT_SYMBOL(ptp_find_pin); + /* module operations */ static void __exit ptp_exit(void) diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index df03f2e30ad9..b114a84c63c7 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -48,6 +48,7 @@ struct ptp_clock { long dialed_frequency; /* remembers the frequency adjustment */ struct timestamp_event_queue tsevq; /* simple fifo for time stamps */ struct mutex tsevq_mux; /* one process at a time reading the fifo */ + struct mutex pincfg_mux; /* protect concurrent info->pin_config access */ wait_queue_head_t tsev_wq; int defunct; /* tells readers to go away when clock is being removed */ }; @@ -69,6 +70,10 @@ static inline int queue_cnt(struct timestamp_event_queue *q) * see ptp_chardev.c */ +/* caller must hold pincfg_mux */ +int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan); + long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg); diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 38a993508327..0d8ff3fb84ba 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -49,7 +49,11 @@ struct ptp_clock_request { * @n_alarm: The number of programmable alarms. * @n_ext_ts: The number of external time stamp channels. * @n_per_out: The number of programmable periodic signals. + * @n_pins: The number of programmable pins. * @pps: Indicates whether the clock supports a PPS callback. + * @pin_config: Array of length 'n_pins'. If the number of + * programmable pins is nonzero, then drivers must + * allocate and initialize this array. * * clock operations * @@ -70,6 +74,18 @@ struct ptp_clock_request { * parameter request: Desired resource to enable or disable. * parameter on: Caller passes one to enable or zero to disable. * + * @verify: Confirm that a pin can perform a given function. The PTP + * Hardware Clock subsystem maintains the 'pin_config' + * array on behalf of the drivers, but the PHC subsystem + * assumes that every pin can perform every function. This + * hook gives drivers a way of telling the core about + * limitations on specific pins. This function must return + * zero if the function can be assigned to this pin, and + * nonzero otherwise. + * parameter pin: index of the pin in question. + * parameter func: the desired function to use. + * parameter chan: the function channel index to use. + * * Drivers should embed their ptp_clock_info within a private * structure, obtaining a reference to it using container_of(). * @@ -83,13 +99,17 @@ struct ptp_clock_info { int n_alarm; int n_ext_ts; int n_per_out; + int n_pins; int pps; + struct ptp_pin_desc *pin_config; int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); int (*gettime)(struct ptp_clock_info *ptp, struct timespec *ts); int (*settime)(struct ptp_clock_info *ptp, const struct timespec *ts); int (*enable)(struct ptp_clock_info *ptp, struct ptp_clock_request *request, int on); + int (*verify)(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan); }; struct ptp_clock; @@ -156,4 +176,17 @@ extern void ptp_clock_event(struct ptp_clock *ptp, extern int ptp_clock_index(struct ptp_clock *ptp); +/** + * ptp_find_pin() - obtain the pin index of a given auxiliary function + * + * @ptp: The clock obtained from ptp_clock_register(). + * @func: One of the ptp_pin_function enumerated values. + * @chan: The particular functional channel to find. + * Return: Pin index in the range of zero to ptp_clock_caps.n_pins - 1, + * or -1 if the auxiliary function cannot be found. + */ + +int ptp_find_pin(struct ptp_clock *ptp, + enum ptp_pin_function func, unsigned int chan); + #endif diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index b65c834f83e9..f0b7bfe5da92 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -50,7 +50,8 @@ struct ptp_clock_caps { int n_ext_ts; /* Number of external time stamp channels. */ int n_per_out; /* Number of programmable periodic signals. */ int pps; /* Whether the clock supports a PPS callback. */ - int rsv[15]; /* Reserved for future use. */ + int n_pins; /* Number of input/output pins. */ + int rsv[14]; /* Reserved for future use. */ }; struct ptp_extts_request { @@ -80,6 +81,40 @@ struct ptp_sys_offset { struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1]; }; +enum ptp_pin_function { + PTP_PF_NONE, + PTP_PF_EXTTS, + PTP_PF_PEROUT, + PTP_PF_PHYSYNC, +}; + +struct ptp_pin_desc { + /* + * Hardware specific human readable pin name. This field is + * set by the kernel during the PTP_PIN_GETFUNC ioctl and is + * ignored for the PTP_PIN_SETFUNC ioctl. + */ + char name[64]; + /* + * Pin index in the range of zero to ptp_clock_caps.n_pins - 1. + */ + unsigned int index; + /* + * Which of the PTP_PF_xxx functions to use on this pin. + */ + unsigned int func; + /* + * The specific channel to use for this function. + * This corresponds to the 'index' field of the + * PTP_EXTTS_REQUEST and PTP_PEROUT_REQUEST ioctls. + */ + unsigned int chan; + /* + * Reserved for future use. + */ + unsigned int rsv[5]; +}; + #define PTP_CLK_MAGIC '=' #define PTP_CLOCK_GETCAPS _IOR(PTP_CLK_MAGIC, 1, struct ptp_clock_caps) @@ -87,6 +122,8 @@ struct ptp_sys_offset { #define PTP_PEROUT_REQUEST _IOW(PTP_CLK_MAGIC, 3, struct ptp_perout_request) #define PTP_ENABLE_PPS _IOW(PTP_CLK_MAGIC, 4, int) #define PTP_SYS_OFFSET _IOW(PTP_CLK_MAGIC, 5, struct ptp_sys_offset) +#define PTP_PIN_GETFUNC _IOWR(PTP_CLK_MAGIC, 6, struct ptp_pin_desc) +#define PTP_PIN_SETFUNC _IOW(PTP_CLK_MAGIC, 7, struct ptp_pin_desc) struct ptp_extts_event { struct ptp_clock_time t; /* Time event occured. */ -- cgit v1.2.3-71-gd317 From fb7c03dfc48eb81bda368561f4ca162749d127c9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 20 Mar 2014 19:50:33 -0700 Subject: if_vlan: Call dev_kfree_skb_any instead of kfree_skb. Replace kfree_skb with dev_kfree_skb_any in vlan_insert_tag as vlan_insert_tag can be called from hard irq context (netpoll) and from other contexts. dev_kfree_skb_any is used as vlan_insert_tag only frees the skb if the skb can not be modified to insert a tag, in which case vlan_insert_tag drops the skb. Signed-off-by: "Eric W. Biederman" --- include/linux/if_vlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index bbedfb56bd66..d3d2306f00bf 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -288,7 +288,7 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, struct vlan_ethhdr *veth; if (skb_cow_head(skb, VLAN_HLEN) < 0) { - kfree_skb(skb); + dev_kfree_skb_any(skb); return NULL; } veth = (struct vlan_ethhdr *)skb_push(skb, VLAN_HLEN); -- cgit v1.2.3-71-gd317 From 61b905da33ae25edb6b9d2a5de21e34c3a77efe3 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 24 Mar 2014 15:34:47 -0700 Subject: net: Rename skb->rxhash to skb->hash The packet hash can be considered a property of the packet, not just on RX path. This patch changes name of rxhash and l4_rxhash skbuff fields to be hash and l4_hash respectively. This includes changing uses of the field in the code which don't call the access functions. Signed-off-by: Tom Herbert Signed-off-by: Eric Dumazet Cc: Mahesh Bandewar Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 4 ++-- arch/powerpc/net/bpf_jit_comp.c | 4 ++-- arch/s390/net/bpf_jit_comp.c | 8 ++++---- arch/sparc/net/bpf_jit_comp.c | 2 +- arch/x86/net/bpf_jit_comp.c | 8 ++++---- include/linux/skbuff.h | 28 ++++++++++++++-------------- include/net/sock.h | 4 ++-- include/trace/events/net.h | 12 ++++++------ net/core/dev.c | 13 +++++++------ net/core/filter.c | 2 +- net/core/flow_dissector.c | 10 +++++----- net/packet/af_packet.c | 3 +-- 12 files changed, 49 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 271b5e971568..7ddb9c83cdfc 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -825,8 +825,8 @@ b_epilogue: break; case BPF_S_ANC_RXHASH: ctx->seen |= SEEN_SKB; - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); - off = offsetof(struct sk_buff, rxhash); + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + off = offsetof(struct sk_buff, hash); emit(ARM_LDR_I(r_A, r_skb, off), ctx); break; case BPF_S_ANC_VLAN_TAG: diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 555034f8505e..4afad6c17d50 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -390,9 +390,9 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, mark)); break; case BPF_S_ANC_RXHASH: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - rxhash)); + hash)); break; case BPF_S_ANC_VLAN_TAG: case BPF_S_ANC_VLAN_TAG_PRESENT: diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 708d60e40066..153f8f2cfd56 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -737,10 +737,10 @@ call_fn: /* lg %r1,(%r13) */ /* icm %r5,3,(%r1) */ EMIT4_DISP(0xbf531000, offsetof(struct net_device, type)); break; - case BPF_S_ANC_RXHASH: /* A = skb->rxhash */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); - /* l %r5,(%r2) */ - EMIT4_DISP(0x58502000, offsetof(struct sk_buff, rxhash)); + case BPF_S_ANC_RXHASH: /* A = skb->hash */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + /* l %r5,(%r2) */ + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, hash)); break; case BPF_S_ANC_VLAN_TAG: case BPF_S_ANC_VLAN_TAG_PRESENT: diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 01fe9946d388..d96d2a7c78ee 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -618,7 +618,7 @@ void bpf_jit_compile(struct sk_filter *fp) emit_load16(r_A, struct net_device, type, r_A); break; case BPF_S_ANC_RXHASH: - emit_skb_load32(rxhash, r_A); + emit_skb_load32(hash, r_A); break; case BPF_S_ANC_VLAN_TAG: case BPF_S_ANC_VLAN_TAG_PRESENT: diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4ed75dd81d05..293c57b74edc 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -553,13 +553,13 @@ void bpf_jit_compile(struct sk_filter *fp) } break; case BPF_S_ANC_RXHASH: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); - if (is_imm8(offsetof(struct sk_buff, rxhash))) { + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + if (is_imm8(offsetof(struct sk_buff, hash))) { /* mov off8(%rdi),%eax */ - EMIT3(0x8b, 0x47, offsetof(struct sk_buff, rxhash)); + EMIT3(0x8b, 0x47, offsetof(struct sk_buff, hash)); } else { EMIT2(0x8b, 0x87); - EMIT(offsetof(struct sk_buff, rxhash), 4); + EMIT(offsetof(struct sk_buff, hash), 4); } break; case BPF_S_ANC_QUEUE: diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 03db95ab8a8c..aa2c22cb8158 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -444,11 +444,11 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index * @tc_verd: traffic control verdict - * @rxhash: the packet hash computed on receive + * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed - * @l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport + * @l4_hash: indicate hash is a canonical 4-tuple hash over transport * ports. * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not @@ -537,7 +537,7 @@ struct sk_buff { int skb_iif; - __u32 rxhash; + __u32 hash; __be16 vlan_proto; __u16 vlan_tci; @@ -556,7 +556,7 @@ struct sk_buff { #endif __u8 pfmemalloc:1; __u8 ooo_okay:1; - __u8 l4_rxhash:1; + __u8 l4_hash:1; __u8 wifi_acked_valid:1; __u8 wifi_acked:1; __u8 no_fcs:1; @@ -815,40 +815,40 @@ enum pkt_hash_types { static inline void skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type) { - skb->l4_rxhash = (type == PKT_HASH_TYPE_L4); - skb->rxhash = hash; + skb->l4_hash = (type == PKT_HASH_TYPE_L4); + skb->hash = hash; } void __skb_get_hash(struct sk_buff *skb); static inline __u32 skb_get_hash(struct sk_buff *skb) { - if (!skb->l4_rxhash) + if (!skb->l4_hash) __skb_get_hash(skb); - return skb->rxhash; + return skb->hash; } static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) { - return skb->rxhash; + return skb->hash; } static inline void skb_clear_hash(struct sk_buff *skb) { - skb->rxhash = 0; - skb->l4_rxhash = 0; + skb->hash = 0; + skb->l4_hash = 0; } static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb) { - if (!skb->l4_rxhash) + if (!skb->l4_hash) skb_clear_hash(skb); } static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) { - to->rxhash = from->rxhash; - to->l4_rxhash = from->l4_rxhash; + to->hash = from->hash; + to->l4_hash = from->l4_hash; }; #ifdef NET_SKBUFF_DATA_USES_OFFSET diff --git a/include/net/sock.h b/include/net/sock.h index 625e65b12366..8d7c431a0660 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -862,9 +862,9 @@ static inline void sock_rps_save_rxhash(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_RPS - if (unlikely(sk->sk_rxhash != skb->rxhash)) { + if (unlikely(sk->sk_rxhash != skb->hash)) { sock_rps_reset_flow(sk); - sk->sk_rxhash = skb->rxhash; + sk->sk_rxhash = skb->hash; } #endif } diff --git a/include/trace/events/net.h b/include/trace/events/net.h index a34f27b2e394..1de256b35807 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -153,8 +153,8 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, __field( u16, vlan_tci ) __field( u16, protocol ) __field( u8, ip_summed ) - __field( u32, rxhash ) - __field( bool, l4_rxhash ) + __field( u32, hash ) + __field( bool, l4_hash ) __field( unsigned int, len ) __field( unsigned int, data_len ) __field( unsigned int, truesize ) @@ -179,8 +179,8 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, __entry->vlan_tci = vlan_tx_tag_get(skb); __entry->protocol = ntohs(skb->protocol); __entry->ip_summed = skb->ip_summed; - __entry->rxhash = skb->rxhash; - __entry->l4_rxhash = skb->l4_rxhash; + __entry->hash = skb->hash; + __entry->l4_hash = skb->l4_hash; __entry->len = skb->len; __entry->data_len = skb->data_len; __entry->truesize = skb->truesize; @@ -191,11 +191,11 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, __entry->gso_type = skb_shinfo(skb)->gso_type; ), - TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d rxhash=0x%08x l4_rxhash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", + TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", __get_str(name), __entry->napi_id, __entry->queue_mapping, __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, __entry->protocol, __entry->ip_summed, - __entry->rxhash, __entry->l4_rxhash, __entry->len, + __entry->hash, __entry->l4_hash, __entry->len, __entry->data_len, __entry->truesize, __entry->mac_header_valid, __entry->mac_header, __entry->nr_frags, __entry->gso_size, __entry->gso_type) diff --git a/net/core/dev.c b/net/core/dev.c index 55f8e64c03a2..48dd323d5918 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2952,7 +2952,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, flow_table = rcu_dereference(rxqueue->rps_flow_table); if (!flow_table) goto out; - flow_id = skb->rxhash & flow_table->mask; + flow_id = skb_get_hash(skb) & flow_table->mask; rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, rxq_index, flow_id); if (rc < 0) @@ -2986,6 +2986,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_sock_flow_table *sock_flow_table; int cpu = -1; u16 tcpu; + u32 hash; if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); @@ -3014,7 +3015,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } skb_reset_network_header(skb); - if (!skb_get_hash(skb)) + hash = skb_get_hash(skb); + if (!hash) goto done; flow_table = rcu_dereference(rxqueue->rps_flow_table); @@ -3023,11 +3025,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, u16 next_cpu; struct rps_dev_flow *rflow; - rflow = &flow_table->flows[skb->rxhash & flow_table->mask]; + rflow = &flow_table->flows[hash & flow_table->mask]; tcpu = rflow->cpu; - next_cpu = sock_flow_table->ents[skb->rxhash & - sock_flow_table->mask]; + next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask]; /* * If the desired CPU (where last recvmsg was done) is @@ -3056,7 +3057,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } if (map) { - tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; + tcpu = map->cpus[((u64) hash * map->len) >> 32]; if (cpu_online(tcpu)) { cpu = tcpu; diff --git a/net/core/filter.c b/net/core/filter.c index ad30d626a5bd..65b75966e206 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -336,7 +336,7 @@ load_b: A = skb->dev->type; continue; case BPF_S_ANC_RXHASH: - A = skb->rxhash; + A = skb->hash; continue; case BPF_S_ANC_CPU: A = raw_smp_processor_id(); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 80201bf69d59..107ed12a5323 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -203,8 +203,8 @@ static __always_inline u32 __flow_hash_1word(u32 a) /* * __skb_get_hash: calculate a flow hash based on src/dst addresses - * and src/dst port numbers. Sets rxhash in skb to non-zero hash value - * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb + * and src/dst port numbers. Sets hash in skb to non-zero hash value + * on success, zero indicates no valid hash. Also, sets l4_hash in skb * if hash is a canonical 4-tuple hash over transport ports. */ void __skb_get_hash(struct sk_buff *skb) @@ -216,7 +216,7 @@ void __skb_get_hash(struct sk_buff *skb) return; if (keys.ports) - skb->l4_rxhash = 1; + skb->l4_hash = 1; /* get a consistent hash (same value on both flow directions) */ if (((__force u32)keys.dst < (__force u32)keys.src) || @@ -232,7 +232,7 @@ void __skb_get_hash(struct sk_buff *skb) if (!hash) hash = 1; - skb->rxhash = hash; + skb->hash = hash; } EXPORT_SYMBOL(__skb_get_hash); @@ -344,7 +344,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) hash = skb->sk->sk_hash; else hash = (__force u16) skb->protocol ^ - skb->rxhash; + skb->hash; hash = __flow_hash_1word(hash); queue_index = map->queues[ ((u64)hash * map->len) >> 32]; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 292304404fda..097a354ec8cd 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1277,7 +1277,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - return reciprocal_scale(skb->rxhash, num); + return reciprocal_scale(skb_get_hash(skb), num); } static unsigned int fanout_demux_lb(struct packet_fanout *f, @@ -1362,7 +1362,6 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, if (!skb) return 0; } - skb_get_hash(skb); idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: -- cgit v1.2.3-71-gd317 From 71e415e44c1f9b7a4f05dac4f8038575dbef0c6f Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Tue, 25 Mar 2014 17:44:42 +0800 Subject: vlan: make a new function vlan_dev_vlan_proto() and export The vlan support 2 proto: 802.1q and 802.1ad, so make a new function called vlan_dev_vlan_proto() which could return the vlan proto for input dev. Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 7 +++++++ net/8021q/vlan_core.c | 6 ++++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index d3d2306f00bf..13bbbde00e68 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -110,6 +110,7 @@ extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, __be16 vlan_proto, u16 vlan_id); extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); extern u16 vlan_dev_vlan_id(const struct net_device *dev); +extern __be16 vlan_dev_vlan_proto(const struct net_device *dev); /** * struct vlan_priority_tci_mapping - vlan egress priority mappings @@ -216,6 +217,12 @@ static inline u16 vlan_dev_vlan_id(const struct net_device *dev) return 0; } +static inline __be16 vlan_dev_vlan_proto(const struct net_device *dev) +{ + BUG(); + return 0; +} + static inline u16 vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio) { diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 35b3c192d7b9..3c32bd257b73 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -106,6 +106,12 @@ u16 vlan_dev_vlan_id(const struct net_device *dev) } EXPORT_SYMBOL(vlan_dev_vlan_id); +__be16 vlan_dev_vlan_proto(const struct net_device *dev) +{ + return vlan_dev_priv(dev)->vlan_proto; +} +EXPORT_SYMBOL(vlan_dev_vlan_proto); + static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) { if (skb_cow(skb, skb_headroom(skb)) < 0) -- cgit v1.2.3-71-gd317 From 1edb9ca69e8a7988900fc0283e10550b5592164d Mon Sep 17 00:00:00 2001 From: Siva Reddy Date: Tue, 25 Mar 2014 12:10:54 -0700 Subject: net: sxgbe: add basic framework for Samsung 10Gb ethernet driver This patch adds support for Samsung 10Gb ethernet driver(sxgbe). - sxgbe core initialization - Tx and Rx support - MDIO support - ISRs for Tx and Rx - ifconfig support to driver Signed-off-by: Siva Reddy Kallam Signed-off-by: Vipul Pandya Signed-off-by: Girish K S Neatening-by: Joe Perches Signed-off-by: Byungho An Signed-off-by: David S. Miller --- drivers/net/ethernet/Kconfig | 1 + drivers/net/ethernet/Makefile | 1 + drivers/net/ethernet/samsung/Kconfig | 16 + drivers/net/ethernet/samsung/Makefile | 5 + drivers/net/ethernet/samsung/sxgbe/Kconfig | 9 + drivers/net/ethernet/samsung/sxgbe/Makefile | 4 + drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h | 462 +++++ drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c | 158 ++ drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.c | 515 +++++ drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.h | 298 +++ drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.c | 372 ++++ drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.h | 48 + drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c | 44 + drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c | 2052 ++++++++++++++++++++ drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c | 251 +++ drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.c | 254 +++ drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.h | 104 + .../net/ethernet/samsung/sxgbe/sxgbe_platform.c | 253 +++ drivers/net/ethernet/samsung/sxgbe/sxgbe_reg.h | 477 +++++ drivers/net/ethernet/samsung/sxgbe/sxgbe_xpcs.c | 91 + drivers/net/ethernet/samsung/sxgbe/sxgbe_xpcs.h | 38 + include/linux/sxgbe_platform.h | 54 + 22 files changed, 5507 insertions(+) create mode 100644 drivers/net/ethernet/samsung/Kconfig create mode 100644 drivers/net/ethernet/samsung/Makefile create mode 100644 drivers/net/ethernet/samsung/sxgbe/Kconfig create mode 100644 drivers/net/ethernet/samsung/sxgbe/Makefile create mode 100644 drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h create mode 100644 drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c create mode 100644 drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.c create mode 100644 drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.h create mode 100644 drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.c create mode 100644 drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.h create mode 100644 drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c create mode 100644 drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c create mode 100644 drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c create mode 100644 drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.c create mode 100644 drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.h create mode 100644 drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c create mode 100644 drivers/net/ethernet/samsung/sxgbe/sxgbe_reg.h create mode 100644 drivers/net/ethernet/samsung/sxgbe/sxgbe_xpcs.c create mode 100644 drivers/net/ethernet/samsung/sxgbe/sxgbe_xpcs.h create mode 100644 include/linux/sxgbe_platform.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 39484b534f5e..39b26fe28d10 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -150,6 +150,7 @@ config S6GMAC To compile this driver as a module, choose M here. The module will be called s6gmac. +source "drivers/net/ethernet/samsung/Kconfig" source "drivers/net/ethernet/seeq/Kconfig" source "drivers/net/ethernet/silan/Kconfig" source "drivers/net/ethernet/sis/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index adf61af507f7..545d0b3b9cb4 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_NET_VENDOR_REALTEK) += realtek/ obj-$(CONFIG_SH_ETH) += renesas/ obj-$(CONFIG_NET_VENDOR_RDC) += rdc/ obj-$(CONFIG_S6GMAC) += s6gmac.o +obj-$(CONFIG_NET_VENDOR_SAMSUNG) += samsung/ obj-$(CONFIG_NET_VENDOR_SEEQ) += seeq/ obj-$(CONFIG_NET_VENDOR_SILAN) += silan/ obj-$(CONFIG_NET_VENDOR_SIS) += sis/ diff --git a/drivers/net/ethernet/samsung/Kconfig b/drivers/net/ethernet/samsung/Kconfig new file mode 100644 index 000000000000..7902341f2623 --- /dev/null +++ b/drivers/net/ethernet/samsung/Kconfig @@ -0,0 +1,16 @@ +# +# Samsung Ethernet device configuration +# + +config NET_VENDOR_SAMSUNG + bool "Samsung Ethernet device" + default y + ---help--- + This is the driver for the SXGBE 10G Ethernet IP block found on Samsung + platforms. + +if NET_VENDOR_SAMSUNG + +source "drivers/net/ethernet/samsung/sxgbe/Kconfig" + +endif # NET_VENDOR_SAMSUNG diff --git a/drivers/net/ethernet/samsung/Makefile b/drivers/net/ethernet/samsung/Makefile new file mode 100644 index 000000000000..1773c29b8d76 --- /dev/null +++ b/drivers/net/ethernet/samsung/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the Samsung Ethernet device drivers. +# + +obj-$(CONFIG_SXGBE_ETH) += sxgbe/ diff --git a/drivers/net/ethernet/samsung/sxgbe/Kconfig b/drivers/net/ethernet/samsung/sxgbe/Kconfig new file mode 100644 index 000000000000..d79288c51d0a --- /dev/null +++ b/drivers/net/ethernet/samsung/sxgbe/Kconfig @@ -0,0 +1,9 @@ +config SXGBE_ETH + tristate "Samsung 10G/2.5G/1G SXGBE Ethernet driver" + depends on HAS_IOMEM && HAS_DMA + select PHYLIB + select CRC32 + select PTP_1588_CLOCK + ---help--- + This is the driver for the SXGBE 10G Ethernet IP block found on Samsung + platforms. diff --git a/drivers/net/ethernet/samsung/sxgbe/Makefile b/drivers/net/ethernet/samsung/sxgbe/Makefile new file mode 100644 index 000000000000..dcc80b9d4370 --- /dev/null +++ b/drivers/net/ethernet/samsung/sxgbe/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_SXGBE_ETH) += samsung-sxgbe.o +samsung-sxgbe-objs:= sxgbe_platform.o sxgbe_main.o sxgbe_desc.o \ + sxgbe_dma.o sxgbe_core.o sxgbe_mtl.o sxgbe_mdio.o \ + sxgbe_ethtool.o sxgbe_xpcs.o $(samsung-sxgbe-y) diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h new file mode 100644 index 000000000000..c7803f199967 --- /dev/null +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h @@ -0,0 +1,462 @@ +/* 10G controller driver for Samsung SoCs + * + * Copyright (C) 2013 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Author: Siva Reddy Kallam + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __SXGBE_COMMON_H__ +#define __SXGBE_COMMON_H__ + +/* forward references */ +struct sxgbe_desc_ops; +struct sxgbe_dma_ops; +struct sxgbe_mtl_ops; + +#define SXGBE_RESOURCE_NAME "sam_sxgbeeth" +#define DRV_MODULE_VERSION "November_2013" + +/* MAX HW feature words */ +#define SXGBE_HW_WORDS 3 + +#define SXGBE_RX_COE_NONE 0 + +/* CSR Frequency Access Defines*/ +#define SXGBE_CSR_F_150M 150000000 +#define SXGBE_CSR_F_250M 250000000 +#define SXGBE_CSR_F_300M 300000000 +#define SXGBE_CSR_F_350M 350000000 +#define SXGBE_CSR_F_400M 400000000 +#define SXGBE_CSR_F_500M 500000000 + +/* pause time */ +#define SXGBE_PAUSE_TIME 0x200 + +/* tx queues */ +#define SXGBE_TX_QUEUES 8 +#define SXGBE_RX_QUEUES 16 + +/* Calculated based how much time does it take to fill 256KB Rx memory + * at 10Gb speed at 156MHz clock rate and considered little less then + * the actual value. + */ +#define SXGBE_MAX_DMA_RIWT 0x70 +#define SXGBE_MIN_DMA_RIWT 0x01 + +/* Tx coalesce parameters */ +#define SXGBE_COAL_TX_TIMER 40000 +#define SXGBE_MAX_COAL_TX_TICK 100000 +#define SXGBE_TX_MAX_FRAMES 512 +#define SXGBE_TX_FRAMES 128 + +/* SXGBE TX FIFO is 8K, Rx FIFO is 16K */ +#define BUF_SIZE_16KiB 16384 +#define BUF_SIZE_8KiB 8192 +#define BUF_SIZE_4KiB 4096 +#define BUF_SIZE_2KiB 2048 + +#define SXGBE_DEFAULT_LIT_LS 0x3E8 +#define SXGBE_DEFAULT_TWT_LS 0x0 + +/* Flow Control defines */ +#define SXGBE_FLOW_OFF 0 +#define SXGBE_FLOW_RX 1 +#define SXGBE_FLOW_TX 2 +#define SXGBE_FLOW_AUTO (SXGBE_FLOW_TX | SXGBE_FLOW_RX) + +#define SF_DMA_MODE 1 /* DMA STORE-AND-FORWARD Operation Mode */ + +/* errors */ +#define RX_GMII_ERR 0x01 +#define RX_WATCHDOG_ERR 0x02 +#define RX_CRC_ERR 0x03 +#define RX_GAINT_ERR 0x04 +#define RX_IP_HDR_ERR 0x05 +#define RX_PAYLOAD_ERR 0x06 +#define RX_OVERFLOW_ERR 0x07 + +/* pkt type */ +#define RX_LEN_PKT 0x00 +#define RX_MACCTL_PKT 0x01 +#define RX_DCBCTL_PKT 0x02 +#define RX_ARP_PKT 0x03 +#define RX_OAM_PKT 0x04 +#define RX_UNTAG_PKT 0x05 +#define RX_OTHER_PKT 0x07 +#define RX_SVLAN_PKT 0x08 +#define RX_CVLAN_PKT 0x09 +#define RX_DVLAN_OCVLAN_ICVLAN_PKT 0x0A +#define RX_DVLAN_OSVLAN_ISVLAN_PKT 0x0B +#define RX_DVLAN_OSVLAN_ICVLAN_PKT 0x0C +#define RX_DVLAN_OCVLAN_ISVLAN_PKT 0x0D + +#define RX_NOT_IP_PKT 0x00 +#define RX_IPV4_TCP_PKT 0x01 +#define RX_IPV4_UDP_PKT 0x02 +#define RX_IPV4_ICMP_PKT 0x03 +#define RX_IPV4_UNKNOWN_PKT 0x07 +#define RX_IPV6_TCP_PKT 0x09 +#define RX_IPV6_UDP_PKT 0x0A +#define RX_IPV6_ICMP_PKT 0x0B +#define RX_IPV6_UNKNOWN_PKT 0x0F + +#define RX_NO_PTP 0x00 +#define RX_PTP_SYNC 0x01 +#define RX_PTP_FOLLOW_UP 0x02 +#define RX_PTP_DELAY_REQ 0x03 +#define RX_PTP_DELAY_RESP 0x04 +#define RX_PTP_PDELAY_REQ 0x05 +#define RX_PTP_PDELAY_RESP 0x06 +#define RX_PTP_PDELAY_FOLLOW_UP 0x07 +#define RX_PTP_ANNOUNCE 0x08 +#define RX_PTP_MGMT 0x09 +#define RX_PTP_SIGNAL 0x0A +#define RX_PTP_RESV_MSG 0x0F + +enum dma_irq_status { + tx_hard_error = BIT(0), + tx_bump_tc = BIT(1), + handle_tx = BIT(2), + rx_hard_error = BIT(3), + rx_bump_tc = BIT(4), + handle_rx = BIT(5), +}; + +#define NETIF_F_HW_VLAN_ALL (NETIF_F_HW_VLAN_CTAG_RX | \ + NETIF_F_HW_VLAN_STAG_RX | \ + NETIF_F_HW_VLAN_CTAG_TX | \ + NETIF_F_HW_VLAN_STAG_TX | \ + NETIF_F_HW_VLAN_CTAG_FILTER | \ + NETIF_F_HW_VLAN_STAG_FILTER) + +/* MMC control defines */ +#define SXGBE_MMC_CTRL_CNT_FRZ 0x00000008 + +/* SXGBE HW ADDR regs */ +#define SXGBE_ADDR_HIGH(reg) (((reg > 15) ? 0x00000800 : 0x00000040) + \ + (reg * 8)) +#define SXGBE_ADDR_LOW(reg) (((reg > 15) ? 0x00000804 : 0x00000044) + \ + (reg * 8)) +#define SXGBE_MAX_PERFECT_ADDRESSES 32 /* Maximum unicast perfect filtering */ +#define SXGBE_FRAME_FILTER 0x00000004 /* Frame Filter */ + +/* SXGBE Frame Filter defines */ +#define SXGBE_FRAME_FILTER_PR 0x00000001 /* Promiscuous Mode */ +#define SXGBE_FRAME_FILTER_HUC 0x00000002 /* Hash Unicast */ +#define SXGBE_FRAME_FILTER_HMC 0x00000004 /* Hash Multicast */ +#define SXGBE_FRAME_FILTER_DAIF 0x00000008 /* DA Inverse Filtering */ +#define SXGBE_FRAME_FILTER_PM 0x00000010 /* Pass all multicast */ +#define SXGBE_FRAME_FILTER_DBF 0x00000020 /* Disable Broadcast frames */ +#define SXGBE_FRAME_FILTER_SAIF 0x00000100 /* Inverse Filtering */ +#define SXGBE_FRAME_FILTER_SAF 0x00000200 /* Source Address Filter */ +#define SXGBE_FRAME_FILTER_HPF 0x00000400 /* Hash or perfect Filter */ +#define SXGBE_FRAME_FILTER_RA 0x80000000 /* Receive all mode */ + +#define SXGBE_HASH_TABLE_SIZE 64 +#define SXGBE_HASH_HIGH 0x00000008 /* Multicast Hash Table High */ +#define SXGBE_HASH_LOW 0x0000000c /* Multicast Hash Table Low */ + +#define SXGBE_HI_REG_AE 0x80000000 + +/* Minimum and maximum MTU */ +#define MIN_MTU 68 +#define MAX_MTU 9000 + +#define SXGBE_FOR_EACH_QUEUE(max_queues, queue_num) \ + for (queue_num = 0; queue_num < max_queues; queue_num++) + +/* sxgbe statistics counters */ +struct sxgbe_extra_stats { + /* TX/RX IRQ events */ + unsigned long tx_underflow_irq; + unsigned long tx_process_stopped_irq; + unsigned long tx_ctxt_desc_err; + unsigned long tx_threshold; + unsigned long rx_threshold; + unsigned long tx_pkt_n; + unsigned long rx_pkt_n; + unsigned long normal_irq_n; + unsigned long tx_normal_irq_n; + unsigned long rx_normal_irq_n; + unsigned long napi_poll; + unsigned long tx_clean; + unsigned long tx_reset_ic_bit; + unsigned long rx_process_stopped_irq; + unsigned long rx_underflow_irq; + + /* Bus access errors */ + unsigned long fatal_bus_error_irq; + unsigned long tx_read_transfer_err; + unsigned long tx_write_transfer_err; + unsigned long tx_desc_access_err; + unsigned long tx_buffer_access_err; + unsigned long tx_data_transfer_err; + unsigned long rx_read_transfer_err; + unsigned long rx_write_transfer_err; + unsigned long rx_desc_access_err; + unsigned long rx_buffer_access_err; + unsigned long rx_data_transfer_err; + + /* RX specific */ + /* L2 error */ + unsigned long rx_code_gmii_err; + unsigned long rx_watchdog_err; + unsigned long rx_crc_err; + unsigned long rx_gaint_pkt_err; + unsigned long ip_hdr_err; + unsigned long ip_payload_err; + unsigned long overflow_error; + + /* L2 Pkt type */ + unsigned long len_pkt; + unsigned long mac_ctl_pkt; + unsigned long dcb_ctl_pkt; + unsigned long arp_pkt; + unsigned long oam_pkt; + unsigned long untag_okt; + unsigned long other_pkt; + unsigned long svlan_tag_pkt; + unsigned long cvlan_tag_pkt; + unsigned long dvlan_ocvlan_icvlan_pkt; + unsigned long dvlan_osvlan_isvlan_pkt; + unsigned long dvlan_osvlan_icvlan_pkt; + unsigned long dvan_ocvlan_icvlan_pkt; + + /* L3/L4 Pkt type */ + unsigned long not_ip_pkt; + unsigned long ip4_tcp_pkt; + unsigned long ip4_udp_pkt; + unsigned long ip4_icmp_pkt; + unsigned long ip4_unknown_pkt; + unsigned long ip6_tcp_pkt; + unsigned long ip6_udp_pkt; + unsigned long ip6_icmp_pkt; + unsigned long ip6_unknown_pkt; + + /* Filter specific */ + unsigned long vlan_filter_match; + unsigned long sa_filter_fail; + unsigned long da_filter_fail; + unsigned long hash_filter_pass; + unsigned long l3_filter_match; + unsigned long l4_filter_match; + + /* RX context specific */ + unsigned long timestamp_dropped; + unsigned long rx_msg_type_no_ptp; + unsigned long rx_ptp_type_sync; + unsigned long rx_ptp_type_follow_up; + unsigned long rx_ptp_type_delay_req; + unsigned long rx_ptp_type_delay_resp; + unsigned long rx_ptp_type_pdelay_req; + unsigned long rx_ptp_type_pdelay_resp; + unsigned long rx_ptp_type_pdelay_follow_up; + unsigned long rx_ptp_announce; + unsigned long rx_ptp_mgmt; + unsigned long rx_ptp_signal; + unsigned long rx_ptp_resv_msg_type; +}; + +struct mac_link { + int port; + int duplex; + int speed; +}; + +struct mii_regs { + unsigned int addr; /* MII Address */ + unsigned int data; /* MII Data */ +}; + +struct sxgbe_core_ops { + /* MAC core initialization */ + void (*core_init)(void __iomem *ioaddr); + /* Dump MAC registers */ + void (*dump_regs)(void __iomem *ioaddr); + /* Handle extra events on specific interrupts hw dependent */ + int (*host_irq_status)(void __iomem *ioaddr, + struct sxgbe_extra_stats *x); + /* Set power management mode (e.g. magic frame) */ + void (*pmt)(void __iomem *ioaddr, unsigned long mode); + /* Set/Get Unicast MAC addresses */ + void (*set_umac_addr)(void __iomem *ioaddr, unsigned char *addr, + unsigned int reg_n); + void (*get_umac_addr)(void __iomem *ioaddr, unsigned char *addr, + unsigned int reg_n); + void (*enable_rx)(void __iomem *ioaddr, bool enable); + void (*enable_tx)(void __iomem *ioaddr, bool enable); + + /* controller version specific operations */ + int (*get_controller_version)(void __iomem *ioaddr); + + /* If supported then get the optional core features */ + unsigned int (*get_hw_feature)(void __iomem *ioaddr, + unsigned char feature_index); + /* adjust SXGBE speed */ + void (*set_speed)(void __iomem *ioaddr, unsigned char speed); +}; + +const struct sxgbe_core_ops *sxgbe_get_core_ops(void); + +struct sxgbe_ops { + const struct sxgbe_core_ops *mac; + const struct sxgbe_desc_ops *desc; + const struct sxgbe_dma_ops *dma; + const struct sxgbe_mtl_ops *mtl; + struct mii_regs mii; /* MII register Addresses */ + struct mac_link link; + unsigned int ctrl_uid; + unsigned int ctrl_id; +}; + +/* SXGBE private data structures */ +struct sxgbe_tx_queue { + unsigned int irq_no; + struct sxgbe_priv_data *priv_ptr; + struct sxgbe_tx_norm_desc *dma_tx; + dma_addr_t dma_tx_phy; + dma_addr_t *tx_skbuff_dma; + struct sk_buff **tx_skbuff; + struct timer_list txtimer; + spinlock_t tx_lock; /* lock for tx queues */ + unsigned int cur_tx; + unsigned int dirty_tx; + u32 tx_count_frames; + u32 tx_coal_frames; + u32 tx_coal_timer; + int hwts_tx_en; + u8 queue_no; +}; + +struct sxgbe_rx_queue { + struct sxgbe_priv_data *priv_ptr; + struct sxgbe_rx_norm_desc *dma_rx; + struct sk_buff **rx_skbuff; + unsigned int cur_rx; + unsigned int dirty_rx; + unsigned int irq_no; + u32 rx_riwt; + dma_addr_t *rx_skbuff_dma; + dma_addr_t dma_rx_phy; + u8 queue_no; +}; + +/* SXGBE HW capabilities */ +struct sxgbe_hw_features { + /****** CAP [0] *******/ + unsigned int pmt_remote_wake_up; + unsigned int pmt_magic_frame; + /* IEEE 1588-2008 */ + unsigned int atime_stamp; + + unsigned int tx_csum_offload; + unsigned int rx_csum_offload; + unsigned int multi_macaddr; + unsigned int tstamp_srcselect; + unsigned int sa_vlan_insert; + + /****** CAP [1] *******/ + unsigned int rxfifo_size; + unsigned int txfifo_size; + unsigned int atstmap_hword; + unsigned int dcb_enable; + unsigned int splithead_enable; + unsigned int tcpseg_offload; + unsigned int debug_mem; + unsigned int rss_enable; + unsigned int hash_tsize; + unsigned int l3l4_filer_size; + + /* This value is in bytes and + * as mentioned in HW features + * of SXGBE data book + */ + unsigned int rx_mtl_qsize; + unsigned int tx_mtl_qsize; + + /****** CAP [2] *******/ + /* TX and RX number of channels */ + unsigned int rx_mtl_queues; + unsigned int tx_mtl_queues; + unsigned int rx_dma_channels; + unsigned int tx_dma_channels; + unsigned int pps_output_count; + unsigned int aux_input_count; +}; + +struct sxgbe_priv_data { + /* DMA descriptos */ + struct sxgbe_tx_queue *txq[SXGBE_TX_QUEUES]; + struct sxgbe_rx_queue *rxq[SXGBE_RX_QUEUES]; + u8 cur_rx_qnum; + + unsigned int dma_tx_size; + unsigned int dma_rx_size; + unsigned int dma_buf_sz; + u32 rx_riwt; + + struct napi_struct napi; + + void __iomem *ioaddr; + struct net_device *dev; + struct device *device; + struct sxgbe_ops *hw; /* sxgbe specific ops */ + int no_csum_insertion; + int irq; + spinlock_t stats_lock; /* lock for tx/rx statatics */ + + struct phy_device *phydev; + int oldlink; + int speed; + int oldduplex; + struct mii_bus *mii; + int mii_irq[PHY_MAX_ADDR]; + u8 rx_pause; + u8 tx_pause; + + struct sxgbe_extra_stats xstats; + struct sxgbe_plat_data *plat; + struct sxgbe_hw_features hw_cap; + + u32 msg_enable; + + struct clk *sxgbe_clk; + int clk_csr; + unsigned int mode; + unsigned int default_addend; + + /* advanced time stamp support */ + u32 adv_ts; + int use_riwt; + + /* tc control */ + int tx_tc; + int rx_tc; +}; + +/* Function prototypes */ +struct sxgbe_priv_data *sxgbe_drv_probe(struct device *device, + struct sxgbe_plat_data *plat_dat, + void __iomem *addr); +int sxgbe_drv_remove(struct net_device *ndev); +void sxgbe_set_ethtool_ops(struct net_device *netdev); +int sxgbe_mdio_unregister(struct net_device *ndev); +int sxgbe_mdio_register(struct net_device *ndev); +int sxgbe_register_platform(void); +void sxgbe_unregister_platform(void); + +#ifdef CONFIG_PM +int sxgbe_suspend(struct net_device *ndev); +int sxgbe_resume(struct net_device *ndev); +int sxgbe_freeze(struct net_device *ndev); +int sxgbe_restore(struct net_device *ndev); +#endif /* CONFIG_PM */ + +const struct sxgbe_mtl_ops *sxgbe_get_mtl_ops(void); + +#endif /* __SXGBE_COMMON_H__ */ diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c new file mode 100644 index 000000000000..4ad31bbc42c9 --- /dev/null +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c @@ -0,0 +1,158 @@ +/* 10G controller driver for Samsung SoCs + * + * Copyright (C) 2013 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Author: Siva Reddy Kallam + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +#include "sxgbe_common.h" +#include "sxgbe_reg.h" + +/* MAC core initialization */ +static void sxgbe_core_init(void __iomem *ioaddr) +{ + u32 regval; + + /* TX configuration */ + regval = readl(ioaddr + SXGBE_CORE_TX_CONFIG_REG); + /* Other configurable parameters IFP, IPG, ISR, ISM + * needs to be set if needed + */ + regval |= SXGBE_TX_JABBER_DISABLE; + writel(regval, ioaddr + SXGBE_CORE_TX_CONFIG_REG); + + /* RX configuration */ + regval = readl(ioaddr + SXGBE_CORE_RX_CONFIG_REG); + /* Other configurable parameters CST, SPEN, USP, GPSLCE + * WD, LM, S2KP, HDSMS, GPSL, ELEN, ARPEN needs to be + * set if needed + */ + regval |= SXGBE_RX_JUMBPKT_ENABLE | SXGBE_RX_ACS_ENABLE; + writel(regval, ioaddr + SXGBE_CORE_RX_CONFIG_REG); +} + +/* Dump MAC registers */ +static void sxgbe_core_dump_regs(void __iomem *ioaddr) +{ +} + +/* Handle extra events on specific interrupts hw dependent */ +static int sxgbe_core_host_irq_status(void __iomem *ioaddr, + struct sxgbe_extra_stats *x) +{ + return 0; +} + +/* Set power management mode (e.g. magic frame) */ +static void sxgbe_core_pmt(void __iomem *ioaddr, unsigned long mode) +{ +} + +/* Set/Get Unicast MAC addresses */ +static void sxgbe_core_set_umac_addr(void __iomem *ioaddr, unsigned char *addr, + unsigned int reg_n) +{ + u32 high_word, low_word; + + high_word = (addr[5] << 8) || (addr[4]); + low_word = ((addr[3] << 24) || (addr[2] << 16) || + (addr[1] << 8) || (addr[0])); + writel(high_word, ioaddr + SXGBE_CORE_ADD_HIGHOFFSET(reg_n)); + writel(low_word, ioaddr + SXGBE_CORE_ADD_LOWOFFSET(reg_n)); +} + +static void sxgbe_core_get_umac_addr(void __iomem *ioaddr, unsigned char *addr, + unsigned int reg_n) +{ + u32 high_word, low_word; + + high_word = readl(ioaddr + SXGBE_CORE_ADD_HIGHOFFSET(reg_n)); + low_word = readl(ioaddr + SXGBE_CORE_ADD_LOWOFFSET(reg_n)); + + /* extract and assign address */ + addr[5] = (high_word & 0x0000FF00) >> 8; + addr[4] = (high_word & 0x000000FF); + addr[3] = (low_word & 0xFF000000) >> 24; + addr[2] = (low_word & 0x00FF0000) >> 16; + addr[1] = (low_word & 0x0000FF00) >> 8; + addr[0] = (low_word & 0x000000FF); +} + +static void sxgbe_enable_tx(void __iomem *ioaddr, bool enable) +{ + u32 tx_config; + + tx_config = readl(ioaddr + SXGBE_CORE_TX_CONFIG_REG); + tx_config &= ~SXGBE_TX_ENABLE; + + if (enable) + tx_config |= SXGBE_TX_ENABLE; + writel(tx_config, ioaddr + SXGBE_CORE_TX_CONFIG_REG); +} + +static void sxgbe_enable_rx(void __iomem *ioaddr, bool enable) +{ + u32 rx_config; + + rx_config = readl(ioaddr + SXGBE_CORE_RX_CONFIG_REG); + rx_config &= ~SXGBE_RX_ENABLE; + + if (enable) + rx_config |= SXGBE_RX_ENABLE; + writel(rx_config, ioaddr + SXGBE_CORE_RX_CONFIG_REG); +} + +static int sxgbe_get_controller_version(void __iomem *ioaddr) +{ + return readl(ioaddr + SXGBE_CORE_VERSION_REG); +} + +/* If supported then get the optional core features */ +static unsigned int sxgbe_get_hw_feature(void __iomem *ioaddr, + unsigned char feature_index) +{ + return readl(ioaddr + (SXGBE_CORE_HW_FEA_REG(feature_index))); +} + +static void sxgbe_core_set_speed(void __iomem *ioaddr, unsigned char speed) +{ + u32 tx_cfg = readl(ioaddr + SXGBE_CORE_TX_CONFIG_REG); + + /* clear the speed bits */ + tx_cfg &= ~0x60000000; + tx_cfg |= (speed << SXGBE_SPEED_LSHIFT); + + /* set the speed */ + writel(tx_cfg, ioaddr + SXGBE_CORE_TX_CONFIG_REG); +} + +const struct sxgbe_core_ops core_ops = { + .core_init = sxgbe_core_init, + .dump_regs = sxgbe_core_dump_regs, + .host_irq_status = sxgbe_core_host_irq_status, + .pmt = sxgbe_core_pmt, + .set_umac_addr = sxgbe_core_set_umac_addr, + .get_umac_addr = sxgbe_core_get_umac_addr, + .enable_rx = sxgbe_enable_rx, + .enable_tx = sxgbe_enable_tx, + .get_controller_version = sxgbe_get_controller_version, + .get_hw_feature = sxgbe_get_hw_feature, + .set_speed = sxgbe_core_set_speed, +}; + +const struct sxgbe_core_ops *sxgbe_get_core_ops(void) +{ + return &core_ops; +} diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.c new file mode 100644 index 000000000000..7cb5520475b7 --- /dev/null +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.c @@ -0,0 +1,515 @@ +/* 10G controller driver for Samsung SoCs + * + * Copyright (C) 2013 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Author: Siva Reddy Kallam + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +#include "sxgbe_common.h" +#include "sxgbe_dma.h" +#include "sxgbe_desc.h" + +/* DMA TX descriptor ring initialization */ +static void sxgbe_init_tx_desc(struct sxgbe_tx_norm_desc *p) +{ + p->tdes23.tx_rd_des23.own_bit = 0; +} + +static void sxgbe_tx_desc_enable_tse(struct sxgbe_tx_norm_desc *p, u8 is_tse, + u32 total_hdr_len, u32 tcp_hdr_len, + u32 tcp_payload_len) +{ + p->tdes23.tx_rd_des23.tse_bit = is_tse; + p->tdes23.tx_rd_des23.buf1_size = total_hdr_len; + p->tdes23.tx_rd_des23.tcp_hdr_len = tcp_hdr_len / 4; + p->tdes23.tx_rd_des23.tx_pkt_len.tcp_payload_len = tcp_payload_len; +} + +/* Assign buffer lengths for descriptor */ +static void sxgbe_prepare_tx_desc(struct sxgbe_tx_norm_desc *p, u8 is_fd, + int buf1_len, int pkt_len, int cksum) +{ + p->tdes23.tx_rd_des23.first_desc = is_fd; + p->tdes23.tx_rd_des23.buf1_size = buf1_len; + + p->tdes23.tx_rd_des23.tx_pkt_len.cksum_pktlen.total_pkt_len = pkt_len; + + if (cksum) + p->tdes23.tx_rd_des23.tx_pkt_len.cksum_pktlen.cksum_ctl = cic_full; +} + +/* Set VLAN control information */ +static void sxgbe_tx_vlanctl_desc(struct sxgbe_tx_norm_desc *p, int vlan_ctl) +{ + p->tdes23.tx_rd_des23.vlan_tag_ctl = vlan_ctl; +} + +/* Set the owner of Normal descriptor */ +static void sxgbe_set_tx_owner(struct sxgbe_tx_norm_desc *p) +{ + p->tdes23.tx_rd_des23.own_bit = 1; +} + +/* Get the owner of Normal descriptor */ +static int sxgbe_get_tx_owner(struct sxgbe_tx_norm_desc *p) +{ + return p->tdes23.tx_rd_des23.own_bit; +} + +/* Invoked by the xmit function to close the tx descriptor */ +static void sxgbe_close_tx_desc(struct sxgbe_tx_norm_desc *p) +{ + p->tdes23.tx_rd_des23.last_desc = 1; + p->tdes23.tx_rd_des23.int_on_com = 1; +} + +/* Clean the tx descriptor as soon as the tx irq is received */ +static void sxgbe_release_tx_desc(struct sxgbe_tx_norm_desc *p) +{ + memset(p, 0, sizeof(*p)); +} + +/* Clear interrupt on tx frame completion. When this bit is + * set an interrupt happens as soon as the frame is transmitted + */ +static void sxgbe_clear_tx_ic(struct sxgbe_tx_norm_desc *p) +{ + p->tdes23.tx_rd_des23.int_on_com = 0; +} + +/* Last tx segment reports the transmit status */ +static int sxgbe_get_tx_ls(struct sxgbe_tx_norm_desc *p) +{ + return p->tdes23.tx_rd_des23.last_desc; +} + +/* Get the buffer size from the descriptor */ +static int sxgbe_get_tx_len(struct sxgbe_tx_norm_desc *p) +{ + return p->tdes23.tx_rd_des23.buf1_size; +} + +/* Set tx timestamp enable bit */ +static void sxgbe_tx_enable_tstamp(struct sxgbe_tx_norm_desc *p) +{ + p->tdes23.tx_rd_des23.timestmp_enable = 1; +} + +/* get tx timestamp status */ +static int sxgbe_get_tx_timestamp_status(struct sxgbe_tx_norm_desc *p) +{ + return p->tdes23.tx_rd_des23.timestmp_enable; +} + +/* TX Context Descripto Specific */ +static void sxgbe_tx_ctxt_desc_set_ctxt(struct sxgbe_tx_ctxt_desc *p) +{ + p->ctxt_bit = 1; +} + +/* Set the owner of TX context descriptor */ +static void sxgbe_tx_ctxt_desc_set_owner(struct sxgbe_tx_ctxt_desc *p) +{ + p->own_bit = 1; +} + +/* Get the owner of TX context descriptor */ +static int sxgbe_tx_ctxt_desc_get_owner(struct sxgbe_tx_ctxt_desc *p) +{ + return p->own_bit; +} + +/* Set TX mss in TX context Descriptor */ +static void sxgbe_tx_ctxt_desc_set_mss(struct sxgbe_tx_ctxt_desc *p, int mss) +{ + p->maxseg_size = mss; +} + +/* Get TX mss from TX context Descriptor */ +static int sxgbe_tx_ctxt_desc_get_mss(struct sxgbe_tx_ctxt_desc *p) +{ + return p->maxseg_size; +} + +/* Set TX tcmssv in TX context Descriptor */ +static void sxgbe_tx_ctxt_desc_set_tcmssv(struct sxgbe_tx_ctxt_desc *p) +{ + p->tcmssv = 1; +} + +/* Reset TX ostc in TX context Descriptor */ +static void sxgbe_tx_ctxt_desc_reset_ostc(struct sxgbe_tx_ctxt_desc *p) +{ + p->ostc = 0; +} + +/* Set IVLAN information */ +static void sxgbe_tx_ctxt_desc_set_ivlantag(struct sxgbe_tx_ctxt_desc *p, + int is_ivlanvalid, int ivlan_tag, + int ivlan_ctl) +{ + if (is_ivlanvalid) { + p->ivlan_tag_valid = is_ivlanvalid; + p->ivlan_tag = ivlan_tag; + p->ivlan_tag_ctl = ivlan_ctl; + } +} + +/* Return IVLAN Tag */ +static int sxgbe_tx_ctxt_desc_get_ivlantag(struct sxgbe_tx_ctxt_desc *p) +{ + return p->ivlan_tag; +} + +/* Set VLAN Tag */ +static void sxgbe_tx_ctxt_desc_set_vlantag(struct sxgbe_tx_ctxt_desc *p, + int is_vlanvalid, int vlan_tag) +{ + if (is_vlanvalid) { + p->vltag_valid = is_vlanvalid; + p->vlan_tag = vlan_tag; + } +} + +/* Return VLAN Tag */ +static int sxgbe_tx_ctxt_desc_get_vlantag(struct sxgbe_tx_ctxt_desc *p) +{ + return p->vlan_tag; +} + +/* Set Time stamp */ +static void sxgbe_tx_ctxt_desc_set_tstamp(struct sxgbe_tx_ctxt_desc *p, + u8 ostc_enable, u64 tstamp) +{ + if (ostc_enable) { + p->ostc = ostc_enable; + p->tstamp_lo = (u32) tstamp; + p->tstamp_hi = (u32) (tstamp>>32); + } +} +/* Close TX context descriptor */ +static void sxgbe_tx_ctxt_desc_close(struct sxgbe_tx_ctxt_desc *p) +{ + p->own_bit = 1; +} + +/* WB status of context descriptor */ +static int sxgbe_tx_ctxt_desc_get_cde(struct sxgbe_tx_ctxt_desc *p) +{ + return p->ctxt_desc_err; +} + +/* DMA RX descriptor ring initialization */ +static void sxgbe_init_rx_desc(struct sxgbe_rx_norm_desc *p, int disable_rx_ic, + int mode, int end) +{ + p->rdes23.rx_rd_des23.own_bit = 1; + if (disable_rx_ic) + p->rdes23.rx_rd_des23.int_on_com = disable_rx_ic; +} + +/* Get RX own bit */ +static int sxgbe_get_rx_owner(struct sxgbe_rx_norm_desc *p) +{ + return p->rdes23.rx_rd_des23.own_bit; +} + +/* Set RX own bit */ +static void sxgbe_set_rx_owner(struct sxgbe_rx_norm_desc *p) +{ + p->rdes23.rx_rd_des23.own_bit = 1; +} + +/* Get the receive frame size */ +static int sxgbe_get_rx_frame_len(struct sxgbe_rx_norm_desc *p) +{ + return p->rdes23.rx_wb_des23.pkt_len; +} + +/* Return first Descriptor status */ +static int sxgbe_get_rx_fd_status(struct sxgbe_rx_norm_desc *p) +{ + return p->rdes23.rx_wb_des23.first_desc; +} + +/* Return Last Descriptor status */ +static int sxgbe_get_rx_ld_status(struct sxgbe_rx_norm_desc *p) +{ + return p->rdes23.rx_wb_des23.last_desc; +} + + +/* Return the RX status looking at the WB fields */ +static int sxgbe_rx_wbstatus(struct sxgbe_rx_norm_desc *p, + struct sxgbe_extra_stats *x, int *checksum) +{ + int status = 0; + + *checksum = CHECKSUM_UNNECESSARY; + if (p->rdes23.rx_wb_des23.err_summary) { + switch (p->rdes23.rx_wb_des23.err_l2_type) { + case RX_GMII_ERR: + status = -EINVAL; + x->rx_code_gmii_err++; + break; + case RX_WATCHDOG_ERR: + status = -EINVAL; + x->rx_watchdog_err++; + break; + case RX_CRC_ERR: + status = -EINVAL; + x->rx_crc_err++; + break; + case RX_GAINT_ERR: + status = -EINVAL; + x->rx_gaint_pkt_err++; + break; + case RX_IP_HDR_ERR: + *checksum = CHECKSUM_NONE; + x->ip_hdr_err++; + break; + case RX_PAYLOAD_ERR: + *checksum = CHECKSUM_NONE; + x->ip_payload_err++; + break; + case RX_OVERFLOW_ERR: + status = -EINVAL; + x->overflow_error++; + break; + default: + pr_err("Invalid Error type\n"); + break; + } + } else { + switch (p->rdes23.rx_wb_des23.err_l2_type) { + case RX_LEN_PKT: + x->len_pkt++; + break; + case RX_MACCTL_PKT: + x->mac_ctl_pkt++; + break; + case RX_DCBCTL_PKT: + x->dcb_ctl_pkt++; + break; + case RX_ARP_PKT: + x->arp_pkt++; + break; + case RX_OAM_PKT: + x->oam_pkt++; + break; + case RX_UNTAG_PKT: + x->untag_okt++; + break; + case RX_OTHER_PKT: + x->other_pkt++; + break; + case RX_SVLAN_PKT: + x->svlan_tag_pkt++; + break; + case RX_CVLAN_PKT: + x->cvlan_tag_pkt++; + break; + case RX_DVLAN_OCVLAN_ICVLAN_PKT: + x->dvlan_ocvlan_icvlan_pkt++; + break; + case RX_DVLAN_OSVLAN_ISVLAN_PKT: + x->dvlan_osvlan_isvlan_pkt++; + break; + case RX_DVLAN_OSVLAN_ICVLAN_PKT: + x->dvlan_osvlan_icvlan_pkt++; + break; + case RX_DVLAN_OCVLAN_ISVLAN_PKT: + x->dvlan_ocvlan_icvlan_pkt++; + break; + default: + pr_err("Invalid L2 Packet type\n"); + break; + } + } + + /* L3/L4 Pkt type */ + switch (p->rdes23.rx_wb_des23.layer34_pkt_type) { + case RX_NOT_IP_PKT: + x->not_ip_pkt++; + break; + case RX_IPV4_TCP_PKT: + x->ip4_tcp_pkt++; + break; + case RX_IPV4_UDP_PKT: + x->ip4_udp_pkt++; + break; + case RX_IPV4_ICMP_PKT: + x->ip4_icmp_pkt++; + break; + case RX_IPV4_UNKNOWN_PKT: + x->ip4_unknown_pkt++; + break; + case RX_IPV6_TCP_PKT: + x->ip6_tcp_pkt++; + break; + case RX_IPV6_UDP_PKT: + x->ip6_udp_pkt++; + break; + case RX_IPV6_ICMP_PKT: + x->ip6_icmp_pkt++; + break; + case RX_IPV6_UNKNOWN_PKT: + x->ip6_unknown_pkt++; + break; + default: + pr_err("Invalid L3/L4 Packet type\n"); + break; + } + + /* Filter */ + if (p->rdes23.rx_wb_des23.vlan_filter_match) + x->vlan_filter_match++; + + if (p->rdes23.rx_wb_des23.sa_filter_fail) { + status = -EINVAL; + x->sa_filter_fail++; + } + if (p->rdes23.rx_wb_des23.da_filter_fail) { + status = -EINVAL; + x->da_filter_fail++; + } + if (p->rdes23.rx_wb_des23.hash_filter_pass) + x->hash_filter_pass++; + + if (p->rdes23.rx_wb_des23.l3_filter_match) + x->l3_filter_match++; + + if (p->rdes23.rx_wb_des23.l4_filter_match) + x->l4_filter_match++; + + return status; +} + +/* Get own bit of context descriptor */ +static int sxgbe_get_rx_ctxt_owner(struct sxgbe_rx_ctxt_desc *p) +{ + return p->own_bit; +} + +/* Set own bit for context descriptor */ +static void sxgbe_set_ctxt_rx_owner(struct sxgbe_rx_ctxt_desc *p) +{ + p->own_bit = 1; +} + + +/* Return the reception status looking at Context control information */ +static void sxgbe_rx_ctxt_wbstatus(struct sxgbe_rx_ctxt_desc *p, + struct sxgbe_extra_stats *x) +{ + if (p->tstamp_dropped) + x->timestamp_dropped++; + + /* ptp */ + if (p->ptp_msgtype == RX_NO_PTP) + x->rx_msg_type_no_ptp++; + else if (p->ptp_msgtype == RX_PTP_SYNC) + x->rx_ptp_type_sync++; + else if (p->ptp_msgtype == RX_PTP_FOLLOW_UP) + x->rx_ptp_type_follow_up++; + else if (p->ptp_msgtype == RX_PTP_DELAY_REQ) + x->rx_ptp_type_delay_req++; + else if (p->ptp_msgtype == RX_PTP_DELAY_RESP) + x->rx_ptp_type_delay_resp++; + else if (p->ptp_msgtype == RX_PTP_PDELAY_REQ) + x->rx_ptp_type_pdelay_req++; + else if (p->ptp_msgtype == RX_PTP_PDELAY_RESP) + x->rx_ptp_type_pdelay_resp++; + else if (p->ptp_msgtype == RX_PTP_PDELAY_FOLLOW_UP) + x->rx_ptp_type_pdelay_follow_up++; + else if (p->ptp_msgtype == RX_PTP_ANNOUNCE) + x->rx_ptp_announce++; + else if (p->ptp_msgtype == RX_PTP_MGMT) + x->rx_ptp_mgmt++; + else if (p->ptp_msgtype == RX_PTP_SIGNAL) + x->rx_ptp_signal++; + else if (p->ptp_msgtype == RX_PTP_RESV_MSG) + x->rx_ptp_resv_msg_type++; +} + +/* Get rx timestamp status */ +static int sxgbe_get_rx_ctxt_tstamp_status(struct sxgbe_rx_ctxt_desc *p) +{ + if ((p->tstamp_hi == 0xffffffff) && (p->tstamp_lo == 0xffffffff)) { + pr_err("Time stamp corrupted\n"); + return 0; + } + + return p->tstamp_available; +} + + +static u64 sxgbe_get_rx_timestamp(struct sxgbe_rx_ctxt_desc *p) +{ + u64 ns; + + ns = p->tstamp_lo; + ns |= ((u64)p->tstamp_hi) << 32; + + return ns; +} + +static const struct sxgbe_desc_ops desc_ops = { + .init_tx_desc = sxgbe_init_tx_desc, + .tx_desc_enable_tse = sxgbe_tx_desc_enable_tse, + .prepare_tx_desc = sxgbe_prepare_tx_desc, + .tx_vlanctl_desc = sxgbe_tx_vlanctl_desc, + .set_tx_owner = sxgbe_set_tx_owner, + .get_tx_owner = sxgbe_get_tx_owner, + .close_tx_desc = sxgbe_close_tx_desc, + .release_tx_desc = sxgbe_release_tx_desc, + .clear_tx_ic = sxgbe_clear_tx_ic, + .get_tx_ls = sxgbe_get_tx_ls, + .get_tx_len = sxgbe_get_tx_len, + .tx_enable_tstamp = sxgbe_tx_enable_tstamp, + .get_tx_timestamp_status = sxgbe_get_tx_timestamp_status, + .tx_ctxt_desc_set_ctxt = sxgbe_tx_ctxt_desc_set_ctxt, + .tx_ctxt_desc_set_owner = sxgbe_tx_ctxt_desc_set_owner, + .get_tx_ctxt_owner = sxgbe_tx_ctxt_desc_get_owner, + .tx_ctxt_desc_set_mss = sxgbe_tx_ctxt_desc_set_mss, + .tx_ctxt_desc_get_mss = sxgbe_tx_ctxt_desc_get_mss, + .tx_ctxt_desc_set_tcmssv = sxgbe_tx_ctxt_desc_set_tcmssv, + .tx_ctxt_desc_reset_ostc = sxgbe_tx_ctxt_desc_reset_ostc, + .tx_ctxt_desc_set_ivlantag = sxgbe_tx_ctxt_desc_set_ivlantag, + .tx_ctxt_desc_get_ivlantag = sxgbe_tx_ctxt_desc_get_ivlantag, + .tx_ctxt_desc_set_vlantag = sxgbe_tx_ctxt_desc_set_vlantag, + .tx_ctxt_desc_get_vlantag = sxgbe_tx_ctxt_desc_get_vlantag, + .tx_ctxt_set_tstamp = sxgbe_tx_ctxt_desc_set_tstamp, + .close_tx_ctxt_desc = sxgbe_tx_ctxt_desc_close, + .get_tx_ctxt_cde = sxgbe_tx_ctxt_desc_get_cde, + .init_rx_desc = sxgbe_init_rx_desc, + .get_rx_owner = sxgbe_get_rx_owner, + .set_rx_owner = sxgbe_set_rx_owner, + .get_rx_frame_len = sxgbe_get_rx_frame_len, + .get_rx_fd_status = sxgbe_get_rx_fd_status, + .get_rx_ld_status = sxgbe_get_rx_ld_status, + .rx_wbstatus = sxgbe_rx_wbstatus, + .get_rx_ctxt_owner = sxgbe_get_rx_ctxt_owner, + .set_rx_ctxt_owner = sxgbe_set_ctxt_rx_owner, + .rx_ctxt_wbstatus = sxgbe_rx_ctxt_wbstatus, + .get_rx_ctxt_tstamp_status = sxgbe_get_rx_ctxt_tstamp_status, + .get_timestamp = sxgbe_get_rx_timestamp, +}; + +const struct sxgbe_desc_ops *sxgbe_get_desc_ops(void) +{ + return &desc_ops; +} diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.h new file mode 100644 index 000000000000..2caef1ae1ac5 --- /dev/null +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_desc.h @@ -0,0 +1,298 @@ +/* 10G controller driver for Samsung SoCs + * + * Copyright (C) 2013 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Author: Siva Reddy Kallam + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __SXGBE_DESC_H__ +#define __SXGBE_DESC_H__ + +#define SXGBE_DESC_SIZE_BYTES 16 + +/* forward declaration */ +struct sxgbe_extra_stats; + +/* Transmit checksum insertion control */ +enum tdes_csum_insertion { + cic_disabled = 0, /* Checksum Insertion Control */ + cic_only_ip = 1, /* Only IP header */ + /* IP header but pseudoheader is not calculated */ + cic_no_pseudoheader = 2, + cic_full = 3, /* IP header and pseudoheader */ +}; + +struct sxgbe_tx_norm_desc { + u64 tdes01; /* buf1 address */ + union { + /* TX Read-Format Desc 2,3 */ + struct { + /* TDES2 */ + u32 buf1_size:14; + u32 vlan_tag_ctl:2; + u32 buf2_size:14; + u32 timestmp_enable:1; + u32 int_on_com:1; + /* TDES3 */ + union { + u32 tcp_payload_len:18; + struct { + u32 total_pkt_len:15; + u32 reserved1:1; + u32 cksum_ctl:2; + } cksum_pktlen; + } tx_pkt_len; + + u32 tse_bit:1; + u32 tcp_hdr_len:4; + u32 sa_insert_ctl:3; + u32 crc_pad_ctl:2; + u32 last_desc:1; + u32 first_desc:1; + u32 ctxt_bit:1; + u32 own_bit:1; + } tx_rd_des23; + + /* tx write back Desc 2,3 */ + struct { + /* WB TES2 */ + u32 reserved1; + /* WB TES3 */ + u32 reserved2:31; + u32 own_bit:1; + } tx_wb_des23; + } tdes23; +}; + +struct sxgbe_rx_norm_desc { + union { + u32 rdes0; /* buf1 address */ + struct { + u32 out_vlan_tag:16; + u32 in_vlan_tag:16; + } wb_rx_des0; + } rd_wb_des0; + + union { + u32 rdes1; /* buf2 address or buf1[63:32] */ + u32 rss_hash; /* Write-back RX */ + } rd_wb_des1; + + union { + /* RX Read format Desc 2,3 */ + struct{ + /* RDES2 */ + u32 buf2_addr; + /* RDES3 */ + u32 buf2_hi_addr:30; + u32 int_on_com:1; + u32 own_bit:1; + } rx_rd_des23; + + /* RX write back */ + struct{ + /* WB RDES2 */ + u32 hdr_len:10; + u32 rdes2_reserved:2; + u32 elrd_val:1; + u32 iovt_sel:1; + u32 res_pkt:1; + u32 vlan_filter_match:1; + u32 sa_filter_fail:1; + u32 da_filter_fail:1; + u32 hash_filter_pass:1; + u32 macaddr_filter_match:8; + u32 l3_filter_match:1; + u32 l4_filter_match:1; + u32 l34_filter_num:3; + + /* WB RDES3 */ + u32 pkt_len:14; + u32 rdes3_reserved:1; + u32 err_summary:15; + u32 err_l2_type:4; + u32 layer34_pkt_type:4; + u32 no_coagulation_pkt:1; + u32 in_seq_pkt:1; + u32 rss_valid:1; + u32 context_des_avail:1; + u32 last_desc:1; + u32 first_desc:1; + u32 recv_context_desc:1; + u32 own_bit:1; + } rx_wb_des23; + } rdes23; +}; + +/* Context descriptor structure */ +struct sxgbe_tx_ctxt_desc { + u32 tstamp_lo; + u32 tstamp_hi; + u32 maxseg_size:15; + u32 reserved1:1; + u32 ivlan_tag:16; + u32 vlan_tag:16; + u32 vltag_valid:1; + u32 ivlan_tag_valid:1; + u32 ivlan_tag_ctl:2; + u32 reserved2:3; + u32 ctxt_desc_err:1; + u32 reserved3:2; + u32 ostc:1; + u32 tcmssv:1; + u32 reserved4:2; + u32 ctxt_bit:1; + u32 own_bit:1; +}; + +struct sxgbe_rx_ctxt_desc { + u32 tstamp_lo; + u32 tstamp_hi; + u32 reserved1; + u32 ptp_msgtype:4; + u32 tstamp_available:1; + u32 ptp_rsp_err:1; + u32 tstamp_dropped:1; + u32 reserved2:23; + u32 rx_ctxt_desc:1; + u32 own_bit:1; +}; + +struct sxgbe_desc_ops { + /* DMA TX descriptor ring initialization */ + void (*init_tx_desc)(struct sxgbe_tx_norm_desc *p); + + /* Invoked by the xmit function to prepare the tx descriptor */ + void (*tx_desc_enable_tse)(struct sxgbe_tx_norm_desc *p, u8 is_tse, + u32 total_hdr_len, u32 payload_len, + u32 tcp_payload_len); + + /* Assign buffer lengths for descriptor */ + void (*prepare_tx_desc)(struct sxgbe_tx_norm_desc *p, u8 is_fd, + int buf1_len, int pkt_len, int cksum); + + /* Set VLAN control information */ + void (*tx_vlanctl_desc)(struct sxgbe_tx_norm_desc *p, int vlan_ctl); + + /* Set the owner of the descriptor */ + void (*set_tx_owner)(struct sxgbe_tx_norm_desc *p); + + /* Get the owner of the descriptor */ + int (*get_tx_owner)(struct sxgbe_tx_norm_desc *p); + + /* Invoked by the xmit function to close the tx descriptor */ + void (*close_tx_desc)(struct sxgbe_tx_norm_desc *p); + + /* Clean the tx descriptor as soon as the tx irq is received */ + void (*release_tx_desc)(struct sxgbe_tx_norm_desc *p); + + /* Clear interrupt on tx frame completion. When this bit is + * set an interrupt happens as soon as the frame is transmitted + */ + void (*clear_tx_ic)(struct sxgbe_tx_norm_desc *p); + + /* Last tx segment reports the transmit status */ + int (*get_tx_ls)(struct sxgbe_tx_norm_desc *p); + + /* Get the buffer size from the descriptor */ + int (*get_tx_len)(struct sxgbe_tx_norm_desc *p); + + /* Set tx timestamp enable bit */ + void (*tx_enable_tstamp)(struct sxgbe_tx_norm_desc *p); + + /* get tx timestamp status */ + int (*get_tx_timestamp_status)(struct sxgbe_tx_norm_desc *p); + + /* TX Context Descripto Specific */ + void (*tx_ctxt_desc_set_ctxt)(struct sxgbe_tx_ctxt_desc *p); + + /* Set the owner of the TX context descriptor */ + void (*tx_ctxt_desc_set_owner)(struct sxgbe_tx_ctxt_desc *p); + + /* Get the owner of the TX context descriptor */ + int (*get_tx_ctxt_owner)(struct sxgbe_tx_ctxt_desc *p); + + /* Set TX mss */ + void (*tx_ctxt_desc_set_mss)(struct sxgbe_tx_ctxt_desc *p, int mss); + + /* Set TX mss */ + int (*tx_ctxt_desc_get_mss)(struct sxgbe_tx_ctxt_desc *p); + + /* Set TX tcmssv */ + void (*tx_ctxt_desc_set_tcmssv)(struct sxgbe_tx_ctxt_desc *p); + + /* Reset TX ostc */ + void (*tx_ctxt_desc_reset_ostc)(struct sxgbe_tx_ctxt_desc *p); + + /* Set IVLAN information */ + void (*tx_ctxt_desc_set_ivlantag)(struct sxgbe_tx_ctxt_desc *p, + int is_ivlanvalid, int ivlan_tag, + int ivlan_ctl); + + /* Return IVLAN Tag */ + int (*tx_ctxt_desc_get_ivlantag)(struct sxgbe_tx_ctxt_desc *p); + + /* Set VLAN Tag */ + void (*tx_ctxt_desc_set_vlantag)(struct sxgbe_tx_ctxt_desc *p, + int is_vlanvalid, int vlan_tag); + + /* Return VLAN Tag */ + int (*tx_ctxt_desc_get_vlantag)(struct sxgbe_tx_ctxt_desc *p); + + /* Set Time stamp */ + void (*tx_ctxt_set_tstamp)(struct sxgbe_tx_ctxt_desc *p, + u8 ostc_enable, u64 tstamp); + + /* Close TX context descriptor */ + void (*close_tx_ctxt_desc)(struct sxgbe_tx_ctxt_desc *p); + + /* WB status of context descriptor */ + int (*get_tx_ctxt_cde)(struct sxgbe_tx_ctxt_desc *p); + + /* DMA RX descriptor ring initialization */ + void (*init_rx_desc)(struct sxgbe_rx_norm_desc *p, int disable_rx_ic, + int mode, int end); + + /* Get own bit */ + int (*get_rx_owner)(struct sxgbe_rx_norm_desc *p); + + /* Set own bit */ + void (*set_rx_owner)(struct sxgbe_rx_norm_desc *p); + + /* Get the receive frame size */ + int (*get_rx_frame_len)(struct sxgbe_rx_norm_desc *p); + + /* Return first Descriptor status */ + int (*get_rx_fd_status)(struct sxgbe_rx_norm_desc *p); + + /* Return first Descriptor status */ + int (*get_rx_ld_status)(struct sxgbe_rx_norm_desc *p); + + /* Return the reception status looking at the RDES1 */ + int (*rx_wbstatus)(struct sxgbe_rx_norm_desc *p, + struct sxgbe_extra_stats *x, int *checksum); + + /* Get own bit */ + int (*get_rx_ctxt_owner)(struct sxgbe_rx_ctxt_desc *p); + + /* Set own bit */ + void (*set_rx_ctxt_owner)(struct sxgbe_rx_ctxt_desc *p); + + /* Return the reception status looking at Context control information */ + void (*rx_ctxt_wbstatus)(struct sxgbe_rx_ctxt_desc *p, + struct sxgbe_extra_stats *x); + + /* Get rx timestamp status */ + int (*get_rx_ctxt_tstamp_status)(struct sxgbe_rx_ctxt_desc *p); + + /* Get timestamp value for rx, need to check this */ + u64 (*get_timestamp)(struct sxgbe_rx_ctxt_desc *p); +}; + +const struct sxgbe_desc_ops *sxgbe_get_desc_ops(void); + +#endif /* __SXGBE_DESC_H__ */ diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.c new file mode 100644 index 000000000000..59d2d3976277 --- /dev/null +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.c @@ -0,0 +1,372 @@ +/* 10G controller driver for Samsung SoCs + * + * Copyright (C) 2013 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Author: Siva Reddy Kallam + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include + +#include "sxgbe_common.h" +#include "sxgbe_dma.h" +#include "sxgbe_reg.h" +#include "sxgbe_desc.h" + +/* DMA core initialization */ +static int sxgbe_dma_init(void __iomem *ioaddr, int fix_burst, int burst_map) +{ + int retry_count = 10; + u32 reg_val; + + /* reset the DMA */ + writel(SXGBE_DMA_SOFT_RESET, ioaddr + SXGBE_DMA_MODE_REG); + while (retry_count--) { + if (!(readl(ioaddr + SXGBE_DMA_MODE_REG) & + SXGBE_DMA_SOFT_RESET)) + break; + mdelay(10); + } + + if (retry_count < 0) + return -EBUSY; + + reg_val = readl(ioaddr + SXGBE_DMA_SYSBUS_MODE_REG); + + /* if fix_burst = 0, Set UNDEF = 1 of DMA_Sys_Mode Register. + * if fix_burst = 1, Set UNDEF = 0 of DMA_Sys_Mode Register. + * burst_map is bitmap for BLEN[4, 8, 16, 32, 64, 128 and 256]. + * Set burst_map irrespective of fix_burst value. + */ + if (!fix_burst) + reg_val |= SXGBE_DMA_AXI_UNDEF_BURST; + + /* write burst len map */ + reg_val |= (burst_map << SXGBE_DMA_BLENMAP_LSHIFT); + + writel(reg_val, ioaddr + SXGBE_DMA_SYSBUS_MODE_REG); + + return 0; +} + +static void sxgbe_dma_channel_init(void __iomem *ioaddr, int cha_num, + int fix_burst, int pbl, dma_addr_t dma_tx, + dma_addr_t dma_rx, int t_rsize, int r_rsize) +{ + u32 reg_val; + dma_addr_t dma_addr; + + reg_val = readl(ioaddr + SXGBE_DMA_CHA_CTL_REG(cha_num)); + /* set the pbl */ + if (fix_burst) { + reg_val |= SXGBE_DMA_PBL_X8MODE; + writel(reg_val, ioaddr + SXGBE_DMA_CHA_CTL_REG(cha_num)); + /* program the TX pbl */ + reg_val = readl(ioaddr + SXGBE_DMA_CHA_TXCTL_REG(cha_num)); + reg_val |= (pbl << SXGBE_DMA_TXPBL_LSHIFT); + writel(reg_val, ioaddr + SXGBE_DMA_CHA_TXCTL_REG(cha_num)); + /* program the RX pbl */ + reg_val = readl(ioaddr + SXGBE_DMA_CHA_RXCTL_REG(cha_num)); + reg_val |= (pbl << SXGBE_DMA_RXPBL_LSHIFT); + writel(reg_val, ioaddr + SXGBE_DMA_CHA_RXCTL_REG(cha_num)); + } + + /* program desc registers */ + writel(upper_32_bits(dma_tx), + ioaddr + SXGBE_DMA_CHA_TXDESC_HADD_REG(cha_num)); + writel(lower_32_bits(dma_tx), + ioaddr + SXGBE_DMA_CHA_TXDESC_LADD_REG(cha_num)); + + writel(upper_32_bits(dma_rx), + ioaddr + SXGBE_DMA_CHA_RXDESC_HADD_REG(cha_num)); + writel(lower_32_bits(dma_rx), + ioaddr + SXGBE_DMA_CHA_RXDESC_LADD_REG(cha_num)); + + /* program tail pointers */ + /* assumption: upper 32 bits are constant and + * same as TX/RX desc list + */ + dma_addr = dma_tx + ((t_rsize - 1) * SXGBE_DESC_SIZE_BYTES); + writel(lower_32_bits(dma_addr), + ioaddr + SXGBE_DMA_CHA_TXDESC_TAILPTR_REG(cha_num)); + + dma_addr = dma_rx + ((r_rsize - 1) * SXGBE_DESC_SIZE_BYTES); + writel(lower_32_bits(dma_addr), + ioaddr + SXGBE_DMA_CHA_RXDESC_LADD_REG(cha_num)); + /* program the ring sizes */ + writel(t_rsize - 1, ioaddr + SXGBE_DMA_CHA_TXDESC_RINGLEN_REG(cha_num)); + writel(r_rsize - 1, ioaddr + SXGBE_DMA_CHA_RXDESC_RINGLEN_REG(cha_num)); + + /* Enable TX/RX interrupts */ + writel(SXGBE_DMA_ENA_INT, + ioaddr + SXGBE_DMA_CHA_INT_ENABLE_REG(cha_num)); +} + +static void sxgbe_enable_dma_transmission(void __iomem *ioaddr, int cha_num) +{ + u32 tx_config; + + tx_config = readl(ioaddr + SXGBE_DMA_CHA_TXCTL_REG(cha_num)); + tx_config |= SXGBE_TX_START_DMA; + writel(tx_config, ioaddr + SXGBE_DMA_CHA_TXCTL_REG(cha_num)); +} + +static void sxgbe_enable_dma_irq(void __iomem *ioaddr, int dma_cnum) +{ + /* Enable TX/RX interrupts */ + writel(SXGBE_DMA_ENA_INT, + ioaddr + SXGBE_DMA_CHA_INT_ENABLE_REG(dma_cnum)); +} + +static void sxgbe_disable_dma_irq(void __iomem *ioaddr, int dma_cnum) +{ + /* Disable TX/RX interrupts */ + writel(0, ioaddr + SXGBE_DMA_CHA_INT_ENABLE_REG(dma_cnum)); +} + +static void sxgbe_dma_start_tx(void __iomem *ioaddr, int tchannels) +{ + int cnum; + u32 tx_ctl_reg; + + for (cnum = 0; cnum < tchannels; cnum++) { + tx_ctl_reg = readl(ioaddr + SXGBE_DMA_CHA_TXCTL_REG(cnum)); + tx_ctl_reg |= SXGBE_TX_ENABLE; + writel(tx_ctl_reg, + ioaddr + SXGBE_DMA_CHA_TXCTL_REG(cnum)); + } +} + +static void sxgbe_dma_start_tx_queue(void __iomem *ioaddr, int dma_cnum) +{ + u32 tx_ctl_reg; + + tx_ctl_reg = readl(ioaddr + SXGBE_DMA_CHA_TXCTL_REG(dma_cnum)); + tx_ctl_reg |= SXGBE_TX_ENABLE; + writel(tx_ctl_reg, ioaddr + SXGBE_DMA_CHA_TXCTL_REG(dma_cnum)); +} + +static void sxgbe_dma_stop_tx_queue(void __iomem *ioaddr, int dma_cnum) +{ + u32 tx_ctl_reg; + + tx_ctl_reg = readl(ioaddr + SXGBE_DMA_CHA_TXCTL_REG(dma_cnum)); + tx_ctl_reg &= ~(SXGBE_TX_ENABLE); + writel(tx_ctl_reg, ioaddr + SXGBE_DMA_CHA_TXCTL_REG(dma_cnum)); +} + +static void sxgbe_dma_stop_tx(void __iomem *ioaddr, int tchannels) +{ + int cnum; + u32 tx_ctl_reg; + + for (cnum = 0; cnum < tchannels; cnum++) { + tx_ctl_reg = readl(ioaddr + SXGBE_DMA_CHA_TXCTL_REG(cnum)); + tx_ctl_reg &= ~(SXGBE_TX_ENABLE); + writel(tx_ctl_reg, ioaddr + SXGBE_DMA_CHA_TXCTL_REG(cnum)); + } +} + +static void sxgbe_dma_start_rx(void __iomem *ioaddr, int rchannels) +{ + int cnum; + u32 rx_ctl_reg; + + for (cnum = 0; cnum < rchannels; cnum++) { + rx_ctl_reg = readl(ioaddr + SXGBE_DMA_CHA_RXCTL_REG(cnum)); + rx_ctl_reg |= SXGBE_RX_ENABLE; + writel(rx_ctl_reg, + ioaddr + SXGBE_DMA_CHA_RXCTL_REG(cnum)); + } +} + +static void sxgbe_dma_stop_rx(void __iomem *ioaddr, int rchannels) +{ + int cnum; + u32 rx_ctl_reg; + + for (cnum = 0; cnum < rchannels; cnum++) { + rx_ctl_reg = readl(ioaddr + SXGBE_DMA_CHA_RXCTL_REG(cnum)); + rx_ctl_reg &= ~(SXGBE_RX_ENABLE); + writel(rx_ctl_reg, ioaddr + SXGBE_DMA_CHA_RXCTL_REG(cnum)); + } +} + +static int sxgbe_tx_dma_int_status(void __iomem *ioaddr, int channel_no, + struct sxgbe_extra_stats *x) +{ + u32 int_status = readl(ioaddr + SXGBE_DMA_CHA_STATUS_REG(channel_no)); + u32 clear_val = 0; + u32 ret_val = 0; + + /* TX Normal Interrupt Summary */ + if (likely(int_status & SXGBE_DMA_INT_STATUS_NIS)) { + x->normal_irq_n++; + if (int_status & SXGBE_DMA_INT_STATUS_TI) { + ret_val |= handle_tx; + x->tx_normal_irq_n++; + clear_val |= SXGBE_DMA_INT_STATUS_TI; + } + + if (int_status & SXGBE_DMA_INT_STATUS_TBU) { + x->tx_underflow_irq++; + ret_val |= tx_bump_tc; + clear_val |= SXGBE_DMA_INT_STATUS_TBU; + } + } else if (unlikely(int_status & SXGBE_DMA_INT_STATUS_AIS)) { + /* TX Abnormal Interrupt Summary */ + if (int_status & SXGBE_DMA_INT_STATUS_TPS) { + ret_val |= tx_hard_error; + clear_val |= SXGBE_DMA_INT_STATUS_TPS; + x->tx_process_stopped_irq++; + } + + if (int_status & SXGBE_DMA_INT_STATUS_FBE) { + ret_val |= tx_hard_error; + x->fatal_bus_error_irq++; + + /* Assumption: FBE bit is the combination of + * all the bus access erros and cleared when + * the respective error bits cleared + */ + + /* check for actual cause */ + if (int_status & SXGBE_DMA_INT_STATUS_TEB0) { + x->tx_read_transfer_err++; + clear_val |= SXGBE_DMA_INT_STATUS_TEB0; + } else { + x->tx_write_transfer_err++; + } + + if (int_status & SXGBE_DMA_INT_STATUS_TEB1) { + x->tx_desc_access_err++; + clear_val |= SXGBE_DMA_INT_STATUS_TEB1; + } else { + x->tx_buffer_access_err++; + } + + if (int_status & SXGBE_DMA_INT_STATUS_TEB2) { + x->tx_data_transfer_err++; + clear_val |= SXGBE_DMA_INT_STATUS_TEB2; + } + } + + /* context descriptor error */ + if (int_status & SXGBE_DMA_INT_STATUS_CTXTERR) { + x->tx_ctxt_desc_err++; + clear_val |= SXGBE_DMA_INT_STATUS_CTXTERR; + } + } + + /* clear the served bits */ + writel(clear_val, ioaddr + SXGBE_DMA_CHA_STATUS_REG(channel_no)); + + return ret_val; +} + +static int sxgbe_rx_dma_int_status(void __iomem *ioaddr, int channel_no, + struct sxgbe_extra_stats *x) +{ + u32 int_status = readl(ioaddr + SXGBE_DMA_CHA_STATUS_REG(channel_no)); + u32 clear_val = 0; + u32 ret_val = 0; + + /* RX Normal Interrupt Summary */ + if (likely(int_status & SXGBE_DMA_INT_STATUS_NIS)) { + x->normal_irq_n++; + if (int_status & SXGBE_DMA_INT_STATUS_RI) { + ret_val |= handle_rx; + x->rx_normal_irq_n++; + clear_val |= SXGBE_DMA_INT_STATUS_RI; + } + } else if (unlikely(int_status & SXGBE_DMA_INT_STATUS_AIS)) { + /* RX Abnormal Interrupt Summary */ + if (int_status & SXGBE_DMA_INT_STATUS_RBU) { + ret_val |= rx_bump_tc; + clear_val |= SXGBE_DMA_INT_STATUS_RBU; + x->rx_underflow_irq++; + } + + if (int_status & SXGBE_DMA_INT_STATUS_RPS) { + ret_val |= rx_hard_error; + clear_val |= SXGBE_DMA_INT_STATUS_RPS; + x->rx_process_stopped_irq++; + } + + if (int_status & SXGBE_DMA_INT_STATUS_FBE) { + ret_val |= rx_hard_error; + x->fatal_bus_error_irq++; + + /* Assumption: FBE bit is the combination of + * all the bus access erros and cleared when + * the respective error bits cleared + */ + + /* check for actual cause */ + if (int_status & SXGBE_DMA_INT_STATUS_REB0) { + x->rx_read_transfer_err++; + clear_val |= SXGBE_DMA_INT_STATUS_REB0; + } else { + x->rx_write_transfer_err++; + } + + if (int_status & SXGBE_DMA_INT_STATUS_REB1) { + x->rx_desc_access_err++; + clear_val |= SXGBE_DMA_INT_STATUS_REB1; + } else { + x->rx_buffer_access_err++; + } + + if (int_status & SXGBE_DMA_INT_STATUS_REB2) { + x->rx_data_transfer_err++; + clear_val |= SXGBE_DMA_INT_STATUS_REB2; + } + } + } + + /* clear the served bits */ + writel(clear_val, ioaddr + SXGBE_DMA_CHA_STATUS_REG(channel_no)); + + return ret_val; +} + +/* Program the HW RX Watchdog */ +static void sxgbe_dma_rx_watchdog(void __iomem *ioaddr, u32 riwt) +{ + u32 que_num; + + SXGBE_FOR_EACH_QUEUE(SXGBE_RX_QUEUES, que_num) { + writel(riwt, + ioaddr + SXGBE_DMA_CHA_INT_RXWATCHTMR_REG(que_num)); + } +} + +static const struct sxgbe_dma_ops sxgbe_dma_ops = { + .init = sxgbe_dma_init, + .cha_init = sxgbe_dma_channel_init, + .enable_dma_transmission = sxgbe_enable_dma_transmission, + .enable_dma_irq = sxgbe_enable_dma_irq, + .disable_dma_irq = sxgbe_disable_dma_irq, + .start_tx = sxgbe_dma_start_tx, + .start_tx_queue = sxgbe_dma_start_tx_queue, + .stop_tx = sxgbe_dma_stop_tx, + .stop_tx_queue = sxgbe_dma_stop_tx_queue, + .start_rx = sxgbe_dma_start_rx, + .stop_rx = sxgbe_dma_stop_rx, + .tx_dma_int_status = sxgbe_tx_dma_int_status, + .rx_dma_int_status = sxgbe_rx_dma_int_status, + .rx_watchdog = sxgbe_dma_rx_watchdog, +}; + +const struct sxgbe_dma_ops *sxgbe_get_dma_ops(void) +{ + return &sxgbe_dma_ops; +} diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.h new file mode 100644 index 000000000000..bbf167efb60c --- /dev/null +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_dma.h @@ -0,0 +1,48 @@ +/* 10G controller driver for Samsung SoCs + * + * Copyright (C) 2013 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Author: Siva Reddy Kallam + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __SXGBE_DMA_H__ +#define __SXGBE_DMA_H__ + +/* forward declaration */ +struct sxgbe_extra_stats; + +#define SXGBE_DMA_BLENMAP_LSHIFT 1 +#define SXGBE_DMA_TXPBL_LSHIFT 16 +#define SXGBE_DMA_RXPBL_LSHIFT 16 +#define DEFAULT_DMA_PBL 8 + +struct sxgbe_dma_ops { + /* DMA core initialization */ + int (*init)(void __iomem *ioaddr, int fix_burst, int burst_map); + void (*cha_init)(void __iomem *ioaddr, int cha_num, int fix_burst, + int pbl, dma_addr_t dma_tx, dma_addr_t dma_rx, + int t_rzie, int r_rsize); + void (*enable_dma_transmission)(void __iomem *ioaddr, int dma_cnum); + void (*enable_dma_irq)(void __iomem *ioaddr, int dma_cnum); + void (*disable_dma_irq)(void __iomem *ioaddr, int dma_cnum); + void (*start_tx)(void __iomem *ioaddr, int tchannels); + void (*start_tx_queue)(void __iomem *ioaddr, int dma_cnum); + void (*stop_tx)(void __iomem *ioaddr, int tchannels); + void (*stop_tx_queue)(void __iomem *ioaddr, int dma_cnum); + void (*start_rx)(void __iomem *ioaddr, int rchannels); + void (*stop_rx)(void __iomem *ioaddr, int rchannels); + int (*tx_dma_int_status)(void __iomem *ioaddr, int channel_no, + struct sxgbe_extra_stats *x); + int (*rx_dma_int_status)(void __iomem *ioaddr, int channel_no, + struct sxgbe_extra_stats *x); + /* Program the HW RX Watchdog */ + void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt); +}; + +const struct sxgbe_dma_ops *sxgbe_get_dma_ops(void); + +#endif /* __SXGBE_CORE_H__ */ diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c new file mode 100644 index 000000000000..1dce2b2e045b --- /dev/null +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c @@ -0,0 +1,44 @@ +/* 10G controller driver for Samsung SoCs + * + * Copyright (C) 2013 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Author: Siva Reddy Kallam + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +#include "sxgbe_common.h" + +struct sxgbe_stats { + char stat_string[ETH_GSTRING_LEN]; + int sizeof_stat; + int stat_offset; +}; + +#define SXGBE_STAT(m) \ +{ \ + #m, \ + FIELD_SIZEOF(struct sxgbe_extra_stats, m), \ + offsetof(struct sxgbe_priv_data, xstats.m) \ +} + +static const struct sxgbe_stats sxgbe_gstrings_stats[] = { +}; +#define SXGBE_STATS_LEN ARRAY_SIZE(sxgbe_gstrings_stats) + +static const struct ethtool_ops sxgbe_ethtool_ops = { +}; + +void sxgbe_set_ethtool_ops(struct net_device *netdev) +{ + SET_ETHTOOL_OPS(netdev, &sxgbe_ethtool_ops); +} diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c new file mode 100644 index 000000000000..75ba57cfe7c0 --- /dev/null +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -0,0 +1,2052 @@ +/* 10G controller driver for Samsung SoCs + * + * Copyright (C) 2013 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Author: Siva Reddy Kallam + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sxgbe_common.h" +#include "sxgbe_desc.h" +#include "sxgbe_dma.h" +#include "sxgbe_mtl.h" +#include "sxgbe_reg.h" + +#define SXGBE_ALIGN(x) L1_CACHE_ALIGN(x) +#define JUMBO_LEN 9000 + +/* Module parameters */ +#define TX_TIMEO 5000 +#define DMA_TX_SIZE 512 +#define DMA_RX_SIZE 1024 +#define TC_DEFAULT 64 +#define DMA_BUFFER_SIZE BUF_SIZE_2KiB +/* The default timer value as per the sxgbe specification 1 sec(1000 ms) */ +#define SXGBE_DEFAULT_LPI_TIMER 1000 + +static int debug = -1; + +module_param(debug, int, S_IRUGO | S_IWUSR); +static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE | + NETIF_MSG_LINK | NETIF_MSG_IFUP | + NETIF_MSG_IFDOWN | NETIF_MSG_TIMER); + +static irqreturn_t sxgbe_common_interrupt(int irq, void *dev_id); +static irqreturn_t sxgbe_tx_interrupt(int irq, void *dev_id); +static irqreturn_t sxgbe_rx_interrupt(int irq, void *dev_id); + +#define SXGBE_COAL_TIMER(x) (jiffies + usecs_to_jiffies(x)) + +/** + * sxgbe_clk_csr_set - dynamically set the MDC clock + * @priv: driver private structure + * Description: this is to dynamically set the MDC clock according to the csr + * clock input. + */ +static void sxgbe_clk_csr_set(struct sxgbe_priv_data *priv) +{ + u32 clk_rate = clk_get_rate(priv->sxgbe_clk); + + /* assign the proper divider, this will be used during + * mdio communication + */ + if (clk_rate < SXGBE_CSR_F_150M) + priv->clk_csr = SXGBE_CSR_100_150M; + else if (clk_rate <= SXGBE_CSR_F_250M) + priv->clk_csr = SXGBE_CSR_150_250M; + else if (clk_rate <= SXGBE_CSR_F_300M) + priv->clk_csr = SXGBE_CSR_250_300M; + else if (clk_rate <= SXGBE_CSR_F_350M) + priv->clk_csr = SXGBE_CSR_300_350M; + else if (clk_rate <= SXGBE_CSR_F_400M) + priv->clk_csr = SXGBE_CSR_350_400M; + else if (clk_rate <= SXGBE_CSR_F_500M) + priv->clk_csr = SXGBE_CSR_400_500M; +} + +/* minimum number of free TX descriptors required to wake up TX process */ +#define SXGBE_TX_THRESH(x) (x->dma_tx_size/4) + +static inline u32 sxgbe_tx_avail(struct sxgbe_tx_queue *queue, int tx_qsize) +{ + return queue->dirty_tx + tx_qsize - queue->cur_tx - 1; +} + +/** + * sxgbe_adjust_link + * @dev: net device structure + * Description: it adjusts the link parameters. + */ +static void sxgbe_adjust_link(struct net_device *dev) +{ + struct sxgbe_priv_data *priv = netdev_priv(dev); + struct phy_device *phydev = priv->phydev; + u8 new_state = 0; + u8 speed = 0xff; + + if (!phydev) + return; + + /* SXGBE is not supporting auto-negotiation and + * half duplex mode. so, not handling duplex change + * in this function. only handling speed and link status + */ + if (phydev->link) { + if (phydev->speed != priv->speed) { + new_state = 1; + switch (phydev->speed) { + case SPEED_10000: + speed = SXGBE_SPEED_10G; + break; + case SPEED_2500: + speed = SXGBE_SPEED_2_5G; + break; + case SPEED_1000: + speed = SXGBE_SPEED_1G; + break; + default: + netif_err(priv, link, dev, + "Speed (%d) not supported\n", + phydev->speed); + } + + priv->speed = phydev->speed; + priv->hw->mac->set_speed(priv->ioaddr, speed); + } + + if (!priv->oldlink) { + new_state = 1; + priv->oldlink = 1; + } + } else if (priv->oldlink) { + new_state = 1; + priv->oldlink = 0; + priv->speed = SPEED_UNKNOWN; + } + + if (new_state & netif_msg_link(priv)) + phy_print_status(phydev); +} + +/** + * sxgbe_init_phy - PHY initialization + * @dev: net device structure + * Description: it initializes the driver's PHY state, and attaches the PHY + * to the mac driver. + * Return value: + * 0 on success + */ +static int sxgbe_init_phy(struct net_device *ndev) +{ + char phy_id_fmt[MII_BUS_ID_SIZE + 3]; + char bus_id[MII_BUS_ID_SIZE]; + struct phy_device *phydev; + struct sxgbe_priv_data *priv = netdev_priv(ndev); + int phy_iface = priv->plat->interface; + + /* assign default link status */ + priv->oldlink = 0; + priv->speed = SPEED_UNKNOWN; + priv->oldduplex = DUPLEX_UNKNOWN; + + if (priv->plat->phy_bus_name) + snprintf(bus_id, MII_BUS_ID_SIZE, "%s-%x", + priv->plat->phy_bus_name, priv->plat->bus_id); + else + snprintf(bus_id, MII_BUS_ID_SIZE, "sxgbe-%x", + priv->plat->bus_id); + + snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id, + priv->plat->phy_addr); + netdev_dbg(ndev, "%s: trying to attach to %s\n", __func__, phy_id_fmt); + + phydev = phy_connect(ndev, phy_id_fmt, &sxgbe_adjust_link, phy_iface); + + if (IS_ERR(phydev)) { + netdev_err(ndev, "Could not attach to PHY\n"); + return PTR_ERR(phydev); + } + + /* Stop Advertising 1000BASE Capability if interface is not GMII */ + if ((phy_iface == PHY_INTERFACE_MODE_MII) || + (phy_iface == PHY_INTERFACE_MODE_RMII)) + phydev->advertising &= ~(SUPPORTED_1000baseT_Half | + SUPPORTED_1000baseT_Full); + if (phydev->phy_id == 0) { + phy_disconnect(phydev); + return -ENODEV; + } + + netdev_dbg(ndev, "%s: attached to PHY (UID 0x%x) Link = %d\n", + __func__, phydev->phy_id, phydev->link); + + /* save phy device in private structure */ + priv->phydev = phydev; + + return 0; +} + +/** + * sxgbe_clear_descriptors: clear descriptors + * @priv: driver private structure + * Description: this function is called to clear the tx and rx descriptors + * in case of both basic and extended descriptors are used. + */ +static void sxgbe_clear_descriptors(struct sxgbe_priv_data *priv) +{ + int i, j; + unsigned int txsize = priv->dma_tx_size; + unsigned int rxsize = priv->dma_rx_size; + + /* Clear the Rx/Tx descriptors */ + for (j = 0; j < SXGBE_RX_QUEUES; j++) { + for (i = 0; i < rxsize; i++) + priv->hw->desc->init_rx_desc(&priv->rxq[j]->dma_rx[i], + priv->use_riwt, priv->mode, + (i == rxsize - 1)); + } + + for (j = 0; j < SXGBE_TX_QUEUES; j++) { + for (i = 0; i < txsize; i++) + priv->hw->desc->init_tx_desc(&priv->txq[j]->dma_tx[i]); + } +} + +static int sxgbe_init_rx_buffers(struct net_device *dev, + struct sxgbe_rx_norm_desc *p, int i, + unsigned int dma_buf_sz, + struct sxgbe_rx_queue *rx_ring) +{ + struct sxgbe_priv_data *priv = netdev_priv(dev); + struct sk_buff *skb; + + skb = __netdev_alloc_skb_ip_align(dev, dma_buf_sz, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + rx_ring->rx_skbuff[i] = skb; + rx_ring->rx_skbuff_dma[i] = dma_map_single(priv->device, skb->data, + dma_buf_sz, DMA_FROM_DEVICE); + + if (dma_mapping_error(priv->device, rx_ring->rx_skbuff_dma[i])) { + netdev_err(dev, "%s: DMA mapping error\n", __func__); + dev_kfree_skb_any(skb); + return -EINVAL; + } + + p->rdes23.rx_rd_des23.buf2_addr = rx_ring->rx_skbuff_dma[i]; + + return 0; +} +/** + * init_tx_ring - init the TX descriptor ring + * @dev: net device structure + * @tx_ring: ring to be intialised + * @tx_rsize: ring size + * Description: this function initializes the DMA TX descriptor + */ +static int init_tx_ring(struct device *dev, u8 queue_no, + struct sxgbe_tx_queue *tx_ring, int tx_rsize) +{ + /* TX ring is not allcoated */ + if (!tx_ring) { + dev_err(dev, "No memory for TX queue of SXGBE\n"); + return -ENOMEM; + } + + /* allocate memory for TX descriptors */ + tx_ring->dma_tx = dma_zalloc_coherent(dev, + tx_rsize * sizeof(struct sxgbe_tx_norm_desc), + &tx_ring->dma_tx_phy, GFP_KERNEL); + if (!tx_ring->dma_tx) + return -ENOMEM; + + /* allocate memory for TX skbuff array */ + tx_ring->tx_skbuff_dma = devm_kcalloc(dev, tx_rsize, + sizeof(dma_addr_t), GFP_KERNEL); + if (!tx_ring->tx_skbuff_dma) + goto dmamem_err; + + tx_ring->tx_skbuff = devm_kcalloc(dev, tx_rsize, + sizeof(struct sk_buff *), GFP_KERNEL); + + if (!tx_ring->tx_skbuff) + goto dmamem_err; + + /* assign queue number */ + tx_ring->queue_no = queue_no; + + /* initalise counters */ + tx_ring->dirty_tx = 0; + tx_ring->cur_tx = 0; + + /* initalise TX queue lock */ + spin_lock_init(&tx_ring->tx_lock); + + return 0; + +dmamem_err: + dma_free_coherent(dev, tx_rsize * sizeof(struct sxgbe_tx_norm_desc), + tx_ring->dma_tx, tx_ring->dma_tx_phy); + return -ENOMEM; +} + +/** + * free_rx_ring - free the RX descriptor ring + * @dev: net device structure + * @rx_ring: ring to be intialised + * @rx_rsize: ring size + * Description: this function initializes the DMA RX descriptor + */ +void free_rx_ring(struct device *dev, struct sxgbe_rx_queue *rx_ring, + int rx_rsize) +{ + dma_free_coherent(dev, rx_rsize * sizeof(struct sxgbe_rx_norm_desc), + rx_ring->dma_rx, rx_ring->dma_rx_phy); + kfree(rx_ring->rx_skbuff_dma); + kfree(rx_ring->rx_skbuff); +} + +/** + * init_rx_ring - init the RX descriptor ring + * @dev: net device structure + * @rx_ring: ring to be intialised + * @rx_rsize: ring size + * Description: this function initializes the DMA RX descriptor + */ +static int init_rx_ring(struct net_device *dev, u8 queue_no, + struct sxgbe_rx_queue *rx_ring, int rx_rsize) +{ + struct sxgbe_priv_data *priv = netdev_priv(dev); + int desc_index; + unsigned int bfsize = 0; + unsigned int ret = 0; + + /* Set the max buffer size according to the MTU. */ + bfsize = ALIGN(dev->mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN, 8); + + netif_dbg(priv, probe, dev, "%s: bfsize %d\n", __func__, bfsize); + + /* RX ring is not allcoated */ + if (rx_ring == NULL) { + netdev_err(dev, "No memory for RX queue\n"); + goto error; + } + + /* assign queue number */ + rx_ring->queue_no = queue_no; + + /* allocate memory for RX descriptors */ + rx_ring->dma_rx = dma_zalloc_coherent(priv->device, + rx_rsize * sizeof(struct sxgbe_rx_norm_desc), + &rx_ring->dma_rx_phy, GFP_KERNEL); + + if (rx_ring->dma_rx == NULL) + goto error; + + /* allocate memory for RX skbuff array */ + rx_ring->rx_skbuff_dma = kmalloc_array(rx_rsize, + sizeof(dma_addr_t), GFP_KERNEL); + if (rx_ring->rx_skbuff_dma == NULL) + goto dmamem_err; + + rx_ring->rx_skbuff = kmalloc_array(rx_rsize, + sizeof(struct sk_buff *), GFP_KERNEL); + if (rx_ring->rx_skbuff == NULL) + goto rxbuff_err; + + /* initialise the buffers */ + for (desc_index = 0; desc_index < rx_rsize; desc_index++) { + struct sxgbe_rx_norm_desc *p; + p = rx_ring->dma_rx + desc_index; + ret = sxgbe_init_rx_buffers(dev, p, desc_index, + bfsize, rx_ring); + if (ret) + goto err_init_rx_buffers; + } + + /* initalise counters */ + rx_ring->cur_rx = 0; + rx_ring->dirty_rx = (unsigned int)(desc_index - rx_rsize); + priv->dma_buf_sz = bfsize; + + return 0; + +err_init_rx_buffers: + while (--desc_index >= 0) + free_rx_ring(priv->device, rx_ring, desc_index); + kfree(rx_ring->rx_skbuff); +rxbuff_err: + kfree(rx_ring->rx_skbuff_dma); +dmamem_err: + dma_free_coherent(priv->device, + rx_rsize * sizeof(struct sxgbe_rx_norm_desc), + rx_ring->dma_rx, rx_ring->dma_rx_phy); +error: + return -ENOMEM; +} +/** + * free_tx_ring - free the TX descriptor ring + * @dev: net device structure + * @tx_ring: ring to be intialised + * @tx_rsize: ring size + * Description: this function initializes the DMA TX descriptor + */ +void free_tx_ring(struct device *dev, struct sxgbe_tx_queue *tx_ring, + int tx_rsize) +{ + dma_free_coherent(dev, tx_rsize * sizeof(struct sxgbe_tx_norm_desc), + tx_ring->dma_tx, tx_ring->dma_tx_phy); +} + +/** + * init_dma_desc_rings - init the RX/TX descriptor rings + * @dev: net device structure + * Description: this function initializes the DMA RX/TX descriptors + * and allocates the socket buffers. It suppors the chained and ring + * modes. + */ +static int init_dma_desc_rings(struct net_device *netd) +{ + int queue_num, ret; + struct sxgbe_priv_data *priv = netdev_priv(netd); + int tx_rsize = priv->dma_tx_size; + int rx_rsize = priv->dma_rx_size; + + /* Allocate memory for queue structures and TX descs */ + SXGBE_FOR_EACH_QUEUE(SXGBE_TX_QUEUES, queue_num) { + ret = init_tx_ring(priv->device, queue_num, + priv->txq[queue_num], tx_rsize); + if (ret) { + dev_err(&netd->dev, "TX DMA ring allocation failed!\n"); + goto txalloc_err; + } + + /* save private pointer in each ring this + * pointer is needed during cleaing TX queue + */ + priv->txq[queue_num]->priv_ptr = priv; + } + + /* Allocate memory for queue structures and RX descs */ + SXGBE_FOR_EACH_QUEUE(SXGBE_RX_QUEUES, queue_num) { + ret = init_rx_ring(netd, queue_num, + priv->rxq[queue_num], rx_rsize); + if (ret) { + netdev_err(netd, "RX DMA ring allocation failed!!\n"); + goto rxalloc_err; + } + + /* save private pointer in each ring this + * pointer is needed during cleaing TX queue + */ + priv->rxq[queue_num]->priv_ptr = priv; + } + + sxgbe_clear_descriptors(priv); + + return 0; + +txalloc_err: + while (queue_num--) + free_tx_ring(priv->device, priv->txq[queue_num], tx_rsize); + return ret; + +rxalloc_err: + while (queue_num--) + free_rx_ring(priv->device, priv->rxq[queue_num], rx_rsize); + return ret; +} + +static void tx_free_ring_skbufs(struct sxgbe_tx_queue *txqueue) +{ + int dma_desc; + struct sxgbe_priv_data *priv = txqueue->priv_ptr; + int tx_rsize = priv->dma_tx_size; + + for (dma_desc = 0; dma_desc < tx_rsize; dma_desc++) { + struct sxgbe_tx_norm_desc *tdesc = txqueue->dma_tx + dma_desc; + + if (txqueue->tx_skbuff_dma[dma_desc]) + dma_unmap_single(priv->device, + txqueue->tx_skbuff_dma[dma_desc], + priv->hw->desc->get_tx_len(tdesc), + DMA_TO_DEVICE); + + dev_kfree_skb_any(txqueue->tx_skbuff[dma_desc]); + txqueue->tx_skbuff[dma_desc] = NULL; + txqueue->tx_skbuff_dma[dma_desc] = 0; + } +} + + +static void dma_free_tx_skbufs(struct sxgbe_priv_data *priv) +{ + int queue_num; + + SXGBE_FOR_EACH_QUEUE(SXGBE_TX_QUEUES, queue_num) { + struct sxgbe_tx_queue *tqueue = priv->txq[queue_num]; + tx_free_ring_skbufs(tqueue); + } +} + +static void free_dma_desc_resources(struct sxgbe_priv_data *priv) +{ + int queue_num; + int tx_rsize = priv->dma_tx_size; + int rx_rsize = priv->dma_rx_size; + + /* Release the DMA TX buffers */ + dma_free_tx_skbufs(priv); + + /* Release the TX ring memory also */ + SXGBE_FOR_EACH_QUEUE(SXGBE_TX_QUEUES, queue_num) { + free_tx_ring(priv->device, priv->txq[queue_num], tx_rsize); + } + + /* Release the RX ring memory also */ + SXGBE_FOR_EACH_QUEUE(SXGBE_RX_QUEUES, queue_num) { + free_rx_ring(priv->device, priv->rxq[queue_num], rx_rsize); + } +} + +static int txring_mem_alloc(struct sxgbe_priv_data *priv) +{ + int queue_num; + + SXGBE_FOR_EACH_QUEUE(SXGBE_TX_QUEUES, queue_num) { + priv->txq[queue_num] = devm_kmalloc(priv->device, + sizeof(struct sxgbe_tx_queue), GFP_KERNEL); + if (!priv->txq[queue_num]) + return -ENOMEM; + } + + return 0; +} + +static int rxring_mem_alloc(struct sxgbe_priv_data *priv) +{ + int queue_num; + + SXGBE_FOR_EACH_QUEUE(SXGBE_RX_QUEUES, queue_num) { + priv->rxq[queue_num] = devm_kmalloc(priv->device, + sizeof(struct sxgbe_rx_queue), GFP_KERNEL); + if (!priv->rxq[queue_num]) + return -ENOMEM; + } + + return 0; +} + +/** + * sxgbe_mtl_operation_mode - HW MTL operation mode + * @priv: driver private structure + * Description: it sets the MTL operation mode: tx/rx MTL thresholds + * or Store-And-Forward capability. + */ +static void sxgbe_mtl_operation_mode(struct sxgbe_priv_data *priv) +{ + int queue_num; + + /* TX/RX threshold control */ + if (likely(priv->plat->force_sf_dma_mode)) { + /* set TC mode for TX QUEUES */ + SXGBE_FOR_EACH_QUEUE(priv->hw_cap.tx_mtl_queues, queue_num) + priv->hw->mtl->set_tx_mtl_mode(priv->ioaddr, queue_num, + SXGBE_MTL_SFMODE); + priv->tx_tc = SXGBE_MTL_SFMODE; + + /* set TC mode for RX QUEUES */ + SXGBE_FOR_EACH_QUEUE(priv->hw_cap.rx_mtl_queues, queue_num) + priv->hw->mtl->set_rx_mtl_mode(priv->ioaddr, queue_num, + SXGBE_MTL_SFMODE); + priv->rx_tc = SXGBE_MTL_SFMODE; + } else if (unlikely(priv->plat->force_thresh_dma_mode)) { + /* set TC mode for TX QUEUES */ + SXGBE_FOR_EACH_QUEUE(priv->hw_cap.tx_mtl_queues, queue_num) + priv->hw->mtl->set_tx_mtl_mode(priv->ioaddr, queue_num, + priv->tx_tc); + /* set TC mode for RX QUEUES */ + SXGBE_FOR_EACH_QUEUE(priv->hw_cap.rx_mtl_queues, queue_num) + priv->hw->mtl->set_rx_mtl_mode(priv->ioaddr, queue_num, + priv->rx_tc); + } else { + pr_err("ERROR: %s: Invalid TX threshold mode\n", __func__); + } +} + +/** + * sxgbe_tx_queue_clean: + * @priv: driver private structure + * Description: it reclaims resources after transmission completes. + */ +static void sxgbe_tx_queue_clean(struct sxgbe_tx_queue *tqueue) +{ + struct sxgbe_priv_data *priv = tqueue->priv_ptr; + unsigned int tx_rsize = priv->dma_tx_size; + struct netdev_queue *dev_txq; + u8 queue_no = tqueue->queue_no; + + dev_txq = netdev_get_tx_queue(priv->dev, queue_no); + + spin_lock(&tqueue->tx_lock); + + priv->xstats.tx_clean++; + while (tqueue->dirty_tx != tqueue->cur_tx) { + unsigned int entry = tqueue->dirty_tx % tx_rsize; + struct sk_buff *skb = tqueue->tx_skbuff[entry]; + struct sxgbe_tx_norm_desc *p; + + p = tqueue->dma_tx + entry; + + /* Check if the descriptor is owned by the DMA. */ + if (priv->hw->desc->get_tx_owner(p)) + break; + + if (netif_msg_tx_done(priv)) + pr_debug("%s: curr %d, dirty %d\n", + __func__, tqueue->cur_tx, tqueue->dirty_tx); + + if (likely(tqueue->tx_skbuff_dma[entry])) { + dma_unmap_single(priv->device, + tqueue->tx_skbuff_dma[entry], + priv->hw->desc->get_tx_len(p), + DMA_TO_DEVICE); + tqueue->tx_skbuff_dma[entry] = 0; + } + + if (likely(skb)) { + dev_kfree_skb(skb); + tqueue->tx_skbuff[entry] = NULL; + } + + priv->hw->desc->release_tx_desc(p); + + tqueue->dirty_tx++; + } + + /* wake up queue */ + if (unlikely(netif_tx_queue_stopped(dev_txq) && + sxgbe_tx_avail(tqueue, tx_rsize) > SXGBE_TX_THRESH(priv))) { + netif_tx_lock(priv->dev); + if (netif_tx_queue_stopped(dev_txq) && + sxgbe_tx_avail(tqueue, tx_rsize) > SXGBE_TX_THRESH(priv)) { + if (netif_msg_tx_done(priv)) + pr_debug("%s: restart transmit\n", __func__); + netif_tx_wake_queue(dev_txq); + } + netif_tx_unlock(priv->dev); + } + + spin_unlock(&tqueue->tx_lock); +} + +/** + * sxgbe_tx_clean: + * @priv: driver private structure + * Description: it reclaims resources after transmission completes. + */ +static void sxgbe_tx_all_clean(struct sxgbe_priv_data *priv) +{ + u8 queue_num; + + SXGBE_FOR_EACH_QUEUE(SXGBE_TX_QUEUES, queue_num) { + struct sxgbe_tx_queue *tqueue = priv->txq[queue_num]; + + sxgbe_tx_queue_clean(tqueue); + } +} + +/** + * sxgbe_restart_tx_queue: irq tx error mng function + * @priv: driver private structure + * Description: it cleans the descriptors and restarts the transmission + * in case of errors. + */ +static void sxgbe_restart_tx_queue(struct sxgbe_priv_data *priv, int queue_num) +{ + struct sxgbe_tx_queue *tx_ring = priv->txq[queue_num]; + struct netdev_queue *dev_txq = netdev_get_tx_queue(priv->dev, + queue_num); + + /* stop the queue */ + netif_tx_stop_queue(dev_txq); + + /* stop the tx dma */ + priv->hw->dma->stop_tx_queue(priv->ioaddr, queue_num); + + /* free the skbuffs of the ring */ + tx_free_ring_skbufs(tx_ring); + + /* initalise counters */ + tx_ring->cur_tx = 0; + tx_ring->dirty_tx = 0; + + /* start the tx dma */ + priv->hw->dma->start_tx_queue(priv->ioaddr, queue_num); + + priv->dev->stats.tx_errors++; + + /* wakeup the queue */ + netif_tx_wake_queue(dev_txq); +} + +/** + * sxgbe_reset_all_tx_queues: irq tx error mng function + * @priv: driver private structure + * Description: it cleans all the descriptors and + * restarts the transmission on all queues in case of errors. + */ +static void sxgbe_reset_all_tx_queues(struct sxgbe_priv_data *priv) +{ + int queue_num; + + /* On TX timeout of net device, resetting of all queues + * may not be proper way, revisit this later if needed + */ + SXGBE_FOR_EACH_QUEUE(SXGBE_TX_QUEUES, queue_num) + sxgbe_restart_tx_queue(priv, queue_num); +} + +/** + * sxgbe_get_hw_features: get XMAC capabilities from the HW cap. register. + * @priv: driver private structure + * Description: + * new GMAC chip generations have a new register to indicate the + * presence of the optional feature/functions. + * This can be also used to override the value passed through the + * platform and necessary for old MAC10/100 and GMAC chips. + */ +static int sxgbe_get_hw_features(struct sxgbe_priv_data * const priv) +{ + int rval = 0; + struct sxgbe_hw_features *features = &priv->hw_cap; + + /* Read First Capability Register CAP[0] */ + rval = priv->hw->mac->get_hw_feature(priv->ioaddr, 0); + if (rval) { + features->pmt_remote_wake_up = + SXGBE_HW_FEAT_PMT_TEMOTE_WOP(rval); + features->pmt_magic_frame = SXGBE_HW_FEAT_PMT_MAGIC_PKT(rval); + features->atime_stamp = SXGBE_HW_FEAT_IEEE1500_2008(rval); + features->tx_csum_offload = + SXGBE_HW_FEAT_TX_CSUM_OFFLOAD(rval); + features->rx_csum_offload = + SXGBE_HW_FEAT_RX_CSUM_OFFLOAD(rval); + features->multi_macaddr = SXGBE_HW_FEAT_MACADDR_COUNT(rval); + features->tstamp_srcselect = SXGBE_HW_FEAT_TSTMAP_SRC(rval); + features->sa_vlan_insert = SXGBE_HW_FEAT_SRCADDR_VLAN(rval); + } + + /* Read First Capability Register CAP[1] */ + rval = priv->hw->mac->get_hw_feature(priv->ioaddr, 1); + if (rval) { + features->rxfifo_size = SXGBE_HW_FEAT_RX_FIFO_SIZE(rval); + features->txfifo_size = SXGBE_HW_FEAT_TX_FIFO_SIZE(rval); + features->atstmap_hword = SXGBE_HW_FEAT_TX_FIFO_SIZE(rval); + features->dcb_enable = SXGBE_HW_FEAT_DCB(rval); + features->splithead_enable = SXGBE_HW_FEAT_SPLIT_HDR(rval); + features->tcpseg_offload = SXGBE_HW_FEAT_TSO(rval); + features->debug_mem = SXGBE_HW_FEAT_DEBUG_MEM_IFACE(rval); + features->rss_enable = SXGBE_HW_FEAT_RSS(rval); + features->hash_tsize = SXGBE_HW_FEAT_HASH_TABLE_SIZE(rval); + features->l3l4_filer_size = SXGBE_HW_FEAT_L3L4_FILTER_NUM(rval); + } + + /* Read First Capability Register CAP[2] */ + rval = priv->hw->mac->get_hw_feature(priv->ioaddr, 2); + if (rval) { + features->rx_mtl_queues = SXGBE_HW_FEAT_RX_MTL_QUEUES(rval); + features->tx_mtl_queues = SXGBE_HW_FEAT_TX_MTL_QUEUES(rval); + features->rx_dma_channels = SXGBE_HW_FEAT_RX_DMA_CHANNELS(rval); + features->tx_dma_channels = SXGBE_HW_FEAT_TX_DMA_CHANNELS(rval); + features->pps_output_count = SXGBE_HW_FEAT_PPS_OUTPUTS(rval); + features->aux_input_count = SXGBE_HW_FEAT_AUX_SNAPSHOTS(rval); + } + + return rval; +} + +/** + * sxgbe_check_ether_addr: check if the MAC addr is valid + * @priv: driver private structure + * Description: + * it is to verify if the MAC address is valid, in case of failures it + * generates a random MAC address + */ +static void sxgbe_check_ether_addr(struct sxgbe_priv_data *priv) +{ + if (!is_valid_ether_addr(priv->dev->dev_addr)) { + priv->hw->mac->get_umac_addr((void __iomem *) + priv->ioaddr, + priv->dev->dev_addr, 0); + if (!is_valid_ether_addr(priv->dev->dev_addr)) + eth_hw_addr_random(priv->dev); + } + dev_info(priv->device, "device MAC address %pM\n", + priv->dev->dev_addr); +} + +/** + * sxgbe_init_dma_engine: DMA init. + * @priv: driver private structure + * Description: + * It inits the DMA invoking the specific SXGBE callback. + * Some DMA parameters can be passed from the platform; + * in case of these are not passed a default is kept for the MAC or GMAC. + */ +static int sxgbe_init_dma_engine(struct sxgbe_priv_data *priv) +{ + int pbl = DEFAULT_DMA_PBL, fixed_burst = 0, burst_map = 0; + int queue_num; + + if (priv->plat->dma_cfg) { + pbl = priv->plat->dma_cfg->pbl; + fixed_burst = priv->plat->dma_cfg->fixed_burst; + burst_map = priv->plat->dma_cfg->burst_map; + } + + SXGBE_FOR_EACH_QUEUE(SXGBE_TX_QUEUES, queue_num) + priv->hw->dma->cha_init(priv->ioaddr, queue_num, + fixed_burst, pbl, + (priv->txq[queue_num])->dma_tx_phy, + (priv->rxq[queue_num])->dma_rx_phy, + priv->dma_tx_size, priv->dma_rx_size); + + return priv->hw->dma->init(priv->ioaddr, fixed_burst, burst_map); +} + +/** + * sxgbe_init_mtl_engine: MTL init. + * @priv: driver private structure + * Description: + * It inits the MTL invoking the specific SXGBE callback. + */ +static void sxgbe_init_mtl_engine(struct sxgbe_priv_data *priv) +{ + int queue_num; + + SXGBE_FOR_EACH_QUEUE(SXGBE_TX_QUEUES, queue_num) { + priv->hw->mtl->mtl_set_txfifosize(priv->ioaddr, queue_num, + priv->hw_cap.tx_mtl_qsize); + priv->hw->mtl->mtl_enable_txqueue(priv->ioaddr, queue_num); + } +} + +/** + * sxgbe_disable_mtl_engine: MTL disable. + * @priv: driver private structure + * Description: + * It disables the MTL queues by invoking the specific SXGBE callback. + */ +static void sxgbe_disable_mtl_engine(struct sxgbe_priv_data *priv) +{ + int queue_num; + + SXGBE_FOR_EACH_QUEUE(SXGBE_TX_QUEUES, queue_num) + priv->hw->mtl->mtl_disable_txqueue(priv->ioaddr, queue_num); +} + + +/** + * sxgbe_tx_timer: mitigation sw timer for tx. + * @data: data pointer + * Description: + * This is the timer handler to directly invoke the sxgbe_tx_clean. + */ +static void sxgbe_tx_timer(unsigned long data) +{ + struct sxgbe_tx_queue *p = (struct sxgbe_tx_queue *)data; + sxgbe_tx_queue_clean(p); +} + +/** + * sxgbe_init_tx_coalesce: init tx mitigation options. + * @priv: driver private structure + * Description: + * This inits the transmit coalesce parameters: i.e. timer rate, + * timer handler and default threshold used for enabling the + * interrupt on completion bit. + */ +static void sxgbe_tx_init_coalesce(struct sxgbe_priv_data *priv) +{ + u8 queue_num; + + SXGBE_FOR_EACH_QUEUE(SXGBE_TX_QUEUES, queue_num) { + struct sxgbe_tx_queue *p = priv->txq[queue_num]; + p->tx_coal_frames = SXGBE_TX_FRAMES; + p->tx_coal_timer = SXGBE_COAL_TX_TIMER; + init_timer(&p->txtimer); + p->txtimer.expires = SXGBE_COAL_TIMER(p->tx_coal_timer); + p->txtimer.data = (unsigned long)&priv->txq[queue_num]; + p->txtimer.function = sxgbe_tx_timer; + add_timer(&p->txtimer); + } +} + +static void sxgbe_tx_del_timer(struct sxgbe_priv_data *priv) +{ + u8 queue_num; + + SXGBE_FOR_EACH_QUEUE(SXGBE_TX_QUEUES, queue_num) { + struct sxgbe_tx_queue *p = priv->txq[queue_num]; + del_timer_sync(&p->txtimer); + } +} + +/** + * sxgbe_open - open entry point of the driver + * @dev : pointer to the device structure. + * Description: + * This function is the open entry point of the driver. + * Return value: + * 0 on success and an appropriate (-)ve integer as defined in errno.h + * file on failure. + */ +static int sxgbe_open(struct net_device *dev) +{ + struct sxgbe_priv_data *priv = netdev_priv(dev); + int ret, queue_num; + + clk_prepare_enable(priv->sxgbe_clk); + + sxgbe_check_ether_addr(priv); + + /* Init the phy */ + ret = sxgbe_init_phy(dev); + if (ret) { + netdev_err(dev, "%s: Cannot attach to PHY (error: %d)\n", + __func__, ret); + goto phy_error; + } + + /* Create and initialize the TX/RX descriptors chains. */ + priv->dma_tx_size = SXGBE_ALIGN(DMA_TX_SIZE); + priv->dma_rx_size = SXGBE_ALIGN(DMA_RX_SIZE); + priv->dma_buf_sz = SXGBE_ALIGN(DMA_BUFFER_SIZE); + priv->tx_tc = TC_DEFAULT; + priv->rx_tc = TC_DEFAULT; + init_dma_desc_rings(dev); + + /* DMA initialization and SW reset */ + ret = sxgbe_init_dma_engine(priv); + if (ret < 0) { + netdev_err(dev, "%s: DMA initialization failed\n", __func__); + goto init_error; + } + + /* MTL initialization */ + sxgbe_init_mtl_engine(priv); + + /* Copy the MAC addr into the HW */ + priv->hw->mac->set_umac_addr(priv->ioaddr, dev->dev_addr, 0); + + /* Initialize the MAC Core */ + priv->hw->mac->core_init(priv->ioaddr); + + /* Request the IRQ lines */ + ret = devm_request_irq(priv->device, priv->irq, sxgbe_common_interrupt, + IRQF_SHARED, dev->name, dev); + if (unlikely(ret < 0)) { + netdev_err(dev, "%s: ERROR: allocating the IRQ %d (error: %d)\n", + __func__, priv->irq, ret); + goto init_error; + } + + /* Request TX DMA irq lines */ + SXGBE_FOR_EACH_QUEUE(SXGBE_TX_QUEUES, queue_num) { + ret = devm_request_irq(priv->device, + (priv->txq[queue_num])->irq_no, + sxgbe_tx_interrupt, 0, + dev->name, priv->txq[queue_num]); + if (unlikely(ret < 0)) { + netdev_err(dev, "%s: ERROR: allocating TX IRQ %d (error: %d)\n", + __func__, priv->irq, ret); + goto init_error; + } + } + + /* Request RX DMA irq lines */ + SXGBE_FOR_EACH_QUEUE(SXGBE_RX_QUEUES, queue_num) { + ret = devm_request_irq(priv->device, + (priv->rxq[queue_num])->irq_no, + sxgbe_rx_interrupt, 0, + dev->name, priv->rxq[queue_num]); + if (unlikely(ret < 0)) { + netdev_err(dev, "%s: ERROR: allocating TX IRQ %d (error: %d)\n", + __func__, priv->irq, ret); + goto init_error; + } + } + + /* Enable the MAC Rx/Tx */ + priv->hw->mac->enable_tx(priv->ioaddr, true); + priv->hw->mac->enable_rx(priv->ioaddr, true); + + /* Set the HW DMA mode and the COE */ + sxgbe_mtl_operation_mode(priv); + + /* Extra statistics */ + memset(&priv->xstats, 0, sizeof(struct sxgbe_extra_stats)); + + priv->xstats.tx_threshold = priv->tx_tc; + priv->xstats.rx_threshold = priv->rx_tc; + + /* Start the ball rolling... */ + netdev_dbg(dev, "DMA RX/TX processes started...\n"); + priv->hw->dma->start_tx(priv->ioaddr, SXGBE_TX_QUEUES); + priv->hw->dma->start_rx(priv->ioaddr, SXGBE_RX_QUEUES); + + if (priv->phydev) + phy_start(priv->phydev); + + /* initalise TX coalesce parameters */ + sxgbe_tx_init_coalesce(priv); + + if ((priv->use_riwt) && (priv->hw->dma->rx_watchdog)) { + priv->rx_riwt = SXGBE_MAX_DMA_RIWT; + priv->hw->dma->rx_watchdog(priv->ioaddr, SXGBE_MAX_DMA_RIWT); + } + + napi_enable(&priv->napi); + netif_start_queue(dev); + + return 0; + +init_error: + free_dma_desc_resources(priv); + if (priv->phydev) + phy_disconnect(priv->phydev); +phy_error: + clk_disable_unprepare(priv->sxgbe_clk); + + return ret; +} + +/** + * sxgbe_release - close entry point of the driver + * @dev : device pointer. + * Description: + * This is the stop entry point of the driver. + */ +static int sxgbe_release(struct net_device *dev) +{ + struct sxgbe_priv_data *priv = netdev_priv(dev); + + /* Stop and disconnect the PHY */ + if (priv->phydev) { + phy_stop(priv->phydev); + phy_disconnect(priv->phydev); + priv->phydev = NULL; + } + + netif_tx_stop_all_queues(dev); + + napi_disable(&priv->napi); + + /* delete TX timers */ + sxgbe_tx_del_timer(priv); + + /* Stop TX/RX DMA and clear the descriptors */ + priv->hw->dma->stop_tx(priv->ioaddr, SXGBE_TX_QUEUES); + priv->hw->dma->stop_rx(priv->ioaddr, SXGBE_RX_QUEUES); + + /* disable MTL queue */ + sxgbe_disable_mtl_engine(priv); + + /* Release and free the Rx/Tx resources */ + free_dma_desc_resources(priv); + + /* Disable the MAC Rx/Tx */ + priv->hw->mac->enable_tx(priv->ioaddr, false); + priv->hw->mac->enable_rx(priv->ioaddr, false); + + clk_disable_unprepare(priv->sxgbe_clk); + + return 0; +} + +/** + * sxgbe_xmit: Tx entry point of the driver + * @skb : the socket buffer + * @dev : device pointer + * Description : this is the tx entry point of the driver. + * It programs the chain or the ring and supports oversized frames + * and SG feature. + */ +static netdev_tx_t sxgbe_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned int entry, frag_num; + struct netdev_queue *dev_txq; + unsigned txq_index = skb_get_queue_mapping(skb); + struct sxgbe_priv_data *priv = netdev_priv(dev); + unsigned int tx_rsize = priv->dma_tx_size; + struct sxgbe_tx_queue *tqueue = priv->txq[txq_index]; + struct sxgbe_tx_norm_desc *tx_desc, *first_desc; + int nr_frags = skb_shinfo(skb)->nr_frags; + int no_pagedlen = skb_headlen(skb); + int is_jumbo = 0; + + /* get the TX queue handle */ + dev_txq = netdev_get_tx_queue(dev, txq_index); + + /* get the spinlock */ + spin_lock(&tqueue->tx_lock); + + if (unlikely(sxgbe_tx_avail(tqueue, tx_rsize) < nr_frags + 1)) { + if (!netif_tx_queue_stopped(dev_txq)) { + netif_tx_stop_queue(dev_txq); + netdev_err(dev, "%s: Tx Ring is full when %d queue is awake\n", + __func__, txq_index); + } + /* release the spin lock in case of BUSY */ + spin_unlock(&tqueue->tx_lock); + return NETDEV_TX_BUSY; + } + + entry = tqueue->cur_tx % tx_rsize; + tx_desc = tqueue->dma_tx + entry; + + first_desc = tx_desc; + + /* save the skb address */ + tqueue->tx_skbuff[entry] = skb; + + if (!is_jumbo) { + tx_desc->tdes01 = dma_map_single(priv->device, skb->data, + no_pagedlen, DMA_TO_DEVICE); + if (dma_mapping_error(priv->device, tx_desc->tdes01)) + pr_err("%s: TX dma mapping failed!!\n", __func__); + + priv->hw->desc->prepare_tx_desc(tx_desc, 1, no_pagedlen, + no_pagedlen, 0); + } + + for (frag_num = 0; frag_num < nr_frags; frag_num++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[frag_num]; + int len = skb_frag_size(frag); + + entry = (++tqueue->cur_tx) % tx_rsize; + tx_desc = tqueue->dma_tx + entry; + tx_desc->tdes01 = skb_frag_dma_map(priv->device, frag, 0, len, + DMA_TO_DEVICE); + + tqueue->tx_skbuff_dma[entry] = tx_desc->tdes01; + tqueue->tx_skbuff[entry] = NULL; + + /* prepare the descriptor */ + priv->hw->desc->prepare_tx_desc(tx_desc, 0, len, + len, 0); + /* memory barrier to flush descriptor */ + wmb(); + + /* set the owner */ + priv->hw->desc->set_tx_owner(tx_desc); + } + + /* close the descriptors */ + priv->hw->desc->close_tx_desc(tx_desc); + + /* memory barrier to flush descriptor */ + wmb(); + + tqueue->tx_count_frames += nr_frags + 1; + if (tqueue->tx_count_frames > tqueue->tx_coal_frames) { + priv->hw->desc->clear_tx_ic(tx_desc); + priv->xstats.tx_reset_ic_bit++; + mod_timer(&tqueue->txtimer, + SXGBE_COAL_TIMER(tqueue->tx_coal_timer)); + } else { + tqueue->tx_count_frames = 0; + } + + /* set owner for first desc */ + priv->hw->desc->set_tx_owner(first_desc); + + /* memory barrier to flush descriptor */ + wmb(); + + tqueue->cur_tx++; + + /* display current ring */ + netif_dbg(priv, pktdata, dev, "%s: curr %d dirty=%d entry=%d, first=%p, nfrags=%d\n", + __func__, tqueue->cur_tx % tx_rsize, + tqueue->dirty_tx % tx_rsize, entry, + first_desc, nr_frags); + + if (unlikely(sxgbe_tx_avail(tqueue, tx_rsize) <= (MAX_SKB_FRAGS + 1))) { + netif_dbg(priv, hw, dev, "%s: stop transmitted packets\n", + __func__); + netif_tx_stop_queue(dev_txq); + } + + dev->stats.tx_bytes += skb->len; + + if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + tqueue->hwts_tx_en)) { + /* declare that device is doing timestamping */ + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + priv->hw->desc->tx_enable_tstamp(first_desc); + } + + if (!tqueue->hwts_tx_en) + skb_tx_timestamp(skb); + + priv->hw->dma->enable_dma_transmission(priv->ioaddr, txq_index); + + spin_unlock(&tqueue->tx_lock); + + return NETDEV_TX_OK; +} + +/** + * sxgbe_rx_refill: refill used skb preallocated buffers + * @priv: driver private structure + * Description : this is to reallocate the skb for the reception process + * that is based on zero-copy. + */ +static void sxgbe_rx_refill(struct sxgbe_priv_data *priv) +{ + unsigned int rxsize = priv->dma_rx_size; + int bfsize = priv->dma_buf_sz; + u8 qnum = priv->cur_rx_qnum; + + for (; priv->rxq[qnum]->cur_rx - priv->rxq[qnum]->dirty_rx > 0; + priv->rxq[qnum]->dirty_rx++) { + unsigned int entry = priv->rxq[qnum]->dirty_rx % rxsize; + struct sxgbe_rx_norm_desc *p; + + p = priv->rxq[qnum]->dma_rx + entry; + + if (likely(priv->rxq[qnum]->rx_skbuff[entry] == NULL)) { + struct sk_buff *skb; + + skb = netdev_alloc_skb_ip_align(priv->dev, bfsize); + + if (unlikely(skb == NULL)) + break; + + priv->rxq[qnum]->rx_skbuff[entry] = skb; + priv->rxq[qnum]->rx_skbuff_dma[entry] = + dma_map_single(priv->device, skb->data, bfsize, + DMA_FROM_DEVICE); + + p->rdes23.rx_rd_des23.buf2_addr = + priv->rxq[qnum]->rx_skbuff_dma[entry]; + } + + /* Added memory barrier for RX descriptor modification */ + wmb(); + priv->hw->desc->set_rx_owner(p); + /* Added memory barrier for RX descriptor modification */ + wmb(); + } +} + +/** + * sxgbe_rx: receive the frames from the remote host + * @priv: driver private structure + * @limit: napi bugget. + * Description : this the function called by the napi poll method. + * It gets all the frames inside the ring. + */ +static int sxgbe_rx(struct sxgbe_priv_data *priv, int limit) +{ + u8 qnum = priv->cur_rx_qnum; + unsigned int rxsize = priv->dma_rx_size; + unsigned int entry = priv->rxq[qnum]->cur_rx; + unsigned int next_entry = 0; + unsigned int count = 0; + + while (count < limit) { + struct sxgbe_rx_norm_desc *p; + struct sk_buff *skb; + int frame_len; + + p = priv->rxq[qnum]->dma_rx + entry; + + if (priv->hw->desc->get_rx_owner(p)) + break; + + count++; + + next_entry = (++priv->rxq[qnum]->cur_rx) % rxsize; + prefetch(priv->rxq[qnum]->dma_rx + next_entry); + + /*TO DO read the status of the incoming frame */ + + skb = priv->rxq[qnum]->rx_skbuff[entry]; + + if (unlikely(!skb)) + netdev_err(priv->dev, "rx descriptor is not consistent\n"); + + prefetch(skb->data - NET_IP_ALIGN); + priv->rxq[qnum]->rx_skbuff[entry] = NULL; + + frame_len = priv->hw->desc->get_rx_frame_len(p); + + skb_put(skb, frame_len); + + netif_receive_skb(skb); + + entry = next_entry; + } + + sxgbe_rx_refill(priv); + + return count; +} + +/** + * sxgbe_poll - sxgbe poll method (NAPI) + * @napi : pointer to the napi structure. + * @budget : maximum number of packets that the current CPU can receive from + * all interfaces. + * Description : + * To look at the incoming frames and clear the tx resources. + */ +static int sxgbe_poll(struct napi_struct *napi, int budget) +{ + struct sxgbe_priv_data *priv = container_of(napi, + struct sxgbe_priv_data, napi); + int work_done = 0; + u8 qnum = priv->cur_rx_qnum; + + priv->xstats.napi_poll++; + /* first, clean the tx queues */ + sxgbe_tx_all_clean(priv); + + work_done = sxgbe_rx(priv, budget); + if (work_done < budget) { + napi_complete(napi); + priv->hw->dma->enable_dma_irq(priv->ioaddr, qnum); + } + + return work_done; +} + +/** + * sxgbe_tx_timeout + * @dev : Pointer to net device structure + * Description: this function is called when a packet transmission fails to + * complete within a reasonable time. The driver will mark the error in the + * netdev structure and arrange for the device to be reset to a sane state + * in order to transmit a new packet. + */ +static void sxgbe_tx_timeout(struct net_device *dev) +{ + struct sxgbe_priv_data *priv = netdev_priv(dev); + + sxgbe_reset_all_tx_queues(priv); +} + +/** + * sxgbe_common_interrupt - main ISR + * @irq: interrupt number. + * @dev_id: to pass the net device pointer. + * Description: this is the main driver interrupt service routine. + * It calls the DMA ISR and also the core ISR to manage PMT, MMC, LPI + * interrupts. + */ +static irqreturn_t sxgbe_common_interrupt(int irq, void *dev_id) +{ + return IRQ_HANDLED; +} + +/** + * sxgbe_tx_interrupt - TX DMA ISR + * @irq: interrupt number. + * @dev_id: to pass the net device pointer. + * Description: this is the tx dma interrupt service routine. + */ +static irqreturn_t sxgbe_tx_interrupt(int irq, void *dev_id) +{ + int status; + struct sxgbe_tx_queue *txq = (struct sxgbe_tx_queue *)dev_id; + struct sxgbe_priv_data *priv = txq->priv_ptr; + + /* get the channel status */ + status = priv->hw->dma->tx_dma_int_status(priv->ioaddr, txq->queue_no, + &priv->xstats); + /* check for normal path */ + if (likely((status & handle_tx))) + napi_schedule(&priv->napi); + + /* check for unrecoverable error */ + if (unlikely((status & tx_hard_error))) + sxgbe_restart_tx_queue(priv, txq->queue_no); + + /* check for TC configuration change */ + if (unlikely((status & tx_bump_tc) && + (priv->tx_tc != SXGBE_MTL_SFMODE) && + (priv->tx_tc < 512))) { + /* step of TX TC is 32 till 128, otherwise 64 */ + priv->tx_tc += (priv->tx_tc < 128) ? 32 : 64; + priv->hw->mtl->set_tx_mtl_mode(priv->ioaddr, + txq->queue_no, priv->tx_tc); + priv->xstats.tx_threshold = priv->tx_tc; + } + + return IRQ_HANDLED; +} + +/** + * sxgbe_rx_interrupt - RX DMA ISR + * @irq: interrupt number. + * @dev_id: to pass the net device pointer. + * Description: this is the rx dma interrupt service routine. + */ +static irqreturn_t sxgbe_rx_interrupt(int irq, void *dev_id) +{ + int status; + struct sxgbe_rx_queue *rxq = (struct sxgbe_rx_queue *)dev_id; + struct sxgbe_priv_data *priv = rxq->priv_ptr; + + /* get the channel status */ + status = priv->hw->dma->rx_dma_int_status(priv->ioaddr, rxq->queue_no, + &priv->xstats); + + if (likely((status & handle_rx) && (napi_schedule_prep(&priv->napi)))) { + priv->hw->dma->disable_dma_irq(priv->ioaddr, rxq->queue_no); + __napi_schedule(&priv->napi); + } + + /* check for TC configuration change */ + if (unlikely((status & rx_bump_tc) && + (priv->rx_tc != SXGBE_MTL_SFMODE) && + (priv->rx_tc < 128))) { + /* step of TC is 32 */ + priv->rx_tc += 32; + priv->hw->mtl->set_rx_mtl_mode(priv->ioaddr, + rxq->queue_no, priv->rx_tc); + priv->xstats.rx_threshold = priv->rx_tc; + } + + return IRQ_HANDLED; +} + +static inline u64 sxgbe_get_stat64(void __iomem *ioaddr, int reg_lo, int reg_hi) +{ + u64 val = readl(ioaddr + reg_lo); + + val |= ((u64)readl(ioaddr + reg_hi)) << 32; + + return val; +} + + +/* sxgbe_get_stats64 - entry point to see statistical information of device + * @dev : device pointer. + * @stats : pointer to hold all the statistical information of device. + * Description: + * This function is a driver entry point whenever ifconfig command gets + * executed to see device statistics. Statistics are number of + * bytes sent or received, errors occured etc. + * Return value: + * This function returns various statistical information of device. + */ +static struct rtnl_link_stats64 *sxgbe_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct sxgbe_priv_data *priv = netdev_priv(dev); + void __iomem *ioaddr = priv->ioaddr; + u64 count; + + spin_lock(&priv->stats_lock); + /* Freeze the counter registers before reading value otherwise it may + * get updated by hardware while we are reading them + */ + writel(SXGBE_MMC_CTRL_CNT_FRZ, ioaddr + SXGBE_MMC_CTL_REG); + + stats->rx_bytes = sxgbe_get_stat64(ioaddr, + SXGBE_MMC_RXOCTETLO_GCNT_REG, + SXGBE_MMC_RXOCTETHI_GCNT_REG); + + stats->rx_packets = sxgbe_get_stat64(ioaddr, + SXGBE_MMC_RXFRAMELO_GBCNT_REG, + SXGBE_MMC_RXFRAMEHI_GBCNT_REG); + + stats->multicast = sxgbe_get_stat64(ioaddr, + SXGBE_MMC_RXMULTILO_GCNT_REG, + SXGBE_MMC_RXMULTIHI_GCNT_REG); + + stats->rx_crc_errors = sxgbe_get_stat64(ioaddr, + SXGBE_MMC_RXCRCERRLO_REG, + SXGBE_MMC_RXCRCERRHI_REG); + + stats->rx_length_errors = sxgbe_get_stat64(ioaddr, + SXGBE_MMC_RXLENERRLO_REG, + SXGBE_MMC_RXLENERRHI_REG); + + stats->rx_missed_errors = sxgbe_get_stat64(ioaddr, + SXGBE_MMC_RXFIFOOVERFLOWLO_GBCNT_REG, + SXGBE_MMC_RXFIFOOVERFLOWHI_GBCNT_REG); + + stats->tx_bytes = sxgbe_get_stat64(ioaddr, + SXGBE_MMC_TXOCTETLO_GCNT_REG, + SXGBE_MMC_TXOCTETHI_GCNT_REG); + + count = sxgbe_get_stat64(ioaddr, SXGBE_MMC_TXFRAMELO_GBCNT_REG, + SXGBE_MMC_TXFRAMEHI_GBCNT_REG); + + stats->tx_errors = sxgbe_get_stat64(ioaddr, SXGBE_MMC_TXFRAMELO_GCNT_REG, + SXGBE_MMC_TXFRAMEHI_GCNT_REG); + stats->tx_errors = count - stats->tx_errors; + stats->tx_packets = count; + stats->tx_fifo_errors = sxgbe_get_stat64(ioaddr, SXGBE_MMC_TXUFLWLO_GBCNT_REG, + SXGBE_MMC_TXUFLWHI_GBCNT_REG); + writel(0, ioaddr + SXGBE_MMC_CTL_REG); + spin_unlock(&priv->stats_lock); + + return stats; +} + +/* sxgbe_set_features - entry point to set offload features of the device. + * @dev : device pointer. + * @features : features which are required to be set. + * Description: + * This function is a driver entry point and called by Linux kernel whenever + * any device features are set or reset by user. + * Return value: + * This function returns 0 after setting or resetting device features. + */ +static int sxgbe_set_features(struct net_device *dev, + netdev_features_t features) +{ + struct sxgbe_priv_data *priv = netdev_priv(dev); + netdev_features_t changed = dev->features ^ features; + u32 ctrl; + + if (changed & NETIF_F_RXCSUM) { + ctrl = readl(priv->ioaddr + SXGBE_CORE_RX_CONFIG_REG); + if (features & NETIF_F_RXCSUM) + ctrl |= SXGBE_RX_CSUMOFFLOAD_ENABLE; + else + ctrl &= ~SXGBE_RX_CSUMOFFLOAD_ENABLE; + writel(ctrl, priv->ioaddr + SXGBE_CORE_RX_CONFIG_REG); + } + + return 0; +} + +/* sxgbe_change_mtu - entry point to change MTU size for the device. + * @dev : device pointer. + * @new_mtu : the new MTU size for the device. + * Description: the Maximum Transfer Unit (MTU) is used by the network layer + * to drive packet transmission. Ethernet has an MTU of 1500 octets + * (ETH_DATA_LEN). This value can be changed with ifconfig. + * Return value: + * 0 on success and an appropriate (-)ve integer as defined in errno.h + * file on failure. + */ +static int sxgbe_change_mtu(struct net_device *dev, int new_mtu) +{ + /* RFC 791, page 25, "Every internet module must be able to forward + * a datagram of 68 octets without further fragmentation." + */ + if (new_mtu < MIN_MTU || (new_mtu > MAX_MTU)) { + netdev_err(dev, "invalid MTU, MTU should be in between %d and %d\n", + MIN_MTU, MAX_MTU); + return -EINVAL; + } + + /* Return if the buffer sizes will not change */ + if (dev->mtu == new_mtu) + return 0; + + dev->mtu = new_mtu; + + if (!netif_running(dev)) + return 0; + + /* Recevice ring buffer size is needed to be set based on MTU. If MTU is + * changed then reinitilisation of the receive ring buffers need to be + * done. Hence bring interface down and bring interface back up + */ + sxgbe_release(dev); + return sxgbe_open(dev); +} + +static void sxgbe_set_umac_addr(void __iomem *ioaddr, unsigned char *addr, + unsigned int reg_n) +{ + unsigned long data; + + data = (addr[5] << 8) | addr[4]; + /* For MAC Addr registers se have to set the Address Enable (AE) + * bit that has no effect on the High Reg 0 where the bit 31 (MO) + * is RO. + */ + writel(data | SXGBE_HI_REG_AE, ioaddr + SXGBE_ADDR_HIGH(reg_n)); + data = (addr[3] << 24) | (addr[2] << 16) | (addr[1] << 8) | addr[0]; + writel(data, ioaddr + SXGBE_ADDR_LOW(reg_n)); +} + +/** + * sxgbe_set_rx_mode - entry point for setting different receive mode of + * a device. unicast, multicast addressing + * @dev : pointer to the device structure + * Description: + * This function is a driver entry point which gets called by the kernel + * whenever different receive mode like unicast, multicast and promiscuous + * must be enabled/disabled. + * Return value: + * void. + */ +static void sxgbe_set_rx_mode(struct net_device *dev) +{ + struct sxgbe_priv_data *priv = netdev_priv(dev); + void __iomem *ioaddr = (void __iomem *)priv->ioaddr; + unsigned int value = 0; + u32 mc_filter[2]; + struct netdev_hw_addr *ha; + int reg = 1; + + netdev_dbg(dev, "%s: # mcasts %d, # unicast %d\n", + __func__, netdev_mc_count(dev), netdev_uc_count(dev)); + + if (dev->flags & IFF_PROMISC) { + value = SXGBE_FRAME_FILTER_PR; + + } else if ((netdev_mc_count(dev) > SXGBE_HASH_TABLE_SIZE) || + (dev->flags & IFF_ALLMULTI)) { + value = SXGBE_FRAME_FILTER_PM; /* pass all multi */ + writel(0xffffffff, ioaddr + SXGBE_HASH_HIGH); + writel(0xffffffff, ioaddr + SXGBE_HASH_LOW); + + } else if (!netdev_mc_empty(dev)) { + /* Hash filter for multicast */ + value = SXGBE_FRAME_FILTER_HMC; + + memset(mc_filter, 0, sizeof(mc_filter)); + netdev_for_each_mc_addr(ha, dev) { + /* The upper 6 bits of the calculated CRC are used to + * index the contens of the hash table + */ + int bit_nr = bitrev32(~crc32_le(~0, ha->addr, 6)) >> 26; + + /* The most significant bit determines the register to + * use (H/L) while the other 5 bits determine the bit + * within the register. + */ + mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31); + } + writel(mc_filter[0], ioaddr + SXGBE_HASH_LOW); + writel(mc_filter[1], ioaddr + SXGBE_HASH_HIGH); + } + + /* Handle multiple unicast addresses (perfect filtering) */ + if (netdev_uc_count(dev) > SXGBE_MAX_PERFECT_ADDRESSES) + /* Switch to promiscuous mode if more than 16 addrs + * are required + */ + value |= SXGBE_FRAME_FILTER_PR; + else { + netdev_for_each_uc_addr(ha, dev) { + sxgbe_set_umac_addr(ioaddr, ha->addr, reg); + reg++; + } + } +#ifdef FRAME_FILTER_DEBUG + /* Enable Receive all mode (to debug filtering_fail errors) */ + value |= SXGBE_FRAME_FILTER_RA; +#endif + writel(value, ioaddr + SXGBE_FRAME_FILTER); + + netdev_dbg(dev, "Filter: 0x%08x\n\tHash: HI 0x%08x, LO 0x%08x\n", + readl(ioaddr + SXGBE_FRAME_FILTER), + readl(ioaddr + SXGBE_HASH_HIGH), + readl(ioaddr + SXGBE_HASH_LOW)); +} + +/** + * sxgbe_config - entry point for changing configuration mode passed on by + * ifconfig + * @dev : pointer to the device structure + * @map : pointer to the device mapping structure + * Description: + * This function is a driver entry point which gets called by the kernel + * whenever some device configuration is changed. + * Return value: + * This function returns 0 if success and appropriate error otherwise. + */ +static int sxgbe_config(struct net_device *dev, struct ifmap *map) +{ + struct sxgbe_priv_data *priv = netdev_priv(dev); + + /* Can't act on a running interface */ + if (dev->flags & IFF_UP) + return -EBUSY; + + /* Don't allow changing the I/O address */ + if (map->base_addr != (unsigned long)priv->ioaddr) { + netdev_warn(dev, "can't change I/O address\n"); + return -EOPNOTSUPP; + } + + /* Don't allow changing the IRQ */ + if (map->irq != priv->irq) { + netdev_warn(dev, "not change IRQ number %d\n", priv->irq); + return -EOPNOTSUPP; + } + + return 0; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +/** + * sxgbe_poll_controller - entry point for polling receive by device + * @dev : pointer to the device structure + * Description: + * This function is used by NETCONSOLE and other diagnostic tools + * to allow network I/O with interrupts disabled. + * Return value: + * Void. + */ +static void sxgbe_poll_controller(struct net_device *dev) +{ + struct sxgbe_priv_data *priv = netdev_priv(dev); + + disable_irq(priv->irq); + sxgbe_rx_interrupt(priv->irq, dev); + enable_irq(priv->irq); +} +#endif + +/* sxgbe_ioctl - Entry point for the Ioctl + * @dev: Device pointer. + * @rq: An IOCTL specefic structure, that can contain a pointer to + * a proprietary structure used to pass information to the driver. + * @cmd: IOCTL command + * Description: + * Currently it supports the phy_mii_ioctl(...) and HW time stamping. + */ +static int sxgbe_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct sxgbe_priv_data *priv = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + if (!netif_running(dev)) + return -EINVAL; + + switch (cmd) { + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCSMIIREG: + if (!priv->phydev) + return -EINVAL; + ret = phy_mii_ioctl(priv->phydev, rq, cmd); + break; + default: + break; + } + + return ret; +} + +static const struct net_device_ops sxgbe_netdev_ops = { + .ndo_open = sxgbe_open, + .ndo_start_xmit = sxgbe_xmit, + .ndo_stop = sxgbe_release, + .ndo_get_stats64 = sxgbe_get_stats64, + .ndo_change_mtu = sxgbe_change_mtu, + .ndo_set_features = sxgbe_set_features, + .ndo_set_rx_mode = sxgbe_set_rx_mode, + .ndo_tx_timeout = sxgbe_tx_timeout, + .ndo_do_ioctl = sxgbe_ioctl, + .ndo_set_config = sxgbe_config, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = sxgbe_poll_controller, +#endif + .ndo_set_mac_address = eth_mac_addr, +}; + +/* Get the hardware ops */ +void sxgbe_get_ops(struct sxgbe_ops * const ops_ptr) +{ + ops_ptr->mac = sxgbe_get_core_ops(); + ops_ptr->desc = sxgbe_get_desc_ops(); + ops_ptr->dma = sxgbe_get_dma_ops(); + ops_ptr->mtl = sxgbe_get_mtl_ops(); + + /* set the MDIO communication Address/Data regisers */ + ops_ptr->mii.addr = SXGBE_MDIO_SCMD_ADD_REG; + ops_ptr->mii.data = SXGBE_MDIO_SCMD_DATA_REG; + + /* Assigning the default link settings + * no SXGBE defined default values to be set in registers, + * so assigning as 0 for port and duplex + */ + ops_ptr->link.port = 0; + ops_ptr->link.duplex = 0; + ops_ptr->link.speed = SXGBE_SPEED_10G; +} + +/** + * sxgbe_hw_init - Init the GMAC device + * @priv: driver private structure + * Description: this function checks the HW capability + * (if supported) and sets the driver's features. + */ +static void sxgbe_hw_init(struct sxgbe_priv_data * const priv) +{ + u32 ctrl_ids; + + priv->hw = kmalloc(sizeof(*priv->hw), GFP_KERNEL); + + /* get the hardware ops */ + sxgbe_get_ops(priv->hw); + + /* get the controller id */ + ctrl_ids = priv->hw->mac->get_controller_version(priv->ioaddr); + priv->hw->ctrl_uid = (ctrl_ids & 0x00ff0000) >> 16; + priv->hw->ctrl_id = (ctrl_ids & 0x000000ff); + pr_info("user ID: 0x%x, Controller ID: 0x%x\n", + priv->hw->ctrl_uid, priv->hw->ctrl_id); + + /* get the H/W features */ + if (!sxgbe_get_hw_features(priv)) + pr_info("Hardware features not found\n"); + + if (priv->hw_cap.tx_csum_offload) + pr_info("TX Checksum offload supported\n"); + + if (priv->hw_cap.rx_csum_offload) + pr_info("RX Checksum offload supported\n"); +} + +/** + * sxgbe_drv_probe + * @device: device pointer + * @plat_dat: platform data pointer + * @addr: iobase memory address + * Description: this is the main probe function used to + * call the alloc_etherdev, allocate the priv structure. + */ +struct sxgbe_priv_data *sxgbe_drv_probe(struct device *device, + struct sxgbe_plat_data *plat_dat, + void __iomem *addr) +{ + struct sxgbe_priv_data *priv; + struct net_device *ndev; + int ret; + + ndev = alloc_etherdev_mqs(sizeof(struct sxgbe_priv_data), + SXGBE_TX_QUEUES, SXGBE_RX_QUEUES); + if (!ndev) + return NULL; + + SET_NETDEV_DEV(ndev, device); + + priv = netdev_priv(ndev); + priv->device = device; + priv->dev = ndev; + + sxgbe_set_ethtool_ops(ndev); + priv->plat = plat_dat; + priv->ioaddr = addr; + + /* Init MAC and get the capabilities */ + sxgbe_hw_init(priv); + + /* allocate memory resources for Descriptor rings */ + ret = txring_mem_alloc(priv); + if (ret) + goto error_free_netdev; + + ret = rxring_mem_alloc(priv); + if (ret) + goto error_free_netdev; + + ndev->netdev_ops = &sxgbe_netdev_ops; + + ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM; + ndev->features |= ndev->hw_features | NETIF_F_HIGHDMA; + ndev->watchdog_timeo = msecs_to_jiffies(TX_TIMEO); + + /* assign filtering support */ + ndev->priv_flags |= IFF_UNICAST_FLT; + + priv->msg_enable = netif_msg_init(debug, default_msg_level); + + /* Rx Watchdog is available, enable depend on platform data */ + if (!priv->plat->riwt_off) { + priv->use_riwt = 1; + pr_info("Enable RX Mitigation via HW Watchdog Timer\n"); + } + + netif_napi_add(ndev, &priv->napi, sxgbe_poll, 64); + + spin_lock_init(&priv->stats_lock); + + priv->sxgbe_clk = clk_get(priv->device, SXGBE_RESOURCE_NAME); + if (IS_ERR(priv->sxgbe_clk)) { + netdev_warn(ndev, "%s: warning: cannot get CSR clock\n", + __func__); + goto error_clk_get; + } + + /* If a specific clk_csr value is passed from the platform + * this means that the CSR Clock Range selection cannot be + * changed at run-time and it is fixed. Viceversa the driver'll try to + * set the MDC clock dynamically according to the csr actual + * clock input. + */ + if (!priv->plat->clk_csr) + sxgbe_clk_csr_set(priv); + else + priv->clk_csr = priv->plat->clk_csr; + + /* MDIO bus Registration */ + ret = sxgbe_mdio_register(ndev); + if (ret < 0) { + netdev_dbg(ndev, "%s: MDIO bus (id: %d) registration failed\n", + __func__, priv->plat->bus_id); + goto error_mdio_register; + } + + ret = register_netdev(ndev); + if (ret) { + pr_err("%s: ERROR %i registering the device\n", __func__, ret); + goto error_netdev_register; + } + + sxgbe_check_ether_addr(priv); + + return priv; + +error_mdio_register: + clk_put(priv->sxgbe_clk); +error_clk_get: +error_netdev_register: + netif_napi_del(&priv->napi); +error_free_netdev: + free_netdev(ndev); + + return NULL; +} + +/** + * sxgbe_drv_remove + * @ndev: net device pointer + * Description: this function resets the TX/RX processes, disables the MAC RX/TX + * changes the link status, releases the DMA descriptor rings. + */ +int sxgbe_drv_remove(struct net_device *ndev) +{ + struct sxgbe_priv_data *priv = netdev_priv(ndev); + + netdev_info(ndev, "%s: removing driver\n", __func__); + + priv->hw->dma->stop_rx(priv->ioaddr, SXGBE_RX_QUEUES); + priv->hw->dma->stop_tx(priv->ioaddr, SXGBE_TX_QUEUES); + + priv->hw->mac->enable_tx(priv->ioaddr, false); + priv->hw->mac->enable_rx(priv->ioaddr, false); + + netif_napi_del(&priv->napi); + + sxgbe_mdio_unregister(ndev); + + unregister_netdev(ndev); + + free_netdev(ndev); + + return 0; +} + +#ifdef CONFIG_PM +int sxgbe_suspend(struct net_device *ndev) +{ + return 0; +} + +int sxgbe_resume(struct net_device *ndev) +{ + return 0; +} + +int sxgbe_freeze(struct net_device *ndev) +{ + return -ENOSYS; +} + +int sxgbe_restore(struct net_device *ndev) +{ + return -ENOSYS; +} +#endif /* CONFIG_PM */ + +/* Driver is configured as Platform driver */ +static int __init sxgbe_init(void) +{ + int ret; + + ret = sxgbe_register_platform(); + if (ret) + goto err; + return 0; +err: + pr_err("driver registration failed\n"); + return ret; +} + +static void __exit sxgbe_exit(void) +{ + sxgbe_unregister_platform(); +} + +module_init(sxgbe_init); +module_exit(sxgbe_exit); + +#ifndef MODULE +static int __init sxgbe_cmdline_opt(char *str) +{ + return 0; +} + +__setup("sxgbeeth=", sxgbe_cmdline_opt); +#endif /* MODULE */ + + + +MODULE_DESCRIPTION("SAMSUNG 10G/2.5G/1G Ethernet PLATFORM driver"); + +MODULE_PARM_DESC(debug, "Message Level (-1: default, 0: no output, 16: all)"); + +MODULE_AUTHOR("Siva Reddy Kallam "); +MODULE_AUTHOR("ByungHo An "); +MODULE_AUTHOR("Girish K S "); +MODULE_AUTHOR("Vipul Pandya "); + +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c new file mode 100644 index 000000000000..b0eb0a2c52ca --- /dev/null +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mdio.c @@ -0,0 +1,251 @@ +/* 10G controller driver for Samsung SoCs + * + * Copyright (C) 2013 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Author: Siva Reddy Kallam + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include "sxgbe_common.h" +#include "sxgbe_reg.h" + +#define SXGBE_SMA_WRITE_CMD 0x01 /* write command */ +#define SXGBE_SMA_PREAD_CMD 0x02 /* post read increament address */ +#define SXGBE_SMA_READ_CMD 0x03 /* read command */ +#define SXGBE_SMA_SKIP_ADDRFRM 0x00040000 /* skip the address frame */ +#define SXGBE_MII_BUSY 0x00800000 /* mii busy */ + +static int sxgbe_mdio_busy_wait(void __iomem *ioaddr, unsigned int mii_data) +{ + unsigned long fin_time = jiffies + 3 * HZ; /* 3 seconds */ + + while (!time_after(jiffies, fin_time)) { + if (!(readl(ioaddr + mii_data) & SXGBE_MII_BUSY)) + return 0; + cpu_relax(); + } + + return -EBUSY; +} + +static void sxgbe_mdio_ctrl_data(struct sxgbe_priv_data *sp, u32 cmd, + u16 phydata) +{ + u32 reg = phydata; + + reg |= (cmd << 16) | SXGBE_SMA_SKIP_ADDRFRM | + ((sp->clk_csr & 0x7) << 19) | SXGBE_MII_BUSY; + writel(reg, sp->ioaddr + sp->hw->mii.data); +} + +static void sxgbe_mdio_c45(struct sxgbe_priv_data *sp, u32 cmd, int phyaddr, + int phyreg, u16 phydata) +{ + u32 reg; + + /* set mdio address register */ + reg = ((phyreg >> 16) & 0x1f) << 21; + reg |= (phyaddr << 16) | (phyreg & 0xffff); + writel(reg, sp->ioaddr + sp->hw->mii.addr); + + sxgbe_mdio_ctrl_data(sp, cmd, phydata); +} + +static void sxgbe_mdio_c22(struct sxgbe_priv_data *sp, u32 cmd, int phyaddr, + int phyreg, u16 phydata) +{ + u32 reg; + + writel(1 << phyaddr, sp->ioaddr + SXGBE_MDIO_CLAUSE22_PORT_REG); + + /* set mdio address register */ + reg = (phyaddr << 16) | (phyreg & 0x1f); + writel(reg, sp->ioaddr + sp->hw->mii.addr); + + sxgbe_mdio_ctrl_data(sp, cmd, phydata); +} + +static int sxgbe_mdio_access(struct sxgbe_priv_data *sp, u32 cmd, int phyaddr, + int phyreg, u16 phydata) +{ + const struct mii_regs *mii = &sp->hw->mii; + int rc; + + rc = sxgbe_mdio_busy_wait(sp->ioaddr, mii->data); + if (rc < 0) + return rc; + + if (phyreg & MII_ADDR_C45) { + sxgbe_mdio_c45(sp, cmd, phyaddr, phyreg, phydata); + } else { + /* Ports 0-3 only support C22. */ + if (phyaddr >= 4) + return -ENODEV; + + sxgbe_mdio_c22(sp, cmd, phyaddr, phyreg, phydata); + } + + return sxgbe_mdio_busy_wait(sp->ioaddr, mii->data); +} + +/** + * sxgbe_mdio_read + * @bus: points to the mii_bus structure + * @phyaddr: address of phy port + * @phyreg: address of register with in phy register + * Description: this function used for C45 and C22 MDIO Read + */ +static int sxgbe_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) +{ + struct net_device *ndev = bus->priv; + struct sxgbe_priv_data *priv = netdev_priv(ndev); + int rc; + + rc = sxgbe_mdio_access(priv, SXGBE_SMA_READ_CMD, phyaddr, phyreg, 0); + if (rc < 0) + return rc; + + return readl(priv->ioaddr + priv->hw->mii.data) & 0xffff; +} + +/** + * sxgbe_mdio_write + * @bus: points to the mii_bus structure + * @phyaddr: address of phy port + * @phyreg: address of phy registers + * @phydata: data to be written into phy register + * Description: this function is used for C45 and C22 MDIO write + */ +static int sxgbe_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, + u16 phydata) +{ + struct net_device *ndev = bus->priv; + struct sxgbe_priv_data *priv = netdev_priv(ndev); + + return sxgbe_mdio_access(priv, SXGBE_SMA_WRITE_CMD, phyaddr, phyreg, + phydata); +} + +int sxgbe_mdio_register(struct net_device *ndev) +{ + struct mii_bus *mdio_bus; + struct sxgbe_priv_data *priv = netdev_priv(ndev); + struct sxgbe_mdio_bus_data *mdio_data = priv->plat->mdio_bus_data; + int err, phy_addr; + int *irqlist; + bool act; + + /* allocate the new mdio bus */ + mdio_bus = mdiobus_alloc(); + if (!mdio_bus) { + netdev_err(ndev, "%s: mii bus allocation failed\n", __func__); + return -ENOMEM; + } + + if (mdio_data->irqs) + irqlist = mdio_data->irqs; + else + irqlist = priv->mii_irq; + + /* assign mii bus fields */ + mdio_bus->name = "samsxgbe"; + mdio_bus->read = &sxgbe_mdio_read; + mdio_bus->write = &sxgbe_mdio_write; + snprintf(mdio_bus->id, MII_BUS_ID_SIZE, "%s-%x", + mdio_bus->name, priv->plat->bus_id); + mdio_bus->priv = ndev; + mdio_bus->phy_mask = mdio_data->phy_mask; + mdio_bus->parent = priv->device; + + /* register with kernel subsystem */ + err = mdiobus_register(mdio_bus); + if (err != 0) { + netdev_err(ndev, "mdiobus register failed\n"); + goto mdiobus_err; + } + + for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) { + struct phy_device *phy = mdio_bus->phy_map[phy_addr]; + + if (phy) { + char irq_num[4]; + char *irq_str; + /* If an IRQ was provided to be assigned after + * the bus probe, do it here. + */ + if ((mdio_data->irqs == NULL) && + (mdio_data->probed_phy_irq > 0)) { + irqlist[phy_addr] = mdio_data->probed_phy_irq; + phy->irq = mdio_data->probed_phy_irq; + } + + /* If we're going to bind the MAC to this PHY bus, + * and no PHY number was provided to the MAC, + * use the one probed here. + */ + if (priv->plat->phy_addr == -1) + priv->plat->phy_addr = phy_addr; + + act = (priv->plat->phy_addr == phy_addr); + switch (phy->irq) { + case PHY_POLL: + irq_str = "POLL"; + break; + case PHY_IGNORE_INTERRUPT: + irq_str = "IGNORE"; + break; + default: + sprintf(irq_num, "%d", phy->irq); + irq_str = irq_num; + break; + } + netdev_info(ndev, "PHY ID %08x at %d IRQ %s (%s)%s\n", + phy->phy_id, phy_addr, irq_str, + dev_name(&phy->dev), act ? " active" : ""); + } + } + + if (!err) { + netdev_err(ndev, "PHY not found\n"); + mdiobus_unregister(mdio_bus); + mdiobus_free(mdio_bus); + goto mdiobus_err; + } + + priv->mii = mdio_bus; + + return 0; + +mdiobus_err: + mdiobus_free(mdio_bus); + return err; +} + +int sxgbe_mdio_unregister(struct net_device *ndev) +{ + struct sxgbe_priv_data *priv = netdev_priv(ndev); + + if (!priv->mii) + return 0; + + mdiobus_unregister(priv->mii); + priv->mii->priv = NULL; + mdiobus_free(priv->mii); + priv->mii = NULL; + + return 0; +} diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.c new file mode 100644 index 000000000000..324681c2bb74 --- /dev/null +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.c @@ -0,0 +1,254 @@ +/* 10G controller driver for Samsung SoCs + * + * Copyright (C) 2013 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Author: Siva Reddy Kallam + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +#include "sxgbe_mtl.h" +#include "sxgbe_reg.h" + +static void sxgbe_mtl_init(void __iomem *ioaddr, unsigned int etsalg, + unsigned int raa) +{ + u32 reg_val; + + reg_val = readl(ioaddr + SXGBE_MTL_OP_MODE_REG); + reg_val &= ETS_RST; + + /* ETS Algorith */ + switch (etsalg & SXGBE_MTL_OPMODE_ESTMASK) { + case ETS_WRR: + reg_val &= ETS_WRR; + break; + case ETS_WFQ: + reg_val |= ETS_WFQ; + break; + case ETS_DWRR: + reg_val |= ETS_DWRR; + break; + } + writel(reg_val, ioaddr + SXGBE_MTL_OP_MODE_REG); + + switch (raa & SXGBE_MTL_OPMODE_RAAMASK) { + case RAA_SP: + reg_val &= RAA_SP; + break; + case RAA_WSP: + reg_val |= RAA_WSP; + break; + } + writel(reg_val, ioaddr + SXGBE_MTL_OP_MODE_REG); +} + +/* For Dynamic DMA channel mapping for Rx queue */ +static void sxgbe_mtl_dma_dm_rxqueue(void __iomem *ioaddr) +{ + writel(RX_QUEUE_DYNAMIC, ioaddr + SXGBE_MTL_RXQ_DMAMAP0_REG); + writel(RX_QUEUE_DYNAMIC, ioaddr + SXGBE_MTL_RXQ_DMAMAP1_REG); + writel(RX_QUEUE_DYNAMIC, ioaddr + SXGBE_MTL_RXQ_DMAMAP2_REG); +} + +static void sxgbe_mtl_set_txfifosize(void __iomem *ioaddr, int queue_num, + int queue_fifo) +{ + u32 fifo_bits, reg_val; + + /* 0 means 256 bytes */ + fifo_bits = (queue_fifo / SXGBE_MTL_TX_FIFO_DIV) - 1; + reg_val = readl(ioaddr + SXGBE_MTL_TXQ_OPMODE_REG(queue_num)); + reg_val |= (fifo_bits << SXGBE_MTL_FIFO_LSHIFT); + writel(reg_val, ioaddr + SXGBE_MTL_TXQ_OPMODE_REG(queue_num)); +} + +static void sxgbe_mtl_set_rxfifosize(void __iomem *ioaddr, int queue_num, + int queue_fifo) +{ + u32 fifo_bits, reg_val; + + /* 0 means 256 bytes */ + fifo_bits = (queue_fifo / SXGBE_MTL_RX_FIFO_DIV)-1; + reg_val = readl(ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); + reg_val |= (fifo_bits << SXGBE_MTL_FIFO_LSHIFT); + writel(reg_val, ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); +} + +static void sxgbe_mtl_enable_txqueue(void __iomem *ioaddr, int queue_num) +{ + u32 reg_val; + + reg_val = readl(ioaddr + SXGBE_MTL_TXQ_OPMODE_REG(queue_num)); + reg_val |= SXGBE_MTL_ENABLE_QUEUE; + writel(reg_val, ioaddr + SXGBE_MTL_TXQ_OPMODE_REG(queue_num)); +} + +static void sxgbe_mtl_disable_txqueue(void __iomem *ioaddr, int queue_num) +{ + u32 reg_val; + + reg_val = readl(ioaddr + SXGBE_MTL_TXQ_OPMODE_REG(queue_num)); + reg_val &= ~SXGBE_MTL_ENABLE_QUEUE; + writel(reg_val, ioaddr + SXGBE_MTL_TXQ_OPMODE_REG(queue_num)); +} + +static void sxgbe_mtl_fc_active(void __iomem *ioaddr, int queue_num, + int threshold) +{ + u32 reg_val; + + reg_val = readl(ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); + reg_val &= ~(SXGBE_MTL_FCMASK << RX_FC_ACTIVE); + reg_val |= (threshold << RX_FC_ACTIVE); + + writel(reg_val, ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); +} + +static void sxgbe_mtl_fc_enable(void __iomem *ioaddr, int queue_num) +{ + u32 reg_val; + + reg_val = readl(ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); + reg_val |= SXGBE_MTL_ENABLE_FC; + writel(reg_val, ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); +} + +static void sxgbe_mtl_fc_deactive(void __iomem *ioaddr, int queue_num, + int threshold) +{ + u32 reg_val; + + reg_val = readl(ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); + reg_val &= ~(SXGBE_MTL_FCMASK << RX_FC_DEACTIVE); + reg_val |= (threshold << RX_FC_DEACTIVE); + + writel(reg_val, ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); +} + +static void sxgbe_mtl_fep_enable(void __iomem *ioaddr, int queue_num) +{ + u32 reg_val; + + reg_val = readl(ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); + reg_val |= SXGBE_MTL_RXQ_OP_FEP; + + writel(reg_val, ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); +} + +static void sxgbe_mtl_fep_disable(void __iomem *ioaddr, int queue_num) +{ + u32 reg_val; + + reg_val = readl(ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); + reg_val &= ~(SXGBE_MTL_RXQ_OP_FEP); + + writel(reg_val, ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); +} + +static void sxgbe_mtl_fup_enable(void __iomem *ioaddr, int queue_num) +{ + u32 reg_val; + + reg_val = readl(ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); + reg_val |= SXGBE_MTL_RXQ_OP_FUP; + + writel(reg_val, ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); +} + +static void sxgbe_mtl_fup_disable(void __iomem *ioaddr, int queue_num) +{ + u32 reg_val; + + reg_val = readl(ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); + reg_val &= ~(SXGBE_MTL_RXQ_OP_FUP); + + writel(reg_val, ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); +} + + +static void sxgbe_set_tx_mtl_mode(void __iomem *ioaddr, int queue_num, + int tx_mode) +{ + u32 reg_val; + + reg_val = readl(ioaddr + SXGBE_MTL_TXQ_OPMODE_REG(queue_num)); + /* TX specific MTL mode settings */ + if (tx_mode == SXGBE_MTL_SFMODE) { + reg_val |= SXGBE_MTL_SFMODE; + } else { + /* set the TTC values */ + if (tx_mode <= 64) + reg_val |= MTL_CONTROL_TTC_64; + else if (tx_mode <= 96) + reg_val |= MTL_CONTROL_TTC_96; + else if (tx_mode <= 128) + reg_val |= MTL_CONTROL_TTC_128; + else if (tx_mode <= 192) + reg_val |= MTL_CONTROL_TTC_192; + else if (tx_mode <= 256) + reg_val |= MTL_CONTROL_TTC_256; + else if (tx_mode <= 384) + reg_val |= MTL_CONTROL_TTC_384; + else + reg_val |= MTL_CONTROL_TTC_512; + } + + /* write into TXQ operation register */ + writel(reg_val, ioaddr + SXGBE_MTL_TXQ_OPMODE_REG(queue_num)); +} + +static void sxgbe_set_rx_mtl_mode(void __iomem *ioaddr, int queue_num, + int rx_mode) +{ + u32 reg_val; + + reg_val = readl(ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); + /* RX specific MTL mode settings */ + if (rx_mode == SXGBE_RX_MTL_SFMODE) { + reg_val |= SXGBE_RX_MTL_SFMODE; + } else { + if (rx_mode <= 64) + reg_val |= MTL_CONTROL_RTC_64; + else if (rx_mode <= 96) + reg_val |= MTL_CONTROL_RTC_96; + else if (rx_mode <= 128) + reg_val |= MTL_CONTROL_RTC_128; + } + + /* write into RXQ operation register */ + writel(reg_val, ioaddr + SXGBE_MTL_RXQ_OPMODE_REG(queue_num)); +} + +static const struct sxgbe_mtl_ops mtl_ops = { + .mtl_set_txfifosize = sxgbe_mtl_set_txfifosize, + .mtl_set_rxfifosize = sxgbe_mtl_set_rxfifosize, + .mtl_enable_txqueue = sxgbe_mtl_enable_txqueue, + .mtl_disable_txqueue = sxgbe_mtl_disable_txqueue, + .mtl_dynamic_dma_rxqueue = sxgbe_mtl_dma_dm_rxqueue, + .set_tx_mtl_mode = sxgbe_set_tx_mtl_mode, + .set_rx_mtl_mode = sxgbe_set_rx_mtl_mode, + .mtl_init = sxgbe_mtl_init, + .mtl_fc_active = sxgbe_mtl_fc_active, + .mtl_fc_deactive = sxgbe_mtl_fc_deactive, + .mtl_fc_enable = sxgbe_mtl_fc_enable, + .mtl_fep_enable = sxgbe_mtl_fep_enable, + .mtl_fep_disable = sxgbe_mtl_fep_disable, + .mtl_fup_enable = sxgbe_mtl_fup_enable, + .mtl_fup_disable = sxgbe_mtl_fup_disable +}; + +const struct sxgbe_mtl_ops *sxgbe_get_mtl_ops(void) +{ + return &mtl_ops; +} diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.h new file mode 100644 index 000000000000..7e4810c4137e --- /dev/null +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_mtl.h @@ -0,0 +1,104 @@ +/* 10G controller driver for Samsung SoCs + * + * Copyright (C) 2013 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Author: Siva Reddy Kallam + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __SXGBE_MTL_H__ +#define __SXGBE_MTL_H__ + +#define SXGBE_MTL_OPMODE_ESTMASK 0x3 +#define SXGBE_MTL_OPMODE_RAAMASK 0x1 +#define SXGBE_MTL_FCMASK 0x7 +#define SXGBE_MTL_TX_FIFO_DIV 256 +#define SXGBE_MTL_RX_FIFO_DIV 256 + +#define SXGBE_MTL_RXQ_OP_FEP BIT(4) +#define SXGBE_MTL_RXQ_OP_FUP BIT(3) +#define SXGBE_MTL_ENABLE_FC 0x80 + +#define ETS_WRR 0xFFFFFF9F +#define ETS_RST 0xFFFFFF9F +#define ETS_WFQ 0x00000020 +#define ETS_DWRR 0x00000040 +#define RAA_SP 0xFFFFFFFB +#define RAA_WSP 0x00000004 + +#define RX_QUEUE_DYNAMIC 0x80808080 +#define RX_FC_ACTIVE 8 +#define RX_FC_DEACTIVE 13 + +enum ttc_control { + MTL_CONTROL_TTC_64 = 0x00000000, + MTL_CONTROL_TTC_96 = 0x00000020, + MTL_CONTROL_TTC_128 = 0x00000030, + MTL_CONTROL_TTC_192 = 0x00000040, + MTL_CONTROL_TTC_256 = 0x00000050, + MTL_CONTROL_TTC_384 = 0x00000060, + MTL_CONTROL_TTC_512 = 0x00000070, +}; + +enum rtc_control { + MTL_CONTROL_RTC_64 = 0x00000000, + MTL_CONTROL_RTC_96 = 0x00000002, + MTL_CONTROL_RTC_128 = 0x00000003, +}; + +enum flow_control_th { + MTL_FC_FULL_1K = 0x00000000, + MTL_FC_FULL_2K = 0x00000001, + MTL_FC_FULL_4K = 0x00000002, + MTL_FC_FULL_5K = 0x00000003, + MTL_FC_FULL_6K = 0x00000004, + MTL_FC_FULL_8K = 0x00000005, + MTL_FC_FULL_16K = 0x00000006, + MTL_FC_FULL_24K = 0x00000007, +}; + +struct sxgbe_mtl_ops { + void (*mtl_init)(void __iomem *ioaddr, unsigned int etsalg, + unsigned int raa); + + void (*mtl_set_txfifosize)(void __iomem *ioaddr, int queue_num, + int mtl_fifo); + + void (*mtl_set_rxfifosize)(void __iomem *ioaddr, int queue_num, + int queue_fifo); + + void (*mtl_enable_txqueue)(void __iomem *ioaddr, int queue_num); + + void (*mtl_disable_txqueue)(void __iomem *ioaddr, int queue_num); + + void (*set_tx_mtl_mode)(void __iomem *ioaddr, int queue_num, + int tx_mode); + + void (*set_rx_mtl_mode)(void __iomem *ioaddr, int queue_num, + int rx_mode); + + void (*mtl_dynamic_dma_rxqueue)(void __iomem *ioaddr); + + void (*mtl_fc_active)(void __iomem *ioaddr, int queue_num, + int threshold); + + void (*mtl_fc_deactive)(void __iomem *ioaddr, int queue_num, + int threshold); + + void (*mtl_fc_enable)(void __iomem *ioaddr, int queue_num); + + void (*mtl_fep_enable)(void __iomem *ioaddr, int queue_num); + + void (*mtl_fep_disable)(void __iomem *ioaddr, int queue_num); + + void (*mtl_fup_enable)(void __iomem *ioaddr, int queue_num); + + void (*mtl_fup_disable)(void __iomem *ioaddr, int queue_num); +}; + +const struct sxgbe_mtl_ops *sxgbe_get_mtl_ops(void); + +#endif /* __SXGBE_MTL_H__ */ diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c new file mode 100644 index 000000000000..f5a9de710052 --- /dev/null +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c @@ -0,0 +1,253 @@ +/* 10G controller driver for Samsung SoCs + * + * Copyright (C) 2013 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Author: Siva Reddy Kallam + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sxgbe_common.h" +#include "sxgbe_reg.h" + +#ifdef CONFIG_OF +static int sxgbe_probe_config_dt(struct platform_device *pdev, + struct sxgbe_plat_data *plat, + const char **mac) +{ + struct device_node *np = pdev->dev.of_node; + struct sxgbe_dma_cfg *dma_cfg; + + if (!np) + return -ENODEV; + + *mac = of_get_mac_address(np); + plat->interface = of_get_phy_mode(np); + + plat->bus_id = of_alias_get_id(np, "ethernet"); + if (plat->bus_id < 0) + plat->bus_id = 0; + + plat->mdio_bus_data = devm_kzalloc(&pdev->dev, + sizeof(*plat->mdio_bus_data), + GFP_KERNEL); + + dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*dma_cfg), GFP_KERNEL); + if (!dma_cfg) + return -ENOMEM; + + plat->dma_cfg = dma_cfg; + of_property_read_u32(np, "samsung,pbl", &dma_cfg->pbl); + if (of_property_read_u32(np, "samsung,burst-map", &dma_cfg->burst_map) == 0) + dma_cfg->fixed_burst = true; + + return 0; +} +#else +static int sxgbe_probe_config_dt(struct platform_device *pdev, + struct sxgbe_plat_data *plat, + const char **mac) +{ + return -ENOSYS; +} +#endif /* CONFIG_OF */ + +/** + * sxgbe_platform_probe + * @pdev: platform device pointer + * Description: platform_device probe function. It allocates + * the necessary resources and invokes the main to init + * the net device, register the mdio bus etc. + */ +static int sxgbe_platform_probe(struct platform_device *pdev) +{ + int ret; + int i, chan; + struct resource *res; + struct device *dev = &pdev->dev; + void __iomem *addr; + struct sxgbe_priv_data *priv = NULL; + struct sxgbe_plat_data *plat_dat = NULL; + const char *mac = NULL; + struct net_device *ndev = platform_get_drvdata(pdev); + struct device_node *node = dev->of_node; + + /* Get memory resource */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + goto err_out; + + addr = devm_ioremap_resource(dev, res); + if (IS_ERR(addr)) + return PTR_ERR(addr); + + if (pdev->dev.of_node) { + plat_dat = devm_kzalloc(&pdev->dev, + sizeof(struct sxgbe_plat_data), + GFP_KERNEL); + if (!plat_dat) + return -ENOMEM; + + ret = sxgbe_probe_config_dt(pdev, plat_dat, &mac); + if (ret) { + pr_err("%s: main dt probe failed\n", __func__); + return ret; + } + } + + /* Get MAC address if available (DT) */ + if (mac) + ether_addr_copy(priv->dev->dev_addr, mac); + + priv = sxgbe_drv_probe(&(pdev->dev), plat_dat, addr); + if (!priv) { + pr_err("%s: main driver probe failed\n", __func__); + goto err_out; + } + + /* Get the SXGBE common INT information */ + priv->irq = irq_of_parse_and_map(node, 0); + if (priv->irq <= 0) { + dev_err(dev, "sxgbe common irq parsing failed\n"); + goto err_drv_remove; + } + + /* Get the TX/RX IRQ numbers */ + for (i = 0, chan = 1; i < SXGBE_TX_QUEUES; i++) { + priv->txq[i]->irq_no = irq_of_parse_and_map(node, chan++); + if (priv->txq[i]->irq_no <= 0) { + dev_err(dev, "sxgbe tx irq parsing failed\n"); + goto err_tx_irq_unmap; + } + } + + for (i = 0; i < SXGBE_RX_QUEUES; i++) { + priv->rxq[i]->irq_no = irq_of_parse_and_map(node, chan++); + if (priv->rxq[i]->irq_no <= 0) { + dev_err(dev, "sxgbe rx irq parsing failed\n"); + goto err_rx_irq_unmap; + } + } + + platform_set_drvdata(pdev, priv->dev); + + pr_debug("platform driver registration completed\n"); + + return 0; + +err_rx_irq_unmap: + while (--i) + irq_dispose_mapping(priv->rxq[i]->irq_no); + i = SXGBE_TX_QUEUES; +err_tx_irq_unmap: + while (--i) + irq_dispose_mapping(priv->txq[i]->irq_no); + irq_dispose_mapping(priv->irq); +err_drv_remove: + sxgbe_drv_remove(ndev); +err_out: + return -ENODEV; +} + +/** + * sxgbe_platform_remove + * @pdev: platform device pointer + * Description: this function calls the main to free the net resources + * and calls the platforms hook and release the resources (e.g. mem). + */ +static int sxgbe_platform_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + int ret = sxgbe_drv_remove(ndev); + + return ret; +} + +#ifdef CONFIG_PM +static int sxgbe_platform_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + + return sxgbe_suspend(ndev); +} + +static int sxgbe_platform_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + + return sxgbe_resume(ndev); +} + +int sxgbe_platform_freeze(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + + return sxgbe_freeze(ndev); +} + +int sxgbe_platform_restore(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + + return sxgbe_restore(ndev); +} + +static const struct dev_pm_ops sxgbe_platform_pm_ops = { + .suspend = sxgbe_platform_suspend, + .resume = sxgbe_platform_resume, + .freeze = sxgbe_platform_freeze, + .thaw = sxgbe_platform_restore, + .restore = sxgbe_platform_restore, +}; +#else +static const struct dev_pm_ops sxgbe_platform_pm_ops; +#endif /* CONFIG_PM */ + +static const struct of_device_id sxgbe_dt_ids[] = { + { .compatible = "samsung,sxgbe-v2.0a"}, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, sxgbe_dt_ids); + +struct platform_driver sxgbe_platform_driver = { + .probe = sxgbe_platform_probe, + .remove = sxgbe_platform_remove, + .driver = { + .name = SXGBE_RESOURCE_NAME, + .owner = THIS_MODULE, + .pm = &sxgbe_platform_pm_ops, + .of_match_table = of_match_ptr(sxgbe_dt_ids), + }, +}; + +int sxgbe_register_platform(void) +{ + int err; + + err = platform_driver_register(&sxgbe_platform_driver); + if (err) + pr_err("failed to register the platform driver\n"); + + return err; +} + +void sxgbe_unregister_platform(void) +{ + platform_driver_unregister(&sxgbe_platform_driver); +} diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_reg.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_reg.h new file mode 100644 index 000000000000..d1cd9ac1b062 --- /dev/null +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_reg.h @@ -0,0 +1,477 @@ +/* 10G controller driver for Samsung SoCs + * + * Copyright (C) 2013 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Author: Siva Reddy Kallam + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __SXGBE_REGMAP_H__ +#define __SXGBE_REGMAP_H__ + +/* SXGBE MAC Registers */ +#define SXGBE_CORE_TX_CONFIG_REG 0x0000 +#define SXGBE_CORE_RX_CONFIG_REG 0x0004 +#define SXGBE_CORE_PKT_FILTER_REG 0x0008 +#define SXGBE_CORE_WATCHDOG_TIMEOUT_REG 0x000C +#define SXGBE_CORE_HASH_TABLE_REG0 0x0010 +#define SXGBE_CORE_HASH_TABLE_REG1 0x0014 +#define SXGBE_CORE_HASH_TABLE_REG2 0x0018 +#define SXGBE_CORE_HASH_TABLE_REG3 0x001C +#define SXGBE_CORE_HASH_TABLE_REG4 0x0020 +#define SXGBE_CORE_HASH_TABLE_REG5 0x0024 +#define SXGBE_CORE_HASH_TABLE_REG6 0x0028 +#define SXGBE_CORE_HASH_TABLE_REG7 0x002C +/* VLAN Specific Registers */ +#define SXGBE_CORE_VLAN_TAG_REG 0x0050 +#define SXGBE_CORE_VLAN_HASHTAB_REG 0x0058 +#define SXGBE_CORE_VLAN_INSCTL_REG 0x0060 +#define SXGBE_CORE_VLAN_INNERCTL_REG 0x0064 +#define SXGBE_CORE_RX_ETHTYPE_MATCH_REG 0x006C + +/* Flow Contol Registers */ +#define SXGBE_CORE_TX_Q0_FLOWCTL_REG 0x0070 +#define SXGBE_CORE_TX_Q1_FLOWCTL_REG 0x0074 +#define SXGBE_CORE_TX_Q2_FLOWCTL_REG 0x0078 +#define SXGBE_CORE_TX_Q3_FLOWCTL_REG 0x007C +#define SXGBE_CORE_TX_Q4_FLOWCTL_REG 0x0080 +#define SXGBE_CORE_TX_Q5_FLOWCTL_REG 0x0084 +#define SXGBE_CORE_TX_Q6_FLOWCTL_REG 0x0088 +#define SXGBE_CORE_TX_Q7_FLOWCTL_REG 0x008C +#define SXGBE_CORE_RX_FLOWCTL_REG 0x0090 +#define SXGBE_CORE_RX_CTL0_REG 0x00A0 +#define SXGBE_CORE_RX_CTL1_REG 0x00A4 +#define SXGBE_CORE_RX_CTL2_REG 0x00A8 +#define SXGBE_CORE_RX_CTL3_REG 0x00AC + +/* Interrupt Registers */ +#define SXGBE_CORE_INT_STATUS_REG 0x00B0 +#define SXGBE_CORE_INT_ENABLE_REG 0x00B4 +#define SXGBE_CORE_RXTX_ERR_STATUS_REG 0x00B8 +#define SXGBE_CORE_PMT_CTL_STATUS_REG 0x00C0 +#define SXGBE_CORE_RWK_PKT_FILTER_REG 0x00C4 +#define SXGBE_CORE_VERSION_REG 0x0110 +#define SXGBE_CORE_DEBUG_REG 0x0114 +#define SXGBE_CORE_HW_FEA_REG(index) (0x011C + index * 4) + +/* SMA(MDIO) module registers */ +#define SXGBE_MDIO_SCMD_ADD_REG 0x0200 +#define SXGBE_MDIO_SCMD_DATA_REG 0x0204 +#define SXGBE_MDIO_CCMD_WADD_REG 0x0208 +#define SXGBE_MDIO_CCMD_WDATA_REG 0x020C +#define SXGBE_MDIO_CSCAN_PORT_REG 0x0210 +#define SXGBE_MDIO_INT_STATUS_REG 0x0214 +#define SXGBE_MDIO_INT_ENABLE_REG 0x0218 +#define SXGBE_MDIO_PORT_CONDCON_REG 0x021C +#define SXGBE_MDIO_CLAUSE22_PORT_REG 0x0220 + +/* port specific, addr = 0-3 */ +#define SXGBE_MDIO_DEV_BASE_REG 0x0230 +#define SXGBE_MDIO_PORT_DEV_REG(addr) \ + (SXGBE_MDIO_DEV_BASE_REG + (0x10 * addr) + 0x0) +#define SXGBE_MDIO_PORT_LSTATUS_REG(addr) \ + (SXGBE_MDIO_DEV_BASE_REG + (0x10 * addr) + 0x4) +#define SXGBE_MDIO_PORT_ALIVE_REG(addr) \ + (SXGBE_MDIO_DEV_BASE_REG + (0x10 * addr) + 0x8) + +#define SXGBE_CORE_GPIO_CTL_REG 0x0278 +#define SXGBE_CORE_GPIO_STATUS_REG 0x027C + +/* Address registers for filtering */ +#define SXGBE_CORE_ADD_BASE_REG 0x0300 + +/* addr = 0-31 */ +#define SXGBE_CORE_ADD_HIGHOFFSET(addr) \ + (SXGBE_CORE_ADD_BASE_REG + (0x8 * addr) + 0x0) +#define SXGBE_CORE_ADD_LOWOFFSET(addr) \ + (SXGBE_CORE_ADD_BASE_REG + (0x8 * addr) + 0x4) + +/* SXGBE MMC registers */ +#define SXGBE_MMC_CTL_REG 0x0800 +#define SXGBE_MMC_RXINT_STATUS_REG 0x0804 +#define SXGBE_MMC_TXINT_STATUS_REG 0x0808 +#define SXGBE_MMC_RXINT_ENABLE_REG 0x080C +#define SXGBE_MMC_TXINT_ENABLE_REG 0x0810 + +/* TX specific counters */ +#define SXGBE_MMC_TXOCTETHI_GBCNT_REG 0x0814 +#define SXGBE_MMC_TXOCTETLO_GBCNT_REG 0x0818 +#define SXGBE_MMC_TXFRAMELO_GBCNT_REG 0x081C +#define SXGBE_MMC_TXFRAMEHI_GBCNT_REG 0x0820 +#define SXGBE_MMC_TXBROADLO_GCNT_REG 0x0824 +#define SXGBE_MMC_TXBROADHI_GCNT_REG 0x0828 +#define SXGBE_MMC_TXMULTILO_GCNT_REG 0x082C +#define SXGBE_MMC_TXMULTIHI_GCNT_REG 0x0830 +#define SXGBE_MMC_TX64LO_GBCNT_REG 0x0834 +#define SXGBE_MMC_TX64HI_GBCNT_REG 0x0838 +#define SXGBE_MMC_TX65TO127LO_GBCNT_REG 0x083C +#define SXGBE_MMC_TX65TO127HI_GBCNT_REG 0x0840 +#define SXGBE_MMC_TX128TO255LO_GBCNT_REG 0x0844 +#define SXGBE_MMC_TX128TO255HI_GBCNT_REG 0x0848 +#define SXGBE_MMC_TX256TO511LO_GBCNT_REG 0x084C +#define SXGBE_MMC_TX256TO511HI_GBCNT_REG 0x0850 +#define SXGBE_MMC_TX512TO1023LO_GBCNT_REG 0x0854 +#define SXGBE_MMC_TX512TO1023HI_GBCNT_REG 0x0858 +#define SXGBE_MMC_TX1023TOMAXLO_GBCNT_REG 0x085C +#define SXGBE_MMC_TX1023TOMAXHI_GBCNT_REG 0x0860 +#define SXGBE_MMC_TXUNICASTLO_GBCNT_REG 0x0864 +#define SXGBE_MMC_TXUNICASTHI_GBCNT_REG 0x0868 +#define SXGBE_MMC_TXMULTILO_GBCNT_REG 0x086C +#define SXGBE_MMC_TXMULTIHI_GBCNT_REG 0x0870 +#define SXGBE_MMC_TXBROADLO_GBCNT_REG 0x0874 +#define SXGBE_MMC_TXBROADHI_GBCNT_REG 0x0878 +#define SXGBE_MMC_TXUFLWLO_GBCNT_REG 0x087C +#define SXGBE_MMC_TXUFLWHI_GBCNT_REG 0x0880 +#define SXGBE_MMC_TXOCTETLO_GCNT_REG 0x0884 +#define SXGBE_MMC_TXOCTETHI_GCNT_REG 0x0888 +#define SXGBE_MMC_TXFRAMELO_GCNT_REG 0x088C +#define SXGBE_MMC_TXFRAMEHI_GCNT_REG 0x0890 +#define SXGBE_MMC_TXPAUSELO_CNT_REG 0x0894 +#define SXGBE_MMC_TXPAUSEHI_CNT_REG 0x0898 +#define SXGBE_MMC_TXVLANLO_GCNT_REG 0x089C +#define SXGBE_MMC_TXVLANHI_GCNT_REG 0x08A0 + +/* RX specific counters */ +#define SXGBE_MMC_RXFRAMELO_GBCNT_REG 0x0900 +#define SXGBE_MMC_RXFRAMEHI_GBCNT_REG 0x0904 +#define SXGBE_MMC_RXOCTETLO_GBCNT_REG 0x0908 +#define SXGBE_MMC_RXOCTETHI_GBCNT_REG 0x090C +#define SXGBE_MMC_RXOCTETLO_GCNT_REG 0x0910 +#define SXGBE_MMC_RXOCTETHI_GCNT_REG 0x0914 +#define SXGBE_MMC_RXBROADLO_GCNT_REG 0x0918 +#define SXGBE_MMC_RXBROADHI_GCNT_REG 0x091C +#define SXGBE_MMC_RXMULTILO_GCNT_REG 0x0920 +#define SXGBE_MMC_RXMULTIHI_GCNT_REG 0x0924 +#define SXGBE_MMC_RXCRCERRLO_REG 0x0928 +#define SXGBE_MMC_RXCRCERRHI_REG 0x092C +#define SXGBE_MMC_RXSHORT64BFRAME_ERR_REG 0x0930 +#define SXGBE_MMC_RXJABBERERR_REG 0x0934 +#define SXGBE_MMC_RXSHORT64BFRAME_COR_REG 0x0938 +#define SXGBE_MMC_RXOVERMAXFRAME_COR_REG 0x093C +#define SXGBE_MMC_RX64LO_GBCNT_REG 0x0940 +#define SXGBE_MMC_RX64HI_GBCNT_REG 0x0944 +#define SXGBE_MMC_RX65TO127LO_GBCNT_REG 0x0948 +#define SXGBE_MMC_RX65TO127HI_GBCNT_REG 0x094C +#define SXGBE_MMC_RX128TO255LO_GBCNT_REG 0x0950 +#define SXGBE_MMC_RX128TO255HI_GBCNT_REG 0x0954 +#define SXGBE_MMC_RX256TO511LO_GBCNT_REG 0x0958 +#define SXGBE_MMC_RX256TO511HI_GBCNT_REG 0x095C +#define SXGBE_MMC_RX512TO1023LO_GBCNT_REG 0x0960 +#define SXGBE_MMC_RX512TO1023HI_GBCNT_REG 0x0964 +#define SXGBE_MMC_RX1023TOMAXLO_GBCNT_REG 0x0968 +#define SXGBE_MMC_RX1023TOMAXHI_GBCNT_REG 0x096C +#define SXGBE_MMC_RXUNICASTLO_GCNT_REG 0x0970 +#define SXGBE_MMC_RXUNICASTHI_GCNT_REG 0x0974 +#define SXGBE_MMC_RXLENERRLO_REG 0x0978 +#define SXGBE_MMC_RXLENERRHI_REG 0x097C +#define SXGBE_MMC_RXOUTOFRANGETYPELO_REG 0x0980 +#define SXGBE_MMC_RXOUTOFRANGETYPEHI_REG 0x0984 +#define SXGBE_MMC_RXPAUSELO_CNT_REG 0x0988 +#define SXGBE_MMC_RXPAUSEHI_CNT_REG 0x098C +#define SXGBE_MMC_RXFIFOOVERFLOWLO_GBCNT_REG 0x0990 +#define SXGBE_MMC_RXFIFOOVERFLOWHI_GBCNT_REG 0x0994 +#define SXGBE_MMC_RXVLANLO_GBCNT_REG 0x0998 +#define SXGBE_MMC_RXVLANHI_GBCNT_REG 0x099C +#define SXGBE_MMC_RXWATCHDOG_ERR_REG 0x09A0 + +/* L3/L4 function registers */ +#define SXGBE_CORE_L34_ADDCTL_REG 0x0C00 +#define SXGBE_CORE_L34_ADDCTL_REG 0x0C00 +#define SXGBE_CORE_L34_DATA_REG 0x0C04 + +/* ARP registers */ +#define SXGBE_CORE_ARP_ADD_REG 0x0C10 + +/* RSS registers */ +#define SXGBE_CORE_RSS_CTL_REG 0x0C80 +#define SXGBE_CORE_RSS_ADD_REG 0x0C88 +#define SXGBE_CORE_RSS_DATA_REG 0x0C8C + +/* IEEE 1588 registers */ +#define SXGBE_CORE_TSTAMP_CTL_REG 0x0D00 +#define SXGBE_CORE_SUBSEC_INC_REG 0x0D04 +#define SXGBE_CORE_SYSTIME_SEC_REG 0x0D0C +#define SXGBE_CORE_SYSTIME_NSEC_REG 0x0D10 +#define SXGBE_CORE_SYSTIME_SECUP_REG 0x0D14 +#define SXGBE_CORE_TSTAMP_ADD_REG 0x0D18 +#define SXGBE_CORE_SYSTIME_HWORD_REG 0x0D1C +#define SXGBE_CORE_TSTAMP_STATUS_REG 0x0D20 +#define SXGBE_CORE_TXTIME_STATUSNSEC_REG 0x0D30 +#define SXGBE_CORE_TXTIME_STATUSSEC_REG 0x0D34 + +/* Auxiliary registers */ +#define SXGBE_CORE_AUX_CTL_REG 0x0D40 +#define SXGBE_CORE_AUX_TSTAMP_NSEC_REG 0x0D48 +#define SXGBE_CORE_AUX_TSTAMP_SEC_REG 0x0D4C +#define SXGBE_CORE_AUX_TSTAMP_INGCOR_REG 0x0D50 +#define SXGBE_CORE_AUX_TSTAMP_ENGCOR_REG 0x0D54 +#define SXGBE_CORE_AUX_TSTAMP_INGCOR_NSEC_REG 0x0D58 +#define SXGBE_CORE_AUX_TSTAMP_INGCOR_SUBNSEC_REG 0x0D5C +#define SXGBE_CORE_AUX_TSTAMP_ENGCOR_NSEC_REG 0x0D60 +#define SXGBE_CORE_AUX_TSTAMP_ENGCOR_SUBNSEC_REG 0x0D64 + +/* PPS registers */ +#define SXGBE_CORE_PPS_CTL_REG 0x0D70 +#define SXGBE_CORE_PPS_BASE 0x0D80 + +/* addr = 0 - 3 */ +#define SXGBE_CORE_PPS_TTIME_SEC_REG(addr) \ + (SXGBE_CORE_PPS_BASE + (0x10 * addr) + 0x0) +#define SXGBE_CORE_PPS_TTIME_NSEC_REG(addr) \ + (SXGBE_CORE_PPS_BASE + (0x10 * addr) + 0x4) +#define SXGBE_CORE_PPS_INTERVAL_REG(addr) \ + (SXGBE_CORE_PPS_BASE + (0x10 * addr) + 0x8) +#define SXGBE_CORE_PPS_WIDTH_REG(addr) \ + (SXGBE_CORE_PPS_BASE + (0x10 * addr) + 0xC) +#define SXGBE_CORE_PTO_CTL_REG 0x0DC0 +#define SXGBE_CORE_SRCPORT_ITY0_REG 0x0DC4 +#define SXGBE_CORE_SRCPORT_ITY1_REG 0x0DC8 +#define SXGBE_CORE_SRCPORT_ITY2_REG 0x0DCC +#define SXGBE_CORE_LOGMSG_LEVEL_REG 0x0DD0 + +/* SXGBE MTL Registers */ +#define SXGBE_MTL_BASE_REG 0x1000 +#define SXGBE_MTL_OP_MODE_REG (SXGBE_MTL_BASE_REG + 0x0000) +#define SXGBE_MTL_DEBUG_CTL_REG (SXGBE_MTL_BASE_REG + 0x0008) +#define SXGBE_MTL_DEBUG_STATUS_REG (SXGBE_MTL_BASE_REG + 0x000C) +#define SXGBE_MTL_FIFO_DEBUGDATA_REG (SXGBE_MTL_BASE_REG + 0x0010) +#define SXGBE_MTL_INT_STATUS_REG (SXGBE_MTL_BASE_REG + 0x0020) +#define SXGBE_MTL_RXQ_DMAMAP0_REG (SXGBE_MTL_BASE_REG + 0x0030) +#define SXGBE_MTL_RXQ_DMAMAP1_REG (SXGBE_MTL_BASE_REG + 0x0034) +#define SXGBE_MTL_RXQ_DMAMAP2_REG (SXGBE_MTL_BASE_REG + 0x0038) +#define SXGBE_MTL_TX_PRTYMAP0_REG (SXGBE_MTL_BASE_REG + 0x0040) +#define SXGBE_MTL_TX_PRTYMAP1_REG (SXGBE_MTL_BASE_REG + 0x0044) + +/* TC/Queue registers, qnum=0-15 */ +#define SXGBE_MTL_TC_TXBASE_REG (SXGBE_MTL_BASE_REG + 0x0100) +#define SXGBE_MTL_TXQ_OPMODE_REG(qnum) \ + (SXGBE_MTL_TC_TXBASE_REG + (qnum * 0x80) + 0x00) +#define SXGBE_MTL_SFMODE BIT(1) +#define SXGBE_MTL_FIFO_LSHIFT 16 +#define SXGBE_MTL_ENABLE_QUEUE 0x00000008 +#define SXGBE_MTL_TXQ_UNDERFLOW_REG(qnum) \ + (SXGBE_MTL_TC_TXBASE_REG + (qnum * 0x80) + 0x04) +#define SXGBE_MTL_TXQ_DEBUG_REG(qnum) \ + (SXGBE_MTL_TC_TXBASE_REG + (qnum * 0x80) + 0x08) +#define SXGBE_MTL_TXQ_ETSCTL_REG(qnum) \ + (SXGBE_MTL_TC_TXBASE_REG + (qnum * 0x80) + 0x10) +#define SXGBE_MTL_TXQ_ETSSTATUS_REG(qnum) \ + (SXGBE_MTL_TC_TXBASE_REG + (qnum * 0x80) + 0x14) +#define SXGBE_MTL_TXQ_QUANTWEIGHT_REG(qnum) \ + (SXGBE_MTL_TC_TXBASE_REG + (qnum * 0x80) + 0x18) + +#define SXGBE_MTL_TC_RXBASE_REG 0x1140 +#define SXGBE_RX_MTL_SFMODE BIT(5) +#define SXGBE_MTL_RXQ_OPMODE_REG(qnum) \ + (SXGBE_MTL_TC_RXBASE_REG + (qnum * 0x80) + 0x00) +#define SXGBE_MTL_RXQ_MISPKTOVERFLOW_REG(qnum) \ + (SXGBE_MTL_TC_RXBASE_REG + (qnum * 0x80) + 0x04) +#define SXGBE_MTL_RXQ_DEBUG_REG(qnum) \ + (SXGBE_MTL_TC_RXBASE_REG + (qnum * 0x80) + 0x08) +#define SXGBE_MTL_RXQ_CTL_REG(qnum) \ + (SXGBE_MTL_TC_RXBASE_REG + (qnum * 0x80) + 0x0C) +#define SXGBE_MTL_RXQ_INTENABLE_REG(qnum) \ + (SXGBE_MTL_TC_RXBASE_REG + (qnum * 0x80) + 0x30) +#define SXGBE_MTL_RXQ_INTSTATUS_REG(qnum) \ + (SXGBE_MTL_TC_RXBASE_REG + (qnum * 0x80) + 0x34) + +/* SXGBE DMA Registers */ +#define SXGBE_DMA_BASE_REG 0x3000 +#define SXGBE_DMA_MODE_REG (SXGBE_DMA_BASE_REG + 0x0000) +#define SXGBE_DMA_SOFT_RESET BIT(0) +#define SXGBE_DMA_SYSBUS_MODE_REG (SXGBE_DMA_BASE_REG + 0x0004) +#define SXGBE_DMA_AXI_UNDEF_BURST BIT(0) +#define SXGBE_DMA_ENHACE_ADDR_MODE BIT(11) +#define SXGBE_DMA_INT_STATUS_REG (SXGBE_DMA_BASE_REG + 0x0008) +#define SXGBE_DMA_AXI_ARCACHECTL_REG (SXGBE_DMA_BASE_REG + 0x0010) +#define SXGBE_DMA_AXI_AWCACHECTL_REG (SXGBE_DMA_BASE_REG + 0x0018) +#define SXGBE_DMA_DEBUG_STATUS0_REG (SXGBE_DMA_BASE_REG + 0x0020) +#define SXGBE_DMA_DEBUG_STATUS1_REG (SXGBE_DMA_BASE_REG + 0x0024) +#define SXGBE_DMA_DEBUG_STATUS2_REG (SXGBE_DMA_BASE_REG + 0x0028) +#define SXGBE_DMA_DEBUG_STATUS3_REG (SXGBE_DMA_BASE_REG + 0x002C) +#define SXGBE_DMA_DEBUG_STATUS4_REG (SXGBE_DMA_BASE_REG + 0x0030) +#define SXGBE_DMA_DEBUG_STATUS5_REG (SXGBE_DMA_BASE_REG + 0x0034) + +/* Channel Registers, cha_num = 0-15 */ +#define SXGBE_DMA_CHA_BASE_REG \ + (SXGBE_DMA_BASE_REG + 0x0100) +#define SXGBE_DMA_CHA_CTL_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x00) +#define SXGBE_DMA_PBL_X8MODE BIT(16) +#define SXGBE_DMA_CHA_TXCTL_TSE_ENABLE BIT(12) +#define SXGBE_DMA_CHA_TXCTL_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x04) +#define SXGBE_DMA_CHA_RXCTL_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x08) +#define SXGBE_DMA_CHA_TXDESC_HADD_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x10) +#define SXGBE_DMA_CHA_TXDESC_LADD_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x14) +#define SXGBE_DMA_CHA_RXDESC_HADD_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x18) +#define SXGBE_DMA_CHA_RXDESC_LADD_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x1C) +#define SXGBE_DMA_CHA_TXDESC_TAILPTR_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x24) +#define SXGBE_DMA_CHA_RXDESC_TAILPTR_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x2C) +#define SXGBE_DMA_CHA_TXDESC_RINGLEN_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x30) +#define SXGBE_DMA_CHA_RXDESC_RINGLEN_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x34) +#define SXGBE_DMA_CHA_INT_ENABLE_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x38) +#define SXGBE_DMA_CHA_INT_RXWATCHTMR_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x3C) +#define SXGBE_DMA_CHA_TXDESC_CURADDLO_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x44) +#define SXGBE_DMA_CHA_RXDESC_CURADDLO_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x4C) +#define SXGBE_DMA_CHA_CURTXBUF_ADDHI_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x50) +#define SXGBE_DMA_CHA_CURTXBUF_ADDLO_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x54) +#define SXGBE_DMA_CHA_CURRXBUF_ADDHI_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x58) +#define SXGBE_DMA_CHA_CURRXBUF_ADDLO_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x5C) +#define SXGBE_DMA_CHA_STATUS_REG(cha_num) \ + (SXGBE_DMA_CHA_BASE_REG + (cha_num * 0x80) + 0x60) + +/* TX DMA control register specific */ +#define SXGBE_TX_START_DMA BIT(0) + +/* sxgbe tx configuration register bitfields */ +#define SXGBE_SPEED_10G 0x0 +#define SXGBE_SPEED_2_5G 0x1 +#define SXGBE_SPEED_1G 0x2 +#define SXGBE_SPEED_LSHIFT 29 + +#define SXGBE_TX_ENABLE BIT(0) +#define SXGBE_TX_DISDIC_ALGO BIT(1) +#define SXGBE_TX_JABBER_DISABLE BIT(16) + +/* sxgbe rx configuration register bitfields */ +#define SXGBE_RX_ENABLE BIT(0) +#define SXGBE_RX_ACS_ENABLE BIT(1) +#define SXGBE_RX_WATCHDOG_DISABLE BIT(7) +#define SXGBE_RX_JUMBPKT_ENABLE BIT(8) +#define SXGBE_RX_CSUMOFFLOAD_ENABLE BIT(9) +#define SXGBE_RX_LOOPBACK_ENABLE BIT(10) +#define SXGBE_RX_ARPOFFLOAD_ENABLE BIT(31) + +/* sxgbe vlan Tag Register bitfields */ +#define SXGBE_VLAN_SVLAN_ENABLE BIT(18) +#define SXGBE_VLAN_DOUBLEVLAN_ENABLE BIT(26) +#define SXGBE_VLAN_INNERVLAN_ENABLE BIT(27) + +/* XMAC VLAN Tag Inclusion Register(0x0060) bitfields + * Below fields same for Inner VLAN Tag Inclusion + * Register(0x0064) register + */ +enum vlan_tag_ctl_tx { + VLAN_TAG_TX_NOP, + VLAN_TAG_TX_DEL, + VLAN_TAG_TX_INSERT, + VLAN_TAG_TX_REPLACE +}; +#define SXGBE_VLAN_PRTY_CTL BIT(18) +#define SXGBE_VLAN_CSVL_CTL BIT(19) + +/* SXGBE TX Q Flow Control Register bitfields */ +#define SXGBE_TX_FLOW_CTL_FCB BIT(0) +#define SXGBE_TX_FLOW_CTL_TFB BIT(1) + +/* SXGBE RX Q Flow Control Register bitfields */ +#define SXGBE_RX_FLOW_CTL_ENABLE BIT(0) +#define SXGBE_RX_UNICAST_DETECT BIT(1) +#define SXGBE_RX_PRTYFLOW_CTL_ENABLE BIT(8) + +/* sxgbe rx Q control0 register bitfields */ +#define SXGBE_RX_Q_ENABLE 0x2 + +/* SXGBE hardware features bitfield specific */ +/* Capability Register 0 */ +#define SXGBE_HW_FEAT_GMII(cap) ((cap & 0x00000002) >> 1) +#define SXGBE_HW_FEAT_VLAN_HASH_FILTER(cap) ((cap & 0x00000010) >> 4) +#define SXGBE_HW_FEAT_SMA(cap) ((cap & 0x00000020) >> 5) +#define SXGBE_HW_FEAT_PMT_TEMOTE_WOP(cap) ((cap & 0x00000040) >> 6) +#define SXGBE_HW_FEAT_PMT_MAGIC_PKT(cap) ((cap & 0x00000080) >> 7) +#define SXGBE_HW_FEAT_RMON(cap) ((cap & 0x00000100) >> 8) +#define SXGBE_HW_FEAT_ARP_OFFLOAD(cap) ((cap & 0x00000200) >> 9) +#define SXGBE_HW_FEAT_IEEE1500_2008(cap) ((cap & 0x00001000) >> 12) +#define SXGBE_HW_FEAT_EEE(cap) ((cap & 0x00002000) >> 13) +#define SXGBE_HW_FEAT_TX_CSUM_OFFLOAD(cap) ((cap & 0x00004000) >> 14) +#define SXGBE_HW_FEAT_RX_CSUM_OFFLOAD(cap) ((cap & 0x00010000) >> 16) +#define SXGBE_HW_FEAT_MACADDR_COUNT(cap) ((cap & 0x007C0000) >> 18) +#define SXGBE_HW_FEAT_TSTMAP_SRC(cap) ((cap & 0x06000000) >> 25) +#define SXGBE_HW_FEAT_SRCADDR_VLAN(cap) ((cap & 0x08000000) >> 27) + +/* Capability Register 1 */ +#define SXGBE_HW_FEAT_RX_FIFO_SIZE(cap) ((cap & 0x0000001F)) +#define SXGBE_HW_FEAT_TX_FIFO_SIZE(cap) ((cap & 0x000007C0) >> 6) +#define SXGBE_HW_FEAT_IEEE1588_HWORD(cap) ((cap & 0x00002000) >> 13) +#define SXGBE_HW_FEAT_DCB(cap) ((cap & 0x00010000) >> 16) +#define SXGBE_HW_FEAT_SPLIT_HDR(cap) ((cap & 0x00020000) >> 17) +#define SXGBE_HW_FEAT_TSO(cap) ((cap & 0x00040000) >> 18) +#define SXGBE_HW_FEAT_DEBUG_MEM_IFACE(cap) ((cap & 0x00080000) >> 19) +#define SXGBE_HW_FEAT_RSS(cap) ((cap & 0x00100000) >> 20) +#define SXGBE_HW_FEAT_HASH_TABLE_SIZE(cap) ((cap & 0x03000000) >> 24) +#define SXGBE_HW_FEAT_L3L4_FILTER_NUM(cap) ((cap & 0x78000000) >> 27) + +/* Capability Register 2 */ +#define SXGBE_HW_FEAT_RX_MTL_QUEUES(cap) ((cap & 0x0000000F)) +#define SXGBE_HW_FEAT_TX_MTL_QUEUES(cap) ((cap & 0x000003C0) >> 6) +#define SXGBE_HW_FEAT_RX_DMA_CHANNELS(cap) ((cap & 0x0000F000) >> 12) +#define SXGBE_HW_FEAT_TX_DMA_CHANNELS(cap) ((cap & 0x003C0000) >> 18) +#define SXGBE_HW_FEAT_PPS_OUTPUTS(cap) ((cap & 0x07000000) >> 24) +#define SXGBE_HW_FEAT_AUX_SNAPSHOTS(cap) ((cap & 0x70000000) >> 28) + +/* DMAchannel interrupt enable specific */ +/* DMA Normal interrupt */ +#define SXGBE_DMA_INT_ENA_NIE BIT(16) /* Normal Summary */ +#define SXGBE_DMA_INT_ENA_TIE BIT(0) /* Transmit Interrupt */ +#define SXGBE_DMA_INT_ENA_TUE BIT(2) /* Transmit Buffer Unavailable */ +#define SXGBE_DMA_INT_ENA_RIE BIT(6) /* Receive Interrupt */ + +#define SXGBE_DMA_INT_NORMAL \ + (SXGBE_DMA_INT_ENA_NIE | SXGBE_DMA_INT_ENA_RIE | \ + SXGBE_DMA_INT_ENA_TIE | SXGBE_DMA_INT_ENA_TUE) + +/* DMA Abnormal interrupt */ +#define SXGBE_DMA_INT_ENA_AIE BIT(15) /* Abnormal Summary */ +#define SXGBE_DMA_INT_ENA_TSE BIT(1) /* Transmit Stopped */ +#define SXGBE_DMA_INT_ENA_RUE BIT(7) /* Receive Buffer Unavailable */ +#define SXGBE_DMA_INT_ENA_RSE BIT(8) /* Receive Stopped */ +#define SXGBE_DMA_INT_ENA_FBE BIT(12) /* Fatal Bus Error */ +#define SXGBE_DMA_INT_ENA_CDEE BIT(13) /* Context Descriptor Error */ + +#define SXGBE_DMA_INT_ABNORMAL \ + (SXGBE_DMA_INT_ENA_AIE | SXGBE_DMA_INT_ENA_TSE | \ + SXGBE_DMA_INT_ENA_RUE | SXGBE_DMA_INT_ENA_RSE | \ + SXGBE_DMA_INT_ENA_FBE | SXGBE_DMA_INT_ENA_CDEE) + +#define SXGBE_DMA_ENA_INT (SXGBE_DMA_INT_NORMAL | SXGBE_DMA_INT_ABNORMAL) + +/* DMA channel interrupt status specific */ +#define SXGBE_DMA_INT_STATUS_REB2 BIT(21) +#define SXGBE_DMA_INT_STATUS_REB1 BIT(20) +#define SXGBE_DMA_INT_STATUS_REB0 BIT(19) +#define SXGBE_DMA_INT_STATUS_TEB2 BIT(18) +#define SXGBE_DMA_INT_STATUS_TEB1 BIT(17) +#define SXGBE_DMA_INT_STATUS_TEB0 BIT(16) +#define SXGBE_DMA_INT_STATUS_NIS BIT(15) +#define SXGBE_DMA_INT_STATUS_AIS BIT(14) +#define SXGBE_DMA_INT_STATUS_CTXTERR BIT(13) +#define SXGBE_DMA_INT_STATUS_FBE BIT(12) +#define SXGBE_DMA_INT_STATUS_RPS BIT(8) +#define SXGBE_DMA_INT_STATUS_RBU BIT(7) +#define SXGBE_DMA_INT_STATUS_RI BIT(6) +#define SXGBE_DMA_INT_STATUS_TBU BIT(2) +#define SXGBE_DMA_INT_STATUS_TPS BIT(1) +#define SXGBE_DMA_INT_STATUS_TI BIT(0) + +#endif /* __SXGBE_REGMAP_H__ */ diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_xpcs.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_xpcs.c new file mode 100644 index 000000000000..51c32194ba88 --- /dev/null +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_xpcs.c @@ -0,0 +1,91 @@ +/* 10G controller driver for Samsung SoCs + * + * Copyright (C) 2013 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Author: Siva Reddy Kallam + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include "sxgbe_common.h" +#include "sxgbe_xpcs.h" + +static int sxgbe_xpcs_read(struct net_device *ndev, unsigned int reg) +{ + u32 value; + struct sxgbe_priv_data *priv = netdev_priv(ndev); + + value = readl(priv->ioaddr + XPCS_OFFSET + reg); + + return value; +} + +static int sxgbe_xpcs_write(struct net_device *ndev, int reg, int data) +{ + struct sxgbe_priv_data *priv = netdev_priv(ndev); + + writel(data, priv->ioaddr + XPCS_OFFSET + reg); + + return 0; +} + +int sxgbe_xpcs_init(struct net_device *ndev) +{ + u32 value; + + value = sxgbe_xpcs_read(ndev, SR_PCS_MMD_CONTROL1); + /* 10G XAUI mode */ + sxgbe_xpcs_write(ndev, SR_PCS_CONTROL2, XPCS_TYPE_SEL_X); + sxgbe_xpcs_write(ndev, VR_PCS_MMD_XAUI_MODE_CONTROL, XPCS_XAUI_MODE); + sxgbe_xpcs_write(ndev, VR_PCS_MMD_XAUI_MODE_CONTROL, value | BIT(13)); + sxgbe_xpcs_write(ndev, SR_PCS_MMD_CONTROL1, value | BIT(11)); + + do { + value = sxgbe_xpcs_read(ndev, VR_PCS_MMD_DIGITAL_STATUS); + } while ((value & XPCS_QSEQ_STATE_MPLLOFF) == XPCS_QSEQ_STATE_STABLE); + + value = sxgbe_xpcs_read(ndev, SR_PCS_MMD_CONTROL1); + sxgbe_xpcs_write(ndev, SR_PCS_MMD_CONTROL1, value & ~BIT(11)); + + do { + value = sxgbe_xpcs_read(ndev, VR_PCS_MMD_DIGITAL_STATUS); + } while ((value & XPCS_QSEQ_STATE_MPLLOFF) != XPCS_QSEQ_STATE_STABLE); + + return 0; +} + +int sxgbe_xpcs_init_1G(struct net_device *ndev) +{ + int value; + + /* 10GBASE-X PCS (1G) mode */ + sxgbe_xpcs_write(ndev, SR_PCS_CONTROL2, XPCS_TYPE_SEL_X); + sxgbe_xpcs_write(ndev, VR_PCS_MMD_XAUI_MODE_CONTROL, XPCS_XAUI_MODE); + value = sxgbe_xpcs_read(ndev, SR_PCS_MMD_CONTROL1); + sxgbe_xpcs_write(ndev, SR_PCS_MMD_CONTROL1, value & ~BIT(13)); + + value = sxgbe_xpcs_read(ndev, SR_MII_MMD_CONTROL); + sxgbe_xpcs_write(ndev, SR_MII_MMD_CONTROL, value | BIT(6)); + sxgbe_xpcs_write(ndev, SR_MII_MMD_CONTROL, value & ~BIT(13)); + value = sxgbe_xpcs_read(ndev, SR_PCS_MMD_CONTROL1); + sxgbe_xpcs_write(ndev, SR_PCS_MMD_CONTROL1, value | BIT(11)); + + do { + value = sxgbe_xpcs_read(ndev, VR_PCS_MMD_DIGITAL_STATUS); + } while ((value & XPCS_QSEQ_STATE_MPLLOFF) != XPCS_QSEQ_STATE_STABLE); + + value = sxgbe_xpcs_read(ndev, SR_PCS_MMD_CONTROL1); + sxgbe_xpcs_write(ndev, SR_PCS_MMD_CONTROL1, value & ~BIT(11)); + + /* Auto Negotiation cluase 37 enable */ + value = sxgbe_xpcs_read(ndev, SR_MII_MMD_CONTROL); + sxgbe_xpcs_write(ndev, SR_MII_MMD_CONTROL, value | BIT(12)); + + return 0; +} diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_xpcs.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_xpcs.h new file mode 100644 index 000000000000..6b26a50724d3 --- /dev/null +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_xpcs.h @@ -0,0 +1,38 @@ +/* 10G controller driver for Samsung SoCs + * + * Copyright (C) 2013 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Author: Byungho An + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __SXGBE_XPCS_H__ +#define __SXGBE_XPCS_H__ + +/* XPCS Registers */ +#define XPCS_OFFSET 0x1A060000 +#define SR_PCS_MMD_CONTROL1 0x030000 +#define SR_PCS_CONTROL2 0x030007 +#define VR_PCS_MMD_XAUI_MODE_CONTROL 0x038004 +#define VR_PCS_MMD_DIGITAL_STATUS 0x038010 +#define SR_MII_MMD_CONTROL 0x1F0000 +#define SR_MII_MMD_AN_ADV 0x1F0004 +#define SR_MII_MMD_AN_LINK_PARTNER_BA 0x1F0005 +#define VR_MII_MMD_AN_CONTROL 0x1F8001 +#define VR_MII_MMD_AN_INT_STATUS 0x1F8002 + +#define XPCS_QSEQ_STATE_STABLE 0x10 +#define XPCS_QSEQ_STATE_MPLLOFF 0x1c +#define XPCS_TYPE_SEL_R 0x00 +#define XPCS_TYPE_SEL_X 0x01 +#define XPCS_TYPE_SEL_W 0x02 +#define XPCS_XAUI_MODE 0x00 +#define XPCS_RXAUI_MODE 0x01 + +int sxgbe_xpcs_init(struct net_device *ndev); +int sxgbe_xpcs_init_1G(struct net_device *ndev); + +#endif /* __SXGBE_XPCS_H__ */ diff --git a/include/linux/sxgbe_platform.h b/include/linux/sxgbe_platform.h new file mode 100644 index 000000000000..a62442cf0037 --- /dev/null +++ b/include/linux/sxgbe_platform.h @@ -0,0 +1,54 @@ +/* + * 10G controller driver for Samsung EXYNOS SoCs + * + * Copyright (C) 2013 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Author: Siva Reddy Kallam + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __SXGBE_PLATFORM_H__ +#define __SXGBE_PLATFORM_H__ + +/* MDC Clock Selection define*/ +#define SXGBE_CSR_100_150M 0x0 /* MDC = clk_scr_i/62 */ +#define SXGBE_CSR_150_250M 0x1 /* MDC = clk_scr_i/102 */ +#define SXGBE_CSR_250_300M 0x2 /* MDC = clk_scr_i/122 */ +#define SXGBE_CSR_300_350M 0x3 /* MDC = clk_scr_i/142 */ +#define SXGBE_CSR_350_400M 0x4 /* MDC = clk_scr_i/162 */ +#define SXGBE_CSR_400_500M 0x5 /* MDC = clk_scr_i/202 */ + +/* Platfrom data for platform device structure's + * platform_data field + */ +struct sxgbe_mdio_bus_data { + unsigned int phy_mask; + int *irqs; + int probed_phy_irq; +}; + +struct sxgbe_dma_cfg { + int pbl; + int fixed_burst; + int burst_map; + int adv_addr_mode; +}; + +struct sxgbe_plat_data { + char *phy_bus_name; + int bus_id; + int phy_addr; + int interface; + struct sxgbe_mdio_bus_data *mdio_bus_data; + struct sxgbe_dma_cfg *dma_cfg; + int clk_csr; + int pmt; + int force_sf_dma_mode; + int force_thresh_dma_mode; + int riwt_off; +}; + +#endif /* __SXGBE_PLATFORM_H__ */ -- cgit v1.2.3-71-gd317 From d18f141a1a7cfa5ffad8433e43062b05a8d1b82a Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 27 Mar 2014 14:02:03 +0200 Subject: mlx4: Add support for CONFIG_DEV command Introduce the CONFIG_DEV firmware command which we will use to configure the UDP port assumed by the firmware for the VXLAN offloads. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 9 +++++++ drivers/net/ethernet/mellanox/mlx4/fw.c | 40 ++++++++++++++++++++++++++++++++ include/linux/mlx4/cmd.h | 1 + include/linux/mlx4/device.h | 2 ++ 4 files changed, 52 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 2fd61b60a5c5..78099eab7673 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -954,6 +954,15 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = NULL }, + { + .opcode = MLX4_CMD_CONFIG_DEV, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper + }, { .opcode = MLX4_CMD_ALLOC_RES, .has_inbox = false, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 6bd33e2fc17c..d16a4d118903 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1779,6 +1779,46 @@ int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic) MLX4_CMD_NATIVE); } +struct mlx4_config_dev { + __be32 update_flags; + __be32 rsdv1[3]; + __be16 vxlan_udp_dport; + __be16 rsvd2; +}; + +#define MLX4_VXLAN_UDP_DPORT (1 << 0) + +static int mlx4_CONFIG_DEV(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev) +{ + int err; + struct mlx4_cmd_mailbox *mailbox; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memcpy(mailbox->buf, config_dev, sizeof(*config_dev)); + + err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_CONFIG_DEV, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port) +{ + struct mlx4_config_dev config_dev; + + memset(&config_dev, 0, sizeof(config_dev)); + config_dev.update_flags = cpu_to_be32(MLX4_VXLAN_UDP_DPORT); + config_dev.vxlan_udp_dport = udp_port; + + return mlx4_CONFIG_DEV(dev, &config_dev); +} +EXPORT_SYMBOL_GPL(mlx4_config_vxlan_port); + + int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages) { int ret = mlx4_cmd_imm(dev, icm_size, aux_pages, 0, 0, diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 009985628257..c8450366c130 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -125,6 +125,7 @@ enum { /* miscellaneous commands */ MLX4_CMD_DIAG_RPRT = 0x30, MLX4_CMD_NOP = 0x31, + MLX4_CMD_CONFIG_DEV = 0x3a, MLX4_CMD_ACCESS_MEM = 0x2e, MLX4_CMD_SET_VEP = 0x52, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6b3998396b99..d986c975616f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1232,4 +1232,6 @@ struct mlx4_slaves_pport mlx4_phys_to_slaves_pport_actv( int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port); int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port); + +int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3-71-gd317 From 1b136de120dda625109f2afe1e3d04e256be9ec1 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 27 Mar 2014 14:02:04 +0200 Subject: net/mlx4: Implement vxlan ndo calls Add implementation for the add/del vxlan port ndo calls, using the CONFIG_DEV firmware command. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 86 +++++++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 3 + drivers/net/ethernet/mellanox/mlx4/port.c | 5 +- include/linux/mlx4/device.h | 2 +- 4 files changed, 91 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index fa5ee719e04b..82d7eb5b79cc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -1665,7 +1666,7 @@ int mlx4_en_start_port(struct net_device *dev) } if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { - err = mlx4_SET_PORT_VXLAN(mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC); + err = mlx4_SET_PORT_VXLAN(mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC, 1); if (err) { en_err(priv, "Failed setting port L2 tunnel configuration, err %d\n", err); @@ -1697,6 +1698,8 @@ int mlx4_en_start_port(struct net_device *dev) mlx4_set_stats_bitmap(mdev->dev, &priv->stats_bitmap); + if (priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS) + vxlan_get_rx_port(dev); priv->port_up = true; netif_tx_start_all_queues(dev); netif_device_attach(dev); @@ -2264,6 +2267,81 @@ static int mlx4_en_get_phys_port_id(struct net_device *dev, return 0; } +static void mlx4_en_add_vxlan_offloads(struct work_struct *work) +{ + int ret; + struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, + vxlan_add_task); + + ret = mlx4_config_vxlan_port(priv->mdev->dev, priv->vxlan_port); + if (ret) + goto out; + + ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, + VXLAN_STEER_BY_OUTER_MAC, 1); +out: + if (ret) + en_err(priv, "failed setting L2 tunnel configuration ret %d\n", ret); +} + +static void mlx4_en_del_vxlan_offloads(struct work_struct *work) +{ + int ret; + struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, + vxlan_del_task); + + ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, + VXLAN_STEER_BY_OUTER_MAC, 0); + if (ret) + en_err(priv, "failed setting L2 tunnel configuration ret %d\n", ret); + + priv->vxlan_port = 0; +} + +static void mlx4_en_add_vxlan_port(struct net_device *dev, + sa_family_t sa_family, __be16 port) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + __be16 current_port; + + if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS)) + return; + + if (sa_family == AF_INET6) + return; + + current_port = priv->vxlan_port; + if (current_port && current_port != port) { + en_warn(priv, "vxlan port %d configured, can't add port %d\n", + ntohs(current_port), ntohs(port)); + return; + } + + priv->vxlan_port = port; + queue_work(priv->mdev->workqueue, &priv->vxlan_add_task); +} + +static void mlx4_en_del_vxlan_port(struct net_device *dev, + sa_family_t sa_family, __be16 port) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + __be16 current_port; + + if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) + return; + + if (sa_family == AF_INET6) + return; + + current_port = priv->vxlan_port; + if (current_port != port) { + en_dbg(DRV, priv, "vxlan port %d isn't configured, ignoring\n", ntohs(port)); + return; + } + + queue_work(priv->mdev->workqueue, &priv->vxlan_del_task); +} + static const struct net_device_ops mlx4_netdev_ops = { .ndo_open = mlx4_en_open, .ndo_stop = mlx4_en_close, @@ -2290,6 +2368,8 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_busy_poll = mlx4_en_low_latency_recv, #endif .ndo_get_phys_port_id = mlx4_en_get_phys_port_id, + .ndo_add_vxlan_port = mlx4_en_add_vxlan_port, + .ndo_del_vxlan_port = mlx4_en_del_vxlan_port, }; static const struct net_device_ops mlx4_netdev_ops_master = { @@ -2381,6 +2461,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); + INIT_WORK(&priv->vxlan_add_task, mlx4_en_add_vxlan_offloads); + INIT_WORK(&priv->vxlan_del_task, mlx4_en_del_vxlan_offloads); #ifdef CONFIG_MLX4_EN_DCB if (!mlx4_is_slave(priv->mdev->dev)) { if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) { @@ -2514,7 +2596,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, } if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { - err = mlx4_SET_PORT_VXLAN(mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC); + err = mlx4_SET_PORT_VXLAN(mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC, 1); if (err) { en_err(priv, "Failed setting port L2 tunnel configuration, err %d\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 69e1f36858e0..36fc2a2b24c3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -559,6 +559,8 @@ struct mlx4_en_priv { struct work_struct linkstate_task; struct delayed_work stats_task; struct delayed_work service_task; + struct work_struct vxlan_add_task; + struct work_struct vxlan_del_task; struct mlx4_en_perf_stats pstats; struct mlx4_en_pkt_stats pkstats; struct mlx4_en_port_stats port_stats; @@ -585,6 +587,7 @@ struct mlx4_en_priv { struct hlist_head filter_hash[1 << MLX4_EN_FILTER_HASH_SHIFT]; #endif u64 tunnel_reg_id; + __be16 vxlan_port; }; enum mlx4_en_wol { diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 2705b9ab9463..cfcad26ed40f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -988,7 +988,7 @@ struct mlx4_set_port_vxlan_context { u8 steering; }; -int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering) +int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable) { int err; u32 in_mod; @@ -1002,7 +1002,8 @@ int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering) memset(context, 0, sizeof(*context)); context->modify_flags = VXLAN_ENABLE_MODIFY | VXLAN_STEERING_MODIFY; - context->enable_flags = VXLAN_ENABLE; + if (enable) + context->enable_flags = VXLAN_ENABLE; context->steering = steering; in_mod = MLX4_SET_PORT_VXLAN << 8 | port; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d986c975616f..ba87bd21295a 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1142,7 +1142,7 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc); int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, u8 *pg, u16 *ratelimit); -int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering); +int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); -- cgit v1.2.3-71-gd317 From 015f0688f57ca4d499047d335b8052a733e17a4d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Mar 2014 08:45:56 -0700 Subject: net: net: add a core netdev->tx_dropped counter Dropping packets in __dev_queue_xmit() when transmit queue is stopped (NIC TX ring buffer full or BQL limit reached) currently outputs a syslog message. It would be better to get a precise count of such events available in netdevice stats so that monitoring tools can have a clue. This extends the work done in caf586e5f23ce ("net: add a core netdev->rx_dropped counter") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 ++++--- net/core/dev.c | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4b6d12c7b803..159c7e7945f8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1311,9 +1311,10 @@ struct net_device { int iflink; struct net_device_stats stats; - atomic_long_t rx_dropped; /* dropped packets by core network - * Do not use this in drivers. - */ + + /* dropped packets by core network, Do not use this in drivers */ + atomic_long_t rx_dropped; + atomic_long_t tx_dropped; #ifdef CONFIG_WIRELESS_EXT /* List of functions to handle Wireless Extensions (instead of ioctl). diff --git a/net/core/dev.c b/net/core/dev.c index 48dd323d5918..98ba581b89f0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2880,6 +2880,7 @@ recursion_alert: rc = -ENETDOWN; rcu_read_unlock_bh(); + atomic_long_inc(&dev->tx_dropped); kfree_skb(skb); return rc; out: @@ -6238,6 +6239,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, netdev_stats_to_stats64(storage, &dev->stats); } storage->rx_dropped += atomic_long_read(&dev->rx_dropped); + storage->tx_dropped += atomic_long_read(&dev->tx_dropped); return storage; } EXPORT_SYMBOL(dev_get_stats); -- cgit v1.2.3-71-gd317 From a8779ec1c5e60548b7b661a8d74a8cecf7775690 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Mar 2014 15:36:38 -0700 Subject: netpoll: Remove gfp parameter from __netpoll_setup The gfp parameter was added in: commit 47be03a28cc6c80e3aa2b3e8ed6d960ff0c5c0af Author: Amerigo Wang Date: Fri Aug 10 01:24:37 2012 +0000 netpoll: use GFP_ATOMIC in slave_enable_netpoll() and __netpoll_setup() slave_enable_netpoll() and __netpoll_setup() may be called with read_lock() held, so should use GFP_ATOMIC to allocate memory. Eric suggested to pass gfp flags to __netpoll_setup(). Cc: Eric Dumazet Cc: "David S. Miller" Reported-by: Dan Carpenter Signed-off-by: Eric Dumazet Signed-off-by: Cong Wang Signed-off-by: David S. Miller The reason for the gfp parameter was removed in: commit c4cdef9b7183159c23c7302aaf270d64c549f557 Author: dingtianhong Date: Tue Jul 23 15:25:27 2013 +0800 bonding: don't call slave_xxx_netpoll under spinlocks The slave_xxx_netpoll will call synchronize_rcu_bh(), so the function may schedule and sleep, it should't be called under spinlocks. bond_netpoll_setup() and bond_netpoll_cleanup() are always protected by rtnl lock, it is no need to take the read lock, as the slave list couldn't be changed outside rtnl lock. Signed-off-by: Ding Tianhong Cc: Jay Vosburgh Cc: Andy Gospodarek Signed-off-by: David S. Miller Nothing else that calls __netpoll_setup or ndo_netpoll_setup requires a gfp paramter, so remove the gfp parameter from both of these functions making the code clearer. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 6 +++--- drivers/net/team/team.c | 16 +++++++--------- include/linux/netdevice.h | 3 +-- include/linux/netpoll.h | 2 +- net/8021q/vlan_dev.c | 7 +++---- net/bridge/br_device.c | 15 +++++++-------- net/bridge/br_if.c | 2 +- net/bridge/br_private.h | 4 ++-- net/core/netpoll.c | 8 ++++---- 9 files changed, 29 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5be34b72a048..95a6ca7d9e51 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -922,12 +922,12 @@ static inline int slave_enable_netpoll(struct slave *slave) struct netpoll *np; int err = 0; - np = kzalloc(sizeof(*np), GFP_ATOMIC); + np = kzalloc(sizeof(*np), GFP_KERNEL); err = -ENOMEM; if (!np) goto out; - err = __netpoll_setup(np, slave->dev, GFP_ATOMIC); + err = __netpoll_setup(np, slave->dev); if (err) { kfree(np); goto out; @@ -962,7 +962,7 @@ static void bond_netpoll_cleanup(struct net_device *bond_dev) slave_disable_netpoll(slave); } -static int bond_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, gfp_t gfp) +static int bond_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) { struct bonding *bond = netdev_priv(dev); struct list_head *iter; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 2b1a1d61072c..33008c1d1d67 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1031,8 +1031,7 @@ static void team_port_leave(struct team *team, struct team_port *port) } #ifdef CONFIG_NET_POLL_CONTROLLER -static int team_port_enable_netpoll(struct team *team, struct team_port *port, - gfp_t gfp) +static int team_port_enable_netpoll(struct team *team, struct team_port *port) { struct netpoll *np; int err; @@ -1040,11 +1039,11 @@ static int team_port_enable_netpoll(struct team *team, struct team_port *port, if (!team->dev->npinfo) return 0; - np = kzalloc(sizeof(*np), gfp); + np = kzalloc(sizeof(*np), GFP_KERNEL); if (!np) return -ENOMEM; - err = __netpoll_setup(np, port->dev, gfp); + err = __netpoll_setup(np, port->dev); if (err) { kfree(np); return err; @@ -1067,8 +1066,7 @@ static void team_port_disable_netpoll(struct team_port *port) kfree(np); } #else -static int team_port_enable_netpoll(struct team *team, struct team_port *port, - gfp_t gfp) +static int team_port_enable_netpoll(struct team *team, struct team_port *port) { return 0; } @@ -1156,7 +1154,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev) goto err_vids_add; } - err = team_port_enable_netpoll(team, port, GFP_KERNEL); + err = team_port_enable_netpoll(team, port); if (err) { netdev_err(dev, "Failed to enable netpoll on device %s\n", portname); @@ -1850,7 +1848,7 @@ static void team_netpoll_cleanup(struct net_device *dev) } static int team_netpoll_setup(struct net_device *dev, - struct netpoll_info *npifo, gfp_t gfp) + struct netpoll_info *npifo) { struct team *team = netdev_priv(dev); struct team_port *port; @@ -1858,7 +1856,7 @@ static int team_netpoll_setup(struct net_device *dev, mutex_lock(&team->lock); list_for_each_entry(port, &team->port_list, list) { - err = team_port_enable_netpoll(team, port, gfp); + err = team_port_enable_netpoll(team, port); if (err) { __team_netpoll_cleanup(team); break; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 159c7e7945f8..0d8c8718980a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1037,8 +1037,7 @@ struct net_device_ops { #ifdef CONFIG_NET_POLL_CONTROLLER void (*ndo_poll_controller)(struct net_device *dev); int (*ndo_netpoll_setup)(struct net_device *dev, - struct netpoll_info *info, - gfp_t gfp); + struct netpoll_info *info); void (*ndo_netpoll_cleanup)(struct net_device *dev); #endif #ifdef CONFIG_NET_RX_BUSY_POLL diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 1b475a5a7239..893b9e66060e 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -57,7 +57,7 @@ static inline void netpoll_rx_enable(struct net_device *dev) { return; } void netpoll_send_udp(struct netpoll *np, const char *msg, int len); void netpoll_print_options(struct netpoll *np); int netpoll_parse_options(struct netpoll *np, char *opt); -int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp); +int __netpoll_setup(struct netpoll *np, struct net_device *ndev); int netpoll_setup(struct netpoll *np); void __netpoll_cleanup(struct netpoll *np); void __netpoll_free_async(struct netpoll *np); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 4f3e9073cb49..a78bebeca4d9 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -707,20 +707,19 @@ static void vlan_dev_poll_controller(struct net_device *dev) return; } -static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo, - gfp_t gfp) +static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; struct netpoll *netpoll; int err = 0; - netpoll = kzalloc(sizeof(*netpoll), gfp); + netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL); err = -ENOMEM; if (!netpoll) goto out; - err = __netpoll_setup(netpoll, real_dev, gfp); + err = __netpoll_setup(netpoll, real_dev); if (err) { kfree(netpoll); goto out; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index f2a08477e0f5..0dd01a05bd59 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -218,16 +218,16 @@ static void br_netpoll_cleanup(struct net_device *dev) br_netpoll_disable(p); } -static int __br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) +static int __br_netpoll_enable(struct net_bridge_port *p) { struct netpoll *np; int err; - np = kzalloc(sizeof(*p->np), gfp); + np = kzalloc(sizeof(*p->np), GFP_KERNEL); if (!np) return -ENOMEM; - err = __netpoll_setup(np, p->dev, gfp); + err = __netpoll_setup(np, p->dev); if (err) { kfree(np); return err; @@ -237,16 +237,15 @@ static int __br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) return err; } -int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) +int br_netpoll_enable(struct net_bridge_port *p) { if (!p->br->dev->npinfo) return 0; - return __br_netpoll_enable(p, gfp); + return __br_netpoll_enable(p); } -static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, - gfp_t gfp) +static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) { struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p; @@ -255,7 +254,7 @@ static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, list_for_each_entry(p, &br->port_list, list) { if (!p->dev) continue; - err = __br_netpoll_enable(p, gfp); + err = __br_netpoll_enable(p); if (err) goto fail; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 54d207d3a31c..5262b8617eb9 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -366,7 +366,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (err) goto err2; - err = br_netpoll_enable(p, GFP_KERNEL); + err = br_netpoll_enable(p); if (err) goto err3; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e1ca1dc916a4..06811d79f89f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -349,7 +349,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p, netpoll_send_skb(np, skb); } -int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp); +int br_netpoll_enable(struct net_bridge_port *p); void br_netpoll_disable(struct net_bridge_port *p); #else static inline void br_netpoll_send_skb(const struct net_bridge_port *p, @@ -357,7 +357,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p, { } -static inline int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) +static inline int br_netpoll_enable(struct net_bridge_port *p) { return 0; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 41c4e9ce1141..2c6379da295c 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -584,7 +584,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) } EXPORT_SYMBOL(netpoll_parse_options); -int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) +int __netpoll_setup(struct netpoll *np, struct net_device *ndev) { struct netpoll_info *npinfo; const struct net_device_ops *ops; @@ -603,7 +603,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) } if (!ndev->npinfo) { - npinfo = kmalloc(sizeof(*npinfo), gfp); + npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); if (!npinfo) { err = -ENOMEM; goto out; @@ -617,7 +617,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) ops = np->dev->netdev_ops; if (ops->ndo_netpoll_setup) { - err = ops->ndo_netpoll_setup(ndev, npinfo, gfp); + err = ops->ndo_netpoll_setup(ndev, npinfo); if (err) goto free_npinfo; } @@ -749,7 +749,7 @@ int netpoll_setup(struct netpoll *np) /* fill up the skb queue */ refill_skbs(); - err = __netpoll_setup(np, ndev, GFP_KERNEL); + err = __netpoll_setup(np, ndev); if (err) goto put; -- cgit v1.2.3-71-gd317 From 66b5552fc2dfbaa6445b1bdadd10c9305ce261bd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Mar 2014 15:39:03 -0700 Subject: netpoll: Rename netpoll_rx_enable/disable to netpoll_poll_disable/enable The netpoll_rx_enable and netpoll_rx_disable functions have always controlled polling the network drivers transmit and receive queues. Rename them to netpoll_poll_enable and netpoll_poll_disable to make their functionality clear. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netpoll.h | 8 ++++---- net/core/dev.c | 8 ++++---- net/core/netpoll.c | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 893b9e66060e..b25ee9ffdbe6 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -47,11 +47,11 @@ struct netpoll_info { }; #ifdef CONFIG_NETPOLL -extern void netpoll_rx_disable(struct net_device *dev); -extern void netpoll_rx_enable(struct net_device *dev); +extern void netpoll_poll_disable(struct net_device *dev); +extern void netpoll_poll_enable(struct net_device *dev); #else -static inline void netpoll_rx_disable(struct net_device *dev) { return; } -static inline void netpoll_rx_enable(struct net_device *dev) { return; } +static inline void netpoll_poll_disable(struct net_device *dev) { return; } +static inline void netpoll_poll_enable(struct net_device *dev) { return; } #endif void netpoll_send_udp(struct netpoll *np, const char *msg, int len); diff --git a/net/core/dev.c b/net/core/dev.c index 8d55fe780e3f..778b2036a9e7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1245,7 +1245,7 @@ static int __dev_open(struct net_device *dev) * If we don't do this there is a chance ndo_poll_controller * or ndo_poll may be running while we open the device */ - netpoll_rx_disable(dev); + netpoll_poll_disable(dev); ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); ret = notifier_to_errno(ret); @@ -1260,7 +1260,7 @@ static int __dev_open(struct net_device *dev) if (!ret && ops->ndo_open) ret = ops->ndo_open(dev); - netpoll_rx_enable(dev); + netpoll_poll_enable(dev); if (ret) clear_bit(__LINK_STATE_START, &dev->state); @@ -1314,7 +1314,7 @@ static int __dev_close_many(struct list_head *head) list_for_each_entry(dev, head, close_list) { /* Temporarily disable netpoll until the interface is down */ - netpoll_rx_disable(dev); + netpoll_poll_disable(dev); call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); @@ -1346,7 +1346,7 @@ static int __dev_close_many(struct list_head *head) dev->flags &= ~IFF_UP; net_dmaengine_put(); - netpoll_rx_enable(dev); + netpoll_poll_enable(dev); } return 0; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e2492d176ae7..9fd88875faff 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -214,7 +214,7 @@ static void netpoll_poll_dev(struct net_device *dev) zap_completion_queue(); } -void netpoll_rx_disable(struct net_device *dev) +void netpoll_poll_disable(struct net_device *dev) { struct netpoll_info *ni; int idx; @@ -225,9 +225,9 @@ void netpoll_rx_disable(struct net_device *dev) down(&ni->dev_lock); srcu_read_unlock(&netpoll_srcu, idx); } -EXPORT_SYMBOL(netpoll_rx_disable); +EXPORT_SYMBOL(netpoll_poll_disable); -void netpoll_rx_enable(struct net_device *dev) +void netpoll_poll_enable(struct net_device *dev) { struct netpoll_info *ni; rcu_read_lock(); @@ -236,7 +236,7 @@ void netpoll_rx_enable(struct net_device *dev) up(&ni->dev_lock); rcu_read_unlock(); } -EXPORT_SYMBOL(netpoll_rx_enable); +EXPORT_SYMBOL(netpoll_poll_enable); static void refill_skbs(void) { -- cgit v1.2.3-71-gd317 From 5efeac44cfca62f66a1b2919fc8ec7f7c726d15b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Mar 2014 15:42:20 -0700 Subject: netpoll: Respect NETIF_F_LLTX Stop taking the transmit lock when a network device has specified NETIF_F_LLTX. If no locks needed to trasnmit a packet this is the ideal scenario for netpoll as all packets can be trasnmitted immediately. Even if some locks are needed in ndo_start_xmit skipping any unnecessary serialization is desirable for netpoll as it makes it more likely a debugging packet may be trasnmitted immediately instead of being deferred until later. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ net/core/netpoll.c | 10 +++++----- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0d8c8718980a..4cd5e9e13c87 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2909,6 +2909,11 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) } \ } +#define HARD_TX_TRYLOCK(dev, txq) \ + (((dev->features & NETIF_F_LLTX) == 0) ? \ + __netif_tx_trylock(txq) : \ + true ) + #define HARD_TX_UNLOCK(dev, txq) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ __netif_tx_unlock(txq); \ diff --git a/net/core/netpoll.c b/net/core/netpoll.c index d44af2306f23..ed7740f7a94d 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -119,17 +119,17 @@ static void queue_process(struct work_struct *work) txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); local_irq_save(flags); - __netif_tx_lock(txq, smp_processor_id()); + HARD_TX_LOCK(dev, txq, smp_processor_id()); if (netif_xmit_frozen_or_stopped(txq) || netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); - __netif_tx_unlock(txq); + HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); schedule_delayed_work(&npinfo->tx_work, HZ/10); return; } - __netif_tx_unlock(txq); + HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); } } @@ -345,11 +345,11 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { - if (__netif_tx_trylock(txq)) { + if (HARD_TX_TRYLOCK(dev, txq)) { if (!netif_xmit_stopped(txq)) status = netpoll_start_xmit(skb, dev, txq); - __netif_tx_unlock(txq); + HARD_TX_UNLOCK(dev, txq); if (status == NETDEV_TX_OK) break; -- cgit v1.2.3-71-gd317 From f8bbbfc3b97f4c7a6c7c23185e520b22bfc3a21d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 28 Mar 2014 18:58:18 +0100 Subject: net: filter: add jited flag to indicate jit compiled filters This patch adds a jited flag into sk_filter struct in order to indicate whether a filter is currently jited or not. The size of sk_filter is not being expanded as the 32 bit 'len' member allows upper bits to be reused since a filter can currently only grow as large as BPF_MAXINSNS. Therefore, there's enough room also for other in future needed flags to reuse 'len' field if necessary. The jited flag also allows for having alternative interpreter functions running as currently, we can only detect jit compiled filters by testing fp->bpf_func to not equal the address of sk_run_filter(). Joint work with Alexei Starovoitov. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Cc: Pablo Neira Ayuso Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 3 ++- arch/powerpc/net/bpf_jit_comp.c | 3 ++- arch/s390/net/bpf_jit_comp.c | 5 ++++- arch/sparc/net/bpf_jit_comp.c | 3 ++- arch/x86/net/bpf_jit_comp.c | 3 ++- include/linux/filter.h | 3 ++- net/core/filter.c | 1 + 7 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 7ddb9c83cdfc..6f879c319a9d 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -925,6 +925,7 @@ void bpf_jit_compile(struct sk_filter *fp) bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); fp->bpf_func = (void *)ctx.target; + fp->jited = 1; out: kfree(ctx.offsets); return; @@ -932,7 +933,7 @@ out: void bpf_jit_free(struct sk_filter *fp) { - if (fp->bpf_func != sk_run_filter) + if (fp->jited) module_free(NULL, fp->bpf_func); kfree(fp); } diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 4afad6c17d50..808ce1cae21a 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -689,6 +689,7 @@ void bpf_jit_compile(struct sk_filter *fp) ((u64 *)image)[0] = (u64)code_base; ((u64 *)image)[1] = local_paca->kernel_toc; fp->bpf_func = (void *)image; + fp->jited = 1; } out: kfree(addrs); @@ -697,7 +698,7 @@ out: void bpf_jit_free(struct sk_filter *fp) { - if (fp->bpf_func != sk_run_filter) + if (fp->jited) module_free(NULL, fp->bpf_func); kfree(fp); } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 153f8f2cfd56..9c36dc398f90 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -877,6 +877,7 @@ void bpf_jit_compile(struct sk_filter *fp) if (jit.start) { set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *) jit.start; + fp->jited = 1; } out: kfree(addrs); @@ -887,10 +888,12 @@ void bpf_jit_free(struct sk_filter *fp) unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; - if (fp->bpf_func == sk_run_filter) + if (!fp->jited) goto free_filter; + set_memory_rw(addr, header->pages); module_free(NULL, header); + free_filter: kfree(fp); } diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index d96d2a7c78ee..a82c6b2a9780 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -809,6 +809,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; if (image) { bpf_flush_icache(image, image + proglen); fp->bpf_func = (void *)image; + fp->jited = 1; } out: kfree(addrs); @@ -817,7 +818,7 @@ out: void bpf_jit_free(struct sk_filter *fp) { - if (fp->bpf_func != sk_run_filter) + if (fp->jited) module_free(NULL, fp->bpf_func); kfree(fp); } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 293c57b74edc..dc017735bb91 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -772,6 +772,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; bpf_flush_icache(header, image + proglen); set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *)image; + fp->jited = 1; } out: kfree(addrs); @@ -791,7 +792,7 @@ static void bpf_jit_free_deferred(struct work_struct *work) void bpf_jit_free(struct sk_filter *fp) { - if (fp->bpf_func != sk_run_filter) { + if (fp->jited) { INIT_WORK(&fp->work, bpf_jit_free_deferred); schedule_work(&fp->work); } else { diff --git a/include/linux/filter.h b/include/linux/filter.h index e568c8ef896b..e65e23087367 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -25,7 +25,8 @@ struct sock; struct sk_filter { atomic_t refcnt; - unsigned int len; /* Number of filter blocks */ + u32 jited:1, /* Is our filter JIT'ed? */ + len:31; /* Number of filter blocks */ struct rcu_head rcu; unsigned int (*bpf_func)(const struct sk_buff *skb, const struct sock_filter *filter); diff --git a/net/core/filter.c b/net/core/filter.c index 65b75966e206..bb3c76458ca9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -646,6 +646,7 @@ static int __sk_prepare_filter(struct sk_filter *fp) int err; fp->bpf_func = sk_run_filter; + fp->jited = 0; err = sk_chk_filter(fp->insns, fp->len); if (err) -- cgit v1.2.3-71-gd317 From a3ea269b8bcdbb0c5fa2fd449a436e7987446975 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 28 Mar 2014 18:58:19 +0100 Subject: net: filter: keep original BPF program around In order to open up the possibility to internally transform a BPF program into an alternative and possibly non-trivial reversible representation, we need to keep the original BPF program around, so that it can be passed back to user space w/o the need of a complex decoder. The reason for that use case resides in commit a8fc92778080 ("sk-filter: Add ability to get socket filter program (v2)"), that is, the ability to retrieve the currently attached BPF filter from a given socket used mainly by the checkpoint-restore project, for example. Therefore, we add two helpers sk_{store,release}_orig_filter for taking care of that. In the sk_unattached_filter_create() case, there's no such possibility/requirement to retrieve a loaded BPF program. Therefore, we can spare us the work in that case. This approach will simplify and slightly speed up both, sk_get_filter() and sock_diag_put_filterinfo() handlers as we won't need to successively decode filters anymore through sk_decode_filter(). As we still need sk_decode_filter() later on, we're keeping it around. Joint work with Alexei Starovoitov. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Cc: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/filter.h | 15 +++++++-- net/core/filter.c | 86 ++++++++++++++++++++++++++++++++++++++++---------- net/core/sock_diag.c | 23 ++++++-------- 3 files changed, 93 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index e65e23087367..93a9792e27bc 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -19,14 +19,19 @@ struct compat_sock_fprog { }; #endif +struct sock_fprog_kern { + u16 len; + struct sock_filter *filter; +}; + struct sk_buff; struct sock; -struct sk_filter -{ +struct sk_filter { atomic_t refcnt; u32 jited:1, /* Is our filter JIT'ed? */ len:31; /* Number of filter blocks */ + struct sock_fprog_kern *orig_prog; /* Original BPF program */ struct rcu_head rcu; unsigned int (*bpf_func)(const struct sk_buff *skb, const struct sock_filter *filter); @@ -42,14 +47,20 @@ static inline unsigned int sk_filter_size(unsigned int proglen) offsetof(struct sk_filter, insns[proglen])); } +#define sk_filter_proglen(fprog) \ + (fprog->len * sizeof(fprog->filter[0])) + extern int sk_filter(struct sock *sk, struct sk_buff *skb); extern unsigned int sk_run_filter(const struct sk_buff *skb, const struct sock_filter *filter); + extern int sk_unattached_filter_create(struct sk_filter **pfp, struct sock_fprog *fprog); extern void sk_unattached_filter_destroy(struct sk_filter *fp); + extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); extern int sk_detach_filter(struct sock *sk); + extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len); extern void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to); diff --git a/net/core/filter.c b/net/core/filter.c index bb3c76458ca9..9730e7fe4770 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -629,6 +629,37 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) } EXPORT_SYMBOL(sk_chk_filter); +static int sk_store_orig_filter(struct sk_filter *fp, + const struct sock_fprog *fprog) +{ + unsigned int fsize = sk_filter_proglen(fprog); + struct sock_fprog_kern *fkprog; + + fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL); + if (!fp->orig_prog) + return -ENOMEM; + + fkprog = fp->orig_prog; + fkprog->len = fprog->len; + fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL); + if (!fkprog->filter) { + kfree(fp->orig_prog); + return -ENOMEM; + } + + return 0; +} + +static void sk_release_orig_filter(struct sk_filter *fp) +{ + struct sock_fprog_kern *fprog = fp->orig_prog; + + if (fprog) { + kfree(fprog->filter); + kfree(fprog); + } +} + /** * sk_filter_release_rcu - Release a socket filter by rcu_head * @rcu: rcu_head that contains the sk_filter to free @@ -637,6 +668,7 @@ void sk_filter_release_rcu(struct rcu_head *rcu) { struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); + sk_release_orig_filter(fp); bpf_jit_free(fp); } EXPORT_SYMBOL(sk_filter_release_rcu); @@ -669,8 +701,8 @@ static int __sk_prepare_filter(struct sk_filter *fp) int sk_unattached_filter_create(struct sk_filter **pfp, struct sock_fprog *fprog) { + unsigned int fsize = sk_filter_proglen(fprog); struct sk_filter *fp; - unsigned int fsize = sizeof(struct sock_filter) * fprog->len; int err; /* Make sure new filter is there and in the right amounts. */ @@ -680,10 +712,16 @@ int sk_unattached_filter_create(struct sk_filter **pfp, fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL); if (!fp) return -ENOMEM; + memcpy(fp->insns, fprog->filter, fsize); atomic_set(&fp->refcnt, 1); fp->len = fprog->len; + /* Since unattached filters are not copied back to user + * space through sk_get_filter(), we do not need to hold + * a copy here, and can spare us the work. + */ + fp->orig_prog = NULL; err = __sk_prepare_filter(fp); if (err) @@ -716,7 +754,7 @@ EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct sk_filter *fp, *old_fp; - unsigned int fsize = sizeof(struct sock_filter) * fprog->len; + unsigned int fsize = sk_filter_proglen(fprog); unsigned int sk_fsize = sk_filter_size(fprog->len); int err; @@ -730,6 +768,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL); if (!fp) return -ENOMEM; + if (copy_from_user(fp->insns, fprog->filter, fsize)) { sock_kfree_s(sk, fp, sk_fsize); return -EFAULT; @@ -738,6 +777,12 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) atomic_set(&fp->refcnt, 1); fp->len = fprog->len; + err = sk_store_orig_filter(fp, fprog); + if (err) { + sk_filter_uncharge(sk, fp); + return -ENOMEM; + } + err = __sk_prepare_filter(fp); if (err) { sk_filter_uncharge(sk, fp); @@ -750,6 +795,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (old_fp) sk_filter_uncharge(sk, old_fp); + return 0; } EXPORT_SYMBOL_GPL(sk_attach_filter); @@ -769,6 +815,7 @@ int sk_detach_filter(struct sock *sk) sk_filter_uncharge(sk, filter); ret = 0; } + return ret; } EXPORT_SYMBOL_GPL(sk_detach_filter); @@ -851,34 +898,41 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) to->k = filt->k; } -int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) +int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, + unsigned int len) { + struct sock_fprog_kern *fprog; struct sk_filter *filter; - int i, ret; + int ret = 0; lock_sock(sk); filter = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); - ret = 0; + sock_owned_by_user(sk)); if (!filter) goto out; - ret = filter->len; + + /* We're copying the filter that has been originally attached, + * so no conversion/decode needed anymore. + */ + fprog = filter->orig_prog; + + ret = fprog->len; if (!len) + /* User space only enquires number of filter blocks. */ goto out; + ret = -EINVAL; - if (len < filter->len) + if (len < fprog->len) goto out; ret = -EFAULT; - for (i = 0; i < filter->len; i++) { - struct sock_filter fb; - - sk_decode_filter(&filter->insns[i], &fb); - if (copy_to_user(&ubuf[i], &fb, sizeof(fb))) - goto out; - } + if (copy_to_user(ubuf, fprog->filter, sk_filter_proglen(fprog))) + goto out; - ret = filter->len; + /* Instead of bytes, the API requests to return the number + * of filter blocks. + */ + ret = fprog->len; out: release_sock(sk); return ret; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index a0e9cf6379de..d7af18859322 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -52,9 +52,10 @@ EXPORT_SYMBOL_GPL(sock_diag_put_meminfo); int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, struct sk_buff *skb, int attrtype) { - struct nlattr *attr; + struct sock_fprog_kern *fprog; struct sk_filter *filter; - unsigned int len; + struct nlattr *attr; + unsigned int flen; int err = 0; if (!ns_capable(user_ns, CAP_NET_ADMIN)) { @@ -63,24 +64,20 @@ int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, } rcu_read_lock(); - filter = rcu_dereference(sk->sk_filter); - len = filter ? filter->len * sizeof(struct sock_filter) : 0; + if (!filter) + goto out; - attr = nla_reserve(skb, attrtype, len); + fprog = filter->orig_prog; + flen = sk_filter_proglen(fprog); + + attr = nla_reserve(skb, attrtype, flen); if (attr == NULL) { err = -EMSGSIZE; goto out; } - if (filter) { - struct sock_filter *fb = (struct sock_filter *)nla_data(attr); - int i; - - for (i = 0; i < filter->len; i++, fb++) - sk_decode_filter(&filter->insns[i], fb); - } - + memcpy(nla_data(attr), fprog->filter, flen); out: rcu_read_unlock(); return err; -- cgit v1.2.3-71-gd317 From fbc907f0b1386c02e00516aa78a0fa6b0454fd0b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 28 Mar 2014 18:58:20 +0100 Subject: net: filter: move filter accounting to filter core This patch basically does two things, i) removes the extern keyword from the include/linux/filter.h file to be more consistent with the rest of Joe's changes, and ii) moves filter accounting into the filter core framework. Filter accounting mainly done through sk_filter_{un,}charge() take care of the case when sockets are being cloned through sk_clone_lock() so that removal of the filter on one socket won't result in eviction as it's still referenced by the other. These functions actually belong to net/core/filter.c and not include/net/sock.h as we want to keep all that in a central place. It's also not in fast-path so uninlining them is fine and even allows us to get rd of sk_filter_release_rcu()'s EXPORT_SYMBOL and a forward declaration. Joint work with Alexei Starovoitov. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Cc: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/filter.h | 30 +++++++++++++++++------------- include/net/sock.h | 27 --------------------------- net/core/filter.c | 27 +++++++++++++++++++++++++-- 3 files changed, 42 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 93a9792e27bc..9bde3ed19fe6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -50,28 +50,32 @@ static inline unsigned int sk_filter_size(unsigned int proglen) #define sk_filter_proglen(fprog) \ (fprog->len * sizeof(fprog->filter[0])) -extern int sk_filter(struct sock *sk, struct sk_buff *skb); -extern unsigned int sk_run_filter(const struct sk_buff *skb, - const struct sock_filter *filter); +int sk_filter(struct sock *sk, struct sk_buff *skb); +unsigned int sk_run_filter(const struct sk_buff *skb, + const struct sock_filter *filter); -extern int sk_unattached_filter_create(struct sk_filter **pfp, - struct sock_fprog *fprog); -extern void sk_unattached_filter_destroy(struct sk_filter *fp); +int sk_unattached_filter_create(struct sk_filter **pfp, + struct sock_fprog *fprog); +void sk_unattached_filter_destroy(struct sk_filter *fp); -extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); -extern int sk_detach_filter(struct sock *sk); +int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); +int sk_detach_filter(struct sock *sk); -extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); -extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len); -extern void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to); +int sk_chk_filter(struct sock_filter *filter, unsigned int flen); +int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, + unsigned int len); +void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to); + +void sk_filter_charge(struct sock *sk, struct sk_filter *fp); +void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); #ifdef CONFIG_BPF_JIT #include #include #include -extern void bpf_jit_compile(struct sk_filter *fp); -extern void bpf_jit_free(struct sk_filter *fp); +void bpf_jit_compile(struct sk_filter *fp); +void bpf_jit_free(struct sk_filter *fp); static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, u32 pass, void *image) diff --git a/include/net/sock.h b/include/net/sock.h index 8d7c431a0660..06a5668f05c9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1621,33 +1621,6 @@ void sk_common_release(struct sock *sk); /* Initialise core socket variables */ void sock_init_data(struct socket *sock, struct sock *sk); -void sk_filter_release_rcu(struct rcu_head *rcu); - -/** - * sk_filter_release - release a socket filter - * @fp: filter to remove - * - * Remove a filter from a socket and release its resources. - */ - -static inline void sk_filter_release(struct sk_filter *fp) -{ - if (atomic_dec_and_test(&fp->refcnt)) - call_rcu(&fp->rcu, sk_filter_release_rcu); -} - -static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) -{ - atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc); - sk_filter_release(fp); -} - -static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp) -{ - atomic_inc(&fp->refcnt); - atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc); -} - /* * Socket reference counting postulates. * diff --git a/net/core/filter.c b/net/core/filter.c index 9730e7fe4770..5b3427aaeca5 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -664,14 +664,37 @@ static void sk_release_orig_filter(struct sk_filter *fp) * sk_filter_release_rcu - Release a socket filter by rcu_head * @rcu: rcu_head that contains the sk_filter to free */ -void sk_filter_release_rcu(struct rcu_head *rcu) +static void sk_filter_release_rcu(struct rcu_head *rcu) { struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); sk_release_orig_filter(fp); bpf_jit_free(fp); } -EXPORT_SYMBOL(sk_filter_release_rcu); + +/** + * sk_filter_release - release a socket filter + * @fp: filter to remove + * + * Remove a filter from a socket and release its resources. + */ +static void sk_filter_release(struct sk_filter *fp) +{ + if (atomic_dec_and_test(&fp->refcnt)) + call_rcu(&fp->rcu, sk_filter_release_rcu); +} + +void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) +{ + atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc); + sk_filter_release(fp); +} + +void sk_filter_charge(struct sock *sk, struct sk_filter *fp) +{ + atomic_inc(&fp->refcnt); + atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc); +} static int __sk_prepare_filter(struct sk_filter *fp) { -- cgit v1.2.3-71-gd317 From e62d2df084e2849edffb206559725fa81bb569a8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 28 Mar 2014 18:58:21 +0100 Subject: net: ptp: use sk_unattached_filter_create() for BPF This patch migrates an open-coded sk_run_filter() implementation with proper use of the BPF API, that is, sk_unattached_filter_create(). This migration is needed, as we will be internally transforming the filter to a different representation, and therefore needs to be decoupled. It is okay to do so as skb_timestamping_init() is called during initialization of the network stack in core initcall via sock_init(). This would effectively also allow for PTP filters to be jit compiled if bpf_jit_enable is set. For better readability, there are also some newlines introduced, also ptp_classify.h is only in kernel space. Joint work with Alexei Starovoitov. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Cc: Richard Cochran Cc: Jiri Benc Signed-off-by: David S. Miller --- include/linux/ptp_classify.h | 4 ---- net/core/timestamping.c | 21 ++++++++++++++------- 2 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index 1dc420ba213a..3decfa4d3732 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -27,11 +27,7 @@ #include #include #include -#ifdef __KERNEL__ #include -#else -#include -#endif #define PTP_CLASS_NONE 0x00 /* not a PTP event message */ #define PTP_CLASS_V1 0x01 /* protocol version 1 */ diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 661b5a40ec10..e43d56acf803 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -23,16 +23,13 @@ #include #include -static struct sock_filter ptp_filter[] = { - PTP_FILTER -}; +static struct sk_filter *ptp_insns __read_mostly; static unsigned int classify(const struct sk_buff *skb) { - if (likely(skb->dev && - skb->dev->phydev && + if (likely(skb->dev && skb->dev->phydev && skb->dev->phydev->drv)) - return sk_run_filter(skb, ptp_filter); + return SK_RUN_FILTER(ptp_insns, skb); else return PTP_CLASS_NONE; } @@ -60,11 +57,13 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) if (likely(phydev->drv->txtstamp)) { if (!atomic_inc_not_zero(&sk->sk_refcnt)) return; + clone = skb_clone(skb, GFP_ATOMIC); if (!clone) { sock_put(sk); return; } + clone->sk = sk; phydev->drv->txtstamp(phydev, clone, type); } @@ -89,12 +88,15 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, } *skb_hwtstamps(skb) = *hwtstamps; + serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; skb->sk = NULL; + err = sock_queue_err_skb(sk, skb); + sock_put(sk); if (err) kfree_skb(skb); @@ -135,5 +137,10 @@ EXPORT_SYMBOL_GPL(skb_defer_rx_timestamp); void __init skb_timestamping_init(void) { - BUG_ON(sk_chk_filter(ptp_filter, ARRAY_SIZE(ptp_filter))); + static struct sock_filter ptp_filter[] = { PTP_FILTER }; + struct sock_fprog ptp_prog = { + .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, + }; + + BUG_ON(sk_unattached_filter_create(&ptp_insns, &ptp_prog)); } -- cgit v1.2.3-71-gd317 From 164d8c6665213c931645578310256da7b1259331 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 28 Mar 2014 18:58:22 +0100 Subject: net: ptp: do not reimplement PTP/BPF classifier There are currently pch_gbe, cpts, and ixp4xx_eth drivers that open-code and reimplement a BPF classifier for the PTP protocol. Since all of them effectively do the very same thing and load the very same PTP/BPF filter, we can just consolidate that code by introducing ptp_classify_raw() in the time-stamping core framework which can be used in drivers. As drivers get initialized after bootstrapping the core networking subsystem, they can make use of ptp_insns wrapped through ptp_classify_raw(), which allows to simplify and remove PTP classifier setup code in drivers. Joint work with Alexei Starovoitov. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Cc: Richard Cochran Cc: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 11 +---------- drivers/net/ethernet/ti/cpts.c | 10 +--------- drivers/net/ethernet/xscale/ixp4xx_eth.c | 11 +---------- include/linux/ptp_classify.h | 10 ++-------- net/core/timestamping.c | 8 +++++++- 5 files changed, 12 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 464e91058c81..73e66838cfef 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -120,10 +120,6 @@ static void pch_gbe_mdio_write(struct net_device *netdev, int addr, int reg, int data); static void pch_gbe_set_multi(struct net_device *netdev); -static struct sock_filter ptp_filter[] = { - PTP_FILTER -}; - static int pch_ptp_match(struct sk_buff *skb, u16 uid_hi, u32 uid_lo, u16 seqid) { u8 *data = skb->data; @@ -131,7 +127,7 @@ static int pch_ptp_match(struct sk_buff *skb, u16 uid_hi, u32 uid_lo, u16 seqid) u16 *hi, *id; u32 lo; - if (sk_run_filter(skb, ptp_filter) == PTP_CLASS_NONE) + if (ptp_classify_raw(skb) == PTP_CLASS_NONE) return 0; offset = ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN; @@ -2635,11 +2631,6 @@ static int pch_gbe_probe(struct pci_dev *pdev, adapter->ptp_pdev = pci_get_bus_and_slot(adapter->pdev->bus->number, PCI_DEVFN(12, 4)); - if (ptp_filter_init(ptp_filter, ARRAY_SIZE(ptp_filter))) { - dev_err(&pdev->dev, "Bad ptp filter\n"); - ret = -EINVAL; - goto err_free_netdev; - } netdev->netdev_ops = &pch_gbe_netdev_ops; netdev->watchdog_timeo = PCH_GBE_WATCHDOG_PERIOD; diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index 372cb192c5aa..a3bbf59eaafd 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -31,10 +31,6 @@ #ifdef CONFIG_TI_CPTS -static struct sock_filter ptp_filter[] = { - PTP_FILTER -}; - #define cpts_read32(c, r) __raw_readl(&c->reg->r) #define cpts_write32(c, v, r) __raw_writel(v, &c->reg->r) @@ -301,7 +297,7 @@ static u64 cpts_find_ts(struct cpts *cpts, struct sk_buff *skb, int ev_type) u64 ns = 0; struct cpts_event *event; struct list_head *this, *next; - unsigned int class = sk_run_filter(skb, ptp_filter); + unsigned int class = ptp_classify_raw(skb); unsigned long flags; u16 seqid; u8 mtype; @@ -372,10 +368,6 @@ int cpts_register(struct device *dev, struct cpts *cpts, int err, i; unsigned long flags; - if (ptp_filter_init(ptp_filter, ARRAY_SIZE(ptp_filter))) { - pr_err("cpts: bad ptp filter\n"); - return -EINVAL; - } cpts->info = cpts_info; cpts->clock = ptp_clock_register(&cpts->info, dev); if (IS_ERR(cpts->clock)) { diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 25283f17d82f..f7e0f0f7c2e2 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -256,10 +256,6 @@ static int ports_open; static struct port *npe_port_tab[MAX_NPES]; static struct dma_pool *dma_pool; -static struct sock_filter ptp_filter[] = { - PTP_FILTER -}; - static int ixp_ptp_match(struct sk_buff *skb, u16 uid_hi, u32 uid_lo, u16 seqid) { u8 *data = skb->data; @@ -267,7 +263,7 @@ static int ixp_ptp_match(struct sk_buff *skb, u16 uid_hi, u32 uid_lo, u16 seqid) u16 *hi, *id; u32 lo; - if (sk_run_filter(skb, ptp_filter) != PTP_CLASS_V1_IPV4) + if (ptp_classify_raw(skb) != PTP_CLASS_V1_IPV4) return 0; offset = ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN; @@ -1413,11 +1409,6 @@ static int eth_init_one(struct platform_device *pdev) char phy_id[MII_BUS_ID_SIZE + 3]; int err; - if (ptp_filter_init(ptp_filter, ARRAY_SIZE(ptp_filter))) { - pr_err("ixp4xx_eth: bad ptp filter\n"); - return -EINVAL; - } - if (!(dev = alloc_etherdev(sizeof(struct port)))) return -ENOMEM; diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index 3decfa4d3732..6d3b0a2ef9ce 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -80,14 +80,6 @@ #define OP_RETA (BPF_RET | BPF_A) #define OP_RETK (BPF_RET | BPF_K) -static inline int ptp_filter_init(struct sock_filter *f, int len) -{ - if (OP_LDH == f[0].code) - return sk_chk_filter(f, len); - else - return 0; -} - #define PTP_FILTER \ {OP_LDH, 0, 0, OFF_ETYPE }, /* */ \ {OP_JEQ, 0, 12, ETH_P_IP }, /* f goto L20 */ \ @@ -133,4 +125,6 @@ static inline int ptp_filter_init(struct sock_filter *f, int len) {OP_RETA, 0, 0, 0 }, /* */ \ /*L6x*/ {OP_RETK, 0, 0, PTP_CLASS_NONE }, +unsigned int ptp_classify_raw(const struct sk_buff *skb); + #endif diff --git a/net/core/timestamping.c b/net/core/timestamping.c index e43d56acf803..9ff26b3cc021 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -25,11 +25,17 @@ static struct sk_filter *ptp_insns __read_mostly; +unsigned int ptp_classify_raw(const struct sk_buff *skb) +{ + return SK_RUN_FILTER(ptp_insns, skb); +} +EXPORT_SYMBOL_GPL(ptp_classify_raw); + static unsigned int classify(const struct sk_buff *skb) { if (likely(skb->dev && skb->dev->phydev && skb->dev->phydev->drv)) - return SK_RUN_FILTER(ptp_insns, skb); + return ptp_classify_raw(skb); else return PTP_CLASS_NONE; } -- cgit v1.2.3-71-gd317 From 77e0114ae9ae08685c503772a57af21d299c6701 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 28 Mar 2014 18:58:24 +0100 Subject: net: isdn: use sk_unattached_filter api Similarly as in ppp, we need to migrate the ISDN/PPP code to make use of the sk_unattached_filter api in order to decouple having direct filter structure access. By using sk_unattached_filter_{create,destroy}, we can allow for the possibility to jit compile filters for faster filter verdicts as well. Joint work with Alexei Starovoitov. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Cc: Karsten Keil Cc: isdn4linux@listserv.isdn4linux.de Signed-off-by: David S. Miller --- drivers/isdn/i4l/isdn_ppp.c | 61 ++++++++++++++++++++++++++++++--------------- include/linux/isdn_ppp.h | 5 ++-- 2 files changed, 43 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 38ceac5053a0..a5da511e3c9a 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -378,10 +378,15 @@ isdn_ppp_release(int min, struct file *file) is->slcomp = NULL; #endif #ifdef CONFIG_IPPP_FILTER - kfree(is->pass_filter); - is->pass_filter = NULL; - kfree(is->active_filter); - is->active_filter = NULL; + if (is->pass_filter) { + sk_unattached_filter_destroy(is->pass_filter); + is->pass_filter = NULL; + } + + if (is->active_filter) { + sk_unattached_filter_destroy(is->active_filter); + is->active_filter = NULL; + } #endif /* TODO: if this was the previous master: link the stuff to the new master */ @@ -629,25 +634,41 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) #ifdef CONFIG_IPPP_FILTER case PPPIOCSPASS: { + struct sock_fprog fprog; struct sock_filter *code; - int len = get_filter(argp, &code); + int err, len = get_filter(argp, &code); + if (len < 0) return len; - kfree(is->pass_filter); - is->pass_filter = code; - is->pass_len = len; - break; + + fprog.len = len; + fprog.filter = code; + + if (is->pass_filter) + sk_unattached_filter_destroy(is->pass_filter); + err = sk_unattached_filter_create(&is->pass_filter, &fprog); + kfree(code); + + return err; } case PPPIOCSACTIVE: { + struct sock_fprog fprog; struct sock_filter *code; - int len = get_filter(argp, &code); + int err, len = get_filter(argp, &code); + if (len < 0) return len; - kfree(is->active_filter); - is->active_filter = code; - is->active_len = len; - break; + + fprog.len = len; + fprog.filter = code; + + if (is->active_filter) + sk_unattached_filter_destroy(is->active_filter); + err = sk_unattached_filter_create(&is->active_filter, &fprog); + kfree(code); + + return err; } #endif /* CONFIG_IPPP_FILTER */ default: @@ -1147,14 +1168,14 @@ isdn_ppp_push_higher(isdn_net_dev *net_dev, isdn_net_local *lp, struct sk_buff * } if (is->pass_filter - && sk_run_filter(skb, is->pass_filter) == 0) { + && SK_RUN_FILTER(is->pass_filter, skb) == 0) { if (is->debug & 0x2) printk(KERN_DEBUG "IPPP: inbound frame filtered.\n"); kfree_skb(skb); return; } if (!(is->active_filter - && sk_run_filter(skb, is->active_filter) == 0)) { + && SK_RUN_FILTER(is->active_filter, skb) == 0)) { if (is->debug & 0x2) printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n"); lp->huptimer = 0; @@ -1293,14 +1314,14 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev) } if (ipt->pass_filter - && sk_run_filter(skb, ipt->pass_filter) == 0) { + && SK_RUN_FILTER(ipt->pass_filter, skb) == 0) { if (ipt->debug & 0x4) printk(KERN_DEBUG "IPPP: outbound frame filtered.\n"); kfree_skb(skb); goto unlock; } if (!(ipt->active_filter - && sk_run_filter(skb, ipt->active_filter) == 0)) { + && SK_RUN_FILTER(ipt->active_filter, skb) == 0)) { if (ipt->debug & 0x4) printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n"); lp->huptimer = 0; @@ -1490,9 +1511,9 @@ int isdn_ppp_autodial_filter(struct sk_buff *skb, isdn_net_local *lp) } drop |= is->pass_filter - && sk_run_filter(skb, is->pass_filter) == 0; + && SK_RUN_FILTER(is->pass_filter, skb) == 0; drop |= is->active_filter - && sk_run_filter(skb, is->active_filter) == 0; + && SK_RUN_FILTER(is->active_filter, skb) == 0; skb_push(skb, IPPP_MAX_HEADER - 4); return drop; diff --git a/include/linux/isdn_ppp.h b/include/linux/isdn_ppp.h index d5f62bc5f4be..8e10f57f109f 100644 --- a/include/linux/isdn_ppp.h +++ b/include/linux/isdn_ppp.h @@ -180,9 +180,8 @@ struct ippp_struct { struct slcompress *slcomp; #endif #ifdef CONFIG_IPPP_FILTER - struct sock_filter *pass_filter; /* filter for packets to pass */ - struct sock_filter *active_filter; /* filter for pkts to reset idle */ - unsigned pass_len, active_len; + struct sk_filter *pass_filter; /* filter for packets to pass */ + struct sk_filter *active_filter; /* filter for pkts to reset idle */ #endif unsigned long debug; struct isdn_ppp_compressor *compressor,*decompressor; -- cgit v1.2.3-71-gd317 From bd4cf0ed331a275e9bf5a49e6d0fd55dffc551b8 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 28 Mar 2014 18:58:25 +0100 Subject: net: filter: rework/optimize internal BPF interpreter's instruction set This patch replaces/reworks the kernel-internal BPF interpreter with an optimized BPF instruction set format that is modelled closer to mimic native instruction sets and is designed to be JITed with one to one mapping. Thus, the new interpreter is noticeably faster than the current implementation of sk_run_filter(); mainly for two reasons: 1. Fall-through jumps: BPF jump instructions are forced to go either 'true' or 'false' branch which causes branch-miss penalty. The new BPF jump instructions have only one branch and fall-through otherwise, which fits the CPU branch predictor logic better. `perf stat` shows drastic difference for branch-misses between the old and new code. 2. Jump-threaded implementation of interpreter vs switch statement: Instead of single table-jump at the top of 'switch' statement, gcc will now generate multiple table-jump instructions, which helps CPU branch predictor logic. Note that the verification of filters is still being done through sk_chk_filter() in classical BPF format, so filters from user- or kernel space are verified in the same way as we do now, and same restrictions/constraints hold as well. We reuse current BPF JIT compilers in a way that this upgrade would even be fine as is, but nevertheless allows for a successive upgrade of BPF JIT compilers to the new format. The internal instruction set migration is being done after the probing for JIT compilation, so in case JIT compilers are able to create a native opcode image, we're going to use that, and in all other cases we're doing a follow-up migration of the BPF program's instruction set, so that it can be transparently run in the new interpreter. In short, the *internal* format extends BPF in the following way (more details can be taken from the appended documentation): - Number of registers increase from 2 to 10 - Register width increases from 32-bit to 64-bit - Conditional jt/jf targets replaced with jt/fall-through - Adds signed > and >= insns - 16 4-byte stack slots for register spill-fill replaced with up to 512 bytes of multi-use stack space - Introduction of bpf_call insn and register passing convention for zero overhead calls from/to other kernel functions - Adds arithmetic right shift and endianness conversion insns - Adds atomic_add insn - Old tax/txa insns are replaced with 'mov dst,src' insn Performance of two BPF filters generated by libpcap resp. bpf_asm was measured on x86_64, i386 and arm32 (other libpcap programs have similar performance differences): fprog #1 is taken from Documentation/networking/filter.txt: tcpdump -i eth0 port 22 -dd fprog #2 is taken from 'man tcpdump': tcpdump -i eth0 'tcp port 22 and (((ip[2:2] - ((ip[0]&0xf)<<2)) - ((tcp[12]&0xf0)>>2)) != 0)' -dd Raw performance data from BPF micro-benchmark: SK_RUN_FILTER on the same SKB (cache-hit) or 10k SKBs (cache-miss); time in ns per call, smaller is better: --x86_64-- fprog #1 fprog #1 fprog #2 fprog #2 cache-hit cache-miss cache-hit cache-miss old BPF 90 101 192 202 new BPF 31 71 47 97 old BPF jit 12 34 17 44 new BPF jit TBD --i386-- fprog #1 fprog #1 fprog #2 fprog #2 cache-hit cache-miss cache-hit cache-miss old BPF 107 136 227 252 new BPF 40 119 69 172 --arm32-- fprog #1 fprog #1 fprog #2 fprog #2 cache-hit cache-miss cache-hit cache-miss old BPF 202 300 475 540 new BPF 180 270 330 470 old BPF jit 26 182 37 202 new BPF jit TBD Thus, without changing any userland BPF filters, applications on top of AF_PACKET (or other families) such as libpcap/tcpdump, cls_bpf classifier, netfilter's xt_bpf, team driver's load-balancing mode, and many more will have better interpreter filtering performance. While we are replacing the internal BPF interpreter, we also need to convert seccomp BPF in the same step to make use of the new internal structure since it makes use of lower-level API details without being further decoupled through higher-level calls like sk_unattached_filter_{create,destroy}(), for example. Just as for normal socket filtering, also seccomp BPF experiences a time-to-verdict speedup: 05-sim-long_jumps.c of libseccomp was used as micro-benchmark: seccomp_rule_add_exact(ctx,... seccomp_rule_add_exact(ctx,... rc = seccomp_load(ctx); for (i = 0; i < 10000000; i++) syscall(199, 100); 'short filter' has 2 rules 'large filter' has 200 rules 'short filter' performance is slightly better on x86_64/i386/arm32 'large filter' is much faster on x86_64 and i386 and shows no difference on arm32 --x86_64-- short filter old BPF: 2.7 sec 39.12% bench libc-2.15.so [.] syscall 8.10% bench [kernel.kallsyms] [k] sk_run_filter 6.31% bench [kernel.kallsyms] [k] system_call 5.59% bench [kernel.kallsyms] [k] trace_hardirqs_on_caller 4.37% bench [kernel.kallsyms] [k] trace_hardirqs_off_caller 3.70% bench [kernel.kallsyms] [k] __secure_computing 3.67% bench [kernel.kallsyms] [k] lock_is_held 3.03% bench [kernel.kallsyms] [k] seccomp_bpf_load new BPF: 2.58 sec 42.05% bench libc-2.15.so [.] syscall 6.91% bench [kernel.kallsyms] [k] system_call 6.25% bench [kernel.kallsyms] [k] trace_hardirqs_on_caller 6.07% bench [kernel.kallsyms] [k] __secure_computing 5.08% bench [kernel.kallsyms] [k] sk_run_filter_int_seccomp --arm32-- short filter old BPF: 4.0 sec 39.92% bench [kernel.kallsyms] [k] vector_swi 16.60% bench [kernel.kallsyms] [k] sk_run_filter 14.66% bench libc-2.17.so [.] syscall 5.42% bench [kernel.kallsyms] [k] seccomp_bpf_load 5.10% bench [kernel.kallsyms] [k] __secure_computing new BPF: 3.7 sec 35.93% bench [kernel.kallsyms] [k] vector_swi 21.89% bench libc-2.17.so [.] syscall 13.45% bench [kernel.kallsyms] [k] sk_run_filter_int_seccomp 6.25% bench [kernel.kallsyms] [k] __secure_computing 3.96% bench [kernel.kallsyms] [k] syscall_trace_exit --x86_64-- large filter old BPF: 8.6 seconds 73.38% bench [kernel.kallsyms] [k] sk_run_filter 10.70% bench libc-2.15.so [.] syscall 5.09% bench [kernel.kallsyms] [k] seccomp_bpf_load 1.97% bench [kernel.kallsyms] [k] system_call new BPF: 5.7 seconds 66.20% bench [kernel.kallsyms] [k] sk_run_filter_int_seccomp 16.75% bench libc-2.15.so [.] syscall 3.31% bench [kernel.kallsyms] [k] system_call 2.88% bench [kernel.kallsyms] [k] __secure_computing --i386-- large filter old BPF: 5.4 sec new BPF: 3.8 sec --arm32-- large filter old BPF: 13.5 sec 73.88% bench [kernel.kallsyms] [k] sk_run_filter 10.29% bench [kernel.kallsyms] [k] vector_swi 6.46% bench libc-2.17.so [.] syscall 2.94% bench [kernel.kallsyms] [k] seccomp_bpf_load 1.19% bench [kernel.kallsyms] [k] __secure_computing 0.87% bench [kernel.kallsyms] [k] sys_getuid new BPF: 13.5 sec 76.08% bench [kernel.kallsyms] [k] sk_run_filter_int_seccomp 10.98% bench [kernel.kallsyms] [k] vector_swi 5.87% bench libc-2.17.so [.] syscall 1.77% bench [kernel.kallsyms] [k] __secure_computing 0.93% bench [kernel.kallsyms] [k] sys_getuid BPF filters generated by seccomp are very branchy, so the new internal BPF performance is better than the old one. Performance gains will be even higher when BPF JIT is committed for the new structure, which is planned in future work (as successive JIT migrations). BPF has also been stress-tested with trinity's BPF fuzzer. Joint work with Daniel Borkmann. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Cc: Hagen Paul Pfeifer Cc: Kees Cook Cc: Paul Moore Cc: Ingo Molnar Cc: H. Peter Anvin Cc: linux-kernel@vger.kernel.org Acked-by: Kees Cook Signed-off-by: David S. Miller --- include/linux/filter.h | 74 ++- include/linux/seccomp.h | 1 - kernel/seccomp.c | 119 ++-- net/core/filter.c | 1457 +++++++++++++++++++++++++++++++++++++---------- 4 files changed, 1279 insertions(+), 372 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 9bde3ed19fe6..262dcbb75ffe 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -9,13 +9,58 @@ #include #include -#ifdef CONFIG_COMPAT -/* - * A struct sock_filter is architecture independent. +/* Internally used and optimized filter representation with extended + * instruction set based on top of classic BPF. */ + +/* instruction classes */ +#define BPF_ALU64 0x07 /* alu mode in double word width */ + +/* ld/ldx fields */ +#define BPF_DW 0x18 /* double word */ +#define BPF_XADD 0xc0 /* exclusive add */ + +/* alu/jmp fields */ +#define BPF_MOV 0xb0 /* mov reg to reg */ +#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ + +/* change endianness of a register */ +#define BPF_END 0xd0 /* flags for endianness conversion: */ +#define BPF_TO_LE 0x00 /* convert to little-endian */ +#define BPF_TO_BE 0x08 /* convert to big-endian */ +#define BPF_FROM_LE BPF_TO_LE +#define BPF_FROM_BE BPF_TO_BE + +#define BPF_JNE 0x50 /* jump != */ +#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ +#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_CALL 0x80 /* function call */ +#define BPF_EXIT 0x90 /* function return */ + +/* BPF has 10 general purpose 64-bit registers and stack frame. */ +#define MAX_BPF_REG 11 + +/* BPF program can access up to 512 bytes of stack space. */ +#define MAX_BPF_STACK 512 + +/* Arg1, context and stack frame pointer register positions. */ +#define ARG1_REG 1 +#define CTX_REG 6 +#define FP_REG 10 + +struct sock_filter_int { + __u8 code; /* opcode */ + __u8 a_reg:4; /* dest register */ + __u8 x_reg:4; /* source register */ + __s16 off; /* signed offset */ + __s32 imm; /* signed immediate constant */ +}; + +#ifdef CONFIG_COMPAT +/* A struct sock_filter is architecture independent. */ struct compat_sock_fprog { u16 len; - compat_uptr_t filter; /* struct sock_filter * */ + compat_uptr_t filter; /* struct sock_filter * */ }; #endif @@ -26,6 +71,7 @@ struct sock_fprog_kern { struct sk_buff; struct sock; +struct seccomp_data; struct sk_filter { atomic_t refcnt; @@ -34,9 +80,10 @@ struct sk_filter { struct sock_fprog_kern *orig_prog; /* Original BPF program */ struct rcu_head rcu; unsigned int (*bpf_func)(const struct sk_buff *skb, - const struct sock_filter *filter); + const struct sock_filter_int *filter); union { - struct sock_filter insns[0]; + struct sock_filter insns[0]; + struct sock_filter_int insnsi[0]; struct work_struct work; }; }; @@ -50,9 +97,18 @@ static inline unsigned int sk_filter_size(unsigned int proglen) #define sk_filter_proglen(fprog) \ (fprog->len * sizeof(fprog->filter[0])) +#define SK_RUN_FILTER(filter, ctx) \ + (*filter->bpf_func)(ctx, filter->insnsi) + int sk_filter(struct sock *sk, struct sk_buff *skb); -unsigned int sk_run_filter(const struct sk_buff *skb, - const struct sock_filter *filter); + +u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx, + const struct sock_filter_int *insni); +u32 sk_run_filter_int_skb(const struct sk_buff *ctx, + const struct sock_filter_int *insni); + +int sk_convert_filter(struct sock_filter *prog, int len, + struct sock_filter_int *new_prog, int *new_len); int sk_unattached_filter_create(struct sk_filter **pfp, struct sock_fprog *fprog); @@ -86,7 +142,6 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, 16, 1, image, proglen, false); } -#define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns) #else #include static inline void bpf_jit_compile(struct sk_filter *fp) @@ -96,7 +151,6 @@ static inline void bpf_jit_free(struct sk_filter *fp) { kfree(fp); } -#define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns) #endif static inline int bpf_tell_extensions(void) diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 6f19cfd1840e..4054b0994071 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -76,7 +76,6 @@ static inline int seccomp_mode(struct seccomp *s) #ifdef CONFIG_SECCOMP_FILTER extern void put_seccomp_filter(struct task_struct *tsk); extern void get_seccomp_filter(struct task_struct *tsk); -extern u32 seccomp_bpf_load(int off); #else /* CONFIG_SECCOMP_FILTER */ static inline void put_seccomp_filter(struct task_struct *tsk) { diff --git a/kernel/seccomp.c b/kernel/seccomp.c index b7a10048a32c..4f18e754c23e 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -55,60 +55,33 @@ struct seccomp_filter { atomic_t usage; struct seccomp_filter *prev; unsigned short len; /* Instruction count */ - struct sock_filter insns[]; + struct sock_filter_int insnsi[]; }; /* Limit any path through the tree to 256KB worth of instructions. */ #define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter)) -/** - * get_u32 - returns a u32 offset into data - * @data: a unsigned 64 bit value - * @index: 0 or 1 to return the first or second 32-bits - * - * This inline exists to hide the length of unsigned long. If a 32-bit - * unsigned long is passed in, it will be extended and the top 32-bits will be - * 0. If it is a 64-bit unsigned long, then whatever data is resident will be - * properly returned. - * +/* * Endianness is explicitly ignored and left for BPF program authors to manage * as per the specific architecture. */ -static inline u32 get_u32(u64 data, int index) +static void populate_seccomp_data(struct seccomp_data *sd) { - return ((u32 *)&data)[index]; -} + struct task_struct *task = current; + struct pt_regs *regs = task_pt_regs(task); -/* Helper for bpf_load below. */ -#define BPF_DATA(_name) offsetof(struct seccomp_data, _name) -/** - * bpf_load: checks and returns a pointer to the requested offset - * @off: offset into struct seccomp_data to load from - * - * Returns the requested 32-bits of data. - * seccomp_check_filter() should assure that @off is 32-bit aligned - * and not out of bounds. Failure to do so is a BUG. - */ -u32 seccomp_bpf_load(int off) -{ - struct pt_regs *regs = task_pt_regs(current); - if (off == BPF_DATA(nr)) - return syscall_get_nr(current, regs); - if (off == BPF_DATA(arch)) - return syscall_get_arch(current, regs); - if (off >= BPF_DATA(args[0]) && off < BPF_DATA(args[6])) { - unsigned long value; - int arg = (off - BPF_DATA(args[0])) / sizeof(u64); - int index = !!(off % sizeof(u64)); - syscall_get_arguments(current, regs, arg, 1, &value); - return get_u32(value, index); - } - if (off == BPF_DATA(instruction_pointer)) - return get_u32(KSTK_EIP(current), 0); - if (off == BPF_DATA(instruction_pointer) + sizeof(u32)) - return get_u32(KSTK_EIP(current), 1); - /* seccomp_check_filter should make this impossible. */ - BUG(); + sd->nr = syscall_get_nr(task, regs); + sd->arch = syscall_get_arch(task, regs); + + /* Unroll syscall_get_args to help gcc on arm. */ + syscall_get_arguments(task, regs, 0, 1, (unsigned long *) &sd->args[0]); + syscall_get_arguments(task, regs, 1, 1, (unsigned long *) &sd->args[1]); + syscall_get_arguments(task, regs, 2, 1, (unsigned long *) &sd->args[2]); + syscall_get_arguments(task, regs, 3, 1, (unsigned long *) &sd->args[3]); + syscall_get_arguments(task, regs, 4, 1, (unsigned long *) &sd->args[4]); + syscall_get_arguments(task, regs, 5, 1, (unsigned long *) &sd->args[5]); + + sd->instruction_pointer = KSTK_EIP(task); } /** @@ -133,17 +106,17 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) switch (code) { case BPF_S_LD_W_ABS: - ftest->code = BPF_S_ANC_SECCOMP_LD_W; + ftest->code = BPF_LDX | BPF_W | BPF_ABS; /* 32-bit aligned and not out of bounds. */ if (k >= sizeof(struct seccomp_data) || k & 3) return -EINVAL; continue; case BPF_S_LD_W_LEN: - ftest->code = BPF_S_LD_IMM; + ftest->code = BPF_LD | BPF_IMM; ftest->k = sizeof(struct seccomp_data); continue; case BPF_S_LDX_W_LEN: - ftest->code = BPF_S_LDX_IMM; + ftest->code = BPF_LDX | BPF_IMM; ftest->k = sizeof(struct seccomp_data); continue; /* Explicitly include allowed calls. */ @@ -185,6 +158,7 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) case BPF_S_JMP_JGT_X: case BPF_S_JMP_JSET_K: case BPF_S_JMP_JSET_X: + sk_decode_filter(ftest, ftest); continue; default: return -EINVAL; @@ -202,18 +176,21 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) static u32 seccomp_run_filters(int syscall) { struct seccomp_filter *f; + struct seccomp_data sd; u32 ret = SECCOMP_RET_ALLOW; /* Ensure unexpected behavior doesn't result in failing open. */ if (WARN_ON(current->seccomp.filter == NULL)) return SECCOMP_RET_KILL; + populate_seccomp_data(&sd); + /* * All filters in the list are evaluated and the lowest BPF return * value always takes priority (ignoring the DATA). */ for (f = current->seccomp.filter; f; f = f->prev) { - u32 cur_ret = sk_run_filter(NULL, f->insns); + u32 cur_ret = sk_run_filter_int_seccomp(&sd, f->insnsi); if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) ret = cur_ret; } @@ -231,6 +208,8 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) struct seccomp_filter *filter; unsigned long fp_size = fprog->len * sizeof(struct sock_filter); unsigned long total_insns = fprog->len; + struct sock_filter *fp; + int new_len; long ret; if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) @@ -252,28 +231,43 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) CAP_SYS_ADMIN) != 0) return -EACCES; - /* Allocate a new seccomp_filter */ - filter = kzalloc(sizeof(struct seccomp_filter) + fp_size, - GFP_KERNEL|__GFP_NOWARN); - if (!filter) + fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN); + if (!fp) return -ENOMEM; - atomic_set(&filter->usage, 1); - filter->len = fprog->len; /* Copy the instructions from fprog. */ ret = -EFAULT; - if (copy_from_user(filter->insns, fprog->filter, fp_size)) - goto fail; + if (copy_from_user(fp, fprog->filter, fp_size)) + goto free_prog; /* Check and rewrite the fprog via the skb checker */ - ret = sk_chk_filter(filter->insns, filter->len); + ret = sk_chk_filter(fp, fprog->len); if (ret) - goto fail; + goto free_prog; /* Check and rewrite the fprog for seccomp use */ - ret = seccomp_check_filter(filter->insns, filter->len); + ret = seccomp_check_filter(fp, fprog->len); + if (ret) + goto free_prog; + + /* Convert 'sock_filter' insns to 'sock_filter_int' insns */ + ret = sk_convert_filter(fp, fprog->len, NULL, &new_len); + if (ret) + goto free_prog; + + /* Allocate a new seccomp_filter */ + filter = kzalloc(sizeof(struct seccomp_filter) + + sizeof(struct sock_filter_int) * new_len, + GFP_KERNEL|__GFP_NOWARN); + if (!filter) + goto free_prog; + + ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len); if (ret) - goto fail; + goto free_filter; + + atomic_set(&filter->usage, 1); + filter->len = new_len; /* * If there is an existing filter, make it the prev and don't drop its @@ -282,8 +276,11 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) filter->prev = current->seccomp.filter; current->seccomp.filter = filter; return 0; -fail: + +free_filter: kfree(filter); +free_prog: + kfree(fp); return ret; } diff --git a/net/core/filter.c b/net/core/filter.c index 5b3427aaeca5..3733381190ec 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1,11 +1,16 @@ /* * Linux Socket Filter - Kernel level socket filtering * - * Author: - * Jay Schulist + * Based on the design of the Berkeley Packet Filter. The new + * internal format has been designed by PLUMgrid: * - * Based on the design of: - * - The Berkeley Packet Filter + * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com + * + * Authors: + * + * Jay Schulist + * Alexei Starovoitov + * Daniel Borkmann * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -108,304 +113,1045 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(sk_filter); +/* Base function for offset calculation. Needs to go into .text section, + * therefore keeping it non-static as well; will also be used by JITs + * anyway later on, so do not let the compiler omit it. + */ +noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + return 0; +} + /** - * sk_run_filter - run a filter on a socket - * @skb: buffer to run the filter on + * __sk_run_filter - run a filter on a given context + * @ctx: buffer to run the filter on * @fentry: filter to apply * - * Decode and apply filter instructions to the skb->data. - * Return length to keep, 0 for none. @skb is the data we are - * filtering, @filter is the array of filter instructions. - * Because all jumps are guaranteed to be before last instruction, - * and last instruction guaranteed to be a RET, we dont need to check - * flen. (We used to pass to this function the length of filter) + * Decode and apply filter instructions to the skb->data. Return length to + * keep, 0 for none. @ctx is the data we are operating on, @filter is the + * array of filter instructions. */ -unsigned int sk_run_filter(const struct sk_buff *skb, - const struct sock_filter *fentry) +unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) { + u64 stack[MAX_BPF_STACK / sizeof(u64)]; + u64 regs[MAX_BPF_REG], tmp; void *ptr; - u32 A = 0; /* Accumulator */ - u32 X = 0; /* Index Register */ - u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ - u32 tmp; - int k; + int off; + +#define K insn->imm +#define A regs[insn->a_reg] +#define X regs[insn->x_reg] +#define R0 regs[0] + +#define CONT ({insn++; goto select_insn; }) +#define CONT_JMP ({insn++; goto select_insn; }) + + static const void *jumptable[256] = { + [0 ... 255] = &&default_label, + /* Now overwrite non-defaults ... */ +#define DL(A, B, C) [A|B|C] = &&A##_##B##_##C + DL(BPF_ALU, BPF_ADD, BPF_X), + DL(BPF_ALU, BPF_ADD, BPF_K), + DL(BPF_ALU, BPF_SUB, BPF_X), + DL(BPF_ALU, BPF_SUB, BPF_K), + DL(BPF_ALU, BPF_AND, BPF_X), + DL(BPF_ALU, BPF_AND, BPF_K), + DL(BPF_ALU, BPF_OR, BPF_X), + DL(BPF_ALU, BPF_OR, BPF_K), + DL(BPF_ALU, BPF_LSH, BPF_X), + DL(BPF_ALU, BPF_LSH, BPF_K), + DL(BPF_ALU, BPF_RSH, BPF_X), + DL(BPF_ALU, BPF_RSH, BPF_K), + DL(BPF_ALU, BPF_XOR, BPF_X), + DL(BPF_ALU, BPF_XOR, BPF_K), + DL(BPF_ALU, BPF_MUL, BPF_X), + DL(BPF_ALU, BPF_MUL, BPF_K), + DL(BPF_ALU, BPF_MOV, BPF_X), + DL(BPF_ALU, BPF_MOV, BPF_K), + DL(BPF_ALU, BPF_DIV, BPF_X), + DL(BPF_ALU, BPF_DIV, BPF_K), + DL(BPF_ALU, BPF_MOD, BPF_X), + DL(BPF_ALU, BPF_MOD, BPF_K), + DL(BPF_ALU, BPF_NEG, 0), + DL(BPF_ALU, BPF_END, BPF_TO_BE), + DL(BPF_ALU, BPF_END, BPF_TO_LE), + DL(BPF_ALU64, BPF_ADD, BPF_X), + DL(BPF_ALU64, BPF_ADD, BPF_K), + DL(BPF_ALU64, BPF_SUB, BPF_X), + DL(BPF_ALU64, BPF_SUB, BPF_K), + DL(BPF_ALU64, BPF_AND, BPF_X), + DL(BPF_ALU64, BPF_AND, BPF_K), + DL(BPF_ALU64, BPF_OR, BPF_X), + DL(BPF_ALU64, BPF_OR, BPF_K), + DL(BPF_ALU64, BPF_LSH, BPF_X), + DL(BPF_ALU64, BPF_LSH, BPF_K), + DL(BPF_ALU64, BPF_RSH, BPF_X), + DL(BPF_ALU64, BPF_RSH, BPF_K), + DL(BPF_ALU64, BPF_XOR, BPF_X), + DL(BPF_ALU64, BPF_XOR, BPF_K), + DL(BPF_ALU64, BPF_MUL, BPF_X), + DL(BPF_ALU64, BPF_MUL, BPF_K), + DL(BPF_ALU64, BPF_MOV, BPF_X), + DL(BPF_ALU64, BPF_MOV, BPF_K), + DL(BPF_ALU64, BPF_ARSH, BPF_X), + DL(BPF_ALU64, BPF_ARSH, BPF_K), + DL(BPF_ALU64, BPF_DIV, BPF_X), + DL(BPF_ALU64, BPF_DIV, BPF_K), + DL(BPF_ALU64, BPF_MOD, BPF_X), + DL(BPF_ALU64, BPF_MOD, BPF_K), + DL(BPF_ALU64, BPF_NEG, 0), + DL(BPF_JMP, BPF_CALL, 0), + DL(BPF_JMP, BPF_JA, 0), + DL(BPF_JMP, BPF_JEQ, BPF_X), + DL(BPF_JMP, BPF_JEQ, BPF_K), + DL(BPF_JMP, BPF_JNE, BPF_X), + DL(BPF_JMP, BPF_JNE, BPF_K), + DL(BPF_JMP, BPF_JGT, BPF_X), + DL(BPF_JMP, BPF_JGT, BPF_K), + DL(BPF_JMP, BPF_JGE, BPF_X), + DL(BPF_JMP, BPF_JGE, BPF_K), + DL(BPF_JMP, BPF_JSGT, BPF_X), + DL(BPF_JMP, BPF_JSGT, BPF_K), + DL(BPF_JMP, BPF_JSGE, BPF_X), + DL(BPF_JMP, BPF_JSGE, BPF_K), + DL(BPF_JMP, BPF_JSET, BPF_X), + DL(BPF_JMP, BPF_JSET, BPF_K), + DL(BPF_JMP, BPF_EXIT, 0), + DL(BPF_STX, BPF_MEM, BPF_B), + DL(BPF_STX, BPF_MEM, BPF_H), + DL(BPF_STX, BPF_MEM, BPF_W), + DL(BPF_STX, BPF_MEM, BPF_DW), + DL(BPF_STX, BPF_XADD, BPF_W), + DL(BPF_STX, BPF_XADD, BPF_DW), + DL(BPF_ST, BPF_MEM, BPF_B), + DL(BPF_ST, BPF_MEM, BPF_H), + DL(BPF_ST, BPF_MEM, BPF_W), + DL(BPF_ST, BPF_MEM, BPF_DW), + DL(BPF_LDX, BPF_MEM, BPF_B), + DL(BPF_LDX, BPF_MEM, BPF_H), + DL(BPF_LDX, BPF_MEM, BPF_W), + DL(BPF_LDX, BPF_MEM, BPF_DW), + DL(BPF_LD, BPF_ABS, BPF_W), + DL(BPF_LD, BPF_ABS, BPF_H), + DL(BPF_LD, BPF_ABS, BPF_B), + DL(BPF_LD, BPF_IND, BPF_W), + DL(BPF_LD, BPF_IND, BPF_H), + DL(BPF_LD, BPF_IND, BPF_B), +#undef DL + }; - /* - * Process array of filter instructions. - */ - for (;; fentry++) { -#if defined(CONFIG_X86_32) -#define K (fentry->k) -#else - const u32 K = fentry->k; -#endif - - switch (fentry->code) { - case BPF_S_ALU_ADD_X: - A += X; - continue; - case BPF_S_ALU_ADD_K: - A += K; - continue; - case BPF_S_ALU_SUB_X: - A -= X; - continue; - case BPF_S_ALU_SUB_K: - A -= K; - continue; - case BPF_S_ALU_MUL_X: - A *= X; - continue; - case BPF_S_ALU_MUL_K: - A *= K; - continue; - case BPF_S_ALU_DIV_X: - if (X == 0) - return 0; - A /= X; - continue; - case BPF_S_ALU_DIV_K: - A /= K; - continue; - case BPF_S_ALU_MOD_X: - if (X == 0) - return 0; - A %= X; - continue; - case BPF_S_ALU_MOD_K: - A %= K; - continue; - case BPF_S_ALU_AND_X: - A &= X; - continue; - case BPF_S_ALU_AND_K: - A &= K; - continue; - case BPF_S_ALU_OR_X: - A |= X; - continue; - case BPF_S_ALU_OR_K: - A |= K; - continue; - case BPF_S_ANC_ALU_XOR_X: - case BPF_S_ALU_XOR_X: - A ^= X; - continue; - case BPF_S_ALU_XOR_K: - A ^= K; - continue; - case BPF_S_ALU_LSH_X: - A <<= X; - continue; - case BPF_S_ALU_LSH_K: - A <<= K; - continue; - case BPF_S_ALU_RSH_X: - A >>= X; - continue; - case BPF_S_ALU_RSH_K: - A >>= K; - continue; - case BPF_S_ALU_NEG: - A = -A; - continue; - case BPF_S_JMP_JA: - fentry += K; - continue; - case BPF_S_JMP_JGT_K: - fentry += (A > K) ? fentry->jt : fentry->jf; - continue; - case BPF_S_JMP_JGE_K: - fentry += (A >= K) ? fentry->jt : fentry->jf; - continue; - case BPF_S_JMP_JEQ_K: - fentry += (A == K) ? fentry->jt : fentry->jf; - continue; - case BPF_S_JMP_JSET_K: - fentry += (A & K) ? fentry->jt : fentry->jf; - continue; - case BPF_S_JMP_JGT_X: - fentry += (A > X) ? fentry->jt : fentry->jf; - continue; - case BPF_S_JMP_JGE_X: - fentry += (A >= X) ? fentry->jt : fentry->jf; - continue; - case BPF_S_JMP_JEQ_X: - fentry += (A == X) ? fentry->jt : fentry->jf; - continue; - case BPF_S_JMP_JSET_X: - fentry += (A & X) ? fentry->jt : fentry->jf; - continue; - case BPF_S_LD_W_ABS: - k = K; -load_w: - ptr = load_pointer(skb, k, 4, &tmp); - if (ptr != NULL) { - A = get_unaligned_be32(ptr); - continue; - } - return 0; - case BPF_S_LD_H_ABS: - k = K; -load_h: - ptr = load_pointer(skb, k, 2, &tmp); - if (ptr != NULL) { - A = get_unaligned_be16(ptr); - continue; + regs[FP_REG] = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; + regs[ARG1_REG] = (u64) (unsigned long) ctx; + +select_insn: + goto *jumptable[insn->code]; + + /* ALU */ +#define ALU(OPCODE, OP) \ + BPF_ALU64_##OPCODE##_BPF_X: \ + A = A OP X; \ + CONT; \ + BPF_ALU_##OPCODE##_BPF_X: \ + A = (u32) A OP (u32) X; \ + CONT; \ + BPF_ALU64_##OPCODE##_BPF_K: \ + A = A OP K; \ + CONT; \ + BPF_ALU_##OPCODE##_BPF_K: \ + A = (u32) A OP (u32) K; \ + CONT; + + ALU(BPF_ADD, +) + ALU(BPF_SUB, -) + ALU(BPF_AND, &) + ALU(BPF_OR, |) + ALU(BPF_LSH, <<) + ALU(BPF_RSH, >>) + ALU(BPF_XOR, ^) + ALU(BPF_MUL, *) +#undef ALU + BPF_ALU_BPF_NEG_0: + A = (u32) -A; + CONT; + BPF_ALU64_BPF_NEG_0: + A = -A; + CONT; + BPF_ALU_BPF_MOV_BPF_X: + A = (u32) X; + CONT; + BPF_ALU_BPF_MOV_BPF_K: + A = (u32) K; + CONT; + BPF_ALU64_BPF_MOV_BPF_X: + A = X; + CONT; + BPF_ALU64_BPF_MOV_BPF_K: + A = K; + CONT; + BPF_ALU64_BPF_ARSH_BPF_X: + (*(s64 *) &A) >>= X; + CONT; + BPF_ALU64_BPF_ARSH_BPF_K: + (*(s64 *) &A) >>= K; + CONT; + BPF_ALU64_BPF_MOD_BPF_X: + tmp = A; + if (X) + A = do_div(tmp, X); + CONT; + BPF_ALU_BPF_MOD_BPF_X: + tmp = (u32) A; + if (X) + A = do_div(tmp, (u32) X); + CONT; + BPF_ALU64_BPF_MOD_BPF_K: + tmp = A; + if (K) + A = do_div(tmp, K); + CONT; + BPF_ALU_BPF_MOD_BPF_K: + tmp = (u32) A; + if (K) + A = do_div(tmp, (u32) K); + CONT; + BPF_ALU64_BPF_DIV_BPF_X: + if (X) + do_div(A, X); + CONT; + BPF_ALU_BPF_DIV_BPF_X: + tmp = (u32) A; + if (X) + do_div(tmp, (u32) X); + A = (u32) tmp; + CONT; + BPF_ALU64_BPF_DIV_BPF_K: + if (K) + do_div(A, K); + CONT; + BPF_ALU_BPF_DIV_BPF_K: + tmp = (u32) A; + if (K) + do_div(tmp, (u32) K); + A = (u32) tmp; + CONT; + BPF_ALU_BPF_END_BPF_TO_BE: + switch (K) { + case 16: + A = (__force u16) cpu_to_be16(A); + break; + case 32: + A = (__force u32) cpu_to_be32(A); + break; + case 64: + A = (__force u64) cpu_to_be64(A); + break; + } + CONT; + BPF_ALU_BPF_END_BPF_TO_LE: + switch (K) { + case 16: + A = (__force u16) cpu_to_le16(A); + break; + case 32: + A = (__force u32) cpu_to_le32(A); + break; + case 64: + A = (__force u64) cpu_to_le64(A); + break; + } + CONT; + + /* CALL */ + BPF_JMP_BPF_CALL_0: + /* Function call scratches R1-R5 registers, preserves R6-R9, + * and stores return value into R0. + */ + R0 = (__bpf_call_base + insn->imm)(regs[1], regs[2], regs[3], + regs[4], regs[5]); + CONT; + + /* JMP */ + BPF_JMP_BPF_JA_0: + insn += insn->off; + CONT; + BPF_JMP_BPF_JEQ_BPF_X: + if (A == X) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JEQ_BPF_K: + if (A == K) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JNE_BPF_X: + if (A != X) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JNE_BPF_K: + if (A != K) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JGT_BPF_X: + if (A > X) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JGT_BPF_K: + if (A > K) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JGE_BPF_X: + if (A >= X) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JGE_BPF_K: + if (A >= K) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JSGT_BPF_X: + if (((s64)A) > ((s64)X)) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JSGT_BPF_K: + if (((s64)A) > ((s64)K)) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JSGE_BPF_X: + if (((s64)A) >= ((s64)X)) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JSGE_BPF_K: + if (((s64)A) >= ((s64)K)) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JSET_BPF_X: + if (A & X) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_JSET_BPF_K: + if (A & K) { + insn += insn->off; + CONT_JMP; + } + CONT; + BPF_JMP_BPF_EXIT_0: + return R0; + + /* STX and ST and LDX*/ +#define LDST(SIZEOP, SIZE) \ + BPF_STX_BPF_MEM_##SIZEOP: \ + *(SIZE *)(unsigned long) (A + insn->off) = X; \ + CONT; \ + BPF_ST_BPF_MEM_##SIZEOP: \ + *(SIZE *)(unsigned long) (A + insn->off) = K; \ + CONT; \ + BPF_LDX_BPF_MEM_##SIZEOP: \ + A = *(SIZE *)(unsigned long) (X + insn->off); \ + CONT; + + LDST(BPF_B, u8) + LDST(BPF_H, u16) + LDST(BPF_W, u32) + LDST(BPF_DW, u64) +#undef LDST + BPF_STX_BPF_XADD_BPF_W: /* lock xadd *(u32 *)(A + insn->off) += X */ + atomic_add((u32) X, (atomic_t *)(unsigned long) + (A + insn->off)); + CONT; + BPF_STX_BPF_XADD_BPF_DW: /* lock xadd *(u64 *)(A + insn->off) += X */ + atomic64_add((u64) X, (atomic64_t *)(unsigned long) + (A + insn->off)); + CONT; + BPF_LD_BPF_ABS_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */ + off = K; +load_word: + /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only + * appearing in the programs where ctx == skb. All programs + * keep 'ctx' in regs[CTX_REG] == R6, sk_convert_filter() + * saves it in R6, internal BPF verifier will check that + * R6 == ctx. + * + * BPF_ABS and BPF_IND are wrappers of function calls, so + * they scratch R1-R5 registers, preserve R6-R9, and store + * return value into R0. + * + * Implicit input: + * ctx + * + * Explicit input: + * X == any register + * K == 32-bit immediate + * + * Output: + * R0 - 8/16/32-bit skb data converted to cpu endianness + */ + ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp); + if (likely(ptr != NULL)) { + R0 = get_unaligned_be32(ptr); + CONT; + } + return 0; + BPF_LD_BPF_ABS_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */ + off = K; +load_half: + ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp); + if (likely(ptr != NULL)) { + R0 = get_unaligned_be16(ptr); + CONT; + } + return 0; + BPF_LD_BPF_ABS_BPF_B: /* R0 = *(u8 *) (ctx + K) */ + off = K; +load_byte: + ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp); + if (likely(ptr != NULL)) { + R0 = *(u8 *)ptr; + CONT; + } + return 0; + BPF_LD_BPF_IND_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */ + off = K + X; + goto load_word; + BPF_LD_BPF_IND_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */ + off = K + X; + goto load_half; + BPF_LD_BPF_IND_BPF_B: /* R0 = *(u8 *) (skb->data + X + K) */ + off = K + X; + goto load_byte; + + default_label: + /* If we ever reach this, we have a bug somewhere. */ + WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code); + return 0; +#undef CONT_JMP +#undef CONT + +#undef R0 +#undef X +#undef A +#undef K +} + +u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx, + const struct sock_filter_int *insni) + __attribute__ ((alias ("__sk_run_filter"))); + +u32 sk_run_filter_int_skb(const struct sk_buff *ctx, + const struct sock_filter_int *insni) + __attribute__ ((alias ("__sk_run_filter"))); +EXPORT_SYMBOL_GPL(sk_run_filter_int_skb); + +/* Helper to find the offset of pkt_type in sk_buff structure. We want + * to make sure its still a 3bit field starting at a byte boundary; + * taken from arch/x86/net/bpf_jit_comp.c. + */ +#define PKT_TYPE_MAX 7 +static unsigned int pkt_type_offset(void) +{ + struct sk_buff skb_probe = { .pkt_type = ~0, }; + u8 *ct = (u8 *) &skb_probe; + unsigned int off; + + for (off = 0; off < sizeof(struct sk_buff); off++) { + if (ct[off] == PKT_TYPE_MAX) + return off; + } + + pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__); + return -1; +} + +static u64 __skb_get_pay_offset(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *)(long) ctx; + + return __skb_get_poff(skb); +} + +static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *)(long) ctx; + struct nlattr *nla; + + if (skb_is_nonlinear(skb)) + return 0; + + if (A > skb->len - sizeof(struct nlattr)) + return 0; + + nla = nla_find((struct nlattr *) &skb->data[A], skb->len - A, X); + if (nla) + return (void *) nla - (void *) skb->data; + + return 0; +} + +static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *)(long) ctx; + struct nlattr *nla; + + if (skb_is_nonlinear(skb)) + return 0; + + if (A > skb->len - sizeof(struct nlattr)) + return 0; + + nla = (struct nlattr *) &skb->data[A]; + if (nla->nla_len > A - skb->len) + return 0; + + nla = nla_find_nested(nla, X); + if (nla) + return (void *) nla - (void *) skb->data; + + return 0; +} + +static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +{ + return raw_smp_processor_id(); +} + +/* Register mappings for user programs. */ +#define A_REG 0 +#define X_REG 7 +#define TMP_REG 8 +#define ARG2_REG 2 +#define ARG3_REG 3 + +static bool convert_bpf_extensions(struct sock_filter *fp, + struct sock_filter_int **insnp) +{ + struct sock_filter_int *insn = *insnp; + + switch (fp->k) { + case SKF_AD_OFF + SKF_AD_PROTOCOL: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); + + insn->code = BPF_LDX | BPF_MEM | BPF_H; + insn->a_reg = A_REG; + insn->x_reg = CTX_REG; + insn->off = offsetof(struct sk_buff, protocol); + insn++; + + /* A = ntohs(A) [emitting a nop or swap16] */ + insn->code = BPF_ALU | BPF_END | BPF_FROM_BE; + insn->a_reg = A_REG; + insn->imm = 16; + break; + + case SKF_AD_OFF + SKF_AD_PKTTYPE: + insn->code = BPF_LDX | BPF_MEM | BPF_B; + insn->a_reg = A_REG; + insn->x_reg = CTX_REG; + insn->off = pkt_type_offset(); + if (insn->off < 0) + return false; + insn++; + + insn->code = BPF_ALU | BPF_AND | BPF_K; + insn->a_reg = A_REG; + insn->imm = PKT_TYPE_MAX; + break; + + case SKF_AD_OFF + SKF_AD_IFINDEX: + case SKF_AD_OFF + SKF_AD_HATYPE: + if (FIELD_SIZEOF(struct sk_buff, dev) == 8) + insn->code = BPF_LDX | BPF_MEM | BPF_DW; + else + insn->code = BPF_LDX | BPF_MEM | BPF_W; + insn->a_reg = TMP_REG; + insn->x_reg = CTX_REG; + insn->off = offsetof(struct sk_buff, dev); + insn++; + + insn->code = BPF_JMP | BPF_JNE | BPF_K; + insn->a_reg = TMP_REG; + insn->imm = 0; + insn->off = 1; + insn++; + + insn->code = BPF_JMP | BPF_EXIT; + insn++; + + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); + + insn->a_reg = A_REG; + insn->x_reg = TMP_REG; + + if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) { + insn->code = BPF_LDX | BPF_MEM | BPF_W; + insn->off = offsetof(struct net_device, ifindex); + } else { + insn->code = BPF_LDX | BPF_MEM | BPF_H; + insn->off = offsetof(struct net_device, type); + } + break; + + case SKF_AD_OFF + SKF_AD_MARK: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + + insn->code = BPF_LDX | BPF_MEM | BPF_W; + insn->a_reg = A_REG; + insn->x_reg = CTX_REG; + insn->off = offsetof(struct sk_buff, mark); + break; + + case SKF_AD_OFF + SKF_AD_RXHASH: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + + insn->code = BPF_LDX | BPF_MEM | BPF_W; + insn->a_reg = A_REG; + insn->x_reg = CTX_REG; + insn->off = offsetof(struct sk_buff, hash); + break; + + case SKF_AD_OFF + SKF_AD_QUEUE: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); + + insn->code = BPF_LDX | BPF_MEM | BPF_H; + insn->a_reg = A_REG; + insn->x_reg = CTX_REG; + insn->off = offsetof(struct sk_buff, queue_mapping); + break; + + case SKF_AD_OFF + SKF_AD_VLAN_TAG: + case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); + + insn->code = BPF_LDX | BPF_MEM | BPF_H; + insn->a_reg = A_REG; + insn->x_reg = CTX_REG; + insn->off = offsetof(struct sk_buff, vlan_tci); + insn++; + + BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + + if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) { + insn->code = BPF_ALU | BPF_AND | BPF_K; + insn->a_reg = A_REG; + insn->imm = ~VLAN_TAG_PRESENT; + } else { + insn->code = BPF_ALU | BPF_RSH | BPF_K; + insn->a_reg = A_REG; + insn->imm = 12; + insn++; + + insn->code = BPF_ALU | BPF_AND | BPF_K; + insn->a_reg = A_REG; + insn->imm = 1; + } + break; + + case SKF_AD_OFF + SKF_AD_PAY_OFFSET: + case SKF_AD_OFF + SKF_AD_NLATTR: + case SKF_AD_OFF + SKF_AD_NLATTR_NEST: + case SKF_AD_OFF + SKF_AD_CPU: + /* arg1 = ctx */ + insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + insn->a_reg = ARG1_REG; + insn->x_reg = CTX_REG; + insn++; + + /* arg2 = A */ + insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + insn->a_reg = ARG2_REG; + insn->x_reg = A_REG; + insn++; + + /* arg3 = X */ + insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + insn->a_reg = ARG3_REG; + insn->x_reg = X_REG; + insn++; + + /* Emit call(ctx, arg2=A, arg3=X) */ + insn->code = BPF_JMP | BPF_CALL; + switch (fp->k) { + case SKF_AD_OFF + SKF_AD_PAY_OFFSET: + insn->imm = __skb_get_pay_offset - __bpf_call_base; + break; + case SKF_AD_OFF + SKF_AD_NLATTR: + insn->imm = __skb_get_nlattr - __bpf_call_base; + break; + case SKF_AD_OFF + SKF_AD_NLATTR_NEST: + insn->imm = __skb_get_nlattr_nest - __bpf_call_base; + break; + case SKF_AD_OFF + SKF_AD_CPU: + insn->imm = __get_raw_cpu_id - __bpf_call_base; + break; + } + break; + + case SKF_AD_OFF + SKF_AD_ALU_XOR_X: + insn->code = BPF_ALU | BPF_XOR | BPF_X; + insn->a_reg = A_REG; + insn->x_reg = X_REG; + break; + + default: + /* This is just a dummy call to avoid letting the compiler + * evict __bpf_call_base() as an optimization. Placed here + * where no-one bothers. + */ + BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0); + return false; + } + + *insnp = insn; + return true; +} + +/** + * sk_convert_filter - convert filter program + * @prog: the user passed filter program + * @len: the length of the user passed filter program + * @new_prog: buffer where converted program will be stored + * @new_len: pointer to store length of converted program + * + * Remap 'sock_filter' style BPF instruction set to 'sock_filter_ext' style. + * Conversion workflow: + * + * 1) First pass for calculating the new program length: + * sk_convert_filter(old_prog, old_len, NULL, &new_len) + * + * 2) 2nd pass to remap in two passes: 1st pass finds new + * jump offsets, 2nd pass remapping: + * new_prog = kmalloc(sizeof(struct sock_filter_int) * new_len); + * sk_convert_filter(old_prog, old_len, new_prog, &new_len); + * + * User BPF's register A is mapped to our BPF register 6, user BPF + * register X is mapped to BPF register 7; frame pointer is always + * register 10; Context 'void *ctx' is stored in register 1, that is, + * for socket filters: ctx == 'struct sk_buff *', for seccomp: + * ctx == 'struct seccomp_data *'. + */ +int sk_convert_filter(struct sock_filter *prog, int len, + struct sock_filter_int *new_prog, int *new_len) +{ + int new_flen = 0, pass = 0, target, i; + struct sock_filter_int *new_insn; + struct sock_filter *fp; + int *addrs = NULL; + u8 bpf_src; + + BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK); + BUILD_BUG_ON(FP_REG + 1 != MAX_BPF_REG); + + if (len <= 0 || len >= BPF_MAXINSNS) + return -EINVAL; + + if (new_prog) { + addrs = kzalloc(len * sizeof(*addrs), GFP_KERNEL); + if (!addrs) + return -ENOMEM; + } + +do_pass: + new_insn = new_prog; + fp = prog; + + if (new_insn) { + new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + new_insn->a_reg = CTX_REG; + new_insn->x_reg = ARG1_REG; + } + new_insn++; + + for (i = 0; i < len; fp++, i++) { + struct sock_filter_int tmp_insns[6] = { }; + struct sock_filter_int *insn = tmp_insns; + + if (addrs) + addrs[i] = new_insn - new_prog; + + switch (fp->code) { + /* All arithmetic insns and skb loads map as-is. */ + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU | BPF_LSH | BPF_X: + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_X: + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU | BPF_MOD | BPF_K: + case BPF_ALU | BPF_NEG: + case BPF_LD | BPF_ABS | BPF_W: + case BPF_LD | BPF_ABS | BPF_H: + case BPF_LD | BPF_ABS | BPF_B: + case BPF_LD | BPF_IND | BPF_W: + case BPF_LD | BPF_IND | BPF_H: + case BPF_LD | BPF_IND | BPF_B: + /* Check for overloaded BPF extension and + * directly convert it if found, otherwise + * just move on with mapping. + */ + if (BPF_CLASS(fp->code) == BPF_LD && + BPF_MODE(fp->code) == BPF_ABS && + convert_bpf_extensions(fp, &insn)) + break; + + insn->code = fp->code; + insn->a_reg = A_REG; + insn->x_reg = X_REG; + insn->imm = fp->k; + break; + + /* Jump opcodes map as-is, but offsets need adjustment. */ + case BPF_JMP | BPF_JA: + target = i + fp->k + 1; + insn->code = fp->code; +#define EMIT_JMP \ + do { \ + if (target >= len || target < 0) \ + goto err; \ + insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0; \ + /* Adjust pc relative offset for 2nd or 3rd insn. */ \ + insn->off -= insn - tmp_insns; \ + } while (0) + + EMIT_JMP; + break; + + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) { + /* BPF immediates are signed, zero extend + * immediate into tmp register and use it + * in compare insn. + */ + insn->code = BPF_ALU | BPF_MOV | BPF_K; + insn->a_reg = TMP_REG; + insn->imm = fp->k; + insn++; + + insn->a_reg = A_REG; + insn->x_reg = TMP_REG; + bpf_src = BPF_X; + } else { + insn->a_reg = A_REG; + insn->x_reg = X_REG; + insn->imm = fp->k; + bpf_src = BPF_SRC(fp->code); } - return 0; - case BPF_S_LD_B_ABS: - k = K; -load_b: - ptr = load_pointer(skb, k, 1, &tmp); - if (ptr != NULL) { - A = *(u8 *)ptr; - continue; + + /* Common case where 'jump_false' is next insn. */ + if (fp->jf == 0) { + insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; + target = i + fp->jt + 1; + EMIT_JMP; + break; } - return 0; - case BPF_S_LD_W_LEN: - A = skb->len; - continue; - case BPF_S_LDX_W_LEN: - X = skb->len; - continue; - case BPF_S_LD_W_IND: - k = X + K; - goto load_w; - case BPF_S_LD_H_IND: - k = X + K; - goto load_h; - case BPF_S_LD_B_IND: - k = X + K; - goto load_b; - case BPF_S_LDX_B_MSH: - ptr = load_pointer(skb, K, 1, &tmp); - if (ptr != NULL) { - X = (*(u8 *)ptr & 0xf) << 2; - continue; + + /* Convert JEQ into JNE when 'jump_true' is next insn. */ + if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) { + insn->code = BPF_JMP | BPF_JNE | bpf_src; + target = i + fp->jf + 1; + EMIT_JMP; + break; } - return 0; - case BPF_S_LD_IMM: - A = K; - continue; - case BPF_S_LDX_IMM: - X = K; - continue; - case BPF_S_LD_MEM: - A = mem[K]; - continue; - case BPF_S_LDX_MEM: - X = mem[K]; - continue; - case BPF_S_MISC_TAX: - X = A; - continue; - case BPF_S_MISC_TXA: - A = X; - continue; - case BPF_S_RET_K: - return K; - case BPF_S_RET_A: - return A; - case BPF_S_ST: - mem[K] = A; - continue; - case BPF_S_STX: - mem[K] = X; - continue; - case BPF_S_ANC_PROTOCOL: - A = ntohs(skb->protocol); - continue; - case BPF_S_ANC_PKTTYPE: - A = skb->pkt_type; - continue; - case BPF_S_ANC_IFINDEX: - if (!skb->dev) - return 0; - A = skb->dev->ifindex; - continue; - case BPF_S_ANC_MARK: - A = skb->mark; - continue; - case BPF_S_ANC_QUEUE: - A = skb->queue_mapping; - continue; - case BPF_S_ANC_HATYPE: - if (!skb->dev) - return 0; - A = skb->dev->type; - continue; - case BPF_S_ANC_RXHASH: - A = skb->hash; - continue; - case BPF_S_ANC_CPU: - A = raw_smp_processor_id(); - continue; - case BPF_S_ANC_VLAN_TAG: - A = vlan_tx_tag_get(skb); - continue; - case BPF_S_ANC_VLAN_TAG_PRESENT: - A = !!vlan_tx_tag_present(skb); - continue; - case BPF_S_ANC_PAY_OFFSET: - A = __skb_get_poff(skb); - continue; - case BPF_S_ANC_NLATTR: { - struct nlattr *nla; - - if (skb_is_nonlinear(skb)) - return 0; - if (A > skb->len - sizeof(struct nlattr)) - return 0; - - nla = nla_find((struct nlattr *)&skb->data[A], - skb->len - A, X); - if (nla) - A = (void *)nla - (void *)skb->data; - else - A = 0; - continue; - } - case BPF_S_ANC_NLATTR_NEST: { - struct nlattr *nla; - - if (skb_is_nonlinear(skb)) - return 0; - if (A > skb->len - sizeof(struct nlattr)) - return 0; - - nla = (struct nlattr *)&skb->data[A]; - if (nla->nla_len > A - skb->len) - return 0; - - nla = nla_find_nested(nla, X); - if (nla) - A = (void *)nla - (void *)skb->data; - else - A = 0; - continue; - } -#ifdef CONFIG_SECCOMP_FILTER - case BPF_S_ANC_SECCOMP_LD_W: - A = seccomp_bpf_load(fentry->k); - continue; -#endif + + /* Other jumps are mapped into two insns: Jxx and JA. */ + target = i + fp->jt + 1; + insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; + EMIT_JMP; + insn++; + + insn->code = BPF_JMP | BPF_JA; + target = i + fp->jf + 1; + EMIT_JMP; + break; + + /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */ + case BPF_LDX | BPF_MSH | BPF_B: + insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + insn->a_reg = TMP_REG; + insn->x_reg = A_REG; + insn++; + + insn->code = BPF_LD | BPF_ABS | BPF_B; + insn->a_reg = A_REG; + insn->imm = fp->k; + insn++; + + insn->code = BPF_ALU | BPF_AND | BPF_K; + insn->a_reg = A_REG; + insn->imm = 0xf; + insn++; + + insn->code = BPF_ALU | BPF_LSH | BPF_K; + insn->a_reg = A_REG; + insn->imm = 2; + insn++; + + insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + insn->a_reg = X_REG; + insn->x_reg = A_REG; + insn++; + + insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + insn->a_reg = A_REG; + insn->x_reg = TMP_REG; + break; + + /* RET_K, RET_A are remaped into 2 insns. */ + case BPF_RET | BPF_A: + case BPF_RET | BPF_K: + insn->code = BPF_ALU | BPF_MOV | + (BPF_RVAL(fp->code) == BPF_K ? + BPF_K : BPF_X); + insn->a_reg = 0; + insn->x_reg = A_REG; + insn->imm = fp->k; + insn++; + + insn->code = BPF_JMP | BPF_EXIT; + break; + + /* Store to stack. */ + case BPF_ST: + case BPF_STX: + insn->code = BPF_STX | BPF_MEM | BPF_W; + insn->a_reg = FP_REG; + insn->x_reg = fp->code == BPF_ST ? A_REG : X_REG; + insn->off = -(BPF_MEMWORDS - fp->k) * 4; + break; + + /* Load from stack. */ + case BPF_LD | BPF_MEM: + case BPF_LDX | BPF_MEM: + insn->code = BPF_LDX | BPF_MEM | BPF_W; + insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? + A_REG : X_REG; + insn->x_reg = FP_REG; + insn->off = -(BPF_MEMWORDS - fp->k) * 4; + break; + + /* A = K or X = K */ + case BPF_LD | BPF_IMM: + case BPF_LDX | BPF_IMM: + insn->code = BPF_ALU | BPF_MOV | BPF_K; + insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? + A_REG : X_REG; + insn->imm = fp->k; + break; + + /* X = A */ + case BPF_MISC | BPF_TAX: + insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + insn->a_reg = X_REG; + insn->x_reg = A_REG; + break; + + /* A = X */ + case BPF_MISC | BPF_TXA: + insn->code = BPF_ALU64 | BPF_MOV | BPF_X; + insn->a_reg = A_REG; + insn->x_reg = X_REG; + break; + + /* A = skb->len or X = skb->len */ + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LDX | BPF_W | BPF_LEN: + insn->code = BPF_LDX | BPF_MEM | BPF_W; + insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? + A_REG : X_REG; + insn->x_reg = CTX_REG; + insn->off = offsetof(struct sk_buff, len); + break; + + /* access seccomp_data fields */ + case BPF_LDX | BPF_ABS | BPF_W: + insn->code = BPF_LDX | BPF_MEM | BPF_W; + insn->a_reg = A_REG; + insn->x_reg = CTX_REG; + insn->off = fp->k; + break; + default: - WARN_RATELIMIT(1, "Unknown code:%u jt:%u tf:%u k:%u\n", - fentry->code, fentry->jt, - fentry->jf, fentry->k); - return 0; + goto err; } + + insn++; + if (new_prog) + memcpy(new_insn, tmp_insns, + sizeof(*insn) * (insn - tmp_insns)); + + new_insn += insn - tmp_insns; } + if (!new_prog) { + /* Only calculating new length. */ + *new_len = new_insn - new_prog; + return 0; + } + + pass++; + if (new_flen != new_insn - new_prog) { + new_flen = new_insn - new_prog; + if (pass > 2) + goto err; + + goto do_pass; + } + + kfree(addrs); + BUG_ON(*new_len != new_flen); return 0; +err: + kfree(addrs); + return -EINVAL; } -EXPORT_SYMBOL(sk_run_filter); -/* - * Security : +/* Security: + * * A BPF program is able to use 16 cells of memory to store intermediate - * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()) + * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()). + * * As we dont want to clear mem[] array for each packet going through * sk_run_filter(), we check that filter loaded by user never try to read * a cell if not previously written, and we check all branches to be sure @@ -696,19 +1442,130 @@ void sk_filter_charge(struct sock *sk, struct sk_filter *fp) atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc); } -static int __sk_prepare_filter(struct sk_filter *fp) +static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp, + struct sock *sk, + unsigned int len) +{ + struct sk_filter *fp_new; + + if (sk == NULL) + return krealloc(fp, len, GFP_KERNEL); + + fp_new = sock_kmalloc(sk, len, GFP_KERNEL); + if (fp_new) { + memcpy(fp_new, fp, sizeof(struct sk_filter)); + /* As we're kepping orig_prog in fp_new along, + * we need to make sure we're not evicting it + * from the old fp. + */ + fp->orig_prog = NULL; + sk_filter_uncharge(sk, fp); + } + + return fp_new; +} + +static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, + struct sock *sk) +{ + struct sock_filter *old_prog; + struct sk_filter *old_fp; + int i, err, new_len, old_len = fp->len; + + /* We are free to overwrite insns et al right here as it + * won't be used at this point in time anymore internally + * after the migration to the internal BPF instruction + * representation. + */ + BUILD_BUG_ON(sizeof(struct sock_filter) != + sizeof(struct sock_filter_int)); + + /* For now, we need to unfiddle BPF_S_* identifiers in place. + * This can sooner or later on be subject to removal, e.g. when + * JITs have been converted. + */ + for (i = 0; i < fp->len; i++) + sk_decode_filter(&fp->insns[i], &fp->insns[i]); + + /* Conversion cannot happen on overlapping memory areas, + * so we need to keep the user BPF around until the 2nd + * pass. At this time, the user BPF is stored in fp->insns. + */ + old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter), + GFP_KERNEL); + if (!old_prog) { + err = -ENOMEM; + goto out_err; + } + + /* 1st pass: calculate the new program length. */ + err = sk_convert_filter(old_prog, old_len, NULL, &new_len); + if (err) + goto out_err_free; + + /* Expand fp for appending the new filter representation. */ + old_fp = fp; + fp = __sk_migrate_realloc(old_fp, sk, sk_filter_size(new_len)); + if (!fp) { + /* The old_fp is still around in case we couldn't + * allocate new memory, so uncharge on that one. + */ + fp = old_fp; + err = -ENOMEM; + goto out_err_free; + } + + fp->bpf_func = sk_run_filter_int_skb; + fp->len = new_len; + + /* 2nd pass: remap sock_filter insns into sock_filter_int insns. */ + err = sk_convert_filter(old_prog, old_len, fp->insnsi, &new_len); + if (err) + /* 2nd sk_convert_filter() can fail only if it fails + * to allocate memory, remapping must succeed. Note, + * that at this time old_fp has already been released + * by __sk_migrate_realloc(). + */ + goto out_err_free; + + kfree(old_prog); + return fp; + +out_err_free: + kfree(old_prog); +out_err: + /* Rollback filter setup. */ + if (sk != NULL) + sk_filter_uncharge(sk, fp); + else + kfree(fp); + return ERR_PTR(err); +} + +static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, + struct sock *sk) { int err; - fp->bpf_func = sk_run_filter; + fp->bpf_func = NULL; fp->jited = 0; err = sk_chk_filter(fp->insns, fp->len); if (err) - return err; + return ERR_PTR(err); + /* Probe if we can JIT compile the filter and if so, do + * the compilation of the filter. + */ bpf_jit_compile(fp); - return 0; + + /* JIT compiler couldn't process this filter, so do the + * internal BPF translation for the optimized interpreter. + */ + if (!fp->jited) + fp = __sk_migrate_filter(fp, sk); + + return fp; } /** @@ -726,7 +1583,6 @@ int sk_unattached_filter_create(struct sk_filter **pfp, { unsigned int fsize = sk_filter_proglen(fprog); struct sk_filter *fp; - int err; /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) @@ -746,15 +1602,15 @@ int sk_unattached_filter_create(struct sk_filter **pfp, */ fp->orig_prog = NULL; - err = __sk_prepare_filter(fp); - if (err) - goto free_mem; + /* __sk_prepare_filter() already takes care of uncharging + * memory in case something goes wrong. + */ + fp = __sk_prepare_filter(fp, NULL); + if (IS_ERR(fp)) + return PTR_ERR(fp); *pfp = fp; return 0; -free_mem: - kfree(fp); - return err; } EXPORT_SYMBOL_GPL(sk_unattached_filter_create); @@ -806,11 +1662,12 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) return -ENOMEM; } - err = __sk_prepare_filter(fp); - if (err) { - sk_filter_uncharge(sk, fp); - return err; - } + /* __sk_prepare_filter() already takes care of uncharging + * memory in case something goes wrong. + */ + fp = __sk_prepare_filter(fp, sk); + if (IS_ERR(fp)) + return PTR_ERR(fp); old_fp = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk)); -- cgit v1.2.3-71-gd317 From 1ee481fb4cf8044632fa869bafc41345afa5957b Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 17:32:29 -0400 Subject: net: Allow modules to use is_skb_forwardable Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 06287c110241..29b579fb5196 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2629,6 +2629,7 @@ int dev_get_phys_port_id(struct net_device *dev, int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); +bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb); extern int netdev_budget; diff --git a/net/core/dev.c b/net/core/dev.c index cf92139b229c..a923eed976ae 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1640,8 +1640,7 @@ static inline void net_timestamp_set(struct sk_buff *skb) __net_timestamp(SKB); \ } \ -static inline bool is_skb_forwardable(struct net_device *dev, - struct sk_buff *skb) +bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb) { unsigned int len; @@ -1660,6 +1659,7 @@ static inline bool is_skb_forwardable(struct net_device *dev, return false; } +EXPORT_SYMBOL_GPL(is_skb_forwardable); /** * dev_forward_skb - loopback an skb to another netif -- cgit v1.2.3-71-gd317 From 339e022396d58f4b4f9b4200ea5309768934bb33 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Mar 2014 14:25:58 -0700 Subject: net: export NET_ADDR_* values to user-space API NET_ADDR_* values are exported in the /sys/class/net//addr_assign_type sysfs attributes, and as such constitutes an user-space ABI. Move the NET_ADDR_* definitions from include/linux/netdevice.h to include/uapi/linux/netdevice.h Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 ------- include/uapi/linux/netdevice.h | 6 ++++++ 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 29b579fb5196..34cae3ee74f1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -63,13 +63,6 @@ struct wireless_dev; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); -/* hardware address assignment types */ -#define NET_ADDR_PERM 0 /* address is permanent (default) */ -#define NET_ADDR_RANDOM 1 /* address is generated randomly */ -#define NET_ADDR_STOLEN 2 /* address is stolen from other device */ -#define NET_ADDR_SET 3 /* address is set using - * dev_set_mac_address() */ - /* Backlog congestion levels */ #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ #define NET_RX_DROP 1 /* packet dropped */ diff --git a/include/uapi/linux/netdevice.h b/include/uapi/linux/netdevice.h index 6b9500bc2d56..fdfbd1c17065 100644 --- a/include/uapi/linux/netdevice.h +++ b/include/uapi/linux/netdevice.h @@ -49,5 +49,11 @@ enum { IF_PORT_100BASEFX }; +/* hardware address assignment types */ +#define NET_ADDR_PERM 0 /* address is permanent (default) */ +#define NET_ADDR_RANDOM 1 /* address is generated randomly */ +#define NET_ADDR_STOLEN 2 /* address is stolen from other device */ +#define NET_ADDR_SET 3 /* address is set using + * dev_set_mac_address() */ #endif /* _UAPI_LINUX_NETDEVICE_H */ -- cgit v1.2.3-71-gd317 From 2d3b479df41a10e2f41f9259fcba775bd34de6e4 Mon Sep 17 00:00:00 2001 From: david decotigny Date: Sat, 29 Mar 2014 09:48:35 -0700 Subject: net-sysfs: expose number of carrier on/off changes This allows to monitor carrier on/off transitions and detect link flapping issues: - new /sys/class/net/X/carrier_changes - new rtnetlink IFLA_CARRIER_CHANGES (getlink) Tested: - grep . /sys/class/net/*/carrier_changes + ip link set dev X down/up + plug/unplug cable - updated iproute2: prints IFLA_CARRIER_CHANGES - iproute2 20121211-2 (debian): unchanged behavior Signed-off-by: David Decotigny Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ include/uapi/linux/if_link.h | 1 + net/core/net-sysfs.c | 11 +++++++++++ net/core/rtnetlink.c | 6 +++++- net/sched/sch_generic.c | 2 ++ 5 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 34cae3ee74f1..45537ed7a5b3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1308,6 +1308,9 @@ struct net_device { atomic_long_t rx_dropped; atomic_long_t tx_dropped; + /* Stats to monitor carrier on<->off transitions */ + atomic_t carrier_changes; + #ifdef CONFIG_WIRELESS_EXT /* List of functions to handle Wireless Extensions (instead of ioctl). * See for details. Jean II */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 16410b6e7819..9a7f7ace6649 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -144,6 +144,7 @@ enum { IFLA_NUM_RX_QUEUES, IFLA_CARRIER, IFLA_PHYS_PORT_ID, + IFLA_CARRIER_CHANGES, __IFLA_MAX }; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index daed9a64c6f6..462396278484 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -253,6 +253,16 @@ static ssize_t operstate_show(struct device *dev, } static DEVICE_ATTR_RO(operstate); +static ssize_t carrier_changes_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + return sprintf(buf, fmt_dec, + atomic_read(&netdev->carrier_changes)); +} +static DEVICE_ATTR_RO(carrier_changes); + /* read-write attributes */ static int change_mtu(struct net_device *net, unsigned long new_mtu) @@ -386,6 +396,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_duplex.attr, &dev_attr_dormant.attr, &dev_attr_operstate.attr, + &dev_attr_carrier_changes.attr, &dev_attr_ifalias.attr, &dev_attr_carrier.attr, &dev_attr_mtu.attr, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e7c6006bc3ea..d4ff41739b0f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -822,6 +822,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ + nla_total_size(ext_filter_mask & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ @@ -970,7 +971,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, (dev->qdisc && nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || (dev->ifalias && - nla_put_string(skb, IFLA_IFALIAS, dev->ifalias))) + nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) || + nla_put_u32(skb, IFLA_CARRIER_CHANGES, + atomic_read(&dev->carrier_changes))) goto nla_put_failure; if (1) { @@ -1147,6 +1150,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 }, [IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 }, [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_PORT_ID_LEN }, + [IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */ }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e82e43b69c33..e1543b03e39d 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -310,6 +310,7 @@ void netif_carrier_on(struct net_device *dev) if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { if (dev->reg_state == NETREG_UNINITIALIZED) return; + atomic_inc(&dev->carrier_changes); linkwatch_fire_event(dev); if (netif_running(dev)) __netdev_watchdog_up(dev); @@ -328,6 +329,7 @@ void netif_carrier_off(struct net_device *dev) if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) { if (dev->reg_state == NETREG_UNINITIALIZED) return; + atomic_inc(&dev->carrier_changes); linkwatch_fire_event(dev); } } -- cgit v1.2.3-71-gd317 From a50e233c50dbc881abaa0e4070789064e8d12d70 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 29 Mar 2014 21:28:21 -0700 Subject: net-gro: restore frag0 optimization Main difference between napi_frags_skb() and napi_gro_receive() is that the later is called while ethernet header was already pulled by the NIC driver (eth_type_trans() was called before napi_gro_receive()) Jerry Chu in commit 299603e8370a ("net-gro: Prepare GRO stack for the upcoming tunneling support") tried to remove this difference by calling eth_type_trans() from napi_frags_skb() instead of doing this later from napi_frags_finish() Goal was that napi_gro_complete() could call ptype->callbacks.gro_complete(skb, 0) (offset of first network header = 0) Also, xxx_gro_receive() handlers all use off = skb_gro_offset(skb) to point to their own header, for the current skb and ones held in gro_list Problem is this cleanup work defeated the frag0 optimization: It turns out the consecutive pskb_may_pull() calls are too expensive. This patch brings back the frag0 stuff in napi_frags_skb(). As all skb have their mac header in skb head, we no longer need skb_gro_mac_header() Reported-by: Michal Schmidt Fixes: 299603e8370a ("net-gro: Prepare GRO stack for the upcoming tunneling support") Signed-off-by: Eric Dumazet Cc: Jerry Chu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 --- net/core/dev.c | 96 +++++++++++++++++++++++++++++++---------------- 2 files changed, 64 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 45537ed7a5b3..775cc956ff78 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2014,11 +2014,6 @@ static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, return skb->data + offset; } -static inline void *skb_gro_mac_header(struct sk_buff *skb) -{ - return NAPI_GRO_CB(skb)->frag0 ?: skb_mac_header(skb); -} - static inline void *skb_gro_network_header(struct sk_buff *skb) { return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + diff --git a/net/core/dev.c b/net/core/dev.c index a923eed976ae..48d81e4a256e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3833,10 +3833,10 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) diffs |= p->vlan_tci ^ skb->vlan_tci; if (maclen == ETH_HLEN) diffs |= compare_ether_header(skb_mac_header(p), - skb_gro_mac_header(skb)); + skb_mac_header(skb)); else if (!diffs) diffs = memcmp(skb_mac_header(p), - skb_gro_mac_header(skb), + skb_mac_header(skb), maclen); NAPI_GRO_CB(p)->same_flow = !diffs; } @@ -3859,6 +3859,27 @@ static void skb_gro_reset_offset(struct sk_buff *skb) } } +static void gro_pull_from_frag0(struct sk_buff *skb, int grow) +{ + struct skb_shared_info *pinfo = skb_shinfo(skb); + + BUG_ON(skb->end - skb->tail < grow); + + memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow); + + skb->data_len -= grow; + skb->tail += grow; + + pinfo->frags[0].page_offset += grow; + skb_frag_size_sub(&pinfo->frags[0], grow); + + if (unlikely(!skb_frag_size(&pinfo->frags[0]))) { + skb_frag_unref(skb, 0); + memmove(pinfo->frags, pinfo->frags + 1, + --pinfo->nr_frags * sizeof(pinfo->frags[0])); + } +} + static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; @@ -3867,6 +3888,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff struct list_head *head = &offload_base; int same_flow; enum gro_result ret; + int grow; if (!(skb->dev->features & NETIF_F_GRO)) goto normal; @@ -3874,7 +3896,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (skb_is_gso(skb) || skb_has_frag_list(skb)) goto normal; - skb_gro_reset_offset(skb); gro_list_prepare(napi, skb); NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */ @@ -3938,27 +3959,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff ret = GRO_HELD; pull: - if (skb_headlen(skb) < skb_gro_offset(skb)) { - int grow = skb_gro_offset(skb) - skb_headlen(skb); - - BUG_ON(skb->end - skb->tail < grow); - - memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow); - - skb->tail += grow; - skb->data_len -= grow; - - skb_shinfo(skb)->frags[0].page_offset += grow; - skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow); - - if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) { - skb_frag_unref(skb, 0); - memmove(skb_shinfo(skb)->frags, - skb_shinfo(skb)->frags + 1, - --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); - } - } - + grow = skb_gro_offset(skb) - skb_headlen(skb); + if (grow > 0) + gro_pull_from_frag0(skb, grow); ok: return ret; @@ -4026,6 +4029,8 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { trace_napi_gro_receive_entry(skb); + skb_gro_reset_offset(skb); + return napi_skb_finish(dev_gro_receive(napi, skb), skb); } EXPORT_SYMBOL(napi_gro_receive); @@ -4054,12 +4059,16 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_get_frags); -static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, - gro_result_t ret) +static gro_result_t napi_frags_finish(struct napi_struct *napi, + struct sk_buff *skb, + gro_result_t ret) { switch (ret) { case GRO_NORMAL: - if (netif_receive_skb_internal(skb)) + case GRO_HELD: + __skb_push(skb, ETH_HLEN); + skb->protocol = eth_type_trans(skb, skb->dev); + if (ret == GRO_NORMAL && netif_receive_skb_internal(skb)) ret = GRO_DROP; break; @@ -4068,7 +4077,6 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * napi_reuse_skb(napi, skb); break; - case GRO_HELD: case GRO_MERGED: break; } @@ -4076,17 +4084,41 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * return ret; } +/* Upper GRO stack assumes network header starts at gro_offset=0 + * Drivers could call both napi_gro_frags() and napi_gro_receive() + * We copy ethernet header into skb->data to have a common layout. + */ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) { struct sk_buff *skb = napi->skb; + const struct ethhdr *eth; + unsigned int hlen = sizeof(*eth); napi->skb = NULL; - if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) { - napi_reuse_skb(napi, skb); - return NULL; + skb_reset_mac_header(skb); + skb_gro_reset_offset(skb); + + eth = skb_gro_header_fast(skb, 0); + if (unlikely(skb_gro_header_hard(skb, hlen))) { + eth = skb_gro_header_slow(skb, hlen, 0); + if (unlikely(!eth)) { + napi_reuse_skb(napi, skb); + return NULL; + } + } else { + gro_pull_from_frag0(skb, hlen); + NAPI_GRO_CB(skb)->frag0 += hlen; + NAPI_GRO_CB(skb)->frag0_len -= hlen; } - skb->protocol = eth_type_trans(skb, skb->dev); + __skb_pull(skb, hlen); + + /* + * This works because the only protocols we care about don't require + * special handling. + * We'll fix it up properly in napi_frags_finish() + */ + skb->protocol = eth->h_proto; return skb; } -- cgit v1.2.3-71-gd317 From e462ded699aa2cca04b68fbf203ea4675d4c44d4 Mon Sep 17 00:00:00 2001 From: Phoebe Buckheister Date: Mon, 31 Mar 2014 21:37:46 +0200 Subject: mac802154: make csma/cca parameters per-wpan Commit 9b2777d6089bcd (ieee802154: add TX power control to wpan_phy) and following erroneously added CSMA and CCA parameters for 802.15.4 devices as PHY parameters, while they are actually MAC parameters and can differ for any two WPAN instances. Since it is now sensible to have multiple WPAN devices with differing CSMA/CCA parameters, make these parameters MAC parameters instead. Signed-off-by: Phoebe Buckheister Signed-off-by: David S. Miller --- include/linux/nl802154.h | 2 +- include/net/ieee802154_netdev.h | 17 ++++ net/ieee802154/ieee802154.h | 2 +- net/ieee802154/netlink.c | 2 +- net/ieee802154/nl-mac.c | 122 +++++++++++++++++++++++- net/ieee802154/nl-phy.c | 200 +--------------------------------------- net/ieee802154/wpan-class.c | 6 -- net/mac802154/ieee802154_dev.c | 36 +++----- net/mac802154/mac802154.h | 9 ++ net/mac802154/mac_cmd.c | 3 + net/mac802154/wpan.c | 89 +++++++++++++++++- 11 files changed, 252 insertions(+), 236 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index e110b8c266f5..c8d7f3965fff 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -132,7 +132,7 @@ enum { IEEE802154_ADD_IFACE, IEEE802154_DEL_IFACE, - IEEE802154_SET_PHYPARAMS, + IEEE802154_SET_MACPARAMS, __IEEE802154_CMD_MAX, }; diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index e1717cbf609b..5a719ca892f4 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -229,6 +229,18 @@ static inline int mac_cb_type(struct sk_buff *skb) #define IEEE802154_MAC_SCAN_PASSIVE 2 #define IEEE802154_MAC_SCAN_ORPHAN 3 +struct ieee802154_mac_params { + s8 transmit_power; + u8 min_be; + u8 max_be; + u8 csma_retries; + s8 frame_retries; + + bool lbt; + u8 cca_mode; + s32 cca_ed_level; +}; + struct wpan_phy; /* * This should be located at net_device->ml_priv @@ -255,6 +267,11 @@ struct ieee802154_mlme_ops { int (*scan_req)(struct net_device *dev, u8 type, u32 channels, u8 page, u8 duration); + int (*set_mac_params)(struct net_device *dev, + const struct ieee802154_mac_params *params); + void (*get_mac_params)(struct net_device *dev, + struct ieee802154_mac_params *params); + /* The fields below are required. */ struct wpan_phy *(*get_phy)(const struct net_device *dev); diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h index 6cbc8965be91..6693a5cf01ce 100644 --- a/net/ieee802154/ieee802154.h +++ b/net/ieee802154/ieee802154.h @@ -53,7 +53,6 @@ int ieee802154_list_phy(struct sk_buff *skb, struct genl_info *info); int ieee802154_dump_phy(struct sk_buff *skb, struct netlink_callback *cb); int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info); int ieee802154_del_iface(struct sk_buff *skb, struct genl_info *info); -int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info); enum ieee802154_mcgrp_ids { IEEE802154_COORD_MCGRP, @@ -67,5 +66,6 @@ int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info); int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info); int ieee802154_list_iface(struct sk_buff *skb, struct genl_info *info); int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb); +int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info); #endif diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 67c151bf4b91..04b20589d97a 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -115,7 +115,6 @@ static const struct genl_ops ieee8021154_ops[] = { ieee802154_dump_phy), IEEE802154_OP(IEEE802154_ADD_IFACE, ieee802154_add_iface), IEEE802154_OP(IEEE802154_DEL_IFACE, ieee802154_del_iface), - IEEE802154_OP(IEEE802154_SET_PHYPARAMS, ieee802154_set_phyparams), /* see nl-mac.c */ IEEE802154_OP(IEEE802154_ASSOCIATE_REQ, ieee802154_associate_req), IEEE802154_OP(IEEE802154_ASSOCIATE_RESP, ieee802154_associate_resp), @@ -124,6 +123,7 @@ static const struct genl_ops ieee8021154_ops[] = { IEEE802154_OP(IEEE802154_START_REQ, ieee802154_start_req), IEEE802154_DUMP(IEEE802154_LIST_IFACE, ieee802154_list_iface, ieee802154_dump_iface), + IEEE802154_OP(IEEE802154_SET_MACPARAMS, ieee802154_set_macparams), }; static const struct genl_multicast_group ieee802154_mcgrps[] = { diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index bda8dba4f993..5d285498c0f6 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -264,6 +264,7 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, { void *hdr; struct wpan_phy *phy; + struct ieee802154_mlme_ops *ops; __le16 short_addr, pan_id; pr_debug("%s\n", __func__); @@ -273,11 +274,12 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, if (!hdr) goto out; - phy = ieee802154_mlme_ops(dev)->get_phy(dev); + ops = ieee802154_mlme_ops(dev); + phy = ops->get_phy(dev); BUG_ON(!phy); - short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); - pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + short_addr = ops->get_short_addr(dev); + pan_id = ops->get_pan_id(dev); if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) || @@ -287,6 +289,30 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, short_addr) || nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, pan_id)) goto nla_put_failure; + + if (ops->get_mac_params) { + struct ieee802154_mac_params params; + + ops->get_mac_params(dev, ¶ms); + + if (nla_put_s8(msg, IEEE802154_ATTR_TXPOWER, + params.transmit_power) || + nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, params.lbt) || + nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE, + params.cca_mode) || + nla_put_s32(msg, IEEE802154_ATTR_CCA_ED_LEVEL, + params.cca_ed_level) || + nla_put_u8(msg, IEEE802154_ATTR_CSMA_RETRIES, + params.csma_retries) || + nla_put_u8(msg, IEEE802154_ATTR_CSMA_MIN_BE, + params.min_be) || + nla_put_u8(msg, IEEE802154_ATTR_CSMA_MAX_BE, + params.max_be) || + nla_put_s8(msg, IEEE802154_ATTR_FRAME_RETRIES, + params.frame_retries)) + goto nla_put_failure; + } + wpan_phy_put(phy); return genlmsg_end(msg, hdr); @@ -599,3 +625,93 @@ cont: return skb->len; } + +int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device *dev = NULL; + struct ieee802154_mlme_ops *ops; + struct ieee802154_mac_params params; + struct wpan_phy *phy; + int rc = -EINVAL; + + pr_debug("%s\n", __func__); + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + ops = ieee802154_mlme_ops(dev); + + if (!ops->get_mac_params || !ops->set_mac_params) { + rc = -EOPNOTSUPP; + goto out; + } + + if (netif_running(dev)) { + rc = -EBUSY; + goto out; + } + + if (!info->attrs[IEEE802154_ATTR_LBT_ENABLED] && + !info->attrs[IEEE802154_ATTR_CCA_MODE] && + !info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL] && + !info->attrs[IEEE802154_ATTR_CSMA_RETRIES] && + !info->attrs[IEEE802154_ATTR_CSMA_MIN_BE] && + !info->attrs[IEEE802154_ATTR_CSMA_MAX_BE] && + !info->attrs[IEEE802154_ATTR_FRAME_RETRIES]) + goto out; + + phy = ops->get_phy(dev); + + if ((!phy->set_lbt && info->attrs[IEEE802154_ATTR_LBT_ENABLED]) || + (!phy->set_cca_mode && info->attrs[IEEE802154_ATTR_CCA_MODE]) || + (!phy->set_cca_ed_level && + info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) || + (!phy->set_csma_params && + (info->attrs[IEEE802154_ATTR_CSMA_RETRIES] || + info->attrs[IEEE802154_ATTR_CSMA_MIN_BE] || + info->attrs[IEEE802154_ATTR_CSMA_MAX_BE])) || + (!phy->set_frame_retries && + info->attrs[IEEE802154_ATTR_FRAME_RETRIES])) { + rc = -EOPNOTSUPP; + goto out_phy; + } + + ops->get_mac_params(dev, ¶ms); + + if (info->attrs[IEEE802154_ATTR_TXPOWER]) + params.transmit_power = nla_get_s8(info->attrs[IEEE802154_ATTR_TXPOWER]); + + if (info->attrs[IEEE802154_ATTR_LBT_ENABLED]) + params.lbt = nla_get_u8(info->attrs[IEEE802154_ATTR_LBT_ENABLED]); + + if (info->attrs[IEEE802154_ATTR_CCA_MODE]) + params.cca_mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]); + + if (info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) + params.cca_ed_level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]); + + if (info->attrs[IEEE802154_ATTR_CSMA_RETRIES]) + params.csma_retries = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_RETRIES]); + + if (info->attrs[IEEE802154_ATTR_CSMA_MIN_BE]) + params.min_be = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_MIN_BE]); + + if (info->attrs[IEEE802154_ATTR_CSMA_MAX_BE]) + params.max_be = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_MAX_BE]); + + if (info->attrs[IEEE802154_ATTR_FRAME_RETRIES]) + params.frame_retries = nla_get_s8(info->attrs[IEEE802154_ATTR_FRAME_RETRIES]); + + rc = ops->set_mac_params(dev, ¶ms); + + wpan_phy_put(phy); + dev_put(dev); + return rc; + +out_phy: + wpan_phy_put(phy); +out: + dev_put(dev); + return rc; +} diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 222310a07762..89b265aea151 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -55,15 +55,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, mutex_lock(&phy->pib_lock); if (nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) || nla_put_u8(msg, IEEE802154_ATTR_PAGE, phy->current_page) || - nla_put_u8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel) || - nla_put_s8(msg, IEEE802154_ATTR_TXPOWER, phy->transmit_power) || - nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, phy->lbt) || - nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE, phy->cca_mode) || - nla_put_s32(msg, IEEE802154_ATTR_CCA_ED_LEVEL, phy->cca_ed_level) || - nla_put_u8(msg, IEEE802154_ATTR_CSMA_RETRIES, phy->csma_retries) || - nla_put_u8(msg, IEEE802154_ATTR_CSMA_MIN_BE, phy->min_be) || - nla_put_u8(msg, IEEE802154_ATTR_CSMA_MAX_BE, phy->max_be) || - nla_put_s8(msg, IEEE802154_ATTR_FRAME_RETRIES, phy->frame_retries)) + nla_put_u8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel)) goto nla_put_failure; for (i = 0; i < 32; i++) { if (phy->channels_supported[i]) @@ -362,193 +354,3 @@ out_dev: return rc; } - -static int phy_set_txpower(struct wpan_phy *phy, struct genl_info *info) -{ - int txpower = nla_get_s8(info->attrs[IEEE802154_ATTR_TXPOWER]); - int rc; - - rc = phy->set_txpower(phy, txpower); - if (rc < 0) - return rc; - - phy->transmit_power = txpower; - - return 0; -} - -static int phy_set_lbt(struct wpan_phy *phy, struct genl_info *info) -{ - u8 on = !!nla_get_u8(info->attrs[IEEE802154_ATTR_LBT_ENABLED]); - int rc; - - rc = phy->set_lbt(phy, on); - if (rc < 0) - return rc; - - phy->lbt = on; - - return 0; -} - -static int phy_set_cca_mode(struct wpan_phy *phy, struct genl_info *info) -{ - u8 mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]); - int rc; - - if (mode > 3) - return -EINVAL; - - rc = phy->set_cca_mode(phy, mode); - if (rc < 0) - return rc; - - phy->cca_mode = mode; - - return 0; -} - -static int phy_set_cca_ed_level(struct wpan_phy *phy, struct genl_info *info) -{ - s32 level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]); - int rc; - - rc = phy->set_cca_ed_level(phy, level); - if (rc < 0) - return rc; - - phy->cca_ed_level = level; - - return 0; -} - -static int phy_set_csma_params(struct wpan_phy *phy, struct genl_info *info) -{ - int rc; - u8 min_be = phy->min_be; - u8 max_be = phy->max_be; - u8 retries = phy->csma_retries; - - if (info->attrs[IEEE802154_ATTR_CSMA_RETRIES]) - retries = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_RETRIES]); - if (info->attrs[IEEE802154_ATTR_CSMA_MIN_BE]) - min_be = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_MIN_BE]); - if (info->attrs[IEEE802154_ATTR_CSMA_MAX_BE]) - max_be = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_MAX_BE]); - - if (retries > 5 || max_be < 3 || max_be > 8 || min_be > max_be) - return -EINVAL; - - rc = phy->set_csma_params(phy, min_be, max_be, retries); - if (rc < 0) - return rc; - - phy->min_be = min_be; - phy->max_be = max_be; - phy->csma_retries = retries; - - return 0; -} - -static int phy_set_frame_retries(struct wpan_phy *phy, struct genl_info *info) -{ - s8 retries = nla_get_s8(info->attrs[IEEE802154_ATTR_FRAME_RETRIES]); - int rc; - - if (retries < -1 || retries > 7) - return -EINVAL; - - rc = phy->set_frame_retries(phy, retries); - if (rc < 0) - return rc; - - phy->frame_retries = retries; - - return 0; -} - -int ieee802154_set_phyparams(struct sk_buff *skb, struct genl_info *info) -{ - struct wpan_phy *phy; - const char *name; - int rc = -ENOTSUPP; - - pr_debug("%s\n", __func__); - - if (!info->attrs[IEEE802154_ATTR_PHY_NAME] && - !info->attrs[IEEE802154_ATTR_LBT_ENABLED] && - !info->attrs[IEEE802154_ATTR_CCA_MODE] && - !info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL] && - !info->attrs[IEEE802154_ATTR_CSMA_RETRIES] && - !info->attrs[IEEE802154_ATTR_CSMA_MIN_BE] && - !info->attrs[IEEE802154_ATTR_CSMA_MAX_BE] && - !info->attrs[IEEE802154_ATTR_FRAME_RETRIES]) - return -EINVAL; - - name = nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]); - if (name[nla_len(info->attrs[IEEE802154_ATTR_PHY_NAME]) - 1] != '\0') - return -EINVAL; /* phy name should be null-terminated */ - - phy = wpan_phy_find(name); - if (!phy) - return -ENODEV; - - if ((!phy->set_txpower && info->attrs[IEEE802154_ATTR_TXPOWER]) || - (!phy->set_lbt && info->attrs[IEEE802154_ATTR_LBT_ENABLED]) || - (!phy->set_cca_mode && info->attrs[IEEE802154_ATTR_CCA_MODE]) || - (!phy->set_cca_ed_level && - info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL])) - goto out; - - mutex_lock(&phy->pib_lock); - - if (info->attrs[IEEE802154_ATTR_TXPOWER]) { - rc = phy_set_txpower(phy, info); - if (rc < 0) - goto error; - } - - if (info->attrs[IEEE802154_ATTR_LBT_ENABLED]) { - rc = phy_set_lbt(phy, info); - if (rc < 0) - goto error; - } - - if (info->attrs[IEEE802154_ATTR_CCA_MODE]) { - rc = phy_set_cca_mode(phy, info); - if (rc < 0) - goto error; - } - - if (info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) { - rc = phy_set_cca_ed_level(phy, info); - if (rc < 0) - goto error; - } - - if (info->attrs[IEEE802154_ATTR_CSMA_RETRIES] || - info->attrs[IEEE802154_ATTR_CSMA_MIN_BE] || - info->attrs[IEEE802154_ATTR_CSMA_MAX_BE]) { - rc = phy_set_csma_params(phy, info); - if (rc < 0) - goto error; - } - - if (info->attrs[IEEE802154_ATTR_FRAME_RETRIES]) { - rc = phy_set_frame_retries(phy, info); - if (rc < 0) - goto error; - } - - mutex_unlock(&phy->pib_lock); - - wpan_phy_put(phy); - - return 0; - -error: - mutex_unlock(&phy->pib_lock); -out: - wpan_phy_put(phy); - return rc; -} diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index edd0962d55f9..8d6f6704da84 100644 --- a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -169,12 +169,6 @@ struct wpan_phy *wpan_phy_alloc(size_t priv_size) phy->current_channel = -1; /* not initialised */ phy->current_page = 0; /* for compatibility */ - /* defaults per 802.15.4-2011 */ - phy->min_be = 3; - phy->max_be = 5; - phy->csma_retries = 4; - phy->frame_retries = -1; /* for compatibility, actual default is 3 */ - return phy; out: diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c index e7aa76445fe1..2cf66d885e68 100644 --- a/net/mac802154/ieee802154_dev.c +++ b/net/mac802154/ieee802154_dev.c @@ -197,9 +197,6 @@ static int mac802154_set_txpower(struct wpan_phy *phy, int db) { struct mac802154_priv *priv = wpan_phy_priv(phy); - if (!priv->ops->set_txpower) - return -ENOTSUPP; - return priv->ops->set_txpower(&priv->hw, db); } @@ -207,9 +204,6 @@ static int mac802154_set_lbt(struct wpan_phy *phy, bool on) { struct mac802154_priv *priv = wpan_phy_priv(phy); - if (!priv->ops->set_lbt) - return -ENOTSUPP; - return priv->ops->set_lbt(&priv->hw, on); } @@ -217,9 +211,6 @@ static int mac802154_set_cca_mode(struct wpan_phy *phy, u8 mode) { struct mac802154_priv *priv = wpan_phy_priv(phy); - if (!priv->ops->set_cca_mode) - return -ENOTSUPP; - return priv->ops->set_cca_mode(&priv->hw, mode); } @@ -227,9 +218,6 @@ static int mac802154_set_cca_ed_level(struct wpan_phy *phy, s32 level) { struct mac802154_priv *priv = wpan_phy_priv(phy); - if (!priv->ops->set_cca_ed_level) - return -ENOTSUPP; - return priv->ops->set_cca_ed_level(&priv->hw, level); } @@ -238,9 +226,6 @@ static int mac802154_set_csma_params(struct wpan_phy *phy, u8 min_be, { struct mac802154_priv *priv = wpan_phy_priv(phy); - if (!priv->ops->set_csma_params) - return -ENOTSUPP; - return priv->ops->set_csma_params(&priv->hw, min_be, max_be, retries); } @@ -248,9 +233,6 @@ static int mac802154_set_frame_retries(struct wpan_phy *phy, s8 retries) { struct mac802154_priv *priv = wpan_phy_priv(phy); - if (!priv->ops->set_frame_retries) - return -ENOTSUPP; - return priv->ops->set_frame_retries(&priv->hw, retries); } @@ -331,12 +313,18 @@ int ieee802154_register_device(struct ieee802154_dev *dev) priv->phy->add_iface = mac802154_add_iface; priv->phy->del_iface = mac802154_del_iface; - priv->phy->set_txpower = mac802154_set_txpower; - priv->phy->set_lbt = mac802154_set_lbt; - priv->phy->set_cca_mode = mac802154_set_cca_mode; - priv->phy->set_cca_ed_level = mac802154_set_cca_ed_level; - priv->phy->set_csma_params = mac802154_set_csma_params; - priv->phy->set_frame_retries = mac802154_set_frame_retries; + if (priv->ops->set_txpower) + priv->phy->set_txpower = mac802154_set_txpower; + if (priv->ops->set_lbt) + priv->phy->set_lbt = mac802154_set_lbt; + if (priv->ops->set_cca_mode) + priv->phy->set_cca_mode = mac802154_set_cca_mode; + if (priv->ops->set_cca_ed_level) + priv->phy->set_cca_ed_level = mac802154_set_cca_ed_level; + if (priv->ops->set_csma_params) + priv->phy->set_csma_params = mac802154_set_csma_params; + if (priv->ops->set_frame_retries) + priv->phy->set_frame_retries = mac802154_set_frame_retries; rc = wpan_phy_register(priv->phy); if (rc < 0) diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h index f40522ef288c..28ef59c566e6 100644 --- a/net/mac802154/mac802154.h +++ b/net/mac802154/mac802154.h @@ -23,6 +23,8 @@ #ifndef MAC802154_H #define MAC802154_H +#include + /* mac802154 device private data */ struct mac802154_priv { struct ieee802154_dev hw; @@ -82,6 +84,8 @@ struct mac802154_sub_if_data { u8 chan; u8 page; + struct ieee802154_mac_params mac_params; + /* MAC BSN field */ u8 bsn; /* MAC DSN field */ @@ -116,4 +120,9 @@ void mac802154_dev_set_pan_id(struct net_device *dev, __le16 val); void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan); u8 mac802154_dev_get_dsn(const struct net_device *dev); +int mac802154_set_mac_params(struct net_device *dev, + const struct ieee802154_mac_params *params); +void mac802154_get_mac_params(struct net_device *dev, + struct ieee802154_mac_params *params); + #endif /* MAC802154_H */ diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index 15bac3358889..d40c0928bc62 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -74,4 +74,7 @@ struct ieee802154_mlme_ops mac802154_mlme_wpan = { .get_pan_id = mac802154_dev_get_pan_id, .get_short_addr = mac802154_dev_get_short_addr, .get_dsn = mac802154_dev_get_dsn, + + .set_mac_params = mac802154_set_mac_params, + .get_mac_params = mac802154_get_mac_params, }; diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index 80cbee1a2f56..1df7a6a57386 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -102,6 +102,87 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) return 0; } +int mac802154_set_mac_params(struct net_device *dev, + const struct ieee802154_mac_params *params) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + + mutex_lock(&priv->hw->slaves_mtx); + priv->mac_params = *params; + mutex_unlock(&priv->hw->slaves_mtx); + + return 0; +} + +void mac802154_get_mac_params(struct net_device *dev, + struct ieee802154_mac_params *params) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + + mutex_lock(&priv->hw->slaves_mtx); + *params = priv->mac_params; + mutex_unlock(&priv->hw->slaves_mtx); +} + +int mac802154_wpan_open(struct net_device *dev) +{ + int rc; + struct mac802154_sub_if_data *priv = netdev_priv(dev); + struct wpan_phy *phy = priv->hw->phy; + + rc = mac802154_slave_open(dev); + if (rc < 0) + return rc; + + mutex_lock(&phy->pib_lock); + + if (phy->set_txpower) { + rc = phy->set_txpower(phy, priv->mac_params.transmit_power); + if (rc < 0) + goto out; + } + + if (phy->set_lbt) { + rc = phy->set_lbt(phy, priv->mac_params.lbt); + if (rc < 0) + goto out; + } + + if (phy->set_cca_mode) { + rc = phy->set_cca_mode(phy, priv->mac_params.cca_mode); + if (rc < 0) + goto out; + } + + if (phy->set_cca_ed_level) { + rc = phy->set_cca_ed_level(phy, priv->mac_params.cca_ed_level); + if (rc < 0) + goto out; + } + + if (phy->set_csma_params) { + rc = phy->set_csma_params(phy, priv->mac_params.min_be, + priv->mac_params.max_be, + priv->mac_params.csma_retries); + if (rc < 0) + goto out; + } + + if (phy->set_frame_retries) { + rc = phy->set_frame_retries(phy, + priv->mac_params.frame_retries); + if (rc < 0) + goto out; + } + + mutex_unlock(&phy->pib_lock); + return 0; + +out: + mutex_unlock(&phy->pib_lock); + return rc; +} + static int mac802154_header_create(struct sk_buff *skb, struct net_device *dev, unsigned short type, @@ -204,7 +285,7 @@ static struct header_ops mac802154_header_ops = { }; static const struct net_device_ops mac802154_wpan_ops = { - .ndo_open = mac802154_slave_open, + .ndo_open = mac802154_wpan_open, .ndo_stop = mac802154_slave_close, .ndo_start_xmit = mac802154_wpan_xmit, .ndo_do_ioctl = mac802154_wpan_ioctl, @@ -242,6 +323,12 @@ void mac802154_wpan_setup(struct net_device *dev) get_random_bytes(&priv->bsn, 1); get_random_bytes(&priv->dsn, 1); + /* defaults per 802.15.4-2011 */ + priv->mac_params.min_be = 3; + priv->mac_params.max_be = 5; + priv->mac_params.csma_retries = 4; + priv->mac_params.frame_retries = -1; /* for compatibility, actual default is 3 */ + priv->pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); priv->short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); } -- cgit v1.2.3-71-gd317 From 408eccce32044ee3285a7f6a812723ba3540c3e7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 1 Apr 2014 16:20:23 +0200 Subject: net: ptp: move PTP classifier in its own file This commit fixes a build error reported by Fengguang, that is triggered when CONFIG_NETWORK_PHY_TIMESTAMPING is not set: ERROR: "ptp_classify_raw" [drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.ko] undefined! The fix is to introduce its own file for the PTP BPF classifier, so that PTP_1588_CLOCK and/or NETWORK_PHY_TIMESTAMPING can select it independently from each other. IXP4xx driver on ARM needs to select it as well since it does not seem to select PTP_1588_CLOCK or similar that would pull it in automatically. This also allows for hiding all of the internals of the BPF PTP program inside that file, and only exporting relevant API bits to drivers. This patch also adds a kdoc documentation of ptp_classify_raw() API to make it clear that it can return PTP_CLASS_* defines. Also, the BPF program has been translated into bpf_asm code, so that it can be more easily read and altered (extensively documented in [1]). In the kernel tree under tools/net/ we have bpf_asm and bpf_dbg tools, so the commented program can simply be translated via `./bpf_asm -c prog` where prog is a file that contains the commented code. This makes it easily readable/verifiable and when there's a need to change something, jump offsets etc do not need to be replaced manually which can be very error prone. Instead, a newly translated version via bpf_asm can simply replace the old code. I have checked opcode diffs before/after and it's the very same filter. [1] Documentation/networking/filter.txt Fixes: 164d8c666521 ("net: ptp: do not reimplement PTP/BPF classifier") Reported-by: Fengguang Wu Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Cc: Richard Cochran Cc: Jiri Benc Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/xscale/Kconfig | 1 + drivers/net/phy/dp83640.c | 1 + drivers/ptp/Kconfig | 1 + include/linux/ptp_classify.h | 95 ++++++------------------ include/linux/skbuff.h | 2 - net/Kconfig | 4 + net/core/Makefile | 1 + net/core/ptp_classifier.c | 141 ++++++++++++++++++++++++++++++++++++ net/core/timestamping.c | 18 ----- net/socket.c | 5 +- 10 files changed, 173 insertions(+), 96 deletions(-) create mode 100644 net/core/ptp_classifier.c (limited to 'include/linux') diff --git a/drivers/net/ethernet/xscale/Kconfig b/drivers/net/ethernet/xscale/Kconfig index 3f431019e615..b81bc9fca378 100644 --- a/drivers/net/ethernet/xscale/Kconfig +++ b/drivers/net/ethernet/xscale/Kconfig @@ -23,6 +23,7 @@ config IXP4XX_ETH tristate "Intel IXP4xx Ethernet support" depends on ARM && ARCH_IXP4XX && IXP4XX_NPE && IXP4XX_QMGR select PHYLIB + select NET_PTP_CLASSIFY ---help--- Say Y here if you want to use built-in Ethernet ports on IXP4xx processor. diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 352c5e45fe9c..6a999e6814a0 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 5a7910e61e17..6963bdf54175 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -7,6 +7,7 @@ menu "PTP clock support" config PTP_1588_CLOCK tristate "PTP clock support" select PPS + select NET_PTP_CLASSIFY help The IEEE 1588 standard defines a method to precisely synchronize distributed clocks over Ethernet networks. The diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index 6d3b0a2ef9ce..7dfed71d76a6 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -23,11 +23,8 @@ #ifndef _PTP_CLASSIFY_H_ #define _PTP_CLASSIFY_H_ -#include -#include #include -#include -#include +#include #define PTP_CLASS_NONE 0x00 /* not a PTP event message */ #define PTP_CLASS_V1 0x01 /* protocol version 1 */ @@ -40,7 +37,7 @@ #define PTP_CLASS_PMASK 0xf0 /* mask for the packet type field */ #define PTP_CLASS_V1_IPV4 (PTP_CLASS_V1 | PTP_CLASS_IPV4) -#define PTP_CLASS_V1_IPV6 (PTP_CLASS_V1 | PTP_CLASS_IPV6) /*probably DNE*/ +#define PTP_CLASS_V1_IPV6 (PTP_CLASS_V1 | PTP_CLASS_IPV6) /* probably DNE */ #define PTP_CLASS_V2_IPV4 (PTP_CLASS_V2 | PTP_CLASS_IPV4) #define PTP_CLASS_V2_IPV6 (PTP_CLASS_V2 | PTP_CLASS_IPV6) #define PTP_CLASS_V2_L2 (PTP_CLASS_V2 | PTP_CLASS_L2) @@ -49,82 +46,34 @@ #define PTP_EV_PORT 319 #define PTP_GEN_BIT 0x08 /* indicates general message, if set in message type */ -#define OFF_ETYPE 12 -#define OFF_IHL 14 -#define OFF_FRAG 20 -#define OFF_PROTO4 23 -#define OFF_NEXT 6 -#define OFF_UDP_DST 2 - #define OFF_PTP_SOURCE_UUID 22 /* PTPv1 only */ #define OFF_PTP_SEQUENCE_ID 30 #define OFF_PTP_CONTROL 32 /* PTPv1 only */ -#define IPV4_HLEN(data) (((struct iphdr *)(data + OFF_IHL))->ihl << 2) - +/* Below defines should actually be removed at some point in time. */ #define IP6_HLEN 40 #define UDP_HLEN 8 - -#define RELOFF_DST4 (ETH_HLEN + OFF_UDP_DST) -#define OFF_DST6 (ETH_HLEN + IP6_HLEN + OFF_UDP_DST) +#define OFF_IHL 14 #define OFF_PTP6 (ETH_HLEN + IP6_HLEN + UDP_HLEN) +#define IPV4_HLEN(data) (((struct iphdr *)(data + OFF_IHL))->ihl << 2) -#define OP_AND (BPF_ALU | BPF_AND | BPF_K) -#define OP_JEQ (BPF_JMP | BPF_JEQ | BPF_K) -#define OP_JSET (BPF_JMP | BPF_JSET | BPF_K) -#define OP_LDB (BPF_LD | BPF_B | BPF_ABS) -#define OP_LDH (BPF_LD | BPF_H | BPF_ABS) -#define OP_LDHI (BPF_LD | BPF_H | BPF_IND) -#define OP_LDX (BPF_LDX | BPF_B | BPF_MSH) -#define OP_OR (BPF_ALU | BPF_OR | BPF_K) -#define OP_RETA (BPF_RET | BPF_A) -#define OP_RETK (BPF_RET | BPF_K) - -#define PTP_FILTER \ - {OP_LDH, 0, 0, OFF_ETYPE }, /* */ \ - {OP_JEQ, 0, 12, ETH_P_IP }, /* f goto L20 */ \ - {OP_LDB, 0, 0, OFF_PROTO4 }, /* */ \ - {OP_JEQ, 0, 9, IPPROTO_UDP }, /* f goto L10 */ \ - {OP_LDH, 0, 0, OFF_FRAG }, /* */ \ - {OP_JSET, 7, 0, 0x1fff }, /* t goto L11 */ \ - {OP_LDX, 0, 0, OFF_IHL }, /* */ \ - {OP_LDHI, 0, 0, RELOFF_DST4 }, /* */ \ - {OP_JEQ, 0, 4, PTP_EV_PORT }, /* f goto L12 */ \ - {OP_LDHI, 0, 0, ETH_HLEN + UDP_HLEN }, /* */ \ - {OP_AND, 0, 0, PTP_CLASS_VMASK }, /* */ \ - {OP_OR, 0, 0, PTP_CLASS_IPV4 }, /* */ \ - {OP_RETA, 0, 0, 0 }, /* */ \ -/*L1x*/ {OP_RETK, 0, 0, PTP_CLASS_NONE }, /* */ \ -/*L20*/ {OP_JEQ, 0, 9, ETH_P_IPV6 }, /* f goto L40 */ \ - {OP_LDB, 0, 0, ETH_HLEN + OFF_NEXT }, /* */ \ - {OP_JEQ, 0, 6, IPPROTO_UDP }, /* f goto L30 */ \ - {OP_LDH, 0, 0, OFF_DST6 }, /* */ \ - {OP_JEQ, 0, 4, PTP_EV_PORT }, /* f goto L31 */ \ - {OP_LDH, 0, 0, OFF_PTP6 }, /* */ \ - {OP_AND, 0, 0, PTP_CLASS_VMASK }, /* */ \ - {OP_OR, 0, 0, PTP_CLASS_IPV6 }, /* */ \ - {OP_RETA, 0, 0, 0 }, /* */ \ -/*L3x*/ {OP_RETK, 0, 0, PTP_CLASS_NONE }, /* */ \ -/*L40*/ {OP_JEQ, 0, 9, ETH_P_8021Q }, /* f goto L50 */ \ - {OP_LDH, 0, 0, OFF_ETYPE + 4 }, /* */ \ - {OP_JEQ, 0, 15, ETH_P_1588 }, /* f goto L60 */ \ - {OP_LDB, 0, 0, ETH_HLEN + VLAN_HLEN }, /* */ \ - {OP_AND, 0, 0, PTP_GEN_BIT }, /* */ \ - {OP_JEQ, 0, 12, 0 }, /* f goto L6x */ \ - {OP_LDH, 0, 0, ETH_HLEN + VLAN_HLEN }, /* */ \ - {OP_AND, 0, 0, PTP_CLASS_VMASK }, /* */ \ - {OP_OR, 0, 0, PTP_CLASS_VLAN }, /* */ \ - {OP_RETA, 0, 0, 0 }, /* */ \ -/*L50*/ {OP_JEQ, 0, 7, ETH_P_1588 }, /* f goto L61 */ \ - {OP_LDB, 0, 0, ETH_HLEN }, /* */ \ - {OP_AND, 0, 0, PTP_GEN_BIT }, /* */ \ - {OP_JEQ, 0, 4, 0 }, /* f goto L6x */ \ - {OP_LDH, 0, 0, ETH_HLEN }, /* */ \ - {OP_AND, 0, 0, PTP_CLASS_VMASK }, /* */ \ - {OP_OR, 0, 0, PTP_CLASS_L2 }, /* */ \ - {OP_RETA, 0, 0, 0 }, /* */ \ -/*L6x*/ {OP_RETK, 0, 0, PTP_CLASS_NONE }, - +#if defined(CONFIG_NET_PTP_CLASSIFY) +/** + * ptp_classify_raw - classify a PTP packet + * @skb: buffer + * + * Runs a minimal BPF dissector to classify a network packet to + * determine the PTP class. In case the skb does not contain any + * PTP protocol data, PTP_CLASS_NONE will be returned, otherwise + * PTP_CLASS_V1_IPV{4,6}, PTP_CLASS_V2_IPV{4,6} or + * PTP_CLASS_V2_{L2,VLAN}, depending on the packet content. + */ unsigned int ptp_classify_raw(const struct sk_buff *skb); +void __init ptp_classifier_init(void); +#else +static inline void ptp_classifier_init(void) +{ +} #endif +#endif /* _PTP_CLASSIFY_H_ */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 18ef0224fb6a..31edf63937a1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2630,8 +2630,6 @@ static inline ktime_t net_invalid_timestamp(void) return ktime_set(0, 0); } -void skb_timestamping_init(void); - #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING void skb_clone_tx_timestamp(struct sk_buff *skb); diff --git a/net/Kconfig b/net/Kconfig index e411046a62e3..d1f6f968fc09 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -89,8 +89,12 @@ config NETWORK_SECMARK to nfmark, but designated for security purposes. If you are unsure how to answer this question, answer N. +config NET_PTP_CLASSIFY + def_bool n + config NETWORK_PHY_TIMESTAMPING bool "Timestamping in PHY devices" + select NET_PTP_CLASSIFY help This allows timestamping of network packets by PHYs with hardware timestamping capabilities. This option adds some diff --git a/net/core/Makefile b/net/core/Makefile index 9628c20acff6..826b925aa453 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -21,5 +21,6 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o obj-$(CONFIG_TRACEPOINTS) += net-traces.o obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o +obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c new file mode 100644 index 000000000000..eaba0f68f860 --- /dev/null +++ b/net/core/ptp_classifier.c @@ -0,0 +1,141 @@ +/* PTP classifier + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +/* The below program is the bpf_asm (tools/net/) representation of + * the opcode array in the ptp_filter structure. + * + * For convenience, this can easily be altered and reviewed with + * bpf_asm and bpf_dbg, e.g. `./bpf_asm -c prog` where prog is a + * simple file containing the below program: + * + * ldh [12] ; load ethertype + * + * ; PTP over UDP over IPv4 over Ethernet + * test_ipv4: + * jneq #0x800, test_ipv6 ; ETH_P_IP ? + * ldb [23] ; load proto + * jneq #17, drop_ipv4 ; IPPROTO_UDP ? + * ldh [20] ; load frag offset field + * jset #0x1fff, drop_ipv4 ; don't allow fragments + * ldxb 4*([14]&0xf) ; load IP header len + * ldh [x + 16] ; load UDP dst port + * jneq #319, drop_ipv4 ; is port PTP_EV_PORT ? + * ldh [x + 22] ; load payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x10 ; PTP_CLASS_IPV4 + * ret a ; return PTP class + * drop_ipv4: ret #0x0 ; PTP_CLASS_NONE + * + * ; PTP over UDP over IPv6 over Ethernet + * test_ipv6: + * jneq #0x86dd, test_8021q ; ETH_P_IPV6 ? + * ldb [20] ; load proto + * jneq #17, drop_ipv6 ; IPPROTO_UDP ? + * ldh [56] ; load UDP dst port + * jneq #319, drop_ipv6 ; is port PTP_EV_PORT ? + * ldh [62] ; load payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x20 ; PTP_CLASS_IPV6 + * ret a ; return PTP class + * drop_ipv6: ret #0x0 ; PTP_CLASS_NONE + * + * ; PTP over 802.1Q over Ethernet + * test_8021q: + * jneq #0x8100, test_ieee1588 ; ETH_P_8021Q ? + * ldh [16] ; load inner type + * jneq #0x88f7, drop_ieee1588 ; ETH_P_1588 ? + * ldb [18] ; load payload + * and #0x8 ; as we don't have ports here, test + * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these + * ldh [18] ; reload payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x40 ; PTP_CLASS_V2_VLAN + * ret a ; return PTP class + * + * ; PTP over Ethernet + * test_ieee1588: + * jneq #0x88f7, drop_ieee1588 ; ETH_P_1588 ? + * ldb [14] ; load payload + * and #0x8 ; as we don't have ports here, test + * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these + * ldh [14] ; reload payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x30 ; PTP_CLASS_L2 + * ret a ; return PTP class + * drop_ieee1588: ret #0x0 ; PTP_CLASS_NONE + */ + +#include +#include +#include + +static struct sk_filter *ptp_insns __read_mostly; + +unsigned int ptp_classify_raw(const struct sk_buff *skb) +{ + return SK_RUN_FILTER(ptp_insns, skb); +} +EXPORT_SYMBOL_GPL(ptp_classify_raw); + +void __init ptp_classifier_init(void) +{ + static struct sock_filter ptp_filter[] = { + { 0x28, 0, 0, 0x0000000c }, + { 0x15, 0, 12, 0x00000800 }, + { 0x30, 0, 0, 0x00000017 }, + { 0x15, 0, 9, 0x00000011 }, + { 0x28, 0, 0, 0x00000014 }, + { 0x45, 7, 0, 0x00001fff }, + { 0xb1, 0, 0, 0x0000000e }, + { 0x48, 0, 0, 0x00000010 }, + { 0x15, 0, 4, 0x0000013f }, + { 0x48, 0, 0, 0x00000016 }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000010 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x06, 0, 0, 0x00000000 }, + { 0x15, 0, 9, 0x000086dd }, + { 0x30, 0, 0, 0x00000014 }, + { 0x15, 0, 6, 0x00000011 }, + { 0x28, 0, 0, 0x00000038 }, + { 0x15, 0, 4, 0x0000013f }, + { 0x28, 0, 0, 0x0000003e }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000020 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x06, 0, 0, 0x00000000 }, + { 0x15, 0, 9, 0x00008100 }, + { 0x28, 0, 0, 0x00000010 }, + { 0x15, 0, 15, 0x000088f7 }, + { 0x30, 0, 0, 0x00000012 }, + { 0x54, 0, 0, 0x00000008 }, + { 0x15, 0, 12, 0x00000000 }, + { 0x28, 0, 0, 0x00000012 }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000040 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x15, 0, 7, 0x000088f7 }, + { 0x30, 0, 0, 0x0000000e }, + { 0x54, 0, 0, 0x00000008 }, + { 0x15, 0, 4, 0x00000000 }, + { 0x28, 0, 0, 0x0000000e }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000030 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x06, 0, 0, 0x00000000 }, + }; + struct sock_fprog ptp_prog = { + .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, + }; + + BUG_ON(sk_unattached_filter_create(&ptp_insns, &ptp_prog)); +} diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 9ff26b3cc021..6521dfd8b7c8 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -23,14 +23,6 @@ #include #include -static struct sk_filter *ptp_insns __read_mostly; - -unsigned int ptp_classify_raw(const struct sk_buff *skb) -{ - return SK_RUN_FILTER(ptp_insns, skb); -} -EXPORT_SYMBOL_GPL(ptp_classify_raw); - static unsigned int classify(const struct sk_buff *skb) { if (likely(skb->dev && skb->dev->phydev && @@ -140,13 +132,3 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) return false; } EXPORT_SYMBOL_GPL(skb_defer_rx_timestamp); - -void __init skb_timestamping_init(void) -{ - static struct sock_filter ptp_filter[] = { PTP_FILTER }; - struct sock_fprog ptp_prog = { - .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, - }; - - BUG_ON(sk_unattached_filter_create(&ptp_insns, &ptp_prog)); -} diff --git a/net/socket.c b/net/socket.c index f25eaa30b690..1b1e7e6a960f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include #include @@ -2685,9 +2686,7 @@ static int __init sock_init(void) goto out; #endif -#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING - skb_timestamping_init(); -#endif + ptp_classifier_init(); out: return err; -- cgit v1.2.3-71-gd317 From 574f7194f693cd80de96a39f0c43dbb346c38a15 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 1 Apr 2014 12:20:24 -0700 Subject: net: Add a test to see if a skb is freeable in irq context Currently netpoll and skb_release_head_state assume that a skb is freeable in hard irq context except when skb->destructor is set. The reality is far from this. So add a function skb_irq_freeable to compute the full test and in the process be the living documentation of what the requirements are of actually freeing a skb in hard irq context. Signed-off-by: "Eric W. Biederman" Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 31edf63937a1..350eebe770d9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2831,6 +2831,19 @@ static inline void skb_init_secmark(struct sk_buff *skb) { } #endif +static inline bool skb_irq_freeable(const struct sk_buff *skb) +{ + return !skb->destructor && +#if IS_ENABLED(CONFIG_XFRM) + !skb->sp && +#endif +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + !skb->nfct && +#endif + !skb->_skb_refdst && + !skb_has_frag_list(skb); +} + static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping) { skb->queue_mapping = queue_mapping; -- cgit v1.2.3-71-gd317