From 4bf84c35c65f36a344fb7a6cde6274df4120efb8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 27 Dec 2012 23:49:37 +0000 Subject: net: add change_carrier netdev op This allows a driver to register change_carrier callback which will be called whenever user will like to change carrier state. This is useful for devices like dummy, gre, team and so on. Signed-off-by: Jiri Pirko Acked-by: Flavio Leitner Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c599e4782d45..0e1b92a0c1ec 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -891,6 +891,14 @@ struct netdev_fcoe_hbainfo { * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh) * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, * struct net_device *dev) + * + * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); + * Called to change device carrier. Soft-devices (like dummy, team, etc) + * which do not represent real hardware may define this to allow their + * userspace components to manage their virtual carrier state. Devices + * that determine carrier state from physical hardware properties (eg + * network cables) or protocol-dependent mechanisms (eg + * USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1008,6 +1016,8 @@ struct net_device_ops { int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev); + int (*ndo_change_carrier)(struct net_device *dev, + bool new_carrier); }; /* @@ -2194,6 +2204,8 @@ extern int dev_set_mtu(struct net_device *, int); extern void dev_set_group(struct net_device *, int); extern int dev_set_mac_address(struct net_device *, struct sockaddr *); +extern int dev_change_carrier(struct net_device *, + bool new_carrier); extern int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq); -- cgit v1.2.3-71-gd317 From 2681128f0ced8aa4e66f221197e183cc16d244fe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 29 Dec 2012 16:02:43 +0000 Subject: veth: reduce stat overhead veth stats are a bit bloated. There is no need to account transmit and receive stats, since they are absolutely symmetric. Also use a per device atomic64_t for the dropped counter, as it should never be used in fast path. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/veth.c | 115 +++++++++++++++++++--------------------------- include/linux/netdevice.h | 1 + 2 files changed, 48 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 95814d9747ef..c048f8d27bbf 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -25,18 +25,15 @@ #define MIN_MTU 68 /* Min L3 MTU */ #define MAX_MTU 65535 /* Max L3 MTU (arbitrary) */ -struct veth_net_stats { - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; - u64 rx_dropped; +struct pcpu_vstats { + u64 packets; + u64 bytes; struct u64_stats_sync syncp; }; struct veth_priv { - struct net_device *peer; - struct veth_net_stats __percpu *stats; + struct net_device *peer; + atomic64_t dropped; }; /* @@ -107,50 +104,30 @@ static const struct ethtool_ops veth_ethtool_ops = { .get_ethtool_stats = veth_get_ethtool_stats, }; -/* - * xmit - */ - static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) { - struct net_device *rcv = NULL; - struct veth_priv *priv, *rcv_priv; - struct veth_net_stats *stats, *rcv_stats; - int length; - - priv = netdev_priv(dev); - rcv = priv->peer; - rcv_priv = netdev_priv(rcv); - - stats = this_cpu_ptr(priv->stats); - rcv_stats = this_cpu_ptr(rcv_priv->stats); + struct veth_priv *priv = netdev_priv(dev); + struct net_device *rcv = priv->peer; + int length = skb->len; /* don't change ip_summed == CHECKSUM_PARTIAL, as that - will cause bad checksum on forwarded packets */ + * will cause bad checksum on forwarded packets + */ if (skb->ip_summed == CHECKSUM_NONE && rcv->features & NETIF_F_RXCSUM) skb->ip_summed = CHECKSUM_UNNECESSARY; - length = skb->len; - if (dev_forward_skb(rcv, skb) != NET_RX_SUCCESS) - goto rx_drop; - - u64_stats_update_begin(&stats->syncp); - stats->tx_bytes += length; - stats->tx_packets++; - u64_stats_update_end(&stats->syncp); + if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { + struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); - u64_stats_update_begin(&rcv_stats->syncp); - rcv_stats->rx_bytes += length; - rcv_stats->rx_packets++; - u64_stats_update_end(&rcv_stats->syncp); - - return NETDEV_TX_OK; + u64_stats_update_begin(&stats->syncp); + stats->bytes += length; + stats->packets++; + u64_stats_update_end(&stats->syncp); + } else { + atomic64_inc(&priv->dropped); + } -rx_drop: - u64_stats_update_begin(&rcv_stats->syncp); - rcv_stats->rx_dropped++; - u64_stats_update_end(&rcv_stats->syncp); return NETDEV_TX_OK; } @@ -158,32 +135,42 @@ rx_drop: * general routines */ -static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) +static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); int cpu; + result->packets = 0; + result->bytes = 0; for_each_possible_cpu(cpu) { - struct veth_net_stats *stats = per_cpu_ptr(priv->stats, cpu); - u64 rx_packets, rx_bytes, rx_dropped; - u64 tx_packets, tx_bytes; + struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu); + u64 packets, bytes; unsigned int start; do { start = u64_stats_fetch_begin_bh(&stats->syncp); - rx_packets = stats->rx_packets; - tx_packets = stats->tx_packets; - rx_bytes = stats->rx_bytes; - tx_bytes = stats->tx_bytes; - rx_dropped = stats->rx_dropped; + packets = stats->packets; + bytes = stats->bytes; } while (u64_stats_fetch_retry_bh(&stats->syncp, start)); - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; - tot->rx_dropped += rx_dropped; + result->packets += packets; + result->bytes += bytes; } + return atomic64_read(&priv->dropped); +} + +static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) +{ + struct veth_priv *priv = netdev_priv(dev); + struct pcpu_vstats one; + + tot->tx_dropped = veth_stats_one(&one, dev); + tot->tx_bytes = one.bytes; + tot->tx_packets = one.packets; + + tot->rx_dropped = veth_stats_one(&one, priv->peer); + tot->rx_bytes = one.bytes; + tot->rx_packets = one.packets; return tot; } @@ -228,24 +215,16 @@ static int veth_change_mtu(struct net_device *dev, int new_mtu) static int veth_dev_init(struct net_device *dev) { - struct veth_net_stats __percpu *stats; - struct veth_priv *priv; - - stats = alloc_percpu(struct veth_net_stats); - if (stats == NULL) + dev->vstats = alloc_percpu(struct pcpu_vstats); + if (!dev->vstats) return -ENOMEM; - priv = netdev_priv(dev); - priv->stats = stats; return 0; } static void veth_dev_free(struct net_device *dev) { - struct veth_priv *priv; - - priv = netdev_priv(dev); - free_percpu(priv->stats); + free_percpu(dev->vstats); free_netdev(dev); } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0e1b92a0c1ec..6835b5837f93 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1284,6 +1284,7 @@ struct net_device { struct pcpu_lstats __percpu *lstats; /* loopback stats */ struct pcpu_tstats __percpu *tstats; /* tunnel stats */ struct pcpu_dstats __percpu *dstats; /* dummy stats */ + struct pcpu_vstats __percpu *vstats; /* veth stats */ }; /* GARP */ struct garp_port __rcu *garp_port; -- cgit v1.2.3-71-gd317 From 0f6dfcee2e081f47a3e97cb8984fb4d62217e6f7 Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Tue, 18 Dec 2012 09:55:33 +0200 Subject: wireless: more 'capability info' bits define bits for 'capability info', as in recent spec edition IEEE802.11-2012 Also, add mask for 2-bit field 'bss type', as it is in 802.11ad Signed-off-by: Vladimir Kondratiev Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f0859cc73861..09879eb24380 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1311,16 +1311,21 @@ struct ieee80211_vht_operation { #define WLAN_CAPABILITY_SPECTRUM_MGMT (1<<8) #define WLAN_CAPABILITY_QOS (1<<9) #define WLAN_CAPABILITY_SHORT_SLOT_TIME (1<<10) +#define WLAN_CAPABILITY_APSD (1<<11) +#define WLAN_CAPABILITY_RADIO_MEASURE (1<<12) #define WLAN_CAPABILITY_DSSS_OFDM (1<<13) +#define WLAN_CAPABILITY_DEL_BACK (1<<14) +#define WLAN_CAPABILITY_IMM_BACK (1<<15) /* DMG (60gHz) 802.11ad */ /* type - bits 0..1 */ +#define WLAN_CAPABILITY_DMG_TYPE_MASK (3<<0) #define WLAN_CAPABILITY_DMG_TYPE_IBSS (1<<0) /* Tx by: STA */ #define WLAN_CAPABILITY_DMG_TYPE_PBSS (2<<0) /* Tx by: PCP */ #define WLAN_CAPABILITY_DMG_TYPE_AP (3<<0) /* Tx by: AP */ #define WLAN_CAPABILITY_DMG_CBAP_ONLY (1<<2) -#define WLAN_CAPABILITY_DMG_CBAP_SOURCE (1<<3) +#define WLAN_CAPABILITY_DMG_CBAP_SOURCE (1<<3) #define WLAN_CAPABILITY_DMG_PRIVACY (1<<4) #define WLAN_CAPABILITY_DMG_ECPAC (1<<5) -- cgit v1.2.3-71-gd317 From 598a5938e04ce30d837dca4c3c3326c69435342a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 Dec 2012 12:00:40 +0100 Subject: wireless: use __packed in ieee80211.h Use __packed instead of __attribute__((packed)). Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 84 +++++++++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 09879eb24380..5db76ebe8810 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -180,7 +180,7 @@ struct ieee80211_hdr { u8 addr3[6]; __le16 seq_ctrl; u8 addr4[6]; -} __attribute__ ((packed)); +} __packed; struct ieee80211_hdr_3addr { __le16 frame_control; @@ -189,7 +189,7 @@ struct ieee80211_hdr_3addr { u8 addr2[6]; u8 addr3[6]; __le16 seq_ctrl; -} __attribute__ ((packed)); +} __packed; struct ieee80211_qos_hdr { __le16 frame_control; @@ -199,7 +199,7 @@ struct ieee80211_qos_hdr { u8 addr3[6]; __le16 seq_ctrl; __le16 qos_ctrl; -} __attribute__ ((packed)); +} __packed; /** * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set @@ -576,7 +576,7 @@ struct ieee80211s_hdr { __le32 seqnum; u8 eaddr1[6]; u8 eaddr2[6]; -} __attribute__ ((packed)); +} __packed; /* Mesh flags */ #define MESH_FLAGS_AE_A4 0x1 @@ -614,7 +614,7 @@ struct ieee80211_quiet_ie { u8 period; __le16 duration; __le16 offset; -} __attribute__ ((packed)); +} __packed; /** * struct ieee80211_msrment_ie @@ -626,7 +626,7 @@ struct ieee80211_msrment_ie { u8 mode; u8 type; u8 request[0]; -} __attribute__ ((packed)); +} __packed; /** * struct ieee80211_channel_sw_ie @@ -637,7 +637,7 @@ struct ieee80211_channel_sw_ie { u8 mode; u8 new_ch_num; u8 count; -} __attribute__ ((packed)); +} __packed; /** * struct ieee80211_tim @@ -650,7 +650,7 @@ struct ieee80211_tim_ie { u8 bitmap_ctrl; /* variable size: 1 - 251 bytes */ u8 virtual_map[1]; -} __attribute__ ((packed)); +} __packed; /** * struct ieee80211_meshconf_ie @@ -665,7 +665,7 @@ struct ieee80211_meshconf_ie { u8 meshconf_auth; u8 meshconf_form; u8 meshconf_cap; -} __attribute__ ((packed)); +} __packed; /** * enum mesh_config_capab_flags - Mesh Configuration IE capability field flags @@ -695,7 +695,7 @@ struct ieee80211_rann_ie { __le32 rann_seq; __le32 rann_interval; __le32 rann_metric; -} __attribute__ ((packed)); +} __packed; enum ieee80211_rann_flags { RANN_FLAG_IS_GATE = 1 << 0, @@ -717,33 +717,33 @@ struct ieee80211_mgmt { __le16 status_code; /* possibly followed by Challenge text */ u8 variable[0]; - } __attribute__ ((packed)) auth; + } __packed auth; struct { __le16 reason_code; - } __attribute__ ((packed)) deauth; + } __packed deauth; struct { __le16 capab_info; __le16 listen_interval; /* followed by SSID and Supported rates */ u8 variable[0]; - } __attribute__ ((packed)) assoc_req; + } __packed assoc_req; struct { __le16 capab_info; __le16 status_code; __le16 aid; /* followed by Supported rates */ u8 variable[0]; - } __attribute__ ((packed)) assoc_resp, reassoc_resp; + } __packed assoc_resp, reassoc_resp; struct { __le16 capab_info; __le16 listen_interval; u8 current_ap[6]; /* followed by SSID and Supported rates */ u8 variable[0]; - } __attribute__ ((packed)) reassoc_req; + } __packed reassoc_req; struct { __le16 reason_code; - } __attribute__ ((packed)) disassoc; + } __packed disassoc; struct { __le64 timestamp; __le16 beacon_int; @@ -751,11 +751,11 @@ struct ieee80211_mgmt { /* followed by some of SSID, Supported rates, * FH Params, DS Params, CF Params, IBSS Params, TIM */ u8 variable[0]; - } __attribute__ ((packed)) beacon; + } __packed beacon; struct { /* only variable items: SSID, Supported rates */ u8 variable[0]; - } __attribute__ ((packed)) probe_req; + } __packed probe_req; struct { __le64 timestamp; __le16 beacon_int; @@ -763,7 +763,7 @@ struct ieee80211_mgmt { /* followed by some of SSID, Supported rates, * FH Params, DS Params, CF Params, IBSS Params */ u8 variable[0]; - } __attribute__ ((packed)) probe_resp; + } __packed probe_resp; struct { u8 category; union { @@ -772,55 +772,55 @@ struct ieee80211_mgmt { u8 dialog_token; u8 status_code; u8 variable[0]; - } __attribute__ ((packed)) wme_action; + } __packed wme_action; struct{ u8 action_code; u8 element_id; u8 length; struct ieee80211_channel_sw_ie sw_elem; - } __attribute__((packed)) chan_switch; + } __packed chan_switch; struct{ u8 action_code; u8 dialog_token; u8 element_id; u8 length; struct ieee80211_msrment_ie msr_elem; - } __attribute__((packed)) measurement; + } __packed measurement; struct{ u8 action_code; u8 dialog_token; __le16 capab; __le16 timeout; __le16 start_seq_num; - } __attribute__((packed)) addba_req; + } __packed addba_req; struct{ u8 action_code; u8 dialog_token; __le16 status; __le16 capab; __le16 timeout; - } __attribute__((packed)) addba_resp; + } __packed addba_resp; struct{ u8 action_code; __le16 params; __le16 reason_code; - } __attribute__((packed)) delba; + } __packed delba; struct { u8 action_code; u8 variable[0]; - } __attribute__((packed)) self_prot; + } __packed self_prot; struct{ u8 action_code; u8 variable[0]; - } __attribute__((packed)) mesh_action; + } __packed mesh_action; struct { u8 action; u8 trans_id[WLAN_SA_QUERY_TR_ID_LEN]; - } __attribute__ ((packed)) sa_query; + } __packed sa_query; struct { u8 action; u8 smps_control; - } __attribute__ ((packed)) ht_smps; + } __packed ht_smps; struct { u8 action_code; u8 dialog_token; @@ -828,9 +828,9 @@ struct ieee80211_mgmt { u8 variable[0]; } __packed tdls_discover_resp; } u; - } __attribute__ ((packed)) action; + } __packed action; } u; -} __attribute__ ((packed)); +} __packed; /* Supported Rates value encodings in 802.11n-2009 7.3.2.2 */ #define BSS_MEMBERSHIP_SELECTOR_HT_PHY 127 @@ -846,7 +846,7 @@ struct ieee80211_mmie { __le16 key_id; u8 sequence_number[6]; u8 mic[8]; -} __attribute__ ((packed)); +} __packed; struct ieee80211_vendor_ie { u8 element_id; @@ -861,20 +861,20 @@ struct ieee80211_rts { __le16 duration; u8 ra[6]; u8 ta[6]; -} __attribute__ ((packed)); +} __packed; struct ieee80211_cts { __le16 frame_control; __le16 duration; u8 ra[6]; -} __attribute__ ((packed)); +} __packed; struct ieee80211_pspoll { __le16 frame_control; __le16 aid; u8 bssid[6]; u8 ta[6]; -} __attribute__ ((packed)); +} __packed; /* TDLS */ @@ -967,7 +967,7 @@ struct ieee80211_bar { __u8 ta[6]; __le16 control; __le16 start_seq_num; -} __attribute__((packed)); +} __packed; /* 802.11 BAR control masks */ #define IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL 0x0000 @@ -992,7 +992,7 @@ struct ieee80211_mcs_info { __le16 rx_highest; u8 tx_params; u8 reserved[3]; -} __attribute__((packed)); +} __packed; /* 802.11n HT capability MSC set */ #define IEEE80211_HT_MCS_RX_HIGHEST_MASK 0x3ff @@ -1031,7 +1031,7 @@ struct ieee80211_ht_cap { __le16 extended_ht_cap_info; __le32 tx_BF_cap_info; u8 antenna_selection_info; -} __attribute__ ((packed)); +} __packed; /* 802.11n HT capabilities masks (for cap_info) */ #define IEEE80211_HT_CAP_LDPC_CODING 0x0001 @@ -1102,7 +1102,7 @@ struct ieee80211_ht_operation { __le16 operation_mode; __le16 stbc_param; u8 basic_set[16]; -} __attribute__ ((packed)); +} __packed; /* for ht_param */ #define IEEE80211_HT_PARAM_CHA_SEC_OFFSET 0x03 @@ -1839,14 +1839,14 @@ struct ieee80211_country_ie_triplet { u8 first_channel; u8 num_channels; s8 max_power; - } __attribute__ ((packed)) chans; + } __packed chans; struct { u8 reg_extension_id; u8 reg_class; u8 coverage_class; - } __attribute__ ((packed)) ext; + } __packed ext; }; -} __attribute__ ((packed)); +} __packed; enum ieee80211_timeout_interval_type { WLAN_TIMEOUT_REASSOC_DEADLINE = 1 /* 802.11r */, -- cgit v1.2.3-71-gd317 From ec61cd63dd3f3bf982180b2bcc1b325160d73837 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 Dec 2012 12:12:10 +0100 Subject: mac80211: support HT notify channel width action Support the HT notify channel width action frame to update the rate scaling about the bandwidth the peer can receive in. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 9 +++++++++ net/mac80211/rx.c | 31 ++++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 5db76ebe8810..ccf9ee1dca8c 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -701,6 +701,11 @@ enum ieee80211_rann_flags { RANN_FLAG_IS_GATE = 1 << 0, }; +enum ieee80211_ht_chanwidth_values { + IEEE80211_HT_CHANWIDTH_20MHZ = 0, + IEEE80211_HT_CHANWIDTH_ANY = 1, +}; + #define WLAN_SA_QUERY_TR_ID_LEN 2 struct ieee80211_mgmt { @@ -821,6 +826,10 @@ struct ieee80211_mgmt { u8 action; u8 smps_control; } __packed ht_smps; + struct { + u8 action_code; + u8 chanwidth; + } __packed ht_notify_cw; struct { u8 action_code; u8 dialog_token; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 580704eba8b8..a19089565c4b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2353,7 +2353,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) sdata->vif.type != NL80211_IFTYPE_ADHOC) break; - /* verify action & smps_control are present */ + /* verify action & smps_control/chanwidth are present */ if (len < IEEE80211_MIN_ACTION_SIZE + 2) goto invalid; @@ -2392,6 +2392,35 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) IEEE80211_RC_SMPS_CHANGED); goto handled; } + case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: { + struct ieee80211_supported_band *sband; + u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth; + bool old_40mhz, new_40mhz; + + /* If it doesn't support 40 MHz it can't change ... */ + if (!rx->sta->supports_40mhz) + goto handled; + + old_40mhz = rx->sta->sta.ht_cap.cap & + IEEE80211_HT_CAP_SUP_WIDTH_20_40; + new_40mhz = chanwidth == IEEE80211_HT_CHANWIDTH_ANY; + + if (old_40mhz == new_40mhz) + goto handled; + + if (new_40mhz) + rx->sta->sta.ht_cap.cap |= + IEEE80211_HT_CAP_SUP_WIDTH_20_40; + else + rx->sta->sta.ht_cap.cap &= + ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + + sband = rx->local->hw.wiphy->bands[status->band]; + + rate_control_rate_update(local, sband, rx->sta, + IEEE80211_RC_BW_CHANGED); + goto handled; + } default: goto invalid; } -- cgit v1.2.3-71-gd317 From e41b2d7fe7803e85e1202d0eb172717d7bf1bbaf Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 1 Jan 2013 03:30:15 +0000 Subject: net: set dev->addr_assign_type correctly Not a bitfield, but a plain value. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 2 +- drivers/net/ethernet/atheros/atlx/atl1.c | 2 +- drivers/net/ethernet/ethoc.c | 2 +- drivers/net/ethernet/lantiq_etop.c | 2 +- include/linux/etherdevice.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 56d3f697e0c7..17651c779680 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -2540,7 +2540,7 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } if (atl1c_read_mac_addr(&adapter->hw)) { /* got a random MAC address, set NET_ADDR_RANDOM to netdev */ - netdev->addr_assign_type |= NET_ADDR_RANDOM; + netdev->addr_assign_type = NET_ADDR_RANDOM; } memcpy(netdev->dev_addr, adapter->hw.mac_addr, netdev->addr_len); memcpy(netdev->perm_addr, adapter->hw.mac_addr, netdev->addr_len); diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index 71b3d7daa21d..5b0d9931c720 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -3053,7 +3053,7 @@ static int atl1_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* copy the MAC address out of the EEPROM */ if (atl1_read_mac_addr(&adapter->hw)) { /* mark random mac */ - netdev->addr_assign_type |= NET_ADDR_RANDOM; + netdev->addr_assign_type = NET_ADDR_RANDOM; } memcpy(netdev->dev_addr, adapter->hw.mac_addr, netdev->addr_len); diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index 8db1c06008de..f380bb7653dd 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -1068,7 +1068,7 @@ static int ethoc_probe(struct platform_device *pdev) } if (random_mac) - netdev->addr_assign_type |= NET_ADDR_RANDOM; + netdev->addr_assign_type = NET_ADDR_RANDOM; /* register MII bus */ priv->mdio = mdiobus_alloc(); diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index c124e67a1a1c..cd3d2c09cdd0 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -655,7 +655,7 @@ ltq_etop_init(struct net_device *dev) /* Set addr_assign_type here, ltq_etop_set_mac_address would reset it. */ if (random_mac) - dev->addr_assign_type |= NET_ADDR_RANDOM; + dev->addr_assign_type = NET_ADDR_RANDOM; ltq_etop_set_multicast_list(dev); err = ltq_etop_mdio_init(dev); diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 243eea1e33d8..1a43e1b4f7ad 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -192,7 +192,7 @@ static inline void eth_zero_addr(u8 *addr) */ static inline void eth_hw_addr_random(struct net_device *dev) { - dev->addr_assign_type |= NET_ADDR_RANDOM; + dev->addr_assign_type = NET_ADDR_RANDOM; eth_random_addr(dev->dev_addr); } -- cgit v1.2.3-71-gd317 From fbdeca2d7753aa1ab929aeb77ccc46489eed02b9 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 1 Jan 2013 03:30:16 +0000 Subject: net: add address assign type "SET" This is the way to indicate that mac address of a device has been set by dev_set_mac_address() Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6835b5837f93..c5031a45e185 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -64,6 +64,8 @@ struct wireless_dev; #define NET_ADDR_PERM 0 /* address is permanent (default) */ #define NET_ADDR_RANDOM 1 /* address is generated randomly */ #define NET_ADDR_STOLEN 2 /* address is stolen from other device */ +#define NET_ADDR_SET 3 /* address is set using + * dev_set_mac_address() */ /* Backlog congestion levels */ #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ diff --git a/net/core/dev.c b/net/core/dev.c index c85e32b30f04..bddb2f2ccaa9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5022,6 +5022,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) err = ops->ndo_set_mac_address(dev, sa); if (err) return err; + dev->addr_assign_type = NET_ADDR_SET; call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); add_device_randomness(dev->dev_addr, dev->addr_len); return 0; -- cgit v1.2.3-71-gd317 From 9ff162a8b96c96238773972e26288a366e403b0c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Jan 2013 22:48:49 +0000 Subject: net: introduce upper device lists This lists are supposed to serve for storing pointers to all upper devices. Eventually it will replace dev->master pointer which is used for bonding, bridge, team but it cannot be used for vlan, macvlan where there might be multiple upper present. In case the upper link is replacement for dev->master, it is marked with "master" flag. New upper device list resolves this limitation. Also, the information stored in lists is used for preventing looping setups like "bond->somethingelse->samebond" Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 +++ net/core/dev.c | 239 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 249 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c5031a45e185..e324601f48e8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1174,6 +1174,8 @@ struct net_device { * which this device is member of. */ + struct list_head upper_dev_list; /* List of upper devices */ + /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; /* hw address, (before bcast because most packets are @@ -2636,6 +2638,18 @@ extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; extern int weight_p; extern int bpf_jit_enable; + +extern bool netdev_has_upper_dev(struct net_device *dev, + struct net_device *upper_dev); +extern bool netdev_has_any_upper_dev(struct net_device *dev); +extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev); +extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); +extern int netdev_upper_dev_link(struct net_device *dev, + struct net_device *upper_dev); +extern int netdev_master_upper_dev_link(struct net_device *dev, + struct net_device *upper_dev); +extern void netdev_upper_dev_unlink(struct net_device *dev, + struct net_device *upper_dev); extern int netdev_set_master(struct net_device *dev, struct net_device *master); extern int netdev_set_bond_master(struct net_device *dev, struct net_device *master); diff --git a/net/core/dev.c b/net/core/dev.c index bddb2f2ccaa9..53a9fefbc9af 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4600,6 +4600,232 @@ static int __init dev_proc_init(void) #endif /* CONFIG_PROC_FS */ +struct netdev_upper { + struct net_device *dev; + bool master; + struct list_head list; + struct rcu_head rcu; + struct list_head search_list; +}; + +static void __append_search_uppers(struct list_head *search_list, + struct net_device *dev) +{ + struct netdev_upper *upper; + + list_for_each_entry(upper, &dev->upper_dev_list, list) { + /* check if this upper is not already in search list */ + if (list_empty(&upper->search_list)) + list_add_tail(&upper->search_list, search_list); + } +} + +static bool __netdev_search_upper_dev(struct net_device *dev, + struct net_device *upper_dev) +{ + LIST_HEAD(search_list); + struct netdev_upper *upper; + struct netdev_upper *tmp; + bool ret = false; + + __append_search_uppers(&search_list, dev); + list_for_each_entry(upper, &search_list, search_list) { + if (upper->dev == upper_dev) { + ret = true; + break; + } + __append_search_uppers(&search_list, upper->dev); + } + list_for_each_entry_safe(upper, tmp, &search_list, search_list) + INIT_LIST_HEAD(&upper->search_list); + return ret; +} + +static struct netdev_upper *__netdev_find_upper(struct net_device *dev, + struct net_device *upper_dev) +{ + struct netdev_upper *upper; + + list_for_each_entry(upper, &dev->upper_dev_list, list) { + if (upper->dev == upper_dev) + return upper; + } + return NULL; +} + +/** + * netdev_has_upper_dev - Check if device is linked to an upper device + * @dev: device + * @upper_dev: upper device to check + * + * Find out if a device is linked to specified upper device and return true + * in case it is. Note that this checks only immediate upper device, + * not through a complete stack of devices. The caller must hold the RTNL lock. + */ +bool netdev_has_upper_dev(struct net_device *dev, + struct net_device *upper_dev) +{ + ASSERT_RTNL(); + + return __netdev_find_upper(dev, upper_dev); +} +EXPORT_SYMBOL(netdev_has_upper_dev); + +/** + * netdev_has_any_upper_dev - Check if device is linked to some device + * @dev: device + * + * Find out if a device is linked to an upper device and return true in case + * it is. The caller must hold the RTNL lock. + */ +bool netdev_has_any_upper_dev(struct net_device *dev) +{ + ASSERT_RTNL(); + + return !list_empty(&dev->upper_dev_list); +} +EXPORT_SYMBOL(netdev_has_any_upper_dev); + +/** + * netdev_master_upper_dev_get - Get master upper device + * @dev: device + * + * Find a master upper device and return pointer to it or NULL in case + * it's not there. The caller must hold the RTNL lock. + */ +struct net_device *netdev_master_upper_dev_get(struct net_device *dev) +{ + struct netdev_upper *upper; + + ASSERT_RTNL(); + + if (list_empty(&dev->upper_dev_list)) + return NULL; + + upper = list_first_entry(&dev->upper_dev_list, + struct netdev_upper, list); + if (likely(upper->master)) + return upper->dev; + return NULL; +} +EXPORT_SYMBOL(netdev_master_upper_dev_get); + +/** + * netdev_master_upper_dev_get_rcu - Get master upper device + * @dev: device + * + * Find a master upper device and return pointer to it or NULL in case + * it's not there. The caller must hold the RCU read lock. + */ +struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) +{ + struct netdev_upper *upper; + + upper = list_first_or_null_rcu(&dev->upper_dev_list, + struct netdev_upper, list); + if (upper && likely(upper->master)) + return upper->dev; + return NULL; +} +EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); + +static int __netdev_upper_dev_link(struct net_device *dev, + struct net_device *upper_dev, bool master) +{ + struct netdev_upper *upper; + + ASSERT_RTNL(); + + if (dev == upper_dev) + return -EBUSY; + + /* To prevent loops, check if dev is not upper device to upper_dev. */ + if (__netdev_search_upper_dev(upper_dev, dev)) + return -EBUSY; + + if (__netdev_find_upper(dev, upper_dev)) + return -EEXIST; + + if (master && netdev_master_upper_dev_get(dev)) + return -EBUSY; + + upper = kmalloc(sizeof(*upper), GFP_KERNEL); + if (!upper) + return -ENOMEM; + + upper->dev = upper_dev; + upper->master = master; + INIT_LIST_HEAD(&upper->search_list); + + /* Ensure that master upper link is always the first item in list. */ + if (master) + list_add_rcu(&upper->list, &dev->upper_dev_list); + else + list_add_tail_rcu(&upper->list, &dev->upper_dev_list); + dev_hold(upper_dev); + + return 0; +} + +/** + * netdev_upper_dev_link - Add a link to the upper device + * @dev: device + * @upper_dev: new upper device + * + * Adds a link to device which is upper to this one. The caller must hold + * the RTNL lock. On a failure a negative errno code is returned. + * On success the reference counts are adjusted and the function + * returns zero. + */ +int netdev_upper_dev_link(struct net_device *dev, + struct net_device *upper_dev) +{ + return __netdev_upper_dev_link(dev, upper_dev, false); +} +EXPORT_SYMBOL(netdev_upper_dev_link); + +/** + * netdev_master_upper_dev_link - Add a master link to the upper device + * @dev: device + * @upper_dev: new upper device + * + * Adds a link to device which is upper to this one. In this case, only + * one master upper device can be linked, although other non-master devices + * might be linked as well. The caller must hold the RTNL lock. + * On a failure a negative errno code is returned. On success the reference + * counts are adjusted and the function returns zero. + */ +int netdev_master_upper_dev_link(struct net_device *dev, + struct net_device *upper_dev) +{ + return __netdev_upper_dev_link(dev, upper_dev, true); +} +EXPORT_SYMBOL(netdev_master_upper_dev_link); + +/** + * netdev_upper_dev_unlink - Removes a link to upper device + * @dev: device + * @upper_dev: new upper device + * + * Removes a link to device which is upper to this one. The caller must hold + * the RTNL lock. + */ +void netdev_upper_dev_unlink(struct net_device *dev, + struct net_device *upper_dev) +{ + struct netdev_upper *upper; + + ASSERT_RTNL(); + + upper = __netdev_find_upper(dev, upper_dev); + if (!upper) + return; + list_del_rcu(&upper->list); + dev_put(upper_dev); + kfree_rcu(upper, rcu); +} +EXPORT_SYMBOL(netdev_upper_dev_unlink); + /** * netdev_set_master - set up master pointer * @slave: slave device @@ -4613,19 +4839,23 @@ static int __init dev_proc_init(void) int netdev_set_master(struct net_device *slave, struct net_device *master) { struct net_device *old = slave->master; + int err; ASSERT_RTNL(); if (master) { if (old) return -EBUSY; - dev_hold(master); + err = netdev_master_upper_dev_link(slave, master); + if (err) + return err; } slave->master = master; if (old) - dev_put(old); + netdev_upper_dev_unlink(slave, master); + return 0; } EXPORT_SYMBOL(netdev_set_master); @@ -5503,8 +5733,8 @@ static void rollback_registered_many(struct list_head *head) if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); - /* Notifier chain MUST detach us from master device. */ - WARN_ON(dev->master); + /* Notifier chain MUST detach us all upper devices. */ + WARN_ON(netdev_has_any_upper_dev(dev)); /* Remove entries from kobject tree */ netdev_unregister_kobject(dev); @@ -6212,6 +6442,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); INIT_LIST_HEAD(&dev->link_watch_list); + INIT_LIST_HEAD(&dev->upper_dev_list); dev->priv_flags = IFF_XMIT_DST_RELEASE; setup(dev); -- cgit v1.2.3-71-gd317 From 8b98a70c28a607a02b3c3d41bc9a4c141f421052 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Jan 2013 22:49:02 +0000 Subject: net: remove no longer used netdev_set_bond_master() and netdev_set_master() Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +---- net/core/dev.c | 63 ----------------------------------------------- 2 files changed, 1 insertion(+), 68 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e324601f48e8..3cad8eab02b6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -858,8 +858,7 @@ struct netdev_fcoe_hbainfo { * flow_id is a flow ID to be passed to rps_may_expire_flow() later. * Return the filter ID on success, or a negative error code. * - * Slave management functions (for bridge, bonding, etc). User should - * call netdev_set_master() to set dev->master properly. + * Slave management functions (for bridge, bonding, etc). * int (*ndo_add_slave)(struct net_device *dev, struct net_device *slave_dev); * Called to make another netdev an underling. * @@ -2650,9 +2649,6 @@ extern int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev); extern void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); -extern int netdev_set_master(struct net_device *dev, struct net_device *master); -extern int netdev_set_bond_master(struct net_device *dev, - struct net_device *master); extern int skb_checksum_help(struct sk_buff *skb); extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features); diff --git a/net/core/dev.c b/net/core/dev.c index 53a9fefbc9af..a51ccf46e8b7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4826,69 +4826,6 @@ void netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); -/** - * netdev_set_master - set up master pointer - * @slave: slave device - * @master: new master device - * - * Changes the master device of the slave. Pass %NULL to break the - * bonding. The caller must hold the RTNL semaphore. On a failure - * a negative errno code is returned. On success the reference counts - * are adjusted and the function returns zero. - */ -int netdev_set_master(struct net_device *slave, struct net_device *master) -{ - struct net_device *old = slave->master; - int err; - - ASSERT_RTNL(); - - if (master) { - if (old) - return -EBUSY; - err = netdev_master_upper_dev_link(slave, master); - if (err) - return err; - } - - slave->master = master; - - if (old) - netdev_upper_dev_unlink(slave, master); - - return 0; -} -EXPORT_SYMBOL(netdev_set_master); - -/** - * netdev_set_bond_master - set up bonding master/slave pair - * @slave: slave device - * @master: new master device - * - * Changes the master device of the slave. Pass %NULL to break the - * bonding. The caller must hold the RTNL semaphore. On a failure - * a negative errno code is returned. On success %RTM_NEWLINK is sent - * to the routing socket and the function returns zero. - */ -int netdev_set_bond_master(struct net_device *slave, struct net_device *master) -{ - int err; - - ASSERT_RTNL(); - - err = netdev_set_master(slave, master); - if (err) - return err; - if (master) - slave->flags |= IFF_SLAVE; - else - slave->flags &= ~IFF_SLAVE; - - rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); - return 0; -} -EXPORT_SYMBOL(netdev_set_bond_master); - static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; -- cgit v1.2.3-71-gd317 From 85464ef271a0f5496f404c6a2f2dfbf1d76e1a49 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Jan 2013 22:49:03 +0000 Subject: net: kill dev->master Nobody uses this now. Remove it. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3cad8eab02b6..0209ac328e8a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1169,10 +1169,6 @@ struct net_device { * avoid dirtying this cache line. */ - struct net_device *master; /* Pointer to master device of a group, - * which this device is member of. - */ - struct list_head upper_dev_list; /* List of upper devices */ /* Interface address info used in eth_type_trans() */ -- cgit v1.2.3-71-gd317 From 8f9dc85348ac37ff3b6b031d22e93a5b59d81f83 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Fri, 4 Jan 2013 00:51:24 +0100 Subject: bcma: mips: remove assigned_irqs from structure This member is not needed any more. Signed-off-by: Hauke Mehrtens Signed-off-by: John W. Linville --- drivers/bcma/driver_mips.c | 2 -- include/linux/bcma/bcma_driver_mips.h | 1 - 2 files changed, 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c index 69815079a6dd..c6d7be33b972 100644 --- a/drivers/bcma/driver_mips.c +++ b/drivers/bcma/driver_mips.c @@ -263,8 +263,6 @@ void bcma_core_mips_init(struct bcma_drv_mips *mcore) bcma_core_mips_early_init(mcore); - mcore->assigned_irqs = 1; - switch (bus->chipinfo.id) { case BCMA_CHIP_ID_BCM4716: case BCMA_CHIP_ID_BCM4748: diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h index 0baf8a56b794..6495579e3f35 100644 --- a/include/linux/bcma/bcma_driver_mips.h +++ b/include/linux/bcma/bcma_driver_mips.h @@ -36,7 +36,6 @@ struct bcma_drv_mips { struct bcma_device *core; u8 setup_done:1; u8 early_setup_done:1; - unsigned int assigned_irqs; }; #ifdef CONFIG_BCMA_DRIVER_MIPS -- cgit v1.2.3-71-gd317 From fda55eca5a33f33ffcd4192c6b2d75179714a52c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jan 2013 09:28:21 +0000 Subject: net: introduce skb_transport_header_was_set() We have skb_mac_header_was_set() helper to tell if mac_header was set on a skb. We would like the same for transport_header. __netif_receive_skb() doesn't reset the transport header if already set by GRO layer. Note that network stacks usually reset the transport header anyway, after pulling the network header, so this change only allows a followup patch to have more precise qdisc pkt_len computation for GSO packets at ingress side. Signed-off-by: Eric Dumazet Cc: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/skbuff.h | 10 ++++++++++ net/core/dev.c | 3 ++- net/core/skbuff.c | 2 ++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 320e976d5ab8..8b2256e880e0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1492,6 +1492,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb, skb->inner_network_header += offset; } +static inline bool skb_transport_header_was_set(const struct sk_buff *skb) +{ + return skb->transport_header != ~0U; +} + static inline unsigned char *skb_transport_header(const struct sk_buff *skb) { return skb->head + skb->transport_header; @@ -1580,6 +1585,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb, skb->inner_network_header = skb->data + offset; } +static inline bool skb_transport_header_was_set(const struct sk_buff *skb) +{ + return skb->transport_header != NULL; +} + static inline unsigned char *skb_transport_header(const struct sk_buff *skb) { return skb->transport_header; diff --git a/net/core/dev.c b/net/core/dev.c index a51ccf46e8b7..2e2448201a76 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3352,7 +3352,8 @@ static int __netif_receive_skb(struct sk_buff *skb) orig_dev = skb->dev; skb_reset_network_header(skb); - skb_reset_transport_header(skb); + if (!skb_transport_header_was_set(skb)) + skb_reset_transport_header(skb); skb_reset_mac_len(skb); pt_prev = NULL; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b03fc0c6a952..1e1b9ea0296d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -260,6 +260,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->end = skb->tail + size; #ifdef NET_SKBUFF_DATA_USES_OFFSET skb->mac_header = ~0U; + skb->transport_header = ~0U; #endif /* make sure we initialize shinfo sequentially */ @@ -328,6 +329,7 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) skb->end = skb->tail + size; #ifdef NET_SKBUFF_DATA_USES_OFFSET skb->mac_header = ~0U; + skb->transport_header = ~0U; #endif /* make sure we initialize shinfo sequentially */ -- cgit v1.2.3-71-gd317 From b7394d2429c198b1da3d46ac39192e891029ec0f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 7 Jan 2013 20:52:39 +0000 Subject: netpoll: prepare for ipv6 This patch adjusts some struct and functions, to prepare for supporting IPv6. Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- drivers/net/netconsole.c | 12 +- include/linux/netpoll.h | 13 +- net/core/netpoll.c | 402 ++++++++++++++++++++++++++--------------------- 3 files changed, 243 insertions(+), 184 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 6989ebe2bc79..998fa0257a92 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -269,12 +269,14 @@ static ssize_t show_remote_port(struct netconsole_target *nt, char *buf) static ssize_t show_local_ip(struct netconsole_target *nt, char *buf) { - return snprintf(buf, PAGE_SIZE, "%pI4\n", &nt->np.local_ip); + if (!nt->np.ipv6) + return snprintf(buf, PAGE_SIZE, "%pI4\n", &nt->np.local_ip); } static ssize_t show_remote_ip(struct netconsole_target *nt, char *buf) { - return snprintf(buf, PAGE_SIZE, "%pI4\n", &nt->np.remote_ip); + if (!nt->np.ipv6) + return snprintf(buf, PAGE_SIZE, "%pI4\n", &nt->np.remote_ip); } static ssize_t show_local_mac(struct netconsole_target *nt, char *buf) @@ -410,7 +412,8 @@ static ssize_t store_local_ip(struct netconsole_target *nt, return -EINVAL; } - nt->np.local_ip = in_aton(buf); + if (!strnchr(buf, count, ':')) + nt->np.local_ip.ip = in_aton(buf); return strnlen(buf, count); } @@ -426,7 +429,8 @@ static ssize_t store_remote_ip(struct netconsole_target *nt, return -EINVAL; } - nt->np.remote_ip = in_aton(buf); + if (!strnchr(buf, count, ':')) + nt->np.remote_ip.ip = in_aton(buf); return strnlen(buf, count); } diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 66d5379c305e..f54c3bb6a22b 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -12,13 +12,22 @@ #include #include +union inet_addr { + __u32 all[4]; + __be32 ip; + __be32 ip6[4]; + struct in_addr in; + struct in6_addr in6; +}; + struct netpoll { struct net_device *dev; char dev_name[IFNAMSIZ]; const char *name; void (*rx_hook)(struct netpoll *, int, char *, int); - __be32 local_ip, remote_ip; + union inet_addr local_ip, remote_ip; + bool ipv6; u16 local_port, remote_port; u8 remote_mac[ETH_ALEN]; @@ -33,7 +42,7 @@ struct netpoll_info { spinlock_t rx_lock; struct list_head rx_np; /* netpolls that registered an rx_hook */ - struct sk_buff_head arp_tx; /* list of arp requests to reply to */ + struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */ struct sk_buff_head txq; struct delayed_work tx_work; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index d2bda8eb08ec..6bd073688f68 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -55,7 +55,7 @@ static atomic_t trapped; MAX_UDP_CHUNK) static void zap_completion_queue(void); -static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo); +static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); @@ -181,13 +181,13 @@ static void poll_napi(struct net_device *dev) } } -static void service_arp_queue(struct netpoll_info *npi) +static void service_neigh_queue(struct netpoll_info *npi) { if (npi) { struct sk_buff *skb; - while ((skb = skb_dequeue(&npi->arp_tx))) - netpoll_arp_reply(skb, npi); + while ((skb = skb_dequeue(&npi->neigh_tx))) + netpoll_neigh_reply(skb, npi); } } @@ -216,14 +216,14 @@ static void netpoll_poll_dev(struct net_device *dev) bond_dev = netdev_master_upper_dev_get_rcu(dev); bond_ni = rcu_dereference_bh(bond_dev->npinfo); - while ((skb = skb_dequeue(&ni->arp_tx))) { + while ((skb = skb_dequeue(&ni->neigh_tx))) { skb->dev = bond_dev; - skb_queue_tail(&bond_ni->arp_tx, skb); + skb_queue_tail(&bond_ni->neigh_tx, skb); } } } - service_arp_queue(ni); + service_neigh_queue(ni); zap_completion_queue(); } @@ -386,7 +386,9 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) static atomic_t ip_ident; udp_len = len + sizeof(*udph); - ip_len = udp_len + sizeof(*iph); + if (!np->ipv6) + ip_len = udp_len + sizeof(*iph); + total_len = ip_len + LL_RESERVED_SPACE(np->dev); skb = find_skb(np, total_len + np->dev->needed_tailroom, @@ -403,34 +405,38 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) udph->source = htons(np->local_port); udph->dest = htons(np->remote_port); udph->len = htons(udp_len); - udph->check = 0; - udph->check = csum_tcpudp_magic(np->local_ip, - np->remote_ip, - udp_len, IPPROTO_UDP, - csum_partial(udph, udp_len, 0)); - if (udph->check == 0) - udph->check = CSUM_MANGLED_0; - - skb_push(skb, sizeof(*iph)); - skb_reset_network_header(skb); - iph = ip_hdr(skb); - - /* iph->version = 4; iph->ihl = 5; */ - put_unaligned(0x45, (unsigned char *)iph); - iph->tos = 0; - put_unaligned(htons(ip_len), &(iph->tot_len)); - iph->id = htons(atomic_inc_return(&ip_ident)); - iph->frag_off = 0; - iph->ttl = 64; - iph->protocol = IPPROTO_UDP; - iph->check = 0; - put_unaligned(np->local_ip, &(iph->saddr)); - put_unaligned(np->remote_ip, &(iph->daddr)); - iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); - - eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); - skb_reset_mac_header(skb); - skb->protocol = eth->h_proto = htons(ETH_P_IP); + + if (!np->ipv6) { + udph->check = 0; + udph->check = csum_tcpudp_magic(np->local_ip.ip, + np->remote_ip.ip, + udp_len, IPPROTO_UDP, + csum_partial(udph, udp_len, 0)); + if (udph->check == 0) + udph->check = CSUM_MANGLED_0; + + skb_push(skb, sizeof(*iph)); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + + /* iph->version = 4; iph->ihl = 5; */ + put_unaligned(0x45, (unsigned char *)iph); + iph->tos = 0; + put_unaligned(htons(ip_len), &(iph->tot_len)); + iph->id = htons(atomic_inc_return(&ip_ident)); + iph->frag_off = 0; + iph->ttl = 64; + iph->protocol = IPPROTO_UDP; + iph->check = 0; + put_unaligned(np->local_ip.ip, &(iph->saddr)); + put_unaligned(np->remote_ip.ip, &(iph->daddr)); + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + + eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb->protocol = eth->h_proto = htons(ETH_P_IP); + } + memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN); memcpy(eth->h_dest, np->remote_mac, ETH_ALEN); @@ -440,7 +446,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) } EXPORT_SYMBOL(netpoll_send_udp); -static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) +static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo) { struct arphdr *arp; unsigned char *arp_ptr; @@ -451,7 +457,7 @@ static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) struct netpoll *np, *tmp; unsigned long flags; int hlen, tlen; - int hits = 0; + int hits = 0, proto; if (list_empty(&npinfo->rx_np)) return; @@ -469,94 +475,97 @@ static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) if (!hits) return; - /* No arp on this interface */ - if (skb->dev->flags & IFF_NOARP) - return; - - if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) - return; + proto = ntohs(eth_hdr(skb)->h_proto); + if (proto == ETH_P_IP) { + /* No arp on this interface */ + if (skb->dev->flags & IFF_NOARP) + return; - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - arp = arp_hdr(skb); + if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) + return; - if ((arp->ar_hrd != htons(ARPHRD_ETHER) && - arp->ar_hrd != htons(ARPHRD_IEEE802)) || - arp->ar_pro != htons(ETH_P_IP) || - arp->ar_op != htons(ARPOP_REQUEST)) - return; + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + arp = arp_hdr(skb); - arp_ptr = (unsigned char *)(arp+1); - /* save the location of the src hw addr */ - sha = arp_ptr; - arp_ptr += skb->dev->addr_len; - memcpy(&sip, arp_ptr, 4); - arp_ptr += 4; - /* If we actually cared about dst hw addr, - it would get copied here */ - arp_ptr += skb->dev->addr_len; - memcpy(&tip, arp_ptr, 4); - - /* Should we ignore arp? */ - if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) - return; + if ((arp->ar_hrd != htons(ARPHRD_ETHER) && + arp->ar_hrd != htons(ARPHRD_IEEE802)) || + arp->ar_pro != htons(ETH_P_IP) || + arp->ar_op != htons(ARPOP_REQUEST)) + return; - size = arp_hdr_len(skb->dev); + arp_ptr = (unsigned char *)(arp+1); + /* save the location of the src hw addr */ + sha = arp_ptr; + arp_ptr += skb->dev->addr_len; + memcpy(&sip, arp_ptr, 4); + arp_ptr += 4; + /* If we actually cared about dst hw addr, + it would get copied here */ + arp_ptr += skb->dev->addr_len; + memcpy(&tip, arp_ptr, 4); - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (tip != np->local_ip) - continue; + /* Should we ignore arp? */ + if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) + return; - hlen = LL_RESERVED_SPACE(np->dev); - tlen = np->dev->needed_tailroom; - send_skb = find_skb(np, size + hlen + tlen, hlen); - if (!send_skb) - continue; + size = arp_hdr_len(skb->dev); - skb_reset_network_header(send_skb); - arp = (struct arphdr *) skb_put(send_skb, size); - send_skb->dev = skb->dev; - send_skb->protocol = htons(ETH_P_ARP); + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (tip != np->local_ip.ip) + continue; + + hlen = LL_RESERVED_SPACE(np->dev); + tlen = np->dev->needed_tailroom; + send_skb = find_skb(np, size + hlen + tlen, hlen); + if (!send_skb) + continue; + + skb_reset_network_header(send_skb); + arp = (struct arphdr *) skb_put(send_skb, size); + send_skb->dev = skb->dev; + send_skb->protocol = htons(ETH_P_ARP); + + /* Fill the device header for the ARP frame */ + if (dev_hard_header(send_skb, skb->dev, ptype, + sha, np->dev->dev_addr, + send_skb->len) < 0) { + kfree_skb(send_skb); + continue; + } - /* Fill the device header for the ARP frame */ - if (dev_hard_header(send_skb, skb->dev, ptype, - sha, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - continue; + /* + * Fill out the arp protocol part. + * + * we only support ethernet device type, + * which (according to RFC 1390) should + * always equal 1 (Ethernet). + */ + + arp->ar_hrd = htons(np->dev->type); + arp->ar_pro = htons(ETH_P_IP); + arp->ar_hln = np->dev->addr_len; + arp->ar_pln = 4; + arp->ar_op = htons(type); + + arp_ptr = (unsigned char *)(arp + 1); + memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); + arp_ptr += np->dev->addr_len; + memcpy(arp_ptr, &tip, 4); + arp_ptr += 4; + memcpy(arp_ptr, sha, np->dev->addr_len); + arp_ptr += np->dev->addr_len; + memcpy(arp_ptr, &sip, 4); + + netpoll_send_skb(np, send_skb); + + /* If there are several rx_hooks for the same address, + we're fine by sending a single reply */ + break; } - - /* - * Fill out the arp protocol part. - * - * we only support ethernet device type, - * which (according to RFC 1390) should - * always equal 1 (Ethernet). - */ - - arp->ar_hrd = htons(np->dev->type); - arp->ar_pro = htons(ETH_P_IP); - arp->ar_hln = np->dev->addr_len; - arp->ar_pln = 4; - arp->ar_op = htons(type); - - arp_ptr = (unsigned char *)(arp + 1); - memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &tip, 4); - arp_ptr += 4; - memcpy(arp_ptr, sha, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &sip, 4); - - netpoll_send_skb(np, send_skb); - - /* If there are several rx_hooks for the same address, - we're fine by sending a single reply */ - break; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); } int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) @@ -576,7 +585,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) /* check if netpoll clients need ARP */ if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) { - skb_queue_tail(&npinfo->arp_tx, skb); + skb_queue_tail(&npinfo->neigh_tx, skb); return 1; } @@ -587,60 +596,61 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) } proto = ntohs(eth_hdr(skb)->h_proto); - if (proto != ETH_P_IP) + if (proto != ETH_P_IP && proto != ETH_P_IPV6) goto out; if (skb->pkt_type == PACKET_OTHERHOST) goto out; if (skb_shared(skb)) goto out; - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto out; - iph = (struct iphdr *)skb->data; - if (iph->ihl < 5 || iph->version != 4) - goto out; - if (!pskb_may_pull(skb, iph->ihl*4)) - goto out; - iph = (struct iphdr *)skb->data; - if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) - goto out; - - len = ntohs(iph->tot_len); - if (skb->len < len || len < iph->ihl*4) - goto out; - - /* - * Our transport medium may have padded the buffer out. - * Now We trim to the true length of the frame. - */ - if (pskb_trim_rcsum(skb, len)) - goto out; + if (proto == ETH_P_IP) { + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto out; + iph = (struct iphdr *)skb->data; + if (iph->ihl < 5 || iph->version != 4) + goto out; + if (!pskb_may_pull(skb, iph->ihl*4)) + goto out; + iph = (struct iphdr *)skb->data; + if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) + goto out; - iph = (struct iphdr *)skb->data; - if (iph->protocol != IPPROTO_UDP) - goto out; + len = ntohs(iph->tot_len); + if (skb->len < len || len < iph->ihl*4) + goto out; - len -= iph->ihl*4; - uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); - ulen = ntohs(uh->len); + /* + * Our transport medium may have padded the buffer out. + * Now We trim to the true length of the frame. + */ + if (pskb_trim_rcsum(skb, len)) + goto out; - if (ulen != len) - goto out; - if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) - goto out; + iph = (struct iphdr *)skb->data; + if (iph->protocol != IPPROTO_UDP) + goto out; - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (np->local_ip && np->local_ip != iph->daddr) - continue; - if (np->remote_ip && np->remote_ip != iph->saddr) - continue; - if (np->local_port && np->local_port != ntohs(uh->dest)) - continue; + len -= iph->ihl*4; + uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); + ulen = ntohs(uh->len); - np->rx_hook(np, ntohs(uh->source), - (char *)(uh+1), - ulen - sizeof(struct udphdr)); - hits++; + if (ulen != len) + goto out; + if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) + goto out; + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (np->local_ip.ip && np->local_ip.ip != iph->daddr) + continue; + if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr) + continue; + if (np->local_port && np->local_port != ntohs(uh->dest)) + continue; + + np->rx_hook(np, ntohs(uh->source), + (char *)(uh+1), + ulen - sizeof(struct udphdr)); + hits++; + } } if (!hits) @@ -661,17 +671,40 @@ out: void netpoll_print_options(struct netpoll *np) { np_info(np, "local port %d\n", np->local_port); - np_info(np, "local IP %pI4\n", &np->local_ip); + if (!np->ipv6) + np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip); np_info(np, "interface '%s'\n", np->dev_name); np_info(np, "remote port %d\n", np->remote_port); - np_info(np, "remote IP %pI4\n", &np->remote_ip); + if (!np->ipv6) + np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip); np_info(np, "remote ethernet address %pM\n", np->remote_mac); } EXPORT_SYMBOL(netpoll_print_options); +static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr) +{ + const char *end; + + if (!strchr(str, ':') && + in4_pton(str, -1, (void *)addr, -1, &end) > 0) { + if (!*end) + return 0; + } + if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) { +#if IS_ENABLED(CONFIG_IPV6) + if (!*end) + return 1; +#else + return -1; +#endif + } + return -1; +} + int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; + int ipv6; if (*cur != '@') { if ((delim = strchr(cur, '@')) == NULL) @@ -687,7 +720,11 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; - np->local_ip = in_aton(cur); + ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip); + if (ipv6 < 0) + goto parse_failed; + else + np->ipv6 = (bool)ipv6; cur = delim; } cur++; @@ -719,7 +756,13 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; - np->remote_ip = in_aton(cur); + ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip); + if (ipv6 < 0) + goto parse_failed; + else if (np->ipv6 != (bool)ipv6) + goto parse_failed; + else + np->ipv6 = (bool)ipv6; cur = delim + 1; if (*cur != 0) { @@ -767,7 +810,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) INIT_LIST_HEAD(&npinfo->rx_np); spin_lock_init(&npinfo->rx_lock); - skb_queue_head_init(&npinfo->arp_tx); + skb_queue_head_init(&npinfo->neigh_tx); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); @@ -859,21 +902,24 @@ int netpoll_setup(struct netpoll *np) } } - if (!np->local_ip) { - rcu_read_lock(); - in_dev = __in_dev_get_rcu(ndev); + if (!np->local_ip.ip) { + if (!np->ipv6) { + rcu_read_lock(); + in_dev = __in_dev_get_rcu(ndev); - if (!in_dev || !in_dev->ifa_list) { + + if (!in_dev || !in_dev->ifa_list) { + rcu_read_unlock(); + np_err(np, "no IP address for %s, aborting\n", + np->dev_name); + err = -EDESTADDRREQ; + goto put; + } + + np->local_ip.ip = in_dev->ifa_list->ifa_local; rcu_read_unlock(); - np_err(np, "no IP address for %s, aborting\n", - np->dev_name); - err = -EDESTADDRREQ; - goto put; + np_info(np, "local IP %pI4\n", &np->local_ip.ip); } - - np->local_ip = in_dev->ifa_list->ifa_local; - rcu_read_unlock(); - np_info(np, "local IP %pI4\n", &np->local_ip); } /* fill up the skb queue */ @@ -906,7 +952,7 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) struct netpoll_info *npinfo = container_of(rcu_head, struct netpoll_info, rcu); - skb_queue_purge(&npinfo->arp_tx); + skb_queue_purge(&npinfo->neigh_tx); skb_queue_purge(&npinfo->txq); /* we can't call cancel_delayed_work_sync here, as we are in softirq */ -- cgit v1.2.3-71-gd317 From dd4544f05469aaaeee891d7dc54d66430344321e Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Tue, 8 Jan 2013 20:06:23 +0000 Subject: bgmac: driver for GBit MAC core on BCMA bus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BCMA is a Broadcom specific bus with devices AKA cores. All recent BCMA based SoCs have gigabit ethernet provided by the GBit MAC core. This patch adds driver for such a cores registering itself as a netdev. It has been tested on a BCM4706 and BCM4718 chipsets. In the kernel tree there is already b44 driver which has some common things with bgmac, however there are many differences that has led to the decision or writing a new driver: 1) GBit MAC cores appear on BCMA bus (not SSB as in case of b44) 2) There is 64bit DMA engine which differs from 32bit one 3) There is no CAM (Content Addressable Memory) in GBit MAC 4) We have 4 TX queues on GBit MAC devices (instead of 1) 5) Many registers have different addresses/values 6) RX header flags are also different The driver in it's state is functional how, however there is of course place for improvements: 1) Supporting more net_device_ops 2) SUpporting more ethtool_ops 3) Unaligned addressing in DMA 4) Writing separated PHY driver Signed-off-by: Rafał Miłecki Signed-off-by: David S. Miller --- drivers/bcma/driver_chipcommon_pmu.c | 3 +- drivers/net/ethernet/broadcom/Kconfig | 9 + drivers/net/ethernet/broadcom/Makefile | 1 + drivers/net/ethernet/broadcom/bgmac.c | 1422 +++++++++++++++++++++++++++ drivers/net/ethernet/broadcom/bgmac.h | 456 +++++++++ include/linux/bcma/bcma_driver_chipcommon.h | 2 + 6 files changed, 1892 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/broadcom/bgmac.c create mode 100644 drivers/net/ethernet/broadcom/bgmac.h (limited to 'include/linux') diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c index c62c788b3289..932b101dee36 100644 --- a/drivers/bcma/driver_chipcommon_pmu.c +++ b/drivers/bcma/driver_chipcommon_pmu.c @@ -264,7 +264,7 @@ static u32 bcma_pmu_pll_clock_bcm4706(struct bcma_drv_cc *cc, u32 pll0, u32 m) } /* query bus clock frequency for PMU-enabled chipcommon */ -static u32 bcma_pmu_get_bus_clock(struct bcma_drv_cc *cc) +u32 bcma_pmu_get_bus_clock(struct bcma_drv_cc *cc) { struct bcma_bus *bus = cc->core->bus; @@ -293,6 +293,7 @@ static u32 bcma_pmu_get_bus_clock(struct bcma_drv_cc *cc) } return BCMA_CC_PMU_HT_CLOCK; } +EXPORT_SYMBOL_GPL(bcma_pmu_get_bus_clock); /* query cpu clock frequency for PMU-enabled chipcommon */ u32 bcma_pmu_get_cpu_clock(struct bcma_drv_cc *cc) diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 3b3bf0dd0f1a..3e69b3f88099 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -130,4 +130,13 @@ config BNX2X_SRIOV Virtualization support in the 578xx and 57712 products. This allows for virtual function acceleration in virtual environments. +config BGMAC + tristate "BCMA bus GBit core support" + depends on BCMA_HOST_SOC && HAS_DMA + ---help--- + This driver supports GBit MAC and BCM4706 GBit MAC cores on BCMA bus. + They can be found on BCM47xx SoCs and provide gigabit ethernet. + In case of using this driver on BCM4706 it's also requires to enable + BCMA_DRIVER_GMAC_CMN to make it work. + endif # NET_VENDOR_BROADCOM diff --git a/drivers/net/ethernet/broadcom/Makefile b/drivers/net/ethernet/broadcom/Makefile index b7896051d54e..68efa1a3fb88 100644 --- a/drivers/net/ethernet/broadcom/Makefile +++ b/drivers/net/ethernet/broadcom/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_CNIC) += cnic.o obj-$(CONFIG_BNX2X) += bnx2x/ obj-$(CONFIG_SB1250_MAC) += sb1250-mac.o obj-$(CONFIG_TIGON3) += tg3.o +obj-$(CONFIG_BGMAC) += bgmac.o diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c new file mode 100644 index 000000000000..9bd33db7fddd --- /dev/null +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -0,0 +1,1422 @@ +/* + * Driver for (BCM4706)? GBit MAC core on BCMA bus. + * + * Copyright (C) 2012 Rafał Miłecki + * + * Licensed under the GNU/GPL. See COPYING for details. + */ + +#include "bgmac.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +static const struct bcma_device_id bgmac_bcma_tbl[] = { + BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_4706_MAC_GBIT, BCMA_ANY_REV, BCMA_ANY_CLASS), + BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_MAC_GBIT, BCMA_ANY_REV, BCMA_ANY_CLASS), + BCMA_CORETABLE_END +}; +MODULE_DEVICE_TABLE(bcma, bgmac_bcma_tbl); + +static bool bgmac_wait_value(struct bcma_device *core, u16 reg, u32 mask, + u32 value, int timeout) +{ + u32 val; + int i; + + for (i = 0; i < timeout / 10; i++) { + val = bcma_read32(core, reg); + if ((val & mask) == value) + return true; + udelay(10); + } + pr_err("Timeout waiting for reg 0x%X\n", reg); + return false; +} + +/************************************************** + * DMA + **************************************************/ + +static void bgmac_dma_tx_reset(struct bgmac *bgmac, struct bgmac_dma_ring *ring) +{ + u32 val; + int i; + + if (!ring->mmio_base) + return; + + /* Suspend DMA TX ring first. + * bgmac_wait_value doesn't support waiting for any of few values, so + * implement whole loop here. + */ + bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, + BGMAC_DMA_TX_SUSPEND); + for (i = 0; i < 10000 / 10; i++) { + val = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_STATUS); + val &= BGMAC_DMA_TX_STAT; + if (val == BGMAC_DMA_TX_STAT_DISABLED || + val == BGMAC_DMA_TX_STAT_IDLEWAIT || + val == BGMAC_DMA_TX_STAT_STOPPED) { + i = 0; + break; + } + udelay(10); + } + if (i) + bgmac_err(bgmac, "Timeout suspending DMA TX ring 0x%X (BGMAC_DMA_TX_STAT: 0x%08X)\n", + ring->mmio_base, val); + + /* Remove SUSPEND bit */ + bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, 0); + if (!bgmac_wait_value(bgmac->core, + ring->mmio_base + BGMAC_DMA_TX_STATUS, + BGMAC_DMA_TX_STAT, BGMAC_DMA_TX_STAT_DISABLED, + 10000)) { + bgmac_warn(bgmac, "DMA TX ring 0x%X wasn't disabled on time, waiting additional 300us\n", + ring->mmio_base); + udelay(300); + val = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_STATUS); + if ((val & BGMAC_DMA_TX_STAT) != BGMAC_DMA_TX_STAT_DISABLED) + bgmac_err(bgmac, "Reset of DMA TX ring 0x%X failed\n", + ring->mmio_base); + } +} + +static void bgmac_dma_tx_enable(struct bgmac *bgmac, + struct bgmac_dma_ring *ring) +{ + u32 ctl; + + ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL); + ctl |= BGMAC_DMA_TX_ENABLE; + ctl |= BGMAC_DMA_TX_PARITY_DISABLE; + bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl); +} + +static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac, + struct bgmac_dma_ring *ring, + struct sk_buff *skb) +{ + struct device *dma_dev = bgmac->core->dma_dev; + struct net_device *net_dev = bgmac->net_dev; + struct bgmac_dma_desc *dma_desc; + struct bgmac_slot_info *slot; + u32 ctl0, ctl1; + int free_slots; + + if (skb->len > BGMAC_DESC_CTL1_LEN) { + bgmac_err(bgmac, "Too long skb (%d)\n", skb->len); + goto err_stop_drop; + } + + if (ring->start <= ring->end) + free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS; + else + free_slots = ring->start - ring->end; + if (free_slots == 1) { + bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n"); + netif_stop_queue(net_dev); + return NETDEV_TX_BUSY; + } + + slot = &ring->slots[ring->end]; + slot->skb = skb; + slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len, + DMA_TO_DEVICE); + if (dma_mapping_error(dma_dev, slot->dma_addr)) { + bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n", + ring->mmio_base); + goto err_stop_drop; + } + + ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF; + if (ring->end == ring->num_slots - 1) + ctl0 |= BGMAC_DESC_CTL0_EOT; + ctl1 = skb->len & BGMAC_DESC_CTL1_LEN; + + dma_desc = ring->cpu_base; + dma_desc += ring->end; + dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr)); + dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr)); + dma_desc->ctl0 = cpu_to_le32(ctl0); + dma_desc->ctl1 = cpu_to_le32(ctl1); + + wmb(); + + /* Increase ring->end to point empty slot. We tell hardware the first + * slot it should *not* read. + */ + if (++ring->end >= BGMAC_TX_RING_SLOTS) + ring->end = 0; + bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX, + ring->end * sizeof(struct bgmac_dma_desc)); + + /* Always keep one slot free to allow detecting bugged calls. */ + if (--free_slots == 1) + netif_stop_queue(net_dev); + + return NETDEV_TX_OK; + +err_stop_drop: + netif_stop_queue(net_dev); + dev_kfree_skb(skb); + return NETDEV_TX_OK; +} + +/* Free transmitted packets */ +static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring) +{ + struct device *dma_dev = bgmac->core->dma_dev; + int empty_slot; + bool freed = false; + + /* The last slot that hardware didn't consume yet */ + empty_slot = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_STATUS); + empty_slot &= BGMAC_DMA_TX_STATDPTR; + empty_slot /= sizeof(struct bgmac_dma_desc); + + while (ring->start != empty_slot) { + struct bgmac_slot_info *slot = &ring->slots[ring->start]; + + if (slot->skb) { + /* Unmap no longer used buffer */ + dma_unmap_single(dma_dev, slot->dma_addr, + slot->skb->len, DMA_TO_DEVICE); + slot->dma_addr = 0; + + /* Free memory! :) */ + dev_kfree_skb(slot->skb); + slot->skb = NULL; + } else { + bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n", + ring->start, ring->end); + } + + if (++ring->start >= BGMAC_TX_RING_SLOTS) + ring->start = 0; + freed = true; + } + + if (freed && netif_queue_stopped(bgmac->net_dev)) + netif_wake_queue(bgmac->net_dev); +} + +static void bgmac_dma_rx_reset(struct bgmac *bgmac, struct bgmac_dma_ring *ring) +{ + if (!ring->mmio_base) + return; + + bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL, 0); + if (!bgmac_wait_value(bgmac->core, + ring->mmio_base + BGMAC_DMA_RX_STATUS, + BGMAC_DMA_RX_STAT, BGMAC_DMA_RX_STAT_DISABLED, + 10000)) + bgmac_err(bgmac, "Reset of ring 0x%X RX failed\n", + ring->mmio_base); +} + +static void bgmac_dma_rx_enable(struct bgmac *bgmac, + struct bgmac_dma_ring *ring) +{ + u32 ctl; + + ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL); + ctl &= BGMAC_DMA_RX_ADDREXT_MASK; + ctl |= BGMAC_DMA_RX_ENABLE; + ctl |= BGMAC_DMA_RX_PARITY_DISABLE; + ctl |= BGMAC_DMA_RX_OVERFLOW_CONT; + ctl |= BGMAC_RX_FRAME_OFFSET << BGMAC_DMA_RX_FRAME_OFFSET_SHIFT; + bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL, ctl); +} + +static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac, + struct bgmac_slot_info *slot) +{ + struct device *dma_dev = bgmac->core->dma_dev; + struct bgmac_rx_header *rx; + + /* Alloc skb */ + slot->skb = netdev_alloc_skb(bgmac->net_dev, BGMAC_RX_BUF_SIZE); + if (!slot->skb) { + bgmac_err(bgmac, "Allocation of skb failed!\n"); + return -ENOMEM; + } + + /* Poison - if everything goes fine, hardware will overwrite it */ + rx = (struct bgmac_rx_header *)slot->skb->data; + rx->len = cpu_to_le16(0xdead); + rx->flags = cpu_to_le16(0xbeef); + + /* Map skb for the DMA */ + slot->dma_addr = dma_map_single(dma_dev, slot->skb->data, + BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dma_dev, slot->dma_addr)) { + bgmac_err(bgmac, "DMA mapping error\n"); + return -ENOMEM; + } + if (slot->dma_addr & 0xC0000000) + bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n"); + + return 0; +} + +static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring, + int weight) +{ + u32 end_slot; + int handled = 0; + + end_slot = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_STATUS); + end_slot &= BGMAC_DMA_RX_STATDPTR; + end_slot /= sizeof(struct bgmac_dma_desc); + + ring->end = end_slot; + + while (ring->start != ring->end) { + struct device *dma_dev = bgmac->core->dma_dev; + struct bgmac_slot_info *slot = &ring->slots[ring->start]; + struct sk_buff *skb = slot->skb; + struct sk_buff *new_skb; + struct bgmac_rx_header *rx; + u16 len, flags; + + /* Unmap buffer to make it accessible to the CPU */ + dma_sync_single_for_cpu(dma_dev, slot->dma_addr, + BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE); + + /* Get info from the header */ + rx = (struct bgmac_rx_header *)skb->data; + len = le16_to_cpu(rx->len); + flags = le16_to_cpu(rx->flags); + + /* Check for poison and drop or pass the packet */ + if (len == 0xdead && flags == 0xbeef) { + bgmac_err(bgmac, "Found poisoned packet at slot %d, DMA issue!\n", + ring->start); + } else { + new_skb = netdev_alloc_skb(bgmac->net_dev, len); + if (new_skb) { + skb_put(new_skb, len); + skb_copy_from_linear_data_offset(skb, BGMAC_RX_FRAME_OFFSET, + new_skb->data, + len); + new_skb->protocol = + eth_type_trans(new_skb, bgmac->net_dev); + netif_receive_skb(new_skb); + handled++; + } else { + bgmac->net_dev->stats.rx_dropped++; + bgmac_err(bgmac, "Allocation of skb for copying packet failed!\n"); + } + + /* Poison the old skb */ + rx->len = cpu_to_le16(0xdead); + rx->flags = cpu_to_le16(0xbeef); + } + + /* Make it back accessible to the hardware */ + dma_sync_single_for_device(dma_dev, slot->dma_addr, + BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE); + + if (++ring->start >= BGMAC_RX_RING_SLOTS) + ring->start = 0; + + if (handled >= weight) /* Should never be greater */ + break; + } + + return handled; +} + +/* Does ring support unaligned addressing? */ +static bool bgmac_dma_unaligned(struct bgmac *bgmac, + struct bgmac_dma_ring *ring, + enum bgmac_dma_ring_type ring_type) +{ + switch (ring_type) { + case BGMAC_DMA_RING_TX: + bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_RINGLO, + 0xff0); + if (bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_RINGLO)) + return true; + break; + case BGMAC_DMA_RING_RX: + bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_RINGLO, + 0xff0); + if (bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_RINGLO)) + return true; + break; + } + return false; +} + +static void bgmac_dma_ring_free(struct bgmac *bgmac, + struct bgmac_dma_ring *ring) +{ + struct device *dma_dev = bgmac->core->dma_dev; + struct bgmac_slot_info *slot; + int size; + int i; + + for (i = 0; i < ring->num_slots; i++) { + slot = &ring->slots[i]; + if (slot->skb) { + if (slot->dma_addr) + dma_unmap_single(dma_dev, slot->dma_addr, + slot->skb->len, DMA_TO_DEVICE); + dev_kfree_skb(slot->skb); + } + } + + if (ring->cpu_base) { + /* Free ring of descriptors */ + size = ring->num_slots * sizeof(struct bgmac_dma_desc); + dma_free_coherent(dma_dev, size, ring->cpu_base, + ring->dma_base); + } +} + +static void bgmac_dma_free(struct bgmac *bgmac) +{ + int i; + + for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) + bgmac_dma_ring_free(bgmac, &bgmac->tx_ring[i]); + for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) + bgmac_dma_ring_free(bgmac, &bgmac->rx_ring[i]); +} + +static int bgmac_dma_alloc(struct bgmac *bgmac) +{ + struct device *dma_dev = bgmac->core->dma_dev; + struct bgmac_dma_ring *ring; + static const u16 ring_base[] = { BGMAC_DMA_BASE0, BGMAC_DMA_BASE1, + BGMAC_DMA_BASE2, BGMAC_DMA_BASE3, }; + int size; /* ring size: different for Tx and Rx */ + int err; + int i; + + BUILD_BUG_ON(BGMAC_MAX_TX_RINGS > ARRAY_SIZE(ring_base)); + BUILD_BUG_ON(BGMAC_MAX_RX_RINGS > ARRAY_SIZE(ring_base)); + + if (!(bcma_aread32(bgmac->core, BCMA_IOST) & BCMA_IOST_DMA64)) { + bgmac_err(bgmac, "Core does not report 64-bit DMA\n"); + return -ENOTSUPP; + } + + for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) { + ring = &bgmac->tx_ring[i]; + ring->num_slots = BGMAC_TX_RING_SLOTS; + ring->mmio_base = ring_base[i]; + if (bgmac_dma_unaligned(bgmac, ring, BGMAC_DMA_RING_TX)) + bgmac_warn(bgmac, "TX on ring 0x%X supports unaligned addressing but this feature is not implemented\n", + ring->mmio_base); + + /* Alloc ring of descriptors */ + size = ring->num_slots * sizeof(struct bgmac_dma_desc); + ring->cpu_base = dma_zalloc_coherent(dma_dev, size, + &ring->dma_base, + GFP_KERNEL); + if (!ring->cpu_base) { + bgmac_err(bgmac, "Allocation of TX ring 0x%X failed\n", + ring->mmio_base); + goto err_dma_free; + } + if (ring->dma_base & 0xC0000000) + bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n"); + + /* No need to alloc TX slots yet */ + } + + for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) { + ring = &bgmac->rx_ring[i]; + ring->num_slots = BGMAC_RX_RING_SLOTS; + ring->mmio_base = ring_base[i]; + if (bgmac_dma_unaligned(bgmac, ring, BGMAC_DMA_RING_RX)) + bgmac_warn(bgmac, "RX on ring 0x%X supports unaligned addressing but this feature is not implemented\n", + ring->mmio_base); + + /* Alloc ring of descriptors */ + size = ring->num_slots * sizeof(struct bgmac_dma_desc); + ring->cpu_base = dma_zalloc_coherent(dma_dev, size, + &ring->dma_base, + GFP_KERNEL); + if (!ring->cpu_base) { + bgmac_err(bgmac, "Allocation of RX ring 0x%X failed\n", + ring->mmio_base); + err = -ENOMEM; + goto err_dma_free; + } + if (ring->dma_base & 0xC0000000) + bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n"); + + /* Alloc RX slots */ + for (i = 0; i < ring->num_slots; i++) { + err = bgmac_dma_rx_skb_for_slot(bgmac, &ring->slots[i]); + if (err) { + bgmac_err(bgmac, "Can't allocate skb for slot in RX ring\n"); + goto err_dma_free; + } + } + } + + return 0; + +err_dma_free: + bgmac_dma_free(bgmac); + return -ENOMEM; +} + +static void bgmac_dma_init(struct bgmac *bgmac) +{ + struct bgmac_dma_ring *ring; + struct bgmac_dma_desc *dma_desc; + u32 ctl0, ctl1; + int i; + + for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) { + ring = &bgmac->tx_ring[i]; + + /* We don't implement unaligned addressing, so enable first */ + bgmac_dma_tx_enable(bgmac, ring); + bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_RINGLO, + lower_32_bits(ring->dma_base)); + bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_RINGHI, + upper_32_bits(ring->dma_base)); + + ring->start = 0; + ring->end = 0; /* Points the slot that should *not* be read */ + } + + for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) { + ring = &bgmac->rx_ring[i]; + + /* We don't implement unaligned addressing, so enable first */ + bgmac_dma_rx_enable(bgmac, ring); + bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_RINGLO, + lower_32_bits(ring->dma_base)); + bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_RINGHI, + upper_32_bits(ring->dma_base)); + + for (i = 0, dma_desc = ring->cpu_base; i < ring->num_slots; + i++, dma_desc++) { + ctl0 = ctl1 = 0; + + if (i == ring->num_slots - 1) + ctl0 |= BGMAC_DESC_CTL0_EOT; + ctl1 |= BGMAC_RX_BUF_SIZE & BGMAC_DESC_CTL1_LEN; + /* Is there any BGMAC device that requires extension? */ + /* ctl1 |= (addrext << B43_DMA64_DCTL1_ADDREXT_SHIFT) & + * B43_DMA64_DCTL1_ADDREXT_MASK; + */ + + dma_desc->addr_low = cpu_to_le32(lower_32_bits(ring->slots[i].dma_addr)); + dma_desc->addr_high = cpu_to_le32(upper_32_bits(ring->slots[i].dma_addr)); + dma_desc->ctl0 = cpu_to_le32(ctl0); + dma_desc->ctl1 = cpu_to_le32(ctl1); + } + + bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX, + ring->num_slots * sizeof(struct bgmac_dma_desc)); + + ring->start = 0; + ring->end = 0; + } +} + +/************************************************** + * PHY ops + **************************************************/ + +u16 bgmac_phy_read(struct bgmac *bgmac, u8 phyaddr, u8 reg) +{ + struct bcma_device *core; + u16 phy_access_addr; + u16 phy_ctl_addr; + u32 tmp; + + BUILD_BUG_ON(BGMAC_PA_DATA_MASK != BCMA_GMAC_CMN_PA_DATA_MASK); + BUILD_BUG_ON(BGMAC_PA_ADDR_MASK != BCMA_GMAC_CMN_PA_ADDR_MASK); + BUILD_BUG_ON(BGMAC_PA_ADDR_SHIFT != BCMA_GMAC_CMN_PA_ADDR_SHIFT); + BUILD_BUG_ON(BGMAC_PA_REG_MASK != BCMA_GMAC_CMN_PA_REG_MASK); + BUILD_BUG_ON(BGMAC_PA_REG_SHIFT != BCMA_GMAC_CMN_PA_REG_SHIFT); + BUILD_BUG_ON(BGMAC_PA_WRITE != BCMA_GMAC_CMN_PA_WRITE); + BUILD_BUG_ON(BGMAC_PA_START != BCMA_GMAC_CMN_PA_START); + BUILD_BUG_ON(BGMAC_PC_EPA_MASK != BCMA_GMAC_CMN_PC_EPA_MASK); + BUILD_BUG_ON(BGMAC_PC_MCT_MASK != BCMA_GMAC_CMN_PC_MCT_MASK); + BUILD_BUG_ON(BGMAC_PC_MCT_SHIFT != BCMA_GMAC_CMN_PC_MCT_SHIFT); + BUILD_BUG_ON(BGMAC_PC_MTE != BCMA_GMAC_CMN_PC_MTE); + + if (bgmac->core->id.id == BCMA_CORE_4706_MAC_GBIT) { + core = bgmac->core->bus->drv_gmac_cmn.core; + phy_access_addr = BCMA_GMAC_CMN_PHY_ACCESS; + phy_ctl_addr = BCMA_GMAC_CMN_PHY_CTL; + } else { + core = bgmac->core; + phy_access_addr = BGMAC_PHY_ACCESS; + phy_ctl_addr = BGMAC_PHY_CNTL; + } + + tmp = bcma_read32(core, phy_ctl_addr); + tmp &= ~BGMAC_PC_EPA_MASK; + tmp |= phyaddr; + bcma_write32(core, phy_ctl_addr, tmp); + + tmp = BGMAC_PA_START; + tmp |= phyaddr << BGMAC_PA_ADDR_SHIFT; + tmp |= reg << BGMAC_PA_REG_SHIFT; + bcma_write32(core, phy_access_addr, tmp); + + if (!bgmac_wait_value(core, phy_access_addr, BGMAC_PA_START, 0, 1000)) { + bgmac_err(bgmac, "Reading PHY %d register 0x%X failed\n", + phyaddr, reg); + return 0xffff; + } + + return bcma_read32(core, phy_access_addr) & BGMAC_PA_DATA_MASK; +} + +/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphywr */ +void bgmac_phy_write(struct bgmac *bgmac, u8 phyaddr, u8 reg, u16 value) +{ + struct bcma_device *core; + u16 phy_access_addr; + u16 phy_ctl_addr; + u32 tmp; + + if (bgmac->core->id.id == BCMA_CORE_4706_MAC_GBIT) { + core = bgmac->core->bus->drv_gmac_cmn.core; + phy_access_addr = BCMA_GMAC_CMN_PHY_ACCESS; + phy_ctl_addr = BCMA_GMAC_CMN_PHY_CTL; + } else { + core = bgmac->core; + phy_access_addr = BGMAC_PHY_ACCESS; + phy_ctl_addr = BGMAC_PHY_CNTL; + } + + tmp = bcma_read32(core, phy_ctl_addr); + tmp &= ~BGMAC_PC_EPA_MASK; + tmp |= phyaddr; + bcma_write32(core, phy_ctl_addr, tmp); + + bgmac_write(bgmac, BGMAC_INT_STATUS, BGMAC_IS_MDIO); + if (bgmac_read(bgmac, BGMAC_INT_STATUS) & BGMAC_IS_MDIO) + bgmac_warn(bgmac, "Error setting MDIO int\n"); + + tmp = BGMAC_PA_START; + tmp |= BGMAC_PA_WRITE; + tmp |= phyaddr << BGMAC_PA_ADDR_SHIFT; + tmp |= reg << BGMAC_PA_REG_SHIFT; + tmp |= value; + bcma_write32(core, phy_access_addr, tmp); + + if (!bgmac_wait_value(core, phy_access_addr, BGMAC_PA_START, 0, 1000)) + bgmac_err(bgmac, "Writing to PHY %d register 0x%X failed\n", + phyaddr, reg); +} + +/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyforce */ +static void bgmac_phy_force(struct bgmac *bgmac) +{ + u16 ctl; + u16 mask = ~(BGMAC_PHY_CTL_SPEED | BGMAC_PHY_CTL_SPEED_MSB | + BGMAC_PHY_CTL_ANENAB | BGMAC_PHY_CTL_DUPLEX); + + if (bgmac->phyaddr == BGMAC_PHY_NOREGS) + return; + + if (bgmac->autoneg) + return; + + ctl = bgmac_phy_read(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL); + ctl &= mask; + if (bgmac->full_duplex) + ctl |= BGMAC_PHY_CTL_DUPLEX; + if (bgmac->speed == BGMAC_SPEED_100) + ctl |= BGMAC_PHY_CTL_SPEED_100; + else if (bgmac->speed == BGMAC_SPEED_1000) + ctl |= BGMAC_PHY_CTL_SPEED_1000; + bgmac_phy_write(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL, ctl); +} + +/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyadvertise */ +static void bgmac_phy_advertise(struct bgmac *bgmac) +{ + u16 adv; + + if (bgmac->phyaddr == BGMAC_PHY_NOREGS) + return; + + if (!bgmac->autoneg) + return; + + /* Adv selected 10/100 speeds */ + adv = bgmac_phy_read(bgmac, bgmac->phyaddr, BGMAC_PHY_ADV); + adv &= ~(BGMAC_PHY_ADV_10HALF | BGMAC_PHY_ADV_10FULL | + BGMAC_PHY_ADV_100HALF | BGMAC_PHY_ADV_100FULL); + if (!bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_10) + adv |= BGMAC_PHY_ADV_10HALF; + if (!bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_100) + adv |= BGMAC_PHY_ADV_100HALF; + if (bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_10) + adv |= BGMAC_PHY_ADV_10FULL; + if (bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_100) + adv |= BGMAC_PHY_ADV_100FULL; + bgmac_phy_write(bgmac, bgmac->phyaddr, BGMAC_PHY_ADV, adv); + + /* Adv selected 1000 speeds */ + adv = bgmac_phy_read(bgmac, bgmac->phyaddr, BGMAC_PHY_ADV2); + adv &= ~(BGMAC_PHY_ADV2_1000HALF | BGMAC_PHY_ADV2_1000FULL); + if (!bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_1000) + adv |= BGMAC_PHY_ADV2_1000HALF; + if (bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_1000) + adv |= BGMAC_PHY_ADV2_1000FULL; + bgmac_phy_write(bgmac, bgmac->phyaddr, BGMAC_PHY_ADV2, adv); + + /* Restart */ + bgmac_phy_write(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL, + bgmac_phy_read(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL) | + BGMAC_PHY_CTL_RESTART); +} + +/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyinit */ +static void bgmac_phy_init(struct bgmac *bgmac) +{ + struct bcma_chipinfo *ci = &bgmac->core->bus->chipinfo; + struct bcma_drv_cc *cc = &bgmac->core->bus->drv_cc; + u8 i; + + if (ci->id == BCMA_CHIP_ID_BCM5356) { + for (i = 0; i < 5; i++) { + bgmac_phy_write(bgmac, i, 0x1f, 0x008b); + bgmac_phy_write(bgmac, i, 0x15, 0x0100); + bgmac_phy_write(bgmac, i, 0x1f, 0x000f); + bgmac_phy_write(bgmac, i, 0x12, 0x2aaa); + bgmac_phy_write(bgmac, i, 0x1f, 0x000b); + } + } + if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg != 10) || + (ci->id == BCMA_CHIP_ID_BCM4749 && ci->pkg != 10) || + (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg != 9)) { + bcma_chipco_chipctl_maskset(cc, 2, ~0xc0000000, 0); + bcma_chipco_chipctl_maskset(cc, 4, ~0x80000000, 0); + for (i = 0; i < 5; i++) { + bgmac_phy_write(bgmac, i, 0x1f, 0x000f); + bgmac_phy_write(bgmac, i, 0x16, 0x5284); + bgmac_phy_write(bgmac, i, 0x1f, 0x000b); + bgmac_phy_write(bgmac, i, 0x17, 0x0010); + bgmac_phy_write(bgmac, i, 0x1f, 0x000f); + bgmac_phy_write(bgmac, i, 0x16, 0x5296); + bgmac_phy_write(bgmac, i, 0x17, 0x1073); + bgmac_phy_write(bgmac, i, 0x17, 0x9073); + bgmac_phy_write(bgmac, i, 0x16, 0x52b6); + bgmac_phy_write(bgmac, i, 0x17, 0x9273); + bgmac_phy_write(bgmac, i, 0x1f, 0x000b); + } + } +} + +/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyreset */ +static void bgmac_phy_reset(struct bgmac *bgmac) +{ + if (bgmac->phyaddr == BGMAC_PHY_NOREGS) + return; + + bgmac_phy_write(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL, + BGMAC_PHY_CTL_RESET); + udelay(100); + if (bgmac_phy_read(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL) & + BGMAC_PHY_CTL_RESET) + bgmac_err(bgmac, "PHY reset failed\n"); + bgmac_phy_init(bgmac); +} + +/************************************************** + * Chip ops + **************************************************/ + +/* TODO: can we just drop @force? Can we don't reset MAC at all if there is + * nothing to change? Try if after stabilizng driver. + */ +static void bgmac_cmdcfg_maskset(struct bgmac *bgmac, u32 mask, u32 set, + bool force) +{ + u32 cmdcfg = bgmac_read(bgmac, BGMAC_CMDCFG); + u32 new_val = (cmdcfg & mask) | set; + + bgmac_set(bgmac, BGMAC_CMDCFG, BGMAC_CMDCFG_SR); + udelay(2); + + if (new_val != cmdcfg || force) + bgmac_write(bgmac, BGMAC_CMDCFG, new_val); + + bgmac_mask(bgmac, BGMAC_CMDCFG, ~BGMAC_CMDCFG_SR); + udelay(2); +} + +#if 0 /* We don't use that regs yet */ +static void bgmac_chip_stats_update(struct bgmac *bgmac) +{ + int i; + + if (bgmac->core->id.id != BCMA_CORE_4706_MAC_GBIT) { + for (i = 0; i < BGMAC_NUM_MIB_TX_REGS; i++) + bgmac->mib_tx_regs[i] = + bgmac_read(bgmac, + BGMAC_TX_GOOD_OCTETS + (i * 4)); + for (i = 0; i < BGMAC_NUM_MIB_RX_REGS; i++) + bgmac->mib_rx_regs[i] = + bgmac_read(bgmac, + BGMAC_RX_GOOD_OCTETS + (i * 4)); + } + + /* TODO: what else? how to handle BCM4706? Specs are needed */ +} +#endif + +static void bgmac_clear_mib(struct bgmac *bgmac) +{ + int i; + + if (bgmac->core->id.id == BCMA_CORE_4706_MAC_GBIT) + return; + + bgmac_set(bgmac, BGMAC_DEV_CTL, BGMAC_DC_MROR); + for (i = 0; i < BGMAC_NUM_MIB_TX_REGS; i++) + bgmac_read(bgmac, BGMAC_TX_GOOD_OCTETS + (i * 4)); + for (i = 0; i < BGMAC_NUM_MIB_RX_REGS; i++) + bgmac_read(bgmac, BGMAC_RX_GOOD_OCTETS + (i * 4)); +} + +/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/gmac_speed */ +static void bgmac_speed(struct bgmac *bgmac, int speed) +{ + u32 mask = ~(BGMAC_CMDCFG_ES_MASK | BGMAC_CMDCFG_HD); + u32 set = 0; + + if (speed & BGMAC_SPEED_10) + set |= BGMAC_CMDCFG_ES_10; + if (speed & BGMAC_SPEED_100) + set |= BGMAC_CMDCFG_ES_100; + if (speed & BGMAC_SPEED_1000) + set |= BGMAC_CMDCFG_ES_1000; + if (!bgmac->full_duplex) + set |= BGMAC_CMDCFG_HD; + bgmac_cmdcfg_maskset(bgmac, mask, set, true); +} + +static void bgmac_miiconfig(struct bgmac *bgmac) +{ + u8 imode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) & BGMAC_DS_MM_MASK) >> + BGMAC_DS_MM_SHIFT; + if (imode == 0 || imode == 1) { + if (bgmac->autoneg) + bgmac_speed(bgmac, BGMAC_SPEED_100); + else + bgmac_speed(bgmac, bgmac->speed); + } +} + +/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipreset */ +static void bgmac_chip_reset(struct bgmac *bgmac) +{ + struct bcma_device *core = bgmac->core; + struct bcma_bus *bus = core->bus; + struct bcma_chipinfo *ci = &bus->chipinfo; + u32 flags = 0; + u32 iost; + int i; + + if (bcma_core_is_enabled(core)) { + if (!bgmac->stats_grabbed) { + /* bgmac_chip_stats_update(bgmac); */ + bgmac->stats_grabbed = true; + } + + for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) + bgmac_dma_tx_reset(bgmac, &bgmac->tx_ring[i]); + + bgmac_cmdcfg_maskset(bgmac, ~0, BGMAC_CMDCFG_ML, false); + udelay(1); + + for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) + bgmac_dma_rx_reset(bgmac, &bgmac->rx_ring[i]); + + /* TODO: Clear software multicast filter list */ + } + + iost = bcma_aread32(core, BCMA_IOST); + if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == 10) || + (ci->id == BCMA_CHIP_ID_BCM4749 && ci->pkg == 10) || + (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg == 9)) + iost &= ~BGMAC_BCMA_IOST_ATTACHED; + + if (iost & BGMAC_BCMA_IOST_ATTACHED) { + flags = BGMAC_BCMA_IOCTL_SW_CLKEN; + if (!bgmac->has_robosw) + flags |= BGMAC_BCMA_IOCTL_SW_RESET; + } + + bcma_core_enable(core, flags); + + if (core->id.rev > 2) { + bgmac_set(bgmac, BCMA_CLKCTLST, 1 << 8); + bgmac_wait_value(bgmac->core, BCMA_CLKCTLST, 1 << 24, 1 << 24, + 1000); + } + + if (ci->id == BCMA_CHIP_ID_BCM5357 || ci->id == BCMA_CHIP_ID_BCM4749 || + ci->id == BCMA_CHIP_ID_BCM53572) { + struct bcma_drv_cc *cc = &bgmac->core->bus->drv_cc; + u8 et_swtype = 0; + u8 sw_type = BGMAC_CHIPCTL_1_SW_TYPE_EPHY | + BGMAC_CHIPCTL_1_IF_TYPE_RMII; + char buf[2]; + + if (nvram_getenv("et_swtype", buf, 1) > 0) { + if (kstrtou8(buf, 0, &et_swtype)) + bgmac_err(bgmac, "Failed to parse et_swtype (%s)\n", + buf); + et_swtype &= 0x0f; + et_swtype <<= 4; + sw_type = et_swtype; + } else if (ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == 9) { + sw_type = BGMAC_CHIPCTL_1_SW_TYPE_EPHYRMII; + } else if (0) { + /* TODO */ + } + bcma_chipco_chipctl_maskset(cc, 1, + ~(BGMAC_CHIPCTL_1_IF_TYPE_MASK | + BGMAC_CHIPCTL_1_SW_TYPE_MASK), + sw_type); + } + + if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw) + bcma_awrite32(core, BCMA_IOCTL, + bcma_aread32(core, BCMA_IOCTL) & + ~BGMAC_BCMA_IOCTL_SW_RESET); + + /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/gmac_reset + * Specs don't say about using BGMAC_CMDCFG_SR, but in this routine + * BGMAC_CMDCFG is read _after_ putting chip in a reset. So it has to + * be keps until taking MAC out of the reset. + */ + bgmac_cmdcfg_maskset(bgmac, + ~(BGMAC_CMDCFG_TE | + BGMAC_CMDCFG_RE | + BGMAC_CMDCFG_RPI | + BGMAC_CMDCFG_TAI | + BGMAC_CMDCFG_HD | + BGMAC_CMDCFG_ML | + BGMAC_CMDCFG_CFE | + BGMAC_CMDCFG_RL | + BGMAC_CMDCFG_RED | + BGMAC_CMDCFG_PE | + BGMAC_CMDCFG_TPI | + BGMAC_CMDCFG_PAD_EN | + BGMAC_CMDCFG_PF), + BGMAC_CMDCFG_PROM | + BGMAC_CMDCFG_NLC | + BGMAC_CMDCFG_CFE | + BGMAC_CMDCFG_SR, + false); + + bgmac_clear_mib(bgmac); + if (core->id.id == BCMA_CORE_4706_MAC_GBIT) + bcma_maskset32(bgmac->cmn, BCMA_GMAC_CMN_PHY_CTL, ~0, + BCMA_GMAC_CMN_PC_MTE); + else + bgmac_set(bgmac, BGMAC_PHY_CNTL, BGMAC_PC_MTE); + bgmac_miiconfig(bgmac); + bgmac_phy_init(bgmac); + + bgmac->int_status = 0; +} + +static void bgmac_chip_intrs_on(struct bgmac *bgmac) +{ + bgmac_write(bgmac, BGMAC_INT_MASK, bgmac->int_mask); +} + +static void bgmac_chip_intrs_off(struct bgmac *bgmac) +{ + bgmac_write(bgmac, BGMAC_INT_MASK, 0); +} + +/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/gmac_enable */ +static void bgmac_enable(struct bgmac *bgmac) +{ + struct bcma_chipinfo *ci = &bgmac->core->bus->chipinfo; + u32 cmdcfg; + u32 mode; + u32 rxq_ctl; + u32 fl_ctl; + u16 bp_clk; + u8 mdp; + + cmdcfg = bgmac_read(bgmac, BGMAC_CMDCFG); + bgmac_cmdcfg_maskset(bgmac, ~(BGMAC_CMDCFG_TE | BGMAC_CMDCFG_RE), + BGMAC_CMDCFG_SR, true); + udelay(2); + cmdcfg |= BGMAC_CMDCFG_TE | BGMAC_CMDCFG_RE; + bgmac_write(bgmac, BGMAC_CMDCFG, cmdcfg); + + mode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) & BGMAC_DS_MM_MASK) >> + BGMAC_DS_MM_SHIFT; + if (ci->id != BCMA_CHIP_ID_BCM47162 || mode != 0) + bgmac_set(bgmac, BCMA_CLKCTLST, BCMA_CLKCTLST_FORCEHT); + if (ci->id == BCMA_CHIP_ID_BCM47162 && mode == 2) + bcma_chipco_chipctl_maskset(&bgmac->core->bus->drv_cc, 1, ~0, + BGMAC_CHIPCTL_1_RXC_DLL_BYPASS); + + switch (ci->id) { + case BCMA_CHIP_ID_BCM5357: + case BCMA_CHIP_ID_BCM4749: + case BCMA_CHIP_ID_BCM53572: + case BCMA_CHIP_ID_BCM4716: + case BCMA_CHIP_ID_BCM47162: + fl_ctl = 0x03cb04cb; + if (ci->id == BCMA_CHIP_ID_BCM5357 || + ci->id == BCMA_CHIP_ID_BCM4749 || + ci->id == BCMA_CHIP_ID_BCM53572) + fl_ctl = 0x2300e1; + bgmac_write(bgmac, BGMAC_FLOW_CTL_THRESH, fl_ctl); + bgmac_write(bgmac, BGMAC_PAUSE_CTL, 0x27fff); + break; + } + + rxq_ctl = bgmac_read(bgmac, BGMAC_RXQ_CTL); + rxq_ctl &= ~BGMAC_RXQ_CTL_MDP_MASK; + bp_clk = bcma_pmu_get_bus_clock(&bgmac->core->bus->drv_cc) / 1000000; + mdp = (bp_clk * 128 / 1000) - 3; + rxq_ctl |= (mdp << BGMAC_RXQ_CTL_MDP_SHIFT); + bgmac_write(bgmac, BGMAC_RXQ_CTL, rxq_ctl); +} + +/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipinit */ +static void bgmac_chip_init(struct bgmac *bgmac, bool full_init) +{ + struct bgmac_dma_ring *ring; + u8 *mac = bgmac->net_dev->dev_addr; + u32 tmp; + int i; + + /* 1 interrupt per received frame */ + bgmac_write(bgmac, BGMAC_INT_RECV_LAZY, 1 << BGMAC_IRL_FC_SHIFT); + + /* Enable 802.3x tx flow control (honor received PAUSE frames) */ + bgmac_cmdcfg_maskset(bgmac, ~BGMAC_CMDCFG_RPI, 0, true); + + if (bgmac->net_dev->flags & IFF_PROMISC) + bgmac_cmdcfg_maskset(bgmac, ~0, BGMAC_CMDCFG_PROM, false); + else + bgmac_cmdcfg_maskset(bgmac, ~BGMAC_CMDCFG_PROM, 0, false); + + /* Set MAC addr */ + tmp = (mac[0] << 24) | (mac[1] << 16) | (mac[2] << 8) | mac[3]; + bgmac_write(bgmac, BGMAC_MACADDR_HIGH, tmp); + tmp = (mac[4] << 8) | mac[5]; + bgmac_write(bgmac, BGMAC_MACADDR_LOW, tmp); + + if (bgmac->loopback) + bgmac_cmdcfg_maskset(bgmac, ~0, BGMAC_CMDCFG_ML, true); + else + bgmac_cmdcfg_maskset(bgmac, ~BGMAC_CMDCFG_ML, 0, true); + + bgmac_write(bgmac, BGMAC_RXMAX_LENGTH, 32 + ETHER_MAX_LEN); + + if (!bgmac->autoneg) { + bgmac_speed(bgmac, bgmac->speed); + bgmac_phy_force(bgmac); + } else if (bgmac->speed) { /* if there is anything to adv */ + bgmac_phy_advertise(bgmac); + } + + if (full_init) { + bgmac_dma_init(bgmac); + if (1) /* FIXME: is there any case we don't want IRQs? */ + bgmac_chip_intrs_on(bgmac); + } else { + for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) { + ring = &bgmac->rx_ring[i]; + bgmac_dma_rx_enable(bgmac, ring); + } + } + + bgmac_enable(bgmac); +} + +static irqreturn_t bgmac_interrupt(int irq, void *dev_id) +{ + struct bgmac *bgmac = netdev_priv(dev_id); + + u32 int_status = bgmac_read(bgmac, BGMAC_INT_STATUS); + int_status &= bgmac->int_mask; + + if (!int_status) + return IRQ_NONE; + + /* Ack */ + bgmac_write(bgmac, BGMAC_INT_STATUS, int_status); + + /* Disable new interrupts until handling existing ones */ + bgmac_chip_intrs_off(bgmac); + + bgmac->int_status = int_status; + + napi_schedule(&bgmac->napi); + + return IRQ_HANDLED; +} + +static int bgmac_poll(struct napi_struct *napi, int weight) +{ + struct bgmac *bgmac = container_of(napi, struct bgmac, napi); + struct bgmac_dma_ring *ring; + int handled = 0; + + if (bgmac->int_status & BGMAC_IS_TX0) { + ring = &bgmac->tx_ring[0]; + bgmac_dma_tx_free(bgmac, ring); + bgmac->int_status &= ~BGMAC_IS_TX0; + } + + if (bgmac->int_status & BGMAC_IS_RX) { + ring = &bgmac->rx_ring[0]; + handled += bgmac_dma_rx_read(bgmac, ring, weight); + bgmac->int_status &= ~BGMAC_IS_RX; + } + + if (bgmac->int_status) { + bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", bgmac->int_status); + bgmac->int_status = 0; + } + + if (handled < weight) + napi_complete(napi); + + bgmac_chip_intrs_on(bgmac); + + return handled; +} + +/************************************************** + * net_device_ops + **************************************************/ + +static int bgmac_open(struct net_device *net_dev) +{ + struct bgmac *bgmac = netdev_priv(net_dev); + int err = 0; + + bgmac_chip_reset(bgmac); + /* Specs say about reclaiming rings here, but we do that in DMA init */ + bgmac_chip_init(bgmac, true); + + err = request_irq(bgmac->core->irq, bgmac_interrupt, IRQF_SHARED, + KBUILD_MODNAME, net_dev); + if (err < 0) { + bgmac_err(bgmac, "IRQ request error: %d!\n", err); + goto err_out; + } + napi_enable(&bgmac->napi); + + netif_carrier_on(net_dev); + +err_out: + return err; +} + +static int bgmac_stop(struct net_device *net_dev) +{ + struct bgmac *bgmac = netdev_priv(net_dev); + + netif_carrier_off(net_dev); + + napi_disable(&bgmac->napi); + bgmac_chip_intrs_off(bgmac); + free_irq(bgmac->core->irq, net_dev); + + bgmac_chip_reset(bgmac); + + return 0; +} + +static netdev_tx_t bgmac_start_xmit(struct sk_buff *skb, + struct net_device *net_dev) +{ + struct bgmac *bgmac = netdev_priv(net_dev); + struct bgmac_dma_ring *ring; + + /* No QOS support yet */ + ring = &bgmac->tx_ring[0]; + return bgmac_dma_tx_add(bgmac, ring, skb); +} + +static int bgmac_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd) +{ + struct bgmac *bgmac = netdev_priv(net_dev); + struct mii_ioctl_data *data = if_mii(ifr); + + switch (cmd) { + case SIOCGMIIPHY: + data->phy_id = bgmac->phyaddr; + /* fallthru */ + case SIOCGMIIREG: + if (!netif_running(net_dev)) + return -EAGAIN; + data->val_out = bgmac_phy_read(bgmac, data->phy_id, + data->reg_num & 0x1f); + return 0; + case SIOCSMIIREG: + if (!netif_running(net_dev)) + return -EAGAIN; + bgmac_phy_write(bgmac, data->phy_id, data->reg_num & 0x1f, + data->val_in); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static const struct net_device_ops bgmac_netdev_ops = { + .ndo_open = bgmac_open, + .ndo_stop = bgmac_stop, + .ndo_start_xmit = bgmac_start_xmit, + .ndo_set_mac_address = eth_mac_addr, /* generic, sets dev_addr */ + .ndo_do_ioctl = bgmac_ioctl, +}; + +/************************************************** + * ethtool_ops + **************************************************/ + +static int bgmac_get_settings(struct net_device *net_dev, + struct ethtool_cmd *cmd) +{ + struct bgmac *bgmac = netdev_priv(net_dev); + + cmd->supported = SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_1000baseT_Half | + SUPPORTED_1000baseT_Full | + SUPPORTED_Autoneg; + + if (bgmac->autoneg) { + WARN_ON(cmd->advertising); + if (bgmac->full_duplex) { + if (bgmac->speed & BGMAC_SPEED_10) + cmd->advertising |= ADVERTISED_10baseT_Full; + if (bgmac->speed & BGMAC_SPEED_100) + cmd->advertising |= ADVERTISED_100baseT_Full; + if (bgmac->speed & BGMAC_SPEED_1000) + cmd->advertising |= ADVERTISED_1000baseT_Full; + } else { + if (bgmac->speed & BGMAC_SPEED_10) + cmd->advertising |= ADVERTISED_10baseT_Half; + if (bgmac->speed & BGMAC_SPEED_100) + cmd->advertising |= ADVERTISED_100baseT_Half; + if (bgmac->speed & BGMAC_SPEED_1000) + cmd->advertising |= ADVERTISED_1000baseT_Half; + } + } else { + switch (bgmac->speed) { + case BGMAC_SPEED_10: + ethtool_cmd_speed_set(cmd, SPEED_10); + break; + case BGMAC_SPEED_100: + ethtool_cmd_speed_set(cmd, SPEED_100); + break; + case BGMAC_SPEED_1000: + ethtool_cmd_speed_set(cmd, SPEED_1000); + break; + } + } + + cmd->duplex = bgmac->full_duplex ? DUPLEX_FULL : DUPLEX_HALF; + + cmd->autoneg = bgmac->autoneg; + + return 0; +} + +#if 0 +static int bgmac_set_settings(struct net_device *net_dev, + struct ethtool_cmd *cmd) +{ + struct bgmac *bgmac = netdev_priv(net_dev); + + return -1; +} +#endif + +static void bgmac_get_drvinfo(struct net_device *net_dev, + struct ethtool_drvinfo *info) +{ + strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); + strlcpy(info->bus_info, "BCMA", sizeof(info->bus_info)); +} + +static const struct ethtool_ops bgmac_ethtool_ops = { + .get_settings = bgmac_get_settings, + .get_drvinfo = bgmac_get_drvinfo, +}; + +/************************************************** + * BCMA bus ops + **************************************************/ + +/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipattach */ +static int bgmac_probe(struct bcma_device *core) +{ + struct net_device *net_dev; + struct bgmac *bgmac; + struct ssb_sprom *sprom = &core->bus->sprom; + u8 *mac = core->core_unit ? sprom->et1mac : sprom->et0mac; + int err; + + /* We don't support 2nd, 3rd, ... units, SPROM has to be adjusted */ + if (core->core_unit > 1) { + pr_err("Unsupported core_unit %d\n", core->core_unit); + return -ENOTSUPP; + } + + /* Allocation and references */ + net_dev = alloc_etherdev(sizeof(*bgmac)); + if (!net_dev) + return -ENOMEM; + net_dev->netdev_ops = &bgmac_netdev_ops; + net_dev->irq = core->irq; + SET_ETHTOOL_OPS(net_dev, &bgmac_ethtool_ops); + bgmac = netdev_priv(net_dev); + bgmac->net_dev = net_dev; + bgmac->core = core; + bcma_set_drvdata(core, bgmac); + + /* Defaults */ + bgmac->autoneg = true; + bgmac->full_duplex = true; + bgmac->speed = BGMAC_SPEED_10 | BGMAC_SPEED_100 | BGMAC_SPEED_1000; + memcpy(bgmac->net_dev->dev_addr, mac, ETH_ALEN); + + /* On BCM4706 we need common core to access PHY */ + if (core->id.id == BCMA_CORE_4706_MAC_GBIT && + !core->bus->drv_gmac_cmn.core) { + bgmac_err(bgmac, "GMAC CMN core not found (required for BCM4706)\n"); + err = -ENODEV; + goto err_netdev_free; + } + bgmac->cmn = core->bus->drv_gmac_cmn.core; + + bgmac->phyaddr = core->core_unit ? sprom->et1phyaddr : + sprom->et0phyaddr; + bgmac->phyaddr &= BGMAC_PHY_MASK; + if (bgmac->phyaddr == BGMAC_PHY_MASK) { + bgmac_err(bgmac, "No PHY found\n"); + err = -ENODEV; + goto err_netdev_free; + } + bgmac_info(bgmac, "Found PHY addr: %d%s\n", bgmac->phyaddr, + bgmac->phyaddr == BGMAC_PHY_NOREGS ? " (NOREGS)" : ""); + + if (core->bus->hosttype == BCMA_HOSTTYPE_PCI) { + bgmac_err(bgmac, "PCI setup not implemented\n"); + err = -ENOTSUPP; + goto err_netdev_free; + } + + bgmac_chip_reset(bgmac); + + err = bgmac_dma_alloc(bgmac); + if (err) { + bgmac_err(bgmac, "Unable to alloc memory for DMA\n"); + goto err_netdev_free; + } + + bgmac->int_mask = BGMAC_IS_ERRMASK | BGMAC_IS_RX | BGMAC_IS_TX_MASK; + if (nvram_getenv("et0_no_txint", NULL, 0) == 0) + bgmac->int_mask &= ~BGMAC_IS_TX_MASK; + + /* TODO: reset the external phy. Specs are needed */ + bgmac_phy_reset(bgmac); + + bgmac->has_robosw = !!(core->bus->sprom.boardflags_lo & + BGMAC_BFL_ENETROBO); + if (bgmac->has_robosw) + bgmac_warn(bgmac, "Support for Roboswitch not implemented\n"); + + if (core->bus->sprom.boardflags_lo & BGMAC_BFL_ENETADM) + bgmac_warn(bgmac, "Support for ADMtek ethernet switch not implemented\n"); + + err = register_netdev(bgmac->net_dev); + if (err) { + bgmac_err(bgmac, "Cannot register net device\n"); + err = -ENOTSUPP; + goto err_dma_free; + } + + netif_carrier_off(net_dev); + + netif_napi_add(net_dev, &bgmac->napi, bgmac_poll, BGMAC_WEIGHT); + + return 0; + +err_dma_free: + bgmac_dma_free(bgmac); + +err_netdev_free: + bcma_set_drvdata(core, NULL); + free_netdev(net_dev); + + return err; +} + +static void bgmac_remove(struct bcma_device *core) +{ + struct bgmac *bgmac = bcma_get_drvdata(core); + + netif_napi_del(&bgmac->napi); + unregister_netdev(bgmac->net_dev); + bgmac_dma_free(bgmac); + bcma_set_drvdata(core, NULL); + free_netdev(bgmac->net_dev); +} + +static struct bcma_driver bgmac_bcma_driver = { + .name = KBUILD_MODNAME, + .id_table = bgmac_bcma_tbl, + .probe = bgmac_probe, + .remove = bgmac_remove, +}; + +static int __init bgmac_init(void) +{ + int err; + + err = bcma_driver_register(&bgmac_bcma_driver); + if (err) + return err; + pr_info("Broadcom 47xx GBit MAC driver loaded\n"); + + return 0; +} + +static void __exit bgmac_exit(void) +{ + bcma_driver_unregister(&bgmac_bcma_driver); +} + +module_init(bgmac_init) +module_exit(bgmac_exit) + +MODULE_AUTHOR("Rafał Miłecki"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h new file mode 100644 index 000000000000..129947017041 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bgmac.h @@ -0,0 +1,456 @@ +#ifndef _BGMAC_H +#define _BGMAC_H + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define bgmac_err(bgmac, fmt, ...) \ + dev_err(&(bgmac)->core->dev, fmt, ##__VA_ARGS__) +#define bgmac_warn(bgmac, fmt, ...) \ + dev_warn(&(bgmac)->core->dev, fmt, ##__VA_ARGS__) +#define bgmac_info(bgmac, fmt, ...) \ + dev_info(&(bgmac)->core->dev, fmt, ##__VA_ARGS__) +#define bgmac_dbg(bgmac, fmt, ...) \ + dev_dbg(&(bgmac)->core->dev, fmt, ##__VA_ARGS__) + +#include +#include + +#define BGMAC_DEV_CTL 0x000 +#define BGMAC_DC_TSM 0x00000002 +#define BGMAC_DC_CFCO 0x00000004 +#define BGMAC_DC_RLSS 0x00000008 +#define BGMAC_DC_MROR 0x00000010 +#define BGMAC_DC_FCM_MASK 0x00000060 +#define BGMAC_DC_FCM_SHIFT 5 +#define BGMAC_DC_NAE 0x00000080 +#define BGMAC_DC_TF 0x00000100 +#define BGMAC_DC_RDS_MASK 0x00030000 +#define BGMAC_DC_RDS_SHIFT 16 +#define BGMAC_DC_TDS_MASK 0x000c0000 +#define BGMAC_DC_TDS_SHIFT 18 +#define BGMAC_DEV_STATUS 0x004 /* Configuration of the interface */ +#define BGMAC_DS_RBF 0x00000001 +#define BGMAC_DS_RDF 0x00000002 +#define BGMAC_DS_RIF 0x00000004 +#define BGMAC_DS_TBF 0x00000008 +#define BGMAC_DS_TDF 0x00000010 +#define BGMAC_DS_TIF 0x00000020 +#define BGMAC_DS_PO 0x00000040 +#define BGMAC_DS_MM_MASK 0x00000300 /* Mode of the interface */ +#define BGMAC_DS_MM_SHIFT 8 +#define BGMAC_BIST_STATUS 0x00c +#define BGMAC_INT_STATUS 0x020 /* Interrupt status */ +#define BGMAC_IS_MRO 0x00000001 +#define BGMAC_IS_MTO 0x00000002 +#define BGMAC_IS_TFD 0x00000004 +#define BGMAC_IS_LS 0x00000008 +#define BGMAC_IS_MDIO 0x00000010 +#define BGMAC_IS_MR 0x00000020 +#define BGMAC_IS_MT 0x00000040 +#define BGMAC_IS_TO 0x00000080 +#define BGMAC_IS_DESC_ERR 0x00000400 /* Descriptor error */ +#define BGMAC_IS_DATA_ERR 0x00000800 /* Data error */ +#define BGMAC_IS_DESC_PROT_ERR 0x00001000 /* Descriptor protocol error */ +#define BGMAC_IS_RX_DESC_UNDERF 0x00002000 /* Receive descriptor underflow */ +#define BGMAC_IS_RX_F_OVERF 0x00004000 /* Receive FIFO overflow */ +#define BGMAC_IS_TX_F_UNDERF 0x00008000 /* Transmit FIFO underflow */ +#define BGMAC_IS_RX 0x00010000 /* Interrupt for RX queue 0 */ +#define BGMAC_IS_TX0 0x01000000 /* Interrupt for TX queue 0 */ +#define BGMAC_IS_TX1 0x02000000 /* Interrupt for TX queue 1 */ +#define BGMAC_IS_TX2 0x04000000 /* Interrupt for TX queue 2 */ +#define BGMAC_IS_TX3 0x08000000 /* Interrupt for TX queue 3 */ +#define BGMAC_IS_TX_MASK 0x0f000000 +#define BGMAC_IS_INTMASK 0x0f01fcff +#define BGMAC_IS_ERRMASK 0x0000fc00 +#define BGMAC_INT_MASK 0x024 /* Interrupt mask */ +#define BGMAC_GP_TIMER 0x028 +#define BGMAC_INT_RECV_LAZY 0x100 +#define BGMAC_IRL_TO_MASK 0x00ffffff +#define BGMAC_IRL_FC_MASK 0xff000000 +#define BGMAC_IRL_FC_SHIFT 24 /* Shift the number of interrupts triggered per received frame */ +#define BGMAC_FLOW_CTL_THRESH 0x104 /* Flow control thresholds */ +#define BGMAC_WRRTHRESH 0x108 +#define BGMAC_GMAC_IDLE_CNT_THRESH 0x10c +#define BGMAC_PHY_ACCESS 0x180 /* PHY access address */ +#define BGMAC_PA_DATA_MASK 0x0000ffff +#define BGMAC_PA_ADDR_MASK 0x001f0000 +#define BGMAC_PA_ADDR_SHIFT 16 +#define BGMAC_PA_REG_MASK 0x1f000000 +#define BGMAC_PA_REG_SHIFT 24 +#define BGMAC_PA_WRITE 0x20000000 +#define BGMAC_PA_START 0x40000000 +#define BGMAC_PHY_CNTL 0x188 /* PHY control address */ +#define BGMAC_PC_EPA_MASK 0x0000001f +#define BGMAC_PC_MCT_MASK 0x007f0000 +#define BGMAC_PC_MCT_SHIFT 16 +#define BGMAC_PC_MTE 0x00800000 +#define BGMAC_TXQ_CTL 0x18c +#define BGMAC_TXQ_CTL_DBT_MASK 0x00000fff +#define BGMAC_TXQ_CTL_DBT_SHIFT 0 +#define BGMAC_RXQ_CTL 0x190 +#define BGMAC_RXQ_CTL_DBT_MASK 0x00000fff +#define BGMAC_RXQ_CTL_DBT_SHIFT 0 +#define BGMAC_RXQ_CTL_PTE 0x00001000 +#define BGMAC_RXQ_CTL_MDP_MASK 0x3f000000 +#define BGMAC_RXQ_CTL_MDP_SHIFT 24 +#define BGMAC_GPIO_SELECT 0x194 +#define BGMAC_GPIO_OUTPUT_EN 0x198 +/* For 0x1e0 see BCMA_CLKCTLST */ +#define BGMAC_HW_WAR 0x1e4 +#define BGMAC_PWR_CTL 0x1e8 +#define BGMAC_DMA_BASE0 0x200 /* Tx and Rx controller */ +#define BGMAC_DMA_BASE1 0x240 /* Tx controller only */ +#define BGMAC_DMA_BASE2 0x280 /* Tx controller only */ +#define BGMAC_DMA_BASE3 0x2C0 /* Tx controller only */ +#define BGMAC_TX_GOOD_OCTETS 0x300 +#define BGMAC_TX_GOOD_OCTETS_HIGH 0x304 +#define BGMAC_TX_GOOD_PKTS 0x308 +#define BGMAC_TX_OCTETS 0x30c +#define BGMAC_TX_OCTETS_HIGH 0x310 +#define BGMAC_TX_PKTS 0x314 +#define BGMAC_TX_BROADCAST_PKTS 0x318 +#define BGMAC_TX_MULTICAST_PKTS 0x31c +#define BGMAC_TX_LEN_64 0x320 +#define BGMAC_TX_LEN_65_TO_127 0x324 +#define BGMAC_TX_LEN_128_TO_255 0x328 +#define BGMAC_TX_LEN_256_TO_511 0x32c +#define BGMAC_TX_LEN_512_TO_1023 0x330 +#define BGMAC_TX_LEN_1024_TO_1522 0x334 +#define BGMAC_TX_LEN_1523_TO_2047 0x338 +#define BGMAC_TX_LEN_2048_TO_4095 0x33c +#define BGMAC_TX_LEN_4095_TO_8191 0x340 +#define BGMAC_TX_LEN_8192_TO_MAX 0x344 +#define BGMAC_TX_JABBER_PKTS 0x348 /* Error */ +#define BGMAC_TX_OVERSIZE_PKTS 0x34c /* Error */ +#define BGMAC_TX_FRAGMENT_PKTS 0x350 +#define BGMAC_TX_UNDERRUNS 0x354 /* Error */ +#define BGMAC_TX_TOTAL_COLS 0x358 +#define BGMAC_TX_SINGLE_COLS 0x35c +#define BGMAC_TX_MULTIPLE_COLS 0x360 +#define BGMAC_TX_EXCESSIVE_COLS 0x364 /* Error */ +#define BGMAC_TX_LATE_COLS 0x368 /* Error */ +#define BGMAC_TX_DEFERED 0x36c +#define BGMAC_TX_CARRIER_LOST 0x370 +#define BGMAC_TX_PAUSE_PKTS 0x374 +#define BGMAC_TX_UNI_PKTS 0x378 +#define BGMAC_TX_Q0_PKTS 0x37c +#define BGMAC_TX_Q0_OCTETS 0x380 +#define BGMAC_TX_Q0_OCTETS_HIGH 0x384 +#define BGMAC_TX_Q1_PKTS 0x388 +#define BGMAC_TX_Q1_OCTETS 0x38c +#define BGMAC_TX_Q1_OCTETS_HIGH 0x390 +#define BGMAC_TX_Q2_PKTS 0x394 +#define BGMAC_TX_Q2_OCTETS 0x398 +#define BGMAC_TX_Q2_OCTETS_HIGH 0x39c +#define BGMAC_TX_Q3_PKTS 0x3a0 +#define BGMAC_TX_Q3_OCTETS 0x3a4 +#define BGMAC_TX_Q3_OCTETS_HIGH 0x3a8 +#define BGMAC_RX_GOOD_OCTETS 0x3b0 +#define BGMAC_RX_GOOD_OCTETS_HIGH 0x3b4 +#define BGMAC_RX_GOOD_PKTS 0x3b8 +#define BGMAC_RX_OCTETS 0x3bc +#define BGMAC_RX_OCTETS_HIGH 0x3c0 +#define BGMAC_RX_PKTS 0x3c4 +#define BGMAC_RX_BROADCAST_PKTS 0x3c8 +#define BGMAC_RX_MULTICAST_PKTS 0x3cc +#define BGMAC_RX_LEN_64 0x3d0 +#define BGMAC_RX_LEN_65_TO_127 0x3d4 +#define BGMAC_RX_LEN_128_TO_255 0x3d8 +#define BGMAC_RX_LEN_256_TO_511 0x3dc +#define BGMAC_RX_LEN_512_TO_1023 0x3e0 +#define BGMAC_RX_LEN_1024_TO_1522 0x3e4 +#define BGMAC_RX_LEN_1523_TO_2047 0x3e8 +#define BGMAC_RX_LEN_2048_TO_4095 0x3ec +#define BGMAC_RX_LEN_4095_TO_8191 0x3f0 +#define BGMAC_RX_LEN_8192_TO_MAX 0x3f4 +#define BGMAC_RX_JABBER_PKTS 0x3f8 /* Error */ +#define BGMAC_RX_OVERSIZE_PKTS 0x3fc /* Error */ +#define BGMAC_RX_FRAGMENT_PKTS 0x400 +#define BGMAC_RX_MISSED_PKTS 0x404 /* Error */ +#define BGMAC_RX_CRC_ALIGN_ERRS 0x408 /* Error */ +#define BGMAC_RX_UNDERSIZE 0x40c /* Error */ +#define BGMAC_RX_CRC_ERRS 0x410 /* Error */ +#define BGMAC_RX_ALIGN_ERRS 0x414 /* Error */ +#define BGMAC_RX_SYMBOL_ERRS 0x418 /* Error */ +#define BGMAC_RX_PAUSE_PKTS 0x41c +#define BGMAC_RX_NONPAUSE_PKTS 0x420 +#define BGMAC_RX_SACHANGES 0x424 +#define BGMAC_RX_UNI_PKTS 0x428 +#define BGMAC_UNIMAC_VERSION 0x800 +#define BGMAC_HDBKP_CTL 0x804 +#define BGMAC_CMDCFG 0x808 /* Configuration */ +#define BGMAC_CMDCFG_TE 0x00000001 /* Set to activate TX */ +#define BGMAC_CMDCFG_RE 0x00000002 /* Set to activate RX */ +#define BGMAC_CMDCFG_ES_MASK 0x0000000c /* Ethernet speed see gmac_speed */ +#define BGMAC_CMDCFG_ES_10 0x00000000 +#define BGMAC_CMDCFG_ES_100 0x00000004 +#define BGMAC_CMDCFG_ES_1000 0x00000008 +#define BGMAC_CMDCFG_PROM 0x00000010 /* Set to activate promiscuous mode */ +#define BGMAC_CMDCFG_PAD_EN 0x00000020 +#define BGMAC_CMDCFG_CF 0x00000040 +#define BGMAC_CMDCFG_PF 0x00000080 +#define BGMAC_CMDCFG_RPI 0x00000100 /* Unset to enable 802.3x tx flow control */ +#define BGMAC_CMDCFG_TAI 0x00000200 +#define BGMAC_CMDCFG_HD 0x00000400 /* Set if in half duplex mode */ +#define BGMAC_CMDCFG_HD_SHIFT 10 +#define BGMAC_CMDCFG_SR 0x00000800 /* Set to reset mode */ +#define BGMAC_CMDCFG_ML 0x00008000 /* Set to activate mac loopback mode */ +#define BGMAC_CMDCFG_AE 0x00400000 +#define BGMAC_CMDCFG_CFE 0x00800000 +#define BGMAC_CMDCFG_NLC 0x01000000 +#define BGMAC_CMDCFG_RL 0x02000000 +#define BGMAC_CMDCFG_RED 0x04000000 +#define BGMAC_CMDCFG_PE 0x08000000 +#define BGMAC_CMDCFG_TPI 0x10000000 +#define BGMAC_CMDCFG_AT 0x20000000 +#define BGMAC_MACADDR_HIGH 0x80c /* High 4 octets of own mac address */ +#define BGMAC_MACADDR_LOW 0x810 /* Low 2 octets of own mac address */ +#define BGMAC_RXMAX_LENGTH 0x814 /* Max receive frame length with vlan tag */ +#define BGMAC_PAUSEQUANTA 0x818 +#define BGMAC_MAC_MODE 0x844 +#define BGMAC_OUTERTAG 0x848 +#define BGMAC_INNERTAG 0x84c +#define BGMAC_TXIPG 0x85c +#define BGMAC_PAUSE_CTL 0xb30 +#define BGMAC_TX_FLUSH 0xb34 +#define BGMAC_RX_STATUS 0xb38 +#define BGMAC_TX_STATUS 0xb3c + +#define BGMAC_PHY_CTL 0x00 +#define BGMAC_PHY_CTL_SPEED_MSB 0x0040 +#define BGMAC_PHY_CTL_DUPLEX 0x0100 /* duplex mode */ +#define BGMAC_PHY_CTL_RESTART 0x0200 /* restart autonegotiation */ +#define BGMAC_PHY_CTL_ANENAB 0x1000 /* enable autonegotiation */ +#define BGMAC_PHY_CTL_SPEED 0x2000 +#define BGMAC_PHY_CTL_LOOP 0x4000 /* loopback */ +#define BGMAC_PHY_CTL_RESET 0x8000 /* reset */ +/* Helpers */ +#define BGMAC_PHY_CTL_SPEED_10 0 +#define BGMAC_PHY_CTL_SPEED_100 BGMAC_PHY_CTL_SPEED +#define BGMAC_PHY_CTL_SPEED_1000 BGMAC_PHY_CTL_SPEED_MSB +#define BGMAC_PHY_ADV 0x04 +#define BGMAC_PHY_ADV_10HALF 0x0020 /* advertise 10MBits/s half duplex */ +#define BGMAC_PHY_ADV_10FULL 0x0040 /* advertise 10MBits/s full duplex */ +#define BGMAC_PHY_ADV_100HALF 0x0080 /* advertise 100MBits/s half duplex */ +#define BGMAC_PHY_ADV_100FULL 0x0100 /* advertise 100MBits/s full duplex */ +#define BGMAC_PHY_ADV2 0x09 +#define BGMAC_PHY_ADV2_1000HALF 0x0100 /* advertise 1000MBits/s half duplex */ +#define BGMAC_PHY_ADV2_1000FULL 0x0200 /* advertise 1000MBits/s full duplex */ + +/* BCMA GMAC core specific IO Control (BCMA_IOCTL) flags */ +#define BGMAC_BCMA_IOCTL_SW_CLKEN 0x00000004 /* PHY Clock Enable */ +#define BGMAC_BCMA_IOCTL_SW_RESET 0x00000008 /* PHY Reset */ + +/* BCMA GMAC core specific IO status (BCMA_IOST) flags */ +#define BGMAC_BCMA_IOST_ATTACHED 0x00000800 + +#define BGMAC_NUM_MIB_TX_REGS \ + (((BGMAC_TX_Q3_OCTETS_HIGH - BGMAC_TX_GOOD_OCTETS) / 4) + 1) +#define BGMAC_NUM_MIB_RX_REGS \ + (((BGMAC_RX_UNI_PKTS - BGMAC_RX_GOOD_OCTETS) / 4) + 1) + +#define BGMAC_DMA_TX_CTL 0x00 +#define BGMAC_DMA_TX_ENABLE 0x00000001 +#define BGMAC_DMA_TX_SUSPEND 0x00000002 +#define BGMAC_DMA_TX_LOOPBACK 0x00000004 +#define BGMAC_DMA_TX_FLUSH 0x00000010 +#define BGMAC_DMA_TX_PARITY_DISABLE 0x00000800 +#define BGMAC_DMA_TX_ADDREXT_MASK 0x00030000 +#define BGMAC_DMA_TX_ADDREXT_SHIFT 16 +#define BGMAC_DMA_TX_INDEX 0x04 +#define BGMAC_DMA_TX_RINGLO 0x08 +#define BGMAC_DMA_TX_RINGHI 0x0C +#define BGMAC_DMA_TX_STATUS 0x10 +#define BGMAC_DMA_TX_STATDPTR 0x00001FFF +#define BGMAC_DMA_TX_STAT 0xF0000000 +#define BGMAC_DMA_TX_STAT_DISABLED 0x00000000 +#define BGMAC_DMA_TX_STAT_ACTIVE 0x10000000 +#define BGMAC_DMA_TX_STAT_IDLEWAIT 0x20000000 +#define BGMAC_DMA_TX_STAT_STOPPED 0x30000000 +#define BGMAC_DMA_TX_STAT_SUSP 0x40000000 +#define BGMAC_DMA_TX_ERROR 0x14 +#define BGMAC_DMA_TX_ERRDPTR 0x0001FFFF +#define BGMAC_DMA_TX_ERR 0xF0000000 +#define BGMAC_DMA_TX_ERR_NOERR 0x00000000 +#define BGMAC_DMA_TX_ERR_PROT 0x10000000 +#define BGMAC_DMA_TX_ERR_UNDERRUN 0x20000000 +#define BGMAC_DMA_TX_ERR_TRANSFER 0x30000000 +#define BGMAC_DMA_TX_ERR_DESCREAD 0x40000000 +#define BGMAC_DMA_TX_ERR_CORE 0x50000000 +#define BGMAC_DMA_RX_CTL 0x20 +#define BGMAC_DMA_RX_ENABLE 0x00000001 +#define BGMAC_DMA_RX_FRAME_OFFSET_MASK 0x000000FE +#define BGMAC_DMA_RX_FRAME_OFFSET_SHIFT 1 +#define BGMAC_DMA_RX_DIRECT_FIFO 0x00000100 +#define BGMAC_DMA_RX_OVERFLOW_CONT 0x00000400 +#define BGMAC_DMA_RX_PARITY_DISABLE 0x00000800 +#define BGMAC_DMA_RX_ADDREXT_MASK 0x00030000 +#define BGMAC_DMA_RX_ADDREXT_SHIFT 16 +#define BGMAC_DMA_RX_INDEX 0x24 +#define BGMAC_DMA_RX_RINGLO 0x28 +#define BGMAC_DMA_RX_RINGHI 0x2C +#define BGMAC_DMA_RX_STATUS 0x30 +#define BGMAC_DMA_RX_STATDPTR 0x00001FFF +#define BGMAC_DMA_RX_STAT 0xF0000000 +#define BGMAC_DMA_RX_STAT_DISABLED 0x00000000 +#define BGMAC_DMA_RX_STAT_ACTIVE 0x10000000 +#define BGMAC_DMA_RX_STAT_IDLEWAIT 0x20000000 +#define BGMAC_DMA_RX_STAT_STOPPED 0x30000000 +#define BGMAC_DMA_RX_STAT_SUSP 0x40000000 +#define BGMAC_DMA_RX_ERROR 0x34 +#define BGMAC_DMA_RX_ERRDPTR 0x0001FFFF +#define BGMAC_DMA_RX_ERR 0xF0000000 +#define BGMAC_DMA_RX_ERR_NOERR 0x00000000 +#define BGMAC_DMA_RX_ERR_PROT 0x10000000 +#define BGMAC_DMA_RX_ERR_UNDERRUN 0x20000000 +#define BGMAC_DMA_RX_ERR_TRANSFER 0x30000000 +#define BGMAC_DMA_RX_ERR_DESCREAD 0x40000000 +#define BGMAC_DMA_RX_ERR_CORE 0x50000000 + +#define BGMAC_DESC_CTL0_EOT 0x10000000 /* End of ring */ +#define BGMAC_DESC_CTL0_IOC 0x20000000 /* IRQ on complete */ +#define BGMAC_DESC_CTL0_SOF 0x40000000 /* Start of frame */ +#define BGMAC_DESC_CTL0_EOF 0x80000000 /* End of frame */ +#define BGMAC_DESC_CTL1_LEN 0x00001FFF + +#define BGMAC_PHY_NOREGS 0x1E +#define BGMAC_PHY_MASK 0x1F + +#define BGMAC_MAX_TX_RINGS 4 +#define BGMAC_MAX_RX_RINGS 1 + +#define BGMAC_TX_RING_SLOTS 128 +#define BGMAC_RX_RING_SLOTS 512 - 1 /* Why -1? Well, Broadcom does that... */ + +#define BGMAC_RX_HEADER_LEN 28 /* Last 24 bytes are unused. Well... */ +#define BGMAC_RX_FRAME_OFFSET 30 /* There are 2 unused bytes between header and real data */ +#define BGMAC_RX_MAX_FRAME_SIZE 1536 /* Copied from b44/tg3 */ +#define BGMAC_RX_BUF_SIZE (BGMAC_RX_FRAME_OFFSET + BGMAC_RX_MAX_FRAME_SIZE) + +#define BGMAC_BFL_ENETROBO 0x0010 /* has ephy roboswitch spi */ +#define BGMAC_BFL_ENETADM 0x0080 /* has ADMtek switch */ +#define BGMAC_BFL_ENETVLAN 0x0100 /* can do vlan */ + +#define BGMAC_CHIPCTL_1_IF_TYPE_MASK 0x00000030 +#define BGMAC_CHIPCTL_1_IF_TYPE_RMII 0x00000000 +#define BGMAC_CHIPCTL_1_IF_TYPE_MI 0x00000010 +#define BGMAC_CHIPCTL_1_IF_TYPE_RGMII 0x00000020 +#define BGMAC_CHIPCTL_1_SW_TYPE_MASK 0x000000C0 +#define BGMAC_CHIPCTL_1_SW_TYPE_EPHY 0x00000000 +#define BGMAC_CHIPCTL_1_SW_TYPE_EPHYMII 0x00000040 +#define BGMAC_CHIPCTL_1_SW_TYPE_EPHYRMII 0x00000080 +#define BGMAC_CHIPCTL_1_SW_TYPE_RGMI 0x000000C0 +#define BGMAC_CHIPCTL_1_RXC_DLL_BYPASS 0x00010000 + +#define BGMAC_SPEED_10 0x0001 +#define BGMAC_SPEED_100 0x0002 +#define BGMAC_SPEED_1000 0x0004 + +#define BGMAC_WEIGHT 64 + +#define ETHER_MAX_LEN 1518 + +struct bgmac_slot_info { + struct sk_buff *skb; + dma_addr_t dma_addr; +}; + +struct bgmac_dma_desc { + __le32 ctl0; + __le32 ctl1; + __le32 addr_low; + __le32 addr_high; +} __packed; + +enum bgmac_dma_ring_type { + BGMAC_DMA_RING_TX, + BGMAC_DMA_RING_RX, +}; + +/** + * bgmac_dma_ring - contains info about DMA ring (either TX or RX one) + * @start: index of the first slot containing data + * @end: index of a slot that can *not* be read (yet) + * + * Be really aware of the specific @end meaning. It's an index of a slot *after* + * the one containing data that can be read. If @start equals @end the ring is + * empty. + */ +struct bgmac_dma_ring { + u16 num_slots; + u16 start; + u16 end; + + u16 mmio_base; + struct bgmac_dma_desc *cpu_base; + dma_addr_t dma_base; + + struct bgmac_slot_info slots[BGMAC_RX_RING_SLOTS]; +}; + +struct bgmac_rx_header { + __le16 len; + __le16 flags; + __le16 pad[12]; +}; + +struct bgmac { + struct bcma_device *core; + struct bcma_device *cmn; /* Reference to CMN core for BCM4706 */ + struct net_device *net_dev; + struct napi_struct napi; + + /* DMA */ + struct bgmac_dma_ring tx_ring[BGMAC_MAX_TX_RINGS]; + struct bgmac_dma_ring rx_ring[BGMAC_MAX_RX_RINGS]; + + /* Stats */ + bool stats_grabbed; + u32 mib_tx_regs[BGMAC_NUM_MIB_TX_REGS]; + u32 mib_rx_regs[BGMAC_NUM_MIB_RX_REGS]; + + /* Int */ + u32 int_mask; + u32 int_status; + + /* Speed-related */ + int speed; + bool autoneg; + bool full_duplex; + + u8 phyaddr; + bool has_robosw; + + bool loopback; +}; + +static inline u32 bgmac_read(struct bgmac *bgmac, u16 offset) +{ + return bcma_read32(bgmac->core, offset); +} + +static inline void bgmac_write(struct bgmac *bgmac, u16 offset, u32 value) +{ + bcma_write32(bgmac->core, offset, value); +} + +static inline void bgmac_maskset(struct bgmac *bgmac, u16 offset, u32 mask, + u32 set) +{ + bgmac_write(bgmac, offset, (bgmac_read(bgmac, offset) & mask) | set); +} + +static inline void bgmac_mask(struct bgmac *bgmac, u16 offset, u32 mask) +{ + bgmac_maskset(bgmac, offset, mask, 0); +} + +static inline void bgmac_set(struct bgmac *bgmac, u16 offset, u32 set) +{ + bgmac_maskset(bgmac, offset, ~0, set); +} + +u16 bgmac_phy_read(struct bgmac *bgmac, u8 phyaddr, u8 reg); +void bgmac_phy_write(struct bgmac *bgmac, u8 phyaddr, u8 reg, u16 value); + +#endif /* _BGMAC_H */ diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 9a0e3fa3ca95..ee332fab825b 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -634,4 +634,6 @@ extern void bcma_chipco_regctl_maskset(struct bcma_drv_cc *cc, u32 offset, u32 mask, u32 set); extern void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid); +extern u32 bcma_pmu_get_bus_clock(struct bcma_drv_cc *cc); + #endif /* LINUX_BCMA_DRIVER_CC_H_ */ -- cgit v1.2.3-71-gd317 From 416186fbf8c5b4e4465a10c6ac7a45b6c47144b2 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 10 Jan 2013 08:56:51 +0000 Subject: net: Split core bits of netdev_pick_tx into __netdev_pick_tx This change splits the core bits of netdev_pick_tx into a separate function. The main idea behind this is to make this code accessible to select queue functions when they decide to process the standard path instead of their own custom path in their select queue routine. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 57 ++++++++++++++++++++++++++--------------------- 2 files changed, 33 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0209ac328e8a..608c3ac4d045 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1403,6 +1403,7 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, extern struct netdev_queue *netdev_pick_tx(struct net_device *dev, struct sk_buff *skb); +extern u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb); /* * Net namespace inlines diff --git a/net/core/dev.c b/net/core/dev.c index 4794cae84939..81ff67149f62 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2495,37 +2495,44 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) #endif } -struct netdev_queue *netdev_pick_tx(struct net_device *dev, - struct sk_buff *skb) +u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) { - int queue_index; - const struct net_device_ops *ops = dev->netdev_ops; - - if (dev->real_num_tx_queues == 1) - queue_index = 0; - else if (ops->ndo_select_queue) { - queue_index = ops->ndo_select_queue(dev, skb); - queue_index = dev_cap_txqueue(dev, queue_index); - } else { - struct sock *sk = skb->sk; - queue_index = sk_tx_queue_get(sk); + struct sock *sk = skb->sk; + int queue_index = sk_tx_queue_get(sk); - if (queue_index < 0 || skb->ooo_okay || - queue_index >= dev->real_num_tx_queues) { - int old_index = queue_index; + if (queue_index < 0 || skb->ooo_okay || + queue_index >= dev->real_num_tx_queues) { + int new_index = get_xps_queue(dev, skb); + if (new_index < 0) + new_index = skb_tx_hash(dev, skb); - queue_index = get_xps_queue(dev, skb); - if (queue_index < 0) - queue_index = skb_tx_hash(dev, skb); - - if (queue_index != old_index && sk) { - struct dst_entry *dst = + if (queue_index != new_index && sk) { + struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1); - if (dst && skb_dst(skb) == dst) - sk_tx_queue_set(sk, queue_index); - } + if (dst && skb_dst(skb) == dst) + sk_tx_queue_set(sk, queue_index); + } + + queue_index = new_index; + } + + return queue_index; +} + +struct netdev_queue *netdev_pick_tx(struct net_device *dev, + struct sk_buff *skb) +{ + int queue_index = 0; + + if (dev->real_num_tx_queues != 1) { + const struct net_device_ops *ops = dev->netdev_ops; + if (ops->ndo_select_queue) + queue_index = ops->ndo_select_queue(dev, skb); + else + queue_index = __netdev_pick_tx(dev, skb); + queue_index = dev_cap_txqueue(dev, queue_index); } skb_set_queue_mapping(skb, queue_index); -- cgit v1.2.3-71-gd317 From 537c00de1c9ba9876b91d869e84caceefe2b8bf9 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 10 Jan 2013 08:57:02 +0000 Subject: net: Add functions netif_reset_xps_queue and netif_set_xps_queue This patch adds two functions, netif_reset_xps_queue and netif_set_xps_queue. The main idea behind these two functions is to provide a mechanism through which drivers can update their defaults in regards to XPS. Currently no such mechanism exists and as a result we cannot use XPS for things such as ATR which would require a basic configuration to start in which the Tx queues are mapped to CPUs via a 1:1 mapping. With this change I am making it possible for drivers such as ixgbe to be able to use the XPS feature by controlling the default configuration. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 13 ++++ net/core/dev.c | 155 ++++++++++++++++++++++++++++++++++++++++++++++ net/core/net-sysfs.c | 148 ++----------------------------------------- 3 files changed, 173 insertions(+), 143 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 608c3ac4d045..59fe9da4e315 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2103,6 +2103,19 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) __netif_schedule(txq->qdisc); } +#ifdef CONFIG_XPS +extern void netif_reset_xps_queue(struct net_device *dev, u16 index); +extern int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, + u16 index); +#else +static inline int netif_set_xps_queue(struct net_device *dev, + struct cpumask *mask, + u16 index) +{ + return 0; +} +#endif + /* * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used * as a distribution range limit for the returned value. diff --git a/net/core/dev.c b/net/core/dev.c index 81ff67149f62..257b29516f69 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1857,6 +1857,161 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq) } } +#ifdef CONFIG_XPS +static DEFINE_MUTEX(xps_map_mutex); +#define xmap_dereference(P) \ + rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) + +void netif_reset_xps_queue(struct net_device *dev, u16 index) +{ + struct xps_dev_maps *dev_maps; + struct xps_map *map; + int i, pos, nonempty = 0; + + mutex_lock(&xps_map_mutex); + dev_maps = xmap_dereference(dev->xps_maps); + + if (!dev_maps) + goto out_no_maps; + + for_each_possible_cpu(i) { + map = xmap_dereference(dev_maps->cpu_map[i]); + if (!map) + continue; + + for (pos = 0; pos < map->len; pos++) + if (map->queues[pos] == index) + break; + + if (pos < map->len) { + if (map->len > 1) { + map->queues[pos] = map->queues[--map->len]; + } else { + RCU_INIT_POINTER(dev_maps->cpu_map[i], NULL); + kfree_rcu(map, rcu); + map = NULL; + } + } + if (map) + nonempty = 1; + } + + if (!nonempty) { + RCU_INIT_POINTER(dev->xps_maps, NULL); + kfree_rcu(dev_maps, rcu); + } + +out_no_maps: + mutex_unlock(&xps_map_mutex); +} + +int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index) +{ + int i, cpu, pos, map_len, alloc_len, need_set; + struct xps_map *map, *new_map; + struct xps_dev_maps *dev_maps, *new_dev_maps; + int nonempty = 0; + int numa_node_id = -2; + int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES); + + new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); + if (!new_dev_maps) + return -ENOMEM; + + mutex_lock(&xps_map_mutex); + + dev_maps = xmap_dereference(dev->xps_maps); + + for_each_possible_cpu(cpu) { + map = dev_maps ? + xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; + new_map = map; + if (map) { + for (pos = 0; pos < map->len; pos++) + if (map->queues[pos] == index) + break; + map_len = map->len; + alloc_len = map->alloc_len; + } else + pos = map_len = alloc_len = 0; + + need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu); +#ifdef CONFIG_NUMA + if (need_set) { + if (numa_node_id == -2) + numa_node_id = cpu_to_node(cpu); + else if (numa_node_id != cpu_to_node(cpu)) + numa_node_id = -1; + } +#endif + if (need_set && pos >= map_len) { + /* Need to add queue to this CPU's map */ + if (map_len >= alloc_len) { + alloc_len = alloc_len ? + 2 * alloc_len : XPS_MIN_MAP_ALLOC; + new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), + GFP_KERNEL, + cpu_to_node(cpu)); + if (!new_map) + goto error; + new_map->alloc_len = alloc_len; + for (i = 0; i < map_len; i++) + new_map->queues[i] = map->queues[i]; + new_map->len = map_len; + } + new_map->queues[new_map->len++] = index; + } else if (!need_set && pos < map_len) { + /* Need to remove queue from this CPU's map */ + if (map_len > 1) + new_map->queues[pos] = + new_map->queues[--new_map->len]; + else + new_map = NULL; + } + RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map); + } + + /* Cleanup old maps */ + for_each_possible_cpu(cpu) { + map = dev_maps ? + xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; + if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map) + kfree_rcu(map, rcu); + if (new_dev_maps->cpu_map[cpu]) + nonempty = 1; + } + + if (nonempty) { + rcu_assign_pointer(dev->xps_maps, new_dev_maps); + } else { + kfree(new_dev_maps); + RCU_INIT_POINTER(dev->xps_maps, NULL); + } + + if (dev_maps) + kfree_rcu(dev_maps, rcu); + + netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), + (numa_node_id >= 0) ? numa_node_id : + NUMA_NO_NODE); + + mutex_unlock(&xps_map_mutex); + + return 0; +error: + mutex_unlock(&xps_map_mutex); + + if (new_dev_maps) + for_each_possible_cpu(i) + kfree(rcu_dereference_protected( + new_dev_maps->cpu_map[i], + 1)); + kfree(new_dev_maps); + return -ENOMEM; +} +EXPORT_SYMBOL(netif_set_xps_queue); + +#endif /* * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 29c884a74c38..5ad489d5d062 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1002,54 +1002,14 @@ static ssize_t show_xps_map(struct netdev_queue *queue, return len; } -static DEFINE_MUTEX(xps_map_mutex); -#define xmap_dereference(P) \ - rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) - static void xps_queue_release(struct netdev_queue *queue) { struct net_device *dev = queue->dev; - struct xps_dev_maps *dev_maps; - struct xps_map *map; unsigned long index; - int i, pos, nonempty = 0; index = get_netdev_queue_index(queue); - mutex_lock(&xps_map_mutex); - dev_maps = xmap_dereference(dev->xps_maps); - - if (dev_maps) { - for_each_possible_cpu(i) { - map = xmap_dereference(dev_maps->cpu_map[i]); - if (!map) - continue; - - for (pos = 0; pos < map->len; pos++) - if (map->queues[pos] == index) - break; - - if (pos < map->len) { - if (map->len > 1) - map->queues[pos] = - map->queues[--map->len]; - else { - RCU_INIT_POINTER(dev_maps->cpu_map[i], - NULL); - kfree_rcu(map, rcu); - map = NULL; - } - } - if (map) - nonempty = 1; - } - - if (!nonempty) { - RCU_INIT_POINTER(dev->xps_maps, NULL); - kfree_rcu(dev_maps, rcu); - } - } - mutex_unlock(&xps_map_mutex); + netif_reset_xps_queue(dev, index); } static ssize_t store_xps_map(struct netdev_queue *queue, @@ -1057,13 +1017,9 @@ static ssize_t store_xps_map(struct netdev_queue *queue, const char *buf, size_t len) { struct net_device *dev = queue->dev; - cpumask_var_t mask; - int err, i, cpu, pos, map_len, alloc_len, need_set; unsigned long index; - struct xps_map *map, *new_map; - struct xps_dev_maps *dev_maps, *new_dev_maps; - int nonempty = 0; - int numa_node_id = -2; + cpumask_var_t mask; + int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -1079,105 +1035,11 @@ static ssize_t store_xps_map(struct netdev_queue *queue, return err; } - new_dev_maps = kzalloc(max_t(unsigned int, - XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL); - if (!new_dev_maps) { - free_cpumask_var(mask); - return -ENOMEM; - } - - mutex_lock(&xps_map_mutex); - - dev_maps = xmap_dereference(dev->xps_maps); - - for_each_possible_cpu(cpu) { - map = dev_maps ? - xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; - new_map = map; - if (map) { - for (pos = 0; pos < map->len; pos++) - if (map->queues[pos] == index) - break; - map_len = map->len; - alloc_len = map->alloc_len; - } else - pos = map_len = alloc_len = 0; - - need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu); -#ifdef CONFIG_NUMA - if (need_set) { - if (numa_node_id == -2) - numa_node_id = cpu_to_node(cpu); - else if (numa_node_id != cpu_to_node(cpu)) - numa_node_id = -1; - } -#endif - if (need_set && pos >= map_len) { - /* Need to add queue to this CPU's map */ - if (map_len >= alloc_len) { - alloc_len = alloc_len ? - 2 * alloc_len : XPS_MIN_MAP_ALLOC; - new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), - GFP_KERNEL, - cpu_to_node(cpu)); - if (!new_map) - goto error; - new_map->alloc_len = alloc_len; - for (i = 0; i < map_len; i++) - new_map->queues[i] = map->queues[i]; - new_map->len = map_len; - } - new_map->queues[new_map->len++] = index; - } else if (!need_set && pos < map_len) { - /* Need to remove queue from this CPU's map */ - if (map_len > 1) - new_map->queues[pos] = - new_map->queues[--new_map->len]; - else - new_map = NULL; - } - RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map); - } - - /* Cleanup old maps */ - for_each_possible_cpu(cpu) { - map = dev_maps ? - xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; - if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map) - kfree_rcu(map, rcu); - if (new_dev_maps->cpu_map[cpu]) - nonempty = 1; - } - - if (nonempty) { - rcu_assign_pointer(dev->xps_maps, new_dev_maps); - } else { - kfree(new_dev_maps); - RCU_INIT_POINTER(dev->xps_maps, NULL); - } - - if (dev_maps) - kfree_rcu(dev_maps, rcu); - - netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id : - NUMA_NO_NODE); - - mutex_unlock(&xps_map_mutex); + err = netif_set_xps_queue(dev, mask, index); free_cpumask_var(mask); - return len; -error: - mutex_unlock(&xps_map_mutex); - - if (new_dev_maps) - for_each_possible_cpu(i) - kfree(rcu_dereference_protected( - new_dev_maps->cpu_map[i], - 1)); - kfree(new_dev_maps); - free_cpumask_var(mask); - return -ENOMEM; + return err ? : len; } static struct netdev_queue_attribute xps_cpus_attribute = -- cgit v1.2.3-71-gd317 From 024e9679a2daaa67642693366fb63a6b8c61b9f3 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 10 Jan 2013 08:57:46 +0000 Subject: net: Add support for XPS without sysfs being defined This patch makes it so that we can support transmit packet steering without sysfs needing to be enabled. The reason for making this change is to make it so that a driver can make use of the XPS even while the sysfs portion of the interface is not present. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - net/Kconfig | 2 +- net/core/dev.c | 26 ++++++++++++++++++++------ net/core/net-sysfs.c | 14 -------------- 4 files changed, 21 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 59fe9da4e315..aa7ad8a96e70 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2104,7 +2104,6 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) } #ifdef CONFIG_XPS -extern void netif_reset_xps_queue(struct net_device *dev, u16 index); extern int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index); #else diff --git a/net/Kconfig b/net/Kconfig index 30b48f523135..3cc5be0fe420 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -232,7 +232,7 @@ config RFS_ACCEL config XPS boolean - depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS + depends on SMP && USE_GENERIC_SMP_HELPERS default y config NETPRIO_CGROUP diff --git a/net/core/dev.c b/net/core/dev.c index 41d5120df469..95de4c011808 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1887,10 +1887,10 @@ static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps, return map; } -void netif_reset_xps_queue(struct net_device *dev, u16 index) +static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) { struct xps_dev_maps *dev_maps; - int cpu; + int cpu, i; bool active = false; mutex_lock(&xps_map_mutex); @@ -1900,7 +1900,11 @@ void netif_reset_xps_queue(struct net_device *dev, u16 index) goto out_no_maps; for_each_possible_cpu(cpu) { - if (remove_xps_queue(dev_maps, cpu, index)) + for (i = index; i < dev->num_tx_queues; i++) { + if (!remove_xps_queue(dev_maps, cpu, i)) + break; + } + if (i == dev->num_tx_queues) active = true; } @@ -1909,8 +1913,10 @@ void netif_reset_xps_queue(struct net_device *dev, u16 index) kfree_rcu(dev_maps, rcu); } - netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), - NUMA_NO_NODE); + for (i = index; i < dev->num_tx_queues; i++) + netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), + NUMA_NO_NODE); + out_no_maps: mutex_unlock(&xps_map_mutex); } @@ -2096,8 +2102,12 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (dev->num_tc) netif_setup_tc(dev, txq); - if (txq < dev->real_num_tx_queues) + if (txq < dev->real_num_tx_queues) { qdisc_reset_all_tx_gt(dev, txq); +#ifdef CONFIG_XPS + netif_reset_xps_queues_gt(dev, txq); +#endif + } } dev->real_num_tx_queues = txq; @@ -5919,6 +5929,10 @@ static void rollback_registered_many(struct list_head *head) /* Remove entries from kobject tree */ netdev_unregister_kobject(dev); +#ifdef CONFIG_XPS + /* Remove XPS queueing entries */ + netif_reset_xps_queues_gt(dev, 0); +#endif } synchronize_net(); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 5ad489d5d062..a5b89a6fec6d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1002,16 +1002,6 @@ static ssize_t show_xps_map(struct netdev_queue *queue, return len; } -static void xps_queue_release(struct netdev_queue *queue) -{ - struct net_device *dev = queue->dev; - unsigned long index; - - index = get_netdev_queue_index(queue); - - netif_reset_xps_queue(dev, index); -} - static ssize_t store_xps_map(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, const char *buf, size_t len) @@ -1058,10 +1048,6 @@ static void netdev_queue_release(struct kobject *kobj) { struct netdev_queue *queue = to_netdev_queue(kobj); -#ifdef CONFIG_XPS - xps_queue_release(queue); -#endif - memset(kobj, 0, sizeof(*kobj)); dev_put(queue->dev); } -- cgit v1.2.3-71-gd317 From e2aa19fadd718d7dd920a3994118863861a4b61e Mon Sep 17 00:00:00 2001 From: Nathan Hintz Date: Thu, 10 Jan 2013 17:54:09 +0100 Subject: bcma: return the mips irq number in bcma_core_irq The irq signal numbers that are send by the cpu are increased by 2 from the number programmed into the mips core by bcma. Return the irq number on which the irqs are send in bcma_core_irq() now. Signed-off-by: Nathan Hintz Signed-off-by: Hauke Mehrtens Signed-off-by: John W. Linville --- arch/mips/bcm47xx/serial.c | 2 +- drivers/bcma/driver_chipcommon.c | 2 +- drivers/bcma/driver_mips.c | 12 +++++++++--- drivers/bcma/driver_pci_host.c | 4 ++-- include/linux/bcma/bcma_driver_mips.h | 2 +- 5 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/bcm47xx/serial.c b/arch/mips/bcm47xx/serial.c index 57981e4fe2bc..b8ef965705cf 100644 --- a/arch/mips/bcm47xx/serial.c +++ b/arch/mips/bcm47xx/serial.c @@ -62,7 +62,7 @@ static int __init uart8250_init_bcma(void) p->mapbase = (unsigned int) bcma_port->regs; p->membase = (void *) bcma_port->regs; - p->irq = bcma_port->irq + 2; + p->irq = bcma_port->irq; p->uartclk = bcma_port->baud_base; p->regshift = bcma_port->reg_shift; p->iotype = UPIO_MEM; diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c index e461ad25fda4..28fa50ad87be 100644 --- a/drivers/bcma/driver_chipcommon.c +++ b/drivers/bcma/driver_chipcommon.c @@ -329,7 +329,7 @@ void bcma_chipco_serial_init(struct bcma_drv_cc *cc) return; } - irq = bcma_core_mips_irq(cc->core); + irq = bcma_core_irq(cc->core); /* Determine the registers of the UARTs */ cc->nr_serial_ports = (cc->capabilities & BCMA_CC_CAP_NRUART); diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c index 90f20a247725..a808404db4b1 100644 --- a/drivers/bcma/driver_mips.c +++ b/drivers/bcma/driver_mips.c @@ -85,7 +85,7 @@ static u32 bcma_core_mips_irqflag(struct bcma_device *dev) * If disabled, 5 is returned. * If not supported, 6 is returned. */ -unsigned int bcma_core_mips_irq(struct bcma_device *dev) +static unsigned int bcma_core_mips_irq(struct bcma_device *dev) { struct bcma_device *mdev = dev->bus->drv_mips.core; u32 irqflag; @@ -102,7 +102,13 @@ unsigned int bcma_core_mips_irq(struct bcma_device *dev) return 5; } -EXPORT_SYMBOL(bcma_core_mips_irq); + +unsigned int bcma_core_irq(struct bcma_device *dev) +{ + unsigned int mips_irq = bcma_core_mips_irq(dev); + return mips_irq <= 4 ? mips_irq + 2 : 0; +} +EXPORT_SYMBOL(bcma_core_irq); static void bcma_core_mips_set_irq(struct bcma_device *dev, unsigned int irq) { @@ -299,7 +305,7 @@ void bcma_core_mips_init(struct bcma_drv_mips *mcore) break; default: list_for_each_entry(core, &bus->cores, list) { - core->irq = bcma_core_mips_irq(core) + 2; + core->irq = bcma_core_irq(core); } bcma_err(bus, "Unknown device (0x%x) found, can not configure IRQs\n", diff --git a/drivers/bcma/driver_pci_host.c b/drivers/bcma/driver_pci_host.c index e6b5c89469dc..ef9f0938da77 100644 --- a/drivers/bcma/driver_pci_host.c +++ b/drivers/bcma/driver_pci_host.c @@ -577,7 +577,7 @@ int bcma_core_pci_plat_dev_init(struct pci_dev *dev) pr_info("PCI: Fixing up device %s\n", pci_name(dev)); /* Fix up interrupt lines */ - dev->irq = bcma_core_mips_irq(pc_host->pdev->core) + 2; + dev->irq = bcma_core_irq(pc_host->pdev->core); pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); return 0; @@ -596,6 +596,6 @@ int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev) pc_host = container_of(dev->bus->ops, struct bcma_drv_pci_host, pci_ops); - return bcma_core_mips_irq(pc_host->pdev->core) + 2; + return bcma_core_irq(pc_host->pdev->core); } EXPORT_SYMBOL(bcma_core_pci_pcibios_map_irq); diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h index 6495579e3f35..73c7f4b882cc 100644 --- a/include/linux/bcma/bcma_driver_mips.h +++ b/include/linux/bcma/bcma_driver_mips.h @@ -48,6 +48,6 @@ static inline void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) { } extern u32 bcma_cpu_clock(struct bcma_drv_mips *mcore); -extern unsigned int bcma_core_mips_irq(struct bcma_device *dev); +extern unsigned int bcma_core_irq(struct bcma_device *core); #endif /* LINUX_BCMA_DRIVER_MIPS_H_ */ -- cgit v1.2.3-71-gd317 From 990debe2ca8379863709721926550a55f47f3880 Mon Sep 17 00:00:00 2001 From: Nathan Hintz Date: Thu, 10 Jan 2013 22:24:03 -0800 Subject: bcma: update pci configuration for bcm4706/bcm4716 Update the PCI configuration for BCM4706 and BCM4716 per the 2011 Broadcom SDK. Signed-off-by: Nathan Hintz Signed-off-by: John W. Linville --- drivers/bcma/driver_pci_host.c | 13 ++++++++++++- include/linux/bcma/bcma_driver_pci.h | 2 ++ 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/bcma/driver_pci_host.c b/drivers/bcma/driver_pci_host.c index ef9f0938da77..37d1777dcd47 100644 --- a/drivers/bcma/driver_pci_host.c +++ b/drivers/bcma/driver_pci_host.c @@ -427,7 +427,7 @@ void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc) /* Reset RC */ usleep_range(3000, 5000); pcicore_write32(pc, BCMA_CORE_PCI_CTL, BCMA_CORE_PCI_CTL_RST_OE); - usleep_range(1000, 2000); + msleep(50); pcicore_write32(pc, BCMA_CORE_PCI_CTL, BCMA_CORE_PCI_CTL_RST | BCMA_CORE_PCI_CTL_RST_OE); @@ -489,6 +489,17 @@ void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc) bcma_core_pci_enable_crs(pc); + if (bus->chipinfo.id == BCMA_CHIP_ID_BCM4706 || + bus->chipinfo.id == BCMA_CHIP_ID_BCM4716) { + u16 val16; + bcma_extpci_read_config(pc, 0, 0, BCMA_CORE_PCI_CFG_DEVCTRL, + &val16, sizeof(val16)); + val16 |= (2 << 5); /* Max payload size of 512 */ + val16 |= (2 << 12); /* MRRS 512 */ + bcma_extpci_write_config(pc, 0, 0, BCMA_CORE_PCI_CFG_DEVCTRL, + &val16, sizeof(val16)); + } + /* Enable PCI bridge BAR0 memory & master access */ tmp = PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY; bcma_extpci_write_config(pc, 0, 0, PCI_COMMAND, &tmp, sizeof(tmp)); diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 41da581e1612..31232247a1ee 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -179,6 +179,8 @@ struct pci_dev; #define BCMA_CORE_PCI_CFG_FUN_MASK 7 /* Function mask */ #define BCMA_CORE_PCI_CFG_OFF_MASK 0xfff /* Register mask */ +#define BCMA_CORE_PCI_CFG_DEVCTRL 0xd8 + /* PCIE Root Capability Register bits (Host mode only) */ #define BCMA_CORE_PCI_RC_CRS_VISIBILITY 0x0001 -- cgit v1.2.3-71-gd317 From e7219858ac1f98213a4714d0e24e7a003e1bf6a2 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Sun, 13 Jan 2013 05:02:01 +0000 Subject: ipv6: Use ipv6_get_dsfield() instead of ipv6_tclass(). Commit 7a3198a8 ("ipv6: helper function to get tclass") introduced ipv6_tclass(), but similar function is already available as ipv6_get_dsfield(). We might be able to call ipv6_tclass() from ipv6_get_dsfield(), but it is confusing to have two versions. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/ipv6.h | 5 ----- net/ipv6/datagram.c | 3 ++- net/ipv6/tcp_ipv6.c | 6 +++--- 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index faed1e357dd6..304a9f46b578 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -77,11 +77,6 @@ static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb) return (struct ipv6hdr *)skb_transport_header(skb); } -static inline __u8 ipv6_tclass(const struct ipv6hdr *iph) -{ - return (ntohl(*(__be32 *)iph) >> 20) & 0xff; -} - /* This structure contains results of exthdrs parsing as offsets from skb->nh. diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 7f65df41c396..33be36398a78 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -487,7 +488,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) } if (np->rxopt.bits.rxtclass) { - int tclass = ipv6_tclass(ipv6_hdr(skb)); + int tclass = ipv6_get_dsfield(ipv6_hdr(skb)); put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3164ad272a74..3701c3c6e2eb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1163,7 +1163,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count @@ -1243,7 +1243,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); /* Clone native IPv6 options from listening socket (if any) @@ -1456,7 +1456,7 @@ ipv6_pktoptions: if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; if (np->rxopt.bits.rxtclass) - np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); + np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); opt_skb = xchg(&np->pktoptions, opt_skb); -- cgit v1.2.3-71-gd317 From dd3332bfcb2223458f553f341d3388cb84040e6a Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Sun, 13 Jan 2013 05:02:45 +0000 Subject: ipv6: Store Router Alert option in IP6CB directly. Router Alert option is very small and we can store the value itself in the skb. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 ++- include/uapi/linux/ipv6.h | 2 ++ net/ipv6/exthdrs.c | 3 ++- net/ipv6/ip6_input.c | 5 ++--- 4 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 304a9f46b578..e971e3742172 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -84,7 +84,7 @@ static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb) struct inet6_skb_parm { int iif; - __u16 ra; + __be16 ra; __u16 hop; __u16 dst0; __u16 srcrt; @@ -100,6 +100,7 @@ struct inet6_skb_parm { #define IP6SKB_XFRM_TRANSFORMED 1 #define IP6SKB_FORWARDED 2 #define IP6SKB_REROUTED 4 +#define IP6SKB_ROUTERALERT 8 }; #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 5a2991cf0251..4bda4cf5b0f5 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -63,6 +63,8 @@ struct ipv6_opt_hdr { #define ipv6_destopt_hdr ipv6_opt_hdr #define ipv6_hopopt_hdr ipv6_opt_hdr +/* Router Alert option values (RFC2711) */ +#define IPV6_OPT_ROUTERALERT_MLD 0x0000 /* MLD(RFC2710) */ /* * routing header type 0 (used in cmsghdr struct) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 473f628f9f20..07a7d65a7cb6 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -553,7 +553,8 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) const unsigned char *nh = skb_network_header(skb); if (nh[optoff + 1] == 2) { - IP6CB(skb)->ra = optoff; + IP6CB(skb)->flags |= IP6SKB_ROUTERALERT; + memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra)); return true; } LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 2ccd35ec3628..4ac5bf30e16a 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -280,9 +280,8 @@ int ip6_mc_input(struct sk_buff *skb) struct inet6_skb_parm *opt = IP6CB(skb); /* Check for MLD */ - if (unlikely(opt->ra)) { + if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { /* Check if this is a mld message */ - u8 *ptr = skb_network_header(skb) + opt->ra; u8 nexthdr = hdr->nexthdr; __be16 frag_off; int offset; @@ -290,7 +289,7 @@ int ip6_mc_input(struct sk_buff *skb) /* Check if the value of Router Alert * is for MLD (0x0000). */ - if ((ptr[2] | ptr[3]) == 0) { + if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) { deliver = false; if (!ipv6_ext_hdr(nexthdr)) { -- cgit v1.2.3-71-gd317 From 25d46f43a911b08c5aa8c8fd4fe7fa9b36445068 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Sun, 13 Jan 2013 16:02:06 +0000 Subject: ipv6: Move comment to right place. IN6ADDR_* and in6addr_* are not exported to userspace, and are defined in include/linux/in6.h. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/in6.h | 4 ++++ include/uapi/linux/in6.h | 5 ----- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index 9e2ae26fb598..a16e19349ec0 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -22,6 +22,10 @@ #include +/* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553 + * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined + * in network byte order, not in host byte order as are the IPv4 equivalents + */ extern const struct in6_addr in6addr_any; #define IN6ADDR_ANY_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } } extern const struct in6_addr in6addr_loopback; diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index f79c3721da6e..5673b97dcf54 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -38,11 +38,6 @@ struct in6_addr { #define s6_addr32 in6_u.u6_addr32 }; -/* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553 - * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined - * in network byte order, not in host byte order as are the IPv4 equivalents - */ - struct sockaddr_in6 { unsigned short int sin6_family; /* AF_INET6 */ __be16 sin6_port; /* Transport layer port # */ -- cgit v1.2.3-71-gd317 From 6bf2e5461479c4511f59946a7378db576b04dbc5 Mon Sep 17 00:00:00 2001 From: Nathan Hintz Date: Fri, 11 Jan 2013 22:07:22 -0800 Subject: bcma: fix bcm4716/bcm4748 i2s irqflag The default irqflag assignment for the I2S core on some Broadcom 4716/4748 devices is invalid and needs to be corrected (from the Broadcom SDK). Signed-off-by: Nathan Hintz Acked-by: Hauke Mehrtens Signed-off-by: John W. Linville --- drivers/bcma/driver_mips.c | 28 ++++++++++++++++++++++++++++ include/linux/bcma/bcma_driver_mips.h | 1 + 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c index a808404db4b1..9fe86ee16c66 100644 --- a/drivers/bcma/driver_mips.c +++ b/drivers/bcma/driver_mips.c @@ -256,6 +256,32 @@ void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) mcore->early_setup_done = true; } +static void bcma_fix_i2s_irqflag(struct bcma_bus *bus) +{ + struct bcma_device *cpu, *pcie, *i2s; + + /* Fixup the interrupts in 4716/4748 for i2s core (2010 Broadcom SDK) + * (IRQ flags > 7 are ignored when setting the interrupt masks) + */ + if (bus->chipinfo.id != BCMA_CHIP_ID_BCM4716 && + bus->chipinfo.id != BCMA_CHIP_ID_BCM4748) + return; + + cpu = bcma_find_core(bus, BCMA_CORE_MIPS_74K); + pcie = bcma_find_core(bus, BCMA_CORE_PCIE); + i2s = bcma_find_core(bus, BCMA_CORE_I2S); + if (cpu && pcie && i2s && + bcma_aread32(cpu, BCMA_MIPS_OOBSELINA74) == 0x08060504 && + bcma_aread32(pcie, BCMA_MIPS_OOBSELINA74) == 0x08060504 && + bcma_aread32(i2s, BCMA_MIPS_OOBSELOUTA30) == 0x88) { + bcma_awrite32(cpu, BCMA_MIPS_OOBSELINA74, 0x07060504); + bcma_awrite32(pcie, BCMA_MIPS_OOBSELINA74, 0x07060504); + bcma_awrite32(i2s, BCMA_MIPS_OOBSELOUTA30, 0x87); + bcma_debug(bus, + "Moved i2s interrupt to oob line 7 instead of 8\n"); + } +} + void bcma_core_mips_init(struct bcma_drv_mips *mcore) { struct bcma_bus *bus; @@ -269,6 +295,8 @@ void bcma_core_mips_init(struct bcma_drv_mips *mcore) bcma_core_mips_early_init(mcore); + bcma_fix_i2s_irqflag(bus); + switch (bus->chipinfo.id) { case BCMA_CHIP_ID_BCM4716: case BCMA_CHIP_ID_BCM4748: diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h index 73c7f4b882cc..0d1ea297851a 100644 --- a/include/linux/bcma/bcma_driver_mips.h +++ b/include/linux/bcma/bcma_driver_mips.h @@ -28,6 +28,7 @@ #define BCMA_MIPS_MIPS74K_GPIOEN 0x0048 #define BCMA_MIPS_MIPS74K_CLKCTLST 0x01E0 +#define BCMA_MIPS_OOBSELINA74 0x004 #define BCMA_MIPS_OOBSELOUTA30 0x100 struct bcma_device; -- cgit v1.2.3-71-gd317 From f9a8f83b04e0c362a2fc660dbad980d24af209fc Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 14 Jan 2013 00:52:52 +0000 Subject: net: phy: remove flags argument from phy_{attach, connect, connect_direct} The flags argument of the phy_{attach,connect,connect_direct} functions is then used to assign a struct phy_device dev_flags with its value. All callers but the tg3 driver pass the flag 0, which results in the underlying PHY drivers in drivers/net/phy/ not being able to actually use any of the flags they would set in dev_flags. This patch gets rid of the flags argument, and passes phydev->dev_flags to the internal PHY library call phy_attach_direct() such that drivers which actually modify a phy device dev_flags get the value preserved for use by the underlying phy driver. Acked-by: Kosta Zertsekel Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/networking/phy.txt | 11 ++++++----- drivers/net/ethernet/8390/ax88796.c | 2 +- drivers/net/ethernet/adi/bfin_mac.c | 4 ++-- drivers/net/ethernet/aeroflex/greth.c | 4 +--- drivers/net/ethernet/amd/au1000_eth.c | 4 ++-- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 2 +- drivers/net/ethernet/broadcom/sb1250-mac.c | 2 +- drivers/net/ethernet/broadcom/tg3.c | 4 ++-- drivers/net/ethernet/cadence/macb.c | 2 +- drivers/net/ethernet/dnet.c | 4 ++-- drivers/net/ethernet/ethoc.c | 4 ++-- drivers/net/ethernet/faraday/ftgmac100.c | 3 +-- drivers/net/ethernet/freescale/fec.c | 2 +- drivers/net/ethernet/lantiq_etop.c | 4 ++-- drivers/net/ethernet/marvell/mv643xx_eth.c | 2 +- drivers/net/ethernet/marvell/pxa168_eth.c | 2 +- drivers/net/ethernet/nxp/lpc_eth.c | 2 +- drivers/net/ethernet/rdc/r6040.c | 2 +- drivers/net/ethernet/renesas/sh_eth.c | 2 +- drivers/net/ethernet/s6gmac.c | 2 +- drivers/net/ethernet/smsc/smsc911x.c | 5 ++--- drivers/net/ethernet/smsc/smsc9420.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +-- drivers/net/ethernet/ti/cpmac.c | 4 ++-- drivers/net/ethernet/ti/cpsw.c | 2 +- drivers/net/ethernet/ti/davinci_emac.c | 2 +- drivers/net/ethernet/toshiba/tc35815.c | 5 ++--- drivers/net/ethernet/xscale/ixp4xx_eth.c | 2 +- drivers/net/phy/phy_device.c | 15 ++++++--------- drivers/net/usb/ax88172a.c | 2 +- drivers/of/of_mdio.c | 4 ++-- drivers/staging/et131x/et131x.c | 2 +- include/linux/phy.h | 6 +++--- net/dsa/slave.c | 2 +- 34 files changed, 56 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/phy.txt b/Documentation/networking/phy.txt index 95e5f5985a2a..d5b1a3935245 100644 --- a/Documentation/networking/phy.txt +++ b/Documentation/networking/phy.txt @@ -103,7 +103,7 @@ Letting the PHY Abstraction Layer do Everything Now, to connect, just call this function: - phydev = phy_connect(dev, phy_name, &adjust_link, flags, interface); + phydev = phy_connect(dev, phy_name, &adjust_link, interface); phydev is a pointer to the phy_device structure which represents the PHY. If phy_connect is successful, it will return the pointer. dev, here, is the @@ -113,7 +113,9 @@ Letting the PHY Abstraction Layer do Everything current state, though the PHY will not yet be truly operational at this point. - flags is a u32 which can optionally contain phy-specific flags. + PHY-specific flags should be set in phydev->dev_flags prior to the call + to phy_connect() such that the underlying PHY driver can check for flags + and perform specific operations based on them. This is useful if the system has put hardware restrictions on the PHY/controller, of which the PHY needs to be aware. @@ -185,11 +187,10 @@ Doing it all yourself start, or disables then frees them for stop. struct phy_device * phy_attach(struct net_device *dev, const char *phy_id, - u32 flags, phy_interface_t interface); + phy_interface_t interface); Attaches a network device to a particular PHY, binding the PHY to a generic - driver if none was found during bus initialization. Passes in - any phy-specific flags as needed. + driver if none was found during bus initialization. int phy_start_aneg(struct phy_device *phydev); diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index 7eeddf01307f..cab306a9888e 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -358,7 +358,7 @@ static int ax_mii_probe(struct net_device *dev) return -ENODEV; } - ret = phy_connect_direct(dev, phy_dev, ax_handle_link_change, 0, + ret = phy_connect_direct(dev, phy_dev, ax_handle_link_change, PHY_INTERFACE_MODE_MII); if (ret) { netdev_err(dev, "Could not attach to PHY\n"); diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index c7a83f6f2382..a175d0be1ae1 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -425,8 +425,8 @@ static int mii_probe(struct net_device *dev, int phy_mode) return -EINVAL; } - phydev = phy_connect(dev, dev_name(&phydev->dev), &bfin_mac_adjust_link, - 0, phy_mode); + phydev = phy_connect(dev, dev_name(&phydev->dev), + &bfin_mac_adjust_link, phy_mode); if (IS_ERR(phydev)) { netdev_err(dev, "could not attach PHY\n"); diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index 480662ba5227..0be2195e5034 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -1288,9 +1288,7 @@ static int greth_mdio_probe(struct net_device *dev) } ret = phy_connect_direct(dev, phy, &greth_link_change, - 0, greth->gbit_mac ? - PHY_INTERFACE_MODE_GMII : - PHY_INTERFACE_MODE_MII); + greth->gbit_mac ? PHY_INTERFACE_MODE_GMII : PHY_INTERFACE_MODE_MII); if (ret) { if (netif_msg_ifup(greth)) dev_err(&dev->dev, "could not attach to PHY\n"); diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index 65b865a0cc78..de774d419144 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -437,8 +437,8 @@ static int au1000_mii_probe(struct net_device *dev) /* now we are supposed to have a proper phydev, to attach to... */ BUG_ON(phydev->attached_dev); - phydev = phy_connect(dev, dev_name(&phydev->dev), &au1000_adjust_link, - 0, PHY_INTERFACE_MODE_MII); + phydev = phy_connect(dev, dev_name(&phydev->dev), + &au1000_adjust_link, PHY_INTERFACE_MODE_MII); if (IS_ERR(phydev)) { netdev_err(dev, "Could not attach to PHY\n"); diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index d8a151046728..f5b6b4715d45 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -799,7 +799,7 @@ static int bcm_enet_open(struct net_device *dev) snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT, priv->mii_bus->id, priv->phy_id); - phydev = phy_connect(dev, phy_id, bcm_enet_adjust_phy_link, 0, + phydev = phy_connect(dev, phy_id, bcm_enet_adjust_phy_link, PHY_INTERFACE_MODE_MII); if (IS_ERR(phydev)) { diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index 3a1c8a3cf7c9..e9b35da375cb 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -2385,7 +2385,7 @@ static int sbmac_mii_probe(struct net_device *dev) return -ENXIO; } - phy_dev = phy_connect(dev, dev_name(&phy_dev->dev), &sbmac_mii_poll, 0, + phy_dev = phy_connect(dev, dev_name(&phy_dev->dev), &sbmac_mii_poll, PHY_INTERFACE_MODE_GMII); if (IS_ERR(phy_dev)) { printk(KERN_ERR "%s: could not attach to PHY\n", dev->name); diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 88f2d41c009b..227749107789 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -2004,8 +2004,8 @@ static int tg3_phy_init(struct tg3 *tp) phydev = tp->mdio_bus->phy_map[TG3_PHY_MII_ADDR]; /* Attach the MAC to the PHY. */ - phydev = phy_connect(tp->dev, dev_name(&phydev->dev), tg3_adjust_link, - phydev->dev_flags, phydev->interface); + phydev = phy_connect(tp->dev, dev_name(&phydev->dev), + tg3_adjust_link, phydev->interface); if (IS_ERR(phydev)) { dev_err(&tp->pdev->dev, "Could not attach to PHY\n"); return PTR_ERR(phydev); diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index a9b0830fb39d..352190b9ebe7 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -287,7 +287,7 @@ static int macb_mii_probe(struct net_device *dev) } /* attach the mac to the phy */ - ret = phy_connect_direct(dev, phydev, &macb_handle_link_change, 0, + ret = phy_connect_direct(dev, phydev, &macb_handle_link_change, bp->phy_interface); if (ret) { netdev_err(dev, "Could not attach to PHY\n"); diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c index 2c177b329c8b..f3d60eb13c3a 100644 --- a/drivers/net/ethernet/dnet.c +++ b/drivers/net/ethernet/dnet.c @@ -281,11 +281,11 @@ static int dnet_mii_probe(struct net_device *dev) /* attach the mac to the phy */ if (bp->capabilities & DNET_HAS_RMII) { phydev = phy_connect(dev, dev_name(&phydev->dev), - &dnet_handle_link_change, 0, + &dnet_handle_link_change, PHY_INTERFACE_MODE_RMII); } else { phydev = phy_connect(dev, dev_name(&phydev->dev), - &dnet_handle_link_change, 0, + &dnet_handle_link_change, PHY_INTERFACE_MODE_MII); } diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index b51c81ac0b6f..aa47ef9689a8 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -682,8 +682,8 @@ static int ethoc_mdio_probe(struct net_device *dev) return -ENXIO; } - err = phy_connect_direct(dev, phy, ethoc_mdio_poll, 0, - PHY_INTERFACE_MODE_GMII); + err = phy_connect_direct(dev, phy, ethoc_mdio_poll, + PHY_INTERFACE_MODE_GMII); if (err) { dev_err(&dev->dev, "could not attach to PHY\n"); return err; diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 96454b5fca63..7c361d1db94c 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -858,8 +858,7 @@ static int ftgmac100_mii_probe(struct ftgmac100 *priv) } phydev = phy_connect(netdev, dev_name(&phydev->dev), - &ftgmac100_adjust_link, 0, - PHY_INTERFACE_MODE_GMII); + &ftgmac100_adjust_link, PHY_INTERFACE_MODE_GMII); if (IS_ERR(phydev)) { netdev_err(netdev, "%s: Could not attach to PHY\n", netdev->name); diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c index 5f2b4acf4836..1b7684a8851e 100644 --- a/drivers/net/ethernet/freescale/fec.c +++ b/drivers/net/ethernet/freescale/fec.c @@ -1008,7 +1008,7 @@ static int fec_enet_mii_probe(struct net_device *ndev) } snprintf(phy_name, sizeof(phy_name), PHY_ID_FMT, mdio_bus_id, phy_id); - phy_dev = phy_connect(ndev, phy_name, &fec_enet_adjust_link, 0, + phy_dev = phy_connect(ndev, phy_name, &fec_enet_adjust_link, fep->phy_interface); if (IS_ERR(phy_dev)) { printk(KERN_ERR "%s: could not attach to PHY\n", ndev->name); diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 8ead46adc21e..6a2127489af7 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -393,8 +393,8 @@ ltq_etop_mdio_probe(struct net_device *dev) return -ENODEV; } - phydev = phy_connect(dev, dev_name(&phydev->dev), <q_etop_mdio_link, - 0, priv->pldata->mii_mode); + phydev = phy_connect(dev, dev_name(&phydev->dev), + <q_etop_mdio_link, priv->pldata->mii_mode); if (IS_ERR(phydev)) { netdev_err(dev, "Could not attach to PHY\n"); diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 84c13263c514..c27b23d8f4fc 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2789,7 +2789,7 @@ static void phy_init(struct mv643xx_eth_private *mp, int speed, int duplex) phy_reset(mp); - phy_attach(mp->dev, dev_name(&phy->dev), 0, PHY_INTERFACE_MODE_GMII); + phy_attach(mp->dev, dev_name(&phy->dev), PHY_INTERFACE_MODE_GMII); if (speed == 0) { phy->autoneg = AUTONEG_ENABLE; diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index c7f2fa60fe6f..037ed866c22f 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1390,7 +1390,7 @@ static void phy_init(struct pxa168_eth_private *pep, int speed, int duplex) struct phy_device *phy = pep->phy; ethernet_phy_reset(pep); - phy_attach(pep->dev, dev_name(&phy->dev), 0, PHY_INTERFACE_MODE_MII); + phy_attach(pep->dev, dev_name(&phy->dev), PHY_INTERFACE_MODE_MII); if (speed == 0) { phy->autoneg = AUTONEG_ENABLE; diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 6fda51ebcc76..c4122c86f829 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -800,7 +800,7 @@ static int lpc_mii_probe(struct net_device *ndev) else netdev_info(ndev, "using RMII interface\n"); phydev = phy_connect(ndev, dev_name(&phydev->dev), - &lpc_handle_link_change, 0, + &lpc_handle_link_change, lpc_phy_interface_mode(&pldat->pdev->dev)); if (IS_ERR(phydev)) { diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index be3616d060d9..34f76e99dc8a 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -1042,7 +1042,7 @@ static int r6040_mii_probe(struct net_device *dev) } phydev = phy_connect(dev, dev_name(&phydev->dev), &r6040_adjust_link, - 0, PHY_INTERFACE_MODE_MII); + PHY_INTERFACE_MODE_MII); if (IS_ERR(phydev)) { dev_err(&lp->pdev->dev, "could not attach to PHY\n"); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 3d705862bd7d..e195c1e89d61 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1422,7 +1422,7 @@ static int sh_eth_phy_init(struct net_device *ndev) /* Try connect to PHY */ phydev = phy_connect(ndev, phy_id, sh_eth_adjust_link, - 0, mdp->phy_interface); + mdp->phy_interface); if (IS_ERR(phydev)) { dev_err(&ndev->dev, "phy_connect failed\n"); return PTR_ERR(phydev); diff --git a/drivers/net/ethernet/s6gmac.c b/drivers/net/ethernet/s6gmac.c index 72fc57dd084d..21683e2b1ff4 100644 --- a/drivers/net/ethernet/s6gmac.c +++ b/drivers/net/ethernet/s6gmac.c @@ -795,7 +795,7 @@ static inline int s6gmac_phy_start(struct net_device *dev) struct phy_device *p = NULL; while ((i < PHY_MAX_ADDR) && (!(p = pd->mii.bus->phy_map[i]))) i++; - p = phy_connect(dev, dev_name(&p->dev), &s6gmac_adjust_link, 0, + p = phy_connect(dev, dev_name(&p->dev), &s6gmac_adjust_link, PHY_INTERFACE_MODE_RGMII); if (IS_ERR(p)) { printk(KERN_ERR "%s: Could not attach to PHY\n", dev->name); diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 04ff63cb6544..da5cc9a3b34c 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -997,9 +997,8 @@ static int smsc911x_mii_probe(struct net_device *dev) SMSC_TRACE(pdata, probe, "PHY: addr %d, phy_id 0x%08X", phydev->addr, phydev->phy_id); - ret = phy_connect_direct(dev, phydev, - &smsc911x_phy_adjust_link, 0, - pdata->config.phy_interface); + ret = phy_connect_direct(dev, phydev, &smsc911x_phy_adjust_link, + pdata->config.phy_interface); if (ret) { netdev_err(dev, "Could not attach to PHY\n"); diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index 3c586585e1b3..ecfb43614d7b 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -1179,7 +1179,7 @@ static int smsc9420_mii_probe(struct net_device *dev) phydev->phy_id); phydev = phy_connect(dev, dev_name(&phydev->dev), - smsc9420_phy_adjust_link, 0, PHY_INTERFACE_MODE_MII); + smsc9420_phy_adjust_link, PHY_INTERFACE_MODE_MII); if (IS_ERR(phydev)) { pr_err("%s: Could not attach to PHY\n", dev->name); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f07c0612abf6..8c657294ce56 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -428,8 +428,7 @@ static int stmmac_init_phy(struct net_device *dev) priv->plat->phy_addr); pr_debug("stmmac_init_phy: trying to attach to %s\n", phy_id_fmt); - phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link, 0, - interface); + phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link, interface); if (IS_ERR(phydev)) { pr_err("%s: Could not attach to PHY\n", dev->name); diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index 70d1920cac97..31bbbca341a7 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -1172,8 +1172,8 @@ static int cpmac_probe(struct platform_device *pdev) snprintf(priv->phy_name, MII_BUS_ID_SIZE, PHY_ID_FMT, mdio_bus_id, phy_id); - priv->phy = phy_connect(dev, priv->phy_name, cpmac_adjust_link, 0, - PHY_INTERFACE_MODE_MII); + priv->phy = phy_connect(dev, priv->phy_name, cpmac_adjust_link, + PHY_INTERFACE_MODE_MII); if (IS_ERR(priv->phy)) { if (netif_msg_drv(priv)) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index bea736b8c3ec..3772804fb697 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -592,7 +592,7 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) 1 << slave_port, 0, ALE_MCAST_FWD_2); slave->phy = phy_connect(priv->ndev, slave->data->phy_id, - &cpsw_adjust_link, 0, slave->data->phy_if); + &cpsw_adjust_link, slave->data->phy_if); if (IS_ERR(slave->phy)) { dev_err(priv->dev, "phy %s not found on slave %d\n", slave->data->phy_id, slave->slave_num); diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 6621ae3a98d9..8478d98c1092 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1599,7 +1599,7 @@ static int emac_dev_open(struct net_device *ndev) if (priv->phy_id && *priv->phy_id) { priv->phydev = phy_connect(ndev, priv->phy_id, - &emac_adjust_link, 0, + &emac_adjust_link, PHY_INTERFACE_MODE_MII); if (IS_ERR(priv->phydev)) { diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index f16410e599f4..fe256094db35 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -633,9 +633,8 @@ static int tc_mii_probe(struct net_device *dev) /* attach the mac to the phy */ phydev = phy_connect(dev, dev_name(&phydev->dev), - &tc_handle_link_change, 0, - lp->chiptype == TC35815_TX4939 ? - PHY_INTERFACE_MODE_RMII : PHY_INTERFACE_MODE_MII); + &tc_handle_link_change, + lp->chiptype == TC35815_TX4939 ? PHY_INTERFACE_MODE_RMII : PHY_INTERFACE_MODE_MII); if (IS_ERR(phydev)) { printk(KERN_ERR "%s: Could not attach to PHY\n", dev->name); return PTR_ERR(phydev); diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index a4be1ad886c5..6958a5e87703 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -1451,7 +1451,7 @@ static int eth_init_one(struct platform_device *pdev) snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, mdio_bus->id, plat->phy); - port->phydev = phy_connect(dev, phy_id, &ixp4xx_adjust_link, 0, + port->phydev = phy_connect(dev, phy_id, &ixp4xx_adjust_link, PHY_INTERFACE_MODE_MII); if (IS_ERR(port->phydev)) { err = PTR_ERR(port->phydev); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 8af46e88a181..9930f9999561 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -416,16 +416,15 @@ static void phy_prepare_link(struct phy_device *phydev, * @dev: the network device to connect * @phydev: the pointer to the phy device * @handler: callback function for state change notifications - * @flags: PHY device's dev_flags * @interface: PHY device's interface */ int phy_connect_direct(struct net_device *dev, struct phy_device *phydev, - void (*handler)(struct net_device *), u32 flags, + void (*handler)(struct net_device *), phy_interface_t interface) { int rc; - rc = phy_attach_direct(dev, phydev, flags, interface); + rc = phy_attach_direct(dev, phydev, phydev->dev_flags, interface); if (rc) return rc; @@ -443,7 +442,6 @@ EXPORT_SYMBOL(phy_connect_direct); * @dev: the network device to connect * @bus_id: the id string of the PHY device to connect * @handler: callback function for state change notifications - * @flags: PHY device's dev_flags * @interface: PHY device's interface * * Description: Convenience function for connecting ethernet @@ -455,7 +453,7 @@ EXPORT_SYMBOL(phy_connect_direct); * the desired functionality. */ struct phy_device * phy_connect(struct net_device *dev, const char *bus_id, - void (*handler)(struct net_device *), u32 flags, + void (*handler)(struct net_device *), phy_interface_t interface) { struct phy_device *phydev; @@ -471,7 +469,7 @@ struct phy_device * phy_connect(struct net_device *dev, const char *bus_id, } phydev = to_phy_device(d); - rc = phy_connect_direct(dev, phydev, handler, flags, interface); + rc = phy_connect_direct(dev, phydev, handler, interface); if (rc) return ERR_PTR(rc); @@ -576,14 +574,13 @@ static int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, * phy_attach - attach a network device to a particular PHY device * @dev: network device to attach * @bus_id: Bus ID of PHY device to attach - * @flags: PHY device's dev_flags * @interface: PHY device's interface * * Description: Same as phy_attach_direct() except that a PHY bus_id * string is passed instead of a pointer to a struct phy_device. */ struct phy_device *phy_attach(struct net_device *dev, - const char *bus_id, u32 flags, phy_interface_t interface) + const char *bus_id, phy_interface_t interface) { struct bus_type *bus = &mdio_bus_type; struct phy_device *phydev; @@ -599,7 +596,7 @@ struct phy_device *phy_attach(struct net_device *dev, } phydev = to_phy_device(d); - rc = phy_attach_direct(dev, phydev, flags, interface); + rc = phy_attach_direct(dev, phydev, phydev->dev_flags, interface); if (rc) return ERR_PTR(rc); diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c index c8e0aa85fb8e..fdbab72926bd 100644 --- a/drivers/net/usb/ax88172a.c +++ b/drivers/net/usb/ax88172a.c @@ -377,7 +377,7 @@ static int ax88172a_reset(struct usbnet *dev) priv->phydev = phy_connect(dev->net, priv->phy_name, &ax88172a_adjust_link, - 0, PHY_INTERFACE_MODE_MII); + PHY_INTERFACE_MODE_MII); if (IS_ERR(priv->phydev)) { netdev_err(dev->net, "Could not connect to PHY device %s\n", priv->phy_name); diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 83ca06f4312b..e3a8b22ef9dd 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -157,7 +157,7 @@ struct phy_device *of_phy_connect(struct net_device *dev, if (!phy) return NULL; - return phy_connect_direct(dev, phy, hndlr, flags, iface) ? NULL : phy; + return phy_connect_direct(dev, phy, hndlr, iface) ? NULL : phy; } EXPORT_SYMBOL(of_phy_connect); @@ -194,7 +194,7 @@ struct phy_device *of_phy_connect_fixed_link(struct net_device *dev, sprintf(bus_id, PHY_ID_FMT, "fixed-0", be32_to_cpu(phy_id[0])); - phy = phy_connect(dev, bus_id, hndlr, 0, iface); + phy = phy_connect(dev, bus_id, hndlr, iface); return IS_ERR(phy) ? NULL : phy; } EXPORT_SYMBOL(of_phy_connect_fixed_link); diff --git a/drivers/staging/et131x/et131x.c b/drivers/staging/et131x/et131x.c index f15059ca3781..a0a30b3f2dcd 100644 --- a/drivers/staging/et131x/et131x.c +++ b/drivers/staging/et131x/et131x.c @@ -3917,7 +3917,7 @@ static int et131x_mii_probe(struct net_device *netdev) } phydev = phy_connect(netdev, dev_name(&phydev->dev), - &et131x_adjust_link, 0, PHY_INTERFACE_MODE_MII); + &et131x_adjust_link, PHY_INTERFACE_MODE_MII); if (IS_ERR(phydev)) { dev_err(&adapter->pdev->dev, "Could not attach to PHY\n"); diff --git a/include/linux/phy.h b/include/linux/phy.h index 93b3cf77f564..33999adbf8c8 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -506,13 +506,13 @@ struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45); int phy_device_register(struct phy_device *phy); int phy_init_hw(struct phy_device *phydev); struct phy_device * phy_attach(struct net_device *dev, - const char *bus_id, u32 flags, phy_interface_t interface); + const char *bus_id, phy_interface_t interface); struct phy_device *phy_find_first(struct mii_bus *bus); int phy_connect_direct(struct net_device *dev, struct phy_device *phydev, - void (*handler)(struct net_device *), u32 flags, + void (*handler)(struct net_device *), phy_interface_t interface); struct phy_device * phy_connect(struct net_device *dev, const char *bus_id, - void (*handler)(struct net_device *), u32 flags, + void (*handler)(struct net_device *), phy_interface_t interface); void phy_disconnect(struct phy_device *phydev); void phy_detach(struct phy_device *phydev); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index f795b0ca7ee6..f4345582a6b9 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -391,7 +391,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, if (p->phy != NULL) { phy_attach(slave_dev, dev_name(&p->phy->dev), - 0, PHY_INTERFACE_MODE_GMII); + PHY_INTERFACE_MODE_GMII); p->phy->autoneg = AUTONEG_ENABLE; p->phy->speed = 0; -- cgit v1.2.3-71-gd317 From 7266507d89991fa1e989283e4e032c6d9357fe26 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Mon, 17 Dec 2012 18:33:58 +0000 Subject: netfilter: nf_ct_sip: support Cisco 7941/7945 IP phones Most SIP devices use a source port of 5060/udp on SIP requests, so the response automatically comes back to port 5060: phone_ip:5060 -> proxy_ip:5060 REGISTER proxy_ip:5060 -> phone_ip:5060 100 Trying The newer Cisco IP phones, however, use a randomly chosen high source port for the SIP request but expect the response on port 5060: phone_ip:49173 -> proxy_ip:5060 REGISTER proxy_ip:5060 -> phone_ip:5060 100 Trying Standard Linux NAT, with or without nf_nat_sip, will send the reply back to port 49173, not 5060: phone_ip:49173 -> proxy_ip:5060 REGISTER proxy_ip:5060 -> phone_ip:49173 100 Trying But the phone is not listening on 49173, so it will never see the reply. This patch modifies nf_*_sip to work around this quirk by extracting the SIP response port from the Via: header, iff the source IP in the packet header matches the source IP in the SIP request. Signed-off-by: Kevin Cernekee Acked-by: Eric Dumazet Cc: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_sip.h | 3 +++ net/netfilter/nf_conntrack_sip.c | 17 +++++++++++++++++ net/netfilter/nf_nat_sip.c | 27 ++++++++++++++++++++++++--- 3 files changed, 44 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 387bdd02945d..ba7f571a2b1c 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -4,12 +4,15 @@ #include +#include + #define SIP_PORT 5060 #define SIP_TIMEOUT 3600 struct nf_ct_sip_master { unsigned int register_cseq; unsigned int invite_cseq; + __be16 forced_dport; }; enum sip_expectation_classes { diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index df8f4f284481..72a67bbe3518 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1440,8 +1440,25 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned int matchoff, matchlen; unsigned int cseq, i; + union nf_inet_addr addr; + __be16 port; + + /* Many Cisco IP phones use a high source port for SIP requests, but + * listen for the response on port 5060. If we are the local + * router for one of these phones, save the port number from the + * Via: header so that nf_nat_sip can redirect the responses to + * the correct port. + */ + if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, + SIP_HDR_VIA_UDP, NULL, &matchoff, + &matchlen, &addr, &port) > 0 && + port != ct->tuplehash[dir].tuple.src.u.udp.port && + nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3)) + ct_sip_info->forced_dport = port; for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { const struct sip_handler *handler; diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index 16303c752213..5951146e7688 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -95,6 +95,7 @@ static int map_addr(struct sk_buff *skb, unsigned int protoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")]; unsigned int buflen; union nf_inet_addr newaddr; @@ -107,7 +108,8 @@ static int map_addr(struct sk_buff *skb, unsigned int protoff, } else if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, addr) && ct->tuplehash[dir].tuple.dst.u.udp.port == port) { newaddr = ct->tuplehash[!dir].tuple.src.u3; - newport = ct->tuplehash[!dir].tuple.src.u.udp.port; + newport = ct_sip_info->forced_dport ? : + ct->tuplehash[!dir].tuple.src.u.udp.port; } else return 1; @@ -144,6 +146,7 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); unsigned int coff, matchoff, matchlen; enum sip_header_types hdr; union nf_inet_addr addr; @@ -258,6 +261,21 @@ next: !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO)) return NF_DROP; + /* Mangle destination port for Cisco phones, then fix up checksums */ + if (dir == IP_CT_DIR_REPLY && ct_sip_info->forced_dport) { + struct udphdr *uh; + + if (!skb_make_writable(skb, skb->len)) + return NF_DROP; + + uh = (void *)skb->data + protoff; + uh->dest = ct_sip_info->forced_dport; + + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, protoff, + 0, 0, NULL, 0)) + return NF_DROP; + } + return NF_ACCEPT; } @@ -311,8 +329,10 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); union nf_inet_addr newaddr; u_int16_t port; + __be16 srcport; char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")]; unsigned int buflen; @@ -326,8 +346,9 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, /* If the signalling port matches the connection's source port in the * original direction, try to use the destination port in the opposite * direction. */ - if (exp->tuple.dst.u.udp.port == - ct->tuplehash[dir].tuple.src.u.udp.port) + srcport = ct_sip_info->forced_dport ? : + ct->tuplehash[dir].tuple.src.u.udp.port; + if (exp->tuple.dst.u.udp.port == srcport) port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port); else port = ntohs(exp->tuple.dst.u.udp.port); -- cgit v1.2.3-71-gd317 From bbb923a4c2d17ebd5ec34755fe19a33914cbd86f Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 21 Jan 2013 06:00:25 +0000 Subject: mcast: define and use MRT[6]_MAX in ip[6]_mroute_opt() This will ease further addition of new MRT[6]_* values and avoid to update in6.h each time. Note that we reduce the maximum value from 210 to 209, but 210 does not match any known value in ip[6]_mroute_setsockopt(). Signed-off-by: Nicolas Dichtel Acked-by: David L Stevens Signed-off-by: David S. Miller --- include/linux/mroute.h | 2 +- include/linux/mroute6.h | 2 +- include/uapi/linux/in6.h | 15 ++++----------- include/uapi/linux/mroute.h | 1 + include/uapi/linux/mroute6.h | 1 + 5 files changed, 8 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index ea00d9162ee5..79aaa9fc1a15 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -9,7 +9,7 @@ #ifdef CONFIG_IP_MROUTE static inline int ip_mroute_opt(int opt) { - return (opt >= MRT_BASE) && (opt <= MRT_BASE + 10); + return (opt >= MRT_BASE) && (opt <= MRT_MAX); } #else static inline int ip_mroute_opt(int opt) diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index a223561ba12e..66982e764051 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -10,7 +10,7 @@ #ifdef CONFIG_IPV6_MROUTE static inline int ip6_mroute_opt(int opt) { - return (opt >= MRT6_BASE) && (opt <= MRT6_BASE + 10); + return (opt >= MRT6_BASE) && (opt <= MRT6_MAX); } #else static inline int ip6_mroute_opt(int opt) diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 5673b97dcf54..53b1d56a6e7f 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -259,17 +259,10 @@ struct in6_flowlabel_req { /* * Multicast Routing: - * see include/linux/mroute6.h. + * see include/uapi/linux/mroute6.h. * - * MRT6_INIT 200 - * MRT6_DONE 201 - * MRT6_ADD_MIF 202 - * MRT6_DEL_MIF 203 - * MRT6_ADD_MFC 204 - * MRT6_DEL_MFC 205 - * MRT6_VERSION 206 - * MRT6_ASSERT 207 - * MRT6_PIM 208 - * (reserved) 209 + * MRT6_BASE 200 + * ... + * MRT6_MAX */ #endif /* _UAPI_LINUX_IN6_H */ diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h index 16929993acc4..1c11004af5db 100644 --- a/include/uapi/linux/mroute.h +++ b/include/uapi/linux/mroute.h @@ -26,6 +26,7 @@ #define MRT_ASSERT (MRT_BASE+7) /* Activate PIM assert mode */ #define MRT_PIM (MRT_BASE+8) /* enable PIM code */ #define MRT_TABLE (MRT_BASE+9) /* Specify mroute table ID */ +#define MRT_MAX (MRT_BASE+9) #define SIOCGETVIFCNT SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT (SIOCPROTOPRIVATE+1) diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index 3e89b5e7f9e3..c206ae3a2327 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -26,6 +26,7 @@ #define MRT6_ASSERT (MRT6_BASE+7) /* Activate PIM assert mode */ #define MRT6_PIM (MRT6_BASE+8) /* enable PIM code */ #define MRT6_TABLE (MRT6_BASE+9) /* Specify mroute table ID */ +#define MRT6_MAX (MRT6_BASE+9) #define SIOCGETMIFCNT_IN6 SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT_IN6 (SIOCPROTOPRIVATE+1) -- cgit v1.2.3-71-gd317 From fa0879e37b59e8e3f130a30a9e6fa515717c5bdd Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 21 Jan 2013 01:17:22 +0000 Subject: net: split eth_mac_addr for better error handling When we set mac address, software mac address in system and hardware mac address all need to be updated. Current eth_mac_addr() doesn't allow callers to implement error handling nicely. This patch split eth_mac_addr() to prepare part and real commit part, then we can prepare first, and try to change hardware address, then do the real commit if hardware address is set successfully. Signed-off-by: Stefan Hajnoczi Signed-off-by: Amos Kong Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 2 ++ net/ethernet/eth.c | 41 +++++++++++++++++++++++++++++++++++------ 2 files changed, 37 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 1a43e1b4f7ad..c623861964e4 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -40,6 +40,8 @@ extern int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, extern void eth_header_cache_update(struct hh_cache *hh, const struct net_device *dev, const unsigned char *haddr); +extern int eth_prepare_mac_addr_change(struct net_device *dev, void *p); +extern void eth_commit_mac_addr_change(struct net_device *dev, void *p); extern int eth_mac_addr(struct net_device *dev, void *p); extern int eth_change_mtu(struct net_device *dev, int new_mtu); extern int eth_validate_addr(struct net_device *dev); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index bc39c8c8f589..a36c85eab5b4 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -271,6 +271,36 @@ void eth_header_cache_update(struct hh_cache *hh, } EXPORT_SYMBOL(eth_header_cache_update); +/** + * eth_prepare_mac_addr_change - prepare for mac change + * @dev: network device + * @p: socket address + */ +int eth_prepare_mac_addr_change(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + + if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev)) + return -EBUSY; + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + return 0; +} +EXPORT_SYMBOL(eth_prepare_mac_addr_change); + +/** + * eth_commit_mac_addr_change - commit mac change + * @dev: network device + * @p: socket address + */ +void eth_commit_mac_addr_change(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); +} +EXPORT_SYMBOL(eth_commit_mac_addr_change); + /** * eth_mac_addr - set new Ethernet hardware address * @dev: network device @@ -283,13 +313,12 @@ EXPORT_SYMBOL(eth_header_cache_update); */ int eth_mac_addr(struct net_device *dev, void *p) { - struct sockaddr *addr = p; + int ret; - if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev)) - return -EBUSY; - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); + ret = eth_prepare_mac_addr_change(dev, p); + if (ret < 0) + return ret; + eth_commit_mac_addr_change(dev, p); return 0; } EXPORT_SYMBOL(eth_mac_addr); -- cgit v1.2.3-71-gd317 From 055dc21a1d1d219608cd4baac7d0683fb2cbbe8a Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 22 Jan 2013 09:49:50 +0000 Subject: soreuseport: infrastructure Definitions and macros for implementing soreusport. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- arch/alpha/include/uapi/asm/socket.h | 2 +- arch/avr32/include/uapi/asm/socket.h | 2 +- arch/cris/include/uapi/asm/socket.h | 2 +- arch/frv/include/uapi/asm/socket.h | 2 +- arch/h8300/include/uapi/asm/socket.h | 2 +- arch/ia64/include/uapi/asm/socket.h | 2 +- arch/m32r/include/uapi/asm/socket.h | 2 +- arch/mips/include/uapi/asm/socket.h | 3 +-- arch/mn10300/include/uapi/asm/socket.h | 2 +- arch/parisc/include/uapi/asm/socket.h | 2 +- arch/powerpc/include/uapi/asm/socket.h | 2 +- arch/s390/include/uapi/asm/socket.h | 2 +- arch/sparc/include/uapi/asm/socket.h | 2 +- arch/xtensa/include/uapi/asm/socket.h | 2 +- include/linux/random.h | 6 ++++++ include/net/sock.h | 5 ++++- include/uapi/asm-generic/socket.h | 3 +-- net/core/sock.c | 7 +++++++ 18 files changed, 32 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 755702eefd9c..c5195524d1ef 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -19,7 +19,7 @@ #define SO_BROADCAST 0x0020 #define SO_LINGER 0x0080 #define SO_OOBINLINE 0x0100 -/* To add :#define SO_REUSEPORT 0x0200 */ +#define SO_REUSEPORT 0x0200 #define SO_TYPE 0x1008 #define SO_ERROR 0x1007 diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index f3f38a0e2ef9..51c6401582ea 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -22,7 +22,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h index 406b5838defd..50692b738c75 100644 --- a/arch/cris/include/uapi/asm/socket.h +++ b/arch/cris/include/uapi/asm/socket.h @@ -24,7 +24,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index d8e1132a1ab6..595391f0f98c 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -22,7 +22,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/arch/h8300/include/uapi/asm/socket.h b/arch/h8300/include/uapi/asm/socket.h index c8b87a828206..43e32621da7d 100644 --- a/arch/h8300/include/uapi/asm/socket.h +++ b/arch/h8300/include/uapi/asm/socket.h @@ -22,7 +22,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index f390896c3104..c567adc8bea5 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -31,7 +31,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 6a895155e7a3..519afa2755db 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -22,7 +22,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 9d11a7713923..7e2723637b35 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -28,8 +28,7 @@ #define SO_LINGER 0x0080 /* Block on close of a reliable socket to transmit pending data. */ #define SO_OOBINLINE 0x0100 /* Receive out-of-band data in-band. */ -#if 0 -To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ +#define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ #endif #define SO_TYPE 0x1008 /* Compatible name for SO_STYLE. */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index ab702c40b30e..5c7c7c988544 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -22,7 +22,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index da2c8d3c209e..526e4b9aece0 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -13,7 +13,7 @@ #define SO_BROADCAST 0x0020 #define SO_LINGER 0x0080 #define SO_OOBINLINE 0x0100 -/* To add :#define SO_REUSEPORT 0x0200 */ +#define SO_REUSEPORT 0x0200 #define SO_SNDBUF 0x1001 #define SO_RCVBUF 0x1002 #define SO_SNDBUFFORCE 0x100a diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index e6ca31816cc9..a26dcaece509 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -29,7 +29,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_RCVLOWAT 16 #define SO_SNDLOWAT 17 #define SO_RCVTIMEO 18 diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 9ce60b68f070..f99eea7fff0f 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -28,7 +28,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index fbbba57547d1..cbbad74b2e06 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -15,7 +15,7 @@ #define SO_PEERCRED 0x0040 #define SO_LINGER 0x0080 #define SO_OOBINLINE 0x0100 -/* To add :#define SO_REUSEPORT 0x0200 */ +#define SO_REUSEPORT 0x0200 #define SO_BSDCOMPAT 0x0400 #define SO_RCVLOWAT 0x0800 #define SO_SNDLOWAT 0x1000 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index dbf316487b51..35905cb6e419 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -32,7 +32,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/include/linux/random.h b/include/linux/random.h index d9846088c2c5..347ce553a306 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -74,4 +74,10 @@ static inline int arch_get_random_int(unsigned int *v) } #endif +/* Pseudo random number generator from numerical recipes. */ +static inline u32 next_pseudo_random32(u32 seed) +{ + return seed * 1664525 + 1013904223; +} + #endif /* _LINUX_RANDOM_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 5a34e2f03657..581dc6bd7dc6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -140,6 +140,7 @@ typedef __u64 __bitwise __addrpair; * @skc_family: network address family * @skc_state: Connection state * @skc_reuse: %SO_REUSEADDR setting + * @skc_reuseport: %SO_REUSEPORT setting * @skc_bound_dev_if: bound device index if != 0 * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol @@ -179,7 +180,8 @@ struct sock_common { unsigned short skc_family; volatile unsigned char skc_state; - unsigned char skc_reuse; + unsigned char skc_reuse:4; + unsigned char skc_reuseport:4; int skc_bound_dev_if; union { struct hlist_node skc_bind_node; @@ -297,6 +299,7 @@ struct sock { #define sk_family __sk_common.skc_family #define sk_state __sk_common.skc_state #define sk_reuse __sk_common.skc_reuse +#define sk_reuseport __sk_common.skc_reuseport #define sk_bound_dev_if __sk_common.skc_bound_dev_if #define sk_bind_node __sk_common.skc_bind_node #define sk_prot __sk_common.skc_prot diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 3f6a99201410..4ef3acbba5da 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -22,8 +22,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ - +#define SO_REUSEPORT 15 #ifndef SO_PASSCRED /* powerpc only differs in these */ #define SO_PASSCRED 16 #define SO_PEERCRED 17 diff --git a/net/core/sock.c b/net/core/sock.c index 8258fb741e9a..235fb89e8973 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -665,6 +665,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname, case SO_REUSEADDR: sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE); break; + case SO_REUSEPORT: + sk->sk_reuseport = valbool; + break; case SO_TYPE: case SO_PROTOCOL: case SO_DOMAIN: @@ -972,6 +975,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_reuse; break; + case SO_REUSEPORT: + v.val = sk->sk_reuseport; + break; + case SO_KEEPALIVE: v.val = sock_flag(sk, SOCK_KEEPOPEN); break; -- cgit v1.2.3-71-gd317 From d437c86baacf265a640dfc462c75941d02c0e153 Mon Sep 17 00:00:00 2001 From: Bing Zhao Date: Wed, 23 Jan 2013 20:33:58 -0800 Subject: ieee80211: define AKM suite selectors type 5, 6 and 7 Reference: IEEE 802.11-2012 8.4.2.27.3 "AKM suites" Signed-off-by: Bing Zhao Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index ccf9ee1dca8c..11c8bc87fdcb 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1898,7 +1898,10 @@ enum ieee80211_sa_query_action { /* AKM suite selectors */ #define WLAN_AKM_SUITE_8021X 0x000FAC01 #define WLAN_AKM_SUITE_PSK 0x000FAC02 -#define WLAN_AKM_SUITE_SAE 0x000FAC08 +#define WLAN_AKM_SUITE_8021X_SHA256 0x000FAC05 +#define WLAN_AKM_SUITE_PSK_SHA256 0x000FAC06 +#define WLAN_AKM_SUITE_TDLS 0x000FAC07 +#define WLAN_AKM_SUITE_SAE 0x000FAC08 #define WLAN_AKM_SUITE_FT_OVER_SAE 0x000FAC09 #define WLAN_MAX_KEY_LEN 32 -- cgit v1.2.3-71-gd317 From 996a953de02ffb852c9ac736f4e892008ed68884 Mon Sep 17 00:00:00 2001 From: Fabio Baltieri Date: Tue, 18 Dec 2012 18:50:55 +0100 Subject: can: add tx/rx LED trigger support This patch implements the functions to add two LED triggers, named -tx and -rx, to a canbus device driver. Triggers are called from specific handlers by each CAN device driver and can be disabled altogether with a Kconfig option. The implementation keeps the LED on when the interface is UP and blinks the LED on network activity at a configurable rate. This only supports can-dev based drivers, as it uses some support field in the can_priv structure. Supported drivers should call devm_can_led_init() and can_led_event() as needed. Cleanup is handled automatically by devres, so no *_exit function is needed. Supported events are: - CAN_LED_EVENT_OPEN: turn on tx/rx LEDs - CAN_LED_EVENT_STOP: turn off tx/rx LEDs - CAN_LED_EVENT_TX: trigger tx LED blink - CAN_LED_EVENT_RX: trigger tx LED blink Cc: Wolfgang Grandegger Cc: Marc Kleine-Budde Signed-off-by: Fabio Baltieri Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- drivers/net/can/Kconfig | 11 +++++++ drivers/net/can/Makefile | 2 ++ drivers/net/can/led.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/can/dev.h | 8 +++++ include/linux/can/led.h | 42 +++++++++++++++++++++++ 5 files changed, 149 insertions(+) create mode 100644 drivers/net/can/led.c create mode 100644 include/linux/can/led.h (limited to 'include/linux') diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index 0c5a65682d01..1cca19f1c490 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -51,6 +51,17 @@ config CAN_CALC_BITTIMING arguments "tq", "prop_seg", "phase_seg1", "phase_seg2" and "sjw". If unsure, say Y. +config CAN_LEDS + bool "Enable LED triggers for Netlink based drivers" + depends on LEDS_CLASS + select LEDS_TRIGGERS + ---help--- + This option adds two LED triggers for packet receive and transmit + events on each supported CAN device. + + Say Y here if you are working on a system with led-class supported + LEDs and you want to use them as canbus activity indicators. + config CAN_AT91 tristate "Atmel AT91 onchip CAN controller" depends on ARCH_AT91SAM9263 || ARCH_AT91SAM9X5 diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile index 7de59862bbe9..c7440392adbb 100644 --- a/drivers/net/can/Makefile +++ b/drivers/net/can/Makefile @@ -8,6 +8,8 @@ obj-$(CONFIG_CAN_SLCAN) += slcan.o obj-$(CONFIG_CAN_DEV) += can-dev.o can-dev-y := dev.o +can-dev-$(CONFIG_CAN_LEDS) += led.o + obj-y += usb/ obj-y += softing/ diff --git a/drivers/net/can/led.c b/drivers/net/can/led.c new file mode 100644 index 000000000000..c50a0d741c57 --- /dev/null +++ b/drivers/net/can/led.c @@ -0,0 +1,86 @@ +/* + * Copyright 2012, Fabio Baltieri + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include + +static unsigned long led_delay = 50; +module_param(led_delay, ulong, 0644); +MODULE_PARM_DESC(led_delay, + "blink delay time for activity leds (msecs, default: 50)."); + +/* Trigger a LED event in response to a CAN device event */ +void can_led_event(struct net_device *netdev, enum can_led_event event) +{ + struct can_priv *priv = netdev_priv(netdev); + + switch (event) { + case CAN_LED_EVENT_OPEN: + led_trigger_event(priv->tx_led_trig, LED_FULL); + led_trigger_event(priv->rx_led_trig, LED_FULL); + break; + case CAN_LED_EVENT_STOP: + led_trigger_event(priv->tx_led_trig, LED_OFF); + led_trigger_event(priv->rx_led_trig, LED_OFF); + break; + case CAN_LED_EVENT_TX: + if (led_delay) + led_trigger_blink_oneshot(priv->tx_led_trig, + &led_delay, &led_delay, 1); + break; + case CAN_LED_EVENT_RX: + if (led_delay) + led_trigger_blink_oneshot(priv->rx_led_trig, + &led_delay, &led_delay, 1); + break; + } +} +EXPORT_SYMBOL_GPL(can_led_event); + +static void can_led_release(struct device *gendev, void *res) +{ + struct can_priv *priv = netdev_priv(to_net_dev(gendev)); + + led_trigger_unregister_simple(priv->tx_led_trig); + led_trigger_unregister_simple(priv->rx_led_trig); +} + +/* Register CAN LED triggers for a CAN device + * + * This is normally called from a driver's probe function + */ +void devm_can_led_init(struct net_device *netdev) +{ + struct can_priv *priv = netdev_priv(netdev); + void *res; + + res = devres_alloc(can_led_release, 0, GFP_KERNEL); + if (!res) { + netdev_err(netdev, "cannot register LED triggers\n"); + return; + } + + snprintf(priv->tx_led_trig_name, sizeof(priv->tx_led_trig_name), + "%s-tx", netdev->name); + snprintf(priv->rx_led_trig_name, sizeof(priv->rx_led_trig_name), + "%s-rx", netdev->name); + + led_trigger_register_simple(priv->tx_led_trig_name, + &priv->tx_led_trig); + led_trigger_register_simple(priv->rx_led_trig_name, + &priv->rx_led_trig); + + devres_add(&netdev->dev, res); +} +EXPORT_SYMBOL_GPL(devm_can_led_init); diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 2b2fc345afca..7747d9bcdc84 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -16,6 +16,7 @@ #include #include #include +#include /* * CAN mode @@ -52,6 +53,13 @@ struct can_priv { unsigned int echo_skb_max; struct sk_buff **echo_skb; + +#ifdef CONFIG_CAN_LEDS + struct led_trigger *tx_led_trig; + char tx_led_trig_name[CAN_LED_NAME_SZ]; + struct led_trigger *rx_led_trig; + char rx_led_trig_name[CAN_LED_NAME_SZ]; +#endif }; /* diff --git a/include/linux/can/led.h b/include/linux/can/led.h new file mode 100644 index 000000000000..12d5549abb95 --- /dev/null +++ b/include/linux/can/led.h @@ -0,0 +1,42 @@ +/* + * Copyright 2012, Fabio Baltieri + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef CAN_LED_H +#define CAN_LED_H + +#include +#include + +enum can_led_event { + CAN_LED_EVENT_OPEN, + CAN_LED_EVENT_STOP, + CAN_LED_EVENT_TX, + CAN_LED_EVENT_RX, +}; + +#ifdef CONFIG_CAN_LEDS + +/* keep space for interface name + "-tx"/"-rx" suffix and null terminator */ +#define CAN_LED_NAME_SZ (IFNAMSIZ + 4) + +void can_led_event(struct net_device *netdev, enum can_led_event event); +void devm_can_led_init(struct net_device *netdev); + +#else + +static inline void can_led_event(struct net_device *netdev, + enum can_led_event event) +{ +} +static inline void devm_can_led_init(struct net_device *netdev) +{ +} + +#endif + +#endif -- cgit v1.2.3-71-gd317 From bf03a5379cd3492fbeca42111340581ba9dee0b8 Mon Sep 17 00:00:00 2001 From: Kurt Van Dijck Date: Tue, 18 Dec 2012 18:50:56 +0100 Subject: can: export a safe netdev_priv wrapper for candev In net_device notifier calls, it was impossible to determine if a CAN device is based on candev in a safe way. This patch adds such test in order to access candev storage from within those notifiers. Signed-off-by: Kurt Van Dijck Acked-by: Oliver Hartkopp Signed-off-by: Fabio Baltieri Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 13 +++++++++++++ include/linux/can/dev.h | 3 +++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 8233e5ed2939..13e738098fbe 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -794,6 +794,19 @@ void unregister_candev(struct net_device *dev) } EXPORT_SYMBOL_GPL(unregister_candev); +/* + * Test if a network device is a candev based device + * and return the can_priv* if so. + */ +struct can_priv *safe_candev_priv(struct net_device *dev) +{ + if ((dev->type != ARPHRD_CAN) || (dev->rtnl_link_ops != &can_link_ops)) + return NULL; + + return netdev_priv(dev); +} +EXPORT_SYMBOL_GPL(safe_candev_priv); + static __init int can_dev_init(void) { int err; diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 7747d9bcdc84..fb0ab651a041 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -106,6 +106,9 @@ u8 can_len2dlc(u8 len); struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max); void free_candev(struct net_device *dev); +/* a candev safe wrapper around netdev_priv */ +struct can_priv *safe_candev_priv(struct net_device *dev); + int open_candev(struct net_device *dev); void close_candev(struct net_device *dev); -- cgit v1.2.3-71-gd317 From a1ef7bd9fce8aba8e4701e60208148fb3bc9bdd4 Mon Sep 17 00:00:00 2001 From: Kurt Van Dijck Date: Tue, 18 Dec 2012 18:50:57 +0100 Subject: can: rename LED trigger name on netdev renames The LED trigger name for CAN devices is based on the initial CAN device name, but does never change. The LED trigger name is not guaranteed to be unique in case of hotplugging CAN devices. This patch tries to address this problem by modifying the LED trigger name according to the CAN device name when the latter changes. v1 - Kurt Van Dijck v2 - Fabio Baltieri - remove rename blocking if trigger is bound - use led-subsystem function for the actual rename (still WiP) - call init/exit functions from dev.c v3 - Kurt Van Dijck - safe operation for non-candev based devices (vcan, slcan) based on earlier patch v4 - Kurt Van Dijck - trivial patch mistakes fixed Signed-off-by: Kurt Van Dijck Signed-off-by: Fabio Baltieri Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 5 +++++ drivers/net/can/led.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/can/led.h | 9 +++++++++ 3 files changed, 52 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 13e738098fbe..6abc6e59778e 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #define MOD_DESC "CAN device driver interface" @@ -811,6 +812,8 @@ static __init int can_dev_init(void) { int err; + can_led_notifier_init(); + err = rtnl_link_register(&can_link_ops); if (!err) printk(KERN_INFO MOD_DESC "\n"); @@ -822,6 +825,8 @@ module_init(can_dev_init); static __exit void can_dev_exit(void) { rtnl_link_unregister(&can_link_ops); + + can_led_notifier_exit(); } module_exit(can_dev_exit); diff --git a/drivers/net/can/led.c b/drivers/net/can/led.c index c50a0d741c57..f27fca65dc4a 100644 --- a/drivers/net/can/led.c +++ b/drivers/net/can/led.c @@ -1,5 +1,6 @@ /* * Copyright 2012, Fabio Baltieri + * Copyright 2012, Kurt Van Dijck * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -84,3 +85,40 @@ void devm_can_led_init(struct net_device *netdev) devres_add(&netdev->dev, res); } EXPORT_SYMBOL_GPL(devm_can_led_init); + +/* NETDEV rename notifier to rename the associated led triggers too */ +static int can_led_notifier(struct notifier_block *nb, unsigned long msg, + void *data) +{ + struct net_device *netdev = data; + struct can_priv *priv = safe_candev_priv(netdev); + char name[CAN_LED_NAME_SZ]; + + if (!priv) + return NOTIFY_DONE; + + if (msg == NETDEV_CHANGENAME) { + snprintf(name, sizeof(name), "%s-tx", netdev->name); + led_trigger_rename_static(name, priv->tx_led_trig); + + snprintf(name, sizeof(name), "%s-rx", netdev->name); + led_trigger_rename_static(name, priv->rx_led_trig); + } + + return NOTIFY_DONE; +} + +/* notifier block for netdevice event */ +static struct notifier_block can_netdev_notifier __read_mostly = { + .notifier_call = can_led_notifier, +}; + +int __init can_led_notifier_init(void) +{ + return register_netdevice_notifier(&can_netdev_notifier); +} + +void __exit can_led_notifier_exit(void) +{ + unregister_netdevice_notifier(&can_netdev_notifier); +} diff --git a/include/linux/can/led.h b/include/linux/can/led.h index 12d5549abb95..9c1167baf273 100644 --- a/include/linux/can/led.h +++ b/include/linux/can/led.h @@ -26,6 +26,8 @@ enum can_led_event { void can_led_event(struct net_device *netdev, enum can_led_event event); void devm_can_led_init(struct net_device *netdev); +int __init can_led_notifier_init(void); +void __exit can_led_notifier_exit(void); #else @@ -36,6 +38,13 @@ static inline void can_led_event(struct net_device *netdev, static inline void devm_can_led_init(struct net_device *netdev) { } +static inline int can_led_notifier_init(void) +{ + return 0; +} +static inline void can_led_notifier_exit(void) +{ +} #endif -- cgit v1.2.3-71-gd317 From 156c2bb9f88065c8da78814f98fde665a5cbb527 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 17 Jan 2013 18:43:39 +0100 Subject: can: add private data space for CAN sk_buffs The struct can_skb_priv is used to transport additional information along with the stored struct can(fd)_frame that can not be contained in existing struct sk_buff elements. can_skb_priv is located in the skb headroom, which does not touch the existing CAN sk_buff usage with skb->data and skb->len, so that even out-of-tree CAN drivers can be used without changes. Btw. out-of-tree CAN drivers without can_skb_priv in the sk_buff headroom would not support features based on can_skb_priv. The can_skb_priv->ifindex contains the first interface where the CAN frame appeared on the local host. Unfortunately skb->skb_iif can not be used as this value is overwritten in every netif_receive_skb() call. Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 8 +++++++- drivers/net/can/slcan.c | 8 +++++++- include/linux/can/skb.h | 35 +++++++++++++++++++++++++++++++++++ net/can/bcm.c | 12 +++++++++--- net/can/raw.c | 8 ++++++-- 5 files changed, 64 insertions(+), 7 deletions(-) create mode 100644 include/linux/can/skb.h (limited to 'include/linux') diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 6abc6e59778e..59ada082a994 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -502,13 +503,18 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) { struct sk_buff *skb; - skb = netdev_alloc_skb(dev, sizeof(struct can_frame)); + skb = netdev_alloc_skb(dev, sizeof(struct can_skb_priv) + + sizeof(struct can_frame)); if (unlikely(!skb)) return NULL; skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; + + skb_reserve(skb, sizeof(struct can_skb_priv)); + ((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex; + *cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); memset(*cf, 0, sizeof(struct can_frame)); diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index adc3708d8829..e79a8d10e0fc 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -55,6 +55,7 @@ #include #include #include +#include static __initconst const char banner[] = KERN_INFO "slcan: serial line CAN interface driver\n"; @@ -184,7 +185,8 @@ static void slc_bump(struct slcan *sl) cf.data[i] |= tmp; } - skb = dev_alloc_skb(sizeof(struct can_frame)); + skb = dev_alloc_skb(sizeof(struct can_frame) + + sizeof(struct can_skb_priv)); if (!skb) return; @@ -192,6 +194,10 @@ static void slc_bump(struct slcan *sl) skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; + + skb_reserve(skb, sizeof(struct can_skb_priv)); + ((struct can_skb_priv *)(skb->head))->ifindex = sl->dev->ifindex; + memcpy(skb_put(skb, sizeof(struct can_frame)), &cf, sizeof(struct can_frame)); netif_rx_ni(skb); diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h new file mode 100644 index 000000000000..4b0f24d3a878 --- /dev/null +++ b/include/linux/can/skb.h @@ -0,0 +1,35 @@ +/* + * linux/can/skb.h + * + * Definitions for the CAN network socket buffer + * + * Copyright (C) 2012 Oliver Hartkopp + * + */ + +#ifndef CAN_SKB_H +#define CAN_SKB_H + +#include +#include + +/* + * The struct can_skb_priv is used to transport additional information along + * with the stored struct can(fd)_frame that can not be contained in existing + * struct sk_buff elements. + * N.B. that this information must not be modified in cloned CAN sk_buffs. + * To modify the CAN frame content or the struct can_skb_priv content + * skb_copy() needs to be used instead of skb_clone(). + */ + +/** + * struct can_skb_priv - private additional data inside CAN sk_buffs + * @ifindex: ifindex of the first interface the CAN frame appeared on + * @cf: align to the following CAN frame at skb->data + */ +struct can_skb_priv { + int ifindex; + struct can_frame cf[0]; +}; + +#endif /* CAN_SKB_H */ diff --git a/net/can/bcm.c b/net/can/bcm.c index 969b7cdff59d..ccc27b9e8384 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -256,10 +257,13 @@ static void bcm_can_tx(struct bcm_op *op) return; } - skb = alloc_skb(CFSIZ, gfp_any()); + skb = alloc_skb(CFSIZ + sizeof(struct can_skb_priv), gfp_any()); if (!skb) goto out; + skb_reserve(skb, sizeof(struct can_skb_priv)); + ((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex; + memcpy(skb_put(skb, CFSIZ), cf, CFSIZ); /* send with loopback */ @@ -1199,11 +1203,12 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) if (!ifindex) return -ENODEV; - skb = alloc_skb(CFSIZ, GFP_KERNEL); - + skb = alloc_skb(CFSIZ + sizeof(struct can_skb_priv), GFP_KERNEL); if (!skb) return -ENOMEM; + skb_reserve(skb, sizeof(struct can_skb_priv)); + err = memcpy_fromiovec(skb_put(skb, CFSIZ), msg->msg_iov, CFSIZ); if (err < 0) { kfree_skb(skb); @@ -1216,6 +1221,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) return -ENODEV; } + ((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex; skb->dev = dev; skb->sk = sk; err = can_send(skb, 1); /* send with loopback */ diff --git a/net/can/raw.c b/net/can/raw.c index 5b0e3e330d97..5d860e8dcc52 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -699,11 +700,14 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, if (!dev) return -ENXIO; - skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT, - &err); + skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv), + msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) goto put_dev; + skb_reserve(skb, sizeof(struct can_skb_priv)); + ((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex; + err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) goto free_skb; -- cgit v1.2.3-71-gd317 From cef401de7be8c4e155c6746bfccf721a4fa5fab9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 25 Jan 2013 20:34:37 +0000 Subject: net: fix possible wrong checksum generation Pravin Shelar mentioned that GSO could potentially generate wrong TX checksum if skb has fragments that are overwritten by the user between the checksum computation and transmit. He suggested to linearize skbs but this extra copy can be avoided for normal tcp skbs cooked by tcp_sendmsg(). This patch introduces a new SKB_GSO_SHARED_FRAG flag, set in skb_shinfo(skb)->gso_type if at least one frag can be modified by the user. Typical sources of such possible overwrites are {vm}splice(), sendfile(), and macvtap/tun/virtio_net drivers. Tested: $ netperf -H 7.7.8.84 MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 7.7.8.84 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 3959.52 $ netperf -H 7.7.8.84 -t TCP_SENDFILE TCP SENDFILE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 7.7.8.84 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 3216.80 Performance of the SENDFILE is impacted by the extra allocation and copy, and because we use order-0 pages, while the TCP_STREAM uses bigger pages. Reported-by: Pravin Shelar Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 3 ++- drivers/net/tun.c | 12 ++++++++---- drivers/net/virtio_net.c | 12 ++++++++---- include/linux/skbuff.h | 19 +++++++++++++++++++ net/core/dev.c | 9 +++++++++ net/core/skbuff.c | 4 ++++ net/ipv4/af_inet.c | 1 + net/ipv4/ip_gre.c | 4 +++- net/ipv4/ipip.c | 4 +++- net/ipv4/tcp.c | 3 +++ net/ipv4/tcp_input.c | 4 ++-- net/ipv4/tcp_output.c | 4 ++-- net/ipv6/ip6_offload.c | 1 + 13 files changed, 65 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 0f0f9ce3a776..b181dfb3d6d6 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -543,6 +543,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, skb->data_len += len; skb->len += len; skb->truesize += truesize; + skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; atomic_add(truesize, &skb->sk->sk_wmem_alloc); while (len) { int off = base & ~PAGE_MASK; @@ -598,7 +599,7 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { skb_shinfo(skb)->gso_size = vnet_hdr->gso_size; - skb_shinfo(skb)->gso_type = gso_type; + skb_shinfo(skb)->gso_type |= gso_type; /* Header must be checked, and gso_segs computed. */ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index c81680dc10eb..293ce8dfc9e6 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1005,6 +1005,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, skb->data_len += len; skb->len += len; skb->truesize += truesize; + skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; atomic_add(truesize, &skb->sk->sk_wmem_alloc); while (len) { int off = base & ~PAGE_MASK; @@ -1150,16 +1151,18 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) { + unsigned short gso_type = 0; + pr_debug("GSO!\n"); switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + gso_type = SKB_GSO_TCPV4; break; case VIRTIO_NET_HDR_GSO_TCPV6: - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + gso_type = SKB_GSO_TCPV6; break; case VIRTIO_NET_HDR_GSO_UDP: - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + gso_type = SKB_GSO_UDP; break; default: tun->dev->stats.rx_frame_errors++; @@ -1168,9 +1171,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + gso_type |= SKB_GSO_TCP_ECN; skb_shinfo(skb)->gso_size = gso.gso_size; + skb_shinfo(skb)->gso_type |= gso_type; if (skb_shinfo(skb)->gso_size == 0) { tun->dev->stats.rx_frame_errors++; kfree_skb(skb); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 701408a1ded6..58914c8ea68f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -220,6 +220,7 @@ static void set_skb_frag(struct sk_buff *skb, struct page *page, skb->len += size; skb->truesize += PAGE_SIZE; skb_shinfo(skb)->nr_frags++; + skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; *len -= size; } @@ -379,16 +380,18 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) ntohs(skb->protocol), skb->len, skb->pkt_type); if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { + unsigned short gso_type = 0; + pr_debug("GSO!\n"); switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + gso_type = SKB_GSO_TCPV4; break; case VIRTIO_NET_HDR_GSO_UDP: - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + gso_type = SKB_GSO_UDP; break; case VIRTIO_NET_HDR_GSO_TCPV6: - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + gso_type = SKB_GSO_TCPV6; break; default: net_warn_ratelimited("%s: bad gso type %u.\n", @@ -397,7 +400,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) } if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + gso_type |= SKB_GSO_TCP_ECN; skb_shinfo(skb)->gso_size = hdr->hdr.gso_size; if (skb_shinfo(skb)->gso_size == 0) { @@ -405,6 +408,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) goto frame_err; } + skb_shinfo(skb)->gso_type |= gso_type; /* Header must be checked, and gso_segs computed. */ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; skb_shinfo(skb)->gso_segs = 0; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8b2256e880e0..0259b719bebf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -307,6 +307,13 @@ enum { SKB_GSO_TCPV6 = 1 << 4, SKB_GSO_FCOE = 1 << 5, + + /* This indicates at least one fragment might be overwritten + * (as in vmsplice(), sendfile() ...) + * If we need to compute a TX checksum, we'll need to copy + * all frags to avoid possible bad checksum + */ + SKB_GSO_SHARED_FRAG = 1 << 6, }; #if BITS_PER_LONG > 32 @@ -2200,6 +2207,18 @@ static inline int skb_linearize(struct sk_buff *skb) return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0; } +/** + * skb_has_shared_frag - can any frag be overwritten + * @skb: buffer to test + * + * Return true if the skb has at least one frag that might be modified + * by an external entity (as in vmsplice()/sendfile()) + */ +static inline bool skb_has_shared_frag(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->gso_type & SKB_GSO_SHARED_FRAG; +} + /** * skb_linearize_cow - make sure skb is linear and writable * @skb: buffer to process diff --git a/net/core/dev.c b/net/core/dev.c index c69cd8721b28..a83375d3af72 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2271,6 +2271,15 @@ int skb_checksum_help(struct sk_buff *skb) return -EINVAL; } + /* Before computing a checksum, we should make sure no frag could + * be modified by an external entity : checksum could be wrong. + */ + if (skb_has_shared_frag(skb)) { + ret = __skb_linearize(skb); + if (ret) + goto out; + } + offset = skb_checksum_start_offset(skb); BUG_ON(offset >= skb_headlen(skb)); csum = skb_checksum(skb, offset, skb->len - offset, 0); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2568c449fe36..bddc1dd2e7f2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2340,6 +2340,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); + skb_shinfo(skb1)->gso_type = skb_shinfo(skb)->gso_type; + if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ @@ -2845,6 +2847,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_copy_from_linear_data_offset(skb, offset, skb_put(nskb, hsize), hsize); + skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type; + while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; __skb_frag_ref(frag); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4b7053919976..49ddca31c4da 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1306,6 +1306,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_SHARED_FRAG | 0))) goto out; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 303012adf9e6..af6be70821c4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -738,7 +738,7 @@ drop: static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - const struct iphdr *old_iph = ip_hdr(skb); + const struct iphdr *old_iph; const struct iphdr *tiph; struct flowi4 fl4; u8 tos; @@ -756,6 +756,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev skb_checksum_help(skb)) goto tx_error; + old_iph = ip_hdr(skb); + if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 191fc24a745a..8f024d41eefa 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -472,7 +472,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) __be16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ - const struct iphdr *old_iph = ip_hdr(skb); + const struct iphdr *old_iph; struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; @@ -486,6 +486,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb_checksum_help(skb)) goto tx_error; + old_iph = ip_hdr(skb); + if (tos & 1) tos = old_iph->tos; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 52271947a471..3ec1f69c5ceb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -896,6 +896,8 @@ new_segment: skb_fill_page_desc(skb, i, page, offset, copy); } + skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; + skb->len += copy; skb->data_len += copy; skb->truesize += copy; @@ -3032,6 +3034,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | + SKB_GSO_SHARED_FRAG | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0905997e5873..492c7cfe1453 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1240,13 +1240,13 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, */ if (!skb_shinfo(prev)->gso_size) { skb_shinfo(prev)->gso_size = mss; - skb_shinfo(prev)->gso_type = sk->sk_gso_type; + skb_shinfo(prev)->gso_type |= sk->sk_gso_type; } /* CHECKME: To clear or not to clear? Mimics normal skb currently */ if (skb_shinfo(skb)->gso_segs <= 1) { skb_shinfo(skb)->gso_size = 0; - skb_shinfo(skb)->gso_type = 0; + skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG; } /* Difference in this won't matter, both ACKed by the same cumul. ACK */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 667a6adfccf8..367e2ec01da1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1133,6 +1133,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { + skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG; if (skb->len <= mss_now || !sk_can_gso(sk) || skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal @@ -1140,11 +1141,10 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, */ skb_shinfo(skb)->gso_segs = 1; skb_shinfo(skb)->gso_size = 0; - skb_shinfo(skb)->gso_type = 0; } else { skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now); skb_shinfo(skb)->gso_size = mss_now; - skb_shinfo(skb)->gso_type = sk->sk_gso_type; + skb_shinfo(skb)->gso_type |= sk->sk_gso_type; } } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index f26f0da7f095..d141fc32a2ea 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -100,6 +100,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | + SKB_GSO_SHARED_FRAG | 0))) goto out; -- cgit v1.2.3-71-gd317 From 2bf3440d7b8755f2627232e6a4c37efbbe053685 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 28 Jan 2013 08:33:33 +0000 Subject: can: rework skb reserved data handling Added accessor and skb_reserve helpers for struct can_skb_priv. Removed pointless skb_headroom() check. Signed-off-by: Oliver Hartkopp CC: Marc Kleine-Budde Signed-off-by: David S. Miller --- drivers/net/can/dev.c | 4 ++-- drivers/net/can/slcan.c | 4 ++-- include/linux/can/skb.h | 10 ++++++++++ net/can/bcm.c | 8 ++++---- net/can/gw.c | 4 +--- net/can/raw.c | 4 ++-- 6 files changed, 21 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 59ada082a994..f9cba4123c66 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -512,8 +512,8 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; - skb_reserve(skb, sizeof(struct can_skb_priv)); - ((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = dev->ifindex; *cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); memset(*cf, 0, sizeof(struct can_frame)); diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index e79a8d10e0fc..06b7e097d36e 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -195,8 +195,8 @@ static void slc_bump(struct slcan *sl) skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; - skb_reserve(skb, sizeof(struct can_skb_priv)); - ((struct can_skb_priv *)(skb->head))->ifindex = sl->dev->ifindex; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = sl->dev->ifindex; memcpy(skb_put(skb, sizeof(struct can_frame)), &cf, sizeof(struct can_frame)); diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index 4b0f24d3a878..2f0543f7510c 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -32,4 +32,14 @@ struct can_skb_priv { struct can_frame cf[0]; }; +static inline struct can_skb_priv *can_skb_prv(struct sk_buff *skb) +{ + return (struct can_skb_priv *)(skb->head); +} + +static inline void can_skb_reserve(struct sk_buff *skb) +{ + skb_reserve(skb, sizeof(struct can_skb_priv)); +} + #endif /* CAN_SKB_H */ diff --git a/net/can/bcm.c b/net/can/bcm.c index ccc27b9e8384..28e12d18f0f1 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -261,8 +261,8 @@ static void bcm_can_tx(struct bcm_op *op) if (!skb) goto out; - skb_reserve(skb, sizeof(struct can_skb_priv)); - ((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = dev->ifindex; memcpy(skb_put(skb, CFSIZ), cf, CFSIZ); @@ -1207,7 +1207,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) if (!skb) return -ENOMEM; - skb_reserve(skb, sizeof(struct can_skb_priv)); + can_skb_reserve(skb); err = memcpy_fromiovec(skb_put(skb, CFSIZ), msg->msg_iov, CFSIZ); if (err < 0) { @@ -1221,7 +1221,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) return -ENODEV; } - ((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex; + can_skb_prv(skb)->ifindex = dev->ifindex; skb->dev = dev; skb->sk = sk; err = can_send(skb, 1); /* send with loopback */ diff --git a/net/can/gw.c b/net/can/gw.c index acdd4656cc3b..c185fcd5e828 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -381,9 +381,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) /* is sending the skb back to the incoming interface not allowed? */ if (!(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK) && - skb_headroom(skb) == sizeof(struct can_skb_priv) && - (((struct can_skb_priv *)(skb->head))->ifindex == - gwj->dst.dev->ifindex)) + can_skb_prv(skb)->ifindex == gwj->dst.dev->ifindex) return; /* diff --git a/net/can/raw.c b/net/can/raw.c index 5d860e8dcc52..c1764e41ddaf 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -705,8 +705,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, if (!skb) goto put_dev; - skb_reserve(skb, sizeof(struct can_skb_priv)); - ((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = dev->ifindex; err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) -- cgit v1.2.3-71-gd317 From 5fbee843c32e5de2d8af68ba0bdd113bb0af9d86 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 22 Jan 2013 21:29:39 +0000 Subject: netpoll: add RCU annotation to npinfo field dev->npinfo is protected by RCU. This fixes the following sparse warnings: net/core/netpoll.c:177:48: error: incompatible types in comparison expression (different address spaces) net/core/netpoll.c:200:35: error: incompatible types in comparison expression (different address spaces) net/core/netpoll.c:221:35: error: incompatible types in comparison expression (different address spaces) net/core/netpoll.c:327:18: error: incompatible types in comparison expression (different address spaces) Cc: Eric Dumazet Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 549f5ad2055d..85b0949d9946 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1272,7 +1272,7 @@ struct net_device { void (*destructor)(struct net_device *dev); #ifdef CONFIG_NETPOLL - struct netpoll_info *npinfo; + struct netpoll_info __rcu *npinfo; #endif #ifdef CONFIG_NET_NS -- cgit v1.2.3-71-gd317 From 7ab59dc15e2f42a4321ed016bcd6044a4d8de6d1 Mon Sep 17 00:00:00 2001 From: "David J. Choi" Date: Wed, 23 Jan 2013 14:05:15 +0000 Subject: drivers/net/phy/micrel_phy: Add support for new PHYs Summary of changes: .Newly added phys -KSZ8081/KSZ8091, which has some phy ids. -KSZ8061 -KSZ9031, which is Gigabit phy. -KSZ886X, which has a switch function. -KSZ8031, which has a same phy ids with KSZ8021. Signed-off-by: David J. Choi Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 64 +++++++++++++++++++++++++++++++++++++++++++--- include/linux/micrel_phy.h | 9 ++++++- 2 files changed, 68 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index b983596abcbb..29934446436a 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -5,15 +5,20 @@ * * Author: David J. Choi * - * Copyright (c) 2010 Micrel, Inc. + * Copyright (c) 2010-2013 Micrel, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * - * Support : ksz9021 1000/100/10 phy from Micrel - * ks8001, ks8737, ks8721, ks8041, ks8051 100/10 phy + * Support : Micrel Phys: + * Giga phys: ksz9021, ksz9031 + * 100/10 Phys : ksz8001, ksz8721, ksz8737, ksz8041 + * ksz8021, ksz8031, ksz8051, + * ksz8081, ksz8091, + * ksz8061, + * Switch : ksz8873, ksz886x */ #include @@ -176,7 +181,7 @@ static struct phy_driver ksphy_driver[] = { }, { .phy_id = PHY_ID_KSZ8021, .phy_id_mask = 0x00ffffff, - .name = "Micrel KSZ8021", + .name = "Micrel KSZ8021 or KSZ8031", .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause), .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, @@ -224,6 +229,30 @@ static struct phy_driver ksphy_driver[] = { .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, .driver = { .owner = THIS_MODULE,}, +}, { + .phy_id = PHY_ID_KSZ8081, + .name = "Micrel KSZ8081 or KSZ8091", + .phy_id_mask = 0x00fffff0, + .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause), + .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, + .config_init = kszphy_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = kszphy_ack_interrupt, + .config_intr = kszphy_config_intr, + .driver = { .owner = THIS_MODULE,}, +}, { + .phy_id = PHY_ID_KSZ8061, + .name = "Micrel KSZ8061", + .phy_id_mask = 0x00fffff0, + .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause), + .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, + .config_init = kszphy_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = kszphy_ack_interrupt, + .config_intr = kszphy_config_intr, + .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_KSZ9021, .phy_id_mask = 0x000ffffe, @@ -237,6 +266,19 @@ static struct phy_driver ksphy_driver[] = { .ack_interrupt = kszphy_ack_interrupt, .config_intr = ksz9021_config_intr, .driver = { .owner = THIS_MODULE, }, +}, { + .phy_id = PHY_ID_KSZ9031, + .phy_id_mask = 0x00fffff0, + .name = "Micrel KSZ9031 Gigabit PHY", + .features = (PHY_GBIT_FEATURES | SUPPORTED_Pause + | SUPPORTED_Asym_Pause), + .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, + .config_init = kszphy_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = kszphy_ack_interrupt, + .config_intr = ksz9021_config_intr, + .driver = { .owner = THIS_MODULE, }, }, { .phy_id = PHY_ID_KSZ8873MLL, .phy_id_mask = 0x00fffff0, @@ -247,6 +289,16 @@ static struct phy_driver ksphy_driver[] = { .config_aneg = ksz8873mll_config_aneg, .read_status = ksz8873mll_read_status, .driver = { .owner = THIS_MODULE, }, +}, { + .phy_id = PHY_ID_KSZ886X, + .phy_id_mask = 0x00fffff0, + .name = "Micrel KSZ886X Switch", + .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause), + .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, + .config_init = kszphy_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .driver = { .owner = THIS_MODULE, }, } }; static int __init ksphy_init(void) @@ -270,12 +322,16 @@ MODULE_LICENSE("GPL"); static struct mdio_device_id __maybe_unused micrel_tbl[] = { { PHY_ID_KSZ9021, 0x000ffffe }, + { PHY_ID_KSZ9031, 0x00fffff0 }, { PHY_ID_KSZ8001, 0x00ffffff }, { PHY_ID_KS8737, 0x00fffff0 }, { PHY_ID_KSZ8021, 0x00ffffff }, { PHY_ID_KSZ8041, 0x00fffff0 }, { PHY_ID_KSZ8051, 0x00fffff0 }, + { PHY_ID_KSZ8061, 0x00fffff0 }, + { PHY_ID_KSZ8081, 0x00fffff0 }, { PHY_ID_KSZ8873MLL, 0x00fffff0 }, + { PHY_ID_KSZ886X, 0x00fffff0 }, { } }; diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index adfe8c058f29..9dbb41a4e250 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -21,8 +21,15 @@ #define PHY_ID_KSZ8021 0x00221555 #define PHY_ID_KSZ8041 0x00221510 #define PHY_ID_KSZ8051 0x00221550 -/* both for ks8001 Rev. A/B, and for ks8721 Rev 3. */ +/* same id: ks8001 Rev. A/B, and ks8721 Rev 3. */ #define PHY_ID_KSZ8001 0x0022161A +/* same id: KS8081, KS8091 */ +#define PHY_ID_KSZ8081 0x00221560 +#define PHY_ID_KSZ8061 0x00221570 +#define PHY_ID_KSZ9031 0x00221620 + +#define PHY_ID_KSZ886X 0x00221430 +#define PHY_ID_KSZ8863 0x00221435 /* struct phy_device dev_flags definitions */ #define MICREL_PHY_50MHZ_CLK 0x00000001 -- cgit v1.2.3-71-gd317 From 5c766d642bcaffd0c2a5b354db2068515b3846cf Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 24 Jan 2013 09:41:41 +0000 Subject: ipv4: introduce address lifetime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some usecase when lifetime of ipv4 addresses might be helpful. For example: 1) initramfs networkmanager uses a DHCP daemon to learn network configuration parameters 2) initramfs networkmanager addresses, routes and DNS configuration 3) initramfs networkmanager is requested to stop 4) initramfs networkmanager stops all daemons including dhclient 5) there are addresses and routes configured but no daemon running. If the system doesn't start networkmanager for some reason, addresses and routes will be used forever, which violates RFC 2131. This patch is essentially a backport of ivp6 address lifetime mechanism for ipv4 addresses. Current "ip" tool supports this without any patch (since it does not distinguish between ipv4 and ipv6 addresses in this perspective. Also, this should be back-compatible with all current netlink users. Reported-by: Pavel Šimerda Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 6 ++ include/net/addrconf.h | 4 + net/ipv4/devinet.c | 215 +++++++++++++++++++++++++++++++++++++++++++-- net/ipv6/addrconf.c | 4 - 4 files changed, 220 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index a9d828976a77..ea1e3b863890 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -166,6 +166,12 @@ struct in_ifaddr { unsigned char ifa_flags; unsigned char ifa_prefixlen; char ifa_label[IFNAMSIZ]; + + /* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. */ + __u32 ifa_valid_lft; + __u32 ifa_preferred_lft; + unsigned long ifa_cstamp; /* created timestamp */ + unsigned long ifa_tstamp; /* updated timestamp */ }; extern int register_inetaddr_notifier(struct notifier_block *nb); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 6c58d507123f..40be2a0d8ae1 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -15,6 +15,10 @@ #define IPV6_MAX_ADDRESSES 16 +#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ / 50 : 1) +#define ADDRCONF_TIMER_FUZZ (HZ / 4) +#define ADDRCONF_TIMER_FUZZ_MAX (HZ) + #include #include diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a8e4f2665d5e..5281314886c1 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -63,6 +63,7 @@ #include #include #include +#include #include "fib_lookup.h" @@ -93,6 +94,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_ADDRESS] = { .type = NLA_U32 }, [IFA_BROADCAST] = { .type = NLA_U32 }, [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, + [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, }; #define IN4_ADDR_HSIZE_SHIFT 8 @@ -417,6 +419,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); } +static void check_lifetime(struct work_struct *work); + +static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); + static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid) { @@ -462,6 +468,9 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, inet_hash_insert(dev_net(in_dev->dev), ifa); + cancel_delayed_work(&check_lifetime_work); + schedule_delayed_work(&check_lifetime_work, 0); + /* Send message first, then call notifier. Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ @@ -573,7 +582,107 @@ errout: return err; } -static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) +#define INFINITY_LIFE_TIME 0xFFFFFFFF + +static void check_lifetime(struct work_struct *work) +{ + unsigned long now, next, next_sec, next_sched; + struct in_ifaddr *ifa; + struct hlist_node *node; + int i; + + now = jiffies; + next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); + + rcu_read_lock(); + for (i = 0; i < IN4_ADDR_HSIZE; i++) { + hlist_for_each_entry_rcu(ifa, node, + &inet_addr_lst[i], hash) { + unsigned long age; + + if (ifa->ifa_flags & IFA_F_PERMANENT) + continue; + + /* We try to batch several events at once. */ + age = (now - ifa->ifa_tstamp + + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + + if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && + age >= ifa->ifa_valid_lft) { + struct in_ifaddr **ifap ; + + rtnl_lock(); + for (ifap = &ifa->ifa_dev->ifa_list; + *ifap != NULL; ifap = &ifa->ifa_next) { + if (*ifap == ifa) + inet_del_ifa(ifa->ifa_dev, + ifap, 1); + } + rtnl_unlock(); + } else if (ifa->ifa_preferred_lft == + INFINITY_LIFE_TIME) { + continue; + } else if (age >= ifa->ifa_preferred_lft) { + if (time_before(ifa->ifa_tstamp + + ifa->ifa_valid_lft * HZ, next)) + next = ifa->ifa_tstamp + + ifa->ifa_valid_lft * HZ; + + if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) { + ifa->ifa_flags |= IFA_F_DEPRECATED; + rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); + } + } else if (time_before(ifa->ifa_tstamp + + ifa->ifa_preferred_lft * HZ, + next)) { + next = ifa->ifa_tstamp + + ifa->ifa_preferred_lft * HZ; + } + } + } + rcu_read_unlock(); + + next_sec = round_jiffies_up(next); + next_sched = next; + + /* If rounded timeout is accurate enough, accept it. */ + if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ)) + next_sched = next_sec; + + now = jiffies; + /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */ + if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX)) + next_sched = now + ADDRCONF_TIMER_FUZZ_MAX; + + schedule_delayed_work(&check_lifetime_work, next_sched - now); +} + +static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, + __u32 prefered_lft) +{ + unsigned long timeout; + + ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED); + + timeout = addrconf_timeout_fixup(valid_lft, HZ); + if (addrconf_finite_timeout(timeout)) + ifa->ifa_valid_lft = timeout; + else + ifa->ifa_flags |= IFA_F_PERMANENT; + + timeout = addrconf_timeout_fixup(prefered_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + ifa->ifa_flags |= IFA_F_DEPRECATED; + ifa->ifa_preferred_lft = timeout; + } + ifa->ifa_tstamp = jiffies; + if (!ifa->ifa_cstamp) + ifa->ifa_cstamp = ifa->ifa_tstamp; +} + +static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, + __u32 *pvalid_lft, __u32 *pprefered_lft) { struct nlattr *tb[IFA_MAX+1]; struct in_ifaddr *ifa; @@ -633,24 +742,73 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + if (tb[IFA_CACHEINFO]) { + struct ifa_cacheinfo *ci; + + ci = nla_data(tb[IFA_CACHEINFO]); + if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { + err = -EINVAL; + goto errout; + } + *pvalid_lft = ci->ifa_valid; + *pprefered_lft = ci->ifa_prefered; + } + return ifa; errout: return ERR_PTR(err); } +static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) +{ + struct in_device *in_dev = ifa->ifa_dev; + struct in_ifaddr *ifa1, **ifap; + + if (!ifa->ifa_local) + return NULL; + + for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; + ifap = &ifa1->ifa_next) { + if (ifa1->ifa_mask == ifa->ifa_mask && + inet_ifa_match(ifa1->ifa_address, ifa) && + ifa1->ifa_local == ifa->ifa_local) + return ifa1; + } + return NULL; +} + static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct in_ifaddr *ifa; + struct in_ifaddr *ifa_existing; + __u32 valid_lft = INFINITY_LIFE_TIME; + __u32 prefered_lft = INFINITY_LIFE_TIME; ASSERT_RTNL(); - ifa = rtm_to_ifaddr(net, nlh); + ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft); if (IS_ERR(ifa)) return PTR_ERR(ifa); - return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); + ifa_existing = find_matching_ifa(ifa); + if (!ifa_existing) { + /* It would be best to check for !NLM_F_CREATE here but + * userspace alreay relies on not having to provide this. + */ + set_ifa_lifetime(ifa, valid_lft, prefered_lft); + return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); + } else { + inet_free_ifa(ifa); + + if (nlh->nlmsg_flags & NLM_F_EXCL || + !(nlh->nlmsg_flags & NLM_F_REPLACE)) + return -EEXIST; + + set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft); + } + return 0; } /* @@ -852,6 +1010,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) ifa->ifa_prefixlen = 32; ifa->ifa_mask = inet_make_mask(32); } + set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); ret = inet_set_ifa(dev, ifa); break; @@ -1190,6 +1349,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, ifa->ifa_dev = in_dev; ifa->ifa_scope = RT_SCOPE_HOST; memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, + INFINITY_LIFE_TIME); inet_insert_ifa(ifa); } } @@ -1246,11 +1407,30 @@ static size_t inet_nlmsg_size(void) + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ } +static inline u32 cstamp_delta(unsigned long cstamp) +{ + return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; +} + +static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, + unsigned long tstamp, u32 preferred, u32 valid) +{ + struct ifa_cacheinfo ci; + + ci.cstamp = cstamp_delta(cstamp); + ci.tstamp = cstamp_delta(tstamp); + ci.ifa_prefered = preferred; + ci.ifa_valid = valid; + + return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); +} + static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, u32 portid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; + u32 preferred, valid; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) @@ -1259,10 +1439,31 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; ifm->ifa_prefixlen = ifa->ifa_prefixlen; - ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; + ifm->ifa_flags = ifa->ifa_flags; ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; + if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { + preferred = ifa->ifa_preferred_lft; + valid = ifa->ifa_valid_lft; + if (preferred != INFINITY_LIFE_TIME) { + long tval = (jiffies - ifa->ifa_tstamp) / HZ; + + if (preferred > tval) + preferred -= tval; + else + preferred = 0; + if (valid != INFINITY_LIFE_TIME) { + if (valid > tval) + valid -= tval; + else + valid = 0; + } + } + } else { + preferred = INFINITY_LIFE_TIME; + valid = INFINITY_LIFE_TIME; + } if ((ifa->ifa_address && nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) || (ifa->ifa_local && @@ -1270,7 +1471,9 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, (ifa->ifa_broadcast && nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || (ifa->ifa_label[0] && - nla_put_string(skb, IFA_LABEL, ifa->ifa_label))) + nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || + put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp, + preferred, valid)) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -1988,6 +2191,8 @@ void __init devinet_init(void) register_gifconf(PF_INET, inet_gifconf); register_netdevice_notifier(&ip_netdev_notifier); + schedule_delayed_work(&check_lifetime_work, 0); + rtnl_af_register(&inet_af_ops); rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 80d59802d964..7f7332b44699 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -110,10 +110,6 @@ static inline u32 cstamp_delta(unsigned long cstamp) return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; } -#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1) -#define ADDRCONF_TIMER_FUZZ (HZ / 4) -#define ADDRCONF_TIMER_FUZZ_MAX (HZ) - #ifdef CONFIG_SYSCTL static void addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct inet6_dev *idev); -- cgit v1.2.3-71-gd317 From a6ca2e10f795111a90a4efabb07717258669e03d Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 26 Jan 2013 21:38:35 +0100 Subject: ssb: add gpio_to_irq The old bcm47xx gpio code had support for gpio_to_irq, but the new code did not provide this function, but returned -ENXIO all the time. This patch adds the missing function. arch/mips/bcm47xx/wgt634u.c calls gpio_to_irq() and got the correct irq number with the old gpio handling code. With this patch the code in wgt634u.c should work again. I do not have a wgt634u to test this. Signed-off-by: Hauke Mehrtens Signed-off-by: John W. Linville --- drivers/ssb/driver_gpio.c | 22 ++++++++++++++++++++++ include/linux/ssb/ssb_driver_mips.h | 5 +++++ 2 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c index 97ac0a38e3d0..accabe39b320 100644 --- a/drivers/ssb/driver_gpio.c +++ b/drivers/ssb/driver_gpio.c @@ -74,6 +74,16 @@ static void ssb_gpio_chipco_free(struct gpio_chip *chip, unsigned gpio) ssb_chipco_gpio_pullup(&bus->chipco, 1 << gpio, 0); } +static int ssb_gpio_chipco_to_irq(struct gpio_chip *chip, unsigned gpio) +{ + struct ssb_bus *bus = ssb_gpio_get_bus(chip); + + if (bus->bustype == SSB_BUSTYPE_SSB) + return ssb_mips_irq(bus->chipco.dev) + 2; + else + return -EINVAL; +} + static int ssb_gpio_chipco_init(struct ssb_bus *bus) { struct gpio_chip *chip = &bus->gpio; @@ -86,6 +96,7 @@ static int ssb_gpio_chipco_init(struct ssb_bus *bus) chip->set = ssb_gpio_chipco_set_value; chip->direction_input = ssb_gpio_chipco_direction_input; chip->direction_output = ssb_gpio_chipco_direction_output; + chip->to_irq = ssb_gpio_chipco_to_irq; chip->ngpio = 16; /* There is just one SoC in one device and its GPIO addresses should be * deterministic to address them more easily. The other buses could get @@ -134,6 +145,16 @@ static int ssb_gpio_extif_direction_output(struct gpio_chip *chip, return 0; } +static int ssb_gpio_extif_to_irq(struct gpio_chip *chip, unsigned gpio) +{ + struct ssb_bus *bus = ssb_gpio_get_bus(chip); + + if (bus->bustype == SSB_BUSTYPE_SSB) + return ssb_mips_irq(bus->extif.dev) + 2; + else + return -EINVAL; +} + static int ssb_gpio_extif_init(struct ssb_bus *bus) { struct gpio_chip *chip = &bus->gpio; @@ -144,6 +165,7 @@ static int ssb_gpio_extif_init(struct ssb_bus *bus) chip->set = ssb_gpio_extif_set_value; chip->direction_input = ssb_gpio_extif_direction_input; chip->direction_output = ssb_gpio_extif_direction_output; + chip->to_irq = ssb_gpio_extif_to_irq; chip->ngpio = 5; /* There is just one SoC in one device and its GPIO addresses should be * deterministic to address them more easily. The other buses could get diff --git a/include/linux/ssb/ssb_driver_mips.h b/include/linux/ssb/ssb_driver_mips.h index 07a9c7a2e088..afe79d40a99e 100644 --- a/include/linux/ssb/ssb_driver_mips.h +++ b/include/linux/ssb/ssb_driver_mips.h @@ -45,6 +45,11 @@ void ssb_mipscore_init(struct ssb_mipscore *mcore) { } +static inline unsigned int ssb_mips_irq(struct ssb_device *dev) +{ + return 0; +} + #endif /* CONFIG_SSB_DRIVER_MIPS */ #endif /* LINUX_SSB_MIPSCORE_H_ */ -- cgit v1.2.3-71-gd317 From 8f1ca2683225afa21b827ff620a6225c390771a9 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 26 Jan 2013 21:39:44 +0100 Subject: bcma: add gpio_to_irq The old bcm47xx gpio code had support for gpio_to_irq, but the new code did not provide this function, but returned -ENXIO all the time. This patch adds the missing function. Signed-off-by: Hauke Mehrtens Signed-off-by: John W. Linville --- drivers/bcma/driver_gpio.c | 11 +++++++++++ include/linux/bcma/bcma_driver_mips.h | 9 +++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c index 9a6f585da2d9..0b5df538dfd9 100644 --- a/drivers/bcma/driver_gpio.c +++ b/drivers/bcma/driver_gpio.c @@ -73,6 +73,16 @@ static void bcma_gpio_free(struct gpio_chip *chip, unsigned gpio) bcma_chipco_gpio_pullup(cc, 1 << gpio, 0); } +static int bcma_gpio_to_irq(struct gpio_chip *chip, unsigned gpio) +{ + struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip); + + if (cc->core->bus->hosttype == BCMA_HOSTTYPE_SOC) + return bcma_core_irq(cc->core); + else + return -EINVAL; +} + int bcma_gpio_init(struct bcma_drv_cc *cc) { struct gpio_chip *chip = &cc->gpio; @@ -85,6 +95,7 @@ int bcma_gpio_init(struct bcma_drv_cc *cc) chip->set = bcma_gpio_set_value; chip->direction_input = bcma_gpio_direction_input; chip->direction_output = bcma_gpio_direction_output; + chip->to_irq = bcma_gpio_to_irq; chip->ngpio = 16; /* There is just one SoC in one device and its GPIO addresses should be * deterministic to address them more easily. The other buses could get diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h index 0d1ea297851a..fb61f3fb4ddb 100644 --- a/include/linux/bcma/bcma_driver_mips.h +++ b/include/linux/bcma/bcma_driver_mips.h @@ -42,13 +42,18 @@ struct bcma_drv_mips { #ifdef CONFIG_BCMA_DRIVER_MIPS extern void bcma_core_mips_init(struct bcma_drv_mips *mcore); extern void bcma_core_mips_early_init(struct bcma_drv_mips *mcore); + +extern unsigned int bcma_core_irq(struct bcma_device *core); #else static inline void bcma_core_mips_init(struct bcma_drv_mips *mcore) { } static inline void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) { } + +static inline unsigned int bcma_core_irq(struct bcma_device *core) +{ + return 0; +} #endif extern u32 bcma_cpu_clock(struct bcma_drv_mips *mcore); -extern unsigned int bcma_core_irq(struct bcma_device *core); - #endif /* LINUX_BCMA_DRIVER_MIPS_H_ */ -- cgit v1.2.3-71-gd317 From 18367681a10bd29c3f2305e6b7b984de5b33d548 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Wed, 30 Jan 2013 09:27:52 +0000 Subject: ipv6 flowlabel: Convert np->ipv6_fl_list to RCU. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- include/net/ipv6.h | 1 + net/ipv6/ip6_flowlabel.c | 72 +++++++++++++++++++++++++++--------------------- 3 files changed, 42 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e971e3742172..850e95bc766c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -214,7 +214,7 @@ struct ipv6_pinfo { struct ipv6_mc_socklist __rcu *ipv6_mc_list; struct ipv6_ac_socklist *ipv6_ac_list; - struct ipv6_fl_socklist *ipv6_fl_list; + struct ipv6_fl_socklist __rcu *ipv6_fl_list; struct ipv6_txoptions *opt; struct sk_buff *pktoptions; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 1d457161def2..851d5412a299 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -239,6 +239,7 @@ struct ip6_flowlabel { struct ipv6_fl_socklist { struct ipv6_fl_socklist *next; struct ip6_flowlabel *fl; + struct rcu_head rcu; }; extern struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index da156015d827..22494afd981c 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -62,7 +62,7 @@ static DEFINE_SPINLOCK(ip6_fl_lock); /* Big socket sock */ -static DEFINE_RWLOCK(ip6_sk_fl_lock); +static DEFINE_SPINLOCK(ip6_sk_fl_lock); #define for_each_fl_rcu(hash, fl) \ for (fl = rcu_dereference(fl_ht[(hash)]); \ @@ -73,6 +73,11 @@ static DEFINE_RWLOCK(ip6_sk_fl_lock); fl != NULL; \ fl = rcu_dereference(fl->next)) +#define for_each_sk_fl_rcu(np, sfl) \ + for (sfl = rcu_dereference_bh(np->ipv6_fl_list); \ + sfl != NULL; \ + sfl = rcu_dereference_bh(sfl->next)) + static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label) { struct ip6_flowlabel *fl; @@ -244,17 +249,17 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label) label &= IPV6_FLOWLABEL_MASK; - read_lock_bh(&ip6_sk_fl_lock); - for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { struct ip6_flowlabel *fl = sfl->fl; if (fl->label == label) { fl->lastuse = jiffies; atomic_inc(&fl->users); - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return fl; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return NULL; } @@ -265,20 +270,21 @@ void fl6_free_socklist(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_fl_socklist *sfl; - if (!np->ipv6_fl_list) + if (!rcu_access_pointer(np->ipv6_fl_list)) return; - write_lock_bh(&ipv6_sk_fl_lock); - sfl = np->ipv6_fl_list; - np->ipv6_fl_list = NULL; - write_unlock_bh(&ipv6_sk_fl_lock); + spin_lock_bh(&ip6_sk_fl_lock); + while ((sfl = rcu_dereference_protected(np->ipv6_fl_list, + lockdep_is_held(&ip6_sk_fl_lock))) != NULL) { + np->ipv6_fl_list = sfl->next; + spin_unlock_bh(&ip6_sk_fl_lock); - while (sfl) { - struct ipv6_fl_socklist *next = sfl->next; fl_release(sfl->fl); - kfree(sfl); - sfl = next; + kfree_rcu(sfl, rcu); + + spin_lock_bh(&ip6_sk_fl_lock); } + spin_unlock_bh(&ip6_sk_fl_lock); } /* Service routines */ @@ -443,7 +449,7 @@ static int mem_check(struct sock *sk) if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) return 0; - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) + for_each_sk_fl_rcu(np, sfl) count++; if (room <= 0 || @@ -486,11 +492,11 @@ static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2) static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, struct ip6_flowlabel *fl) { - write_lock_bh(&ip6_sk_fl_lock); + spin_lock_bh(&ip6_sk_fl_lock); sfl->fl = fl; sfl->next = np->ipv6_fl_list; - np->ipv6_fl_list = sfl; - write_unlock_bh(&ip6_sk_fl_lock); + rcu_assign_pointer(np->ipv6_fl_list, sfl); + spin_unlock_bh(&ip6_sk_fl_lock); } int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) @@ -512,31 +518,33 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) switch (freq.flr_action) { case IPV6_FL_A_PUT: - write_lock_bh(&ip6_sk_fl_lock); - for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) { + spin_lock_bh(&ip6_sk_fl_lock); + for (sflp = &np->ipv6_fl_list; + (sfl = rcu_dereference(*sflp))!=NULL; + sflp = &sfl->next) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) np->flow_label &= ~IPV6_FLOWLABEL_MASK; - *sflp = sfl->next; - write_unlock_bh(&ip6_sk_fl_lock); + *sflp = rcu_dereference(sfl->next); + spin_unlock_bh(&ip6_sk_fl_lock); fl_release(sfl->fl); - kfree(sfl); + kfree_rcu(sfl, rcu); return 0; } } - write_unlock_bh(&ip6_sk_fl_lock); + spin_unlock_bh(&ip6_sk_fl_lock); return -ESRCH; case IPV6_FL_A_RENEW: - read_lock_bh(&ip6_sk_fl_lock); - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq.flr_label) { err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires); - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); return err; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); if (freq.flr_share == IPV6_FL_S_NONE && ns_capable(net->user_ns, CAP_NET_ADMIN)) { @@ -560,11 +568,11 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (freq.flr_label) { err = -EEXIST; - read_lock_bh(&ip6_sk_fl_lock); - for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_flags&IPV6_FL_F_EXCL) { - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); goto done; } fl1 = sfl->fl; @@ -572,7 +580,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) break; } } - read_unlock_bh(&ip6_sk_fl_lock); + rcu_read_unlock_bh(); if (fl1 == NULL) fl1 = fl_lookup(net, freq.flr_label); -- cgit v1.2.3-71-gd317 From 955154fa33df2b74f0fea8e7c84df6dfd954dab2 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Wed, 30 Jan 2013 23:07:10 +0000 Subject: net/mlx4_en: Don't reassign port mac address on firmware that supports it Mac reassignments should only be done when not supported by the firmware. To accomplish that, checking firmware capability bit to know whether we should reassign macs in the driver. Signed-off-by: Matan Barak Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 3 ++- drivers/net/ethernet/mellanox/mlx4/fw.c | 7 ++++++- include/linux/mlx4/device.h | 3 ++- 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 333a7a0b833c..7b513e9aea85 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1285,7 +1285,8 @@ void mlx4_en_stop_port(struct net_device *dev) /* Unregister Mac address for the port */ mlx4_put_eth_qp(mdev->dev, priv->port, priv->mac, priv->base_qpn); - mdev->mac_removed[priv->port] = 1; + if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN)) + mdev->mac_removed[priv->port] = 1; /* Remove flow steering rules for the port*/ if (mdev->dev->caps.steering_mode == diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 91acf71aca97..38b62c78d5da 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -127,7 +127,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [0] = "RSS support", [1] = "RSS Toeplitz Hash Function support", [2] = "RSS XOR Hash Function support", - [3] = "Device manage flow steering support" + [3] = "Device manage flow steering support", + [4] = "Automatic mac reassignment support" }; int i; @@ -478,6 +479,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x94 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 +#define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d dev_cap->flags2 = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); @@ -637,6 +639,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) QUERY_DEV_CAP_BMME_FLAGS_OFFSET); MLX4_GET(dev_cap->reserved_lkey, outbox, QUERY_DEV_CAP_RSVD_LKEY_OFFSET); + MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC); + if (field & 1<<6) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN; MLX4_GET(dev_cap->max_icm_sz, outbox, QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET); if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 20ea939c22a6..1883e8e84718 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -150,7 +150,8 @@ enum { MLX4_DEV_CAP_FLAG2_RSS = 1LL << 0, MLX4_DEV_CAP_FLAG2_RSS_TOP = 1LL << 1, MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2, - MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3 + MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3, + MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN = 1LL << 4 }; enum { -- cgit v1.2.3-71-gd317 From 6fcdf4facb85e7d54ff6195378dd2ba8e0baccc4 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 30 Jan 2013 21:50:08 -0500 Subject: wanrouter: delete now orphaned header content, files/drivers The wanrouter support was identified earlier as unused for years, and so the previous commit totally decoupled it from the kernel, leaving the related wanrouter files present, but totally inert. Here we take the final step in that cleanup, by doing a wholesale removal of these files. The two step process is used so that the large deletion is decoupled from the git history of files that we still care about. The drivers deleted here all were dependent on the Kconfig setting CONFIG_WAN_ROUTER_DRIVERS. A stub wanrouter.h header (kernel & uapi) are left behind so that drivers/isdn/i4l/isdn_x25iface.c continues to compile, and so that we don't accidentally break userspace that expected these defines. Cc: Joe Perches Cc: Dan Carpenter Cc: Arnaldo Carvalho de Melo Signed-off-by: Paul Gortmaker --- drivers/net/wan/cycx_drv.c | 569 -------------- drivers/net/wan/cycx_main.c | 346 --------- drivers/net/wan/cycx_x25.c | 1602 ---------------------------------------- include/linux/cyclomx.h | 77 -- include/linux/cycx_drv.h | 64 -- include/linux/wanrouter.h | 127 +--- include/uapi/linux/wanrouter.h | 443 +---------- net/wanrouter/Kconfig | 27 - net/wanrouter/Makefile | 7 - net/wanrouter/patchlevel | 1 - net/wanrouter/wanmain.c | 782 -------------------- net/wanrouter/wanproc.c | 380 ---------- 12 files changed, 8 insertions(+), 4417 deletions(-) delete mode 100644 drivers/net/wan/cycx_drv.c delete mode 100644 drivers/net/wan/cycx_main.c delete mode 100644 drivers/net/wan/cycx_x25.c delete mode 100644 include/linux/cyclomx.h delete mode 100644 include/linux/cycx_drv.h delete mode 100644 net/wanrouter/Kconfig delete mode 100644 net/wanrouter/Makefile delete mode 100644 net/wanrouter/patchlevel delete mode 100644 net/wanrouter/wanmain.c delete mode 100644 net/wanrouter/wanproc.c (limited to 'include/linux') diff --git a/drivers/net/wan/cycx_drv.c b/drivers/net/wan/cycx_drv.c deleted file mode 100644 index 2a3ecae67a90..000000000000 --- a/drivers/net/wan/cycx_drv.c +++ /dev/null @@ -1,569 +0,0 @@ -/* -* cycx_drv.c Cyclom 2X Support Module. -* -* This module is a library of common hardware specific -* functions used by the Cyclades Cyclom 2X sync card. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdladrv.c by Gene Kozin -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 1999/11/11 acme set_current_state(TASK_INTERRUPTIBLE), code -* cleanup -* 1999/11/08 acme init_cyc2x deleted, doing nothing -* 1999/11/06 acme back to read[bw], write[bw] and memcpy_to and -* fromio to use dpmbase ioremaped -* 1999/10/26 acme use isa_read[bw], isa_write[bw] & isa_memcpy_to -* & fromio -* 1999/10/23 acme cleanup to only supports cyclom2x: all the other -* boards are no longer manufactured by cyclades, -* if someone wants to support them... be my guest! -* 1999/05/28 acme cycx_intack & cycx_intde gone for good -* 1999/05/18 acme lots of unlogged work, submitting to Linus... -* 1999/01/03 acme more judicious use of data types -* 1999/01/03 acme judicious use of data types :> -* cycx_inten trying to reset pending interrupts -* from cyclom 2x - I think this isn't the way to -* go, but for now... -* 1999/01/02 acme cycx_intack ok, I think there's nothing to do -* to ack an int in cycx_drv.c, only handle it in -* cyx_isr (or in the other protocols: cyp_isr, -* cyf_isr, when they get implemented. -* Dec 31, 1998 acme cycx_data_boot & cycx_code_boot fixed, crossing -* fingers to see x25_configure in cycx_x25.c -* work... :) -* Dec 26, 1998 acme load implementation fixed, seems to work! :) -* cycx_2x_dpmbase_options with all the possible -* DPM addresses (20). -* cycx_intr implemented (test this!) -* general code cleanup -* Dec 8, 1998 Ivan Passos Cyclom-2X firmware load implementation. -* Aug 8, 1998 acme Initial version. -*/ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include /* __init */ -#include -#include /* printk(), and other useful stuff */ -#include /* offsetof(), etc. */ -#include /* return codes */ -#include /* API definitions */ -#include /* CYCX firmware module definitions */ -#include /* udelay, msleep_interruptible */ -#include /* read[wl], write[wl], ioremap, iounmap */ - -#define MOD_VERSION 0 -#define MOD_RELEASE 6 - -MODULE_AUTHOR("Arnaldo Carvalho de Melo"); -MODULE_DESCRIPTION("Cyclom 2x Sync Card Driver"); -MODULE_LICENSE("GPL"); - -/* Hardware-specific functions */ -static int load_cyc2x(struct cycx_hw *hw, struct cycx_firmware *cfm, u32 len); -static void cycx_bootcfg(struct cycx_hw *hw); - -static int reset_cyc2x(void __iomem *addr); -static int detect_cyc2x(void __iomem *addr); - -/* Miscellaneous functions */ -static int get_option_index(const long *optlist, long optval); -static u16 checksum(u8 *buf, u32 len); - -#define wait_cyc(addr) cycx_exec(addr + CMD_OFFSET) - -/* Global Data */ - -/* private data */ -static const char fullname[] = "Cyclom 2X Support Module"; -static const char copyright[] = - "(c) 1998-2003 Arnaldo Carvalho de Melo "; - -/* Hardware configuration options. - * These are arrays of configuration options used by verification routines. - * The first element of each array is its size (i.e. number of options). - */ -static const long cyc2x_dpmbase_options[] = { - 20, - 0xA0000, 0xA4000, 0xA8000, 0xAC000, 0xB0000, 0xB4000, 0xB8000, - 0xBC000, 0xC0000, 0xC4000, 0xC8000, 0xCC000, 0xD0000, 0xD4000, - 0xD8000, 0xDC000, 0xE0000, 0xE4000, 0xE8000, 0xEC000 -}; - -static const long cycx_2x_irq_options[] = { 7, 3, 5, 9, 10, 11, 12, 15 }; - -/* Kernel Loadable Module Entry Points */ -/* Module 'insert' entry point. - * o print announcement - * o initialize static data - * - * Return: 0 Ok - * < 0 error. - * Context: process */ - -static int __init cycx_drv_init(void) -{ - pr_info("%s v%u.%u %s\n", - fullname, MOD_VERSION, MOD_RELEASE, copyright); - - return 0; -} - -/* Module 'remove' entry point. - * o release all remaining system resources */ -static void cycx_drv_cleanup(void) -{ -} - -/* Kernel APIs */ -/* Set up adapter. - * o detect adapter type - * o verify hardware configuration options - * o check for hardware conflicts - * o set up adapter shared memory - * o test adapter memory - * o load firmware - * Return: 0 ok. - * < 0 error */ -EXPORT_SYMBOL(cycx_setup); -int cycx_setup(struct cycx_hw *hw, void *cfm, u32 len, unsigned long dpmbase) -{ - int err; - - /* Verify IRQ configuration options */ - if (!get_option_index(cycx_2x_irq_options, hw->irq)) { - pr_err("IRQ %d is invalid!\n", hw->irq); - return -EINVAL; - } - - /* Setup adapter dual-port memory window and test memory */ - if (!dpmbase) { - pr_err("you must specify the dpm address!\n"); - return -EINVAL; - } else if (!get_option_index(cyc2x_dpmbase_options, dpmbase)) { - pr_err("memory address 0x%lX is invalid!\n", dpmbase); - return -EINVAL; - } - - hw->dpmbase = ioremap(dpmbase, CYCX_WINDOWSIZE); - hw->dpmsize = CYCX_WINDOWSIZE; - - if (!detect_cyc2x(hw->dpmbase)) { - pr_err("adapter Cyclom 2X not found at address 0x%lX!\n", - dpmbase); - return -EINVAL; - } - - pr_info("found Cyclom 2X card at address 0x%lX\n", dpmbase); - - /* Load firmware. If loader fails then shut down adapter */ - err = load_cyc2x(hw, cfm, len); - - if (err) - cycx_down(hw); /* shutdown adapter */ - - return err; -} - -EXPORT_SYMBOL(cycx_down); -int cycx_down(struct cycx_hw *hw) -{ - iounmap(hw->dpmbase); - return 0; -} - -/* Enable interrupt generation. */ -static void cycx_inten(struct cycx_hw *hw) -{ - writeb(0, hw->dpmbase); -} - -/* Generate an interrupt to adapter's CPU. */ -EXPORT_SYMBOL(cycx_intr); -void cycx_intr(struct cycx_hw *hw) -{ - writew(0, hw->dpmbase + GEN_CYCX_INTR); -} - -/* Execute Adapter Command. - * o Set exec flag. - * o Busy-wait until flag is reset. */ -EXPORT_SYMBOL(cycx_exec); -int cycx_exec(void __iomem *addr) -{ - u16 i = 0; - /* wait till addr content is zeroed */ - - while (readw(addr)) { - udelay(1000); - - if (++i > 50) - return -1; - } - - return 0; -} - -/* Read absolute adapter memory. - * Transfer data from adapter's memory to data buffer. */ -EXPORT_SYMBOL(cycx_peek); -int cycx_peek(struct cycx_hw *hw, u32 addr, void *buf, u32 len) -{ - if (len == 1) - *(u8*)buf = readb(hw->dpmbase + addr); - else - memcpy_fromio(buf, hw->dpmbase + addr, len); - - return 0; -} - -/* Write Absolute Adapter Memory. - * Transfer data from data buffer to adapter's memory. */ -EXPORT_SYMBOL(cycx_poke); -int cycx_poke(struct cycx_hw *hw, u32 addr, void *buf, u32 len) -{ - if (len == 1) - writeb(*(u8*)buf, hw->dpmbase + addr); - else - memcpy_toio(hw->dpmbase + addr, buf, len); - - return 0; -} - -/* Hardware-Specific Functions */ - -/* Load Aux Routines */ -/* Reset board hardware. - return 1 if memory exists at addr and 0 if not. */ -static int memory_exists(void __iomem *addr) -{ - int tries = 0; - - for (; tries < 3 ; tries++) { - writew(TEST_PATTERN, addr + 0x10); - - if (readw(addr + 0x10) == TEST_PATTERN) - if (readw(addr + 0x10) == TEST_PATTERN) - return 1; - - msleep_interruptible(1 * 1000); - } - - return 0; -} - -/* Load reset code. */ -static void reset_load(void __iomem *addr, u8 *buffer, u32 cnt) -{ - void __iomem *pt_code = addr + RESET_OFFSET; - u16 i; /*, j; */ - - for (i = 0 ; i < cnt ; i++) { -/* for (j = 0 ; j < 50 ; j++); Delay - FIXME busy waiting... */ - writeb(*buffer++, pt_code++); - } -} - -/* Load buffer using boot interface. - * o copy data from buffer to Cyclom-X memory - * o wait for reset code to copy it to right portion of memory */ -static int buffer_load(void __iomem *addr, u8 *buffer, u32 cnt) -{ - memcpy_toio(addr + DATA_OFFSET, buffer, cnt); - writew(GEN_BOOT_DAT, addr + CMD_OFFSET); - - return wait_cyc(addr); -} - -/* Set up entry point and kick start Cyclom-X CPU. */ -static void cycx_start(void __iomem *addr) -{ - /* put in 0x30 offset the jump instruction to the code entry point */ - writeb(0xea, addr + 0x30); - writeb(0x00, addr + 0x31); - writeb(0xc4, addr + 0x32); - writeb(0x00, addr + 0x33); - writeb(0x00, addr + 0x34); - - /* cmd to start executing code */ - writew(GEN_START, addr + CMD_OFFSET); -} - -/* Load and boot reset code. */ -static void cycx_reset_boot(void __iomem *addr, u8 *code, u32 len) -{ - void __iomem *pt_start = addr + START_OFFSET; - - writeb(0xea, pt_start++); /* jmp to f000:3f00 */ - writeb(0x00, pt_start++); - writeb(0xfc, pt_start++); - writeb(0x00, pt_start++); - writeb(0xf0, pt_start); - reset_load(addr, code, len); - - /* 80186 was in hold, go */ - writeb(0, addr + START_CPU); - msleep_interruptible(1 * 1000); -} - -/* Load data.bin file through boot (reset) interface. */ -static int cycx_data_boot(void __iomem *addr, u8 *code, u32 len) -{ - void __iomem *pt_boot_cmd = addr + CMD_OFFSET; - u32 i; - - /* boot buffer length */ - writew(CFM_LOAD_BUFSZ, pt_boot_cmd + sizeof(u16)); - writew(GEN_DEFPAR, pt_boot_cmd); - - if (wait_cyc(addr) < 0) - return -1; - - writew(0, pt_boot_cmd + sizeof(u16)); - writew(0x4000, pt_boot_cmd + 2 * sizeof(u16)); - writew(GEN_SET_SEG, pt_boot_cmd); - - if (wait_cyc(addr) < 0) - return -1; - - for (i = 0 ; i < len ; i += CFM_LOAD_BUFSZ) - if (buffer_load(addr, code + i, - min_t(u32, CFM_LOAD_BUFSZ, (len - i))) < 0) { - pr_err("Error !!\n"); - return -1; - } - - return 0; -} - - -/* Load code.bin file through boot (reset) interface. */ -static int cycx_code_boot(void __iomem *addr, u8 *code, u32 len) -{ - void __iomem *pt_boot_cmd = addr + CMD_OFFSET; - u32 i; - - /* boot buffer length */ - writew(CFM_LOAD_BUFSZ, pt_boot_cmd + sizeof(u16)); - writew(GEN_DEFPAR, pt_boot_cmd); - - if (wait_cyc(addr) < 0) - return -1; - - writew(0x0000, pt_boot_cmd + sizeof(u16)); - writew(0xc400, pt_boot_cmd + 2 * sizeof(u16)); - writew(GEN_SET_SEG, pt_boot_cmd); - - if (wait_cyc(addr) < 0) - return -1; - - for (i = 0 ; i < len ; i += CFM_LOAD_BUFSZ) - if (buffer_load(addr, code + i, - min_t(u32, CFM_LOAD_BUFSZ, (len - i)))) { - pr_err("Error !!\n"); - return -1; - } - - return 0; -} - -/* Load adapter from the memory image of the CYCX firmware module. - * o verify firmware integrity and compatibility - * o start adapter up */ -static int load_cyc2x(struct cycx_hw *hw, struct cycx_firmware *cfm, u32 len) -{ - int i, j; - struct cycx_fw_header *img_hdr; - u8 *reset_image, - *data_image, - *code_image; - void __iomem *pt_cycld = hw->dpmbase + 0x400; - u16 cksum; - - /* Announce */ - pr_info("firmware signature=\"%s\"\n", cfm->signature); - - /* Verify firmware signature */ - if (strcmp(cfm->signature, CFM_SIGNATURE)) { - pr_err("load_cyc2x: not Cyclom-2X firmware!\n"); - return -EINVAL; - } - - pr_info("firmware version=%u\n", cfm->version); - - /* Verify firmware module format version */ - if (cfm->version != CFM_VERSION) { - pr_err("%s: firmware format %u rejected! Expecting %u.\n", - __func__, cfm->version, CFM_VERSION); - return -EINVAL; - } - - /* Verify firmware module length and checksum */ - cksum = checksum((u8*)&cfm->info, sizeof(struct cycx_fw_info) + - cfm->info.codesize); -/* - FIXME cfm->info.codesize is off by 2 - if (((len - sizeof(struct cycx_firmware) - 1) != cfm->info.codesize) || -*/ - if (cksum != cfm->checksum) { - pr_err("%s: firmware corrupted!\n", __func__); - pr_err(" cdsize = 0x%x (expected 0x%lx)\n", - len - (int)sizeof(struct cycx_firmware) - 1, - cfm->info.codesize); - pr_err(" chksum = 0x%x (expected 0x%x)\n", - cksum, cfm->checksum); - return -EINVAL; - } - - /* If everything is ok, set reset, data and code pointers */ - img_hdr = (struct cycx_fw_header *)&cfm->image; -#ifdef FIRMWARE_DEBUG - pr_info("%s: image sizes\n", __func__); - pr_info(" reset=%lu\n", img_hdr->reset_size); - pr_info(" data=%lu\n", img_hdr->data_size); - pr_info(" code=%lu\n", img_hdr->code_size); -#endif - reset_image = ((u8 *)img_hdr) + sizeof(struct cycx_fw_header); - data_image = reset_image + img_hdr->reset_size; - code_image = data_image + img_hdr->data_size; - - /*---- Start load ----*/ - /* Announce */ - pr_info("loading firmware %s (ID=%u)...\n", - cfm->descr[0] ? cfm->descr : "unknown firmware", - cfm->info.codeid); - - for (i = 0 ; i < 5 ; i++) { - /* Reset Cyclom hardware */ - if (!reset_cyc2x(hw->dpmbase)) { - pr_err("dpm problem or board not found\n"); - return -EINVAL; - } - - /* Load reset.bin */ - cycx_reset_boot(hw->dpmbase, reset_image, img_hdr->reset_size); - /* reset is waiting for boot */ - writew(GEN_POWER_ON, pt_cycld); - msleep_interruptible(1 * 1000); - - for (j = 0 ; j < 3 ; j++) - if (!readw(pt_cycld)) - goto reset_loaded; - else - msleep_interruptible(1 * 1000); - } - - pr_err("reset not started\n"); - return -EINVAL; - -reset_loaded: - /* Load data.bin */ - if (cycx_data_boot(hw->dpmbase, data_image, img_hdr->data_size)) { - pr_err("cannot load data file\n"); - return -EINVAL; - } - - /* Load code.bin */ - if (cycx_code_boot(hw->dpmbase, code_image, img_hdr->code_size)) { - pr_err("cannot load code file\n"); - return -EINVAL; - } - - /* Prepare boot-time configuration data */ - cycx_bootcfg(hw); - - /* kick-off CPU */ - cycx_start(hw->dpmbase); - - /* Arthur Ganzert's tip: wait a while after the firmware loading... - seg abr 26 17:17:12 EST 1999 - acme */ - msleep_interruptible(7 * 1000); - pr_info("firmware loaded!\n"); - - /* enable interrupts */ - cycx_inten(hw); - - return 0; -} - -/* Prepare boot-time firmware configuration data. - * o initialize configuration data area - From async.doc - V_3.4.0 - 07/18/1994 - - As of now, only static buffers are available to the user. - So, the bit VD_RXDIRC must be set in 'valid'. That means that user - wants to use the static transmission and reception buffers. */ -static void cycx_bootcfg(struct cycx_hw *hw) -{ - /* use fixed buffers */ - writeb(FIXED_BUFFERS, hw->dpmbase + CONF_OFFSET); -} - -/* Detect Cyclom 2x adapter. - * Following tests are used to detect Cyclom 2x adapter: - * to be completed based on the tests done below - * Return 1 if detected o.k. or 0 if failed. - * Note: This test is destructive! Adapter will be left in shutdown - * state after the test. */ -static int detect_cyc2x(void __iomem *addr) -{ - reset_cyc2x(addr); - - return memory_exists(addr); -} - -/* Miscellaneous */ -/* Get option's index into the options list. - * Return option's index (1 .. N) or zero if option is invalid. */ -static int get_option_index(const long *optlist, long optval) -{ - int i = 1; - - for (; i <= optlist[0]; ++i) - if (optlist[i] == optval) - return i; - - return 0; -} - -/* Reset adapter's CPU. */ -static int reset_cyc2x(void __iomem *addr) -{ - writeb(0, addr + RST_ENABLE); - msleep_interruptible(2 * 1000); - writeb(0, addr + RST_DISABLE); - msleep_interruptible(2 * 1000); - - return memory_exists(addr); -} - -/* Calculate 16-bit CRC using CCITT polynomial. */ -static u16 checksum(u8 *buf, u32 len) -{ - u16 crc = 0; - u16 mask, flag; - - for (; len; --len, ++buf) - for (mask = 0x80; mask; mask >>= 1) { - flag = (crc & 0x8000); - crc <<= 1; - crc |= ((*buf & mask) ? 1 : 0); - - if (flag) - crc ^= 0x1021; - } - - return crc; -} - -module_init(cycx_drv_init); -module_exit(cycx_drv_cleanup); - -/* End */ diff --git a/drivers/net/wan/cycx_main.c b/drivers/net/wan/cycx_main.c deleted file mode 100644 index 81fbbad406be..000000000000 --- a/drivers/net/wan/cycx_main.c +++ /dev/null @@ -1,346 +0,0 @@ -/* -* cycx_main.c Cyclades Cyclom 2X WAN Link Driver. Main module. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdlamain.c by Gene Kozin & -* Jaspreet Singh -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Please look at the bitkeeper changelog (or any other scm tool that ends up -* importing bitkeeper changelog or that replaces bitkeeper in the future as -* main tool for linux development). -* -* 2001/05/09 acme Fix MODULE_DESC for debug, .bss nitpicks, -* some cleanups -* 2000/07/13 acme remove useless #ifdef MODULE and crap -* #if KERNEL_VERSION > blah -* 2000/07/06 acme __exit at cyclomx_cleanup -* 2000/04/02 acme dprintk and cycx_debug -* module_init/module_exit -* 2000/01/21 acme rename cyclomx_open to cyclomx_mod_inc_use_count -* and cyclomx_close to cyclomx_mod_dec_use_count -* 2000/01/08 acme cleanup -* 1999/11/06 acme cycx_down back to life (it needs to be -* called to iounmap the dpmbase) -* 1999/08/09 acme removed references to enable_tx_int -* use spinlocks instead of cli/sti in -* cyclomx_set_state -* 1999/05/19 acme works directly linked into the kernel -* init_waitqueue_head for 2.3.* kernel -* 1999/05/18 acme major cleanup (polling not needed), etc -* 1998/08/28 acme minor cleanup (ioctls for firmware deleted) -* queue_task activated -* 1998/08/08 acme Initial version. -*/ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include /* offsetof(), etc. */ -#include /* return codes */ -#include /* inline memset(), etc. */ -#include /* kmalloc(), kfree() */ -#include /* printk(), and other useful stuff */ -#include /* support for loadable modules */ -#include /* request_region(), release_region() */ -#include /* WAN router definitions */ -#include /* cyclomx common user API definitions */ -#include /* __init (when not using as a module) */ -#include - -unsigned int cycx_debug; - -MODULE_AUTHOR("Arnaldo Carvalho de Melo"); -MODULE_DESCRIPTION("Cyclom 2X Sync Card Driver."); -MODULE_LICENSE("GPL"); -module_param(cycx_debug, int, 0); -MODULE_PARM_DESC(cycx_debug, "cyclomx debug level"); - -/* Defines & Macros */ - -#define CYCX_DRV_VERSION 0 /* version number */ -#define CYCX_DRV_RELEASE 11 /* release (minor version) number */ -#define CYCX_MAX_CARDS 1 /* max number of adapters */ - -#define CONFIG_CYCX_CARDS 1 - -/* Function Prototypes */ - -/* WAN link driver entry points */ -static int cycx_wan_setup(struct wan_device *wandev, wandev_conf_t *conf); -static int cycx_wan_shutdown(struct wan_device *wandev); - -/* Miscellaneous functions */ -static irqreturn_t cycx_isr(int irq, void *dev_id); - -/* Global Data - * Note: All data must be explicitly initialized!!! - */ - -/* private data */ -static const char cycx_drvname[] = "cyclomx"; -static const char cycx_fullname[] = "CYCLOM 2X(tm) Sync Card Driver"; -static const char cycx_copyright[] = "(c) 1998-2003 Arnaldo Carvalho de Melo " - ""; -static int cycx_ncards = CONFIG_CYCX_CARDS; -static struct cycx_device *cycx_card_array; /* adapter data space */ - -/* Kernel Loadable Module Entry Points */ - -/* - * Module 'insert' entry point. - * o print announcement - * o allocate adapter data space - * o initialize static data - * o register all cards with WAN router - * o calibrate Cyclom 2X shared memory access delay. - * - * Return: 0 Ok - * < 0 error. - * Context: process - */ -static int __init cycx_init(void) -{ - int cnt, err = -ENOMEM; - - pr_info("%s v%u.%u %s\n", - cycx_fullname, CYCX_DRV_VERSION, CYCX_DRV_RELEASE, - cycx_copyright); - - /* Verify number of cards and allocate adapter data space */ - cycx_ncards = min_t(int, cycx_ncards, CYCX_MAX_CARDS); - cycx_ncards = max_t(int, cycx_ncards, 1); - cycx_card_array = kcalloc(cycx_ncards, sizeof(struct cycx_device), GFP_KERNEL); - if (!cycx_card_array) - goto out; - - - /* Register adapters with WAN router */ - for (cnt = 0; cnt < cycx_ncards; ++cnt) { - struct cycx_device *card = &cycx_card_array[cnt]; - struct wan_device *wandev = &card->wandev; - - sprintf(card->devname, "%s%d", cycx_drvname, cnt + 1); - wandev->magic = ROUTER_MAGIC; - wandev->name = card->devname; - wandev->private = card; - wandev->setup = cycx_wan_setup; - wandev->shutdown = cycx_wan_shutdown; - err = register_wan_device(wandev); - - if (err) { - pr_err("%s registration failed with error %d!\n", - card->devname, err); - break; - } - } - - err = -ENODEV; - if (!cnt) { - kfree(cycx_card_array); - goto out; - } - err = 0; - cycx_ncards = cnt; /* adjust actual number of cards */ -out: return err; -} - -/* - * Module 'remove' entry point. - * o unregister all adapters from the WAN router - * o release all remaining system resources - */ -static void __exit cycx_exit(void) -{ - int i = 0; - - for (; i < cycx_ncards; ++i) { - struct cycx_device *card = &cycx_card_array[i]; - unregister_wan_device(card->devname); - } - - kfree(cycx_card_array); -} - -/* WAN Device Driver Entry Points */ -/* - * Setup/configure WAN link driver. - * o check adapter state - * o make sure firmware is present in configuration - * o allocate interrupt vector - * o setup Cyclom 2X hardware - * o call appropriate routine to perform protocol-specific initialization - * - * This function is called when router handles ROUTER_SETUP IOCTL. The - * configuration structure is in kernel memory (including extended data, if - * any). - */ -static int cycx_wan_setup(struct wan_device *wandev, wandev_conf_t *conf) -{ - int rc = -EFAULT; - struct cycx_device *card; - int irq; - - /* Sanity checks */ - - if (!wandev || !wandev->private || !conf) - goto out; - - card = wandev->private; - rc = -EBUSY; - if (wandev->state != WAN_UNCONFIGURED) - goto out; - - rc = -EINVAL; - if (!conf->data_size || !conf->data) { - pr_err("%s: firmware not found in configuration data!\n", - wandev->name); - goto out; - } - - if (conf->irq <= 0) { - pr_err("%s: can't configure without IRQ!\n", wandev->name); - goto out; - } - - /* Allocate IRQ */ - irq = conf->irq == 2 ? 9 : conf->irq; /* IRQ2 -> IRQ9 */ - - if (request_irq(irq, cycx_isr, 0, wandev->name, card)) { - pr_err("%s: can't reserve IRQ %d!\n", wandev->name, irq); - goto out; - } - - /* Configure hardware, load firmware, etc. */ - memset(&card->hw, 0, sizeof(card->hw)); - card->hw.irq = irq; - card->hw.dpmsize = CYCX_WINDOWSIZE; - card->hw.fwid = CFID_X25_2X; - spin_lock_init(&card->lock); - init_waitqueue_head(&card->wait_stats); - - rc = cycx_setup(&card->hw, conf->data, conf->data_size, conf->maddr); - if (rc) - goto out_irq; - - /* Initialize WAN device data space */ - wandev->irq = irq; - wandev->dma = wandev->ioport = 0; - wandev->maddr = (unsigned long)card->hw.dpmbase; - wandev->msize = card->hw.dpmsize; - wandev->hw_opt[2] = 0; - wandev->hw_opt[3] = card->hw.fwid; - - /* Protocol-specific initialization */ - switch (card->hw.fwid) { -#ifdef CONFIG_CYCLOMX_X25 - case CFID_X25_2X: - rc = cycx_x25_wan_init(card, conf); - break; -#endif - default: - pr_err("%s: this firmware is not supported!\n", wandev->name); - rc = -EINVAL; - } - - if (rc) { - cycx_down(&card->hw); - goto out_irq; - } - - rc = 0; -out: - return rc; -out_irq: - free_irq(irq, card); - goto out; -} - -/* - * Shut down WAN link driver. - * o shut down adapter hardware - * o release system resources. - * - * This function is called by the router when device is being unregistered or - * when it handles ROUTER_DOWN IOCTL. - */ -static int cycx_wan_shutdown(struct wan_device *wandev) -{ - int ret = -EFAULT; - struct cycx_device *card; - - /* sanity checks */ - if (!wandev || !wandev->private) - goto out; - - ret = 0; - if (wandev->state == WAN_UNCONFIGURED) - goto out; - - card = wandev->private; - wandev->state = WAN_UNCONFIGURED; - cycx_down(&card->hw); - pr_info("%s: irq %d being freed!\n", wandev->name, wandev->irq); - free_irq(wandev->irq, card); -out: return ret; -} - -/* Miscellaneous */ -/* - * Cyclom 2X Interrupt Service Routine. - * o acknowledge Cyclom 2X hardware interrupt. - * o call protocol-specific interrupt service routine, if any. - */ -static irqreturn_t cycx_isr(int irq, void *dev_id) -{ - struct cycx_device *card = dev_id; - - if (card->wandev.state == WAN_UNCONFIGURED) - goto out; - - if (card->in_isr) { - pr_warn("%s: interrupt re-entrancy on IRQ %d!\n", - card->devname, card->wandev.irq); - goto out; - } - - if (card->isr) - card->isr(card); - return IRQ_HANDLED; -out: - return IRQ_NONE; -} - -/* Set WAN device state. */ -void cycx_set_state(struct cycx_device *card, int state) -{ - unsigned long flags; - char *string_state = NULL; - - spin_lock_irqsave(&card->lock, flags); - - if (card->wandev.state != state) { - switch (state) { - case WAN_CONNECTED: - string_state = "connected!"; - break; - case WAN_DISCONNECTED: - string_state = "disconnected!"; - break; - } - pr_info("%s: link %s\n", card->devname, string_state); - card->wandev.state = state; - } - - card->state_tick = jiffies; - spin_unlock_irqrestore(&card->lock, flags); -} - -module_init(cycx_init); -module_exit(cycx_exit); diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c deleted file mode 100644 index 06f3f6309e4b..000000000000 --- a/drivers/net/wan/cycx_x25.c +++ /dev/null @@ -1,1602 +0,0 @@ -/* -* cycx_x25.c Cyclom 2X WAN Link Driver. X.25 module. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdla_x25.c by Gene Kozin -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 2001/01/12 acme use dev_kfree_skb_irq on interrupt context -* 2000/04/02 acme dprintk, cycx_debug -* fixed the bug introduced in get_dev_by_lcn and -* get_dev_by_dte_addr by the anonymous hacker -* that converted this driver to softnet -* 2000/01/08 acme cleanup -* 1999/10/27 acme use ARPHRD_HWX25 so that the X.25 stack know -* that we have a X.25 stack implemented in -* firmware onboard -* 1999/10/18 acme support for X.25 sockets in if_send, -* beware: socket(AF_X25...) IS WORK IN PROGRESS, -* TCP/IP over X.25 via wanrouter not affected, -* working. -* 1999/10/09 acme chan_disc renamed to chan_disconnect, -* began adding support for X.25 sockets: -* conf->protocol in new_if -* 1999/10/05 acme fixed return E... to return -E... -* 1999/08/10 acme serialized access to the card thru a spinlock -* in x25_exec -* 1999/08/09 acme removed per channel spinlocks -* removed references to enable_tx_int -* 1999/05/28 acme fixed nibble_to_byte, ackvc now properly treated -* if_send simplified -* 1999/05/25 acme fixed t1, t2, t21 & t23 configuration -* use spinlocks instead of cli/sti in some points -* 1999/05/24 acme finished the x25_get_stat function -* 1999/05/23 acme dev->type = ARPHRD_X25 (tcpdump only works, -* AFAIT, with ARPHRD_ETHER). This seems to be -* needed to use socket(AF_X25)... -* Now the config file must specify a peer media -* address for svc channels over a crossover cable. -* Removed hold_timeout from x25_channel_t, -* not used. -* A little enhancement in the DEBUG processing -* 1999/05/22 acme go to DISCONNECTED in disconnect_confirm_intr, -* instead of chan_disc. -* 1999/05/16 marcelo fixed timer initialization in SVCs -* 1999/01/05 acme x25_configure now get (most of) all -* parameters... -* 1999/01/05 acme pktlen now (correctly) uses log2 (value -* configured) -* 1999/01/03 acme judicious use of data types (u8, u16, u32, etc) -* 1999/01/03 acme cyx_isr: reset dpmbase to acknowledge -* indication (interrupt from cyclom 2x) -* 1999/01/02 acme cyx_isr: first hackings... -* 1999/01/0203 acme when initializing an array don't give less -* elements than declared... -* example: char send_cmd[6] = "?\xFF\x10"; -* you'll gonna lose a couple hours, 'cause your -* brain won't admit that there's an error in the -* above declaration... the side effect is that -* memset is put into the unresolved symbols -* instead of using the inline memset functions... -* 1999/01/02 acme began chan_connect, chan_send, x25_send -* 1998/12/31 acme x25_configure -* this code can be compiled as non module -* 1998/12/27 acme code cleanup -* IPX code wiped out! let's decrease code -* complexity for now, remember: I'm learning! :) -* bps_to_speed_code OK -* 1998/12/26 acme Minimal debug code cleanup -* 1998/08/08 acme Initial version. -*/ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#define CYCLOMX_X25_DEBUG 1 - -#include /* isdigit() */ -#include /* return codes */ -#include /* ARPHRD_HWX25 */ -#include /* printk(), and other useful stuff */ -#include -#include /* inline memset(), etc. */ -#include -#include /* kmalloc(), kfree() */ -#include /* offsetof(), etc. */ -#include /* WAN router definitions */ - -#include /* htons(), etc. */ - -#include /* Cyclom 2X common user API definitions */ -#include /* X.25 firmware API definitions */ - -#include - -/* Defines & Macros */ -#define CYCX_X25_MAX_CMD_RETRY 5 -#define CYCX_X25_CHAN_MTU 2048 /* unfragmented logical channel MTU */ - -/* Data Structures */ -/* This is an extension of the 'struct net_device' we create for each network - interface to keep the rest of X.25 channel-specific data. */ -struct cycx_x25_channel { - /* This member must be first. */ - struct net_device *slave; /* WAN slave */ - - char name[WAN_IFNAME_SZ+1]; /* interface name, ASCIIZ */ - char addr[WAN_ADDRESS_SZ+1]; /* media address, ASCIIZ */ - char *local_addr; /* local media address, ASCIIZ - - svc thru crossover cable */ - s16 lcn; /* logical channel number/conn.req.key*/ - u8 link; - struct timer_list timer; /* timer used for svc channel disc. */ - u16 protocol; /* ethertype, 0 - multiplexed */ - u8 svc; /* 0 - permanent, 1 - switched */ - u8 state; /* channel state */ - u8 drop_sequence; /* mark sequence for dropping */ - u32 idle_tmout; /* sec, before disconnecting */ - struct sk_buff *rx_skb; /* receive socket buffer */ - struct cycx_device *card; /* -> owner */ - struct net_device_stats ifstats;/* interface statistics */ -}; - -/* Function Prototypes */ -/* WAN link driver entry points. These are called by the WAN router module. */ -static int cycx_wan_update(struct wan_device *wandev), - cycx_wan_new_if(struct wan_device *wandev, struct net_device *dev, - wanif_conf_t *conf), - cycx_wan_del_if(struct wan_device *wandev, struct net_device *dev); - -/* Network device interface */ -static int cycx_netdevice_init(struct net_device *dev); -static int cycx_netdevice_open(struct net_device *dev); -static int cycx_netdevice_stop(struct net_device *dev); -static int cycx_netdevice_hard_header(struct sk_buff *skb, - struct net_device *dev, u16 type, - const void *daddr, const void *saddr, - unsigned len); -static int cycx_netdevice_rebuild_header(struct sk_buff *skb); -static netdev_tx_t cycx_netdevice_hard_start_xmit(struct sk_buff *skb, - struct net_device *dev); - -static struct net_device_stats * - cycx_netdevice_get_stats(struct net_device *dev); - -/* Interrupt handlers */ -static void cycx_x25_irq_handler(struct cycx_device *card), - cycx_x25_irq_tx(struct cycx_device *card, struct cycx_x25_cmd *cmd), - cycx_x25_irq_rx(struct cycx_device *card, struct cycx_x25_cmd *cmd), - cycx_x25_irq_log(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_stat(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_connect_confirm(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_disconnect_confirm(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_connect(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_disconnect(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_spurious(struct cycx_device *card, - struct cycx_x25_cmd *cmd); - -/* X.25 firmware interface functions */ -static int cycx_x25_configure(struct cycx_device *card, - struct cycx_x25_config *conf), - cycx_x25_get_stats(struct cycx_device *card), - cycx_x25_send(struct cycx_device *card, u8 link, u8 lcn, u8 bitm, - int len, void *buf), - cycx_x25_connect_response(struct cycx_device *card, - struct cycx_x25_channel *chan), - cycx_x25_disconnect_response(struct cycx_device *card, u8 link, - u8 lcn); - -/* channel functions */ -static int cycx_x25_chan_connect(struct net_device *dev), - cycx_x25_chan_send(struct net_device *dev, struct sk_buff *skb); - -static void cycx_x25_chan_disconnect(struct net_device *dev), - cycx_x25_chan_send_event(struct net_device *dev, u8 event); - -/* Miscellaneous functions */ -static void cycx_x25_set_chan_state(struct net_device *dev, u8 state), - cycx_x25_chan_timer(unsigned long d); - -static void nibble_to_byte(u8 *s, u8 *d, u8 len, u8 nibble), - reset_timer(struct net_device *dev); - -static u8 bps_to_speed_code(u32 bps); -static u8 cycx_log2(u32 n); - -static unsigned dec_to_uint(u8 *str, int len); - -static struct net_device *cycx_x25_get_dev_by_lcn(struct wan_device *wandev, - s16 lcn); -static struct net_device * - cycx_x25_get_dev_by_dte_addr(struct wan_device *wandev, char *dte); - -static void cycx_x25_chan_setup(struct net_device *dev); - -#ifdef CYCLOMX_X25_DEBUG -static void hex_dump(char *msg, unsigned char *p, int len); -static void cycx_x25_dump_config(struct cycx_x25_config *conf); -static void cycx_x25_dump_stats(struct cycx_x25_stats *stats); -static void cycx_x25_dump_devs(struct wan_device *wandev); -#else -#define hex_dump(msg, p, len) -#define cycx_x25_dump_config(conf) -#define cycx_x25_dump_stats(stats) -#define cycx_x25_dump_devs(wandev) -#endif -/* Public Functions */ - -/* X.25 Protocol Initialization routine. - * - * This routine is called by the main Cyclom 2X module during setup. At this - * point adapter is completely initialized and X.25 firmware is running. - * o configure adapter - * o initialize protocol-specific fields of the adapter data space. - * - * Return: 0 o.k. - * < 0 failure. */ -int cycx_x25_wan_init(struct cycx_device *card, wandev_conf_t *conf) -{ - struct cycx_x25_config cfg; - - /* Verify configuration ID */ - if (conf->config_id != WANCONFIG_X25) { - pr_info("%s: invalid configuration ID %u!\n", - card->devname, conf->config_id); - return -EINVAL; - } - - /* Initialize protocol-specific fields */ - card->mbox = card->hw.dpmbase + X25_MBOX_OFFS; - card->u.x.connection_keys = 0; - spin_lock_init(&card->u.x.lock); - - /* Configure adapter. Here we set reasonable defaults, then parse - * device configuration structure and set configuration options. - * Most configuration options are verified and corrected (if - * necessary) since we can't rely on the adapter to do so and don't - * want it to fail either. */ - memset(&cfg, 0, sizeof(cfg)); - cfg.link = 0; - cfg.clock = conf->clocking == WANOPT_EXTERNAL ? 8 : 55; - cfg.speed = bps_to_speed_code(conf->bps); - cfg.n3win = 7; - cfg.n2win = 2; - cfg.n2 = 5; - cfg.nvc = 1; - cfg.npvc = 1; - cfg.flags = 0x02; /* default = V35 */ - cfg.t1 = 10; /* line carrier timeout */ - cfg.t2 = 29; /* tx timeout */ - cfg.t21 = 180; /* CALL timeout */ - cfg.t23 = 180; /* CLEAR timeout */ - - /* adjust MTU */ - if (!conf->mtu || conf->mtu >= 512) - card->wandev.mtu = 512; - else if (conf->mtu >= 256) - card->wandev.mtu = 256; - else if (conf->mtu >= 128) - card->wandev.mtu = 128; - else - card->wandev.mtu = 64; - - cfg.pktlen = cycx_log2(card->wandev.mtu); - - if (conf->station == WANOPT_DTE) { - cfg.locaddr = 3; /* DTE */ - cfg.remaddr = 1; /* DCE */ - } else { - cfg.locaddr = 1; /* DCE */ - cfg.remaddr = 3; /* DTE */ - } - - if (conf->interface == WANOPT_RS232) - cfg.flags = 0; /* FIXME just reset the 2nd bit */ - - if (conf->u.x25.hi_pvc) { - card->u.x.hi_pvc = min_t(unsigned int, conf->u.x25.hi_pvc, 4095); - card->u.x.lo_pvc = min_t(unsigned int, conf->u.x25.lo_pvc, card->u.x.hi_pvc); - } - - if (conf->u.x25.hi_svc) { - card->u.x.hi_svc = min_t(unsigned int, conf->u.x25.hi_svc, 4095); - card->u.x.lo_svc = min_t(unsigned int, conf->u.x25.lo_svc, card->u.x.hi_svc); - } - - if (card->u.x.lo_pvc == 255) - cfg.npvc = 0; - else - cfg.npvc = card->u.x.hi_pvc - card->u.x.lo_pvc + 1; - - cfg.nvc = card->u.x.hi_svc - card->u.x.lo_svc + 1 + cfg.npvc; - - if (conf->u.x25.hdlc_window) - cfg.n2win = min_t(unsigned int, conf->u.x25.hdlc_window, 7); - - if (conf->u.x25.pkt_window) - cfg.n3win = min_t(unsigned int, conf->u.x25.pkt_window, 7); - - if (conf->u.x25.t1) - cfg.t1 = min_t(unsigned int, conf->u.x25.t1, 30); - - if (conf->u.x25.t2) - cfg.t2 = min_t(unsigned int, conf->u.x25.t2, 30); - - if (conf->u.x25.t11_t21) - cfg.t21 = min_t(unsigned int, conf->u.x25.t11_t21, 30); - - if (conf->u.x25.t13_t23) - cfg.t23 = min_t(unsigned int, conf->u.x25.t13_t23, 30); - - if (conf->u.x25.n2) - cfg.n2 = min_t(unsigned int, conf->u.x25.n2, 30); - - /* initialize adapter */ - if (cycx_x25_configure(card, &cfg)) - return -EIO; - - /* Initialize protocol-specific fields of adapter data space */ - card->wandev.bps = conf->bps; - card->wandev.interface = conf->interface; - card->wandev.clocking = conf->clocking; - card->wandev.station = conf->station; - card->isr = cycx_x25_irq_handler; - card->exec = NULL; - card->wandev.update = cycx_wan_update; - card->wandev.new_if = cycx_wan_new_if; - card->wandev.del_if = cycx_wan_del_if; - card->wandev.state = WAN_DISCONNECTED; - - return 0; -} - -/* WAN Device Driver Entry Points */ -/* Update device status & statistics. */ -static int cycx_wan_update(struct wan_device *wandev) -{ - /* sanity checks */ - if (!wandev || !wandev->private) - return -EFAULT; - - if (wandev->state == WAN_UNCONFIGURED) - return -ENODEV; - - cycx_x25_get_stats(wandev->private); - - return 0; -} - -/* Create new logical channel. - * This routine is called by the router when ROUTER_IFNEW IOCTL is being - * handled. - * o parse media- and hardware-specific configuration - * o make sure that a new channel can be created - * o allocate resources, if necessary - * o prepare network device structure for registration. - * - * Return: 0 o.k. - * < 0 failure (channel will not be created) */ -static int cycx_wan_new_if(struct wan_device *wandev, struct net_device *dev, - wanif_conf_t *conf) -{ - struct cycx_device *card = wandev->private; - struct cycx_x25_channel *chan; - int err = 0; - - if (!conf->name[0] || strlen(conf->name) > WAN_IFNAME_SZ) { - pr_info("%s: invalid interface name!\n", card->devname); - return -EINVAL; - } - - dev = alloc_netdev(sizeof(struct cycx_x25_channel), conf->name, - cycx_x25_chan_setup); - if (!dev) - return -ENOMEM; - - chan = netdev_priv(dev); - strcpy(chan->name, conf->name); - chan->card = card; - chan->link = conf->port; - chan->protocol = conf->protocol ? ETH_P_X25 : ETH_P_IP; - chan->rx_skb = NULL; - /* only used in svc connected thru crossover cable */ - chan->local_addr = NULL; - - if (conf->addr[0] == '@') { /* SVC */ - int len = strlen(conf->local_addr); - - if (len) { - if (len > WAN_ADDRESS_SZ) { - pr_err("%s: %s local addr too long!\n", - wandev->name, chan->name); - err = -EINVAL; - goto error; - } else { - chan->local_addr = kmalloc(len + 1, GFP_KERNEL); - - if (!chan->local_addr) { - err = -ENOMEM; - goto error; - } - } - - strncpy(chan->local_addr, conf->local_addr, - WAN_ADDRESS_SZ); - } - - chan->svc = 1; - strncpy(chan->addr, &conf->addr[1], WAN_ADDRESS_SZ); - init_timer(&chan->timer); - chan->timer.function = cycx_x25_chan_timer; - chan->timer.data = (unsigned long)dev; - - /* Set channel timeouts (default if not specified) */ - chan->idle_tmout = conf->idle_timeout ? conf->idle_timeout : 90; - } else if (isdigit(conf->addr[0])) { /* PVC */ - s16 lcn = dec_to_uint(conf->addr, 0); - - if (lcn >= card->u.x.lo_pvc && lcn <= card->u.x.hi_pvc) - chan->lcn = lcn; - else { - pr_err("%s: PVC %u is out of range on interface %s!\n", - wandev->name, lcn, chan->name); - err = -EINVAL; - goto error; - } - } else { - pr_err("%s: invalid media address on interface %s!\n", - wandev->name, chan->name); - err = -EINVAL; - goto error; - } - - return 0; - -error: - free_netdev(dev); - return err; -} - -/* Delete logical channel. */ -static int cycx_wan_del_if(struct wan_device *wandev, struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - if (chan->svc) { - kfree(chan->local_addr); - if (chan->state == WAN_CONNECTED) - del_timer(&chan->timer); - } - - return 0; -} - - -/* Network Device Interface */ - -static const struct header_ops cycx_header_ops = { - .create = cycx_netdevice_hard_header, - .rebuild = cycx_netdevice_rebuild_header, -}; - -static const struct net_device_ops cycx_netdev_ops = { - .ndo_init = cycx_netdevice_init, - .ndo_open = cycx_netdevice_open, - .ndo_stop = cycx_netdevice_stop, - .ndo_start_xmit = cycx_netdevice_hard_start_xmit, - .ndo_get_stats = cycx_netdevice_get_stats, -}; - -static void cycx_x25_chan_setup(struct net_device *dev) -{ - /* Initialize device driver entry points */ - dev->netdev_ops = &cycx_netdev_ops; - dev->header_ops = &cycx_header_ops; - - /* Initialize media-specific parameters */ - dev->mtu = CYCX_X25_CHAN_MTU; - dev->type = ARPHRD_HWX25; /* ARP h/w type */ - dev->hard_header_len = 0; /* media header length */ - dev->addr_len = 0; /* hardware address length */ -} - -/* Initialize Linux network interface. - * - * This routine is called only once for each interface, during Linux network - * interface registration. Returning anything but zero will fail interface - * registration. */ -static int cycx_netdevice_init(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - struct wan_device *wandev = &card->wandev; - - if (!chan->svc) - *(__be16*)dev->dev_addr = htons(chan->lcn); - - /* Initialize hardware parameters (just for reference) */ - dev->irq = wandev->irq; - dev->dma = wandev->dma; - dev->base_addr = wandev->ioport; - dev->mem_start = (unsigned long)wandev->maddr; - dev->mem_end = (unsigned long)(wandev->maddr + - wandev->msize - 1); - dev->flags |= IFF_NOARP; - - /* Set transmit buffer queue length */ - dev->tx_queue_len = 10; - - /* Initialize socket buffers */ - cycx_x25_set_chan_state(dev, WAN_DISCONNECTED); - - return 0; -} - -/* Open network interface. - * o prevent module from unloading by incrementing use count - * o if link is disconnected then initiate connection - * - * Return 0 if O.k. or errno. */ -static int cycx_netdevice_open(struct net_device *dev) -{ - if (netif_running(dev)) - return -EBUSY; /* only one open is allowed */ - - netif_start_queue(dev); - return 0; -} - -/* Close network interface. - * o reset flags. - * o if there's no more open channels then disconnect physical link. */ -static int cycx_netdevice_stop(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - netif_stop_queue(dev); - - if (chan->state == WAN_CONNECTED || chan->state == WAN_CONNECTING) - cycx_x25_chan_disconnect(dev); - - return 0; -} - -/* Build media header. - * o encapsulate packet according to encapsulation type. - * - * The trick here is to put packet type (Ethertype) into 'protocol' field of - * the socket buffer, so that we don't forget it. If encapsulation fails, - * set skb->protocol to 0 and discard packet later. - * - * Return: media header length. */ -static int cycx_netdevice_hard_header(struct sk_buff *skb, - struct net_device *dev, u16 type, - const void *daddr, const void *saddr, - unsigned len) -{ - skb->protocol = htons(type); - - return dev->hard_header_len; -} - -/* * Re-build media header. - * Return: 1 physical address resolved. - * 0 physical address not resolved */ -static int cycx_netdevice_rebuild_header(struct sk_buff *skb) -{ - return 1; -} - -/* Send a packet on a network interface. - * o set busy flag (marks start of the transmission). - * o check link state. If link is not up, then drop the packet. - * o check channel status. If it's down then initiate a call. - * o pass a packet to corresponding WAN device. - * o free socket buffer - * - * Return: 0 complete (socket buffer must be freed) - * non-0 packet may be re-transmitted (tbusy must be set) - * - * Notes: - * 1. This routine is called either by the protocol stack or by the "net - * bottom half" (with interrupts enabled). - * 2. Setting tbusy flag will inhibit further transmit requests from the - * protocol stack and can be used for flow control with protocol layer. */ -static netdev_tx_t cycx_netdevice_hard_start_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - - if (!chan->svc) - chan->protocol = ntohs(skb->protocol); - - if (card->wandev.state != WAN_CONNECTED) - ++chan->ifstats.tx_dropped; - else if (chan->svc && chan->protocol && - chan->protocol != ntohs(skb->protocol)) { - pr_info("%s: unsupported Ethertype 0x%04X on interface %s!\n", - card->devname, ntohs(skb->protocol), dev->name); - ++chan->ifstats.tx_errors; - } else if (chan->protocol == ETH_P_IP) { - switch (chan->state) { - case WAN_DISCONNECTED: - if (cycx_x25_chan_connect(dev)) { - netif_stop_queue(dev); - return NETDEV_TX_BUSY; - } - /* fall thru */ - case WAN_CONNECTED: - reset_timer(dev); - dev->trans_start = jiffies; - netif_stop_queue(dev); - - if (cycx_x25_chan_send(dev, skb)) - return NETDEV_TX_BUSY; - - break; - default: - ++chan->ifstats.tx_dropped; - ++card->wandev.stats.tx_dropped; - } - } else { /* chan->protocol == ETH_P_X25 */ - switch (skb->data[0]) { - case X25_IFACE_DATA: - break; - case X25_IFACE_CONNECT: - cycx_x25_chan_connect(dev); - goto free_packet; - case X25_IFACE_DISCONNECT: - cycx_x25_chan_disconnect(dev); - goto free_packet; - default: - pr_info("%s: unknown %d x25-iface request on %s!\n", - card->devname, skb->data[0], dev->name); - ++chan->ifstats.tx_errors; - goto free_packet; - } - - skb_pull(skb, 1); /* Remove control byte */ - reset_timer(dev); - dev->trans_start = jiffies; - netif_stop_queue(dev); - - if (cycx_x25_chan_send(dev, skb)) { - /* prepare for future retransmissions */ - skb_push(skb, 1); - return NETDEV_TX_BUSY; - } - } - -free_packet: - dev_kfree_skb(skb); - - return NETDEV_TX_OK; -} - -/* Get Ethernet-style interface statistics. - * Return a pointer to struct net_device_stats */ -static struct net_device_stats *cycx_netdevice_get_stats(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - return chan ? &chan->ifstats : NULL; -} - -/* Interrupt Handlers */ -/* X.25 Interrupt Service Routine. */ -static void cycx_x25_irq_handler(struct cycx_device *card) -{ - struct cycx_x25_cmd cmd; - u16 z = 0; - - card->in_isr = 1; - card->buff_int_mode_unbusy = 0; - cycx_peek(&card->hw, X25_RXMBOX_OFFS, &cmd, sizeof(cmd)); - - switch (cmd.command) { - case X25_DATA_INDICATION: - cycx_x25_irq_rx(card, &cmd); - break; - case X25_ACK_FROM_VC: - cycx_x25_irq_tx(card, &cmd); - break; - case X25_LOG: - cycx_x25_irq_log(card, &cmd); - break; - case X25_STATISTIC: - cycx_x25_irq_stat(card, &cmd); - break; - case X25_CONNECT_CONFIRM: - cycx_x25_irq_connect_confirm(card, &cmd); - break; - case X25_CONNECT_INDICATION: - cycx_x25_irq_connect(card, &cmd); - break; - case X25_DISCONNECT_INDICATION: - cycx_x25_irq_disconnect(card, &cmd); - break; - case X25_DISCONNECT_CONFIRM: - cycx_x25_irq_disconnect_confirm(card, &cmd); - break; - case X25_LINE_ON: - cycx_set_state(card, WAN_CONNECTED); - break; - case X25_LINE_OFF: - cycx_set_state(card, WAN_DISCONNECTED); - break; - default: - cycx_x25_irq_spurious(card, &cmd); - break; - } - - cycx_poke(&card->hw, 0, &z, sizeof(z)); - cycx_poke(&card->hw, X25_RXMBOX_OFFS, &z, sizeof(z)); - card->in_isr = 0; -} - -/* Transmit interrupt handler. - * o Release socket buffer - * o Clear 'tbusy' flag */ -static void cycx_x25_irq_tx(struct cycx_device *card, struct cycx_x25_cmd *cmd) -{ - struct net_device *dev; - struct wan_device *wandev = &card->wandev; - u8 lcn; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - - /* unbusy device and then dev_tint(); */ - dev = cycx_x25_get_dev_by_lcn(wandev, lcn); - if (dev) { - card->buff_int_mode_unbusy = 1; - netif_wake_queue(dev); - } else - pr_err("%s:ackvc for inexistent lcn %d\n", card->devname, lcn); -} - -/* Receive interrupt handler. - * This routine handles fragmented IP packets using M-bit according to the - * RFC1356. - * o map logical channel number to network interface. - * o allocate socket buffer or append received packet to the existing one. - * o if M-bit is reset (i.e. it's the last packet in a sequence) then - * decapsulate packet and pass socket buffer to the protocol stack. - * - * Notes: - * 1. When allocating a socket buffer, if M-bit is set then more data is - * coming and we have to allocate buffer for the maximum IP packet size - * expected on this channel. - * 2. If something goes wrong and X.25 packet has to be dropped (e.g. no - * socket buffers available) the whole packet sequence must be discarded. */ -static void cycx_x25_irq_rx(struct cycx_device *card, struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev; - struct cycx_x25_channel *chan; - struct sk_buff *skb; - u8 bitm, lcn; - int pktlen = cmd->len - 5; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - cycx_peek(&card->hw, cmd->buf + 4, &bitm, sizeof(bitm)); - bitm &= 0x10; - - dev = cycx_x25_get_dev_by_lcn(wandev, lcn); - if (!dev) { - /* Invalid channel, discard packet */ - pr_info("%s: receiving on orphaned LCN %d!\n", - card->devname, lcn); - return; - } - - chan = netdev_priv(dev); - reset_timer(dev); - - if (chan->drop_sequence) { - if (!bitm) - chan->drop_sequence = 0; - else - return; - } - - if ((skb = chan->rx_skb) == NULL) { - /* Allocate new socket buffer */ - int bufsize = bitm ? dev->mtu : pktlen; - - if ((skb = dev_alloc_skb((chan->protocol == ETH_P_X25 ? 1 : 0) + - bufsize + - dev->hard_header_len)) == NULL) { - pr_info("%s: no socket buffers available!\n", - card->devname); - chan->drop_sequence = 1; - ++chan->ifstats.rx_dropped; - return; - } - - if (chan->protocol == ETH_P_X25) /* X.25 socket layer control */ - /* 0 = data packet (dev_alloc_skb zeroed skb->data) */ - skb_put(skb, 1); - - skb->dev = dev; - skb->protocol = htons(chan->protocol); - chan->rx_skb = skb; - } - - if (skb_tailroom(skb) < pktlen) { - /* No room for the packet. Call off the whole thing! */ - dev_kfree_skb_irq(skb); - chan->rx_skb = NULL; - - if (bitm) - chan->drop_sequence = 1; - - pr_info("%s: unexpectedly long packet sequence on interface %s!\n", - card->devname, dev->name); - ++chan->ifstats.rx_length_errors; - return; - } - - /* Append packet to the socket buffer */ - cycx_peek(&card->hw, cmd->buf + 5, skb_put(skb, pktlen), pktlen); - - if (bitm) - return; /* more data is coming */ - - chan->rx_skb = NULL; /* dequeue packet */ - - ++chan->ifstats.rx_packets; - chan->ifstats.rx_bytes += pktlen; - - skb_reset_mac_header(skb); - netif_rx(skb); -} - -/* Connect interrupt handler. */ -static void cycx_x25_irq_connect(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev = NULL; - struct cycx_x25_channel *chan; - u8 d[32], - loc[24], - rem[24]; - u8 lcn, sizeloc, sizerem; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - cycx_peek(&card->hw, cmd->buf + 5, &sizeloc, sizeof(sizeloc)); - cycx_peek(&card->hw, cmd->buf + 6, d, cmd->len - 6); - - sizerem = sizeloc >> 4; - sizeloc &= 0x0F; - - loc[0] = rem[0] = '\0'; - - if (sizeloc) - nibble_to_byte(d, loc, sizeloc, 0); - - if (sizerem) - nibble_to_byte(d + (sizeloc >> 1), rem, sizerem, sizeloc & 1); - - dprintk(1, KERN_INFO "%s:lcn=%d, local=%s, remote=%s\n", - __func__, lcn, loc, rem); - - dev = cycx_x25_get_dev_by_dte_addr(wandev, rem); - if (!dev) { - /* Invalid channel, discard packet */ - pr_info("%s: connect not expected: remote %s!\n", - card->devname, rem); - return; - } - - chan = netdev_priv(dev); - chan->lcn = lcn; - cycx_x25_connect_response(card, chan); - cycx_x25_set_chan_state(dev, WAN_CONNECTED); -} - -/* Connect confirm interrupt handler. */ -static void cycx_x25_irq_connect_confirm(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev; - struct cycx_x25_channel *chan; - u8 lcn, key; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - cycx_peek(&card->hw, cmd->buf + 1, &key, sizeof(key)); - dprintk(1, KERN_INFO "%s: %s:lcn=%d, key=%d\n", - card->devname, __func__, lcn, key); - - dev = cycx_x25_get_dev_by_lcn(wandev, -key); - if (!dev) { - /* Invalid channel, discard packet */ - clear_bit(--key, (void*)&card->u.x.connection_keys); - pr_info("%s: connect confirm not expected: lcn %d, key=%d!\n", - card->devname, lcn, key); - return; - } - - clear_bit(--key, (void*)&card->u.x.connection_keys); - chan = netdev_priv(dev); - chan->lcn = lcn; - cycx_x25_set_chan_state(dev, WAN_CONNECTED); -} - -/* Disconnect confirm interrupt handler. */ -static void cycx_x25_irq_disconnect_confirm(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev; - u8 lcn; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - dprintk(1, KERN_INFO "%s: %s:lcn=%d\n", - card->devname, __func__, lcn); - dev = cycx_x25_get_dev_by_lcn(wandev, lcn); - if (!dev) { - /* Invalid channel, discard packet */ - pr_info("%s:disconnect confirm not expected!:lcn %d\n", - card->devname, lcn); - return; - } - - cycx_x25_set_chan_state(dev, WAN_DISCONNECTED); -} - -/* disconnect interrupt handler. */ -static void cycx_x25_irq_disconnect(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev; - u8 lcn; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - dprintk(1, KERN_INFO "%s:lcn=%d\n", __func__, lcn); - - dev = cycx_x25_get_dev_by_lcn(wandev, lcn); - if (dev) { - struct cycx_x25_channel *chan = netdev_priv(dev); - - cycx_x25_disconnect_response(card, chan->link, lcn); - cycx_x25_set_chan_state(dev, WAN_DISCONNECTED); - } else - cycx_x25_disconnect_response(card, 0, lcn); -} - -/* LOG interrupt handler. */ -static void cycx_x25_irq_log(struct cycx_device *card, struct cycx_x25_cmd *cmd) -{ -#if CYCLOMX_X25_DEBUG - char bf[20]; - u16 size, toread, link, msg_code; - u8 code, routine; - - cycx_peek(&card->hw, cmd->buf, &msg_code, sizeof(msg_code)); - cycx_peek(&card->hw, cmd->buf + 2, &link, sizeof(link)); - cycx_peek(&card->hw, cmd->buf + 4, &size, sizeof(size)); - /* at most 20 bytes are available... thanks to Daniela :) */ - toread = size < 20 ? size : 20; - cycx_peek(&card->hw, cmd->buf + 10, &bf, toread); - cycx_peek(&card->hw, cmd->buf + 10 + toread, &code, 1); - cycx_peek(&card->hw, cmd->buf + 10 + toread + 1, &routine, 1); - - pr_info("cycx_x25_irq_handler: X25_LOG (0x4500) indic.:\n"); - pr_info("cmd->buf=0x%X\n", cmd->buf); - pr_info("Log message code=0x%X\n", msg_code); - pr_info("Link=%d\n", link); - pr_info("log code=0x%X\n", code); - pr_info("log routine=0x%X\n", routine); - pr_info("Message size=%d\n", size); - hex_dump("Message", bf, toread); -#endif -} - -/* STATISTIC interrupt handler. */ -static void cycx_x25_irq_stat(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - cycx_peek(&card->hw, cmd->buf, &card->u.x.stats, - sizeof(card->u.x.stats)); - hex_dump("cycx_x25_irq_stat", (unsigned char*)&card->u.x.stats, - sizeof(card->u.x.stats)); - cycx_x25_dump_stats(&card->u.x.stats); - wake_up_interruptible(&card->wait_stats); -} - -/* Spurious interrupt handler. - * o print a warning - * If number of spurious interrupts exceeded some limit, then ??? */ -static void cycx_x25_irq_spurious(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - pr_info("%s: spurious interrupt (0x%X)!\n", - card->devname, cmd->command); -} -#ifdef CYCLOMX_X25_DEBUG -static void hex_dump(char *msg, unsigned char *p, int len) -{ - print_hex_dump(KERN_INFO, msg, DUMP_PREFIX_OFFSET, 16, 1, - p, len, true); -} -#endif - -/* Cyclom 2X Firmware-Specific Functions */ -/* Exec X.25 command. */ -static int x25_exec(struct cycx_device *card, int command, int link, - void *d1, int len1, void *d2, int len2) -{ - struct cycx_x25_cmd c; - unsigned long flags; - u32 addr = 0x1200 + 0x2E0 * link + 0x1E2; - u8 retry = CYCX_X25_MAX_CMD_RETRY; - int err = 0; - - c.command = command; - c.link = link; - c.len = len1 + len2; - - spin_lock_irqsave(&card->u.x.lock, flags); - - /* write command */ - cycx_poke(&card->hw, X25_MBOX_OFFS, &c, sizeof(c) - sizeof(c.buf)); - - /* write X.25 data */ - if (d1) { - cycx_poke(&card->hw, addr, d1, len1); - - if (d2) { - if (len2 > 254) { - u32 addr1 = 0xA00 + 0x400 * link; - - cycx_poke(&card->hw, addr + len1, d2, 249); - cycx_poke(&card->hw, addr1, ((u8*)d2) + 249, - len2 - 249); - } else - cycx_poke(&card->hw, addr + len1, d2, len2); - } - } - - /* generate interruption, executing command */ - cycx_intr(&card->hw); - - /* wait till card->mbox == 0 */ - do { - err = cycx_exec(card->mbox); - } while (retry-- && err); - - spin_unlock_irqrestore(&card->u.x.lock, flags); - - return err; -} - -/* Configure adapter. */ -static int cycx_x25_configure(struct cycx_device *card, - struct cycx_x25_config *conf) -{ - struct { - u16 nlinks; - struct cycx_x25_config conf[2]; - } x25_cmd_conf; - - memset(&x25_cmd_conf, 0, sizeof(x25_cmd_conf)); - x25_cmd_conf.nlinks = 2; - x25_cmd_conf.conf[0] = *conf; - /* FIXME: we need to find a way in the wanrouter framework - to configure the second link, for now lets use it - with the same config from the first link, fixing - the interface type to RS232, the speed in 38400 and - the clock to external */ - x25_cmd_conf.conf[1] = *conf; - x25_cmd_conf.conf[1].link = 1; - x25_cmd_conf.conf[1].speed = 5; /* 38400 */ - x25_cmd_conf.conf[1].clock = 8; - x25_cmd_conf.conf[1].flags = 0; /* default = RS232 */ - - cycx_x25_dump_config(&x25_cmd_conf.conf[0]); - cycx_x25_dump_config(&x25_cmd_conf.conf[1]); - - return x25_exec(card, X25_CONFIG, 0, - &x25_cmd_conf, sizeof(x25_cmd_conf), NULL, 0); -} - -/* Get protocol statistics. */ -static int cycx_x25_get_stats(struct cycx_device *card) -{ - /* the firmware expects 20 in the size field!!! - thanks to Daniela */ - int err = x25_exec(card, X25_STATISTIC, 0, NULL, 20, NULL, 0); - - if (err) - return err; - - interruptible_sleep_on(&card->wait_stats); - - if (signal_pending(current)) - return -EINTR; - - card->wandev.stats.rx_packets = card->u.x.stats.n2_rx_frames; - card->wandev.stats.rx_over_errors = card->u.x.stats.rx_over_errors; - card->wandev.stats.rx_crc_errors = card->u.x.stats.rx_crc_errors; - card->wandev.stats.rx_length_errors = 0; /* not available from fw */ - card->wandev.stats.rx_frame_errors = 0; /* not available from fw */ - card->wandev.stats.rx_missed_errors = card->u.x.stats.rx_aborts; - card->wandev.stats.rx_dropped = 0; /* not available from fw */ - card->wandev.stats.rx_errors = 0; /* not available from fw */ - card->wandev.stats.tx_packets = card->u.x.stats.n2_tx_frames; - card->wandev.stats.tx_aborted_errors = card->u.x.stats.tx_aborts; - card->wandev.stats.tx_dropped = 0; /* not available from fw */ - card->wandev.stats.collisions = 0; /* not available from fw */ - card->wandev.stats.tx_errors = 0; /* not available from fw */ - - cycx_x25_dump_devs(&card->wandev); - - return 0; -} - -/* return the number of nibbles */ -static int byte_to_nibble(u8 *s, u8 *d, char *nibble) -{ - int i = 0; - - if (*nibble && *s) { - d[i] |= *s++ - '0'; - *nibble = 0; - ++i; - } - - while (*s) { - d[i] = (*s - '0') << 4; - if (*(s + 1)) - d[i] |= *(s + 1) - '0'; - else { - *nibble = 1; - break; - } - ++i; - s += 2; - } - - return i; -} - -static void nibble_to_byte(u8 *s, u8 *d, u8 len, u8 nibble) -{ - if (nibble) { - *d++ = '0' + (*s++ & 0x0F); - --len; - } - - while (len) { - *d++ = '0' + (*s >> 4); - - if (--len) { - *d++ = '0' + (*s & 0x0F); - --len; - } else break; - - ++s; - } - - *d = '\0'; -} - -/* Place X.25 call. */ -static int x25_place_call(struct cycx_device *card, - struct cycx_x25_channel *chan) -{ - int err = 0, - len; - char d[64], - nibble = 0, - mylen = chan->local_addr ? strlen(chan->local_addr) : 0, - remotelen = strlen(chan->addr); - u8 key; - - if (card->u.x.connection_keys == ~0U) { - pr_info("%s: too many simultaneous connection requests!\n", - card->devname); - return -EAGAIN; - } - - key = ffz(card->u.x.connection_keys); - set_bit(key, (void*)&card->u.x.connection_keys); - ++key; - dprintk(1, KERN_INFO "%s:x25_place_call:key=%d\n", card->devname, key); - memset(d, 0, sizeof(d)); - d[1] = key; /* user key */ - d[2] = 0x10; - d[4] = 0x0B; - - len = byte_to_nibble(chan->addr, d + 6, &nibble); - - if (chan->local_addr) - len += byte_to_nibble(chan->local_addr, d + 6 + len, &nibble); - - if (nibble) - ++len; - - d[5] = mylen << 4 | remotelen; - d[6 + len + 1] = 0xCC; /* TCP/IP over X.25, thanks to Daniela :) */ - - if ((err = x25_exec(card, X25_CONNECT_REQUEST, chan->link, - &d, 7 + len + 1, NULL, 0)) != 0) - clear_bit(--key, (void*)&card->u.x.connection_keys); - else - chan->lcn = -key; - - return err; -} - -/* Place X.25 CONNECT RESPONSE. */ -static int cycx_x25_connect_response(struct cycx_device *card, - struct cycx_x25_channel *chan) -{ - u8 d[8]; - - memset(d, 0, sizeof(d)); - d[0] = d[3] = chan->lcn; - d[2] = 0x10; - d[4] = 0x0F; - d[7] = 0xCC; /* TCP/IP over X.25, thanks Daniela */ - - return x25_exec(card, X25_CONNECT_RESPONSE, chan->link, &d, 8, NULL, 0); -} - -/* Place X.25 DISCONNECT RESPONSE. */ -static int cycx_x25_disconnect_response(struct cycx_device *card, u8 link, - u8 lcn) -{ - char d[5]; - - memset(d, 0, sizeof(d)); - d[0] = d[3] = lcn; - d[2] = 0x10; - d[4] = 0x17; - - return x25_exec(card, X25_DISCONNECT_RESPONSE, link, &d, 5, NULL, 0); -} - -/* Clear X.25 call. */ -static int x25_clear_call(struct cycx_device *card, u8 link, u8 lcn, u8 cause, - u8 diagn) -{ - u8 d[7]; - - memset(d, 0, sizeof(d)); - d[0] = d[3] = lcn; - d[2] = 0x10; - d[4] = 0x13; - d[5] = cause; - d[6] = diagn; - - return x25_exec(card, X25_DISCONNECT_REQUEST, link, d, 7, NULL, 0); -} - -/* Send X.25 data packet. */ -static int cycx_x25_send(struct cycx_device *card, u8 link, u8 lcn, u8 bitm, - int len, void *buf) -{ - u8 d[] = "?\xFF\x10??"; - - d[0] = d[3] = lcn; - d[4] = bitm; - - return x25_exec(card, X25_DATA_REQUEST, link, &d, 5, buf, len); -} - -/* Miscellaneous */ -/* Find network device by its channel number. */ -static struct net_device *cycx_x25_get_dev_by_lcn(struct wan_device *wandev, - s16 lcn) -{ - struct net_device *dev = wandev->dev; - struct cycx_x25_channel *chan; - - while (dev) { - chan = netdev_priv(dev); - - if (chan->lcn == lcn) - break; - dev = chan->slave; - } - return dev; -} - -/* Find network device by its remote dte address. */ -static struct net_device * - cycx_x25_get_dev_by_dte_addr(struct wan_device *wandev, char *dte) -{ - struct net_device *dev = wandev->dev; - struct cycx_x25_channel *chan; - - while (dev) { - chan = netdev_priv(dev); - - if (!strcmp(chan->addr, dte)) - break; - dev = chan->slave; - } - return dev; -} - -/* Initiate connection on the logical channel. - * o for PVC we just get channel configuration - * o for SVCs place an X.25 call - * - * Return: 0 connected - * >0 connection in progress - * <0 failure */ -static int cycx_x25_chan_connect(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - - if (chan->svc) { - if (!chan->addr[0]) - return -EINVAL; /* no destination address */ - - dprintk(1, KERN_INFO "%s: placing X.25 call to %s...\n", - card->devname, chan->addr); - - if (x25_place_call(card, chan)) - return -EIO; - - cycx_x25_set_chan_state(dev, WAN_CONNECTING); - return 1; - } else - cycx_x25_set_chan_state(dev, WAN_CONNECTED); - - return 0; -} - -/* Disconnect logical channel. - * o if SVC then clear X.25 call */ -static void cycx_x25_chan_disconnect(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - if (chan->svc) { - x25_clear_call(chan->card, chan->link, chan->lcn, 0, 0); - cycx_x25_set_chan_state(dev, WAN_DISCONNECTING); - } else - cycx_x25_set_chan_state(dev, WAN_DISCONNECTED); -} - -/* Called by kernel timer */ -static void cycx_x25_chan_timer(unsigned long d) -{ - struct net_device *dev = (struct net_device *)d; - struct cycx_x25_channel *chan = netdev_priv(dev); - - if (chan->state == WAN_CONNECTED) - cycx_x25_chan_disconnect(dev); - else - pr_err("%s: %s for svc (%s) not connected!\n", - chan->card->devname, __func__, dev->name); -} - -/* Set logical channel state. */ -static void cycx_x25_set_chan_state(struct net_device *dev, u8 state) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - unsigned long flags; - char *string_state = NULL; - - spin_lock_irqsave(&card->lock, flags); - - if (chan->state != state) { - if (chan->svc && chan->state == WAN_CONNECTED) - del_timer(&chan->timer); - - switch (state) { - case WAN_CONNECTED: - string_state = "connected!"; - *(__be16*)dev->dev_addr = htons(chan->lcn); - netif_wake_queue(dev); - reset_timer(dev); - - if (chan->protocol == ETH_P_X25) - cycx_x25_chan_send_event(dev, - X25_IFACE_CONNECT); - - break; - case WAN_CONNECTING: - string_state = "connecting..."; - break; - case WAN_DISCONNECTING: - string_state = "disconnecting..."; - break; - case WAN_DISCONNECTED: - string_state = "disconnected!"; - - if (chan->svc) { - *(unsigned short*)dev->dev_addr = 0; - chan->lcn = 0; - } - - if (chan->protocol == ETH_P_X25) - cycx_x25_chan_send_event(dev, - X25_IFACE_DISCONNECT); - - netif_wake_queue(dev); - break; - } - - pr_info("%s: interface %s %s\n", - card->devname, dev->name, string_state); - chan->state = state; - } - - spin_unlock_irqrestore(&card->lock, flags); -} - -/* Send packet on a logical channel. - * When this function is called, tx_skb field of the channel data space - * points to the transmit socket buffer. When transmission is complete, - * release socket buffer and reset 'tbusy' flag. - * - * Return: 0 - transmission complete - * 1 - busy - * - * Notes: - * 1. If packet length is greater than MTU for this channel, we'll fragment - * the packet into 'complete sequence' using M-bit. - * 2. When transmission is complete, an event notification should be issued - * to the router. */ -static int cycx_x25_chan_send(struct net_device *dev, struct sk_buff *skb) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - int bitm = 0; /* final packet */ - unsigned len = skb->len; - - if (skb->len > card->wandev.mtu) { - len = card->wandev.mtu; - bitm = 0x10; /* set M-bit (more data) */ - } - - if (cycx_x25_send(card, chan->link, chan->lcn, bitm, len, skb->data)) - return 1; - - if (bitm) { - skb_pull(skb, len); - return 1; - } - - ++chan->ifstats.tx_packets; - chan->ifstats.tx_bytes += len; - - return 0; -} - -/* Send event (connection, disconnection, etc) to X.25 socket layer */ - -static void cycx_x25_chan_send_event(struct net_device *dev, u8 event) -{ - struct sk_buff *skb; - unsigned char *ptr; - - if ((skb = dev_alloc_skb(1)) == NULL) { - pr_err("%s: out of memory\n", __func__); - return; - } - - ptr = skb_put(skb, 1); - *ptr = event; - - skb->protocol = x25_type_trans(skb, dev); - netif_rx(skb); -} - -/* Convert line speed in bps to a number used by cyclom 2x code. */ -static u8 bps_to_speed_code(u32 bps) -{ - u8 number = 0; /* defaults to the lowest (1200) speed ;> */ - - if (bps >= 512000) number = 8; - else if (bps >= 256000) number = 7; - else if (bps >= 64000) number = 6; - else if (bps >= 38400) number = 5; - else if (bps >= 19200) number = 4; - else if (bps >= 9600) number = 3; - else if (bps >= 4800) number = 2; - else if (bps >= 2400) number = 1; - - return number; -} - -/* log base 2 */ -static u8 cycx_log2(u32 n) -{ - u8 log = 0; - - if (!n) - return 0; - - while (n > 1) { - n >>= 1; - ++log; - } - - return log; -} - -/* Convert decimal string to unsigned integer. - * If len != 0 then only 'len' characters of the string are converted. */ -static unsigned dec_to_uint(u8 *str, int len) -{ - unsigned val = 0; - - if (!len) - len = strlen(str); - - for (; len && isdigit(*str); ++str, --len) - val = (val * 10) + (*str - (unsigned) '0'); - - return val; -} - -static void reset_timer(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - if (chan->svc) - mod_timer(&chan->timer, jiffies+chan->idle_tmout*HZ); -} -#ifdef CYCLOMX_X25_DEBUG -static void cycx_x25_dump_config(struct cycx_x25_config *conf) -{ - pr_info("X.25 configuration\n"); - pr_info("-----------------\n"); - pr_info("link number=%d\n", conf->link); - pr_info("line speed=%d\n", conf->speed); - pr_info("clock=%sternal\n", conf->clock == 8 ? "Ex" : "In"); - pr_info("# level 2 retransm.=%d\n", conf->n2); - pr_info("level 2 window=%d\n", conf->n2win); - pr_info("level 3 window=%d\n", conf->n3win); - pr_info("# logical channels=%d\n", conf->nvc); - pr_info("level 3 pkt len=%d\n", conf->pktlen); - pr_info("my address=%d\n", conf->locaddr); - pr_info("remote address=%d\n", conf->remaddr); - pr_info("t1=%d seconds\n", conf->t1); - pr_info("t2=%d seconds\n", conf->t2); - pr_info("t21=%d seconds\n", conf->t21); - pr_info("# PVCs=%d\n", conf->npvc); - pr_info("t23=%d seconds\n", conf->t23); - pr_info("flags=0x%x\n", conf->flags); -} - -static void cycx_x25_dump_stats(struct cycx_x25_stats *stats) -{ - pr_info("X.25 statistics\n"); - pr_info("--------------\n"); - pr_info("rx_crc_errors=%d\n", stats->rx_crc_errors); - pr_info("rx_over_errors=%d\n", stats->rx_over_errors); - pr_info("n2_tx_frames=%d\n", stats->n2_tx_frames); - pr_info("n2_rx_frames=%d\n", stats->n2_rx_frames); - pr_info("tx_timeouts=%d\n", stats->tx_timeouts); - pr_info("rx_timeouts=%d\n", stats->rx_timeouts); - pr_info("n3_tx_packets=%d\n", stats->n3_tx_packets); - pr_info("n3_rx_packets=%d\n", stats->n3_rx_packets); - pr_info("tx_aborts=%d\n", stats->tx_aborts); - pr_info("rx_aborts=%d\n", stats->rx_aborts); -} - -static void cycx_x25_dump_devs(struct wan_device *wandev) -{ - struct net_device *dev = wandev->dev; - - pr_info("X.25 dev states\n"); - pr_info("name: addr: txoff: protocol:\n"); - pr_info("---------------------------------------\n"); - - while(dev) { - struct cycx_x25_channel *chan = netdev_priv(dev); - - pr_info("%-5.5s %-15.15s %d ETH_P_%s\n", - chan->name, chan->addr, netif_queue_stopped(dev), - chan->protocol == ETH_P_IP ? "IP" : "X25"); - dev = chan->slave; - } -} - -#endif /* CYCLOMX_X25_DEBUG */ -/* End */ diff --git a/include/linux/cyclomx.h b/include/linux/cyclomx.h deleted file mode 100644 index b88f7f428e58..000000000000 --- a/include/linux/cyclomx.h +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef _CYCLOMX_H -#define _CYCLOMX_H -/* -* cyclomx.h Cyclom 2X WAN Link Driver. -* User-level API definitions. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on wanpipe.h by Gene Kozin -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 2000/07/13 acme remove crap #if KERNEL_VERSION > blah -* 2000/01/21 acme rename cyclomx_open to cyclomx_mod_inc_use_count -* and cyclomx_close to cyclomx_mod_dec_use_count -* 1999/05/19 acme wait_queue_head_t wait_stats(support for 2.3.*) -* 1999/01/03 acme judicious use of data types -* 1998/12/27 acme cleanup: PACKED not needed -* 1998/08/08 acme Version 0.0.1 -*/ - -#include -#include - -#ifdef __KERNEL__ -/* Kernel Interface */ - -#include /* Cyclom 2X support module API definitions */ -#include /* Cyclom 2X firmware module definitions */ -#ifdef CONFIG_CYCLOMX_X25 -#include -#endif - -/* Adapter Data Space. - * This structure is needed because we handle multiple cards, otherwise - * static data would do it. - */ -struct cycx_device { - char devname[WAN_DRVNAME_SZ + 1];/* card name */ - struct cycx_hw hw; /* hardware configuration */ - struct wan_device wandev; /* WAN device data space */ - u32 state_tick; /* link state timestamp */ - spinlock_t lock; - char in_isr; /* interrupt-in-service flag */ - char buff_int_mode_unbusy; /* flag for carrying out dev_tint */ - wait_queue_head_t wait_stats; /* to wait for the STATS indication */ - void __iomem *mbox; /* -> mailbox */ - void (*isr)(struct cycx_device* card); /* interrupt service routine */ - int (*exec)(struct cycx_device* card, void* u_cmd, void* u_data); - union { -#ifdef CONFIG_CYCLOMX_X25 - struct { /* X.25 specific data */ - u32 lo_pvc; - u32 hi_pvc; - u32 lo_svc; - u32 hi_svc; - struct cycx_x25_stats stats; - spinlock_t lock; - u32 connection_keys; - } x; -#endif - } u; -}; - -/* Public Functions */ -void cycx_set_state(struct cycx_device *card, int state); - -#ifdef CONFIG_CYCLOMX_X25 -int cycx_x25_wan_init(struct cycx_device *card, wandev_conf_t *conf); -#endif -#endif /* __KERNEL__ */ -#endif /* _CYCLOMX_H */ diff --git a/include/linux/cycx_drv.h b/include/linux/cycx_drv.h deleted file mode 100644 index 12fe6b0bfcff..000000000000 --- a/include/linux/cycx_drv.h +++ /dev/null @@ -1,64 +0,0 @@ -/* -* cycx_drv.h CYCX Support Module. Kernel API Definitions. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdladrv.h by Gene Kozin -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 1999/10/23 acme cycxhw_t cleanup -* 1999/01/03 acme more judicious use of data types... -* uclong, ucchar, etc deleted, the u8, u16, u32 -* types are the portable way to go. -* 1999/01/03 acme judicious use of data types... u16, u32, etc -* 1998/12/26 acme FIXED_BUFFERS, CONF_OFFSET, -* removal of cy_read{bwl} -* 1998/08/08 acme Initial version. -*/ -#ifndef _CYCX_DRV_H -#define _CYCX_DRV_H - -#define CYCX_WINDOWSIZE 0x4000 /* default dual-port memory window size */ -#define GEN_CYCX_INTR 0x02 -#define RST_ENABLE 0x04 -#define START_CPU 0x06 -#define RST_DISABLE 0x08 -#define FIXED_BUFFERS 0x08 -#define TEST_PATTERN 0xaa55 -#define CMD_OFFSET 0x20 -#define CONF_OFFSET 0x0380 -#define RESET_OFFSET 0x3c00 /* For reset file load */ -#define DATA_OFFSET 0x0100 /* For code and data files load */ -#define START_OFFSET 0x3ff0 /* 80186 starts here */ - -/** - * struct cycx_hw - Adapter hardware configuration - * @fwid - firmware ID - * @irq - interrupt request level - * @dpmbase - dual-port memory base - * @dpmsize - dual-port memory size - * @reserved - reserved for future use - */ -struct cycx_hw { - u32 fwid; - int irq; - void __iomem *dpmbase; - u32 dpmsize; - u32 reserved[5]; -}; - -/* Function Prototypes */ -extern int cycx_setup(struct cycx_hw *hw, void *sfm, u32 len, unsigned long base); -extern int cycx_down(struct cycx_hw *hw); -extern int cycx_peek(struct cycx_hw *hw, u32 addr, void *buf, u32 len); -extern int cycx_poke(struct cycx_hw *hw, u32 addr, void *buf, u32 len); -extern int cycx_exec(void __iomem *addr); - -extern void cycx_intr(struct cycx_hw *hw); -#endif /* _CYCX_DRV_H */ diff --git a/include/linux/wanrouter.h b/include/linux/wanrouter.h index cec4b4159767..8198a63cf459 100644 --- a/include/linux/wanrouter.h +++ b/include/linux/wanrouter.h @@ -1,129 +1,10 @@ -/***************************************************************************** -* wanrouter.h Definitions for the WAN Multiprotocol Router Module. -* This module provides API and common services for WAN Link -* Drivers and is completely hardware-independent. -* -* Author: Nenad Corbic -* Gideon Hack -* Additions: Arnaldo Melo -* -* Copyright: (c) 1995-2000 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Jul 21, 2000 Nenad Corbic Added WAN_FT1_READY State -* Feb 24, 2000 Nenad Corbic Added support for socket based x25api -* Jan 28, 2000 Nenad Corbic Added support for the ASYNC protocol. -* Oct 04, 1999 Nenad Corbic Updated for 2.1.0 release -* Jun 02, 1999 Gideon Hack Added support for the S514 adapter. -* May 23, 1999 Arnaldo Melo Added local_addr to wanif_conf_t -* WAN_DISCONNECTING state added -* Jul 20, 1998 David Fong Added Inverse ARP options to 'wanif_conf_t' -* Jun 12, 1998 David Fong Added Cisco HDLC support. -* Dec 16, 1997 Jaspreet Singh Moved 'enable_IPX' and 'network_number' to -* 'wanif_conf_t' -* Dec 05, 1997 Jaspreet Singh Added 'pap', 'chap' to 'wanif_conf_t' -* Added 'authenticator' to 'wan_ppp_conf_t' -* Nov 06, 1997 Jaspreet Singh Changed Router Driver version to 1.1 from 1.0 -* Oct 20, 1997 Jaspreet Singh Added 'cir','bc','be' and 'mc' to 'wanif_conf_t' -* Added 'enable_IPX' and 'network_number' to -* 'wan_device_t'. Also added defines for -* UDP PACKET TYPE, Interrupt test, critical values -* for RACE conditions. -* Oct 05, 1997 Jaspreet Singh Added 'dlci_num' and 'dlci[100]' to -* 'wan_fr_conf_t' to configure a list of dlci(s) -* for a NODE -* Jul 07, 1997 Jaspreet Singh Added 'ttl' to 'wandev_conf_t' & 'wan_device_t' -* May 29, 1997 Jaspreet Singh Added 'tx_int_enabled' to 'wan_device_t' -* May 21, 1997 Jaspreet Singh Added 'udp_port' to 'wan_device_t' -* Apr 25, 1997 Farhan Thawar Added 'udp_port' to 'wandev_conf_t' -* Jan 16, 1997 Gene Kozin router_devlist made public -* Jan 02, 1997 Gene Kozin Initial version (based on wanpipe.h). -*****************************************************************************/ +/* + * wanrouter.h Legacy declarations kept around until X25 is removed + */ + #ifndef _ROUTER_H #define _ROUTER_H #include -/****** Kernel Interface ****************************************************/ - -#include /* support for device drivers */ -#include /* proc filesystem pragmatics */ -#include /* support for network drivers */ -#include /* Support for SMP Locking */ - -/*---------------------------------------------------------------------------- - * WAN device data space. - */ -struct wan_device { - unsigned magic; /* magic number */ - char* name; /* -> WAN device name (ASCIIZ) */ - void* private; /* -> driver private data */ - unsigned config_id; /* Configuration ID */ - /****** hardware configuration ******/ - unsigned ioport; /* adapter I/O port base #1 */ - char S514_cpu_no[1]; /* PCI CPU Number */ - unsigned char S514_slot_no; /* PCI Slot Number */ - unsigned long maddr; /* dual-port memory address */ - unsigned msize; /* dual-port memory size */ - int irq; /* interrupt request level */ - int dma; /* DMA request level */ - unsigned bps; /* data transfer rate */ - unsigned mtu; /* max physical transmit unit size */ - unsigned udp_port; /* UDP port for management */ - unsigned char ttl; /* Time To Live for UDP security */ - unsigned enable_tx_int; /* Transmit Interrupt enabled or not */ - char interface; /* RS-232/V.35, etc. */ - char clocking; /* external/internal */ - char line_coding; /* NRZ/NRZI/FM0/FM1, etc. */ - char station; /* DTE/DCE, primary/secondary, etc. */ - char connection; /* permanent/switched/on-demand */ - char signalling; /* Signalling RS232 or V35 */ - char read_mode; /* read mode: Polling or interrupt */ - char new_if_cnt; /* Number of interfaces per wanpipe */ - char del_if_cnt; /* Number of times del_if() gets called */ - unsigned char piggyback; /* Piggibacking a port */ - unsigned hw_opt[4]; /* other hardware options */ - /****** status and statistics *******/ - char state; /* device state */ - char api_status; /* device api status */ - struct net_device_stats stats; /* interface statistics */ - unsigned reserved[16]; /* reserved for future use */ - unsigned long critical; /* critical section flag */ - spinlock_t lock; /* Support for SMP Locking */ - - /****** device management methods ***/ - int (*setup) (struct wan_device *wandev, wandev_conf_t *conf); - int (*shutdown) (struct wan_device *wandev); - int (*update) (struct wan_device *wandev); - int (*ioctl) (struct wan_device *wandev, unsigned cmd, - unsigned long arg); - int (*new_if)(struct wan_device *wandev, struct net_device *dev, - wanif_conf_t *conf); - int (*del_if)(struct wan_device *wandev, struct net_device *dev); - /****** maintained by the router ****/ - struct wan_device* next; /* -> next device */ - struct net_device* dev; /* list of network interfaces */ - unsigned ndev; /* number of interfaces */ - struct proc_dir_entry *dent; /* proc filesystem entry */ -}; - -/* Public functions available for device drivers */ -extern int register_wan_device(struct wan_device *wandev); -extern int unregister_wan_device(char *name); - -/* Proc interface functions. These must not be called by the drivers! */ -extern int wanrouter_proc_init(void); -extern void wanrouter_proc_cleanup(void); -extern int wanrouter_proc_add(struct wan_device *wandev); -extern int wanrouter_proc_delete(struct wan_device *wandev); -extern long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg); - -/* Public Data */ -/* list of registered devices */ -extern struct wan_device *wanrouter_router_devlist; - #endif /* _ROUTER_H */ diff --git a/include/uapi/linux/wanrouter.h b/include/uapi/linux/wanrouter.h index 7617df2833d5..498d6c12c666 100644 --- a/include/uapi/linux/wanrouter.h +++ b/include/uapi/linux/wanrouter.h @@ -1,363 +1,9 @@ -/***************************************************************************** -* wanrouter.h Definitions for the WAN Multiprotocol Router Module. -* This module provides API and common services for WAN Link -* Drivers and is completely hardware-independent. -* -* Author: Nenad Corbic -* Gideon Hack -* Additions: Arnaldo Melo -* -* Copyright: (c) 1995-2000 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Jul 21, 2000 Nenad Corbic Added WAN_FT1_READY State -* Feb 24, 2000 Nenad Corbic Added support for socket based x25api -* Jan 28, 2000 Nenad Corbic Added support for the ASYNC protocol. -* Oct 04, 1999 Nenad Corbic Updated for 2.1.0 release -* Jun 02, 1999 Gideon Hack Added support for the S514 adapter. -* May 23, 1999 Arnaldo Melo Added local_addr to wanif_conf_t -* WAN_DISCONNECTING state added -* Jul 20, 1998 David Fong Added Inverse ARP options to 'wanif_conf_t' -* Jun 12, 1998 David Fong Added Cisco HDLC support. -* Dec 16, 1997 Jaspreet Singh Moved 'enable_IPX' and 'network_number' to -* 'wanif_conf_t' -* Dec 05, 1997 Jaspreet Singh Added 'pap', 'chap' to 'wanif_conf_t' -* Added 'authenticator' to 'wan_ppp_conf_t' -* Nov 06, 1997 Jaspreet Singh Changed Router Driver version to 1.1 from 1.0 -* Oct 20, 1997 Jaspreet Singh Added 'cir','bc','be' and 'mc' to 'wanif_conf_t' -* Added 'enable_IPX' and 'network_number' to -* 'wan_device_t'. Also added defines for -* UDP PACKET TYPE, Interrupt test, critical values -* for RACE conditions. -* Oct 05, 1997 Jaspreet Singh Added 'dlci_num' and 'dlci[100]' to -* 'wan_fr_conf_t' to configure a list of dlci(s) -* for a NODE -* Jul 07, 1997 Jaspreet Singh Added 'ttl' to 'wandev_conf_t' & 'wan_device_t' -* May 29, 1997 Jaspreet Singh Added 'tx_int_enabled' to 'wan_device_t' -* May 21, 1997 Jaspreet Singh Added 'udp_port' to 'wan_device_t' -* Apr 25, 1997 Farhan Thawar Added 'udp_port' to 'wandev_conf_t' -* Jan 16, 1997 Gene Kozin router_devlist made public -* Jan 02, 1997 Gene Kozin Initial version (based on wanpipe.h). -*****************************************************************************/ - -#ifndef _UAPI_ROUTER_H -#define _UAPI_ROUTER_H - -#define ROUTER_NAME "wanrouter" /* in case we ever change it */ -#define ROUTER_VERSION 1 /* version number */ -#define ROUTER_RELEASE 1 /* release (minor version) number */ -#define ROUTER_IOCTL 'W' /* for IOCTL calls */ -#define ROUTER_MAGIC 0x524D4157L /* signature: 'WANR' reversed */ - -/* IOCTL codes for /proc/router/ entries (up to 255) */ -enum router_ioctls -{ - ROUTER_SETUP = ROUTER_IOCTL<<8, /* configure device */ - ROUTER_DOWN, /* shut down device */ - ROUTER_STAT, /* get device status */ - ROUTER_IFNEW, /* add interface */ - ROUTER_IFDEL, /* delete interface */ - ROUTER_IFSTAT, /* get interface status */ - ROUTER_USER = (ROUTER_IOCTL<<8)+16, /* driver-specific calls */ - ROUTER_USER_MAX = (ROUTER_IOCTL<<8)+31 -}; - -/* identifiers for displaying proc file data for dual port adapters */ -#define PROC_DATA_PORT_0 0x8000 /* the data is for port 0 */ -#define PROC_DATA_PORT_1 0x8001 /* the data is for port 1 */ - -/* NLPID for packet encapsulation (ISO/IEC TR 9577) */ -#define NLPID_IP 0xCC /* Internet Protocol Datagram */ -#define NLPID_SNAP 0x80 /* IEEE Subnetwork Access Protocol */ -#define NLPID_CLNP 0x81 /* ISO/IEC 8473 */ -#define NLPID_ESIS 0x82 /* ISO/IEC 9542 */ -#define NLPID_ISIS 0x83 /* ISO/IEC ISIS */ -#define NLPID_Q933 0x08 /* CCITT Q.933 */ - -/* Miscellaneous */ -#define WAN_IFNAME_SZ 15 /* max length of the interface name */ -#define WAN_DRVNAME_SZ 15 /* max length of the link driver name */ -#define WAN_ADDRESS_SZ 31 /* max length of the WAN media address */ -#define USED_BY_FIELD 8 /* max length of the used by field */ - -/* Defines for UDP PACKET TYPE */ -#define UDP_PTPIPE_TYPE 0x01 -#define UDP_FPIPE_TYPE 0x02 -#define UDP_CPIPE_TYPE 0x03 -#define UDP_DRVSTATS_TYPE 0x04 -#define UDP_INVALID_TYPE 0x05 - -/* Command return code */ -#define CMD_OK 0 /* normal firmware return code */ -#define CMD_TIMEOUT 0xFF /* firmware command timed out */ - -/* UDP Packet Management */ -#define UDP_PKT_FRM_STACK 0x00 -#define UDP_PKT_FRM_NETWORK 0x01 - -/* Maximum interrupt test counter */ -#define MAX_INTR_TEST_COUNTER 100 - -/* Critical Values for RACE conditions*/ -#define CRITICAL_IN_ISR 0xA1 -#define CRITICAL_INTR_HANDLED 0xB1 - -/****** Data Types **********************************************************/ - -/*---------------------------------------------------------------------------- - * X.25-specific link-level configuration. - */ -typedef struct wan_x25_conf -{ - unsigned lo_pvc; /* lowest permanent circuit number */ - unsigned hi_pvc; /* highest permanent circuit number */ - unsigned lo_svc; /* lowest switched circuit number */ - unsigned hi_svc; /* highest switched circuit number */ - unsigned hdlc_window; /* HDLC window size (1..7) */ - unsigned pkt_window; /* X.25 packet window size (1..7) */ - unsigned t1; /* HDLC timer T1, sec (1..30) */ - unsigned t2; /* HDLC timer T2, sec (0..29) */ - unsigned t4; /* HDLC supervisory frame timer = T4 * T1 */ - unsigned n2; /* HDLC retransmission limit (1..30) */ - unsigned t10_t20; /* X.25 RESTART timeout, sec (1..255) */ - unsigned t11_t21; /* X.25 CALL timeout, sec (1..255) */ - unsigned t12_t22; /* X.25 RESET timeout, sec (1..255) */ - unsigned t13_t23; /* X.25 CLEAR timeout, sec (1..255) */ - unsigned t16_t26; /* X.25 INTERRUPT timeout, sec (1..255) */ - unsigned t28; /* X.25 REGISTRATION timeout, sec (1..255) */ - unsigned r10_r20; /* RESTART retransmission limit (0..250) */ - unsigned r12_r22; /* RESET retransmission limit (0..250) */ - unsigned r13_r23; /* CLEAR retransmission limit (0..250) */ - unsigned ccitt_compat; /* compatibility mode: 1988/1984/1980 */ - unsigned x25_conf_opt; /* User defined x25 config optoins */ - unsigned char LAPB_hdlc_only; /* Run in HDLC only mode */ - unsigned char logging; /* Control connection logging */ - unsigned char oob_on_modem; /* Whether to send modem status to the user app */ -} wan_x25_conf_t; - -/*---------------------------------------------------------------------------- - * Frame relay specific link-level configuration. - */ -typedef struct wan_fr_conf -{ - unsigned signalling; /* local in-channel signalling type */ - unsigned t391; /* link integrity verification timer */ - unsigned t392; /* polling verification timer */ - unsigned n391; /* full status polling cycle counter */ - unsigned n392; /* error threshold counter */ - unsigned n393; /* monitored events counter */ - unsigned dlci_num; /* number of DLCs (access node) */ - unsigned dlci[100]; /* List of all DLCIs */ -} wan_fr_conf_t; - -/*---------------------------------------------------------------------------- - * PPP-specific link-level configuration. - */ -typedef struct wan_ppp_conf -{ - unsigned restart_tmr; /* restart timer */ - unsigned auth_rsrt_tmr; /* authentication timer */ - unsigned auth_wait_tmr; /* authentication timer */ - unsigned mdm_fail_tmr; /* modem failure timer */ - unsigned dtr_drop_tmr; /* DTR drop timer */ - unsigned connect_tmout; /* connection timeout */ - unsigned conf_retry; /* max. retry */ - unsigned term_retry; /* max. retry */ - unsigned fail_retry; /* max. retry */ - unsigned auth_retry; /* max. retry */ - unsigned auth_options; /* authentication opt. */ - unsigned ip_options; /* IP options */ - char authenticator; /* AUTHENTICATOR or not */ - char ip_mode; /* Static/Host/Peer */ -} wan_ppp_conf_t; - -/*---------------------------------------------------------------------------- - * CHDLC-specific link-level configuration. - */ -typedef struct wan_chdlc_conf -{ - unsigned char ignore_dcd; /* Protocol options: */ - unsigned char ignore_cts; /* Ignore these to determine */ - unsigned char ignore_keepalive; /* link status (Yes or No) */ - unsigned char hdlc_streaming; /* hdlc_streaming mode (Y/N) */ - unsigned char receive_only; /* no transmit buffering (Y/N) */ - unsigned keepalive_tx_tmr; /* transmit keepalive timer */ - unsigned keepalive_rx_tmr; /* receive keepalive timer */ - unsigned keepalive_err_margin; /* keepalive_error_tolerance */ - unsigned slarp_timer; /* SLARP request timer */ -} wan_chdlc_conf_t; - - -/*---------------------------------------------------------------------------- - * WAN device configuration. Passed to ROUTER_SETUP IOCTL. - */ -typedef struct wandev_conf -{ - unsigned magic; /* magic number (for verification) */ - unsigned config_id; /* configuration structure identifier */ - /****** hardware configuration ******/ - unsigned ioport; /* adapter I/O port base */ - unsigned long maddr; /* dual-port memory address */ - unsigned msize; /* dual-port memory size */ - int irq; /* interrupt request level */ - int dma; /* DMA request level */ - char S514_CPU_no[1]; /* S514 PCI adapter CPU number ('A' or 'B') */ - unsigned PCI_slot_no; /* S514 PCI adapter slot number */ - char auto_pci_cfg; /* S515 PCI automatic slot detection */ - char comm_port; /* Communication Port (PRI=0, SEC=1) */ - unsigned bps; /* data transfer rate */ - unsigned mtu; /* maximum transmit unit size */ - unsigned udp_port; /* UDP port for management */ - unsigned char ttl; /* Time To Live for UDP security */ - unsigned char ft1; /* FT1 Configurator Option */ - char interface; /* RS-232/V.35, etc. */ - char clocking; /* external/internal */ - char line_coding; /* NRZ/NRZI/FM0/FM1, etc. */ - char station; /* DTE/DCE, primary/secondary, etc. */ - char connection; /* permanent/switched/on-demand */ - char read_mode; /* read mode: Polling or interrupt */ - char receive_only; /* disable tx buffers */ - char tty; /* Create a fake tty device */ - unsigned tty_major; /* Major number for wanpipe tty device */ - unsigned tty_minor; /* Minor number for wanpipe tty device */ - unsigned tty_mode; /* TTY operation mode SYNC or ASYNC */ - char backup; /* Backup Mode */ - unsigned hw_opt[4]; /* other hardware options */ - unsigned reserved[4]; - /****** arbitrary data ***************/ - unsigned data_size; /* data buffer size */ - void* data; /* data buffer, e.g. firmware */ - union /****** protocol-specific ************/ - { - wan_x25_conf_t x25; /* X.25 configuration */ - wan_ppp_conf_t ppp; /* PPP configuration */ - wan_fr_conf_t fr; /* frame relay configuration */ - wan_chdlc_conf_t chdlc; /* Cisco HDLC configuration */ - } u; -} wandev_conf_t; - -/* 'config_id' definitions */ -#define WANCONFIG_X25 101 /* X.25 link */ -#define WANCONFIG_FR 102 /* frame relay link */ -#define WANCONFIG_PPP 103 /* synchronous PPP link */ -#define WANCONFIG_CHDLC 104 /* Cisco HDLC Link */ -#define WANCONFIG_BSC 105 /* BiSync Streaming */ -#define WANCONFIG_HDLC 106 /* HDLC Support */ -#define WANCONFIG_MPPP 107 /* Multi Port PPP over RAW CHDLC */ - /* - * Configuration options defines. + * wanrouter.h Legacy declarations kept around until X25 is removed */ -/* general options */ -#define WANOPT_OFF 0 -#define WANOPT_ON 1 -#define WANOPT_NO 0 -#define WANOPT_YES 1 - -/* intercace options */ -#define WANOPT_RS232 0 -#define WANOPT_V35 1 - -/* data encoding options */ -#define WANOPT_NRZ 0 -#define WANOPT_NRZI 1 -#define WANOPT_FM0 2 -#define WANOPT_FM1 3 - -/* link type options */ -#define WANOPT_POINTTOPOINT 0 /* RTS always active */ -#define WANOPT_MULTIDROP 1 /* RTS is active when transmitting */ - -/* clocking options */ -#define WANOPT_EXTERNAL 0 -#define WANOPT_INTERNAL 1 - -/* station options */ -#define WANOPT_DTE 0 -#define WANOPT_DCE 1 -#define WANOPT_CPE 0 -#define WANOPT_NODE 1 -#define WANOPT_SECONDARY 0 -#define WANOPT_PRIMARY 1 - -/* connection options */ -#define WANOPT_PERMANENT 0 /* DTR always active */ -#define WANOPT_SWITCHED 1 /* use DTR to setup link (dial-up) */ -#define WANOPT_ONDEMAND 2 /* activate DTR only before sending */ - -/* frame relay in-channel signalling */ -#define WANOPT_FR_ANSI 1 /* ANSI T1.617 Annex D */ -#define WANOPT_FR_Q933 2 /* ITU Q.933A */ -#define WANOPT_FR_LMI 3 /* LMI */ - -/* PPP IP Mode Options */ -#define WANOPT_PPP_STATIC 0 -#define WANOPT_PPP_HOST 1 -#define WANOPT_PPP_PEER 2 - -/* ASY Mode Options */ -#define WANOPT_ONE 1 -#define WANOPT_TWO 2 -#define WANOPT_ONE_AND_HALF 3 - -#define WANOPT_NONE 0 -#define WANOPT_ODD 1 -#define WANOPT_EVEN 2 - -/* CHDLC Protocol Options */ -/* DF Commented out for now. - -#define WANOPT_CHDLC_NO_DCD IGNORE_DCD_FOR_LINK_STAT -#define WANOPT_CHDLC_NO_CTS IGNORE_CTS_FOR_LINK_STAT -#define WANOPT_CHDLC_NO_KEEPALIVE IGNORE_KPALV_FOR_LINK_STAT -*/ - -/* Port options */ -#define WANOPT_PRI 0 -#define WANOPT_SEC 1 -/* read mode */ -#define WANOPT_INTR 0 -#define WANOPT_POLL 1 - -#define WANOPT_TTY_SYNC 0 -#define WANOPT_TTY_ASYNC 1 -/*---------------------------------------------------------------------------- - * WAN Link Status Info (for ROUTER_STAT IOCTL). - */ -typedef struct wandev_stat -{ - unsigned state; /* link state */ - unsigned ndev; /* number of configured interfaces */ - - /* link/interface configuration */ - unsigned connection; /* permanent/switched/on-demand */ - unsigned media_type; /* Frame relay/PPP/X.25/SDLC, etc. */ - unsigned mtu; /* max. transmit unit for this device */ - - /* physical level statistics */ - unsigned modem_status; /* modem status */ - unsigned rx_frames; /* received frames count */ - unsigned rx_overruns; /* receiver overrun error count */ - unsigned rx_crc_err; /* receive CRC error count */ - unsigned rx_aborts; /* received aborted frames count */ - unsigned rx_bad_length; /* unexpetedly long/short frames count */ - unsigned rx_dropped; /* frames discarded at device level */ - unsigned tx_frames; /* transmitted frames count */ - unsigned tx_underruns; /* aborted transmissions (underruns) count */ - unsigned tx_timeouts; /* transmission timeouts */ - unsigned tx_rejects; /* other transmit errors */ - - /* media level statistics */ - unsigned rx_bad_format; /* frames with invalid format */ - unsigned rx_bad_addr; /* frames with invalid media address */ - unsigned tx_retries; /* frames re-transmitted */ - unsigned reserved[16]; /* reserved for future use */ -} wandev_stat_t; +#ifndef _UAPI_ROUTER_H +#define _UAPI_ROUTER_H /* 'state' defines */ enum wan_states @@ -365,88 +11,7 @@ enum wan_states WAN_UNCONFIGURED, /* link/channel is not configured */ WAN_DISCONNECTED, /* link/channel is disconnected */ WAN_CONNECTING, /* connection is in progress */ - WAN_CONNECTED, /* link/channel is operational */ - WAN_LIMIT, /* for verification only */ - WAN_DUALPORT, /* for Dual Port cards */ - WAN_DISCONNECTING, - WAN_FT1_READY /* FT1 Configurator Ready */ + WAN_CONNECTED /* link/channel is operational */ }; -enum { - WAN_LOCAL_IP, - WAN_POINTOPOINT_IP, - WAN_NETMASK_IP, - WAN_BROADCAST_IP -}; - -/* 'modem_status' masks */ -#define WAN_MODEM_CTS 0x0001 /* CTS line active */ -#define WAN_MODEM_DCD 0x0002 /* DCD line active */ -#define WAN_MODEM_DTR 0x0010 /* DTR line active */ -#define WAN_MODEM_RTS 0x0020 /* RTS line active */ - -/*---------------------------------------------------------------------------- - * WAN interface (logical channel) configuration (for ROUTER_IFNEW IOCTL). - */ -typedef struct wanif_conf -{ - unsigned magic; /* magic number */ - unsigned config_id; /* configuration identifier */ - char name[WAN_IFNAME_SZ+1]; /* interface name, ASCIIZ */ - char addr[WAN_ADDRESS_SZ+1]; /* media address, ASCIIZ */ - char usedby[USED_BY_FIELD]; /* used by API or WANPIPE */ - unsigned idle_timeout; /* sec, before disconnecting */ - unsigned hold_timeout; /* sec, before re-connecting */ - unsigned cir; /* Committed Information Rate fwd,bwd*/ - unsigned bc; /* Committed Burst Size fwd, bwd */ - unsigned be; /* Excess Burst Size fwd, bwd */ - unsigned char enable_IPX; /* Enable or Disable IPX */ - unsigned char inarp; /* Send Inverse ARP requests Y/N */ - unsigned inarp_interval; /* sec, between InARP requests */ - unsigned long network_number; /* Network Number for IPX */ - char mc; /* Multicast on or off */ - char local_addr[WAN_ADDRESS_SZ+1];/* local media address, ASCIIZ */ - unsigned char port; /* board port */ - unsigned char protocol; /* prococol used in this channel (TCPOX25 or X25) */ - char pap; /* PAP enabled or disabled */ - char chap; /* CHAP enabled or disabled */ - unsigned char userid[511]; /* List of User Id */ - unsigned char passwd[511]; /* List of passwords */ - unsigned char sysname[31]; /* Name of the system */ - unsigned char ignore_dcd; /* Protocol options: */ - unsigned char ignore_cts; /* Ignore these to determine */ - unsigned char ignore_keepalive; /* link status (Yes or No) */ - unsigned char hdlc_streaming; /* Hdlc streaming mode (Y/N) */ - unsigned keepalive_tx_tmr; /* transmit keepalive timer */ - unsigned keepalive_rx_tmr; /* receive keepalive timer */ - unsigned keepalive_err_margin; /* keepalive_error_tolerance */ - unsigned slarp_timer; /* SLARP request timer */ - unsigned char ttl; /* Time To Live for UDP security */ - char interface; /* RS-232/V.35, etc. */ - char clocking; /* external/internal */ - unsigned bps; /* data transfer rate */ - unsigned mtu; /* maximum transmit unit size */ - unsigned char if_down; /* brind down interface when disconnected */ - unsigned char gateway; /* Is this interface a gateway */ - unsigned char true_if_encoding; /* Set the dev->type to true board protocol */ - - unsigned char asy_data_trans; /* async API options */ - unsigned char rts_hs_for_receive; /* async Protocol options */ - unsigned char xon_xoff_hs_for_receive; - unsigned char xon_xoff_hs_for_transmit; - unsigned char dcd_hs_for_transmit; - unsigned char cts_hs_for_transmit; - unsigned char async_mode; - unsigned tx_bits_per_char; - unsigned rx_bits_per_char; - unsigned stop_bits; - unsigned char parity; - unsigned break_timer; - unsigned inter_char_timer; - unsigned rx_complete_length; - unsigned xon_char; - unsigned xoff_char; - unsigned char receive_only; /* no transmit buffering (Y/N) */ -} wanif_conf_t; - #endif /* _UAPI_ROUTER_H */ diff --git a/net/wanrouter/Kconfig b/net/wanrouter/Kconfig deleted file mode 100644 index a157a2e64e18..000000000000 --- a/net/wanrouter/Kconfig +++ /dev/null @@ -1,27 +0,0 @@ -# -# Configuration for WAN router -# - -config WAN_ROUTER - tristate "WAN router (DEPRECATED)" - depends on EXPERIMENTAL - ---help--- - Wide Area Networks (WANs), such as X.25, frame relay and leased - lines, are used to interconnect Local Area Networks (LANs) over vast - distances with data transfer rates significantly higher than those - achievable with commonly used asynchronous modem connections. - Usually, a quite expensive external device called a `WAN router' is - needed to connect to a WAN. - - As an alternative, WAN routing can be built into the Linux kernel. - With relatively inexpensive WAN interface cards available on the - market, a perfectly usable router can be built for less than half - the price of an external router. If you have one of those cards and - wish to use your Linux box as a WAN router, say Y here and also to - the WAN driver for your card, below. You will then need the - wan-tools package which is available from . - - To compile WAN routing support as a module, choose M here: the - module will be called wanrouter. - - If unsure, say N. diff --git a/net/wanrouter/Makefile b/net/wanrouter/Makefile deleted file mode 100644 index 4da14bc48078..000000000000 --- a/net/wanrouter/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the Linux WAN router layer. -# - -obj-$(CONFIG_WAN_ROUTER) += wanrouter.o - -wanrouter-y := wanproc.o wanmain.o diff --git a/net/wanrouter/patchlevel b/net/wanrouter/patchlevel deleted file mode 100644 index c043eea7767e..000000000000 --- a/net/wanrouter/patchlevel +++ /dev/null @@ -1 +0,0 @@ -2.2.1 diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c deleted file mode 100644 index 2ab785064b7e..000000000000 --- a/net/wanrouter/wanmain.c +++ /dev/null @@ -1,782 +0,0 @@ -/***************************************************************************** -* wanmain.c WAN Multiprotocol Router Module. Main code. -* -* This module is completely hardware-independent and provides -* the following common services for the WAN Link Drivers: -* o WAN device management (registering, unregistering) -* o Network interface management -* o Physical connection management (dial-up, incoming calls) -* o Logical connection management (switched virtual circuits) -* o Protocol encapsulation/decapsulation -* -* Author: Gideon Hack -* -* Copyright: (c) 1995-1999 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Nov 24, 2000 Nenad Corbic Updated for 2.4.X kernels -* Nov 07, 2000 Nenad Corbic Fixed the Mulit-Port PPP for kernels 2.2.16 and -* greater. -* Aug 2, 2000 Nenad Corbic Block the Multi-Port PPP from running on -* kernels 2.2.16 or greater. The SyncPPP -* has changed. -* Jul 13, 2000 Nenad Corbic Added SyncPPP support -* Added extra debugging in device_setup(). -* Oct 01, 1999 Gideon Hack Update for s514 PCI card -* Dec 27, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) -* Jan 16, 1997 Gene Kozin router_devlist made public -* Jan 31, 1997 Alan Cox Hacked it about a bit for 2.1 -* Jun 27, 1997 Alan Cox realigned with vendor code -* Oct 15, 1997 Farhan Thawar changed wan_encapsulate to add a pad byte of 0 -* Apr 20, 1998 Alan Cox Fixed 2.1 symbols -* May 17, 1998 K. Baranowski Fixed SNAP encapsulation in wan_encapsulate -* Dec 15, 1998 Arnaldo Melo support for firmwares of up to 128000 bytes -* check wandev->setup return value -* Dec 22, 1998 Arnaldo Melo vmalloc/vfree used in device_setup to allocate -* kernel memory and copy configuration data to -* kernel space (for big firmwares) -* Jun 02, 1999 Gideon Hack Updates for Linux 2.0.X and 2.2.X kernels. -*****************************************************************************/ - -#include /* offsetof(), etc. */ -#include -#include /* return codes */ -#include -#include /* support for loadable modules */ -#include /* kmalloc(), kfree() */ -#include -#include -#include /* inline mem*, str* functions */ - -#include /* htons(), etc. */ -#include /* WAN router API definitions */ - -#include /* vmalloc, vfree */ -#include /* copy_to/from_user */ -#include /* __initfunc et al. */ - -#define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev))) - -/* - * Function Prototypes - */ - -/* - * WAN device IOCTL handlers - */ - -static DEFINE_MUTEX(wanrouter_mutex); -static int wanrouter_device_setup(struct wan_device *wandev, - wandev_conf_t __user *u_conf); -static int wanrouter_device_stat(struct wan_device *wandev, - wandev_stat_t __user *u_stat); -static int wanrouter_device_shutdown(struct wan_device *wandev); -static int wanrouter_device_new_if(struct wan_device *wandev, - wanif_conf_t __user *u_conf); -static int wanrouter_device_del_if(struct wan_device *wandev, - char __user *u_name); - -/* - * Miscellaneous - */ - -static struct wan_device *wanrouter_find_device(char *name); -static int wanrouter_delete_interface(struct wan_device *wandev, char *name); -static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __acquires(lock); -static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __releases(lock); - - - -/* - * Global Data - */ - -static char wanrouter_fullname[] = "Sangoma WANPIPE Router"; -static char wanrouter_copyright[] = "(c) 1995-2000 Sangoma Technologies Inc."; -static char wanrouter_modname[] = ROUTER_NAME; /* short module name */ -struct wan_device* wanrouter_router_devlist; /* list of registered devices */ - -/* - * Organize Unique Identifiers for encapsulation/decapsulation - */ - -#if 0 -static unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 }; -static unsigned char wanrouter_oui_802_2[] = { 0x00, 0x80, 0xC2 }; -#endif - -static int __init wanrouter_init(void) -{ - int err; - - printk(KERN_INFO "%s v%u.%u %s\n", - wanrouter_fullname, ROUTER_VERSION, ROUTER_RELEASE, - wanrouter_copyright); - - err = wanrouter_proc_init(); - if (err) - printk(KERN_INFO "%s: can't create entry in proc filesystem!\n", - wanrouter_modname); - - return err; -} - -static void __exit wanrouter_cleanup (void) -{ - wanrouter_proc_cleanup(); -} - -/* - * This is just plain dumb. We should move the bugger to drivers/net/wan, - * slap it first in directory and make it module_init(). The only reason - * for subsys_initcall() here is that net goes after drivers (why, BTW?) - */ -subsys_initcall(wanrouter_init); -module_exit(wanrouter_cleanup); - -/* - * Kernel APIs - */ - -/* - * Register WAN device. - * o verify device credentials - * o create an entry for the device in the /proc/net/router directory - * o initialize internally maintained fields of the wan_device structure - * o link device data space to a singly-linked list - * o if it's the first device, then start kernel 'thread' - * o increment module use count - * - * Return: - * 0 Ok - * < 0 error. - * - * Context: process - */ - - -int register_wan_device(struct wan_device *wandev) -{ - int err, namelen; - - if ((wandev == NULL) || (wandev->magic != ROUTER_MAGIC) || - (wandev->name == NULL)) - return -EINVAL; - - namelen = strlen(wandev->name); - if (!namelen || (namelen > WAN_DRVNAME_SZ)) - return -EINVAL; - - if (wanrouter_find_device(wandev->name)) - return -EEXIST; - -#ifdef WANDEBUG - printk(KERN_INFO "%s: registering WAN device %s\n", - wanrouter_modname, wandev->name); -#endif - - /* - * Register /proc directory entry - */ - err = wanrouter_proc_add(wandev); - if (err) { - printk(KERN_INFO - "%s: can't create /proc/net/router/%s entry!\n", - wanrouter_modname, wandev->name); - return err; - } - - /* - * Initialize fields of the wan_device structure maintained by the - * router and update local data. - */ - - wandev->ndev = 0; - wandev->dev = NULL; - wandev->next = wanrouter_router_devlist; - wanrouter_router_devlist = wandev; - return 0; -} - -/* - * Unregister WAN device. - * o shut down device - * o unlink device data space from the linked list - * o delete device entry in the /proc/net/router directory - * o decrement module use count - * - * Return: 0 Ok - * <0 error. - * Context: process - */ - - -int unregister_wan_device(char *name) -{ - struct wan_device *wandev, *prev; - - if (name == NULL) - return -EINVAL; - - for (wandev = wanrouter_router_devlist, prev = NULL; - wandev && strcmp(wandev->name, name); - prev = wandev, wandev = wandev->next) - ; - if (wandev == NULL) - return -ENODEV; - -#ifdef WANDEBUG - printk(KERN_INFO "%s: unregistering WAN device %s\n", - wanrouter_modname, name); -#endif - - if (wandev->state != WAN_UNCONFIGURED) - wanrouter_device_shutdown(wandev); - - if (prev) - prev->next = wandev->next; - else - wanrouter_router_devlist = wandev->next; - - wanrouter_proc_delete(wandev); - return 0; -} - -#if 0 - -/* - * Encapsulate packet. - * - * Return: encapsulation header size - * < 0 - unsupported Ethertype - * - * Notes: - * 1. This function may be called on interrupt context. - */ - - -int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev, - unsigned short type) -{ - int hdr_len = 0; - - switch (type) { - case ETH_P_IP: /* IP datagram encapsulation */ - hdr_len += 1; - skb_push(skb, 1); - skb->data[0] = NLPID_IP; - break; - - case ETH_P_IPX: /* SNAP encapsulation */ - case ETH_P_ARP: - hdr_len += 7; - skb_push(skb, 7); - skb->data[0] = 0; - skb->data[1] = NLPID_SNAP; - skb_copy_to_linear_data_offset(skb, 2, wanrouter_oui_ether, - sizeof(wanrouter_oui_ether)); - *((unsigned short*)&skb->data[5]) = htons(type); - break; - - default: /* Unknown packet type */ - printk(KERN_INFO - "%s: unsupported Ethertype 0x%04X on interface %s!\n", - wanrouter_modname, type, dev->name); - hdr_len = -EINVAL; - } - return hdr_len; -} - - -/* - * Decapsulate packet. - * - * Return: Ethertype (in network order) - * 0 unknown encapsulation - * - * Notes: - * 1. This function may be called on interrupt context. - */ - - -__be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) -{ - int cnt = skb->data[0] ? 0 : 1; /* there may be a pad present */ - __be16 ethertype; - - switch (skb->data[cnt]) { - case NLPID_IP: /* IP datagramm */ - ethertype = htons(ETH_P_IP); - cnt += 1; - break; - - case NLPID_SNAP: /* SNAP encapsulation */ - if (memcmp(&skb->data[cnt + 1], wanrouter_oui_ether, - sizeof(wanrouter_oui_ether))){ - printk(KERN_INFO - "%s: unsupported SNAP OUI %02X-%02X-%02X " - "on interface %s!\n", wanrouter_modname, - skb->data[cnt+1], skb->data[cnt+2], - skb->data[cnt+3], dev->name); - return 0; - } - ethertype = *((__be16*)&skb->data[cnt+4]); - cnt += 6; - break; - - /* add other protocols, e.g. CLNP, ESIS, ISIS, if needed */ - - default: - printk(KERN_INFO - "%s: unsupported NLPID 0x%02X on interface %s!\n", - wanrouter_modname, skb->data[cnt], dev->name); - return 0; - } - skb->protocol = ethertype; - skb->pkt_type = PACKET_HOST; /* Physically point to point */ - skb_pull(skb, cnt); - skb_reset_mac_header(skb); - return ethertype; -} - -#endif /* 0 */ - -/* - * WAN device IOCTL. - * o find WAN device associated with this node - * o execute requested action or pass command to the device driver - */ - -long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = file->f_path.dentry->d_inode; - int err = 0; - struct proc_dir_entry *dent; - struct wan_device *wandev; - void __user *data = (void __user *)arg; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if ((cmd >> 8) != ROUTER_IOCTL) - return -EINVAL; - - dent = PDE(inode); - if ((dent == NULL) || (dent->data == NULL)) - return -EINVAL; - - wandev = dent->data; - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - - mutex_lock(&wanrouter_mutex); - switch (cmd) { - case ROUTER_SETUP: - err = wanrouter_device_setup(wandev, data); - break; - - case ROUTER_DOWN: - err = wanrouter_device_shutdown(wandev); - break; - - case ROUTER_STAT: - err = wanrouter_device_stat(wandev, data); - break; - - case ROUTER_IFNEW: - err = wanrouter_device_new_if(wandev, data); - break; - - case ROUTER_IFDEL: - err = wanrouter_device_del_if(wandev, data); - break; - - case ROUTER_IFSTAT: - break; - - default: - if ((cmd >= ROUTER_USER) && - (cmd <= ROUTER_USER_MAX) && - wandev->ioctl) - err = wandev->ioctl(wandev, cmd, arg); - else err = -EINVAL; - } - mutex_unlock(&wanrouter_mutex); - return err; -} - -/* - * WAN Driver IOCTL Handlers - */ - -/* - * Setup WAN link device. - * o verify user address space - * o allocate kernel memory and copy configuration data to kernel space - * o if configuration data includes extension, copy it to kernel space too - * o call driver's setup() entry point - */ - -static int wanrouter_device_setup(struct wan_device *wandev, - wandev_conf_t __user *u_conf) -{ - void *data = NULL; - wandev_conf_t *conf; - int err = -EINVAL; - - if (wandev->setup == NULL) { /* Nothing to do ? */ - printk(KERN_INFO "%s: ERROR, No setup script: wandev->setup()\n", - wandev->name); - return 0; - } - - conf = kmalloc(sizeof(wandev_conf_t), GFP_KERNEL); - if (conf == NULL){ - printk(KERN_INFO "%s: ERROR, Failed to allocate kernel memory !\n", - wandev->name); - return -ENOBUFS; - } - - if (copy_from_user(conf, u_conf, sizeof(wandev_conf_t))) { - printk(KERN_INFO "%s: Failed to copy user config data to kernel space!\n", - wandev->name); - kfree(conf); - return -EFAULT; - } - - if (conf->magic != ROUTER_MAGIC) { - kfree(conf); - printk(KERN_INFO "%s: ERROR, Invalid MAGIC Number\n", - wandev->name); - return -EINVAL; - } - - if (conf->data_size && conf->data) { - if (conf->data_size > 128000) { - printk(KERN_INFO - "%s: ERROR, Invalid firmware data size %i !\n", - wandev->name, conf->data_size); - kfree(conf); - return -EINVAL; - } - - data = vmalloc(conf->data_size); - if (!data) { - printk(KERN_INFO - "%s: ERROR, Failed allocate kernel memory !\n", - wandev->name); - kfree(conf); - return -ENOBUFS; - } - if (!copy_from_user(data, conf->data, conf->data_size)) { - conf->data = data; - err = wandev->setup(wandev, conf); - } else { - printk(KERN_INFO - "%s: ERROR, Failed to copy from user data !\n", - wandev->name); - err = -EFAULT; - } - vfree(data); - } else { - printk(KERN_INFO - "%s: ERROR, No firmware found ! Firmware size = %i !\n", - wandev->name, conf->data_size); - } - - kfree(conf); - return err; -} - -/* - * Shutdown WAN device. - * o delete all not opened logical channels for this device - * o call driver's shutdown() entry point - */ - -static int wanrouter_device_shutdown(struct wan_device *wandev) -{ - struct net_device *dev; - int err=0; - - if (wandev->state == WAN_UNCONFIGURED) - return 0; - - printk(KERN_INFO "\n%s: Shutting Down!\n",wandev->name); - - for (dev = wandev->dev; dev;) { - err = wanrouter_delete_interface(wandev, dev->name); - if (err) - return err; - /* The above function deallocates the current dev - * structure. Therefore, we cannot use netdev_priv(dev) - * as the next element: wandev->dev points to the - * next element */ - dev = wandev->dev; - } - - if (wandev->ndev) - return -EBUSY; /* there are opened interfaces */ - - if (wandev->shutdown) - err=wandev->shutdown(wandev); - - return err; -} - -/* - * Get WAN device status & statistics. - */ - -static int wanrouter_device_stat(struct wan_device *wandev, - wandev_stat_t __user *u_stat) -{ - wandev_stat_t stat; - - memset(&stat, 0, sizeof(stat)); - - /* Ask device driver to update device statistics */ - if ((wandev->state != WAN_UNCONFIGURED) && wandev->update) - wandev->update(wandev); - - /* Fill out structure */ - stat.ndev = wandev->ndev; - stat.state = wandev->state; - - if (copy_to_user(u_stat, &stat, sizeof(stat))) - return -EFAULT; - - return 0; -} - -/* - * Create new WAN interface. - * o verify user address space - * o copy configuration data to kernel address space - * o allocate network interface data space - * o call driver's new_if() entry point - * o make sure there is no interface name conflict - * o register network interface - */ - -static int wanrouter_device_new_if(struct wan_device *wandev, - wanif_conf_t __user *u_conf) -{ - wanif_conf_t *cnf; - struct net_device *dev = NULL; - int err; - - if ((wandev->state == WAN_UNCONFIGURED) || (wandev->new_if == NULL)) - return -ENODEV; - - cnf = kmalloc(sizeof(wanif_conf_t), GFP_KERNEL); - if (!cnf) - return -ENOBUFS; - - err = -EFAULT; - if (copy_from_user(cnf, u_conf, sizeof(wanif_conf_t))) - goto out; - - err = -EINVAL; - if (cnf->magic != ROUTER_MAGIC) - goto out; - - if (cnf->config_id == WANCONFIG_MPPP) { - printk(KERN_INFO "%s: Wanpipe Mulit-Port PPP support has not been compiled in!\n", - wandev->name); - err = -EPROTONOSUPPORT; - goto out; - } else { - err = wandev->new_if(wandev, dev, cnf); - } - - if (!err) { - /* Register network interface. This will invoke init() - * function supplied by the driver. If device registered - * successfully, add it to the interface list. - */ - -#ifdef WANDEBUG - printk(KERN_INFO "%s: registering interface %s...\n", - wanrouter_modname, dev->name); -#endif - - err = register_netdev(dev); - if (!err) { - struct net_device *slave = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - - if (wandev->dev == NULL) { - wandev->dev = dev; - } else { - for (slave=wandev->dev; - DEV_TO_SLAVE(slave); - slave = DEV_TO_SLAVE(slave)) - DEV_TO_SLAVE(slave) = dev; - } - ++wandev->ndev; - - unlock_adapter_irq(&wandev->lock, &smp_flags); - err = 0; /* done !!! */ - goto out; - } - if (wandev->del_if) - wandev->del_if(wandev, dev); - free_netdev(dev); - } - -out: - kfree(cnf); - return err; -} - - -/* - * Delete WAN logical channel. - * o verify user address space - * o copy configuration data to kernel address space - */ - -static int wanrouter_device_del_if(struct wan_device *wandev, char __user *u_name) -{ - char name[WAN_IFNAME_SZ + 1]; - int err = 0; - - if (wandev->state == WAN_UNCONFIGURED) - return -ENODEV; - - memset(name, 0, sizeof(name)); - - if (copy_from_user(name, u_name, WAN_IFNAME_SZ)) - return -EFAULT; - - err = wanrouter_delete_interface(wandev, name); - if (err) - return err; - - /* If last interface being deleted, shutdown card - * This helps with administration at leaf nodes - * (You can tell if the person at the other end of the phone - * has an interface configured) and avoids DoS vulnerabilities - * in binary driver files - this fixes a problem with the current - * Sangoma driver going into strange states when all the network - * interfaces are deleted and the link irrecoverably disconnected. - */ - - if (!wandev->ndev && wandev->shutdown) - err = wandev->shutdown(wandev); - - return err; -} - -/* - * Miscellaneous Functions - */ - -/* - * Find WAN device by name. - * Return pointer to the WAN device data space or NULL if device not found. - */ - -static struct wan_device *wanrouter_find_device(char *name) -{ - struct wan_device *wandev; - - for (wandev = wanrouter_router_devlist; - wandev && strcmp(wandev->name, name); - wandev = wandev->next); - return wandev; -} - -/* - * Delete WAN logical channel identified by its name. - * o find logical channel by its name - * o call driver's del_if() entry point - * o unregister network interface - * o unlink channel data space from linked list of channels - * o release channel data space - * - * Return: 0 success - * -ENODEV channel not found. - * -EBUSY interface is open - * - * Note: If (force != 0), then device will be destroyed even if interface - * associated with it is open. It's caller's responsibility to make - * sure that opened interfaces are not removed! - */ - -static int wanrouter_delete_interface(struct wan_device *wandev, char *name) -{ - struct net_device *dev = NULL, *prev = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - dev = wandev->dev; - prev = NULL; - while (dev && strcmp(name, dev->name)) { - struct net_device **slave = netdev_priv(dev); - prev = dev; - dev = *slave; - } - unlock_adapter_irq(&wandev->lock, &smp_flags); - - if (dev == NULL) - return -ENODEV; /* interface not found */ - - if (netif_running(dev)) - return -EBUSY; /* interface in use */ - - if (wandev->del_if) - wandev->del_if(wandev, dev); - - lock_adapter_irq(&wandev->lock, &smp_flags); - if (prev) { - struct net_device **prev_slave = netdev_priv(prev); - struct net_device **slave = netdev_priv(dev); - - *prev_slave = *slave; - } else { - struct net_device **slave = netdev_priv(dev); - wandev->dev = *slave; - } - --wandev->ndev; - unlock_adapter_irq(&wandev->lock, &smp_flags); - - printk(KERN_INFO "%s: unregistering '%s'\n", wandev->name, dev->name); - - unregister_netdev(dev); - - free_netdev(dev); - - return 0; -} - -static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __acquires(lock) -{ - spin_lock_irqsave(lock, *smp_flags); -} - - -static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __releases(lock) -{ - spin_unlock_irqrestore(lock, *smp_flags); -} - -EXPORT_SYMBOL(register_wan_device); -EXPORT_SYMBOL(unregister_wan_device); - -MODULE_LICENSE("GPL"); - -/* - * End - */ diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c deleted file mode 100644 index c43612ee96bb..000000000000 --- a/net/wanrouter/wanproc.c +++ /dev/null @@ -1,380 +0,0 @@ -/***************************************************************************** -* wanproc.c WAN Router Module. /proc filesystem interface. -* -* This module is completely hardware-independent and provides -* access to the router using Linux /proc filesystem. -* -* Author: Gideon Hack -* -* Copyright: (c) 1995-1999 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Jun 02, 1999 Gideon Hack Updates for Linux 2.2.X kernels. -* Jun 29, 1997 Alan Cox Merged with 1.0.3 vendor code -* Jan 29, 1997 Gene Kozin v1.0.1. Implemented /proc read routines -* Jan 30, 1997 Alan Cox Hacked around for 2.1 -* Dec 13, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) -*****************************************************************************/ - -#include /* __initfunc et al. */ -#include /* offsetof(), etc. */ -#include /* return codes */ -#include -#include -#include /* WAN router API definitions */ -#include -#include - -#include -#include - -#define PROC_STATS_FORMAT "%30s: %12lu\n" - -/****** Defines and Macros **************************************************/ - -#define PROT_DECODE(prot) ((prot == WANCONFIG_FR) ? " FR" :\ - (prot == WANCONFIG_X25) ? " X25" : \ - (prot == WANCONFIG_PPP) ? " PPP" : \ - (prot == WANCONFIG_CHDLC) ? " CHDLC": \ - (prot == WANCONFIG_MPPP) ? " MPPP" : \ - " Unknown" ) - -/****** Function Prototypes *************************************************/ - -#ifdef CONFIG_PROC_FS - -/* Miscellaneous */ - -/* - * Structures for interfacing with the /proc filesystem. - * Router creates its own directory /proc/net/router with the following - * entries: - * config device configuration - * status global device statistics - * entry for each WAN device - */ - -/* - * Generic /proc/net/router/ file and inode operations - */ - -/* - * /proc/net/router - */ - -static DEFINE_MUTEX(config_mutex); -static struct proc_dir_entry *proc_router; - -/* Strings */ - -/* - * Interface functions - */ - -/****** Proc filesystem entry points ****************************************/ - -/* - * Iterator - */ -static void *r_start(struct seq_file *m, loff_t *pos) -{ - struct wan_device *wandev; - loff_t l = *pos; - - mutex_lock(&config_mutex); - if (!l--) - return SEQ_START_TOKEN; - for (wandev = wanrouter_router_devlist; l-- && wandev; - wandev = wandev->next) - ; - return wandev; -} - -static void *r_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct wan_device *wandev = v; - (*pos)++; - return (v == SEQ_START_TOKEN) ? wanrouter_router_devlist : wandev->next; -} - -static void r_stop(struct seq_file *m, void *v) -{ - mutex_unlock(&config_mutex); -} - -static int config_show(struct seq_file *m, void *v) -{ - struct wan_device *p = v; - if (v == SEQ_START_TOKEN) { - seq_puts(m, "Device name | port |IRQ|DMA| mem.addr |" - "mem.size|option1|option2|option3|option4\n"); - return 0; - } - if (!p->state) - return 0; - seq_printf(m, "%-15s|0x%-4X|%3u|%3u| 0x%-8lX |0x%-6X|%7u|%7u|%7u|%7u\n", - p->name, p->ioport, p->irq, p->dma, p->maddr, p->msize, - p->hw_opt[0], p->hw_opt[1], p->hw_opt[2], p->hw_opt[3]); - return 0; -} - -static int status_show(struct seq_file *m, void *v) -{ - struct wan_device *p = v; - if (v == SEQ_START_TOKEN) { - seq_puts(m, "Device name |protocol|station|interface|" - "clocking|baud rate| MTU |ndev|link state\n"); - return 0; - } - if (!p->state) - return 0; - seq_printf(m, "%-15s|%-8s| %-7s| %-9s|%-8s|%9u|%5u|%3u |", - p->name, - PROT_DECODE(p->config_id), - p->config_id == WANCONFIG_FR ? - (p->station ? "Node" : "CPE") : - (p->config_id == WANCONFIG_X25 ? - (p->station ? "DCE" : "DTE") : - ("N/A")), - p->interface ? "V.35" : "RS-232", - p->clocking ? "internal" : "external", - p->bps, - p->mtu, - p->ndev); - - switch (p->state) { - case WAN_UNCONFIGURED: - seq_printf(m, "%-12s\n", "unconfigured"); - break; - case WAN_DISCONNECTED: - seq_printf(m, "%-12s\n", "disconnected"); - break; - case WAN_CONNECTING: - seq_printf(m, "%-12s\n", "connecting"); - break; - case WAN_CONNECTED: - seq_printf(m, "%-12s\n", "connected"); - break; - default: - seq_printf(m, "%-12s\n", "invalid"); - break; - } - return 0; -} - -static const struct seq_operations config_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = config_show, -}; - -static const struct seq_operations status_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = status_show, -}; - -static int config_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &config_op); -} - -static int status_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &status_op); -} - -static const struct file_operations config_fops = { - .owner = THIS_MODULE, - .open = config_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations status_fops = { - .owner = THIS_MODULE, - .open = status_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int wandev_show(struct seq_file *m, void *v) -{ - struct wan_device *wandev = m->private; - - if (wandev->magic != ROUTER_MAGIC) - return 0; - - if (!wandev->state) { - seq_puts(m, "device is not configured!\n"); - return 0; - } - - /* Update device statistics */ - if (wandev->update) { - int err = wandev->update(wandev); - if (err == -EAGAIN) { - seq_puts(m, "Device is busy!\n"); - return 0; - } - if (err) { - seq_puts(m, "Device is not configured!\n"); - return 0; - } - } - - seq_printf(m, PROC_STATS_FORMAT, - "total packets received", wandev->stats.rx_packets); - seq_printf(m, PROC_STATS_FORMAT, - "total packets transmitted", wandev->stats.tx_packets); - seq_printf(m, PROC_STATS_FORMAT, - "total bytes received", wandev->stats.rx_bytes); - seq_printf(m, PROC_STATS_FORMAT, - "total bytes transmitted", wandev->stats.tx_bytes); - seq_printf(m, PROC_STATS_FORMAT, - "bad packets received", wandev->stats.rx_errors); - seq_printf(m, PROC_STATS_FORMAT, - "packet transmit problems", wandev->stats.tx_errors); - seq_printf(m, PROC_STATS_FORMAT, - "received frames dropped", wandev->stats.rx_dropped); - seq_printf(m, PROC_STATS_FORMAT, - "transmit frames dropped", wandev->stats.tx_dropped); - seq_printf(m, PROC_STATS_FORMAT, - "multicast packets received", wandev->stats.multicast); - seq_printf(m, PROC_STATS_FORMAT, - "transmit collisions", wandev->stats.collisions); - seq_printf(m, PROC_STATS_FORMAT, - "receive length errors", wandev->stats.rx_length_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver overrun errors", wandev->stats.rx_over_errors); - seq_printf(m, PROC_STATS_FORMAT, - "CRC errors", wandev->stats.rx_crc_errors); - seq_printf(m, PROC_STATS_FORMAT, - "frame format errors (aborts)", wandev->stats.rx_frame_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver fifo overrun", wandev->stats.rx_fifo_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver missed packet", wandev->stats.rx_missed_errors); - seq_printf(m, PROC_STATS_FORMAT, - "aborted frames transmitted", wandev->stats.tx_aborted_errors); - return 0; -} - -static int wandev_open(struct inode *inode, struct file *file) -{ - return single_open(file, wandev_show, PDE(inode)->data); -} - -static const struct file_operations wandev_fops = { - .owner = THIS_MODULE, - .open = wandev_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .unlocked_ioctl = wanrouter_ioctl, -}; - -/* - * Initialize router proc interface. - */ - -int __init wanrouter_proc_init(void) -{ - struct proc_dir_entry *p; - proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net); - if (!proc_router) - goto fail; - - p = proc_create("config", S_IRUGO, proc_router, &config_fops); - if (!p) - goto fail_config; - p = proc_create("status", S_IRUGO, proc_router, &status_fops); - if (!p) - goto fail_stat; - return 0; -fail_stat: - remove_proc_entry("config", proc_router); -fail_config: - remove_proc_entry(ROUTER_NAME, init_net.proc_net); -fail: - return -ENOMEM; -} - -/* - * Clean up router proc interface. - */ - -void wanrouter_proc_cleanup(void) -{ - remove_proc_entry("config", proc_router); - remove_proc_entry("status", proc_router); - remove_proc_entry(ROUTER_NAME, init_net.proc_net); -} - -/* - * Add directory entry for WAN device. - */ - -int wanrouter_proc_add(struct wan_device* wandev) -{ - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - - wandev->dent = proc_create(wandev->name, S_IRUGO, - proc_router, &wandev_fops); - if (!wandev->dent) - return -ENOMEM; - wandev->dent->data = wandev; - return 0; -} - -/* - * Delete directory entry for WAN device. - */ -int wanrouter_proc_delete(struct wan_device* wandev) -{ - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - remove_proc_entry(wandev->name, proc_router); - return 0; -} - -#else - -/* - * No /proc - output stubs - */ - -int __init wanrouter_proc_init(void) -{ - return 0; -} - -void wanrouter_proc_cleanup(void) -{ -} - -int wanrouter_proc_add(struct wan_device *wandev) -{ - return 0; -} - -int wanrouter_proc_delete(struct wan_device *wandev) -{ - return 0; -} - -#endif - -/* - * End - */ - -- cgit v1.2.3-71-gd317 From 97cc019ee56d52005ea4544af17bef268c464862 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Fri, 1 Feb 2013 08:46:56 +0100 Subject: bcma: cc: fix (and rename) define of NAND flash type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rafał Miłecki Signed-off-by: John W. Linville --- include/linux/bcma/bcma_driver_chipcommon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 9a0e3fa3ca95..6a299f416288 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -27,7 +27,7 @@ #define BCMA_CC_FLASHT_NONE 0x00000000 /* No flash */ #define BCMA_CC_FLASHT_STSER 0x00000100 /* ST serial flash */ #define BCMA_CC_FLASHT_ATSER 0x00000200 /* Atmel serial flash */ -#define BCMA_CC_FLASHT_NFLASH 0x00000200 /* NAND flash */ +#define BCMA_CC_FLASHT_NAND 0x00000300 /* NAND flash */ #define BCMA_CC_FLASHT_PARA 0x00000700 /* Parallel flash */ #define BCMA_CC_CAP_PLLT 0x00038000 /* PLL Type */ #define BCMA_PLLTYPE_NONE 0x00000000 -- cgit v1.2.3-71-gd317 From cfad1ba87150e198be9ea32367a24e500e59de2c Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Tue, 18 Dec 2012 14:53:53 +0100 Subject: NFC: Initial support for Inside Secure microread Inside Secure microread is an HCI based NFC chipset. This initial support includes reader and p2p (Target and initiator) modes. Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- drivers/nfc/Kconfig | 1 + drivers/nfc/Makefile | 1 + drivers/nfc/microread/Kconfig | 13 + drivers/nfc/microread/Makefile | 5 + drivers/nfc/microread/microread.c | 728 ++++++++++++++++++++++++++++++++ drivers/nfc/microread/microread.h | 33 ++ include/linux/platform_data/microread.h | 35 ++ 7 files changed, 816 insertions(+) create mode 100644 drivers/nfc/microread/Kconfig create mode 100644 drivers/nfc/microread/Makefile create mode 100644 drivers/nfc/microread/microread.c create mode 100644 drivers/nfc/microread/microread.h create mode 100644 include/linux/platform_data/microread.h (limited to 'include/linux') diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig index 80c728b28828..e57034971ccc 100644 --- a/drivers/nfc/Kconfig +++ b/drivers/nfc/Kconfig @@ -27,5 +27,6 @@ config NFC_WILINK into the kernel or say M to compile it as module. source "drivers/nfc/pn544/Kconfig" +source "drivers/nfc/microread/Kconfig" endmenu diff --git a/drivers/nfc/Makefile b/drivers/nfc/Makefile index 574bbc04d97a..a189ada0926a 100644 --- a/drivers/nfc/Makefile +++ b/drivers/nfc/Makefile @@ -3,6 +3,7 @@ # obj-$(CONFIG_NFC_PN544) += pn544/ +obj-$(CONFIG_NFC_MICROREAD) += microread/ obj-$(CONFIG_NFC_PN533) += pn533.o obj-$(CONFIG_NFC_WILINK) += nfcwilink.o diff --git a/drivers/nfc/microread/Kconfig b/drivers/nfc/microread/Kconfig new file mode 100644 index 000000000000..5b89d011d098 --- /dev/null +++ b/drivers/nfc/microread/Kconfig @@ -0,0 +1,13 @@ +config NFC_MICROREAD + tristate "Inside Secure microread NFC driver" + depends on NFC_HCI + select CRC_CCITT + default n + ---help--- + This module contains the main code for Inside Secure microread + NFC chipsets. It implements the chipset HCI logic and hooks into + the NFC kernel APIs. Physical layers will register against it. + + To compile this driver as a module, choose m here. The module will + be called microread. + Say N if unsure. diff --git a/drivers/nfc/microread/Makefile b/drivers/nfc/microread/Makefile new file mode 100644 index 000000000000..9ce2c53f49a7 --- /dev/null +++ b/drivers/nfc/microread/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for Microread HCI based NFC driver +# + +obj-$(CONFIG_NFC_MICROREAD) += microread.o diff --git a/drivers/nfc/microread/microread.c b/drivers/nfc/microread/microread.c new file mode 100644 index 000000000000..3420d833db17 --- /dev/null +++ b/drivers/nfc/microread/microread.c @@ -0,0 +1,728 @@ +/* + * HCI based Driver for Inside Secure microread NFC Chip + * + * Copyright (C) 2013 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "microread.h" + +/* Proprietary gates, events, commands and registers */ +/* Admin */ +#define MICROREAD_GATE_ID_ADM NFC_HCI_ADMIN_GATE +#define MICROREAD_GATE_ID_MGT 0x01 +#define MICROREAD_GATE_ID_OS 0x02 +#define MICROREAD_GATE_ID_TESTRF 0x03 +#define MICROREAD_GATE_ID_LOOPBACK NFC_HCI_LOOPBACK_GATE +#define MICROREAD_GATE_ID_IDT NFC_HCI_ID_MGMT_GATE +#define MICROREAD_GATE_ID_LMS NFC_HCI_LINK_MGMT_GATE + +/* Reader */ +#define MICROREAD_GATE_ID_MREAD_GEN 0x10 +#define MICROREAD_GATE_ID_MREAD_ISO_B NFC_HCI_RF_READER_B_GATE +#define MICROREAD_GATE_ID_MREAD_NFC_T1 0x12 +#define MICROREAD_GATE_ID_MREAD_ISO_A NFC_HCI_RF_READER_A_GATE +#define MICROREAD_GATE_ID_MREAD_NFC_T3 0x14 +#define MICROREAD_GATE_ID_MREAD_ISO_15_3 0x15 +#define MICROREAD_GATE_ID_MREAD_ISO_15_2 0x16 +#define MICROREAD_GATE_ID_MREAD_ISO_B_3 0x17 +#define MICROREAD_GATE_ID_MREAD_BPRIME 0x18 +#define MICROREAD_GATE_ID_MREAD_ISO_A_3 0x19 + +/* Card */ +#define MICROREAD_GATE_ID_MCARD_GEN 0x20 +#define MICROREAD_GATE_ID_MCARD_ISO_B 0x21 +#define MICROREAD_GATE_ID_MCARD_BPRIME 0x22 +#define MICROREAD_GATE_ID_MCARD_ISO_A 0x23 +#define MICROREAD_GATE_ID_MCARD_NFC_T3 0x24 +#define MICROREAD_GATE_ID_MCARD_ISO_15_3 0x25 +#define MICROREAD_GATE_ID_MCARD_ISO_15_2 0x26 +#define MICROREAD_GATE_ID_MCARD_ISO_B_2 0x27 +#define MICROREAD_GATE_ID_MCARD_ISO_CUSTOM 0x28 +#define MICROREAD_GATE_ID_SECURE_ELEMENT 0x2F + +/* P2P */ +#define MICROREAD_GATE_ID_P2P_GEN 0x30 +#define MICROREAD_GATE_ID_P2P_TARGET 0x31 +#define MICROREAD_PAR_P2P_TARGET_MODE 0x01 +#define MICROREAD_PAR_P2P_TARGET_GT 0x04 +#define MICROREAD_GATE_ID_P2P_INITIATOR 0x32 +#define MICROREAD_PAR_P2P_INITIATOR_GI 0x01 +#define MICROREAD_PAR_P2P_INITIATOR_GT 0x03 + +/* Those pipes are created/opened by default in the chip */ +#define MICROREAD_PIPE_ID_LMS 0x00 +#define MICROREAD_PIPE_ID_ADMIN 0x01 +#define MICROREAD_PIPE_ID_MGT 0x02 +#define MICROREAD_PIPE_ID_OS 0x03 +#define MICROREAD_PIPE_ID_HDS_LOOPBACK 0x04 +#define MICROREAD_PIPE_ID_HDS_IDT 0x05 +#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_B 0x08 +#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_BPRIME 0x09 +#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_A 0x0A +#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_15_3 0x0B +#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_15_2 0x0C +#define MICROREAD_PIPE_ID_HDS_MCARD_NFC_T3 0x0D +#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_B_2 0x0E +#define MICROREAD_PIPE_ID_HDS_MCARD_CUSTOM 0x0F +#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_B 0x10 +#define MICROREAD_PIPE_ID_HDS_MREAD_NFC_T1 0x11 +#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_A 0x12 +#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_15_3 0x13 +#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_15_2 0x14 +#define MICROREAD_PIPE_ID_HDS_MREAD_NFC_T3 0x15 +#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_B_3 0x16 +#define MICROREAD_PIPE_ID_HDS_MREAD_BPRIME 0x17 +#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_A_3 0x18 +#define MICROREAD_PIPE_ID_HDS_MREAD_GEN 0x1B +#define MICROREAD_PIPE_ID_HDS_STACKED_ELEMENT 0x1C +#define MICROREAD_PIPE_ID_HDS_INSTANCES 0x1D +#define MICROREAD_PIPE_ID_HDS_TESTRF 0x1E +#define MICROREAD_PIPE_ID_HDS_P2P_TARGET 0x1F +#define MICROREAD_PIPE_ID_HDS_P2P_INITIATOR 0x20 + +/* Events */ +#define MICROREAD_EVT_MREAD_DISCOVERY_OCCURED NFC_HCI_EVT_TARGET_DISCOVERED +#define MICROREAD_EVT_MREAD_CARD_FOUND 0x3D +#define MICROREAD_EMCF_A_ATQA 0 +#define MICROREAD_EMCF_A_SAK 2 +#define MICROREAD_EMCF_A_LEN 3 +#define MICROREAD_EMCF_A_UID 4 +#define MICROREAD_EMCF_A3_ATQA 0 +#define MICROREAD_EMCF_A3_SAK 2 +#define MICROREAD_EMCF_A3_LEN 3 +#define MICROREAD_EMCF_A3_UID 4 +#define MICROREAD_EMCF_B_UID 0 +#define MICROREAD_EMCF_T1_ATQA 0 +#define MICROREAD_EMCF_T1_UID 4 +#define MICROREAD_EMCF_T3_UID 0 +#define MICROREAD_EVT_MREAD_DISCOVERY_START NFC_HCI_EVT_READER_REQUESTED +#define MICROREAD_EVT_MREAD_DISCOVERY_START_SOME 0x3E +#define MICROREAD_EVT_MREAD_DISCOVERY_STOP NFC_HCI_EVT_END_OPERATION +#define MICROREAD_EVT_MREAD_SIM_REQUESTS 0x3F +#define MICROREAD_EVT_MCARD_EXCHANGE NFC_HCI_EVT_TARGET_DISCOVERED +#define MICROREAD_EVT_P2P_INITIATOR_EXCHANGE_TO_RF 0x20 +#define MICROREAD_EVT_P2P_INITIATOR_EXCHANGE_FROM_RF 0x21 +#define MICROREAD_EVT_MCARD_FIELD_ON 0x11 +#define MICROREAD_EVT_P2P_TARGET_ACTIVATED 0x13 +#define MICROREAD_EVT_P2P_TARGET_DEACTIVATED 0x12 +#define MICROREAD_EVT_MCARD_FIELD_OFF 0x14 + +/* Commands */ +#define MICROREAD_CMD_MREAD_EXCHANGE 0x10 +#define MICROREAD_CMD_MREAD_SUBSCRIBE 0x3F + +/* Hosts IDs */ +#define MICROREAD_ELT_ID_HDS NFC_HCI_TERMINAL_HOST_ID +#define MICROREAD_ELT_ID_SIM NFC_HCI_UICC_HOST_ID +#define MICROREAD_ELT_ID_SE1 0x03 +#define MICROREAD_ELT_ID_SE2 0x04 +#define MICROREAD_ELT_ID_SE3 0x05 + +static struct nfc_hci_gate microread_gates[] = { + {MICROREAD_GATE_ID_ADM, MICROREAD_PIPE_ID_ADMIN}, + {MICROREAD_GATE_ID_LOOPBACK, MICROREAD_PIPE_ID_HDS_LOOPBACK}, + {MICROREAD_GATE_ID_IDT, MICROREAD_PIPE_ID_HDS_IDT}, + {MICROREAD_GATE_ID_LMS, MICROREAD_PIPE_ID_LMS}, + {MICROREAD_GATE_ID_MREAD_ISO_B, MICROREAD_PIPE_ID_HDS_MREAD_ISO_B}, + {MICROREAD_GATE_ID_MREAD_ISO_A, MICROREAD_PIPE_ID_HDS_MREAD_ISO_A}, + {MICROREAD_GATE_ID_MREAD_ISO_A_3, MICROREAD_PIPE_ID_HDS_MREAD_ISO_A_3}, + {MICROREAD_GATE_ID_MGT, MICROREAD_PIPE_ID_MGT}, + {MICROREAD_GATE_ID_OS, MICROREAD_PIPE_ID_OS}, + {MICROREAD_GATE_ID_MREAD_NFC_T1, MICROREAD_PIPE_ID_HDS_MREAD_NFC_T1}, + {MICROREAD_GATE_ID_MREAD_NFC_T3, MICROREAD_PIPE_ID_HDS_MREAD_NFC_T3}, + {MICROREAD_GATE_ID_P2P_TARGET, MICROREAD_PIPE_ID_HDS_P2P_TARGET}, + {MICROREAD_GATE_ID_P2P_INITIATOR, MICROREAD_PIPE_ID_HDS_P2P_INITIATOR} +}; + +/* Largest headroom needed for outgoing custom commands */ +#define MICROREAD_CMDS_HEADROOM 2 +#define MICROREAD_CMD_TAILROOM 2 + +struct microread_info { + struct nfc_phy_ops *phy_ops; + void *phy_id; + + struct nfc_hci_dev *hdev; + + int async_cb_type; + data_exchange_cb_t async_cb; + void *async_cb_context; +}; + +static int microread_open(struct nfc_hci_dev *hdev) +{ + struct microread_info *info = nfc_hci_get_clientdata(hdev); + + return info->phy_ops->enable(info->phy_id); +} + +static void microread_close(struct nfc_hci_dev *hdev) +{ + struct microread_info *info = nfc_hci_get_clientdata(hdev); + + info->phy_ops->disable(info->phy_id); +} + +static int microread_hci_ready(struct nfc_hci_dev *hdev) +{ + int r; + u8 param[4]; + + param[0] = 0x03; + r = nfc_hci_send_cmd(hdev, MICROREAD_GATE_ID_MREAD_ISO_A, + MICROREAD_CMD_MREAD_SUBSCRIBE, param, 1, NULL); + if (r) + return r; + + r = nfc_hci_send_cmd(hdev, MICROREAD_GATE_ID_MREAD_ISO_A_3, + MICROREAD_CMD_MREAD_SUBSCRIBE, NULL, 0, NULL); + if (r) + return r; + + param[0] = 0x00; + param[1] = 0x03; + param[2] = 0x00; + r = nfc_hci_send_cmd(hdev, MICROREAD_GATE_ID_MREAD_ISO_B, + MICROREAD_CMD_MREAD_SUBSCRIBE, param, 3, NULL); + if (r) + return r; + + r = nfc_hci_send_cmd(hdev, MICROREAD_GATE_ID_MREAD_NFC_T1, + MICROREAD_CMD_MREAD_SUBSCRIBE, NULL, 0, NULL); + if (r) + return r; + + param[0] = 0xFF; + param[1] = 0xFF; + param[2] = 0x00; + param[3] = 0x00; + r = nfc_hci_send_cmd(hdev, MICROREAD_GATE_ID_MREAD_NFC_T3, + MICROREAD_CMD_MREAD_SUBSCRIBE, param, 4, NULL); + + return r; +} + +static int microread_xmit(struct nfc_hci_dev *hdev, struct sk_buff *skb) +{ + struct microread_info *info = nfc_hci_get_clientdata(hdev); + + return info->phy_ops->write(info->phy_id, skb); +} + +static int microread_start_poll(struct nfc_hci_dev *hdev, + u32 im_protocols, u32 tm_protocols) +{ + int r; + + u8 param[2]; + u8 mode; + + param[0] = 0x00; + param[1] = 0x00; + + if (im_protocols & NFC_PROTO_ISO14443_MASK) + param[0] |= (1 << 2); + + if (im_protocols & NFC_PROTO_ISO14443_B_MASK) + param[0] |= 1; + + if (im_protocols & NFC_PROTO_MIFARE_MASK) + param[1] |= 1; + + if (im_protocols & NFC_PROTO_JEWEL_MASK) + param[0] |= (1 << 1); + + if (im_protocols & NFC_PROTO_FELICA_MASK) + param[0] |= (1 << 5); + + if (im_protocols & NFC_PROTO_NFC_DEP_MASK) + param[1] |= (1 << 1); + + if ((im_protocols | tm_protocols) & NFC_PROTO_NFC_DEP_MASK) { + hdev->gb = nfc_get_local_general_bytes(hdev->ndev, + &hdev->gb_len); + if (hdev->gb == NULL || hdev->gb_len == 0) { + im_protocols &= ~NFC_PROTO_NFC_DEP_MASK; + tm_protocols &= ~NFC_PROTO_NFC_DEP_MASK; + } + } + + r = nfc_hci_send_event(hdev, MICROREAD_GATE_ID_MREAD_ISO_A, + MICROREAD_EVT_MREAD_DISCOVERY_STOP, NULL, 0); + if (r) + return r; + + mode = 0xff; + r = nfc_hci_set_param(hdev, MICROREAD_GATE_ID_P2P_TARGET, + MICROREAD_PAR_P2P_TARGET_MODE, &mode, 1); + if (r) + return r; + + if (im_protocols & NFC_PROTO_NFC_DEP_MASK) { + r = nfc_hci_set_param(hdev, MICROREAD_GATE_ID_P2P_INITIATOR, + MICROREAD_PAR_P2P_INITIATOR_GI, + hdev->gb, hdev->gb_len); + if (r) + return r; + } + + if (tm_protocols & NFC_PROTO_NFC_DEP_MASK) { + r = nfc_hci_set_param(hdev, MICROREAD_GATE_ID_P2P_TARGET, + MICROREAD_PAR_P2P_TARGET_GT, + hdev->gb, hdev->gb_len); + if (r) + return r; + + mode = 0x02; + r = nfc_hci_set_param(hdev, MICROREAD_GATE_ID_P2P_TARGET, + MICROREAD_PAR_P2P_TARGET_MODE, &mode, 1); + if (r) + return r; + } + + return nfc_hci_send_event(hdev, MICROREAD_GATE_ID_MREAD_ISO_A, + MICROREAD_EVT_MREAD_DISCOVERY_START_SOME, + param, 2); +} + +static int microread_dep_link_up(struct nfc_hci_dev *hdev, + struct nfc_target *target, u8 comm_mode, + u8 *gb, size_t gb_len) +{ + struct sk_buff *rgb_skb = NULL; + int r; + + r = nfc_hci_get_param(hdev, target->hci_reader_gate, + MICROREAD_PAR_P2P_INITIATOR_GT, &rgb_skb); + if (r < 0) + return r; + + if (rgb_skb->len == 0 || rgb_skb->len > NFC_GB_MAXSIZE) { + r = -EPROTO; + goto exit; + } + + r = nfc_set_remote_general_bytes(hdev->ndev, rgb_skb->data, + rgb_skb->len); + if (r == 0) + r = nfc_dep_link_is_up(hdev->ndev, target->idx, comm_mode, + NFC_RF_INITIATOR); +exit: + kfree_skb(rgb_skb); + + return r; +} + +static int microread_dep_link_down(struct nfc_hci_dev *hdev) +{ + return nfc_hci_send_event(hdev, MICROREAD_GATE_ID_P2P_INITIATOR, + MICROREAD_EVT_MREAD_DISCOVERY_STOP, NULL, 0); +} + +static int microread_target_from_gate(struct nfc_hci_dev *hdev, u8 gate, + struct nfc_target *target) +{ + switch (gate) { + case MICROREAD_GATE_ID_P2P_INITIATOR: + target->supported_protocols = NFC_PROTO_NFC_DEP_MASK; + break; + default: + return -EPROTO; + } + + return 0; +} + +static int microread_complete_target_discovered(struct nfc_hci_dev *hdev, + u8 gate, + struct nfc_target *target) +{ + return 0; +} + +#define MICROREAD_CB_TYPE_READER_ALL 1 + +static void microread_im_transceive_cb(void *context, struct sk_buff *skb, + int err) +{ + struct microread_info *info = context; + + switch (info->async_cb_type) { + case MICROREAD_CB_TYPE_READER_ALL: + if (err == 0) { + if (skb->len == 0) { + err = -EPROTO; + kfree_skb(skb); + info->async_cb(info->async_cb_context, NULL, + -EPROTO); + return; + } + + if (skb->data[skb->len - 1] != 0) { + err = nfc_hci_result_to_errno( + skb->data[skb->len - 1]); + kfree_skb(skb); + info->async_cb(info->async_cb_context, NULL, + err); + return; + } + + skb_trim(skb, skb->len - 1); /* RF Error ind. */ + } + info->async_cb(info->async_cb_context, skb, err); + break; + default: + if (err == 0) + kfree_skb(skb); + break; + } +} + +/* + * Returns: + * <= 0: driver handled the data exchange + * 1: driver doesn't especially handle, please do standard processing + */ +static int microread_im_transceive(struct nfc_hci_dev *hdev, + struct nfc_target *target, + struct sk_buff *skb, data_exchange_cb_t cb, + void *cb_context) +{ + struct microread_info *info = nfc_hci_get_clientdata(hdev); + u8 control_bits; + u16 crc; + + pr_info("data exchange to gate 0x%x\n", target->hci_reader_gate); + + if (target->hci_reader_gate == MICROREAD_GATE_ID_P2P_INITIATOR) { + *skb_push(skb, 1) = 0; + + return nfc_hci_send_event(hdev, target->hci_reader_gate, + MICROREAD_EVT_P2P_INITIATOR_EXCHANGE_TO_RF, + skb->data, skb->len); + } + + switch (target->hci_reader_gate) { + case MICROREAD_GATE_ID_MREAD_ISO_A: + control_bits = 0xCB; + break; + case MICROREAD_GATE_ID_MREAD_ISO_A_3: + control_bits = 0xCB; + break; + case MICROREAD_GATE_ID_MREAD_ISO_B: + control_bits = 0xCB; + break; + case MICROREAD_GATE_ID_MREAD_NFC_T1: + control_bits = 0x1B; + + crc = crc_ccitt(0xffff, skb->data, skb->len); + crc = ~crc; + *skb_put(skb, 1) = crc & 0xff; + *skb_put(skb, 1) = crc >> 8; + break; + case MICROREAD_GATE_ID_MREAD_NFC_T3: + control_bits = 0xDB; + break; + default: + pr_info("Abort im_transceive to invalid gate 0x%x\n", + target->hci_reader_gate); + return 1; + } + + *skb_push(skb, 1) = control_bits; + + info->async_cb_type = MICROREAD_CB_TYPE_READER_ALL; + info->async_cb = cb; + info->async_cb_context = cb_context; + + return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate, + MICROREAD_CMD_MREAD_EXCHANGE, + skb->data, skb->len, + microread_im_transceive_cb, info); +} + +static int microread_tm_send(struct nfc_hci_dev *hdev, struct sk_buff *skb) +{ + int r; + + r = nfc_hci_send_event(hdev, MICROREAD_GATE_ID_P2P_TARGET, + MICROREAD_EVT_MCARD_EXCHANGE, + skb->data, skb->len); + + kfree_skb(skb); + + return r; +} + +static void microread_target_discovered(struct nfc_hci_dev *hdev, u8 gate, + struct sk_buff *skb) +{ + struct nfc_target *targets; + int r = 0; + + pr_info("target discovered to gate 0x%x\n", gate); + + targets = kzalloc(sizeof(struct nfc_target), GFP_KERNEL); + if (targets == NULL) { + r = -ENOMEM; + goto exit; + } + + targets->hci_reader_gate = gate; + + switch (gate) { + case MICROREAD_GATE_ID_MREAD_ISO_A: + targets->supported_protocols = + nfc_hci_sak_to_protocol(skb->data[MICROREAD_EMCF_A_SAK]); + targets->sens_res = + be16_to_cpu(*(u16 *)&skb->data[MICROREAD_EMCF_A_ATQA]); + targets->sel_res = skb->data[MICROREAD_EMCF_A_SAK]; + memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_A_UID], + skb->data[MICROREAD_EMCF_A_LEN]); + targets->nfcid1_len = skb->data[MICROREAD_EMCF_A_LEN]; + break; + case MICROREAD_GATE_ID_MREAD_ISO_A_3: + targets->supported_protocols = + nfc_hci_sak_to_protocol(skb->data[MICROREAD_EMCF_A3_SAK]); + targets->sens_res = + be16_to_cpu(*(u16 *)&skb->data[MICROREAD_EMCF_A3_ATQA]); + targets->sel_res = skb->data[MICROREAD_EMCF_A3_SAK]; + memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_A3_UID], + skb->data[MICROREAD_EMCF_A3_LEN]); + targets->nfcid1_len = skb->data[MICROREAD_EMCF_A3_LEN]; + break; + case MICROREAD_GATE_ID_MREAD_ISO_B: + targets->supported_protocols = NFC_PROTO_ISO14443_B_MASK; + memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_B_UID], 4); + targets->nfcid1_len = 4; + break; + case MICROREAD_GATE_ID_MREAD_NFC_T1: + targets->supported_protocols = NFC_PROTO_JEWEL_MASK; + targets->sens_res = + le16_to_cpu(*(u16 *)&skb->data[MICROREAD_EMCF_T1_ATQA]); + memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_T1_UID], 4); + targets->nfcid1_len = 4; + break; + case MICROREAD_GATE_ID_MREAD_NFC_T3: + targets->supported_protocols = NFC_PROTO_FELICA_MASK; + memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_T3_UID], 8); + targets->nfcid1_len = 8; + break; + default: + pr_info("discard target discovered to gate 0x%x\n", gate); + goto exit_free; + } + + r = nfc_targets_found(hdev->ndev, targets, 1); + +exit_free: + kfree(targets); + +exit: + kfree_skb(skb); + + if (r) + pr_err("Failed to handle discovered target err=%d", r); +} + +static int microread_event_received(struct nfc_hci_dev *hdev, u8 gate, + u8 event, struct sk_buff *skb) +{ + int r; + u8 mode; + + pr_info("Microread received event 0x%x to gate 0x%x\n", event, gate); + + switch (event) { + case MICROREAD_EVT_MREAD_CARD_FOUND: + microread_target_discovered(hdev, gate, skb); + return 0; + + case MICROREAD_EVT_P2P_INITIATOR_EXCHANGE_FROM_RF: + if (skb->len < 1) { + kfree_skb(skb); + return -EPROTO; + } + + if (skb->data[skb->len - 1]) { + kfree_skb(skb); + return -EIO; + } + + skb_trim(skb, skb->len - 1); + + r = nfc_tm_data_received(hdev->ndev, skb); + break; + + case MICROREAD_EVT_MCARD_FIELD_ON: + case MICROREAD_EVT_MCARD_FIELD_OFF: + kfree_skb(skb); + return 0; + + case MICROREAD_EVT_P2P_TARGET_ACTIVATED: + r = nfc_tm_activated(hdev->ndev, NFC_PROTO_NFC_DEP_MASK, + NFC_COMM_PASSIVE, skb->data, + skb->len); + + kfree_skb(skb); + break; + + case MICROREAD_EVT_MCARD_EXCHANGE: + if (skb->len < 1) { + kfree_skb(skb); + return -EPROTO; + } + + if (skb->data[skb->len-1]) { + kfree_skb(skb); + return -EIO; + } + + skb_trim(skb, skb->len - 1); + + r = nfc_tm_data_received(hdev->ndev, skb); + break; + + case MICROREAD_EVT_P2P_TARGET_DEACTIVATED: + kfree_skb(skb); + + mode = 0xff; + r = nfc_hci_set_param(hdev, MICROREAD_GATE_ID_P2P_TARGET, + MICROREAD_PAR_P2P_TARGET_MODE, &mode, 1); + if (r) + break; + + r = nfc_hci_send_event(hdev, gate, + MICROREAD_EVT_MREAD_DISCOVERY_STOP, NULL, + 0); + break; + + default: + return 1; + } + + return r; +} + +static struct nfc_hci_ops microread_hci_ops = { + .open = microread_open, + .close = microread_close, + .hci_ready = microread_hci_ready, + .xmit = microread_xmit, + .start_poll = microread_start_poll, + .dep_link_up = microread_dep_link_up, + .dep_link_down = microread_dep_link_down, + .target_from_gate = microread_target_from_gate, + .complete_target_discovered = microread_complete_target_discovered, + .im_transceive = microread_im_transceive, + .tm_send = microread_tm_send, + .check_presence = NULL, + .event_received = microread_event_received, +}; + +int microread_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, + int phy_headroom, int phy_tailroom, int phy_payload, + struct nfc_hci_dev **hdev) +{ + struct microread_info *info; + unsigned long quirks = 0; + u32 protocols, se; + struct nfc_hci_init_data init_data; + int r; + + info = kzalloc(sizeof(struct microread_info), GFP_KERNEL); + if (!info) { + pr_err("Cannot allocate memory for microread_info.\n"); + r = -ENOMEM; + goto err_info_alloc; + } + + info->phy_ops = phy_ops; + info->phy_id = phy_id; + + init_data.gate_count = ARRAY_SIZE(microread_gates); + memcpy(init_data.gates, microread_gates, sizeof(microread_gates)); + + strcpy(init_data.session_id, "MICROREA"); + + set_bit(NFC_HCI_QUIRK_SHORT_CLEAR, &quirks); + + protocols = NFC_PROTO_JEWEL_MASK | + NFC_PROTO_MIFARE_MASK | + NFC_PROTO_FELICA_MASK | + NFC_PROTO_ISO14443_MASK | + NFC_PROTO_ISO14443_B_MASK | + NFC_PROTO_NFC_DEP_MASK; + + se = NFC_SE_UICC | NFC_SE_EMBEDDED; + + info->hdev = nfc_hci_allocate_device(µread_hci_ops, &init_data, + quirks, protocols, se, llc_name, + phy_headroom + + MICROREAD_CMDS_HEADROOM, + phy_tailroom + + MICROREAD_CMD_TAILROOM, + phy_payload); + if (!info->hdev) { + pr_err("Cannot allocate nfc hdev.\n"); + r = -ENOMEM; + goto err_alloc_hdev; + } + + nfc_hci_set_clientdata(info->hdev, info); + + r = nfc_hci_register_device(info->hdev); + if (r) + goto err_regdev; + + *hdev = info->hdev; + + return 0; + +err_regdev: + nfc_hci_free_device(info->hdev); + +err_alloc_hdev: + kfree(info); + +err_info_alloc: + return r; +} +EXPORT_SYMBOL(microread_probe); + +void microread_remove(struct nfc_hci_dev *hdev) +{ + struct microread_info *info = nfc_hci_get_clientdata(hdev); + + nfc_hci_unregister_device(hdev); + nfc_hci_free_device(hdev); + kfree(info); +} +EXPORT_SYMBOL(microread_remove); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/nfc/microread/microread.h b/drivers/nfc/microread/microread.h new file mode 100644 index 000000000000..64b447a1c5bf --- /dev/null +++ b/drivers/nfc/microread/microread.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2011 - 2012 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef __LOCAL_MICROREAD_H_ +#define __LOCAL_MICROREAD_H_ + +#include + +#define DRIVER_DESC "NFC driver for microread" + +int microread_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, + int phy_headroom, int phy_tailroom, int phy_payload, + struct nfc_hci_dev **hdev); + +void microread_remove(struct nfc_hci_dev *hdev); + +#endif /* __LOCAL_MICROREAD_H_ */ diff --git a/include/linux/platform_data/microread.h b/include/linux/platform_data/microread.h new file mode 100644 index 000000000000..cfda59b226ee --- /dev/null +++ b/include/linux/platform_data/microread.h @@ -0,0 +1,35 @@ +/* + * Driver include for the PN544 NFC chip. + * + * Copyright (C) 2011 Tieto Poland + * Copyright (C) 2012 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _MICROREAD_H +#define _MICROREAD_H + +#include + +#define MICROREAD_DRIVER_NAME "microread" + +/* board config platform data for microread */ +struct microread_nfc_platform_data { + unsigned int rst_gpio; + unsigned int irq_gpio; + unsigned int ioh_gpio; +}; + +#endif /* _MICROREAD_H */ -- cgit v1.2.3-71-gd317 From 3f52b7e328c526fa7a592af9bf5772c591ed38a4 Mon Sep 17 00:00:00 2001 From: Marco Porsch Date: Wed, 30 Jan 2013 18:14:08 +0100 Subject: mac80211: mesh power save basics Add routines to - maintain a PS mode for each peer and a non-peer PS mode - indicate own PS mode in transmitted frames - track neighbor STAs power modes - buffer frames when neighbors are in PS mode - add TIM and Awake Window IE to beacons - release frames in Mesh Peer Service Periods Add local_pm to sta_info to represent the link-specific power mode at this station towards the remote station. When a peer link is established, use the default power mode stored in mesh config. Update the PS status if the peering status of a neighbor changes. Maintain a mesh power mode for non-peer mesh STAs. Set the non-peer power mode to active mode during peering. Authenticated mesh peering is currently not working when either node is configured to be in power save mode. Indicate the current power mode in transmitted frames. Use QoS Nulls to indicate mesh power mode transitions. For performance reasons, calls to the function setting the frame flags are placed in HWMP routing routines, as there the STA pointer is already available. Add peer_pm to sta_info to represent the peer's link-specific power mode towards the local station. Add nonpeer_pm to represent the peer's power mode towards all non-peer stations. Track power modes based on received frames. Add the ps_data structure to ieee80211_if_mesh (for TIM map, PS neighbor counter and group-addressed frame buffer). Set WLAN_STA_PS flag for STA in PS mode to use the unicast frame buffering routines in the tx path. Update num_sta_ps to buffer and release group-addressed frames after DTIM beacons. Announce the awake window duration in beacons if in light or deep sleep mode towards any peer or non-peer. Create a TIM IE similarly to AP mode and add it to mesh beacons. Parse received Awake Window IEs and check TIM IEs for buffered frames. Release frames towards peers in mesh Peer Service Periods. Use the corresponding trigger frames and monitor the MPSP status. Append a QoS Null as trigger frame if neccessary to properly end the MPSP. Currently, in HT channels MPSPs behave imperfectly and show large delay spikes and frame losses. Signed-off-by: Marco Porsch Signed-off-by: Ivan Bezyazychnyy Signed-off-by: Mike Krinkin Signed-off-by: Max Filippov Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 8 + net/mac80211/Kconfig | 11 + net/mac80211/Makefile | 3 +- net/mac80211/cfg.c | 27 +- net/mac80211/debug.h | 10 + net/mac80211/debugfs_netdev.c | 5 + net/mac80211/debugfs_sta.c | 5 +- net/mac80211/ieee80211_i.h | 6 + net/mac80211/mesh.c | 33 +++ net/mac80211/mesh.h | 17 ++ net/mac80211/mesh_hwmp.c | 7 + net/mac80211/mesh_pathtbl.c | 1 + net/mac80211/mesh_plink.c | 17 ++ net/mac80211/mesh_ps.c | 585 ++++++++++++++++++++++++++++++++++++++++++ net/mac80211/rx.c | 7 + net/mac80211/sta_info.c | 20 +- net/mac80211/sta_info.h | 11 + net/mac80211/status.c | 7 + net/mac80211/tx.c | 31 ++- net/mac80211/util.c | 4 + net/mac80211/wme.c | 13 +- 21 files changed, 811 insertions(+), 17 deletions(-) create mode 100644 net/mac80211/mesh_ps.c (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 11c8bc87fdcb..7e8a498efe6d 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -151,6 +151,11 @@ /* Mesh Control 802.11s */ #define IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT 0x0100 +/* Mesh Power Save Level */ +#define IEEE80211_QOS_CTL_MESH_PS_LEVEL 0x0200 +/* Mesh Receiver Service Period Initiated */ +#define IEEE80211_QOS_CTL_RSPI 0x0400 + /* U-APSD queue for WMM IEs sent by AP */ #define IEEE80211_WMM_IE_AP_QOSINFO_UAPSD (1<<7) #define IEEE80211_WMM_IE_AP_QOSINFO_PARAM_SET_CNT_MASK 0x0f @@ -675,11 +680,14 @@ struct ieee80211_meshconf_ie { * @IEEE80211_MESHCONF_CAPAB_FORWARDING: the STA forwards MSDUs * @IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING: TBTT adjustment procedure * is ongoing + * @IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL: STA is in deep sleep mode or has + * neighbors in deep sleep mode */ enum mesh_config_capab_flags { IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS = 0x01, IEEE80211_MESHCONF_CAPAB_FORWARDING = 0x08, IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING = 0x20, + IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL = 0x40, }; /** diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index b4ecf267a34b..0ecf947ad378 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -258,6 +258,17 @@ config MAC80211_MESH_SYNC_DEBUG Do not select this option. +config MAC80211_MESH_PS_DEBUG + bool "Verbose mesh powersave debugging" + depends on MAC80211_DEBUG_MENU + depends on MAC80211_MESH + ---help--- + Selecting this option causes mac80211 to print out very verbose mesh + powersave debugging messages (when mac80211 is taking part in a + mesh network). + + Do not select this option. + config MAC80211_TDLS_DEBUG bool "Verbose TDLS debugging" depends on MAC80211_DEBUG_MENU diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 4911202334d9..9d7d840aac6d 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -39,7 +39,8 @@ mac80211-$(CONFIG_MAC80211_MESH) += \ mesh_pathtbl.o \ mesh_plink.o \ mesh_hwmp.o \ - mesh_sync.o + mesh_sync.o \ + mesh_ps.o mac80211-$(CONFIG_PM) += pm.o diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 661b878bd19c..f4f7e7691077 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -492,7 +492,10 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) #ifdef CONFIG_MAC80211_MESH sinfo->filled |= STATION_INFO_LLID | STATION_INFO_PLID | - STATION_INFO_PLINK_STATE; + STATION_INFO_PLINK_STATE | + STATION_INFO_LOCAL_PM | + STATION_INFO_PEER_PM | + STATION_INFO_NONPEER_PM; sinfo->llid = le16_to_cpu(sta->llid); sinfo->plid = le16_to_cpu(sta->plid); @@ -501,6 +504,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->filled |= STATION_INFO_T_OFFSET; sinfo->t_offset = sta->t_offset; } + sinfo->local_pm = sta->local_pm; + sinfo->peer_pm = sta->peer_pm; + sinfo->nonpeer_pm = sta->nonpeer_pm; #endif } @@ -1262,6 +1268,10 @@ static int sta_apply_parameters(struct ieee80211_local *local, changed = mesh_plink_inc_estab_count( sdata); sta->plink_state = params->plink_state; + + ieee80211_mps_sta_status_update(sta); + ieee80211_mps_set_sta_local_pm(sta, + sdata->u.mesh.mshcfg.power_mode); break; case NL80211_PLINK_LISTEN: case NL80211_PLINK_BLOCKED: @@ -1273,6 +1283,9 @@ static int sta_apply_parameters(struct ieee80211_local *local, changed = mesh_plink_dec_estab_count( sdata); sta->plink_state = params->plink_state; + + ieee80211_mps_sta_status_update(sta); + ieee80211_mps_local_status_update(sdata); break; default: /* nothing */ @@ -1289,6 +1302,9 @@ static int sta_apply_parameters(struct ieee80211_local *local, break; } } + + if (params->local_pm) + ieee80211_mps_set_sta_local_pm(sta, params->local_pm); #endif } @@ -1777,6 +1793,15 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, mask)) conf->dot11MeshHWMPconfirmationInterval = nconf->dot11MeshHWMPconfirmationInterval; + if (_chg_mesh_attr(NL80211_MESHCONF_POWER_MODE, mask)) { + conf->power_mode = nconf->power_mode; + ieee80211_mps_local_status_update(sdata); + } + if (_chg_mesh_attr(NL80211_MESHCONF_AWAKE_WINDOW, mask)) { + conf->dot11MeshAwakeWindowDuration = + nconf->dot11MeshAwakeWindowDuration; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); + } return 0; } diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h index 8f383a576016..4ccc5ed6237d 100644 --- a/net/mac80211/debug.h +++ b/net/mac80211/debug.h @@ -44,6 +44,12 @@ #define MAC80211_MESH_SYNC_DEBUG 0 #endif +#ifdef CONFIG_MAC80211_MESH_PS_DEBUG +#define MAC80211_MESH_PS_DEBUG 1 +#else +#define MAC80211_MESH_PS_DEBUG 0 +#endif + #ifdef CONFIG_MAC80211_TDLS_DEBUG #define MAC80211_TDLS_DEBUG 1 #else @@ -151,6 +157,10 @@ do { \ _sdata_dbg(MAC80211_MESH_SYNC_DEBUG, \ sdata, fmt, ##__VA_ARGS__) +#define mps_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_MESH_PS_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + #define tdls_dbg(sdata, fmt, ...) \ _sdata_dbg(MAC80211_TDLS_DEBUG, \ sdata, fmt, ##__VA_ARGS__) diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index cbde5cc49a40..059bbb82e84f 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -515,6 +515,9 @@ IEEE80211_IF_FILE(dot11MeshHWMProotInterval, u.mesh.mshcfg.dot11MeshHWMProotInterval, DEC); IEEE80211_IF_FILE(dot11MeshHWMPconfirmationInterval, u.mesh.mshcfg.dot11MeshHWMPconfirmationInterval, DEC); +IEEE80211_IF_FILE(power_mode, u.mesh.mshcfg.power_mode, DEC); +IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration, + u.mesh.mshcfg.dot11MeshAwakeWindowDuration, DEC); #endif #define DEBUGFS_ADD_MODE(name, mode) \ @@ -620,6 +623,8 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) MESHPARAMS_ADD(dot11MeshHWMPactivePathToRootTimeout); MESHPARAMS_ADD(dot11MeshHWMProotInterval); MESHPARAMS_ADD(dot11MeshHWMPconfirmationInterval); + MESHPARAMS_ADD(power_mode); + MESHPARAMS_ADD(dot11MeshAwakeWindowDuration); #undef MESHPARAMS_ADD } #endif diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 6fb1168b9f16..c7591f73dbc3 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -65,7 +65,7 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf, test_sta_flag(sta, WLAN_STA_##flg) ? #flg "\n" : "" int res = scnprintf(buf, sizeof(buf), - "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", TEST(AUTH), TEST(ASSOC), TEST(PS_STA), TEST(PS_DRIVER), TEST(AUTHORIZED), TEST(SHORT_PREAMBLE), @@ -74,7 +74,8 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf, TEST(UAPSD), TEST(SP), TEST(TDLS_PEER), TEST(TDLS_PEER_AUTH), TEST(4ADDR_EVENT), TEST(INSERTED), TEST(RATE_CONTROL), - TEST(TOFFSET_KNOWN)); + TEST(TOFFSET_KNOWN), TEST(MPSP_OWNER), + TEST(MPSP_RECIPIENT)); #undef TEST return simple_read_from_buffer(userbuf, count, ppos, buf, res); } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8faf360e0b4c..5fe9db707880 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -590,6 +590,11 @@ struct ieee80211_if_mesh { s64 sync_offset_clockdrift_max; spinlock_t sync_offset_lock; bool adjusting_tbtt; + /* mesh power save */ + enum nl80211_mesh_power_mode nonpeer_pm; + int ps_peers_light_sleep; + int ps_peers_deep_sleep; + struct ps_data ps; }; #ifdef CONFIG_MAC80211_MESH @@ -1185,6 +1190,7 @@ struct ieee802_11_elems { struct ieee80211_meshconf_ie *mesh_config; u8 *mesh_id; u8 *peering; + __le16 *awake_window; u8 *preq; u8 *prep; u8 *perr; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index f920da1201ab..35ac38871420 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -261,6 +261,9 @@ mesh_add_meshconf_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) *pos = IEEE80211_MESHCONF_CAPAB_FORWARDING; *pos |= ifmsh->accepting_plinks ? IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; + /* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */ + *pos |= ifmsh->ps_peers_deep_sleep ? + IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL : 0x00; *pos++ |= ifmsh->adjusting_tbtt ? IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING : 0x00; *pos++ = 0x00; @@ -286,6 +289,29 @@ mesh_add_meshid_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) return 0; } +int mesh_add_awake_window_ie(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u8 *pos; + + /* see IEEE802.11-2012 13.14.6 */ + if (ifmsh->ps_peers_light_sleep == 0 && + ifmsh->ps_peers_deep_sleep == 0 && + ifmsh->nonpeer_pm == NL80211_MESH_POWER_ACTIVE) + return 0; + + if (skb_tailroom(skb) < 4) + return -ENOMEM; + + pos = skb_put(skb, 2 + 2); + *pos++ = WLAN_EID_MESH_AWAKE_WINDOW; + *pos++ = 2; + put_unaligned_le16(ifmsh->mshcfg.dot11MeshAwakeWindowDuration, pos); + + return 0; +} + int mesh_add_vendor_ies(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) { @@ -629,6 +655,8 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.basic_rates = ieee80211_mandatory_rates(local, band); + ieee80211_mps_local_status_update(sdata); + ieee80211_bss_info_change_notify(sdata, changed); netif_carrier_on(sdata->dev); @@ -651,6 +679,10 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) sta_info_flush(sdata); mesh_path_flush_by_iface(sdata); + /* free all potentially still buffered group-addressed frames */ + local->total_ps_buffered -= skb_queue_len(&ifmsh->ps.bc_buf); + skb_queue_purge(&ifmsh->ps.bc_buf); + del_timer_sync(&sdata->u.mesh.housekeeping_timer); del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); del_timer_sync(&sdata->u.mesh.mesh_path_timer); @@ -828,6 +860,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) ieee80211_mesh_path_root_timer, (unsigned long) sdata); INIT_LIST_HEAD(&ifmsh->preq_queue.list); + skb_queue_head_init(&ifmsh->ps.bc_buf); spin_lock_init(&ifmsh->mesh_preq_queue_lock); spin_lock_init(&ifmsh->sync_offset_lock); diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index aff301544c7f..eb336253b6b3 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -222,6 +222,8 @@ int mesh_add_meshid_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata); int mesh_add_rsn_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata); +int mesh_add_awake_window_ie(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); int mesh_add_vendor_ies(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata); int mesh_add_ds_params_ie(struct sk_buff *skb, @@ -242,6 +244,21 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh); const struct ieee80211_mesh_sync_ops *ieee80211_mesh_sync_ops_get(u8 method); +/* mesh power save */ +void ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata); +void ieee80211_mps_set_sta_local_pm(struct sta_info *sta, + enum nl80211_mesh_power_mode pm); +void ieee80211_mps_set_frame_flags(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee80211_hdr *hdr); +void ieee80211_mps_sta_status_update(struct sta_info *sta); +void ieee80211_mps_rx_h_sta_process(struct sta_info *sta, + struct ieee80211_hdr *hdr); +void ieee80211_mpsp_trigger_process(u8 *qc, struct sta_info *sta, + bool tx, bool acked); +void ieee80211_mps_frame_release(struct sta_info *sta, + struct ieee802_11_elems *elems); + /* Mesh paths */ int mesh_nexthop_lookup(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 6b4603a90031..f0dd8742ed42 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -205,6 +205,7 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; skb_set_mac_header(skb, 0); skb_set_network_header(skb, 0); @@ -217,6 +218,7 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, info->control.vif = &sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; ieee80211_set_qos_hdr(sdata, skb); + ieee80211_mps_set_frame_flags(sdata, NULL, hdr); } /** @@ -1080,6 +1082,10 @@ int mesh_nexthop_resolve(struct sk_buff *skb, u8 *target_addr = hdr->addr3; int err = 0; + /* Nulls are only sent to peers for PS and should be pre-addressed */ + if (ieee80211_is_qos_nullfunc(hdr->frame_control)) + return 0; + rcu_read_lock(); err = mesh_nexthop_lookup(skb, sdata); if (!err) @@ -1151,6 +1157,7 @@ int mesh_nexthop_lookup(struct sk_buff *skb, if (next_hop) { memcpy(hdr->addr1, next_hop->sta.addr, ETH_ALEN); memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + ieee80211_mps_set_frame_flags(sdata, next_hop, hdr); err = 0; } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index aa749818860e..d5786c3eaee2 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -212,6 +212,7 @@ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta) hdr = (struct ieee80211_hdr *) skb->data; memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN); memcpy(hdr->addr2, mpath->sdata->vif.addr, ETH_ALEN); + ieee80211_mps_set_frame_flags(sta->sdata, sta, hdr); } spin_unlock_irqrestore(&mpath->frame_queue.lock, flags); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 6787d696d94c..fe7c3334d6fe 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -201,6 +201,9 @@ static u32 __mesh_plink_deactivate(struct sta_info *sta) sta->plink_state = NL80211_PLINK_BLOCKED; mesh_path_flush_by_nexthop(sta); + ieee80211_mps_sta_status_update(sta); + ieee80211_mps_local_status_update(sdata); + return changed; } @@ -503,6 +506,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, rssi_threshold_check(sta, sdata)) mesh_plink_open(sta); + ieee80211_mps_frame_release(sta, elems); out: rcu_read_unlock(); } @@ -633,6 +637,9 @@ int mesh_plink_open(struct sta_info *sta) "Mesh plink: starting establishment with %pM\n", sta->sta.addr); + /* set the non-peer mode to active during peering */ + ieee80211_mps_local_status_update(sdata); + return mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, sta->sta.addr, llid, 0, 0); } @@ -866,6 +873,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m sta->llid = llid; mesh_plink_timer_set(sta, mshcfg->dot11MeshRetryTimeout); + + /* set the non-peer mode to active during peering */ + ieee80211_mps_local_status_update(sdata); + spin_unlock_bh(&sta->lock); mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, @@ -959,6 +970,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m changed |= mesh_set_short_slot_time(sdata); mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); + ieee80211_mps_sta_status_update(sta); + ieee80211_mps_set_sta_local_pm(sta, + mshcfg->power_mode); break; default: spin_unlock_bh(&sta->lock); @@ -998,6 +1012,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CONFIRM, sta->sta.addr, llid, plid, 0); + ieee80211_mps_sta_status_update(sta); + ieee80211_mps_set_sta_local_pm(sta, + mshcfg->power_mode); break; default: spin_unlock_bh(&sta->lock); diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c new file mode 100644 index 000000000000..b677962525ed --- /dev/null +++ b/net/mac80211/mesh_ps.c @@ -0,0 +1,585 @@ +/* + * Copyright 2012-2013, Marco Porsch + * Copyright 2012-2013, cozybit Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "mesh.h" +#include "wme.h" + + +/* mesh PS management */ + +/** + * mps_qos_null_get - create pre-addressed QoS Null frame for mesh powersave + */ +static struct sk_buff *mps_qos_null_get(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + struct ieee80211_hdr *nullfunc; /* use 4addr header */ + struct sk_buff *skb; + int size = sizeof(*nullfunc); + __le16 fc; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + size + 2); + if (!skb) + return NULL; + skb_reserve(skb, local->hw.extra_tx_headroom); + + nullfunc = (struct ieee80211_hdr *) skb_put(skb, size); + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); + ieee80211_fill_mesh_addresses(nullfunc, &fc, sta->sta.addr, + sdata->vif.addr); + nullfunc->frame_control = fc; + nullfunc->duration_id = 0; + /* no address resolution for this frame -> set addr 1 immediately */ + memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); + memset(skb_put(skb, 2), 0, 2); /* append QoS control field */ + ieee80211_mps_set_frame_flags(sdata, sta, nullfunc); + + return skb; +} + +/** + * mps_qos_null_tx - send a QoS Null to indicate link-specific power mode + */ +static void mps_qos_null_tx(struct sta_info *sta) +{ + struct sk_buff *skb; + + skb = mps_qos_null_get(sta); + if (!skb) + return; + + mps_dbg(sta->sdata, "announcing peer-specific power mode to %pM\n", + sta->sta.addr); + + /* don't unintentionally start a MPSP */ + if (!test_sta_flag(sta, WLAN_STA_PS_STA)) { + u8 *qc = ieee80211_get_qos_ctl((void *) skb->data); + + qc[0] |= IEEE80211_QOS_CTL_EOSP; + } + + ieee80211_tx_skb(sta->sdata, skb); +} + +/** + * ieee80211_mps_local_status_update - track status of local link-specific PMs + * + * @sdata: local mesh subif + * + * sets the non-peer power mode and triggers the driver PS (re-)configuration + */ +void ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct sta_info *sta; + bool peering = false; + int light_sleep_cnt = 0; + int deep_sleep_cnt = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) { + if (sdata != sta->sdata) + continue; + + switch (sta->plink_state) { + case NL80211_PLINK_OPN_SNT: + case NL80211_PLINK_OPN_RCVD: + case NL80211_PLINK_CNF_RCVD: + peering = true; + break; + case NL80211_PLINK_ESTAB: + if (sta->local_pm == NL80211_MESH_POWER_LIGHT_SLEEP) + light_sleep_cnt++; + else if (sta->local_pm == NL80211_MESH_POWER_DEEP_SLEEP) + deep_sleep_cnt++; + break; + default: + break; + } + } + rcu_read_unlock(); + + /* + * Set non-peer mode to active during peering/scanning/authentication + * (see IEEE802.11-2012 13.14.8.3). The non-peer mesh power mode is + * deep sleep if the local STA is in light or deep sleep towards at + * least one mesh peer (see 13.14.3.1). Otherwise, set it to the + * user-configured default value. + */ + if (peering) { + mps_dbg(sdata, "setting non-peer PM to active for peering\n"); + ifmsh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE; + } else if (light_sleep_cnt || deep_sleep_cnt) { + mps_dbg(sdata, "setting non-peer PM to deep sleep\n"); + ifmsh->nonpeer_pm = NL80211_MESH_POWER_DEEP_SLEEP; + } else { + mps_dbg(sdata, "setting non-peer PM to user value\n"); + ifmsh->nonpeer_pm = ifmsh->mshcfg.power_mode; + } + + ifmsh->ps_peers_light_sleep = light_sleep_cnt; + ifmsh->ps_peers_deep_sleep = deep_sleep_cnt; +} + +/** + * ieee80211_mps_set_sta_local_pm - set local PM towards a mesh STA + * + * @sta: mesh STA + * @pm: the power mode to set + */ +void ieee80211_mps_set_sta_local_pm(struct sta_info *sta, + enum nl80211_mesh_power_mode pm) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + + mps_dbg(sdata, "local STA operates in mode %d with %pM\n", + pm, sta->sta.addr); + + sta->local_pm = pm; + + /* + * announce peer-specific power mode transition + * (see IEEE802.11-2012 13.14.3.2 and 13.14.3.3) + */ + if (sta->plink_state == NL80211_PLINK_ESTAB) + mps_qos_null_tx(sta); + + ieee80211_mps_local_status_update(sdata); +} + +/** + * ieee80211_mps_set_frame_flags - set mesh PS flags in FC (and QoS Control) + * + * @sdata: local mesh subif + * @sta: mesh STA + * @hdr: 802.11 frame header + * + * see IEEE802.11-2012 8.2.4.1.7 and 8.2.4.5.11 + * + * NOTE: sta must be given when an individually-addressed QoS frame header + * is handled, for group-addressed and management frames it is not used + */ +void ieee80211_mps_set_frame_flags(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + enum nl80211_mesh_power_mode pm; + u8 *qc; + + if (WARN_ON(is_unicast_ether_addr(hdr->addr1) && + ieee80211_is_data_qos(hdr->frame_control) && + !sta)) + return; + + if (is_unicast_ether_addr(hdr->addr1) && + ieee80211_is_data_qos(hdr->frame_control) && + sta->plink_state == NL80211_PLINK_ESTAB) + pm = sta->local_pm; + else + pm = sdata->u.mesh.nonpeer_pm; + + if (pm == NL80211_MESH_POWER_ACTIVE) + hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_PM); + else + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM); + + if (!ieee80211_is_data_qos(hdr->frame_control)) + return; + + qc = ieee80211_get_qos_ctl(hdr); + + if ((is_unicast_ether_addr(hdr->addr1) && + pm == NL80211_MESH_POWER_DEEP_SLEEP) || + (is_multicast_ether_addr(hdr->addr1) && + sdata->u.mesh.ps_peers_deep_sleep > 0)) + qc[1] |= (IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8); + else + qc[1] &= ~(IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8); +} + +/** + * ieee80211_mps_sta_status_update - update buffering status of neighbor STA + * + * @sta: mesh STA + * + * called after change of peering status or non-peer/peer-specific power mode + */ +void ieee80211_mps_sta_status_update(struct sta_info *sta) +{ + enum nl80211_mesh_power_mode pm; + bool do_buffer; + + /* + * use peer-specific power mode if peering is established and the + * peer's power mode is known + */ + if (sta->plink_state == NL80211_PLINK_ESTAB && + sta->peer_pm != NL80211_MESH_POWER_UNKNOWN) + pm = sta->peer_pm; + else + pm = sta->nonpeer_pm; + + do_buffer = (pm != NL80211_MESH_POWER_ACTIVE); + + /* Don't let the same PS state be set twice */ + if (test_sta_flag(sta, WLAN_STA_PS_STA) == do_buffer) + return; + + if (do_buffer) { + set_sta_flag(sta, WLAN_STA_PS_STA); + atomic_inc(&sta->sdata->u.mesh.ps.num_sta_ps); + mps_dbg(sta->sdata, "start PS buffering frames towards %pM\n", + sta->sta.addr); + } else { + ieee80211_sta_ps_deliver_wakeup(sta); + } + + /* clear the MPSP flags for non-peers or active STA */ + if (sta->plink_state != NL80211_PLINK_ESTAB) { + clear_sta_flag(sta, WLAN_STA_MPSP_OWNER); + clear_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT); + } else if (!do_buffer) { + clear_sta_flag(sta, WLAN_STA_MPSP_OWNER); + } +} + +static void mps_set_sta_peer_pm(struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + enum nl80211_mesh_power_mode pm; + u8 *qc = ieee80211_get_qos_ctl(hdr); + + /* + * Test Power Management field of frame control (PW) and + * mesh power save level subfield of QoS control field (PSL) + * + * | PM | PSL| Mesh PM | + * +----+----+---------+ + * | 0 |Rsrv| Active | + * | 1 | 0 | Light | + * | 1 | 1 | Deep | + */ + if (ieee80211_has_pm(hdr->frame_control)) { + if (qc[1] & (IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8)) + pm = NL80211_MESH_POWER_DEEP_SLEEP; + else + pm = NL80211_MESH_POWER_LIGHT_SLEEP; + } else { + pm = NL80211_MESH_POWER_ACTIVE; + } + + if (sta->peer_pm == pm) + return; + + mps_dbg(sta->sdata, "STA %pM enters mode %d\n", + sta->sta.addr, pm); + + sta->peer_pm = pm; + + ieee80211_mps_sta_status_update(sta); +} + +static void mps_set_sta_nonpeer_pm(struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + enum nl80211_mesh_power_mode pm; + + if (ieee80211_has_pm(hdr->frame_control)) + pm = NL80211_MESH_POWER_DEEP_SLEEP; + else + pm = NL80211_MESH_POWER_ACTIVE; + + if (sta->nonpeer_pm == pm) + return; + + mps_dbg(sta->sdata, "STA %pM sets non-peer mode to %d\n", + sta->sta.addr, pm); + + sta->nonpeer_pm = pm; + + ieee80211_mps_sta_status_update(sta); +} + +/** + * ieee80211_mps_rx_h_sta_process - frame receive handler for mesh powersave + * + * @sta: STA info that transmitted the frame + * @hdr: IEEE 802.11 (QoS) Header + */ +void ieee80211_mps_rx_h_sta_process(struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + if (is_unicast_ether_addr(hdr->addr1) && + ieee80211_is_data_qos(hdr->frame_control)) { + /* + * individually addressed QoS Data/Null frames contain + * peer link-specific PS mode towards the local STA + */ + mps_set_sta_peer_pm(sta, hdr); + + /* check for mesh Peer Service Period trigger frames */ + ieee80211_mpsp_trigger_process(ieee80211_get_qos_ctl(hdr), + sta, false, false); + } else { + /* + * can only determine non-peer PS mode + * (see IEEE802.11-2012 8.2.4.1.7) + */ + mps_set_sta_nonpeer_pm(sta, hdr); + } +} + + +/* mesh PS frame release */ + +static void mpsp_trigger_send(struct sta_info *sta, bool rspi, bool eosp) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct sk_buff *skb; + struct ieee80211_hdr *nullfunc; + struct ieee80211_tx_info *info; + u8 *qc; + + skb = mps_qos_null_get(sta); + if (!skb) + return; + + nullfunc = (struct ieee80211_hdr *) skb->data; + if (!eosp) + nullfunc->frame_control |= + cpu_to_le16(IEEE80211_FCTL_MOREDATA); + /* + * | RSPI | EOSP | MPSP triggering | + * +------+------+--------------------+ + * | 0 | 0 | local STA is owner | + * | 0 | 1 | no MPSP (MPSP end) | + * | 1 | 0 | both STA are owner | + * | 1 | 1 | peer STA is owner | see IEEE802.11-2012 13.14.9.2 + */ + qc = ieee80211_get_qos_ctl(nullfunc); + if (rspi) + qc[1] |= (IEEE80211_QOS_CTL_RSPI >> 8); + if (eosp) + qc[0] |= IEEE80211_QOS_CTL_EOSP; + + info = IEEE80211_SKB_CB(skb); + + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | + IEEE80211_TX_CTL_REQ_TX_STATUS; + + mps_dbg(sdata, "sending MPSP trigger%s%s to %pM\n", + rspi ? " RSPI" : "", eosp ? " EOSP" : "", sta->sta.addr); + + ieee80211_tx_skb(sdata, skb); +} + +/** + * mpsp_qos_null_append - append QoS Null frame to MPSP skb queue if needed + * + * To properly end a mesh MPSP the last transmitted frame has to set the EOSP + * flag in the QoS Control field. In case the current tailing frame is not a + * QoS Data frame, append a QoS Null to carry the flag. + */ +static void mpsp_qos_null_append(struct sta_info *sta, + struct sk_buff_head *frames) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct sk_buff *new_skb, *skb = skb_peek_tail(frames); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info; + + if (ieee80211_is_data_qos(hdr->frame_control)) + return; + + new_skb = mps_qos_null_get(sta); + if (!new_skb) + return; + + mps_dbg(sdata, "appending QoS Null in MPSP towards %pM\n", + sta->sta.addr); + /* + * This frame has to be transmitted last. Assign lowest priority to + * make sure it cannot pass other frames when releasing multiple ACs. + */ + new_skb->priority = 1; + skb_set_queue_mapping(new_skb, IEEE80211_AC_BK); + ieee80211_set_qos_hdr(sdata, new_skb); + + info = IEEE80211_SKB_CB(new_skb); + info->control.vif = &sdata->vif; + info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; + + __skb_queue_tail(frames, new_skb); +} + +/** + * mps_frame_deliver - transmit frames during mesh powersave + * + * @sta: STA info to transmit to + * @n_frames: number of frames to transmit. -1 for all + */ +static void mps_frame_deliver(struct sta_info *sta, int n_frames) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + int ac; + struct sk_buff_head frames; + struct sk_buff *skb; + bool more_data = false; + + skb_queue_head_init(&frames); + + /* collect frame(s) from buffers */ + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + while (n_frames != 0) { + skb = skb_dequeue(&sta->tx_filtered[ac]); + if (!skb) { + skb = skb_dequeue( + &sta->ps_tx_buf[ac]); + if (skb) + local->total_ps_buffered--; + } + if (!skb) + break; + n_frames--; + __skb_queue_tail(&frames, skb); + } + + if (!skb_queue_empty(&sta->tx_filtered[ac]) || + !skb_queue_empty(&sta->ps_tx_buf[ac])) + more_data = true; + } + + /* nothing to send? -> EOSP */ + if (skb_queue_empty(&frames)) { + mpsp_trigger_send(sta, false, true); + return; + } + + /* in a MPSP make sure the last skb is a QoS Data frame */ + if (test_sta_flag(sta, WLAN_STA_MPSP_OWNER)) + mpsp_qos_null_append(sta, &frames); + + mps_dbg(sta->sdata, "sending %d frames to PS STA %pM\n", + skb_queue_len(&frames), sta->sta.addr); + + /* prepare collected frames for transmission */ + skb_queue_walk(&frames, skb) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *) skb->data; + + /* + * Tell TX path to send this frame even though the + * STA may still remain is PS mode after this frame + * exchange. + */ + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; + + if (more_data || !skb_queue_is_last(&frames, skb)) + hdr->frame_control |= + cpu_to_le16(IEEE80211_FCTL_MOREDATA); + else + hdr->frame_control &= + cpu_to_le16(~IEEE80211_FCTL_MOREDATA); + + if (skb_queue_is_last(&frames, skb) && + ieee80211_is_data_qos(hdr->frame_control)) { + u8 *qoshdr = ieee80211_get_qos_ctl(hdr); + + /* MPSP trigger frame ends service period */ + *qoshdr |= IEEE80211_QOS_CTL_EOSP; + info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + } + } + + ieee80211_add_pending_skbs(local, &frames); + sta_info_recalc_tim(sta); +} + +/** + * ieee80211_mpsp_trigger_process - track status of mesh Peer Service Periods + * + * @qc: QoS Control field + * @sta: peer to start a MPSP with + * @tx: frame was transmitted by the local STA + * @acked: frame has been transmitted successfully + * + * NOTE: active mode STA may only serve as MPSP owner + */ +void ieee80211_mpsp_trigger_process(u8 *qc, struct sta_info *sta, + bool tx, bool acked) +{ + u8 rspi = qc[1] & (IEEE80211_QOS_CTL_RSPI >> 8); + u8 eosp = qc[0] & IEEE80211_QOS_CTL_EOSP; + + if (tx) { + if (rspi && acked) + set_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT); + + if (eosp) + clear_sta_flag(sta, WLAN_STA_MPSP_OWNER); + else if (acked && + test_sta_flag(sta, WLAN_STA_PS_STA) && + !test_and_set_sta_flag(sta, WLAN_STA_MPSP_OWNER)) + mps_frame_deliver(sta, -1); + } else { + if (eosp) + clear_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT); + else if (sta->local_pm != NL80211_MESH_POWER_ACTIVE) + set_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT); + + if (rspi && !test_and_set_sta_flag(sta, WLAN_STA_MPSP_OWNER)) + mps_frame_deliver(sta, -1); + } +} + +/** + * ieee80211_mps_frame_release - release buffered frames in response to beacon + * + * @sta: mesh STA + * @elems: beacon IEs + * + * For peers if we have individually-addressed frames buffered or the peer + * indicates buffered frames, send a corresponding MPSP trigger frame. Since + * we do not evaluate the awake window duration, QoS Nulls are used as MPSP + * trigger frames. If the neighbour STA is not a peer, only send single frames. + */ +void ieee80211_mps_frame_release(struct sta_info *sta, + struct ieee802_11_elems *elems) +{ + int ac, buffer_local = 0; + bool has_buffered = false; + + /* TIM map only for LLID <= IEEE80211_MAX_AID */ + if (sta->plink_state == NL80211_PLINK_ESTAB) + has_buffered = ieee80211_check_tim(elems->tim, elems->tim_len, + le16_to_cpu(sta->llid) % IEEE80211_MAX_AID); + + if (has_buffered) + mps_dbg(sta->sdata, "%pM indicates buffered frames\n", + sta->sta.addr); + + /* only transmit to PS STA with announced, non-zero awake window */ + if (test_sta_flag(sta, WLAN_STA_PS_STA) && + (!elems->awake_window || !le16_to_cpu(*elems->awake_window))) + return; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + buffer_local += skb_queue_len(&sta->ps_tx_buf[ac]) + + skb_queue_len(&sta->tx_filtered[ac]); + + if (!has_buffered && !buffer_local) + return; + + if (sta->plink_state == NL80211_PLINK_ESTAB) + mpsp_trigger_send(sta, has_buffered, !buffer_local); + else + mps_frame_deliver(sta, 1); +} diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a19089565c4b..c98be0593756 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1452,6 +1452,10 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) } } + /* mesh power save support */ + if (ieee80211_vif_is_mesh(&rx->sdata->vif)) + ieee80211_mps_rx_h_sta_process(sta, hdr); + /* * Drop (qos-)data::nullfunc frames silently, since they * are used only to control station power saving mode. @@ -2090,7 +2094,10 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (is_multicast_ether_addr(fwd_hdr->addr1)) { IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_mcast); memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN); + /* update power mode indication when forwarding */ + ieee80211_mps_set_frame_flags(sdata, NULL, fwd_hdr); } else if (!mesh_nexthop_lookup(fwd_skb, sdata)) { + /* mesh power mode flags updated in mesh_nexthop_lookup */ IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_unicast); } else { /* unable to resolve next hop */ diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 227233c3ff7f..47a0f0601768 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -120,6 +120,8 @@ static void cleanup_single_sta(struct sta_info *sta) if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ps = &sdata->bss->ps; + else if (ieee80211_vif_is_mesh(&sdata->vif)) + ps = &sdata->u.mesh.ps; else return; @@ -587,6 +589,12 @@ void sta_info_recalc_tim(struct sta_info *sta) ps = &sta->sdata->bss->ps; id = sta->sta.aid; +#ifdef CONFIG_MAC80211_MESH + } else if (ieee80211_vif_is_mesh(&sta->sdata->vif)) { + ps = &sta->sdata->u.mesh.ps; + /* TIM map only for PLID <= IEEE80211_MAX_AID */ + id = le16_to_cpu(sta->plid) % IEEE80211_MAX_AID; +#endif } else { return; } @@ -745,8 +753,9 @@ static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, bool have_buffered = false; int ac; - /* This is only necessary for stations on BSS interfaces */ - if (!sta->sdata->bss) + /* This is only necessary for stations on BSS/MBSS interfaces */ + if (!sta->sdata->bss && + !ieee80211_vif_is_mesh(&sta->sdata->vif)) return false; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) @@ -934,6 +943,11 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, if (time_after(jiffies, sta->last_rx + exp_time)) { sta_dbg(sta->sdata, "expiring inactive STA %pM\n", sta->sta.addr); + + if (ieee80211_vif_is_mesh(&sdata->vif) && + test_sta_flag(sta, WLAN_STA_PS_STA)) + atomic_dec(&sdata->u.mesh.ps.num_sta_ps); + WARN_ON(__sta_info_destroy(sta)); } } @@ -992,6 +1006,8 @@ static void clear_sta_ps_flags(void *_sta) if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ps = &sdata->bss->ps; + else if (ieee80211_vif_is_mesh(&sdata->vif)) + ps = &sdata->u.mesh.ps; else return; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index af7d78aa5523..5a1deba2c645 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -56,6 +56,8 @@ * @WLAN_STA_INSERTED: This station is inserted into the hash table. * @WLAN_STA_RATE_CONTROL: rate control was initialized for this station. * @WLAN_STA_TOFFSET_KNOWN: toffset calculated for this station is valid. + * @WLAN_STA_MPSP_OWNER: local STA is owner of a mesh Peer Service Period. + * @WLAN_STA_MPSP_RECIPIENT: local STA is recipient of a MPSP. */ enum ieee80211_sta_info_flags { WLAN_STA_AUTH, @@ -78,6 +80,8 @@ enum ieee80211_sta_info_flags { WLAN_STA_INSERTED, WLAN_STA_RATE_CONTROL, WLAN_STA_TOFFSET_KNOWN, + WLAN_STA_MPSP_OWNER, + WLAN_STA_MPSP_RECIPIENT, }; #define ADDBA_RESP_INTERVAL HZ @@ -282,6 +286,9 @@ struct sta_ampdu_mlme { * @t_offset_setpoint: reference timing offset of this sta to be used when * calculating clockdrift * @ch_width: peer's channel width + * @local_pm: local link-specific power save mode + * @peer_pm: peer-specific power save mode towards local STA + * @nonpeer_pm: STA power save mode towards non-peer neighbors * @debugfs: debug filesystem info * @dead: set to true when sta is unlinked * @uploaded: set to true when sta is uploaded to the driver @@ -379,6 +386,10 @@ struct sta_info { s64 t_offset; s64 t_offset_setpoint; enum nl80211_chan_width ch_width; + /* mesh power save */ + enum nl80211_mesh_power_mode local_pm; + enum nl80211_mesh_power_mode peer_pm; + enum nl80211_mesh_power_mode nonpeer_pm; #endif #ifdef CONFIG_MAC80211_DEBUGFS diff --git a/net/mac80211/status.c b/net/mac80211/status.c index d041de056b7f..43439203f4e4 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -472,6 +472,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) return; } + /* mesh Peer Service Period support */ + if (ieee80211_vif_is_mesh(&sta->sdata->vif) && + ieee80211_is_data_qos(fc)) + ieee80211_mpsp_trigger_process( + ieee80211_get_qos_ctl(hdr), + sta, true, acked); + if ((local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) && (rates_idx != -1)) sta->last_tx_rate = info->status.rates[rates_idx]; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7892b0a8873e..2ef0e19b06bb 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -329,6 +329,8 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) if (sdata->vif.type == NL80211_IFTYPE_AP) ps = &sdata->u.ap.ps; + else if (ieee80211_vif_is_mesh(&sdata->vif)) + ps = &sdata->u.mesh.ps; else continue; @@ -372,18 +374,20 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) /* * broadcast/multicast frame * - * If any of the associated stations is in power save mode, + * If any of the associated/peer stations is in power save mode, * the frame is buffered to be sent after DTIM beacon frame. * This is done either by the hardware or us. */ - /* powersaving STAs currently only in AP/VLAN mode */ + /* powersaving STAs currently only in AP/VLAN/mesh mode */ if (tx->sdata->vif.type == NL80211_IFTYPE_AP || tx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { if (!tx->sdata->bss) return TX_CONTINUE; ps = &tx->sdata->bss->ps; + } else if (ieee80211_vif_is_mesh(&tx->sdata->vif)) { + ps = &tx->sdata->u.mesh.ps; } else { return TX_CONTINUE; } @@ -1473,12 +1477,14 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, hdr = (struct ieee80211_hdr *) skb->data; info->control.vif = &sdata->vif; - if (ieee80211_vif_is_mesh(&sdata->vif) && - ieee80211_is_data(hdr->frame_control) && - !is_multicast_ether_addr(hdr->addr1) && - mesh_nexthop_resolve(skb, sdata)) { - /* skb queued: don't free */ - return; + if (ieee80211_vif_is_mesh(&sdata->vif)) { + if (ieee80211_is_data(hdr->frame_control) && + is_unicast_ether_addr(hdr->addr1)) { + if (mesh_nexthop_resolve(skb, sdata)) + return; /* skb queued: don't free */ + } else { + ieee80211_mps_set_frame_flags(sdata, NULL, hdr); + } } ieee80211_set_qos_hdr(sdata, skb); @@ -2445,12 +2451,14 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, 2 + /* NULL SSID */ 2 + 8 + /* supported rates */ 2 + 3 + /* DS params */ + 256 + /* TIM IE */ 2 + (IEEE80211_MAX_SUPP_RATES - 8) + 2 + sizeof(struct ieee80211_ht_cap) + 2 + sizeof(struct ieee80211_ht_operation) + 2 + sdata->u.mesh.mesh_id_len + 2 + sizeof(struct ieee80211_meshconf_ie) + - sdata->u.mesh.ie_len); + sdata->u.mesh.ie_len + + 2 + sizeof(__le16)); /* awake window */ if (!skb) goto out; @@ -2462,6 +2470,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, eth_broadcast_addr(mgmt->da); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); + ieee80211_mps_set_frame_flags(sdata, NULL, (void *) mgmt); mgmt->u.beacon.beacon_int = cpu_to_le16(sdata->vif.bss_conf.beacon_int); mgmt->u.beacon.capab_info |= cpu_to_le16( @@ -2475,12 +2484,14 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (ieee80211_add_srates_ie(sdata, skb, true, band) || mesh_add_ds_params_ie(skb, sdata) || + ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb) || ieee80211_add_ext_srates_ie(sdata, skb, true, band) || mesh_add_rsn_ie(skb, sdata) || mesh_add_ht_cap_ie(skb, sdata) || mesh_add_ht_oper_ie(skb, sdata) || mesh_add_meshid_ie(skb, sdata) || mesh_add_meshconf_ie(skb, sdata) || + mesh_add_awake_window_ie(skb, sdata) || mesh_add_vendor_ies(skb, sdata)) { pr_err("o11s: couldn't add ies!\n"); goto out; @@ -2734,6 +2745,8 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, goto out; ps = &sdata->u.ap.ps; + } else if (ieee80211_vif_is_mesh(&sdata->vif)) { + ps = &sdata->u.mesh.ps; } else { goto out; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 139ad9b66c39..6cb71a350edd 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -805,6 +805,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, elems->peering = pos; elems->peering_len = elen; break; + case WLAN_EID_MESH_AWAKE_WINDOW: + if (elen >= 2) + elems->awake_window = (void *)pos; + break; case WLAN_EID_PREQ: elems->preq = pos; elems->preq_len = elen; diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 906f00cd6d2f..afba19cb6f87 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -191,6 +191,15 @@ void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata, /* qos header is 2 bytes */ *p++ = ack_policy | tid; - *p = ieee80211_vif_is_mesh(&sdata->vif) ? - (IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT >> 8) : 0; + if (ieee80211_vif_is_mesh(&sdata->vif)) { + /* preserve RSPI and Mesh PS Level bit */ + *p &= ((IEEE80211_QOS_CTL_RSPI | + IEEE80211_QOS_CTL_MESH_PS_LEVEL) >> 8); + + /* Nulls don't have a mesh header (frame body) */ + if (!ieee80211_is_qos_nullfunc(hdr->frame_control)) + *p |= (IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT >> 8); + } else { + *p = 0; + } } -- cgit v1.2.3-71-gd317 From c14b78e7decd0d1d5add6a4604feb8609fe920a9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 5 Feb 2013 01:50:26 +0100 Subject: netfilter: nfnetlink: add mutex per subsystem This patch replaces the global lock to one lock per subsystem. The per-subsystem lock avoids that processes operating with different subsystems are synchronized. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 4 +-- net/netfilter/ipset/ip_set_core.c | 26 +++++++++--------- net/netfilter/nf_conntrack_netlink.c | 12 ++++----- net/netfilter/nfnetlink.c | 52 ++++++++++++++++++++++-------------- 4 files changed, 53 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 4966ddec039b..ecbb8e495912 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -34,8 +34,8 @@ extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigne extern int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error); extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags); -extern void nfnl_lock(void); -extern void nfnl_unlock(void); +extern void nfnl_lock(__u8 subsys_id); +extern void nfnl_unlock(__u8 subsys_id); #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \ MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys)) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 6d6d8f2b033e..f82b2e606cfd 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -88,14 +88,14 @@ find_set_type(const char *name, u8 family, u8 revision) static bool load_settype(const char *name) { - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); pr_debug("try to load ip_set_%s\n", name); if (request_module("ip_set_%s", name) < 0) { pr_warning("Can't find ip_set type %s\n", name); - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); return false; } - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); return true; } @@ -532,7 +532,7 @@ ip_set_nfnl_get(const char *name) ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); for (i = 0; i < ip_set_max; i++) { s = nfnl_set(i); if (s != NULL && STREQ(s->name, name)) { @@ -541,7 +541,7 @@ ip_set_nfnl_get(const char *name) break; } } - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); return index; } @@ -561,13 +561,13 @@ ip_set_nfnl_get_byindex(ip_set_id_t index) if (index > ip_set_max) return IPSET_INVALID_ID; - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); set = nfnl_set(index); if (set) __ip_set_get(set); else index = IPSET_INVALID_ID; - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); return index; } @@ -584,11 +584,11 @@ void ip_set_nfnl_put(ip_set_id_t index) { struct ip_set *set; - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); set = nfnl_set(index); if (set != NULL) __ip_set_put(set); - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); @@ -1763,10 +1763,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) goto done; } req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); find_set_and_id(req_get->set.name, &id); req_get->set.index = id; - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } case IP_SET_OP_GET_BYINDEX: { @@ -1778,11 +1778,11 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) ret = -EINVAL; goto done; } - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_IPSET); set = nfnl_set(req_get->set.index); strncpy(req_get->set.name, set ? set->name : "", IPSET_MAXNAMELEN); - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } default: diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 2334cc5d2b16..d490a300ce2b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1256,13 +1256,13 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, if (!parse_nat_setup) { #ifdef CONFIG_MODULES rcu_read_unlock(); - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_CTNETLINK); if (request_module("nf-nat") < 0) { - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); return -EOPNOTSUPP; } - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); if (nfnetlink_parse_nat_setup_hook) return -EAGAIN; @@ -1274,13 +1274,13 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, if (err == -EAGAIN) { #ifdef CONFIG_MODULES rcu_read_unlock(); - nfnl_unlock(); + nfnl_unlock(NFNL_SUBSYS_CTNETLINK); if (request_module("nf-nat-%u", nf_ct_l3num(ct)) < 0) { - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); return -EOPNOTSUPP; } - nfnl_lock(); + nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); #else err = -EOPNOTSUPP; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 58a09b7c3f6d..d578ec251712 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -36,8 +36,10 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; -static const struct nfnetlink_subsystem __rcu *subsys_table[NFNL_SUBSYS_COUNT]; -static DEFINE_MUTEX(nfnl_mutex); +static struct { + struct mutex mutex; + const struct nfnetlink_subsystem __rcu *subsys; +} table[NFNL_SUBSYS_COUNT]; static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_NEW] = NFNL_SUBSYS_CTNETLINK, @@ -48,27 +50,32 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP, }; -void nfnl_lock(void) +void nfnl_lock(__u8 subsys_id) { - mutex_lock(&nfnl_mutex); + mutex_lock(&table[subsys_id].mutex); } EXPORT_SYMBOL_GPL(nfnl_lock); -void nfnl_unlock(void) +void nfnl_unlock(__u8 subsys_id) { - mutex_unlock(&nfnl_mutex); + mutex_unlock(&table[subsys_id].mutex); } EXPORT_SYMBOL_GPL(nfnl_unlock); +static struct mutex *nfnl_get_lock(__u8 subsys_id) +{ + return &table[subsys_id].mutex; +} + int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) { - nfnl_lock(); - if (subsys_table[n->subsys_id]) { - nfnl_unlock(); + nfnl_lock(n->subsys_id); + if (table[n->subsys_id].subsys) { + nfnl_unlock(n->subsys_id); return -EBUSY; } - rcu_assign_pointer(subsys_table[n->subsys_id], n); - nfnl_unlock(); + rcu_assign_pointer(table[n->subsys_id].subsys, n); + nfnl_unlock(n->subsys_id); return 0; } @@ -76,9 +83,9 @@ EXPORT_SYMBOL_GPL(nfnetlink_subsys_register); int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n) { - nfnl_lock(); - subsys_table[n->subsys_id] = NULL; - nfnl_unlock(); + nfnl_lock(n->subsys_id); + table[n->subsys_id].subsys = NULL; + nfnl_unlock(n->subsys_id); synchronize_rcu(); return 0; } @@ -91,7 +98,7 @@ static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t t if (subsys_id >= NFNL_SUBSYS_COUNT) return NULL; - return rcu_dereference(subsys_table[subsys_id]); + return rcu_dereference(table[subsys_id].subsys); } static inline const struct nfnl_callback * @@ -175,6 +182,7 @@ replay: struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; struct nlattr *attr = (void *)nlh + min_len; int attrlen = nlh->nlmsg_len - min_len; + __u8 subsys_id = NFNL_SUBSYS_ID(type); err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen, ss->cb[cb_id].policy); @@ -189,10 +197,9 @@ replay: rcu_read_unlock(); } else { rcu_read_unlock(); - nfnl_lock(); - if (rcu_dereference_protected( - subsys_table[NFNL_SUBSYS_ID(type)], - lockdep_is_held(&nfnl_mutex)) != ss || + nfnl_lock(subsys_id); + if (rcu_dereference_protected(table[subsys_id].subsys, + lockdep_is_held(nfnl_get_lock(subsys_id))) != ss || nfnetlink_find_client(type, ss) != nc) err = -EAGAIN; else if (nc->call) @@ -200,7 +207,7 @@ replay: (const struct nlattr **)cda); else err = -EINVAL; - nfnl_unlock(); + nfnl_unlock(subsys_id); } if (err == -EAGAIN) goto replay; @@ -267,6 +274,11 @@ static struct pernet_operations nfnetlink_net_ops = { static int __init nfnetlink_init(void) { + int i; + + for (i=0; i Date: Tue, 5 Feb 2013 07:25:17 +0000 Subject: tcp: remove Appropriate Byte Count support TCP Appropriate Byte Count was added by me, but later disabled. There is no point in maintaining it since it is a potential source of bugs and Linux already implements other better window protection heuristics. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 11 ----------- include/linux/tcp.h | 1 - include/net/tcp.h | 1 - kernel/sysctl_binary.c | 1 - net/ipv4/sysctl_net_ipv4.c | 7 ------- net/ipv4/tcp.c | 1 - net/ipv4/tcp_cong.c | 30 +----------------------------- net/ipv4/tcp_input.c | 15 --------------- net/ipv4/tcp_minisocks.c | 1 - 9 files changed, 1 insertion(+), 67 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 19ac1802bfd4..dc2dc87d2557 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -130,17 +130,6 @@ somaxconn - INTEGER Defaults to 128. See also tcp_max_syn_backlog for additional tuning for TCP sockets. -tcp_abc - INTEGER - Controls Appropriate Byte Count (ABC) defined in RFC3465. - ABC is a way of increasing congestion window (cwnd) more slowly - in response to partial acknowledgments. - Possible values are: - 0 increase cwnd once per acknowledgment (no ABC) - 1 increase cwnd once per acknowledgment of full sized segment - 2 allow increase cwnd by two if acknowledgment is - of two segments to compensate for delayed acknowledgments. - Default: 0 (off) - tcp_abort_on_overflow - BOOLEAN If listening service is too slow to accept new connections, reset them. Default state is FALSE. It means that if overflow diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4e1d2283e3cc..6d0d46138ae8 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -246,7 +246,6 @@ struct tcp_sock { u32 sacked_out; /* SACK'd packets */ u32 fackets_out; /* FACK'd packets */ u32 tso_deferred; - u32 bytes_acked; /* Appropriate Byte Counting - RFC3465 */ /* from STCP, retrans queue hinting */ struct sk_buff* lost_skb_hint; diff --git a/include/net/tcp.h b/include/net/tcp.h index 614af8b7758e..23f2e98d4b65 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -279,7 +279,6 @@ extern int sysctl_tcp_dma_copybreak; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; -extern int sysctl_tcp_abc; extern int sysctl_tcp_mtu_probing; extern int sysctl_tcp_base_mss; extern int sysctl_tcp_workaround_signed_windows; diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 5a6384450501..b669ca1fa103 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -387,7 +387,6 @@ static const struct bin_table bin_net_ipv4_table[] = { { CTL_INT, NET_TCP_MODERATE_RCVBUF, "tcp_moderate_rcvbuf" }, { CTL_INT, NET_TCP_TSO_WIN_DIVISOR, "tcp_tso_win_divisor" }, { CTL_STR, NET_TCP_CONG_CONTROL, "tcp_congestion_control" }, - { CTL_INT, NET_TCP_ABC, "tcp_abc" }, { CTL_INT, NET_TCP_MTU_PROBING, "tcp_mtu_probing" }, { CTL_INT, NET_TCP_BASE_MSS, "tcp_base_mss" }, { CTL_INT, NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, "tcp_workaround_signed_windows" }, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 2622707602d1..960fd29d9b8e 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -632,13 +632,6 @@ static struct ctl_table ipv4_table[] = { .maxlen = TCP_CA_NAME_MAX, .proc_handler = proc_tcp_congestion_control, }, - { - .procname = "tcp_abc", - .data = &sysctl_tcp_abc, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "tcp_mtu_probing", .data = &sysctl_tcp_mtu_probing, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3ec1f69c5ceb..2c7e5963c2ea 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2289,7 +2289,6 @@ int tcp_disconnect(struct sock *sk, int flags) tp->packets_out = 0; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_cnt = 0; - tp->bytes_acked = 0; tp->window_clamp = 0; tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index cdf2e707bb10..019c2389a341 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -317,28 +317,11 @@ void tcp_slow_start(struct tcp_sock *tp) snd_cwnd = 1U; } - /* RFC3465: ABC Slow start - * Increase only after a full MSS of bytes is acked - * - * TCP sender SHOULD increase cwnd by the number of - * previously unacknowledged bytes ACKed by each incoming - * acknowledgment, provided the increase is not more than L - */ - if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache) - return; - if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh) cnt = sysctl_tcp_max_ssthresh >> 1; /* limited slow start */ else cnt = snd_cwnd; /* exponential increase */ - /* RFC3465: ABC - * We MAY increase by 2 if discovered delayed ack - */ - if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) - cnt <<= 1; - tp->bytes_acked = 0; - tp->snd_cwnd_cnt += cnt; while (tp->snd_cwnd_cnt >= snd_cwnd) { tp->snd_cwnd_cnt -= snd_cwnd; @@ -378,20 +361,9 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) /* In "safe" area, increase. */ if (tp->snd_cwnd <= tp->snd_ssthresh) tcp_slow_start(tp); - /* In dangerous area, increase slowly. */ - else if (sysctl_tcp_abc) { - /* RFC3465: Appropriate Byte Count - * increase once for each full cwnd acked - */ - if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) { - tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache; - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } - } else { + else tcp_cong_avoid_ai(tp, tp->snd_cwnd); - } } EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e376aa9591bc..f56bd1082f54 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -98,7 +98,6 @@ int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; -int sysctl_tcp_abc __read_mostly; int sysctl_tcp_early_retrans __read_mostly = 2; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ @@ -2007,7 +2006,6 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; tp->frto_counter = 0; - tp->bytes_acked = 0; tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); @@ -2056,7 +2054,6 @@ void tcp_enter_loss(struct sock *sk, int how) tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; - tp->bytes_acked = 0; tcp_clear_retrans_partial(tp); if (tcp_is_reno(tp)) @@ -2684,7 +2681,6 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) struct tcp_sock *tp = tcp_sk(sk); tp->high_seq = tp->snd_nxt; - tp->bytes_acked = 0; tp->snd_cwnd_cnt = 0; tp->prior_cwnd = tp->snd_cwnd; tp->prr_delivered = 0; @@ -2735,7 +2731,6 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) struct tcp_sock *tp = tcp_sk(sk); tp->prior_ssthresh = 0; - tp->bytes_acked = 0; if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { tp->undo_marker = 0; tcp_init_cwnd_reduction(sk, set_ssthresh); @@ -3417,7 +3412,6 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp) { tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); tp->snd_cwnd_cnt = 0; - tp->bytes_acked = 0; TCP_ECN_queue_cwr(tp); tcp_moderate_cwnd(tp); } @@ -3609,15 +3603,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, prior_snd_una)) flag |= FLAG_SND_UNA_ADVANCED; - if (sysctl_tcp_abc) { - if (icsk->icsk_ca_state < TCP_CA_CWR) - tp->bytes_acked += ack - prior_snd_una; - else if (icsk->icsk_ca_state == TCP_CA_Loss) - /* we assume just one segment left network */ - tp->bytes_acked += min(ack - prior_snd_una, - tp->mss_cache); - } - prior_fackets = tp->fackets_out; prior_in_flight = tcp_packets_in_flight(tp); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f35f2dfb6401..f0409287b5f4 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -446,7 +446,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, */ newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; - newtp->bytes_acked = 0; newtp->frto_counter = 0; newtp->frto_highmark = 0; -- cgit v1.2.3-71-gd317 From ca99ca14c95ae49fb4c9cd3abf5f84d11a7e8a61 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 5 Feb 2013 08:05:43 +0000 Subject: netpoll: protect napi_poll and poll_controller during dev_[open|close] Ivan Vercera was recently backporting commit 9c13cb8bb477a83b9a3c9e5a5478a4e21294a760 to a RHEL kernel, and I noticed that, while this patch protects the tg3 driver from having its ndo_poll_controller routine called during device initalization, it does nothing for the driver during shutdown. I.e. it would be entirely possible to have the ndo_poll_controller method (or subsequently the ndo_poll) routine called for a driver in the netpoll path on CPU A while in parallel on CPU B, the ndo_close or ndo_open routine could be called. Given that the two latter routines tend to initizlize and free many data structures that the former two rely on, the result can easily be data corruption or various other crashes. Furthermore, it seems that this is potentially a problem with all net drivers that support netpoll, and so this should ideally be fixed in a common path. As Ben H Pointed out to me, we can't preform dev_open/dev_close in atomic context, so I've come up with this solution. We can use a mutex to sleep in open/close paths and just do a mutex_trylock in the napi poll path and abandon the poll attempt if we're locked, as we'll just retry the poll on the next send anyway. I've tested this here by flooding netconsole with messages on a system whos nic driver I modfied to periodically return NETDEV_TX_BUSY, so that the netpoll tx workqueue would be forced to send frames and poll the device. While this was going on I rapidly ifdown/up'ed the interface and watched for any problems. I've not found any. Signed-off-by: Neil Horman CC: Ivan Vecera CC: "David S. Miller" CC: Ben Hutchings CC: Francois Romieu CC: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netpoll.h | 11 ++++++++++- net/core/dev.c | 27 ++++++++++++++++++++++++++- net/core/netpoll.c | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index f54c3bb6a22b..ab856d507b7e 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -38,8 +38,9 @@ struct netpoll { struct netpoll_info { atomic_t refcnt; - int rx_flags; + unsigned long rx_flags; spinlock_t rx_lock; + struct mutex dev_lock; struct list_head rx_np; /* netpolls that registered an rx_hook */ struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */ @@ -51,6 +52,14 @@ struct netpoll_info { struct rcu_head rcu; }; +#ifdef CONFIG_NETPOLL +extern int netpoll_rx_disable(struct net_device *dev); +extern void netpoll_rx_enable(struct net_device *dev); +#else +static inline int netpoll_rx_disable(struct net_device *dev) { return 0; } +static inline void netpoll_rx_enable(struct net_device *dev) { return; } +#endif + void netpoll_send_udp(struct netpoll *np, const char *msg, int len); void netpoll_print_options(struct netpoll *np); int netpoll_parse_options(struct netpoll *np, char *opt); diff --git a/net/core/dev.c b/net/core/dev.c index e04bfdc9e3e4..2b275a7b8677 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1266,6 +1266,14 @@ static int __dev_open(struct net_device *dev) if (!netif_device_present(dev)) return -ENODEV; + /* Block netpoll from trying to do any rx path servicing. + * If we don't do this there is a chance ndo_poll_controller + * or ndo_poll may be running while we open the device + */ + ret = netpoll_rx_disable(dev); + if (ret) + return ret; + ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); ret = notifier_to_errno(ret); if (ret) @@ -1279,6 +1287,8 @@ static int __dev_open(struct net_device *dev) if (!ret && ops->ndo_open) ret = ops->ndo_open(dev); + netpoll_rx_enable(dev); + if (ret) clear_bit(__LINK_STATE_START, &dev->state); else { @@ -1370,9 +1380,16 @@ static int __dev_close(struct net_device *dev) int retval; LIST_HEAD(single); + /* Temporarily disable netpoll until the interface is down */ + retval = netpoll_rx_disable(dev); + if (retval) + return retval; + list_add(&dev->unreg_list, &single); retval = __dev_close_many(&single); list_del(&single); + + netpoll_rx_enable(dev); return retval; } @@ -1408,14 +1425,22 @@ static int dev_close_many(struct list_head *head) */ int dev_close(struct net_device *dev) { + int ret = 0; if (dev->flags & IFF_UP) { LIST_HEAD(single); + /* Block netpoll rx while the interface is going down */ + ret = netpoll_rx_disable(dev); + if (ret) + return ret; + list_add(&dev->unreg_list, &single); dev_close_many(&single); list_del(&single); + + netpoll_rx_enable(dev); } - return 0; + return ret; } EXPORT_SYMBOL(dev_close); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 331ccb90f915..edcd9ad95304 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -47,6 +47,8 @@ static struct sk_buff_head skb_pool; static atomic_t trapped; +static struct srcu_struct netpoll_srcu; + #define USEC_PER_POLL 50 #define NETPOLL_RX_ENABLED 1 #define NETPOLL_RX_DROP 2 @@ -199,6 +201,13 @@ static void netpoll_poll_dev(struct net_device *dev) const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); + /* Don't do any rx activity if the dev_lock mutex is held + * the dev_open/close paths use this to block netpoll activity + * while changing device state + */ + if (!mutex_trylock(&dev->npinfo->dev_lock)) + return; + if (!dev || !netif_running(dev)) return; @@ -211,6 +220,8 @@ static void netpoll_poll_dev(struct net_device *dev) poll_napi(dev); + mutex_unlock(&dev->npinfo->dev_lock); + if (dev->flags & IFF_SLAVE) { if (ni) { struct net_device *bond_dev; @@ -231,6 +242,31 @@ static void netpoll_poll_dev(struct net_device *dev) zap_completion_queue(); } +int netpoll_rx_disable(struct net_device *dev) +{ + struct netpoll_info *ni; + int idx; + might_sleep(); + idx = srcu_read_lock(&netpoll_srcu); + ni = srcu_dereference(dev->npinfo, &netpoll_srcu); + if (ni) + mutex_lock(&ni->dev_lock); + srcu_read_unlock(&netpoll_srcu, idx); + return 0; +} +EXPORT_SYMBOL(netpoll_rx_disable); + +void netpoll_rx_enable(struct net_device *dev) +{ + struct netpoll_info *ni; + rcu_read_lock(); + ni = rcu_dereference(dev->npinfo); + if (ni) + mutex_unlock(&ni->dev_lock); + rcu_read_unlock(); +} +EXPORT_SYMBOL(netpoll_rx_enable); + static void refill_skbs(void) { struct sk_buff *skb; @@ -1004,6 +1040,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) INIT_LIST_HEAD(&npinfo->rx_np); spin_lock_init(&npinfo->rx_lock); + mutex_init(&npinfo->dev_lock); skb_queue_head_init(&npinfo->neigh_tx); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); @@ -1169,6 +1206,7 @@ EXPORT_SYMBOL(netpoll_setup); static int __init netpoll_init(void) { skb_queue_head_init(&skb_pool); + init_srcu_struct(&netpoll_srcu); return 0; } core_initcall(netpoll_init); @@ -1208,6 +1246,8 @@ void __netpoll_cleanup(struct netpoll *np) spin_unlock_irqrestore(&npinfo->rx_lock, flags); } + synchronize_srcu(&netpoll_srcu); + if (atomic_dec_and_test(&npinfo->refcnt)) { const struct net_device_ops *ops; -- cgit v1.2.3-71-gd317 From 3b72c2fe0c6bbec42ed7f899931daef227b80322 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Tue, 5 Feb 2013 08:26:48 +0000 Subject: drivers: net:ethernet: cpsw: add support for VLAN adding support for VLAN interface for cpsw. CPSW VLAN Capability * Can filter VLAN packets in Hardware Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 106 ++++++++++++++++++++++++++++++++++++- drivers/net/ethernet/ti/cpsw_ale.h | 4 ++ include/linux/platform_data/cpsw.h | 1 + 3 files changed, 109 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 534bf7bc34db..888708ceb13c 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -32,6 +32,7 @@ #include #include #include +#include #include @@ -118,6 +119,9 @@ do { \ #define TX_PRIORITY_MAPPING 0x33221100 #define CPDMA_TX_PRIORITY_MAP 0x76543210 +#define CPSW_VLAN_AWARE BIT(1) +#define CPSW_ALE_VLAN_AWARE 1 + #define cpsw_enable_irq(priv) \ do { \ u32 i; \ @@ -607,14 +611,40 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) } } +static inline void cpsw_add_default_vlan(struct cpsw_priv *priv) +{ + const int vlan = priv->data.default_vlan; + const int port = priv->host_port; + u32 reg; + int i; + + reg = (priv->version == CPSW_VERSION_1) ? CPSW1_PORT_VLAN : + CPSW2_PORT_VLAN; + + writel(vlan, &priv->host_port_regs->port_vlan); + + for (i = 0; i < 2; i++) + slave_write(priv->slaves + i, vlan, reg); + + cpsw_ale_add_vlan(priv->ale, vlan, ALE_ALL_PORTS << port, + ALE_ALL_PORTS << port, ALE_ALL_PORTS << port, + (ALE_PORT_1 | ALE_PORT_2) << port); +} + static void cpsw_init_host_port(struct cpsw_priv *priv) { + u32 control_reg; + /* soft reset the controller and initialize ale */ soft_reset("cpsw", &priv->regs->soft_reset); cpsw_ale_start(priv->ale); /* switch to vlan unaware mode */ - cpsw_ale_control_set(priv->ale, 0, ALE_VLAN_AWARE, 0); + cpsw_ale_control_set(priv->ale, priv->host_port, ALE_VLAN_AWARE, + CPSW_ALE_VLAN_AWARE); + control_reg = readl(&priv->regs->control); + control_reg |= CPSW_VLAN_AWARE; + writel(control_reg, &priv->regs->control); /* setup host port priority mapping */ __raw_writel(CPDMA_TX_PRIORITY_MAP, @@ -650,6 +680,9 @@ static int cpsw_ndo_open(struct net_device *ndev) cpsw_init_host_port(priv); for_each_slave(priv, cpsw_slave_open, priv); + /* Add default VLAN */ + cpsw_add_default_vlan(priv); + /* setup tx dma to fixed prio and zero offset */ cpdma_control_set(priv->dma, CPDMA_TX_PRIO_FIXED, 1); cpdma_control_set(priv->dma, CPDMA_RX_BUFFER_OFFSET, 0); @@ -933,6 +966,73 @@ static void cpsw_ndo_poll_controller(struct net_device *ndev) } #endif +static inline int cpsw_add_vlan_ale_entry(struct cpsw_priv *priv, + unsigned short vid) +{ + int ret; + + ret = cpsw_ale_add_vlan(priv->ale, vid, + ALE_ALL_PORTS << priv->host_port, + 0, ALE_ALL_PORTS << priv->host_port, + (ALE_PORT_1 | ALE_PORT_2) << priv->host_port); + if (ret != 0) + return ret; + + ret = cpsw_ale_add_ucast(priv->ale, priv->mac_addr, + priv->host_port, ALE_VLAN, vid); + if (ret != 0) + goto clean_vid; + + ret = cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, + ALE_ALL_PORTS << priv->host_port, + ALE_VLAN, vid, 0); + if (ret != 0) + goto clean_vlan_ucast; + return 0; + +clean_vlan_ucast: + cpsw_ale_del_ucast(priv->ale, priv->mac_addr, + priv->host_port, ALE_VLAN, vid); +clean_vid: + cpsw_ale_del_vlan(priv->ale, vid, 0); + return ret; +} + +static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, + unsigned short vid) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + + if (vid == priv->data.default_vlan) + return 0; + + dev_info(priv->dev, "Adding vlanid %d to vlan filter\n", vid); + return cpsw_add_vlan_ale_entry(priv, vid); +} + +static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev, + unsigned short vid) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + int ret; + + if (vid == priv->data.default_vlan) + return 0; + + dev_info(priv->dev, "removing vlanid %d from vlan filter\n", vid); + ret = cpsw_ale_del_vlan(priv->ale, vid, 0); + if (ret != 0) + return ret; + + ret = cpsw_ale_del_ucast(priv->ale, priv->mac_addr, + priv->host_port, ALE_VLAN, vid); + if (ret != 0) + return ret; + + return cpsw_ale_del_mcast(priv->ale, priv->ndev->broadcast, + 0, ALE_VLAN, vid); +} + static const struct net_device_ops cpsw_netdev_ops = { .ndo_open = cpsw_ndo_open, .ndo_stop = cpsw_ndo_stop, @@ -947,6 +1047,8 @@ static const struct net_device_ops cpsw_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = cpsw_ndo_poll_controller, #endif + .ndo_vlan_rx_add_vid = cpsw_ndo_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = cpsw_ndo_vlan_rx_kill_vid, }; static void cpsw_get_drvinfo(struct net_device *ndev, @@ -1354,7 +1456,7 @@ static int cpsw_probe(struct platform_device *pdev) k++; } - ndev->flags |= IFF_ALLMULTI; /* see cpsw_ndo_change_rx_flags() */ + ndev->features |= NETIF_F_HW_VLAN_FILTER; ndev->netdev_ops = &cpsw_netdev_ops; SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops); diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h index a002417f952b..30daa1265f0c 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.h +++ b/drivers/net/ethernet/ti/cpsw_ale.h @@ -69,6 +69,10 @@ enum cpsw_ale_port_state { #define ALE_SUPER BIT(2) #define ALE_VLAN BIT(3) +#define ALE_PORT_HOST BIT(0) +#define ALE_PORT_1 BIT(1) +#define ALE_PORT_2 BIT(2) + #define ALE_MCAST_FWD 0 #define ALE_MCAST_BLOCK_LEARN_FWD 1 #define ALE_MCAST_FWD_LEARN 2 diff --git a/include/linux/platform_data/cpsw.h b/include/linux/platform_data/cpsw.h index 24368a2e8b87..e962cfd552e3 100644 --- a/include/linux/platform_data/cpsw.h +++ b/include/linux/platform_data/cpsw.h @@ -35,6 +35,7 @@ struct cpsw_platform_data { u32 bd_ram_size; /*buffer descriptor ram size */ u32 rx_descs; /* Number of Rx Descriptios */ u32 mac_control; /* Mac control register */ + u16 default_vlan; /* Def VLAN for ALE lookup in VLAN aware mode*/ }; #endif /* __CPSW_H__ */ -- cgit v1.2.3-71-gd317 From e185483e6b84c127d0b1c890b6b703701ae52d35 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Tue, 5 Feb 2013 09:30:55 +0000 Subject: team: allow userspace to take control over carrier Some modes don't require any special carrier handling so in these cases, the kernel can control the carrier as for any other interface. However, some other modes, e.g. lacp, requires more than just that, so userspace needs to control the carrier itself. The daemon today is ready to control it, but the kernel still can change it based on events. This fix so that either kernel or userspace is controlling the carrier. Signed-off-by: Flavio Leitner Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 8 ++++++++ include/linux/if_team.h | 1 + 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 694ccf6d71a3..05c5efe84591 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -508,6 +508,7 @@ static bool team_is_mode_set(struct team *team) static void team_set_no_mode(struct team *team) { + team->user_carrier_enabled = false; team->mode = &__team_no_mode; } @@ -1710,6 +1711,10 @@ static netdev_features_t team_fix_features(struct net_device *dev, static int team_change_carrier(struct net_device *dev, bool new_carrier) { + struct team *team = netdev_priv(dev); + + team->user_carrier_enabled = true; + if (new_carrier) netif_carrier_on(dev); else @@ -2573,6 +2578,9 @@ static void __team_carrier_check(struct team *team) struct team_port *port; bool team_linkup; + if (team->user_carrier_enabled) + return; + team_linkup = false; list_for_each_entry(port, &team->port_list, list) { if (port->linkup) { diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 0245def2aa93..4648d8021244 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -186,6 +186,7 @@ struct team { const struct team_mode *mode; struct team_mode_ops ops; + bool user_carrier_enabled; bool queue_override_enabled; struct list_head *qom_lists; /* array of queue override mapping lists */ long mode_priv[TEAM_MODE_PRIV_LONGS]; -- cgit v1.2.3-71-gd317 From 12b0004d1d1e2a9aa667412d479041e403bcafae Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 5 Feb 2013 16:36:38 +0000 Subject: net: adjust skb_gso_segment() for calling in rx path skb_gso_segment() is almost always called in tx path, except for openvswitch. It calls this function when it receives the packet and tries to queue it to user-space. In this special case, the ->ip_summed check inside skb_gso_segment() is no longer true, as ->ip_summed value has different meanings on rx path. This patch adjusts skb_gso_segment() so that we can at least avoid such warnings on checksum. Cc: Jesse Gross Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 11 +++++++++-- net/core/dev.c | 21 ++++++++++++++++----- net/openvswitch/datapath.c | 2 +- 3 files changed, 26 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 85b0949d9946..ab2774eb49e8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2662,8 +2662,15 @@ extern int netdev_master_upper_dev_link(struct net_device *dev, extern void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); extern int skb_checksum_help(struct sk_buff *skb); -extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, - netdev_features_t features); +extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path); + +static inline +struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) +{ + return __skb_gso_segment(skb, features, true); +} + #ifdef CONFIG_BUG extern void netdev_rx_csum_fault(struct net_device *dev); #else diff --git a/net/core/dev.c b/net/core/dev.c index 2b275a7b8677..65da698c500b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2327,18 +2327,29 @@ out: } EXPORT_SYMBOL(skb_checksum_help); +/* openvswitch calls this on rx path, so we need a different check. + */ +static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) +{ + if (tx_path) + return skb->ip_summed != CHECKSUM_PARTIAL; + else + return skb->ip_summed == CHECKSUM_NONE; +} + /** - * skb_gso_segment - Perform segmentation on skb. + * __skb_gso_segment - Perform segmentation on skb. * @skb: buffer to segment * @features: features for the output path (see dev->features) + * @tx_path: whether it is called in TX path * * This function segments the given skb and returns a list of segments. * * It may return NULL if the skb requires no segmentation. This is * only possible when GSO is used for verifying header integrity. */ -struct sk_buff *skb_gso_segment(struct sk_buff *skb, - netdev_features_t features) +struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_offload *ptype; @@ -2361,7 +2372,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, skb->mac_len = skb->network_header - skb->mac_header; __skb_pull(skb, skb->mac_len); - if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + if (unlikely(skb_needs_check(skb, tx_path))) { skb_warn_bad_offload(skb); if (skb_header_cloned(skb) && @@ -2390,7 +2401,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, return segs; } -EXPORT_SYMBOL(skb_gso_segment); +EXPORT_SYMBOL(__skb_gso_segment); /* Take action when hardware reception checksum errors are detected. */ #ifdef CONFIG_BUG diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d8c13a965459..9dc537df46c4 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -301,7 +301,7 @@ static int queue_gso_packets(struct net *net, int dp_ifindex, struct sk_buff *segs, *nskb; int err; - segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM); + segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false); if (IS_ERR(segs)) return PTR_ERR(segs); -- cgit v1.2.3-71-gd317 From cd431e738509e74726055390c9e5e81e8e7e03ec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Feb 2013 20:22:50 +0000 Subject: macvlan: add multicast filter Setting up IPv6 addresses on configurations with many macvlans is not really working, as many multicast messages are dropped. Add a multicast filter to macvlan to reduce the amount of cloned skbs and overhead. Successfully tested with 1024 macvlans on one ethernet device. Signed-off-by: Eric Dumazet Cc: Ben Greear Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 23 +++++++++++++++++++++++ include/linux/if_macvlan.h | 6 ++++++ 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 7b44ebd7770e..f494da82c33f 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -126,6 +127,13 @@ static int macvlan_broadcast_one(struct sk_buff *skb, return vlan->receive(skb); } +static unsigned int mc_hash(const unsigned char *addr) +{ + u32 val = __get_unaligned_cpu32(addr + 2); + + return hash_32(val, MACVLAN_MC_FILTER_BITS); +} + static void macvlan_broadcast(struct sk_buff *skb, const struct macvlan_port *port, struct net_device *src, @@ -137,6 +145,7 @@ static void macvlan_broadcast(struct sk_buff *skb, struct sk_buff *nskb; unsigned int i; int err; + unsigned int hash = mc_hash(eth->h_dest); if (skb->protocol == htons(ETH_P_PAUSE)) return; @@ -146,6 +155,8 @@ static void macvlan_broadcast(struct sk_buff *skb, if (vlan->dev == src || !(vlan->mode & mode)) continue; + if (!test_bit(hash, vlan->mc_filter)) + continue; nskb = skb_clone(skb, GFP_ATOMIC); err = macvlan_broadcast_one(nskb, vlan, eth, mode == MACVLAN_MODE_BRIDGE); @@ -405,6 +416,18 @@ static void macvlan_set_mac_lists(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); + if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) { + bitmap_fill(vlan->mc_filter, MACVLAN_MC_FILTER_SZ); + } else { + struct netdev_hw_addr *ha; + DECLARE_BITMAP(filter, MACVLAN_MC_FILTER_SZ); + + bitmap_zero(filter, MACVLAN_MC_FILTER_SZ); + netdev_for_each_mc_addr(ha, dev) { + __set_bit(mc_hash(ha->addr), filter); + } + bitmap_copy(vlan->mc_filter, filter, MACVLAN_MC_FILTER_SZ); + } dev_uc_sync(vlan->lowerdev, dev); dev_mc_sync(vlan->lowerdev, dev); } diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index f65e8d250f7e..84dde1dd1da4 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -52,6 +52,9 @@ struct macvlan_pcpu_stats { */ #define MAX_MACVTAP_QUEUES (NR_CPUS < 16 ? NR_CPUS : 16) +#define MACVLAN_MC_FILTER_BITS 8 +#define MACVLAN_MC_FILTER_SZ (1 << MACVLAN_MC_FILTER_BITS) + struct macvlan_dev { struct net_device *dev; struct list_head list; @@ -59,6 +62,9 @@ struct macvlan_dev { struct macvlan_port *port; struct net_device *lowerdev; struct macvlan_pcpu_stats __percpu *pcpu_stats; + + DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ); + enum macvlan_mode mode; u16 flags; int (*receive)(struct sk_buff *skb); -- cgit v1.2.3-71-gd317 From 16a10ffd20a13215243bdba64c8e57ef277a55b9 Mon Sep 17 00:00:00 2001 From: Yan Burman Date: Thu, 7 Feb 2013 02:25:22 +0000 Subject: net/mlx4: Move Ethernet related functionality from mlx4_core to mlx4_en Move low level code that deals with management of Ethernet MACs and QPs from mlx4_core to mlx4_en. Also convert the new functions to deal with MACs in form of char array instead of u64. Actual functions moved: mlx4_replace_mac mlx4_get_eth_qp mlx4_put_eth_qp To conduct this change, some functionality had to be exported from the core, the following functions were added: mlx4_get_base_qp __mlx4_replace_mac (low level function for CX1/A0 compatibility) Signed-off-by: Yan Burman Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 216 +++++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx4/main.c | 5 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 7 - drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 6 + drivers/net/ethernet/mellanox/mlx4/port.c | 193 +--------------------- include/linux/mlx4/device.h | 5 +- 6 files changed, 224 insertions(+), 208 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 0843dd793aa7..63a1ef348387 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -420,6 +420,206 @@ static void mlx4_en_u64_to_mac(unsigned char dst_mac[ETH_ALEN + 2], u64 src_mac) memset(&dst_mac[ETH_ALEN], 0, 2); } +static int mlx4_en_uc_steer_add(struct mlx4_en_priv *priv, + unsigned char *mac, int *qpn, u64 *reg_id) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_dev *dev = mdev->dev; + int err; + + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_B0: { + struct mlx4_qp qp; + u8 gid[16] = {0}; + + qp.qpn = *qpn; + memcpy(&gid[10], mac, ETH_ALEN); + gid[5] = priv->port; + + err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH); + break; + } + case MLX4_STEERING_MODE_DEVICE_MANAGED: { + struct mlx4_spec_list spec_eth = { {NULL} }; + __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); + + struct mlx4_net_trans_rule rule = { + .queue_mode = MLX4_NET_TRANS_Q_FIFO, + .exclusive = 0, + .allow_loopback = 1, + .promisc_mode = MLX4_FS_PROMISC_NONE, + .priority = MLX4_DOMAIN_NIC, + }; + + rule.port = priv->port; + rule.qpn = *qpn; + INIT_LIST_HEAD(&rule.list); + + spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH; + memcpy(spec_eth.eth.dst_mac, mac, ETH_ALEN); + memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); + list_add_tail(&spec_eth.list, &rule.list); + + err = mlx4_flow_attach(dev, &rule, reg_id); + break; + } + default: + return -EINVAL; + } + if (err) + en_warn(priv, "Failed Attaching Unicast\n"); + + return err; +} + +static void mlx4_en_uc_steer_release(struct mlx4_en_priv *priv, + unsigned char *mac, int qpn, u64 reg_id) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_dev *dev = mdev->dev; + + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_B0: { + struct mlx4_qp qp; + u8 gid[16] = {0}; + + qp.qpn = qpn; + memcpy(&gid[10], mac, ETH_ALEN); + gid[5] = priv->port; + + mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH); + break; + } + case MLX4_STEERING_MODE_DEVICE_MANAGED: { + mlx4_flow_detach(dev, reg_id); + break; + } + default: + en_err(priv, "Invalid steering mode.\n"); + } +} + +static int mlx4_en_get_qp(struct mlx4_en_priv *priv) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_dev *dev = mdev->dev; + struct mlx4_mac_entry *entry; + int index = 0; + int err = 0; + u64 reg_id; + int *qpn = &priv->base_qpn; + u64 mac = mlx4_en_mac_to_u64(priv->dev->dev_addr); + + en_dbg(DRV, priv, "Registering MAC: %pM for adding\n", + priv->dev->dev_addr); + index = mlx4_register_mac(dev, priv->port, mac); + if (index < 0) { + err = index; + en_err(priv, "Failed adding MAC: %pM\n", + priv->dev->dev_addr); + return err; + } + + if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { + int base_qpn = mlx4_get_base_qpn(dev, priv->port); + *qpn = base_qpn + index; + return 0; + } + + err = mlx4_qp_reserve_range(dev, 1, 1, qpn); + en_dbg(DRV, priv, "Reserved qp %d\n", *qpn); + if (err) { + en_err(priv, "Failed to reserve qp for mac registration\n"); + goto qp_err; + } + + err = mlx4_en_uc_steer_add(priv, priv->dev->dev_addr, qpn, ®_id); + if (err) + goto steer_err; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + err = -ENOMEM; + goto alloc_err; + } + memcpy(entry->mac, priv->dev->dev_addr, sizeof(entry->mac)); + entry->reg_id = reg_id; + + err = radix_tree_insert(&priv->mac_tree, *qpn, entry); + if (err) + goto insert_err; + return 0; + +insert_err: + kfree(entry); + +alloc_err: + mlx4_en_uc_steer_release(priv, priv->dev->dev_addr, *qpn, reg_id); + +steer_err: + mlx4_qp_release_range(dev, *qpn, 1); + +qp_err: + mlx4_unregister_mac(dev, priv->port, mac); + return err; +} + +static void mlx4_en_put_qp(struct mlx4_en_priv *priv) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_dev *dev = mdev->dev; + struct mlx4_mac_entry *entry; + int qpn = priv->base_qpn; + u64 mac = mlx4_en_mac_to_u64(priv->dev->dev_addr); + + en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", + priv->dev->dev_addr); + mlx4_unregister_mac(dev, priv->port, mac); + + if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) { + entry = radix_tree_lookup(&priv->mac_tree, qpn); + if (entry) { + en_dbg(DRV, priv, "Releasing qp: port %d, MAC %pM, qpn %d\n", + priv->port, entry->mac, qpn); + mlx4_en_uc_steer_release(priv, entry->mac, + qpn, entry->reg_id); + mlx4_qp_release_range(dev, qpn, 1); + radix_tree_delete(&priv->mac_tree, qpn); + kfree(entry); + } + } +} + +static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn, + unsigned char *new_mac) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_dev *dev = mdev->dev; + struct mlx4_mac_entry *entry; + int err = 0; + u64 new_mac_u64 = mlx4_en_mac_to_u64(new_mac); + + if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) { + u64 prev_mac_u64; + + entry = radix_tree_lookup(&priv->mac_tree, qpn); + if (!entry) + return -EINVAL; + prev_mac_u64 = mlx4_en_mac_to_u64(entry->mac); + mlx4_en_uc_steer_release(priv, entry->mac, + qpn, entry->reg_id); + mlx4_unregister_mac(dev, priv->port, prev_mac_u64); + memcpy(entry->mac, new_mac, ETH_ALEN); + entry->reg_id = 0; + mlx4_register_mac(dev, priv->port, new_mac_u64); + err = mlx4_en_uc_steer_add(priv, new_mac, + &qpn, &entry->reg_id); + return err; + } + + return __mlx4_replace_mac(dev, priv->port, qpn, new_mac_u64); +} + u64 mlx4_en_mac_to_u64(u8 *addr) { u64 mac = 0; @@ -456,9 +656,8 @@ static void mlx4_en_do_set_mac(struct work_struct *work) mutex_lock(&mdev->state_lock); if (priv->port_up) { /* Remove old MAC and insert the new one */ - u64 mac = mlx4_en_mac_to_u64(priv->dev->dev_addr); - err = mlx4_replace_mac(mdev->dev, priv->port, - priv->base_qpn, mac); + err = mlx4_en_replace_mac(priv, priv->base_qpn, + priv->dev->dev_addr); if (err) en_err(priv, "Failed changing HW MAC address\n"); memcpy(priv->prev_mac, priv->dev->dev_addr, @@ -1035,7 +1234,6 @@ int mlx4_en_start_port(struct net_device *dev) int i; int j; u8 mc_list[16] = {0}; - u64 mac = mlx4_en_mac_to_u64(dev->dev_addr); if (priv->port_up) { en_dbg(DRV, priv, "start port called while port already up\n"); @@ -1082,8 +1280,7 @@ int mlx4_en_start_port(struct net_device *dev) /* Set qp number */ en_dbg(DRV, priv, "Getting qp number for port %d\n", priv->port); - err = mlx4_get_eth_qp(mdev->dev, priv->port, - mac, &priv->base_qpn); + err = mlx4_en_get_qp(priv); if (err) { en_err(priv, "Failed getting eth qp\n"); goto cq_err; @@ -1196,7 +1393,7 @@ tx_err: rss_err: mlx4_en_release_rss_steer(priv); mac_err: - mlx4_put_eth_qp(mdev->dev, priv->port, mac, priv->base_qpn); + mlx4_en_put_qp(priv); cq_err: while (rx_index--) mlx4_en_deactivate_cq(priv, &priv->rx_cq[rx_index]); @@ -1215,7 +1412,6 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) struct ethtool_flow_id *flow, *tmp_flow; int i; u8 mc_list[16] = {0}; - u64 mac = mlx4_en_mac_to_u64(dev->dev_addr); if (!priv->port_up) { en_dbg(DRV, priv, "stop port called while port already down\n"); @@ -1296,7 +1492,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) mlx4_en_release_rss_steer(priv); /* Unregister Mac address for the port */ - mlx4_put_eth_qp(mdev->dev, priv->port, mac, priv->base_qpn); + mlx4_en_put_qp(priv); if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN)) mdev->mac_removed[priv->port] = 1; @@ -1661,6 +1857,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->dcbnl_ops = &mlx4_en_dcbnl_ops; #endif + INIT_RADIX_TREE(&priv->mac_tree, GFP_KERNEL); + /* Query for default mac and max mtu */ priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port]; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 12ddae6efce3..2d7b9377883a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1833,12 +1833,9 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) info->dev = dev; info->port = port; if (!mlx4_is_slave(dev)) { - INIT_RADIX_TREE(&info->mac_tree, GFP_KERNEL); mlx4_init_mac_table(dev, &info->mac_table); mlx4_init_vlan_table(dev, &info->vlan_table); - info->base_qpn = - dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] + - (port - 1) * (1 << log_num_mac); + info->base_qpn = mlx4_get_base_qpn(dev, port); } sprintf(info->dev_name, "mlx4_port%d", port); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 172daaa29a9e..ed4a6959e828 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -653,11 +653,6 @@ struct mlx4_set_port_rqp_calc_context { __be32 mcast; }; -struct mlx4_mac_entry { - u64 mac; - u64 reg_id; -}; - struct mlx4_port_info { struct mlx4_dev *dev; int port; @@ -667,7 +662,6 @@ struct mlx4_port_info { char dev_mtu_name[16]; struct device_attribute port_mtu_attr; struct mlx4_mac_table mac_table; - struct radix_tree_root mac_tree; struct mlx4_vlan_table vlan_table; int base_qpn; }; @@ -916,7 +910,6 @@ int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac); void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac); -int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac); int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 9d0105d3eaf3..84ed328582ac 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -521,6 +521,7 @@ struct mlx4_en_priv { bool wol; struct device *ddev; int base_tx_qpn; + struct radix_tree_root mac_tree; #ifdef CONFIG_MLX4_EN_DCB struct ieee_ets ets; @@ -540,6 +541,11 @@ enum mlx4_en_wol { MLX4_EN_WOL_ENABLED = (1ULL << 62), }; +struct mlx4_mac_entry { + unsigned char mac[ETH_ALEN + 2]; + u64 reg_id; +}; + #define MLX4_EN_WOL_DO_MODIFY (1ULL << 63) void mlx4_en_update_loopback_state(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 4c51b05efa28..719ead15e491 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -74,87 +74,6 @@ void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table) table->total = 0; } -static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port, - u64 mac, int *qpn, u64 *reg_id) -{ - __be64 be_mac; - int err; - - mac &= MLX4_MAC_MASK; - be_mac = cpu_to_be64(mac << 16); - - switch (dev->caps.steering_mode) { - case MLX4_STEERING_MODE_B0: { - struct mlx4_qp qp; - u8 gid[16] = {0}; - - qp.qpn = *qpn; - memcpy(&gid[10], &be_mac, ETH_ALEN); - gid[5] = port; - - err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH); - break; - } - case MLX4_STEERING_MODE_DEVICE_MANAGED: { - struct mlx4_spec_list spec_eth = { {NULL} }; - __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); - - struct mlx4_net_trans_rule rule = { - .queue_mode = MLX4_NET_TRANS_Q_FIFO, - .exclusive = 0, - .allow_loopback = 1, - .promisc_mode = MLX4_FS_PROMISC_NONE, - .priority = MLX4_DOMAIN_NIC, - }; - - rule.port = port; - rule.qpn = *qpn; - INIT_LIST_HEAD(&rule.list); - - spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH; - memcpy(spec_eth.eth.dst_mac, &be_mac, ETH_ALEN); - memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); - list_add_tail(&spec_eth.list, &rule.list); - - err = mlx4_flow_attach(dev, &rule, reg_id); - break; - } - default: - return -EINVAL; - } - if (err) - mlx4_warn(dev, "Failed Attaching Unicast\n"); - - return err; -} - -static void mlx4_uc_steer_release(struct mlx4_dev *dev, u8 port, - u64 mac, int qpn, u64 reg_id) -{ - switch (dev->caps.steering_mode) { - case MLX4_STEERING_MODE_B0: { - struct mlx4_qp qp; - u8 gid[16] = {0}; - __be64 be_mac; - - qp.qpn = qpn; - mac &= MLX4_MAC_MASK; - be_mac = cpu_to_be64(mac << 16); - memcpy(&gid[10], &be_mac, ETH_ALEN); - gid[5] = port; - - mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH); - break; - } - case MLX4_STEERING_MODE_DEVICE_MANAGED: { - mlx4_flow_detach(dev, reg_id); - break; - } - default: - mlx4_err(dev, "Invalid steering mode.\n"); - } -} - static int validate_index(struct mlx4_dev *dev, struct mlx4_mac_table *table, int index) { @@ -181,92 +100,6 @@ static int find_index(struct mlx4_dev *dev, return -EINVAL; } -int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn) -{ - struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; - struct mlx4_mac_entry *entry; - int index = 0; - int err = 0; - u64 reg_id; - - mlx4_dbg(dev, "Registering MAC: 0x%llx for adding\n", - (unsigned long long) mac); - index = mlx4_register_mac(dev, port, mac); - if (index < 0) { - err = index; - mlx4_err(dev, "Failed adding MAC: 0x%llx\n", - (unsigned long long) mac); - return err; - } - - if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { - *qpn = info->base_qpn + index; - return 0; - } - - err = mlx4_qp_reserve_range(dev, 1, 1, qpn); - mlx4_dbg(dev, "Reserved qp %d\n", *qpn); - if (err) { - mlx4_err(dev, "Failed to reserve qp for mac registration\n"); - goto qp_err; - } - - err = mlx4_uc_steer_add(dev, port, mac, qpn, ®_id); - if (err) - goto steer_err; - - entry = kmalloc(sizeof *entry, GFP_KERNEL); - if (!entry) { - err = -ENOMEM; - goto alloc_err; - } - entry->mac = mac; - entry->reg_id = reg_id; - err = radix_tree_insert(&info->mac_tree, *qpn, entry); - if (err) - goto insert_err; - return 0; - -insert_err: - kfree(entry); - -alloc_err: - mlx4_uc_steer_release(dev, port, mac, *qpn, reg_id); - -steer_err: - mlx4_qp_release_range(dev, *qpn, 1); - -qp_err: - mlx4_unregister_mac(dev, port, mac); - return err; -} -EXPORT_SYMBOL_GPL(mlx4_get_eth_qp); - -void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn) -{ - struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; - struct mlx4_mac_entry *entry; - - mlx4_dbg(dev, "Registering MAC: 0x%llx for deleting\n", - (unsigned long long) mac); - mlx4_unregister_mac(dev, port, mac); - - if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) { - entry = radix_tree_lookup(&info->mac_tree, qpn); - if (entry) { - mlx4_dbg(dev, "Releasing qp: port %d, mac 0x%llx," - " qpn %d\n", port, - (unsigned long long) mac, qpn); - mlx4_uc_steer_release(dev, port, entry->mac, - qpn, entry->reg_id); - mlx4_qp_release_range(dev, qpn, 1); - radix_tree_delete(&info->mac_tree, qpn); - kfree(entry); - } - } -} -EXPORT_SYMBOL_GPL(mlx4_put_eth_qp); - static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port, __be64 *entries) { @@ -359,6 +192,12 @@ int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) } EXPORT_SYMBOL_GPL(mlx4_register_mac); +int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port) +{ + return dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] + + (port - 1) * (1 << dev->caps.log_num_macs); +} +EXPORT_SYMBOL_GPL(mlx4_get_base_qpn); void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac) { @@ -397,29 +236,13 @@ void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac) } EXPORT_SYMBOL_GPL(mlx4_unregister_mac); -int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac) +int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac) { struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; struct mlx4_mac_table *table = &info->mac_table; - struct mlx4_mac_entry *entry; int index = qpn - info->base_qpn; int err = 0; - if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) { - entry = radix_tree_lookup(&info->mac_tree, qpn); - if (!entry) - return -EINVAL; - mlx4_uc_steer_release(dev, port, entry->mac, - qpn, entry->reg_id); - mlx4_unregister_mac(dev, port, entry->mac); - entry->mac = new_mac; - entry->reg_id = 0; - mlx4_register_mac(dev, port, new_mac); - err = mlx4_uc_steer_add(dev, port, entry->mac, - &qpn, &entry->reg_id); - return err; - } - /* CX1 doesn't support multi-functions */ mutex_lock(&table->mutex); @@ -439,7 +262,7 @@ out: mutex_unlock(&table->mutex); return err; } -EXPORT_SYMBOL_GPL(mlx4_replace_mac); +EXPORT_SYMBOL_GPL(__mlx4_replace_mac); static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port, __be32 *entries) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 1883e8e84718..6d48fce06b4a 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -956,9 +956,8 @@ int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mo int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac); void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac); -int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac); -int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn); -void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn); +int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port); +int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac); void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap); int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx); -- cgit v1.2.3-71-gd317 From 180996c30517b5374f63df3c9765716c5b477155 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Thu, 7 Feb 2013 05:37:37 +0000 Subject: ssb: get mac address from sprom struct for gige driver The mac address is already stored in the sprom structure by the platform code of the SoC this Ethernet core is found on, it just has to be fetched from this structure instead of accessing the nvram here. This patch also adds a return value to indicate if a mac address could be fetched from the sprom structure. When CONFIG_SSB_DRIVER_GIGE is not set the header file now also declares ssb_gige_get_macaddr(). Signed-off-by: Hauke Mehrtens Acked-by: Michael Buesch Signed-off-by: David S. Miller --- include/linux/ssb/ssb_driver_gige.h | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb_driver_gige.h b/include/linux/ssb/ssb_driver_gige.h index 6b05dcd927ff..86a12b0cb239 100644 --- a/include/linux/ssb/ssb_driver_gige.h +++ b/include/linux/ssb/ssb_driver_gige.h @@ -97,21 +97,16 @@ static inline bool ssb_gige_must_flush_posted_writes(struct pci_dev *pdev) return 0; } -#ifdef CONFIG_BCM47XX -#include /* Get the device MAC address */ -static inline void ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr) -{ - char buf[20]; - if (nvram_getenv("et0macaddr", buf, sizeof(buf)) < 0) - return; - nvram_parse_macaddr(buf, macaddr); -} -#else -static inline void ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr) +static inline int ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr) { + struct ssb_gige *dev = pdev_to_ssb_gige(pdev); + if (!dev) + return -ENODEV; + + memcpy(macaddr, dev->dev->bus->sprom.et0mac, 6); + return 0; } -#endif extern int ssb_gige_pcibios_plat_dev_init(struct ssb_device *sdev, struct pci_dev *pdev); @@ -175,6 +170,10 @@ static inline bool ssb_gige_must_flush_posted_writes(struct pci_dev *pdev) { return 0; } +static inline int ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr) +{ + return -ENODEV; +} #endif /* CONFIG_SSB_DRIVER_GIGE */ #endif /* LINUX_SSB_DRIVER_GIGE_H_ */ -- cgit v1.2.3-71-gd317 From 7e6c63f03d94278135753fef7ffcc5b03e34282e Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Thu, 7 Feb 2013 05:37:39 +0000 Subject: tg3: add support for Ethernet core in bcm4785 The BCM4785 or sometimes named BMC4705 is a Broadcom SoC which a Gigabit 5750 Ethernet core. The core is connected via PCI with the rest of the SoC, but it uses some extension. This core does not use a firmware or an eeprom. Some devices only have a switch which supports 100MBit/s, this currently does not work with this driver. This patch was original written by Michael Buesch and is in OpenWrt for some years now. This was tested on a Linksys WRT610N V1 and older versions of this patch were tested by other people on different devices. Signed-off-by: Hauke Mehrtens Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 117 ++++++++++++++++++++++++++++++++++-- drivers/net/ethernet/broadcom/tg3.h | 5 ++ include/linux/pci_ids.h | 1 + 3 files changed, 117 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index d9e81d6be7b9..b1b3bc01cbc2 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -263,6 +264,7 @@ static DEFINE_PCI_DEVICE_TABLE(tg3_pci_tbl) = { TG3_DRV_DATA_FLAG_5705_10_100}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5721)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5722)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751M)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751F), @@ -573,7 +575,9 @@ static void _tw32_flush(struct tg3 *tp, u32 off, u32 val, u32 usec_wait) static inline void tw32_mailbox_flush(struct tg3 *tp, u32 off, u32 val) { tp->write32_mbox(tp, off, val); - if (!tg3_flag(tp, MBOX_WRITE_REORDER) && !tg3_flag(tp, ICH_WORKAROUND)) + if (tg3_flag(tp, FLUSH_POSTED_WRITES) || + (!tg3_flag(tp, MBOX_WRITE_REORDER) && + !tg3_flag(tp, ICH_WORKAROUND))) tp->read32_mbox(tp, off); } @@ -583,7 +587,8 @@ static void tg3_write32_tx_mbox(struct tg3 *tp, u32 off, u32 val) writel(val, mbox); if (tg3_flag(tp, TXD_MBOX_HWBUG)) writel(val, mbox); - if (tg3_flag(tp, MBOX_WRITE_REORDER)) + if (tg3_flag(tp, MBOX_WRITE_REORDER) || + tg3_flag(tp, FLUSH_POSTED_WRITES)) readl(mbox); } @@ -1793,6 +1798,11 @@ static int tg3_poll_fw(struct tg3 *tp) int i; u32 val; + if (tg3_flag(tp, IS_SSB_CORE)) { + /* We don't use firmware. */ + return 0; + } + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) { /* Wait up to 20ms for init done. */ for (i = 0; i < 200; i++) { @@ -3465,6 +3475,13 @@ static int tg3_halt_cpu(struct tg3 *tp, u32 offset) tw32_f(offset + CPU_MODE, CPU_MODE_HALT); udelay(10); } else { + /* + * There is only an Rx CPU for the 5750 derivative in the + * BCM4785. + */ + if (tg3_flag(tp, IS_SSB_CORE)) + return 0; + for (i = 0; i < 10000; i++) { tw32(offset + CPU_STATE, 0xffffffff); tw32(offset + CPU_MODE, CPU_MODE_HALT); @@ -3932,8 +3949,9 @@ static int tg3_power_down_prepare(struct tg3 *tp) tg3_frob_aux_power(tp, true); /* Workaround for unstable PLL clock */ - if ((GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5750_AX) || - (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5750_BX)) { + if ((!tg3_flag(tp, IS_SSB_CORE)) && + ((GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5750_AX) || + (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5750_BX))) { u32 val = tr32(0x7d00); val &= ~((1 << 16) | (1 << 4) | (1 << 2) | (1 << 1) | 1); @@ -4454,6 +4472,15 @@ relink: if (current_link_up == 0 || (tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER)) { tg3_phy_copper_begin(tp); + if (tg3_flag(tp, ROBOSWITCH)) { + current_link_up = 1; + /* FIXME: when BCM5325 switch is used use 100 MBit/s */ + current_speed = SPEED_1000; + current_duplex = DUPLEX_FULL; + tp->link_config.active_speed = current_speed; + tp->link_config.active_duplex = current_duplex; + } + tg3_readphy(tp, MII_BMSR, &bmsr); if ((!tg3_readphy(tp, MII_BMSR, &bmsr) && (bmsr & BMSR_LSTATUS)) || (tp->mac_mode & MAC_MODE_PORT_INT_LPBACK)) @@ -4472,6 +4499,26 @@ relink: else tp->mac_mode |= MAC_MODE_PORT_MODE_GMII; + /* In order for the 5750 core in BCM4785 chip to work properly + * in RGMII mode, the Led Control Register must be set up. + */ + if (tg3_flag(tp, RGMII_MODE)) { + u32 led_ctrl = tr32(MAC_LED_CTRL); + led_ctrl &= ~(LED_CTRL_1000MBPS_ON | LED_CTRL_100MBPS_ON); + + if (tp->link_config.active_speed == SPEED_10) + led_ctrl |= LED_CTRL_LNKLED_OVERRIDE; + else if (tp->link_config.active_speed == SPEED_100) + led_ctrl |= (LED_CTRL_LNKLED_OVERRIDE | + LED_CTRL_100MBPS_ON); + else if (tp->link_config.active_speed == SPEED_1000) + led_ctrl |= (LED_CTRL_LNKLED_OVERRIDE | + LED_CTRL_1000MBPS_ON); + + tw32(MAC_LED_CTRL, led_ctrl); + udelay(40); + } + tp->mac_mode &= ~MAC_MODE_HALF_DUPLEX; if (tp->link_config.active_duplex == DUPLEX_HALF) tp->mac_mode |= MAC_MODE_HALF_DUPLEX; @@ -8449,6 +8496,16 @@ static int tg3_chip_reset(struct tg3 *tp) tw32(0x5000, 0x400); } + if (tg3_flag(tp, IS_SSB_CORE)) { + /* + * BCM4785: In order to avoid repercussions from using + * potentially defective internal ROM, stop the Rx RISC CPU, + * which is not required. + */ + tg3_stop_fw(tp); + tg3_halt_cpu(tp, RX_CPU_BASE); + } + tw32(GRC_MODE, tp->grc_mode); if (tp->pci_chip_rev_id == CHIPREV_ID_5705_A0) { @@ -10107,6 +10164,11 @@ static void tg3_timer(unsigned long __opaque) tg3_flag(tp, 57765_CLASS)) tg3_chk_missed_msi(tp); + if (tg3_flag(tp, FLUSH_POSTED_WRITES)) { + /* BCM4785: Flush posted writes from GbE to host memory. */ + tr32(HOSTCC_MODE); + } + if (!tg3_flag(tp, TAGGED_STATUS)) { /* All of this garbage is because when using non-tagged * IRQ status the mailbox/status_block protocol the chip @@ -13879,6 +13941,14 @@ static void tg3_get_5720_nvram_info(struct tg3 *tp) /* Chips other than 5700/5701 use the NVRAM for fetching info. */ static void tg3_nvram_init(struct tg3 *tp) { + if (tg3_flag(tp, IS_SSB_CORE)) { + /* No NVRAM and EEPROM on the SSB Broadcom GigE core. */ + tg3_flag_clear(tp, NVRAM); + tg3_flag_clear(tp, NVRAM_BUFFERED); + tg3_flag_set(tp, NO_NVRAM); + return; + } + tw32_f(GRC_EEPROM_ADDR, (EEPROM_ADDR_FSM_RESET | (EEPROM_DEFAULT_CLOCK_PERIOD << @@ -14405,10 +14475,19 @@ static int tg3_phy_probe(struct tg3 *tp) * subsys device table. */ p = tg3_lookup_by_subsys(tp); - if (!p) + if (p) { + tp->phy_id = p->phy_id; + } else if (!tg3_flag(tp, IS_SSB_CORE)) { + /* For now we saw the IDs 0xbc050cd0, + * 0xbc050f80 and 0xbc050c30 on devices + * connected to an BCM4785 and there are + * probably more. Just assume that the phy is + * supported when it is connected to a SSB core + * for now. + */ return -ENODEV; + } - tp->phy_id = p->phy_id; if (!tp->phy_id || tp->phy_id == TG3_PHY_ID_BCM8002) tp->phy_flags |= TG3_PHYFLG_PHY_SERDES; @@ -15484,6 +15563,11 @@ static int tg3_get_invariants(struct tg3 *tp, const struct pci_device_id *ent) TG3_CPMU_STATUS_FSHFT_5719; } + if (tg3_flag(tp, FLUSH_POSTED_WRITES)) { + tp->write32_tx_mbox = tg3_write_flush_reg32; + tp->write32_rx_mbox = tg3_write_flush_reg32; + } + /* Get eeprom hw config before calling tg3_set_power_state(). * In particular, the TG3_FLAG_IS_NIC flag must be * determined before calling tg3_set_power_state() so that @@ -15820,12 +15904,19 @@ static int tg3_get_device_address(struct tg3 *tp) struct net_device *dev = tp->dev; u32 hi, lo, mac_offset; int addr_ok = 0; + int err; #ifdef CONFIG_SPARC if (!tg3_get_macaddr_sparc(tp)) return 0; #endif + if (tg3_flag(tp, IS_SSB_CORE)) { + err = ssb_gige_get_macaddr(tp->pdev, &dev->dev_addr[0]); + if (!err && is_valid_ether_addr(&dev->dev_addr[0])) + return 0; + } + mac_offset = 0x7c; if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704 || tg3_flag(tp, 5780_CLASS)) { @@ -16185,6 +16276,8 @@ static int tg3_test_dma(struct tg3 *tp) tp->dma_rwctrl |= 0x001b000f; } } + if (tg3_flag(tp, ONE_DMA_AT_ONCE)) + tp->dma_rwctrl |= DMA_RWCTRL_ONE_DMA; if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704) @@ -16530,6 +16623,18 @@ static int tg3_init_one(struct pci_dev *pdev, else tp->msg_enable = TG3_DEF_MSG_ENABLE; + if (pdev_is_ssb_gige_core(pdev)) { + tg3_flag_set(tp, IS_SSB_CORE); + if (ssb_gige_must_flush_posted_writes(pdev)) + tg3_flag_set(tp, FLUSH_POSTED_WRITES); + if (ssb_gige_one_dma_at_once(pdev)) + tg3_flag_set(tp, ONE_DMA_AT_ONCE); + if (ssb_gige_have_roboswitch(pdev)) + tg3_flag_set(tp, ROBOSWITCH); + if (ssb_gige_is_rgmii(pdev)) + tg3_flag_set(tp, RGMII_MODE); + } + /* The word/byte swap controls here control register access byte * swapping. DMA data byte swapping is controlled in the GRC_MODE * setting below. diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index 9cd88a4b9a5f..ef6ced2bf9c3 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -3056,6 +3056,11 @@ enum TG3_FLAGS { TG3_FLAG_57765_PLUS, TG3_FLAG_57765_CLASS, TG3_FLAG_5717_PLUS, + TG3_FLAG_IS_SSB_CORE, + TG3_FLAG_FLUSH_POSTED_WRITES, + TG3_FLAG_ROBOSWITCH, + TG3_FLAG_ONE_DMA_AT_ONCE, + TG3_FLAG_RGMII_MODE, /* Add new flags before this comment and TG3_FLAG_NUMBER_OF_FLAGS */ TG3_FLAG_NUMBER_OF_FLAGS, /* Last entry in enum TG3_FLAGS */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0eb65796bcb9..907e7e56fa4b 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2127,6 +2127,7 @@ #define PCI_DEVICE_ID_TIGON3_5754M 0x1672 #define PCI_DEVICE_ID_TIGON3_5755M 0x1673 #define PCI_DEVICE_ID_TIGON3_5756 0x1674 +#define PCI_DEVICE_ID_TIGON3_5750 0x1676 #define PCI_DEVICE_ID_TIGON3_5751 0x1677 #define PCI_DEVICE_ID_TIGON3_5715 0x1678 #define PCI_DEVICE_ID_TIGON3_5715S 0x1679 -- cgit v1.2.3-71-gd317 From afb43e6d88e587441c960a5d214d2c698d076c9c Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Fri, 25 Jan 2013 11:57:48 +0200 Subject: wlcore: remove if_ops from platform_data We can't pass pointers from the platform data to the modules, because with DT it cannot be done. Those pointers are not set by the board files anyway. It's the bus modules that set them, so they can be safely removed from the platform data without changing any board files. Create a new structure that the bus modules pass to wlcore. This structure contains the if_ops pointers and a pointer to the actual platform data. Signed-off-by: Luciano Coelho Reviewed-by: Felipe Balbi --- drivers/net/wireless/ti/wl12xx/main.c | 3 ++- drivers/net/wireless/ti/wlcore/main.c | 5 +++-- drivers/net/wireless/ti/wlcore/sdio.c | 22 +++++++++++++++++----- drivers/net/wireless/ti/wlcore/spi.c | 20 +++++++++++++++++--- drivers/net/wireless/ti/wlcore/wlcore_i.h | 5 +++++ include/linux/wl12xx.h | 2 -- 6 files changed, 44 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/ti/wl12xx/main.c b/drivers/net/wireless/ti/wl12xx/main.c index 3254bfc81a2a..09694e39bb14 100644 --- a/drivers/net/wireless/ti/wl12xx/main.c +++ b/drivers/net/wireless/ti/wl12xx/main.c @@ -1703,7 +1703,8 @@ static struct ieee80211_sta_ht_cap wl12xx_ht_cap = { static int wl12xx_setup(struct wl1271 *wl) { struct wl12xx_priv *priv = wl->priv; - struct wl12xx_platform_data *pdata = wl->pdev->dev.platform_data; + struct wlcore_platdev_data *pdev_data = wl->pdev->dev.platform_data; + struct wl12xx_platform_data *pdata = pdev_data->pdata; wl->rtable = wl12xx_rtable; wl->num_tx_desc = WL12XX_NUM_TX_DESCRIPTORS; diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 9a66acf1205f..cd70335cd5e8 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -5966,7 +5966,8 @@ static void wlcore_nvs_cb(const struct firmware *fw, void *context) { struct wl1271 *wl = context; struct platform_device *pdev = wl->pdev; - struct wl12xx_platform_data *pdata = pdev->dev.platform_data; + struct wlcore_platdev_data *pdev_data = pdev->dev.platform_data; + struct wl12xx_platform_data *pdata = pdev_data->pdata; unsigned long irqflags; int ret; @@ -5995,7 +5996,7 @@ static void wlcore_nvs_cb(const struct firmware *fw, void *context) wl->irq = platform_get_irq(pdev, 0); wl->platform_quirks = pdata->platform_quirks; - wl->if_ops = pdata->ops; + wl->if_ops = pdev_data->if_ops; if (wl->platform_quirks & WL12XX_PLATFORM_QUIRK_EDGE_IRQ) irqflags = IRQF_TRIGGER_RISING; diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c index d4f184e2efed..1f6f6e30daca 100644 --- a/drivers/net/wireless/ti/wlcore/sdio.c +++ b/drivers/net/wireless/ti/wlcore/sdio.c @@ -218,6 +218,7 @@ static int wl1271_probe(struct sdio_func *func, const struct sdio_device_id *id) { struct wl12xx_platform_data *wlan_data; + struct wlcore_platdev_data *pdev_data; struct wl12xx_sdio_glue *glue; struct resource res[1]; mmc_pm_flag_t mmcflags; @@ -228,10 +229,18 @@ static int wl1271_probe(struct sdio_func *func, if (func->num != 0x02) return -ENODEV; + pdev_data = kzalloc(sizeof(*pdev_data), GFP_KERNEL); + if (!pdev_data) { + dev_err(&func->dev, "can't allocate platdev_data\n"); + goto out; + } + + pdev_data->if_ops = &sdio_ops; + glue = kzalloc(sizeof(*glue), GFP_KERNEL); if (!glue) { dev_err(&func->dev, "can't allocate glue\n"); - goto out; + goto out_free_pdev_data; } glue->dev = &func->dev; @@ -256,8 +265,6 @@ static int wl1271_probe(struct sdio_func *func, if (mmcflags & MMC_PM_KEEP_POWER) wlan_data->pwr_in_suspend = true; - wlan_data->ops = &sdio_ops; - sdio_set_drvdata(func, glue); /* Tell PM core that we don't need the card to be powered now */ @@ -295,8 +302,10 @@ static int wl1271_probe(struct sdio_func *func, goto out_dev_put; } - ret = platform_device_add_data(glue->core, wlan_data, - sizeof(*wlan_data)); + pdev_data->pdata = wlan_data; + + ret = platform_device_add_data(glue->core, pdev_data, + sizeof(*pdev_data)); if (ret) { dev_err(glue->dev, "can't add platform data\n"); goto out_dev_put; @@ -315,6 +324,9 @@ out_dev_put: out_free_glue: kfree(glue); +out_free_pdev_data: + kfree(pdev_data); + out: return ret; } diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index 2d700b7ae14c..d437f4d28bd0 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -328,6 +328,7 @@ static int wl1271_probe(struct spi_device *spi) { struct wl12xx_spi_glue *glue; struct wl12xx_platform_data *pdata; + struct wlcore_platdev_data *pdev_data; struct resource res[1]; int ret = -ENOMEM; @@ -337,12 +338,18 @@ static int wl1271_probe(struct spi_device *spi) return -ENODEV; } - pdata->ops = &spi_ops; + pdev_data = kzalloc(sizeof(*pdev_data), GFP_KERNEL); + if (!pdev_data) { + dev_err(&spi->dev, "can't allocate platdev_data\n"); + goto out; + } + + pdev_data->if_ops = &spi_ops; glue = kzalloc(sizeof(*glue), GFP_KERNEL); if (!glue) { dev_err(&spi->dev, "can't allocate glue\n"); - goto out; + goto out_free_pdev_data; } glue->dev = &spi->dev; @@ -380,7 +387,10 @@ static int wl1271_probe(struct spi_device *spi) goto out_dev_put; } - ret = platform_device_add_data(glue->core, pdata, sizeof(*pdata)); + pdev_data->pdata = pdata; + + ret = platform_device_add_data(glue->core, pdev_data, + sizeof(*pdev_data)); if (ret) { dev_err(glue->dev, "can't add platform data\n"); goto out_dev_put; @@ -399,6 +409,10 @@ out_dev_put: out_free_glue: kfree(glue); + +out_free_pdev_data: + kfree(pdev_data); + out: return ret; } diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h index 20316ac328a2..c845b0ef7f4b 100644 --- a/drivers/net/wireless/ti/wlcore/wlcore_i.h +++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h @@ -206,6 +206,11 @@ struct wl1271_if_operations { void (*set_block_size) (struct device *child, unsigned int blksz); }; +struct wlcore_platdev_data { + struct wl12xx_platform_data *pdata; + struct wl1271_if_operations *if_ops; +}; + #define MAX_NUM_KEYS 14 #define MAX_KEY_SIZE 32 diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index 0d6373195d32..360c9bce665c 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -55,8 +55,6 @@ struct wl12xx_platform_data { int board_tcxo_clock; unsigned long platform_quirks; bool pwr_in_suspend; - - struct wl1271_if_operations *ops; }; /* Platform does not support level trigger interrupts */ -- cgit v1.2.3-71-gd317 From 6cc9efed707c575a9e5880ea68f8b9d36b235f1f Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Fri, 25 Jan 2013 12:05:34 +0200 Subject: wlcore: move wl12xx_platform_data up and make it truly optional The platform data is used not only by wlcore-based drivers, but also by wl1251. Move it up in the directory hierarchy to reflect this. Additionally, make it truly optional. At the moment, disabling platform data while wl1251_sdio or wlcore_sdio are enabled doesn't work, but it will be necessary when device tree support is implemented. Signed-off-by: Luciano Coelho Reviewed-by: Felipe Balbi --- arch/arm/mach-omap2/board-omap3evm.c | 10 ++--- drivers/net/wireless/ti/Kconfig | 9 ++++ drivers/net/wireless/ti/Makefile | 4 +- drivers/net/wireless/ti/wilink_platform_data.c | 49 ++++++++++++++++++++++ drivers/net/wireless/ti/wlcore/Kconfig | 5 --- drivers/net/wireless/ti/wlcore/Makefile | 3 -- .../net/wireless/ti/wlcore/wl12xx_platform_data.c | 49 ---------------------- include/linux/wl12xx.h | 14 +++++-- 8 files changed, 77 insertions(+), 66 deletions(-) create mode 100644 drivers/net/wireless/ti/wilink_platform_data.c delete mode 100644 drivers/net/wireless/ti/wlcore/wl12xx_platform_data.c (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/board-omap3evm.c b/arch/arm/mach-omap2/board-omap3evm.c index 3985f35aee06..a4ca63ba7faa 100644 --- a/arch/arm/mach-omap2/board-omap3evm.c +++ b/arch/arm/mach-omap2/board-omap3evm.c @@ -309,7 +309,7 @@ static struct omap2_hsmmc_info mmc[] = { .gpio_wp = 63, .deferred = true, }, -#ifdef CONFIG_WL12XX_PLATFORM_DATA +#ifdef CONFIG_WILINK_PLATFORM_DATA { .name = "wl1271", .mmc = 2, @@ -450,7 +450,7 @@ static struct regulator_init_data omap3evm_vio = { .consumer_supplies = omap3evm_vio_supply, }; -#ifdef CONFIG_WL12XX_PLATFORM_DATA +#ifdef CONFIG_WILINK_PLATFORM_DATA #define OMAP3EVM_WLAN_PMENA_GPIO (150) #define OMAP3EVM_WLAN_IRQ_GPIO (149) @@ -563,7 +563,7 @@ static struct omap_board_mux omap35x_board_mux[] __initdata = { OMAP_PIN_OFF_NONE), OMAP3_MUX(GPMC_WAIT2, OMAP_MUX_MODE4 | OMAP_PIN_INPUT_PULLUP | OMAP_PIN_OFF_NONE), -#ifdef CONFIG_WL12XX_PLATFORM_DATA +#ifdef CONFIG_WILINK_PLATFORM_DATA /* WLAN IRQ - GPIO 149 */ OMAP3_MUX(UART1_RTS, OMAP_MUX_MODE4 | OMAP_PIN_INPUT), @@ -601,7 +601,7 @@ static struct omap_board_mux omap36x_board_mux[] __initdata = { OMAP3_MUX(SYS_BOOT4, OMAP_MUX_MODE3 | OMAP_PIN_OFF_NONE), OMAP3_MUX(SYS_BOOT5, OMAP_MUX_MODE3 | OMAP_PIN_OFF_NONE), OMAP3_MUX(SYS_BOOT6, OMAP_MUX_MODE3 | OMAP_PIN_OFF_NONE), -#ifdef CONFIG_WL12XX_PLATFORM_DATA +#ifdef CONFIG_WILINK_PLATFORM_DATA /* WLAN IRQ - GPIO 149 */ OMAP3_MUX(UART1_RTS, OMAP_MUX_MODE4 | OMAP_PIN_INPUT), @@ -637,7 +637,7 @@ static struct gpio omap3_evm_ehci_gpios[] __initdata = { static void __init omap3_evm_wl12xx_init(void) { -#ifdef CONFIG_WL12XX_PLATFORM_DATA +#ifdef CONFIG_WILINK_PLATFORM_DATA int ret; /* WL12xx WLAN Init */ diff --git a/drivers/net/wireless/ti/Kconfig b/drivers/net/wireless/ti/Kconfig index be800119d0a3..cbe1e7fef61b 100644 --- a/drivers/net/wireless/ti/Kconfig +++ b/drivers/net/wireless/ti/Kconfig @@ -12,4 +12,13 @@ source "drivers/net/wireless/ti/wl18xx/Kconfig" # keep last for automatic dependencies source "drivers/net/wireless/ti/wlcore/Kconfig" + +config WILINK_PLATFORM_DATA + bool "TI WiLink platform data" + depends on WLCORE_SDIO || WL1251_SDIO + default y + ---help--- + Small platform data bit needed to pass data to the sdio modules. + + endif # WL_TI diff --git a/drivers/net/wireless/ti/Makefile b/drivers/net/wireless/ti/Makefile index 4d6823983c04..af14231aeede 100644 --- a/drivers/net/wireless/ti/Makefile +++ b/drivers/net/wireless/ti/Makefile @@ -1,5 +1,7 @@ obj-$(CONFIG_WLCORE) += wlcore/ obj-$(CONFIG_WL12XX) += wl12xx/ -obj-$(CONFIG_WL12XX_PLATFORM_DATA) += wlcore/ obj-$(CONFIG_WL1251) += wl1251/ obj-$(CONFIG_WL18XX) += wl18xx/ + +# small builtin driver bit +obj-$(CONFIG_WILINK_PLATFORM_DATA) += wilink_platform_data.o diff --git a/drivers/net/wireless/ti/wilink_platform_data.c b/drivers/net/wireless/ti/wilink_platform_data.c new file mode 100644 index 000000000000..998e95895f9d --- /dev/null +++ b/drivers/net/wireless/ti/wilink_platform_data.c @@ -0,0 +1,49 @@ +/* + * This file is part of wl12xx + * + * Copyright (C) 2010-2011 Texas Instruments, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include +#include +#include + +static struct wl12xx_platform_data *platform_data; + +int __init wl12xx_set_platform_data(const struct wl12xx_platform_data *data) +{ + if (platform_data) + return -EBUSY; + if (!data) + return -EINVAL; + + platform_data = kmemdup(data, sizeof(*data), GFP_KERNEL); + if (!platform_data) + return -ENOMEM; + + return 0; +} + +struct wl12xx_platform_data *wl12xx_get_platform_data(void) +{ + if (!platform_data) + return ERR_PTR(-ENODEV); + + return platform_data; +} +EXPORT_SYMBOL(wl12xx_get_platform_data); diff --git a/drivers/net/wireless/ti/wlcore/Kconfig b/drivers/net/wireless/ti/wlcore/Kconfig index d7b907e67170..2b832825c3d4 100644 --- a/drivers/net/wireless/ti/wlcore/Kconfig +++ b/drivers/net/wireless/ti/wlcore/Kconfig @@ -33,8 +33,3 @@ config WLCORE_SDIO If you choose to build a module, it'll be called wlcore_sdio. Say N if unsure. - -config WL12XX_PLATFORM_DATA - bool - depends on WLCORE_SDIO != n || WL1251_SDIO != n - default y diff --git a/drivers/net/wireless/ti/wlcore/Makefile b/drivers/net/wireless/ti/wlcore/Makefile index d9fba9e32130..b21398f6c3ec 100644 --- a/drivers/net/wireless/ti/wlcore/Makefile +++ b/drivers/net/wireless/ti/wlcore/Makefile @@ -9,7 +9,4 @@ obj-$(CONFIG_WLCORE) += wlcore.o obj-$(CONFIG_WLCORE_SPI) += wlcore_spi.o obj-$(CONFIG_WLCORE_SDIO) += wlcore_sdio.o -# small builtin driver bit -obj-$(CONFIG_WL12XX_PLATFORM_DATA) += wl12xx_platform_data.o - ccflags-y += -D__CHECK_ENDIAN__ diff --git a/drivers/net/wireless/ti/wlcore/wl12xx_platform_data.c b/drivers/net/wireless/ti/wlcore/wl12xx_platform_data.c deleted file mode 100644 index 998e95895f9d..000000000000 --- a/drivers/net/wireless/ti/wlcore/wl12xx_platform_data.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * This file is part of wl12xx - * - * Copyright (C) 2010-2011 Texas Instruments, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - * - */ - -#include -#include -#include - -static struct wl12xx_platform_data *platform_data; - -int __init wl12xx_set_platform_data(const struct wl12xx_platform_data *data) -{ - if (platform_data) - return -EBUSY; - if (!data) - return -EINVAL; - - platform_data = kmemdup(data, sizeof(*data), GFP_KERNEL); - if (!platform_data) - return -ENOMEM; - - return 0; -} - -struct wl12xx_platform_data *wl12xx_get_platform_data(void) -{ - if (!platform_data) - return ERR_PTR(-ENODEV); - - return platform_data; -} -EXPORT_SYMBOL(wl12xx_get_platform_data); diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index 360c9bce665c..a54fe82e704b 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -24,6 +24,8 @@ #ifndef _LINUX_WL12XX_H #define _LINUX_WL12XX_H +#include + /* Reference clock values */ enum { WL12XX_REFCLOCK_19 = 0, /* 19.2 MHz */ @@ -60,10 +62,12 @@ struct wl12xx_platform_data { /* Platform does not support level trigger interrupts */ #define WL12XX_PLATFORM_QUIRK_EDGE_IRQ BIT(0) -#ifdef CONFIG_WL12XX_PLATFORM_DATA +#ifdef CONFIG_WILINK_PLATFORM_DATA int wl12xx_set_platform_data(const struct wl12xx_platform_data *data); +struct wl12xx_platform_data *wl12xx_get_platform_data(void); + #else static inline @@ -72,8 +76,12 @@ int wl12xx_set_platform_data(const struct wl12xx_platform_data *data) return -ENOSYS; } -#endif +static inline +struct wl12xx_platform_data *wl12xx_get_platform_data(void) +{ + return ERR_PTR(-ENODATA); +} -struct wl12xx_platform_data *wl12xx_get_platform_data(void); +#endif #endif -- cgit v1.2.3-71-gd317 From e5e67305885eb12849b5475764b0542f03dc2b59 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 8 Feb 2013 10:17:15 +0000 Subject: skbuff: Move definition of NETDEV_FRAG_PAGE_MAX_SIZE In order to address the fact that some devices cannot support the full 32K frag size we need to have the value accessible somewhere so that we can use it to do comparisons against what the device can support. As such I am moving the values out of skbuff.c and into skbuff.h. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++++ net/core/skbuff.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0259b719bebf..d7573c37a51d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1832,6 +1832,10 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } +#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) +#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) +#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE + extern void *netdev_alloc_frag(unsigned int fragsz); extern struct sk_buff *__netdev_alloc_skb(struct net_device *dev, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 55f7ef6ada6d..6114c1143564 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -351,10 +351,6 @@ struct netdev_alloc_cache { }; static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); -#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) -#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) -#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE - static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { struct netdev_alloc_cache *nc; -- cgit v1.2.3-71-gd317 From d021c344051af91f42c5ba9fdedc176740cbd238 Mon Sep 17 00:00:00 2001 From: Andy King Date: Wed, 6 Feb 2013 14:23:56 +0000 Subject: VSOCK: Introduce VM Sockets VM Sockets allows communication between virtual machines and the hypervisor. User level applications both in a virtual machine and on the host can use the VM Sockets API, which facilitates fast and efficient communication between guest virtual machines and their host. A socket address family, designed to be compatible with UDP and TCP at the interface level, is provided. Today, VM Sockets is used by various VMware Tools components inside the guest for zero-config, network-less access to VMware host services. In addition to this, VMware's users are using VM Sockets for various applications, where network access of the virtual machine is restricted or non-existent. Examples of this are VMs communicating with device proxies for proprietary hardware running as host applications and automated testing of applications running within virtual machines. The VMware VM Sockets are similar to other socket types, like Berkeley UNIX socket interface. The VM Sockets module supports both connection-oriented stream sockets like TCP, and connectionless datagram sockets like UDP. The VM Sockets protocol family is defined as "AF_VSOCK" and the socket operations split for SOCK_DGRAM and SOCK_STREAM. For additional information about the use of VM Sockets, please refer to the VM Sockets Programming Guide available at: https://www.vmware.com/support/developer/vmci-sdk/ Signed-off-by: George Zhang Signed-off-by: Dmitry Torokhov Signed-off-by: Andy king Signed-off-by: David S. Miller --- include/linux/socket.h | 4 +- include/uapi/linux/vm_sockets.h | 171 ++ net/Kconfig | 1 + net/Makefile | 1 + net/vmw_vsock/Kconfig | 28 + net/vmw_vsock/Makefile | 7 + net/vmw_vsock/af_vsock.c | 2015 ++++++++++++++++++++++++ net/vmw_vsock/af_vsock.h | 175 +++ net/vmw_vsock/vmci_transport.c | 2157 ++++++++++++++++++++++++++ net/vmw_vsock/vmci_transport.h | 139 ++ net/vmw_vsock/vmci_transport_notify.c | 680 ++++++++ net/vmw_vsock/vmci_transport_notify.h | 83 + net/vmw_vsock/vmci_transport_notify_qstate.c | 438 ++++++ net/vmw_vsock/vsock_addr.c | 86 + net/vmw_vsock/vsock_addr.h | 32 + net/vmw_vsock/vsock_version.h | 22 + 16 files changed, 6038 insertions(+), 1 deletion(-) create mode 100644 include/uapi/linux/vm_sockets.h create mode 100644 net/vmw_vsock/Kconfig create mode 100644 net/vmw_vsock/Makefile create mode 100644 net/vmw_vsock/af_vsock.c create mode 100644 net/vmw_vsock/af_vsock.h create mode 100644 net/vmw_vsock/vmci_transport.c create mode 100644 net/vmw_vsock/vmci_transport.h create mode 100644 net/vmw_vsock/vmci_transport_notify.c create mode 100644 net/vmw_vsock/vmci_transport_notify.h create mode 100644 net/vmw_vsock/vmci_transport_notify_qstate.c create mode 100644 net/vmw_vsock/vsock_addr.c create mode 100644 net/vmw_vsock/vsock_addr.h create mode 100644 net/vmw_vsock/vsock_version.h (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 9a546ff853dc..2b9f74b0ffea 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -178,7 +178,8 @@ struct ucred { #define AF_CAIF 37 /* CAIF sockets */ #define AF_ALG 38 /* Algorithm sockets */ #define AF_NFC 39 /* NFC sockets */ -#define AF_MAX 40 /* For now.. */ +#define AF_VSOCK 40 /* vSockets */ +#define AF_MAX 41 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -221,6 +222,7 @@ struct ucred { #define PF_CAIF AF_CAIF #define PF_ALG AF_ALG #define PF_NFC AF_NFC +#define PF_VSOCK AF_VSOCK #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. */ diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h new file mode 100644 index 000000000000..f7f2e99dec84 --- /dev/null +++ b/include/uapi/linux/vm_sockets.h @@ -0,0 +1,171 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VM_SOCKETS_H_ +#define _VM_SOCKETS_H_ + +#if !defined(__KERNEL__) +#include +#endif + +/* Option name for STREAM socket buffer size. Use as the option name in + * setsockopt(3) or getsockopt(3) to set or get an unsigned long long that + * specifies the size of the buffer underlying a vSockets STREAM socket. + * Value is clamped to the MIN and MAX. + */ + +#define SO_VM_SOCKETS_BUFFER_SIZE 0 + +/* Option name for STREAM socket minimum buffer size. Use as the option name + * in setsockopt(3) or getsockopt(3) to set or get an unsigned long long that + * specifies the minimum size allowed for the buffer underlying a vSockets + * STREAM socket. + */ + +#define SO_VM_SOCKETS_BUFFER_MIN_SIZE 1 + +/* Option name for STREAM socket maximum buffer size. Use as the option name + * in setsockopt(3) or getsockopt(3) to set or get an unsigned long long + * that specifies the maximum size allowed for the buffer underlying a + * vSockets STREAM socket. + */ + +#define SO_VM_SOCKETS_BUFFER_MAX_SIZE 2 + +/* Option name for socket peer's host-specific VM ID. Use as the option name + * in getsockopt(3) to get a host-specific identifier for the peer endpoint's + * VM. The identifier is a signed integer. + * Only available for hypervisor endpoints. + */ + +#define SO_VM_SOCKETS_PEER_HOST_VM_ID 3 + +/* Option name for socket's service label. Use as the option name in + * setsockopt(3) or getsockopt(3) to set or get the service label for a socket. + * The service label is a C-style NUL-terminated string. Only available for + * hypervisor endpoints. + */ + +#define SO_VM_SOCKETS_SERVICE_LABEL 4 + +/* Option name for determining if a socket is trusted. Use as the option name + * in getsockopt(3) to determine if a socket is trusted. The value is a + * signed integer. + */ + +#define SO_VM_SOCKETS_TRUSTED 5 + +/* Option name for STREAM socket connection timeout. Use as the option name + * in setsockopt(3) or getsockopt(3) to set or get the connection + * timeout for a STREAM socket. + */ + +#define SO_VM_SOCKETS_CONNECT_TIMEOUT 6 + +/* Option name for using non-blocking send/receive. Use as the option name + * for setsockopt(3) or getsockopt(3) to set or get the non-blocking + * transmit/receive flag for a STREAM socket. This flag determines whether + * send() and recv() can be called in non-blocking contexts for the given + * socket. The value is a signed integer. + * + * This option is only relevant to kernel endpoints, where descheduling the + * thread of execution is not allowed, for example, while holding a spinlock. + * It is not to be confused with conventional non-blocking socket operations. + * + * Only available for hypervisor endpoints. + */ + +#define SO_VM_SOCKETS_NONBLOCK_TXRX 7 + +/* The vSocket equivalent of INADDR_ANY. This works for the svm_cid field of + * sockaddr_vm and indicates the context ID of the current endpoint. + */ + +#define VMADDR_CID_ANY -1U + +/* Bind to any available port. Works for the svm_port field of + * sockaddr_vm. + */ + +#define VMADDR_PORT_ANY -1U + +/* Use this as the destination CID in an address when referring to the + * hypervisor. VMCI relies on it being 0, but this would be useful for other + * transports too. + */ + +#define VMADDR_CID_HYPERVISOR 0 + +/* This CID is specific to VMCI and can be considered reserved (even VMCI + * doesn't use it anymore, it's a legacy value from an older release). + */ + +#define VMADDR_CID_RESERVED 1 + +/* Use this as the destination CID in an address when referring to the host + * (any process other than the hypervisor). VMCI relies on it being 2, but + * this would be useful for other transports too. + */ + +#define VMADDR_CID_HOST 2 + +/* Invalid vSockets version. */ + +#define VM_SOCKETS_INVALID_VERSION -1U + +/* The epoch (first) component of the vSockets version. A single byte + * representing the epoch component of the vSockets version. + */ + +#define VM_SOCKETS_VERSION_EPOCH(_v) (((_v) & 0xFF000000) >> 24) + +/* The major (second) component of the vSockets version. A single byte + * representing the major component of the vSockets version. Typically + * changes for every major release of a product. + */ + +#define VM_SOCKETS_VERSION_MAJOR(_v) (((_v) & 0x00FF0000) >> 16) + +/* The minor (third) component of the vSockets version. Two bytes representing + * the minor component of the vSockets version. + */ + +#define VM_SOCKETS_VERSION_MINOR(_v) (((_v) & 0x0000FFFF)) + +/* Address structure for vSockets. The address family should be set to + * whatever vmci_sock_get_af_value_fd() returns. The structure members should + * all align on their natural boundaries without resorting to compiler packing + * directives. The total size of this structure should be exactly the same as + * that of struct sockaddr. + */ + +struct sockaddr_vm { + sa_family_t svm_family; + unsigned short svm_reserved1; + unsigned int svm_port; + unsigned int svm_cid; + unsigned char svm_zero[sizeof(struct sockaddr) - + sizeof(sa_family_t) - + sizeof(unsigned short) - + sizeof(unsigned int) - sizeof(unsigned int)]; +}; + +#define IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9) + +#if defined(__KERNEL__) +int vm_sockets_get_local_cid(void); +#endif + +#endif diff --git a/net/Kconfig b/net/Kconfig index c31348e70aad..5a1888bb036d 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -217,6 +217,7 @@ source "net/dcb/Kconfig" source "net/dns_resolver/Kconfig" source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" +source "net/vmw_vsock/Kconfig" config RPS boolean diff --git a/net/Makefile b/net/Makefile index c5aa8b3b49dc..091e7b04f301 100644 --- a/net/Makefile +++ b/net/Makefile @@ -69,3 +69,4 @@ obj-$(CONFIG_CEPH_LIB) += ceph/ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ +obj-$(CONFIG_VSOCKETS) += vmw_vsock/ diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig new file mode 100644 index 000000000000..b5fa7e40cdcb --- /dev/null +++ b/net/vmw_vsock/Kconfig @@ -0,0 +1,28 @@ +# +# Vsock protocol +# + +config VSOCKETS + tristate "Virtual Socket protocol" + help + Virtual Socket Protocol is a socket protocol similar to TCP/IP + allowing comunication between Virtual Machines and hypervisor + or host. + + You should also select one or more hypervisor-specific transports + below. + + To compile this driver as a module, choose M here: the module + will be called vsock. If unsure, say N. + +config VMWARE_VMCI_VSOCKETS + tristate "VMware VMCI transport for Virtual Sockets" + depends on VSOCKETS && VMWARE_VMCI + help + This module implements a VMCI transport for Virtual Sockets. + + Enable this transport if your Virtual Machine runs on a VMware + hypervisor. + + To compile this driver as a module, choose M here: the module + will be called vmw_vsock_vmci_transport. If unsure, say N. diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile new file mode 100644 index 000000000000..2ce52d70f224 --- /dev/null +++ b/net/vmw_vsock/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_VSOCKETS) += vsock.o +obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o + +vsock-y += af_vsock.o vsock_addr.o + +vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ + vmci_transport_notify_qstate.o diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c new file mode 100644 index 000000000000..54bb7bdf92d3 --- /dev/null +++ b/net/vmw_vsock/af_vsock.c @@ -0,0 +1,2015 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +/* Implementation notes: + * + * - There are two kinds of sockets: those created by user action (such as + * calling socket(2)) and those created by incoming connection request packets. + * + * - There are two "global" tables, one for bound sockets (sockets that have + * specified an address that they are responsible for) and one for connected + * sockets (sockets that have established a connection with another socket). + * These tables are "global" in that all sockets on the system are placed + * within them. - Note, though, that the bound table contains an extra entry + * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in + * that list. The bound table is used solely for lookup of sockets when packets + * are received and that's not necessary for SOCK_DGRAM sockets since we create + * a datagram handle for each and need not perform a lookup. Keeping SOCK_DGRAM + * sockets out of the bound hash buckets will reduce the chance of collisions + * when looking for SOCK_STREAM sockets and prevents us from having to check the + * socket type in the hash table lookups. + * + * - Sockets created by user action will either be "client" sockets that + * initiate a connection or "server" sockets that listen for connections; we do + * not support simultaneous connects (two "client" sockets connecting). + * + * - "Server" sockets are referred to as listener sockets throughout this + * implementation because they are in the SS_LISTEN state. When a connection + * request is received (the second kind of socket mentioned above), we create a + * new socket and refer to it as a pending socket. These pending sockets are + * placed on the pending connection list of the listener socket. When future + * packets are received for the address the listener socket is bound to, we + * check if the source of the packet is from one that has an existing pending + * connection. If it does, we process the packet for the pending socket. When + * that socket reaches the connected state, it is removed from the listener + * socket's pending list and enqueued in the listener socket's accept queue. + * Callers of accept(2) will accept connected sockets from the listener socket's + * accept queue. If the socket cannot be accepted for some reason then it is + * marked rejected. Once the connection is accepted, it is owned by the user + * process and the responsibility for cleanup falls with that user process. + * + * - It is possible that these pending sockets will never reach the connected + * state; in fact, we may never receive another packet after the connection + * request. Because of this, we must schedule a cleanup function to run in the + * future, after some amount of time passes where a connection should have been + * established. This function ensures that the socket is off all lists so it + * cannot be retrieved, then drops all references to the socket so it is cleaned + * up (sock_put() -> sk_free() -> our sk_destruct implementation). Note this + * function will also cleanup rejected sockets, those that reach the connected + * state but leave it before they have been accepted. + * + * - Sockets created by user action will be cleaned up when the user process + * calls close(2), causing our release implementation to be called. Our release + * implementation will perform some cleanup then drop the last reference so our + * sk_destruct implementation is invoked. Our sk_destruct implementation will + * perform additional cleanup that's common for both types of sockets. + * + * - A socket's reference count is what ensures that the structure won't be + * freed. Each entry in a list (such as the "global" bound and connected tables + * and the listener socket's pending list and connected queue) ensures a + * reference. When we defer work until process context and pass a socket as our + * argument, we must ensure the reference count is increased to ensure the + * socket isn't freed before the function is run; the deferred function will + * then drop the reference. + */ + +#include + +#define EXPORT_SYMTAB +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "af_vsock.h" +#include "vsock_version.h" + +static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); +static void vsock_sk_destruct(struct sock *sk); +static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); + +/* Protocol family. */ +static struct proto vsock_proto = { + .name = "AF_VSOCK", + .owner = THIS_MODULE, + .obj_size = sizeof(struct vsock_sock), +}; + +/* The default peer timeout indicates how long we will wait for a peer response + * to a control message. + */ +#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) + +#define SS_LISTEN 255 + +static const struct vsock_transport *transport; +static DEFINE_MUTEX(vsock_register_mutex); + +/**** EXPORTS ****/ + +/* Get the ID of the local context. This is transport dependent. */ + +int vm_sockets_get_local_cid(void) +{ + return transport->get_local_cid(); +} +EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid); + +/**** UTILS ****/ + +/* Each bound VSocket is stored in the bind hash table and each connected + * VSocket is stored in the connected hash table. + * + * Unbound sockets are all put on the same list attached to the end of the hash + * table (vsock_unbound_sockets). Bound sockets are added to the hash table in + * the bucket that their local address hashes to (vsock_bound_sockets(addr) + * represents the list that addr hashes to). + * + * Specifically, we initialize the vsock_bind_table array to a size of + * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through + * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and + * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function + * mods with VSOCK_HASH_SIZE - 1 to ensure this. + */ +#define VSOCK_HASH_SIZE 251 +#define MAX_PORT_RETRIES 24 + +#define VSOCK_HASH(addr) ((addr)->svm_port % (VSOCK_HASH_SIZE - 1)) +#define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)]) +#define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE]) + +/* XXX This can probably be implemented in a better way. */ +#define VSOCK_CONN_HASH(src, dst) \ + (((src)->svm_cid ^ (dst)->svm_port) % (VSOCK_HASH_SIZE - 1)) +#define vsock_connected_sockets(src, dst) \ + (&vsock_connected_table[VSOCK_CONN_HASH(src, dst)]) +#define vsock_connected_sockets_vsk(vsk) \ + vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr) + +static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; +static struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; +static DEFINE_SPINLOCK(vsock_table_lock); + +static __init void vsock_init_tables(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vsock_bind_table); i++) + INIT_LIST_HEAD(&vsock_bind_table[i]); + + for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) + INIT_LIST_HEAD(&vsock_connected_table[i]); +} + +static void __vsock_insert_bound(struct list_head *list, + struct vsock_sock *vsk) +{ + sock_hold(&vsk->sk); + list_add(&vsk->bound_table, list); +} + +static void __vsock_insert_connected(struct list_head *list, + struct vsock_sock *vsk) +{ + sock_hold(&vsk->sk); + list_add(&vsk->connected_table, list); +} + +static void __vsock_remove_bound(struct vsock_sock *vsk) +{ + list_del_init(&vsk->bound_table); + sock_put(&vsk->sk); +} + +static void __vsock_remove_connected(struct vsock_sock *vsk) +{ + list_del_init(&vsk->connected_table); + sock_put(&vsk->sk); +} + +static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) + if (vsock_addr_equals_addr_any(addr, &vsk->local_addr)) + return sk_vsock(vsk); + + return NULL; +} + +static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_connected_sockets(src, dst), + connected_table) { + if (vsock_addr_equals_addr(src, &vsk->remote_addr) + && vsock_addr_equals_addr(dst, &vsk->local_addr)) { + return sk_vsock(vsk); + } + } + + return NULL; +} + +static bool __vsock_in_bound_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->bound_table); +} + +static bool __vsock_in_connected_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->connected_table); +} + +static void vsock_insert_unbound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_insert_bound(vsock_unbound_sockets, vsk); + spin_unlock_bh(&vsock_table_lock); +} + +void vsock_insert_connected(struct vsock_sock *vsk) +{ + struct list_head *list = vsock_connected_sockets( + &vsk->remote_addr, &vsk->local_addr); + + spin_lock_bh(&vsock_table_lock); + __vsock_insert_connected(list, vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_insert_connected); + +void vsock_remove_bound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_remove_bound(vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_remove_bound); + +void vsock_remove_connected(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_remove_connected(vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_remove_connected); + +struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_bound_socket(addr); + if (sk) + sock_hold(sk); + + spin_unlock_bh(&vsock_table_lock); + + return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_bound_socket); + +struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_connected_socket(src, dst); + if (sk) + sock_hold(sk); + + spin_unlock_bh(&vsock_table_lock); + + return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_connected_socket); + +static bool vsock_in_bound_table(struct vsock_sock *vsk) +{ + bool ret; + + spin_lock_bh(&vsock_table_lock); + ret = __vsock_in_bound_table(vsk); + spin_unlock_bh(&vsock_table_lock); + + return ret; +} + +static bool vsock_in_connected_table(struct vsock_sock *vsk) +{ + bool ret; + + spin_lock_bh(&vsock_table_lock); + ret = __vsock_in_connected_table(vsk); + spin_unlock_bh(&vsock_table_lock); + + return ret; +} + +void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) +{ + int i; + + spin_lock_bh(&vsock_table_lock); + + for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { + struct vsock_sock *vsk; + list_for_each_entry(vsk, &vsock_connected_table[i], + connected_table); + fn(sk_vsock(vsk)); + } + + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_for_each_connected_socket); + +void vsock_add_pending(struct sock *listener, struct sock *pending) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + + vlistener = vsock_sk(listener); + vpending = vsock_sk(pending); + + sock_hold(pending); + sock_hold(listener); + list_add_tail(&vpending->pending_links, &vlistener->pending_links); +} +EXPORT_SYMBOL_GPL(vsock_add_pending); + +void vsock_remove_pending(struct sock *listener, struct sock *pending) +{ + struct vsock_sock *vpending = vsock_sk(pending); + + list_del_init(&vpending->pending_links); + sock_put(listener); + sock_put(pending); +} +EXPORT_SYMBOL_GPL(vsock_remove_pending); + +void vsock_enqueue_accept(struct sock *listener, struct sock *connected) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vconnected; + + vlistener = vsock_sk(listener); + vconnected = vsock_sk(connected); + + sock_hold(connected); + sock_hold(listener); + list_add_tail(&vconnected->accept_queue, &vlistener->accept_queue); +} +EXPORT_SYMBOL_GPL(vsock_enqueue_accept); + +static struct sock *vsock_dequeue_accept(struct sock *listener) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vconnected; + + vlistener = vsock_sk(listener); + + if (list_empty(&vlistener->accept_queue)) + return NULL; + + vconnected = list_entry(vlistener->accept_queue.next, + struct vsock_sock, accept_queue); + + list_del_init(&vconnected->accept_queue); + sock_put(listener); + /* The caller will need a reference on the connected socket so we let + * it call sock_put(). + */ + + return sk_vsock(vconnected); +} + +static bool vsock_is_accept_queue_empty(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + return list_empty(&vsk->accept_queue); +} + +static bool vsock_is_pending(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + return !list_empty(&vsk->pending_links); +} + +static int vsock_send_shutdown(struct sock *sk, int mode) +{ + return transport->shutdown(vsock_sk(sk), mode); +} + +void vsock_pending_work(struct work_struct *work) +{ + struct sock *sk; + struct sock *listener; + struct vsock_sock *vsk; + bool cleanup; + + vsk = container_of(work, struct vsock_sock, dwork.work); + sk = sk_vsock(vsk); + listener = vsk->listener; + cleanup = true; + + lock_sock(listener); + lock_sock(sk); + + if (vsock_is_pending(sk)) { + vsock_remove_pending(listener, sk); + } else if (!vsk->rejected) { + /* We are not on the pending list and accept() did not reject + * us, so we must have been accepted by our user process. We + * just need to drop our references to the sockets and be on + * our way. + */ + cleanup = false; + goto out; + } + + listener->sk_ack_backlog--; + + /* We need to remove ourself from the global connected sockets list so + * incoming packets can't find this socket, and to reduce the reference + * count. + */ + if (vsock_in_connected_table(vsk)) + vsock_remove_connected(vsk); + + sk->sk_state = SS_FREE; + +out: + release_sock(sk); + release_sock(listener); + if (cleanup) + sock_put(sk); + + sock_put(sk); + sock_put(listener); +} +EXPORT_SYMBOL_GPL(vsock_pending_work); + +/**** SOCKET OPERATIONS ****/ + +static int __vsock_bind_stream(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + static u32 port = LAST_RESERVED_PORT + 1; + struct sockaddr_vm new_addr; + + vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); + + if (addr->svm_port == VMADDR_PORT_ANY) { + bool found = false; + unsigned int i; + + for (i = 0; i < MAX_PORT_RETRIES; i++) { + if (port <= LAST_RESERVED_PORT) + port = LAST_RESERVED_PORT + 1; + + new_addr.svm_port = port++; + + if (!__vsock_find_bound_socket(&new_addr)) { + found = true; + break; + } + } + + if (!found) + return -EADDRNOTAVAIL; + } else { + /* If port is in reserved range, ensure caller + * has necessary privileges. + */ + if (addr->svm_port <= LAST_RESERVED_PORT && + !capable(CAP_NET_BIND_SERVICE)) { + return -EACCES; + } + + if (__vsock_find_bound_socket(&new_addr)) + return -EADDRINUSE; + } + + vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port); + + /* Remove stream sockets from the unbound list and add them to the hash + * table for easy lookup by its address. The unbound list is simply an + * extra entry at the end of the hash table, a trick used by AF_UNIX. + */ + __vsock_remove_bound(vsk); + __vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk); + + return 0; +} + +static int __vsock_bind_dgram(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + return transport->dgram_bind(vsk, addr); +} + +static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk = vsock_sk(sk); + u32 cid; + int retval; + + /* First ensure this socket isn't already bound. */ + if (vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + /* Now bind to the provided address or select appropriate values if + * none are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY). Note that + * like AF_INET prevents binding to a non-local IP address (in most + * cases), we only allow binding to the local CID. + */ + cid = transport->get_local_cid(); + if (addr->svm_cid != cid && addr->svm_cid != VMADDR_CID_ANY) + return -EADDRNOTAVAIL; + + switch (sk->sk_socket->type) { + case SOCK_STREAM: + spin_lock_bh(&vsock_table_lock); + retval = __vsock_bind_stream(vsk, addr); + spin_unlock_bh(&vsock_table_lock); + break; + + case SOCK_DGRAM: + retval = __vsock_bind_dgram(vsk, addr); + break; + + default: + retval = -EINVAL; + break; + } + + return retval; +} + +struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, + gfp_t priority, + unsigned short type) +{ + struct sock *sk; + struct vsock_sock *psk; + struct vsock_sock *vsk; + + sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto); + if (!sk) + return NULL; + + sock_init_data(sock, sk); + + /* sk->sk_type is normally set in sock_init_data, but only if sock is + * non-NULL. We make sure that our sockets always have a type by + * setting it here if needed. + */ + if (!sock) + sk->sk_type = type; + + vsk = vsock_sk(sk); + vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + + sk->sk_destruct = vsock_sk_destruct; + sk->sk_backlog_rcv = vsock_queue_rcv_skb; + sk->sk_state = 0; + sock_reset_flag(sk, SOCK_DONE); + + INIT_LIST_HEAD(&vsk->bound_table); + INIT_LIST_HEAD(&vsk->connected_table); + vsk->listener = NULL; + INIT_LIST_HEAD(&vsk->pending_links); + INIT_LIST_HEAD(&vsk->accept_queue); + vsk->rejected = false; + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + vsk->peer_shutdown = 0; + + psk = parent ? vsock_sk(parent) : NULL; + if (parent) { + vsk->trusted = psk->trusted; + vsk->owner = get_cred(psk->owner); + vsk->connect_timeout = psk->connect_timeout; + } else { + vsk->trusted = capable(CAP_NET_ADMIN); + vsk->owner = get_current_cred(); + vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; + } + + if (transport->init(vsk, psk) < 0) { + sk_free(sk); + return NULL; + } + + if (sock) + vsock_insert_unbound(vsk); + + return sk; +} +EXPORT_SYMBOL_GPL(__vsock_create); + +static void __vsock_release(struct sock *sk) +{ + if (sk) { + struct sk_buff *skb; + struct sock *pending; + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + pending = NULL; /* Compiler warning. */ + + if (vsock_in_bound_table(vsk)) + vsock_remove_bound(vsk); + + if (vsock_in_connected_table(vsk)) + vsock_remove_connected(vsk); + + transport->release(vsk); + + lock_sock(sk); + sock_orphan(sk); + sk->sk_shutdown = SHUTDOWN_MASK; + + while ((skb = skb_dequeue(&sk->sk_receive_queue))) + kfree_skb(skb); + + /* Clean up any sockets that never were accepted. */ + while ((pending = vsock_dequeue_accept(sk)) != NULL) { + __vsock_release(pending); + sock_put(pending); + } + + release_sock(sk); + sock_put(sk); + } +} + +static void vsock_sk_destruct(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + transport->destruct(vsk); + + /* When clearing these addresses, there's no need to set the family and + * possibly register the address family with the kernel. + */ + vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + + put_cred(vsk->owner); +} + +static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int err; + + err = sock_queue_rcv_skb(sk, skb); + if (err) + kfree_skb(skb); + + return err; +} + +s64 vsock_stream_has_data(struct vsock_sock *vsk) +{ + return transport->stream_has_data(vsk); +} +EXPORT_SYMBOL_GPL(vsock_stream_has_data); + +s64 vsock_stream_has_space(struct vsock_sock *vsk) +{ + return transport->stream_has_space(vsk); +} +EXPORT_SYMBOL_GPL(vsock_stream_has_space); + +static int vsock_release(struct socket *sock) +{ + __vsock_release(sock->sk); + sock->sk = NULL; + sock->state = SS_FREE; + + return 0; +} + +static int +vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +{ + int err; + struct sock *sk; + struct sockaddr_vm *vm_addr; + + sk = sock->sk; + + if (vsock_addr_cast(addr, addr_len, &vm_addr) != 0) + return -EINVAL; + + lock_sock(sk); + err = __vsock_bind(sk, vm_addr); + release_sock(sk); + + return err; +} + +static int vsock_getname(struct socket *sock, + struct sockaddr *addr, int *addr_len, int peer) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *vm_addr; + + sk = sock->sk; + vsk = vsock_sk(sk); + err = 0; + + lock_sock(sk); + + if (peer) { + if (sock->state != SS_CONNECTED) { + err = -ENOTCONN; + goto out; + } + vm_addr = &vsk->remote_addr; + } else { + vm_addr = &vsk->local_addr; + } + + if (!vm_addr) { + err = -EINVAL; + goto out; + } + + /* sys_getsockname() and sys_getpeername() pass us a + * MAX_SOCK_ADDR-sized buffer and don't set addr_len. Unfortunately + * that macro is defined in socket.c instead of .h, so we hardcode its + * value here. + */ + BUILD_BUG_ON(sizeof(*vm_addr) > 128); + memcpy(addr, vm_addr, sizeof(*vm_addr)); + *addr_len = sizeof(*vm_addr); + +out: + release_sock(sk); + return err; +} + +static int vsock_shutdown(struct socket *sock, int mode) +{ + int err; + struct sock *sk; + + /* User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses + * RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode + * here like the other address families do. Note also that the + * increment makes SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3), + * which is what we want. + */ + mode++; + + if ((mode & ~SHUTDOWN_MASK) || !mode) + return -EINVAL; + + /* If this is a STREAM socket and it is not connected then bail out + * immediately. If it is a DGRAM socket then we must first kick the + * socket so that it wakes up from any sleeping calls, for example + * recv(), and then afterwards return the error. + */ + + sk = sock->sk; + if (sock->state == SS_UNCONNECTED) { + err = -ENOTCONN; + if (sk->sk_type == SOCK_STREAM) + return err; + } else { + sock->state = SS_DISCONNECTING; + err = 0; + } + + /* Receive and send shutdowns are treated alike. */ + mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN); + if (mode) { + lock_sock(sk); + sk->sk_shutdown |= mode; + sk->sk_state_change(sk); + release_sock(sk); + + if (sk->sk_type == SOCK_STREAM) { + sock_reset_flag(sk, SOCK_DONE); + vsock_send_shutdown(sk, mode); + } + } + + return err; +} + +static unsigned int vsock_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk; + unsigned int mask; + struct vsock_sock *vsk; + + sk = sock->sk; + vsk = vsock_sk(sk); + + poll_wait(file, sk_sleep(sk), wait); + mask = 0; + + if (sk->sk_err) + /* Signify that there has been an error on this socket. */ + mask |= POLLERR; + + /* INET sockets treat local write shutdown and peer write shutdown as a + * case of POLLHUP set. + */ + if ((sk->sk_shutdown == SHUTDOWN_MASK) || + ((sk->sk_shutdown & SEND_SHUTDOWN) && + (vsk->peer_shutdown & SEND_SHUTDOWN))) { + mask |= POLLHUP; + } + + if (sk->sk_shutdown & RCV_SHUTDOWN || + vsk->peer_shutdown & SEND_SHUTDOWN) { + mask |= POLLRDHUP; + } + + if (sock->type == SOCK_DGRAM) { + /* For datagram sockets we can read if there is something in + * the queue and write as long as the socket isn't shutdown for + * sending. + */ + if (!skb_queue_empty(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) { + mask |= POLLIN | POLLRDNORM; + } + + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + } else if (sock->type == SOCK_STREAM) { + lock_sock(sk); + + /* Listening sockets that have connections in their accept + * queue can be read. + */ + if (sk->sk_state == SS_LISTEN + && !vsock_is_accept_queue_empty(sk)) + mask |= POLLIN | POLLRDNORM; + + /* If there is something in the queue then we can read. */ + if (transport->stream_is_active(vsk) && + !(sk->sk_shutdown & RCV_SHUTDOWN)) { + bool data_ready_now = false; + int ret = transport->notify_poll_in( + vsk, 1, &data_ready_now); + if (ret < 0) { + mask |= POLLERR; + } else { + if (data_ready_now) + mask |= POLLIN | POLLRDNORM; + + } + } + + /* Sockets whose connections have been closed, reset, or + * terminated should also be considered read, and we check the + * shutdown flag for that. + */ + if (sk->sk_shutdown & RCV_SHUTDOWN || + vsk->peer_shutdown & SEND_SHUTDOWN) { + mask |= POLLIN | POLLRDNORM; + } + + /* Connected sockets that can produce data can be written. */ + if (sk->sk_state == SS_CONNECTED) { + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { + bool space_avail_now = false; + int ret = transport->notify_poll_out( + vsk, 1, &space_avail_now); + if (ret < 0) { + mask |= POLLERR; + } else { + if (space_avail_now) + /* Remove POLLWRBAND since INET + * sockets are not setting it. + */ + mask |= POLLOUT | POLLWRNORM; + + } + } + } + + /* Simulate INET socket poll behaviors, which sets + * POLLOUT|POLLWRNORM when peer is closed and nothing to read, + * but local send is not shutdown. + */ + if (sk->sk_state == SS_UNCONNECTED) { + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + mask |= POLLOUT | POLLWRNORM; + + } + + release_sock(sk); + } + + return mask; +} + +static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + /* For now, MSG_DONTWAIT is always assumed... */ + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + /* If the provided message contains an address, use that. Otherwise + * fall back on the socket's remote handle (if it has been connected). + */ + if (msg->msg_name && + vsock_addr_cast(msg->msg_name, msg->msg_namelen, + &remote_addr) == 0) { + /* Ensure this address is of the right type and is a valid + * destination. + */ + + if (remote_addr->svm_cid == VMADDR_CID_ANY) + remote_addr->svm_cid = transport->get_local_cid(); + + if (!vsock_addr_bound(remote_addr)) { + err = -EINVAL; + goto out; + } + } else if (sock->state == SS_CONNECTED) { + remote_addr = &vsk->remote_addr; + + if (remote_addr->svm_cid == VMADDR_CID_ANY) + remote_addr->svm_cid = transport->get_local_cid(); + + /* XXX Should connect() or this function ensure remote_addr is + * bound? + */ + if (!vsock_addr_bound(&vsk->remote_addr)) { + err = -EINVAL; + goto out; + } + } else { + err = -EINVAL; + goto out; + } + + if (!transport->dgram_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -EINVAL; + goto out; + } + + err = transport->dgram_enqueue(vsk, remote_addr, msg->msg_iov, len); + +out: + release_sock(sk); + return err; +} + +static int vsock_dgram_connect(struct socket *sock, + struct sockaddr *addr, int addr_len, int flags) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + + sk = sock->sk; + vsk = vsock_sk(sk); + + err = vsock_addr_cast(addr, addr_len, &remote_addr); + if (err == -EAFNOSUPPORT && remote_addr->svm_family == AF_UNSPEC) { + lock_sock(sk); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, + VMADDR_PORT_ANY); + sock->state = SS_UNCONNECTED; + release_sock(sk); + return 0; + } else if (err != 0) + return -EINVAL; + + lock_sock(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + if (!transport->dgram_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -EINVAL; + goto out; + } + + memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr)); + sock->state = SS_CONNECTED; + +out: + release_sock(sk); + return err; +} + +static int vsock_dgram_recvmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + return transport->dgram_dequeue(kiocb, vsock_sk(sock->sk), msg, len, + flags); +} + +static const struct proto_ops vsock_dgram_ops = { + .family = PF_VSOCK, + .owner = THIS_MODULE, + .release = vsock_release, + .bind = vsock_bind, + .connect = vsock_dgram_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = vsock_getname, + .poll = vsock_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = vsock_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = vsock_dgram_sendmsg, + .recvmsg = vsock_dgram_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static void vsock_connect_timeout(struct work_struct *work) +{ + struct sock *sk; + struct vsock_sock *vsk; + + vsk = container_of(work, struct vsock_sock, dwork.work); + sk = sk_vsock(vsk); + + lock_sock(sk); + if (sk->sk_state == SS_CONNECTING && + (sk->sk_shutdown != SHUTDOWN_MASK)) { + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ETIMEDOUT; + sk->sk_error_report(sk); + } + release_sock(sk); + + sock_put(sk); +} + +static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, + int addr_len, int flags) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + long timeout; + DEFINE_WAIT(wait); + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + /* XXX AF_UNSPEC should make us disconnect like AF_INET. */ + switch (sock->state) { + case SS_CONNECTED: + err = -EISCONN; + goto out; + case SS_DISCONNECTING: + err = -EINVAL; + goto out; + case SS_CONNECTING: + /* This continues on so we can move sock into the SS_CONNECTED + * state once the connection has completed (at which point err + * will be set to zero also). Otherwise, we will either wait + * for the connection or return -EALREADY should this be a + * non-blocking call. + */ + err = -EALREADY; + break; + default: + if ((sk->sk_state == SS_LISTEN) || + vsock_addr_cast(addr, addr_len, &remote_addr) != 0) { + err = -EINVAL; + goto out; + } + + /* The hypervisor and well-known contexts do not have socket + * endpoints. + */ + if (!transport->stream_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -ENETUNREACH; + goto out; + } + + /* Set the remote address that we are connecting to. */ + memcpy(&vsk->remote_addr, remote_addr, + sizeof(vsk->remote_addr)); + + /* Autobind this socket to the local address if necessary. */ + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, + VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + sk->sk_state = SS_CONNECTING; + + err = transport->connect(vsk); + if (err < 0) + goto out; + + /* Mark sock as connecting and set the error code to in + * progress in case this is a non-blocking connect. + */ + sock->state = SS_CONNECTING; + err = -EINPROGRESS; + } + + /* The receive path will handle all communication until we are able to + * enter the connected state. Here we wait for the connection to be + * completed or a notification of an error. + */ + timeout = vsk->connect_timeout; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) { + if (flags & O_NONBLOCK) { + /* If we're not going to block, we schedule a timeout + * function to generate a timeout on the connection + * attempt, in case the peer doesn't respond in a + * timely manner. We hold on to the socket until the + * timeout fires. + */ + sock_hold(sk); + INIT_DELAYED_WORK(&vsk->dwork, + vsock_connect_timeout); + schedule_delayed_work(&vsk->dwork, timeout); + + /* Skip ahead to preserve error code set above. */ + goto out_wait; + } + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait_error; + } else if (timeout == 0) { + err = -ETIMEDOUT; + goto out_wait_error; + } + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + } + + if (sk->sk_err) { + err = -sk->sk_err; + goto out_wait_error; + } else + err = 0; + +out_wait: + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; + +out_wait_error: + sk->sk_state = SS_UNCONNECTED; + sock->state = SS_UNCONNECTED; + goto out_wait; +} + +static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) +{ + struct sock *listener; + int err; + struct sock *connected; + struct vsock_sock *vconnected; + long timeout; + DEFINE_WAIT(wait); + + err = 0; + listener = sock->sk; + + lock_sock(listener); + + if (sock->type != SOCK_STREAM) { + err = -EOPNOTSUPP; + goto out; + } + + if (listener->sk_state != SS_LISTEN) { + err = -EINVAL; + goto out; + } + + /* Wait for children sockets to appear; these are the new sockets + * created upon connection establishment. + */ + timeout = sock_sndtimeo(listener, flags & O_NONBLOCK); + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + + while ((connected = vsock_dequeue_accept(listener)) == NULL && + listener->sk_err == 0) { + release_sock(listener); + timeout = schedule_timeout(timeout); + lock_sock(listener); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait; + } else if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + } + + if (listener->sk_err) + err = -listener->sk_err; + + if (connected) { + listener->sk_ack_backlog--; + + lock_sock(connected); + vconnected = vsock_sk(connected); + + /* If the listener socket has received an error, then we should + * reject this socket and return. Note that we simply mark the + * socket rejected, drop our reference, and let the cleanup + * function handle the cleanup; the fact that we found it in + * the listener's accept queue guarantees that the cleanup + * function hasn't run yet. + */ + if (err) { + vconnected->rejected = true; + release_sock(connected); + sock_put(connected); + goto out_wait; + } + + newsock->state = SS_CONNECTED; + sock_graft(connected, newsock); + release_sock(connected); + sock_put(connected); + } + +out_wait: + finish_wait(sk_sleep(listener), &wait); +out: + release_sock(listener); + return err; +} + +static int vsock_listen(struct socket *sock, int backlog) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + + sk = sock->sk; + + lock_sock(sk); + + if (sock->type != SOCK_STREAM) { + err = -EOPNOTSUPP; + goto out; + } + + if (sock->state != SS_UNCONNECTED) { + err = -EINVAL; + goto out; + } + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + err = -EINVAL; + goto out; + } + + sk->sk_max_ack_backlog = backlog; + sk->sk_state = SS_LISTEN; + + err = 0; + +out: + release_sock(sk); + return err; +} + +static int vsock_stream_setsockopt(struct socket *sock, + int level, + int optname, + char __user *optval, + unsigned int optlen) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + u64 val; + + if (level != AF_VSOCK) + return -ENOPROTOOPT; + +#define COPY_IN(_v) \ + do { \ + if (optlen < sizeof(_v)) { \ + err = -EINVAL; \ + goto exit; \ + } \ + if (copy_from_user(&_v, optval, sizeof(_v)) != 0) { \ + err = -EFAULT; \ + goto exit; \ + } \ + } while (0) + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + switch (optname) { + case SO_VM_SOCKETS_BUFFER_SIZE: + COPY_IN(val); + transport->set_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_BUFFER_MAX_SIZE: + COPY_IN(val); + transport->set_max_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_BUFFER_MIN_SIZE: + COPY_IN(val); + transport->set_min_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct timeval tv; + COPY_IN(tv); + if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC && + tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) { + vsk->connect_timeout = tv.tv_sec * HZ + + DIV_ROUND_UP(tv.tv_usec, (1000000 / HZ)); + if (vsk->connect_timeout == 0) + vsk->connect_timeout = + VSOCK_DEFAULT_CONNECT_TIMEOUT; + + } else { + err = -ERANGE; + } + break; + } + + default: + err = -ENOPROTOOPT; + break; + } + +#undef COPY_IN + +exit: + release_sock(sk); + return err; +} + +static int vsock_stream_getsockopt(struct socket *sock, + int level, int optname, + char __user *optval, + int __user *optlen) +{ + int err; + int len; + struct sock *sk; + struct vsock_sock *vsk; + u64 val; + + if (level != AF_VSOCK) + return -ENOPROTOOPT; + + err = get_user(len, optlen); + if (err != 0) + return err; + +#define COPY_OUT(_v) \ + do { \ + if (len < sizeof(_v)) \ + return -EINVAL; \ + \ + len = sizeof(_v); \ + if (copy_to_user(optval, &_v, len) != 0) \ + return -EFAULT; \ + \ + } while (0) + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + switch (optname) { + case SO_VM_SOCKETS_BUFFER_SIZE: + val = transport->get_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_BUFFER_MAX_SIZE: + val = transport->get_max_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_BUFFER_MIN_SIZE: + val = transport->get_min_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct timeval tv; + tv.tv_sec = vsk->connect_timeout / HZ; + tv.tv_usec = + (vsk->connect_timeout - + tv.tv_sec * HZ) * (1000000 / HZ); + COPY_OUT(tv); + break; + } + default: + return -ENOPROTOOPT; + } + + err = put_user(len, optlen); + if (err != 0) + return -EFAULT; + +#undef COPY_OUT + + return 0; +} + +static int vsock_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk; + struct vsock_sock *vsk; + ssize_t total_written; + long timeout; + int err; + struct vsock_transport_send_notify_data send_data; + + DEFINE_WAIT(wait); + + sk = sock->sk; + vsk = vsock_sk(sk); + total_written = 0; + err = 0; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + lock_sock(sk); + + /* Callers should not provide a destination with stream sockets. */ + if (msg->msg_namelen) { + err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP; + goto out; + } + + /* Send data only if both sides are not shutdown in the direction. */ + if (sk->sk_shutdown & SEND_SHUTDOWN || + vsk->peer_shutdown & RCV_SHUTDOWN) { + err = -EPIPE; + goto out; + } + + if (sk->sk_state != SS_CONNECTED || + !vsock_addr_bound(&vsk->local_addr)) { + err = -ENOTCONN; + goto out; + } + + if (!vsock_addr_bound(&vsk->remote_addr)) { + err = -EDESTADDRREQ; + goto out; + } + + /* Wait for room in the produce queue to enqueue our user's data. */ + timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + + err = transport->notify_send_init(vsk, &send_data); + if (err < 0) + goto out; + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (total_written < len) { + ssize_t written; + + while (vsock_stream_has_space(vsk) == 0 && + sk->sk_err == 0 && + !(sk->sk_shutdown & SEND_SHUTDOWN) && + !(vsk->peer_shutdown & RCV_SHUTDOWN)) { + + /* Don't wait for non-blocking sockets. */ + if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + err = transport->notify_send_pre_block(vsk, &send_data); + if (err < 0) + goto out_wait; + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait; + } else if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + } + + /* These checks occur both as part of and after the loop + * conditional since we need to check before and after + * sleeping. + */ + if (sk->sk_err) { + err = -sk->sk_err; + goto out_wait; + } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || + (vsk->peer_shutdown & RCV_SHUTDOWN)) { + err = -EPIPE; + goto out_wait; + } + + err = transport->notify_send_pre_enqueue(vsk, &send_data); + if (err < 0) + goto out_wait; + + /* Note that enqueue will only write as many bytes as are free + * in the produce queue, so we don't need to ensure len is + * smaller than the queue size. It is the caller's + * responsibility to check how many bytes we were able to send. + */ + + written = transport->stream_enqueue( + vsk, msg->msg_iov, + len - total_written); + if (written < 0) { + err = -ENOMEM; + goto out_wait; + } + + total_written += written; + + err = transport->notify_send_post_enqueue( + vsk, written, &send_data); + if (err < 0) + goto out_wait; + + } + +out_wait: + if (total_written > 0) + err = total_written; + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; +} + + +static int +vsock_stream_recvmsg(struct kiocb *kiocb, + struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + struct sock *sk; + struct vsock_sock *vsk; + int err; + size_t target; + ssize_t copied; + long timeout; + struct vsock_transport_recv_notify_data recv_data; + + DEFINE_WAIT(wait); + + sk = sock->sk; + vsk = vsock_sk(sk); + err = 0; + + lock_sock(sk); + + if (sk->sk_state != SS_CONNECTED) { + /* Recvmsg is supposed to return 0 if a peer performs an + * orderly shutdown. Differentiate between that case and when a + * peer has not connected or a local shutdown occured with the + * SOCK_DONE flag. + */ + if (sock_flag(sk, SOCK_DONE)) + err = 0; + else + err = -ENOTCONN; + + goto out; + } + + if (flags & MSG_OOB) { + err = -EOPNOTSUPP; + goto out; + } + + /* We don't check peer_shutdown flag here since peer may actually shut + * down, but there can be data in the queue that a local socket can + * receive. + */ + if (sk->sk_shutdown & RCV_SHUTDOWN) { + err = 0; + goto out; + } + + /* It is valid on Linux to pass in a zero-length receive buffer. This + * is not an error. We may as well bail out now. + */ + if (!len) { + err = 0; + goto out; + } + + /* We must not copy less than target bytes into the user's buffer + * before returning successfully, so we wait for the consume queue to + * have that much data to consume before dequeueing. Note that this + * makes it impossible to handle cases where target is greater than the + * queue size. + */ + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + if (target >= transport->stream_rcvhiwat(vsk)) { + err = -ENOMEM; + goto out; + } + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + copied = 0; + + err = transport->notify_recv_init(vsk, target, &recv_data); + if (err < 0) + goto out; + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (1) { + s64 ready = vsock_stream_has_data(vsk); + + if (ready < 0) { + /* Invalid queue pair content. XXX This should be + * changed to a connection reset in a later change. + */ + + err = -ENOMEM; + goto out_wait; + } else if (ready > 0) { + ssize_t read; + + err = transport->notify_recv_pre_dequeue( + vsk, target, &recv_data); + if (err < 0) + break; + + read = transport->stream_dequeue( + vsk, msg->msg_iov, + len - copied, flags); + if (read < 0) { + err = -ENOMEM; + break; + } + + copied += read; + + err = transport->notify_recv_post_dequeue( + vsk, target, read, + !(flags & MSG_PEEK), &recv_data); + if (err < 0) + goto out_wait; + + if (read >= target || flags & MSG_PEEK) + break; + + target -= read; + } else { + if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) + || (vsk->peer_shutdown & SEND_SHUTDOWN)) { + break; + } + /* Don't wait for non-blocking sockets. */ + if (timeout == 0) { + err = -EAGAIN; + break; + } + + err = transport->notify_recv_pre_block( + vsk, target, &recv_data); + if (err < 0) + break; + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + break; + } else if (timeout == 0) { + err = -EAGAIN; + break; + } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + } + } + + if (sk->sk_err) + err = -sk->sk_err; + else if (sk->sk_shutdown & RCV_SHUTDOWN) + err = 0; + + if (copied > 0) { + /* We only do these additional bookkeeping/notification steps + * if we actually copied something out of the queue pair + * instead of just peeking ahead. + */ + + if (!(flags & MSG_PEEK)) { + /* If the other side has shutdown for sending and there + * is nothing more to read, then modify the socket + * state. + */ + if (vsk->peer_shutdown & SEND_SHUTDOWN) { + if (vsock_stream_has_data(vsk) <= 0) { + sk->sk_state = SS_UNCONNECTED; + sock_set_flag(sk, SOCK_DONE); + sk->sk_state_change(sk); + } + } + } + err = copied; + } + +out_wait: + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; +} + +static const struct proto_ops vsock_stream_ops = { + .family = PF_VSOCK, + .owner = THIS_MODULE, + .release = vsock_release, + .bind = vsock_bind, + .connect = vsock_stream_connect, + .socketpair = sock_no_socketpair, + .accept = vsock_accept, + .getname = vsock_getname, + .poll = vsock_poll, + .ioctl = sock_no_ioctl, + .listen = vsock_listen, + .shutdown = vsock_shutdown, + .setsockopt = vsock_stream_setsockopt, + .getsockopt = vsock_stream_getsockopt, + .sendmsg = vsock_stream_sendmsg, + .recvmsg = vsock_stream_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static int vsock_create(struct net *net, struct socket *sock, + int protocol, int kern) +{ + if (!sock) + return -EINVAL; + + if (protocol) + return -EPROTONOSUPPORT; + + switch (sock->type) { + case SOCK_DGRAM: + sock->ops = &vsock_dgram_ops; + break; + case SOCK_STREAM: + sock->ops = &vsock_stream_ops; + break; + default: + return -ESOCKTNOSUPPORT; + } + + sock->state = SS_UNCONNECTED; + + return __vsock_create(net, sock, NULL, GFP_KERNEL, 0) ? 0 : -ENOMEM; +} + +static const struct net_proto_family vsock_family_ops = { + .family = AF_VSOCK, + .create = vsock_create, + .owner = THIS_MODULE, +}; + +static long vsock_dev_do_ioctl(struct file *filp, + unsigned int cmd, void __user *ptr) +{ + u32 __user *p = ptr; + int retval = 0; + + switch (cmd) { + case IOCTL_VM_SOCKETS_GET_LOCAL_CID: + if (put_user(transport->get_local_cid(), p) != 0) + retval = -EFAULT; + break; + + default: + pr_err("Unknown ioctl %d\n", cmd); + retval = -EINVAL; + } + + return retval; +} + +static long vsock_dev_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return vsock_dev_do_ioctl(filp, cmd, (void __user *)arg); +} + +#ifdef CONFIG_COMPAT +static long vsock_dev_compat_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return vsock_dev_do_ioctl(filp, cmd, compat_ptr(arg)); +} +#endif + +static const struct file_operations vsock_device_ops = { + .owner = THIS_MODULE, + .unlocked_ioctl = vsock_dev_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vsock_dev_compat_ioctl, +#endif + .open = nonseekable_open, +}; + +static struct miscdevice vsock_device = { + .name = "vsock", + .minor = MISC_DYNAMIC_MINOR, + .fops = &vsock_device_ops, +}; + +static int __vsock_core_init(void) +{ + int err; + + vsock_init_tables(); + + err = misc_register(&vsock_device); + if (err) { + pr_err("Failed to register misc device\n"); + return -ENOENT; + } + + err = proto_register(&vsock_proto, 1); /* we want our slab */ + if (err) { + pr_err("Cannot register vsock protocol\n"); + goto err_misc_deregister; + } + + err = sock_register(&vsock_family_ops); + if (err) { + pr_err("could not register af_vsock (%d) address family: %d\n", + AF_VSOCK, err); + goto err_unregister_proto; + } + + return 0; + +err_unregister_proto: + proto_unregister(&vsock_proto); +err_misc_deregister: + misc_deregister(&vsock_device); + return err; +} + +int vsock_core_init(const struct vsock_transport *t) +{ + int retval = mutex_lock_interruptible(&vsock_register_mutex); + if (retval) + return retval; + + if (transport) { + retval = -EBUSY; + goto out; + } + + transport = t; + retval = __vsock_core_init(); + if (retval) + transport = NULL; + +out: + mutex_unlock(&vsock_register_mutex); + return retval; +} +EXPORT_SYMBOL_GPL(vsock_core_init); + +void vsock_core_exit(void) +{ + mutex_lock(&vsock_register_mutex); + + misc_deregister(&vsock_device); + sock_unregister(AF_VSOCK); + proto_unregister(&vsock_proto); + + /* We do not want the assignment below re-ordered. */ + mb(); + transport = NULL; + + mutex_unlock(&vsock_register_mutex); +} +EXPORT_SYMBOL_GPL(vsock_core_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMware Virtual Socket Family"); +MODULE_VERSION(VSOCK_DRIVER_VERSION_STRING); +MODULE_LICENSE("GPL v2"); diff --git a/net/vmw_vsock/af_vsock.h b/net/vmw_vsock/af_vsock.h new file mode 100644 index 000000000000..7d64d3609ec9 --- /dev/null +++ b/net/vmw_vsock/af_vsock.h @@ -0,0 +1,175 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __AF_VSOCK_H__ +#define __AF_VSOCK_H__ + +#include +#include +#include + +#include "vsock_addr.h" + +#define LAST_RESERVED_PORT 1023 + +#define vsock_sk(__sk) ((struct vsock_sock *)__sk) +#define sk_vsock(__vsk) (&(__vsk)->sk) + +struct vsock_sock { + /* sk must be the first member. */ + struct sock sk; + struct sockaddr_vm local_addr; + struct sockaddr_vm remote_addr; + /* Links for the global tables of bound and connected sockets. */ + struct list_head bound_table; + struct list_head connected_table; + /* Accessed without the socket lock held. This means it can never be + * modified outsided of socket create or destruct. + */ + bool trusted; + bool cached_peer_allow_dgram; /* Dgram communication allowed to + * cached peer? + */ + u32 cached_peer; /* Context ID of last dgram destination check. */ + const struct cred *owner; + /* Rest are SOCK_STREAM only. */ + long connect_timeout; + /* Listening socket that this came from. */ + struct sock *listener; + /* Used for pending list and accept queue during connection handshake. + * The listening socket is the head for both lists. Sockets created + * for connection requests are placed in the pending list until they + * are connected, at which point they are put in the accept queue list + * so they can be accepted in accept(). If accept() cannot accept the + * connection, it is marked as rejected so the cleanup function knows + * to clean up the socket. + */ + struct list_head pending_links; + struct list_head accept_queue; + bool rejected; + struct delayed_work dwork; + u32 peer_shutdown; + bool sent_request; + bool ignore_connecting_rst; + + /* Private to transport. */ + void *trans; +}; + +s64 vsock_stream_has_data(struct vsock_sock *vsk); +s64 vsock_stream_has_space(struct vsock_sock *vsk); +void vsock_pending_work(struct work_struct *work); +struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, + gfp_t priority, unsigned short type); + +/**** TRANSPORT ****/ + +struct vsock_transport_recv_notify_data { + u64 data1; /* Transport-defined. */ + u64 data2; /* Transport-defined. */ + bool notify_on_block; +}; + +struct vsock_transport_send_notify_data { + u64 data1; /* Transport-defined. */ + u64 data2; /* Transport-defined. */ +}; + +struct vsock_transport { + /* Initialize/tear-down socket. */ + int (*init)(struct vsock_sock *, struct vsock_sock *); + void (*destruct)(struct vsock_sock *); + void (*release)(struct vsock_sock *); + + /* Connections. */ + int (*connect)(struct vsock_sock *); + + /* DGRAM. */ + int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *); + int (*dgram_dequeue)(struct kiocb *kiocb, struct vsock_sock *vsk, + struct msghdr *msg, size_t len, int flags); + int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *, + struct iovec *, size_t len); + bool (*dgram_allow)(u32 cid, u32 port); + + /* STREAM. */ + /* TODO: stream_bind() */ + ssize_t (*stream_dequeue)(struct vsock_sock *, struct iovec *, + size_t len, int flags); + ssize_t (*stream_enqueue)(struct vsock_sock *, struct iovec *, + size_t len); + s64 (*stream_has_data)(struct vsock_sock *); + s64 (*stream_has_space)(struct vsock_sock *); + u64 (*stream_rcvhiwat)(struct vsock_sock *); + bool (*stream_is_active)(struct vsock_sock *); + bool (*stream_allow)(u32 cid, u32 port); + + /* Notification. */ + int (*notify_poll_in)(struct vsock_sock *, size_t, bool *); + int (*notify_poll_out)(struct vsock_sock *, size_t, bool *); + int (*notify_recv_init)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_pre_block)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t, + ssize_t, bool, struct vsock_transport_recv_notify_data *); + int (*notify_send_init)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_pre_block)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_pre_enqueue)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t, + struct vsock_transport_send_notify_data *); + + /* Shutdown. */ + int (*shutdown)(struct vsock_sock *, int); + + /* Buffer sizes. */ + void (*set_buffer_size)(struct vsock_sock *, u64); + void (*set_min_buffer_size)(struct vsock_sock *, u64); + void (*set_max_buffer_size)(struct vsock_sock *, u64); + u64 (*get_buffer_size)(struct vsock_sock *); + u64 (*get_min_buffer_size)(struct vsock_sock *); + u64 (*get_max_buffer_size)(struct vsock_sock *); + + /* Addressing. */ + u32 (*get_local_cid)(void); +}; + +/**** CORE ****/ + +int vsock_core_init(const struct vsock_transport *t); +void vsock_core_exit(void); + +/**** UTILS ****/ + +void vsock_release_pending(struct sock *pending); +void vsock_add_pending(struct sock *listener, struct sock *pending); +void vsock_remove_pending(struct sock *listener, struct sock *pending); +void vsock_enqueue_accept(struct sock *listener, struct sock *connected); +void vsock_insert_connected(struct vsock_sock *vsk); +void vsock_remove_bound(struct vsock_sock *vsk); +void vsock_remove_connected(struct vsock_sock *vsk); +struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); +struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst); +void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); + +#endif /* __AF_VSOCK_H__ */ diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c new file mode 100644 index 000000000000..e8a87cf37072 --- /dev/null +++ b/net/vmw_vsock/vmci_transport.c @@ -0,0 +1,2157 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include + +#define EXPORT_SYMTAB +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "af_vsock.h" +#include "vmci_transport_notify.h" + +static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg); +static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg); +static void vmci_transport_peer_attach_cb(u32 sub_id, + const struct vmci_event_data *ed, + void *client_data); +static void vmci_transport_peer_detach_cb(u32 sub_id, + const struct vmci_event_data *ed, + void *client_data); +static void vmci_transport_recv_pkt_work(struct work_struct *work); +static int vmci_transport_recv_listen(struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_server( + struct sock *sk, + struct sock *pending, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client_negotiate( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client_invalid( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connected(struct sock *sk, + struct vmci_transport_packet *pkt); +static bool vmci_transport_old_proto_override(bool *old_pkt_proto); +static u16 vmci_transport_new_proto_supported_versions(void); +static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto, + bool old_pkt_proto); + +struct vmci_transport_recv_pkt_info { + struct work_struct work; + struct sock *sk; + struct vmci_transport_packet pkt; +}; + +static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID, + VMCI_INVALID_ID }; +static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + +static int PROTOCOL_OVERRIDE = -1; + +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN 128 +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE 262144 +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX 262144 + +/* The default peer timeout indicates how long we will wait for a peer response + * to a control message. + */ +#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) + +#define SS_LISTEN 255 + +/* Helper function to convert from a VMCI error code to a VSock error code. */ + +static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) +{ + int err; + + switch (vmci_error) { + case VMCI_ERROR_NO_MEM: + err = ENOMEM; + break; + case VMCI_ERROR_DUPLICATE_ENTRY: + case VMCI_ERROR_ALREADY_EXISTS: + err = EADDRINUSE; + break; + case VMCI_ERROR_NO_ACCESS: + err = EPERM; + break; + case VMCI_ERROR_NO_RESOURCES: + err = ENOBUFS; + break; + case VMCI_ERROR_INVALID_RESOURCE: + err = EHOSTUNREACH; + break; + case VMCI_ERROR_INVALID_ARGS: + default: + err = EINVAL; + } + + return err > 0 ? -err : err; +} + +static inline void +vmci_transport_packet_init(struct vmci_transport_packet *pkt, + struct sockaddr_vm *src, + struct sockaddr_vm *dst, + u8 type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + /* We register the stream control handler as an any cid handle so we + * must always send from a source address of VMADDR_CID_ANY + */ + pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY, + VMCI_TRANSPORT_PACKET_RID); + pkt->dg.dst = vmci_make_handle(dst->svm_cid, + VMCI_TRANSPORT_PACKET_RID); + pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg); + pkt->version = VMCI_TRANSPORT_PACKET_VERSION; + pkt->type = type; + pkt->src_port = src->svm_port; + pkt->dst_port = dst->svm_port; + memset(&pkt->proto, 0, sizeof(pkt->proto)); + memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2)); + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_INVALID: + pkt->u.size = 0; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_REQUEST: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE: + pkt->u.size = size; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_OFFER: + case VMCI_TRANSPORT_PACKET_TYPE_ATTACH: + pkt->u.handle = handle; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + case VMCI_TRANSPORT_PACKET_TYPE_READ: + case VMCI_TRANSPORT_PACKET_TYPE_RST: + pkt->u.size = 0; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN: + pkt->u.mode = mode; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ: + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE: + memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait)); + break; + + case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2: + pkt->u.size = size; + pkt->proto = proto; + break; + } +} + +static inline void +vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt, + struct sockaddr_vm *local, + struct sockaddr_vm *remote) +{ + vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port); + vsock_addr_init(remote, pkt->dg.src.context, pkt->src_port); +} + +static int +__vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt, + struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle, + bool convert_error) +{ + int err; + + vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait, + proto, handle); + err = vmci_datagram_send(&pkt->dg); + if (convert_error && (err < 0)) + return vmci_transport_error_to_vsock_error(err); + + return err; +} + +static int +vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + struct vmci_handle handle) +{ + struct vmci_transport_packet reply; + struct sockaddr_vm src, dst; + + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) { + return 0; + } else { + vmci_transport_packet_get_addresses(pkt, &src, &dst); + return __vmci_transport_send_control_pkt(&reply, &src, &dst, + type, + size, mode, wait, + VSOCK_PROTO_INVALID, + handle, true); + } +} + +static int +vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + struct vmci_handle handle) +{ + /* Note that it is safe to use a single packet across all CPUs since + * two tasklets of the same type are guaranteed to not ever run + * simultaneously. If that ever changes, or VMCI stops using tasklets, + * we can use per-cpu packets. + */ + static struct vmci_transport_packet pkt; + + return __vmci_transport_send_control_pkt(&pkt, src, dst, type, + size, mode, wait, + VSOCK_PROTO_INVALID, handle, + false); +} + +static int +vmci_transport_send_control_pkt(struct sock *sk, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + struct vmci_transport_packet *pkt; + struct vsock_sock *vsk; + int err; + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + if (!vsock_addr_bound(&vsk->remote_addr)) + return -EINVAL; + + pkt = kmalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr, + &vsk->remote_addr, type, size, + mode, wait, proto, handle, + true); + kfree(pkt); + + return err; +} + +static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src, + struct vmci_transport_packet *pkt) +{ + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) + return 0; + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_RST, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_reset(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) + return 0; + return vmci_transport_send_control_pkt(sk, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_negotiate(struct sock *sk, size_t size) +{ + return vmci_transport_send_control_pkt( + sk, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE, + size, 0, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_negotiate2(struct sock *sk, size_t size, + u16 version) +{ + return vmci_transport_send_control_pkt( + sk, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2, + size, 0, NULL, version, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_qp_offer(struct sock *sk, + struct vmci_handle handle) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0, + 0, NULL, + VSOCK_PROTO_INVALID, handle); +} + +static int vmci_transport_send_attach(struct sock *sk, + struct vmci_handle handle) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH, + 0, 0, NULL, VSOCK_PROTO_INVALID, + handle); +} + +static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt) +{ + return vmci_transport_reply_control_pkt_fast( + pkt, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_INVALID, + 0, 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_read_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_READ, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_wrote(struct sock *sk) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0, + 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_read(struct sock *sk) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0, + 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_waiting_write(struct sock *sk, + struct vmci_transport_waiting_info *wait) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE, + 0, 0, wait, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_waiting_read(struct sock *sk, + struct vmci_transport_waiting_info *wait) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ, + 0, 0, wait, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode) +{ + return vmci_transport_send_control_pkt( + &vsk->sk, + VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN, + 0, mode, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_conn_request(struct sock *sk, size_t size) +{ + return vmci_transport_send_control_pkt(sk, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST, + size, 0, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_conn_request2(struct sock *sk, size_t size, + u16 version) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_REQUEST2, + size, 0, NULL, version, + VMCI_INVALID_HANDLE); +} + +static struct sock *vmci_transport_get_pending( + struct sock *listener, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + struct sock *pending; + + vlistener = vsock_sk(listener); + + list_for_each_entry(vpending, &vlistener->pending_links, + pending_links) { + struct sockaddr_vm src; + struct sockaddr_vm dst; + + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); + vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); + + if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && + vsock_addr_equals_addr(&dst, &vpending->local_addr)) { + pending = sk_vsock(vpending); + sock_hold(pending); + goto found; + } + } + + pending = NULL; +found: + return pending; + +} + +static void vmci_transport_release_pending(struct sock *pending) +{ + sock_put(pending); +} + +/* We allow two kinds of sockets to communicate with a restricted VM: 1) + * trusted sockets 2) sockets from applications running as the same user as the + * VM (this is only true for the host side and only when using hosted products) + */ + +static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid) +{ + return vsock->trusted || + vmci_is_context_owner(peer_cid, vsock->owner->uid); +} + +/* We allow sending datagrams to and receiving datagrams from a restricted VM + * only if it is trusted as described in vmci_transport_is_trusted. + */ + +static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid) +{ + if (vsock->cached_peer != peer_cid) { + vsock->cached_peer = peer_cid; + if (!vmci_transport_is_trusted(vsock, peer_cid) && + (vmci_context_get_priv_flags(peer_cid) & + VMCI_PRIVILEGE_FLAG_RESTRICTED)) { + vsock->cached_peer_allow_dgram = false; + } else { + vsock->cached_peer_allow_dgram = true; + } + } + + return vsock->cached_peer_allow_dgram; +} + +static int +vmci_transport_queue_pair_alloc(struct vmci_qp **qpair, + struct vmci_handle *handle, + u64 produce_size, + u64 consume_size, + u32 peer, u32 flags, bool trusted) +{ + int err = 0; + + if (trusted) { + /* Try to allocate our queue pair as trusted. This will only + * work if vsock is running in the host. + */ + + err = vmci_qpair_alloc(qpair, handle, produce_size, + consume_size, + peer, flags, + VMCI_PRIVILEGE_FLAG_TRUSTED); + if (err != VMCI_ERROR_NO_ACCESS) + goto out; + + } + + err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size, + peer, flags, VMCI_NO_PRIVILEGE_FLAGS); +out: + if (err < 0) { + pr_err("Could not attach to queue pair with %d\n", + err); + err = vmci_transport_error_to_vsock_error(err); + } + + return err; +} + +static int +vmci_transport_datagram_create_hnd(u32 resource_id, + u32 flags, + vmci_datagram_recv_cb recv_cb, + void *client_data, + struct vmci_handle *out_handle) +{ + int err = 0; + + /* Try to allocate our datagram handler as trusted. This will only work + * if vsock is running in the host. + */ + + err = vmci_datagram_create_handle_priv(resource_id, flags, + VMCI_PRIVILEGE_FLAG_TRUSTED, + recv_cb, + client_data, out_handle); + + if (err == VMCI_ERROR_NO_ACCESS) + err = vmci_datagram_create_handle(resource_id, flags, + recv_cb, client_data, + out_handle); + + return err; +} + +/* This is invoked as part of a tasklet that's scheduled when the VMCI + * interrupt fires. This is run in bottom-half context and if it ever needs to + * sleep it should defer that work to a work queue. + */ + +static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg) +{ + struct sock *sk; + size_t size; + struct sk_buff *skb; + struct vsock_sock *vsk; + + sk = (struct sock *)data; + + /* This handler is privileged when this module is running on the host. + * We will get datagrams from all endpoints (even VMs that are in a + * restricted context). If we get one from a restricted context then + * the destination socket must be trusted. + * + * NOTE: We access the socket struct without holding the lock here. + * This is ok because the field we are interested is never modified + * outside of the create and destruct socket functions. + */ + vsk = vsock_sk(sk); + if (!vmci_transport_allow_dgram(vsk, dg->src.context)) + return VMCI_ERROR_NO_ACCESS; + + size = VMCI_DG_SIZE(dg); + + /* Attach the packet to the socket's receive queue as an sk_buff. */ + skb = alloc_skb(size, GFP_ATOMIC); + if (skb) { + /* sk_receive_skb() will do a sock_put(), so hold here. */ + sock_hold(sk); + skb_put(skb, size); + memcpy(skb->data, dg, size); + sk_receive_skb(sk, skb, 0); + } + + return VMCI_SUCCESS; +} + +static bool vmci_transport_stream_allow(u32 cid, u32 port) +{ + static const u32 non_socket_contexts[] = { + VMADDR_CID_HYPERVISOR, + VMADDR_CID_RESERVED, + }; + int i; + + BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts)); + + for (i = 0; i < ARRAY_SIZE(non_socket_contexts); i++) { + if (cid == non_socket_contexts[i]) + return false; + } + + return true; +} + +/* This is invoked as part of a tasklet that's scheduled when the VMCI + * interrupt fires. This is run in bottom-half context but it defers most of + * its work to the packet handling work queue. + */ + +static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg) +{ + struct sock *sk; + struct sockaddr_vm dst; + struct sockaddr_vm src; + struct vmci_transport_packet *pkt; + struct vsock_sock *vsk; + bool bh_process_pkt; + int err; + + sk = NULL; + err = VMCI_SUCCESS; + bh_process_pkt = false; + + /* Ignore incoming packets from contexts without sockets, or resources + * that aren't vsock implementations. + */ + + if (!vmci_transport_stream_allow(dg->src.context, -1) + || VMCI_TRANSPORT_PACKET_RID != dg->src.resource) + return VMCI_ERROR_NO_ACCESS; + + if (VMCI_DG_SIZE(dg) < sizeof(*pkt)) + /* Drop datagrams that do not contain full VSock packets. */ + return VMCI_ERROR_INVALID_ARGS; + + pkt = (struct vmci_transport_packet *)dg; + + /* Find the socket that should handle this packet. First we look for a + * connected socket and if there is none we look for a socket bound to + * the destintation address. + */ + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); + vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); + + sk = vsock_find_connected_socket(&src, &dst); + if (!sk) { + sk = vsock_find_bound_socket(&dst); + if (!sk) { + /* We could not find a socket for this specified + * address. If this packet is a RST, we just drop it. + * If it is another packet, we send a RST. Note that + * we do not send a RST reply to RSTs so that we do not + * continually send RSTs between two endpoints. + * + * Note that since this is a reply, dst is src and src + * is dst. + */ + if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0) + pr_err("unable to send reset\n"); + + err = VMCI_ERROR_NOT_FOUND; + goto out; + } + } + + /* If the received packet type is beyond all types known to this + * implementation, reply with an invalid message. Hopefully this will + * help when implementing backwards compatibility in the future. + */ + if (pkt->type >= VMCI_TRANSPORT_PACKET_TYPE_MAX) { + vmci_transport_send_invalid_bh(&dst, &src); + err = VMCI_ERROR_INVALID_ARGS; + goto out; + } + + /* This handler is privileged when this module is running on the host. + * We will get datagram connect requests from all endpoints (even VMs + * that are in a restricted context). If we get one from a restricted + * context then the destination socket must be trusted. + * + * NOTE: We access the socket struct without holding the lock here. + * This is ok because the field we are interested is never modified + * outside of the create and destruct socket functions. + */ + vsk = vsock_sk(sk); + if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) { + err = VMCI_ERROR_NO_ACCESS; + goto out; + } + + /* We do most everything in a work queue, but let's fast path the + * notification of reads and writes to help data transfer performance. + * We can only do this if there is no process context code executing + * for this socket since that may change the state. + */ + bh_lock_sock(sk); + + if (!sock_owned_by_user(sk) && sk->sk_state == SS_CONNECTED) + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, true, &dst, &src, + &bh_process_pkt); + + bh_unlock_sock(sk); + + if (!bh_process_pkt) { + struct vmci_transport_recv_pkt_info *recv_pkt_info; + + recv_pkt_info = kmalloc(sizeof(*recv_pkt_info), GFP_ATOMIC); + if (!recv_pkt_info) { + if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0) + pr_err("unable to send reset\n"); + + err = VMCI_ERROR_NO_MEM; + goto out; + } + + recv_pkt_info->sk = sk; + memcpy(&recv_pkt_info->pkt, pkt, sizeof(recv_pkt_info->pkt)); + INIT_WORK(&recv_pkt_info->work, vmci_transport_recv_pkt_work); + + schedule_work(&recv_pkt_info->work); + /* Clear sk so that the reference count incremented by one of + * the Find functions above is not decremented below. We need + * that reference count for the packet handler we've scheduled + * to run. + */ + sk = NULL; + } + +out: + if (sk) + sock_put(sk); + + return err; +} + +static void vmci_transport_peer_attach_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + struct sock *sk = client_data; + const struct vmci_event_payload_qp *e_payload; + struct vsock_sock *vsk; + + e_payload = vmci_event_data_const_payload(e_data); + + vsk = vsock_sk(sk); + + /* We don't ask for delayed CBs when we subscribe to this event (we + * pass 0 as flags to vmci_event_subscribe()). VMCI makes no + * guarantees in that case about what context we might be running in, + * so it could be BH or process, blockable or non-blockable. So we + * need to account for all possible contexts here. + */ + local_bh_disable(); + bh_lock_sock(sk); + + /* XXX This is lame, we should provide a way to lookup sockets by + * qp_handle. + */ + if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, + e_payload->handle)) { + /* XXX This doesn't do anything, but in the future we may want + * to set a flag here to verify the attach really did occur and + * we weren't just sent a datagram claiming it was. + */ + goto out; + } + +out: + bh_unlock_sock(sk); + local_bh_enable(); +} + +static void vmci_transport_handle_detach(struct sock *sk) +{ + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { + sock_set_flag(sk, SOCK_DONE); + + /* On a detach the peer will not be sending or receiving + * anymore. + */ + vsk->peer_shutdown = SHUTDOWN_MASK; + + /* We should not be sending anymore since the peer won't be + * there to receive, but we can still receive if there is data + * left in our consume queue. + */ + if (vsock_stream_has_data(vsk) <= 0) { + if (sk->sk_state == SS_CONNECTING) { + /* The peer may detach from a queue pair while + * we are still in the connecting state, i.e., + * if the peer VM is killed after attaching to + * a queue pair, but before we complete the + * handshake. In that case, we treat the detach + * event like a reset. + */ + + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ECONNRESET; + sk->sk_error_report(sk); + return; + } + sk->sk_state = SS_UNCONNECTED; + } + sk->sk_state_change(sk); + } +} + +static void vmci_transport_peer_detach_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + struct sock *sk = client_data; + const struct vmci_event_payload_qp *e_payload; + struct vsock_sock *vsk; + + e_payload = vmci_event_data_const_payload(e_data); + vsk = vsock_sk(sk); + if (vmci_handle_is_invalid(e_payload->handle)) + return; + + /* Same rules for locking as for peer_attach_cb(). */ + local_bh_disable(); + bh_lock_sock(sk); + + /* XXX This is lame, we should provide a way to lookup sockets by + * qp_handle. + */ + if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, + e_payload->handle)) + vmci_transport_handle_detach(sk); + + bh_unlock_sock(sk); + local_bh_enable(); +} + +static void vmci_transport_qp_resumed_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + vsock_for_each_connected_socket(vmci_transport_handle_detach); +} + +static void vmci_transport_recv_pkt_work(struct work_struct *work) +{ + struct vmci_transport_recv_pkt_info *recv_pkt_info; + struct vmci_transport_packet *pkt; + struct sock *sk; + + recv_pkt_info = + container_of(work, struct vmci_transport_recv_pkt_info, work); + sk = recv_pkt_info->sk; + pkt = &recv_pkt_info->pkt; + + lock_sock(sk); + + switch (sk->sk_state) { + case SS_LISTEN: + vmci_transport_recv_listen(sk, pkt); + break; + case SS_CONNECTING: + /* Processing of pending connections for servers goes through + * the listening socket, so see vmci_transport_recv_listen() + * for that path. + */ + vmci_transport_recv_connecting_client(sk, pkt); + break; + case SS_CONNECTED: + vmci_transport_recv_connected(sk, pkt); + break; + default: + /* Because this function does not run in the same context as + * vmci_transport_recv_stream_cb it is possible that the + * socket has closed. We need to let the other side know or it + * could be sitting in a connect and hang forever. Send a + * reset to prevent that. + */ + vmci_transport_send_reset(sk, pkt); + goto out; + } + +out: + release_sock(sk); + kfree(recv_pkt_info); + /* Release reference obtained in the stream callback when we fetched + * this socket out of the bound or connected list. + */ + sock_put(sk); +} + +static int vmci_transport_recv_listen(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct sock *pending; + struct vsock_sock *vpending; + int err; + u64 qp_size; + bool old_request = false; + bool old_pkt_proto = false; + + err = 0; + + /* Because we are in the listen state, we could be receiving a packet + * for ourself or any previous connection requests that we received. + * If it's the latter, we try to find a socket in our list of pending + * connections and, if we do, call the appropriate handler for the + * state that that socket is in. Otherwise we try to service the + * connection request. + */ + pending = vmci_transport_get_pending(sk, pkt); + if (pending) { + lock_sock(pending); + switch (pending->sk_state) { + case SS_CONNECTING: + err = vmci_transport_recv_connecting_server(sk, + pending, + pkt); + break; + default: + vmci_transport_send_reset(pending, pkt); + err = -EINVAL; + } + + if (err < 0) + vsock_remove_pending(sk, pending); + + release_sock(pending); + vmci_transport_release_pending(pending); + + return err; + } + + /* The listen state only accepts connection requests. Reply with a + * reset unless we received a reset. + */ + + if (!(pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST || + pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)) { + vmci_transport_reply_reset(pkt); + return -EINVAL; + } + + if (pkt->u.size == 0) { + vmci_transport_reply_reset(pkt); + return -EINVAL; + } + + /* If this socket can't accommodate this connection request, we send a + * reset. Otherwise we create and initialize a child socket and reply + * with a connection negotiation. + */ + if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) { + vmci_transport_reply_reset(pkt); + return -ECONNREFUSED; + } + + pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, + sk->sk_type); + if (!pending) { + vmci_transport_send_reset(sk, pkt); + return -ENOMEM; + } + + vpending = vsock_sk(pending); + + vsock_addr_init(&vpending->local_addr, pkt->dg.dst.context, + pkt->dst_port); + vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context, + pkt->src_port); + + /* If the proposed size fits within our min/max, accept it. Otherwise + * propose our own size. + */ + if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size && + pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) { + qp_size = pkt->u.size; + } else { + qp_size = vmci_trans(vpending)->queue_pair_size; + } + + /* Figure out if we are using old or new requests based on the + * overrides pkt types sent by our peer. + */ + if (vmci_transport_old_proto_override(&old_pkt_proto)) { + old_request = old_pkt_proto; + } else { + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST) + old_request = true; + else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2) + old_request = false; + + } + + if (old_request) { + /* Handle a REQUEST (or override) */ + u16 version = VSOCK_PROTO_INVALID; + if (vmci_transport_proto_to_notify_struct( + pending, &version, true)) + err = vmci_transport_send_negotiate(pending, qp_size); + else + err = -EINVAL; + + } else { + /* Handle a REQUEST2 (or override) */ + int proto_int = pkt->proto; + int pos; + u16 active_proto_version = 0; + + /* The list of possible protocols is the intersection of all + * protocols the client supports ... plus all the protocols we + * support. + */ + proto_int &= vmci_transport_new_proto_supported_versions(); + + /* We choose the highest possible protocol version and use that + * one. + */ + pos = fls(proto_int); + if (pos) { + active_proto_version = (1 << (pos - 1)); + if (vmci_transport_proto_to_notify_struct( + pending, &active_proto_version, false)) + err = vmci_transport_send_negotiate2(pending, + qp_size, + active_proto_version); + else + err = -EINVAL; + + } else { + err = -EINVAL; + } + } + + if (err < 0) { + vmci_transport_send_reset(sk, pkt); + sock_put(pending); + err = vmci_transport_error_to_vsock_error(err); + goto out; + } + + vsock_add_pending(sk, pending); + sk->sk_ack_backlog++; + + pending->sk_state = SS_CONNECTING; + vmci_trans(vpending)->produce_size = + vmci_trans(vpending)->consume_size = qp_size; + vmci_trans(vpending)->queue_pair_size = qp_size; + + vmci_trans(vpending)->notify_ops->process_request(pending); + + /* We might never receive another message for this socket and it's not + * connected to any process, so we have to ensure it gets cleaned up + * ourself. Our delayed work function will take care of that. Note + * that we do not ever cancel this function since we have few + * guarantees about its state when calling cancel_delayed_work(). + * Instead we hold a reference on the socket for that function and make + * it capable of handling cases where it needs to do nothing but + * release that reference. + */ + vpending->listener = sk; + sock_hold(sk); + sock_hold(pending); + INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work); + schedule_delayed_work(&vpending->dwork, HZ); + +out: + return err; +} + +static int +vmci_transport_recv_connecting_server(struct sock *listener, + struct sock *pending, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vpending; + struct vmci_handle handle; + struct vmci_qp *qpair; + bool is_local; + u32 flags; + u32 detach_sub_id; + int err; + int skerr; + + vpending = vsock_sk(pending); + detach_sub_id = VMCI_INVALID_ID; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_OFFER: + if (vmci_handle_is_invalid(pkt->u.handle)) { + vmci_transport_send_reset(pending, pkt); + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + break; + default: + /* Close and cleanup the connection. */ + vmci_transport_send_reset(pending, pkt); + skerr = EPROTO; + err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL; + goto destroy; + } + + /* In order to complete the connection we need to attach to the offered + * queue pair and send an attach notification. We also subscribe to the + * detach event so we know when our peer goes away, and we do that + * before attaching so we don't miss an event. If all this succeeds, + * we update our state and wakeup anything waiting in accept() for a + * connection. + */ + + /* We don't care about attach since we ensure the other side has + * attached by specifying the ATTACH_ONLY flag below. + */ + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, + vmci_transport_peer_detach_cb, + pending, &detach_sub_id); + if (err < VMCI_SUCCESS) { + vmci_transport_send_reset(pending, pkt); + err = vmci_transport_error_to_vsock_error(err); + skerr = -err; + goto destroy; + } + + vmci_trans(vpending)->detach_sub_id = detach_sub_id; + + /* Now attach to the queue pair the client created. */ + handle = pkt->u.handle; + + /* vpending->local_addr always has a context id so we do not need to + * worry about VMADDR_CID_ANY in this case. + */ + is_local = + vpending->remote_addr.svm_cid == vpending->local_addr.svm_cid; + flags = VMCI_QPFLAG_ATTACH_ONLY; + flags |= is_local ? VMCI_QPFLAG_LOCAL : 0; + + err = vmci_transport_queue_pair_alloc( + &qpair, + &handle, + vmci_trans(vpending)->produce_size, + vmci_trans(vpending)->consume_size, + pkt->dg.src.context, + flags, + vmci_transport_is_trusted( + vpending, + vpending->remote_addr.svm_cid)); + if (err < 0) { + vmci_transport_send_reset(pending, pkt); + skerr = -err; + goto destroy; + } + + vmci_trans(vpending)->qp_handle = handle; + vmci_trans(vpending)->qpair = qpair; + + /* When we send the attach message, we must be ready to handle incoming + * control messages on the newly connected socket. So we move the + * pending socket to the connected state before sending the attach + * message. Otherwise, an incoming packet triggered by the attach being + * received by the peer may be processed concurrently with what happens + * below after sending the attach message, and that incoming packet + * will find the listening socket instead of the (currently) pending + * socket. Note that enqueueing the socket increments the reference + * count, so even if a reset comes before the connection is accepted, + * the socket will be valid until it is removed from the queue. + * + * If we fail sending the attach below, we remove the socket from the + * connected list and move the socket to SS_UNCONNECTED before + * releasing the lock, so a pending slow path processing of an incoming + * packet will not see the socket in the connected state in that case. + */ + pending->sk_state = SS_CONNECTED; + + vsock_insert_connected(vpending); + + /* Notify our peer of our attach. */ + err = vmci_transport_send_attach(pending, handle); + if (err < 0) { + vsock_remove_connected(vpending); + pr_err("Could not send attach\n"); + vmci_transport_send_reset(pending, pkt); + err = vmci_transport_error_to_vsock_error(err); + skerr = -err; + goto destroy; + } + + /* We have a connection. Move the now connected socket from the + * listener's pending list to the accept queue so callers of accept() + * can find it. + */ + vsock_remove_pending(listener, pending); + vsock_enqueue_accept(listener, pending); + + /* Callers of accept() will be be waiting on the listening socket, not + * the pending socket. + */ + listener->sk_state_change(listener); + + return 0; + +destroy: + pending->sk_err = skerr; + pending->sk_state = SS_UNCONNECTED; + /* As long as we drop our reference, all necessary cleanup will handle + * when the cleanup function drops its reference and our destruct + * implementation is called. Note that since the listen handler will + * remove pending from the pending list upon our failure, the cleanup + * function won't drop the additional reference, which is why we do it + * here. + */ + sock_put(pending); + + return err; +} + +static int +vmci_transport_recv_connecting_client(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vsk; + int err; + int skerr; + + vsk = vsock_sk(sk); + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_ATTACH: + if (vmci_handle_is_invalid(pkt->u.handle) || + !vmci_handle_is_equal(pkt->u.handle, + vmci_trans(vsk)->qp_handle)) { + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + + /* Signify the socket is connected and wakeup the waiter in + * connect(). Also place the socket in the connected table for + * accounting (it can already be found since it's in the bound + * table). + */ + sk->sk_state = SS_CONNECTED; + sk->sk_socket->state = SS_CONNECTED; + vsock_insert_connected(vsk); + sk->sk_state_change(sk); + + break; + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2: + if (pkt->u.size == 0 + || pkt->dg.src.context != vsk->remote_addr.svm_cid + || pkt->src_port != vsk->remote_addr.svm_port + || !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle) + || vmci_trans(vsk)->qpair + || vmci_trans(vsk)->produce_size != 0 + || vmci_trans(vsk)->consume_size != 0 + || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID + || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { + skerr = EPROTO; + err = -EINVAL; + + goto destroy; + } + + err = vmci_transport_recv_connecting_client_negotiate(sk, pkt); + if (err) { + skerr = -err; + goto destroy; + } + + break; + case VMCI_TRANSPORT_PACKET_TYPE_INVALID: + err = vmci_transport_recv_connecting_client_invalid(sk, pkt); + if (err) { + skerr = -err; + goto destroy; + } + + break; + case VMCI_TRANSPORT_PACKET_TYPE_RST: + /* Older versions of the linux code (WS 6.5 / ESX 4.0) used to + * continue processing here after they sent an INVALID packet. + * This meant that we got a RST after the INVALID. We ignore a + * RST after an INVALID. The common code doesn't send the RST + * ... so we can hang if an old version of the common code + * fails between getting a REQUEST and sending an OFFER back. + * Not much we can do about it... except hope that it doesn't + * happen. + */ + if (vsk->ignore_connecting_rst) { + vsk->ignore_connecting_rst = false; + } else { + skerr = ECONNRESET; + err = 0; + goto destroy; + } + + break; + default: + /* Close and cleanup the connection. */ + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + + return 0; + +destroy: + vmci_transport_send_reset(sk, pkt); + + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = skerr; + sk->sk_error_report(sk); + return err; +} + +static int vmci_transport_recv_connecting_client_negotiate( + struct sock *sk, + struct vmci_transport_packet *pkt) +{ + int err; + struct vsock_sock *vsk; + struct vmci_handle handle; + struct vmci_qp *qpair; + u32 attach_sub_id; + u32 detach_sub_id; + bool is_local; + u32 flags; + bool old_proto = true; + bool old_pkt_proto; + u16 version; + + vsk = vsock_sk(sk); + handle = VMCI_INVALID_HANDLE; + attach_sub_id = VMCI_INVALID_ID; + detach_sub_id = VMCI_INVALID_ID; + + /* If we have gotten here then we should be past the point where old + * linux vsock could have sent the bogus rst. + */ + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + + /* Verify that we're OK with the proposed queue pair size */ + if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size || + pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) { + err = -EINVAL; + goto destroy; + } + + /* At this point we know the CID the peer is using to talk to us. */ + + if (vsk->local_addr.svm_cid == VMADDR_CID_ANY) + vsk->local_addr.svm_cid = pkt->dg.dst.context; + + /* Setup the notify ops to be the highest supported version that both + * the server and the client support. + */ + + if (vmci_transport_old_proto_override(&old_pkt_proto)) { + old_proto = old_pkt_proto; + } else { + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE) + old_proto = true; + else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2) + old_proto = false; + + } + + if (old_proto) + version = VSOCK_PROTO_INVALID; + else + version = pkt->proto; + + if (!vmci_transport_proto_to_notify_struct(sk, &version, old_proto)) { + err = -EINVAL; + goto destroy; + } + + /* Subscribe to attach and detach events first. + * + * XXX We attach once for each queue pair created for now so it is easy + * to find the socket (it's provided), but later we should only + * subscribe once and add a way to lookup sockets by queue pair handle. + */ + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH, + vmci_transport_peer_attach_cb, + sk, &attach_sub_id); + if (err < VMCI_SUCCESS) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, + vmci_transport_peer_detach_cb, + sk, &detach_sub_id); + if (err < VMCI_SUCCESS) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + /* Make VMCI select the handle for us. */ + handle = VMCI_INVALID_HANDLE; + is_local = vsk->remote_addr.svm_cid == vsk->local_addr.svm_cid; + flags = is_local ? VMCI_QPFLAG_LOCAL : 0; + + err = vmci_transport_queue_pair_alloc(&qpair, + &handle, + pkt->u.size, + pkt->u.size, + vsk->remote_addr.svm_cid, + flags, + vmci_transport_is_trusted( + vsk, + vsk-> + remote_addr.svm_cid)); + if (err < 0) + goto destroy; + + err = vmci_transport_send_qp_offer(sk, handle); + if (err < 0) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + vmci_trans(vsk)->qp_handle = handle; + vmci_trans(vsk)->qpair = qpair; + + vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = + pkt->u.size; + + vmci_trans(vsk)->attach_sub_id = attach_sub_id; + vmci_trans(vsk)->detach_sub_id = detach_sub_id; + + vmci_trans(vsk)->notify_ops->process_negotiate(sk); + + return 0; + +destroy: + if (attach_sub_id != VMCI_INVALID_ID) + vmci_event_unsubscribe(attach_sub_id); + + if (detach_sub_id != VMCI_INVALID_ID) + vmci_event_unsubscribe(detach_sub_id); + + if (!vmci_handle_is_invalid(handle)) + vmci_qpair_detach(&qpair); + + return err; +} + +static int +vmci_transport_recv_connecting_client_invalid(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + int err = 0; + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsk->sent_request) { + vsk->sent_request = false; + vsk->ignore_connecting_rst = true; + + err = vmci_transport_send_conn_request( + sk, vmci_trans(vsk)->queue_pair_size); + if (err < 0) + err = vmci_transport_error_to_vsock_error(err); + else + err = 0; + + } + + return err; +} + +static int vmci_transport_recv_connected(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vsk; + bool pkt_processed = false; + + /* In cases where we are closing the connection, it's sufficient to + * mark the state change (and maybe error) and wake up any waiting + * threads. Since this is a connected socket, it's owned by a user + * process and will be cleaned up when the failure is passed back on + * the current or next system call. Our system call implementations + * must therefore check for error and state changes on entry and when + * being awoken. + */ + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN: + if (pkt->u.mode) { + vsk = vsock_sk(sk); + + vsk->peer_shutdown |= pkt->u.mode; + sk->sk_state_change(sk); + } + break; + + case VMCI_TRANSPORT_PACKET_TYPE_RST: + vsk = vsock_sk(sk); + /* It is possible that we sent our peer a message (e.g a + * WAITING_READ) right before we got notified that the peer had + * detached. If that happens then we can get a RST pkt back + * from our peer even though there is data available for us to + * read. In that case, don't shutdown the socket completely but + * instead allow the local client to finish reading data off + * the queuepair. Always treat a RST pkt in connected mode like + * a clean shutdown. + */ + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = SS_DISCONNECTING; + + sk->sk_state_change(sk); + break; + + default: + vsk = vsock_sk(sk); + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, false, NULL, NULL, + &pkt_processed); + if (!pkt_processed) + return -EINVAL; + + break; + } + + return 0; +} + +static int vmci_transport_socket_init(struct vsock_sock *vsk, + struct vsock_sock *psk) +{ + vsk->trans = kmalloc(sizeof(struct vmci_transport), GFP_KERNEL); + if (!vsk->trans) + return -ENOMEM; + + vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->qpair = NULL; + vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0; + vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id = + VMCI_INVALID_ID; + vmci_trans(vsk)->notify_ops = NULL; + if (psk) { + vmci_trans(vsk)->queue_pair_size = + vmci_trans(psk)->queue_pair_size; + vmci_trans(vsk)->queue_pair_min_size = + vmci_trans(psk)->queue_pair_min_size; + vmci_trans(vsk)->queue_pair_max_size = + vmci_trans(psk)->queue_pair_max_size; + } else { + vmci_trans(vsk)->queue_pair_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE; + vmci_trans(vsk)->queue_pair_min_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN; + vmci_trans(vsk)->queue_pair_max_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX; + } + + return 0; +} + +static void vmci_transport_destruct(struct vsock_sock *vsk) +{ + if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id); + vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID; + } + + if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id); + vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID; + } + + if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { + vmci_qpair_detach(&vmci_trans(vsk)->qpair); + vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->produce_size = 0; + vmci_trans(vsk)->consume_size = 0; + } + + if (vmci_trans(vsk)->notify_ops) + vmci_trans(vsk)->notify_ops->socket_destruct(vsk); + + kfree(vsk->trans); + vsk->trans = NULL; +} + +static void vmci_transport_release(struct vsock_sock *vsk) +{ + if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) { + vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle); + vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; + } +} + +static int vmci_transport_dgram_bind(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + u32 port; + u32 flags; + int err; + + /* VMCI will select a resource ID for us if we provide + * VMCI_INVALID_ID. + */ + port = addr->svm_port == VMADDR_PORT_ANY ? + VMCI_INVALID_ID : addr->svm_port; + + if (port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE)) + return -EACCES; + + flags = addr->svm_cid == VMADDR_CID_ANY ? + VMCI_FLAG_ANYCID_DG_HND : 0; + + err = vmci_transport_datagram_create_hnd(port, flags, + vmci_transport_recv_dgram_cb, + &vsk->sk, + &vmci_trans(vsk)->dg_handle); + if (err < VMCI_SUCCESS) + return vmci_transport_error_to_vsock_error(err); + vsock_addr_init(&vsk->local_addr, addr->svm_cid, + vmci_trans(vsk)->dg_handle.resource); + + return 0; +} + +static int vmci_transport_dgram_enqueue( + struct vsock_sock *vsk, + struct sockaddr_vm *remote_addr, + struct iovec *iov, + size_t len) +{ + int err; + struct vmci_datagram *dg; + + if (len > VMCI_MAX_DG_PAYLOAD_SIZE) + return -EMSGSIZE; + + if (!vmci_transport_allow_dgram(vsk, remote_addr->svm_cid)) + return -EPERM; + + /* Allocate a buffer for the user's message and our packet header. */ + dg = kmalloc(len + sizeof(*dg), GFP_KERNEL); + if (!dg) + return -ENOMEM; + + memcpy_fromiovec(VMCI_DG_PAYLOAD(dg), iov, len); + + dg->dst = vmci_make_handle(remote_addr->svm_cid, + remote_addr->svm_port); + dg->src = vmci_make_handle(vsk->local_addr.svm_cid, + vsk->local_addr.svm_port); + dg->payload_size = len; + + err = vmci_datagram_send(dg); + kfree(dg); + if (err < 0) + return vmci_transport_error_to_vsock_error(err); + + return err - sizeof(*dg); +} + +static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, + struct vsock_sock *vsk, + struct msghdr *msg, size_t len, + int flags) +{ + int err; + int noblock; + struct vmci_datagram *dg; + size_t payload_len; + struct sk_buff *skb; + + noblock = flags & MSG_DONTWAIT; + + if (flags & MSG_OOB || flags & MSG_ERRQUEUE) + return -EOPNOTSUPP; + + /* Retrieve the head sk_buff from the socket's receive queue. */ + err = 0; + skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); + if (err) + return err; + + if (!skb) + return -EAGAIN; + + dg = (struct vmci_datagram *)skb->data; + if (!dg) + /* err is 0, meaning we read zero bytes. */ + goto out; + + payload_len = dg->payload_size; + /* Ensure the sk_buff matches the payload size claimed in the packet. */ + if (payload_len != skb->len - sizeof(*dg)) { + err = -EINVAL; + goto out; + } + + if (payload_len > len) { + payload_len = len; + msg->msg_flags |= MSG_TRUNC; + } + + /* Place the datagram payload in the user's iovec. */ + err = skb_copy_datagram_iovec(skb, sizeof(*dg), msg->msg_iov, + payload_len); + if (err) + goto out; + + msg->msg_namelen = 0; + if (msg->msg_name) { + struct sockaddr_vm *vm_addr; + + /* Provide the address of the sender. */ + vm_addr = (struct sockaddr_vm *)msg->msg_name; + vsock_addr_init(vm_addr, dg->src.context, dg->src.resource); + msg->msg_namelen = sizeof(*vm_addr); + } + err = payload_len; + +out: + skb_free_datagram(&vsk->sk, skb); + return err; +} + +static bool vmci_transport_dgram_allow(u32 cid, u32 port) +{ + if (cid == VMADDR_CID_HYPERVISOR) { + /* Registrations of PBRPC Servers do not modify VMX/Hypervisor + * state and are allowed. + */ + return port == VMCI_UNITY_PBRPC_REGISTER; + } + + return true; +} + +static int vmci_transport_connect(struct vsock_sock *vsk) +{ + int err; + bool old_pkt_proto = false; + struct sock *sk = &vsk->sk; + + if (vmci_transport_old_proto_override(&old_pkt_proto) && + old_pkt_proto) { + err = vmci_transport_send_conn_request( + sk, vmci_trans(vsk)->queue_pair_size); + if (err < 0) { + sk->sk_state = SS_UNCONNECTED; + return err; + } + } else { + int supported_proto_versions = + vmci_transport_new_proto_supported_versions(); + err = vmci_transport_send_conn_request2( + sk, vmci_trans(vsk)->queue_pair_size, + supported_proto_versions); + if (err < 0) { + sk->sk_state = SS_UNCONNECTED; + return err; + } + + vsk->sent_request = true; + } + + return err; +} + +static ssize_t vmci_transport_stream_dequeue( + struct vsock_sock *vsk, + struct iovec *iov, + size_t len, + int flags) +{ + if (flags & MSG_PEEK) + return vmci_qpair_peekv(vmci_trans(vsk)->qpair, iov, len, 0); + else + return vmci_qpair_dequev(vmci_trans(vsk)->qpair, iov, len, 0); +} + +static ssize_t vmci_transport_stream_enqueue( + struct vsock_sock *vsk, + struct iovec *iov, + size_t len) +{ + return vmci_qpair_enquev(vmci_trans(vsk)->qpair, iov, len, 0); +} + +static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk) +{ + return vmci_qpair_consume_buf_ready(vmci_trans(vsk)->qpair); +} + +static s64 vmci_transport_stream_has_space(struct vsock_sock *vsk) +{ + return vmci_qpair_produce_free_space(vmci_trans(vsk)->qpair); +} + +static u64 vmci_transport_stream_rcvhiwat(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->consume_size; +} + +static bool vmci_transport_stream_is_active(struct vsock_sock *vsk) +{ + return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle); +} + +static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_size; +} + +static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_min_size; +} + +static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_max_size; +} + +static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) +{ + if (val < vmci_trans(vsk)->queue_pair_min_size) + vmci_trans(vsk)->queue_pair_min_size = val; + if (val > vmci_trans(vsk)->queue_pair_max_size) + vmci_trans(vsk)->queue_pair_max_size = val; + vmci_trans(vsk)->queue_pair_size = val; +} + +static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk, + u64 val) +{ + if (val > vmci_trans(vsk)->queue_pair_size) + vmci_trans(vsk)->queue_pair_size = val; + vmci_trans(vsk)->queue_pair_min_size = val; +} + +static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk, + u64 val) +{ + if (val < vmci_trans(vsk)->queue_pair_size) + vmci_trans(vsk)->queue_pair_size = val; + vmci_trans(vsk)->queue_pair_max_size = val; +} + +static int vmci_transport_notify_poll_in( + struct vsock_sock *vsk, + size_t target, + bool *data_ready_now) +{ + return vmci_trans(vsk)->notify_ops->poll_in( + &vsk->sk, target, data_ready_now); +} + +static int vmci_transport_notify_poll_out( + struct vsock_sock *vsk, + size_t target, + bool *space_available_now) +{ + return vmci_trans(vsk)->notify_ops->poll_out( + &vsk->sk, target, space_available_now); +} + +static int vmci_transport_notify_recv_init( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_init( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_pre_block( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_pre_block( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_pre_dequeue( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_pre_dequeue( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_post_dequeue( + struct vsock_sock *vsk, + size_t target, + ssize_t copied, + bool data_read, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_post_dequeue( + &vsk->sk, target, copied, data_read, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_send_init( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_init( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_pre_block( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_pre_block( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_pre_enqueue( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_pre_enqueue( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_post_enqueue( + struct vsock_sock *vsk, + ssize_t written, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_post_enqueue( + &vsk->sk, written, + (struct vmci_transport_send_notify_data *)data); +} + +static bool vmci_transport_old_proto_override(bool *old_pkt_proto) +{ + if (PROTOCOL_OVERRIDE != -1) { + if (PROTOCOL_OVERRIDE == 0) + *old_pkt_proto = true; + else + *old_pkt_proto = false; + + pr_info("Proto override in use\n"); + return true; + } + + return false; +} + +static bool vmci_transport_proto_to_notify_struct(struct sock *sk, + u16 *proto, + bool old_pkt_proto) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (old_pkt_proto) { + if (*proto != VSOCK_PROTO_INVALID) { + pr_err("Can't set both an old and new protocol\n"); + return false; + } + vmci_trans(vsk)->notify_ops = &vmci_transport_notify_pkt_ops; + goto exit; + } + + switch (*proto) { + case VSOCK_PROTO_PKT_ON_NOTIFY: + vmci_trans(vsk)->notify_ops = + &vmci_transport_notify_pkt_q_state_ops; + break; + default: + pr_err("Unknown notify protocol version\n"); + return false; + } + +exit: + vmci_trans(vsk)->notify_ops->socket_init(sk); + return true; +} + +static u16 vmci_transport_new_proto_supported_versions(void) +{ + if (PROTOCOL_OVERRIDE != -1) + return PROTOCOL_OVERRIDE; + + return VSOCK_PROTO_ALL_SUPPORTED; +} + +static u32 vmci_transport_get_local_cid(void) +{ + return vmci_get_context_id(); +} + +static struct vsock_transport vmci_transport = { + .init = vmci_transport_socket_init, + .destruct = vmci_transport_destruct, + .release = vmci_transport_release, + .connect = vmci_transport_connect, + .dgram_bind = vmci_transport_dgram_bind, + .dgram_dequeue = vmci_transport_dgram_dequeue, + .dgram_enqueue = vmci_transport_dgram_enqueue, + .dgram_allow = vmci_transport_dgram_allow, + .stream_dequeue = vmci_transport_stream_dequeue, + .stream_enqueue = vmci_transport_stream_enqueue, + .stream_has_data = vmci_transport_stream_has_data, + .stream_has_space = vmci_transport_stream_has_space, + .stream_rcvhiwat = vmci_transport_stream_rcvhiwat, + .stream_is_active = vmci_transport_stream_is_active, + .stream_allow = vmci_transport_stream_allow, + .notify_poll_in = vmci_transport_notify_poll_in, + .notify_poll_out = vmci_transport_notify_poll_out, + .notify_recv_init = vmci_transport_notify_recv_init, + .notify_recv_pre_block = vmci_transport_notify_recv_pre_block, + .notify_recv_pre_dequeue = vmci_transport_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = vmci_transport_notify_recv_post_dequeue, + .notify_send_init = vmci_transport_notify_send_init, + .notify_send_pre_block = vmci_transport_notify_send_pre_block, + .notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue, + .notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue, + .shutdown = vmci_transport_shutdown, + .set_buffer_size = vmci_transport_set_buffer_size, + .set_min_buffer_size = vmci_transport_set_min_buffer_size, + .set_max_buffer_size = vmci_transport_set_max_buffer_size, + .get_buffer_size = vmci_transport_get_buffer_size, + .get_min_buffer_size = vmci_transport_get_min_buffer_size, + .get_max_buffer_size = vmci_transport_get_max_buffer_size, + .get_local_cid = vmci_transport_get_local_cid, +}; + +static int __init vmci_transport_init(void) +{ + int err; + + /* Create the datagram handle that we will use to send and receive all + * VSocket control messages for this context. + */ + err = vmci_transport_datagram_create_hnd(VMCI_TRANSPORT_PACKET_RID, + VMCI_FLAG_ANYCID_DG_HND, + vmci_transport_recv_stream_cb, + NULL, + &vmci_transport_stream_handle); + if (err < VMCI_SUCCESS) { + pr_err("Unable to create datagram handle. (%d)\n", err); + return vmci_transport_error_to_vsock_error(err); + } + + err = vmci_event_subscribe(VMCI_EVENT_QP_RESUMED, + vmci_transport_qp_resumed_cb, + NULL, &vmci_transport_qp_resumed_sub_id); + if (err < VMCI_SUCCESS) { + pr_err("Unable to subscribe to resumed event. (%d)\n", err); + err = vmci_transport_error_to_vsock_error(err); + vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + goto err_destroy_stream_handle; + } + + err = vsock_core_init(&vmci_transport); + if (err < 0) + goto err_unsubscribe; + + return 0; + +err_unsubscribe: + vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id); +err_destroy_stream_handle: + vmci_datagram_destroy_handle(vmci_transport_stream_handle); + return err; +} +module_init(vmci_transport_init); + +static void __exit vmci_transport_exit(void) +{ + if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) { + if (vmci_datagram_destroy_handle( + vmci_transport_stream_handle) != VMCI_SUCCESS) + pr_err("Couldn't destroy datagram handle\n"); + vmci_transport_stream_handle = VMCI_INVALID_HANDLE; + } + + if (vmci_transport_qp_resumed_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id); + vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + } + + vsock_core_exit(); +} +module_exit(vmci_transport_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("vmware_vsock"); +MODULE_ALIAS_NETPROTO(PF_VSOCK); diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h new file mode 100644 index 000000000000..1bf991803ec0 --- /dev/null +++ b/net/vmw_vsock/vmci_transport.h @@ -0,0 +1,139 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VMCI_TRANSPORT_H_ +#define _VMCI_TRANSPORT_H_ + +#include +#include + +#include "vsock_addr.h" +#include "af_vsock.h" + +/* If the packet format changes in a release then this should change too. */ +#define VMCI_TRANSPORT_PACKET_VERSION 1 + +/* The resource ID on which control packets are sent. */ +#define VMCI_TRANSPORT_PACKET_RID 1 + +#define VSOCK_PROTO_INVALID 0 +#define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0) +#define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY) + +#define vmci_trans(_vsk) ((struct vmci_transport *)((_vsk)->trans)) + +enum vmci_transport_packet_type { + VMCI_TRANSPORT_PACKET_TYPE_INVALID = 0, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE, + VMCI_TRANSPORT_PACKET_TYPE_OFFER, + VMCI_TRANSPORT_PACKET_TYPE_ATTACH, + VMCI_TRANSPORT_PACKET_TYPE_WROTE, + VMCI_TRANSPORT_PACKET_TYPE_READ, + VMCI_TRANSPORT_PACKET_TYPE_RST, + VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN, + VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE, + VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST2, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2, + VMCI_TRANSPORT_PACKET_TYPE_MAX +}; + +struct vmci_transport_waiting_info { + u64 generation; + u64 offset; +}; + +/* Control packet type for STREAM sockets. DGRAMs have no control packets nor + * special packet header for data packets, they are just raw VMCI DGRAM + * messages. For STREAMs, control packets are sent over the control channel + * while data is written and read directly from queue pairs with no packet + * format. + */ +struct vmci_transport_packet { + struct vmci_datagram dg; + u8 version; + u8 type; + u16 proto; + u32 src_port; + u32 dst_port; + u32 _reserved2; + union { + u64 size; + u64 mode; + struct vmci_handle handle; + struct vmci_transport_waiting_info wait; + } u; +}; + +struct vmci_transport_notify_pkt { + u64 write_notify_window; + u64 write_notify_min_window; + bool peer_waiting_read; + bool peer_waiting_write; + bool peer_waiting_write_detected; + bool sent_waiting_read; + bool sent_waiting_write; + struct vmci_transport_waiting_info peer_waiting_read_info; + struct vmci_transport_waiting_info peer_waiting_write_info; + u64 produce_q_generation; + u64 consume_q_generation; +}; + +struct vmci_transport_notify_pkt_q_state { + u64 write_notify_window; + u64 write_notify_min_window; + bool peer_waiting_write; + bool peer_waiting_write_detected; +}; + +union vmci_transport_notify { + struct vmci_transport_notify_pkt pkt; + struct vmci_transport_notify_pkt_q_state pkt_q_state; +}; + +/* Our transport-specific data. */ +struct vmci_transport { + /* For DGRAMs. */ + struct vmci_handle dg_handle; + /* For STREAMs. */ + struct vmci_handle qp_handle; + struct vmci_qp *qpair; + u64 produce_size; + u64 consume_size; + u64 queue_pair_size; + u64 queue_pair_min_size; + u64 queue_pair_max_size; + u32 attach_sub_id; + u32 detach_sub_id; + union vmci_transport_notify notify; + struct vmci_transport_notify_ops *notify_ops; +}; + +int vmci_transport_register(void); +void vmci_transport_unregister(void); + +int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src); +int vmci_transport_send_read_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src); +int vmci_transport_send_wrote(struct sock *sk); +int vmci_transport_send_read(struct sock *sk); +int vmci_transport_send_waiting_write(struct sock *sk, + struct vmci_transport_waiting_info *wait); +int vmci_transport_send_waiting_read(struct sock *sk, + struct vmci_transport_waiting_info *wait); + +#endif diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c new file mode 100644 index 000000000000..9a730744e7bc --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -0,0 +1,680 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include + +#include "vmci_transport_notify.h" + +#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name) + +static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + bool retval; + u64 notify_limit; + + if (!PKT_FIELD(vsk, peer_waiting_write)) + return false; + +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + /* When the sender blocks, we take that as a sign that the sender is + * faster than the receiver. To reduce the transmit rate of the sender, + * we delay the sending of the read notification by decreasing the + * write_notify_window. The notification is delayed until the number of + * bytes used in the queue drops below the write_notify_window. + */ + + if (!PKT_FIELD(vsk, peer_waiting_write_detected)) { + PKT_FIELD(vsk, peer_waiting_write_detected) = true; + if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) { + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + } else { + PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + + } + } + notify_limit = vmci_trans(vsk)->consume_size - + PKT_FIELD(vsk, write_notify_window); +#else + notify_limit = 0; +#endif + + /* For now we ignore the wait information and just see if the free + * space exceeds the notify limit. Note that improving this function + * to be more intelligent will not require a protocol change and will + * retain compatibility between endpoints with mixed versions of this + * function. + * + * The notify_limit is used to delay notifications in the case where + * flow control is enabled. Below the test is expressed in terms of + * free space in the queue: if free_space > ConsumeSize - + * write_notify_window then notify An alternate way of expressing this + * is to rewrite the expression to use the data ready in the receive + * queue: if write_notify_window > bufferReady then notify as + * free_space == ConsumeSize - bufferReady. + */ + retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) > + notify_limit; +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + if (retval) { + /* + * Once we notify the peer, we reset the detected flag so the + * next wait will again cause a decrease in the window size. + */ + + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + } +#endif + return retval; +#else + return true; +#endif +} + +static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + if (!PKT_FIELD(vsk, peer_waiting_read)) + return false; + + /* For now we ignore the wait information and just see if there is any + * data for our peer to read. Note that improving this function to be + * more intelligent will not require a protocol change and will retain + * compatibility between endpoints with mixed versions of this + * function. + */ + return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0; +#else + return true; +#endif +} + +static void +vmci_transport_handle_waiting_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + PKT_FIELD(vsk, peer_waiting_read) = true; + memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait, + sizeof(PKT_FIELD(vsk, peer_waiting_read_info))); + + if (vmci_transport_notify_waiting_read(vsk)) { + bool sent; + + if (bottom_half) + sent = vmci_transport_send_wrote_bh(dst, src) > 0; + else + sent = vmci_transport_send_wrote(sk) > 0; + + if (sent) + PKT_FIELD(vsk, peer_waiting_read) = false; + } +#endif +} + +static void +vmci_transport_handle_waiting_write(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + PKT_FIELD(vsk, peer_waiting_write) = true; + memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait, + sizeof(PKT_FIELD(vsk, peer_waiting_write_info))); + + if (vmci_transport_notify_waiting_write(vsk)) { + bool sent; + + if (bottom_half) + sent = vmci_transport_send_read_bh(dst, src) > 0; + else + sent = vmci_transport_send_read(sk) > 0; + + if (sent) + PKT_FIELD(vsk, peer_waiting_write) = false; + } +#endif +} + +static void +vmci_transport_handle_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + PKT_FIELD(vsk, sent_waiting_write) = false; +#endif + + sk->sk_write_space(sk); +} + +static bool send_waiting_read(struct sock *sk, u64 room_needed) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + struct vmci_transport_waiting_info waiting_info; + u64 tail; + u64 head; + u64 room_left; + bool ret; + + vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, sent_waiting_read)) + return true; + + if (PKT_FIELD(vsk, write_notify_window) < + vmci_trans(vsk)->consume_size) + PKT_FIELD(vsk, write_notify_window) = + min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE, + vmci_trans(vsk)->consume_size); + + vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head); + room_left = vmci_trans(vsk)->consume_size - head; + if (room_needed >= room_left) { + waiting_info.offset = room_needed - room_left; + waiting_info.generation = + PKT_FIELD(vsk, consume_q_generation) + 1; + } else { + waiting_info.offset = head + room_needed; + waiting_info.generation = PKT_FIELD(vsk, consume_q_generation); + } + + ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0; + if (ret) + PKT_FIELD(vsk, sent_waiting_read) = true; + + return ret; +#else + return true; +#endif +} + +static bool send_waiting_write(struct sock *sk, u64 room_needed) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + struct vmci_transport_waiting_info waiting_info; + u64 tail; + u64 head; + u64 room_left; + bool ret; + + vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, sent_waiting_write)) + return true; + + vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head); + room_left = vmci_trans(vsk)->produce_size - tail; + if (room_needed + 1 >= room_left) { + /* Wraps around to current generation. */ + waiting_info.offset = room_needed + 1 - room_left; + waiting_info.generation = PKT_FIELD(vsk, produce_q_generation); + } else { + waiting_info.offset = tail + room_needed + 1; + waiting_info.generation = + PKT_FIELD(vsk, produce_q_generation) - 1; + } + + ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0; + if (ret) + PKT_FIELD(vsk, sent_waiting_write) = true; + + return ret; +#else + return true; +#endif +} + +static int vmci_transport_send_read_notification(struct sock *sk) +{ + struct vsock_sock *vsk; + bool sent_read; + unsigned int retries; + int err; + + vsk = vsock_sk(sk); + sent_read = false; + retries = 0; + err = 0; + + if (vmci_transport_notify_waiting_write(vsk)) { + /* Notify the peer that we have read, retrying the send on + * failure up to our maximum value. XXX For now we just log + * the failure, but later we should schedule a work item to + * handle the resend until it succeeds. That would require + * keeping track of work items in the vsk and cleaning them up + * upon socket close. + */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_read && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_read(sk); + if (err >= 0) + sent_read = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) + pr_err("%p unable to send read notify to peer\n", sk); + else +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + PKT_FIELD(vsk, peer_waiting_write) = false; +#endif + + } + return err; +} + +static void +vmci_transport_handle_wrote(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk = vsock_sk(sk); + PKT_FIELD(vsk, sent_waiting_read) = false; +#endif + sk->sk_data_ready(sk, 0); +} + +static void vmci_transport_notify_pkt_socket_init(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_read) = false; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + PKT_FIELD(vsk, sent_waiting_read) = false; + PKT_FIELD(vsk, sent_waiting_write) = false; + PKT_FIELD(vsk, produce_q_generation) = 0; + PKT_FIELD(vsk, consume_q_generation) = 0; + + memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0, + sizeof(PKT_FIELD(vsk, peer_waiting_read_info))); + memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0, + sizeof(PKT_FIELD(vsk, peer_waiting_write_info))); +} + +static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk) +{ +} + +static int +vmci_transport_notify_pkt_poll_in(struct sock *sk, + size_t target, bool *data_ready_now) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk)) { + *data_ready_now = true; + } else { + /* We can't read right now because there is nothing in the + * queue. Ask for notifications when there is something to + * read. + */ + if (sk->sk_state == SS_CONNECTED) { + if (!send_waiting_read(sk, 1)) + return -1; + + } + *data_ready_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_poll_out(struct sock *sk, + size_t target, bool *space_avail_now) +{ + s64 produce_q_free_space; + struct vsock_sock *vsk = vsock_sk(sk); + + produce_q_free_space = vsock_stream_has_space(vsk); + if (produce_q_free_space > 0) { + *space_avail_now = true; + return 0; + } else if (produce_q_free_space == 0) { + /* This is a connected socket but we can't currently send data. + * Notify the peer that we are waiting if the queue is full. We + * only send a waiting write if the queue is full because + * otherwise we end up in an infinite WAITING_WRITE, READ, + * WAITING_WRITE, READ, etc. loop. Treat failing to send the + * notification as a socket error, passing that back through + * the mask. + */ + if (!send_waiting_write(sk, 1)) + return -1; + + *space_avail_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_init( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + +#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY + data->consume_head = 0; + data->produce_tail = 0; +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + data->notify_on_block = false; + + if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) { + PKT_FIELD(vsk, write_notify_min_window) = target + 1; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) { + /* If the current window is smaller than the new + * minimal window size, we need to reevaluate whether + * we need to notify the sender. If the number of ready + * bytes are smaller than the new window, we need to + * send a notification to the sender before we block. + */ + + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + data->notify_on_block = true; + } + } +#endif +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_pre_block( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + int err = 0; + + /* Notify our peer that we are waiting for data to read. */ + if (!send_waiting_read(sk, target)) { + err = -EHOSTUNREACH; + return err; + } +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + if (data->notify_on_block) { + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + data->notify_on_block = false; + } +#endif + + return err; +} + +static int +vmci_transport_notify_pkt_recv_pre_dequeue( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + /* Now consume up to len bytes from the queue. Note that since we have + * the socket locked we should copy at least ready bytes. + */ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, + &data->produce_tail, + &data->consume_head); +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_post_dequeue( + struct sock *sk, + size_t target, + ssize_t copied, + bool data_read, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk; + int err; + + vsk = vsock_sk(sk); + err = 0; + + if (data_read) { +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + /* Detect a wrap-around to maintain queue generation. Note + * that this is safe since we hold the socket lock across the + * two queue pair operations. + */ + if (copied >= + vmci_trans(vsk)->consume_size - data->consume_head) + PKT_FIELD(vsk, consume_q_generation)++; +#endif + + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + } + return err; +} + +static int +vmci_transport_notify_pkt_send_init( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ +#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY + data->consume_head = 0; + data->produce_tail = 0; +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_send_pre_block( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + /* Notify our peer that we are waiting for room to write. */ + if (!send_waiting_write(sk, 1)) + return -EHOSTUNREACH; + + return 0; +} + +static int +vmci_transport_notify_pkt_send_pre_enqueue( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, + &data->produce_tail, + &data->consume_head); +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_send_post_enqueue( + struct sock *sk, + ssize_t written, + struct vmci_transport_send_notify_data *data) +{ + int err = 0; + struct vsock_sock *vsk; + bool sent_wrote = false; + int retries = 0; + + vsk = vsock_sk(sk); + +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + /* Detect a wrap-around to maintain queue generation. Note that this + * is safe since we hold the socket lock across the two queue pair + * operations. + */ + if (written >= vmci_trans(vsk)->produce_size - data->produce_tail) + PKT_FIELD(vsk, produce_q_generation)++; + +#endif + + if (vmci_transport_notify_waiting_read(vsk)) { + /* Notify the peer that we have written, retrying the send on + * failure up to our maximum value. See the XXX comment for the + * corresponding piece of code in StreamRecvmsg() for potential + * improvements. + */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_wrote && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_wrote(sk); + if (err >= 0) + sent_wrote = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + pr_err("%p unable to send wrote notify to peer\n", sk); + return err; + } else { +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + PKT_FIELD(vsk, peer_waiting_read) = false; +#endif + } + } + return err; +} + +static void +vmci_transport_notify_pkt_handle_pkt( + struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src, bool *pkt_processed) +{ + bool processed = false; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_READ: + vmci_transport_handle_read(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE: + vmci_transport_handle_waiting_write(sk, pkt, bottom_half, + dst, src); + processed = true; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ: + vmci_transport_handle_waiting_read(sk, pkt, bottom_half, + dst, src); + processed = true; + break; + } + + if (pkt_processed) + *pkt_processed = processed; +} + +static void vmci_transport_notify_pkt_process_request(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +/* Socket control packet based operations. */ +struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, + vmci_transport_notify_pkt_poll_out, + vmci_transport_notify_pkt_handle_pkt, + vmci_transport_notify_pkt_recv_init, + vmci_transport_notify_pkt_recv_pre_block, + vmci_transport_notify_pkt_recv_pre_dequeue, + vmci_transport_notify_pkt_recv_post_dequeue, + vmci_transport_notify_pkt_send_init, + vmci_transport_notify_pkt_send_pre_block, + vmci_transport_notify_pkt_send_pre_enqueue, + vmci_transport_notify_pkt_send_post_enqueue, + vmci_transport_notify_pkt_process_request, + vmci_transport_notify_pkt_process_negotiate, +}; diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h new file mode 100644 index 000000000000..7df793249b6c --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify.h @@ -0,0 +1,83 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __VMCI_TRANSPORT_NOTIFY_H__ +#define __VMCI_TRANSPORT_NOTIFY_H__ + +#include +#include +#include +#include + +#include "vmci_transport.h" + +/* Comment this out to compare with old protocol. */ +#define VSOCK_OPTIMIZATION_WAITING_NOTIFY 1 +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) +/* Comment this out to remove flow control for "new" protocol */ +#define VSOCK_OPTIMIZATION_FLOW_CONTROL 1 +#endif + +#define VMCI_TRANSPORT_MAX_DGRAM_RESENDS 10 + +struct vmci_transport_recv_notify_data { + u64 consume_head; + u64 produce_tail; + bool notify_on_block; +}; + +struct vmci_transport_send_notify_data { + u64 consume_head; + u64 produce_tail; +}; + +/* Socket notification callbacks. */ +struct vmci_transport_notify_ops { + void (*socket_init) (struct sock *sk); + void (*socket_destruct) (struct vsock_sock *vsk); + int (*poll_in) (struct sock *sk, size_t target, + bool *data_ready_now); + int (*poll_out) (struct sock *sk, size_t target, + bool *space_avail_now); + void (*handle_notify_pkt) (struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, struct sockaddr_vm *dst, + struct sockaddr_vm *src, + bool *pkt_processed); + int (*recv_init) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_pre_block) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_pre_dequeue) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_post_dequeue) (struct sock *sk, size_t target, + ssize_t copied, bool data_read, + struct vmci_transport_recv_notify_data *data); + int (*send_init) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_pre_block) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_pre_enqueue) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_post_enqueue) (struct sock *sk, ssize_t written, + struct vmci_transport_send_notify_data *data); + void (*process_request) (struct sock *sk); + void (*process_negotiate) (struct sock *sk); +}; + +extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; +extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; + +#endif /* __VMCI_TRANSPORT_NOTIFY_H__ */ diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c new file mode 100644 index 000000000000..622bd7aa1016 --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -0,0 +1,438 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include + +#include "vmci_transport_notify.h" + +#define PKT_FIELD(vsk, field_name) \ + (vmci_trans(vsk)->notify.pkt_q_state.field_name) + +static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk) +{ + bool retval; + u64 notify_limit; + + if (!PKT_FIELD(vsk, peer_waiting_write)) + return false; + + /* When the sender blocks, we take that as a sign that the sender is + * faster than the receiver. To reduce the transmit rate of the sender, + * we delay the sending of the read notification by decreasing the + * write_notify_window. The notification is delayed until the number of + * bytes used in the queue drops below the write_notify_window. + */ + + if (!PKT_FIELD(vsk, peer_waiting_write_detected)) { + PKT_FIELD(vsk, peer_waiting_write_detected) = true; + if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) { + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + } else { + PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + + } + } + notify_limit = vmci_trans(vsk)->consume_size - + PKT_FIELD(vsk, write_notify_window); + + /* The notify_limit is used to delay notifications in the case where + * flow control is enabled. Below the test is expressed in terms of + * free space in the queue: if free_space > ConsumeSize - + * write_notify_window then notify An alternate way of expressing this + * is to rewrite the expression to use the data ready in the receive + * queue: if write_notify_window > bufferReady then notify as + * free_space == ConsumeSize - bufferReady. + */ + + retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) > + notify_limit; + + if (retval) { + /* Once we notify the peer, we reset the detected flag so the + * next wait will again cause a decrease in the window size. + */ + + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + } + return retval; +} + +static void +vmci_transport_handle_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ + sk->sk_write_space(sk); +} + +static void +vmci_transport_handle_wrote(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ + sk->sk_data_ready(sk, 0); +} + +static void vsock_block_update_write_window(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, write_notify_window) < vmci_trans(vsk)->consume_size) + PKT_FIELD(vsk, write_notify_window) = + min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE, + vmci_trans(vsk)->consume_size); +} + +static int vmci_transport_send_read_notification(struct sock *sk) +{ + struct vsock_sock *vsk; + bool sent_read; + unsigned int retries; + int err; + + vsk = vsock_sk(sk); + sent_read = false; + retries = 0; + err = 0; + + if (vmci_transport_notify_waiting_write(vsk)) { + /* Notify the peer that we have read, retrying the send on + * failure up to our maximum value. XXX For now we just log + * the failure, but later we should schedule a work item to + * handle the resend until it succeeds. That would require + * keeping track of work items in the vsk and cleaning them up + * upon socket close. + */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_read && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_read(sk); + if (err >= 0) + sent_read = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_read) + pr_err("%p unable to send read notification to peer\n", + sk); + else + PKT_FIELD(vsk, peer_waiting_write) = false; + + } + return err; +} + +static void vmci_transport_notify_pkt_socket_init(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; +} + +static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk) +{ + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; +} + +static int +vmci_transport_notify_pkt_poll_in(struct sock *sk, + size_t target, bool *data_ready_now) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk)) { + *data_ready_now = true; + } else { + /* We can't read right now because there is nothing in the + * queue. Ask for notifications when there is something to + * read. + */ + if (sk->sk_state == SS_CONNECTED) + vsock_block_update_write_window(sk); + *data_ready_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_poll_out(struct sock *sk, + size_t target, bool *space_avail_now) +{ + s64 produce_q_free_space; + struct vsock_sock *vsk = vsock_sk(sk); + + produce_q_free_space = vsock_stream_has_space(vsk); + if (produce_q_free_space > 0) { + *space_avail_now = true; + return 0; + } else if (produce_q_free_space == 0) { + /* This is a connected socket but we can't currently send data. + * Nothing else to do. + */ + *space_avail_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_init( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + data->consume_head = 0; + data->produce_tail = 0; + data->notify_on_block = false; + + if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) { + PKT_FIELD(vsk, write_notify_min_window) = target + 1; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) { + /* If the current window is smaller than the new + * minimal window size, we need to reevaluate whether + * we need to notify the sender. If the number of ready + * bytes are smaller than the new window, we need to + * send a notification to the sender before we block. + */ + + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + data->notify_on_block = true; + } + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_pre_block( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + int err = 0; + + vsock_block_update_write_window(sk); + + if (data->notify_on_block) { + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + data->notify_on_block = false; + } + + return err; +} + +static int +vmci_transport_notify_pkt_recv_post_dequeue( + struct sock *sk, + size_t target, + ssize_t copied, + bool data_read, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk; + int err; + bool was_full = false; + u64 free_space; + + vsk = vsock_sk(sk); + err = 0; + + if (data_read) { + smp_mb(); + + free_space = + vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair); + was_full = free_space == copied; + + if (was_full) + PKT_FIELD(vsk, peer_waiting_write) = true; + + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + /* See the comment in + * vmci_transport_notify_pkt_send_post_enqueue(). + */ + sk->sk_data_ready(sk, 0); + } + + return err; +} + +static int +vmci_transport_notify_pkt_send_init( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + data->consume_head = 0; + data->produce_tail = 0; + + return 0; +} + +static int +vmci_transport_notify_pkt_send_post_enqueue( + struct sock *sk, + ssize_t written, + struct vmci_transport_send_notify_data *data) +{ + int err = 0; + struct vsock_sock *vsk; + bool sent_wrote = false; + bool was_empty; + int retries = 0; + + vsk = vsock_sk(sk); + + smp_mb(); + + was_empty = + vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) == written; + if (was_empty) { + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_wrote && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_wrote(sk); + if (err >= 0) + sent_wrote = true; + + retries++; + } + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_wrote) { + pr_err("%p unable to send wrote notification to peer\n", + sk); + return err; + } + + return err; +} + +static void +vmci_transport_notify_pkt_handle_pkt( + struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src, bool *pkt_processed) +{ + bool processed = false; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_READ: + vmci_transport_handle_read(sk, pkt, bottom_half, dst, src); + processed = true; + break; + } + + if (pkt_processed) + *pkt_processed = processed; +} + +static void vmci_transport_notify_pkt_process_request(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static int +vmci_transport_notify_pkt_recv_pre_dequeue( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +static int +vmci_transport_notify_pkt_send_pre_block( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +static int +vmci_transport_notify_pkt_send_pre_enqueue( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +/* Socket always on control packet based operations. */ +struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, + vmci_transport_notify_pkt_poll_out, + vmci_transport_notify_pkt_handle_pkt, + vmci_transport_notify_pkt_recv_init, + vmci_transport_notify_pkt_recv_pre_block, + vmci_transport_notify_pkt_recv_pre_dequeue, + vmci_transport_notify_pkt_recv_post_dequeue, + vmci_transport_notify_pkt_send_init, + vmci_transport_notify_pkt_send_pre_block, + vmci_transport_notify_pkt_send_pre_enqueue, + vmci_transport_notify_pkt_send_post_enqueue, + vmci_transport_notify_pkt_process_request, + vmci_transport_notify_pkt_process_negotiate, +}; diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c new file mode 100644 index 000000000000..b7df1aea7c59 --- /dev/null +++ b/net/vmw_vsock/vsock_addr.c @@ -0,0 +1,86 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include + +#include "vsock_addr.h" + +void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port) +{ + memset(addr, 0, sizeof(*addr)); + addr->svm_family = AF_VSOCK; + addr->svm_cid = cid; + addr->svm_port = port; +} +EXPORT_SYMBOL_GPL(vsock_addr_init); + +int vsock_addr_validate(const struct sockaddr_vm *addr) +{ + if (!addr) + return -EFAULT; + + if (addr->svm_family != AF_VSOCK) + return -EAFNOSUPPORT; + + if (addr->svm_zero[0] != 0) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(vsock_addr_validate); + +bool vsock_addr_bound(const struct sockaddr_vm *addr) +{ + return addr->svm_port != VMADDR_PORT_ANY; +} +EXPORT_SYMBOL_GPL(vsock_addr_bound); + +void vsock_addr_unbind(struct sockaddr_vm *addr) +{ + vsock_addr_init(addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); +} +EXPORT_SYMBOL_GPL(vsock_addr_unbind); + +bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other) +{ + return addr->svm_cid == other->svm_cid && + addr->svm_port == other->svm_port; +} +EXPORT_SYMBOL_GPL(vsock_addr_equals_addr); + +bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other) +{ + return (addr->svm_cid == VMADDR_CID_ANY || + other->svm_cid == VMADDR_CID_ANY || + addr->svm_cid == other->svm_cid) && + addr->svm_port == other->svm_port; +} +EXPORT_SYMBOL_GPL(vsock_addr_equals_addr_any); + +int vsock_addr_cast(const struct sockaddr *addr, + size_t len, struct sockaddr_vm **out_addr) +{ + if (len < sizeof(**out_addr)) + return -EFAULT; + + *out_addr = (struct sockaddr_vm *)addr; + return vsock_addr_validate(*out_addr); +} +EXPORT_SYMBOL_GPL(vsock_addr_cast); diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h new file mode 100644 index 000000000000..cdfbcefdf843 --- /dev/null +++ b/net/vmw_vsock/vsock_addr.h @@ -0,0 +1,32 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VSOCK_ADDR_H_ +#define _VSOCK_ADDR_H_ + +#include + +void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port); +int vsock_addr_validate(const struct sockaddr_vm *addr); +bool vsock_addr_bound(const struct sockaddr_vm *addr); +void vsock_addr_unbind(struct sockaddr_vm *addr); +bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other); +bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other); +int vsock_addr_cast(const struct sockaddr *addr, size_t len, + struct sockaddr_vm **out_addr); + +#endif diff --git a/net/vmw_vsock/vsock_version.h b/net/vmw_vsock/vsock_version.h new file mode 100644 index 000000000000..4df7f5e2151c --- /dev/null +++ b/net/vmw_vsock/vsock_version.h @@ -0,0 +1,22 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2011-2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VSOCK_VERSION_H_ +#define _VSOCK_VERSION_H_ + +#define VSOCK_DRIVER_VERSION_PARTS { 1, 0, 0, 0 } +#define VSOCK_DRIVER_VERSION_STRING "1.0.0.0-k" + +#endif /* _VSOCK_VERSION_H_ */ -- cgit v1.2.3-71-gd317 From febf018d22347b5df94066bca05d0c11a84e839d Mon Sep 17 00:00:00 2001 From: David Ward Date: Fri, 8 Feb 2013 17:17:06 +0000 Subject: net/802: Implement Multiple Registration Protocol (MRP) Initial implementation of the Multiple Registration Protocol (MRP) from IEEE 802.1Q-2011, based on the existing implementation of the Generic Attribute Registration Protocol (GARP). Signed-off-by: David Ward Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 + include/net/mrp.h | 143 ++++++++ net/802/Kconfig | 3 + net/802/Makefile | 1 + net/802/mrp.c | 895 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 1044 insertions(+) create mode 100644 include/net/mrp.h create mode 100644 net/802/mrp.c (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ab2774eb49e8..25bd46f52877 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1290,6 +1290,8 @@ struct net_device { }; /* GARP */ struct garp_port __rcu *garp_port; + /* MRP */ + struct mrp_port __rcu *mrp_port; /* class/net/name entry */ struct device dev; diff --git a/include/net/mrp.h b/include/net/mrp.h new file mode 100644 index 000000000000..4fbf02aa2ec1 --- /dev/null +++ b/include/net/mrp.h @@ -0,0 +1,143 @@ +#ifndef _NET_MRP_H +#define _NET_MRP_H + +#define MRP_END_MARK 0x0 + +struct mrp_pdu_hdr { + u8 version; +}; + +struct mrp_msg_hdr { + u8 attrtype; + u8 attrlen; +}; + +struct mrp_vecattr_hdr { + __be16 lenflags; + unsigned char firstattrvalue[]; +#define MRP_VECATTR_HDR_LEN_MASK cpu_to_be16(0x1FFF) +#define MRP_VECATTR_HDR_FLAG_LA cpu_to_be16(0x2000) +}; + +enum mrp_vecattr_event { + MRP_VECATTR_EVENT_NEW, + MRP_VECATTR_EVENT_JOIN_IN, + MRP_VECATTR_EVENT_IN, + MRP_VECATTR_EVENT_JOIN_MT, + MRP_VECATTR_EVENT_MT, + MRP_VECATTR_EVENT_LV, + __MRP_VECATTR_EVENT_MAX +}; + +struct mrp_skb_cb { + struct mrp_msg_hdr *mh; + struct mrp_vecattr_hdr *vah; + unsigned char attrvalue[]; +}; + +static inline struct mrp_skb_cb *mrp_cb(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct mrp_skb_cb) > + FIELD_SIZEOF(struct sk_buff, cb)); + return (struct mrp_skb_cb *)skb->cb; +} + +enum mrp_applicant_state { + MRP_APPLICANT_INVALID, + MRP_APPLICANT_VO, + MRP_APPLICANT_VP, + MRP_APPLICANT_VN, + MRP_APPLICANT_AN, + MRP_APPLICANT_AA, + MRP_APPLICANT_QA, + MRP_APPLICANT_LA, + MRP_APPLICANT_AO, + MRP_APPLICANT_QO, + MRP_APPLICANT_AP, + MRP_APPLICANT_QP, + __MRP_APPLICANT_MAX +}; +#define MRP_APPLICANT_MAX (__MRP_APPLICANT_MAX - 1) + +enum mrp_event { + MRP_EVENT_NEW, + MRP_EVENT_JOIN, + MRP_EVENT_LV, + MRP_EVENT_TX, + MRP_EVENT_R_NEW, + MRP_EVENT_R_JOIN_IN, + MRP_EVENT_R_IN, + MRP_EVENT_R_JOIN_MT, + MRP_EVENT_R_MT, + MRP_EVENT_R_LV, + MRP_EVENT_R_LA, + MRP_EVENT_REDECLARE, + MRP_EVENT_PERIODIC, + __MRP_EVENT_MAX +}; +#define MRP_EVENT_MAX (__MRP_EVENT_MAX - 1) + +enum mrp_tx_action { + MRP_TX_ACTION_NONE, + MRP_TX_ACTION_S_NEW, + MRP_TX_ACTION_S_JOIN_IN, + MRP_TX_ACTION_S_JOIN_IN_OPTIONAL, + MRP_TX_ACTION_S_IN_OPTIONAL, + MRP_TX_ACTION_S_LV, +}; + +struct mrp_attr { + struct rb_node node; + enum mrp_applicant_state state; + u8 type; + u8 len; + unsigned char value[]; +}; + +enum mrp_applications { + MRP_APPLICATION_MVRP, + __MRP_APPLICATION_MAX +}; +#define MRP_APPLICATION_MAX (__MRP_APPLICATION_MAX - 1) + +struct mrp_application { + enum mrp_applications type; + unsigned int maxattr; + struct packet_type pkttype; + unsigned char group_address[ETH_ALEN]; + u8 version; +}; + +struct mrp_applicant { + struct mrp_application *app; + struct net_device *dev; + struct timer_list join_timer; + + spinlock_t lock; + struct sk_buff_head queue; + struct sk_buff *pdu; + struct rb_root mad; + struct rcu_head rcu; +}; + +struct mrp_port { + struct mrp_applicant __rcu *applicants[MRP_APPLICATION_MAX + 1]; + struct rcu_head rcu; +}; + +extern int mrp_register_application(struct mrp_application *app); +extern void mrp_unregister_application(struct mrp_application *app); + +extern int mrp_init_applicant(struct net_device *dev, + struct mrp_application *app); +extern void mrp_uninit_applicant(struct net_device *dev, + struct mrp_application *app); + +extern int mrp_request_join(const struct net_device *dev, + const struct mrp_application *app, + const void *value, u8 len, u8 type); +extern void mrp_request_leave(const struct net_device *dev, + const struct mrp_application *app, + const void *value, u8 len, u8 type); + +#endif /* _NET_MRP_H */ diff --git a/net/802/Kconfig b/net/802/Kconfig index be33d27c8e69..80d4bf78905d 100644 --- a/net/802/Kconfig +++ b/net/802/Kconfig @@ -5,3 +5,6 @@ config STP config GARP tristate select STP + +config MRP + tristate diff --git a/net/802/Makefile b/net/802/Makefile index a30d6e385aed..37e654d6615e 100644 --- a/net/802/Makefile +++ b/net/802/Makefile @@ -11,3 +11,4 @@ obj-$(CONFIG_IPX) += p8022.o psnap.o p8023.o obj-$(CONFIG_ATALK) += p8022.o psnap.o obj-$(CONFIG_STP) += stp.o obj-$(CONFIG_GARP) += garp.o +obj-$(CONFIG_MRP) += mrp.o diff --git a/net/802/mrp.c b/net/802/mrp.c new file mode 100644 index 000000000000..47a9e14c8ba7 --- /dev/null +++ b/net/802/mrp.c @@ -0,0 +1,895 @@ +/* + * IEEE 802.1Q Multiple Registration Protocol (MRP) + * + * Copyright (c) 2012 Massachusetts Institute of Technology + * + * Adapted from code in net/802/garp.c + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int mrp_join_time __read_mostly = 200; +module_param(mrp_join_time, uint, 0644); +MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)"); +MODULE_LICENSE("GPL"); + +static const u8 +mrp_applicant_state_table[MRP_APPLICANT_MAX + 1][MRP_EVENT_MAX + 1] = { + [MRP_APPLICANT_VO] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_VP, + [MRP_EVENT_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_TX] = MRP_APPLICANT_VO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_VO, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AO, + [MRP_EVENT_R_IN] = MRP_APPLICANT_VO, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_VO, + [MRP_EVENT_R_MT] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VO, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VO, + }, + [MRP_APPLICANT_VP] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_VP, + [MRP_EVENT_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_TX] = MRP_APPLICANT_AA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_VP, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AP, + [MRP_EVENT_R_IN] = MRP_APPLICANT_VP, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_VP, + [MRP_EVENT_R_MT] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VP, + }, + [MRP_APPLICANT_VN] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_VN, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_AN, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_VN, + [MRP_EVENT_R_IN] = MRP_APPLICANT_VN, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_VN, + [MRP_EVENT_R_MT] = MRP_APPLICANT_VN, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VN, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VN, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VN, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_VN, + }, + [MRP_APPLICANT_AN] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_AN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AN, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AN, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_AN, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AN, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AN, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AN, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VN, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VN, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VN, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AN, + }, + [MRP_APPLICANT_AA] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AA, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AA, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QA, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AA, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AA, + }, + [MRP_APPLICANT_QA] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_QA, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_QA, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QA, + [MRP_EVENT_R_IN] = MRP_APPLICANT_QA, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AA, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AA, + }, + [MRP_APPLICANT_LA] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AA, + [MRP_EVENT_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_TX] = MRP_APPLICANT_VO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_LA, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_LA, + [MRP_EVENT_R_IN] = MRP_APPLICANT_LA, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_LA, + [MRP_EVENT_R_MT] = MRP_APPLICANT_LA, + [MRP_EVENT_R_LV] = MRP_APPLICANT_LA, + [MRP_EVENT_R_LA] = MRP_APPLICANT_LA, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_LA, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_LA, + }, + [MRP_APPLICANT_AO] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AP, + [MRP_EVENT_LV] = MRP_APPLICANT_AO, + [MRP_EVENT_TX] = MRP_APPLICANT_AO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AO, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QO, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AO, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VO, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AO, + }, + [MRP_APPLICANT_QO] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_QP, + [MRP_EVENT_LV] = MRP_APPLICANT_QO, + [MRP_EVENT_TX] = MRP_APPLICANT_QO, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_QO, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QO, + [MRP_EVENT_R_IN] = MRP_APPLICANT_QO, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AO, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VO, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VO, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VO, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_QO, + }, + [MRP_APPLICANT_AP] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_AP, + [MRP_EVENT_LV] = MRP_APPLICANT_AO, + [MRP_EVENT_TX] = MRP_APPLICANT_QA, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_AP, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QP, + [MRP_EVENT_R_IN] = MRP_APPLICANT_AP, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AP, + }, + [MRP_APPLICANT_QP] = { + [MRP_EVENT_NEW] = MRP_APPLICANT_VN, + [MRP_EVENT_JOIN] = MRP_APPLICANT_QP, + [MRP_EVENT_LV] = MRP_APPLICANT_QO, + [MRP_EVENT_TX] = MRP_APPLICANT_QP, + [MRP_EVENT_R_NEW] = MRP_APPLICANT_QP, + [MRP_EVENT_R_JOIN_IN] = MRP_APPLICANT_QP, + [MRP_EVENT_R_IN] = MRP_APPLICANT_QP, + [MRP_EVENT_R_JOIN_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_MT] = MRP_APPLICANT_AP, + [MRP_EVENT_R_LV] = MRP_APPLICANT_VP, + [MRP_EVENT_R_LA] = MRP_APPLICANT_VP, + [MRP_EVENT_REDECLARE] = MRP_APPLICANT_VP, + [MRP_EVENT_PERIODIC] = MRP_APPLICANT_AP, + }, +}; + +static const u8 +mrp_tx_action_table[MRP_APPLICANT_MAX + 1] = { + [MRP_APPLICANT_VO] = MRP_TX_ACTION_S_IN_OPTIONAL, + [MRP_APPLICANT_VP] = MRP_TX_ACTION_S_JOIN_IN, + [MRP_APPLICANT_VN] = MRP_TX_ACTION_S_NEW, + [MRP_APPLICANT_AN] = MRP_TX_ACTION_S_NEW, + [MRP_APPLICANT_AA] = MRP_TX_ACTION_S_JOIN_IN, + [MRP_APPLICANT_QA] = MRP_TX_ACTION_S_JOIN_IN_OPTIONAL, + [MRP_APPLICANT_LA] = MRP_TX_ACTION_S_LV, + [MRP_APPLICANT_AO] = MRP_TX_ACTION_S_IN_OPTIONAL, + [MRP_APPLICANT_QO] = MRP_TX_ACTION_S_IN_OPTIONAL, + [MRP_APPLICANT_AP] = MRP_TX_ACTION_S_JOIN_IN, + [MRP_APPLICANT_QP] = MRP_TX_ACTION_S_IN_OPTIONAL, +}; + +static void mrp_attrvalue_inc(void *value, u8 len) +{ + u8 *v = (u8 *)value; + + /* Add 1 to the last byte. If it becomes zero, + * go to the previous byte and repeat. + */ + while (len > 0 && !++v[--len]) + ; +} + +static int mrp_attr_cmp(const struct mrp_attr *attr, + const void *value, u8 len, u8 type) +{ + if (attr->type != type) + return attr->type - type; + if (attr->len != len) + return attr->len - len; + return memcmp(attr->value, value, len); +} + +static struct mrp_attr *mrp_attr_lookup(const struct mrp_applicant *app, + const void *value, u8 len, u8 type) +{ + struct rb_node *parent = app->mad.rb_node; + struct mrp_attr *attr; + int d; + + while (parent) { + attr = rb_entry(parent, struct mrp_attr, node); + d = mrp_attr_cmp(attr, value, len, type); + if (d > 0) + parent = parent->rb_left; + else if (d < 0) + parent = parent->rb_right; + else + return attr; + } + return NULL; +} + +static struct mrp_attr *mrp_attr_create(struct mrp_applicant *app, + const void *value, u8 len, u8 type) +{ + struct rb_node *parent = NULL, **p = &app->mad.rb_node; + struct mrp_attr *attr; + int d; + + while (*p) { + parent = *p; + attr = rb_entry(parent, struct mrp_attr, node); + d = mrp_attr_cmp(attr, value, len, type); + if (d > 0) + p = &parent->rb_left; + else if (d < 0) + p = &parent->rb_right; + else { + /* The attribute already exists; re-use it. */ + return attr; + } + } + attr = kmalloc(sizeof(*attr) + len, GFP_ATOMIC); + if (!attr) + return attr; + attr->state = MRP_APPLICANT_VO; + attr->type = type; + attr->len = len; + memcpy(attr->value, value, len); + + rb_link_node(&attr->node, parent, p); + rb_insert_color(&attr->node, &app->mad); + return attr; +} + +static void mrp_attr_destroy(struct mrp_applicant *app, struct mrp_attr *attr) +{ + rb_erase(&attr->node, &app->mad); + kfree(attr); +} + +static int mrp_pdu_init(struct mrp_applicant *app) +{ + struct sk_buff *skb; + struct mrp_pdu_hdr *ph; + + skb = alloc_skb(app->dev->mtu + LL_RESERVED_SPACE(app->dev), + GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + skb->dev = app->dev; + skb->protocol = app->app->pkttype.type; + skb_reserve(skb, LL_RESERVED_SPACE(app->dev)); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + + ph = (struct mrp_pdu_hdr *)__skb_put(skb, sizeof(*ph)); + ph->version = app->app->version; + + app->pdu = skb; + return 0; +} + +static int mrp_pdu_append_end_mark(struct mrp_applicant *app) +{ + __be16 *endmark; + + if (skb_tailroom(app->pdu) < sizeof(*endmark)) + return -1; + endmark = (__be16 *)__skb_put(app->pdu, sizeof(*endmark)); + put_unaligned(MRP_END_MARK, endmark); + return 0; +} + +static void mrp_pdu_queue(struct mrp_applicant *app) +{ + if (!app->pdu) + return; + + if (mrp_cb(app->pdu)->mh) + mrp_pdu_append_end_mark(app); + mrp_pdu_append_end_mark(app); + + dev_hard_header(app->pdu, app->dev, ntohs(app->app->pkttype.type), + app->app->group_address, app->dev->dev_addr, + app->pdu->len); + + skb_queue_tail(&app->queue, app->pdu); + app->pdu = NULL; +} + +static void mrp_queue_xmit(struct mrp_applicant *app) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&app->queue))) + dev_queue_xmit(skb); +} + +static int mrp_pdu_append_msg_hdr(struct mrp_applicant *app, + u8 attrtype, u8 attrlen) +{ + struct mrp_msg_hdr *mh; + + if (mrp_cb(app->pdu)->mh) { + if (mrp_pdu_append_end_mark(app) < 0) + return -1; + mrp_cb(app->pdu)->mh = NULL; + mrp_cb(app->pdu)->vah = NULL; + } + + if (skb_tailroom(app->pdu) < sizeof(*mh)) + return -1; + mh = (struct mrp_msg_hdr *)__skb_put(app->pdu, sizeof(*mh)); + mh->attrtype = attrtype; + mh->attrlen = attrlen; + mrp_cb(app->pdu)->mh = mh; + return 0; +} + +static int mrp_pdu_append_vecattr_hdr(struct mrp_applicant *app, + const void *firstattrvalue, u8 attrlen) +{ + struct mrp_vecattr_hdr *vah; + + if (skb_tailroom(app->pdu) < sizeof(*vah) + attrlen) + return -1; + vah = (struct mrp_vecattr_hdr *)__skb_put(app->pdu, + sizeof(*vah) + attrlen); + put_unaligned(0, &vah->lenflags); + memcpy(vah->firstattrvalue, firstattrvalue, attrlen); + mrp_cb(app->pdu)->vah = vah; + memcpy(mrp_cb(app->pdu)->attrvalue, firstattrvalue, attrlen); + return 0; +} + +static int mrp_pdu_append_vecattr_event(struct mrp_applicant *app, + const struct mrp_attr *attr, + enum mrp_vecattr_event vaevent) +{ + u16 len, pos; + u8 *vaevents; + int err; +again: + if (!app->pdu) { + err = mrp_pdu_init(app); + if (err < 0) + return err; + } + + /* If there is no Message header in the PDU, or the Message header is + * for a different attribute type, add an EndMark (if necessary) and a + * new Message header to the PDU. + */ + if (!mrp_cb(app->pdu)->mh || + mrp_cb(app->pdu)->mh->attrtype != attr->type || + mrp_cb(app->pdu)->mh->attrlen != attr->len) { + if (mrp_pdu_append_msg_hdr(app, attr->type, attr->len) < 0) + goto queue; + } + + /* If there is no VectorAttribute header for this Message in the PDU, + * or this attribute's value does not sequentially follow the previous + * attribute's value, add a new VectorAttribute header to the PDU. + */ + if (!mrp_cb(app->pdu)->vah || + memcmp(mrp_cb(app->pdu)->attrvalue, attr->value, attr->len)) { + if (mrp_pdu_append_vecattr_hdr(app, attr->value, attr->len) < 0) + goto queue; + } + + len = be16_to_cpu(get_unaligned(&mrp_cb(app->pdu)->vah->lenflags)); + pos = len % 3; + + /* Events are packed into Vectors in the PDU, three to a byte. Add a + * byte to the end of the Vector if necessary. + */ + if (!pos) { + if (skb_tailroom(app->pdu) < sizeof(u8)) + goto queue; + vaevents = (u8 *)__skb_put(app->pdu, sizeof(u8)); + } else { + vaevents = (u8 *)(skb_tail_pointer(app->pdu) - sizeof(u8)); + } + + switch (pos) { + case 0: + *vaevents = vaevent * (__MRP_VECATTR_EVENT_MAX * + __MRP_VECATTR_EVENT_MAX); + break; + case 1: + *vaevents += vaevent * __MRP_VECATTR_EVENT_MAX; + break; + case 2: + *vaevents += vaevent; + break; + default: + WARN_ON(1); + } + + /* Increment the length of the VectorAttribute in the PDU, as well as + * the value of the next attribute that would continue its Vector. + */ + put_unaligned(cpu_to_be16(++len), &mrp_cb(app->pdu)->vah->lenflags); + mrp_attrvalue_inc(mrp_cb(app->pdu)->attrvalue, attr->len); + + return 0; + +queue: + mrp_pdu_queue(app); + goto again; +} + +static void mrp_attr_event(struct mrp_applicant *app, + struct mrp_attr *attr, enum mrp_event event) +{ + enum mrp_applicant_state state; + + state = mrp_applicant_state_table[attr->state][event]; + if (state == MRP_APPLICANT_INVALID) { + WARN_ON(1); + return; + } + + if (event == MRP_EVENT_TX) { + /* When appending the attribute fails, don't update its state + * in order to retry at the next TX event. + */ + + switch (mrp_tx_action_table[attr->state]) { + case MRP_TX_ACTION_NONE: + case MRP_TX_ACTION_S_JOIN_IN_OPTIONAL: + case MRP_TX_ACTION_S_IN_OPTIONAL: + break; + case MRP_TX_ACTION_S_NEW: + if (mrp_pdu_append_vecattr_event( + app, attr, MRP_VECATTR_EVENT_NEW) < 0) + return; + break; + case MRP_TX_ACTION_S_JOIN_IN: + if (mrp_pdu_append_vecattr_event( + app, attr, MRP_VECATTR_EVENT_JOIN_IN) < 0) + return; + break; + case MRP_TX_ACTION_S_LV: + if (mrp_pdu_append_vecattr_event( + app, attr, MRP_VECATTR_EVENT_LV) < 0) + return; + /* As a pure applicant, sending a leave message + * implies that the attribute was unregistered and + * can be destroyed. + */ + mrp_attr_destroy(app, attr); + return; + default: + WARN_ON(1); + } + } + + attr->state = state; +} + +int mrp_request_join(const struct net_device *dev, + const struct mrp_application *appl, + const void *value, u8 len, u8 type) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + struct mrp_applicant *app = rtnl_dereference( + port->applicants[appl->type]); + struct mrp_attr *attr; + + if (sizeof(struct mrp_skb_cb) + len > + FIELD_SIZEOF(struct sk_buff, cb)) + return -ENOMEM; + + spin_lock_bh(&app->lock); + attr = mrp_attr_create(app, value, len, type); + if (!attr) { + spin_unlock_bh(&app->lock); + return -ENOMEM; + } + mrp_attr_event(app, attr, MRP_EVENT_JOIN); + spin_unlock_bh(&app->lock); + return 0; +} +EXPORT_SYMBOL_GPL(mrp_request_join); + +void mrp_request_leave(const struct net_device *dev, + const struct mrp_application *appl, + const void *value, u8 len, u8 type) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + struct mrp_applicant *app = rtnl_dereference( + port->applicants[appl->type]); + struct mrp_attr *attr; + + if (sizeof(struct mrp_skb_cb) + len > + FIELD_SIZEOF(struct sk_buff, cb)) + return; + + spin_lock_bh(&app->lock); + attr = mrp_attr_lookup(app, value, len, type); + if (!attr) { + spin_unlock_bh(&app->lock); + return; + } + mrp_attr_event(app, attr, MRP_EVENT_LV); + spin_unlock_bh(&app->lock); +} +EXPORT_SYMBOL_GPL(mrp_request_leave); + +static void mrp_mad_event(struct mrp_applicant *app, enum mrp_event event) +{ + struct rb_node *node, *next; + struct mrp_attr *attr; + + for (node = rb_first(&app->mad); + next = node ? rb_next(node) : NULL, node != NULL; + node = next) { + attr = rb_entry(node, struct mrp_attr, node); + mrp_attr_event(app, attr, event); + } +} + +static void mrp_join_timer_arm(struct mrp_applicant *app) +{ + unsigned long delay; + + delay = (u64)msecs_to_jiffies(mrp_join_time) * net_random() >> 32; + mod_timer(&app->join_timer, jiffies + delay); +} + +static void mrp_join_timer(unsigned long data) +{ + struct mrp_applicant *app = (struct mrp_applicant *)data; + + spin_lock(&app->lock); + mrp_mad_event(app, MRP_EVENT_TX); + mrp_pdu_queue(app); + spin_unlock(&app->lock); + + mrp_queue_xmit(app); + mrp_join_timer_arm(app); +} + +static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset) +{ + __be16 endmark; + + if (skb_copy_bits(skb, *offset, &endmark, sizeof(endmark)) < 0) + return -1; + if (endmark == MRP_END_MARK) { + *offset += sizeof(endmark); + return -1; + } + return 0; +} + +static void mrp_pdu_parse_vecattr_event(struct mrp_applicant *app, + struct sk_buff *skb, + enum mrp_vecattr_event vaevent) +{ + struct mrp_attr *attr; + enum mrp_event event; + + attr = mrp_attr_lookup(app, mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen, + mrp_cb(skb)->mh->attrtype); + if (attr == NULL) + return; + + switch (vaevent) { + case MRP_VECATTR_EVENT_NEW: + event = MRP_EVENT_R_NEW; + break; + case MRP_VECATTR_EVENT_JOIN_IN: + event = MRP_EVENT_R_JOIN_IN; + break; + case MRP_VECATTR_EVENT_IN: + event = MRP_EVENT_R_IN; + break; + case MRP_VECATTR_EVENT_JOIN_MT: + event = MRP_EVENT_R_JOIN_MT; + break; + case MRP_VECATTR_EVENT_MT: + event = MRP_EVENT_R_MT; + break; + case MRP_VECATTR_EVENT_LV: + event = MRP_EVENT_R_LV; + break; + default: + return; + } + + mrp_attr_event(app, attr, event); +} + +static int mrp_pdu_parse_vecattr(struct mrp_applicant *app, + struct sk_buff *skb, int *offset) +{ + struct mrp_vecattr_hdr _vah; + u16 valen; + u8 vaevents, vaevent; + + mrp_cb(skb)->vah = skb_header_pointer(skb, *offset, sizeof(_vah), + &_vah); + if (!mrp_cb(skb)->vah) + return -1; + *offset += sizeof(_vah); + + if (get_unaligned(&mrp_cb(skb)->vah->lenflags) & + MRP_VECATTR_HDR_FLAG_LA) + mrp_mad_event(app, MRP_EVENT_R_LA); + valen = be16_to_cpu(get_unaligned(&mrp_cb(skb)->vah->lenflags) & + MRP_VECATTR_HDR_LEN_MASK); + + /* The VectorAttribute structure in a PDU carries event information + * about one or more attributes having consecutive values. Only the + * value for the first attribute is contained in the structure. So + * we make a copy of that value, and then increment it each time we + * advance to the next event in its Vector. + */ + if (sizeof(struct mrp_skb_cb) + mrp_cb(skb)->mh->attrlen > + FIELD_SIZEOF(struct sk_buff, cb)) + return -1; + if (skb_copy_bits(skb, *offset, mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen) < 0) + return -1; + *offset += mrp_cb(skb)->mh->attrlen; + + /* In a VectorAttribute, the Vector contains events which are packed + * three to a byte. We process one byte of the Vector at a time. + */ + while (valen > 0) { + if (skb_copy_bits(skb, *offset, &vaevents, + sizeof(vaevents)) < 0) + return -1; + *offset += sizeof(vaevents); + + /* Extract and process the first event. */ + vaevent = vaevents / (__MRP_VECATTR_EVENT_MAX * + __MRP_VECATTR_EVENT_MAX); + if (vaevent >= __MRP_VECATTR_EVENT_MAX) { + /* The byte is malformed; stop processing. */ + return -1; + } + mrp_pdu_parse_vecattr_event(app, skb, vaevent); + + /* If present, extract and process the second event. */ + if (!--valen) + break; + mrp_attrvalue_inc(mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen); + vaevents %= (__MRP_VECATTR_EVENT_MAX * + __MRP_VECATTR_EVENT_MAX); + vaevent = vaevents / __MRP_VECATTR_EVENT_MAX; + mrp_pdu_parse_vecattr_event(app, skb, vaevent); + + /* If present, extract and process the third event. */ + if (!--valen) + break; + mrp_attrvalue_inc(mrp_cb(skb)->attrvalue, + mrp_cb(skb)->mh->attrlen); + vaevents %= __MRP_VECATTR_EVENT_MAX; + vaevent = vaevents; + mrp_pdu_parse_vecattr_event(app, skb, vaevent); + } + return 0; +} + +static int mrp_pdu_parse_msg(struct mrp_applicant *app, struct sk_buff *skb, + int *offset) +{ + struct mrp_msg_hdr _mh; + + mrp_cb(skb)->mh = skb_header_pointer(skb, *offset, sizeof(_mh), &_mh); + if (!mrp_cb(skb)->mh) + return -1; + *offset += sizeof(_mh); + + if (mrp_cb(skb)->mh->attrtype == 0 || + mrp_cb(skb)->mh->attrtype > app->app->maxattr || + mrp_cb(skb)->mh->attrlen == 0) + return -1; + + while (skb->len > *offset) { + if (mrp_pdu_parse_end_mark(skb, offset) < 0) + break; + if (mrp_pdu_parse_vecattr(app, skb, offset) < 0) + return -1; + } + return 0; +} + +int mrp_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct mrp_application *appl = container_of(pt, struct mrp_application, + pkttype); + struct mrp_port *port; + struct mrp_applicant *app; + struct mrp_pdu_hdr _ph; + const struct mrp_pdu_hdr *ph; + int offset = skb_network_offset(skb); + + /* If the interface is in promiscuous mode, drop the packet if + * it was unicast to another host. + */ + if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) + goto out; + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + goto out; + port = rcu_dereference(dev->mrp_port); + if (unlikely(!port)) + goto out; + app = rcu_dereference(port->applicants[appl->type]); + if (unlikely(!app)) + goto out; + + ph = skb_header_pointer(skb, offset, sizeof(_ph), &_ph); + if (!ph) + goto out; + offset += sizeof(_ph); + + if (ph->version != app->app->version) + goto out; + + spin_lock(&app->lock); + while (skb->len > offset) { + if (mrp_pdu_parse_end_mark(skb, &offset) < 0) + break; + if (mrp_pdu_parse_msg(app, skb, &offset) < 0) + break; + } + spin_unlock(&app->lock); +out: + kfree_skb(skb); + return 0; +} + +static int mrp_init_port(struct net_device *dev) +{ + struct mrp_port *port; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return -ENOMEM; + rcu_assign_pointer(dev->mrp_port, port); + return 0; +} + +static void mrp_release_port(struct net_device *dev) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + unsigned int i; + + for (i = 0; i <= MRP_APPLICATION_MAX; i++) { + if (rtnl_dereference(port->applicants[i])) + return; + } + RCU_INIT_POINTER(dev->mrp_port, NULL); + kfree_rcu(port, rcu); +} + +int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl) +{ + struct mrp_applicant *app; + int err; + + ASSERT_RTNL(); + + if (!rtnl_dereference(dev->mrp_port)) { + err = mrp_init_port(dev); + if (err < 0) + goto err1; + } + + err = -ENOMEM; + app = kzalloc(sizeof(*app), GFP_KERNEL); + if (!app) + goto err2; + + err = dev_mc_add(dev, appl->group_address); + if (err < 0) + goto err3; + + app->dev = dev; + app->app = appl; + app->mad = RB_ROOT; + spin_lock_init(&app->lock); + skb_queue_head_init(&app->queue); + rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); + setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app); + mrp_join_timer_arm(app); + return 0; + +err3: + kfree(app); +err2: + mrp_release_port(dev); +err1: + return err; +} +EXPORT_SYMBOL_GPL(mrp_init_applicant); + +void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) +{ + struct mrp_port *port = rtnl_dereference(dev->mrp_port); + struct mrp_applicant *app = rtnl_dereference( + port->applicants[appl->type]); + + ASSERT_RTNL(); + + RCU_INIT_POINTER(port->applicants[appl->type], NULL); + + /* Delete timer and generate a final TX event to flush out + * all pending messages before the applicant is gone. + */ + del_timer_sync(&app->join_timer); + mrp_mad_event(app, MRP_EVENT_TX); + mrp_pdu_queue(app); + mrp_queue_xmit(app); + + dev_mc_del(dev, appl->group_address); + kfree_rcu(app, rcu); + mrp_release_port(dev); +} +EXPORT_SYMBOL_GPL(mrp_uninit_applicant); + +int mrp_register_application(struct mrp_application *appl) +{ + appl->pkttype.func = mrp_rcv; + dev_add_pack(&appl->pkttype); + return 0; +} +EXPORT_SYMBOL_GPL(mrp_register_application); + +void mrp_unregister_application(struct mrp_application *appl) +{ + dev_remove_pack(&appl->pkttype); +} +EXPORT_SYMBOL_GPL(mrp_unregister_application); -- cgit v1.2.3-71-gd317 From 2c5e89338493882719f8d138f8f2717ee9a04153 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 10 Feb 2013 03:50:18 +0000 Subject: ipv6: by default join ff01::1 and in case of forwarding ff01::2 and ff05:2 Cc: Erik Hugne Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/in6.h | 9 +++++++++ net/ipv6/addrconf.c | 15 +++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index a16e19349ec0..34edf1f6c9a3 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -36,4 +36,13 @@ extern const struct in6_addr in6addr_linklocal_allnodes; extern const struct in6_addr in6addr_linklocal_allrouters; #define IN6ADDR_LINKLOCAL_ALLROUTERS_INIT \ { { { 0xff,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2 } } } +extern const struct in6_addr in6addr_interfacelocal_allnodes; +#define IN6ADDR_INTERFACELOCAL_ALLNODES_INIT \ + { { { 0xff,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } } +extern const struct in6_addr in6addr_interfacelocal_allrouters; +#define IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT \ + { { { 0xff,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2 } } } +extern const struct in6_addr in6addr_sitelocal_allrouters; +#define IN6ADDR_SITELOCAL_ALLROUTERS_INIT \ + { { { 0xff,5,0,0,0,0,0,0,0,0,0,0,0,0,0,2 } } } #endif diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index bd9f9360f769..86c235d05aba 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -244,6 +244,9 @@ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; /* Check if a valid qdisc is available */ static inline bool addrconf_qdisc_ok(const struct net_device *dev) @@ -428,6 +431,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) /* protected by rtnl_lock */ rcu_assign_pointer(dev->ip6_ptr, ndev); + /* Join interface-local all-node multicast group */ + ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes); + /* Join all-node multicast group */ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); @@ -611,10 +617,15 @@ static void dev_forward_change(struct inet6_dev *idev) if (idev->cnf.forwarding) dev_disable_lro(dev); if (dev->flags & IFF_MULTICAST) { - if (idev->cnf.forwarding) + if (idev->cnf.forwarding) { ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); - else + ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allrouters); + ipv6_dev_mc_inc(dev, &in6addr_sitelocal_allrouters); + } else { ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters); + ipv6_dev_mc_dec(dev, &in6addr_interfacelocal_allrouters); + ipv6_dev_mc_dec(dev, &in6addr_sitelocal_allrouters); + } } list_for_each_entry(ifa, &idev->addr_list, if_list) { -- cgit v1.2.3-71-gd317 From 2cde6acd49daca58b96f1fbc697492825511ad31 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 11 Feb 2013 10:25:30 +0000 Subject: netpoll: Fix __netpoll_rcu_free so that it can hold the rtnl lock __netpoll_rcu_free is used to free netpoll structures when the rtnl_lock is already held. The mechanism is used to asynchronously call __netpoll_cleanup outside of the holding of the rtnl_lock, so as to avoid deadlock. Unfortunately, __netpoll_cleanup modifies pointers (dev->np), which means the rtnl_lock must be held while calling it. Further, it cannot be held, because rcu callbacks may be issued in softirq contexts, which cannot sleep. Fix this by converting the rcu callback to a work queue that is guaranteed to get scheduled in process context, so that we can hold the rtnl properly while calling __netpoll_cleanup Tested successfully by myself. Signed-off-by: Neil Horman CC: "David S. Miller" CC: Cong Wang CC: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- include/linux/netpoll.h | 4 ++-- net/8021q/vlan_dev.c | 2 +- net/bridge/br_device.c | 2 +- net/core/netpoll.c | 16 ++++++++++------ 5 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 22399374b1e1..94c1534dd578 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1249,7 +1249,7 @@ static inline void slave_disable_netpoll(struct slave *slave) return; slave->np = NULL; - __netpoll_free_rcu(np); + __netpoll_free_async(np); } static inline bool slave_dev_support_netpoll(struct net_device *slave_dev) { diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index ab856d507b7e..9d7d8c64f7c8 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -32,7 +32,7 @@ struct netpoll { u8 remote_mac[ETH_ALEN]; struct list_head rx; /* rx_np list element */ - struct rcu_head rcu; + struct work_struct cleanup_work; }; struct netpoll_info { @@ -68,7 +68,7 @@ int netpoll_setup(struct netpoll *np); int netpoll_trap(void); void netpoll_set_trap(int trap); void __netpoll_cleanup(struct netpoll *np); -void __netpoll_free_rcu(struct netpoll *np); +void __netpoll_free_async(struct netpoll *np); void netpoll_cleanup(struct netpoll *np); int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo); void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 34df5b3c9b75..19cf81bf9f69 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -733,7 +733,7 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev) vlan->netpoll = NULL; - __netpoll_free_rcu(netpoll); + __netpoll_free_async(netpoll); } #endif /* CONFIG_NET_POLL_CONTROLLER */ diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ba6fb2d60940..ca98fa5b2c78 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -265,7 +265,7 @@ void br_netpoll_disable(struct net_bridge_port *p) p->np = NULL; - __netpoll_free_rcu(np); + __netpoll_free_async(np); } #endif diff --git a/net/core/netpoll.c b/net/core/netpoll.c index edcd9ad95304..c536474e2260 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -61,6 +61,7 @@ static struct srcu_struct netpoll_srcu; static void zap_completion_queue(void); static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo); +static void netpoll_async_cleanup(struct work_struct *work); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); @@ -1020,6 +1021,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) np->dev = ndev; strlcpy(np->dev_name, ndev->name, IFNAMSIZ); + INIT_WORK(&np->cleanup_work, netpoll_async_cleanup); if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || !ndev->netdev_ops->ndo_poll_controller) { @@ -1255,25 +1257,27 @@ void __netpoll_cleanup(struct netpoll *np) if (ops->ndo_netpoll_cleanup) ops->ndo_netpoll_cleanup(np->dev); - RCU_INIT_POINTER(np->dev->npinfo, NULL); + rcu_assign_pointer(np->dev->npinfo, NULL); call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); } } EXPORT_SYMBOL_GPL(__netpoll_cleanup); -static void rcu_cleanup_netpoll(struct rcu_head *rcu_head) +static void netpoll_async_cleanup(struct work_struct *work) { - struct netpoll *np = container_of(rcu_head, struct netpoll, rcu); + struct netpoll *np = container_of(work, struct netpoll, cleanup_work); + rtnl_lock(); __netpoll_cleanup(np); + rtnl_unlock(); kfree(np); } -void __netpoll_free_rcu(struct netpoll *np) +void __netpoll_free_async(struct netpoll *np) { - call_rcu_bh(&np->rcu, rcu_cleanup_netpoll); + schedule_work(&np->cleanup_work); } -EXPORT_SYMBOL_GPL(__netpoll_free_rcu); +EXPORT_SYMBOL_GPL(__netpoll_free_async); void netpoll_cleanup(struct netpoll *np) { -- cgit v1.2.3-71-gd317 From d9ba8f9e6298af71ec1c1fd3d88c3ef68abd0ec3 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Mon, 11 Feb 2013 09:52:20 +0000 Subject: driver: net: ethernet: cpsw: dual emac interface implementation The CPSW switch can act as Dual EMAC by segregating the switch ports using VLAN and port VLAN as per the TRM description in 14.3.2.10.2 Dual Mac Mode Following CPSW components will be common for both the interfaces. * Interrupt source is common for both eth interfaces * Interrupt pacing is common for both interfaces * Hardware statistics is common for all the ports * CPDMA is common for both eth interface * CPTS is common for both the interface and it should not be enabled on both the interface as timestamping information doesn't contain port information. Constrains * Reserved VID of One port should not be used in other interface which will enable switching functionality * Same VID must not be used in both the interface which will enable switching functionality Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/cpsw.txt | 2 + drivers/net/ethernet/ti/cpsw.c | 335 +++++++++++++++++++++---- include/linux/platform_data/cpsw.h | 3 + 3 files changed, 288 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt index 6ddd0286a9b7..ecfdf756d10f 100644 --- a/Documentation/devicetree/bindings/net/cpsw.txt +++ b/Documentation/devicetree/bindings/net/cpsw.txt @@ -24,6 +24,8 @@ Required properties: Optional properties: - ti,hwmods : Must be "cpgmac0" - no_bd_ram : Must be 0 or 1 +- dual_emac : Specifies Switch to act as Dual EMAC +- dual_emac_res_vlan : Specifies VID to be used to segregate the ports Note: "ti,hwmods" field is used to fetch the base address and irq resources from TI, omap hwmod data base during device registration. diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 4b964bb02d4c..4ceed6e0f1be 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -122,6 +122,10 @@ do { \ #define CPSW_VLAN_AWARE BIT(1) #define CPSW_ALE_VLAN_AWARE 1 +#define CPSW_FIFO_NORMAL_MODE (0 << 15) +#define CPSW_FIFO_DUAL_MAC_MODE (1 << 15) +#define CPSW_FIFO_RATE_LIMIT_MODE (2 << 15) + #define cpsw_enable_irq(priv) \ do { \ u32 i; \ @@ -254,7 +258,7 @@ struct cpsw_ss_regs { struct cpsw_host_regs { u32 max_blks; u32 blk_cnt; - u32 flow_thresh; + u32 tx_in_ctl; u32 port_vlan; u32 tx_pri_map; u32 cpdma_tx_pri_map; @@ -281,6 +285,9 @@ struct cpsw_slave { u32 mac_control; struct cpsw_slave_data *data; struct phy_device *phy; + struct net_device *ndev; + u32 port_vlan; + u32 open_stat; }; static inline u32 slave_read(struct cpsw_slave *slave, u32 offset) @@ -320,15 +327,63 @@ struct cpsw_priv { u32 irqs_table[4]; u32 num_irqs; struct cpts *cpts; + u32 emac_port; }; #define napi_to_priv(napi) container_of(napi, struct cpsw_priv, napi) -#define for_each_slave(priv, func, arg...) \ - do { \ - int idx; \ - for (idx = 0; idx < (priv)->data.slaves; idx++) \ - (func)((priv)->slaves + idx, ##arg); \ +#define for_each_slave(priv, func, arg...) \ + do { \ + int idx; \ + if (priv->data.dual_emac) \ + (func)((priv)->slaves + priv->emac_port, ##arg);\ + else \ + for (idx = 0; idx < (priv)->data.slaves; idx++) \ + (func)((priv)->slaves + idx, ##arg); \ + } while (0) +#define cpsw_get_slave_ndev(priv, __slave_no__) \ + (priv->slaves[__slave_no__].ndev) +#define cpsw_get_slave_priv(priv, __slave_no__) \ + ((priv->slaves[__slave_no__].ndev) ? \ + netdev_priv(priv->slaves[__slave_no__].ndev) : NULL) \ + +#define cpsw_dual_emac_src_port_detect(status, priv, ndev, skb) \ + do { \ + if (!priv->data.dual_emac) \ + break; \ + if (CPDMA_RX_SOURCE_PORT(status) == 1) { \ + ndev = cpsw_get_slave_ndev(priv, 0); \ + priv = netdev_priv(ndev); \ + skb->dev = ndev; \ + } else if (CPDMA_RX_SOURCE_PORT(status) == 2) { \ + ndev = cpsw_get_slave_ndev(priv, 1); \ + priv = netdev_priv(ndev); \ + skb->dev = ndev; \ + } \ } while (0) +#define cpsw_add_mcast(priv, addr) \ + do { \ + if (priv->data.dual_emac) { \ + struct cpsw_slave *slave = priv->slaves + \ + priv->emac_port; \ + int slave_port = cpsw_get_slave_port(priv, \ + slave->slave_num); \ + cpsw_ale_add_mcast(priv->ale, addr, \ + 1 << slave_port | 1 << priv->host_port, \ + ALE_VLAN, slave->port_vlan, 0); \ + } else { \ + cpsw_ale_add_mcast(priv->ale, addr, \ + ALE_ALL_PORTS << priv->host_port, \ + 0, 0, 0); \ + } \ + } while (0) + +static inline int cpsw_get_slave_port(struct cpsw_priv *priv, u32 slave_num) +{ + if (priv->host_port == 0) + return slave_num + 1; + else + return slave_num; +} static void cpsw_ndo_set_rx_mode(struct net_device *ndev) { @@ -348,8 +403,7 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) /* program multicast address list into ALE register */ netdev_for_each_mc_addr(ha, ndev) { - cpsw_ale_add_mcast(priv->ale, (u8 *)ha->addr, - ALE_ALL_PORTS << priv->host_port, 0, 0, 0); + cpsw_add_mcast(priv, (u8 *)ha->addr); } } } @@ -396,6 +450,8 @@ void cpsw_rx_handler(void *token, int len, int status) struct cpsw_priv *priv = netdev_priv(ndev); int ret = 0; + cpsw_dual_emac_src_port_detect(status, priv, ndev, skb); + /* free and bail if we are shutting down */ if (unlikely(!netif_running(ndev)) || unlikely(!netif_carrier_ok(ndev))) { @@ -437,18 +493,17 @@ static irqreturn_t cpsw_interrupt(int irq, void *dev_id) cpsw_intr_disable(priv); cpsw_disable_irq(priv); napi_schedule(&priv->napi); + } else { + priv = cpsw_get_slave_priv(priv, 1); + if (likely(priv) && likely(netif_running(priv->ndev))) { + cpsw_intr_disable(priv); + cpsw_disable_irq(priv); + napi_schedule(&priv->napi); + } } return IRQ_HANDLED; } -static inline int cpsw_get_slave_port(struct cpsw_priv *priv, u32 slave_num) -{ - if (priv->host_port == 0) - return slave_num + 1; - else - return slave_num; -} - static int cpsw_poll(struct napi_struct *napi, int budget) { struct cpsw_priv *priv = napi_to_priv(napi); @@ -566,6 +621,54 @@ static inline int __show_stat(char *buf, int maxlen, const char *name, u32 val) leader + strlen(name), val); } +static int cpsw_common_res_usage_state(struct cpsw_priv *priv) +{ + u32 i; + u32 usage_count = 0; + + if (!priv->data.dual_emac) + return 0; + + for (i = 0; i < priv->data.slaves; i++) + if (priv->slaves[i].open_stat) + usage_count++; + + return usage_count; +} + +static inline int cpsw_tx_packet_submit(struct net_device *ndev, + struct cpsw_priv *priv, struct sk_buff *skb) +{ + if (!priv->data.dual_emac) + return cpdma_chan_submit(priv->txch, skb, skb->data, + skb->len, 0, GFP_KERNEL); + + if (ndev == cpsw_get_slave_ndev(priv, 0)) + return cpdma_chan_submit(priv->txch, skb, skb->data, + skb->len, 1, GFP_KERNEL); + else + return cpdma_chan_submit(priv->txch, skb, skb->data, + skb->len, 2, GFP_KERNEL); +} + +static inline void cpsw_add_dual_emac_def_ale_entries( + struct cpsw_priv *priv, struct cpsw_slave *slave, + u32 slave_port) +{ + u32 port_mask = 1 << slave_port | 1 << priv->host_port; + + if (priv->version == CPSW_VERSION_1) + slave_write(slave, slave->port_vlan, CPSW1_PORT_VLAN); + else + slave_write(slave, slave->port_vlan, CPSW2_PORT_VLAN); + cpsw_ale_add_vlan(priv->ale, slave->port_vlan, port_mask, + port_mask, port_mask, 0); + cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, + port_mask, ALE_VLAN, slave->port_vlan, 0); + cpsw_ale_add_ucast(priv->ale, priv->mac_addr, + priv->host_port, ALE_VLAN, slave->port_vlan); +} + static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) { char name[32]; @@ -595,8 +698,11 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) slave_port = cpsw_get_slave_port(priv, slave->slave_num); - cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, - 1 << slave_port, 0, 0, ALE_MCAST_FWD_2); + if (priv->data.dual_emac) + cpsw_add_dual_emac_def_ale_entries(priv, slave, slave_port); + else + cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, + 1 << slave_port, 0, 0, ALE_MCAST_FWD_2); slave->phy = phy_connect(priv->ndev, slave->data->phy_id, &cpsw_adjust_link, slave->data->phy_if); @@ -634,6 +740,7 @@ static inline void cpsw_add_default_vlan(struct cpsw_priv *priv) static void cpsw_init_host_port(struct cpsw_priv *priv) { u32 control_reg; + u32 fifo_mode; /* soft reset the controller and initialize ale */ soft_reset("cpsw", &priv->regs->soft_reset); @@ -645,6 +752,9 @@ static void cpsw_init_host_port(struct cpsw_priv *priv) control_reg = readl(&priv->regs->control); control_reg |= CPSW_VLAN_AWARE; writel(control_reg, &priv->regs->control); + fifo_mode = (priv->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE : + CPSW_FIFO_NORMAL_MODE; + writel(fifo_mode, &priv->host_port_regs->tx_in_ctl); /* setup host port priority mapping */ __raw_writel(CPDMA_TX_PRIORITY_MAP, @@ -654,9 +764,12 @@ static void cpsw_init_host_port(struct cpsw_priv *priv) cpsw_ale_control_set(priv->ale, priv->host_port, ALE_PORT_STATE, ALE_PORT_STATE_FORWARD); - cpsw_ale_add_ucast(priv->ale, priv->mac_addr, priv->host_port, 0, 0); - cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, - 1 << priv->host_port, 0, 0, ALE_MCAST_FWD_2); + if (!priv->data.dual_emac) { + cpsw_ale_add_ucast(priv->ale, priv->mac_addr, priv->host_port, + 0, 0); + cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, + 1 << priv->host_port, 0, 0, ALE_MCAST_FWD_2); + } } static int cpsw_ndo_open(struct net_device *ndev) @@ -665,7 +778,8 @@ static int cpsw_ndo_open(struct net_device *ndev) int i, ret; u32 reg; - cpsw_intr_disable(priv); + if (!cpsw_common_res_usage_state(priv)) + cpsw_intr_disable(priv); netif_carrier_off(ndev); pm_runtime_get_sync(&priv->pdev->dev); @@ -677,46 +791,54 @@ static int cpsw_ndo_open(struct net_device *ndev) CPSW_RTL_VERSION(reg)); /* initialize host and slave ports */ - cpsw_init_host_port(priv); + if (!cpsw_common_res_usage_state(priv)) + cpsw_init_host_port(priv); for_each_slave(priv, cpsw_slave_open, priv); /* Add default VLAN */ - cpsw_add_default_vlan(priv); + if (!priv->data.dual_emac) + cpsw_add_default_vlan(priv); - /* setup tx dma to fixed prio and zero offset */ - cpdma_control_set(priv->dma, CPDMA_TX_PRIO_FIXED, 1); - cpdma_control_set(priv->dma, CPDMA_RX_BUFFER_OFFSET, 0); + if (!cpsw_common_res_usage_state(priv)) { + /* setup tx dma to fixed prio and zero offset */ + cpdma_control_set(priv->dma, CPDMA_TX_PRIO_FIXED, 1); + cpdma_control_set(priv->dma, CPDMA_RX_BUFFER_OFFSET, 0); - /* disable priority elevation and enable statistics on all ports */ - __raw_writel(0, &priv->regs->ptype); + /* disable priority elevation */ + __raw_writel(0, &priv->regs->ptype); - /* enable statistics collection only on the host port */ - __raw_writel(0x7, &priv->regs->stat_port_en); + /* enable statistics collection only on all ports */ + __raw_writel(0x7, &priv->regs->stat_port_en); - if (WARN_ON(!priv->data.rx_descs)) - priv->data.rx_descs = 128; + if (WARN_ON(!priv->data.rx_descs)) + priv->data.rx_descs = 128; - for (i = 0; i < priv->data.rx_descs; i++) { - struct sk_buff *skb; + for (i = 0; i < priv->data.rx_descs; i++) { + struct sk_buff *skb; - ret = -ENOMEM; - skb = netdev_alloc_skb_ip_align(priv->ndev, - priv->rx_packet_max); - if (!skb) - break; - ret = cpdma_chan_submit(priv->rxch, skb, skb->data, + ret = -ENOMEM; + skb = netdev_alloc_skb_ip_align(priv->ndev, + priv->rx_packet_max); + if (!skb) + break; + ret = cpdma_chan_submit(priv->rxch, skb, skb->data, skb_tailroom(skb), 0, GFP_KERNEL); - if (WARN_ON(ret < 0)) - break; + if (WARN_ON(ret < 0)) + break; + } + /* continue even if we didn't manage to submit all + * receive descs + */ + cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i); } - /* continue even if we didn't manage to submit all receive descs */ - cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i); cpdma_ctlr_start(priv->dma); cpsw_intr_enable(priv); napi_enable(&priv->napi); cpdma_ctlr_eoi(priv->dma); + if (priv->data.dual_emac) + priv->slaves[priv->emac_port].open_stat = true; return 0; } @@ -737,12 +859,17 @@ static int cpsw_ndo_stop(struct net_device *ndev) netif_stop_queue(priv->ndev); napi_disable(&priv->napi); netif_carrier_off(priv->ndev); - cpsw_intr_disable(priv); - cpdma_ctlr_int_ctrl(priv->dma, false); - cpdma_ctlr_stop(priv->dma); - cpsw_ale_stop(priv->ale); + + if (cpsw_common_res_usage_state(priv) <= 1) { + cpsw_intr_disable(priv); + cpdma_ctlr_int_ctrl(priv->dma, false); + cpdma_ctlr_stop(priv->dma); + cpsw_ale_stop(priv->ale); + } for_each_slave(priv, cpsw_slave_stop, priv); pm_runtime_put_sync(&priv->pdev->dev); + if (priv->data.dual_emac) + priv->slaves[priv->emac_port].open_stat = false; return 0; } @@ -766,8 +893,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, skb_tx_timestamp(skb); - ret = cpdma_chan_submit(priv->txch, skb, skb->data, - skb->len, 0, GFP_KERNEL); + ret = cpsw_tx_packet_submit(ndev, priv, skb); if (unlikely(ret != 0)) { cpsw_err(priv, tx_err, "desc submit failed\n"); goto fail; @@ -836,9 +962,14 @@ static void cpsw_hwtstamp_v1(struct cpsw_priv *priv) static void cpsw_hwtstamp_v2(struct cpsw_priv *priv) { - struct cpsw_slave *slave = &priv->slaves[priv->data.cpts_active_slave]; + struct cpsw_slave *slave; u32 ctrl, mtype; + if (priv->data.dual_emac) + slave = &priv->slaves[priv->emac_port]; + else + slave = &priv->slaves[priv->data.cpts_active_slave]; + ctrl = slave_read(slave, CPSW2_CONTROL); ctrl &= ~CTRL_ALL_TS_MASK; @@ -1124,6 +1255,7 @@ static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv, slave->data = data; slave->regs = regs + slave_reg_ofs; slave->sliver = regs + sliver_reg_ofs; + slave->port_vlan = data->dual_emac_res_vlan; } static int cpsw_probe_dt(struct cpsw_platform_data *data, @@ -1204,6 +1336,9 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, } data->mac_control = prop; + if (!of_property_read_u32(node, "dual_emac", &prop)) + data->dual_emac = prop; + /* * Populate all the child nodes here... */ @@ -1237,6 +1372,18 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, if (mac_addr) memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN); + if (data->dual_emac) { + if (of_property_read_u32(node, "dual_emac_res_vlan", + &prop)) { + pr_err("Missing dual_emac_res_vlan in DT.\n"); + slave_data->dual_emac_res_vlan = i+1; + pr_err("Using %d as Reserved VLAN for %d slave\n", + slave_data->dual_emac_res_vlan, i); + } else { + slave_data->dual_emac_res_vlan = prop; + } + } + i++; } @@ -1247,6 +1394,79 @@ error_ret: return ret; } +static int cpsw_probe_dual_emac(struct platform_device *pdev, + struct cpsw_priv *priv) +{ + struct cpsw_platform_data *data = &priv->data; + struct net_device *ndev; + struct cpsw_priv *priv_sl2; + int ret = 0, i; + + ndev = alloc_etherdev(sizeof(struct cpsw_priv)); + if (!ndev) { + pr_err("cpsw: error allocating net_device\n"); + return -ENOMEM; + } + + priv_sl2 = netdev_priv(ndev); + spin_lock_init(&priv_sl2->lock); + priv_sl2->data = *data; + priv_sl2->pdev = pdev; + priv_sl2->ndev = ndev; + priv_sl2->dev = &ndev->dev; + priv_sl2->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG); + priv_sl2->rx_packet_max = max(rx_packet_max, 128); + + if (is_valid_ether_addr(data->slave_data[1].mac_addr)) { + memcpy(priv_sl2->mac_addr, data->slave_data[1].mac_addr, + ETH_ALEN); + pr_info("cpsw: Detected MACID = %pM\n", priv_sl2->mac_addr); + } else { + random_ether_addr(priv_sl2->mac_addr); + pr_info("cpsw: Random MACID = %pM\n", priv_sl2->mac_addr); + } + memcpy(ndev->dev_addr, priv_sl2->mac_addr, ETH_ALEN); + + priv_sl2->slaves = priv->slaves; + priv_sl2->clk = priv->clk; + + priv_sl2->cpsw_res = priv->cpsw_res; + priv_sl2->regs = priv->regs; + priv_sl2->host_port = priv->host_port; + priv_sl2->host_port_regs = priv->host_port_regs; + priv_sl2->wr_regs = priv->wr_regs; + priv_sl2->dma = priv->dma; + priv_sl2->txch = priv->txch; + priv_sl2->rxch = priv->rxch; + priv_sl2->ale = priv->ale; + priv_sl2->emac_port = 1; + priv->slaves[1].ndev = ndev; + priv_sl2->cpts = priv->cpts; + priv_sl2->version = priv->version; + + for (i = 0; i < priv->num_irqs; i++) { + priv_sl2->irqs_table[i] = priv->irqs_table[i]; + priv_sl2->num_irqs = priv->num_irqs; + } + + ndev->features |= NETIF_F_HW_VLAN_FILTER; + + ndev->netdev_ops = &cpsw_netdev_ops; + SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops); + netif_napi_add(ndev, &priv_sl2->napi, cpsw_poll, CPSW_POLL_WEIGHT); + + /* register the network device */ + SET_NETDEV_DEV(ndev, &pdev->dev); + ret = register_netdev(ndev); + if (ret) { + pr_err("cpsw: error registering net device\n"); + free_netdev(ndev); + ret = -ENODEV; + } + + return ret; +} + static int cpsw_probe(struct platform_device *pdev) { struct cpsw_platform_data *data = pdev->dev.platform_data; @@ -1310,6 +1530,9 @@ static int cpsw_probe(struct platform_device *pdev) for (i = 0; i < data->slaves; i++) priv->slaves[i].slave_num = i; + priv->slaves[0].ndev = ndev; + priv->emac_port = 0; + priv->clk = clk_get(&pdev->dev, "fck"); if (IS_ERR(priv->clk)) { dev_err(&pdev->dev, "fck is not found\n"); @@ -1484,6 +1707,14 @@ static int cpsw_probe(struct platform_device *pdev) cpsw_notice(priv, probe, "initialized device (regs %x, irq %d)\n", priv->cpsw_res->start, ndev->irq); + if (priv->data.dual_emac) { + ret = cpsw_probe_dual_emac(pdev, priv); + if (ret) { + cpsw_err(priv, probe, "error probe slave 2 emac interface\n"); + goto clean_irq_ret; + } + } + return 0; clean_irq_ret: diff --git a/include/linux/platform_data/cpsw.h b/include/linux/platform_data/cpsw.h index e962cfd552e3..798fb80b024b 100644 --- a/include/linux/platform_data/cpsw.h +++ b/include/linux/platform_data/cpsw.h @@ -21,6 +21,8 @@ struct cpsw_slave_data { char phy_id[MII_BUS_ID_SIZE]; int phy_if; u8 mac_addr[ETH_ALEN]; + u16 dual_emac_res_vlan; /* Reserved VLAN for DualEMAC */ + }; struct cpsw_platform_data { @@ -36,6 +38,7 @@ struct cpsw_platform_data { u32 rx_descs; /* Number of Rx Descriptios */ u32 mac_control; /* Mac control register */ u16 default_vlan; /* Def VLAN for ALE lookup in VLAN aware mode*/ + bool dual_emac; /* Enable Dual EMAC mode */ }; #endif /* __CPSW_H__ */ -- cgit v1.2.3-71-gd317 From ceaa1fef65a7c2e017b260b879b310dd24888083 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 11 Feb 2013 05:50:17 +0000 Subject: tcp: adding a per-socket timestamp offset This functionality is used for restoring tcp sockets. A tcp timestamp depends on how long a system has been running, so it's differ for each host. The solution is to set a per-socket offset. A per-socket offset for a TIME_WAIT socket is inherited from a proper tcp socket. tcp_request_sock doesn't have a timestamp offset, because the repair mode for them are not implemented. Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: Eric Dumazet Cc: Pavel Emelyanov Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 +++ net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_minisocks.c | 2 ++ 3 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 6d0d46138ae8..f28408c07dc2 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -162,6 +162,8 @@ struct tcp_sock { u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ + u32 tsoffset; /* timestamp offset */ + struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ unsigned long tsq_flags; @@ -353,6 +355,7 @@ struct tcp_timewait_sock { u32 tw_rcv_nxt; u32 tw_snd_nxt; u32 tw_rcv_wnd; + u32 tw_ts_offset; u32 tw_ts_recent; long tw_ts_recent_stamp; #ifdef CONFIG_TCP_MD5SIG diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2c7e5963c2ea..8a90bda96038 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -400,6 +400,8 @@ void tcp_init_sock(struct sock *sk) tcp_enable_early_retrans(tp); icsk->icsk_ca_ops = &tcp_init_congestion_ops; + tp->tsoffset = 0; + sk->sk_state = TCP_CLOSE; sk->sk_write_space = sk_stream_write_space; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f0409287b5f4..4dfc99f54f67 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -288,6 +288,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcptw->tw_rcv_wnd = tcp_receive_window(tp); tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; + tcptw->tw_ts_offset = tp->tsoffset; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { @@ -499,6 +500,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.ts_recent_stamp = 0; newtp->tcp_header_len = sizeof(struct tcphdr); } + newtp->tsoffset = 0; #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ if (newtp->af_specific->md5_lookup(sk, newsk)) -- cgit v1.2.3-71-gd317 From c9af6db4c11ccc6c3e7f19bbc15d54023956f97c Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 11 Feb 2013 09:27:41 +0000 Subject: net: Fix possible wrong checksum generation. Patch cef401de7be8c4e (net: fix possible wrong checksum generation) fixed wrong checksum calculation but it broke TSO by defining new GSO type but not a netdev feature for that type. net_gso_ok() would not allow hardware checksum/segmentation offload of such packets without the feature. Following patch fixes TSO and wrong checksum. This patch uses same logic that Eric Dumazet used. Patch introduces new flag SKBTX_SHARED_FRAG if at least one frag can be modified by the user. but SKBTX_SHARED_FRAG flag is kept in skb shared info tx_flags rather than gso_type. tx_flags is better compared to gso_type since we can have skb with shared frag without gso packet. It does not link SHARED_FRAG to GSO, So there is no need to define netdev feature for this. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 4 ++-- drivers/net/tun.c | 13 +++++-------- drivers/net/virtio_net.c | 13 +++++-------- include/linux/skbuff.h | 17 +++++++++-------- net/core/skbuff.c | 5 ++--- net/ipv4/af_inet.c | 1 - net/ipv4/ip_output.c | 1 + net/ipv4/tcp.c | 4 +--- net/ipv4/tcp_input.c | 4 ++-- net/ipv4/tcp_output.c | 4 ++-- net/ipv6/ip6_offload.c | 1 - 11 files changed, 29 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index b181dfb3d6d6..97243011d319 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -543,7 +543,6 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, skb->data_len += len; skb->len += len; skb->truesize += truesize; - skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; atomic_add(truesize, &skb->sk->sk_wmem_alloc); while (len) { int off = base & ~PAGE_MASK; @@ -599,7 +598,7 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { skb_shinfo(skb)->gso_size = vnet_hdr->gso_size; - skb_shinfo(skb)->gso_type |= gso_type; + skb_shinfo(skb)->gso_type = gso_type; /* Header must be checked, and gso_segs computed. */ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; @@ -743,6 +742,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (zerocopy) { skb_shinfo(skb)->destructor_arg = m->msg_control; skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; } if (vlan) macvlan_start_xmit(skb, vlan->dev); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b1038c0e2240..b6f45c5d84d5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1019,7 +1019,6 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, skb->data_len += len; skb->len += len; skb->truesize += truesize; - skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; atomic_add(truesize, &skb->sk->sk_wmem_alloc); while (len) { int off = base & ~PAGE_MASK; @@ -1165,18 +1164,16 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) { - unsigned short gso_type = 0; - pr_debug("GSO!\n"); switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: - gso_type = SKB_GSO_TCPV4; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; break; case VIRTIO_NET_HDR_GSO_TCPV6: - gso_type = SKB_GSO_TCPV6; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; break; case VIRTIO_NET_HDR_GSO_UDP: - gso_type = SKB_GSO_UDP; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; break; default: tun->dev->stats.rx_frame_errors++; @@ -1185,10 +1182,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN) - gso_type |= SKB_GSO_TCP_ECN; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; skb_shinfo(skb)->gso_size = gso.gso_size; - skb_shinfo(skb)->gso_type |= gso_type; if (skb_shinfo(skb)->gso_size == 0) { tun->dev->stats.rx_frame_errors++; kfree_skb(skb); @@ -1204,6 +1200,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (zerocopy) { skb_shinfo(skb)->destructor_arg = msg_control; skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; } skb_reset_network_header(skb); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 381a2d8d8a81..192c91c8e799 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -227,7 +227,7 @@ static void set_skb_frag(struct sk_buff *skb, struct page *page, skb->len += size; skb->truesize += PAGE_SIZE; skb_shinfo(skb)->nr_frags++; - skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; + skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; *len -= size; } @@ -387,18 +387,16 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) ntohs(skb->protocol), skb->len, skb->pkt_type); if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { - unsigned short gso_type = 0; - pr_debug("GSO!\n"); switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: - gso_type = SKB_GSO_TCPV4; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; break; case VIRTIO_NET_HDR_GSO_UDP: - gso_type = SKB_GSO_UDP; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; break; case VIRTIO_NET_HDR_GSO_TCPV6: - gso_type = SKB_GSO_TCPV6; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; break; default: net_warn_ratelimited("%s: bad gso type %u.\n", @@ -407,7 +405,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) } if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) - gso_type |= SKB_GSO_TCP_ECN; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; skb_shinfo(skb)->gso_size = hdr->hdr.gso_size; if (skb_shinfo(skb)->gso_size == 0) { @@ -415,7 +413,6 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) goto frame_err; } - skb_shinfo(skb)->gso_type |= gso_type; /* Header must be checked, and gso_segs computed. */ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; skb_shinfo(skb)->gso_segs = 0; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d7573c37a51d..9da99520ccd5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -230,6 +230,13 @@ enum { /* generate wifi status information (where possible) */ SKBTX_WIFI_STATUS = 1 << 4, + + /* This indicates at least one fragment might be overwritten + * (as in vmsplice(), sendfile() ...) + * If we need to compute a TX checksum, we'll need to copy + * all frags to avoid possible bad checksum + */ + SKBTX_SHARED_FRAG = 1 << 5, }; /* @@ -307,13 +314,6 @@ enum { SKB_GSO_TCPV6 = 1 << 4, SKB_GSO_FCOE = 1 << 5, - - /* This indicates at least one fragment might be overwritten - * (as in vmsplice(), sendfile() ...) - * If we need to compute a TX checksum, we'll need to copy - * all frags to avoid possible bad checksum - */ - SKB_GSO_SHARED_FRAG = 1 << 6, }; #if BITS_PER_LONG > 32 @@ -2220,7 +2220,8 @@ static inline int skb_linearize(struct sk_buff *skb) */ static inline bool skb_has_shared_frag(const struct sk_buff *skb) { - return skb_shinfo(skb)->gso_type & SKB_GSO_SHARED_FRAG; + return skb_is_nonlinear(skb) && + skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; } /** diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 21a22cce6e53..6c1ad09f8796 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2326,8 +2326,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); - skb_shinfo(skb1)->gso_type = skb_shinfo(skb)->gso_type; - + skb_shinfo(skb)->tx_flags = skb_shinfo(skb1)->tx_flags & SKBTX_SHARED_FRAG; if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ @@ -2833,7 +2832,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_copy_from_linear_data_offset(skb, offset, skb_put(nskb, hsize), hsize); - skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type; + skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1aec92bf8018..e6e5d8506336 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1287,7 +1287,6 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | - SKB_GSO_SHARED_FRAG | 0))) goto out; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3e98ed2bff55..5e12dca7b3dd 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -598,6 +598,7 @@ slow_path: /* for offloaded checksums cleanup checksum before fragmentation */ if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb)) goto fail; + iph = ip_hdr(skb); left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 801b07b796f0..1f0bedb8622f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -897,8 +897,7 @@ new_segment: get_page(page); skb_fill_page_desc(skb, i, page, offset, copy); } - - skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; + skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; skb->len += copy; skb->data_len += copy; @@ -3044,7 +3043,6 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | - SKB_GSO_SHARED_FRAG | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d9bfaea34322..a759e19496d2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1239,13 +1239,13 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, */ if (!skb_shinfo(prev)->gso_size) { skb_shinfo(prev)->gso_size = mss; - skb_shinfo(prev)->gso_type |= sk->sk_gso_type; + skb_shinfo(prev)->gso_type = sk->sk_gso_type; } /* CHECKME: To clear or not to clear? Mimics normal skb currently */ if (skb_shinfo(skb)->gso_segs <= 1) { skb_shinfo(skb)->gso_size = 0; - skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG; + skb_shinfo(skb)->gso_type = 0; } /* Difference in this won't matter, both ACKed by the same cumul. ACK */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 564bf89d9fd3..6182d90e97b0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1133,7 +1133,6 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG; if (skb->len <= mss_now || !sk_can_gso(sk) || skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal @@ -1141,10 +1140,11 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, */ skb_shinfo(skb)->gso_segs = 1; skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; } else { skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now); skb_shinfo(skb)->gso_size = mss_now; - skb_shinfo(skb)->gso_type |= sk->sk_gso_type; + skb_shinfo(skb)->gso_type = sk->sk_gso_type; } } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index d141fc32a2ea..f26f0da7f095 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -100,7 +100,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | - SKB_GSO_SHARED_FRAG | 0))) goto out; -- cgit v1.2.3-71-gd317 From 407af3299ef1ac7e87ce3fb530e32a009d1a9efd Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:12 +0000 Subject: bridge: Add netlink interface to configure vlans on bridge ports Add a netlink interface to add and remove vlan configuration on bridge port. The interface uses the RTM_SETLINK message and encodes the vlan configuration inside the IFLA_AF_SPEC. It is possble to include multiple vlans to either add or remove in a single message. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 + include/uapi/linux/if_bridge.h | 9 +++ net/bridge/br_device.c | 1 + net/bridge/br_if.c | 1 + net/bridge/br_netlink.c | 139 +++++++++++++++++++++++++++++++++++------ net/bridge/br_private.h | 1 + net/core/rtnetlink.c | 72 +++++++++++++++++++++ 7 files changed, 207 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 25bd46f52877..1b90f9401000 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1020,6 +1020,8 @@ struct net_device_ops { int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev); + int (*ndo_bridge_dellink)(struct net_device *dev, + struct nlmsghdr *nlh); int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); }; diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 5db297514aec..3ca9817ca7e8 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -108,15 +108,24 @@ struct __fdb_entry { * [IFLA_AF_SPEC] = { * [IFLA_BRIDGE_FLAGS] * [IFLA_BRIDGE_MODE] + * [IFLA_BRIDGE_VLAN_INFO] * } */ enum { IFLA_BRIDGE_FLAGS, IFLA_BRIDGE_MODE, + IFLA_BRIDGE_VLAN_INFO, __IFLA_BRIDGE_MAX, }; #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) +#define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */ + +struct bridge_vlan_info { + u16 flags; + u16 vid; +}; + /* Bridge multicast database attributes * [MDBA_MDB] = { * [MDBA_MDB_ENTRY] = { diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 35a2c2c84f33..091bedf266a0 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -316,6 +316,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_dump = br_fdb_dump, .ndo_bridge_getlink = br_getlink, .ndo_bridge_setlink = br_setlink, + .ndo_bridge_dellink = br_dellink, }; static void br_dev_free(struct net_device *dev) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index af9d65ab4001..335c60cebfd1 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "br_private.h" diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 39ca9796f3f7..534a9f4587a9 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "br_private.h" #include "br_private_stp.h" @@ -119,10 +120,14 @@ nla_put_failure: */ void br_ifinfo_notify(int event, struct net_bridge_port *port) { - struct net *net = dev_net(port->dev); + struct net *net; struct sk_buff *skb; int err = -ENOBUFS; + if (!port) + return; + + net = dev_net(port->dev); br_debug(port->br, "port %u(%s) event %d\n", (unsigned int)port->port_no, port->dev->name, event); @@ -144,6 +149,7 @@ errout: rtnl_set_sk_err(net, RTNLGRP_LINK, err); } + /* * Dump information about all ports, in response to GETLINK */ @@ -162,6 +168,64 @@ out: return err; } +const struct nla_policy ifla_br_policy[IFLA_MAX+1] = { + [IFLA_BRIDGE_FLAGS] = { .type = NLA_U16 }, + [IFLA_BRIDGE_MODE] = { .type = NLA_U16 }, + [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY, + .len = sizeof(struct bridge_vlan_info), }, +}; + +static int br_afspec(struct net_bridge *br, + struct net_bridge_port *p, + struct nlattr *af_spec, + int cmd) +{ + struct nlattr *tb[IFLA_BRIDGE_MAX+1]; + int err = 0; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy); + if (err) + return err; + + if (tb[IFLA_BRIDGE_VLAN_INFO]) { + struct bridge_vlan_info *vinfo; + + vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); + + if (vinfo->vid >= VLAN_N_VID) + return -EINVAL; + + switch (cmd) { + case RTM_SETLINK: + if (p) { + err = nbp_vlan_add(p, vinfo->vid); + if (err) + break; + + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + err = br_vlan_add(p->br, vinfo->vid); + } else + err = br_vlan_add(br, vinfo->vid); + + if (err) + break; + + break; + + case RTM_DELLINK: + if (p) { + nbp_vlan_delete(p, vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + br_vlan_delete(p->br, vinfo->vid); + } else + br_vlan_delete(br, vinfo->vid); + break; + } + } + + return err; +} + static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, [IFLA_BRPORT_COST] = { .type = NLA_U32 }, @@ -241,6 +305,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) { struct ifinfomsg *ifm; struct nlattr *protinfo; + struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; int err; @@ -248,38 +313,76 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) ifm = nlmsg_data(nlh); protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); - if (!protinfo) + afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + if (!protinfo && !afspec) return 0; p = br_port_get_rtnl(dev); - if (!p) + /* We want to accept dev as bridge itself if the AF_SPEC + * is set to see if someone is setting vlan info on the brigde + */ + if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec)) return -EINVAL; - if (protinfo->nla_type & NLA_F_NESTED) { - err = nla_parse_nested(tb, IFLA_BRPORT_MAX, - protinfo, ifla_brport_policy); + if (p && protinfo) { + if (protinfo->nla_type & NLA_F_NESTED) { + err = nla_parse_nested(tb, IFLA_BRPORT_MAX, + protinfo, ifla_brport_policy); + if (err) + return err; + + spin_lock_bh(&p->br->lock); + err = br_setport(p, tb); + spin_unlock_bh(&p->br->lock); + } else { + /* Binary compatability with old RSTP */ + if (nla_len(protinfo) < sizeof(u8)) + return -EINVAL; + + spin_lock_bh(&p->br->lock); + err = br_set_port_state(p, nla_get_u8(protinfo)); + spin_unlock_bh(&p->br->lock); + } if (err) - return err; - - spin_lock_bh(&p->br->lock); - err = br_setport(p, tb); - spin_unlock_bh(&p->br->lock); - } else { - /* Binary compatability with old RSTP */ - if (nla_len(protinfo) < sizeof(u8)) - return -EINVAL; + goto out; + } - spin_lock_bh(&p->br->lock); - err = br_set_port_state(p, nla_get_u8(protinfo)); - spin_unlock_bh(&p->br->lock); + if (afspec) { + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_SETLINK); } if (err == 0) br_ifinfo_notify(RTM_NEWLINK, p); +out: return err; } +/* Delete port information */ +int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) +{ + struct ifinfomsg *ifm; + struct nlattr *afspec; + struct net_bridge_port *p; + int err; + + ifm = nlmsg_data(nlh); + + afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + if (!afspec) + return 0; + + p = br_port_get_rtnl(dev); + /* We want to accept dev as bridge itself as well */ + if (!p && !(dev->priv_flags & IFF_EBRIDGE)) + return -EINVAL; + + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_DELLINK); + + return err; +} static int br_validate(struct nlattr *tb[], struct nlattr *data[]) { if (tb[IFLA_ADDRESS]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f0f24610d111..a42f9d49a64e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -713,6 +713,7 @@ extern int br_netlink_init(void); extern void br_netlink_fini(void); extern void br_ifinfo_notify(int event, struct net_bridge_port *port); extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); +extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c1e4db60eeca..2c9ccbfbd93c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2464,6 +2464,77 @@ out: return err; } +static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(skb->sk); + struct ifinfomsg *ifm; + struct net_device *dev; + struct nlattr *br_spec, *attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 oflags, flags = 0; + bool have_flags = false; + + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_family != AF_BRIDGE) + return -EPFNOSUPPORT; + + dev = __dev_get_by_index(net, ifm->ifi_index); + if (!dev) { + pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + return -ENODEV; + } + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + have_flags = true; + flags = nla_get_u16(attr); + break; + } + } + } + + oflags = flags; + + if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + + if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) { + err = -EOPNOTSUPP; + goto out; + } + + err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + if (err) + goto out; + + flags &= ~BRIDGE_FLAGS_MASTER; + } + + if ((flags & BRIDGE_FLAGS_SELF)) { + if (!dev->netdev_ops->ndo_bridge_dellink) + err = -EOPNOTSUPP; + else + err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + + if (!err) + flags &= ~BRIDGE_FLAGS_SELF; + } + + if (have_flags) + memcpy(nla_data(attr), &flags, sizeof(flags)); + /* Generate event to notify upper layer of bridge change */ + if (!err) + err = rtnl_bridge_notify(dev, oflags); +out: + return err; +} + /* Protected by RTNL sempahore. */ static struct rtattr **rta_buf; static int rtattr_max; @@ -2647,6 +2718,7 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); + rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); } -- cgit v1.2.3-71-gd317 From 6cbdceeb1cb12c7d620161925a8c3e81daadb2e4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:13 +0000 Subject: bridge: Dump vlan information from a bridge port Using the RTM_GETLINK dump the vlan filter list of a given bridge port. The information depends on setting the filter flag similar to how nic VF info is dumped. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +- include/linux/netdevice.h | 3 +- include/uapi/linux/rtnetlink.h | 1 + net/bridge/br_netlink.c | 94 +++++++++++++++++++++++---- net/bridge/br_private.h | 3 +- net/bridge/br_vlan.c | 2 + net/core/rtnetlink.c | 16 +++-- 7 files changed, 104 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 6999269b3a4a..4e2aa47193cb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7079,7 +7079,8 @@ static int ixgbe_ndo_bridge_setlink(struct net_device *dev, } static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev) + struct net_device *dev, + u32 filter_mask) { struct ixgbe_adapter *adapter = netdev_priv(dev); u16 mode; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1b90f9401000..1964ca66df56 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1019,7 +1019,8 @@ struct net_device_ops { struct nlmsghdr *nlh); int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev); + struct net_device *dev, + u32 filter_mask); int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh); int (*ndo_change_carrier)(struct net_device *dev, diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 7a5eb196ade9..7a2144e1afae 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -630,6 +630,7 @@ struct tcamsg { /* New extended info filters for IFLA_EXT_MASK */ #define RTEXT_FILTER_VF (1 << 0) +#define RTEXT_FILTER_BRVLAN (1 << 1) /* End of information exported to user level */ diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 534a9f4587a9..fe1980d5a7e4 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -65,15 +65,21 @@ static int br_port_fill_attrs(struct sk_buff *skb, * Create one netlink message for one interface * Contains port and master info as well as carrier and bridge state. */ -static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *port, - u32 pid, u32 seq, int event, unsigned int flags) +static int br_fill_ifinfo(struct sk_buff *skb, + const struct net_bridge_port *port, + u32 pid, u32 seq, int event, unsigned int flags, + u32 filter_mask, const struct net_device *dev) { - const struct net_bridge *br = port->br; - const struct net_device *dev = port->dev; + const struct net_bridge *br; struct ifinfomsg *hdr; struct nlmsghdr *nlh; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + if (port) + br = port->br; + else + br = netdev_priv(dev); + br_debug(br, "br_fill_info event %d port %s master %s\n", event, dev->name, br->dev->name); @@ -99,7 +105,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_put_u32(skb, IFLA_LINK, dev->iflink))) goto nla_put_failure; - if (event == RTM_NEWLINK) { + if (event == RTM_NEWLINK && port) { struct nlattr *nest = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); @@ -108,6 +114,40 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_nest_end(skb, nest); } + /* Check if the VID information is requested */ + if (filter_mask & RTEXT_FILTER_BRVLAN) { + struct nlattr *af; + const struct net_port_vlans *pv; + struct bridge_vlan_info vinfo; + u16 vid; + + if (port) + pv = nbp_get_vlan_info(port); + else + pv = br_get_vlan_info(br); + + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) + goto done; + + af = nla_nest_start(skb, IFLA_AF_SPEC); + if (!af) + goto nla_put_failure; + + for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + vid < BR_VLAN_BITMAP_LEN; + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1)) { + vinfo.vid = vid; + vinfo.flags = 0; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + nla_nest_end(skb, af); + } + +done: return nlmsg_end(skb, nlh); nla_put_failure: @@ -135,7 +175,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) if (skb == NULL) goto errout; - err = br_fill_ifinfo(skb, port, 0, 0, event, 0); + err = br_fill_ifinfo(skb, port, 0, 0, event, 0, 0, port->dev); if (err < 0) { /* -EMSGSIZE implies BUG in br_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -154,16 +194,17 @@ errout: * Dump information about all ports, in response to GETLINK */ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev) + struct net_device *dev, u32 filter_mask) { int err = 0; struct net_bridge_port *port = br_port_get_rcu(dev); - /* not a bridge port */ - if (!port) + /* not a bridge port and */ + if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN)) goto out; - err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI); + err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + filter_mask, dev); out: return err; } @@ -395,6 +436,29 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } +static size_t br_get_link_af_size(const struct net_device *dev) +{ + struct net_port_vlans *pv; + + if (br_port_exists(dev)) + pv = nbp_get_vlan_info(br_port_get_rcu(dev)); + else if (dev->priv_flags & IFF_EBRIDGE) + pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); + else + return 0; + + if (!pv) + return 0; + + /* Each VLAN is returned in bridge_vlan_info along with flags */ + return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); +} + +struct rtnl_af_ops br_af_ops = { + .family = AF_BRIDGE, + .get_link_af_size = br_get_link_af_size, +}; + struct rtnl_link_ops br_link_ops __read_mostly = { .kind = "bridge", .priv_size = sizeof(struct net_bridge), @@ -408,11 +472,18 @@ int __init br_netlink_init(void) int err; br_mdb_init(); - err = rtnl_link_register(&br_link_ops); + err = rtnl_af_register(&br_af_ops); if (err) goto out; + err = rtnl_link_register(&br_link_ops); + if (err) + goto out_af; + return 0; + +out_af: + rtnl_af_unregister(&br_af_ops); out: br_mdb_uninit(); return err; @@ -421,5 +492,6 @@ out: void __exit br_netlink_fini(void) { br_mdb_uninit(); + rtnl_af_unregister(&br_af_ops); rtnl_link_unregister(&br_link_ops); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index a42f9d49a64e..ce2235255c2f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -73,6 +73,7 @@ struct net_port_vlans { } parent; struct rcu_head rcu; unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN]; + u16 num_vlans; }; struct net_bridge_fdb_entry @@ -715,7 +716,7 @@ extern void br_ifinfo_notify(int event, struct net_bridge_port *port); extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev); + struct net_device *dev, u32 filter_mask); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index d8690bfe63d4..f2bf5a197ea3 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -28,6 +28,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid) } set_bit(vid, v->vlan_bitmap); + v->num_vlans++; return 0; } @@ -44,6 +45,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) } clear_bit(vid, v->vlan_bitmap); + v->num_vlans--; if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { if (v->port_idx) rcu_assign_pointer(v->parent.port->vlan_info, NULL); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2c9ccbfbd93c..f3a112ec86d5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2315,6 +2315,13 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0; u32 portid = NETLINK_CB(cb->skb).portid; u32 seq = cb->nlh->nlmsg_seq; + struct nlattr *extfilt; + u32 filter_mask = 0; + + extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg), + IFLA_EXT_MASK); + if (extfilt) + filter_mask = nla_get_u32(extfilt); rcu_read_lock(); for_each_netdev_rcu(net, dev) { @@ -2324,14 +2331,15 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && br_dev->netdev_ops->ndo_bridge_getlink( - skb, portid, seq, dev) < 0) + skb, portid, seq, dev, filter_mask) < 0) break; idx++; } if (ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && - ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0) + ops->ndo_bridge_getlink(skb, portid, seq, dev, + filter_mask) < 0) break; idx++; } @@ -2372,14 +2380,14 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags) if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { - err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } if ((flags & BRIDGE_FLAGS_SELF) && dev->netdev_ops->ndo_bridge_getlink) { - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } -- cgit v1.2.3-71-gd317 From 1690be63a27b20ae65c792729a44f5970561ffa4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:18 +0000 Subject: bridge: Add vlan support to static neighbors When a user adds bridge neighbors, allow him to specify VLAN id. If the VLAN id is not specified, the neighbor will be added for VLANs currently in the ports filter list. If no VLANs are configured on the port, we use vlan 0 and only add 1 entry. Signed-off-by: Vlad Yasevich Acked-by: Jitendra Kalsaria Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 + drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 4 +- drivers/net/macvlan.c | 2 +- drivers/net/vxlan.c | 3 +- include/linux/netdevice.h | 4 +- include/uapi/linux/neighbour.h | 1 + net/bridge/br_fdb.c | 148 ++++++++++++++++++++--- net/bridge/br_private.h | 6 +- net/core/rtnetlink.c | 26 ++-- 10 files changed, 162 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 4e2aa47193cb..1c0efcb7920f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7002,7 +7002,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return err; } -static int ixgbe_ndo_fdb_del(struct ndmsg *ndm, +static int ixgbe_ndo_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr) { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 937bcc3d3212..5088dc5c3d1a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1959,6 +1959,7 @@ static int mlx4_en_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } static int mlx4_en_fdb_del(struct ndmsg *ndm, + struct nlattr *tb[], struct net_device *dev, const unsigned char *addr) { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index b745194391a1..b95316831587 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -247,8 +247,8 @@ static int qlcnic_set_mac(struct net_device *netdev, void *p) return 0; } -static int qlcnic_fdb_del(struct ndmsg *ndm, struct net_device *netdev, - const unsigned char *addr) +static int qlcnic_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *netdev, const unsigned char *addr) { struct qlcnic_adapter *adapter = netdev_priv(netdev); int err = -EOPNOTSUPP; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index e4b8078e88a9..defcd8a85744 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -599,7 +599,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return err; } -static int macvlan_fdb_del(struct ndmsg *ndm, +static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr) { diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 72485b9b9005..9d70421cf3a0 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -393,7 +393,8 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } /* Delete entry (via netlink) */ -static int vxlan_fdb_delete(struct ndmsg *ndm, struct net_device *dev, +static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr) { struct vxlan_dev *vxlan = netdev_priv(dev); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1964ca66df56..9deb672d999f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -884,7 +884,8 @@ struct netdev_fcoe_hbainfo { * struct net_device *dev, * const unsigned char *addr, u16 flags) * Adds an FDB entry to dev for addr. - * int (*ndo_fdb_del)(struct ndmsg *ndm, struct net_device *dev, + * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], + * struct net_device *dev, * const unsigned char *addr) * Deletes the FDB entry from dev coresponding to addr. * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, @@ -1008,6 +1009,7 @@ struct net_device_ops { const unsigned char *addr, u16 flags); int (*ndo_fdb_del)(struct ndmsg *ndm, + struct nlattr *tb[], struct net_device *dev, const unsigned char *addr); int (*ndo_fdb_dump)(struct sk_buff *skb, diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 275e5d65dcb2..adb068c53c4e 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -20,6 +20,7 @@ enum { NDA_LLADDR, NDA_CACHEINFO, NDA_PROBES, + NDA_VLAN, __NDA_MAX }; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 276a52254606..4b75ad43aa85 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -505,6 +505,10 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, ci.ndm_refcnt = 0; if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; + + if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -516,6 +520,7 @@ static inline size_t fdb_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(u16)) /* NDA_VLAN */ + nla_total_size(sizeof(struct nda_cacheinfo)); } @@ -617,6 +622,25 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, return 0; } +static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p, + const unsigned char *addr, u16 nlh_flags, u16 vid) +{ + int err = 0; + + if (ndm->ndm_flags & NTF_USE) { + rcu_read_lock(); + br_fdb_update(p->br, p, addr, vid); + rcu_read_unlock(); + } else { + spin_lock_bh(&p->br->hash_lock); + err = fdb_add_entry(p, addr, ndm->ndm_state, + nlh_flags, vid); + spin_unlock_bh(&p->br->hash_lock); + } + + return err; +} + /* Add new permanent fdb entry with RTM_NEWNEIGH */ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, @@ -624,12 +648,29 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], { struct net_bridge_port *p; int err = 0; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); return -EINVAL; } + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } + p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", @@ -637,41 +678,90 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - if (ndm->ndm_flags & NTF_USE) { - rcu_read_lock(); - br_fdb_update(p->br, p, addr, 0); - rcu_read_unlock(); + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: RTM_NEWNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + + /* VID was specified, so use it. */ + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { - spin_lock_bh(&p->br->hash_lock); - err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags, - 0); - spin_unlock_bh(&p->br->hash_lock); + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + goto out; + } + + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. + */ + vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + while (vid < BR_VLAN_BITMAP_LEN) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); + if (err) + goto out; + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1); + } } +out: return err; } -static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr) +static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, + u16 vlan) { - struct net_bridge *br = p->br; - struct hlist_head *head = &br->hash[br_mac_hash(addr, 0)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)]; struct net_bridge_fdb_entry *fdb; - fdb = fdb_find(head, addr, 0); + fdb = fdb_find(head, addr, vlan); if (!fdb) return -ENOENT; - fdb_delete(p->br, fdb); + fdb_delete(br, fdb); return 0; } +static int __br_fdb_delete(struct net_bridge_port *p, + const unsigned char *addr, u16 vid) +{ + int err; + + spin_lock_bh(&p->br->hash_lock); + err = fdb_delete_by_addr(p->br, addr, vid); + spin_unlock_bh(&p->br->hash_lock); + + return err; +} + /* Remove neighbor entry with RTM_DELNEIGH */ -int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, +int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr) { struct net_bridge_port *p; int err; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", @@ -679,9 +769,33 @@ int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, return -EINVAL; } - spin_lock_bh(&p->br->hash_lock); - err = fdb_delete_by_addr(p, addr); - spin_unlock_bh(&p->br->hash_lock); + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: RTM_DELNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + err = __br_fdb_delete(p, addr, vid); + } else { + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_delete(p, addr, 0); + goto out; + } + + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. + */ + err = -ENOENT; + vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + while (vid < BR_VLAN_BITMAP_LEN) { + err &= __br_fdb_delete(p, addr, vid); + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1); + } + } +out: return err; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 22915c8e9961..799dbb37e5a2 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -388,7 +388,7 @@ extern void br_fdb_update(struct net_bridge *br, const unsigned char *addr, u16 vid); -extern int br_fdb_delete(struct ndmsg *ndm, +extern int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr); extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], @@ -577,13 +577,13 @@ extern void nbp_vlan_flush(struct net_bridge_port *port); static inline struct net_port_vlans *br_get_vlan_info( const struct net_bridge *br) { - return rcu_dereference(br->vlan_info); + return rcu_dereference_rtnl(br->vlan_info); } static inline struct net_port_vlans *nbp_get_vlan_info( const struct net_bridge_port *p) { - return rcu_dereference(p->vlan_info); + return rcu_dereference_rtnl(p->vlan_info); } /* Since bridge now depends on 8021Q module, but the time bridge sees the diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f3a112ec86d5..d8aa20f6a46e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2119,13 +2119,17 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; - struct nlattr *llattr; + struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; int err = -EINVAL; __u8 *addr; - if (nlmsg_len(nlh) < sizeof(*ndm)) - return -EINVAL; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + if (err < 0) + return err; ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { @@ -2139,13 +2143,17 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -ENODEV; } - llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR); - if (llattr == NULL || nla_len(llattr) != ETH_ALEN) { - pr_info("PF_BRIGDE: RTM_DELNEIGH with invalid address\n"); + if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n"); + return -EINVAL; + } + + addr = nla_data(tb[NDA_LLADDR]); + if (!is_valid_ether_addr(addr)) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n"); return -EINVAL; } - addr = nla_data(llattr); err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ @@ -2155,7 +2163,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) const struct net_device_ops *ops = br_dev->netdev_ops; if (ops->ndo_fdb_del) - err = ops->ndo_fdb_del(ndm, dev, addr); + err = ops->ndo_fdb_del(ndm, tb, dev, addr); if (err) goto out; @@ -2165,7 +2173,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) /* Embedded bridge, macvlan, and any other device support */ if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { - err = dev->netdev_ops->ndo_fdb_del(ndm, dev, addr); + err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); if (!err) { rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); -- cgit v1.2.3-71-gd317 From 7bf9b9a0f0372d45b581f00173505fb76a9c5d23 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Dec 2012 18:45:41 +0100 Subject: wireless: define operating mode action frame Define the action frame format, the VHT category and its action types and the field format and EID for operating mode notifications. The frame may be used outside of VHT context as well, so don't include "VHT" in the names. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 7e8a498efe6d..67c1a6c45837 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -714,6 +714,30 @@ enum ieee80211_ht_chanwidth_values { IEEE80211_HT_CHANWIDTH_ANY = 1, }; +/** + * enum ieee80211_opmode_bits - VHT operating mode field bits + * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK: channel width mask + * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: 20 MHz channel width + * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: 40 MHz channel width + * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ: 80 MHz channel width + * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ: 160 MHz or 80+80 MHz channel width + * @IEEE80211_OPMODE_NOTIF_RX_NSS_MASK: number of spatial streams mask + * (the NSS value is the value of this field + 1) + * @IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT: number of spatial streams shift + * @IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF: indicates streams in SU-MIMO PPDU + * using a beamforming steering matrix + */ +enum ieee80211_vht_opmode_bits { + IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK = 3, + IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ = 0, + IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ = 1, + IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ = 2, + IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ = 3, + IEEE80211_OPMODE_NOTIF_RX_NSS_MASK = 0x70, + IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT = 4, + IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF = 0x80, +}; + #define WLAN_SA_QUERY_TR_ID_LEN 2 struct ieee80211_mgmt { @@ -844,6 +868,10 @@ struct ieee80211_mgmt { __le16 capability; u8 variable[0]; } __packed tdls_discover_resp; + struct { + u8 action_code; + u8 operating_mode; + } __packed vht_opmode_notif; } u; } __packed action; } u; @@ -1598,6 +1626,7 @@ enum ieee80211_eid { WLAN_EID_VHT_CAPABILITY = 191, WLAN_EID_VHT_OPERATION = 192, + WLAN_EID_OPMODE_NOTIF = 199, /* 802.11ad */ WLAN_EID_NON_TX_BSSID_CAP = 83, @@ -1652,6 +1681,7 @@ enum ieee80211_category { WLAN_CATEGORY_WMM = 17, WLAN_CATEGORY_FST = 18, WLAN_CATEGORY_UNPROT_DMG = 20, + WLAN_CATEGORY_VHT = 21, WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126, WLAN_CATEGORY_VENDOR_SPECIFIC = 127, }; @@ -1677,6 +1707,13 @@ enum ieee80211_ht_actioncode { WLAN_HT_ACTION_ASEL_IDX_FEEDBACK = 7, }; +/* VHT action codes */ +enum ieee80211_vht_actioncode { + WLAN_VHT_ACTION_COMPRESSED_BF = 0, + WLAN_VHT_ACTION_GROUPID_MGMT = 1, + WLAN_VHT_ACTION_OPMODE_NOTIF = 2, +}; + /* Self Protected Action codes */ enum ieee80211_self_protected_actioncode { WLAN_SP_RESERVED = 0, -- cgit v1.2.3-71-gd317 From 0af83d3df5863224336a18c24a14fda542b712f5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Dec 2012 18:55:36 +0100 Subject: mac80211: handle VHT operating mode notification Handle the operating mode notification action frame. When the supported streams or the bandwidth change let the driver and rate control algorithm know. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + include/net/mac80211.h | 3 ++ net/mac80211/ht.c | 4 ++ net/mac80211/ieee80211_i.h | 3 ++ net/mac80211/rx.c | 30 +++++++++++++++ net/mac80211/sta_info.h | 4 ++ net/mac80211/vht.c | 93 +++++++++++++++++++++++++++++++++++++++++----- 7 files changed, 128 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 67c1a6c45837..12b5996533ec 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1301,6 +1301,7 @@ struct ieee80211_vht_operation { #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 0x00000002 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ 0x00000004 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ 0x00000008 +#define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK 0x0000000C #define IEEE80211_VHT_CAP_RXLDPC 0x00000010 #define IEEE80211_VHT_CAP_SHORT_GI_80 0x00000020 #define IEEE80211_VHT_CAP_SHORT_GI_160 0x00000040 diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a608ab9879b4..b7fb311e83f4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2118,11 +2118,14 @@ enum ieee80211_frame_release_type { * @IEEE80211_RC_SUPP_RATES_CHANGED: The supported rate set of this peer * changed (in IBSS mode) due to discovering more information about * the peer. + * @IEEE80211_RC_NSS_CHANGED: N_SS (number of spatial streams) was changed + * by the peer */ enum ieee80211_rate_control_changed { IEEE80211_RC_BW_CHANGED = BIT(0), IEEE80211_RC_SMPS_CHANGED = BIT(1), IEEE80211_RC_SUPP_RATES_CHANGED = BIT(2), + IEEE80211_RC_NSS_CHANGED = BIT(3), }; /** diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index a64b4f0d373f..797969bc26e1 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -212,6 +212,10 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, changed = true; sta->sta.bandwidth = bw; + sta->cur_max_bandwidth = + ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + return changed; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3b13af4e6c49..4947c91c6c86 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1433,6 +1433,9 @@ void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct sta_info *sta); enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta); void ieee80211_sta_set_rx_nss(struct sta_info *sta); +void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, u8 opmode, + enum ieee80211_band band); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8a861a50b12f..1617e0bd4ca6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2435,6 +2435,36 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto invalid; } + break; + case WLAN_CATEGORY_VHT: + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_ADHOC) + break; + + /* verify action code is present */ + if (len < IEEE80211_MIN_ACTION_SIZE + 1) + goto invalid; + + switch (mgmt->u.action.u.vht_opmode_notif.action_code) { + case WLAN_VHT_ACTION_OPMODE_NOTIF: { + u8 opmode; + + /* verify opmode is present */ + if (len < IEEE80211_MIN_ACTION_SIZE + 2) + goto invalid; + + opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode; + + ieee80211_vht_handle_opmode(rx->sdata, rx->sta, + opmode, status->band); + goto handled; + } + default: + break; + } break; case WLAN_CATEGORY_BACK: if (sdata->vif.type != NL80211_IFTYPE_STATION && diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 03c42f8d39f0..63dfdb5e91da 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -297,6 +297,8 @@ struct sta_ampdu_mlme { * @sta_state: duplicates information about station state (for debug) * @beacon_loss_count: number of times beacon loss has triggered * @rcu_head: RCU head used for freeing this station struct + * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, + * taken from HT/VHT capabilities or VHT operating mode notification */ struct sta_info { /* General information, mostly static */ @@ -398,6 +400,8 @@ struct sta_info { } debugfs; #endif + enum ieee80211_sta_rx_bandwidth cur_max_bandwidth; + unsigned int lost_packets; unsigned int beacon_loss_count; diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 67436e3efbbd..0951f74e7ff5 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -10,6 +10,7 @@ #include #include #include "ieee80211_i.h" +#include "rate.h" void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, @@ -39,6 +40,15 @@ void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs, sizeof(struct ieee80211_vht_mcs_info)); + switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + break; + default: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; + } + sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); } @@ -46,12 +56,13 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; u32 cap = sta->sta.vht_cap.cap; + enum ieee80211_sta_rx_bandwidth bw; - if (!sta->sta.vht_cap.vht_supported) - return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + if (!sta->sta.vht_cap.vht_supported) { + bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; - - /* TODO: handle VHT opmode notification data */ + goto check_max; + } switch (sdata->vif.bss_conf.chandef.width) { default: @@ -60,19 +71,31 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_40: - return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + break; case NL80211_CHAN_WIDTH_160: - if (cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ) - return IEEE80211_STA_RX_BW_160; + if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) == + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ) { + bw = IEEE80211_STA_RX_BW_160; + break; + } /* fall through */ case NL80211_CHAN_WIDTH_80P80: - if (cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) - return IEEE80211_STA_RX_BW_160; + if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) == + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) { + bw = IEEE80211_STA_RX_BW_160; + break; + } /* fall through */ case NL80211_CHAN_WIDTH_80: - return IEEE80211_STA_RX_BW_80; + bw = IEEE80211_STA_RX_BW_80; } + + check_max: + if (bw > sta->cur_max_bandwidth) + bw = sta->cur_max_bandwidth; + return bw; } void ieee80211_sta_set_rx_nss(struct sta_info *sta) @@ -115,3 +138,53 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta) ht_rx_nss = max(ht_rx_nss, vht_rx_nss); sta->sta.rx_nss = max_t(u8, 1, ht_rx_nss); } + +void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, u8 opmode, + enum ieee80211_band band) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + enum ieee80211_sta_rx_bandwidth new_bw; + u32 changed = 0; + u8 nss; + + sband = local->hw.wiphy->bands[band]; + + /* ignore - no support for BF yet */ + if (opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF) + return; + + nss = opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK; + nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT; + nss += 1; + + if (sta->sta.rx_nss != nss) { + sta->sta.rx_nss = nss; + changed |= IEEE80211_RC_NSS_CHANGED; + } + + switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) { + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20; + break; + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_40; + break; + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; + break; + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + break; + } + + new_bw = ieee80211_sta_cur_vht_bw(sta); + if (new_bw != sta->sta.bandwidth) { + sta->sta.bandwidth = new_bw; + changed |= IEEE80211_RC_NSS_CHANGED; + } + + if (changed) + rate_control_rate_update(local, sband, sta, changed); +} -- cgit v1.2.3-71-gd317 From 4a3cb702b05868f67c4ee3da3380461c5b90b4ca Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 12 Feb 2013 16:43:19 +0100 Subject: mac80211: constify IE parsing Make all the parsed IE pointers const, and propagate the change to all the users etc. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- net/mac80211/ht.c | 2 +- net/mac80211/ieee80211_i.h | 87 +++++++++++++++++++++++---------------------- net/mac80211/mesh.h | 11 +++--- net/mac80211/mesh_hwmp.c | 42 ++++++++++++---------- net/mac80211/mesh_pathtbl.c | 11 +++--- net/mac80211/mlme.c | 15 ++++---- net/mac80211/util.c | 4 +-- net/mac80211/vht.c | 9 ++--- 9 files changed, 96 insertions(+), 87 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 12b5996533ec..e085fcf52b26 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2152,7 +2152,7 @@ static inline unsigned long ieee80211_tu_to_usec(unsigned long tu) * @tim_len: length of the TIM IE * @aid: the AID to look for */ -static inline bool ieee80211_check_tim(struct ieee80211_tim_ie *tim, +static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, u8 tim_len, u16 aid) { u8 mask; diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 797969bc26e1..b84147ac5b4c 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -94,7 +94,7 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct ieee80211_ht_cap *ht_cap_ie, + const struct ieee80211_ht_cap *ht_cap_ie, struct sta_info *sta) { struct ieee80211_sta_ht_cap ht_cap; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d1074442f1b5..d702f7dc321b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1163,41 +1163,41 @@ struct ieee80211_ra_tid { /* Parsed Information Elements */ struct ieee802_11_elems { - u8 *ie_start; + const u8 *ie_start; size_t total_len; /* pointers to IEs */ - u8 *ssid; - u8 *supp_rates; - u8 *fh_params; - u8 *ds_params; - u8 *cf_params; - struct ieee80211_tim_ie *tim; - u8 *ibss_params; - u8 *challenge; - u8 *rsn; - u8 *erp_info; - u8 *ext_supp_rates; - u8 *wmm_info; - u8 *wmm_param; - struct ieee80211_ht_cap *ht_cap_elem; - struct ieee80211_ht_operation *ht_operation; - struct ieee80211_vht_cap *vht_cap_elem; - struct ieee80211_vht_operation *vht_operation; - struct ieee80211_meshconf_ie *mesh_config; - u8 *mesh_id; - u8 *peering; - __le16 *awake_window; - u8 *preq; - u8 *prep; - u8 *perr; - struct ieee80211_rann_ie *rann; - struct ieee80211_channel_sw_ie *ch_switch_ie; - u8 *country_elem; - u8 *pwr_constr_elem; - u8 *quiet_elem; /* first quite element */ - u8 *timeout_int; - u8 *opmode_notif; + const u8 *ssid; + const u8 *supp_rates; + const u8 *fh_params; + const u8 *ds_params; + const u8 *cf_params; + const struct ieee80211_tim_ie *tim; + const u8 *ibss_params; + const u8 *challenge; + const u8 *rsn; + const u8 *erp_info; + const u8 *ext_supp_rates; + const u8 *wmm_info; + const u8 *wmm_param; + const struct ieee80211_ht_cap *ht_cap_elem; + const struct ieee80211_ht_operation *ht_operation; + const struct ieee80211_vht_cap *vht_cap_elem; + const struct ieee80211_vht_operation *vht_operation; + const struct ieee80211_meshconf_ie *mesh_config; + const u8 *mesh_id; + const u8 *peering; + const __le16 *awake_window; + const u8 *preq; + const u8 *prep; + const u8 *perr; + const struct ieee80211_rann_ie *rann; + const struct ieee80211_channel_sw_ie *ch_switch_ie; + const u8 *country_elem; + const u8 *pwr_constr_elem; + const u8 *quiet_elem; /* first quite element */ + const u8 *timeout_int; + const u8 *opmode_notif; /* length of them, respectively */ u8 ssid_len; @@ -1276,10 +1276,10 @@ void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata); int ieee80211_max_network_latency(struct notifier_block *nb, unsigned long data, void *dummy); int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata); -void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel_sw_ie *sw_elem, - struct ieee80211_bss *bss, - u64 timestamp); +void +ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_channel_sw_ie *sw_elem, + struct ieee80211_bss *bss, u64 timestamp); void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata); @@ -1387,7 +1387,7 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_ht_cap *ht_cap); bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct ieee80211_ht_cap *ht_cap_ie, + const struct ieee80211_ht_cap *ht_cap_ie, struct sta_info *sta); void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, @@ -1428,10 +1428,11 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid); u8 ieee80211_mcs_to_chains(const struct ieee80211_mcs_info *mcs); /* VHT */ -void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - struct ieee80211_vht_cap *vht_cap_ie, - struct sta_info *sta); +void +ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const struct ieee80211_vht_cap *vht_cap_ie, + struct sta_info *sta); enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta); void ieee80211_sta_set_rx_nss(struct sta_info *sta); void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, @@ -1555,7 +1556,7 @@ static inline void ieee80211_add_pending_skbs(struct ieee80211_local *local, void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, - u8 *extra, size_t extra_len, const u8 *bssid, + const u8 *extra, size_t extra_len, const u8 *bssid, const u8 *da, const u8 *key, u8 key_len, u8 key_idx, u32 tx_flags); void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, @@ -1606,7 +1607,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, /* channel management */ void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, - struct ieee80211_ht_operation *ht_oper, + const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef); int __must_check diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 7ad035f0cacc..a1bad310f2e9 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -265,8 +265,8 @@ int mesh_nexthop_lookup(struct sk_buff *skb, int mesh_nexthop_resolve(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata); void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata); -struct mesh_path *mesh_path_lookup(u8 *dst, - struct ieee80211_sub_if_data *sdata); +struct mesh_path *mesh_path_lookup(const u8 *dst, + struct ieee80211_sub_if_data *sdata); struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata); int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata); @@ -276,7 +276,7 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop); void mesh_path_expire(struct ieee80211_sub_if_data *sdata); void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len); -int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata); +int mesh_path_add(const u8 *dst, struct ieee80211_sub_if_data *sdata); int mesh_path_add_gate(struct mesh_path *mpath); int mesh_path_send_to_gates(struct mesh_path *mpath); @@ -301,8 +301,9 @@ void mesh_sta_cleanup(struct sta_info *sta); void mesh_mpath_table_grow(void); void mesh_mpp_table_grow(void); /* Mesh paths */ -int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, __le16 target_rcode, - const u8 *ra, struct ieee80211_sub_if_data *sdata); +int mesh_path_error_tx(u8 ttl, const u8 *target, __le32 target_sn, + __le16 target_rcode, const u8 *ra, + struct ieee80211_sub_if_data *sdata); void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta); void mesh_path_flush_pending(struct mesh_path *mpath); void mesh_path_tx_pending(struct mesh_path *mpath); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index f0dd8742ed42..585c1e26cca8 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -30,14 +30,14 @@ static void mesh_queue_preq(struct mesh_path *, u8); -static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae) +static inline u32 u32_field_get(const u8 *preq_elem, int offset, bool ae) { if (ae) offset += 6; return get_unaligned_le32(preq_elem + offset); } -static inline u32 u16_field_get(u8 *preq_elem, int offset, bool ae) +static inline u32 u16_field_get(const u8 *preq_elem, int offset, bool ae) { if (ae) offset += 6; @@ -102,10 +102,13 @@ enum mpath_frame_type { static const u8 broadcast_addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, - u8 *orig_addr, __le32 orig_sn, u8 target_flags, u8 *target, - __le32 target_sn, const u8 *da, u8 hop_count, u8 ttl, - __le32 lifetime, __le32 metric, __le32 preq_id, - struct ieee80211_sub_if_data *sdata) + const u8 *orig_addr, __le32 orig_sn, + u8 target_flags, const u8 *target, + __le32 target_sn, const u8 *da, + u8 hop_count, u8 ttl, + __le32 lifetime, __le32 metric, + __le32 preq_id, + struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -235,7 +238,7 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, * also acquires in the TX path. To avoid a deadlock we don't transmit the * frame directly but add it to the pending queue instead. */ -int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, +int mesh_path_error_tx(u8 ttl, const u8 *target, __le32 target_sn, __le16 target_rcode, const u8 *ra, struct ieee80211_sub_if_data *sdata) { @@ -369,14 +372,14 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, * path routing information is updated. */ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - u8 *hwmp_ie, enum mpath_frame_type action) + struct ieee80211_mgmt *mgmt, + const u8 *hwmp_ie, enum mpath_frame_type action) { struct ieee80211_local *local = sdata->local; struct mesh_path *mpath; struct sta_info *sta; bool fresh_info; - u8 *orig_addr, *ta; + const u8 *orig_addr, *ta; u32 orig_sn, orig_metric; unsigned long orig_lifetime, exp_time; u32 last_hop_metric, new_metric; @@ -511,11 +514,11 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, - u8 *preq_elem, u32 metric) + const u8 *preq_elem, u32 metric) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath = NULL; - u8 *target_addr, *orig_addr; + const u8 *target_addr, *orig_addr; const u8 *da; u8 target_flags, ttl, flags; u32 orig_sn, target_sn, lifetime, orig_metric; @@ -648,11 +651,11 @@ next_hop_deref_protected(struct mesh_path *mpath) static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, - u8 *prep_elem, u32 metric) + const u8 *prep_elem, u32 metric) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath; - u8 *target_addr, *orig_addr; + const u8 *target_addr, *orig_addr; u8 ttl, hopcount, flags; u8 next_hop[ETH_ALEN]; u32 target_sn, orig_sn, lifetime; @@ -711,12 +714,13 @@ fail: } static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, u8 *perr_elem) + struct ieee80211_mgmt *mgmt, + const u8 *perr_elem) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath; u8 ttl; - u8 *ta, *target_addr; + const u8 *ta, *target_addr; u32 target_sn; u16 target_rcode; @@ -758,15 +762,15 @@ endperr: } static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - struct ieee80211_rann_ie *rann) + struct ieee80211_mgmt *mgmt, + const struct ieee80211_rann_ie *rann) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct mesh_path *mpath; u8 ttl, flags, hopcount; - u8 *orig_addr; + const u8 *orig_addr; u32 orig_sn, metric, metric_txsta, interval; bool root_is_gate; diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index d5786c3eaee2..2ce4c4023a97 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -181,7 +181,7 @@ errcopy: return -ENOMEM; } -static u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, +static u32 mesh_table_hash(const u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl) { /* Use last four bytes of hw addr and interface index as hash index */ @@ -326,8 +326,8 @@ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, } -static struct mesh_path *mpath_lookup(struct mesh_table *tbl, u8 *dst, - struct ieee80211_sub_if_data *sdata) +static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst, + struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; struct hlist_node *n; @@ -359,7 +359,8 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, u8 *dst, * * Locking: must be called within a read rcu section. */ -struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) +struct mesh_path *mesh_path_lookup(const u8 *dst, + struct ieee80211_sub_if_data *sdata) { return mpath_lookup(rcu_dereference(mesh_paths), dst, sdata); } @@ -494,7 +495,7 @@ int mesh_gate_num(struct ieee80211_sub_if_data *sdata) * * State: the initial state of the new path is set to 0 */ -int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) +int mesh_path_add(const u8 *dst, struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 05b229e3b226..7a8cd789e487 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1041,10 +1041,10 @@ static void ieee80211_chswitch_timer(unsigned long data) ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); } -void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel_sw_ie *sw_elem, - struct ieee80211_bss *bss, - u64 timestamp) +void +ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_channel_sw_ie *sw_elem, + struct ieee80211_bss *bss, u64 timestamp) { struct cfg80211_bss *cbss = container_of((void *)bss, struct cfg80211_bss, priv); @@ -1479,13 +1479,14 @@ void ieee80211_dfs_cac_timer_work(struct work_struct *work) /* MLME */ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - u8 *wmm_param, size_t wmm_param_len) + const u8 *wmm_param, size_t wmm_param_len) { struct ieee80211_tx_queue_params params; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; size_t left; int count; - u8 *pos, uapsd_queues = 0; + const u8 *pos; + u8 uapsd_queues = 0; if (!local->ops->conf_tx) return false; @@ -2670,7 +2671,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, need_ps = sdata->u.mgd.associated && !sdata->u.mgd.dtim_period; if (elems->tim && !elems->parse_error) { - struct ieee80211_tim_ie *tim_ie = elems->tim; + const struct ieee80211_tim_ie *tim_ie = elems->tim; sdata->u.mgd.dtim_period = tim_ie->dtim_period; } } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e24ff38606a9..0f38f43ac62e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1035,7 +1035,7 @@ u32 ieee80211_mandatory_rates(struct ieee80211_local *local, void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, - u8 *extra, size_t extra_len, const u8 *da, + const u8 *extra, size_t extra_len, const u8 *da, const u8 *bssid, const u8 *key, u8 key_len, u8 key_idx, u32 tx_flags) { @@ -1947,7 +1947,7 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, } void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, - struct ieee80211_ht_operation *ht_oper, + const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef) { enum nl80211_channel_type channel_type; diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index a9549fcc5a04..a2c2258bc84e 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -13,10 +13,11 @@ #include "rate.h" -void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - struct ieee80211_vht_cap *vht_cap_ie, - struct sta_info *sta) +void +ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const struct ieee80211_vht_cap *vht_cap_ie, + struct sta_info *sta) { struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; -- cgit v1.2.3-71-gd317 From c6f9d6c3bdeb337809d667ef2a41597229a1ce57 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Feb 2013 14:27:08 +0100 Subject: mac80211: advertise operating mode notification capability Use the new extended capabilities advertising to advertise the fact that operating mode notification is supported. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 ++ net/mac80211/main.c | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e085fcf52b26..7e24fe0cfbcd 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1776,6 +1776,8 @@ enum ieee80211_tdls_actioncode { #define WLAN_EXT_CAPA5_TDLS_ENABLED BIT(5) #define WLAN_EXT_CAPA5_TDLS_PROHIBITED BIT(6) +#define WLAN_EXT_CAPA8_OPMODE_NOTIF BIT(6) + /* TDLS specific payload type in the LLC/SNAP header */ #define WLAN_TDLS_SNAP_RFTYPE 0x2 diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 9cdbc774cfd7..035344bc6b9c 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -501,6 +501,11 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { }, }; +static const u8 extended_capabilities[] = { + 0, 0, 0, 0, 0, 0, 0, + WLAN_EXT_CAPA8_OPMODE_NOTIF, +}; + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -557,6 +562,10 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, WIPHY_FLAG_REPORTS_OBSS | WIPHY_FLAG_OFFCHAN_TX; + wiphy->extended_capabilities = extended_capabilities; + wiphy->extended_capabilities_mask = extended_capabilities; + wiphy->extended_capabilities_len = ARRAY_SIZE(extended_capabilities); + if (ops->remain_on_channel) wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL; -- cgit v1.2.3-71-gd317 From 14bbd6a565e1bcdc240d44687edb93f721cfdf99 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 14 Feb 2013 09:44:49 +0000 Subject: net: Add skb_unclone() helper function. This function will be used in next GRE_GSO patch. This patch does not change any functionality. Signed-off-by: Pravin B Shelar Acked-by: Eric Dumazet --- drivers/net/ppp/ppp_generic.c | 3 +-- include/linux/skbuff.h | 10 ++++++++++ net/ipv4/ah4.c | 3 +-- net/ipv4/ip_fragment.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/ipv4/xfrm4_input.c | 2 +- net/ipv4/xfrm4_mode_tunnel.c | 3 +-- net/ipv6/ah6.c | 3 +-- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/reassembly.c | 2 +- net/ipv6/xfrm6_mode_tunnel.c | 3 +-- net/sched/act_ipt.c | 6 ++---- net/sched/act_pedit.c | 3 +-- 13 files changed, 23 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 0b2706abe3e3..4fd754e74eb2 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1805,8 +1805,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) /* the filter instructions are constructed assuming a four-byte PPP header on each packet */ if (ppp->pass_filter || ppp->active_filter) { - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto err; *skb_push(skb, 2) = 0; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9da99520ccd5..ca6ee7d93edb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -804,6 +804,16 @@ static inline int skb_cloned(const struct sk_buff *skb) (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1; } +static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) +{ + might_sleep_if(pri & __GFP_WAIT); + + if (skb_cloned(skb)) + return pskb_expand_head(skb, 0, 0, pri); + + return 0; +} + /** * skb_header_cloned - is the header a clone * @skb: buffer to check diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index a69b4e4a02b5..2e7f1948216f 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -321,8 +321,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) /* We are going to _remove_ AH header to keep sockets happy, * so... Later this can change. */ - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto out; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1211613c6c34..b6d30acb600c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -590,7 +590,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, goto out_oversize; /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) + if (skb_unclone(head, GFP_ATOMIC)) goto out_nomem; /* If the first fragment is fragmented itself, we split diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6182d90e97b0..fd0cea114b5d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1331,7 +1331,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) /* Remove acked data from a packet in the transmit queue. */ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) { - if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; __pskb_trim_head(skb, len); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 06814b6216dc..1f12c8b45864 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -132,7 +132,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) * header and optional ESP marker bytes) and then modify the * protocol to ESP, and then call into the transform receiver. */ - if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto drop; /* Now we can update and verify the packet length... */ diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index ddee0a099a2c..1162ace30838 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -142,8 +142,7 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) for_each_input_rcu(rcv_notify_handlers, handler) handler->handler(skb); - if (skb_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + if (err = skb_unclone(skb, GFP_ATOMIC)) goto out; if (x->props.flags & XFRM_STATE_DECAP_DSCP) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 384233188ac1..bb02e176cb70 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -521,8 +521,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) /* We are going to _remove_ AH header to keep sockets happy, * so... Later this can change. */ - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto out; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index c674f158efa8..b89a8c3186cd 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -368,7 +368,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) } /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) { + if (skb_unclone(head, GFP_ATOMIC)) { pr_debug("skb is cloned but can't expand head"); goto out_oom; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index bab2c270f292..e354743ed426 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -404,7 +404,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, goto out_oversize; /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) + if (skb_unclone(head, GFP_ATOMIC)) goto out_oom; /* If the first fragment is fragmented itself, we split diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 9f2095b19ad0..93c41a81c4c3 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -69,8 +69,7 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; - if (skb_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + if (err = skb_unclone(skb, GFP_ATOMIC)) goto out; if (x->props.flags & XFRM_STATE_DECAP_DSCP) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 0fb9e3f567e6..e0f6de64afec 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -207,10 +207,8 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, struct tcf_ipt *ipt = a->priv; struct xt_action_param par; - if (skb_cloned(skb)) { - if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return TC_ACT_UNSPEC; - } + if (skb_unclone(skb, GFP_ATOMIC)) + return TC_ACT_UNSPEC; spin_lock(&ipt->tcf_lock); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 0c3faddf3f2c..7ed78c9e505c 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -131,8 +131,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, int i, munged = 0; unsigned int off; - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return p->tcf_action; off = skb_network_offset(skb); -- cgit v1.2.3-71-gd317 From 05e8ef4ab2d8087d360e814d14da20b9f7fb2283 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 14 Feb 2013 09:44:55 +0000 Subject: net: factor out skb_mac_gso_segment() from skb_gso_segment() This function will be used in next GRE_GSO patch. This patch does not change any functionality. It only exports skb_mac_gso_segment() function. [ Use skb_reset_mac_len() -DaveM ] Signed-off-by: Pravin B Shelar Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 79 ++++++++++++++++++++++++++++------------------- 2 files changed, 50 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9deb672d999f..920361bc27e7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2671,6 +2671,8 @@ extern void netdev_upper_dev_unlink(struct net_device *dev, extern int skb_checksum_help(struct sk_buff *skb); extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); +extern struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features); static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) diff --git a/net/core/dev.c b/net/core/dev.c index f44473696b8b..67deae60214c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2327,37 +2327,20 @@ out: } EXPORT_SYMBOL(skb_checksum_help); -/* openvswitch calls this on rx path, so we need a different check. - */ -static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) -{ - if (tx_path) - return skb->ip_summed != CHECKSUM_PARTIAL; - else - return skb->ip_summed == CHECKSUM_NONE; -} - /** - * __skb_gso_segment - Perform segmentation on skb. + * skb_mac_gso_segment - mac layer segmentation handler. * @skb: buffer to segment * @features: features for the output path (see dev->features) - * @tx_path: whether it is called in TX path - * - * This function segments the given skb and returns a list of segments. - * - * It may return NULL if the skb requires no segmentation. This is - * only possible when GSO is used for verifying header integrity. */ -struct sk_buff *__skb_gso_segment(struct sk_buff *skb, - netdev_features_t features, bool tx_path) +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_offload *ptype; __be16 type = skb->protocol; - int vlan_depth = ETH_HLEN; - int err; while (type == htons(ETH_P_8021Q)) { + int vlan_depth = ETH_HLEN; struct vlan_hdr *vh; if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) @@ -2368,22 +2351,14 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, vlan_depth += VLAN_HLEN; } - skb_reset_mac_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; __skb_pull(skb, skb->mac_len); - if (unlikely(skb_needs_check(skb, tx_path))) { - skb_warn_bad_offload(skb); - - if (skb_header_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) - return ERR_PTR(err); - } - rcu_read_lock(); list_for_each_entry_rcu(ptype, &offload_base, list) { if (ptype->type == type && ptype->callbacks.gso_segment) { if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + int err; + err = ptype->callbacks.gso_send_check(skb); segs = ERR_PTR(err); if (err || skb_gso_ok(skb, features)) @@ -2401,6 +2376,48 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, return segs; } +EXPORT_SYMBOL(skb_mac_gso_segment); + + +/* openvswitch calls this on rx path, so we need a different check. + */ +static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) +{ + if (tx_path) + return skb->ip_summed != CHECKSUM_PARTIAL; + else + return skb->ip_summed == CHECKSUM_NONE; +} + +/** + * __skb_gso_segment - Perform segmentation on skb. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * @tx_path: whether it is called in TX path + * + * This function segments the given skb and returns a list of segments. + * + * It may return NULL if the skb requires no segmentation. This is + * only possible when GSO is used for verifying header integrity. + */ +struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path) +{ + if (unlikely(skb_needs_check(skb, tx_path))) { + int err; + + skb_warn_bad_offload(skb); + + if (skb_header_cloned(skb) && + (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + return ERR_PTR(err); + } + + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + return skb_mac_gso_segment(skb, features); +} EXPORT_SYMBOL(__skb_gso_segment); /* Take action when hardware reception checksum errors are detected. */ -- cgit v1.2.3-71-gd317 From 68c331631143f5f039baac99a650e0b9e1ea02b6 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 14 Feb 2013 14:02:41 +0000 Subject: v4 GRE: Add TCP segmentation offload for GRE Following patch adds GRE protocol offload handler so that skb_gso_segment() can segment GRE packets. SKB GSO CB is added to keep track of total header length so that skb_segment can push entire header. e.g. in case of GRE, skb_segment need to push inner and outer headers to every segment. New NETIF_F_GRE_GSO feature is added for devices which support HW GRE TSO offload. Currently none of devices support it therefore GRE GSO always fall backs to software GSO. [ Compute pkt_len before ip_local_out() invocation. -DaveM ] Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 3 +- include/linux/skbuff.h | 17 ++++++ net/core/dev.c | 1 + net/core/ethtool.c | 1 + net/core/skbuff.c | 6 +- net/ipv4/af_inet.c | 1 + net/ipv4/gre.c | 118 ++++++++++++++++++++++++++++++++++++++++ net/ipv4/ip_gre.c | 82 +++++++++++++++++++++++++--- net/ipv4/tcp.c | 1 + net/ipv4/udp.c | 3 +- net/ipv6/ip6_offload.c | 1 + net/ipv6/udp_offload.c | 3 +- 12 files changed, 226 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 5ac32123035a..3dd39340430e 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -41,7 +41,7 @@ enum { NETIF_F_TSO_ECN_BIT, /* ... TCP ECN support */ NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */ NETIF_F_FSO_BIT, /* ... FCoE segmentation */ - NETIF_F_GSO_RESERVED1, /* ... free (fill GSO_MASK to 8 bits) */ + NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ /**/NETIF_F_GSO_LAST, /* [can't be last bit, see GSO_MASK] */ NETIF_F_GSO_RESERVED2 /* ... free (fill GSO_MASK to 8 bits) */ = NETIF_F_GSO_LAST, @@ -102,6 +102,7 @@ enum { #define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED) #define NETIF_F_RXFCS __NETIF_F(RXFCS) #define NETIF_F_RXALL __NETIF_F(RXALL) +#define NETIF_F_GRE_GSO __NETIF_F(GSO_GRE) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ca6ee7d93edb..821c7f45d2a7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -314,6 +314,8 @@ enum { SKB_GSO_TCPV6 = 1 << 4, SKB_GSO_FCOE = 1 << 5, + + SKB_GSO_GRE = 1 << 6, }; #if BITS_PER_LONG > 32 @@ -2732,6 +2734,21 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb) } #endif +/* Keeps track of mac header offset relative to skb->head. + * It is useful for TSO of Tunneling protocol. e.g. GRE. + * For non-tunnel skb it points to skb_mac_header() and for + * tunnel skb it points to outer mac header. */ +struct skb_gso_cb { + int mac_offset; +}; +#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb) + +static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) +{ + return (skb_mac_header(inner_skb) - inner_skb->head) - + SKB_GSO_CB(inner_skb)->mac_offset; +} + static inline bool skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; diff --git a/net/core/dev.c b/net/core/dev.c index 67deae60214c..1cd6297fd34b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2413,6 +2413,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, return ERR_PTR(err); } + SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); skb_reset_mac_header(skb); skb_reset_mac_len(skb); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d9d55209db67..3e9b2c3e30f0 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -77,6 +77,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", + [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6c1ad09f8796..2a3ca33c30aa 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2738,6 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int doffset = skb->data - skb_mac_header(skb); unsigned int offset = doffset; + unsigned int tnl_hlen = skb_tnl_header_len(skb); unsigned int headroom; unsigned int len; int sg = !!(features & NETIF_F_SG); @@ -2814,7 +2815,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_set_network_header(nskb, skb->mac_len); nskb->transport_header = (nskb->network_header + skb_network_header_len(skb)); - skb_copy_from_linear_data(skb, nskb->data, doffset); + + skb_copy_from_linear_data_offset(skb, -tnl_hlen, + nskb->data - tnl_hlen, + doffset + tnl_hlen); if (fskb != skb_shinfo(skb)->frag_list) continue; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e6e5d8506336..e225a4e5b572 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1287,6 +1287,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_GRE | 0))) goto out; diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 42a491055c76..7a4c710c4cdd 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,11 @@ static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; static DEFINE_SPINLOCK(gre_proto_lock); +struct gre_base_hdr { + __be16 flags; + __be16 protocol; +}; +#define GRE_HEADER_SECTION 4 int gre_add_protocol(const struct gre_protocol *proto, u8 version) { @@ -112,12 +118,117 @@ static void gre_err(struct sk_buff *skb, u32 info) rcu_read_unlock(); } +static struct sk_buff *gre_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t enc_features; + int ghl = GRE_HEADER_SECTION; + struct gre_base_hdr *greh; + int mac_len = skb->mac_len; + int tnl_hlen; + bool csum; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_TCPV6 | + SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_GRE))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) + goto out; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + + if (greh->flags & GRE_KEY) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_SEQ) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_CSUM) { + ghl += GRE_HEADER_SECTION; + csum = true; + } else + csum = false; + + /* setup inner skb. */ + if (greh->protocol == htons(ETH_P_TEB)) { + struct ethhdr *eth = eth_hdr(skb); + skb->protocol = eth->h_proto; + } else { + skb->protocol = greh->protocol; + } + + skb->encapsulation = 0; + + if (unlikely(!pskb_may_pull(skb, ghl))) + goto out; + __skb_pull(skb, ghl); + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb->mac_len = skb_inner_network_offset(skb); + + /* segment inner packet. */ + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) + goto out; + + skb = segs; + tnl_hlen = skb_tnl_header_len(skb); + do { + __skb_push(skb, ghl); + if (csum) { + __be32 *pcsum; + + if (skb_has_shared_frag(skb)) { + int err; + + err = __skb_linearize(skb); + if (err) { + kfree_skb(segs); + segs = ERR_PTR(err); + goto out; + } + } + + greh = (struct gre_base_hdr *)(skb->data); + pcsum = (__be32 *)(greh + 1); + *pcsum = 0; + *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); + } + __skb_push(skb, tnl_hlen - ghl); + + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb->mac_len = mac_len; + } while ((skb = skb->next)); +out: + return segs; +} + +static int gre_gso_send_check(struct sk_buff *skb) +{ + if (!skb->encapsulation) + return -EINVAL; + return 0; +} + static const struct net_protocol net_gre_protocol = { .handler = gre_rcv, .err_handler = gre_err, .netns_ok = 1, }; +static const struct net_offload gre_offload = { + .callbacks = { + .gso_send_check = gre_gso_send_check, + .gso_segment = gre_gso_segment, + }, +}; + static int __init gre_init(void) { pr_info("GRE over IPv4 demultiplexor driver\n"); @@ -127,11 +238,18 @@ static int __init gre_init(void) return -EAGAIN; } + if (inet_add_offload(&gre_offload, IPPROTO_GRE)) { + pr_err("can't add protocol offload\n"); + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); + return -EAGAIN; + } + return 0; } static void __exit gre_exit(void) { + inet_del_offload(&gre_offload, IPPROTO_GRE); inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 00a14b9864ea..a56f1182c176 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -735,8 +735,33 @@ drop: return 0; } +static struct sk_buff *handle_offloads(struct sk_buff *skb) +{ + int err; + + if (skb_is_gso(skb)) { + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + goto error; + skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; + return skb; + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); + if (unlikely(err)) + goto error; + } + skb->ip_summed = CHECKSUM_NONE; + + return skb; + +error: + kfree_skb(skb); + return ERR_PTR(err); +} + static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { + struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *old_iph; const struct iphdr *tiph; @@ -751,10 +776,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev __be32 dst; int mtu; u8 ttl; + int err; + int pkt_len; - if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb)) - goto tx_error; + skb = handle_offloads(skb); + if (IS_ERR(skb)) { + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + if (!skb->encapsulation) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } old_iph = ip_hdr(skb); @@ -855,7 +889,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (skb->protocol == htons(ETH_P_IP)) { df |= (old_iph->frag_off&htons(IP_DF)); - if ((old_iph->frag_off&htons(IP_DF)) && + if (!skb_is_gso(skb) && + (old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); @@ -875,7 +910,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } } - if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { + if (!skb_is_gso(skb) && + mtu >= IPV6_MIN_MTU && + mtu < skb->len - tunnel->hlen + gre_hlen) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; @@ -936,6 +973,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev iph->daddr = fl4.daddr; iph->saddr = fl4.saddr; iph->ttl = ttl; + iph->id = 0; if (ttl == 0) { if (skb->protocol == htons(ETH_P_IP)) @@ -964,9 +1002,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev *ptr = tunnel->parms.o_key; ptr--; } - if (tunnel->parms.o_flags&GRE_CSUM) { + /* Skip GRE checksum if skb is getting offloaded. */ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) && + (tunnel->parms.o_flags&GRE_CSUM)) { int offset = skb_transport_offset(skb); + if (skb_has_shared_frag(skb)) { + err = __skb_linearize(skb); + if (err) { + ip_rt_put(rt); + goto tx_error; + } + } + *ptr = 0; *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset, skb->len - offset, @@ -974,7 +1022,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } } - iptunnel_xmit(skb, dev); + nf_reset(skb); + + pkt_len = skb->len - skb_transport_offset(skb); + err = ip_local_out(skb); + if (likely(net_xmit_eval(err) == 0)) { + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else { + dev->stats.tx_errors++; + dev->stats.tx_aborted_errors++; + } return NETDEV_TX_OK; #if IS_ENABLED(CONFIG_IPV6) @@ -1044,6 +1104,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) mtu = 68; tunnel->hlen = addend; + /* TCP offload with GRE SEQ is not supported. */ + if (!(tunnel->parms.o_flags & GRE_SEQ)) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } return mtu; } @@ -1593,6 +1658,9 @@ static void ipgre_tap_setup(struct net_device *dev) dev->iflink = 0; dev->features |= NETIF_F_NETNS_LOCAL; + + dev->features |= GRE_FEATURES; + dev->hw_features |= GRE_FEATURES; } static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1f0bedb8622f..7a5ba48c2cc9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3043,6 +3043,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | + SKB_GSO_GRE | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6791aac06ea9..39a5e7a9a77f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2305,7 +2305,8 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index f26f0da7f095..8234c1dcdf72 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -99,6 +99,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_GRE | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 0c8934a317c2..cf05cf073c51 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -56,7 +56,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; -- cgit v1.2.3-71-gd317 From b4278c961aca320839964e23cfc7906ff61af0c2 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 18 Feb 2013 01:34:55 +0000 Subject: net: proc: remove proc_net_fops_create proc_net_fops_create has been replaced by proc_create, we can remove it now. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- fs/proc/proc_net.c | 8 -------- include/linux/proc_fs.h | 3 --- 2 files changed, 11 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index fe72cd073dea..30f7c678424b 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -177,14 +177,6 @@ const struct file_operations proc_net_operations = { .readdir = proc_tgid_net_readdir, }; - -struct proc_dir_entry *proc_net_fops_create(struct net *net, - const char *name, umode_t mode, const struct file_operations *fops) -{ - return proc_create(name, mode, net->proc_net, fops); -} -EXPORT_SYMBOL_GPL(proc_net_fops_create); - void proc_net_remove(struct net *net, const char *name) { remove_proc_entry(name, net->proc_net); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 32676b35d2f5..35ee1891ae25 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -171,8 +171,6 @@ static inline struct proc_dir_entry *create_proc_read_entry(const char *name, return res; } -extern struct proc_dir_entry *proc_net_fops_create(struct net *net, - const char *name, umode_t mode, const struct file_operations *fops); extern void proc_net_remove(struct net *net, const char *name); extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, struct proc_dir_entry *parent); @@ -184,7 +182,6 @@ extern int proc_alloc_inum(unsigned int *pino); extern void proc_free_inum(unsigned int inum); #else -#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) static inline void proc_net_remove(struct net *net, const char *name) {} static inline void proc_flush_task(struct task_struct *task) -- cgit v1.2.3-71-gd317 From c2399059a389ba686fa7f45d8913a708914752c4 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 18 Feb 2013 01:34:57 +0000 Subject: net: proc: remove proc_net_remove proc_net_remove has been replaced by remove_proc_entry. we can remove it now. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- fs/proc/proc_net.c | 6 ------ include/linux/proc_fs.h | 3 --- 2 files changed, 9 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 30f7c678424b..3131a03d7d37 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -177,12 +177,6 @@ const struct file_operations proc_net_operations = { .readdir = proc_tgid_net_readdir, }; -void proc_net_remove(struct net *net, const char *name) -{ - remove_proc_entry(name, net->proc_net); -} -EXPORT_SYMBOL_GPL(proc_net_remove); - static __net_init int proc_net_ns_init(struct net *net) { struct proc_dir_entry *netd, *net_statd; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 35ee1891ae25..319f69422667 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -171,7 +171,6 @@ static inline struct proc_dir_entry *create_proc_read_entry(const char *name, return res; } -extern void proc_net_remove(struct net *net, const char *name); extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, struct proc_dir_entry *parent); @@ -182,8 +181,6 @@ extern int proc_alloc_inum(unsigned int *pino); extern void proc_free_inum(unsigned int inum); #else -static inline void proc_net_remove(struct net *net, const char *name) {} - static inline void proc_flush_task(struct task_struct *task) { } -- cgit v1.2.3-71-gd317 From 900ff8c6321418dafa03c22e215cb9646a2541b9 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 18 Feb 2013 19:20:33 +0000 Subject: net: move procfs code to net/core/net-procfs.c Similar to net/core/net-sysfs.c, group procfs code to a single unit. Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 36 +++- net/core/Makefile | 1 + net/core/dev.c | 384 +----------------------------------------- net/core/dev_addr_lists.c | 74 --------- net/core/net-procfs.c | 414 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 450 insertions(+), 459 deletions(-) create mode 100644 net/core/net-procfs.c (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 920361bc27e7..f111b4f038f3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2692,9 +2692,9 @@ extern void net_enable_timestamp(void); extern void net_disable_timestamp(void); #ifdef CONFIG_PROC_FS -extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); -extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); -extern void dev_seq_stop(struct seq_file *seq, void *v); +extern int __init dev_proc_init(void); +#else +#define dev_proc_init() 0 #endif extern int netdev_class_create_file(struct class_attribute *class_attr); @@ -2896,4 +2896,34 @@ do { \ }) #endif +/* + * The list of packet types we will receive (as opposed to discard) + * and the routines to invoke. + * + * Why 16. Because with 16 the only overlap we get on a hash of the + * low nibble of the protocol value is RARP/SNAP/X.25. + * + * NOTE: That is no longer true with the addition of VLAN tags. Not + * sure which should go first, but I bet it won't make much + * difference if we are running VLANs. The good news is that + * this protocol won't be in the list unless compiled in, so + * the average user (w/out VLANs) will not be adversely affected. + * --BLG + * + * 0800 IP + * 8100 802.1Q VLAN + * 0001 802.3 + * 0002 AX.25 + * 0004 802.2 + * 8035 RARP + * 0005 SNAP + * 0805 X.25 + * 0806 ARP + * 8137 IPX + * 0009 Localtalk + * 86DD IPv6 + */ +#define PTYPE_HASH_SIZE (16) +#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) + #endif /* _LINUX_NETDEVICE_H */ diff --git a/net/core/Makefile b/net/core/Makefile index 0c5e3618c80b..b33b996f5dd6 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -13,6 +13,7 @@ obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o +obj-$(CONFIG_PROC_FS) += net-procfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NET_DMA) += user_dma.o diff --git a/net/core/dev.c b/net/core/dev.c index decf55f9ad80..8d9ddb09f208 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -97,8 +97,6 @@ #include #include #include -#include -#include #include #include #include @@ -110,7 +108,6 @@ #include #include #include -#include #include #include #include @@ -141,41 +138,10 @@ /* This should be increased if a protocol with a bigger head is added. */ #define GRO_MAX_HEAD (MAX_HEADER + 128) -/* - * The list of packet types we will receive (as opposed to discard) - * and the routines to invoke. - * - * Why 16. Because with 16 the only overlap we get on a hash of the - * low nibble of the protocol value is RARP/SNAP/X.25. - * - * NOTE: That is no longer true with the addition of VLAN tags. Not - * sure which should go first, but I bet it won't make much - * difference if we are running VLANs. The good news is that - * this protocol won't be in the list unless compiled in, so - * the average user (w/out VLANs) will not be adversely affected. - * --BLG - * - * 0800 IP - * 8100 802.1Q VLAN - * 0001 802.3 - * 0002 AX.25 - * 0004 802.2 - * 8035 RARP - * 0005 SNAP - * 0805 X.25 - * 0806 ARP - * 8137 IPX - * 0009 Localtalk - * 86DD IPv6 - */ - -#define PTYPE_HASH_SIZE (16) -#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) - static DEFINE_SPINLOCK(ptype_lock); static DEFINE_SPINLOCK(offload_lock); -static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; -static struct list_head ptype_all __read_mostly; /* Taps */ +struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; +struct list_head ptype_all __read_mostly; /* Taps */ static struct list_head offload_base __read_mostly; /* @@ -4217,352 +4183,6 @@ softnet_break: goto out; } -#ifdef CONFIG_PROC_FS - -#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) - -#define get_bucket(x) ((x) >> BUCKET_SPACE) -#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) -#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) - -static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) -{ - struct net *net = seq_file_net(seq); - struct net_device *dev; - struct hlist_node *p; - struct hlist_head *h; - unsigned int count = 0, offset = get_offset(*pos); - - h = &net->dev_name_head[get_bucket(*pos)]; - hlist_for_each_entry_rcu(dev, p, h, name_hlist) { - if (++count == offset) - return dev; - } - - return NULL; -} - -static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) -{ - struct net_device *dev; - unsigned int bucket; - - do { - dev = dev_from_same_bucket(seq, pos); - if (dev) - return dev; - - bucket = get_bucket(*pos) + 1; - *pos = set_bucket_offset(bucket, 1); - } while (bucket < NETDEV_HASHENTRIES); - - return NULL; -} - -/* - * This is invoked by the /proc filesystem handler to display a device - * in detail. - */ -void *dev_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - rcu_read_lock(); - if (!*pos) - return SEQ_START_TOKEN; - - if (get_bucket(*pos) >= NETDEV_HASHENTRIES) - return NULL; - - return dev_from_bucket(seq, pos); -} - -void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return dev_from_bucket(seq, pos); -} - -void dev_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) -{ - struct rtnl_link_stats64 temp; - const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); - - seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " - "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", - dev->name, stats->rx_bytes, stats->rx_packets, - stats->rx_errors, - stats->rx_dropped + stats->rx_missed_errors, - stats->rx_fifo_errors, - stats->rx_length_errors + stats->rx_over_errors + - stats->rx_crc_errors + stats->rx_frame_errors, - stats->rx_compressed, stats->multicast, - stats->tx_bytes, stats->tx_packets, - stats->tx_errors, stats->tx_dropped, - stats->tx_fifo_errors, stats->collisions, - stats->tx_carrier_errors + - stats->tx_aborted_errors + - stats->tx_window_errors + - stats->tx_heartbeat_errors, - stats->tx_compressed); -} - -/* - * Called from the PROCfs module. This now uses the new arbitrary sized - * /proc/net interface to create /proc/net/dev - */ -static int dev_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_puts(seq, "Inter-| Receive " - " | Transmit\n" - " face |bytes packets errs drop fifo frame " - "compressed multicast|bytes packets errs " - "drop fifo colls carrier compressed\n"); - else - dev_seq_printf_stats(seq, v); - return 0; -} - -static struct softnet_data *softnet_get_online(loff_t *pos) -{ - struct softnet_data *sd = NULL; - - while (*pos < nr_cpu_ids) - if (cpu_online(*pos)) { - sd = &per_cpu(softnet_data, *pos); - break; - } else - ++*pos; - return sd; -} - -static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) -{ - return softnet_get_online(pos); -} - -static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return softnet_get_online(pos); -} - -static void softnet_seq_stop(struct seq_file *seq, void *v) -{ -} - -static int softnet_seq_show(struct seq_file *seq, void *v) -{ - struct softnet_data *sd = v; - - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", - sd->processed, sd->dropped, sd->time_squeeze, 0, - 0, 0, 0, 0, /* was fastroute */ - sd->cpu_collision, sd->received_rps); - return 0; -} - -static const struct seq_operations dev_seq_ops = { - .start = dev_seq_start, - .next = dev_seq_next, - .stop = dev_seq_stop, - .show = dev_seq_show, -}; - -static int dev_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &dev_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations dev_seq_fops = { - .owner = THIS_MODULE, - .open = dev_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -static const struct seq_operations softnet_seq_ops = { - .start = softnet_seq_start, - .next = softnet_seq_next, - .stop = softnet_seq_stop, - .show = softnet_seq_show, -}; - -static int softnet_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &softnet_seq_ops); -} - -static const struct file_operations softnet_seq_fops = { - .owner = THIS_MODULE, - .open = softnet_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static void *ptype_get_idx(loff_t pos) -{ - struct packet_type *pt = NULL; - loff_t i = 0; - int t; - - list_for_each_entry_rcu(pt, &ptype_all, list) { - if (i == pos) - return pt; - ++i; - } - - for (t = 0; t < PTYPE_HASH_SIZE; t++) { - list_for_each_entry_rcu(pt, &ptype_base[t], list) { - if (i == pos) - return pt; - ++i; - } - } - return NULL; -} - -static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - rcu_read_lock(); - return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; -} - -static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct packet_type *pt; - struct list_head *nxt; - int hash; - - ++*pos; - if (v == SEQ_START_TOKEN) - return ptype_get_idx(0); - - pt = v; - nxt = pt->list.next; - if (pt->type == htons(ETH_P_ALL)) { - if (nxt != &ptype_all) - goto found; - hash = 0; - nxt = ptype_base[0].next; - } else - hash = ntohs(pt->type) & PTYPE_HASH_MASK; - - while (nxt == &ptype_base[hash]) { - if (++hash >= PTYPE_HASH_SIZE) - return NULL; - nxt = ptype_base[hash].next; - } -found: - return list_entry(nxt, struct packet_type, list); -} - -static void ptype_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -static int ptype_seq_show(struct seq_file *seq, void *v) -{ - struct packet_type *pt = v; - - if (v == SEQ_START_TOKEN) - seq_puts(seq, "Type Device Function\n"); - else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { - if (pt->type == htons(ETH_P_ALL)) - seq_puts(seq, "ALL "); - else - seq_printf(seq, "%04x", ntohs(pt->type)); - - seq_printf(seq, " %-8s %pF\n", - pt->dev ? pt->dev->name : "", pt->func); - } - - return 0; -} - -static const struct seq_operations ptype_seq_ops = { - .start = ptype_seq_start, - .next = ptype_seq_next, - .stop = ptype_seq_stop, - .show = ptype_seq_show, -}; - -static int ptype_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ptype_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations ptype_seq_fops = { - .owner = THIS_MODULE, - .open = ptype_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - - -static int __net_init dev_proc_net_init(struct net *net) -{ - int rc = -ENOMEM; - - if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops)) - goto out; - if (!proc_create("softnet_stat", S_IRUGO, net->proc_net, - &softnet_seq_fops)) - goto out_dev; - if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops)) - goto out_softnet; - - if (wext_proc_init(net)) - goto out_ptype; - rc = 0; -out: - return rc; -out_ptype: - remove_proc_entry("ptype", net->proc_net); -out_softnet: - remove_proc_entry("softnet_stat", net->proc_net); -out_dev: - remove_proc_entry("dev", net->proc_net); - goto out; -} - -static void __net_exit dev_proc_net_exit(struct net *net) -{ - wext_proc_exit(net); - - remove_proc_entry("ptype", net->proc_net); - remove_proc_entry("softnet_stat", net->proc_net); - remove_proc_entry("dev", net->proc_net); -} - -static struct pernet_operations __net_initdata dev_proc_ops = { - .init = dev_proc_net_init, - .exit = dev_proc_net_exit, -}; - -static int __init dev_proc_init(void) -{ - return register_pernet_subsys(&dev_proc_ops); -} -#else -#define dev_proc_init() 0 -#endif /* CONFIG_PROC_FS */ - - struct netdev_upper { struct net_device *dev; bool master; diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 89562529df45..bd2eb9d3e369 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -15,7 +15,6 @@ #include #include #include -#include /* * General list handling functions @@ -727,76 +726,3 @@ void dev_mc_init(struct net_device *dev) __hw_addr_init(&dev->mc); } EXPORT_SYMBOL(dev_mc_init); - -#ifdef CONFIG_PROC_FS -#include - -static int dev_mc_seq_show(struct seq_file *seq, void *v) -{ - struct netdev_hw_addr *ha; - struct net_device *dev = v; - - if (v == SEQ_START_TOKEN) - return 0; - - netif_addr_lock_bh(dev); - netdev_for_each_mc_addr(ha, dev) { - int i; - - seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, - dev->name, ha->refcount, ha->global_use); - - for (i = 0; i < dev->addr_len; i++) - seq_printf(seq, "%02x", ha->addr[i]); - - seq_putc(seq, '\n'); - } - netif_addr_unlock_bh(dev); - return 0; -} - -static const struct seq_operations dev_mc_seq_ops = { - .start = dev_seq_start, - .next = dev_seq_next, - .stop = dev_seq_stop, - .show = dev_mc_seq_show, -}; - -static int dev_mc_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &dev_mc_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations dev_mc_seq_fops = { - .owner = THIS_MODULE, - .open = dev_mc_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -#endif - -static int __net_init dev_mc_net_init(struct net *net) -{ - if (!proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops)) - return -ENOMEM; - return 0; -} - -static void __net_exit dev_mc_net_exit(struct net *net) -{ - remove_proc_entry("dev_mcast", net->proc_net); -} - -static struct pernet_operations __net_initdata dev_mc_net_ops = { - .init = dev_mc_net_init, - .exit = dev_mc_net_exit, -}; - -void __init dev_mcast_init(void) -{ - register_pernet_subsys(&dev_mc_net_ops); -} - diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c new file mode 100644 index 000000000000..ac87066491e9 --- /dev/null +++ b/net/core/net-procfs.c @@ -0,0 +1,414 @@ +#include +#include +#include +#include + +#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) + +#define get_bucket(x) ((x) >> BUCKET_SPACE) +#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) +#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) + +extern struct list_head ptype_all __read_mostly; +extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; + +static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) +{ + struct net *net = seq_file_net(seq); + struct net_device *dev; + struct hlist_node *p; + struct hlist_head *h; + unsigned int count = 0, offset = get_offset(*pos); + + h = &net->dev_name_head[get_bucket(*pos)]; + hlist_for_each_entry_rcu(dev, p, h, name_hlist) { + if (++count == offset) + return dev; + } + + return NULL; +} + +static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) +{ + struct net_device *dev; + unsigned int bucket; + + do { + dev = dev_from_same_bucket(seq, pos); + if (dev) + return dev; + + bucket = get_bucket(*pos) + 1; + *pos = set_bucket_offset(bucket, 1); + } while (bucket < NETDEV_HASHENTRIES); + + return NULL; +} + +/* + * This is invoked by the /proc filesystem handler to display a device + * in detail. + */ +static void *dev_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) +{ + rcu_read_lock(); + if (!*pos) + return SEQ_START_TOKEN; + + if (get_bucket(*pos) >= NETDEV_HASHENTRIES) + return NULL; + + return dev_from_bucket(seq, pos); +} + +static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return dev_from_bucket(seq, pos); +} + +static void dev_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) +{ + rcu_read_unlock(); +} + +static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) +{ + struct rtnl_link_stats64 temp; + const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); + + seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " + "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", + dev->name, stats->rx_bytes, stats->rx_packets, + stats->rx_errors, + stats->rx_dropped + stats->rx_missed_errors, + stats->rx_fifo_errors, + stats->rx_length_errors + stats->rx_over_errors + + stats->rx_crc_errors + stats->rx_frame_errors, + stats->rx_compressed, stats->multicast, + stats->tx_bytes, stats->tx_packets, + stats->tx_errors, stats->tx_dropped, + stats->tx_fifo_errors, stats->collisions, + stats->tx_carrier_errors + + stats->tx_aborted_errors + + stats->tx_window_errors + + stats->tx_heartbeat_errors, + stats->tx_compressed); +} + +/* + * Called from the PROCfs module. This now uses the new arbitrary sized + * /proc/net interface to create /proc/net/dev + */ +static int dev_seq_show(struct seq_file *seq, void *v) +{ + if (v == SEQ_START_TOKEN) + seq_puts(seq, "Inter-| Receive " + " | Transmit\n" + " face |bytes packets errs drop fifo frame " + "compressed multicast|bytes packets errs " + "drop fifo colls carrier compressed\n"); + else + dev_seq_printf_stats(seq, v); + return 0; +} + +static struct softnet_data *softnet_get_online(loff_t *pos) +{ + struct softnet_data *sd = NULL; + + while (*pos < nr_cpu_ids) + if (cpu_online(*pos)) { + sd = &per_cpu(softnet_data, *pos); + break; + } else + ++*pos; + return sd; +} + +static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) +{ + return softnet_get_online(pos); +} + +static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return softnet_get_online(pos); +} + +static void softnet_seq_stop(struct seq_file *seq, void *v) +{ +} + +static int softnet_seq_show(struct seq_file *seq, void *v) +{ + struct softnet_data *sd = v; + + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", + sd->processed, sd->dropped, sd->time_squeeze, 0, + 0, 0, 0, 0, /* was fastroute */ + sd->cpu_collision, sd->received_rps); + return 0; +} + +static const struct seq_operations dev_seq_ops = { + .start = dev_seq_start, + .next = dev_seq_next, + .stop = dev_seq_stop, + .show = dev_seq_show, +}; + +static int dev_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &dev_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations dev_seq_fops = { + .owner = THIS_MODULE, + .open = dev_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +static const struct seq_operations softnet_seq_ops = { + .start = softnet_seq_start, + .next = softnet_seq_next, + .stop = softnet_seq_stop, + .show = softnet_seq_show, +}; + +static int softnet_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &softnet_seq_ops); +} + +static const struct file_operations softnet_seq_fops = { + .owner = THIS_MODULE, + .open = softnet_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static void *ptype_get_idx(loff_t pos) +{ + struct packet_type *pt = NULL; + loff_t i = 0; + int t; + + list_for_each_entry_rcu(pt, &ptype_all, list) { + if (i == pos) + return pt; + ++i; + } + + for (t = 0; t < PTYPE_HASH_SIZE; t++) { + list_for_each_entry_rcu(pt, &ptype_base[t], list) { + if (i == pos) + return pt; + ++i; + } + } + return NULL; +} + +static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) +{ + rcu_read_lock(); + return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; +} + +static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct packet_type *pt; + struct list_head *nxt; + int hash; + + ++*pos; + if (v == SEQ_START_TOKEN) + return ptype_get_idx(0); + + pt = v; + nxt = pt->list.next; + if (pt->type == htons(ETH_P_ALL)) { + if (nxt != &ptype_all) + goto found; + hash = 0; + nxt = ptype_base[0].next; + } else + hash = ntohs(pt->type) & PTYPE_HASH_MASK; + + while (nxt == &ptype_base[hash]) { + if (++hash >= PTYPE_HASH_SIZE) + return NULL; + nxt = ptype_base[hash].next; + } +found: + return list_entry(nxt, struct packet_type, list); +} + +static void ptype_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) +{ + rcu_read_unlock(); +} + +static int ptype_seq_show(struct seq_file *seq, void *v) +{ + struct packet_type *pt = v; + + if (v == SEQ_START_TOKEN) + seq_puts(seq, "Type Device Function\n"); + else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { + if (pt->type == htons(ETH_P_ALL)) + seq_puts(seq, "ALL "); + else + seq_printf(seq, "%04x", ntohs(pt->type)); + + seq_printf(seq, " %-8s %pF\n", + pt->dev ? pt->dev->name : "", pt->func); + } + + return 0; +} + +static const struct seq_operations ptype_seq_ops = { + .start = ptype_seq_start, + .next = ptype_seq_next, + .stop = ptype_seq_stop, + .show = ptype_seq_show, +}; + +static int ptype_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &ptype_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations ptype_seq_fops = { + .owner = THIS_MODULE, + .open = ptype_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + + +static int __net_init dev_proc_net_init(struct net *net) +{ + int rc = -ENOMEM; + + if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops)) + goto out; + if (!proc_create("softnet_stat", S_IRUGO, net->proc_net, + &softnet_seq_fops)) + goto out_dev; + if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops)) + goto out_softnet; + + if (wext_proc_init(net)) + goto out_ptype; + rc = 0; +out: + return rc; +out_ptype: + remove_proc_entry("ptype", net->proc_net); +out_softnet: + remove_proc_entry("softnet_stat", net->proc_net); +out_dev: + remove_proc_entry("dev", net->proc_net); + goto out; +} + +static void __net_exit dev_proc_net_exit(struct net *net) +{ + wext_proc_exit(net); + + remove_proc_entry("ptype", net->proc_net); + remove_proc_entry("softnet_stat", net->proc_net); + remove_proc_entry("dev", net->proc_net); +} + +static struct pernet_operations __net_initdata dev_proc_ops = { + .init = dev_proc_net_init, + .exit = dev_proc_net_exit, +}; + +int __init dev_proc_init(void) +{ + return register_pernet_subsys(&dev_proc_ops); +} + +static int dev_mc_seq_show(struct seq_file *seq, void *v) +{ + struct netdev_hw_addr *ha; + struct net_device *dev = v; + + if (v == SEQ_START_TOKEN) + return 0; + + netif_addr_lock_bh(dev); + netdev_for_each_mc_addr(ha, dev) { + int i; + + seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, + dev->name, ha->refcount, ha->global_use); + + for (i = 0; i < dev->addr_len; i++) + seq_printf(seq, "%02x", ha->addr[i]); + + seq_putc(seq, '\n'); + } + netif_addr_unlock_bh(dev); + return 0; +} + +static const struct seq_operations dev_mc_seq_ops = { + .start = dev_seq_start, + .next = dev_seq_next, + .stop = dev_seq_stop, + .show = dev_mc_seq_show, +}; + +static int dev_mc_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &dev_mc_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations dev_mc_seq_fops = { + .owner = THIS_MODULE, + .open = dev_mc_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +static int __net_init dev_mc_net_init(struct net *net) +{ + if (!proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops)) + return -ENOMEM; + return 0; +} + +static void __net_exit dev_mc_net_exit(struct net *net) +{ + remove_proc_entry("dev_mcast", net->proc_net); +} + +static struct pernet_operations __net_initdata dev_mc_net_ops = { + .init = dev_mc_net_init, + .exit = dev_mc_net_exit, +}; + +void __init dev_mcast_init(void) +{ + register_pernet_subsys(&dev_mc_net_ops); +} -- cgit v1.2.3-71-gd317 From 4fc1a601f147abe3bfb4d70fe718110ed21953e1 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Tue, 19 Feb 2013 00:43:10 +0000 Subject: net: proc: fix build failed when procfs is not configured commit d4beaa66add8aebf83ab16d2fde4e4de8dac36df "net: proc: change proc_net_fops_create to proc_create" uses proc_create to replace proc_net_fops_create, when CONFIG_PROC isn't configured, some build error will occurs. net/packet/af_packet.c: In function 'packet_net_init': net/packet/af_packet.c:3831:48: error: 'packet_seq_fops' undeclared (first use in this function) net/packet/af_packet.c:3831:48: note: each undeclared identifier is reported only once for each function it appears in There may be other build fails like above,this patch change proc_create from function to macros when CONFIG_PROC is not configured,just like what proc_net_fops_create did before this commit. Reported-by: Fengguang Wu Signed-off-by: Gao feng Signed-off-by: David S. Miller --- include/linux/proc_fs.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 319f69422667..d0a1f2ca1c3f 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -187,12 +187,9 @@ static inline void proc_flush_task(struct task_struct *task) static inline struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode, struct proc_dir_entry *parent) { return NULL; } -static inline struct proc_dir_entry *proc_create(const char *name, - umode_t mode, struct proc_dir_entry *parent, - const struct file_operations *proc_fops) -{ - return NULL; -} + +#define proc_create(name, mode, parent, fops) ({ (void)(mode), NULL; }) + static inline struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct file_operations *proc_fops, void *data) -- cgit v1.2.3-71-gd317 From cd0615746ba0f6643fb984345ae6ee0b73404ca6 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 19 Feb 2013 02:47:05 +0000 Subject: net: fix a build failure when !CONFIG_PROC_FS When !CONFIG_PROC_FS dev_mcast_init() is not defined, actually we can just merge dev_mcast_init() into dev_proc_init(). Reported-by: Gao feng Cc: Gao feng Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - net/core/dev.c | 1 - net/core/net-procfs.c | 12 +++++------- 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f111b4f038f3..b3d00fa4b314 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2646,7 +2646,6 @@ extern void netdev_notify_peers(struct net_device *dev); extern void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ extern void dev_load(struct net *net, const char *name); -extern void dev_mcast_init(void); extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, struct rtnl_link_stats64 *storage); extern void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, diff --git a/net/core/dev.c b/net/core/dev.c index 8d9ddb09f208..17bc535115d3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6260,7 +6260,6 @@ static int __init net_dev_init(void) hotcpu_notifier(dev_cpu_callback, 0); dst_init(); - dev_mcast_init(); rc = 0; out: return rc; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index ac87066491e9..0f6bb6f8d391 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -341,11 +341,6 @@ static struct pernet_operations __net_initdata dev_proc_ops = { .exit = dev_proc_net_exit, }; -int __init dev_proc_init(void) -{ - return register_pernet_subsys(&dev_proc_ops); -} - static int dev_mc_seq_show(struct seq_file *seq, void *v) { struct netdev_hw_addr *ha; @@ -408,7 +403,10 @@ static struct pernet_operations __net_initdata dev_mc_net_ops = { .exit = dev_mc_net_exit, }; -void __init dev_mcast_init(void) +int __init dev_proc_init(void) { - register_pernet_subsys(&dev_mc_net_ops); + int ret = register_pernet_subsys(&dev_proc_ops); + if (!ret) + return register_pernet_subsys(&dev_mc_net_ops); + return ret; } -- cgit v1.2.3-71-gd317