cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

bond_alb.c (50074B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved.
      4 */
      5
      6#include <linux/skbuff.h>
      7#include <linux/netdevice.h>
      8#include <linux/etherdevice.h>
      9#include <linux/pkt_sched.h>
     10#include <linux/spinlock.h>
     11#include <linux/slab.h>
     12#include <linux/timer.h>
     13#include <linux/ip.h>
     14#include <linux/ipv6.h>
     15#include <linux/if_arp.h>
     16#include <linux/if_ether.h>
     17#include <linux/if_bonding.h>
     18#include <linux/if_vlan.h>
     19#include <linux/in.h>
     20#include <net/arp.h>
     21#include <net/ipv6.h>
     22#include <net/ndisc.h>
     23#include <asm/byteorder.h>
     24#include <net/bonding.h>
     25#include <net/bond_alb.h>
     26
/* Ethernet address 33:33:00:00:00:01 stored in an ETH_ALEN + 2 byte,
 * long-aligned array (the two trailing bytes are implicitly zero).
 * NOTE(review): going by the name this is the IPv6 all-nodes multicast MAC,
 * and the padding presumably exists for the *_64bits compare helpers -
 * confirm against the users of this constant.
 */
static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
	0x33, 0x33, 0x00, 0x00, 0x00, 0x01
};
/* Number of kernel ticks (HZ units) that make up one ALB timer tick */
static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC;
     31
/* Both structures below describe on-wire layouts, so they are declared with
 * byte packing to keep the compiler from inserting padding between fields.
 */
#pragma pack(1)
/* Minimal Ethernet frame used as a "learning packet": an Ethernet header
 * followed by enough padding to reach ETH_ZLEN bytes in total.
 */
struct learning_pkt {
	u8 mac_dst[ETH_ALEN];	/* destination hardware address */
	u8 mac_src[ETH_ALEN];	/* source hardware address */
	__be16 type;		/* EtherType, big endian */
	u8 padding[ETH_ZLEN - ETH_HLEN];	/* filler up to ETH_ZLEN */
};

/* ARP packet body as accessed by the rlb code; multi-byte fields are
 * big endian.
 */
struct arp_pkt {
	__be16  hw_addr_space;
	__be16  prot_addr_space;
	u8      hw_addr_len;
	u8      prot_addr_len;
	__be16  op_code;		/* compared against ARPOP_REPLY / ARPOP_REQUEST */
	u8      mac_src[ETH_ALEN];	/* sender hardware address */
	__be32  ip_src;			/* sender IP address */
	u8      mac_dst[ETH_ALEN];	/* target hardware address */
	__be32  ip_dst;			/* target IP address */
};
#pragma pack()
     52
/* Forward declarations of static helpers that are called before the point
 * where they are defined.
 */
static void alb_send_learning_packets(struct slave *slave, const u8 mac_addr[],
				      bool strict_match);
static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp);
static void rlb_src_unlink(struct bonding *bond, u32 index);
static void rlb_src_link(struct bonding *bond, u32 ip_src_hash,
			 u32 ip_dst_hash);
     60
     61static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
     62{
     63	int i;
     64	u8 hash = 0;
     65
     66	for (i = 0; i < hash_size; i++)
     67		hash ^= hash_start[i];
     68
     69	return hash;
     70}
     71
     72/*********************** tlb specific functions ***************************/
     73
     74static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
     75{
     76	if (save_load) {
     77		entry->load_history = 1 + entry->tx_bytes /
     78				      BOND_TLB_REBALANCE_INTERVAL;
     79		entry->tx_bytes = 0;
     80	}
     81
     82	entry->tx_slave = NULL;
     83	entry->next = TLB_NULL_INDEX;
     84	entry->prev = TLB_NULL_INDEX;
     85}
     86
     87static inline void tlb_init_slave(struct slave *slave)
     88{
     89	SLAVE_TLB_INFO(slave).load = 0;
     90	SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX;
     91}
     92
     93static void __tlb_clear_slave(struct bonding *bond, struct slave *slave,
     94			 int save_load)
     95{
     96	struct tlb_client_info *tx_hash_table;
     97	u32 index;
     98
     99	/* clear slave from tx_hashtbl */
    100	tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;
    101
    102	/* skip this if we've already freed the tx hash table */
    103	if (tx_hash_table) {
    104		index = SLAVE_TLB_INFO(slave).head;
    105		while (index != TLB_NULL_INDEX) {
    106			u32 next_index = tx_hash_table[index].next;
    107
    108			tlb_init_table_entry(&tx_hash_table[index], save_load);
    109			index = next_index;
    110		}
    111	}
    112
    113	tlb_init_slave(slave);
    114}
    115
/* Locked variant of __tlb_clear_slave(): performs the clear while holding
 * bond->mode_lock with bottom halves disabled.
 */
static void tlb_clear_slave(struct bonding *bond, struct slave *slave,
			 int save_load)
{
	spin_lock_bh(&bond->mode_lock);
	__tlb_clear_slave(bond, slave, save_load);
	spin_unlock_bh(&bond->mode_lock);
}
    123
    124/* Must be called before starting the monitor timer */
    125static int tlb_initialize(struct bonding *bond)
    126{
    127	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    128	int size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info);
    129	struct tlb_client_info *new_hashtbl;
    130	int i;
    131
    132	new_hashtbl = kzalloc(size, GFP_KERNEL);
    133	if (!new_hashtbl)
    134		return -ENOMEM;
    135
    136	spin_lock_bh(&bond->mode_lock);
    137
    138	bond_info->tx_hashtbl = new_hashtbl;
    139
    140	for (i = 0; i < TLB_HASH_TABLE_SIZE; i++)
    141		tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0);
    142
    143	spin_unlock_bh(&bond->mode_lock);
    144
    145	return 0;
    146}
    147
    148/* Must be called only after all slaves have been released */
    149static void tlb_deinitialize(struct bonding *bond)
    150{
    151	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    152
    153	spin_lock_bh(&bond->mode_lock);
    154
    155	kfree(bond_info->tx_hashtbl);
    156	bond_info->tx_hashtbl = NULL;
    157
    158	spin_unlock_bh(&bond->mode_lock);
    159}
    160
    161static long long compute_gap(struct slave *slave)
    162{
    163	return (s64) (slave->speed << 20) - /* Convert to Megabit per sec */
    164	       (s64) (SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */
    165}
    166
    167static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
    168{
    169	struct slave *slave, *least_loaded;
    170	struct list_head *iter;
    171	long long max_gap;
    172
    173	least_loaded = NULL;
    174	max_gap = LLONG_MIN;
    175
    176	/* Find the slave with the largest gap */
    177	bond_for_each_slave_rcu(bond, slave, iter) {
    178		if (bond_slave_can_tx(slave)) {
    179			long long gap = compute_gap(slave);
    180
    181			if (max_gap < gap) {
    182				least_loaded = slave;
    183				max_gap = gap;
    184			}
    185		}
    186	}
    187
    188	return least_loaded;
    189}
    190
    191static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index,
    192						u32 skb_len)
    193{
    194	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    195	struct tlb_client_info *hash_table;
    196	struct slave *assigned_slave;
    197
    198	hash_table = bond_info->tx_hashtbl;
    199	assigned_slave = hash_table[hash_index].tx_slave;
    200	if (!assigned_slave) {
    201		assigned_slave = tlb_get_least_loaded_slave(bond);
    202
    203		if (assigned_slave) {
    204			struct tlb_slave_info *slave_info =
    205				&(SLAVE_TLB_INFO(assigned_slave));
    206			u32 next_index = slave_info->head;
    207
    208			hash_table[hash_index].tx_slave = assigned_slave;
    209			hash_table[hash_index].next = next_index;
    210			hash_table[hash_index].prev = TLB_NULL_INDEX;
    211
    212			if (next_index != TLB_NULL_INDEX)
    213				hash_table[next_index].prev = hash_index;
    214
    215			slave_info->head = hash_index;
    216			slave_info->load +=
    217				hash_table[hash_index].load_history;
    218		}
    219	}
    220
    221	if (assigned_slave)
    222		hash_table[hash_index].tx_bytes += skb_len;
    223
    224	return assigned_slave;
    225}
    226
/* Locked variant of __tlb_choose_channel(): resolves the transmit slave for
 * @hash_index and accounts @skb_len while holding bond->mode_lock.
 * Returns the chosen slave, or NULL if none could be assigned.
 */
static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index,
					u32 skb_len)
{
	struct slave *tx_slave;

	/* We don't need to disable softirq here, because
	 * tlb_choose_channel() is only called by bond_alb_xmit()
	 * which already has softirq disabled.
	 */
	spin_lock(&bond->mode_lock);
	tx_slave = __tlb_choose_channel(bond, hash_index, skb_len);
	spin_unlock(&bond->mode_lock);

	return tx_slave;
}
    242
    243/*********************** rlb specific functions ***************************/
    244
    245/* when an ARP REPLY is received from a client update its info
    246 * in the rx_hashtbl
    247 */
    248static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
    249{
    250	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    251	struct rlb_client_info *client_info;
    252	u32 hash_index;
    253
    254	spin_lock_bh(&bond->mode_lock);
    255
    256	hash_index = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src));
    257	client_info = &(bond_info->rx_hashtbl[hash_index]);
    258
    259	if ((client_info->assigned) &&
    260	    (client_info->ip_src == arp->ip_dst) &&
    261	    (client_info->ip_dst == arp->ip_src) &&
    262	    (!ether_addr_equal_64bits(client_info->mac_dst, arp->mac_src))) {
    263		/* update the clients MAC address */
    264		ether_addr_copy(client_info->mac_dst, arp->mac_src);
    265		client_info->ntt = 1;
    266		bond_info->rx_ntt = 1;
    267	}
    268
    269	spin_unlock_bh(&bond->mode_lock);
    270}
    271
    272static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond,
    273			struct slave *slave)
    274{
    275	struct arp_pkt *arp, _arp;
    276
    277	if (skb->protocol != cpu_to_be16(ETH_P_ARP))
    278		goto out;
    279
    280	arp = skb_header_pointer(skb, 0, sizeof(_arp), &_arp);
    281	if (!arp)
    282		goto out;
    283
    284	/* We received an ARP from arp->ip_src.
    285	 * We might have used this IP address previously (on the bonding host
    286	 * itself or on a system that is bridged together with the bond).
    287	 * However, if arp->mac_src is different than what is stored in
    288	 * rx_hashtbl, some other host is now using the IP and we must prevent
    289	 * sending out client updates with this IP address and the old MAC
    290	 * address.
    291	 * Clean up all hash table entries that have this address as ip_src but
    292	 * have a different mac_src.
    293	 */
    294	rlb_purge_src_ip(bond, arp);
    295
    296	if (arp->op_code == htons(ARPOP_REPLY)) {
    297		/* update rx hash table for this ARP */
    298		rlb_update_entry_from_arp(bond, arp);
    299		slave_dbg(bond->dev, slave->dev, "Server received an ARP Reply from client\n");
    300	}
    301out:
    302	return RX_HANDLER_ANOTHER;
    303}
    304
    305/* Caller must hold rcu_read_lock() */
    306static struct slave *__rlb_next_rx_slave(struct bonding *bond)
    307{
    308	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    309	struct slave *before = NULL, *rx_slave = NULL, *slave;
    310	struct list_head *iter;
    311	bool found = false;
    312
    313	bond_for_each_slave_rcu(bond, slave, iter) {
    314		if (!bond_slave_can_tx(slave))
    315			continue;
    316		if (!found) {
    317			if (!before || before->speed < slave->speed)
    318				before = slave;
    319		} else {
    320			if (!rx_slave || rx_slave->speed < slave->speed)
    321				rx_slave = slave;
    322		}
    323		if (slave == bond_info->rx_slave)
    324			found = true;
    325	}
    326	/* we didn't find anything after the current or we have something
    327	 * better before and up to the current slave
    328	 */
    329	if (!rx_slave || (before && rx_slave->speed < before->speed))
    330		rx_slave = before;
    331
    332	if (rx_slave)
    333		bond_info->rx_slave = rx_slave;
    334
    335	return rx_slave;
    336}
    337
/* RTNL-context wrapper around __rlb_next_rx_slave(): asserts that RTNL is
 * held and returns whatever the inner helper selects (possibly NULL).
 *
 * Caller must hold RTNL, rcu_read_lock is obtained only to silence checkers
 */
static struct slave *rlb_next_rx_slave(struct bonding *bond)
{
	struct slave *rx_slave;

	ASSERT_RTNL();

	rcu_read_lock();
	rx_slave = __rlb_next_rx_slave(bond);
	rcu_read_unlock();

	return rx_slave;
}
    351
    352/* teach the switch the mac of a disabled slave
    353 * on the primary for fault tolerance
    354 *
    355 * Caller must hold RTNL
    356 */
    357static void rlb_teach_disabled_mac_on_primary(struct bonding *bond,
    358					      const u8 addr[])
    359{
    360	struct slave *curr_active = rtnl_dereference(bond->curr_active_slave);
    361
    362	if (!curr_active)
    363		return;
    364
    365	if (!bond->alb_info.primary_is_promisc) {
    366		if (!dev_set_promiscuity(curr_active->dev, 1))
    367			bond->alb_info.primary_is_promisc = 1;
    368		else
    369			bond->alb_info.primary_is_promisc = 0;
    370	}
    371
    372	bond->alb_info.rlb_promisc_timeout_counter = 0;
    373
    374	alb_send_learning_packets(curr_active, addr, true);
    375}
    376
    377/* slave being removed should not be active at this point
    378 *
    379 * Caller must hold rtnl.
    380 */
    381static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
    382{
    383	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    384	struct rlb_client_info *rx_hash_table;
    385	u32 index, next_index;
    386
    387	/* clear slave from rx_hashtbl */
    388	spin_lock_bh(&bond->mode_lock);
    389
    390	rx_hash_table = bond_info->rx_hashtbl;
    391	index = bond_info->rx_hashtbl_used_head;
    392	for (; index != RLB_NULL_INDEX; index = next_index) {
    393		next_index = rx_hash_table[index].used_next;
    394		if (rx_hash_table[index].slave == slave) {
    395			struct slave *assigned_slave = rlb_next_rx_slave(bond);
    396
    397			if (assigned_slave) {
    398				rx_hash_table[index].slave = assigned_slave;
    399				if (is_valid_ether_addr(rx_hash_table[index].mac_dst)) {
    400					bond_info->rx_hashtbl[index].ntt = 1;
    401					bond_info->rx_ntt = 1;
    402					/* A slave has been removed from the
    403					 * table because it is either disabled
    404					 * or being released. We must retry the
    405					 * update to avoid clients from not
    406					 * being updated & disconnecting when
    407					 * there is stress
    408					 */
    409					bond_info->rlb_update_retry_counter =
    410						RLB_UPDATE_RETRY;
    411				}
    412			} else {  /* there is no active slave */
    413				rx_hash_table[index].slave = NULL;
    414			}
    415		}
    416	}
    417
    418	spin_unlock_bh(&bond->mode_lock);
    419
    420	if (slave != rtnl_dereference(bond->curr_active_slave))
    421		rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr);
    422}
    423
    424static void rlb_update_client(struct rlb_client_info *client_info)
    425{
    426	int i;
    427
    428	if (!client_info->slave || !is_valid_ether_addr(client_info->mac_dst))
    429		return;
    430
    431	for (i = 0; i < RLB_ARP_BURST_SIZE; i++) {
    432		struct sk_buff *skb;
    433
    434		skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
    435				 client_info->ip_dst,
    436				 client_info->slave->dev,
    437				 client_info->ip_src,
    438				 client_info->mac_dst,
    439				 client_info->slave->dev->dev_addr,
    440				 client_info->mac_dst);
    441		if (!skb) {
    442			slave_err(client_info->slave->bond->dev,
    443				  client_info->slave->dev,
    444				  "failed to create an ARP packet\n");
    445			continue;
    446		}
    447
    448		skb->dev = client_info->slave->dev;
    449
    450		if (client_info->vlan_id) {
    451			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
    452					       client_info->vlan_id);
    453		}
    454
    455		arp_xmit(skb);
    456	}
    457}
    458
    459/* sends ARP REPLIES that update the clients that need updating */
    460static void rlb_update_rx_clients(struct bonding *bond)
    461{
    462	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    463	struct rlb_client_info *client_info;
    464	u32 hash_index;
    465
    466	spin_lock_bh(&bond->mode_lock);
    467
    468	hash_index = bond_info->rx_hashtbl_used_head;
    469	for (; hash_index != RLB_NULL_INDEX;
    470	     hash_index = client_info->used_next) {
    471		client_info = &(bond_info->rx_hashtbl[hash_index]);
    472		if (client_info->ntt) {
    473			rlb_update_client(client_info);
    474			if (bond_info->rlb_update_retry_counter == 0)
    475				client_info->ntt = 0;
    476		}
    477	}
    478
    479	/* do not update the entries again until this counter is zero so that
    480	 * not to confuse the clients.
    481	 */
    482	bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;
    483
    484	spin_unlock_bh(&bond->mode_lock);
    485}
    486
    487/* The slave was assigned a new mac address - update the clients */
    488static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave)
    489{
    490	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    491	struct rlb_client_info *client_info;
    492	int ntt = 0;
    493	u32 hash_index;
    494
    495	spin_lock_bh(&bond->mode_lock);
    496
    497	hash_index = bond_info->rx_hashtbl_used_head;
    498	for (; hash_index != RLB_NULL_INDEX;
    499	     hash_index = client_info->used_next) {
    500		client_info = &(bond_info->rx_hashtbl[hash_index]);
    501
    502		if ((client_info->slave == slave) &&
    503		    is_valid_ether_addr(client_info->mac_dst)) {
    504			client_info->ntt = 1;
    505			ntt = 1;
    506		}
    507	}
    508
    509	/* update the team's flag only after the whole iteration */
    510	if (ntt) {
    511		bond_info->rx_ntt = 1;
    512		/* fasten the change */
    513		bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
    514	}
    515
    516	spin_unlock_bh(&bond->mode_lock);
    517}
    518
    519/* mark all clients using src_ip to be updated */
    520static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
    521{
    522	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    523	struct rlb_client_info *client_info;
    524	u32 hash_index;
    525
    526	spin_lock(&bond->mode_lock);
    527
    528	hash_index = bond_info->rx_hashtbl_used_head;
    529	for (; hash_index != RLB_NULL_INDEX;
    530	     hash_index = client_info->used_next) {
    531		client_info = &(bond_info->rx_hashtbl[hash_index]);
    532
    533		if (!client_info->slave) {
    534			netdev_err(bond->dev, "found a client with no channel in the client's hash table\n");
    535			continue;
    536		}
    537		/* update all clients using this src_ip, that are not assigned
    538		 * to the team's address (curr_active_slave) and have a known
    539		 * unicast mac address.
    540		 */
    541		if ((client_info->ip_src == src_ip) &&
    542		    !ether_addr_equal_64bits(client_info->slave->dev->dev_addr,
    543					     bond->dev->dev_addr) &&
    544		    is_valid_ether_addr(client_info->mac_dst)) {
    545			client_info->ntt = 1;
    546			bond_info->rx_ntt = 1;
    547		}
    548	}
    549
    550	spin_unlock(&bond->mode_lock);
    551}
    552
/* Choose the slave that serves the conversation described by @arp and record
 * the conversation in the rx_hashtbl slot selected by hashing arp->ip_dst.
 *
 * - If the slot already holds this (ip_src, ip_dst) pair, its MACs are
 *   refreshed and the slave stored there is returned when one is set.
 * - If the slot holds a different client, that client is first moved to the
 *   current active slave and sent an update.
 * - Otherwise (or afterwards) a slave is picked with __rlb_next_rx_slave()
 *   and the slot is (re)filled; nothing is written when no slave is found.
 *
 * The whole operation runs under bond->mode_lock, taken without disabling
 * bottom halves. Returns the chosen slave, or NULL if none is available.
 */
static struct slave *rlb_choose_channel(struct sk_buff *skb,
					struct bonding *bond,
					const struct arp_pkt *arp)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct slave *assigned_slave, *curr_active_slave;
	struct rlb_client_info *client_info;
	u32 hash_index = 0;

	spin_lock(&bond->mode_lock);

	curr_active_slave = rcu_dereference(bond->curr_active_slave);

	/* the slot is keyed by the destination (client) IP */
	hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst));
	client_info = &(bond_info->rx_hashtbl[hash_index]);

	if (client_info->assigned) {
		if ((client_info->ip_src == arp->ip_src) &&
		    (client_info->ip_dst == arp->ip_dst)) {
			/* the entry is already assigned to this client */
			if (!is_broadcast_ether_addr(arp->mac_dst)) {
				/* update mac address from arp */
				ether_addr_copy(client_info->mac_dst, arp->mac_dst);
			}
			ether_addr_copy(client_info->mac_src, arp->mac_src);

			assigned_slave = client_info->slave;
			if (assigned_slave) {
				spin_unlock(&bond->mode_lock);
				return assigned_slave;
			}
			/* no slave recorded: fall through and assign one */
		} else {
			/* the entry is already assigned to some other client,
			 * move the old client to primary (curr_active_slave) so
			 * that the new client can be assigned to this entry.
			 */
			if (curr_active_slave &&
			    client_info->slave != curr_active_slave) {
				client_info->slave = curr_active_slave;
				rlb_update_client(client_info);
			}
		}
	}
	/* assign a new slave */
	assigned_slave = __rlb_next_rx_slave(bond);

	if (assigned_slave) {
		if (!(client_info->assigned &&
		      client_info->ip_src == arp->ip_src)) {
			/* ip_src is going to be updated,
			 * fix the src hash list
			 */
			u32 hash_src = _simple_hash((u8 *)&arp->ip_src,
						    sizeof(arp->ip_src));
			rlb_src_unlink(bond, hash_index);
			rlb_src_link(bond, hash_src, hash_index);
		}

		client_info->ip_src = arp->ip_src;
		client_info->ip_dst = arp->ip_dst;
		/* arp->mac_dst is broadcast for arp requests.
		 * will be updated with clients actual unicast mac address
		 * upon receiving an arp reply.
		 */
		ether_addr_copy(client_info->mac_dst, arp->mac_dst);
		ether_addr_copy(client_info->mac_src, arp->mac_src);
		client_info->slave = assigned_slave;

		/* only a valid client MAC can be sent an update */
		if (is_valid_ether_addr(client_info->mac_dst)) {
			client_info->ntt = 1;
			bond->alb_info.rx_ntt = 1;
		} else {
			client_info->ntt = 0;
		}

		/* a non-zero return from vlan_get_tag() is treated as
		 * "no VLAN tag" and recorded as vlan_id 0
		 */
		if (vlan_get_tag(skb, &client_info->vlan_id))
			client_info->vlan_id = 0;

		/* first use of this slot: push it onto the head of the
		 * list of used entries
		 */
		if (!client_info->assigned) {
			u32 prev_tbl_head = bond_info->rx_hashtbl_used_head;

			bond_info->rx_hashtbl_used_head = hash_index;
			client_info->used_next = prev_tbl_head;
			if (prev_tbl_head != RLB_NULL_INDEX) {
				bond_info->rx_hashtbl[prev_tbl_head].used_prev =
					hash_index;
			}
			client_info->assigned = 1;
		}
	}

	spin_unlock(&bond->mode_lock);

	return assigned_slave;
}
    648
    649/* chooses (and returns) transmit channel for arp reply
    650 * does not choose channel for other arp types since they are
    651 * sent on the curr_active_slave
    652 */
    653static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
    654{
    655	struct slave *tx_slave = NULL;
    656	struct arp_pkt *arp;
    657
    658	if (!pskb_network_may_pull(skb, sizeof(*arp)))
    659		return NULL;
    660	arp = (struct arp_pkt *)skb_network_header(skb);
    661
    662	/* Don't modify or load balance ARPs that do not originate locally
    663	 * (e.g.,arrive via a bridge).
    664	 */
    665	if (!bond_slave_has_mac_rx(bond, arp->mac_src))
    666		return NULL;
    667
    668	if (arp->op_code == htons(ARPOP_REPLY)) {
    669		/* the arp must be sent on the selected rx channel */
    670		tx_slave = rlb_choose_channel(skb, bond, arp);
    671		if (tx_slave)
    672			bond_hw_addr_copy(arp->mac_src, tx_slave->dev->dev_addr,
    673					  tx_slave->dev->addr_len);
    674		netdev_dbg(bond->dev, "(slave %s): Server sent ARP Reply packet\n",
    675			   tx_slave ? tx_slave->dev->name : "NULL");
    676	} else if (arp->op_code == htons(ARPOP_REQUEST)) {
    677		/* Create an entry in the rx_hashtbl for this client as a
    678		 * place holder.
    679		 * When the arp reply is received the entry will be updated
    680		 * with the correct unicast address of the client.
    681		 */
    682		tx_slave = rlb_choose_channel(skb, bond, arp);
    683
    684		/* The ARP reply packets must be delayed so that
    685		 * they can cancel out the influence of the ARP request.
    686		 */
    687		bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY;
    688
    689		/* arp requests are broadcast and are sent on the primary
    690		 * the arp request will collapse all clients on the subnet to
    691		 * the primary slave. We must register these clients to be
    692		 * updated with their assigned mac.
    693		 */
    694		rlb_req_update_subnet_clients(bond, arp->ip_src);
    695		netdev_dbg(bond->dev, "(slave %s): Server sent ARP Request packet\n",
    696			   tx_slave ? tx_slave->dev->name : "NULL");
    697	}
    698
    699	return tx_slave;
    700}
    701
    702static void rlb_rebalance(struct bonding *bond)
    703{
    704	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    705	struct slave *assigned_slave;
    706	struct rlb_client_info *client_info;
    707	int ntt;
    708	u32 hash_index;
    709
    710	spin_lock_bh(&bond->mode_lock);
    711
    712	ntt = 0;
    713	hash_index = bond_info->rx_hashtbl_used_head;
    714	for (; hash_index != RLB_NULL_INDEX;
    715	     hash_index = client_info->used_next) {
    716		client_info = &(bond_info->rx_hashtbl[hash_index]);
    717		assigned_slave = __rlb_next_rx_slave(bond);
    718		if (assigned_slave && (client_info->slave != assigned_slave)) {
    719			client_info->slave = assigned_slave;
    720			if (!is_zero_ether_addr(client_info->mac_dst)) {
    721				client_info->ntt = 1;
    722				ntt = 1;
    723			}
    724		}
    725	}
    726
    727	/* update the team's flag only after the whole iteration */
    728	if (ntt)
    729		bond_info->rx_ntt = 1;
    730	spin_unlock_bh(&bond->mode_lock);
    731}
    732
    733/* Caller must hold mode_lock */
    734static void rlb_init_table_entry_dst(struct rlb_client_info *entry)
    735{
    736	entry->used_next = RLB_NULL_INDEX;
    737	entry->used_prev = RLB_NULL_INDEX;
    738	entry->assigned = 0;
    739	entry->slave = NULL;
    740	entry->vlan_id = 0;
    741}
    742static void rlb_init_table_entry_src(struct rlb_client_info *entry)
    743{
    744	entry->src_first = RLB_NULL_INDEX;
    745	entry->src_prev = RLB_NULL_INDEX;
    746	entry->src_next = RLB_NULL_INDEX;
    747}
    748
    749static void rlb_init_table_entry(struct rlb_client_info *entry)
    750{
    751	memset(entry, 0, sizeof(struct rlb_client_info));
    752	rlb_init_table_entry_dst(entry);
    753	rlb_init_table_entry_src(entry);
    754}
    755
    756static void rlb_delete_table_entry_dst(struct bonding *bond, u32 index)
    757{
    758	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    759	u32 next_index = bond_info->rx_hashtbl[index].used_next;
    760	u32 prev_index = bond_info->rx_hashtbl[index].used_prev;
    761
    762	if (index == bond_info->rx_hashtbl_used_head)
    763		bond_info->rx_hashtbl_used_head = next_index;
    764	if (prev_index != RLB_NULL_INDEX)
    765		bond_info->rx_hashtbl[prev_index].used_next = next_index;
    766	if (next_index != RLB_NULL_INDEX)
    767		bond_info->rx_hashtbl[next_index].used_prev = prev_index;
    768}
    769
    770/* unlink a rlb hash table entry from the src list */
    771static void rlb_src_unlink(struct bonding *bond, u32 index)
    772{
    773	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    774	u32 next_index = bond_info->rx_hashtbl[index].src_next;
    775	u32 prev_index = bond_info->rx_hashtbl[index].src_prev;
    776
    777	bond_info->rx_hashtbl[index].src_next = RLB_NULL_INDEX;
    778	bond_info->rx_hashtbl[index].src_prev = RLB_NULL_INDEX;
    779
    780	if (next_index != RLB_NULL_INDEX)
    781		bond_info->rx_hashtbl[next_index].src_prev = prev_index;
    782
    783	if (prev_index == RLB_NULL_INDEX)
    784		return;
    785
    786	/* is prev_index pointing to the head of this list? */
    787	if (bond_info->rx_hashtbl[prev_index].src_first == index)
    788		bond_info->rx_hashtbl[prev_index].src_first = next_index;
    789	else
    790		bond_info->rx_hashtbl[prev_index].src_next = next_index;
    791
    792}
    793
    794static void rlb_delete_table_entry(struct bonding *bond, u32 index)
    795{
    796	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    797	struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
    798
    799	rlb_delete_table_entry_dst(bond, index);
    800	rlb_init_table_entry_dst(entry);
    801
    802	rlb_src_unlink(bond, index);
    803}
    804
    805/* add the rx_hashtbl[ip_dst_hash] entry to the list
    806 * of entries with identical ip_src_hash
    807 */
    808static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash)
    809{
    810	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    811	u32 next;
    812
    813	bond_info->rx_hashtbl[ip_dst_hash].src_prev = ip_src_hash;
    814	next = bond_info->rx_hashtbl[ip_src_hash].src_first;
    815	bond_info->rx_hashtbl[ip_dst_hash].src_next = next;
    816	if (next != RLB_NULL_INDEX)
    817		bond_info->rx_hashtbl[next].src_prev = ip_dst_hash;
    818	bond_info->rx_hashtbl[ip_src_hash].src_first = ip_dst_hash;
    819}
    820
    821/* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does
    822 * not match arp->mac_src
    823 */
    824static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp)
    825{
    826	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    827	u32 ip_src_hash = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src));
    828	u32 index;
    829
    830	spin_lock_bh(&bond->mode_lock);
    831
    832	index = bond_info->rx_hashtbl[ip_src_hash].src_first;
    833	while (index != RLB_NULL_INDEX) {
    834		struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
    835		u32 next_index = entry->src_next;
    836
    837		if (entry->ip_src == arp->ip_src &&
    838		    !ether_addr_equal_64bits(arp->mac_src, entry->mac_src))
    839			rlb_delete_table_entry(bond, index);
    840		index = next_index;
    841	}
    842	spin_unlock_bh(&bond->mode_lock);
    843}
    844
    845static int rlb_initialize(struct bonding *bond)
    846{
    847	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    848	struct rlb_client_info	*new_hashtbl;
    849	int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);
    850	int i;
    851
    852	new_hashtbl = kmalloc(size, GFP_KERNEL);
    853	if (!new_hashtbl)
    854		return -1;
    855
    856	spin_lock_bh(&bond->mode_lock);
    857
    858	bond_info->rx_hashtbl = new_hashtbl;
    859
    860	bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
    861
    862	for (i = 0; i < RLB_HASH_TABLE_SIZE; i++)
    863		rlb_init_table_entry(bond_info->rx_hashtbl + i);
    864
    865	spin_unlock_bh(&bond->mode_lock);
    866
    867	/* register to receive ARPs */
    868	bond->recv_probe = rlb_arp_recv;
    869
    870	return 0;
    871}
    872
    873static void rlb_deinitialize(struct bonding *bond)
    874{
    875	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    876
    877	spin_lock_bh(&bond->mode_lock);
    878
    879	kfree(bond_info->rx_hashtbl);
    880	bond_info->rx_hashtbl = NULL;
    881	bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
    882
    883	spin_unlock_bh(&bond->mode_lock);
    884}
    885
    886static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
    887{
    888	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
    889	u32 curr_index;
    890
    891	spin_lock_bh(&bond->mode_lock);
    892
    893	curr_index = bond_info->rx_hashtbl_used_head;
    894	while (curr_index != RLB_NULL_INDEX) {
    895		struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]);
    896		u32 next_index = bond_info->rx_hashtbl[curr_index].used_next;
    897
    898		if (curr->vlan_id == vlan_id)
    899			rlb_delete_table_entry(bond, curr_index);
    900
    901		curr_index = next_index;
    902	}
    903
    904	spin_unlock_bh(&bond->mode_lock);
    905}
    906
    907/*********************** tlb/rlb shared functions *********************/
    908
    909static void alb_send_lp_vid(struct slave *slave, const u8 mac_addr[],
    910			    __be16 vlan_proto, u16 vid)
    911{
    912	struct learning_pkt pkt;
    913	struct sk_buff *skb;
    914	int size = sizeof(struct learning_pkt);
    915
    916	memset(&pkt, 0, size);
    917	ether_addr_copy(pkt.mac_dst, mac_addr);
    918	ether_addr_copy(pkt.mac_src, mac_addr);
    919	pkt.type = cpu_to_be16(ETH_P_LOOPBACK);
    920
    921	skb = dev_alloc_skb(size);
    922	if (!skb)
    923		return;
    924
    925	skb_put_data(skb, &pkt, size);
    926
    927	skb_reset_mac_header(skb);
    928	skb->network_header = skb->mac_header + ETH_HLEN;
    929	skb->protocol = pkt.type;
    930	skb->priority = TC_PRIO_CONTROL;
    931	skb->dev = slave->dev;
    932
    933	slave_dbg(slave->bond->dev, slave->dev,
    934		  "Send learning packet: mac %pM vlan %d\n", mac_addr, vid);
    935
    936	if (vid)
    937		__vlan_hwaccel_put_tag(skb, vlan_proto, vid);
    938
    939	dev_queue_xmit(skb);
    940}
    941
    942struct alb_walk_data {
    943	struct bonding *bond;
    944	struct slave *slave;
    945	const u8 *mac_addr;
    946	bool strict_match;
    947};
    948
    949static int alb_upper_dev_walk(struct net_device *upper,
    950			      struct netdev_nested_priv *priv)
    951{
    952	struct alb_walk_data *data = (struct alb_walk_data *)priv->data;
    953	bool strict_match = data->strict_match;
    954	const u8 *mac_addr = data->mac_addr;
    955	struct bonding *bond = data->bond;
    956	struct slave *slave = data->slave;
    957	struct bond_vlan_tag *tags;
    958
    959	if (is_vlan_dev(upper) &&
    960	    bond->dev->lower_level == upper->lower_level - 1) {
    961		if (upper->addr_assign_type == NET_ADDR_STOLEN) {
    962			alb_send_lp_vid(slave, mac_addr,
    963					vlan_dev_vlan_proto(upper),
    964					vlan_dev_vlan_id(upper));
    965		} else {
    966			alb_send_lp_vid(slave, upper->dev_addr,
    967					vlan_dev_vlan_proto(upper),
    968					vlan_dev_vlan_id(upper));
    969		}
    970	}
    971
    972	/* If this is a macvlan device, then only send updates
    973	 * when strict_match is turned off.
    974	 */
    975	if (netif_is_macvlan(upper) && !strict_match) {
    976		tags = bond_verify_device_path(bond->dev, upper, 0);
    977		if (IS_ERR_OR_NULL(tags))
    978			BUG();
    979		alb_send_lp_vid(slave, upper->dev_addr,
    980				tags[0].vlan_proto, tags[0].vlan_id);
    981		kfree(tags);
    982	}
    983
    984	return 0;
    985}
    986
    987static void alb_send_learning_packets(struct slave *slave, const u8 mac_addr[],
    988				      bool strict_match)
    989{
    990	struct bonding *bond = bond_get_bond_by_slave(slave);
    991	struct netdev_nested_priv priv;
    992	struct alb_walk_data data = {
    993		.strict_match = strict_match,
    994		.mac_addr = mac_addr,
    995		.slave = slave,
    996		.bond = bond,
    997	};
    998
    999	priv.data = (void *)&data;
   1000	/* send untagged */
   1001	alb_send_lp_vid(slave, mac_addr, 0, 0);
   1002
   1003	/* loop through all devices and see if we need to send a packet
   1004	 * for that device.
   1005	 */
   1006	rcu_read_lock();
   1007	netdev_walk_all_upper_dev_rcu(bond->dev, alb_upper_dev_walk, &priv);
   1008	rcu_read_unlock();
   1009}
   1010
   1011static int alb_set_slave_mac_addr(struct slave *slave, const u8 addr[],
   1012				  unsigned int len)
   1013{
   1014	struct net_device *dev = slave->dev;
   1015	struct sockaddr_storage ss;
   1016
   1017	if (BOND_MODE(slave->bond) == BOND_MODE_TLB) {
   1018		__dev_addr_set(dev, addr, len);
   1019		return 0;
   1020	}
   1021
   1022	/* for rlb each slave must have a unique hw mac addresses so that
   1023	 * each slave will receive packets destined to a different mac
   1024	 */
   1025	memcpy(ss.__data, addr, len);
   1026	ss.ss_family = dev->type;
   1027	if (dev_set_mac_address(dev, (struct sockaddr *)&ss, NULL)) {
   1028		slave_err(slave->bond->dev, dev, "dev_set_mac_address on slave failed! ALB mode requires that the base driver support setting the hw address also when the network device's interface is open\n");
   1029		return -EOPNOTSUPP;
   1030	}
   1031	return 0;
   1032}
   1033
   1034/* Swap MAC addresses between two slaves.
   1035 *
   1036 * Called with RTNL held, and no other locks.
   1037 */
   1038static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2)
   1039{
   1040	u8 tmp_mac_addr[MAX_ADDR_LEN];
   1041
   1042	bond_hw_addr_copy(tmp_mac_addr, slave1->dev->dev_addr,
   1043			  slave1->dev->addr_len);
   1044	alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr,
   1045			       slave2->dev->addr_len);
   1046	alb_set_slave_mac_addr(slave2, tmp_mac_addr,
   1047			       slave1->dev->addr_len);
   1048
   1049}
   1050
   1051/* Send learning packets after MAC address swap.
   1052 *
   1053 * Called with RTNL and no other locks
   1054 */
   1055static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1,
   1056				struct slave *slave2)
   1057{
   1058	int slaves_state_differ = (bond_slave_can_tx(slave1) != bond_slave_can_tx(slave2));
   1059	struct slave *disabled_slave = NULL;
   1060
   1061	ASSERT_RTNL();
   1062
   1063	/* fasten the change in the switch */
   1064	if (bond_slave_can_tx(slave1)) {
   1065		alb_send_learning_packets(slave1, slave1->dev->dev_addr, false);
   1066		if (bond->alb_info.rlb_enabled) {
   1067			/* inform the clients that the mac address
   1068			 * has changed
   1069			 */
   1070			rlb_req_update_slave_clients(bond, slave1);
   1071		}
   1072	} else {
   1073		disabled_slave = slave1;
   1074	}
   1075
   1076	if (bond_slave_can_tx(slave2)) {
   1077		alb_send_learning_packets(slave2, slave2->dev->dev_addr, false);
   1078		if (bond->alb_info.rlb_enabled) {
   1079			/* inform the clients that the mac address
   1080			 * has changed
   1081			 */
   1082			rlb_req_update_slave_clients(bond, slave2);
   1083		}
   1084	} else {
   1085		disabled_slave = slave2;
   1086	}
   1087
   1088	if (bond->alb_info.rlb_enabled && slaves_state_differ) {
   1089		/* A disabled slave was assigned an active mac addr */
   1090		rlb_teach_disabled_mac_on_primary(bond,
   1091						  disabled_slave->dev->dev_addr);
   1092	}
   1093}
   1094
   1095/**
   1096 * alb_change_hw_addr_on_detach
   1097 * @bond: bonding we're working on
   1098 * @slave: the slave that was just detached
   1099 *
   1100 * We assume that @slave was already detached from the slave list.
   1101 *
   1102 * If @slave's permanent hw address is different both from its current
   1103 * address and from @bond's address, then somewhere in the bond there's
   1104 * a slave that has @slave's permanet address as its current address.
   1105 * We'll make sure that slave no longer uses @slave's permanent address.
   1106 *
   1107 * Caller must hold RTNL and no other locks
   1108 */
   1109static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *slave)
   1110{
   1111	int perm_curr_diff;
   1112	int perm_bond_diff;
   1113	struct slave *found_slave;
   1114
   1115	perm_curr_diff = !ether_addr_equal_64bits(slave->perm_hwaddr,
   1116						  slave->dev->dev_addr);
   1117	perm_bond_diff = !ether_addr_equal_64bits(slave->perm_hwaddr,
   1118						  bond->dev->dev_addr);
   1119
   1120	if (perm_curr_diff && perm_bond_diff) {
   1121		found_slave = bond_slave_has_mac(bond, slave->perm_hwaddr);
   1122
   1123		if (found_slave) {
   1124			alb_swap_mac_addr(slave, found_slave);
   1125			alb_fasten_mac_swap(bond, slave, found_slave);
   1126		}
   1127	}
   1128}
   1129
   1130/**
   1131 * alb_handle_addr_collision_on_attach
   1132 * @bond: bonding we're working on
   1133 * @slave: the slave that was just attached
   1134 *
   1135 * checks uniqueness of slave's mac address and handles the case the
   1136 * new slave uses the bonds mac address.
   1137 *
   1138 * If the permanent hw address of @slave is @bond's hw address, we need to
   1139 * find a different hw address to give @slave, that isn't in use by any other
   1140 * slave in the bond. This address must be, of course, one of the permanent
   1141 * addresses of the other slaves.
   1142 *
   1143 * We go over the slave list, and for each slave there we compare its
   1144 * permanent hw address with the current address of all the other slaves.
   1145 * If no match was found, then we've found a slave with a permanent address
   1146 * that isn't used by any other slave in the bond, so we can assign it to
   1147 * @slave.
   1148 *
   1149 * assumption: this function is called before @slave is attached to the
   1150 *	       bond slave list.
   1151 */
   1152static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave)
   1153{
   1154	struct slave *has_bond_addr = rcu_access_pointer(bond->curr_active_slave);
   1155	struct slave *tmp_slave1, *free_mac_slave = NULL;
   1156	struct list_head *iter;
   1157
   1158	if (!bond_has_slaves(bond)) {
   1159		/* this is the first slave */
   1160		return 0;
   1161	}
   1162
   1163	/* if slave's mac address differs from bond's mac address
   1164	 * check uniqueness of slave's mac address against the other
   1165	 * slaves in the bond.
   1166	 */
   1167	if (!ether_addr_equal_64bits(slave->perm_hwaddr, bond->dev->dev_addr)) {
   1168		if (!bond_slave_has_mac(bond, slave->dev->dev_addr))
   1169			return 0;
   1170
   1171		/* Try setting slave mac to bond address and fall-through
   1172		 * to code handling that situation below...
   1173		 */
   1174		alb_set_slave_mac_addr(slave, bond->dev->dev_addr,
   1175				       bond->dev->addr_len);
   1176	}
   1177
   1178	/* The slave's address is equal to the address of the bond.
   1179	 * Search for a spare address in the bond for this slave.
   1180	 */
   1181	bond_for_each_slave(bond, tmp_slave1, iter) {
   1182		if (!bond_slave_has_mac(bond, tmp_slave1->perm_hwaddr)) {
   1183			/* no slave has tmp_slave1's perm addr
   1184			 * as its curr addr
   1185			 */
   1186			free_mac_slave = tmp_slave1;
   1187			break;
   1188		}
   1189
   1190		if (!has_bond_addr) {
   1191			if (ether_addr_equal_64bits(tmp_slave1->dev->dev_addr,
   1192						    bond->dev->dev_addr)) {
   1193
   1194				has_bond_addr = tmp_slave1;
   1195			}
   1196		}
   1197	}
   1198
   1199	if (free_mac_slave) {
   1200		alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr,
   1201				       free_mac_slave->dev->addr_len);
   1202
   1203		slave_warn(bond->dev, slave->dev, "the slave hw address is in use by the bond; giving it the hw address of %s\n",
   1204			   free_mac_slave->dev->name);
   1205
   1206	} else if (has_bond_addr) {
   1207		slave_err(bond->dev, slave->dev, "the slave hw address is in use by the bond; couldn't find a slave with a free hw address to give it (this should not have happened)\n");
   1208		return -EFAULT;
   1209	}
   1210
   1211	return 0;
   1212}
   1213
   1214/**
   1215 * alb_set_mac_address
   1216 * @bond: bonding we're working on
   1217 * @addr: MAC address to set
   1218 *
   1219 * In TLB mode all slaves are configured to the bond's hw address, but set
   1220 * their dev_addr field to different addresses (based on their permanent hw
   1221 * addresses).
   1222 *
   1223 * For each slave, this function sets the interface to the new address and then
   1224 * changes its dev_addr field to its previous value.
   1225 *
   1226 * Unwinding assumes bond's mac address has not yet changed.
   1227 */
   1228static int alb_set_mac_address(struct bonding *bond, void *addr)
   1229{
   1230	struct slave *slave, *rollback_slave;
   1231	struct list_head *iter;
   1232	struct sockaddr_storage ss;
   1233	char tmp_addr[MAX_ADDR_LEN];
   1234	int res;
   1235
   1236	if (bond->alb_info.rlb_enabled)
   1237		return 0;
   1238
   1239	bond_for_each_slave(bond, slave, iter) {
   1240		/* save net_device's current hw address */
   1241		bond_hw_addr_copy(tmp_addr, slave->dev->dev_addr,
   1242				  slave->dev->addr_len);
   1243
   1244		res = dev_set_mac_address(slave->dev, addr, NULL);
   1245
   1246		/* restore net_device's hw address */
   1247		dev_addr_set(slave->dev, tmp_addr);
   1248
   1249		if (res)
   1250			goto unwind;
   1251	}
   1252
   1253	return 0;
   1254
   1255unwind:
   1256	memcpy(ss.__data, bond->dev->dev_addr, bond->dev->addr_len);
   1257	ss.ss_family = bond->dev->type;
   1258
   1259	/* unwind from head to the slave that failed */
   1260	bond_for_each_slave(bond, rollback_slave, iter) {
   1261		if (rollback_slave == slave)
   1262			break;
   1263		bond_hw_addr_copy(tmp_addr, rollback_slave->dev->dev_addr,
   1264				  rollback_slave->dev->addr_len);
   1265		dev_set_mac_address(rollback_slave->dev,
   1266				    (struct sockaddr *)&ss, NULL);
   1267		dev_addr_set(rollback_slave->dev, tmp_addr);
   1268	}
   1269
   1270	return res;
   1271}
   1272
   1273/* determine if the packet is NA or NS */
   1274static bool alb_determine_nd(struct sk_buff *skb, struct bonding *bond)
   1275{
   1276	struct ipv6hdr *ip6hdr;
   1277	struct icmp6hdr *hdr;
   1278
   1279	if (!pskb_network_may_pull(skb, sizeof(*ip6hdr)))
   1280		return true;
   1281
   1282	ip6hdr = ipv6_hdr(skb);
   1283	if (ip6hdr->nexthdr != IPPROTO_ICMPV6)
   1284		return false;
   1285
   1286	if (!pskb_network_may_pull(skb, sizeof(*ip6hdr) + sizeof(*hdr)))
   1287		return true;
   1288
   1289	hdr = icmp6_hdr(skb);
   1290	return hdr->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT ||
   1291		hdr->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION;
   1292}
   1293
   1294/************************ exported alb functions ************************/
   1295
   1296int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
   1297{
   1298	int res;
   1299
   1300	res = tlb_initialize(bond);
   1301	if (res)
   1302		return res;
   1303
   1304	if (rlb_enabled) {
   1305		res = rlb_initialize(bond);
   1306		if (res) {
   1307			tlb_deinitialize(bond);
   1308			return res;
   1309		}
   1310		bond->alb_info.rlb_enabled = 1;
   1311	} else {
   1312		bond->alb_info.rlb_enabled = 0;
   1313	}
   1314
   1315	return 0;
   1316}
   1317
   1318void bond_alb_deinitialize(struct bonding *bond)
   1319{
   1320	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
   1321
   1322	tlb_deinitialize(bond);
   1323
   1324	if (bond_info->rlb_enabled)
   1325		rlb_deinitialize(bond);
   1326}
   1327
   1328static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
   1329				    struct slave *tx_slave)
   1330{
   1331	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
   1332	struct ethhdr *eth_data = eth_hdr(skb);
   1333
   1334	if (!tx_slave) {
   1335		/* unbalanced or unassigned, send through primary */
   1336		tx_slave = rcu_dereference(bond->curr_active_slave);
   1337		if (bond->params.tlb_dynamic_lb)
   1338			bond_info->unbalanced_load += skb->len;
   1339	}
   1340
   1341	if (tx_slave && bond_slave_can_tx(tx_slave)) {
   1342		if (tx_slave != rcu_access_pointer(bond->curr_active_slave)) {
   1343			ether_addr_copy(eth_data->h_source,
   1344					tx_slave->dev->dev_addr);
   1345		}
   1346
   1347		return bond_dev_queue_xmit(bond, skb, tx_slave->dev);
   1348	}
   1349
   1350	if (tx_slave && bond->params.tlb_dynamic_lb) {
   1351		spin_lock(&bond->mode_lock);
   1352		__tlb_clear_slave(bond, tx_slave, 0);
   1353		spin_unlock(&bond->mode_lock);
   1354	}
   1355
   1356	/* no suitable interface, frame not sent */
   1357	return bond_tx_drop(bond->dev, skb);
   1358}
   1359
   1360struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
   1361				      struct sk_buff *skb)
   1362{
   1363	struct slave *tx_slave = NULL;
   1364	struct ethhdr *eth_data;
   1365	u32 hash_index;
   1366
   1367	skb_reset_mac_header(skb);
   1368	eth_data = eth_hdr(skb);
   1369
   1370	/* Do not TX balance any multicast or broadcast */
   1371	if (!is_multicast_ether_addr(eth_data->h_dest)) {
   1372		switch (skb->protocol) {
   1373		case htons(ETH_P_IPV6):
   1374			if (alb_determine_nd(skb, bond))
   1375				break;
   1376			fallthrough;
   1377		case htons(ETH_P_IP):
   1378			hash_index = bond_xmit_hash(bond, skb);
   1379			if (bond->params.tlb_dynamic_lb) {
   1380				tx_slave = tlb_choose_channel(bond,
   1381							      hash_index & 0xFF,
   1382							      skb->len);
   1383			} else {
   1384				struct bond_up_slave *slaves;
   1385				unsigned int count;
   1386
   1387				slaves = rcu_dereference(bond->usable_slaves);
   1388				count = slaves ? READ_ONCE(slaves->count) : 0;
   1389				if (likely(count))
   1390					tx_slave = slaves->arr[hash_index %
   1391							       count];
   1392			}
   1393			break;
   1394		}
   1395	}
   1396	return tx_slave;
   1397}
   1398
   1399netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
   1400{
   1401	struct bonding *bond = netdev_priv(bond_dev);
   1402	struct slave *tx_slave;
   1403
   1404	tx_slave = bond_xmit_tlb_slave_get(bond, skb);
   1405	return bond_do_alb_xmit(skb, bond, tx_slave);
   1406}
   1407
   1408struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
   1409				      struct sk_buff *skb)
   1410{
   1411	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
   1412	static const __be32 ip_bcast = htonl(0xffffffff);
   1413	struct slave *tx_slave = NULL;
   1414	const u8 *hash_start = NULL;
   1415	bool do_tx_balance = true;
   1416	struct ethhdr *eth_data;
   1417	u32 hash_index = 0;
   1418	int hash_size = 0;
   1419
   1420	skb_reset_mac_header(skb);
   1421	eth_data = eth_hdr(skb);
   1422
   1423	switch (ntohs(skb->protocol)) {
   1424	case ETH_P_IP: {
   1425		const struct iphdr *iph;
   1426
   1427		if (is_broadcast_ether_addr(eth_data->h_dest) ||
   1428		    !pskb_network_may_pull(skb, sizeof(*iph))) {
   1429			do_tx_balance = false;
   1430			break;
   1431		}
   1432		iph = ip_hdr(skb);
   1433		if (iph->daddr == ip_bcast || iph->protocol == IPPROTO_IGMP) {
   1434			do_tx_balance = false;
   1435			break;
   1436		}
   1437		hash_start = (char *)&(iph->daddr);
   1438		hash_size = sizeof(iph->daddr);
   1439		break;
   1440	}
   1441	case ETH_P_IPV6: {
   1442		const struct ipv6hdr *ip6hdr;
   1443
   1444		/* IPv6 doesn't really use broadcast mac address, but leave
   1445		 * that here just in case.
   1446		 */
   1447		if (is_broadcast_ether_addr(eth_data->h_dest)) {
   1448			do_tx_balance = false;
   1449			break;
   1450		}
   1451
   1452		/* IPv6 uses all-nodes multicast as an equivalent to
   1453		 * broadcasts in IPv4.
   1454		 */
   1455		if (ether_addr_equal_64bits(eth_data->h_dest, mac_v6_allmcast)) {
   1456			do_tx_balance = false;
   1457			break;
   1458		}
   1459
   1460		if (alb_determine_nd(skb, bond)) {
   1461			do_tx_balance = false;
   1462			break;
   1463		}
   1464
   1465		/* The IPv6 header is pulled by alb_determine_nd */
   1466		/* Additionally, DAD probes should not be tx-balanced as that
   1467		 * will lead to false positives for duplicate addresses and
   1468		 * prevent address configuration from working.
   1469		 */
   1470		ip6hdr = ipv6_hdr(skb);
   1471		if (ipv6_addr_any(&ip6hdr->saddr)) {
   1472			do_tx_balance = false;
   1473			break;
   1474		}
   1475
   1476		hash_start = (char *)&ip6hdr->daddr;
   1477		hash_size = sizeof(ip6hdr->daddr);
   1478		break;
   1479	}
   1480	case ETH_P_ARP:
   1481		do_tx_balance = false;
   1482		if (bond_info->rlb_enabled)
   1483			tx_slave = rlb_arp_xmit(skb, bond);
   1484		break;
   1485	default:
   1486		do_tx_balance = false;
   1487		break;
   1488	}
   1489
   1490	if (do_tx_balance) {
   1491		if (bond->params.tlb_dynamic_lb) {
   1492			hash_index = _simple_hash(hash_start, hash_size);
   1493			tx_slave = tlb_choose_channel(bond, hash_index, skb->len);
   1494		} else {
   1495			/*
   1496			 * do_tx_balance means we are free to select the tx_slave
   1497			 * So we do exactly what tlb would do for hash selection
   1498			 */
   1499
   1500			struct bond_up_slave *slaves;
   1501			unsigned int count;
   1502
   1503			slaves = rcu_dereference(bond->usable_slaves);
   1504			count = slaves ? READ_ONCE(slaves->count) : 0;
   1505			if (likely(count))
   1506				tx_slave = slaves->arr[bond_xmit_hash(bond, skb) %
   1507						       count];
   1508		}
   1509	}
   1510	return tx_slave;
   1511}
   1512
   1513netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
   1514{
   1515	struct bonding *bond = netdev_priv(bond_dev);
   1516	struct slave *tx_slave = NULL;
   1517
   1518	tx_slave = bond_xmit_alb_slave_get(bond, skb);
   1519	return bond_do_alb_xmit(skb, bond, tx_slave);
   1520}
   1521
   1522void bond_alb_monitor(struct work_struct *work)
   1523{
   1524	struct bonding *bond = container_of(work, struct bonding,
   1525					    alb_work.work);
   1526	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
   1527	struct list_head *iter;
   1528	struct slave *slave;
   1529
   1530	if (!bond_has_slaves(bond)) {
   1531		atomic_set(&bond_info->tx_rebalance_counter, 0);
   1532		bond_info->lp_counter = 0;
   1533		goto re_arm;
   1534	}
   1535
   1536	rcu_read_lock();
   1537
   1538	atomic_inc(&bond_info->tx_rebalance_counter);
   1539	bond_info->lp_counter++;
   1540
   1541	/* send learning packets */
   1542	if (bond_info->lp_counter >= BOND_ALB_LP_TICKS(bond)) {
   1543		bool strict_match;
   1544
   1545		bond_for_each_slave_rcu(bond, slave, iter) {
   1546			/* If updating current_active, use all currently
   1547			 * user mac addresses (!strict_match).  Otherwise, only
   1548			 * use mac of the slave device.
   1549			 * In RLB mode, we always use strict matches.
   1550			 */
   1551			strict_match = (slave != rcu_access_pointer(bond->curr_active_slave) ||
   1552					bond_info->rlb_enabled);
   1553			alb_send_learning_packets(slave, slave->dev->dev_addr,
   1554						  strict_match);
   1555		}
   1556		bond_info->lp_counter = 0;
   1557	}
   1558
   1559	/* rebalance tx traffic */
   1560	if (atomic_read(&bond_info->tx_rebalance_counter) >= BOND_TLB_REBALANCE_TICKS) {
   1561		bond_for_each_slave_rcu(bond, slave, iter) {
   1562			tlb_clear_slave(bond, slave, 1);
   1563			if (slave == rcu_access_pointer(bond->curr_active_slave)) {
   1564				SLAVE_TLB_INFO(slave).load =
   1565					bond_info->unbalanced_load /
   1566						BOND_TLB_REBALANCE_INTERVAL;
   1567				bond_info->unbalanced_load = 0;
   1568			}
   1569		}
   1570		atomic_set(&bond_info->tx_rebalance_counter, 0);
   1571	}
   1572
   1573	if (bond_info->rlb_enabled) {
   1574		if (bond_info->primary_is_promisc &&
   1575		    (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) {
   1576
   1577			/* dev_set_promiscuity requires rtnl and
   1578			 * nothing else.  Avoid race with bond_close.
   1579			 */
   1580			rcu_read_unlock();
   1581			if (!rtnl_trylock())
   1582				goto re_arm;
   1583
   1584			bond_info->rlb_promisc_timeout_counter = 0;
   1585
   1586			/* If the primary was set to promiscuous mode
   1587			 * because a slave was disabled then
   1588			 * it can now leave promiscuous mode.
   1589			 */
   1590			dev_set_promiscuity(rtnl_dereference(bond->curr_active_slave)->dev,
   1591					    -1);
   1592			bond_info->primary_is_promisc = 0;
   1593
   1594			rtnl_unlock();
   1595			rcu_read_lock();
   1596		}
   1597
   1598		if (bond_info->rlb_rebalance) {
   1599			bond_info->rlb_rebalance = 0;
   1600			rlb_rebalance(bond);
   1601		}
   1602
   1603		/* check if clients need updating */
   1604		if (bond_info->rx_ntt) {
   1605			if (bond_info->rlb_update_delay_counter) {
   1606				--bond_info->rlb_update_delay_counter;
   1607			} else {
   1608				rlb_update_rx_clients(bond);
   1609				if (bond_info->rlb_update_retry_counter)
   1610					--bond_info->rlb_update_retry_counter;
   1611				else
   1612					bond_info->rx_ntt = 0;
   1613			}
   1614		}
   1615	}
   1616	rcu_read_unlock();
   1617re_arm:
   1618	queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks);
   1619}
   1620
   1621/* assumption: called before the slave is attached to the bond
   1622 * and not locked by the bond lock
   1623 */
   1624int bond_alb_init_slave(struct bonding *bond, struct slave *slave)
   1625{
   1626	int res;
   1627
   1628	res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr,
   1629				     slave->dev->addr_len);
   1630	if (res)
   1631		return res;
   1632
   1633	res = alb_handle_addr_collision_on_attach(bond, slave);
   1634	if (res)
   1635		return res;
   1636
   1637	tlb_init_slave(slave);
   1638
   1639	/* order a rebalance ASAP */
   1640	atomic_set(&bond->alb_info.tx_rebalance_counter,
   1641		   BOND_TLB_REBALANCE_TICKS);
   1642
   1643	if (bond->alb_info.rlb_enabled)
   1644		bond->alb_info.rlb_rebalance = 1;
   1645
   1646	return 0;
   1647}
   1648
   1649/* Remove slave from tlb and rlb hash tables, and fix up MAC addresses
   1650 * if necessary.
   1651 *
   1652 * Caller must hold RTNL and no other locks
   1653 */
   1654void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
   1655{
   1656	if (bond_has_slaves(bond))
   1657		alb_change_hw_addr_on_detach(bond, slave);
   1658
   1659	tlb_clear_slave(bond, slave, 0);
   1660
   1661	if (bond->alb_info.rlb_enabled) {
   1662		bond->alb_info.rx_slave = NULL;
   1663		rlb_clear_slave(bond, slave);
   1664	}
   1665
   1666}
   1667
   1668void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link)
   1669{
   1670	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
   1671
   1672	if (link == BOND_LINK_DOWN) {
   1673		tlb_clear_slave(bond, slave, 0);
   1674		if (bond->alb_info.rlb_enabled)
   1675			rlb_clear_slave(bond, slave);
   1676	} else if (link == BOND_LINK_UP) {
   1677		/* order a rebalance ASAP */
   1678		atomic_set(&bond_info->tx_rebalance_counter,
   1679			   BOND_TLB_REBALANCE_TICKS);
   1680		if (bond->alb_info.rlb_enabled) {
   1681			bond->alb_info.rlb_rebalance = 1;
   1682			/* If the updelay module parameter is smaller than the
   1683			 * forwarding delay of the switch the rebalance will
   1684			 * not work because the rebalance arp replies will
   1685			 * not be forwarded to the clients..
   1686			 */
   1687		}
   1688	}
   1689
   1690	if (bond_is_nondyn_tlb(bond)) {
   1691		if (bond_update_slave_arr(bond, NULL))
   1692			pr_err("Failed to build slave-array for TLB mode.\n");
   1693	}
   1694}
   1695
   1696/**
   1697 * bond_alb_handle_active_change - assign new curr_active_slave
   1698 * @bond: our bonding struct
   1699 * @new_slave: new slave to assign
   1700 *
   1701 * Set the bond->curr_active_slave to @new_slave and handle
   1702 * mac address swapping and promiscuity changes as needed.
   1703 *
   1704 * Caller must hold RTNL
   1705 */
   1706void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave)
   1707{
   1708	struct slave *swap_slave;
   1709	struct slave *curr_active;
   1710
   1711	curr_active = rtnl_dereference(bond->curr_active_slave);
   1712	if (curr_active == new_slave)
   1713		return;
   1714
   1715	if (curr_active && bond->alb_info.primary_is_promisc) {
   1716		dev_set_promiscuity(curr_active->dev, -1);
   1717		bond->alb_info.primary_is_promisc = 0;
   1718		bond->alb_info.rlb_promisc_timeout_counter = 0;
   1719	}
   1720
   1721	swap_slave = curr_active;
   1722	rcu_assign_pointer(bond->curr_active_slave, new_slave);
   1723
   1724	if (!new_slave || !bond_has_slaves(bond))
   1725		return;
   1726
   1727	/* set the new curr_active_slave to the bonds mac address
   1728	 * i.e. swap mac addresses of old curr_active_slave and new curr_active_slave
   1729	 */
   1730	if (!swap_slave)
   1731		swap_slave = bond_slave_has_mac(bond, bond->dev->dev_addr);
   1732
   1733	/* Arrange for swap_slave and new_slave to temporarily be
   1734	 * ignored so we can mess with their MAC addresses without
   1735	 * fear of interference from transmit activity.
   1736	 */
   1737	if (swap_slave)
   1738		tlb_clear_slave(bond, swap_slave, 1);
   1739	tlb_clear_slave(bond, new_slave, 1);
   1740
   1741	/* in TLB mode, the slave might flip down/up with the old dev_addr,
   1742	 * and thus filter bond->dev_addr's packets, so force bond's mac
   1743	 */
   1744	if (BOND_MODE(bond) == BOND_MODE_TLB) {
   1745		struct sockaddr_storage ss;
   1746		u8 tmp_addr[MAX_ADDR_LEN];
   1747
   1748		bond_hw_addr_copy(tmp_addr, new_slave->dev->dev_addr,
   1749				  new_slave->dev->addr_len);
   1750
   1751		bond_hw_addr_copy(ss.__data, bond->dev->dev_addr,
   1752				  bond->dev->addr_len);
   1753		ss.ss_family = bond->dev->type;
   1754		/* we don't care if it can't change its mac, best effort */
   1755		dev_set_mac_address(new_slave->dev, (struct sockaddr *)&ss,
   1756				    NULL);
   1757
   1758		dev_addr_set(new_slave->dev, tmp_addr);
   1759	}
   1760
   1761	/* curr_active_slave must be set before calling alb_swap_mac_addr */
   1762	if (swap_slave) {
   1763		/* swap mac address */
   1764		alb_swap_mac_addr(swap_slave, new_slave);
   1765		alb_fasten_mac_swap(bond, swap_slave, new_slave);
   1766	} else {
   1767		/* set the new_slave to the bond mac address */
   1768		alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr,
   1769				       bond->dev->addr_len);
   1770		alb_send_learning_packets(new_slave, bond->dev->dev_addr,
   1771					  false);
   1772	}
   1773}
   1774
   1775/* Called with RTNL */
   1776int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)
   1777{
   1778	struct bonding *bond = netdev_priv(bond_dev);
   1779	struct sockaddr_storage *ss = addr;
   1780	struct slave *curr_active;
   1781	struct slave *swap_slave;
   1782	int res;
   1783
   1784	if (!is_valid_ether_addr(ss->__data))
   1785		return -EADDRNOTAVAIL;
   1786
   1787	res = alb_set_mac_address(bond, addr);
   1788	if (res)
   1789		return res;
   1790
   1791	dev_addr_set(bond_dev, ss->__data);
   1792
   1793	/* If there is no curr_active_slave there is nothing else to do.
   1794	 * Otherwise we'll need to pass the new address to it and handle
   1795	 * duplications.
   1796	 */
   1797	curr_active = rtnl_dereference(bond->curr_active_slave);
   1798	if (!curr_active)
   1799		return 0;
   1800
   1801	swap_slave = bond_slave_has_mac(bond, bond_dev->dev_addr);
   1802
   1803	if (swap_slave) {
   1804		alb_swap_mac_addr(swap_slave, curr_active);
   1805		alb_fasten_mac_swap(bond, swap_slave, curr_active);
   1806	} else {
   1807		alb_set_slave_mac_addr(curr_active, bond_dev->dev_addr,
   1808				       bond_dev->addr_len);
   1809
   1810		alb_send_learning_packets(curr_active,
   1811					  bond_dev->dev_addr, false);
   1812		if (bond->alb_info.rlb_enabled) {
   1813			/* inform clients mac address has changed */
   1814			rlb_req_update_slave_clients(bond, curr_active);
   1815		}
   1816	}
   1817
   1818	return 0;
   1819}
   1820
   1821void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
   1822{
   1823	if (bond->alb_info.rlb_enabled)
   1824		rlb_clear_vlan(bond, vlan_id);
   1825}
   1826