cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

ibmveth.c (53202B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * IBM Power Virtual Ethernet Device Driver
      4 *
      5 * Copyright (C) IBM Corporation, 2003, 2010
      6 *
      7 * Authors: Dave Larson <larson1@us.ibm.com>
      8 *	    Santiago Leon <santil@linux.vnet.ibm.com>
      9 *	    Brian King <brking@linux.vnet.ibm.com>
     10 *	    Robert Jennings <rcj@linux.vnet.ibm.com>
     11 *	    Anton Blanchard <anton@au.ibm.com>
     12 */
     13
     14#include <linux/module.h>
     15#include <linux/types.h>
     16#include <linux/errno.h>
     17#include <linux/dma-mapping.h>
     18#include <linux/kernel.h>
     19#include <linux/netdevice.h>
     20#include <linux/etherdevice.h>
     21#include <linux/skbuff.h>
     22#include <linux/init.h>
     23#include <linux/interrupt.h>
     24#include <linux/mm.h>
     25#include <linux/pm.h>
     26#include <linux/ethtool.h>
     27#include <linux/in.h>
     28#include <linux/ip.h>
     29#include <linux/ipv6.h>
     30#include <linux/slab.h>
     31#include <asm/hvcall.h>
     32#include <linux/atomic.h>
     33#include <asm/vio.h>
     34#include <asm/iommu.h>
     35#include <asm/firmware.h>
     36#include <net/tcp.h>
     37#include <net/ip6_checksum.h>
     38
     39#include "ibmveth.h"
     40
     41static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
     42static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter);
     43static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);
     44
     45static struct kobj_type ktype_veth_pool;
     46
     47
     48static const char ibmveth_driver_name[] = "ibmveth";
     49static const char ibmveth_driver_string[] = "IBM Power Virtual Ethernet Driver";
     50#define ibmveth_driver_version "1.06"
     51
     52MODULE_AUTHOR("Santiago Leon <santil@linux.vnet.ibm.com>");
     53MODULE_DESCRIPTION("IBM Power Virtual Ethernet Driver");
     54MODULE_LICENSE("GPL");
     55MODULE_VERSION(ibmveth_driver_version);
     56
     57static unsigned int tx_copybreak __read_mostly = 128;
     58module_param(tx_copybreak, uint, 0644);
     59MODULE_PARM_DESC(tx_copybreak,
     60	"Maximum size of packet that is copied to a new buffer on transmit");
     61
     62static unsigned int rx_copybreak __read_mostly = 128;
     63module_param(rx_copybreak, uint, 0644);
     64MODULE_PARM_DESC(rx_copybreak,
     65	"Maximum size of packet that is copied to a new buffer on receive");
     66
     67static unsigned int rx_flush __read_mostly = 0;
     68module_param(rx_flush, uint, 0644);
     69MODULE_PARM_DESC(rx_flush, "Flush receive buffers before use");
     70
     71static bool old_large_send __read_mostly;
     72module_param(old_large_send, bool, 0444);
     73MODULE_PARM_DESC(old_large_send,
     74	"Use old large send method on firmware that supports the new method");
     75
     76struct ibmveth_stat {
     77	char name[ETH_GSTRING_LEN];
     78	int offset;
     79};
     80
     81#define IBMVETH_STAT_OFF(stat) offsetof(struct ibmveth_adapter, stat)
     82#define IBMVETH_GET_STAT(a, off) *((u64 *)(((unsigned long)(a)) + off))
     83
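/*
 * Each entry below pairs an ethtool statistic name with the byte offset of
 * the matching u64 counter inside struct ibmveth_adapter; IBMVETH_GET_STAT()
 * then reads the counter back through that offset.
 */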
     84static struct ibmveth_stat ibmveth_stats[] = {
     85	{ "replenish_task_cycles", IBMVETH_STAT_OFF(replenish_task_cycles) },
     86	{ "replenish_no_mem", IBMVETH_STAT_OFF(replenish_no_mem) },
     87	{ "replenish_add_buff_failure",
     88			IBMVETH_STAT_OFF(replenish_add_buff_failure) },
     89	{ "replenish_add_buff_success",
     90			IBMVETH_STAT_OFF(replenish_add_buff_success) },
     91	{ "rx_invalid_buffer", IBMVETH_STAT_OFF(rx_invalid_buffer) },
     92	{ "rx_no_buffer", IBMVETH_STAT_OFF(rx_no_buffer) },
     93	{ "tx_map_failed", IBMVETH_STAT_OFF(tx_map_failed) },
     94	{ "tx_send_failed", IBMVETH_STAT_OFF(tx_send_failed) },
     95	{ "fw_enabled_ipv4_csum", IBMVETH_STAT_OFF(fw_ipv4_csum_support) },
     96	{ "fw_enabled_ipv6_csum", IBMVETH_STAT_OFF(fw_ipv6_csum_support) },
     97	{ "tx_large_packets", IBMVETH_STAT_OFF(tx_large_packets) },
     98	{ "rx_large_packets", IBMVETH_STAT_OFF(rx_large_packets) },
     99	{ "fw_enabled_large_send", IBMVETH_STAT_OFF(fw_large_send_support) }
    100};
    101
    102/* simple methods of getting data from the current rxq entry */
    103static inline u32 ibmveth_rxq_flags(struct ibmveth_adapter *adapter)
    104{
    105	return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].flags_off);
    106}
    107
    108static inline int ibmveth_rxq_toggle(struct ibmveth_adapter *adapter)
    109{
    110	return (ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_TOGGLE) >>
    111			IBMVETH_RXQ_TOGGLE_SHIFT;
    112}
    113
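/*
 * An rx queue entry is treated as pending only while the toggle bit in its
 * flags matches adapter->rx_queue.toggle; the driver flips its expected
 * toggle each time rx_queue.index wraps (see ibmveth_rxq_harvest_buffer()),
 * so entries left over from the previous pass around the ring are ignored.
 */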
    114static inline int ibmveth_rxq_pending_buffer(struct ibmveth_adapter *adapter)
    115{
    116	return ibmveth_rxq_toggle(adapter) == adapter->rx_queue.toggle;
    117}
    118
    119static inline int ibmveth_rxq_buffer_valid(struct ibmveth_adapter *adapter)
    120{
    121	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_VALID;
    122}
    123
    124static inline int ibmveth_rxq_frame_offset(struct ibmveth_adapter *adapter)
    125{
    126	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_OFF_MASK;
    127}
    128
    129static inline int ibmveth_rxq_large_packet(struct ibmveth_adapter *adapter)
    130{
    131	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_LRG_PKT;
    132}
    133
    134static inline int ibmveth_rxq_frame_length(struct ibmveth_adapter *adapter)
    135{
    136	return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].length);
    137}
    138
    139static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter)
    140{
    141	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_CSUM_GOOD;
    142}
    143
    144/* setup the initial settings for a buffer pool */
    145static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool,
    146				     u32 pool_index, u32 pool_size,
    147				     u32 buff_size, u32 pool_active)
    148{
    149	pool->size = pool_size;
    150	pool->index = pool_index;
    151	pool->buff_size = buff_size;
    152	pool->threshold = pool_size * 7 / 8;
    153	pool->active = pool_active;
    154}
    155
     156/* allocate and set up a buffer pool - called during open */
    157static int ibmveth_alloc_buffer_pool(struct ibmveth_buff_pool *pool)
    158{
    159	int i;
    160
    161	pool->free_map = kmalloc_array(pool->size, sizeof(u16), GFP_KERNEL);
    162
    163	if (!pool->free_map)
    164		return -1;
    165
    166	pool->dma_addr = kcalloc(pool->size, sizeof(dma_addr_t), GFP_KERNEL);
    167	if (!pool->dma_addr) {
    168		kfree(pool->free_map);
    169		pool->free_map = NULL;
    170		return -1;
    171	}
    172
    173	pool->skbuff = kcalloc(pool->size, sizeof(void *), GFP_KERNEL);
    174
    175	if (!pool->skbuff) {
    176		kfree(pool->dma_addr);
    177		pool->dma_addr = NULL;
    178
    179		kfree(pool->free_map);
    180		pool->free_map = NULL;
    181		return -1;
    182	}
    183
    184	for (i = 0; i < pool->size; ++i)
    185		pool->free_map[i] = i;
    186
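	/* free_map is a ring of buffer indices that are free to be posted:
	 * ibmveth_replenish_buffer_pool() consumes entries at consumer_index
	 * and ibmveth_remove_buffer_from_pool() returns indices at
	 * producer_index, with IBM_VETH_INVALID_MAP marking consumed slots.
	 */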
    187	atomic_set(&pool->available, 0);
    188	pool->producer_index = 0;
    189	pool->consumer_index = 0;
    190
    191	return 0;
    192}
    193
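/*
 * Write every cache line covering the buffer back out of the data cache
 * (PowerPC dcbf); only used when the rx_flush module parameter is set
 * ("Flush receive buffers before use").
 */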
    194static inline void ibmveth_flush_buffer(void *addr, unsigned long length)
    195{
    196	unsigned long offset;
    197
    198	for (offset = 0; offset < length; offset += SMP_CACHE_BYTES)
    199		asm("dcbfl %0,%1" :: "b" (addr), "r" (offset));
    200}
    201
    202/* replenish the buffers for a pool.  note that we don't need to
    203 * skb_reserve these since they are used for incoming...
    204 */
    205static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter,
    206					  struct ibmveth_buff_pool *pool)
    207{
    208	u32 i;
    209	u32 count = pool->size - atomic_read(&pool->available);
    210	u32 buffers_added = 0;
    211	struct sk_buff *skb;
    212	unsigned int free_index, index;
    213	u64 correlator;
    214	unsigned long lpar_rc;
    215	dma_addr_t dma_addr;
    216
    217	mb();
    218
    219	for (i = 0; i < count; ++i) {
    220		union ibmveth_buf_desc desc;
    221
    222		skb = netdev_alloc_skb(adapter->netdev, pool->buff_size);
    223
    224		if (!skb) {
    225			netdev_dbg(adapter->netdev,
    226				   "replenish: unable to allocate skb\n");
    227			adapter->replenish_no_mem++;
    228			break;
    229		}
    230
    231		free_index = pool->consumer_index;
    232		pool->consumer_index++;
    233		if (pool->consumer_index >= pool->size)
    234			pool->consumer_index = 0;
    235		index = pool->free_map[free_index];
    236
    237		BUG_ON(index == IBM_VETH_INVALID_MAP);
    238		BUG_ON(pool->skbuff[index] != NULL);
    239
    240		dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
    241				pool->buff_size, DMA_FROM_DEVICE);
    242
    243		if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
    244			goto failure;
    245
    246		pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
    247		pool->dma_addr[index] = dma_addr;
    248		pool->skbuff[index] = skb;
    249
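		/* The correlator packs the pool number in the upper 32 bits
		 * and the buffer index in the lower 32 bits; the same value
		 * comes back in the rx queue entry's correlator field (see
		 * ibmveth_rxq_get_buffer()), letting the driver find the
		 * buffer again.
		 */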
    250		correlator = ((u64)pool->index << 32) | index;
    251		*(u64 *)skb->data = correlator;
    252
    253		desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size;
    254		desc.fields.address = dma_addr;
    255
    256		if (rx_flush) {
    257			unsigned int len = min(pool->buff_size,
    258						adapter->netdev->mtu +
    259						IBMVETH_BUFF_OH);
    260			ibmveth_flush_buffer(skb->data, len);
    261		}
    262		lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address,
    263						   desc.desc);
    264
    265		if (lpar_rc != H_SUCCESS) {
    266			goto failure;
    267		} else {
    268			buffers_added++;
    269			adapter->replenish_add_buff_success++;
    270		}
    271	}
    272
    273	mb();
    274	atomic_add(buffers_added, &(pool->available));
    275	return;
    276
    277failure:
    278	pool->free_map[free_index] = index;
    279	pool->skbuff[index] = NULL;
    280	if (pool->consumer_index == 0)
    281		pool->consumer_index = pool->size - 1;
    282	else
    283		pool->consumer_index--;
    284	if (!dma_mapping_error(&adapter->vdev->dev, dma_addr))
    285		dma_unmap_single(&adapter->vdev->dev,
    286		                 pool->dma_addr[index], pool->buff_size,
    287		                 DMA_FROM_DEVICE);
    288	dev_kfree_skb_any(skb);
    289	adapter->replenish_add_buff_failure++;
    290
    291	mb();
    292	atomic_add(buffers_added, &(pool->available));
    293}
    294
    295/*
     296 * The final 8 bytes of the buffer list hold a counter of frames dropped
     297 * because there was no buffer in the buffer list capable of holding
     298 * the frame.
    299 */
    300static void ibmveth_update_rx_no_buffer(struct ibmveth_adapter *adapter)
    301{
    302	__be64 *p = adapter->buffer_list_addr + 4096 - 8;
    303
    304	adapter->rx_no_buffer = be64_to_cpup(p);
    305}
    306
    307/* replenish routine */
    308static void ibmveth_replenish_task(struct ibmveth_adapter *adapter)
    309{
    310	int i;
    311
    312	adapter->replenish_task_cycles++;
    313
    314	for (i = (IBMVETH_NUM_BUFF_POOLS - 1); i >= 0; i--) {
    315		struct ibmveth_buff_pool *pool = &adapter->rx_buff_pool[i];
    316
    317		if (pool->active &&
    318		    (atomic_read(&pool->available) < pool->threshold))
    319			ibmveth_replenish_buffer_pool(adapter, pool);
    320	}
    321
    322	ibmveth_update_rx_no_buffer(adapter);
    323}
    324
     325/* empty and free a buffer pool - also used to do cleanup in error paths */
    326static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter,
    327				     struct ibmveth_buff_pool *pool)
    328{
    329	int i;
    330
    331	kfree(pool->free_map);
    332	pool->free_map = NULL;
    333
    334	if (pool->skbuff && pool->dma_addr) {
    335		for (i = 0; i < pool->size; ++i) {
    336			struct sk_buff *skb = pool->skbuff[i];
    337			if (skb) {
    338				dma_unmap_single(&adapter->vdev->dev,
    339						 pool->dma_addr[i],
    340						 pool->buff_size,
    341						 DMA_FROM_DEVICE);
    342				dev_kfree_skb_any(skb);
    343				pool->skbuff[i] = NULL;
    344			}
    345		}
    346	}
    347
    348	if (pool->dma_addr) {
    349		kfree(pool->dma_addr);
    350		pool->dma_addr = NULL;
    351	}
    352
    353	if (pool->skbuff) {
    354		kfree(pool->skbuff);
    355		pool->skbuff = NULL;
    356	}
    357}
    358
    359/* remove a buffer from a pool */
    360static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter,
    361					    u64 correlator)
    362{
    363	unsigned int pool  = correlator >> 32;
    364	unsigned int index = correlator & 0xffffffffUL;
    365	unsigned int free_index;
    366	struct sk_buff *skb;
    367
    368	BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
    369	BUG_ON(index >= adapter->rx_buff_pool[pool].size);
    370
    371	skb = adapter->rx_buff_pool[pool].skbuff[index];
    372
    373	BUG_ON(skb == NULL);
    374
    375	adapter->rx_buff_pool[pool].skbuff[index] = NULL;
    376
    377	dma_unmap_single(&adapter->vdev->dev,
    378			 adapter->rx_buff_pool[pool].dma_addr[index],
    379			 adapter->rx_buff_pool[pool].buff_size,
    380			 DMA_FROM_DEVICE);
    381
    382	free_index = adapter->rx_buff_pool[pool].producer_index;
    383	adapter->rx_buff_pool[pool].producer_index++;
    384	if (adapter->rx_buff_pool[pool].producer_index >=
    385	    adapter->rx_buff_pool[pool].size)
    386		adapter->rx_buff_pool[pool].producer_index = 0;
    387	adapter->rx_buff_pool[pool].free_map[free_index] = index;
    388
    389	mb();
    390
    391	atomic_dec(&(adapter->rx_buff_pool[pool].available));
    392}
    393
    394/* get the current buffer on the rx queue */
    395static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *adapter)
    396{
    397	u64 correlator = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator;
    398	unsigned int pool = correlator >> 32;
    399	unsigned int index = correlator & 0xffffffffUL;
    400
    401	BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
    402	BUG_ON(index >= adapter->rx_buff_pool[pool].size);
    403
    404	return adapter->rx_buff_pool[pool].skbuff[index];
    405}
    406
    407/* recycle the current buffer on the rx queue */
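/*
 * Rather than allocating a fresh skb, the buffer is posted straight back to
 * the hypervisor with h_add_logical_lan_buffer(); returns 1 on success and 0
 * if the hypervisor rejected the buffer and it had to be removed from the
 * pool.
 */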
    408static int ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter)
    409{
    410	u32 q_index = adapter->rx_queue.index;
    411	u64 correlator = adapter->rx_queue.queue_addr[q_index].correlator;
    412	unsigned int pool = correlator >> 32;
    413	unsigned int index = correlator & 0xffffffffUL;
    414	union ibmveth_buf_desc desc;
    415	unsigned long lpar_rc;
    416	int ret = 1;
    417
    418	BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
    419	BUG_ON(index >= adapter->rx_buff_pool[pool].size);
    420
    421	if (!adapter->rx_buff_pool[pool].active) {
    422		ibmveth_rxq_harvest_buffer(adapter);
    423		ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[pool]);
    424		goto out;
    425	}
    426
    427	desc.fields.flags_len = IBMVETH_BUF_VALID |
    428		adapter->rx_buff_pool[pool].buff_size;
    429	desc.fields.address = adapter->rx_buff_pool[pool].dma_addr[index];
    430
    431	lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc);
    432
    433	if (lpar_rc != H_SUCCESS) {
    434		netdev_dbg(adapter->netdev, "h_add_logical_lan_buffer failed "
    435			   "during recycle rc=%ld", lpar_rc);
    436		ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator);
    437		ret = 0;
    438	}
    439
    440	if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
    441		adapter->rx_queue.index = 0;
    442		adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
    443	}
    444
    445out:
    446	return ret;
    447}
    448
    449static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter)
    450{
    451	ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator);
    452
    453	if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
    454		adapter->rx_queue.index = 0;
    455		adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
    456	}
    457}
    458
    459static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
    460        union ibmveth_buf_desc rxq_desc, u64 mac_address)
    461{
    462	int rc, try_again = 1;
    463
    464	/*
     465	 * After a kexec the adapter will still be open, so our attempt to
     466	 * open it will fail. If that happens, we free the adapter and try
     467	 * again, but only once.
    468	 */
    469retry:
    470	rc = h_register_logical_lan(adapter->vdev->unit_address,
    471				    adapter->buffer_list_dma, rxq_desc.desc,
    472				    adapter->filter_list_dma, mac_address);
    473
    474	if (rc != H_SUCCESS && try_again) {
    475		do {
    476			rc = h_free_logical_lan(adapter->vdev->unit_address);
    477		} while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY));
    478
    479		try_again = 0;
    480		goto retry;
    481	}
    482
    483	return rc;
    484}
    485
    486static int ibmveth_open(struct net_device *netdev)
    487{
    488	struct ibmveth_adapter *adapter = netdev_priv(netdev);
    489	u64 mac_address;
    490	int rxq_entries = 1;
    491	unsigned long lpar_rc;
    492	int rc;
    493	union ibmveth_buf_desc rxq_desc;
    494	int i;
    495	struct device *dev;
    496
    497	netdev_dbg(netdev, "open starting\n");
    498
    499	napi_enable(&adapter->napi);
    500
    501	for(i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
    502		rxq_entries += adapter->rx_buff_pool[i].size;
    503
    504	rc = -ENOMEM;
    505	adapter->buffer_list_addr = (void*) get_zeroed_page(GFP_KERNEL);
    506	if (!adapter->buffer_list_addr) {
    507		netdev_err(netdev, "unable to allocate list pages\n");
    508		goto out;
    509	}
    510
    511	adapter->filter_list_addr = (void*) get_zeroed_page(GFP_KERNEL);
    512	if (!adapter->filter_list_addr) {
    513		netdev_err(netdev, "unable to allocate filter pages\n");
    514		goto out_free_buffer_list;
    515	}
    516
    517	dev = &adapter->vdev->dev;
    518
    519	adapter->rx_queue.queue_len = sizeof(struct ibmveth_rx_q_entry) *
    520						rxq_entries;
    521	adapter->rx_queue.queue_addr =
    522		dma_alloc_coherent(dev, adapter->rx_queue.queue_len,
    523				   &adapter->rx_queue.queue_dma, GFP_KERNEL);
    524	if (!adapter->rx_queue.queue_addr)
    525		goto out_free_filter_list;
    526
    527	adapter->buffer_list_dma = dma_map_single(dev,
    528			adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL);
    529	if (dma_mapping_error(dev, adapter->buffer_list_dma)) {
    530		netdev_err(netdev, "unable to map buffer list pages\n");
    531		goto out_free_queue_mem;
    532	}
    533
    534	adapter->filter_list_dma = dma_map_single(dev,
    535			adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL);
    536	if (dma_mapping_error(dev, adapter->filter_list_dma)) {
    537		netdev_err(netdev, "unable to map filter list pages\n");
    538		goto out_unmap_buffer_list;
    539	}
    540
    541	adapter->rx_queue.index = 0;
    542	adapter->rx_queue.num_slots = rxq_entries;
    543	adapter->rx_queue.toggle = 1;
    544
    545	mac_address = ether_addr_to_u64(netdev->dev_addr);
    546
    547	rxq_desc.fields.flags_len = IBMVETH_BUF_VALID |
    548					adapter->rx_queue.queue_len;
    549	rxq_desc.fields.address = adapter->rx_queue.queue_dma;
    550
    551	netdev_dbg(netdev, "buffer list @ 0x%p\n", adapter->buffer_list_addr);
    552	netdev_dbg(netdev, "filter list @ 0x%p\n", adapter->filter_list_addr);
    553	netdev_dbg(netdev, "receive q   @ 0x%p\n", adapter->rx_queue.queue_addr);
    554
    555	h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);
    556
    557	lpar_rc = ibmveth_register_logical_lan(adapter, rxq_desc, mac_address);
    558
    559	if (lpar_rc != H_SUCCESS) {
    560		netdev_err(netdev, "h_register_logical_lan failed with %ld\n",
    561			   lpar_rc);
    562		netdev_err(netdev, "buffer TCE:0x%llx filter TCE:0x%llx rxq "
    563			   "desc:0x%llx MAC:0x%llx\n",
    564				     adapter->buffer_list_dma,
    565				     adapter->filter_list_dma,
    566				     rxq_desc.desc,
    567				     mac_address);
    568		rc = -ENONET;
    569		goto out_unmap_filter_list;
    570	}
    571
    572	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
    573		if (!adapter->rx_buff_pool[i].active)
    574			continue;
    575		if (ibmveth_alloc_buffer_pool(&adapter->rx_buff_pool[i])) {
    576			netdev_err(netdev, "unable to alloc pool\n");
    577			adapter->rx_buff_pool[i].active = 0;
    578			rc = -ENOMEM;
    579			goto out_free_buffer_pools;
    580		}
    581	}
    582
    583	netdev_dbg(netdev, "registering irq 0x%x\n", netdev->irq);
    584	rc = request_irq(netdev->irq, ibmveth_interrupt, 0, netdev->name,
    585			 netdev);
    586	if (rc != 0) {
    587		netdev_err(netdev, "unable to request irq 0x%x, rc %d\n",
    588			   netdev->irq, rc);
    589		do {
    590			lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
    591		} while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));
    592
    593		goto out_free_buffer_pools;
    594	}
    595
    596	rc = -ENOMEM;
    597
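	/* Pre-allocate a single DMA-coherent bounce buffer sized for a full
	 * MTU frame; the transmit path copies small (< tx_copybreak) or
	 * otherwise unmappable packets into it so they can be sent without a
	 * per-packet TCE map/unmap.
	 */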
    598	adapter->bounce_buffer = dma_alloc_coherent(&adapter->vdev->dev,
    599						    netdev->mtu + IBMVETH_BUFF_OH,
    600						    &adapter->bounce_buffer_dma, GFP_KERNEL);
    601	if (!adapter->bounce_buffer) {
    602		netdev_err(netdev, "unable to alloc bounce buffer\n");
    603		goto out_free_irq;
    604	}
    605
    606	netdev_dbg(netdev, "initial replenish cycle\n");
    607	ibmveth_interrupt(netdev->irq, netdev);
    608
    609	netif_start_queue(netdev);
    610
    611	netdev_dbg(netdev, "open complete\n");
    612
    613	return 0;
    614
    615out_free_irq:
    616	free_irq(netdev->irq, netdev);
    617out_free_buffer_pools:
    618	while (--i >= 0) {
    619		if (adapter->rx_buff_pool[i].active)
    620			ibmveth_free_buffer_pool(adapter,
    621						 &adapter->rx_buff_pool[i]);
    622	}
    623out_unmap_filter_list:
    624	dma_unmap_single(dev, adapter->filter_list_dma, 4096,
    625			 DMA_BIDIRECTIONAL);
    626out_unmap_buffer_list:
    627	dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
    628			 DMA_BIDIRECTIONAL);
    629out_free_queue_mem:
    630	dma_free_coherent(dev, adapter->rx_queue.queue_len,
    631			  adapter->rx_queue.queue_addr,
    632			  adapter->rx_queue.queue_dma);
    633out_free_filter_list:
    634	free_page((unsigned long)adapter->filter_list_addr);
    635out_free_buffer_list:
    636	free_page((unsigned long)adapter->buffer_list_addr);
    637out:
    638	napi_disable(&adapter->napi);
    639	return rc;
    640}
    641
    642static int ibmveth_close(struct net_device *netdev)
    643{
    644	struct ibmveth_adapter *adapter = netdev_priv(netdev);
    645	struct device *dev = &adapter->vdev->dev;
    646	long lpar_rc;
    647	int i;
    648
    649	netdev_dbg(netdev, "close starting\n");
    650
    651	napi_disable(&adapter->napi);
    652
    653	if (!adapter->pool_config)
    654		netif_stop_queue(netdev);
    655
    656	h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);
    657
    658	do {
    659		lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
    660	} while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY));
    661
    662	if (lpar_rc != H_SUCCESS) {
    663		netdev_err(netdev, "h_free_logical_lan failed with %lx, "
    664			   "continuing with close\n", lpar_rc);
    665	}
    666
    667	free_irq(netdev->irq, netdev);
    668
    669	ibmveth_update_rx_no_buffer(adapter);
    670
    671	dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
    672			 DMA_BIDIRECTIONAL);
    673	free_page((unsigned long)adapter->buffer_list_addr);
    674
    675	dma_unmap_single(dev, adapter->filter_list_dma, 4096,
    676			 DMA_BIDIRECTIONAL);
    677	free_page((unsigned long)adapter->filter_list_addr);
    678
    679	dma_free_coherent(dev, adapter->rx_queue.queue_len,
    680			  adapter->rx_queue.queue_addr,
    681			  adapter->rx_queue.queue_dma);
    682
    683	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
    684		if (adapter->rx_buff_pool[i].active)
    685			ibmveth_free_buffer_pool(adapter,
    686						 &adapter->rx_buff_pool[i]);
    687
    688	dma_free_coherent(&adapter->vdev->dev,
    689			  adapter->netdev->mtu + IBMVETH_BUFF_OH,
    690			  adapter->bounce_buffer, adapter->bounce_buffer_dma);
    691
    692	netdev_dbg(netdev, "close complete\n");
    693
    694	return 0;
    695}
    696
    697static int ibmveth_set_link_ksettings(struct net_device *dev,
    698				      const struct ethtool_link_ksettings *cmd)
    699{
    700	struct ibmveth_adapter *adapter = netdev_priv(dev);
    701
    702	return ethtool_virtdev_set_link_ksettings(dev, cmd,
    703						  &adapter->speed,
    704						  &adapter->duplex);
    705}
    706
    707static int ibmveth_get_link_ksettings(struct net_device *dev,
    708				      struct ethtool_link_ksettings *cmd)
    709{
    710	struct ibmveth_adapter *adapter = netdev_priv(dev);
    711
    712	cmd->base.speed = adapter->speed;
    713	cmd->base.duplex = adapter->duplex;
    714	cmd->base.port = PORT_OTHER;
    715
    716	return 0;
    717}
    718
    719static void ibmveth_init_link_settings(struct net_device *dev)
    720{
    721	struct ibmveth_adapter *adapter = netdev_priv(dev);
    722
    723	adapter->speed = SPEED_1000;
    724	adapter->duplex = DUPLEX_FULL;
    725}
    726
    727static void netdev_get_drvinfo(struct net_device *dev,
    728			       struct ethtool_drvinfo *info)
    729{
    730	strlcpy(info->driver, ibmveth_driver_name, sizeof(info->driver));
    731	strlcpy(info->version, ibmveth_driver_version, sizeof(info->version));
    732}
    733
    734static netdev_features_t ibmveth_fix_features(struct net_device *dev,
    735	netdev_features_t features)
    736{
    737	/*
    738	 * Since the ibmveth firmware interface does not have the
    739	 * concept of separate tx/rx checksum offload enable, if rx
    740	 * checksum is disabled we also have to disable tx checksum
    741	 * offload. Once we disable rx checksum offload, we are no
    742	 * longer allowed to send tx buffers that are not properly
    743	 * checksummed.
    744	 */
    745
    746	if (!(features & NETIF_F_RXCSUM))
    747		features &= ~NETIF_F_CSUM_MASK;
    748
    749	return features;
    750}
    751
    752static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
    753{
    754	struct ibmveth_adapter *adapter = netdev_priv(dev);
    755	unsigned long set_attr, clr_attr, ret_attr;
    756	unsigned long set_attr6, clr_attr6;
    757	long ret, ret4, ret6;
    758	int rc1 = 0, rc2 = 0;
    759	int restart = 0;
    760
    761	if (netif_running(dev)) {
    762		restart = 1;
    763		adapter->pool_config = 1;
    764		ibmveth_close(dev);
    765		adapter->pool_config = 0;
    766	}
    767
    768	set_attr = 0;
    769	clr_attr = 0;
    770	set_attr6 = 0;
    771	clr_attr6 = 0;
    772
    773	if (data) {
    774		set_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
    775		set_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
    776	} else {
    777		clr_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
    778		clr_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM;
    779	}
    780
    781	ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);
    782
    783	if (ret == H_SUCCESS &&
    784	    (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) {
    785		ret4 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
    786					 set_attr, &ret_attr);
    787
    788		if (ret4 != H_SUCCESS) {
    789			netdev_err(dev, "unable to change IPv4 checksum "
    790					"offload settings. %d rc=%ld\n",
    791					data, ret4);
    792
    793			h_illan_attributes(adapter->vdev->unit_address,
    794					   set_attr, clr_attr, &ret_attr);
    795
    796			if (data == 1)
    797				dev->features &= ~NETIF_F_IP_CSUM;
    798
    799		} else {
    800			adapter->fw_ipv4_csum_support = data;
    801		}
    802
    803		ret6 = h_illan_attributes(adapter->vdev->unit_address,
    804					 clr_attr6, set_attr6, &ret_attr);
    805
    806		if (ret6 != H_SUCCESS) {
    807			netdev_err(dev, "unable to change IPv6 checksum "
    808					"offload settings. %d rc=%ld\n",
    809					data, ret6);
    810
    811			h_illan_attributes(adapter->vdev->unit_address,
    812					   set_attr6, clr_attr6, &ret_attr);
    813
    814			if (data == 1)
    815				dev->features &= ~NETIF_F_IPV6_CSUM;
    816
    817		} else
    818			adapter->fw_ipv6_csum_support = data;
    819
    820		if (ret4 == H_SUCCESS || ret6 == H_SUCCESS)
    821			adapter->rx_csum = data;
    822		else
    823			rc1 = -EIO;
    824	} else {
    825		rc1 = -EIO;
    826		netdev_err(dev, "unable to change checksum offload settings."
    827				     " %d rc=%ld ret_attr=%lx\n", data, ret,
    828				     ret_attr);
    829	}
    830
    831	if (restart)
    832		rc2 = ibmveth_open(dev);
    833
    834	return rc1 ? rc1 : rc2;
    835}
    836
    837static int ibmveth_set_tso(struct net_device *dev, u32 data)
    838{
    839	struct ibmveth_adapter *adapter = netdev_priv(dev);
    840	unsigned long set_attr, clr_attr, ret_attr;
    841	long ret1, ret2;
    842	int rc1 = 0, rc2 = 0;
    843	int restart = 0;
    844
    845	if (netif_running(dev)) {
    846		restart = 1;
    847		adapter->pool_config = 1;
    848		ibmveth_close(dev);
    849		adapter->pool_config = 0;
    850	}
    851
    852	set_attr = 0;
    853	clr_attr = 0;
    854
    855	if (data)
    856		set_attr = IBMVETH_ILLAN_LRG_SR_ENABLED;
    857	else
    858		clr_attr = IBMVETH_ILLAN_LRG_SR_ENABLED;
    859
    860	ret1 = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);
    861
    862	if (ret1 == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) &&
    863	    !old_large_send) {
    864		ret2 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
    865					  set_attr, &ret_attr);
    866
    867		if (ret2 != H_SUCCESS) {
    868			netdev_err(dev, "unable to change tso settings. %d rc=%ld\n",
    869				   data, ret2);
    870
    871			h_illan_attributes(adapter->vdev->unit_address,
    872					   set_attr, clr_attr, &ret_attr);
    873
    874			if (data == 1)
    875				dev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
    876			rc1 = -EIO;
    877
    878		} else {
    879			adapter->fw_large_send_support = data;
    880			adapter->large_send = data;
    881		}
    882	} else {
     883		/* Older firmware versions of large send offload do not
     884		 * support TCP over IPv6
    885		 */
    886		if (data == 1) {
    887			dev->features &= ~NETIF_F_TSO6;
    888			netdev_info(dev, "TSO feature requires all partitions to have updated driver");
    889		}
    890		adapter->large_send = data;
    891	}
    892
    893	if (restart)
    894		rc2 = ibmveth_open(dev);
    895
    896	return rc1 ? rc1 : rc2;
    897}
    898
    899static int ibmveth_set_features(struct net_device *dev,
    900	netdev_features_t features)
    901{
    902	struct ibmveth_adapter *adapter = netdev_priv(dev);
    903	int rx_csum = !!(features & NETIF_F_RXCSUM);
    904	int large_send = !!(features & (NETIF_F_TSO | NETIF_F_TSO6));
    905	int rc1 = 0, rc2 = 0;
    906
    907	if (rx_csum != adapter->rx_csum) {
    908		rc1 = ibmveth_set_csum_offload(dev, rx_csum);
    909		if (rc1 && !adapter->rx_csum)
    910			dev->features =
    911				features & ~(NETIF_F_CSUM_MASK |
    912					     NETIF_F_RXCSUM);
    913	}
    914
    915	if (large_send != adapter->large_send) {
    916		rc2 = ibmveth_set_tso(dev, large_send);
    917		if (rc2 && !adapter->large_send)
    918			dev->features =
    919				features & ~(NETIF_F_TSO | NETIF_F_TSO6);
    920	}
    921
    922	return rc1 ? rc1 : rc2;
    923}
    924
    925static void ibmveth_get_strings(struct net_device *dev, u32 stringset, u8 *data)
    926{
    927	int i;
    928
    929	if (stringset != ETH_SS_STATS)
    930		return;
    931
    932	for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++, data += ETH_GSTRING_LEN)
    933		memcpy(data, ibmveth_stats[i].name, ETH_GSTRING_LEN);
    934}
    935
    936static int ibmveth_get_sset_count(struct net_device *dev, int sset)
    937{
    938	switch (sset) {
    939	case ETH_SS_STATS:
    940		return ARRAY_SIZE(ibmveth_stats);
    941	default:
    942		return -EOPNOTSUPP;
    943	}
    944}
    945
    946static void ibmveth_get_ethtool_stats(struct net_device *dev,
    947				      struct ethtool_stats *stats, u64 *data)
    948{
    949	int i;
    950	struct ibmveth_adapter *adapter = netdev_priv(dev);
    951
    952	for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++)
    953		data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset);
    954}
    955
    956static const struct ethtool_ops netdev_ethtool_ops = {
    957	.get_drvinfo		         = netdev_get_drvinfo,
    958	.get_link		         = ethtool_op_get_link,
    959	.get_strings		         = ibmveth_get_strings,
    960	.get_sset_count		         = ibmveth_get_sset_count,
    961	.get_ethtool_stats	         = ibmveth_get_ethtool_stats,
    962	.get_link_ksettings	         = ibmveth_get_link_ksettings,
    963	.set_link_ksettings              = ibmveth_set_link_ksettings,
    964};
    965
    966static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
    967{
    968	return -EOPNOTSUPP;
    969}
    970
    971static int ibmveth_send(struct ibmveth_adapter *adapter,
    972			union ibmveth_buf_desc *descs, unsigned long mss)
    973{
    974	unsigned long correlator;
    975	unsigned int retry_count;
    976	unsigned long ret;
    977
    978	/*
    979	 * The retry count sets a maximum for the number of broadcast and
    980	 * multicast destinations within the system.
    981	 */
    982	retry_count = 1024;
    983	correlator = 0;
    984	do {
    985		ret = h_send_logical_lan(adapter->vdev->unit_address,
    986					     descs[0].desc, descs[1].desc,
    987					     descs[2].desc, descs[3].desc,
    988					     descs[4].desc, descs[5].desc,
    989					     correlator, &correlator, mss,
    990					     adapter->fw_large_send_support);
    991	} while ((ret == H_BUSY) && (retry_count--));
    992
    993	if (ret != H_SUCCESS && ret != H_DROPPED) {
    994		netdev_err(adapter->netdev, "tx: h_send_logical_lan failed "
    995			   "with rc=%ld\n", ret);
    996		return 1;
    997	}
    998
    999	return 0;
   1000}
   1001
   1002static int ibmveth_is_packet_unsupported(struct sk_buff *skb,
   1003					 struct net_device *netdev)
   1004{
   1005	struct ethhdr *ether_header;
   1006	int ret = 0;
   1007
   1008	ether_header = eth_hdr(skb);
   1009
   1010	if (ether_addr_equal(ether_header->h_dest, netdev->dev_addr)) {
   1011		netdev_dbg(netdev, "veth doesn't support loopback packets, dropping packet.\n");
   1012		netdev->stats.tx_dropped++;
   1013		ret = -EOPNOTSUPP;
   1014	}
   1015
   1016	return ret;
   1017}
   1018
   1019static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
   1020				      struct net_device *netdev)
   1021{
   1022	struct ibmveth_adapter *adapter = netdev_priv(netdev);
   1023	unsigned int desc_flags;
   1024	union ibmveth_buf_desc descs[6];
   1025	int last, i;
   1026	int force_bounce = 0;
   1027	dma_addr_t dma_addr;
   1028	unsigned long mss = 0;
   1029
   1030	if (ibmveth_is_packet_unsupported(skb, netdev))
   1031		goto out;
   1032
   1033	/* veth doesn't handle frag_list, so linearize the skb.
    1034	 * When GRO is enabled, SKBs can have a frag_list.
   1035	 */
   1036	if (adapter->is_active_trunk &&
   1037	    skb_has_frag_list(skb) && __skb_linearize(skb)) {
   1038		netdev->stats.tx_dropped++;
   1039		goto out;
   1040	}
   1041
   1042	/*
   1043	 * veth handles a maximum of 6 segments including the header, so
   1044	 * we have to linearize the skb if there are more than this.
   1045	 */
   1046	if (skb_shinfo(skb)->nr_frags > 5 && __skb_linearize(skb)) {
   1047		netdev->stats.tx_dropped++;
   1048		goto out;
   1049	}
   1050
   1051	/* veth can't checksum offload UDP */
   1052	if (skb->ip_summed == CHECKSUM_PARTIAL &&
   1053	    ((skb->protocol == htons(ETH_P_IP) &&
   1054	      ip_hdr(skb)->protocol != IPPROTO_TCP) ||
   1055	     (skb->protocol == htons(ETH_P_IPV6) &&
   1056	      ipv6_hdr(skb)->nexthdr != IPPROTO_TCP)) &&
   1057	    skb_checksum_help(skb)) {
   1058
   1059		netdev_err(netdev, "tx: failed to checksum packet\n");
   1060		netdev->stats.tx_dropped++;
   1061		goto out;
   1062	}
   1063
   1064	desc_flags = IBMVETH_BUF_VALID;
   1065
   1066	if (skb->ip_summed == CHECKSUM_PARTIAL) {
   1067		unsigned char *buf = skb_transport_header(skb) +
   1068						skb->csum_offset;
   1069
   1070		desc_flags |= (IBMVETH_BUF_NO_CSUM | IBMVETH_BUF_CSUM_GOOD);
   1071
   1072		/* Need to zero out the checksum */
   1073		buf[0] = 0;
   1074		buf[1] = 0;
   1075
   1076		if (skb_is_gso(skb) && adapter->fw_large_send_support)
   1077			desc_flags |= IBMVETH_BUF_LRG_SND;
   1078	}
   1079
   1080retry_bounce:
   1081	memset(descs, 0, sizeof(descs));
   1082
   1083	/*
    1084	 * If a linear packet is below the tx_copybreak threshold then
   1085	 * copy it into the static bounce buffer. This avoids the
   1086	 * cost of a TCE insert and remove.
   1087	 */
   1088	if (force_bounce || (!skb_is_nonlinear(skb) &&
   1089				(skb->len < tx_copybreak))) {
   1090		skb_copy_from_linear_data(skb, adapter->bounce_buffer,
   1091					  skb->len);
   1092
   1093		descs[0].fields.flags_len = desc_flags | skb->len;
   1094		descs[0].fields.address = adapter->bounce_buffer_dma;
   1095
   1096		if (ibmveth_send(adapter, descs, 0)) {
   1097			adapter->tx_send_failed++;
   1098			netdev->stats.tx_dropped++;
   1099		} else {
   1100			netdev->stats.tx_packets++;
   1101			netdev->stats.tx_bytes += skb->len;
   1102		}
   1103
   1104		goto out;
   1105	}
   1106
   1107	/* Map the header */
   1108	dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
   1109				  skb_headlen(skb), DMA_TO_DEVICE);
   1110	if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
   1111		goto map_failed;
   1112
   1113	descs[0].fields.flags_len = desc_flags | skb_headlen(skb);
   1114	descs[0].fields.address = dma_addr;
   1115
   1116	/* Map the frags */
   1117	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
   1118		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
   1119
   1120		dma_addr = skb_frag_dma_map(&adapter->vdev->dev, frag, 0,
   1121					    skb_frag_size(frag), DMA_TO_DEVICE);
   1122
   1123		if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
   1124			goto map_failed_frags;
   1125
   1126		descs[i+1].fields.flags_len = desc_flags | skb_frag_size(frag);
   1127		descs[i+1].fields.address = dma_addr;
   1128	}
   1129
   1130	if (skb->ip_summed == CHECKSUM_PARTIAL && skb_is_gso(skb)) {
   1131		if (adapter->fw_large_send_support) {
   1132			mss = (unsigned long)skb_shinfo(skb)->gso_size;
   1133			adapter->tx_large_packets++;
   1134		} else if (!skb_is_gso_v6(skb)) {
   1135			/* Put -1 in the IP checksum to tell phyp it
   1136			 * is a largesend packet. Put the mss in
   1137			 * the TCP checksum.
   1138			 */
   1139			ip_hdr(skb)->check = 0xffff;
   1140			tcp_hdr(skb)->check =
   1141				cpu_to_be16(skb_shinfo(skb)->gso_size);
   1142			adapter->tx_large_packets++;
   1143		}
   1144	}
   1145
   1146	if (ibmveth_send(adapter, descs, mss)) {
   1147		adapter->tx_send_failed++;
   1148		netdev->stats.tx_dropped++;
   1149	} else {
   1150		netdev->stats.tx_packets++;
   1151		netdev->stats.tx_bytes += skb->len;
   1152	}
   1153
   1154	dma_unmap_single(&adapter->vdev->dev,
   1155			 descs[0].fields.address,
   1156			 descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK,
   1157			 DMA_TO_DEVICE);
   1158
   1159	for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++)
   1160		dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
   1161			       descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
   1162			       DMA_TO_DEVICE);
   1163
   1164out:
   1165	dev_consume_skb_any(skb);
   1166	return NETDEV_TX_OK;
   1167
   1168map_failed_frags:
   1169	last = i+1;
   1170	for (i = 1; i < last; i++)
   1171		dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
   1172			       descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
   1173			       DMA_TO_DEVICE);
   1174
   1175	dma_unmap_single(&adapter->vdev->dev,
   1176			 descs[0].fields.address,
   1177			 descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK,
   1178			 DMA_TO_DEVICE);
   1179map_failed:
   1180	if (!firmware_has_feature(FW_FEATURE_CMO))
   1181		netdev_err(netdev, "tx: unable to map xmit buffer\n");
   1182	adapter->tx_map_failed++;
   1183	if (skb_linearize(skb)) {
   1184		netdev->stats.tx_dropped++;
   1185		goto out;
   1186	}
   1187	force_bounce = 1;
   1188	goto retry_bounce;
   1189}
   1190
   1191static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt)
   1192{
   1193	struct tcphdr *tcph;
   1194	int offset = 0;
   1195	int hdr_len;
   1196
   1197	/* only TCP packets will be aggregated */
   1198	if (skb->protocol == htons(ETH_P_IP)) {
   1199		struct iphdr *iph = (struct iphdr *)skb->data;
   1200
   1201		if (iph->protocol == IPPROTO_TCP) {
   1202			offset = iph->ihl * 4;
   1203			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
   1204		} else {
   1205			return;
   1206		}
   1207	} else if (skb->protocol == htons(ETH_P_IPV6)) {
   1208		struct ipv6hdr *iph6 = (struct ipv6hdr *)skb->data;
   1209
   1210		if (iph6->nexthdr == IPPROTO_TCP) {
   1211			offset = sizeof(struct ipv6hdr);
   1212			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
   1213		} else {
   1214			return;
   1215		}
   1216	} else {
   1217		return;
   1218	}
    1219	/* if the mss was not passed via the Large Packet bit / mss field in the
    1220	 * rx buffer, expect it to have been written into the tcp header checksum.
   1221	 */
   1222	tcph = (struct tcphdr *)(skb->data + offset);
   1223	if (lrg_pkt) {
   1224		skb_shinfo(skb)->gso_size = mss;
   1225	} else if (offset) {
   1226		skb_shinfo(skb)->gso_size = ntohs(tcph->check);
   1227		tcph->check = 0;
   1228	}
   1229
   1230	if (skb_shinfo(skb)->gso_size) {
   1231		hdr_len = offset + tcph->doff * 4;
   1232		skb_shinfo(skb)->gso_segs =
   1233				DIV_ROUND_UP(skb->len - hdr_len,
   1234					     skb_shinfo(skb)->gso_size);
   1235	}
   1236}
   1237
   1238static void ibmveth_rx_csum_helper(struct sk_buff *skb,
   1239				   struct ibmveth_adapter *adapter)
   1240{
   1241	struct iphdr *iph = NULL;
   1242	struct ipv6hdr *iph6 = NULL;
   1243	__be16 skb_proto = 0;
   1244	u16 iphlen = 0;
   1245	u16 iph_proto = 0;
   1246	u16 tcphdrlen = 0;
   1247
   1248	skb_proto = be16_to_cpu(skb->protocol);
   1249
   1250	if (skb_proto == ETH_P_IP) {
   1251		iph = (struct iphdr *)skb->data;
   1252
   1253		/* If the IP checksum is not offloaded and if the packet
   1254		 *  is large send, the checksum must be rebuilt.
   1255		 */
   1256		if (iph->check == 0xffff) {
   1257			iph->check = 0;
   1258			iph->check = ip_fast_csum((unsigned char *)iph,
   1259						  iph->ihl);
   1260		}
   1261
   1262		iphlen = iph->ihl * 4;
   1263		iph_proto = iph->protocol;
   1264	} else if (skb_proto == ETH_P_IPV6) {
   1265		iph6 = (struct ipv6hdr *)skb->data;
   1266		iphlen = sizeof(struct ipv6hdr);
   1267		iph_proto = iph6->nexthdr;
   1268	}
   1269
    1270	/* When CSO is enabled the TCP checksum may have been set to zero by
    1271	 * the sender, given that we zeroed out the TCP checksum field in the
    1272	 * transmit path (see the ibmveth_start_xmit routine). In this case set
   1273	 * up CHECKSUM_PARTIAL. If the packet is forwarded, the checksum will
   1274	 * then be recalculated by the destination NIC (CSO must be enabled
   1275	 * on the destination NIC).
   1276	 *
   1277	 * In an OVS environment, when a flow is not cached, specifically for a
   1278	 * new TCP connection, the first packet information is passed up to
   1279	 * the user space for finding a flow. During this process, OVS computes
   1280	 * checksum on the first packet when CHECKSUM_PARTIAL flag is set.
   1281	 *
   1282	 * So, re-compute TCP pseudo header checksum when configured for
   1283	 * trunk mode.
   1284	 */
   1285	if (iph_proto == IPPROTO_TCP) {
   1286		struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen);
   1287		if (tcph->check == 0x0000) {
   1288			/* Recompute TCP pseudo header checksum  */
   1289			if (adapter->is_active_trunk) {
   1290				tcphdrlen = skb->len - iphlen;
   1291				if (skb_proto == ETH_P_IP)
   1292					tcph->check =
   1293					 ~csum_tcpudp_magic(iph->saddr,
   1294					iph->daddr, tcphdrlen, iph_proto, 0);
   1295				else if (skb_proto == ETH_P_IPV6)
   1296					tcph->check =
   1297					 ~csum_ipv6_magic(&iph6->saddr,
   1298					&iph6->daddr, tcphdrlen, iph_proto, 0);
   1299			}
   1300			/* Setup SKB fields for checksum offload */
   1301			skb_partial_csum_set(skb, iphlen,
   1302					     offsetof(struct tcphdr, check));
   1303			skb_reset_network_header(skb);
   1304		}
   1305	}
   1306}
   1307
   1308static int ibmveth_poll(struct napi_struct *napi, int budget)
   1309{
   1310	struct ibmveth_adapter *adapter =
   1311			container_of(napi, struct ibmveth_adapter, napi);
   1312	struct net_device *netdev = adapter->netdev;
   1313	int frames_processed = 0;
   1314	unsigned long lpar_rc;
   1315	u16 mss = 0;
   1316
   1317	while (frames_processed < budget) {
   1318		if (!ibmveth_rxq_pending_buffer(adapter))
   1319			break;
   1320
   1321		smp_rmb();
   1322		if (!ibmveth_rxq_buffer_valid(adapter)) {
   1323			wmb(); /* suggested by larson1 */
   1324			adapter->rx_invalid_buffer++;
   1325			netdev_dbg(netdev, "recycling invalid buffer\n");
   1326			ibmveth_rxq_recycle_buffer(adapter);
   1327		} else {
   1328			struct sk_buff *skb, *new_skb;
   1329			int length = ibmveth_rxq_frame_length(adapter);
   1330			int offset = ibmveth_rxq_frame_offset(adapter);
   1331			int csum_good = ibmveth_rxq_csum_good(adapter);
   1332			int lrg_pkt = ibmveth_rxq_large_packet(adapter);
   1333			__sum16 iph_check = 0;
   1334
   1335			skb = ibmveth_rxq_get_buffer(adapter);
   1336
   1337			/* if the large packet bit is set in the rx queue
   1338			 * descriptor, the mss will be written by PHYP eight
   1339			 * bytes from the start of the rx buffer, which is
   1340			 * skb->data at this stage
   1341			 */
   1342			if (lrg_pkt) {
   1343				__be64 *rxmss = (__be64 *)(skb->data + 8);
   1344
   1345				mss = (u16)be64_to_cpu(*rxmss);
   1346			}
   1347
   1348			new_skb = NULL;
   1349			if (length < rx_copybreak)
   1350				new_skb = netdev_alloc_skb(netdev, length);
   1351
   1352			if (new_skb) {
   1353				skb_copy_to_linear_data(new_skb,
   1354							skb->data + offset,
   1355							length);
   1356				if (rx_flush)
   1357					ibmveth_flush_buffer(skb->data,
   1358						length + offset);
   1359				if (!ibmveth_rxq_recycle_buffer(adapter))
   1360					kfree_skb(skb);
   1361				skb = new_skb;
   1362			} else {
   1363				ibmveth_rxq_harvest_buffer(adapter);
   1364				skb_reserve(skb, offset);
   1365			}
   1366
   1367			skb_put(skb, length);
   1368			skb->protocol = eth_type_trans(skb, netdev);
   1369
   1370			/* PHYP without PLSO support places a -1 in the ip
   1371			 * checksum for large send frames.
   1372			 */
   1373			if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
   1374				struct iphdr *iph = (struct iphdr *)skb->data;
   1375
   1376				iph_check = iph->check;
   1377			}
   1378
   1379			if ((length > netdev->mtu + ETH_HLEN) ||
   1380			    lrg_pkt || iph_check == 0xffff) {
   1381				ibmveth_rx_mss_helper(skb, mss, lrg_pkt);
   1382				adapter->rx_large_packets++;
   1383			}
   1384
   1385			if (csum_good) {
   1386				skb->ip_summed = CHECKSUM_UNNECESSARY;
   1387				ibmveth_rx_csum_helper(skb, adapter);
   1388			}
   1389
   1390			napi_gro_receive(napi, skb);	/* send it up */
   1391
   1392			netdev->stats.rx_packets++;
   1393			netdev->stats.rx_bytes += length;
   1394			frames_processed++;
   1395		}
   1396	}
   1397
   1398	ibmveth_replenish_task(adapter);
   1399
   1400	if (frames_processed < budget) {
   1401		napi_complete_done(napi, frames_processed);
   1402
   1403		/* We think we are done - reenable interrupts,
   1404		 * then check once more to make sure we are done.
   1405		 */
   1406		lpar_rc = h_vio_signal(adapter->vdev->unit_address,
   1407				       VIO_IRQ_ENABLE);
   1408
   1409		BUG_ON(lpar_rc != H_SUCCESS);
   1410
   1411		if (ibmveth_rxq_pending_buffer(adapter) &&
   1412		    napi_reschedule(napi)) {
   1413			lpar_rc = h_vio_signal(adapter->vdev->unit_address,
   1414					       VIO_IRQ_DISABLE);
   1415		}
   1416	}
   1417
   1418	return frames_processed;
   1419}
   1420
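/*
 * The interrupt handler only masks further virtual interrupts and schedules
 * NAPI; ibmveth_poll() re-enables them with h_vio_signal(..., VIO_IRQ_ENABLE)
 * once the receive queue has been drained.
 */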
   1421static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance)
   1422{
   1423	struct net_device *netdev = dev_instance;
   1424	struct ibmveth_adapter *adapter = netdev_priv(netdev);
   1425	unsigned long lpar_rc;
   1426
   1427	if (napi_schedule_prep(&adapter->napi)) {
   1428		lpar_rc = h_vio_signal(adapter->vdev->unit_address,
   1429				       VIO_IRQ_DISABLE);
   1430		BUG_ON(lpar_rc != H_SUCCESS);
   1431		__napi_schedule(&adapter->napi);
   1432	}
   1433	return IRQ_HANDLED;
   1434}
   1435
   1436static void ibmveth_set_multicast_list(struct net_device *netdev)
   1437{
   1438	struct ibmveth_adapter *adapter = netdev_priv(netdev);
   1439	unsigned long lpar_rc;
   1440
   1441	if ((netdev->flags & IFF_PROMISC) ||
   1442	    (netdev_mc_count(netdev) > adapter->mcastFilterSize)) {
   1443		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
   1444					   IbmVethMcastEnableRecv |
   1445					   IbmVethMcastDisableFiltering,
   1446					   0);
   1447		if (lpar_rc != H_SUCCESS) {
   1448			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
   1449				   "entering promisc mode\n", lpar_rc);
   1450		}
   1451	} else {
   1452		struct netdev_hw_addr *ha;
   1453		/* clear the filter table & disable filtering */
   1454		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
   1455					   IbmVethMcastEnableRecv |
   1456					   IbmVethMcastDisableFiltering |
   1457					   IbmVethMcastClearFilterTable,
   1458					   0);
   1459		if (lpar_rc != H_SUCCESS) {
   1460			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
   1461				   "attempting to clear filter table\n",
   1462				   lpar_rc);
   1463		}
   1464		/* add the addresses to the filter table */
   1465		netdev_for_each_mc_addr(ha, netdev) {
   1466			/* add the multicast address to the filter table */
   1467			u64 mcast_addr;
   1468			mcast_addr = ether_addr_to_u64(ha->addr);
   1469			lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
   1470						   IbmVethMcastAddFilter,
   1471						   mcast_addr);
   1472			if (lpar_rc != H_SUCCESS) {
   1473				netdev_err(netdev, "h_multicast_ctrl rc=%ld "
   1474					   "when adding an entry to the filter "
   1475					   "table\n", lpar_rc);
   1476			}
   1477		}
   1478
   1479		/* re-enable filtering */
   1480		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
   1481					   IbmVethMcastEnableFiltering,
   1482					   0);
   1483		if (lpar_rc != H_SUCCESS) {
   1484			netdev_err(netdev, "h_multicast_ctrl rc=%ld when "
   1485				   "enabling filtering\n", lpar_rc);
   1486		}
   1487	}
   1488}
   1489
   1490static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
   1491{
   1492	struct ibmveth_adapter *adapter = netdev_priv(dev);
   1493	struct vio_dev *viodev = adapter->vdev;
   1494	int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
   1495	int i, rc;
   1496	int need_restart = 0;
   1497
   1498	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
   1499		if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size)
   1500			break;
   1501
   1502	if (i == IBMVETH_NUM_BUFF_POOLS)
   1503		return -EINVAL;
   1504
    1505	/* Deactivate all the buffer pools so that the next loop can activate
    1506	 * only the buffer pools necessary to hold the new MTU */
   1507	if (netif_running(adapter->netdev)) {
   1508		need_restart = 1;
   1509		adapter->pool_config = 1;
   1510		ibmveth_close(adapter->netdev);
   1511		adapter->pool_config = 0;
   1512	}
   1513
   1514	/* Look for an active buffer pool that can hold the new MTU */
   1515	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
   1516		adapter->rx_buff_pool[i].active = 1;
   1517
   1518		if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size) {
   1519			dev->mtu = new_mtu;
   1520			vio_cmo_set_dev_desired(viodev,
   1521						ibmveth_get_desired_dma
   1522						(viodev));
   1523			if (need_restart) {
   1524				return ibmveth_open(adapter->netdev);
   1525			}
   1526			return 0;
   1527		}
   1528	}
   1529
   1530	if (need_restart && (rc = ibmveth_open(adapter->netdev)))
   1531		return rc;
   1532
   1533	return -EINVAL;
   1534}
   1535
   1536#ifdef CONFIG_NET_POLL_CONTROLLER
   1537static void ibmveth_poll_controller(struct net_device *dev)
   1538{
   1539	ibmveth_replenish_task(netdev_priv(dev));
   1540	ibmveth_interrupt(dev->irq, dev);
   1541}
   1542#endif
   1543
   1544/**
   1545 * ibmveth_get_desired_dma - Calculate IO memory desired by the driver
   1546 *
   1547 * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
   1548 *
   1549 * Return value:
   1550 *	Number of bytes of IO data the driver will need to perform well.
   1551 */
   1552static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
   1553{
   1554	struct net_device *netdev = dev_get_drvdata(&vdev->dev);
   1555	struct ibmveth_adapter *adapter;
   1556	struct iommu_table *tbl;
   1557	unsigned long ret;
   1558	int i;
   1559	int rxqentries = 1;
   1560
   1561	tbl = get_iommu_table_base(&vdev->dev);
   1562
    1563	/* netdev inits at probe time along with the structures we need below */
   1564	if (netdev == NULL)
   1565		return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT, tbl);
   1566
   1567	adapter = netdev_priv(netdev);
   1568
   1569	ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
   1570	ret += IOMMU_PAGE_ALIGN(netdev->mtu, tbl);
   1571
   1572	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
   1573		/* add the size of the active receive buffers */
   1574		if (adapter->rx_buff_pool[i].active)
   1575			ret +=
   1576			    adapter->rx_buff_pool[i].size *
   1577			    IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i].
   1578					     buff_size, tbl);
   1579		rxqentries += adapter->rx_buff_pool[i].size;
   1580	}
   1581	/* add the size of the receive queue entries */
   1582	ret += IOMMU_PAGE_ALIGN(
   1583		rxqentries * sizeof(struct ibmveth_rx_q_entry), tbl);
   1584
   1585	return ret;
   1586}
   1587
   1588static int ibmveth_set_mac_addr(struct net_device *dev, void *p)
   1589{
   1590	struct ibmveth_adapter *adapter = netdev_priv(dev);
   1591	struct sockaddr *addr = p;
   1592	u64 mac_address;
   1593	int rc;
   1594
   1595	if (!is_valid_ether_addr(addr->sa_data))
   1596		return -EADDRNOTAVAIL;
   1597
   1598	mac_address = ether_addr_to_u64(addr->sa_data);
   1599	rc = h_change_logical_lan_mac(adapter->vdev->unit_address, mac_address);
   1600	if (rc) {
   1601		netdev_err(adapter->netdev, "h_change_logical_lan_mac failed with rc=%d\n", rc);
   1602		return rc;
   1603	}
   1604
   1605	eth_hw_addr_set(dev, addr->sa_data);
   1606
   1607	return 0;
   1608}
   1609
   1610static const struct net_device_ops ibmveth_netdev_ops = {
   1611	.ndo_open		= ibmveth_open,
   1612	.ndo_stop		= ibmveth_close,
   1613	.ndo_start_xmit		= ibmveth_start_xmit,
   1614	.ndo_set_rx_mode	= ibmveth_set_multicast_list,
   1615	.ndo_eth_ioctl		= ibmveth_ioctl,
   1616	.ndo_change_mtu		= ibmveth_change_mtu,
   1617	.ndo_fix_features	= ibmveth_fix_features,
   1618	.ndo_set_features	= ibmveth_set_features,
   1619	.ndo_validate_addr	= eth_validate_addr,
   1620	.ndo_set_mac_address    = ibmveth_set_mac_addr,
   1621#ifdef CONFIG_NET_POLL_CONTROLLER
   1622	.ndo_poll_controller	= ibmveth_poll_controller,
   1623#endif
   1624};
   1625
   1626static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
   1627{
   1628	int rc, i, mac_len;
   1629	struct net_device *netdev;
   1630	struct ibmveth_adapter *adapter;
   1631	unsigned char *mac_addr_p;
   1632	__be32 *mcastFilterSize_p;
   1633	long ret;
   1634	unsigned long ret_attr;
   1635
   1636	dev_dbg(&dev->dev, "entering ibmveth_probe for UA 0x%x\n",
   1637		dev->unit_address);
   1638
   1639	mac_addr_p = (unsigned char *)vio_get_attribute(dev, VETH_MAC_ADDR,
   1640							&mac_len);
   1641	if (!mac_addr_p) {
   1642		dev_err(&dev->dev, "Can't find VETH_MAC_ADDR attribute\n");
   1643		return -EINVAL;
   1644	}
   1645	/* Workaround for old/broken pHyp */
   1646	if (mac_len == 8)
   1647		mac_addr_p += 2;
   1648	else if (mac_len != 6) {
   1649		dev_err(&dev->dev, "VETH_MAC_ADDR attribute wrong len %d\n",
   1650			mac_len);
   1651		return -EINVAL;
   1652	}
   1653
   1654	mcastFilterSize_p = (__be32 *)vio_get_attribute(dev,
   1655							VETH_MCAST_FILTER_SIZE,
   1656							NULL);
   1657	if (!mcastFilterSize_p) {
    1658		dev_err(&dev->dev,
    1659			"Can't find VETH_MCAST_FILTER_SIZE attribute\n");
   1660		return -EINVAL;
   1661	}
   1662
   1663	netdev = alloc_etherdev(sizeof(struct ibmveth_adapter));
   1664
   1665	if (!netdev)
   1666		return -ENOMEM;
   1667
   1668	adapter = netdev_priv(netdev);
   1669	dev_set_drvdata(&dev->dev, netdev);
   1670
   1671	adapter->vdev = dev;
   1672	adapter->netdev = netdev;
   1673	adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p);
   1674	adapter->pool_config = 0;
   1675	ibmveth_init_link_settings(netdev);
   1676
   1677	netif_napi_add_weight(netdev, &adapter->napi, ibmveth_poll, 16);
   1678
   1679	netdev->irq = dev->irq;
   1680	netdev->netdev_ops = &ibmveth_netdev_ops;
   1681	netdev->ethtool_ops = &netdev_ethtool_ops;
   1682	SET_NETDEV_DEV(netdev, &dev->dev);
   1683	netdev->hw_features = NETIF_F_SG;
   1684	if (vio_get_attribute(dev, "ibm,illan-options", NULL) != NULL) {
   1685		netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
   1686				       NETIF_F_RXCSUM;
   1687	}
   1688
   1689	netdev->features |= netdev->hw_features;
   1690
   1691	ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);
   1692
   1693	/* If running older firmware, TSO should not be enabled by default */
   1694	if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_LRG_SND_SUPPORT) &&
   1695	    !old_large_send) {
   1696		netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
   1697		netdev->features |= netdev->hw_features;
   1698	} else {
   1699		netdev->hw_features |= NETIF_F_TSO;
   1700	}
   1701
   1702	adapter->is_active_trunk = false;
   1703	if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK)) {
   1704		adapter->is_active_trunk = true;
   1705		netdev->hw_features |= NETIF_F_FRAGLIST;
   1706		netdev->features |= NETIF_F_FRAGLIST;
   1707	}
   1708
   1709	netdev->min_mtu = IBMVETH_MIN_MTU;
   1710	netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH;
   1711
   1712	eth_hw_addr_set(netdev, mac_addr_p);
   1713
   1714	if (firmware_has_feature(FW_FEATURE_CMO))
   1715		memcpy(pool_count, pool_count_cmo, sizeof(pool_count));
   1716
   1717	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
   1718		struct kobject *kobj = &adapter->rx_buff_pool[i].kobj;
   1719		int error;
   1720
   1721		ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i,
   1722					 pool_count[i], pool_size[i],
   1723					 pool_active[i]);
   1724		error = kobject_init_and_add(kobj, &ktype_veth_pool,
   1725					     &dev->dev.kobj, "pool%d", i);
   1726		if (!error)
   1727			kobject_uevent(kobj, KOBJ_ADD);
   1728	}
   1729
   1730	netdev_dbg(netdev, "adapter @ 0x%p\n", adapter);
   1731	netdev_dbg(netdev, "registering netdev...\n");
   1732
   1733	ibmveth_set_features(netdev, netdev->features);
   1734
   1735	rc = register_netdev(netdev);
   1736
   1737	if (rc) {
   1738		netdev_dbg(netdev, "failed to register netdev rc=%d\n", rc);
   1739		free_netdev(netdev);
   1740		return rc;
   1741	}
   1742
   1743	netdev_dbg(netdev, "registered\n");
   1744
   1745	return 0;
   1746}
   1747
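/*
 * Teardown mirrors probe: drop the per-pool sysfs kobjects, unregister and
 * free the netdev, and clear the vio device's driver data.
 */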
   1748static void ibmveth_remove(struct vio_dev *dev)
   1749{
   1750	struct net_device *netdev = dev_get_drvdata(&dev->dev);
   1751	struct ibmveth_adapter *adapter = netdev_priv(netdev);
   1752	int i;
   1753
   1754	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
   1755		kobject_put(&adapter->rx_buff_pool[i].kobj);
   1756
   1757	unregister_netdev(netdev);
   1758
   1759	free_netdev(netdev);
   1760	dev_set_drvdata(&dev->dev, NULL);
   1761}
   1762
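/*
 * Per-pool sysfs interface.  Each receive buffer pool is exposed as a
 * "pool<N>" kobject under the vio device with three writable attributes:
 * "active", "num" (number of buffers) and "size" (buffer size in bytes).
 *
 * Illustrative usage (the unit address below is hypothetical; the pools
 * typically appear under /sys/devices/vio/<unit-address>/):
 *
 *	echo 768 > /sys/devices/vio/30000002/pool1/num
 *	echo 1   > /sys/devices/vio/30000002/pool1/active
 */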
   1763static struct attribute veth_active_attr;
   1764static struct attribute veth_num_attr;
   1765static struct attribute veth_size_attr;
   1766
   1767static ssize_t veth_pool_show(struct kobject *kobj,
   1768			      struct attribute *attr, char *buf)
   1769{
   1770	struct ibmveth_buff_pool *pool = container_of(kobj,
   1771						      struct ibmveth_buff_pool,
   1772						      kobj);
   1773
    1774	if (attr == &veth_active_attr)
    1775		return sysfs_emit(buf, "%d\n", pool->active);
    1776	else if (attr == &veth_num_attr)
    1777		return sysfs_emit(buf, "%d\n", pool->size);
    1778	else if (attr == &veth_size_attr)
    1779		return sysfs_emit(buf, "%d\n", pool->buff_size);
   1780	return 0;
   1781}
   1782
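/*
 * Stores that change a pool while the interface is running close and
 * reopen the device around the update; adapter->pool_config is set for the
 * duration so ibmveth_close()/ibmveth_open() can tell a reconfiguration
 * cycle apart from a real ifdown/ifup.  Deactivating a pool is refused
 * unless another active pool can still hold an MTU-sized packet, and every
 * store ends by kicking the interrupt handler so buffers get allocated or
 * freed.
 */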
   1783static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr,
   1784			       const char *buf, size_t count)
   1785{
   1786	struct ibmveth_buff_pool *pool = container_of(kobj,
   1787						      struct ibmveth_buff_pool,
   1788						      kobj);
   1789	struct net_device *netdev = dev_get_drvdata(kobj_to_dev(kobj->parent));
   1790	struct ibmveth_adapter *adapter = netdev_priv(netdev);
   1791	long value = simple_strtol(buf, NULL, 10);
   1792	long rc;
   1793
   1794	if (attr == &veth_active_attr) {
   1795		if (value && !pool->active) {
   1796			if (netif_running(netdev)) {
   1797				if (ibmveth_alloc_buffer_pool(pool)) {
   1798					netdev_err(netdev,
   1799						   "unable to alloc pool\n");
   1800					return -ENOMEM;
   1801				}
   1802				pool->active = 1;
   1803				adapter->pool_config = 1;
   1804				ibmveth_close(netdev);
   1805				adapter->pool_config = 0;
   1806				if ((rc = ibmveth_open(netdev)))
   1807					return rc;
   1808			} else {
   1809				pool->active = 1;
   1810			}
   1811		} else if (!value && pool->active) {
   1812			int mtu = netdev->mtu + IBMVETH_BUFF_OH;
   1813			int i;
    1814			/* Make sure some other active pool has buffers large
    1815			 * enough to hold an MTU-sized packet. */
   1816			for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
   1817				if (pool == &adapter->rx_buff_pool[i])
   1818					continue;
   1819				if (!adapter->rx_buff_pool[i].active)
   1820					continue;
   1821				if (mtu <= adapter->rx_buff_pool[i].buff_size)
   1822					break;
   1823			}
   1824
   1825			if (i == IBMVETH_NUM_BUFF_POOLS) {
   1826				netdev_err(netdev, "no active pool >= MTU\n");
   1827				return -EPERM;
   1828			}
   1829
   1830			if (netif_running(netdev)) {
   1831				adapter->pool_config = 1;
   1832				ibmveth_close(netdev);
   1833				pool->active = 0;
   1834				adapter->pool_config = 0;
   1835				if ((rc = ibmveth_open(netdev)))
   1836					return rc;
   1837			}
   1838			pool->active = 0;
   1839		}
   1840	} else if (attr == &veth_num_attr) {
   1841		if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) {
   1842			return -EINVAL;
   1843		} else {
   1844			if (netif_running(netdev)) {
   1845				adapter->pool_config = 1;
   1846				ibmveth_close(netdev);
   1847				adapter->pool_config = 0;
   1848				pool->size = value;
   1849				if ((rc = ibmveth_open(netdev)))
   1850					return rc;
   1851			} else {
   1852				pool->size = value;
   1853			}
   1854		}
   1855	} else if (attr == &veth_size_attr) {
   1856		if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE) {
   1857			return -EINVAL;
   1858		} else {
   1859			if (netif_running(netdev)) {
   1860				adapter->pool_config = 1;
   1861				ibmveth_close(netdev);
   1862				adapter->pool_config = 0;
   1863				pool->buff_size = value;
   1864				if ((rc = ibmveth_open(netdev)))
   1865					return rc;
   1866			} else {
   1867				pool->buff_size = value;
   1868			}
   1869		}
   1870	}
   1871
   1872	/* kick the interrupt handler to allocate/deallocate pools */
   1873	ibmveth_interrupt(netdev->irq, netdev);
   1874	return count;
   1875}
   1876
   1877
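/*
 * ATTR() instantiates bare struct attribute objects; veth_pool_show() and
 * veth_pool_store() dispatch on which attribute pointer they are handed.
 */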
   1878#define ATTR(_name, _mode)				\
   1879	struct attribute veth_##_name##_attr = {	\
   1880	.name = __stringify(_name), .mode = _mode,	\
   1881	};
   1882
   1883static ATTR(active, 0644);
   1884static ATTR(num, 0644);
   1885static ATTR(size, 0644);
   1886
   1887static struct attribute *veth_pool_attrs[] = {
   1888	&veth_active_attr,
   1889	&veth_num_attr,
   1890	&veth_size_attr,
   1891	NULL,
   1892};
   1893ATTRIBUTE_GROUPS(veth_pool);
   1894
   1895static const struct sysfs_ops veth_pool_ops = {
   1896	.show   = veth_pool_show,
   1897	.store  = veth_pool_store,
   1898};
   1899
   1900static struct kobj_type ktype_veth_pool = {
   1901	.release        = NULL,
   1902	.sysfs_ops      = &veth_pool_ops,
   1903	.default_groups = veth_pool_groups,
   1904};
   1905
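/*
 * The resume handler only kicks the interrupt handler once, so that any
 * receive queue entries that arrived while the partition was suspended
 * get processed.
 */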
   1906static int ibmveth_resume(struct device *dev)
   1907{
   1908	struct net_device *netdev = dev_get_drvdata(dev);
   1909	ibmveth_interrupt(netdev->irq, netdev);
   1910	return 0;
   1911}
   1912
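/* Bind to vio nodes of type "network" that are compatible with "IBM,l-lan". */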
   1913static const struct vio_device_id ibmveth_device_table[] = {
   1914	{ "network", "IBM,l-lan"},
   1915	{ "", "" }
   1916};
   1917MODULE_DEVICE_TABLE(vio, ibmveth_device_table);
   1918
   1919static const struct dev_pm_ops ibmveth_pm_ops = {
   1920	.resume = ibmveth_resume
   1921};
   1922
   1923static struct vio_driver ibmveth_driver = {
   1924	.id_table	= ibmveth_device_table,
   1925	.probe		= ibmveth_probe,
   1926	.remove		= ibmveth_remove,
   1927	.get_desired_dma = ibmveth_get_desired_dma,
   1928	.name		= ibmveth_driver_name,
   1929	.pm		= &ibmveth_pm_ops,
   1930};
   1931
   1932static int __init ibmveth_module_init(void)
   1933{
   1934	printk(KERN_DEBUG "%s: %s %s\n", ibmveth_driver_name,
   1935	       ibmveth_driver_string, ibmveth_driver_version);
   1936
   1937	return vio_register_driver(&ibmveth_driver);
   1938}
   1939
   1940static void __exit ibmveth_module_exit(void)
   1941{
   1942	vio_unregister_driver(&ibmveth_driver);
   1943}
   1944
   1945module_init(ibmveth_module_init);
   1946module_exit(ibmveth_module_exit);