cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

aoecmd.c (35924B)


      1/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
      2/*
      3 * aoecmd.c
      4 * Filesystem request handling methods
      5 */
      6
      7#include <linux/ata.h>
      8#include <linux/slab.h>
      9#include <linux/hdreg.h>
     10#include <linux/blk-mq.h>
     11#include <linux/skbuff.h>
     12#include <linux/netdevice.h>
     13#include <linux/moduleparam.h>
     14#include <linux/workqueue.h>
     15#include <linux/kthread.h>
     16#include <net/net_namespace.h>
     17#include <asm/unaligned.h>
     18#include <linux/uio.h>
     19#include "aoe.h"
     20
     21#define MAXIOC (8192)	/* default meant to avoid most soft lockups */
     22
     23static void ktcomplete(struct frame *, struct sk_buff *);
     24static int count_targets(struct aoedev *d, int *untainted);
     25
     26static struct buf *nextbuf(struct aoedev *);
     27
     28static int aoe_deadsecs = 60 * 3;
     29module_param(aoe_deadsecs, int, 0644);
     30MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
     31
     32static int aoe_maxout = 64;
     33module_param(aoe_maxout, int, 0644);
     34MODULE_PARM_DESC(aoe_maxout,
     35	"Only aoe_maxout outstanding packets for every MAC on eX.Y.");
     36
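        /*
         * Both parameters are registered with 0644 permissions, so besides
         * being set at load time they can usually be adjusted at runtime via
         * sysfs (assuming the module is named "aoe"), e.g.:
         *
         *   modprobe aoe aoe_deadsecs=180 aoe_maxout=128
         *   echo 128 > /sys/module/aoe/parameters/aoe_maxout
         */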
     37/* The number of online cpus during module initialization gives us a
     38 * convenient heuristic cap on the parallelism used for ktio threads
     39 * doing I/O completion.  It is not important that the cap equal the
     40 * actual number of running CPUs at any given time, but because of CPU
     41 * hotplug, we take care to use ncpus instead of using
     42 * num_online_cpus() after module initialization.
     43 */
     44static int ncpus;
     45
      47/* mutex used to serialize ktio thread spawning */
     47static DEFINE_MUTEX(ktio_spawn_lock);
     48
     49static wait_queue_head_t *ktiowq;
     50static struct ktstate *kts;
     51
     52/* io completion queue */
     53struct iocq_ktio {
     54	struct list_head head;
     55	spinlock_t lock;
     56};
     57static struct iocq_ktio *iocq;
     58
     59static struct page *empty_page;
     60
     61static struct sk_buff *
     62new_skb(ulong len)
     63{
     64	struct sk_buff *skb;
     65
     66	skb = alloc_skb(len + MAX_HEADER, GFP_ATOMIC);
     67	if (skb) {
     68		skb_reserve(skb, MAX_HEADER);
     69		skb_reset_mac_header(skb);
     70		skb_reset_network_header(skb);
     71		skb->protocol = __constant_htons(ETH_P_AOE);
     72		skb_checksum_none_assert(skb);
     73	}
     74	return skb;
     75}
     76
     77static struct frame *
     78getframe_deferred(struct aoedev *d, u32 tag)
     79{
     80	struct list_head *head, *pos, *nx;
     81	struct frame *f;
     82
     83	head = &d->rexmitq;
     84	list_for_each_safe(pos, nx, head) {
     85		f = list_entry(pos, struct frame, head);
     86		if (f->tag == tag) {
     87			list_del(pos);
     88			return f;
     89		}
     90	}
     91	return NULL;
     92}
     93
     94static struct frame *
     95getframe(struct aoedev *d, u32 tag)
     96{
     97	struct frame *f;
     98	struct list_head *head, *pos, *nx;
     99	u32 n;
    100
    101	n = tag % NFACTIVE;
    102	head = &d->factive[n];
    103	list_for_each_safe(pos, nx, head) {
    104		f = list_entry(pos, struct frame, head);
    105		if (f->tag == tag) {
    106			list_del(pos);
    107			return f;
    108		}
    109	}
    110	return NULL;
    111}
    112
    113/*
    114 * Leave the top bit clear so we have tagspace for userland.
    115 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
    116 * This driver reserves tag -1 to mean "unused frame."
    117 */
    118static int
    119newtag(struct aoedev *d)
    120{
    121	register ulong n;
    122
    123	n = jiffies & 0xffff;
    124	return n | (++d->lasttag & 0x7fff) << 16;
    125}
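        /*
         * Roughly, the tag produced above is laid out as:
         *   bit  31      always 0, leaving tagspace for userland as noted above
         *   bits 30..16  ++d->lasttag & 0x7fff, a per-device sequence number
         *   bits 15..0   jiffies & 0xffff, the xmit tick later read by tsince()
         */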
    126
    127static u32
    128aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
    129{
    130	u32 host_tag = newtag(d);
    131
    132	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
    133	memcpy(h->dst, t->addr, sizeof h->dst);
    134	h->type = __constant_cpu_to_be16(ETH_P_AOE);
    135	h->verfl = AOE_HVER;
    136	h->major = cpu_to_be16(d->aoemajor);
    137	h->minor = d->aoeminor;
    138	h->cmd = AOECMD_ATA;
    139	h->tag = cpu_to_be32(host_tag);
    140
    141	return host_tag;
    142}
    143
    144static inline void
    145put_lba(struct aoe_atahdr *ah, sector_t lba)
    146{
    147	ah->lba0 = lba;
    148	ah->lba1 = lba >>= 8;
    149	ah->lba2 = lba >>= 8;
    150	ah->lba3 = lba >>= 8;
    151	ah->lba4 = lba >>= 8;
    152	ah->lba5 = lba >>= 8;
    153}
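        /*
         * put_lba() spreads the 48-bit LBA across the six byte-wide lba
         * registers, least significant byte first; for non-EXT (LBA28)
         * commands, ata_rw_frameinit() below masks lba3 down to its low
         * nibble and ORs in the LBA/obsolete bits.
         */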
    154
    155static struct aoeif *
    156ifrotate(struct aoetgt *t)
    157{
    158	struct aoeif *ifp;
    159
    160	ifp = t->ifp;
    161	ifp++;
    162	if (ifp >= &t->ifs[NAOEIFS] || ifp->nd == NULL)
    163		ifp = t->ifs;
    164	if (ifp->nd == NULL)
    165		return NULL;
    166	return t->ifp = ifp;
    167}
    168
    169static void
    170skb_pool_put(struct aoedev *d, struct sk_buff *skb)
    171{
    172	__skb_queue_tail(&d->skbpool, skb);
    173}
    174
    175static struct sk_buff *
    176skb_pool_get(struct aoedev *d)
    177{
    178	struct sk_buff *skb = skb_peek(&d->skbpool);
    179
    180	if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) {
    181		__skb_unlink(skb, &d->skbpool);
    182		return skb;
    183	}
    184	if (skb_queue_len(&d->skbpool) < NSKBPOOLMAX &&
    185	    (skb = new_skb(ETH_ZLEN)))
    186		return skb;
    187
    188	return NULL;
    189}
    190
    191void
    192aoe_freetframe(struct frame *f)
    193{
    194	struct aoetgt *t;
    195
    196	t = f->t;
    197	f->buf = NULL;
    198	memset(&f->iter, 0, sizeof(f->iter));
    199	f->r_skb = NULL;
    200	f->flags = 0;
    201	list_add(&f->head, &t->ffree);
    202}
    203
    204static struct frame *
    205newtframe(struct aoedev *d, struct aoetgt *t)
    206{
    207	struct frame *f;
    208	struct sk_buff *skb;
    209	struct list_head *pos;
    210
    211	if (list_empty(&t->ffree)) {
    212		if (t->falloc >= NSKBPOOLMAX*2)
    213			return NULL;
    214		f = kcalloc(1, sizeof(*f), GFP_ATOMIC);
    215		if (f == NULL)
    216			return NULL;
    217		t->falloc++;
    218		f->t = t;
    219	} else {
    220		pos = t->ffree.next;
    221		list_del(pos);
    222		f = list_entry(pos, struct frame, head);
    223	}
    224
    225	skb = f->skb;
    226	if (skb == NULL) {
    227		f->skb = skb = new_skb(ETH_ZLEN);
    228		if (!skb) {
    229bail:			aoe_freetframe(f);
    230			return NULL;
    231		}
    232	}
    233
    234	if (atomic_read(&skb_shinfo(skb)->dataref) != 1) {
    235		skb = skb_pool_get(d);
    236		if (skb == NULL)
    237			goto bail;
    238		skb_pool_put(d, f->skb);
    239		f->skb = skb;
    240	}
    241
    242	skb->truesize -= skb->data_len;
    243	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
    244	skb_trim(skb, 0);
    245	return f;
    246}
    247
    248static struct frame *
    249newframe(struct aoedev *d)
    250{
    251	struct frame *f;
    252	struct aoetgt *t, **tt;
    253	int totout = 0;
    254	int use_tainted;
    255	int has_untainted;
    256
    257	if (!d->targets || !d->targets[0]) {
    258		printk(KERN_ERR "aoe: NULL TARGETS!\n");
    259		return NULL;
    260	}
    261	tt = d->tgt;	/* last used target */
    262	for (use_tainted = 0, has_untainted = 0;;) {
    263		tt++;
    264		if (tt >= &d->targets[d->ntargets] || !*tt)
    265			tt = d->targets;
    266		t = *tt;
    267		if (!t->taint) {
    268			has_untainted = 1;
    269			totout += t->nout;
    270		}
    271		if (t->nout < t->maxout
    272		&& (use_tainted || !t->taint)
    273		&& t->ifp->nd) {
    274			f = newtframe(d, t);
    275			if (f) {
    276				ifrotate(t);
    277				d->tgt = tt;
    278				return f;
    279			}
    280		}
    281		if (tt == d->tgt) {	/* we've looped and found nada */
    282			if (!use_tainted && !has_untainted)
    283				use_tainted = 1;
    284			else
    285				break;
    286		}
    287	}
    288	if (totout == 0) {
    289		d->kicked++;
    290		d->flags |= DEVFL_KICKME;
    291	}
    292	return NULL;
    293}
    294
    295static void
    296skb_fillup(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter)
    297{
    298	int frag = 0;
    299	struct bio_vec bv;
    300
    301	__bio_for_each_segment(bv, bio, iter, iter)
    302		skb_fill_page_desc(skb, frag++, bv.bv_page,
    303				   bv.bv_offset, bv.bv_len);
    304}
    305
    306static void
    307fhash(struct frame *f)
    308{
    309	struct aoedev *d = f->t->d;
    310	u32 n;
    311
    312	n = f->tag % NFACTIVE;
    313	list_add_tail(&f->head, &d->factive[n]);
    314}
    315
    316static void
    317ata_rw_frameinit(struct frame *f)
    318{
    319	struct aoetgt *t;
    320	struct aoe_hdr *h;
    321	struct aoe_atahdr *ah;
    322	struct sk_buff *skb;
    323	char writebit, extbit;
    324
    325	skb = f->skb;
    326	h = (struct aoe_hdr *) skb_mac_header(skb);
    327	ah = (struct aoe_atahdr *) (h + 1);
    328	skb_put(skb, sizeof(*h) + sizeof(*ah));
    329	memset(h, 0, skb->len);
    330
    331	writebit = 0x10;
    332	extbit = 0x4;
    333
    334	t = f->t;
    335	f->tag = aoehdr_atainit(t->d, t, h);
    336	fhash(f);
    337	t->nout++;
    338	f->waited = 0;
    339	f->waited_total = 0;
    340
    341	/* set up ata header */
    342	ah->scnt = f->iter.bi_size >> 9;
    343	put_lba(ah, f->iter.bi_sector);
    344	if (t->d->flags & DEVFL_EXT) {
    345		ah->aflags |= AOEAFL_EXT;
    346	} else {
    347		extbit = 0;
    348		ah->lba3 &= 0x0f;
    349		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
    350	}
    351	if (f->buf && bio_data_dir(f->buf->bio) == WRITE) {
    352		skb_fillup(skb, f->buf->bio, f->iter);
    353		ah->aflags |= AOEAFL_WRITE;
    354		skb->len += f->iter.bi_size;
    355		skb->data_len = f->iter.bi_size;
    356		skb->truesize += f->iter.bi_size;
    357		t->wpkts++;
    358	} else {
    359		t->rpkts++;
    360		writebit = 0;
    361	}
    362
    363	ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
    364	skb->dev = t->ifp->nd;
    365}
    366
    367static int
    368aoecmd_ata_rw(struct aoedev *d)
    369{
    370	struct frame *f;
    371	struct buf *buf;
    372	struct sk_buff *skb;
    373	struct sk_buff_head queue;
    374
    375	buf = nextbuf(d);
    376	if (buf == NULL)
    377		return 0;
    378	f = newframe(d);
    379	if (f == NULL)
    380		return 0;
    381
    382	/* initialize the headers & frame */
    383	f->buf = buf;
    384	f->iter = buf->iter;
    385	f->iter.bi_size = min_t(unsigned long,
    386				d->maxbcnt ?: DEFAULTBCNT,
    387				f->iter.bi_size);
    388	bio_advance_iter(buf->bio, &buf->iter, f->iter.bi_size);
    389
    390	if (!buf->iter.bi_size)
    391		d->ip.buf = NULL;
    392
    393	/* mark all tracking fields and load out */
    394	buf->nframesout += 1;
    395
    396	ata_rw_frameinit(f);
    397
    398	skb = skb_clone(f->skb, GFP_ATOMIC);
    399	if (skb) {
    400		f->sent = ktime_get();
    401		__skb_queue_head_init(&queue);
    402		__skb_queue_tail(&queue, skb);
    403		aoenet_xmit(&queue);
    404	}
    405	return 1;
    406}
    407
     408/* Some callers cannot sleep; they can call this function and transmit
     409 * the queued packets later, when interrupts are on.
     410 */
    411static void
    412aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *queue)
    413{
    414	struct aoe_hdr *h;
    415	struct aoe_cfghdr *ch;
    416	struct sk_buff *skb;
    417	struct net_device *ifp;
    418
    419	rcu_read_lock();
    420	for_each_netdev_rcu(&init_net, ifp) {
    421		dev_hold(ifp);
    422		if (!is_aoe_netif(ifp))
    423			goto cont;
    424
    425		skb = new_skb(sizeof *h + sizeof *ch);
    426		if (skb == NULL) {
    427			printk(KERN_INFO "aoe: skb alloc failure\n");
    428			goto cont;
    429		}
    430		skb_put(skb, sizeof *h + sizeof *ch);
    431		skb->dev = ifp;
    432		__skb_queue_tail(queue, skb);
    433		h = (struct aoe_hdr *) skb_mac_header(skb);
    434		memset(h, 0, sizeof *h + sizeof *ch);
    435
    436		memset(h->dst, 0xff, sizeof h->dst);
    437		memcpy(h->src, ifp->dev_addr, sizeof h->src);
    438		h->type = __constant_cpu_to_be16(ETH_P_AOE);
    439		h->verfl = AOE_HVER;
    440		h->major = cpu_to_be16(aoemajor);
    441		h->minor = aoeminor;
    442		h->cmd = AOECMD_CFG;
    443
    444cont:
    445		dev_put(ifp);
    446	}
    447	rcu_read_unlock();
    448}
    449
    450static void
    451resend(struct aoedev *d, struct frame *f)
    452{
    453	struct sk_buff *skb;
    454	struct sk_buff_head queue;
    455	struct aoe_hdr *h;
    456	struct aoetgt *t;
    457	char buf[128];
    458	u32 n;
    459
    460	t = f->t;
    461	n = newtag(d);
    462	skb = f->skb;
    463	if (ifrotate(t) == NULL) {
    464		/* probably can't happen, but set it up to fail anyway */
    465		pr_info("aoe: resend: no interfaces to rotate to.\n");
    466		ktcomplete(f, NULL);
    467		return;
    468	}
    469	h = (struct aoe_hdr *) skb_mac_header(skb);
    470
    471	if (!(f->flags & FFL_PROBE)) {
    472		snprintf(buf, sizeof(buf),
    473			"%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
    474			"retransmit", d->aoemajor, d->aoeminor,
    475			f->tag, jiffies, n,
    476			h->src, h->dst, t->nout);
    477		aoechr_error(buf);
    478	}
    479
    480	f->tag = n;
    481	fhash(f);
    482	h->tag = cpu_to_be32(n);
    483	memcpy(h->dst, t->addr, sizeof h->dst);
    484	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
    485
    486	skb->dev = t->ifp->nd;
    487	skb = skb_clone(skb, GFP_ATOMIC);
    488	if (skb == NULL)
    489		return;
    490	f->sent = ktime_get();
    491	__skb_queue_head_init(&queue);
    492	__skb_queue_tail(&queue, skb);
    493	aoenet_xmit(&queue);
    494}
    495
    496static int
    497tsince_hr(struct frame *f)
    498{
    499	u64 delta = ktime_to_ns(ktime_sub(ktime_get(), f->sent));
    500
    501	/* delta is normally under 4.2 seconds, avoid 64-bit division */
    502	if (likely(delta <= UINT_MAX))
    503		return (u32)delta / NSEC_PER_USEC;
    504
    505	/* avoid overflow after 71 minutes */
    506	if (delta > ((u64)INT_MAX * NSEC_PER_USEC))
    507		return INT_MAX;
    508
    509	return div_u64(delta, NSEC_PER_USEC);
    510}
    511
    512static int
    513tsince(u32 tag)
    514{
    515	int n;
    516
    517	n = jiffies & 0xffff;
    518	n -= tag & 0xffff;
    519	if (n < 0)
    520		n += 1<<16;
    521	return jiffies_to_usecs(n + 1);
    522}
    523
    524static struct aoeif *
    525getif(struct aoetgt *t, struct net_device *nd)
    526{
    527	struct aoeif *p, *e;
    528
    529	p = t->ifs;
    530	e = p + NAOEIFS;
    531	for (; p < e; p++)
    532		if (p->nd == nd)
    533			return p;
    534	return NULL;
    535}
    536
    537static void
    538ejectif(struct aoetgt *t, struct aoeif *ifp)
    539{
    540	struct aoeif *e;
    541	struct net_device *nd;
    542	ulong n;
    543
    544	nd = ifp->nd;
    545	e = t->ifs + NAOEIFS - 1;
    546	n = (e - ifp) * sizeof *ifp;
    547	memmove(ifp, ifp+1, n);
    548	e->nd = NULL;
    549	dev_put(nd);
    550}
    551
    552static struct frame *
    553reassign_frame(struct frame *f)
    554{
    555	struct frame *nf;
    556	struct sk_buff *skb;
    557
    558	nf = newframe(f->t->d);
    559	if (!nf)
    560		return NULL;
    561	if (nf->t == f->t) {
    562		aoe_freetframe(nf);
    563		return NULL;
    564	}
    565
    566	skb = nf->skb;
    567	nf->skb = f->skb;
    568	nf->buf = f->buf;
    569	nf->iter = f->iter;
    570	nf->waited = 0;
    571	nf->waited_total = f->waited_total;
    572	nf->sent = f->sent;
    573	f->skb = skb;
    574
    575	return nf;
    576}
    577
    578static void
    579probe(struct aoetgt *t)
    580{
    581	struct aoedev *d;
    582	struct frame *f;
    583	struct sk_buff *skb;
    584	struct sk_buff_head queue;
    585	size_t n, m;
    586	int frag;
    587
    588	d = t->d;
    589	f = newtframe(d, t);
    590	if (!f) {
    591		pr_err("%s %pm for e%ld.%d: %s\n",
    592			"aoe: cannot probe remote address",
    593			t->addr,
    594			(long) d->aoemajor, d->aoeminor,
    595			"no frame available");
    596		return;
    597	}
    598	f->flags |= FFL_PROBE;
    599	ifrotate(t);
    600	f->iter.bi_size = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
    601	ata_rw_frameinit(f);
    602	skb = f->skb;
    603	for (frag = 0, n = f->iter.bi_size; n > 0; ++frag, n -= m) {
    604		if (n < PAGE_SIZE)
    605			m = n;
    606		else
    607			m = PAGE_SIZE;
    608		skb_fill_page_desc(skb, frag, empty_page, 0, m);
    609	}
    610	skb->len += f->iter.bi_size;
    611	skb->data_len = f->iter.bi_size;
    612	skb->truesize += f->iter.bi_size;
    613
    614	skb = skb_clone(f->skb, GFP_ATOMIC);
    615	if (skb) {
    616		f->sent = ktime_get();
    617		__skb_queue_head_init(&queue);
    618		__skb_queue_tail(&queue, skb);
    619		aoenet_xmit(&queue);
    620	}
    621}
    622
    623static long
    624rto(struct aoedev *d)
    625{
    626	long t;
    627
    628	t = 2 * d->rttavg >> RTTSCALE;
    629	t += 8 * d->rttdev >> RTTDSCALE;
    630	if (t == 0)
    631		t = 1;
    632
    633	return t;
    634}
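        /*
         * The timeout is in the same microsecond units as tsince_hr(): a
         * scaled smoothed RTT plus a multiple of its deviation, in the spirit
         * of the TCP-style "srtt + k * rttvar" retransmit timeout, with the
         * running estimates maintained by calc_rttavg() below.
         */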
    635
    636static void
    637rexmit_deferred(struct aoedev *d)
    638{
    639	struct aoetgt *t;
    640	struct frame *f;
    641	struct frame *nf;
    642	struct list_head *pos, *nx, *head;
    643	int since;
    644	int untainted;
    645
    646	count_targets(d, &untainted);
    647
    648	head = &d->rexmitq;
    649	list_for_each_safe(pos, nx, head) {
    650		f = list_entry(pos, struct frame, head);
    651		t = f->t;
    652		if (t->taint) {
    653			if (!(f->flags & FFL_PROBE)) {
    654				nf = reassign_frame(f);
    655				if (nf) {
    656					if (t->nout_probes == 0
    657					&& untainted > 0) {
    658						probe(t);
    659						t->nout_probes++;
    660					}
    661					list_replace(&f->head, &nf->head);
    662					pos = &nf->head;
    663					aoe_freetframe(f);
    664					f = nf;
    665					t = f->t;
    666				}
    667			} else if (untainted < 1) {
    668				/* don't probe w/o other untainted aoetgts */
    669				goto stop_probe;
    670			} else if (tsince_hr(f) < t->taint * rto(d)) {
    671				/* reprobe slowly when taint is high */
    672				continue;
    673			}
    674		} else if (f->flags & FFL_PROBE) {
    675stop_probe:		/* don't probe untainted aoetgts */
    676			list_del(pos);
    677			aoe_freetframe(f);
    678			/* leaving d->kicked, because this is routine */
    679			f->t->d->flags |= DEVFL_KICKME;
    680			continue;
    681		}
    682		if (t->nout >= t->maxout)
    683			continue;
    684		list_del(pos);
    685		t->nout++;
    686		if (f->flags & FFL_PROBE)
    687			t->nout_probes++;
    688		since = tsince_hr(f);
    689		f->waited += since;
    690		f->waited_total += since;
    691		resend(d, f);
    692	}
    693}
    694
    695/* An aoetgt accumulates demerits quickly, and successful
    696 * probing redeems the aoetgt slowly.
    697 */
    698static void
    699scorn(struct aoetgt *t)
    700{
    701	int n;
    702
    703	n = t->taint++;
    704	t->taint += t->taint * 2;
    705	if (n > t->taint)
    706		t->taint = n;
    707	if (t->taint > MAX_TAINT)
    708		t->taint = MAX_TAINT;
    709}
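        /*
         * In effect one call roughly triples the taint (3 * (old + 1)), with
         * an overflow guard and a cap at MAX_TAINT, while each successful
         * completion in ktiocomplete() only walks it back down by one.
         */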
    710
    711static int
    712count_targets(struct aoedev *d, int *untainted)
    713{
    714	int i, good;
    715
    716	for (i = good = 0; i < d->ntargets && d->targets[i]; ++i)
    717		if (d->targets[i]->taint == 0)
    718			good++;
    719
    720	if (untainted)
    721		*untainted = good;
    722	return i;
    723}
    724
    725static void
    726rexmit_timer(struct timer_list *timer)
    727{
    728	struct aoedev *d;
    729	struct aoetgt *t;
    730	struct aoeif *ifp;
    731	struct frame *f;
    732	struct list_head *head, *pos, *nx;
    733	LIST_HEAD(flist);
    734	register long timeout;
    735	ulong flags, n;
    736	int i;
    737	int utgts;	/* number of aoetgt descriptors (not slots) */
    738	int since;
    739
    740	d = from_timer(d, timer, timer);
    741
    742	spin_lock_irqsave(&d->lock, flags);
    743
    744	/* timeout based on observed timings and variations */
    745	timeout = rto(d);
    746
    747	utgts = count_targets(d, NULL);
    748
    749	if (d->flags & DEVFL_TKILL) {
    750		spin_unlock_irqrestore(&d->lock, flags);
    751		return;
    752	}
    753
    754	/* collect all frames to rexmit into flist */
    755	for (i = 0; i < NFACTIVE; i++) {
    756		head = &d->factive[i];
    757		list_for_each_safe(pos, nx, head) {
    758			f = list_entry(pos, struct frame, head);
    759			if (tsince_hr(f) < timeout)
    760				break;	/* end of expired frames */
    761			/* move to flist for later processing */
    762			list_move_tail(pos, &flist);
    763		}
    764	}
    765
    766	/* process expired frames */
    767	while (!list_empty(&flist)) {
    768		pos = flist.next;
    769		f = list_entry(pos, struct frame, head);
    770		since = tsince_hr(f);
    771		n = f->waited_total + since;
    772		n /= USEC_PER_SEC;
    773		if (aoe_deadsecs
    774		&& n > aoe_deadsecs
    775		&& !(f->flags & FFL_PROBE)) {
    776			/* Waited too long.  Device failure.
    777			 * Hang all frames on first hash bucket for downdev
    778			 * to clean up.
    779			 */
    780			list_splice(&flist, &d->factive[0]);
    781			aoedev_downdev(d);
    782			goto out;
    783		}
    784
    785		t = f->t;
    786		n = f->waited + since;
    787		n /= USEC_PER_SEC;
    788		if (aoe_deadsecs && utgts > 0
    789		&& (n > aoe_deadsecs / utgts || n > HARD_SCORN_SECS))
    790			scorn(t); /* avoid this target */
    791
    792		if (t->maxout != 1) {
    793			t->ssthresh = t->maxout / 2;
    794			t->maxout = 1;
    795		}
    796
    797		if (f->flags & FFL_PROBE) {
    798			t->nout_probes--;
    799		} else {
    800			ifp = getif(t, f->skb->dev);
    801			if (ifp && ++ifp->lost > (t->nframes << 1)
    802			&& (ifp != t->ifs || t->ifs[1].nd)) {
    803				ejectif(t, ifp);
    804				ifp = NULL;
    805			}
    806		}
    807		list_move_tail(pos, &d->rexmitq);
    808		t->nout--;
    809	}
    810	rexmit_deferred(d);
    811
    812out:
    813	if ((d->flags & DEVFL_KICKME) && d->blkq) {
    814		d->flags &= ~DEVFL_KICKME;
    815		blk_mq_run_hw_queues(d->blkq, true);
    816	}
    817
    818	d->timer.expires = jiffies + TIMERTICK;
    819	add_timer(&d->timer);
    820
    821	spin_unlock_irqrestore(&d->lock, flags);
    822}
    823
    824static void
    825bufinit(struct buf *buf, struct request *rq, struct bio *bio)
    826{
    827	memset(buf, 0, sizeof(*buf));
    828	buf->rq = rq;
    829	buf->bio = bio;
    830	buf->iter = bio->bi_iter;
    831}
    832
    833static struct buf *
    834nextbuf(struct aoedev *d)
    835{
    836	struct request *rq;
    837	struct request_queue *q;
    838	struct aoe_req *req;
    839	struct buf *buf;
    840	struct bio *bio;
    841
    842	q = d->blkq;
    843	if (q == NULL)
    844		return NULL;	/* initializing */
    845	if (d->ip.buf)
    846		return d->ip.buf;
    847	rq = d->ip.rq;
    848	if (rq == NULL) {
    849		rq = list_first_entry_or_null(&d->rq_list, struct request,
    850						queuelist);
    851		if (rq == NULL)
    852			return NULL;
    853		list_del_init(&rq->queuelist);
    854		blk_mq_start_request(rq);
    855		d->ip.rq = rq;
    856		d->ip.nxbio = rq->bio;
    857
    858		req = blk_mq_rq_to_pdu(rq);
    859		req->nr_bios = 0;
    860		__rq_for_each_bio(bio, rq)
    861			req->nr_bios++;
    862	}
    863	buf = mempool_alloc(d->bufpool, GFP_ATOMIC);
    864	if (buf == NULL) {
    865		pr_err("aoe: nextbuf: unable to mempool_alloc!\n");
    866		return NULL;
    867	}
    868	bio = d->ip.nxbio;
    869	bufinit(buf, rq, bio);
    870	bio = bio->bi_next;
    871	d->ip.nxbio = bio;
    872	if (bio == NULL)
    873		d->ip.rq = NULL;
    874	return d->ip.buf = buf;
    875}
    876
    877/* enters with d->lock held */
    878void
    879aoecmd_work(struct aoedev *d)
    880{
    881	rexmit_deferred(d);
    882	while (aoecmd_ata_rw(d))
    883		;
    884}
    885
    886/* this function performs work that has been deferred until sleeping is OK
    887 */
    888void
    889aoecmd_sleepwork(struct work_struct *work)
    890{
    891	struct aoedev *d = container_of(work, struct aoedev, work);
    892
    893	if (d->flags & DEVFL_GDALLOC)
    894		aoeblk_gdalloc(d);
    895
    896	if (d->flags & DEVFL_NEWSIZE) {
    897		set_capacity_and_notify(d->gd, d->ssize);
    898
    899		spin_lock_irq(&d->lock);
    900		d->flags |= DEVFL_UP;
    901		d->flags &= ~DEVFL_NEWSIZE;
    902		spin_unlock_irq(&d->lock);
    903	}
    904}
    905
    906static void
    907ata_ident_fixstring(u16 *id, int ns)
    908{
    909	u16 s;
    910
    911	while (ns-- > 0) {
    912		s = *id;
    913		*id++ = s >> 8 | s << 8;
    914	}
    915}
    916
    917static void
    918ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
    919{
    920	u64 ssize;
    921	u16 n;
    922
    923	/* word 83: command set supported */
    924	n = get_unaligned_le16(&id[83 << 1]);
    925
    926	/* word 86: command set/feature enabled */
    927	n |= get_unaligned_le16(&id[86 << 1]);
    928
    929	if (n & (1<<10)) {	/* bit 10: LBA 48 */
    930		d->flags |= DEVFL_EXT;
    931
    932		/* word 100: number lba48 sectors */
    933		ssize = get_unaligned_le64(&id[100 << 1]);
    934
    935		/* set as in ide-disk.c:init_idedisk_capacity */
    936		d->geo.cylinders = ssize;
    937		d->geo.cylinders /= (255 * 63);
    938		d->geo.heads = 255;
    939		d->geo.sectors = 63;
    940	} else {
    941		d->flags &= ~DEVFL_EXT;
    942
    943		/* number lba28 sectors */
    944		ssize = get_unaligned_le32(&id[60 << 1]);
    945
    946		/* NOTE: obsolete in ATA 6 */
    947		d->geo.cylinders = get_unaligned_le16(&id[54 << 1]);
    948		d->geo.heads = get_unaligned_le16(&id[55 << 1]);
    949		d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
    950	}
    951
    952	ata_ident_fixstring((u16 *) &id[10<<1], 10);	/* serial */
    953	ata_ident_fixstring((u16 *) &id[23<<1], 4);	/* firmware */
    954	ata_ident_fixstring((u16 *) &id[27<<1], 20);	/* model */
    955	memcpy(d->ident, id, sizeof(d->ident));
    956
    957	if (d->ssize != ssize)
    958		printk(KERN_INFO
    959			"aoe: %pm e%ld.%d v%04x has %llu sectors\n",
    960			t->addr,
    961			d->aoemajor, d->aoeminor,
    962			d->fw_ver, (long long)ssize);
    963	d->ssize = ssize;
    964	d->geo.start = 0;
    965	if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
    966		return;
    967	if (d->gd != NULL)
    968		d->flags |= DEVFL_NEWSIZE;
    969	else
    970		d->flags |= DEVFL_GDALLOC;
    971	queue_work(aoe_wq, &d->work);
    972}
    973
    974static void
    975calc_rttavg(struct aoedev *d, struct aoetgt *t, int rtt)
    976{
    977	register long n;
    978
    979	n = rtt;
    980
    981	/* cf. Congestion Avoidance and Control, Jacobson & Karels, 1988 */
    982	n -= d->rttavg >> RTTSCALE;
    983	d->rttavg += n;
    984	if (n < 0)
    985		n = -n;
    986	n -= d->rttdev >> RTTDSCALE;
    987	d->rttdev += n;
    988
    989	if (!t || t->maxout >= t->nframes)
    990		return;
    991	if (t->maxout < t->ssthresh)
    992		t->maxout += 1;
    993	else if (t->nout == t->maxout && t->next_cwnd-- == 0) {
    994		t->maxout += 1;
    995		t->next_cwnd = t->maxout;
    996	}
    997}
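        /*
         * With rttavg scaled by 2^RTTSCALE, the update above amounts to the
         * usual EWMA srtt += (rtt - srtt) / 2^RTTSCALE, and rttdev tracks the
         * mean deviation the same way at 1/2^RTTDSCALE gain.  The tail grows
         * t->maxout like a congestion window: by one per response while below
         * ssthresh, and by roughly one per window's worth of responses once
         * the window is full.
         */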
    998
    999static struct aoetgt *
   1000gettgt(struct aoedev *d, char *addr)
   1001{
   1002	struct aoetgt **t, **e;
   1003
   1004	t = d->targets;
   1005	e = t + d->ntargets;
   1006	for (; t < e && *t; t++)
   1007		if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
   1008			return *t;
   1009	return NULL;
   1010}
   1011
   1012static void
   1013bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt)
   1014{
   1015	int soff = 0;
   1016	struct bio_vec bv;
   1017
   1018	iter.bi_size = cnt;
   1019
   1020	__bio_for_each_segment(bv, bio, iter, iter) {
   1021		char *p = bvec_kmap_local(&bv);
   1022		skb_copy_bits(skb, soff, p, bv.bv_len);
   1023		kunmap_local(p);
   1024		soff += bv.bv_len;
   1025	}
   1026}
   1027
   1028void
   1029aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
   1030{
   1031	struct bio *bio;
   1032	int bok;
   1033	struct request_queue *q;
   1034	blk_status_t err = BLK_STS_OK;
   1035
   1036	q = d->blkq;
   1037	if (rq == d->ip.rq)
   1038		d->ip.rq = NULL;
   1039	do {
   1040		bio = rq->bio;
   1041		bok = !fastfail && !bio->bi_status;
   1042		if (!bok)
   1043			err = BLK_STS_IOERR;
   1044	} while (blk_update_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size));
   1045
   1046	__blk_mq_end_request(rq, err);
   1047
   1048	/* cf. https://lore.kernel.org/lkml/20061031071040.GS14055@kernel.dk/ */
   1049	if (!fastfail)
   1050		blk_mq_run_hw_queues(q, true);
   1051}
   1052
   1053static void
   1054aoe_end_buf(struct aoedev *d, struct buf *buf)
   1055{
   1056	struct request *rq = buf->rq;
   1057	struct aoe_req *req = blk_mq_rq_to_pdu(rq);
   1058
   1059	if (buf == d->ip.buf)
   1060		d->ip.buf = NULL;
   1061	mempool_free(buf, d->bufpool);
   1062	if (--req->nr_bios == 0)
   1063		aoe_end_request(d, rq, 0);
   1064}
   1065
   1066static void
   1067ktiocomplete(struct frame *f)
   1068{
   1069	struct aoe_hdr *hin, *hout;
   1070	struct aoe_atahdr *ahin, *ahout;
   1071	struct buf *buf;
   1072	struct sk_buff *skb;
   1073	struct aoetgt *t;
   1074	struct aoeif *ifp;
   1075	struct aoedev *d;
   1076	long n;
   1077	int untainted;
   1078
   1079	if (f == NULL)
   1080		return;
   1081
   1082	t = f->t;
   1083	d = t->d;
   1084	skb = f->r_skb;
   1085	buf = f->buf;
   1086	if (f->flags & FFL_PROBE)
   1087		goto out;
   1088	if (!skb)		/* just fail the buf. */
   1089		goto noskb;
   1090
   1091	hout = (struct aoe_hdr *) skb_mac_header(f->skb);
   1092	ahout = (struct aoe_atahdr *) (hout+1);
   1093
   1094	hin = (struct aoe_hdr *) skb->data;
   1095	skb_pull(skb, sizeof(*hin));
   1096	ahin = (struct aoe_atahdr *) skb->data;
   1097	skb_pull(skb, sizeof(*ahin));
   1098	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
   1099		pr_err("aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
   1100			ahout->cmdstat, ahin->cmdstat,
   1101			d->aoemajor, d->aoeminor);
   1102noskb:		if (buf)
   1103			buf->bio->bi_status = BLK_STS_IOERR;
   1104		goto out;
   1105	}
   1106
   1107	n = ahout->scnt << 9;
   1108	switch (ahout->cmdstat) {
   1109	case ATA_CMD_PIO_READ:
   1110	case ATA_CMD_PIO_READ_EXT:
   1111		if (skb->len < n) {
   1112			pr_err("%s e%ld.%d.  skb->len=%d need=%ld\n",
   1113				"aoe: runt data size in read from",
   1114				(long) d->aoemajor, d->aoeminor,
   1115			       skb->len, n);
   1116			buf->bio->bi_status = BLK_STS_IOERR;
   1117			break;
   1118		}
   1119		if (n > f->iter.bi_size) {
   1120			pr_err_ratelimited("%s e%ld.%d.  bytes=%ld need=%u\n",
   1121				"aoe: too-large data size in read from",
   1122				(long) d->aoemajor, d->aoeminor,
   1123				n, f->iter.bi_size);
   1124			buf->bio->bi_status = BLK_STS_IOERR;
   1125			break;
   1126		}
   1127		bvcpy(skb, f->buf->bio, f->iter, n);
   1128		fallthrough;
   1129	case ATA_CMD_PIO_WRITE:
   1130	case ATA_CMD_PIO_WRITE_EXT:
   1131		spin_lock_irq(&d->lock);
   1132		ifp = getif(t, skb->dev);
   1133		if (ifp)
   1134			ifp->lost = 0;
   1135		spin_unlock_irq(&d->lock);
   1136		break;
   1137	case ATA_CMD_ID_ATA:
   1138		if (skb->len < 512) {
   1139			pr_info("%s e%ld.%d.  skb->len=%d need=512\n",
   1140				"aoe: runt data size in ataid from",
   1141				(long) d->aoemajor, d->aoeminor,
   1142				skb->len);
   1143			break;
   1144		}
   1145		if (skb_linearize(skb))
   1146			break;
   1147		spin_lock_irq(&d->lock);
   1148		ataid_complete(d, t, skb->data);
   1149		spin_unlock_irq(&d->lock);
   1150		break;
   1151	default:
   1152		pr_info("aoe: unrecognized ata command %2.2Xh for %d.%d\n",
   1153			ahout->cmdstat,
   1154			be16_to_cpu(get_unaligned(&hin->major)),
   1155			hin->minor);
   1156	}
   1157out:
   1158	spin_lock_irq(&d->lock);
   1159	if (t->taint > 0
   1160	&& --t->taint > 0
   1161	&& t->nout_probes == 0) {
   1162		count_targets(d, &untainted);
   1163		if (untainted > 0) {
   1164			probe(t);
   1165			t->nout_probes++;
   1166		}
   1167	}
   1168
   1169	aoe_freetframe(f);
   1170
   1171	if (buf && --buf->nframesout == 0 && buf->iter.bi_size == 0)
   1172		aoe_end_buf(d, buf);
   1173
   1174	spin_unlock_irq(&d->lock);
   1175	aoedev_put(d);
   1176	dev_kfree_skb(skb);
   1177}
   1178
   1179/* Enters with iocq.lock held.
   1180 * Returns true iff responses needing processing remain.
   1181 */
   1182static int
   1183ktio(int id)
   1184{
   1185	struct frame *f;
   1186	struct list_head *pos;
   1187	int i;
   1188	int actual_id;
   1189
   1190	for (i = 0; ; ++i) {
   1191		if (i == MAXIOC)
   1192			return 1;
   1193		if (list_empty(&iocq[id].head))
   1194			return 0;
   1195		pos = iocq[id].head.next;
   1196		list_del(pos);
   1197		f = list_entry(pos, struct frame, head);
   1198		spin_unlock_irq(&iocq[id].lock);
   1199		ktiocomplete(f);
   1200
   1201		/* Figure out if extra threads are required. */
   1202		actual_id = f->t->d->aoeminor % ncpus;
   1203
   1204		if (!kts[actual_id].active) {
   1205			BUG_ON(id != 0);
   1206			mutex_lock(&ktio_spawn_lock);
   1207			if (!kts[actual_id].active
   1208				&& aoe_ktstart(&kts[actual_id]) == 0)
   1209				kts[actual_id].active = 1;
   1210			mutex_unlock(&ktio_spawn_lock);
   1211		}
   1212		spin_lock_irq(&iocq[id].lock);
   1213	}
   1214}
   1215
   1216static int
   1217kthread(void *vp)
   1218{
   1219	struct ktstate *k;
   1220	DECLARE_WAITQUEUE(wait, current);
   1221	int more;
   1222
   1223	k = vp;
   1224	current->flags |= PF_NOFREEZE;
   1225	set_user_nice(current, -10);
   1226	complete(&k->rendez);	/* tell spawner we're running */
   1227	do {
   1228		spin_lock_irq(k->lock);
   1229		more = k->fn(k->id);
   1230		if (!more) {
   1231			add_wait_queue(k->waitq, &wait);
   1232			__set_current_state(TASK_INTERRUPTIBLE);
   1233		}
   1234		spin_unlock_irq(k->lock);
   1235		if (!more) {
   1236			schedule();
   1237			remove_wait_queue(k->waitq, &wait);
   1238		} else
   1239			cond_resched();
   1240	} while (!kthread_should_stop());
   1241	complete(&k->rendez);	/* tell spawner we're stopping */
   1242	return 0;
   1243}
   1244
   1245void
   1246aoe_ktstop(struct ktstate *k)
   1247{
   1248	kthread_stop(k->task);
   1249	wait_for_completion(&k->rendez);
   1250}
   1251
   1252int
   1253aoe_ktstart(struct ktstate *k)
   1254{
   1255	struct task_struct *task;
   1256
   1257	init_completion(&k->rendez);
   1258	task = kthread_run(kthread, k, "%s", k->name);
   1259	if (task == NULL || IS_ERR(task))
   1260		return -ENOMEM;
   1261	k->task = task;
   1262	wait_for_completion(&k->rendez); /* allow kthread to start */
   1263	init_completion(&k->rendez);	/* for waiting for exit later */
   1264	return 0;
   1265}
   1266
   1267/* pass it off to kthreads for processing */
   1268static void
   1269ktcomplete(struct frame *f, struct sk_buff *skb)
   1270{
   1271	int id;
   1272	ulong flags;
   1273
   1274	f->r_skb = skb;
   1275	id = f->t->d->aoeminor % ncpus;
   1276	spin_lock_irqsave(&iocq[id].lock, flags);
   1277	if (!kts[id].active) {
   1278		spin_unlock_irqrestore(&iocq[id].lock, flags);
   1279		/* The thread with id has not been spawned yet,
   1280		 * so delegate the work to the main thread and
   1281		 * try spawning a new thread.
   1282		 */
   1283		id = 0;
   1284		spin_lock_irqsave(&iocq[id].lock, flags);
   1285	}
   1286	list_add_tail(&f->head, &iocq[id].head);
   1287	spin_unlock_irqrestore(&iocq[id].lock, flags);
   1288	wake_up(&ktiowq[id]);
   1289}
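        /*
         * Completions are hashed onto one of ncpus completion queues by AoE
         * minor number.  If the matching ktio thread has not been spawned yet,
         * the frame falls back to queue 0, and thread 0 spawns the missing
         * thread on demand (see the spawn logic in ktio() above).
         */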
   1290
   1291struct sk_buff *
   1292aoecmd_ata_rsp(struct sk_buff *skb)
   1293{
   1294	struct aoedev *d;
   1295	struct aoe_hdr *h;
   1296	struct frame *f;
   1297	u32 n;
   1298	ulong flags;
   1299	char ebuf[128];
   1300	u16 aoemajor;
   1301
   1302	h = (struct aoe_hdr *) skb->data;
   1303	aoemajor = be16_to_cpu(get_unaligned(&h->major));
   1304	d = aoedev_by_aoeaddr(aoemajor, h->minor, 0);
   1305	if (d == NULL) {
   1306		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
   1307			"for unknown device %d.%d\n",
   1308			aoemajor, h->minor);
   1309		aoechr_error(ebuf);
   1310		return skb;
   1311	}
   1312
   1313	spin_lock_irqsave(&d->lock, flags);
   1314
   1315	n = be32_to_cpu(get_unaligned(&h->tag));
   1316	f = getframe(d, n);
   1317	if (f) {
   1318		calc_rttavg(d, f->t, tsince_hr(f));
   1319		f->t->nout--;
   1320		if (f->flags & FFL_PROBE)
   1321			f->t->nout_probes--;
   1322	} else {
   1323		f = getframe_deferred(d, n);
   1324		if (f) {
   1325			calc_rttavg(d, NULL, tsince_hr(f));
   1326		} else {
   1327			calc_rttavg(d, NULL, tsince(n));
   1328			spin_unlock_irqrestore(&d->lock, flags);
   1329			aoedev_put(d);
   1330			snprintf(ebuf, sizeof(ebuf),
   1331				 "%15s e%d.%d    tag=%08x@%08lx s=%pm d=%pm\n",
   1332				 "unexpected rsp",
   1333				 get_unaligned_be16(&h->major),
   1334				 h->minor,
   1335				 get_unaligned_be32(&h->tag),
   1336				 jiffies,
   1337				 h->src,
   1338				 h->dst);
   1339			aoechr_error(ebuf);
   1340			return skb;
   1341		}
   1342	}
   1343	aoecmd_work(d);
   1344
   1345	spin_unlock_irqrestore(&d->lock, flags);
   1346
   1347	ktcomplete(f, skb);
   1348
   1349	/*
   1350	 * Note here that we do not perform an aoedev_put, as we are
   1351	 * leaving this reference for the ktio to release.
   1352	 */
   1353	return NULL;
   1354}
   1355
   1356void
   1357aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
   1358{
   1359	struct sk_buff_head queue;
   1360
   1361	__skb_queue_head_init(&queue);
   1362	aoecmd_cfg_pkts(aoemajor, aoeminor, &queue);
   1363	aoenet_xmit(&queue);
   1364}
   1365
   1366struct sk_buff *
   1367aoecmd_ata_id(struct aoedev *d)
   1368{
   1369	struct aoe_hdr *h;
   1370	struct aoe_atahdr *ah;
   1371	struct frame *f;
   1372	struct sk_buff *skb;
   1373	struct aoetgt *t;
   1374
   1375	f = newframe(d);
   1376	if (f == NULL)
   1377		return NULL;
   1378
   1379	t = *d->tgt;
   1380
   1381	/* initialize the headers & frame */
   1382	skb = f->skb;
   1383	h = (struct aoe_hdr *) skb_mac_header(skb);
   1384	ah = (struct aoe_atahdr *) (h+1);
   1385	skb_put(skb, sizeof *h + sizeof *ah);
   1386	memset(h, 0, skb->len);
   1387	f->tag = aoehdr_atainit(d, t, h);
   1388	fhash(f);
   1389	t->nout++;
   1390	f->waited = 0;
   1391	f->waited_total = 0;
   1392
   1393	/* set up ata header */
   1394	ah->scnt = 1;
   1395	ah->cmdstat = ATA_CMD_ID_ATA;
   1396	ah->lba3 = 0xa0;
   1397
   1398	skb->dev = t->ifp->nd;
   1399
   1400	d->rttavg = RTTAVG_INIT;
   1401	d->rttdev = RTTDEV_INIT;
   1402	d->timer.function = rexmit_timer;
   1403
   1404	skb = skb_clone(skb, GFP_ATOMIC);
   1405	if (skb)
   1406		f->sent = ktime_get();
   1407
   1408	return skb;
   1409}
   1410
   1411static struct aoetgt **
   1412grow_targets(struct aoedev *d)
   1413{
   1414	ulong oldn, newn;
   1415	struct aoetgt **tt;
   1416
   1417	oldn = d->ntargets;
   1418	newn = oldn * 2;
   1419	tt = kcalloc(newn, sizeof(*d->targets), GFP_ATOMIC);
   1420	if (!tt)
   1421		return NULL;
   1422	memmove(tt, d->targets, sizeof(*d->targets) * oldn);
   1423	d->tgt = tt + (d->tgt - d->targets);
   1424	kfree(d->targets);
   1425	d->targets = tt;
   1426	d->ntargets = newn;
   1427
   1428	return &d->targets[oldn];
   1429}
   1430
   1431static struct aoetgt *
   1432addtgt(struct aoedev *d, char *addr, ulong nframes)
   1433{
   1434	struct aoetgt *t, **tt, **te;
   1435
   1436	tt = d->targets;
   1437	te = tt + d->ntargets;
   1438	for (; tt < te && *tt; tt++)
   1439		;
   1440
   1441	if (tt == te) {
   1442		tt = grow_targets(d);
   1443		if (!tt)
   1444			goto nomem;
   1445	}
   1446	t = kzalloc(sizeof(*t), GFP_ATOMIC);
   1447	if (!t)
   1448		goto nomem;
   1449	t->nframes = nframes;
   1450	t->d = d;
   1451	memcpy(t->addr, addr, sizeof t->addr);
   1452	t->ifp = t->ifs;
   1453	aoecmd_wreset(t);
   1454	t->maxout = t->nframes / 2;
   1455	INIT_LIST_HEAD(&t->ffree);
   1456	return *tt = t;
   1457
   1458 nomem:
   1459	pr_info("aoe: cannot allocate memory to add target\n");
   1460	return NULL;
   1461}
   1462
   1463static void
   1464setdbcnt(struct aoedev *d)
   1465{
   1466	struct aoetgt **t, **e;
   1467	int bcnt = 0;
   1468
   1469	t = d->targets;
   1470	e = t + d->ntargets;
   1471	for (; t < e && *t; t++)
   1472		if (bcnt == 0 || bcnt > (*t)->minbcnt)
   1473			bcnt = (*t)->minbcnt;
   1474	if (bcnt != d->maxbcnt) {
   1475		d->maxbcnt = bcnt;
   1476		pr_info("aoe: e%ld.%d: setting %d byte data frames\n",
   1477			d->aoemajor, d->aoeminor, bcnt);
   1478	}
   1479}
   1480
   1481static void
   1482setifbcnt(struct aoetgt *t, struct net_device *nd, int bcnt)
   1483{
   1484	struct aoedev *d;
   1485	struct aoeif *p, *e;
   1486	int minbcnt;
   1487
   1488	d = t->d;
   1489	minbcnt = bcnt;
   1490	p = t->ifs;
   1491	e = p + NAOEIFS;
   1492	for (; p < e; p++) {
   1493		if (p->nd == NULL)
   1494			break;		/* end of the valid interfaces */
   1495		if (p->nd == nd) {
   1496			p->bcnt = bcnt;	/* we're updating */
   1497			nd = NULL;
   1498		} else if (minbcnt > p->bcnt)
   1499			minbcnt = p->bcnt; /* find the min interface */
   1500	}
   1501	if (nd) {
   1502		if (p == e) {
   1503			pr_err("aoe: device setifbcnt failure; too many interfaces.\n");
   1504			return;
   1505		}
   1506		dev_hold(nd);
   1507		p->nd = nd;
   1508		p->bcnt = bcnt;
   1509	}
   1510	t->minbcnt = minbcnt;
   1511	setdbcnt(d);
   1512}
   1513
   1514void
   1515aoecmd_cfg_rsp(struct sk_buff *skb)
   1516{
   1517	struct aoedev *d;
   1518	struct aoe_hdr *h;
   1519	struct aoe_cfghdr *ch;
   1520	struct aoetgt *t;
   1521	ulong flags, aoemajor;
   1522	struct sk_buff *sl;
   1523	struct sk_buff_head queue;
   1524	u16 n;
   1525
   1526	sl = NULL;
   1527	h = (struct aoe_hdr *) skb_mac_header(skb);
   1528	ch = (struct aoe_cfghdr *) (h+1);
   1529
   1530	/*
   1531	 * Enough people have their dip switches set backwards to
   1532	 * warrant a loud message for this special case.
   1533	 */
   1534	aoemajor = get_unaligned_be16(&h->major);
   1535	if (aoemajor == 0xfff) {
   1536		printk(KERN_ERR "aoe: Warning: shelf address is all ones.  "
   1537			"Check shelf dip switches.\n");
   1538		return;
   1539	}
   1540	if (aoemajor == 0xffff) {
   1541		pr_info("aoe: e%ld.%d: broadcast shelf number invalid\n",
   1542			aoemajor, (int) h->minor);
   1543		return;
   1544	}
   1545	if (h->minor == 0xff) {
   1546		pr_info("aoe: e%ld.%d: broadcast slot number invalid\n",
   1547			aoemajor, (int) h->minor);
   1548		return;
   1549	}
   1550
   1551	n = be16_to_cpu(ch->bufcnt);
   1552	if (n > aoe_maxout)	/* keep it reasonable */
   1553		n = aoe_maxout;
   1554
   1555	d = aoedev_by_aoeaddr(aoemajor, h->minor, 1);
   1556	if (d == NULL) {
   1557		pr_info("aoe: device allocation failure\n");
   1558		return;
   1559	}
   1560
   1561	spin_lock_irqsave(&d->lock, flags);
   1562
   1563	t = gettgt(d, h->src);
   1564	if (t) {
   1565		t->nframes = n;
   1566		if (n < t->maxout)
   1567			aoecmd_wreset(t);
   1568	} else {
   1569		t = addtgt(d, h->src, n);
   1570		if (!t)
   1571			goto bail;
   1572	}
   1573	n = skb->dev->mtu;
   1574	n -= sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
   1575	n /= 512;
   1576	if (n > ch->scnt)
   1577		n = ch->scnt;
   1578	n = n ? n * 512 : DEFAULTBCNT;
   1579	setifbcnt(t, skb->dev, n);
   1580
   1581	/* don't change users' perspective */
   1582	if (d->nopen == 0) {
   1583		d->fw_ver = be16_to_cpu(ch->fwver);
   1584		sl = aoecmd_ata_id(d);
   1585	}
   1586bail:
   1587	spin_unlock_irqrestore(&d->lock, flags);
   1588	aoedev_put(d);
   1589	if (sl) {
   1590		__skb_queue_head_init(&queue);
   1591		__skb_queue_tail(&queue, sl);
   1592		aoenet_xmit(&queue);
   1593	}
   1594}
   1595
   1596void
   1597aoecmd_wreset(struct aoetgt *t)
   1598{
   1599	t->maxout = 1;
   1600	t->ssthresh = t->nframes / 2;
   1601	t->next_cwnd = t->nframes;
   1602}
   1603
   1604void
   1605aoecmd_cleanslate(struct aoedev *d)
   1606{
   1607	struct aoetgt **t, **te;
   1608
   1609	d->rttavg = RTTAVG_INIT;
   1610	d->rttdev = RTTDEV_INIT;
   1611	d->maxbcnt = 0;
   1612
   1613	t = d->targets;
   1614	te = t + d->ntargets;
   1615	for (; t < te && *t; t++)
   1616		aoecmd_wreset(*t);
   1617}
   1618
   1619void
   1620aoe_failbuf(struct aoedev *d, struct buf *buf)
   1621{
   1622	if (buf == NULL)
   1623		return;
   1624	buf->iter.bi_size = 0;
   1625	buf->bio->bi_status = BLK_STS_IOERR;
   1626	if (buf->nframesout == 0)
   1627		aoe_end_buf(d, buf);
   1628}
   1629
   1630void
   1631aoe_flush_iocq(void)
   1632{
   1633	int i;
   1634
   1635	for (i = 0; i < ncpus; i++) {
   1636		if (kts[i].active)
   1637			aoe_flush_iocq_by_index(i);
   1638	}
   1639}
   1640
   1641void
   1642aoe_flush_iocq_by_index(int id)
   1643{
   1644	struct frame *f;
   1645	struct aoedev *d;
   1646	LIST_HEAD(flist);
   1647	struct list_head *pos;
   1648	struct sk_buff *skb;
   1649	ulong flags;
   1650
   1651	spin_lock_irqsave(&iocq[id].lock, flags);
   1652	list_splice_init(&iocq[id].head, &flist);
   1653	spin_unlock_irqrestore(&iocq[id].lock, flags);
   1654	while (!list_empty(&flist)) {
   1655		pos = flist.next;
   1656		list_del(pos);
   1657		f = list_entry(pos, struct frame, head);
   1658		d = f->t->d;
   1659		skb = f->r_skb;
   1660		spin_lock_irqsave(&d->lock, flags);
   1661		if (f->buf) {
   1662			f->buf->nframesout--;
   1663			aoe_failbuf(d, f->buf);
   1664		}
   1665		aoe_freetframe(f);
   1666		spin_unlock_irqrestore(&d->lock, flags);
   1667		dev_kfree_skb(skb);
   1668		aoedev_put(d);
   1669	}
   1670}
   1671
   1672int __init
   1673aoecmd_init(void)
   1674{
   1675	void *p;
   1676	int i;
   1677	int ret;
   1678
   1679	/* get_zeroed_page returns page with ref count 1 */
   1680	p = (void *) get_zeroed_page(GFP_KERNEL);
   1681	if (!p)
   1682		return -ENOMEM;
   1683	empty_page = virt_to_page(p);
   1684
   1685	ncpus = num_online_cpus();
   1686
   1687	iocq = kcalloc(ncpus, sizeof(struct iocq_ktio), GFP_KERNEL);
   1688	if (!iocq)
   1689		return -ENOMEM;
   1690
   1691	kts = kcalloc(ncpus, sizeof(struct ktstate), GFP_KERNEL);
   1692	if (!kts) {
   1693		ret = -ENOMEM;
   1694		goto kts_fail;
   1695	}
   1696
   1697	ktiowq = kcalloc(ncpus, sizeof(wait_queue_head_t), GFP_KERNEL);
   1698	if (!ktiowq) {
   1699		ret = -ENOMEM;
   1700		goto ktiowq_fail;
   1701	}
   1702
   1703	for (i = 0; i < ncpus; i++) {
   1704		INIT_LIST_HEAD(&iocq[i].head);
   1705		spin_lock_init(&iocq[i].lock);
   1706		init_waitqueue_head(&ktiowq[i]);
   1707		snprintf(kts[i].name, sizeof(kts[i].name), "aoe_ktio%d", i);
   1708		kts[i].fn = ktio;
   1709		kts[i].waitq = &ktiowq[i];
   1710		kts[i].lock = &iocq[i].lock;
   1711		kts[i].id = i;
   1712		kts[i].active = 0;
   1713	}
   1714	kts[0].active = 1;
   1715	if (aoe_ktstart(&kts[0])) {
   1716		ret = -ENOMEM;
   1717		goto ktstart_fail;
   1718	}
   1719	return 0;
   1720
   1721ktstart_fail:
   1722	kfree(ktiowq);
   1723ktiowq_fail:
   1724	kfree(kts);
   1725kts_fail:
   1726	kfree(iocq);
   1727
   1728	return ret;
   1729}
   1730
   1731void
   1732aoecmd_exit(void)
   1733{
   1734	int i;
   1735
   1736	for (i = 0; i < ncpus; i++)
   1737		if (kts[i].active)
   1738			aoe_ktstop(&kts[i]);
   1739
   1740	aoe_flush_iocq();
   1741
    1742	/* Free up the iocq and thread-specific configuration
    1743	 * allocated during startup.
    1744	 */
   1745	kfree(iocq);
   1746	kfree(kts);
   1747	kfree(ktiowq);
   1748
   1749	free_page((unsigned long) page_address(empty_page));
   1750	empty_page = NULL;
   1751}