l2t.c (12953B)
/*
 * Copyright (c) 2003-2008 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <net/neighbour.h>
#include "common.h"
#include "t3cdev.h"
#include "cxgb3_defs.h"
#include "l2t.h"
#include "t3_cpl.h"
#include "firmware_exports.h"

#define VLAN_NONE 0xfff

/*
 * Module locking notes:  There is an RW lock protecting the L2 table as a
 * whole plus a spinlock per L2T entry.  Entry lookups and allocations happen
 * under the protection of the table lock; individual entry changes happen
 * while holding that entry's spinlock.  The table lock nests outside the
 * entry locks.  Allocations of new entries take the table lock as writers so
 * no other lookups can happen while allocating new entries.  Entry updates
 * take the table lock as readers so multiple entries can be updated in
 * parallel.  An L2T entry can be dropped by decrementing its reference count
 * and therefore can happen in parallel with entry allocation, but no entry
 * can change state or increment its ref count during allocation as both of
 * these perform lookups.
 */
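
/*
 * Illustration only (not part of the driver): a minimal sketch of the lock
 * nesting described above.  "find_entry" is a hypothetical helper standing
 * in for the hash-chain walks done below; the point is that the table rwlock
 * is always taken before, and released after, any per-entry spinlock.
 */
#if 0
static void example_mark_stale(struct l2t_data *d, u32 addr, int ifidx)
{
	struct l2t_entry *e;

	read_lock_bh(&d->lock);			/* table lock first, as reader */
	e = find_entry(d, addr, ifidx);		/* hypothetical lookup helper */
	if (e) {
		spin_lock(&e->lock);		/* entry lock nests inside */
		e->state = L2T_STATE_STALE;	/* per-entry state change */
		spin_unlock(&e->lock);
	}
	read_unlock_bh(&d->lock);
}
#endif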

static inline unsigned int vlan_prio(const struct l2t_entry *e)
{
	return e->vlan >> 13;
}

static inline unsigned int arp_hash(u32 key, int ifindex,
				    const struct l2t_data *d)
{
	return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
}

static inline void neigh_replace(struct l2t_entry *e, struct neighbour *n)
{
	neigh_hold(n);
	if (e->neigh)
		neigh_release(e->neigh);
	e->neigh = n;
}

/*
 * Set up an L2T entry and send any packets waiting in the arp queue.  The
 * supplied skb is used for the CPL_L2T_WRITE_REQ.  Must be called with the
 * entry locked.
 */
static int setup_l2e_send_pending(struct t3cdev *dev, struct sk_buff *skb,
				  struct l2t_entry *e)
{
	struct cpl_l2t_write_req *req;
	struct sk_buff *tmp;

	if (!skb) {
		skb = alloc_skb(sizeof(*req), GFP_ATOMIC);
		if (!skb)
			return -ENOMEM;
	}

	req = __skb_put(skb, sizeof(*req));
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
	req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
			    V_L2T_W_VLAN(e->vlan & VLAN_VID_MASK) |
			    V_L2T_W_PRIO(vlan_prio(e)));
	memcpy(e->dmac, e->neigh->ha, sizeof(e->dmac));
	memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
	skb->priority = CPL_PRIORITY_CONTROL;
	cxgb3_ofld_send(dev, skb);

	skb_queue_walk_safe(&e->arpq, skb, tmp) {
		__skb_unlink(skb, &e->arpq);
		cxgb3_ofld_send(dev, skb);
	}
	e->state = L2T_STATE_VALID;

	return 0;
}

/*
 * Add a packet to an L2T entry's queue of packets awaiting resolution.
 * Must be called with the entry's lock held.
 */
static inline void arpq_enqueue(struct l2t_entry *e, struct sk_buff *skb)
{
	__skb_queue_tail(&e->arpq, skb);
}

int t3_l2t_send_slow(struct t3cdev *dev, struct sk_buff *skb,
		     struct l2t_entry *e)
{
again:
	switch (e->state) {
	case L2T_STATE_STALE:	/* entry is stale, kick off revalidation */
		neigh_event_send(e->neigh, NULL);
		spin_lock_bh(&e->lock);
		if (e->state == L2T_STATE_STALE)
			e->state = L2T_STATE_VALID;
		spin_unlock_bh(&e->lock);
		fallthrough;
	case L2T_STATE_VALID:	/* fast-path, send the packet on */
		return cxgb3_ofld_send(dev, skb);
	case L2T_STATE_RESOLVING:
		spin_lock_bh(&e->lock);
		if (e->state != L2T_STATE_RESOLVING) {
			/* ARP already completed */
			spin_unlock_bh(&e->lock);
			goto again;
		}
		arpq_enqueue(e, skb);
		spin_unlock_bh(&e->lock);

		/*
		 * Only the first packet added to the arpq should kick off
		 * resolution.  However, because the alloc_skb below can fail,
		 * we allow each packet added to the arpq to retry resolution
		 * as a way of recovering from transient memory exhaustion.
		 * A better way would be to use a work request to retry L2T
		 * entries when there's no memory.
		 */
		if (!neigh_event_send(e->neigh, NULL)) {
			skb = alloc_skb(sizeof(struct cpl_l2t_write_req),
					GFP_ATOMIC);
			if (!skb)
				break;

			spin_lock_bh(&e->lock);
			if (!skb_queue_empty(&e->arpq))
				setup_l2e_send_pending(dev, skb, e);
			else	/* we lost the race */
				__kfree_skb(skb);
			spin_unlock_bh(&e->lock);
		}
	}
	return 0;
}

EXPORT_SYMBOL(t3_l2t_send_slow);
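
/*
 * Illustration only: how a transmit path might use t3_l2t_send_slow().  A
 * hot path would typically test for L2T_STATE_VALID itself, locklessly, and
 * fall back to the slow path only when the entry is not ready.  This wrapper
 * is a sketch, not the driver's actual fast-path helper.
 */
#if 0
static int example_l2t_send(struct t3cdev *dev, struct sk_buff *skb,
			    struct l2t_entry *e)
{
	if (likely(e->state == L2T_STATE_VALID))	/* common case */
		return cxgb3_ofld_send(dev, skb);
	return t3_l2t_send_slow(dev, skb, e);	/* may park skb on the arpq */
}
#endif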

void t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e)
{
again:
	switch (e->state) {
	case L2T_STATE_STALE:	/* entry is stale, kick off revalidation */
		neigh_event_send(e->neigh, NULL);
		spin_lock_bh(&e->lock);
		if (e->state == L2T_STATE_STALE) {
			e->state = L2T_STATE_VALID;
		}
		spin_unlock_bh(&e->lock);
		return;
	case L2T_STATE_VALID:	/* fast-path, send the packet on */
		return;
	case L2T_STATE_RESOLVING:
		spin_lock_bh(&e->lock);
		if (e->state != L2T_STATE_RESOLVING) {
			/* ARP already completed */
			spin_unlock_bh(&e->lock);
			goto again;
		}
		spin_unlock_bh(&e->lock);

		/*
		 * Only the first packet added to the arpq should kick off
		 * resolution; see the comment in t3_l2t_send_slow() above
		 * for why resolution is nevertheless retried on every call.
		 */
		neigh_event_send(e->neigh, NULL);
	}
}

EXPORT_SYMBOL(t3_l2t_send_event);

/*
 * Allocate a free L2T entry.  Must be called with l2t_data.lock held.
 */
static struct l2t_entry *alloc_l2e(struct l2t_data *d)
{
	struct l2t_entry *end, *e, **p;

	if (!atomic_read(&d->nfree))
		return NULL;

	/* there's definitely a free entry */
	for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e)
		if (atomic_read(&e->refcnt) == 0)
			goto found;

	/* wrap: restart the scan from entry 1 (entry 0 is reserved) */
	for (e = &d->l2tab[1]; atomic_read(&e->refcnt); ++e)
		;
found:
	d->rover = e + 1;
	atomic_dec(&d->nfree);

	/*
	 * The entry we found may be an inactive entry that is
	 * presently in the hash table.  We need to remove it.
	 */
	if (e->state != L2T_STATE_UNUSED) {
		int hash = arp_hash(e->addr, e->ifindex, d);

		for (p = &d->l2tab[hash].first; *p; p = &(*p)->next)
			if (*p == e) {
				*p = e->next;
				break;
			}
		e->state = L2T_STATE_UNUSED;
	}
	return e;
}

/*
 * Called when an L2T entry has no more users.  The entry is left in the hash
 * table since it is likely to be reused but we also bump nfree to indicate
 * that the entry can be reallocated for a different neighbor.  We also drop
 * the existing neighbor reference in case the neighbor is going away and is
 * waiting on our reference.
 *
 * Because entries can be reallocated to other neighbors once their ref count
 * drops to 0 we need to take the entry's lock to avoid races with a new
 * incarnation.
 */
void t3_l2e_free(struct l2t_data *d, struct l2t_entry *e)
{
	spin_lock_bh(&e->lock);
	if (atomic_read(&e->refcnt) == 0) {	/* hasn't been recycled */
		if (e->neigh) {
			neigh_release(e->neigh);
			e->neigh = NULL;
		}
	}
	spin_unlock_bh(&e->lock);
	atomic_inc(&d->nfree);
}

EXPORT_SYMBOL(t3_l2e_free);
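
/*
 * Illustration only: t3_l2e_free() is meant to run once the last reference
 * from t3_l2t_get() is dropped.  A release helper along these lines is
 * assumed ("example_l2t_release" is a hypothetical name); it pairs with the
 * atomic_set(&e->refcnt, 1) and l2t_hold() in t3_l2t_get() below.
 */
#if 0
static void example_l2t_release(struct l2t_data *d, struct l2t_entry *e)
{
	if (atomic_dec_and_test(&e->refcnt))	/* last user gone */
		t3_l2e_free(d, e);	/* entry stays cached for possible reuse */
}
#endif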

/*
 * Update an L2T entry that was previously used for the same next hop as neigh.
 * Must be called with softirqs disabled.
 */
static inline void reuse_entry(struct l2t_entry *e, struct neighbour *neigh)
{
	unsigned int nud_state;

	spin_lock(&e->lock);	/* avoid race with t3_l2e_free */

	if (neigh != e->neigh)
		neigh_replace(e, neigh);
	nud_state = neigh->nud_state;
	if (memcmp(e->dmac, neigh->ha, sizeof(e->dmac)) ||
	    !(nud_state & NUD_VALID))
		e->state = L2T_STATE_RESOLVING;
	else if (nud_state & NUD_CONNECTED)
		e->state = L2T_STATE_VALID;
	else
		e->state = L2T_STATE_STALE;
	spin_unlock(&e->lock);
}

struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct dst_entry *dst,
			     struct net_device *dev, const void *daddr)
{
	struct l2t_entry *e = NULL;
	struct neighbour *neigh;
	struct port_info *p;
	struct l2t_data *d;
	int hash;
	u32 addr;
	int ifidx;
	int smt_idx;

	rcu_read_lock();
	neigh = dst_neigh_lookup(dst, daddr);
	if (!neigh)
		goto done_rcu;

	addr = *(u32 *) neigh->primary_key;
	ifidx = neigh->dev->ifindex;

	if (!dev)
		dev = neigh->dev;
	p = netdev_priv(dev);
	smt_idx = p->port_id;

	d = L2DATA(cdev);
	if (!d)
		goto done_rcu;

	hash = arp_hash(addr, ifidx, d);

	write_lock_bh(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next)
		if (e->addr == addr && e->ifindex == ifidx &&
		    e->smt_idx == smt_idx) {
			l2t_hold(d, e);
			/* entry was idle; resync with current neighbour state */
			if (atomic_read(&e->refcnt) == 1)
				reuse_entry(e, neigh);
			goto done_unlock;
		}

	/* Need to allocate a new entry */
	e = alloc_l2e(d);
	if (e) {
		spin_lock(&e->lock);	/* avoid race with t3_l2e_free */
		e->next = d->l2tab[hash].first;
		d->l2tab[hash].first = e;
		e->state = L2T_STATE_RESOLVING;
		e->addr = addr;
		e->ifindex = ifidx;
		e->smt_idx = smt_idx;
		atomic_set(&e->refcnt, 1);
		neigh_replace(e, neigh);
		if (is_vlan_dev(neigh->dev))
			e->vlan = vlan_dev_vlan_id(neigh->dev);
		else
			e->vlan = VLAN_NONE;
		spin_unlock(&e->lock);
	}
done_unlock:
	write_unlock_bh(&d->lock);
done_rcu:
	if (neigh)
		neigh_release(neigh);
	rcu_read_unlock();
	return e;
}

EXPORT_SYMBOL(t3_l2t_get);
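
/*
 * Illustration only: a sketch of how an offload connection setup path might
 * obtain and keep an L2T entry for a route.  The function name and error
 * handling are hypothetical; a real caller stores the entry so the reference
 * taken here can be dropped when the connection is torn down.
 */
#if 0
static struct l2t_entry *example_open_conn(struct t3cdev *cdev,
					   struct dst_entry *dst,
					   struct net_device *egress_dev,
					   const void *daddr)
{
	struct l2t_entry *e;

	e = t3_l2t_get(cdev, dst, egress_dev, daddr);	/* takes a reference */
	if (!e)
		return NULL;	/* table full or neighbour lookup failed */

	/* ... issue connection-open work requests that name e->idx ... */
	return e;	/* caller holds e until the connection dies */
}
#endif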

/*
 * Called when address resolution fails for an L2T entry to handle packets
 * on the arpq head.  If a packet specifies a failure handler it is invoked,
 * otherwise the packet is sent to the offload device.
 *
 * XXX: maybe we should abandon the latter behavior and just require a failure
 * handler.
 */
static void handle_failed_resolution(struct t3cdev *dev, struct sk_buff_head *arpq)
{
	struct sk_buff *skb, *tmp;

	skb_queue_walk_safe(arpq, skb, tmp) {
		struct l2t_skb_cb *cb = L2T_SKB_CB(skb);

		__skb_unlink(skb, arpq);
		if (cb->arp_failure_handler)
			cb->arp_failure_handler(dev, skb);
		else
			cxgb3_ofld_send(dev, skb);
	}
}

/*
 * Called when the host's ARP layer makes a change to some entry that is
 * loaded into the HW L2 table.
 */
void t3_l2t_update(struct t3cdev *dev, struct neighbour *neigh)
{
	struct sk_buff_head arpq;
	struct l2t_entry *e;
	struct l2t_data *d = L2DATA(dev);
	u32 addr = *(u32 *) neigh->primary_key;
	int ifidx = neigh->dev->ifindex;
	int hash = arp_hash(addr, ifidx, d);

	read_lock_bh(&d->lock);
	for (e = d->l2tab[hash].first; e; e = e->next)
		if (e->addr == addr && e->ifindex == ifidx) {
			spin_lock(&e->lock);
			goto found;
		}
	read_unlock_bh(&d->lock);
	return;

found:
	__skb_queue_head_init(&arpq);

	/* not _bh: BHs stay disabled until the spin_unlock_bh() below */
	read_unlock(&d->lock);
	if (atomic_read(&e->refcnt)) {
		if (neigh != e->neigh)
			neigh_replace(e, neigh);

		if (e->state == L2T_STATE_RESOLVING) {
			if (neigh->nud_state & NUD_FAILED) {
				skb_queue_splice_init(&e->arpq, &arpq);
			} else if (neigh->nud_state & (NUD_CONNECTED|NUD_STALE))
				setup_l2e_send_pending(dev, NULL, e);
		} else {
			e->state = neigh->nud_state & NUD_CONNECTED ?
			    L2T_STATE_VALID : L2T_STATE_STALE;
			if (!ether_addr_equal(e->dmac, neigh->ha))
				setup_l2e_send_pending(dev, NULL, e);
		}
	}
	spin_unlock_bh(&e->lock);

	if (!skb_queue_empty(&arpq))
		handle_failed_resolution(dev, &arpq);
}

struct l2t_data *t3_init_l2t(unsigned int l2t_capacity)
{
	struct l2t_data *d;
	int i;

	d = kvzalloc(struct_size(d, l2tab, l2t_capacity), GFP_KERNEL);
	if (!d)
		return NULL;

	d->nentries = l2t_capacity;
	d->rover = &d->l2tab[1];	/* entry 0 is not used */
	atomic_set(&d->nfree, l2t_capacity - 1);
	rwlock_init(&d->lock);

	for (i = 0; i < l2t_capacity; ++i) {
		d->l2tab[i].idx = i;
		d->l2tab[i].state = L2T_STATE_UNUSED;
		__skb_queue_head_init(&d->l2tab[i].arpq);
		spin_lock_init(&d->l2tab[i].lock);
		atomic_set(&d->l2tab[i].refcnt, 0);
	}
	return d;
}
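
/*
 * Illustration only: initializing the table.  arp_hash() masks with
 * (nentries - 1), so l2t_capacity must be a power of two; entry 0 is
 * reserved, hence rover starts at index 1 and nfree is capacity - 1.  The
 * capacity below is an arbitrary example value.
 */
#if 0
static struct l2t_data *example_init(void)
{
	return t3_init_l2t(2048);	/* power of 2; NULL if kvzalloc fails */
}
#endif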