From e5b13cb10de209f924fdf9478214bcf7e4008d6d Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 28 Feb 2008 20:51:43 -0800 Subject: [NETNS]: Process devinet ioctl in the correct namespace. Add namespace parameter to devinet_ioctl and locate device inside it for state changes. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index fc4e3db649e8..da05ab47ff2f 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -129,7 +129,7 @@ extern int unregister_inetaddr_notifier(struct notifier_block *nb); extern struct net_device *ip_dev_find(struct net *net, __be32 addr); extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); -extern int devinet_ioctl(unsigned int cmd, void __user *); +extern int devinet_ioctl(struct net *net, unsigned int cmd, void __user *); extern void devinet_init(void); extern struct in_device *inetdev_by_index(struct net *, int); extern __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); -- cgit v1.2.3-71-gd317 From 4c563f7669c10a12354b72b518c2287ffc6ebfb3 Mon Sep 17 00:00:00 2001 From: Timo Teras Date: Thu, 28 Feb 2008 21:31:08 -0800 Subject: [XFRM]: Speed up xfrm_policy and xfrm_state walking Change xfrm_policy and xfrm_state walking algorithm from O(n^2) to O(n). This is achieved adding the entries to one more list which is used solely for walking the entries. This also fixes some races where the dump can have duplicate or missing entries when the SPD/SADB is modified during an ongoing dump. Dumping SADB with 20000 entries using "time ip xfrm state" the sys time dropped from 1.012s to 0.080s. Signed-off-by: Timo Teras Signed-off-by: David S. Miller --- include/linux/xfrm.h | 3 +- include/net/xfrm.h | 52 +++++++++++++++++++++++++++++++-- net/key/af_key.c | 24 ++++++++++++--- net/xfrm/xfrm_policy.c | 79 +++++++++++++++++++++++++++++--------------------- net/xfrm/xfrm_state.c | 53 ++++++++++++++++++++++----------- net/xfrm/xfrm_user.c | 71 +++++++++++++++++++++++++++------------------ 6 files changed, 197 insertions(+), 85 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index e31b8c84f2c9..0c82c80b277f 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -113,7 +113,8 @@ enum { XFRM_POLICY_TYPE_MAIN = 0, XFRM_POLICY_TYPE_SUB = 1, - XFRM_POLICY_TYPE_MAX = 2 + XFRM_POLICY_TYPE_MAX = 2, + XFRM_POLICY_TYPE_ANY = 255 }; enum diff --git a/include/net/xfrm.h b/include/net/xfrm.h index eea7785cc757..9b6205665190 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -121,6 +121,7 @@ extern struct mutex xfrm_cfg_mutex; struct xfrm_state { /* Note: bydst is re-used during gc */ + struct list_head all; struct hlist_node bydst; struct hlist_node bysrc; struct hlist_node byspi; @@ -424,6 +425,7 @@ struct xfrm_tmpl struct xfrm_policy { struct xfrm_policy *next; + struct list_head bytype; struct hlist_node bydst; struct hlist_node byidx; @@ -1160,6 +1162,18 @@ struct xfrm6_tunnel { int priority; }; +struct xfrm_state_walk { + struct xfrm_state *state; + int count; + u8 proto; +}; + +struct xfrm_policy_walk { + struct xfrm_policy *policy; + int count; + u8 type, cur_type; +}; + extern void xfrm_init(void); extern void xfrm4_init(void); extern void xfrm_state_init(void); @@ -1184,7 +1198,23 @@ static inline void xfrm6_fini(void) extern int xfrm_proc_init(void); #endif -extern int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), void *); +static inline void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) +{ + walk->proto = proto; + walk->state = NULL; + walk->count = 0; +} + +static inline void xfrm_state_walk_done(struct xfrm_state_walk *walk) +{ + if (walk->state != NULL) { + xfrm_state_put(walk->state); + walk->state = NULL; + } +} + +extern int xfrm_state_walk(struct xfrm_state_walk *walk, + int (*func)(struct xfrm_state *, int, void*), void *); extern struct xfrm_state *xfrm_state_alloc(void); extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct flowi *fl, struct xfrm_tmpl *tmpl, @@ -1306,7 +1336,25 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) #endif struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp); -extern int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *); + +static inline void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) +{ + walk->cur_type = XFRM_POLICY_TYPE_MAIN; + walk->type = type; + walk->policy = NULL; + walk->count = 0; +} + +static inline void xfrm_policy_walk_done(struct xfrm_policy_walk *walk) +{ + if (walk->policy != NULL) { + xfrm_pol_put(walk->policy); + walk->policy = NULL; + } +} + +extern int xfrm_policy_walk(struct xfrm_policy_walk *walk, + int (*func)(struct xfrm_policy *, int, int, void*), void *); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, struct xfrm_selector *sel, diff --git a/net/key/af_key.c b/net/key/af_key.c index 8b5f486ac80f..7cb6f1213360 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1742,12 +1742,18 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr { u8 proto; struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk }; + struct xfrm_state_walk walk; + int rc; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return -EINVAL; - return xfrm_state_walk(proto, dump_sa, &data); + xfrm_state_walk_init(&walk, proto); + rc = xfrm_state_walk(&walk, dump_sa, &data); + xfrm_state_walk_done(&walk); + + return rc; } static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) @@ -1780,7 +1786,9 @@ static int check_reqid(struct xfrm_policy *xp, int dir, int count, void *ptr) static u32 gen_reqid(void) { + struct xfrm_policy_walk walk; u32 start; + int rc; static u32 reqid = IPSEC_MANUAL_REQID_MAX; start = reqid; @@ -1788,8 +1796,10 @@ static u32 gen_reqid(void) ++reqid; if (reqid == 0) reqid = IPSEC_MANUAL_REQID_MAX+1; - if (xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, check_reqid, - (void*)&reqid) != -EEXIST) + xfrm_policy_walk_init(&walk, XFRM_POLICY_TYPE_MAIN); + rc = xfrm_policy_walk(&walk, check_reqid, (void*)&reqid); + xfrm_policy_walk_done(&walk); + if (rc != -EEXIST) return reqid; } while (reqid != start); return 0; @@ -2665,8 +2675,14 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk }; + struct xfrm_policy_walk walk; + int rc; + + xfrm_policy_walk_init(&walk, XFRM_POLICY_TYPE_MAIN); + rc = xfrm_policy_walk(&walk, dump_sp, &data); + xfrm_policy_walk_done(&walk); - return xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_sp, &data); + return rc; } static int key_notify_policy_flush(struct km_event *c) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9fc4c315f6cd..bae94a8031a2 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -46,6 +46,7 @@ EXPORT_SYMBOL(xfrm_cfg_mutex); static DEFINE_RWLOCK(xfrm_policy_lock); +static struct list_head xfrm_policy_bytype[XFRM_POLICY_TYPE_MAX]; unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; EXPORT_SYMBOL(xfrm_policy_count); @@ -208,6 +209,7 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { + INIT_LIST_HEAD(&policy->bytype); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); @@ -230,6 +232,10 @@ void xfrm_policy_destroy(struct xfrm_policy *policy) if (del_timer(&policy->timer)) BUG(); + write_lock_bh(&xfrm_policy_lock); + list_del(&policy->bytype); + write_unlock_bh(&xfrm_policy_lock); + security_xfrm_policy_free(policy); kfree(policy); } @@ -584,6 +590,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); + list_add_tail(&policy->bytype, &xfrm_policy_bytype[policy->type]); write_unlock_bh(&xfrm_policy_lock); if (delpol) @@ -822,57 +829,60 @@ out: } EXPORT_SYMBOL(xfrm_policy_flush); -int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), +int xfrm_policy_walk(struct xfrm_policy_walk *walk, + int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *pol, *last = NULL; - struct hlist_node *entry; - int dir, last_dir = 0, count, error; + struct xfrm_policy *old, *pol, *last = NULL; + int error = 0; + + if (walk->type >= XFRM_POLICY_TYPE_MAX && + walk->type != XFRM_POLICY_TYPE_ANY) + return -EINVAL; + if (walk->policy == NULL && walk->count != 0) + return 0; + + old = pol = walk->policy; + walk->policy = NULL; read_lock_bh(&xfrm_policy_lock); - count = 0; - for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - struct hlist_head *table = xfrm_policy_bydst[dir].table; - int i; + for (; walk->cur_type < XFRM_POLICY_TYPE_MAX; walk->cur_type++) { + if (walk->type != walk->cur_type && + walk->type != XFRM_POLICY_TYPE_ANY) + continue; - hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { - if (pol->type != type) + if (pol == NULL) { + pol = list_first_entry(&xfrm_policy_bytype[walk->cur_type], + struct xfrm_policy, bytype); + } + list_for_each_entry_from(pol, &xfrm_policy_bytype[walk->cur_type], bytype) { + if (pol->dead) continue; if (last) { - error = func(last, last_dir % XFRM_POLICY_MAX, - count, data); - if (error) + error = func(last, xfrm_policy_id2dir(last->index), + walk->count, data); + if (error) { + xfrm_pol_hold(last); + walk->policy = last; goto out; - } - last = pol; - last_dir = dir; - count++; - } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { - hlist_for_each_entry(pol, entry, table + i, bydst) { - if (pol->type != type) - continue; - if (last) { - error = func(last, last_dir % XFRM_POLICY_MAX, - count, data); - if (error) - goto out; } - last = pol; - last_dir = dir; - count++; } + last = pol; + walk->count++; } + pol = NULL; } - if (count == 0) { + if (walk->count == 0) { error = -ENOENT; goto out; } - error = func(last, last_dir % XFRM_POLICY_MAX, 0, data); + if (last) + error = func(last, xfrm_policy_id2dir(last->index), 0, data); out: read_unlock_bh(&xfrm_policy_lock); + if (old != NULL) + xfrm_pol_put(old); return error; } EXPORT_SYMBOL(xfrm_policy_walk); @@ -2365,6 +2375,9 @@ static void __init xfrm_policy_init(void) panic("XFRM: failed to allocate bydst hash\n"); } + for (dir = 0; dir < XFRM_POLICY_TYPE_MAX; dir++) + INIT_LIST_HEAD(&xfrm_policy_bytype[dir]); + INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task); register_netdevice_notifier(&xfrm_dev_notifier); } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7ba65e82941c..9880b792e6a5 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -50,6 +50,7 @@ static DEFINE_SPINLOCK(xfrm_state_lock); * Main use is finding SA after policy selected tunnel or transport mode. * Also, it can be used by ah/esp icmp error handler to find offending SA. */ +static LIST_HEAD(xfrm_state_all); static struct hlist_head *xfrm_state_bydst __read_mostly; static struct hlist_head *xfrm_state_bysrc __read_mostly; static struct hlist_head *xfrm_state_byspi __read_mostly; @@ -510,6 +511,7 @@ struct xfrm_state *xfrm_state_alloc(void) if (x) { atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); + INIT_LIST_HEAD(&x->all); INIT_HLIST_NODE(&x->bydst); INIT_HLIST_NODE(&x->bysrc); INIT_HLIST_NODE(&x->byspi); @@ -533,6 +535,10 @@ void __xfrm_state_destroy(struct xfrm_state *x) { BUG_TRAP(x->km.state == XFRM_STATE_DEAD); + spin_lock_bh(&xfrm_state_lock); + list_del(&x->all); + spin_unlock_bh(&xfrm_state_lock); + spin_lock_bh(&xfrm_state_gc_lock); hlist_add_head(&x->bydst, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); @@ -909,6 +915,8 @@ static void __xfrm_state_insert(struct xfrm_state *x) x->genid = ++xfrm_state_genid; + list_add_tail(&x->all, &xfrm_state_all); + h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); hlist_add_head(&x->bydst, xfrm_state_bydst+h); @@ -1518,36 +1526,47 @@ unlock: } EXPORT_SYMBOL(xfrm_alloc_spi); -int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), +int xfrm_state_walk(struct xfrm_state_walk *walk, + int (*func)(struct xfrm_state *, int, void*), void *data) { - int i; - struct xfrm_state *x, *last = NULL; - struct hlist_node *entry; - int count = 0; + struct xfrm_state *old, *x, *last = NULL; int err = 0; + if (walk->state == NULL && walk->count != 0) + return 0; + + old = x = walk->state; + walk->state = NULL; spin_lock_bh(&xfrm_state_lock); - for (i = 0; i <= xfrm_state_hmask; i++) { - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { - if (!xfrm_id_proto_match(x->id.proto, proto)) - continue; - if (last) { - err = func(last, count, data); - if (err) - goto out; + if (x == NULL) + x = list_first_entry(&xfrm_state_all, struct xfrm_state, all); + list_for_each_entry_from(x, &xfrm_state_all, all) { + if (x->km.state == XFRM_STATE_DEAD) + continue; + if (!xfrm_id_proto_match(x->id.proto, walk->proto)) + continue; + if (last) { + err = func(last, walk->count, data); + if (err) { + xfrm_state_hold(last); + walk->state = last; + goto out; } - last = x; - count++; } + last = x; + walk->count++; } - if (count == 0) { + if (walk->count == 0) { err = -ENOENT; goto out; } - err = func(last, 0, data); + if (last) + err = func(last, 0, data); out: spin_unlock_bh(&xfrm_state_lock); + if (old != NULL) + xfrm_state_put(old); return err; } EXPORT_SYMBOL(xfrm_state_walk); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f971ca5645f8..f5fd5b3147cc 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -532,8 +532,6 @@ struct xfrm_dump_info { struct sk_buff *out_skb; u32 nlmsg_seq; u16 nlmsg_flags; - int start_idx; - int this_idx; }; static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) @@ -600,9 +598,6 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) struct nlmsghdr *nlh; int err; - if (sp->this_idx < sp->start_idx) - goto out; - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, XFRM_MSG_NEWSA, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) @@ -615,8 +610,6 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) goto nla_put_failure; nlmsg_end(skb, nlh); -out: - sp->this_idx++; return 0; nla_put_failure: @@ -624,18 +617,32 @@ nla_put_failure: return err; } +static int xfrm_dump_sa_done(struct netlink_callback *cb) +{ + struct xfrm_state_walk *walk = (struct xfrm_state_walk *) &cb->args[1]; + xfrm_state_walk_done(walk); + return 0; +} + static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) { + struct xfrm_state_walk *walk = (struct xfrm_state_walk *) &cb->args[1]; struct xfrm_dump_info info; + BUILD_BUG_ON(sizeof(struct xfrm_state_walk) > + sizeof(cb->args) - sizeof(cb->args[0])); + info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; - info.this_idx = 0; - info.start_idx = cb->args[0]; - (void) xfrm_state_walk(0, dump_one_state, &info); - cb->args[0] = info.this_idx; + + if (!cb->args[0]) { + cb->args[0] = 1; + xfrm_state_walk_init(walk, 0); + } + + (void) xfrm_state_walk(walk, dump_one_state, &info); return skb->len; } @@ -654,7 +661,6 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, info.out_skb = skb; info.nlmsg_seq = seq; info.nlmsg_flags = 0; - info.this_idx = info.start_idx = 0; if (dump_one_state(x, 0, &info)) { kfree_skb(skb); @@ -1232,9 +1238,6 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr struct sk_buff *skb = sp->out_skb; struct nlmsghdr *nlh; - if (sp->this_idx < sp->start_idx) - goto out; - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) @@ -1250,8 +1253,6 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr goto nlmsg_failure; nlmsg_end(skb, nlh); -out: - sp->this_idx++; return 0; nlmsg_failure: @@ -1259,21 +1260,33 @@ nlmsg_failure: return -EMSGSIZE; } +static int xfrm_dump_policy_done(struct netlink_callback *cb) +{ + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; + + xfrm_policy_walk_done(walk); + return 0; +} + static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) { + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; struct xfrm_dump_info info; + BUILD_BUG_ON(sizeof(struct xfrm_policy_walk) > + sizeof(cb->args) - sizeof(cb->args[0])); + info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; - info.this_idx = 0; - info.start_idx = cb->args[0]; - (void) xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_one_policy, &info); -#ifdef CONFIG_XFRM_SUB_POLICY - (void) xfrm_policy_walk(XFRM_POLICY_TYPE_SUB, dump_one_policy, &info); -#endif - cb->args[0] = info.this_idx; + + if (!cb->args[0]) { + cb->args[0] = 1; + xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY); + } + + (void) xfrm_policy_walk(walk, dump_one_policy, &info); return skb->len; } @@ -1293,7 +1306,6 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, info.out_skb = skb; info.nlmsg_seq = seq; info.nlmsg_flags = 0; - info.this_idx = info.start_idx = 0; if (dump_one_policy(xp, dir, 0, &info) < 0) { kfree_skb(skb); @@ -1891,15 +1903,18 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { static struct xfrm_link { int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); int (*dump)(struct sk_buff *, struct netlink_callback *); + int (*done)(struct netlink_callback *); } xfrm_dispatch[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = { .doit = xfrm_add_sa }, [XFRM_MSG_DELSA - XFRM_MSG_BASE] = { .doit = xfrm_del_sa }, [XFRM_MSG_GETSA - XFRM_MSG_BASE] = { .doit = xfrm_get_sa, - .dump = xfrm_dump_sa }, + .dump = xfrm_dump_sa, + .done = xfrm_dump_sa_done }, [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy }, [XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy }, [XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy, - .dump = xfrm_dump_policy }, + .dump = xfrm_dump_policy, + .done = xfrm_dump_policy_done }, [XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi }, [XFRM_MSG_ACQUIRE - XFRM_MSG_BASE] = { .doit = xfrm_add_acquire }, [XFRM_MSG_EXPIRE - XFRM_MSG_BASE] = { .doit = xfrm_add_sa_expire }, @@ -1938,7 +1953,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (link->dump == NULL) return -EINVAL; - return netlink_dump_start(xfrm_nl, skb, nlh, link->dump, NULL); + return netlink_dump_start(xfrm_nl, skb, nlh, link->dump, link->done); } err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX, -- cgit v1.2.3-71-gd317 From 9b0f976f27f00a81cf47643d90854659626795b4 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Fri, 29 Feb 2008 11:13:15 -0800 Subject: [INET]: Remove struct net_proto_family* from _init calls. struct net_proto_family* is not used in icmp[v6]_init, ndisc_init, igmp_init and tcp_v4_init. Remove it. Signed-off-by: Denis V. Lunev Acked-by: Daniel Lezcano Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 2 +- include/net/icmp.h | 2 +- include/net/ndisc.h | 4 ++-- include/net/tcp.h | 2 +- net/ipv4/af_inet.c | 4 ++-- net/ipv4/icmp.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/af_inet6.c | 6 +++--- net/ipv6/icmp.c | 2 +- net/ipv6/mcast.c | 2 +- net/ipv6/ndisc.c | 2 +- 11 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 7c5e9817e998..8f86d6b621c8 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -176,7 +176,7 @@ extern void icmpv6_send(struct sk_buff *skb, __u32 info, struct net_device *dev); -extern int icmpv6_init(struct net_proto_family *ops); +extern int icmpv6_init(void); extern int icmpv6_err_convert(int type, int code, int *err); extern void icmpv6_cleanup(void); diff --git a/include/net/icmp.h b/include/net/icmp.h index 9f7ef3c8baef..7bf714d9d7c7 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -48,7 +48,7 @@ struct sk_buff; extern void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info); extern int icmp_rcv(struct sk_buff *skb); extern int icmp_ioctl(struct sock *sk, int cmd, unsigned long arg); -extern void icmp_init(struct net_proto_family *ops); +extern void icmp_init(void); extern void icmp_out_count(unsigned char type); /* Move into dst.h ? */ diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 59b70624b056..5aedf324de66 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -77,7 +77,7 @@ struct nd_opt_hdr { } __attribute__((__packed__)); -extern int ndisc_init(struct net_proto_family *ops); +extern int ndisc_init(void); extern void ndisc_cleanup(void); @@ -107,7 +107,7 @@ extern int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *d /* * IGMP */ -extern int igmp6_init(struct net_proto_family *ops); +extern int igmp6_init(void); extern void igmp6_cleanup(void); diff --git a/include/net/tcp.h b/include/net/tcp.h index 7de4ea3a04d9..ae9774b478f5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1373,7 +1373,7 @@ struct tcp_request_sock_ops { #endif }; -extern void tcp_v4_init(struct net_proto_family *ops); +extern void tcp_v4_init(void); extern void tcp_init(void); #endif /* _TCP_H */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c270080f370e..a7a99ac856dc 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1415,7 +1415,7 @@ static int __init inet_init(void) ip_init(); - tcp_v4_init(&inet_family_ops); + tcp_v4_init(); /* Setup TCP slab cache for open requests. */ tcp_init(); @@ -1430,7 +1430,7 @@ static int __init inet_init(void) * Set the ICMP layer up */ - icmp_init(&inet_family_ops); + icmp_init(); /* * Initialise the multicast router diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a13c074dac09..98372db66c66 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1139,7 +1139,7 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { }, }; -void __init icmp_init(struct net_proto_family *ops) +void __init icmp_init(void) { struct inet_sock *inet; int i; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 00156bf421ca..256032a41069 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2443,7 +2443,7 @@ struct proto tcp_prot = { REF_PROTO_INUSE(tcp) }; -void __init tcp_v4_init(struct net_proto_family *ops) +void __init tcp_v4_init(void) { if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, IPPROTO_TCP) < 0) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index f0aa97738746..9869f87243cf 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -808,13 +808,13 @@ static int __init inet6_init(void) if (err) goto sysctl_fail; #endif - err = icmpv6_init(&inet6_family_ops); + err = icmpv6_init(); if (err) goto icmp_fail; - err = ndisc_init(&inet6_family_ops); + err = ndisc_init(); if (err) goto ndisc_fail; - err = igmp6_init(&inet6_family_ops); + err = igmp6_init(); if (err) goto igmp_fail; err = ipv6_netfilter_init(); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 121d517bf91c..b9b13a77ba30 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -780,7 +780,7 @@ drop_no_count: */ static struct lock_class_key icmpv6_socket_sk_dst_lock_key; -int __init icmpv6_init(struct net_proto_family *ops) +int __init icmpv6_init(void) { struct sock *sk; int err, i, j; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index ab228d1ea114..8ce894d90063 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2597,7 +2597,7 @@ static const struct file_operations igmp6_mcf_seq_fops = { }; #endif -int __init igmp6_init(struct net_proto_family *ops) +int __init igmp6_init(void) { struct ipv6_pinfo *np; struct sock *sk; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0d33a7d32125..1fc33c8c7232 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1733,7 +1733,7 @@ static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, #endif -int __init ndisc_init(struct net_proto_family *ops) +int __init ndisc_init(void) { struct ipv6_pinfo *np; struct sock *sk; -- cgit v1.2.3-71-gd317 From ee688b000d35f413f33561ec9c7d3355be561e2f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 24 Jan 2008 19:38:39 +0100 Subject: nl80211: export hardware bitrate/channel capabilities This makes nl80211 export the hardware bitrate/channel capabilities as registered in a wiphy. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 64 ++++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 9fecf902419c..63695060db9f 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -161,6 +161,9 @@ enum nl80211_commands { * given for %NL80211_CMD_GET_STATION, nested attribute containing * info as possible, see &enum nl80211_sta_stats. * + * @NL80211_ATTR_WIPHY_BANDS: Information about an operating bands, + * consisting of a nested array. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -195,6 +198,8 @@ enum nl80211_attrs { NL80211_ATTR_STA_VLAN, NL80211_ATTR_STA_STATS, + NL80211_ATTR_WIPHY_BANDS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -280,4 +285,63 @@ enum nl80211_sta_stats { NL80211_STA_STAT_MAX = __NL80211_STA_STAT_AFTER_LAST - 1 }; +/** + * enum nl80211_band_attr - band attributes + * @__NL80211_BAND_ATTR_INVALID: attribute number 0 is reserved + * @NL80211_BAND_ATTR_FREQS: supported frequencies in this band, + * an array of nested frequency attributes + * @NL80211_BAND_ATTR_RATES: supported bitrates in this band, + * an array of nested bitrate attributes + */ +enum nl80211_band_attr { + __NL80211_BAND_ATTR_INVALID, + NL80211_BAND_ATTR_FREQS, + NL80211_BAND_ATTR_RATES, + + /* keep last */ + __NL80211_BAND_ATTR_AFTER_LAST, + NL80211_BAND_ATTR_MAX = __NL80211_BAND_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_frequency_attr - frequency attributes + * @NL80211_FREQUENCY_ATTR_FREQ: Frequency in MHz + * @NL80211_FREQUENCY_ATTR_DISABLED: Channel is disabled in current + * regulatory domain. + * @NL80211_FREQUENCY_ATTR_PASSIVE_SCAN: Only passive scanning is + * permitted on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_NO_IBSS: IBSS networks are not permitted + * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_RADAR: Radar detection is mandatory + * on this channel in current regulatory domain. + */ +enum nl80211_frequency_attr { + __NL80211_FREQUENCY_ATTR_INVALID, + NL80211_FREQUENCY_ATTR_FREQ, + NL80211_FREQUENCY_ATTR_DISABLED, + NL80211_FREQUENCY_ATTR_PASSIVE_SCAN, + NL80211_FREQUENCY_ATTR_NO_IBSS, + NL80211_FREQUENCY_ATTR_RADAR, + + /* keep last */ + __NL80211_FREQUENCY_ATTR_AFTER_LAST, + NL80211_FREQUENCY_ATTR_MAX = __NL80211_FREQUENCY_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_bitrate_attr - bitrate attributes + * @NL80211_BITRATE_ATTR_RATE: Bitrate in units of 100 kbps + * @NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE: Short preamble supported + * in 2.4 GHz band. + */ +enum nl80211_bitrate_attr { + __NL80211_BITRATE_ATTR_INVALID, + NL80211_BITRATE_ATTR_RATE, + NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE, + + /* keep last */ + __NL80211_BITRATE_ATTR_AFTER_LAST, + NL80211_BITRATE_ATTR_MAX = __NL80211_BITRATE_ATTR_AFTER_LAST - 1 +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e3a214f63f91..b123f58d3909 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -98,6 +98,13 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct cfg80211_registered_device *dev) { void *hdr; + struct nlattr *nl_bands, *nl_band; + struct nlattr *nl_freqs, *nl_freq; + struct nlattr *nl_rates, *nl_rate; + enum ieee80211_band band; + struct ieee80211_channel *chan; + struct ieee80211_rate *rate; + int i; hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) @@ -105,6 +112,73 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->idx); NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)); + + nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); + if (!nl_bands) + goto nla_put_failure; + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + if (!dev->wiphy.bands[band]) + continue; + + nl_band = nla_nest_start(msg, band); + if (!nl_band) + goto nla_put_failure; + + /* add frequencies */ + nl_freqs = nla_nest_start(msg, NL80211_BAND_ATTR_FREQS); + if (!nl_freqs) + goto nla_put_failure; + + for (i = 0; i < dev->wiphy.bands[band]->n_channels; i++) { + nl_freq = nla_nest_start(msg, i); + if (!nl_freq) + goto nla_put_failure; + + chan = &dev->wiphy.bands[band]->channels[i]; + NLA_PUT_U32(msg, NL80211_FREQUENCY_ATTR_FREQ, + chan->center_freq); + + if (chan->flags & IEEE80211_CHAN_DISABLED) + NLA_PUT_FLAG(msg, NL80211_FREQUENCY_ATTR_DISABLED); + if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) + NLA_PUT_FLAG(msg, NL80211_FREQUENCY_ATTR_PASSIVE_SCAN); + if (chan->flags & IEEE80211_CHAN_NO_IBSS) + NLA_PUT_FLAG(msg, NL80211_FREQUENCY_ATTR_NO_IBSS); + if (chan->flags & IEEE80211_CHAN_RADAR) + NLA_PUT_FLAG(msg, NL80211_FREQUENCY_ATTR_RADAR); + + nla_nest_end(msg, nl_freq); + } + + nla_nest_end(msg, nl_freqs); + + /* add bitrates */ + nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES); + if (!nl_rates) + goto nla_put_failure; + + for (i = 0; i < dev->wiphy.bands[band]->n_bitrates; i++) { + nl_rate = nla_nest_start(msg, i); + if (!nl_rate) + goto nla_put_failure; + + rate = &dev->wiphy.bands[band]->bitrates[i]; + NLA_PUT_U32(msg, NL80211_BITRATE_ATTR_RATE, + rate->bitrate); + if (rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) + NLA_PUT_FLAG(msg, + NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE); + + nla_nest_end(msg, nl_rate); + } + + nla_nest_end(msg, nl_rates); + + nla_nest_end(msg, nl_band); + } + nla_nest_end(msg, nl_bands); + return genlmsg_end(msg, hdr); nla_put_failure: -- cgit v1.2.3-71-gd317 From 66f7ac50ed7cc5c19a62bc97e8f6e7891004a03a Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Thu, 31 Jan 2008 19:48:22 +0100 Subject: nl80211: Add monitor interface configuration flags This allows precise control over what a monitor interface shows. Signed-off-by: Michael Wu Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 35 +++++++++++++++++++++++++++++++++++ include/net/cfg80211.h | 24 ++++++++++++++++++++++-- net/mac80211/cfg.c | 4 ++-- net/wireless/nl80211.c | 44 ++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 101 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 63695060db9f..a9f0b93324a2 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -164,6 +164,9 @@ enum nl80211_commands { * @NL80211_ATTR_WIPHY_BANDS: Information about an operating bands, * consisting of a nested array. * + * @NL80211_ATTR_MNTR_FLAGS: flags, nested element with NLA_FLAG attributes of + * &enum nl80211_mntr_flags. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -200,6 +203,8 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_BANDS, + NL80211_ATTR_MNTR_FLAGS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -344,4 +349,34 @@ enum nl80211_bitrate_attr { NL80211_BITRATE_ATTR_MAX = __NL80211_BITRATE_ATTR_AFTER_LAST - 1 }; +/** + * enum nl80211_mntr_flags - monitor configuration flags + * + * Monitor configuration flags. + * + * @__NL80211_MNTR_FLAG_INVALID: reserved + * + * @NL80211_MNTR_FLAG_FCSFAIL: pass frames with bad FCS + * @NL80211_MNTR_FLAG_PLCPFAIL: pass frames with bad PLCP + * @NL80211_MNTR_FLAG_CONTROL: pass control frames + * @NL80211_MNTR_FLAG_OTHER_BSS: disable BSSID filtering + * @NL80211_MNTR_FLAG_COOK_FRAMES: report frames after processing. + * overrides all other flags. + * + * @__NL80211_MNTR_FLAG_AFTER_LAST: internal use + * @NL80211_MNTR_FLAG_MAX: highest possible monitor flag + */ +enum nl80211_mntr_flags { + __NL80211_MNTR_FLAG_INVALID, + NL80211_MNTR_FLAG_FCSFAIL, + NL80211_MNTR_FLAG_PLCPFAIL, + NL80211_MNTR_FLAG_CONTROL, + NL80211_MNTR_FLAG_OTHER_BSS, + NL80211_MNTR_FLAG_COOK_FRAMES, + + /* keep last */ + __NL80211_MNTR_FLAG_AFTER_LAST, + NL80211_MNTR_FLAG_MAX = __NL80211_MNTR_FLAG_AFTER_LAST - 1 +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index bcc480b8892a..ab4caf63954f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -163,6 +163,26 @@ struct station_stats { u32 tx_bytes; }; +/** + * enum monitor_flags - monitor flags + * + * Monitor interface configuration flags. Note that these must be the bits + * according to the nl80211 flags. + * + * @MONITOR_FLAG_FCSFAIL: pass frames with bad FCS + * @MONITOR_FLAG_PLCPFAIL: pass frames with bad PLCP + * @MONITOR_FLAG_CONTROL: pass control frames + * @MONITOR_FLAG_OTHER_BSS: disable BSSID filtering + * @MONITOR_FLAG_COOK_FRAMES: report frames after processing + */ +enum monitor_flags { + MONITOR_FLAG_FCSFAIL = 1<attrs[NL80211_ATTR_IFTYPE]) { type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); @@ -362,7 +393,11 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) } rtnl_lock(); - err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex, type); + err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? + info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, + &flags); + err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex, + type, err ? NULL : &flags); rtnl_unlock(); unlock: @@ -375,6 +410,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *drv; int err; enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; + u32 flags; if (!info->attrs[NL80211_ATTR_IFNAME]) return -EINVAL; @@ -395,8 +431,12 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) } rtnl_lock(); + err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? + info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, + &flags); err = drv->ops->add_virtual_intf(&drv->wiphy, - nla_data(info->attrs[NL80211_ATTR_IFNAME]), type); + nla_data(info->attrs[NL80211_ATTR_IFNAME]), + type, err ? NULL : &flags); rtnl_unlock(); unlock: -- cgit v1.2.3-71-gd317 From ffc7689ddae5cbe12bde437ae0f2b386d568b5cd Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Wed, 20 Feb 2008 19:08:10 +0100 Subject: ssb: Add support for 8bit register access This adds support for 8bit wide register reads/writes. This is needed in order to support the gigabit ethernet core. Signed-off-by: Michael Buesch Signed-off-by: John W. Linville --- drivers/ssb/main.c | 18 ++++++++++++++++++ drivers/ssb/pci.c | 28 ++++++++++++++++++++++++++++ drivers/ssb/pcmcia.c | 32 ++++++++++++++++++++++++++++++++ include/linux/ssb/ssb.h | 10 ++++++++++ 4 files changed, 88 insertions(+) (limited to 'include/linux') diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c index bedb2b4ee9d2..8db40c4b86e9 100644 --- a/drivers/ssb/main.c +++ b/drivers/ssb/main.c @@ -505,6 +505,14 @@ error: return err; } +static u8 ssb_ssb_read8(struct ssb_device *dev, u16 offset) +{ + struct ssb_bus *bus = dev->bus; + + offset += dev->core_index * SSB_CORE_SIZE; + return readb(bus->mmio + offset); +} + static u16 ssb_ssb_read16(struct ssb_device *dev, u16 offset) { struct ssb_bus *bus = dev->bus; @@ -521,6 +529,14 @@ static u32 ssb_ssb_read32(struct ssb_device *dev, u16 offset) return readl(bus->mmio + offset); } +static void ssb_ssb_write8(struct ssb_device *dev, u16 offset, u8 value) +{ + struct ssb_bus *bus = dev->bus; + + offset += dev->core_index * SSB_CORE_SIZE; + writeb(value, bus->mmio + offset); +} + static void ssb_ssb_write16(struct ssb_device *dev, u16 offset, u16 value) { struct ssb_bus *bus = dev->bus; @@ -539,8 +555,10 @@ static void ssb_ssb_write32(struct ssb_device *dev, u16 offset, u32 value) /* Ops for the plain SSB bus without a host-device (no PCI or PCMCIA). */ static const struct ssb_bus_ops ssb_ssb_ops = { + .read8 = ssb_ssb_read8, .read16 = ssb_ssb_read16, .read32 = ssb_ssb_read32, + .write8 = ssb_ssb_write8, .write16 = ssb_ssb_write16, .write32 = ssb_ssb_write32, }; diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c index b434df75047f..1facc7620fc8 100644 --- a/drivers/ssb/pci.c +++ b/drivers/ssb/pci.c @@ -572,6 +572,19 @@ static inline int ssb_pci_assert_buspower(struct ssb_bus *bus) } #endif /* DEBUG */ +static u8 ssb_pci_read8(struct ssb_device *dev, u16 offset) +{ + struct ssb_bus *bus = dev->bus; + + if (unlikely(ssb_pci_assert_buspower(bus))) + return 0xFF; + if (unlikely(bus->mapped_device != dev)) { + if (unlikely(ssb_pci_switch_core(bus, dev))) + return 0xFF; + } + return ioread8(bus->mmio + offset); +} + static u16 ssb_pci_read16(struct ssb_device *dev, u16 offset) { struct ssb_bus *bus = dev->bus; @@ -598,6 +611,19 @@ static u32 ssb_pci_read32(struct ssb_device *dev, u16 offset) return ioread32(bus->mmio + offset); } +static void ssb_pci_write8(struct ssb_device *dev, u16 offset, u8 value) +{ + struct ssb_bus *bus = dev->bus; + + if (unlikely(ssb_pci_assert_buspower(bus))) + return; + if (unlikely(bus->mapped_device != dev)) { + if (unlikely(ssb_pci_switch_core(bus, dev))) + return; + } + iowrite8(value, bus->mmio + offset); +} + static void ssb_pci_write16(struct ssb_device *dev, u16 offset, u16 value) { struct ssb_bus *bus = dev->bus; @@ -626,8 +652,10 @@ static void ssb_pci_write32(struct ssb_device *dev, u16 offset, u32 value) /* Not "static", as it's used in main.c */ const struct ssb_bus_ops ssb_pci_ops = { + .read8 = ssb_pci_read8, .read16 = ssb_pci_read16, .read32 = ssb_pci_read32, + .write8 = ssb_pci_write8, .write16 = ssb_pci_write16, .write32 = ssb_pci_write32, }; diff --git a/drivers/ssb/pcmcia.c b/drivers/ssb/pcmcia.c index 46816cda8b98..84b3a845a8a8 100644 --- a/drivers/ssb/pcmcia.c +++ b/drivers/ssb/pcmcia.c @@ -172,6 +172,22 @@ static int select_core_and_segment(struct ssb_device *dev, return 0; } +static u8 ssb_pcmcia_read8(struct ssb_device *dev, u16 offset) +{ + struct ssb_bus *bus = dev->bus; + unsigned long flags; + int err; + u8 value = 0xFF; + + spin_lock_irqsave(&bus->bar_lock, flags); + err = select_core_and_segment(dev, &offset); + if (likely(!err)) + value = readb(bus->mmio + offset); + spin_unlock_irqrestore(&bus->bar_lock, flags); + + return value; +} + static u16 ssb_pcmcia_read16(struct ssb_device *dev, u16 offset) { struct ssb_bus *bus = dev->bus; @@ -206,6 +222,20 @@ static u32 ssb_pcmcia_read32(struct ssb_device *dev, u16 offset) return (lo | (hi << 16)); } +static void ssb_pcmcia_write8(struct ssb_device *dev, u16 offset, u8 value) +{ + struct ssb_bus *bus = dev->bus; + unsigned long flags; + int err; + + spin_lock_irqsave(&bus->bar_lock, flags); + err = select_core_and_segment(dev, &offset); + if (likely(!err)) + writeb(value, bus->mmio + offset); + mmiowb(); + spin_unlock_irqrestore(&bus->bar_lock, flags); +} + static void ssb_pcmcia_write16(struct ssb_device *dev, u16 offset, u16 value) { struct ssb_bus *bus = dev->bus; @@ -238,8 +268,10 @@ static void ssb_pcmcia_write32(struct ssb_device *dev, u16 offset, u32 value) /* Not "static", as it's used in main.c */ const struct ssb_bus_ops ssb_pcmcia_ops = { + .read8 = ssb_pcmcia_read8, .read16 = ssb_pcmcia_read16, .read32 = ssb_pcmcia_read32, + .write8 = ssb_pcmcia_write8, .write16 = ssb_pcmcia_write16, .write32 = ssb_pcmcia_write32, }; diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 20add65215af..860d28c6d149 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -72,8 +72,10 @@ struct ssb_device; /* Lowlevel read/write operations on the device MMIO. * Internal, don't use that outside of ssb. */ struct ssb_bus_ops { + u8 (*read8)(struct ssb_device *dev, u16 offset); u16 (*read16)(struct ssb_device *dev, u16 offset); u32 (*read32)(struct ssb_device *dev, u16 offset); + void (*write8)(struct ssb_device *dev, u16 offset, u8 value); void (*write16)(struct ssb_device *dev, u16 offset, u16 value); void (*write32)(struct ssb_device *dev, u16 offset, u32 value); }; @@ -344,6 +346,10 @@ void ssb_device_disable(struct ssb_device *dev, u32 core_specific_flags); /* Device MMIO register read/write functions. */ +static inline u8 ssb_read8(struct ssb_device *dev, u16 offset) +{ + return dev->ops->read8(dev, offset); +} static inline u16 ssb_read16(struct ssb_device *dev, u16 offset) { return dev->ops->read16(dev, offset); @@ -352,6 +358,10 @@ static inline u32 ssb_read32(struct ssb_device *dev, u16 offset) { return dev->ops->read32(dev, offset); } +static inline void ssb_write8(struct ssb_device *dev, u16 offset, u8 value) +{ + dev->ops->write8(dev, offset, value); +} static inline void ssb_write16(struct ssb_device *dev, u16 offset, u16 value) { dev->ops->write16(dev, offset, value); -- cgit v1.2.3-71-gd317 From 28de57d1a9eb7e67badb731297197fcbef0cc19e Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Fri, 22 Feb 2008 16:14:58 +0100 Subject: ssb: Add CHIPCO IRQ access functions This patch adds functions to setup and read the CHIPCO IRQ. Signed-off-by: Aurelien Jarno Signed-off-by: Michael Buesch Signed-off-by: John W. Linville --- drivers/ssb/driver_chipcommon.c | 10 ++++++++++ include/linux/ssb/ssb_driver_chipcommon.h | 4 ++++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/drivers/ssb/driver_chipcommon.c b/drivers/ssb/driver_chipcommon.c index e586321a473a..45b672a69003 100644 --- a/drivers/ssb/driver_chipcommon.c +++ b/drivers/ssb/driver_chipcommon.c @@ -353,6 +353,16 @@ void ssb_chipco_watchdog_timer_set(struct ssb_chipcommon *cc, u32 ticks) chipco_write32(cc, SSB_CHIPCO_WATCHDOG, ticks); } +void ssb_chipco_irq_mask(struct ssb_chipcommon *cc, u32 mask, u32 value) +{ + chipco_write32_masked(cc, SSB_CHIPCO_IRQMASK, mask, value); +} + +u32 ssb_chipco_irq_status(struct ssb_chipcommon *cc, u32 mask) +{ + return chipco_read32(cc, SSB_CHIPCO_IRQSTAT) & mask; +} + u32 ssb_chipco_gpio_in(struct ssb_chipcommon *cc, u32 mask) { return chipco_read32(cc, SSB_CHIPCO_GPIOIN) & mask; diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h index 536851b946f6..b548a54ff1f5 100644 --- a/include/linux/ssb/ssb_driver_chipcommon.h +++ b/include/linux/ssb/ssb_driver_chipcommon.h @@ -390,6 +390,10 @@ extern void ssb_chipco_set_clockmode(struct ssb_chipcommon *cc, extern void ssb_chipco_watchdog_timer_set(struct ssb_chipcommon *cc, u32 ticks); +void ssb_chipco_irq_mask(struct ssb_chipcommon *cc, u32 mask, u32 value); + +u32 ssb_chipco_irq_status(struct ssb_chipcommon *cc, u32 mask); + /* Chipcommon GPIO pin access. */ u32 ssb_chipco_gpio_in(struct ssb_chipcommon *cc, u32 mask); u32 ssb_chipco_gpio_out(struct ssb_chipcommon *cc, u32 mask, u32 value); -- cgit v1.2.3-71-gd317 From 988b705077d8f922408913f4f521ae073256d4a1 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 3 Mar 2008 12:20:57 -0800 Subject: [ARP]: Introduce the arp_hdr_len helper. There are some place, that calculate the ARP header length. These calculations are correct, but a) some operate with "magic" constants, b) enlarge the code length (sometimes at the cost of coding style), c) are not informative from the first glance. The proposal is to introduce a helper, that includes all the good sides of these calculations. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 5 +---- include/linux/if_arp.h | 6 ++++++ net/core/netpoll.c | 6 ++---- net/ipv4/arp.c | 9 +++------ net/ipv4/ipconfig.c | 5 +---- net/ipv4/netfilter/arp_tables.c | 5 +---- 6 files changed, 14 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 966643473da7..5fc9d8d58ece 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2646,10 +2646,7 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack if (!slave || !slave_do_arp_validate(bond, slave)) goto out_unlock; - /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ - if (!pskb_may_pull(skb, (sizeof(struct arphdr) + - (2 * dev->addr_len) + - (2 * sizeof(u32))))) + if (!pskb_may_pull(skb, arp_hdr_len(dev))) goto out_unlock; arp = arp_hdr(skb); diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 296e8e86e91d..4d3401812e6c 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -156,6 +156,12 @@ static inline struct arphdr *arp_hdr(const struct sk_buff *skb) { return (struct arphdr *)skb_network_header(skb); } + +static inline int arp_hdr_len(struct net_device *dev) +{ + /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ + return sizeof(struct arphdr) + (dev->addr_len + sizeof(u32)) * 2; +} #endif #endif /* _LINUX_IF_ARP_H */ diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 6faa128a4c8e..7ae98659d79d 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -384,9 +384,7 @@ static void arp_reply(struct sk_buff *skb) if (skb->dev->flags & IFF_NOARP) return; - if (!pskb_may_pull(skb, (sizeof(struct arphdr) + - (2 * skb->dev->addr_len) + - (2 * sizeof(u32))))) + if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) return; skb_reset_network_header(skb); @@ -414,7 +412,7 @@ static void arp_reply(struct sk_buff *skb) ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) return; - size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4); + size = arp_hdr_len(skb->dev); send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev), LL_RESERVED_SPACE(np->dev)); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 8e17f65f4002..69e80bd9774a 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -570,14 +570,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, * Allocate a buffer */ - skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4) - + LL_RESERVED_SPACE(dev), GFP_ATOMIC); + skb = alloc_skb(arp_hdr_len(dev) + LL_RESERVED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) return NULL; skb_reserve(skb, LL_RESERVED_SPACE(dev)); skb_reset_network_header(skb); - arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4)); + arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev)); skb->dev = dev; skb->protocol = htons(ETH_P_ARP); if (src_hw == NULL) @@ -916,9 +915,7 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, goto freeskb; /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ - if (!pskb_may_pull(skb, (sizeof(struct arphdr) + - (2 * dev->addr_len) + - (2 * sizeof(u32))))) + if (!pskb_may_pull(skb, arp_hdr_len(dev))) goto freeskb; arp = arp_hdr(skb); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index c90e75a66e81..f84041d1f623 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -459,10 +459,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (rarp->ar_pro != htons(ETH_P_IP)) goto drop; - if (!pskb_may_pull(skb, - sizeof(struct arphdr) + - (2 * dev->addr_len) + - (2 * 4))) + if (!pskb_may_pull(skb, arp_hdr_len(dev))) goto drop; /* OK, it is all there and looks valid, process... */ diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index a7591ce344d2..9b5904486184 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -233,10 +233,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, void *table_base; struct xt_table_info *private; - /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ - if (!pskb_may_pull(skb, (sizeof(struct arphdr) + - (2 * skb->dev->addr_len) + - (2 * sizeof(u32))))) + if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) return NF_DROP; indev = in ? in->name : nulldevname; -- cgit v1.2.3-71-gd317 From e898d4db2749c6052072e9bc4448e396cbdeb06a Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Sat, 1 Mar 2008 01:06:47 +0900 Subject: [UDP]: Allow users to configure UDP-Lite. Let's give users an option for disabling UDP-Lite (~4K). old: | text data bss dec hex filename | 286498 12432 6072 305002 4a76a net/ipv4/built-in.o | 193830 8192 3204 205226 321aa net/ipv6/ipv6.o new (without UDP-Lite): | text data bss dec hex filename | 284086 12136 5432 301654 49a56 net/ipv4/built-in.o | 191835 7832 3076 202743 317f7 net/ipv6/ipv6.o Signed-off-by: YOSHIFUJI Hideaki --- include/linux/udp.h | 11 +++++++++++ include/net/ipv6.h | 5 +++++ include/net/transp_v6.h | 5 +++++ include/net/udplite.h | 9 +++++++-- net/ipv4/Kconfig | 10 ++++++++++ net/ipv4/Makefile | 3 ++- net/ipv4/af_inet.c | 7 ++++++- net/ipv4/proc.c | 5 ++++- net/ipv4/udp.c | 24 ++++++++++++++---------- net/ipv6/Makefile | 3 ++- net/ipv6/af_inet6.c | 14 ++++++++++++++ net/ipv6/ipv6_sockglue.c | 6 +++++- net/ipv6/proc.c | 6 ++++++ net/ipv6/udp.c | 16 ++++++++-------- 14 files changed, 99 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 8ec703f462da..4144664d69d9 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -70,8 +70,10 @@ struct udp_sock { #define UDPLITE_BIT 0x1 /* set by udplite proto init function */ #define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ #define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ +#ifdef CONFIG_IP_UDPLITE __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ __u8 unused[3]; +#endif /* * For encapsulation sockets. */ @@ -82,7 +84,16 @@ static inline struct udp_sock *udp_sk(const struct sock *sk) { return (struct udp_sock *)sk; } + +#ifdef CONFIG_IP_UDPLITE #define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag) +#define IS_PROTO_UDPLITE(__proto) ((__proto) == IPPROTO_UDPLITE) +#define IS_SOL_UDPFAMILY(level) ((level) == SOL_UDP || (level) == SOL_UDPLITE) +#else +#define IS_UDPLITE(__sk) 0 +#define IS_PROTO_UDPLITE(__proto) 0 +#define IS_SOL_UDPFAMILY(level) ((level) == SOL_UDP) +#endif #endif diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8b05c65415cb..96b1763bfcaa 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -603,8 +603,13 @@ extern int tcp6_proc_init(void); extern void tcp6_proc_exit(void); extern int udp6_proc_init(void); extern void udp6_proc_exit(void); +#ifdef CONFIG_IP_UDPLITE extern int udplite6_proc_init(void); extern void udplite6_proc_exit(void); +#else +static inline int udplite6_proc_init(void) { return 0; } +static inline void udplite6_proc_exit(void) { } +#endif extern int ipv6_misc_proc_init(void); extern void ipv6_misc_proc_exit(void); extern int snmp6_register_dev(struct inet6_dev *idev); diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 27394e0447d8..902e6c6bc793 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -27,8 +27,13 @@ extern int rawv6_init(void); extern void rawv6_exit(void); extern int udpv6_init(void); extern void udpv6_exit(void); +#ifdef CONFIG_IP_UDPLITE extern int udplitev6_init(void); extern void udplitev6_exit(void); +#else +static inline int udplitev6_init(void) { return 0; } +static inline void udplitev6_exit(void) { } +#endif extern int tcpv6_init(void); extern void tcpv6_exit(void); diff --git a/include/net/udplite.h b/include/net/udplite.h index b76b2e377af4..01ddb2c20264 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -25,7 +25,9 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, /* Designate sk as UDP-Lite socket */ static inline int udplite_sk_init(struct sock *sk) { +#ifdef CONFIG_IP_UDPLITE udp_sk(sk)->pcflag = UDPLITE_BIT; +#endif return 0; } @@ -69,7 +71,7 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh) { int cscov = up->len; - +#ifdef CONFIG_IP_UDPLITE /* * Sender has set `partial coverage' option on UDP-Lite socket */ @@ -93,13 +95,15 @@ static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh) * illegal, we fall back to the defaults here. */ } +#endif return cscov; } static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) { - int cscov = udplite_sender_cscov(udp_sk(sk), udp_hdr(skb)); __wsum csum = 0; +#ifdef CONFIG_IP_UDPLITE + int cscov = udplite_sender_cscov(udp_sk(sk), udp_hdr(skb)); skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ @@ -112,6 +116,7 @@ static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) if ((cscov -= len) <= 0) break; } +#endif return csum; } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 19880b086e71..efe3832c4ad8 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -632,5 +632,15 @@ config TCP_MD5SIG If unsure, say N. +config IP_UDPLITE + bool "IP: UDP-Lite Protocol (RFC 3828)" + default n + ---help--- + UDP-Lite (RFC 3828) is a UDP-like protocol with variable-length + checksum. Read for + details. + + If unsure, say N. + source "net/ipv4/ipvs/Kconfig" diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ad40ef3f9ebc..e88cebdf3e30 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o \ - datagram.o raw.o udp.o udplite.o \ + datagram.o raw.o udp.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o \ inet_fragment.o @@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o +obj-$(CONFIG_IP_UDPLITE) += udplite.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4f539bd48718..67260c0eaaa8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1317,15 +1317,18 @@ static int __init init_ipv4_mibs(void) if (snmp_mib_init((void **)udp_statistics, sizeof(struct udp_mib)) < 0) goto err_udp_mib; +#ifdef CONFIG_IP_UDPLITE if (snmp_mib_init((void **)udplite_statistics, sizeof(struct udp_mib)) < 0) goto err_udplite_mib; - +#endif tcp_mib_init(); return 0; +#ifdef CONFIG_IP_UDPLITE err_udplite_mib: +#endif snmp_mib_free((void **)udp_statistics); err_udp_mib: snmp_mib_free((void **)tcp_statistics); @@ -1423,8 +1426,10 @@ static int __init inet_init(void) /* Setup UDP memory threshold */ udp_init(); +#ifdef CONFIG_IP_UDPLITE /* Add UDP-Lite (RFC 3828) */ udplite4_register(); +#endif /* * Set the ICMP layer up diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index d63474c6b400..d75ddb7fa4b8 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -59,7 +59,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) atomic_read(&tcp_memory_allocated)); seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot), atomic_read(&udp_memory_allocated)); +#ifdef CONFIG_IP_UDPLITE seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot)); +#endif seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot)); seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues(&init_net), ip_frag_mem(&init_net)); @@ -349,6 +351,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) snmp_fold_field((void **)udp_statistics, snmp4_udp_list[i].entry)); +#ifdef CONFIG_IP_UDPLITE /* the UDP and UDP-Lite MIBs are the same */ seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) @@ -359,7 +362,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_printf(seq, " %lu", snmp_fold_field((void **)udplite_statistics, snmp4_udp_list[i].entry)); - +#endif seq_putc(seq, '\n'); return 0; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7ea1b67b6de1..acc353aa89eb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1127,7 +1127,7 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, UDP_SKB_CB(skb)->partial_cov = 0; UDP_SKB_CB(skb)->cscov = skb->len; - if (proto == IPPROTO_UDPLITE) { + if (IS_PROTO_UDPLITE(proto)) { err = udplite_checksum_init(skb, uh); if (err) return err; @@ -1175,7 +1175,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (ulen > skb->len) goto short_packet; - if (proto == IPPROTO_UDP) { + if (IS_PROTO_UDPLITE(proto)) { /* UDP validates ulen. */ if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) goto short_packet; @@ -1217,7 +1217,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (udp_lib_checksum_complete(skb)) goto csum_error; - UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + UDP_INC_STATS_BH(UDP_MIB_NOPORTS, IS_PROTO_UDPLITE(proto)); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); /* @@ -1229,7 +1229,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], short_packet: LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", - proto == IPPROTO_UDPLITE ? "-Lite" : "", + IS_PROTO_UDPLITE(proto) ? "-Lite" : "", NIPQUAD(saddr), ntohs(uh->source), ulen, @@ -1244,14 +1244,14 @@ csum_error: * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", - proto == IPPROTO_UDPLITE ? "-Lite" : "", + IS_PROTO_UDPLITE(proto) ? "-Lite" : "", NIPQUAD(saddr), ntohs(uh->source), NIPQUAD(daddr), ntohs(uh->dest), ulen); drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + UDP_INC_STATS_BH(UDP_MIB_INERRORS, IS_PROTO_UDPLITE(proto)); kfree_skb(skb); return 0; } @@ -1279,7 +1279,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, struct udp_sock *up = udp_sk(sk); int val; int err = 0; +#ifdef CONFIG_IP_UDPLITE int is_udplite = IS_UDPLITE(sk); +#endif if (optlenpcrlen = val; up->pcflag |= UDPLITE_RECV_CC; break; +#endif default: err = -ENOPROTOOPT; @@ -1352,7 +1356,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, int udp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (IS_SOL_UDPFAMILY(level)) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_push_pending_frames); return ip_setsockopt(sk, level, optname, optval, optlen); @@ -1362,7 +1366,7 @@ int udp_setsockopt(struct sock *sk, int level, int optname, int compat_udp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (IS_SOL_UDPFAMILY(level)) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_push_pending_frames); return compat_ip_setsockopt(sk, level, optname, optval, optlen); @@ -1416,7 +1420,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, int udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (IS_SOL_UDPFAMILY(level)) return udp_lib_getsockopt(sk, level, optname, optval, optlen); return ip_getsockopt(sk, level, optname, optval, optlen); } @@ -1425,7 +1429,7 @@ int udp_getsockopt(struct sock *sk, int level, int optname, int compat_udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (IS_SOL_UDPFAMILY(level)) return udp_lib_getsockopt(sk, level, optname, optval, optlen); return compat_ip_getsockopt(sk, level, optname, optval, optlen); } diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index ae14617e607f..81969479955f 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ - route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ + route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o \ raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o @@ -17,6 +17,7 @@ ipv6-$(CONFIG_NETFILTER) += netfilter.o ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o ipv6-$(CONFIG_PROC_FS) += proc.o ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o +ipv6-$(CONFIG_IP_UDPLITE) += udplite.o ipv6-objs += $(ipv6-y) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 9869f87243cf..243c42a6b80d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -691,12 +691,16 @@ static int __init init_ipv6_mibs(void) goto err_icmpmsg_mib; if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib)) < 0) goto err_udp_mib; +#ifdef CONFIG_IP_UDPLITE if (snmp_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib)) < 0) goto err_udplite_mib; +#endif return 0; +#ifdef CONFIG_IP_UDPLITE err_udplite_mib: +#endif snmp_mib_free((void **)udp_stats_in6); err_udp_mib: snmp_mib_free((void **)icmpv6msg_statistics); @@ -715,7 +719,9 @@ static void cleanup_ipv6_mibs(void) snmp_mib_free((void **)icmpv6_statistics); snmp_mib_free((void **)icmpv6msg_statistics); snmp_mib_free((void **)udp_stats_in6); +#ifdef CONFIG_IP_UDPLITE snmp_mib_free((void **)udplite_stats_in6); +#endif } static int inet6_net_init(struct net *net) @@ -760,9 +766,11 @@ static int __init inet6_init(void) if (err) goto out_unregister_tcp_proto; +#ifdef CONFIG_IP_UDPLITE err = proto_register(&udplitev6_prot, 1); if (err) goto out_unregister_udp_proto; +#endif err = proto_register(&rawv6_prot, 1); if (err) @@ -933,8 +941,10 @@ out_sock_register_fail: out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: +#ifdef CONFIG_IP_UDPLITE proto_unregister(&udplitev6_prot); out_unregister_udp_proto: +#endif proto_unregister(&udpv6_prot); out_unregister_tcp_proto: proto_unregister(&tcpv6_prot); @@ -950,7 +960,9 @@ static void __exit inet6_exit(void) rtnl_unregister_all(PF_INET6); udpv6_exit(); +#ifdef CONFIG_IP_UDPLITE udplitev6_exit(); +#endif tcpv6_exit(); /* Cleanup code parts. */ @@ -982,7 +994,9 @@ static void __exit inet6_exit(void) unregister_pernet_subsys(&inet6_net_ops); cleanup_ipv6_mibs(); proto_unregister(&rawv6_prot); +#ifdef CONFIG_IP_UDPLITE proto_unregister(&udplitev6_prot); +#endif proto_unregister(&udpv6_prot); proto_unregister(&tcpv6_prot); } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index bf2a686aa13d..0a18fecb93d1 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -239,7 +239,9 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct sk_buff *pktopt; if (sk->sk_protocol != IPPROTO_UDP && +#ifdef CONFIG_IP_UDPLITE sk->sk_protocol != IPPROTO_UDPLITE && +#endif sk->sk_protocol != IPPROTO_TCP) break; @@ -279,7 +281,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } else { struct proto *prot = &udp_prot; - if (sk->sk_protocol == IPPROTO_UDPLITE) + if (IS_PROTO_UDPLITE(sk->sk_protocol)) prot = &udplite_prot; local_bh_disable(); sock_prot_inuse_add(sk->sk_prot, -1); @@ -844,7 +846,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case IPV6_ADDRFORM: if (sk->sk_protocol != IPPROTO_UDP && +#ifdef CONFIG_IP_UDPLITE sk->sk_protocol != IPPROTO_UDPLITE && +#endif sk->sk_protocol != IPPROTO_TCP) return -EINVAL; if (sk->sk_state != TCP_ESTABLISHED) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 199ef379e501..5ba7ae849d04 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -39,8 +39,10 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(&tcpv6_prot)); seq_printf(seq, "UDP6: inuse %d\n", sock_prot_inuse_get(&udpv6_prot)); +#ifdef CONFIG_IP_UDPLITE seq_printf(seq, "UDPLITE6: inuse %d\n", sock_prot_inuse_get(&udplitev6_prot)); +#endif seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", @@ -111,6 +113,7 @@ static struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_SENTINEL }; +#ifdef CONFIG_IP_UDPLITE static struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS), SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS), @@ -118,6 +121,7 @@ static struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS), SNMP_MIB_SENTINEL }; +#endif static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) { @@ -176,7 +180,9 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list); snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics); snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list); +#ifdef CONFIG_IP_UDPLITE snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list); +#endif } return 0; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 53739de829db..55feac7ba717 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -400,7 +400,7 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, UDP_SKB_CB(skb)->partial_cov = 0; UDP_SKB_CB(skb)->cscov = skb->len; - if (proto == IPPROTO_UDPLITE) { + if (IS_PROTO_UDPLITE(proto)) { err = udplite_checksum_init(skb, uh); if (err) return err; @@ -489,7 +489,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (udp_lib_checksum_complete(skb)) goto discard; - UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, IS_PROTO_UDPLITE(proto)); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev); @@ -510,11 +510,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], short_packet: LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n", - proto == IPPROTO_UDPLITE ? "-Lite" : "", + IS_PROTO_UDPLITE(proto) ? "-Lite" : "", ulen, skb->len); discard: - UDP6_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + UDP6_INC_STATS_BH(UDP_MIB_INERRORS, IS_PROTO_UDPLITE(proto)); kfree_skb(skb); return 0; } @@ -890,7 +890,7 @@ int udpv6_destroy_sock(struct sock *sk) int udpv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (IS_SOL_UDPFAMILY(level)) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_v6_push_pending_frames); return ipv6_setsockopt(sk, level, optname, optval, optlen); @@ -900,7 +900,7 @@ int udpv6_setsockopt(struct sock *sk, int level, int optname, int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (IS_SOL_UDPFAMILY(level)) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_v6_push_pending_frames); return compat_ipv6_setsockopt(sk, level, optname, optval, optlen); @@ -910,7 +910,7 @@ int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, int udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (IS_SOL_UDPFAMILY(level)) return udp_lib_getsockopt(sk, level, optname, optval, optlen); return ipv6_getsockopt(sk, level, optname, optval, optlen); } @@ -919,7 +919,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (IS_SOL_UDPFAMILY(level)) return udp_lib_getsockopt(sk, level, optname, optval, optlen); return compat_ipv6_getsockopt(sk, level, optname, optval, optlen); } -- cgit v1.2.3-71-gd317 From 95e41e93e18d8e1e272ce23d96bae4f17ce11d42 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 6 Dec 2007 15:43:30 -0800 Subject: [IPV6]: Make ndisc_flow_init() common for later use. For later use, this patch is renaming ndisc_flow_init() to icmpv6_flow_init() and putting it in common place. Signed-off-by: YOSHIFUJI Hideaki --- include/linux/icmpv6.h | 8 ++++++++ net/ipv6/icmp.c | 16 ++++++++++++++++ net/ipv6/ndisc.c | 23 ++++------------------- 3 files changed, 28 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 8f86d6b621c8..e4d4300d768f 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -182,6 +182,14 @@ extern int icmpv6_err_convert(int type, int code, extern void icmpv6_cleanup(void); extern void icmpv6_param_prob(struct sk_buff *skb, int code, int pos); + +struct flowi; +extern void icmpv6_flow_init(struct sock *sk, + struct flowi *fl, + u8 type, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + int oif); #endif #endif diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 12c0b85d6c46..cff74127ea32 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -777,6 +777,22 @@ drop_no_count: return 0; } +void icmpv6_flow_init(struct sock *sk, struct flowi *fl, + u8 type, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + int oif) +{ + memset(fl, 0, sizeof(*fl)); + ipv6_addr_copy(&fl->fl6_src, saddr); + ipv6_addr_copy(&fl->fl6_dst, daddr); + fl->proto = IPPROTO_ICMPV6; + fl->fl_icmp_type = type; + fl->fl_icmp_code = 0; + fl->oif = oif; + security_sk_classify_flow(sk, fl); +} + /* * Special lock-class for __icmpv6_sk: */ diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 1fc33c8c7232..8db5f4a419aa 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -441,21 +441,6 @@ static void pndisc_destructor(struct pneigh_entry *n) /* * Send a Neighbour Advertisement */ - -static inline void ndisc_flow_init(struct flowi *fl, u8 type, - struct in6_addr *saddr, struct in6_addr *daddr, - int oif) -{ - memset(fl, 0, sizeof(*fl)); - ipv6_addr_copy(&fl->fl6_src, saddr); - ipv6_addr_copy(&fl->fl6_dst, daddr); - fl->proto = IPPROTO_ICMPV6; - fl->fl_icmp_type = type; - fl->fl_icmp_code = 0; - fl->oif = oif; - security_sk_classify_flow(ndisc_socket->sk, fl); -} - static void __ndisc_send(struct net_device *dev, struct neighbour *neigh, struct in6_addr *daddr, struct in6_addr *saddr, @@ -474,8 +459,8 @@ static void __ndisc_send(struct net_device *dev, type = icmp6h->icmp6_type; - ndisc_flow_init(&fl, type, saddr, daddr, - dev->ifindex); + icmpv6_flow_init(ndisc_socket->sk, &fl, type, + saddr, daddr, dev->ifindex); dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output); if (!dst) @@ -1439,8 +1424,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, return; } - ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &ipv6_hdr(skb)->saddr, - dev->ifindex); + icmpv6_flow_init(ndisc_socket->sk, &fl, NDISC_REDIRECT, + &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); dst = ip6_route_output(NULL, &fl); if (dst == NULL) -- cgit v1.2.3-71-gd317 From ee6b967301b4aa5d4a4b61e2f682f086266db9fb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Mar 2008 18:30:47 -0800 Subject: [IPV4]: Add 'rtable' field in struct sk_buff to alias 'dst' and avoid casts (Anonymous) unions can help us to avoid ugly casts. A common cast it the (struct rtable *)skb->dst one. Defining an union like : union { struct dst_entry *dst; struct rtable *rtable; }; permits to use skb->rtable in place. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 ++++- include/net/inet_sock.h | 2 +- net/bridge/br_netfilter.c | 14 +++++++------- net/dccp/ipv4.c | 7 +++---- net/ipv4/arp.c | 4 ++-- net/ipv4/icmp.c | 10 +++++----- net/ipv4/igmp.c | 2 +- net/ipv4/ip_forward.c | 2 +- net/ipv4/ip_gre.c | 4 ++-- net/ipv4/ip_input.c | 2 +- net/ipv4/ip_options.c | 16 ++++++++-------- net/ipv4/ip_output.c | 10 +++++----- net/ipv4/ip_sockglue.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv4/ipmr.c | 6 +++--- net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +- net/ipv4/netfilter/nf_nat_helper.c | 4 ++-- net/ipv4/route.c | 22 +++++++++++----------- net/ipv4/tcp_ipv4.c | 5 ++--- net/ipv4/udp_ipv4.c | 2 +- net/netfilter/nf_conntrack_netbios_ns.c | 2 +- net/sched/em_meta.c | 4 ++-- net/sctp/protocol.c | 8 ++++---- 23 files changed, 69 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bbd8d0027e2f..7beb239d2ee0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -256,7 +256,10 @@ struct sk_buff { ktime_t tstamp; struct net_device *dev; - struct dst_entry *dst; + union { + struct dst_entry *dst; + struct rtable *rtable; + }; struct sec_path *sp; /* diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 89cd011edb99..8660cb0fa0dd 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -195,7 +195,7 @@ static inline int inet_sk_ehashfn(const struct sock *sk) static inline int inet_iif(const struct sk_buff *skb) { - return ((struct rtable *)skb->dst)->rt_iif; + return skb->rtable->rt_iif; } #endif /* _INET_SOCK_H */ diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 1c0efd8ad9f3..0278a069c6f1 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -223,8 +223,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; - skb->dst = (struct dst_entry *)&__fake_rtable; - dst_hold(skb->dst); + skb->rtable = &__fake_rtable; + dst_hold(&__fake_rtable.u.dst); skb->dev = nf_bridge->physindev; nf_bridge_push_encap_header(skb); @@ -388,8 +388,8 @@ bridged_dnat: skb->pkt_type = PACKET_HOST; } } else { - skb->dst = (struct dst_entry *)&__fake_rtable; - dst_hold(skb->dst); + skb->rtable = &__fake_rtable; + dst_hold(&__fake_rtable.u.dst); } skb->dev = nf_bridge->physindev; @@ -608,9 +608,9 @@ static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (skb->dst == (struct dst_entry *)&__fake_rtable) { - dst_release(skb->dst); - skb->dst = NULL; + if (skb->rtable == &__fake_rtable) { + dst_release(&__fake_rtable.u.dst); + skb->rtable = NULL; } return NF_ACCEPT; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 514a40b7fc7f..17ad69e90e48 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -450,7 +450,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, struct sk_buff *skb) { struct rtable *rt; - struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif, + struct flowi fl = { .oif = skb->rtable->rt_iif, .nl_u = { .ip4_u = { .daddr = ip_hdr(skb)->saddr, .saddr = ip_hdr(skb)->daddr, @@ -511,7 +511,7 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) return; - if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) + if (rxskb->rtable->rt_type != RTN_LOCAL) return; dst = dccp_v4_route_skb(dccp_v4_ctl_socket->sk, rxskb); @@ -563,8 +563,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ - if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST | RTCF_MULTICAST)) + if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) return 0; /* discard, don't send a reset here */ if (dccp_bad_service_code(sk, service)) { diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 69e80bd9774a..efe01df8fc0e 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -475,7 +475,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) return 1; } - paddr = ((struct rtable*)skb->dst)->rt_gateway; + paddr = skb->rtable->rt_gateway; if (arp_set_predefined(inet_addr_type(&init_net, paddr), haddr, paddr, dev)) return 0; @@ -814,7 +814,7 @@ static int arp_process(struct sk_buff *skb) if (arp->ar_op == htons(ARPOP_REQUEST) && ip_route_input(skb, tip, sip, 0, dev) == 0) { - rt = (struct rtable*)skb->dst; + rt = skb->rtable; addr_type = rt->rt_type; if (addr_type == RTN_LOCAL) { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index cee77d606fbe..ff9a8e643fcc 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -381,7 +381,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { struct ipcm_cookie ipc; - struct rtable *rt = (struct rtable *)skb->dst; + struct rtable *rt = skb->rtable; struct net *net = rt->u.dst.dev->nd_net; struct sock *sk = icmp_sk(net); struct inet_sock *inet = inet_sk(sk); @@ -438,7 +438,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct iphdr *iph; int room; struct icmp_bxm icmp_param; - struct rtable *rt = (struct rtable *)skb_in->dst; + struct rtable *rt = skb_in->rtable; struct ipcm_cookie ipc; __be32 saddr; u8 tos; @@ -616,7 +616,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) RT_TOS(tos), rt2->u.dst.dev); dst_release(&rt2->u.dst); - rt2 = (struct rtable *)skb_in->dst; + rt2 = skb_in->rtable; skb_in->dst = odst; } @@ -943,7 +943,7 @@ static void icmp_address(struct sk_buff *skb) static void icmp_address_reply(struct sk_buff *skb) { - struct rtable *rt = (struct rtable *)skb->dst; + struct rtable *rt = skb->rtable; struct net_device *dev = skb->dev; struct in_device *in_dev; struct in_ifaddr *ifa; @@ -988,7 +988,7 @@ static void icmp_discard(struct sk_buff *skb) int icmp_rcv(struct sk_buff *skb) { struct icmphdr *icmph; - struct rtable *rt = (struct rtable *)skb->dst; + struct rtable *rt = skb->rtable; if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { int nh; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d3f34a772f3b..6a4ee8da6994 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -948,7 +948,7 @@ int igmp_rcv(struct sk_buff *skb) case IGMPV2_HOST_MEMBERSHIP_REPORT: case IGMPV3_HOST_MEMBERSHIP_REPORT: /* Is it our report looped back? */ - if (((struct rtable*)skb->dst)->fl.iif == 0) + if (skb->rtable->fl.iif == 0) break; /* don't rely on MC router hearing unicast reports */ if (skb->pkt_type == PACKET_MULTICAST || diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 0b3b328d82db..9d6d3befd854 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -80,7 +80,7 @@ int ip_forward(struct sk_buff *skb) if (!xfrm4_route_forward(skb)) goto drop; - rt = (struct rtable*)skb->dst; + rt = skb->rtable; if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto sr_failed; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index e7821ba7a9a0..f9ee84420cb3 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -619,7 +619,7 @@ static int ipgre_rcv(struct sk_buff *skb) #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { /* Looped back packet, drop it! */ - if (((struct rtable*)skb->dst)->fl.iif == 0) + if (skb->rtable->fl.iif == 0) goto drop; tunnel->stat.multicast++; skb->pkt_type = PACKET_BROADCAST; @@ -699,7 +699,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } if (skb->protocol == htons(ETH_P_IP)) { - rt = (struct rtable*)skb->dst; + rt = skb->rtable; if ((dst = rt->rt_gateway) == 0) goto tx_error_icmp; } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 65631391d479..d36e310b314d 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -351,7 +351,7 @@ static int ip_rcv_finish(struct sk_buff *skb) if (iph->ihl > 5 && ip_rcv_options(skb)) goto drop; - rt = (struct rtable*)skb->dst; + rt = skb->rtable; if (rt->rt_type == RTN_MULTICAST) IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS); else if (rt->rt_type == RTN_BROADCAST) diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index baaedd9689a0..df93a9c2efda 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -107,7 +107,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) sptr = skb_network_header(skb); dptr = dopt->__data; - daddr = ((struct rtable*)skb->dst)->rt_spec_dst; + daddr = skb->rtable->rt_spec_dst; if (sopt->rr) { optlen = sptr[sopt->rr+1]; @@ -258,7 +258,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) unsigned char * optptr; int optlen; unsigned char * pp_ptr = NULL; - struct rtable *rt = skb ? (struct rtable*)skb->dst : NULL; + struct rtable *rt = skb ? skb->rtable : NULL; if (!opt) { opt = &(IPCB(skb)->opt); @@ -558,7 +558,7 @@ void ip_forward_options(struct sk_buff *skb) { struct ip_options * opt = &(IPCB(skb)->opt); unsigned char * optptr; - struct rtable *rt = (struct rtable*)skb->dst; + struct rtable *rt = skb->rtable; unsigned char *raw = skb_network_header(skb); if (opt->rr_needaddr) { @@ -606,7 +606,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) __be32 nexthop; struct iphdr *iph = ip_hdr(skb); unsigned char *optptr = skb_network_header(skb) + opt->srr; - struct rtable *rt = (struct rtable*)skb->dst; + struct rtable *rt = skb->rtable; struct rtable *rt2; int err; @@ -631,13 +631,13 @@ int ip_options_rcv_srr(struct sk_buff *skb) } memcpy(&nexthop, &optptr[srrptr-1], 4); - rt = (struct rtable*)skb->dst; - skb->dst = NULL; + rt = skb->rtable; + skb->rtable = NULL; err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); - rt2 = (struct rtable*)skb->dst; + rt2 = skb->rtable; if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { ip_rt_put(rt2); - skb->dst = &rt->u.dst; + skb->rtable = rt; return -EINVAL; } ip_rt_put(rt); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 341779e685d9..dc494ea594a7 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -142,7 +142,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, __be32 saddr, __be32 daddr, struct ip_options *opt) { struct inet_sock *inet = inet_sk(sk); - struct rtable *rt = (struct rtable *)skb->dst; + struct rtable *rt = skb->rtable; struct iphdr *iph; /* Build the IP header. */ @@ -240,7 +240,7 @@ static int ip_finish_output(struct sk_buff *skb) int ip_mc_output(struct sk_buff *skb) { struct sock *sk = skb->sk; - struct rtable *rt = (struct rtable*)skb->dst; + struct rtable *rt = skb->rtable; struct net_device *dev = rt->u.dst.dev; /* @@ -321,7 +321,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) /* Skip all of this if the packet is already routed, * f.e. by something like SCTP. */ - rt = (struct rtable *) skb->dst; + rt = skb->rtable; if (rt != NULL) goto packet_routed; @@ -441,7 +441,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) unsigned int mtu, hlen, left, len, ll_rs, pad; int offset; __be16 not_last_frag; - struct rtable *rt = (struct rtable*)skb->dst; + struct rtable *rt = skb->rtable; int err = 0; dev = rt->u.dst.dev; @@ -1357,7 +1357,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar } replyopts; struct ipcm_cookie ipc; __be32 daddr; - struct rtable *rt = (struct rtable*)skb->dst; + struct rtable *rt = skb->rtable; if (ip_options_echo(&replyopts.opt, skb)) return; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index de0572c88859..e7c9e4e72327 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -57,7 +57,7 @@ static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) { struct in_pktinfo info; - struct rtable *rt = (struct rtable *)skb->dst; + struct rtable *rt = skb->rtable; info.ipi_addr.s_addr = ip_hdr(skb)->daddr; if (rt) { diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index dbaed69de06a..894bce96284a 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -528,7 +528,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (!dst) { /* NBMA tunnel */ - if ((rt = (struct rtable*)skb->dst) == NULL) { + if ((rt = skb->rtable) == NULL) { tunnel->stat.tx_fifo_errors++; goto tx_error; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a94f52c207a7..7d63d74ef62a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1283,7 +1283,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local if (vif_table[vif].dev != skb->dev) { int true_vifi; - if (((struct rtable*)skb->dst)->fl.iif == 0) { + if (skb->rtable->fl.iif == 0) { /* It is our own packet, looped back. Very complicated situation... @@ -1357,7 +1357,7 @@ dont_forward: int ip_mr_input(struct sk_buff *skb) { struct mfc_cache *cache; - int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL; + int local = skb->rtable->rt_flags&RTCF_LOCAL; /* Packet is looped back after forward, it should not be forwarded second time, but still can be delivered locally. @@ -1594,7 +1594,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) { int err; struct mfc_cache *cache; - struct rtable *rt = (struct rtable*)skb->dst; + struct rtable *rt = skb->rtable; read_lock(&mrt_lock); cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 313b3fcf387e..c6817b18366a 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -77,7 +77,7 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in, return NF_ACCEPT; mr = targinfo; - rt = (struct rtable *)skb->dst; + rt = skb->rtable; newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); if (!newsrc) { printk("MASQUERADE: %s ate my IP address\n", out->name); diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index ca57f47bbd25..2fca727aa8ba 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -139,7 +139,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, const char *rep_buffer, unsigned int rep_len) { - struct rtable *rt = (struct rtable *)skb->dst; + struct rtable *rt = skb->rtable; struct iphdr *iph; struct tcphdr *tcph; int oldlen, datalen; @@ -217,7 +217,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, const char *rep_buffer, unsigned int rep_len) { - struct rtable *rt = (struct rtable *)skb->dst; + struct rtable *rt = skb->rtable; struct iphdr *iph; struct udphdr *udph; int datalen, oldlen; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8c3e165f0034..1051326c36b2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1289,7 +1289,7 @@ reject_redirect: static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) { - struct rtable *rt = (struct rtable*)dst; + struct rtable *rt = (struct rtable *)dst; struct dst_entry *ret = dst; if (rt) { @@ -1330,7 +1330,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) void ip_rt_send_redirect(struct sk_buff *skb) { - struct rtable *rt = (struct rtable*)skb->dst; + struct rtable *rt = skb->rtable; struct in_device *in_dev = in_dev_get(rt->u.dst.dev); if (!in_dev) @@ -1379,7 +1379,7 @@ out: static int ip_error(struct sk_buff *skb) { - struct rtable *rt = (struct rtable*)skb->dst; + struct rtable *rt = skb->rtable; unsigned long now; int code; @@ -1548,7 +1548,7 @@ static void ipv4_link_failure(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); - rt = (struct rtable *) skb->dst; + rt = skb->rtable; if (rt) dst_set_expires(&rt->u.dst, 0); } @@ -1708,7 +1708,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, in_dev_put(in_dev); hash = rt_hash(daddr, saddr, dev->ifindex); - return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst); + return rt_intern_hash(hash, rth, &skb->rtable); e_nobufs: in_dev_put(in_dev); @@ -1869,7 +1869,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb, /* put it into the cache */ hash = rt_hash(daddr, saddr, fl->iif); - return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); + return rt_intern_hash(hash, rth, &skb->rtable); } /* @@ -2025,7 +2025,7 @@ local_input: } rth->rt_type = res.type; hash = rt_hash(daddr, saddr, fl.iif); - err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); + err = rt_intern_hash(hash, rth, &skb->rtable); goto done; no_route: @@ -2091,7 +2091,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); - skb->dst = (struct dst_entry*)rth; + skb->rtable = rth; return 0; } RT_CACHE_STAT_INC(in_hlist_search); @@ -2598,7 +2598,7 @@ int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait, unsigned int flags) { - struct rtable *rt = (struct rtable*)skb->dst; + struct rtable *rt = skb->rtable; struct rtmsg *r; struct nlmsghdr *nlh; long expires; @@ -2742,7 +2742,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); local_bh_enable(); - rt = (struct rtable*) skb->dst; + rt = skb->rtable; if (err == 0 && rt->u.dst.error) err = -rt->u.dst.error; } else { @@ -2762,7 +2762,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (err) goto errout_free; - skb->dst = &rt->u.dst; + skb->rtable = rt; if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3873c4dbeaeb..a79e324638eb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -552,7 +552,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) if (th->rst) return; - if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL) + if (skb->rtable->rt_type != RTN_LOCAL) return; /* Swap the send and the receive. */ @@ -1262,8 +1262,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) #endif /* Never answer to SYNs send to broadcast or multicast */ - if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST | RTCF_MULTICAST)) + if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) goto drop; /* TW buckets are converted to open requests without diff --git a/net/ipv4/udp_ipv4.c b/net/ipv4/udp_ipv4.c index 40978de7fb51..fd14c2c50ed4 100644 --- a/net/ipv4/udp_ipv4.c +++ b/net/ipv4/udp_ipv4.c @@ -893,7 +893,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], struct sock *sk; struct udphdr *uh = udp_hdr(skb); unsigned short ulen; - struct rtable *rt = (struct rtable*)skb->dst; + struct rtable *rt = skb->rtable; __be32 saddr = ip_hdr(skb)->saddr; __be32 daddr = ip_hdr(skb)->daddr; diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 9810d81e2a06..60dedaded84e 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -47,7 +47,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, { struct nf_conntrack_expect *exp; struct iphdr *iph = ip_hdr(skb); - struct rtable *rt = (struct rtable *)skb->dst; + struct rtable *rt = skb->rtable; struct in_device *in_dev; __be32 mask = 0; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 3da4129b89d1..72cf86e3c090 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -256,10 +256,10 @@ META_COLLECTOR(int_rtclassid) META_COLLECTOR(int_rtiif) { - if (unlikely(skb->dst == NULL)) + if (unlikely(skb->rtable == NULL)) *err = -1; else - dst->value = ((struct rtable*) skb->dst)->fl.iif; + dst->value = skb->rtable->fl.iif; } /************************************************************************** diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 8d9d929f6cea..1afef08f6c1d 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -363,7 +363,7 @@ static int sctp_v4_addr_valid(union sctp_addr *addr, return 0; /* Is this a broadcast address? */ - if (skb && ((struct rtable *)skb->dst)->rt_flags & RTCF_BROADCAST) + if (skb && skb->rtable->rt_flags & RTCF_BROADCAST) return 0; return 1; @@ -539,7 +539,7 @@ static void sctp_v4_get_saddr(struct sctp_association *asoc, /* What interface did this skb arrive on? */ static int sctp_v4_skb_iif(const struct sk_buff *skb) { - return ((struct rtable *)skb->dst)->rt_iif; + return skb->rtable->rt_iif; } /* Was this packet marked by Explicit Congestion Notification? */ @@ -828,8 +828,8 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, " "src:%u.%u.%u.%u, dst:%u.%u.%u.%u\n", __FUNCTION__, skb, skb->len, - NIPQUAD(((struct rtable *)skb->dst)->rt_src), - NIPQUAD(((struct rtable *)skb->dst)->rt_dst)); + NIPQUAD(skb->rtable->rt_src), + NIPQUAD(skb->rtable->rt_dst)); SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); return ip_queue_xmit(skb, ipfragok); -- cgit v1.2.3-71-gd317 From f59d43899e279c77924a7ada4bec8c70e5aeca06 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 5 Mar 2008 20:58:10 -0800 Subject: [IPV6]: Fix powerpc allmodconfig build warnings. Introduced by changeset 95e41e93e18d8e1e272ce23d96bae4f17ce11d42 ("[IPV6]: Make ndisc_flow_init() common for later use.") Reported by Stephen Rothwell. In file included from net/ipv6/netfilter/ip6_tables.c:21: include/linux/icmpv6.h:192: warning: 'struct in6_addr' declared inside parameter list include/linux/icmpv6.h:192: warning: its scope is only this definition or declaration, which is probably not what you want Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index e4d4300d768f..03067443198a 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -184,6 +184,7 @@ extern void icmpv6_param_prob(struct sk_buff *skb, int code, int pos); struct flowi; +struct in6_addr; extern void icmpv6_flow_init(struct sock *sk, struct flowi *fl, u8 type, -- cgit v1.2.3-71-gd317 From 37c5798968d0ce4d479f114f1d5785551b57bfa5 Mon Sep 17 00:00:00 2001 From: Luis Carlos Cobo Date: Sat, 23 Feb 2008 15:17:04 +0100 Subject: wireless: various definitions for mesh networking Signed-off-by: Luis Carlos Cobo Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f577c8f1c66d..f27d11ab418b 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -97,6 +97,7 @@ #define IEEE80211_MAX_FRAME_LEN 2352 #define IEEE80211_MAX_SSID_LEN 32 +#define IEEE80211_MAX_MESH_ID_LEN 32 struct ieee80211_hdr { __le16 frame_control; @@ -109,6 +110,16 @@ struct ieee80211_hdr { } __attribute__ ((packed)); +struct ieee80211s_hdr { + u8 flags; + u8 ttl; + u8 seqnum[3]; + u8 eaddr1[6]; + u8 eaddr2[6]; + u8 eaddr3[6]; +} __attribute__ ((packed)); + + struct ieee80211_mgmt { __le16 frame_control; __le16 duration; @@ -206,6 +217,23 @@ struct ieee80211_mgmt { __le16 params; __le16 reason_code; } __attribute__((packed)) delba; + struct{ + u8 action_code; + /* capab_info for open and confirm, + * reason for close + */ + __le16 aux; + /* Followed in plink_confirm by status + * code, AID and supported rates, + * and directly by supported rates in + * plink_open and plink_close + */ + u8 variable[0]; + } __attribute__((packed)) plink_action; + struct{ + u8 action_code; + u8 variable[0]; + } __attribute__((packed)) mesh_action; } u; } __attribute__ ((packed)) action; } u; @@ -437,6 +465,13 @@ enum ieee80211_eid { WLAN_EID_TS_DELAY = 43, WLAN_EID_TCLAS_PROCESSING = 44, WLAN_EID_QOS_CAPA = 46, + /* 802.11s */ + WLAN_EID_MESH_CONFIG = 36, /* Pending IEEE 802.11 ANA approval */ + WLAN_EID_MESH_ID = 37, /* Pending IEEE 802.11 ANA approval */ + WLAN_EID_PEER_LINK = 40, /* Pending IEEE 802.11 ANA approval */ + WLAN_EID_PREQ = 53, /* Pending IEEE 802.11 ANA approval */ + WLAN_EID_PREP = 54, /* Pending IEEE 802.11 ANA approval */ + WLAN_EID_PERR = 55, /* Pending IEEE 802.11 ANA approval */ /* 802.11h */ WLAN_EID_PWR_CONSTRAINT = 32, WLAN_EID_PWR_CAPABILITY = 33, -- cgit v1.2.3-71-gd317 From cc0672a1066829be7e1b0128a13e36a2d0a15479 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 23 Feb 2008 15:17:05 +0100 Subject: WEXT: add mesh interface type This introduces a new WEXT type IW_MODE_MESH for mesh networks, used for scan results. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/wireless.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/wireless.h b/include/linux/wireless.h index 3160dfed73ca..2864b1699ecc 100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -455,6 +455,7 @@ #define IW_MODE_REPEAT 4 /* Wireless Repeater (forwarder) */ #define IW_MODE_SECOND 5 /* Secondary master/repeater (backup) */ #define IW_MODE_MONITOR 6 /* Passive monitor (listen only) */ +#define IW_MODE_MESH 7 /* Mesh (IEEE 802.11s) network */ /* Statistics flags (bitmask in updated) */ #define IW_QUAL_QUAL_UPDATED 0x01 /* Value was updated since last read */ -- cgit v1.2.3-71-gd317 From 2ec600d672e74488f8d1acf67a0a2baed222564c Mon Sep 17 00:00:00 2001 From: Luis Carlos Cobo Date: Sat, 23 Feb 2008 15:17:06 +0100 Subject: nl80211/cfg80211: support for mesh, sta dumping Added support for mesh id and mesh path operation as well as station structure dumping. Signed-off-by: Luis Carlos Cobo Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 119 +++++++++++-- include/net/cfg80211.h | 139 +++++++++++++-- net/mac80211/cfg.c | 20 ++- net/wireless/nl80211.c | 438 +++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 650 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index a9f0b93324a2..ea6517e58b04 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -78,6 +78,18 @@ * or, if no MAC address given, all stations, on the interface identified * by %NL80211_ATTR_IFINDEX. * + * @NL80211_CMD_GET_MPATH: Get mesh path attributes for mesh path to + * destination %NL80211_ATTR_MAC on the interface identified by + * %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_SET_MPATH: Set mesh path attributes for mesh path to + * destination %NL80211_ATTR_MAC on the interface identified by + * %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_NEW_PATH: Add a mesh path with given attributes to the + * the interface identified by %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_DEL_PATH: Remove a mesh path identified by %NL80211_ATTR_MAC + * or, if no MAC address given, all mesh paths, on the interface identified + * by %NL80211_ATTR_IFINDEX. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -112,6 +124,11 @@ enum nl80211_commands { /* add commands here */ + NL80211_CMD_GET_MPATH, + NL80211_CMD_SET_MPATH, + NL80211_CMD_NEW_MPATH, + NL80211_CMD_DEL_MPATH, + /* used to define NL80211_CMD_MAX below */ __NL80211_CMD_AFTER_LAST, NL80211_CMD_MAX = __NL80211_CMD_AFTER_LAST - 1 @@ -157,13 +174,21 @@ enum nl80211_commands { * restriction (at most %NL80211_MAX_SUPP_RATES). * @NL80211_ATTR_STA_VLAN: interface index of VLAN interface to move station * to, or the AP interface the station was originally added to to. - * @NL80211_ATTR_STA_STATS: statistics for a station, part of station info + * @NL80211_ATTR_STA_INFO: information about a station, part of station info * given for %NL80211_CMD_GET_STATION, nested attribute containing - * info as possible, see &enum nl80211_sta_stats. + * info as possible, see &enum nl80211_sta_info. * * @NL80211_ATTR_WIPHY_BANDS: Information about an operating bands, * consisting of a nested array. * + * @NL80211_ATTR_MESH_ID: mesh id (1-32 bytes). + * @NL80211_ATTR_PLINK_ACTION: action to perform on the mesh peer link. + * @NL80211_ATTR_MPATH_NEXT_HOP: MAC address of the next hop for a mesh path. + * @NL80211_ATTR_MPATH_INFO: information about a mesh_path, part of mesh path + * info given for %NL80211_CMD_GET_MPATH, nested attribute described at + * &enum nl80211_mpath_info. + * + * * @NL80211_ATTR_MNTR_FLAGS: flags, nested element with NLA_FLAG attributes of * &enum nl80211_mntr_flags. * @@ -199,7 +224,7 @@ enum nl80211_attrs { NL80211_ATTR_STA_LISTEN_INTERVAL, NL80211_ATTR_STA_SUPPORTED_RATES, NL80211_ATTR_STA_VLAN, - NL80211_ATTR_STA_STATS, + NL80211_ATTR_STA_INFO, NL80211_ATTR_WIPHY_BANDS, @@ -207,6 +232,11 @@ enum nl80211_attrs { /* add attributes here, update the policy in nl80211.c */ + NL80211_ATTR_MESH_ID, + NL80211_ATTR_STA_PLINK_ACTION, + NL80211_ATTR_MPATH_NEXT_HOP, + NL80211_ATTR_MPATH_INFO, + __NL80211_ATTR_AFTER_LAST, NL80211_ATTR_MAX = __NL80211_ATTR_AFTER_LAST - 1 }; @@ -223,6 +253,7 @@ enum nl80211_attrs { * @NL80211_IFTYPE_AP_VLAN: VLAN interface for access points * @NL80211_IFTYPE_WDS: wireless distribution interface * @NL80211_IFTYPE_MONITOR: monitor interface receiving all frames + * @NL80211_IFTYPE_MESH_POINT: mesh point * @NL80211_IFTYPE_MAX: highest interface type number currently defined * @__NL80211_IFTYPE_AFTER_LAST: internal use * @@ -238,6 +269,7 @@ enum nl80211_iftype { NL80211_IFTYPE_AP_VLAN, NL80211_IFTYPE_WDS, NL80211_IFTYPE_MONITOR, + NL80211_IFTYPE_MESH_POINT, /* keep last */ __NL80211_IFTYPE_AFTER_LAST, @@ -267,27 +299,78 @@ enum nl80211_sta_flags { }; /** - * enum nl80211_sta_stats - station statistics + * enum nl80211_sta_info - station information * - * These attribute types are used with %NL80211_ATTR_STA_STATS + * These attribute types are used with %NL80211_ATTR_STA_INFO * when getting information about a station. * - * @__NL80211_STA_STAT_INVALID: attribute number 0 is reserved - * @NL80211_STA_STAT_INACTIVE_TIME: time since last activity (u32, msecs) - * @NL80211_STA_STAT_RX_BYTES: total received bytes (u32, from this station) - * @NL80211_STA_STAT_TX_BYTES: total transmitted bytes (u32, to this station) - * @__NL80211_STA_STAT_AFTER_LAST: internal - * @NL80211_STA_STAT_MAX: highest possible station stats attribute + * @__NL80211_STA_INFO_INVALID: attribute number 0 is reserved + * @NL80211_STA_INFO_INACTIVE_TIME: time since last activity (u32, msecs) + * @NL80211_STA_INFO_RX_BYTES: total received bytes (u32, from this station) + * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (u32, to this station) + * @__NL80211_STA_INFO_AFTER_LAST: internal + * @NL80211_STA_INFO_MAX: highest possible station info attribute + */ +enum nl80211_sta_info { + __NL80211_STA_INFO_INVALID, + NL80211_STA_INFO_INACTIVE_TIME, + NL80211_STA_INFO_RX_BYTES, + NL80211_STA_INFO_TX_BYTES, + NL80211_STA_INFO_LLID, + NL80211_STA_INFO_PLID, + NL80211_STA_INFO_PLINK_STATE, + + /* keep last */ + __NL80211_STA_INFO_AFTER_LAST, + NL80211_STA_INFO_MAX = __NL80211_STA_INFO_AFTER_LAST - 1 +}; + +/** + * enum nl80211_mpath_flags - nl80211 mesh path flags + * + * @NL80211_MPATH_FLAG_ACTIVE: the mesh path is active + * @NL80211_MPATH_FLAG_RESOLVING: the mesh path discovery process is running + * @NL80211_MPATH_FLAG_DSN_VALID: the mesh path contains a valid DSN + * @NL80211_MPATH_FLAG_FIXED: the mesh path has been manually set + * @NL80211_MPATH_FLAG_RESOLVED: the mesh path discovery process succeeded + */ +enum nl80211_mpath_flags { + NL80211_MPATH_FLAG_ACTIVE = 1<<0, + NL80211_MPATH_FLAG_RESOLVING = 1<<1, + NL80211_MPATH_FLAG_DSN_VALID = 1<<2, + NL80211_MPATH_FLAG_FIXED = 1<<3, + NL80211_MPATH_FLAG_RESOLVED = 1<<4, +}; + +/** + * enum nl80211_mpath_info - mesh path information + * + * These attribute types are used with %NL80211_ATTR_MPATH_INFO when getting + * information about a mesh path. + * + * @__NL80211_MPATH_INFO_INVALID: attribute number 0 is reserved + * @NL80211_ATTR_MPATH_FRAME_QLEN: number of queued frames for this destination + * @NL80211_ATTR_MPATH_DSN: destination sequence number + * @NL80211_ATTR_MPATH_METRIC: metric (cost) of this mesh path + * @NL80211_ATTR_MPATH_EXPTIME: expiration time for the path, in msec from now + * @NL80211_ATTR_MPATH_FLAGS: mesh path flags, enumerated in + * &enum nl80211_mpath_flags; + * @NL80211_ATTR_MPATH_DISCOVERY_TIMEOUT: total path discovery timeout, in msec + * @NL80211_ATTR_MPATH_DISCOVERY_RETRIES: mesh path discovery retries */ -enum nl80211_sta_stats { - __NL80211_STA_STAT_INVALID, - NL80211_STA_STAT_INACTIVE_TIME, - NL80211_STA_STAT_RX_BYTES, - NL80211_STA_STAT_TX_BYTES, +enum nl80211_mpath_info { + __NL80211_MPATH_INFO_INVALID, + NL80211_MPATH_INFO_FRAME_QLEN, + NL80211_MPATH_INFO_DSN, + NL80211_MPATH_INFO_METRIC, + NL80211_MPATH_INFO_EXPTIME, + NL80211_MPATH_INFO_FLAGS, + NL80211_MPATH_INFO_DISCOVERY_TIMEOUT, + NL80211_MPATH_INFO_DISCOVERY_RETRIES, /* keep last */ - __NL80211_STA_STAT_AFTER_LAST, - NL80211_STA_STAT_MAX = __NL80211_STA_STAT_AFTER_LAST - 1 + __NL80211_MPATH_INFO_AFTER_LAST, + NL80211_MPATH_INFO_MAX = __NL80211_MPATH_INFO_AFTER_LAST - 1 }; /** diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ab4caf63954f..e00750836ba5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -12,6 +12,16 @@ * Copyright 2006, 2007 Johannes Berg */ +/** + * struct vif_params - describes virtual interface parameters + * @mesh_id: mesh ID to use + * @mesh_id_len: length of the mesh ID + */ +struct vif_params { + u8 *mesh_id; + int mesh_id_len; +}; + /* Radiotap header iteration * implemented in net/wireless/radiotap.c * docs in Documentation/networking/radiotap-headers.txt @@ -108,6 +118,19 @@ enum station_flags { STATION_FLAG_WME = 1<ieee80211_ptr); struct sta_info *sta; @@ -307,13 +309,13 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, /* XXX: verify sta->dev == dev */ - stats->filled = STATION_STAT_INACTIVE_TIME | - STATION_STAT_RX_BYTES | - STATION_STAT_TX_BYTES; + sinfo->filled = STATION_INFO_INACTIVE_TIME | + STATION_INFO_RX_BYTES | + STATION_INFO_TX_BYTES; - stats->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); - stats->rx_bytes = sta->rx_bytes; - stats->tx_bytes = sta->tx_bytes; + sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); + sinfo->rx_bytes = sta->rx_bytes; + sinfo->tx_bytes = sta->tx_bytes; sta_info_put(sta); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5b3474798b8d..64a7460af734 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -81,8 +81,12 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_STA_LISTEN_INTERVAL] = { .type = NLA_U16 }, [NL80211_ATTR_STA_SUPPORTED_RATES] = { .type = NLA_BINARY, .len = NL80211_MAX_SUPP_RATES }, + [NL80211_ATTR_STA_PLINK_ACTION] = { .type = NLA_U8 }, [NL80211_ATTR_STA_VLAN] = { .type = NLA_U32 }, [NL80211_ATTR_MNTR_FLAGS] = { .type = NLA_NESTED }, + [NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY, + .len = IEEE80211_MAX_MESH_ID_LEN }, + [NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_U32 }, }; /* message building helper */ @@ -369,11 +373,14 @@ static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *drv; + struct vif_params params; int err, ifindex; enum nl80211_iftype type; struct net_device *dev; u32 flags; + memset(¶ms, 0, sizeof(params)); + if (info->attrs[NL80211_ATTR_IFTYPE]) { type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); if (type > NL80211_IFTYPE_MAX) @@ -392,12 +399,18 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) goto unlock; } + if (type == NL80211_IFTYPE_MESH_POINT && + info->attrs[NL80211_ATTR_MESH_ID]) { + params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]); + params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); + } + rtnl_lock(); err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex, - type, err ? NULL : &flags); + type, err ? NULL : &flags, ¶ms); rtnl_unlock(); unlock: @@ -408,10 +421,13 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *drv; + struct vif_params params; int err; enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; u32 flags; + memset(¶ms, 0, sizeof(params)); + if (!info->attrs[NL80211_ATTR_IFNAME]) return -EINVAL; @@ -430,15 +446,22 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) goto unlock; } + if (type == NL80211_IFTYPE_MESH_POINT && + info->attrs[NL80211_ATTR_MESH_ID]) { + params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]); + params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); + } + rtnl_lock(); err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); err = drv->ops->add_virtual_intf(&drv->wiphy, nla_data(info->attrs[NL80211_ATTR_IFNAME]), - type, err ? NULL : &flags); + type, err ? NULL : &flags, ¶ms); rtnl_unlock(); + unlock: cfg80211_put_dev(drv); return err; @@ -866,10 +889,10 @@ static int parse_station_flags(struct nlattr *nla, u32 *staflags) static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct net_device *dev, - u8 *mac_addr, struct station_stats *stats) + u8 *mac_addr, struct station_info *sinfo) { void *hdr; - struct nlattr *statsattr; + struct nlattr *sinfoattr; hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); if (!hdr) @@ -878,20 +901,29 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr); - statsattr = nla_nest_start(msg, NL80211_ATTR_STA_STATS); - if (!statsattr) + sinfoattr = nla_nest_start(msg, NL80211_ATTR_STA_INFO); + if (!sinfoattr) goto nla_put_failure; - if (stats->filled & STATION_STAT_INACTIVE_TIME) - NLA_PUT_U32(msg, NL80211_STA_STAT_INACTIVE_TIME, - stats->inactive_time); - if (stats->filled & STATION_STAT_RX_BYTES) - NLA_PUT_U32(msg, NL80211_STA_STAT_RX_BYTES, - stats->rx_bytes); - if (stats->filled & STATION_STAT_TX_BYTES) - NLA_PUT_U32(msg, NL80211_STA_STAT_TX_BYTES, - stats->tx_bytes); - - nla_nest_end(msg, statsattr); + if (sinfo->filled & STATION_INFO_INACTIVE_TIME) + NLA_PUT_U32(msg, NL80211_STA_INFO_INACTIVE_TIME, + sinfo->inactive_time); + if (sinfo->filled & STATION_INFO_RX_BYTES) + NLA_PUT_U32(msg, NL80211_STA_INFO_RX_BYTES, + sinfo->rx_bytes); + if (sinfo->filled & STATION_INFO_TX_BYTES) + NLA_PUT_U32(msg, NL80211_STA_INFO_TX_BYTES, + sinfo->tx_bytes); + if (sinfo->filled & STATION_INFO_LLID) + NLA_PUT_U16(msg, NL80211_STA_INFO_LLID, + sinfo->llid); + if (sinfo->filled & STATION_INFO_PLID) + NLA_PUT_U16(msg, NL80211_STA_INFO_PLID, + sinfo->plid); + if (sinfo->filled & STATION_INFO_PLINK_STATE) + NLA_PUT_U8(msg, NL80211_STA_INFO_PLINK_STATE, + sinfo->plink_state); + + nla_nest_end(msg, sinfoattr); return genlmsg_end(msg, hdr); @@ -899,17 +931,80 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, return genlmsg_cancel(msg, hdr); } +static int nl80211_dump_station(struct sk_buff *skb, + struct netlink_callback *cb) +{ + int wp_idx = 0; + int if_idx = 0; + int sta_idx = cb->args[2]; + int wp_start = cb->args[0]; + int if_start = cb->args[1]; + struct station_info sinfo; + struct cfg80211_registered_device *dev; + struct wireless_dev *wdev; + u8 mac_addr[ETH_ALEN]; + int err; + int exit = 0; + + /* TODO: filter by device */ + mutex_lock(&cfg80211_drv_mutex); + list_for_each_entry(dev, &cfg80211_drv_list, list) { + if (exit) + break; + if (++wp_idx < wp_start) + continue; + if_idx = 0; + + mutex_lock(&dev->devlist_mtx); + list_for_each_entry(wdev, &dev->netdev_list, list) { + if (exit) + break; + if (++if_idx < if_start) + continue; + if (!dev->ops->dump_station) + continue; + + for (;; ++sta_idx) { + rtnl_lock(); + err = dev->ops->dump_station(&dev->wiphy, + wdev->netdev, sta_idx, mac_addr, + &sinfo); + rtnl_unlock(); + if (err) { + sta_idx = 0; + break; + } + if (nl80211_send_station(skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + wdev->netdev, mac_addr, + &sinfo) < 0) { + exit = 1; + break; + } + } + } + mutex_unlock(&dev->devlist_mtx); + } + mutex_unlock(&cfg80211_drv_mutex); + + cb->args[0] = wp_idx; + cb->args[1] = if_idx; + cb->args[2] = sta_idx; + + return skb->len; +} static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *drv; int err; struct net_device *dev; - struct station_stats stats; + struct station_info sinfo; struct sk_buff *msg; u8 *mac_addr = NULL; - memset(&stats, 0, sizeof(stats)); + memset(&sinfo, 0, sizeof(sinfo)); if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; @@ -926,15 +1021,18 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) } rtnl_lock(); - err = drv->ops->get_station(&drv->wiphy, dev, mac_addr, &stats); + err = drv->ops->get_station(&drv->wiphy, dev, mac_addr, &sinfo); rtnl_unlock(); + if (err) + goto out; + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!msg) goto out; if (nl80211_send_station(msg, info->snd_pid, info->snd_seq, 0, - dev, mac_addr, &stats) < 0) + dev, mac_addr, &sinfo) < 0) goto out_free; err = genlmsg_unicast(msg, info->snd_pid); @@ -1005,6 +1103,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) ¶ms.station_flags)) return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) + params.plink_action = + nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + err = get_drv_dev_by_info_ifindex(info, &drv, &dev); if (err) return err; @@ -1119,6 +1221,273 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, + int flags, struct net_device *dev, + u8 *dst, u8 *next_hop, + struct mpath_info *pinfo) +{ + void *hdr; + struct nlattr *pinfoattr; + + hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); + if (!hdr) + return -1; + + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); + NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, dst); + NLA_PUT(msg, NL80211_ATTR_MPATH_NEXT_HOP, ETH_ALEN, next_hop); + + pinfoattr = nla_nest_start(msg, NL80211_ATTR_MPATH_INFO); + if (!pinfoattr) + goto nla_put_failure; + if (pinfo->filled & MPATH_INFO_FRAME_QLEN) + NLA_PUT_U32(msg, NL80211_MPATH_INFO_FRAME_QLEN, + pinfo->frame_qlen); + if (pinfo->filled & MPATH_INFO_DSN) + NLA_PUT_U32(msg, NL80211_MPATH_INFO_DSN, + pinfo->dsn); + if (pinfo->filled & MPATH_INFO_METRIC) + NLA_PUT_U32(msg, NL80211_MPATH_INFO_METRIC, + pinfo->metric); + if (pinfo->filled & MPATH_INFO_EXPTIME) + NLA_PUT_U32(msg, NL80211_MPATH_INFO_EXPTIME, + pinfo->exptime); + if (pinfo->filled & MPATH_INFO_FLAGS) + NLA_PUT_U8(msg, NL80211_MPATH_INFO_FLAGS, + pinfo->flags); + if (pinfo->filled & MPATH_INFO_DISCOVERY_TIMEOUT) + NLA_PUT_U32(msg, NL80211_MPATH_INFO_DISCOVERY_TIMEOUT, + pinfo->discovery_timeout); + if (pinfo->filled & MPATH_INFO_DISCOVERY_RETRIES) + NLA_PUT_U8(msg, NL80211_MPATH_INFO_DISCOVERY_RETRIES, + pinfo->discovery_retries); + + nla_nest_end(msg, pinfoattr); + + return genlmsg_end(msg, hdr); + + nla_put_failure: + return genlmsg_cancel(msg, hdr); +} + +static int nl80211_dump_mpath(struct sk_buff *skb, + struct netlink_callback *cb) +{ + int wp_idx = 0; + int if_idx = 0; + int sta_idx = cb->args[2]; + int wp_start = cb->args[0]; + int if_start = cb->args[1]; + struct mpath_info pinfo; + struct cfg80211_registered_device *dev; + struct wireless_dev *wdev; + u8 dst[ETH_ALEN]; + u8 next_hop[ETH_ALEN]; + int err; + int exit = 0; + + /* TODO: filter by device */ + mutex_lock(&cfg80211_drv_mutex); + list_for_each_entry(dev, &cfg80211_drv_list, list) { + if (exit) + break; + if (++wp_idx < wp_start) + continue; + if_idx = 0; + + mutex_lock(&dev->devlist_mtx); + list_for_each_entry(wdev, &dev->netdev_list, list) { + if (exit) + break; + if (++if_idx < if_start) + continue; + if (!dev->ops->dump_mpath) + continue; + + for (;; ++sta_idx) { + rtnl_lock(); + err = dev->ops->dump_mpath(&dev->wiphy, + wdev->netdev, sta_idx, dst, + next_hop, &pinfo); + rtnl_unlock(); + if (err) { + sta_idx = 0; + break; + } + if (nl80211_send_mpath(skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + wdev->netdev, dst, next_hop, + &pinfo) < 0) { + exit = 1; + break; + } + } + } + mutex_unlock(&dev->devlist_mtx); + } + mutex_unlock(&cfg80211_drv_mutex); + + cb->args[0] = wp_idx; + cb->args[1] = if_idx; + cb->args[2] = sta_idx; + + return skb->len; +} + +static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + int err; + struct net_device *dev; + struct mpath_info pinfo; + struct sk_buff *msg; + u8 *dst = NULL; + u8 next_hop[ETH_ALEN]; + + memset(&pinfo, 0, sizeof(pinfo)); + + if (!info->attrs[NL80211_ATTR_MAC]) + return -EINVAL; + + dst = nla_data(info->attrs[NL80211_ATTR_MAC]); + + err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + if (err) + return err; + + if (!drv->ops->get_mpath) { + err = -EOPNOTSUPP; + goto out; + } + + rtnl_lock(); + err = drv->ops->get_mpath(&drv->wiphy, dev, dst, next_hop, &pinfo); + rtnl_unlock(); + + if (err) + goto out; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + goto out; + + if (nl80211_send_mpath(msg, info->snd_pid, info->snd_seq, 0, + dev, dst, next_hop, &pinfo) < 0) + goto out_free; + + err = genlmsg_unicast(msg, info->snd_pid); + goto out; + + out_free: + nlmsg_free(msg); + + out: + cfg80211_put_dev(drv); + dev_put(dev); + return err; +} + +static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + int err; + struct net_device *dev; + u8 *dst = NULL; + u8 *next_hop = NULL; + + if (!info->attrs[NL80211_ATTR_MAC]) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]) + return -EINVAL; + + dst = nla_data(info->attrs[NL80211_ATTR_MAC]); + next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]); + + err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + if (err) + return err; + + if (!drv->ops->change_mpath) { + err = -EOPNOTSUPP; + goto out; + } + + rtnl_lock(); + err = drv->ops->change_mpath(&drv->wiphy, dev, dst, next_hop); + rtnl_unlock(); + + out: + cfg80211_put_dev(drv); + dev_put(dev); + return err; +} +static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + int err; + struct net_device *dev; + u8 *dst = NULL; + u8 *next_hop = NULL; + + if (!info->attrs[NL80211_ATTR_MAC]) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]) + return -EINVAL; + + dst = nla_data(info->attrs[NL80211_ATTR_MAC]); + next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]); + + err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + if (err) + return err; + + if (!drv->ops->add_mpath) { + err = -EOPNOTSUPP; + goto out; + } + + rtnl_lock(); + err = drv->ops->add_mpath(&drv->wiphy, dev, dst, next_hop); + rtnl_unlock(); + + out: + cfg80211_put_dev(drv); + dev_put(dev); + return err; +} + +static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + int err; + struct net_device *dev; + u8 *dst = NULL; + + if (info->attrs[NL80211_ATTR_MAC]) + dst = nla_data(info->attrs[NL80211_ATTR_MAC]); + + err = get_drv_dev_by_info_ifindex(info, &drv, &dev); + if (err) + return err; + + if (!drv->ops->del_mpath) { + err = -EOPNOTSUPP; + goto out; + } + + rtnl_lock(); + err = drv->ops->del_mpath(&drv->wiphy, dev, dst); + rtnl_unlock(); + + out: + cfg80211_put_dev(drv); + dev_put(dev); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -1203,7 +1572,7 @@ static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_STATION, .doit = nl80211_get_station, - /* TODO: implement dumpit */ + .dumpit = nl80211_dump_station, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, @@ -1225,6 +1594,31 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_GET_MPATH, + .doit = nl80211_get_mpath, + .dumpit = nl80211_dump_mpath, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_SET_MPATH, + .doit = nl80211_set_mpath, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_NEW_MPATH, + .doit = nl80211_new_mpath, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_DEL_MPATH, + .doit = nl80211_del_mpath, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; /* multicast groups */ -- cgit v1.2.3-71-gd317 From aab547ce0d1493d400b6468c521a0137cd8c1edf Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Fri, 29 Feb 2008 11:36:12 +0100 Subject: ssb: Add Gigabit Ethernet driver This adds the Gigabit Ethernet driver for the SSB Gigabit Ethernet core. This driver actually is a frontend to the Tigon3 driver. So the real work is done by tg3. This device is used in the Linksys WRT350N. Signed-off-by: Michael Buesch Signed-off-by: John W. Linville --- drivers/ssb/Kconfig | 9 ++ drivers/ssb/Makefile | 1 + drivers/ssb/driver_gige.c | 294 ++++++++++++++++++++++++++++++++++++ drivers/ssb/driver_mipscore.c | 1 + drivers/ssb/driver_pcicore.c | 160 +++++++++++--------- drivers/ssb/embedded.c | 90 +++++++++++ drivers/ssb/main.c | 30 +++- drivers/ssb/ssb_private.h | 2 + include/linux/ssb/ssb.h | 7 + include/linux/ssb/ssb_driver_gige.h | 174 +++++++++++++++++++++ include/linux/ssb/ssb_driver_pci.h | 19 +++ 11 files changed, 715 insertions(+), 72 deletions(-) create mode 100644 drivers/ssb/driver_gige.c create mode 100644 include/linux/ssb/ssb_driver_gige.h (limited to 'include/linux') diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig index adea792fb675..f69ef0ba2613 100644 --- a/drivers/ssb/Kconfig +++ b/drivers/ssb/Kconfig @@ -125,4 +125,13 @@ config SSB_DRIVER_EXTIF If unsure, say N +config SSB_DRIVER_GIGE + bool "SSB Broadcom Gigabit Ethernet driver" + depends on SSB_PCIHOST_POSSIBLE && SSB_EMBEDDED && MIPS + help + Driver for the Sonics Silicon Backplane attached + Broadcom Gigabit Ethernet. + + If unsure, say N + endmenu diff --git a/drivers/ssb/Makefile b/drivers/ssb/Makefile index de94c2eb7a37..910f35e32fc9 100644 --- a/drivers/ssb/Makefile +++ b/drivers/ssb/Makefile @@ -11,6 +11,7 @@ ssb-y += driver_chipcommon.o ssb-$(CONFIG_SSB_DRIVER_MIPS) += driver_mipscore.o ssb-$(CONFIG_SSB_DRIVER_EXTIF) += driver_extif.o ssb-$(CONFIG_SSB_DRIVER_PCICORE) += driver_pcicore.o +ssb-$(CONFIG_SSB_DRIVER_GIGE) += driver_gige.o # b43 pci-ssb-bridge driver # Not strictly a part of SSB, but kept here for convenience diff --git a/drivers/ssb/driver_gige.c b/drivers/ssb/driver_gige.c new file mode 100644 index 000000000000..172f90407b93 --- /dev/null +++ b/drivers/ssb/driver_gige.c @@ -0,0 +1,294 @@ +/* + * Sonics Silicon Backplane + * Broadcom Gigabit Ethernet core driver + * + * Copyright 2008, Broadcom Corporation + * Copyright 2008, Michael Buesch + * + * Licensed under the GNU/GPL. See COPYING for details. + */ + +#include +#include +#include +#include + + +/* +MODULE_DESCRIPTION("SSB Broadcom Gigabit Ethernet driver"); +MODULE_AUTHOR("Michael Buesch"); +MODULE_LICENSE("GPL"); +*/ + +static const struct ssb_device_id ssb_gige_tbl[] = { + SSB_DEVICE(SSB_VENDOR_BROADCOM, SSB_DEV_ETHERNET_GBIT, SSB_ANY_REV), + SSB_DEVTABLE_END +}; +/* MODULE_DEVICE_TABLE(ssb, ssb_gige_tbl); */ + + +static inline u8 gige_read8(struct ssb_gige *dev, u16 offset) +{ + return ssb_read8(dev->dev, offset); +} + +static inline u16 gige_read16(struct ssb_gige *dev, u16 offset) +{ + return ssb_read16(dev->dev, offset); +} + +static inline u32 gige_read32(struct ssb_gige *dev, u16 offset) +{ + return ssb_read32(dev->dev, offset); +} + +static inline void gige_write8(struct ssb_gige *dev, + u16 offset, u8 value) +{ + ssb_write8(dev->dev, offset, value); +} + +static inline void gige_write16(struct ssb_gige *dev, + u16 offset, u16 value) +{ + ssb_write16(dev->dev, offset, value); +} + +static inline void gige_write32(struct ssb_gige *dev, + u16 offset, u32 value) +{ + ssb_write32(dev->dev, offset, value); +} + +static inline +u8 gige_pcicfg_read8(struct ssb_gige *dev, unsigned int offset) +{ + BUG_ON(offset >= 256); + return gige_read8(dev, SSB_GIGE_PCICFG + offset); +} + +static inline +u16 gige_pcicfg_read16(struct ssb_gige *dev, unsigned int offset) +{ + BUG_ON(offset >= 256); + return gige_read16(dev, SSB_GIGE_PCICFG + offset); +} + +static inline +u32 gige_pcicfg_read32(struct ssb_gige *dev, unsigned int offset) +{ + BUG_ON(offset >= 256); + return gige_read32(dev, SSB_GIGE_PCICFG + offset); +} + +static inline +void gige_pcicfg_write8(struct ssb_gige *dev, + unsigned int offset, u8 value) +{ + BUG_ON(offset >= 256); + gige_write8(dev, SSB_GIGE_PCICFG + offset, value); +} + +static inline +void gige_pcicfg_write16(struct ssb_gige *dev, + unsigned int offset, u16 value) +{ + BUG_ON(offset >= 256); + gige_write16(dev, SSB_GIGE_PCICFG + offset, value); +} + +static inline +void gige_pcicfg_write32(struct ssb_gige *dev, + unsigned int offset, u32 value) +{ + BUG_ON(offset >= 256); + gige_write32(dev, SSB_GIGE_PCICFG + offset, value); +} + +static int ssb_gige_pci_read_config(struct pci_bus *bus, unsigned int devfn, + int reg, int size, u32 *val) +{ + struct ssb_gige *dev = container_of(bus->ops, struct ssb_gige, pci_ops); + unsigned long flags; + + if ((PCI_SLOT(devfn) > 0) || (PCI_FUNC(devfn) > 0)) + return PCIBIOS_DEVICE_NOT_FOUND; + if (reg >= 256) + return PCIBIOS_DEVICE_NOT_FOUND; + + spin_lock_irqsave(&dev->lock, flags); + switch (size) { + case 1: + *val = gige_pcicfg_read8(dev, reg); + break; + case 2: + *val = gige_pcicfg_read16(dev, reg); + break; + case 4: + *val = gige_pcicfg_read32(dev, reg); + break; + default: + WARN_ON(1); + } + spin_unlock_irqrestore(&dev->lock, flags); + + return PCIBIOS_SUCCESSFUL; +} + +static int ssb_gige_pci_write_config(struct pci_bus *bus, unsigned int devfn, + int reg, int size, u32 val) +{ + struct ssb_gige *dev = container_of(bus->ops, struct ssb_gige, pci_ops); + unsigned long flags; + + if ((PCI_SLOT(devfn) > 0) || (PCI_FUNC(devfn) > 0)) + return PCIBIOS_DEVICE_NOT_FOUND; + if (reg >= 256) + return PCIBIOS_DEVICE_NOT_FOUND; + + spin_lock_irqsave(&dev->lock, flags); + switch (size) { + case 1: + gige_pcicfg_write8(dev, reg, val); + break; + case 2: + gige_pcicfg_write16(dev, reg, val); + break; + case 4: + gige_pcicfg_write32(dev, reg, val); + break; + default: + WARN_ON(1); + } + spin_unlock_irqrestore(&dev->lock, flags); + + return PCIBIOS_SUCCESSFUL; +} + +static int ssb_gige_probe(struct ssb_device *sdev, const struct ssb_device_id *id) +{ + struct ssb_gige *dev; + u32 base, tmslow, tmshigh; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + dev->dev = sdev; + + spin_lock_init(&dev->lock); + dev->pci_controller.pci_ops = &dev->pci_ops; + dev->pci_controller.io_resource = &dev->io_resource; + dev->pci_controller.mem_resource = &dev->mem_resource; + dev->pci_controller.io_map_base = 0x800; + dev->pci_ops.read = ssb_gige_pci_read_config; + dev->pci_ops.write = ssb_gige_pci_write_config; + + dev->io_resource.name = SSB_GIGE_IO_RES_NAME; + dev->io_resource.start = 0x800; + dev->io_resource.end = 0x8FF; + dev->io_resource.flags = IORESOURCE_IO | IORESOURCE_PCI_FIXED; + + if (!ssb_device_is_enabled(sdev)) + ssb_device_enable(sdev, 0); + + /* Setup BAR0. This is a 64k MMIO region. */ + base = ssb_admatch_base(ssb_read32(sdev, SSB_ADMATCH1)); + gige_pcicfg_write32(dev, PCI_BASE_ADDRESS_0, base); + gige_pcicfg_write32(dev, PCI_BASE_ADDRESS_1, 0); + + dev->mem_resource.name = SSB_GIGE_MEM_RES_NAME; + dev->mem_resource.start = base; + dev->mem_resource.end = base + 0x10000 - 1; + dev->mem_resource.flags = IORESOURCE_MEM | IORESOURCE_PCI_FIXED; + + /* Enable the memory region. */ + gige_pcicfg_write16(dev, PCI_COMMAND, + gige_pcicfg_read16(dev, PCI_COMMAND) + | PCI_COMMAND_MEMORY); + + /* Write flushing is controlled by the Flush Status Control register. + * We want to flush every register write with a timeout and we want + * to disable the IRQ mask while flushing to avoid concurrency. + * Note that automatic write flushing does _not_ work from + * an IRQ handler. The driver must flush manually by reading a register. + */ + gige_write32(dev, SSB_GIGE_SHIM_FLUSHSTAT, 0x00000068); + + /* Check if we have an RGMII or GMII PHY-bus. + * On RGMII do not bypass the DLLs */ + tmslow = ssb_read32(sdev, SSB_TMSLOW); + tmshigh = ssb_read32(sdev, SSB_TMSHIGH); + if (tmshigh & SSB_GIGE_TMSHIGH_RGMII) { + tmslow &= ~SSB_GIGE_TMSLOW_TXBYPASS; + tmslow &= ~SSB_GIGE_TMSLOW_RXBYPASS; + dev->has_rgmii = 1; + } else { + tmslow |= SSB_GIGE_TMSLOW_TXBYPASS; + tmslow |= SSB_GIGE_TMSLOW_RXBYPASS; + dev->has_rgmii = 0; + } + tmslow |= SSB_GIGE_TMSLOW_DLLEN; + ssb_write32(sdev, SSB_TMSLOW, tmslow); + + ssb_set_drvdata(sdev, dev); + register_pci_controller(&dev->pci_controller); + + return 0; +} + +bool pdev_is_ssb_gige_core(struct pci_dev *pdev) +{ + if (!pdev->resource[0].name) + return 0; + return (strcmp(pdev->resource[0].name, SSB_GIGE_MEM_RES_NAME) == 0); +} +EXPORT_SYMBOL(pdev_is_ssb_gige_core); + +int ssb_gige_pcibios_plat_dev_init(struct ssb_device *sdev, + struct pci_dev *pdev) +{ + struct ssb_gige *dev = ssb_get_drvdata(sdev); + struct resource *res; + + if (pdev->bus->ops != &dev->pci_ops) { + /* The PCI device is not on this SSB GigE bridge device. */ + return -ENODEV; + } + + /* Fixup the PCI resources. */ + res = &(pdev->resource[0]); + res->flags = IORESOURCE_MEM | IORESOURCE_PCI_FIXED; + res->name = dev->mem_resource.name; + res->start = dev->mem_resource.start; + res->end = dev->mem_resource.end; + + /* Fixup interrupt lines. */ + pdev->irq = ssb_mips_irq(sdev) + 2; + pci_write_config_byte(pdev, PCI_INTERRUPT_LINE, pdev->irq); + + return 0; +} + +int ssb_gige_map_irq(struct ssb_device *sdev, + const struct pci_dev *pdev) +{ + struct ssb_gige *dev = ssb_get_drvdata(sdev); + + if (pdev->bus->ops != &dev->pci_ops) { + /* The PCI device is not on this SSB GigE bridge device. */ + return -ENODEV; + } + + return ssb_mips_irq(sdev) + 2; +} + +static struct ssb_driver ssb_gige_driver = { + .name = "BCM-GigE", + .id_table = ssb_gige_tbl, + .probe = ssb_gige_probe, +}; + +int ssb_gige_init(void) +{ + return ssb_driver_register(&ssb_gige_driver); +} diff --git a/drivers/ssb/driver_mipscore.c b/drivers/ssb/driver_mipscore.c index 3d3dd32bf3ab..e3fad3123ecb 100644 --- a/drivers/ssb/driver_mipscore.c +++ b/drivers/ssb/driver_mipscore.c @@ -209,6 +209,7 @@ void ssb_mipscore_init(struct ssb_mipscore *mcore) /* fallthrough */ case SSB_DEV_PCI: case SSB_DEV_ETHERNET: + case SSB_DEV_ETHERNET_GBIT: case SSB_DEV_80211: case SSB_DEV_USB20_HOST: /* These devices get their own IRQ line if available, the rest goes on IRQ0 */ diff --git a/drivers/ssb/driver_pcicore.c b/drivers/ssb/driver_pcicore.c index 74b9a8aea52b..33a7d5620474 100644 --- a/drivers/ssb/driver_pcicore.c +++ b/drivers/ssb/driver_pcicore.c @@ -60,77 +60,6 @@ static DEFINE_SPINLOCK(cfgspace_lock); /* Core to access the external PCI config space. Can only have one. */ static struct ssb_pcicore *extpci_core; -static u32 ssb_pcicore_pcibus_iobase = 0x100; -static u32 ssb_pcicore_pcibus_membase = SSB_PCI_DMA; - -int pcibios_plat_dev_init(struct pci_dev *d) -{ - struct resource *res; - int pos, size; - u32 *base; - - ssb_printk(KERN_INFO "PCI: Fixing up device %s\n", - pci_name(d)); - - /* Fix up resource bases */ - for (pos = 0; pos < 6; pos++) { - res = &d->resource[pos]; - if (res->flags & IORESOURCE_IO) - base = &ssb_pcicore_pcibus_iobase; - else - base = &ssb_pcicore_pcibus_membase; - res->flags |= IORESOURCE_PCI_FIXED; - if (res->end) { - size = res->end - res->start + 1; - if (*base & (size - 1)) - *base = (*base + size) & ~(size - 1); - res->start = *base; - res->end = res->start + size - 1; - *base += size; - pci_write_config_dword(d, PCI_BASE_ADDRESS_0 + (pos << 2), res->start); - } - /* Fix up PCI bridge BAR0 only */ - if (d->bus->number == 0 && PCI_SLOT(d->devfn) == 0) - break; - } - /* Fix up interrupt lines */ - d->irq = ssb_mips_irq(extpci_core->dev) + 2; - pci_write_config_byte(d, PCI_INTERRUPT_LINE, d->irq); - - return 0; -} - -static void __init ssb_fixup_pcibridge(struct pci_dev *dev) -{ - u8 lat; - - if (dev->bus->number != 0 || PCI_SLOT(dev->devfn) != 0) - return; - - ssb_printk(KERN_INFO "PCI: Fixing up bridge %s\n", pci_name(dev)); - - /* Enable PCI bridge bus mastering and memory space */ - pci_set_master(dev); - if (pcibios_enable_device(dev, ~0) < 0) { - ssb_printk(KERN_ERR "PCI: SSB bridge enable failed\n"); - return; - } - - /* Enable PCI bridge BAR1 prefetch and burst */ - pci_write_config_dword(dev, SSB_BAR1_CONTROL, 3); - - /* Make sure our latency is high enough to handle the devices behind us */ - lat = 168; - ssb_printk(KERN_INFO "PCI: Fixing latency timer of device %s to %u\n", - pci_name(dev), lat); - pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); -} -DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, ssb_fixup_pcibridge); - -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) -{ - return ssb_mips_irq(extpci_core->dev) + 2; -} static u32 get_cfgspace_addr(struct ssb_pcicore *pc, unsigned int bus, unsigned int dev, @@ -320,6 +249,95 @@ static struct pci_controller ssb_pcicore_controller = { .mem_offset = 0x24000000, }; +static u32 ssb_pcicore_pcibus_iobase = 0x100; +static u32 ssb_pcicore_pcibus_membase = SSB_PCI_DMA; + +/* This function is called when doing a pci_enable_device(). + * We must first check if the device is a device on the PCI-core bridge. */ +int ssb_pcicore_plat_dev_init(struct pci_dev *d) +{ + struct resource *res; + int pos, size; + u32 *base; + + if (d->bus->ops != &ssb_pcicore_pciops) { + /* This is not a device on the PCI-core bridge. */ + return -ENODEV; + } + + ssb_printk(KERN_INFO "PCI: Fixing up device %s\n", + pci_name(d)); + + /* Fix up resource bases */ + for (pos = 0; pos < 6; pos++) { + res = &d->resource[pos]; + if (res->flags & IORESOURCE_IO) + base = &ssb_pcicore_pcibus_iobase; + else + base = &ssb_pcicore_pcibus_membase; + res->flags |= IORESOURCE_PCI_FIXED; + if (res->end) { + size = res->end - res->start + 1; + if (*base & (size - 1)) + *base = (*base + size) & ~(size - 1); + res->start = *base; + res->end = res->start + size - 1; + *base += size; + pci_write_config_dword(d, PCI_BASE_ADDRESS_0 + (pos << 2), res->start); + } + /* Fix up PCI bridge BAR0 only */ + if (d->bus->number == 0 && PCI_SLOT(d->devfn) == 0) + break; + } + /* Fix up interrupt lines */ + d->irq = ssb_mips_irq(extpci_core->dev) + 2; + pci_write_config_byte(d, PCI_INTERRUPT_LINE, d->irq); + + return 0; +} + +/* Early PCI fixup for a device on the PCI-core bridge. */ +static void ssb_pcicore_fixup_pcibridge(struct pci_dev *dev) +{ + u8 lat; + + if (dev->bus->ops != &ssb_pcicore_pciops) { + /* This is not a device on the PCI-core bridge. */ + return; + } + if (dev->bus->number != 0 || PCI_SLOT(dev->devfn) != 0) + return; + + ssb_printk(KERN_INFO "PCI: Fixing up bridge %s\n", pci_name(dev)); + + /* Enable PCI bridge bus mastering and memory space */ + pci_set_master(dev); + if (pcibios_enable_device(dev, ~0) < 0) { + ssb_printk(KERN_ERR "PCI: SSB bridge enable failed\n"); + return; + } + + /* Enable PCI bridge BAR1 prefetch and burst */ + pci_write_config_dword(dev, SSB_BAR1_CONTROL, 3); + + /* Make sure our latency is high enough to handle the devices behind us */ + lat = 168; + ssb_printk(KERN_INFO "PCI: Fixing latency timer of device %s to %u\n", + pci_name(dev), lat); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); +} +DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, ssb_pcicore_fixup_pcibridge); + +/* PCI device IRQ mapping. */ +int ssb_pcicore_pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + if (dev->bus->ops != &ssb_pcicore_pciops) { + /* This is not a device on the PCI-core bridge. */ + return -ENODEV; + } + return ssb_mips_irq(extpci_core->dev) + 2; +} + static void ssb_pcicore_init_hostmode(struct ssb_pcicore *pc) { u32 val; diff --git a/drivers/ssb/embedded.c b/drivers/ssb/embedded.c index d3ade821555c..7dc3a6b41397 100644 --- a/drivers/ssb/embedded.c +++ b/drivers/ssb/embedded.c @@ -10,6 +10,9 @@ #include #include +#include +#include +#include #include "ssb_private.h" @@ -130,3 +133,90 @@ u32 ssb_gpio_polarity(struct ssb_bus *bus, u32 mask, u32 value) return res; } EXPORT_SYMBOL(ssb_gpio_polarity); + +#ifdef CONFIG_SSB_DRIVER_GIGE +static int gige_pci_init_callback(struct ssb_bus *bus, unsigned long data) +{ + struct pci_dev *pdev = (struct pci_dev *)data; + struct ssb_device *dev; + unsigned int i; + int res; + + for (i = 0; i < bus->nr_devices; i++) { + dev = &(bus->devices[i]); + if (dev->id.coreid != SSB_DEV_ETHERNET_GBIT) + continue; + if (!dev->dev || + !dev->dev->driver || + !device_is_registered(dev->dev)) + continue; + res = ssb_gige_pcibios_plat_dev_init(dev, pdev); + if (res >= 0) + return res; + } + + return -ENODEV; +} +#endif /* CONFIG_SSB_DRIVER_GIGE */ + +int ssb_pcibios_plat_dev_init(struct pci_dev *dev) +{ + int err; + + err = ssb_pcicore_plat_dev_init(dev); + if (!err) + return 0; +#ifdef CONFIG_SSB_DRIVER_GIGE + err = ssb_for_each_bus_call((unsigned long)dev, gige_pci_init_callback); + if (err >= 0) + return err; +#endif + /* This is not a PCI device on any SSB device. */ + + return -ENODEV; +} + +#ifdef CONFIG_SSB_DRIVER_GIGE +static int gige_map_irq_callback(struct ssb_bus *bus, unsigned long data) +{ + const struct pci_dev *pdev = (const struct pci_dev *)data; + struct ssb_device *dev; + unsigned int i; + int res; + + for (i = 0; i < bus->nr_devices; i++) { + dev = &(bus->devices[i]); + if (dev->id.coreid != SSB_DEV_ETHERNET_GBIT) + continue; + if (!dev->dev || + !dev->dev->driver || + !device_is_registered(dev->dev)) + continue; + res = ssb_gige_map_irq(dev, pdev); + if (res >= 0) + return res; + } + + return -ENODEV; +} +#endif /* CONFIG_SSB_DRIVER_GIGE */ + +int ssb_pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + int res; + + /* Check if this PCI device is a device on a SSB bus or device + * and return the IRQ number for it. */ + + res = ssb_pcicore_pcibios_map_irq(dev, slot, pin); + if (res >= 0) + return res; +#ifdef CONFIG_SSB_DRIVER_GIGE + res = ssb_for_each_bus_call((unsigned long)dev, gige_map_irq_callback); + if (res >= 0) + return res; +#endif + /* This is not a PCI device on any SSB device. */ + + return -ENODEV; +} diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c index 8db40c4b86e9..49d7bbb9bea7 100644 --- a/drivers/ssb/main.c +++ b/drivers/ssb/main.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -68,6 +69,25 @@ found: } #endif /* CONFIG_SSB_PCIHOST */ +int ssb_for_each_bus_call(unsigned long data, + int (*func)(struct ssb_bus *bus, unsigned long data)) +{ + struct ssb_bus *bus; + int res; + + ssb_buses_lock(); + list_for_each_entry(bus, &buses, list) { + res = func(bus, data); + if (res >= 0) { + ssb_buses_unlock(); + return res; + } + } + ssb_buses_unlock(); + + return -ENODEV; +} + static struct ssb_device *ssb_device_get(struct ssb_device *dev) { if (dev) @@ -1171,7 +1191,14 @@ static int __init ssb_modinit(void) err = b43_pci_ssb_bridge_init(); if (err) { ssb_printk(KERN_ERR "Broadcom 43xx PCI-SSB-bridge " - "initialization failed"); + "initialization failed\n"); + /* don't fail SSB init because of this */ + err = 0; + } + err = ssb_gige_init(); + if (err) { + ssb_printk(KERN_ERR "SSB Broadcom Gigabit Ethernet " + "driver initialization failed\n"); /* don't fail SSB init because of this */ err = 0; } @@ -1185,6 +1212,7 @@ fs_initcall(ssb_modinit); static void __exit ssb_modexit(void) { + ssb_gige_exit(); b43_pci_ssb_bridge_exit(); bus_unregister(&ssb_bustype); } diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h index 21eca2b5118b..d03b20983b1e 100644 --- a/drivers/ssb/ssb_private.h +++ b/drivers/ssb/ssb_private.h @@ -118,6 +118,8 @@ extern u32 ssb_calc_clock_rate(u32 plltype, u32 n, u32 m); extern int ssb_devices_freeze(struct ssb_bus *bus); extern int ssb_devices_thaw(struct ssb_bus *bus); extern struct ssb_bus *ssb_pci_dev_to_bus(struct pci_dev *pdev); +int ssb_for_each_bus_call(unsigned long data, + int (*func)(struct ssb_bus *bus, unsigned long data)); /* b43_pci_bridge.c */ #ifdef CONFIG_SSB_B43_PCI_BRIDGE diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 860d28c6d149..b7c388972fcf 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -422,5 +422,12 @@ extern int ssb_bus_powerup(struct ssb_bus *bus, bool dynamic_pctl); extern u32 ssb_admatch_base(u32 adm); extern u32 ssb_admatch_size(u32 adm); +/* PCI device mapping and fixup routines. + * Called from the architecture pcibios init code. + * These are only available on SSB_EMBEDDED configurations. */ +#ifdef CONFIG_SSB_EMBEDDED +int ssb_pcibios_plat_dev_init(struct pci_dev *dev); +int ssb_pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin); +#endif /* CONFIG_SSB_EMBEDDED */ #endif /* LINUX_SSB_H_ */ diff --git a/include/linux/ssb/ssb_driver_gige.h b/include/linux/ssb/ssb_driver_gige.h new file mode 100644 index 000000000000..01fbdf5fef22 --- /dev/null +++ b/include/linux/ssb/ssb_driver_gige.h @@ -0,0 +1,174 @@ +#ifndef LINUX_SSB_DRIVER_GIGE_H_ +#define LINUX_SSB_DRIVER_GIGE_H_ + +#include +#include +#include + + +#ifdef CONFIG_SSB_DRIVER_GIGE + + +#define SSB_GIGE_PCIIO 0x0000 /* PCI I/O Registers (1024 bytes) */ +#define SSB_GIGE_RESERVED 0x0400 /* Reserved (1024 bytes) */ +#define SSB_GIGE_PCICFG 0x0800 /* PCI config space (256 bytes) */ +#define SSB_GIGE_SHIM_FLUSHSTAT 0x0C00 /* PCI to OCP: Flush status control (32bit) */ +#define SSB_GIGE_SHIM_FLUSHRDA 0x0C04 /* PCI to OCP: Flush read address (32bit) */ +#define SSB_GIGE_SHIM_FLUSHTO 0x0C08 /* PCI to OCP: Flush timeout counter (32bit) */ +#define SSB_GIGE_SHIM_BARRIER 0x0C0C /* PCI to OCP: Barrier register (32bit) */ +#define SSB_GIGE_SHIM_MAOCPSI 0x0C10 /* PCI to OCP: MaocpSI Control (32bit) */ +#define SSB_GIGE_SHIM_SIOCPMA 0x0C14 /* PCI to OCP: SiocpMa Control (32bit) */ + +/* TM Status High flags */ +#define SSB_GIGE_TMSHIGH_RGMII 0x00010000 /* Have an RGMII PHY-bus */ +/* TM Status Low flags */ +#define SSB_GIGE_TMSLOW_TXBYPASS 0x00080000 /* TX bypass (no delay) */ +#define SSB_GIGE_TMSLOW_RXBYPASS 0x00100000 /* RX bypass (no delay) */ +#define SSB_GIGE_TMSLOW_DLLEN 0x01000000 /* Enable DLL controls */ + +/* Boardflags (low) */ +#define SSB_GIGE_BFL_ROBOSWITCH 0x0010 + + +#define SSB_GIGE_MEM_RES_NAME "SSB Broadcom 47xx GigE memory" +#define SSB_GIGE_IO_RES_NAME "SSB Broadcom 47xx GigE I/O" + +struct ssb_gige { + struct ssb_device *dev; + + spinlock_t lock; + + /* True, if the device has an RGMII bus. + * False, if the device has a GMII bus. */ + bool has_rgmii; + + /* The PCI controller device. */ + struct pci_controller pci_controller; + struct pci_ops pci_ops; + struct resource mem_resource; + struct resource io_resource; +}; + +/* Check whether a PCI device is a SSB Gigabit Ethernet core. */ +extern bool pdev_is_ssb_gige_core(struct pci_dev *pdev); + +/* Convert a pci_dev pointer to a ssb_gige pointer. */ +static inline struct ssb_gige * pdev_to_ssb_gige(struct pci_dev *pdev) +{ + if (!pdev_is_ssb_gige_core(pdev)) + return NULL; + return container_of(pdev->bus->ops, struct ssb_gige, pci_ops); +} + +/* Returns whether the PHY is connected by an RGMII bus. */ +static inline bool ssb_gige_is_rgmii(struct pci_dev *pdev) +{ + struct ssb_gige *dev = pdev_to_ssb_gige(pdev); + return (dev ? dev->has_rgmii : 0); +} + +/* Returns whether we have a Roboswitch. */ +static inline bool ssb_gige_have_roboswitch(struct pci_dev *pdev) +{ + struct ssb_gige *dev = pdev_to_ssb_gige(pdev); + if (dev) + return !!(dev->dev->bus->sprom.boardflags_lo & + SSB_GIGE_BFL_ROBOSWITCH); + return 0; +} + +/* Returns whether we can only do one DMA at once. */ +static inline bool ssb_gige_one_dma_at_once(struct pci_dev *pdev) +{ + struct ssb_gige *dev = pdev_to_ssb_gige(pdev); + if (dev) + return ((dev->dev->bus->chip_id == 0x4785) && + (dev->dev->bus->chip_rev < 2)); + return 0; +} + +/* Returns whether we must flush posted writes. */ +static inline bool ssb_gige_must_flush_posted_writes(struct pci_dev *pdev) +{ + struct ssb_gige *dev = pdev_to_ssb_gige(pdev); + if (dev) + return (dev->dev->bus->chip_id == 0x4785); + return 0; +} + +extern char * nvram_get(const char *name); +/* Get the device MAC address */ +static inline void ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr) +{ +#ifdef CONFIG_BCM947XX + char *res = nvram_get("et0macaddr"); + if (res) + memcpy(macaddr, res, 6); +#endif +} + +extern int ssb_gige_pcibios_plat_dev_init(struct ssb_device *sdev, + struct pci_dev *pdev); +extern int ssb_gige_map_irq(struct ssb_device *sdev, + const struct pci_dev *pdev); + +/* The GigE driver is not a standalone module, because we don't have support + * for unregistering the driver. So we could not unload the module anyway. */ +extern int ssb_gige_init(void); +static inline void ssb_gige_exit(void) +{ + /* Currently we can not unregister the GigE driver, + * because we can not unregister the PCI bridge. */ + BUG(); +} + + +#else /* CONFIG_SSB_DRIVER_GIGE */ +/* Gigabit Ethernet driver disabled */ + + +static inline int ssb_gige_pcibios_plat_dev_init(struct ssb_device *sdev, + struct pci_dev *pdev) +{ + return -ENOSYS; +} +static inline int ssb_gige_map_irq(struct ssb_device *sdev, + const struct pci_dev *pdev) +{ + return -ENOSYS; +} +static inline int ssb_gige_init(void) +{ + return 0; +} +static inline void ssb_gige_exit(void) +{ +} + +static inline bool pdev_is_ssb_gige_core(struct pci_dev *pdev) +{ + return 0; +} +static inline struct ssb_gige * pdev_to_ssb_gige(struct pci_dev *pdev) +{ + return NULL; +} +static inline bool ssb_gige_is_rgmii(struct pci_dev *pdev) +{ + return 0; +} +static inline bool ssb_gige_have_roboswitch(struct pci_dev *pdev) +{ + return 0; +} +static inline bool ssb_gige_one_dma_at_once(struct pci_dev *pdev) +{ + return 0; +} +static inline bool ssb_gige_must_flush_posted_writes(struct pci_dev *pdev) +{ + return 0; +} + +#endif /* CONFIG_SSB_DRIVER_GIGE */ +#endif /* LINUX_SSB_DRIVER_GIGE_H_ */ diff --git a/include/linux/ssb/ssb_driver_pci.h b/include/linux/ssb/ssb_driver_pci.h index 5e25bac4ed31..41e330e51c2a 100644 --- a/include/linux/ssb/ssb_driver_pci.h +++ b/include/linux/ssb/ssb_driver_pci.h @@ -1,6 +1,11 @@ #ifndef LINUX_SSB_PCICORE_H_ #define LINUX_SSB_PCICORE_H_ +#include + +struct pci_dev; + + #ifdef CONFIG_SSB_DRIVER_PCICORE /* PCI core registers. */ @@ -88,6 +93,9 @@ extern void ssb_pcicore_init(struct ssb_pcicore *pc); extern int ssb_pcicore_dev_irqvecs_enable(struct ssb_pcicore *pc, struct ssb_device *dev); +int ssb_pcicore_plat_dev_init(struct pci_dev *d); +int ssb_pcicore_pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin); + #else /* CONFIG_SSB_DRIVER_PCICORE */ @@ -107,5 +115,16 @@ int ssb_pcicore_dev_irqvecs_enable(struct ssb_pcicore *pc, return 0; } +static inline +int ssb_pcicore_plat_dev_init(struct pci_dev *d) +{ + return -ENODEV; +} +static inline +int ssb_pcicore_pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + return -ENODEV; +} + #endif /* CONFIG_SSB_DRIVER_PCICORE */ #endif /* LINUX_SSB_PCICORE_H_ */ -- cgit v1.2.3-71-gd317 From db8dac20d5199307dcfcf4e01dac4bda5edf9e89 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 6 Mar 2008 16:22:02 -0800 Subject: [UDP]: Revert udplite and code split. This reverts commit db1ed684f6c430c4cdad67d058688b8a1b5e607c ("[IPV6] UDP: Rename IPv6 UDP files."), commit 8be8af8fa4405652e6c0797db5465a4be8afb998 ("[IPV4] UDP: Move IPv4-specific bits to other file.") and commit e898d4db2749c6052072e9bc4448e396cbdeb06a ("[UDP]: Allow users to configure UDP-Lite."). First, udplite is of such small cost, and it is a core protocol just like TCP and normal UDP are. We spent enormous amounts of effort to make udplite share as much code with core UDP as possible. All of that work is less valuable if we're just going to slap a config option on udplite support. It is also causing build failures, as reported on linux-next, showing that the changeset was not tested very well. In fact, this is the second build failure resulting from the udplite change. Finally, the config options provided was a bool, instead of a modular option. Meaning the udplite code does not even get build tested by allmodconfig builds, and furthermore the user is not presented with a reasonable modular build option which is particularly needed by distribution vendors. Signed-off-by: David S. Miller --- include/linux/udp.h | 10 - include/net/ipv6.h | 5 - include/net/transp_v6.h | 5 - include/net/udplite.h | 9 +- net/ipv4/Kconfig | 10 - net/ipv4/Makefile | 3 +- net/ipv4/af_inet.c | 7 +- net/ipv4/proc.c | 5 +- net/ipv4/udp.c | 1090 +++++++++++++++++++++++++++++++++++++++++++- net/ipv4/udp_ipv4.c | 1134 ---------------------------------------------- net/ipv4/udplite.c | 121 +++++ net/ipv4/udplite_ipv4.c | 121 ----- net/ipv6/Makefile | 3 +- net/ipv6/af_inet6.c | 14 - net/ipv6/ipv6_sockglue.c | 6 +- net/ipv6/proc.c | 6 - net/ipv6/udp.c | 1065 +++++++++++++++++++++++++++++++++++++++++++ net/ipv6/udp_ipv6.c | 1065 ------------------------------------------- net/ipv6/udplite.c | 125 +++++ net/ipv6/udplite_ipv6.c | 125 ----- 20 files changed, 2404 insertions(+), 2525 deletions(-) delete mode 100644 net/ipv4/udp_ipv4.c create mode 100644 net/ipv4/udplite.c delete mode 100644 net/ipv4/udplite_ipv4.c create mode 100644 net/ipv6/udp.c delete mode 100644 net/ipv6/udp_ipv6.c create mode 100644 net/ipv6/udplite.c delete mode 100644 net/ipv6/udplite_ipv6.c (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 4144664d69d9..1e7b7cb5703b 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -70,10 +70,8 @@ struct udp_sock { #define UDPLITE_BIT 0x1 /* set by udplite proto init function */ #define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ #define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ -#ifdef CONFIG_IP_UDPLITE __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ __u8 unused[3]; -#endif /* * For encapsulation sockets. */ @@ -85,15 +83,7 @@ static inline struct udp_sock *udp_sk(const struct sock *sk) return (struct udp_sock *)sk; } -#ifdef CONFIG_IP_UDPLITE #define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag) -#define IS_PROTO_UDPLITE(__proto) ((__proto) == IPPROTO_UDPLITE) -#define IS_SOL_UDPFAMILY(level) ((level) == SOL_UDP || (level) == SOL_UDPLITE) -#else -#define IS_UDPLITE(__sk) 0 -#define IS_PROTO_UDPLITE(__proto) 0 -#define IS_SOL_UDPFAMILY(level) ((level) == SOL_UDP) -#endif #endif diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 5f6df50a33a9..8db06af1efbb 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -599,13 +599,8 @@ extern int tcp6_proc_init(void); extern void tcp6_proc_exit(void); extern int udp6_proc_init(void); extern void udp6_proc_exit(void); -#ifdef CONFIG_IP_UDPLITE extern int udplite6_proc_init(void); extern void udplite6_proc_exit(void); -#else -static inline int udplite6_proc_init(void) { return 0; } -static inline void udplite6_proc_exit(void) { } -#endif extern int ipv6_misc_proc_init(void); extern void ipv6_misc_proc_exit(void); extern int snmp6_register_dev(struct inet6_dev *idev); diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 902e6c6bc793..27394e0447d8 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -27,13 +27,8 @@ extern int rawv6_init(void); extern void rawv6_exit(void); extern int udpv6_init(void); extern void udpv6_exit(void); -#ifdef CONFIG_IP_UDPLITE extern int udplitev6_init(void); extern void udplitev6_exit(void); -#else -static inline int udplitev6_init(void) { return 0; } -static inline void udplitev6_exit(void) { } -#endif extern int tcpv6_init(void); extern void tcpv6_exit(void); diff --git a/include/net/udplite.h b/include/net/udplite.h index 01ddb2c20264..b76b2e377af4 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -25,9 +25,7 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, /* Designate sk as UDP-Lite socket */ static inline int udplite_sk_init(struct sock *sk) { -#ifdef CONFIG_IP_UDPLITE udp_sk(sk)->pcflag = UDPLITE_BIT; -#endif return 0; } @@ -71,7 +69,7 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh) { int cscov = up->len; -#ifdef CONFIG_IP_UDPLITE + /* * Sender has set `partial coverage' option on UDP-Lite socket */ @@ -95,15 +93,13 @@ static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh) * illegal, we fall back to the defaults here. */ } -#endif return cscov; } static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) { - __wsum csum = 0; -#ifdef CONFIG_IP_UDPLITE int cscov = udplite_sender_cscov(udp_sk(sk), udp_hdr(skb)); + __wsum csum = 0; skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ @@ -116,7 +112,6 @@ static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) if ((cscov -= len) <= 0) break; } -#endif return csum; } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 5098fd2ff4d0..9c7e5ffb223d 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -632,15 +632,5 @@ config TCP_MD5SIG If unsure, say N. -config IP_UDPLITE - bool "IP: UDP-Lite Protocol (RFC 3828)" - default n - ---help--- - UDP-Lite (RFC 3828) is a UDP-like protocol with variable-length - checksum. Read for - details. - - If unsure, say N. - source "net/ipv4/ipvs/Kconfig" diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index d5226241d5ed..ad40ef3f9ebc 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o \ - datagram.o raw.o udp.o udp_ipv4.o \ + datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o \ inet_fragment.o @@ -49,7 +49,6 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o -obj-$(CONFIG_IP_UDPLITE) += udplite_ipv4.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 25871c6c7444..4cb8a1385539 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1317,18 +1317,15 @@ static int __init init_ipv4_mibs(void) if (snmp_mib_init((void **)udp_statistics, sizeof(struct udp_mib)) < 0) goto err_udp_mib; -#ifdef CONFIG_IP_UDPLITE if (snmp_mib_init((void **)udplite_statistics, sizeof(struct udp_mib)) < 0) goto err_udplite_mib; -#endif + tcp_mib_init(); return 0; -#ifdef CONFIG_IP_UDPLITE err_udplite_mib: -#endif snmp_mib_free((void **)udp_statistics); err_udp_mib: snmp_mib_free((void **)tcp_statistics); @@ -1426,10 +1423,8 @@ static int __init inet_init(void) /* Setup UDP memory threshold */ udp_init(); -#ifdef CONFIG_IP_UDPLITE /* Add UDP-Lite (RFC 3828) */ udplite4_register(); -#endif /* * Set the ICMP layer up diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index d75ddb7fa4b8..d63474c6b400 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -59,9 +59,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) atomic_read(&tcp_memory_allocated)); seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot), atomic_read(&udp_memory_allocated)); -#ifdef CONFIG_IP_UDPLITE seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot)); -#endif seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot)); seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues(&init_net), ip_frag_mem(&init_net)); @@ -351,7 +349,6 @@ static int snmp_seq_show(struct seq_file *seq, void *v) snmp_fold_field((void **)udp_statistics, snmp4_udp_list[i].entry)); -#ifdef CONFIG_IP_UDPLITE /* the UDP and UDP-Lite MIBs are the same */ seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) @@ -362,7 +359,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_printf(seq, " %lu", snmp_fold_field((void **)udplite_statistics, snmp4_udp_list[i].entry)); -#endif + seq_putc(seq, '\n'); return 0; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c53d7673b57d..7ea1b67b6de1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -246,6 +246,553 @@ int udp_get_port(struct sock *sk, unsigned short snum, return __udp_lib_get_port(sk, snum, udp_hash, scmp); } +int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +{ + struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); + + return ( !ipv6_only_sock(sk2) && + (!inet1->rcv_saddr || !inet2->rcv_saddr || + inet1->rcv_saddr == inet2->rcv_saddr )); +} + +static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) +{ + return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); +} + +/* UDP is nearly always wildcards out the wazoo, it makes no sense to try + * harder than this. -DaveM + */ +static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, + __be16 sport, __be32 daddr, __be16 dport, + int dif, struct hlist_head udptable[]) +{ + struct sock *sk, *result = NULL; + struct hlist_node *node; + unsigned short hnum = ntohs(dport); + int badness = -1; + + read_lock(&udp_hash_lock); + sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { + struct inet_sock *inet = inet_sk(sk); + + if (sk->sk_net == net && sk->sk_hash == hnum && + !ipv6_only_sock(sk)) { + int score = (sk->sk_family == PF_INET ? 1 : 0); + if (inet->rcv_saddr) { + if (inet->rcv_saddr != daddr) + continue; + score+=2; + } + if (inet->daddr) { + if (inet->daddr != saddr) + continue; + score+=2; + } + if (inet->dport) { + if (inet->dport != sport) + continue; + score+=2; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + continue; + score+=2; + } + if (score == 9) { + result = sk; + break; + } else if (score > badness) { + result = sk; + badness = score; + } + } + } + if (result) + sock_hold(result); + read_unlock(&udp_hash_lock); + return result; +} + +static inline struct sock *udp_v4_mcast_next(struct sock *sk, + __be16 loc_port, __be32 loc_addr, + __be16 rmt_port, __be32 rmt_addr, + int dif) +{ + struct hlist_node *node; + struct sock *s = sk; + unsigned short hnum = ntohs(loc_port); + + sk_for_each_from(s, node) { + struct inet_sock *inet = inet_sk(s); + + if (s->sk_hash != hnum || + (inet->daddr && inet->daddr != rmt_addr) || + (inet->dport != rmt_port && inet->dport) || + (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || + ipv6_only_sock(s) || + (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) + continue; + if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) + continue; + goto found; + } + s = NULL; +found: + return s; +} + +/* + * This routine is called by the ICMP module when it gets some + * sort of error condition. If err < 0 then the socket should + * be closed and the error returned to the user. If err > 0 + * it's just the icmp type << 8 | icmp code. + * Header points to the ip header of the error packet. We move + * on past this. Then (as it used to claim before adjustment) + * header points to the first 8 bytes of the udp header. We need + * to find the appropriate port. + */ + +void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) +{ + struct inet_sock *inet; + struct iphdr *iph = (struct iphdr*)skb->data; + struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; + struct sock *sk; + int harderr; + int err; + + sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest, + iph->saddr, uh->source, skb->dev->ifindex, udptable); + if (sk == NULL) { + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + return; /* No socket for error */ + } + + err = 0; + harderr = 0; + inet = inet_sk(sk); + + switch (type) { + default: + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + case ICMP_SOURCE_QUENCH: + goto out; + case ICMP_PARAMETERPROB: + err = EPROTO; + harderr = 1; + break; + case ICMP_DEST_UNREACH: + if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ + if (inet->pmtudisc != IP_PMTUDISC_DONT) { + err = EMSGSIZE; + harderr = 1; + break; + } + goto out; + } + err = EHOSTUNREACH; + if (code <= NR_ICMP_UNREACH) { + harderr = icmp_err_convert[code].fatal; + err = icmp_err_convert[code].errno; + } + break; + } + + /* + * RFC1122: OK. Passes ICMP errors back to application, as per + * 4.1.3.3. + */ + if (!inet->recverr) { + if (!harderr || sk->sk_state != TCP_ESTABLISHED) + goto out; + } else { + ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1)); + } + sk->sk_err = err; + sk->sk_error_report(sk); +out: + sock_put(sk); +} + +void udp_err(struct sk_buff *skb, u32 info) +{ + __udp4_lib_err(skb, info, udp_hash); +} + +/* + * Throw away all pending data and cancel the corking. Socket is locked. + */ +static void udp_flush_pending_frames(struct sock *sk) +{ + struct udp_sock *up = udp_sk(sk); + + if (up->pending) { + up->len = 0; + up->pending = 0; + ip_flush_pending_frames(sk); + } +} + +/** + * udp4_hwcsum_outgoing - handle outgoing HW checksumming + * @sk: socket we are sending on + * @skb: sk_buff containing the filled-in UDP header + * (checksum field must be zeroed out) + */ +static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, + __be32 src, __be32 dst, int len ) +{ + unsigned int offset; + struct udphdr *uh = udp_hdr(skb); + __wsum csum = 0; + + if (skb_queue_len(&sk->sk_write_queue) == 1) { + /* + * Only one fragment on the socket. + */ + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); + } else { + /* + * HW-checksum won't work as there are two or more + * fragments on the socket so that all csums of sk_buffs + * should be together + */ + offset = skb_transport_offset(skb); + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + + skb->ip_summed = CHECKSUM_NONE; + + skb_queue_walk(&sk->sk_write_queue, skb) { + csum = csum_add(csum, skb->csum); + } + + uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } +} + +/* + * Push out all pending data as one UDP datagram. Socket is locked. + */ +static int udp_push_pending_frames(struct sock *sk) +{ + struct udp_sock *up = udp_sk(sk); + struct inet_sock *inet = inet_sk(sk); + struct flowi *fl = &inet->cork.fl; + struct sk_buff *skb; + struct udphdr *uh; + int err = 0; + int is_udplite = IS_UDPLITE(sk); + __wsum csum = 0; + + /* Grab the skbuff where UDP header space exists. */ + if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) + goto out; + + /* + * Create a UDP header + */ + uh = udp_hdr(skb); + uh->source = fl->fl_ip_sport; + uh->dest = fl->fl_ip_dport; + uh->len = htons(up->len); + uh->check = 0; + + if (is_udplite) /* UDP-Lite */ + csum = udplite_csum_outgoing(sk, skb); + + else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ + + skb->ip_summed = CHECKSUM_NONE; + goto send; + + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ + + udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); + goto send; + + } else /* `normal' UDP */ + csum = udp_csum_outgoing(sk, skb); + + /* add protocol-dependent pseudo-header */ + uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, + sk->sk_protocol, csum ); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + +send: + err = ip_push_pending_frames(sk); +out: + up->len = 0; + up->pending = 0; + if (!err) + UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); + return err; +} + +int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct inet_sock *inet = inet_sk(sk); + struct udp_sock *up = udp_sk(sk); + int ulen = len; + struct ipcm_cookie ipc; + struct rtable *rt = NULL; + int free = 0; + int connected = 0; + __be32 daddr, faddr, saddr; + __be16 dport; + u8 tos; + int err, is_udplite = IS_UDPLITE(sk); + int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; + int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); + + if (len > 0xFFFF) + return -EMSGSIZE; + + /* + * Check the flags. + */ + + if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */ + return -EOPNOTSUPP; + + ipc.opt = NULL; + + if (up->pending) { + /* + * There are pending frames. + * The socket lock must be held while it's corked. + */ + lock_sock(sk); + if (likely(up->pending)) { + if (unlikely(up->pending != AF_INET)) { + release_sock(sk); + return -EINVAL; + } + goto do_append_data; + } + release_sock(sk); + } + ulen += sizeof(struct udphdr); + + /* + * Get and verify the address. + */ + if (msg->msg_name) { + struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; + if (msg->msg_namelen < sizeof(*usin)) + return -EINVAL; + if (usin->sin_family != AF_INET) { + if (usin->sin_family != AF_UNSPEC) + return -EAFNOSUPPORT; + } + + daddr = usin->sin_addr.s_addr; + dport = usin->sin_port; + if (dport == 0) + return -EINVAL; + } else { + if (sk->sk_state != TCP_ESTABLISHED) + return -EDESTADDRREQ; + daddr = inet->daddr; + dport = inet->dport; + /* Open fast path for connected socket. + Route will not be used, if at least one option is set. + */ + connected = 1; + } + ipc.addr = inet->saddr; + + ipc.oif = sk->sk_bound_dev_if; + if (msg->msg_controllen) { + err = ip_cmsg_send(msg, &ipc); + if (err) + return err; + if (ipc.opt) + free = 1; + connected = 0; + } + if (!ipc.opt) + ipc.opt = inet->opt; + + saddr = ipc.addr; + ipc.addr = faddr = daddr; + + if (ipc.opt && ipc.opt->srr) { + if (!daddr) + return -EINVAL; + faddr = ipc.opt->faddr; + connected = 0; + } + tos = RT_TOS(inet->tos); + if (sock_flag(sk, SOCK_LOCALROUTE) || + (msg->msg_flags & MSG_DONTROUTE) || + (ipc.opt && ipc.opt->is_strictroute)) { + tos |= RTO_ONLINK; + connected = 0; + } + + if (ipv4_is_multicast(daddr)) { + if (!ipc.oif) + ipc.oif = inet->mc_index; + if (!saddr) + saddr = inet->mc_addr; + connected = 0; + } + + if (connected) + rt = (struct rtable*)sk_dst_check(sk, 0); + + if (rt == NULL) { + struct flowi fl = { .oif = ipc.oif, + .nl_u = { .ip4_u = + { .daddr = faddr, + .saddr = saddr, + .tos = tos } }, + .proto = sk->sk_protocol, + .uli_u = { .ports = + { .sport = inet->sport, + .dport = dport } } }; + security_sk_classify_flow(sk, &fl); + err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); + if (err) { + if (err == -ENETUNREACH) + IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + goto out; + } + + err = -EACCES; + if ((rt->rt_flags & RTCF_BROADCAST) && + !sock_flag(sk, SOCK_BROADCAST)) + goto out; + if (connected) + sk_dst_set(sk, dst_clone(&rt->u.dst)); + } + + if (msg->msg_flags&MSG_CONFIRM) + goto do_confirm; +back_from_confirm: + + saddr = rt->rt_src; + if (!ipc.addr) + daddr = ipc.addr = rt->rt_dst; + + lock_sock(sk); + if (unlikely(up->pending)) { + /* The socket is already corked while preparing it. */ + /* ... which is an evident application bug. --ANK */ + release_sock(sk); + + LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); + err = -EINVAL; + goto out; + } + /* + * Now cork the socket to pend data. + */ + inet->cork.fl.fl4_dst = daddr; + inet->cork.fl.fl_ip_dport = dport; + inet->cork.fl.fl4_src = saddr; + inet->cork.fl.fl_ip_sport = inet->sport; + up->pending = AF_INET; + +do_append_data: + up->len += ulen; + getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; + err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, + sizeof(struct udphdr), &ipc, rt, + corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); + if (err) + udp_flush_pending_frames(sk); + else if (!corkreq) + err = udp_push_pending_frames(sk); + else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) + up->pending = 0; + release_sock(sk); + +out: + ip_rt_put(rt); + if (free) + kfree(ipc.opt); + if (!err) + return len; + /* + * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting + * ENOBUFS might not be good (it's not tunable per se), but otherwise + * we don't have a good statistic (IpOutDiscards but it can be too many + * things). We could add another new stat but at least for now that + * seems like overkill. + */ + if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { + UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); + } + return err; + +do_confirm: + dst_confirm(&rt->u.dst); + if (!(msg->msg_flags&MSG_PROBE) || len) + goto back_from_confirm; + err = 0; + goto out; +} + +int udp_sendpage(struct sock *sk, struct page *page, int offset, + size_t size, int flags) +{ + struct udp_sock *up = udp_sk(sk); + int ret; + + if (!up->pending) { + struct msghdr msg = { .msg_flags = flags|MSG_MORE }; + + /* Call udp_sendmsg to specify destination address which + * sendpage interface can't pass. + * This will succeed only when the socket is connected. + */ + ret = udp_sendmsg(NULL, sk, &msg, 0); + if (ret < 0) + return ret; + } + + lock_sock(sk); + + if (unlikely(!up->pending)) { + release_sock(sk); + + LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); + return -EINVAL; + } + + ret = ip_append_page(sk, page, offset, size, flags); + if (ret == -EOPNOTSUPP) { + release_sock(sk); + return sock_no_sendpage(sk->sk_socket, page, offset, + size, flags); + } + if (ret < 0) { + udp_flush_pending_frames(sk); + goto out; + } + + up->len += size; + if (!(up->corkflag || (flags&MSG_MORE))) + ret = udp_push_pending_frames(sk); + if (!ret) + ret = size; +out: + release_sock(sk); + return ret; +} + /* * IOCTL requests applicable to the UDP protocol */ @@ -286,6 +833,107 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) return 0; } +/* + * This should be easy, if there is something there we + * return it, otherwise we block. + */ + +int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) +{ + struct inet_sock *inet = inet_sk(sk); + struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + struct sk_buff *skb; + unsigned int ulen, copied; + int peeked; + int err; + int is_udplite = IS_UDPLITE(sk); + + /* + * Check any passed addresses + */ + if (addr_len) + *addr_len=sizeof(*sin); + + if (flags & MSG_ERRQUEUE) + return ip_recv_error(sk, msg, len); + +try_again: + skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), + &peeked, &err); + if (!skb) + goto out; + + ulen = skb->len - sizeof(struct udphdr); + copied = len; + if (copied > ulen) + copied = ulen; + else if (copied < ulen) + msg->msg_flags |= MSG_TRUNC; + + /* + * If checksum is needed at all, try to do it while copying the + * data. If the data is truncated, or if we only want a partial + * coverage checksum (UDP-Lite), do it before the copy. + */ + + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (udp_lib_checksum_complete(skb)) + goto csum_copy_err; + } + + if (skb_csum_unnecessary(skb)) + err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), + msg->msg_iov, copied ); + else { + err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); + + if (err == -EINVAL) + goto csum_copy_err; + } + + if (err) + goto out_free; + + if (!peeked) + UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); + + sock_recv_timestamp(msg, sk, skb); + + /* Copy the address. */ + if (sin) + { + sin->sin_family = AF_INET; + sin->sin_port = udp_hdr(skb)->source; + sin->sin_addr.s_addr = ip_hdr(skb)->saddr; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + } + if (inet->cmsg_flags) + ip_cmsg_recv(msg, skb); + + err = copied; + if (flags & MSG_TRUNC) + err = ulen; + +out_free: + lock_sock(sk); + skb_free_datagram(sk, skb); + release_sock(sk); +out: + return err; + +csum_copy_err: + lock_sock(sk); + if (!skb_kill_datagram(sk, skb, flags)) + UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + release_sock(sk); + + if (noblock) + return -EAGAIN; + goto try_again; +} + + int udp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); @@ -308,6 +956,319 @@ int udp_disconnect(struct sock *sk, int flags) return 0; } +/* returns: + * -1: error + * 0: success + * >0: "udp encap" protocol resubmission + * + * Note that in the success and error cases, the skb is assumed to + * have either been requeued or freed. + */ +int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) +{ + struct udp_sock *up = udp_sk(sk); + int rc; + int is_udplite = IS_UDPLITE(sk); + + /* + * Charge it to the socket, dropping if the queue is full. + */ + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) + goto drop; + nf_reset(skb); + + if (up->encap_type) { + /* + * This is an encapsulation socket so pass the skb to + * the socket's udp_encap_rcv() hook. Otherwise, just + * fall through and pass this up the UDP socket. + * up->encap_rcv() returns the following value: + * =0 if skb was successfully passed to the encap + * handler or was discarded by it. + * >0 if skb should be passed on to UDP. + * <0 if skb should be resubmitted as proto -N + */ + + /* if we're overly short, let UDP handle it */ + if (skb->len > sizeof(struct udphdr) && + up->encap_rcv != NULL) { + int ret; + + ret = (*up->encap_rcv)(sk, skb); + if (ret <= 0) { + UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, + is_udplite); + return -ret; + } + } + + /* FALLTHROUGH -- it's a UDP Packet */ + } + + /* + * UDP-Lite specific tests, ignored on UDP sockets + */ + if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + + /* + * MIB statistics other than incrementing the error count are + * disabled for the following two types of errors: these depend + * on the application settings, not on the functioning of the + * protocol stack as such. + * + * RFC 3828 here recommends (sec 3.3): "There should also be a + * way ... to ... at least let the receiving application block + * delivery of packets with coverage values less than a value + * provided by the application." + */ + if (up->pcrlen == 0) { /* full coverage was set */ + LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage " + "%d while full coverage %d requested\n", + UDP_SKB_CB(skb)->cscov, skb->len); + goto drop; + } + /* The next case involves violating the min. coverage requested + * by the receiver. This is subtle: if receiver wants x and x is + * greater than the buffersize/MTU then receiver will complain + * that it wants x while sender emits packets of smaller size y. + * Therefore the above ...()->partial_cov statement is essential. + */ + if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { + LIMIT_NETDEBUG(KERN_WARNING + "UDPLITE: coverage %d too small, need min %d\n", + UDP_SKB_CB(skb)->cscov, up->pcrlen); + goto drop; + } + } + + if (sk->sk_filter) { + if (udp_lib_checksum_complete(skb)) + goto drop; + } + + if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); + goto drop; + } + + return 0; + +drop: + UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); + kfree_skb(skb); + return -1; +} + +/* + * Multicasts and broadcasts go to each listener. + * + * Note: called only from the BH handler context, + * so we don't need to lock the hashes. + */ +static int __udp4_lib_mcast_deliver(struct sk_buff *skb, + struct udphdr *uh, + __be32 saddr, __be32 daddr, + struct hlist_head udptable[]) +{ + struct sock *sk; + int dif; + + read_lock(&udp_hash_lock); + sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); + dif = skb->dev->ifindex; + sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); + if (sk) { + struct sock *sknext = NULL; + + do { + struct sk_buff *skb1 = skb; + + sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, + uh->source, saddr, dif); + if (sknext) + skb1 = skb_clone(skb, GFP_ATOMIC); + + if (skb1) { + int ret = 0; + + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + ret = udp_queue_rcv_skb(sk, skb1); + else + sk_add_backlog(sk, skb1); + bh_unlock_sock(sk); + + if (ret > 0) + /* we should probably re-process instead + * of dropping packets here. */ + kfree_skb(skb1); + } + sk = sknext; + } while (sknext); + } else + kfree_skb(skb); + read_unlock(&udp_hash_lock); + return 0; +} + +/* Initialize UDP checksum. If exited with zero value (success), + * CHECKSUM_UNNECESSARY means, that no more checks are required. + * Otherwise, csum completion requires chacksumming packet body, + * including udp header and folding it to skb->csum. + */ +static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, + int proto) +{ + const struct iphdr *iph; + int err; + + UDP_SKB_CB(skb)->partial_cov = 0; + UDP_SKB_CB(skb)->cscov = skb->len; + + if (proto == IPPROTO_UDPLITE) { + err = udplite_checksum_init(skb, uh); + if (err) + return err; + } + + iph = ip_hdr(skb); + if (uh->check == 0) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } else if (skb->ip_summed == CHECKSUM_COMPLETE) { + if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, + proto, skb->csum)) + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + if (!skb_csum_unnecessary(skb)) + skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, + skb->len, proto, 0); + /* Probably, we should checksum udp header (it should be in cache + * in any case) and data in tiny packets (< rx copybreak). + */ + + return 0; +} + +/* + * All we need to do is get the socket, and then do a checksum. + */ + +int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], + int proto) +{ + struct sock *sk; + struct udphdr *uh = udp_hdr(skb); + unsigned short ulen; + struct rtable *rt = (struct rtable*)skb->dst; + __be32 saddr = ip_hdr(skb)->saddr; + __be32 daddr = ip_hdr(skb)->daddr; + + /* + * Validate the packet. + */ + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto drop; /* No space for header. */ + + ulen = ntohs(uh->len); + if (ulen > skb->len) + goto short_packet; + + if (proto == IPPROTO_UDP) { + /* UDP validates ulen. */ + if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) + goto short_packet; + uh = udp_hdr(skb); + } + + if (udp4_csum_init(skb, uh, proto)) + goto csum_error; + + if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) + return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); + + sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr, + uh->dest, inet_iif(skb), udptable); + + if (sk != NULL) { + int ret = 0; + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + ret = udp_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); + sock_put(sk); + + /* a return value > 0 means to resubmit the input, but + * it wants the return to be -protocol, or 0 + */ + if (ret > 0) + return -ret; + return 0; + } + + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + nf_reset(skb); + + /* No socket. Drop packet silently, if checksum is wrong */ + if (udp_lib_checksum_complete(skb)) + goto csum_error; + + UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + + /* + * Hmm. We got an UDP packet to a port to which we + * don't wanna listen. Ignore it. + */ + kfree_skb(skb); + return 0; + +short_packet: + LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", + proto == IPPROTO_UDPLITE ? "-Lite" : "", + NIPQUAD(saddr), + ntohs(uh->source), + ulen, + skb->len, + NIPQUAD(daddr), + ntohs(uh->dest)); + goto drop; + +csum_error: + /* + * RFC1122: OK. Discards the bad packet silently (as far as + * the network is concerned, anyway) as per 4.1.3.4 (MUST). + */ + LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", + proto == IPPROTO_UDPLITE ? "-Lite" : "", + NIPQUAD(saddr), + ntohs(uh->source), + NIPQUAD(daddr), + ntohs(uh->dest), + ulen); +drop: + UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + kfree_skb(skb); + return 0; +} + +int udp_rcv(struct sk_buff *skb) +{ + return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); +} + +int udp_destroy_sock(struct sock *sk) +{ + lock_sock(sk); + udp_flush_pending_frames(sk); + release_sock(sk); + return 0; +} + /* * Socket option code for UDP */ @@ -318,9 +1279,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, struct udp_sock *up = udp_sk(sk); int val; int err = 0; -#ifdef CONFIG_IP_UDPLITE int is_udplite = IS_UDPLITE(sk); -#endif if (optlenpcrlen = val; up->pcflag |= UDPLITE_RECV_CC; break; -#endif default: err = -ENOPROTOOPT; @@ -392,6 +1349,26 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return err; } +int udp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_setsockopt(sk, level, optname, optval, optlen, + udp_push_pending_frames); + return ip_setsockopt(sk, level, optname, optval, optlen); +} + +#ifdef CONFIG_COMPAT +int compat_udp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_setsockopt(sk, level, optname, optval, optlen, + udp_push_pending_frames); + return compat_ip_setsockopt(sk, level, optname, optval, optlen); +} +#endif + int udp_lib_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -436,6 +1413,23 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, return 0; } +int udp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_getsockopt(sk, level, optname, optval, optlen); + return ip_getsockopt(sk, level, optname, optval, optlen); +} + +#ifdef CONFIG_COMPAT +int compat_udp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_getsockopt(sk, level, optname, optval, optlen); + return compat_ip_getsockopt(sk, level, optname, optval, optlen); +} +#endif /** * udp_poll - wait for a UDP event. * @file - file struct @@ -480,6 +1474,36 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) } +DEFINE_PROTO_INUSE(udp) + +struct proto udp_prot = { + .name = "UDP", + .owner = THIS_MODULE, + .close = udp_lib_close, + .connect = ip4_datagram_connect, + .disconnect = udp_disconnect, + .ioctl = udp_ioctl, + .destroy = udp_destroy_sock, + .setsockopt = udp_setsockopt, + .getsockopt = udp_getsockopt, + .sendmsg = udp_sendmsg, + .recvmsg = udp_recvmsg, + .sendpage = udp_sendpage, + .backlog_rcv = udp_queue_rcv_skb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .get_port = udp_v4_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem = &sysctl_udp_wmem_min, + .sysctl_rmem = &sysctl_udp_rmem_min, + .obj_size = sizeof(struct udp_sock), +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_udp_setsockopt, + .compat_getsockopt = compat_udp_getsockopt, +#endif + REF_PROTO_INUSE(udp) +}; /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS @@ -612,6 +1636,62 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo) proc_net_remove(&init_net, afinfo->name); memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); } + +/* ------------------------------------------------------------------------ */ +static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) +{ + struct inet_sock *inet = inet_sk(sp); + __be32 dest = inet->daddr; + __be32 src = inet->rcv_saddr; + __u16 destp = ntohs(inet->dport); + __u16 srcp = ntohs(inet->sport); + + sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p", + bucket, src, srcp, dest, destp, sp->sk_state, + atomic_read(&sp->sk_wmem_alloc), + atomic_read(&sp->sk_rmem_alloc), + 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), + atomic_read(&sp->sk_refcnt), sp); +} + +int udp4_seq_show(struct seq_file *seq, void *v) +{ + if (v == SEQ_START_TOKEN) + seq_printf(seq, "%-127s\n", + " sl local_address rem_address st tx_queue " + "rx_queue tr tm->when retrnsmt uid timeout " + "inode"); + else { + char tmpbuf[129]; + struct udp_iter_state *state = seq->private; + + udp4_format_sock(v, tmpbuf, state->bucket); + seq_printf(seq, "%-127s\n", tmpbuf); + } + return 0; +} + +/* ------------------------------------------------------------------------ */ +static struct file_operations udp4_seq_fops; +static struct udp_seq_afinfo udp4_seq_afinfo = { + .owner = THIS_MODULE, + .name = "udp", + .family = AF_INET, + .hashtable = udp_hash, + .seq_show = udp4_seq_show, + .seq_fops = &udp4_seq_fops, +}; + +int __init udp4_proc_init(void) +{ + return udp_proc_register(&udp4_seq_afinfo); +} + +void udp4_proc_exit(void) +{ + udp_proc_unregister(&udp4_seq_afinfo); +} #endif /* CONFIG_PROC_FS */ void __init udp_init(void) @@ -638,6 +1718,8 @@ EXPORT_SYMBOL(udp_hash); EXPORT_SYMBOL(udp_hash_lock); EXPORT_SYMBOL(udp_ioctl); EXPORT_SYMBOL(udp_get_port); +EXPORT_SYMBOL(udp_prot); +EXPORT_SYMBOL(udp_sendmsg); EXPORT_SYMBOL(udp_lib_getsockopt); EXPORT_SYMBOL(udp_lib_setsockopt); EXPORT_SYMBOL(udp_poll); diff --git a/net/ipv4/udp_ipv4.c b/net/ipv4/udp_ipv4.c deleted file mode 100644 index fd14c2c50ed4..000000000000 --- a/net/ipv4/udp_ipv4.c +++ /dev/null @@ -1,1134 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * UDP for IPv4. - * - * For full credits, see net/ipv4/udp.c. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "udp_impl.h" - -int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) -{ - struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); - - return ( !ipv6_only_sock(sk2) && - (!inet1->rcv_saddr || !inet2->rcv_saddr || - inet1->rcv_saddr == inet2->rcv_saddr )); -} - -static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) -{ - return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); -} - -/* UDP is nearly always wildcards out the wazoo, it makes no sense to try - * harder than this. -DaveM - */ -static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, - __be16 sport, __be32 daddr, __be16 dport, - int dif, struct hlist_head udptable[]) -{ - struct sock *sk, *result = NULL; - struct hlist_node *node; - unsigned short hnum = ntohs(dport); - int badness = -1; - - read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { - struct inet_sock *inet = inet_sk(sk); - - if (sk->sk_net == net && sk->sk_hash == hnum && - !ipv6_only_sock(sk)) { - int score = (sk->sk_family == PF_INET ? 1 : 0); - if (inet->rcv_saddr) { - if (inet->rcv_saddr != daddr) - continue; - score+=2; - } - if (inet->daddr) { - if (inet->daddr != saddr) - continue; - score+=2; - } - if (inet->dport) { - if (inet->dport != sport) - continue; - score+=2; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score+=2; - } - if (score == 9) { - result = sk; - break; - } else if (score > badness) { - result = sk; - badness = score; - } - } - } - if (result) - sock_hold(result); - read_unlock(&udp_hash_lock); - return result; -} - -static inline struct sock *udp_v4_mcast_next(struct sock *sk, - __be16 loc_port, __be32 loc_addr, - __be16 rmt_port, __be32 rmt_addr, - int dif) -{ - struct hlist_node *node; - struct sock *s = sk; - unsigned short hnum = ntohs(loc_port); - - sk_for_each_from(s, node) { - struct inet_sock *inet = inet_sk(s); - - if (s->sk_hash != hnum || - (inet->daddr && inet->daddr != rmt_addr) || - (inet->dport != rmt_port && inet->dport) || - (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || - ipv6_only_sock(s) || - (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) - continue; - if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) - continue; - goto found; - } - s = NULL; -found: - return s; -} - -/* - * This routine is called by the ICMP module when it gets some - * sort of error condition. If err < 0 then the socket should - * be closed and the error returned to the user. If err > 0 - * it's just the icmp type << 8 | icmp code. - * Header points to the ip header of the error packet. We move - * on past this. Then (as it used to claim before adjustment) - * header points to the first 8 bytes of the udp header. We need - * to find the appropriate port. - */ - -void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) -{ - struct inet_sock *inet; - struct iphdr *iph = (struct iphdr*)skb->data; - struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - struct sock *sk; - int harderr; - int err; - - sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest, - iph->saddr, uh->source, skb->dev->ifindex, udptable); - if (sk == NULL) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); - return; /* No socket for error */ - } - - err = 0; - harderr = 0; - inet = inet_sk(sk); - - switch (type) { - default: - case ICMP_TIME_EXCEEDED: - err = EHOSTUNREACH; - break; - case ICMP_SOURCE_QUENCH: - goto out; - case ICMP_PARAMETERPROB: - err = EPROTO; - harderr = 1; - break; - case ICMP_DEST_UNREACH: - if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ - if (inet->pmtudisc != IP_PMTUDISC_DONT) { - err = EMSGSIZE; - harderr = 1; - break; - } - goto out; - } - err = EHOSTUNREACH; - if (code <= NR_ICMP_UNREACH) { - harderr = icmp_err_convert[code].fatal; - err = icmp_err_convert[code].errno; - } - break; - } - - /* - * RFC1122: OK. Passes ICMP errors back to application, as per - * 4.1.3.3. - */ - if (!inet->recverr) { - if (!harderr || sk->sk_state != TCP_ESTABLISHED) - goto out; - } else { - ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1)); - } - sk->sk_err = err; - sk->sk_error_report(sk); -out: - sock_put(sk); -} - -void udp_err(struct sk_buff *skb, u32 info) -{ - __udp4_lib_err(skb, info, udp_hash); -} - -/* - * Throw away all pending data and cancel the corking. Socket is locked. - */ -static void udp_flush_pending_frames(struct sock *sk) -{ - struct udp_sock *up = udp_sk(sk); - - if (up->pending) { - up->len = 0; - up->pending = 0; - ip_flush_pending_frames(sk); - } -} - -/** - * udp4_hwcsum_outgoing - handle outgoing HW checksumming - * @sk: socket we are sending on - * @skb: sk_buff containing the filled-in UDP header - * (checksum field must be zeroed out) - */ -static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, - __be32 src, __be32 dst, int len ) -{ - unsigned int offset; - struct udphdr *uh = udp_hdr(skb); - __wsum csum = 0; - - if (skb_queue_len(&sk->sk_write_queue) == 1) { - /* - * Only one fragment on the socket. - */ - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); - } else { - /* - * HW-checksum won't work as there are two or more - * fragments on the socket so that all csums of sk_buffs - * should be together - */ - offset = skb_transport_offset(skb); - skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); - - skb->ip_summed = CHECKSUM_NONE; - - skb_queue_walk(&sk->sk_write_queue, skb) { - csum = csum_add(csum, skb->csum); - } - - uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - } -} - -/* - * Push out all pending data as one UDP datagram. Socket is locked. - */ -static int udp_push_pending_frames(struct sock *sk) -{ - struct udp_sock *up = udp_sk(sk); - struct inet_sock *inet = inet_sk(sk); - struct flowi *fl = &inet->cork.fl; - struct sk_buff *skb; - struct udphdr *uh; - int err = 0; - int is_udplite = IS_UDPLITE(sk); - __wsum csum = 0; - - /* Grab the skbuff where UDP header space exists. */ - if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) - goto out; - - /* - * Create a UDP header - */ - uh = udp_hdr(skb); - uh->source = fl->fl_ip_sport; - uh->dest = fl->fl_ip_dport; - uh->len = htons(up->len); - uh->check = 0; - - if (is_udplite) /* UDP-Lite */ - csum = udplite_csum_outgoing(sk, skb); - - else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ - - skb->ip_summed = CHECKSUM_NONE; - goto send; - - } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - - udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); - goto send; - - } else /* `normal' UDP */ - csum = udp_csum_outgoing(sk, skb); - - /* add protocol-dependent pseudo-header */ - uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, - sk->sk_protocol, csum ); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - -send: - err = ip_push_pending_frames(sk); -out: - up->len = 0; - up->pending = 0; - if (!err) - UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); - return err; -} - -int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) -{ - struct inet_sock *inet = inet_sk(sk); - struct udp_sock *up = udp_sk(sk); - int ulen = len; - struct ipcm_cookie ipc; - struct rtable *rt = NULL; - int free = 0; - int connected = 0; - __be32 daddr, faddr, saddr; - __be16 dport; - u8 tos; - int err, is_udplite = IS_UDPLITE(sk); - int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; - int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); - - if (len > 0xFFFF) - return -EMSGSIZE; - - /* - * Check the flags. - */ - - if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */ - return -EOPNOTSUPP; - - ipc.opt = NULL; - - if (up->pending) { - /* - * There are pending frames. - * The socket lock must be held while it's corked. - */ - lock_sock(sk); - if (likely(up->pending)) { - if (unlikely(up->pending != AF_INET)) { - release_sock(sk); - return -EINVAL; - } - goto do_append_data; - } - release_sock(sk); - } - ulen += sizeof(struct udphdr); - - /* - * Get and verify the address. - */ - if (msg->msg_name) { - struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; - if (msg->msg_namelen < sizeof(*usin)) - return -EINVAL; - if (usin->sin_family != AF_INET) { - if (usin->sin_family != AF_UNSPEC) - return -EAFNOSUPPORT; - } - - daddr = usin->sin_addr.s_addr; - dport = usin->sin_port; - if (dport == 0) - return -EINVAL; - } else { - if (sk->sk_state != TCP_ESTABLISHED) - return -EDESTADDRREQ; - daddr = inet->daddr; - dport = inet->dport; - /* Open fast path for connected socket. - Route will not be used, if at least one option is set. - */ - connected = 1; - } - ipc.addr = inet->saddr; - - ipc.oif = sk->sk_bound_dev_if; - if (msg->msg_controllen) { - err = ip_cmsg_send(msg, &ipc); - if (err) - return err; - if (ipc.opt) - free = 1; - connected = 0; - } - if (!ipc.opt) - ipc.opt = inet->opt; - - saddr = ipc.addr; - ipc.addr = faddr = daddr; - - if (ipc.opt && ipc.opt->srr) { - if (!daddr) - return -EINVAL; - faddr = ipc.opt->faddr; - connected = 0; - } - tos = RT_TOS(inet->tos); - if (sock_flag(sk, SOCK_LOCALROUTE) || - (msg->msg_flags & MSG_DONTROUTE) || - (ipc.opt && ipc.opt->is_strictroute)) { - tos |= RTO_ONLINK; - connected = 0; - } - - if (ipv4_is_multicast(daddr)) { - if (!ipc.oif) - ipc.oif = inet->mc_index; - if (!saddr) - saddr = inet->mc_addr; - connected = 0; - } - - if (connected) - rt = (struct rtable*)sk_dst_check(sk, 0); - - if (rt == NULL) { - struct flowi fl = { .oif = ipc.oif, - .nl_u = { .ip4_u = - { .daddr = faddr, - .saddr = saddr, - .tos = tos } }, - .proto = sk->sk_protocol, - .uli_u = { .ports = - { .sport = inet->sport, - .dport = dport } } }; - security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); - if (err) { - if (err == -ENETUNREACH) - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); - goto out; - } - - err = -EACCES; - if ((rt->rt_flags & RTCF_BROADCAST) && - !sock_flag(sk, SOCK_BROADCAST)) - goto out; - if (connected) - sk_dst_set(sk, dst_clone(&rt->u.dst)); - } - - if (msg->msg_flags&MSG_CONFIRM) - goto do_confirm; -back_from_confirm: - - saddr = rt->rt_src; - if (!ipc.addr) - daddr = ipc.addr = rt->rt_dst; - - lock_sock(sk); - if (unlikely(up->pending)) { - /* The socket is already corked while preparing it. */ - /* ... which is an evident application bug. --ANK */ - release_sock(sk); - - LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); - err = -EINVAL; - goto out; - } - /* - * Now cork the socket to pend data. - */ - inet->cork.fl.fl4_dst = daddr; - inet->cork.fl.fl_ip_dport = dport; - inet->cork.fl.fl4_src = saddr; - inet->cork.fl.fl_ip_sport = inet->sport; - up->pending = AF_INET; - -do_append_data: - up->len += ulen; - getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; - err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, - sizeof(struct udphdr), &ipc, rt, - corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); - if (err) - udp_flush_pending_frames(sk); - else if (!corkreq) - err = udp_push_pending_frames(sk); - else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) - up->pending = 0; - release_sock(sk); - -out: - ip_rt_put(rt); - if (free) - kfree(ipc.opt); - if (!err) - return len; - /* - * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting - * ENOBUFS might not be good (it's not tunable per se), but otherwise - * we don't have a good statistic (IpOutDiscards but it can be too many - * things). We could add another new stat but at least for now that - * seems like overkill. - */ - if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); - } - return err; - -do_confirm: - dst_confirm(&rt->u.dst); - if (!(msg->msg_flags&MSG_PROBE) || len) - goto back_from_confirm; - err = 0; - goto out; -} - -int udp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags) -{ - struct udp_sock *up = udp_sk(sk); - int ret; - - if (!up->pending) { - struct msghdr msg = { .msg_flags = flags|MSG_MORE }; - - /* Call udp_sendmsg to specify destination address which - * sendpage interface can't pass. - * This will succeed only when the socket is connected. - */ - ret = udp_sendmsg(NULL, sk, &msg, 0); - if (ret < 0) - return ret; - } - - lock_sock(sk); - - if (unlikely(!up->pending)) { - release_sock(sk); - - LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); - return -EINVAL; - } - - ret = ip_append_page(sk, page, offset, size, flags); - if (ret == -EOPNOTSUPP) { - release_sock(sk); - return sock_no_sendpage(sk->sk_socket, page, offset, - size, flags); - } - if (ret < 0) { - udp_flush_pending_frames(sk); - goto out; - } - - up->len += size; - if (!(up->corkflag || (flags&MSG_MORE))) - ret = udp_push_pending_frames(sk); - if (!ret) - ret = size; -out: - release_sock(sk); - return ret; -} - -/* - * This should be easy, if there is something there we - * return it, otherwise we block. - */ - -int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) -{ - struct inet_sock *inet = inet_sk(sk); - struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; - struct sk_buff *skb; - unsigned int ulen, copied; - int peeked; - int err; - int is_udplite = IS_UDPLITE(sk); - - /* - * Check any passed addresses - */ - if (addr_len) - *addr_len=sizeof(*sin); - - if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); - -try_again: - skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &err); - if (!skb) - goto out; - - ulen = skb->len - sizeof(struct udphdr); - copied = len; - if (copied > ulen) - copied = ulen; - else if (copied < ulen) - msg->msg_flags |= MSG_TRUNC; - - /* - * If checksum is needed at all, try to do it while copying the - * data. If the data is truncated, or if we only want a partial - * coverage checksum (UDP-Lite), do it before the copy. - */ - - if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { - if (udp_lib_checksum_complete(skb)) - goto csum_copy_err; - } - - if (skb_csum_unnecessary(skb)) - err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov, copied ); - else { - err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); - - if (err == -EINVAL) - goto csum_copy_err; - } - - if (err) - goto out_free; - - if (!peeked) - UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); - - sock_recv_timestamp(msg, sk, skb); - - /* Copy the address. */ - if (sin) - { - sin->sin_family = AF_INET; - sin->sin_port = udp_hdr(skb)->source; - sin->sin_addr.s_addr = ip_hdr(skb)->saddr; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } - if (inet->cmsg_flags) - ip_cmsg_recv(msg, skb); - - err = copied; - if (flags & MSG_TRUNC) - err = ulen; - -out_free: - lock_sock(sk); - skb_free_datagram(sk, skb); - release_sock(sk); -out: - return err; - -csum_copy_err: - lock_sock(sk); - if (!skb_kill_datagram(sk, skb, flags)) - UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); - release_sock(sk); - - if (noblock) - return -EAGAIN; - goto try_again; -} - - -/* returns: - * -1: error - * 0: success - * >0: "udp encap" protocol resubmission - * - * Note that in the success and error cases, the skb is assumed to - * have either been requeued or freed. - */ -int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) -{ - struct udp_sock *up = udp_sk(sk); - int rc; - int is_udplite = IS_UDPLITE(sk); - - /* - * Charge it to the socket, dropping if the queue is full. - */ - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) - goto drop; - nf_reset(skb); - - if (up->encap_type) { - /* - * This is an encapsulation socket so pass the skb to - * the socket's udp_encap_rcv() hook. Otherwise, just - * fall through and pass this up the UDP socket. - * up->encap_rcv() returns the following value: - * =0 if skb was successfully passed to the encap - * handler or was discarded by it. - * >0 if skb should be passed on to UDP. - * <0 if skb should be resubmitted as proto -N - */ - - /* if we're overly short, let UDP handle it */ - if (skb->len > sizeof(struct udphdr) && - up->encap_rcv != NULL) { - int ret; - - ret = (*up->encap_rcv)(sk, skb); - if (ret <= 0) { - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, - is_udplite); - return -ret; - } - } - - /* FALLTHROUGH -- it's a UDP Packet */ - } - - /* - * UDP-Lite specific tests, ignored on UDP sockets - */ - if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { - - /* - * MIB statistics other than incrementing the error count are - * disabled for the following two types of errors: these depend - * on the application settings, not on the functioning of the - * protocol stack as such. - * - * RFC 3828 here recommends (sec 3.3): "There should also be a - * way ... to ... at least let the receiving application block - * delivery of packets with coverage values less than a value - * provided by the application." - */ - if (up->pcrlen == 0) { /* full coverage was set */ - LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage " - "%d while full coverage %d requested\n", - UDP_SKB_CB(skb)->cscov, skb->len); - goto drop; - } - /* The next case involves violating the min. coverage requested - * by the receiver. This is subtle: if receiver wants x and x is - * greater than the buffersize/MTU then receiver will complain - * that it wants x while sender emits packets of smaller size y. - * Therefore the above ...()->partial_cov statement is essential. - */ - if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { - LIMIT_NETDEBUG(KERN_WARNING - "UDPLITE: coverage %d too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); - goto drop; - } - } - - if (sk->sk_filter) { - if (udp_lib_checksum_complete(skb)) - goto drop; - } - - if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { - /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) - UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); - goto drop; - } - - return 0; - -drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); - kfree_skb(skb); - return -1; -} - -/* - * Multicasts and broadcasts go to each listener. - * - * Note: called only from the BH handler context, - * so we don't need to lock the hashes. - */ -static int __udp4_lib_mcast_deliver(struct sk_buff *skb, - struct udphdr *uh, - __be32 saddr, __be32 daddr, - struct hlist_head udptable[]) -{ - struct sock *sk; - int dif; - - read_lock(&udp_hash_lock); - sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); - dif = skb->dev->ifindex; - sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); - if (sk) { - struct sock *sknext = NULL; - - do { - struct sk_buff *skb1 = skb; - - sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, - uh->source, saddr, dif); - if (sknext) - skb1 = skb_clone(skb, GFP_ATOMIC); - - if (skb1) { - int ret = 0; - - bh_lock_sock_nested(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb1); - else - sk_add_backlog(sk, skb1); - bh_unlock_sock(sk); - - if (ret > 0) - /* we should probably re-process instead - * of dropping packets here. */ - kfree_skb(skb1); - } - sk = sknext; - } while (sknext); - } else - kfree_skb(skb); - read_unlock(&udp_hash_lock); - return 0; -} - -/* Initialize UDP checksum. If exited with zero value (success), - * CHECKSUM_UNNECESSARY means, that no more checks are required. - * Otherwise, csum completion requires chacksumming packet body, - * including udp header and folding it to skb->csum. - */ -static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, - int proto) -{ - const struct iphdr *iph; - int err; - - UDP_SKB_CB(skb)->partial_cov = 0; - UDP_SKB_CB(skb)->cscov = skb->len; - - if (IS_PROTO_UDPLITE(proto)) { - err = udplite_checksum_init(skb, uh); - if (err) - return err; - } - - iph = ip_hdr(skb); - if (uh->check == 0) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - } else if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, - proto, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - } - if (!skb_csum_unnecessary(skb)) - skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb->len, proto, 0); - /* Probably, we should checksum udp header (it should be in cache - * in any case) and data in tiny packets (< rx copybreak). - */ - - return 0; -} - -/* - * All we need to do is get the socket, and then do a checksum. - */ - -int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], - int proto) -{ - struct sock *sk; - struct udphdr *uh = udp_hdr(skb); - unsigned short ulen; - struct rtable *rt = skb->rtable; - __be32 saddr = ip_hdr(skb)->saddr; - __be32 daddr = ip_hdr(skb)->daddr; - - /* - * Validate the packet. - */ - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto drop; /* No space for header. */ - - ulen = ntohs(uh->len); - if (ulen > skb->len) - goto short_packet; - - if (IS_PROTO_UDPLITE(proto)) { - /* UDP validates ulen. */ - if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) - goto short_packet; - uh = udp_hdr(skb); - } - - if (udp4_csum_init(skb, uh, proto)) - goto csum_error; - - if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) - return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); - - sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr, - uh->dest, inet_iif(skb), udptable); - - if (sk != NULL) { - int ret = 0; - bh_lock_sock_nested(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); - sock_put(sk); - - /* a return value > 0 means to resubmit the input, but - * it wants the return to be -protocol, or 0 - */ - if (ret > 0) - return -ret; - return 0; - } - - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto drop; - nf_reset(skb); - - /* No socket. Drop packet silently, if checksum is wrong */ - if (udp_lib_checksum_complete(skb)) - goto csum_error; - - UDP_INC_STATS_BH(UDP_MIB_NOPORTS, IS_PROTO_UDPLITE(proto)); - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); - - /* - * Hmm. We got an UDP packet to a port to which we - * don't wanna listen. Ignore it. - */ - kfree_skb(skb); - return 0; - -short_packet: - LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", - IS_PROTO_UDPLITE(proto) ? "-Lite" : "", - NIPQUAD(saddr), - ntohs(uh->source), - ulen, - skb->len, - NIPQUAD(daddr), - ntohs(uh->dest)); - goto drop; - -csum_error: - /* - * RFC1122: OK. Discards the bad packet silently (as far as - * the network is concerned, anyway) as per 4.1.3.4 (MUST). - */ - LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", - IS_PROTO_UDPLITE(proto) ? "-Lite" : "", - NIPQUAD(saddr), - ntohs(uh->source), - NIPQUAD(daddr), - ntohs(uh->dest), - ulen); -drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS, IS_PROTO_UDPLITE(proto)); - kfree_skb(skb); - return 0; -} - -int udp_rcv(struct sk_buff *skb) -{ - return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); -} - -int udp_destroy_sock(struct sock *sk) -{ - lock_sock(sk); - udp_flush_pending_frames(sk); - release_sock(sk); - return 0; -} - -int udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) -{ - if (IS_SOL_UDPFAMILY(level)) - return udp_lib_setsockopt(sk, level, optname, optval, optlen, - udp_push_pending_frames); - return ip_setsockopt(sk, level, optname, optval, optlen); -} - -#ifdef CONFIG_COMPAT -int compat_udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) -{ - if (IS_SOL_UDPFAMILY(level)) - return udp_lib_setsockopt(sk, level, optname, optval, optlen, - udp_push_pending_frames); - return compat_ip_setsockopt(sk, level, optname, optval, optlen); -} -#endif - -int udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - if (IS_SOL_UDPFAMILY(level)) - return udp_lib_getsockopt(sk, level, optname, optval, optlen); - return ip_getsockopt(sk, level, optname, optval, optlen); -} - -#ifdef CONFIG_COMPAT -int compat_udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - if (IS_SOL_UDPFAMILY(level)) - return udp_lib_getsockopt(sk, level, optname, optval, optlen); - return compat_ip_getsockopt(sk, level, optname, optval, optlen); -} -#endif - -/* ------------------------------------------------------------------------ */ -DEFINE_PROTO_INUSE(udp) - -struct proto udp_prot = { - .name = "UDP", - .owner = THIS_MODULE, - .close = udp_lib_close, - .connect = ip4_datagram_connect, - .disconnect = udp_disconnect, - .ioctl = udp_ioctl, - .destroy = udp_destroy_sock, - .setsockopt = udp_setsockopt, - .getsockopt = udp_getsockopt, - .sendmsg = udp_sendmsg, - .recvmsg = udp_recvmsg, - .sendpage = udp_sendpage, - .backlog_rcv = udp_queue_rcv_skb, - .hash = udp_lib_hash, - .unhash = udp_lib_unhash, - .get_port = udp_v4_get_port, - .memory_allocated = &udp_memory_allocated, - .sysctl_mem = sysctl_udp_mem, - .sysctl_wmem = &sysctl_udp_wmem_min, - .sysctl_rmem = &sysctl_udp_rmem_min, - .obj_size = sizeof(struct udp_sock), -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_udp_setsockopt, - .compat_getsockopt = compat_udp_getsockopt, -#endif - REF_PROTO_INUSE(udp) -}; - -/* ------------------------------------------------------------------------ */ -static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) -{ - struct inet_sock *inet = inet_sk(sp); - __be32 dest = inet->daddr; - __be32 src = inet->rcv_saddr; - __u16 destp = ntohs(inet->dport); - __u16 srcp = ntohs(inet->sport); - - sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p", - bucket, src, srcp, dest, destp, sp->sk_state, - atomic_read(&sp->sk_wmem_alloc), - atomic_read(&sp->sk_rmem_alloc), - 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp); -} - -int udp4_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_printf(seq, "%-127s\n", - " sl local_address rem_address st tx_queue " - "rx_queue tr tm->when retrnsmt uid timeout " - "inode"); - else { - char tmpbuf[129]; - struct udp_iter_state *state = seq->private; - - udp4_format_sock(v, tmpbuf, state->bucket); - seq_printf(seq, "%-127s\n", tmpbuf); - } - return 0; -} - -/* ------------------------------------------------------------------------ */ -#ifdef CONFIG_PROC_FS -static struct file_operations udp4_seq_fops; -static struct udp_seq_afinfo udp4_seq_afinfo = { - .owner = THIS_MODULE, - .name = "udp", - .family = AF_INET, - .hashtable = udp_hash, - .seq_show = udp4_seq_show, - .seq_fops = &udp4_seq_fops, -}; - -int __init udp4_proc_init(void) -{ - return udp_proc_register(&udp4_seq_afinfo); -} - -void udp4_proc_exit(void) -{ - udp_proc_unregister(&udp4_seq_afinfo); -} -#endif /* CONFIG_PROC_FS */ - -EXPORT_SYMBOL(udp_prot); -EXPORT_SYMBOL(udp_sendmsg); - diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c new file mode 100644 index 000000000000..d49c6d68c8a9 --- /dev/null +++ b/net/ipv4/udplite.c @@ -0,0 +1,121 @@ +/* + * UDPLITE An implementation of the UDP-Lite protocol (RFC 3828). + * + * Version: $Id: udplite.c,v 1.25 2006/10/19 07:22:36 gerrit Exp $ + * + * Authors: Gerrit Renker + * + * Changes: + * Fixes: + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include "udp_impl.h" +DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly; + +struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; + +int udplite_get_port(struct sock *sk, unsigned short p, + int (*c)(const struct sock *, const struct sock *)) +{ + return __udp_lib_get_port(sk, p, udplite_hash, c); +} + +static int udplite_v4_get_port(struct sock *sk, unsigned short snum) +{ + return udplite_get_port(sk, snum, ipv4_rcv_saddr_equal); +} + +static int udplite_rcv(struct sk_buff *skb) +{ + return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); +} + +static void udplite_err(struct sk_buff *skb, u32 info) +{ + __udp4_lib_err(skb, info, udplite_hash); +} + +static struct net_protocol udplite_protocol = { + .handler = udplite_rcv, + .err_handler = udplite_err, + .no_policy = 1, +}; + +DEFINE_PROTO_INUSE(udplite) + +struct proto udplite_prot = { + .name = "UDP-Lite", + .owner = THIS_MODULE, + .close = udp_lib_close, + .connect = ip4_datagram_connect, + .disconnect = udp_disconnect, + .ioctl = udp_ioctl, + .init = udplite_sk_init, + .destroy = udp_destroy_sock, + .setsockopt = udp_setsockopt, + .getsockopt = udp_getsockopt, + .sendmsg = udp_sendmsg, + .recvmsg = udp_recvmsg, + .sendpage = udp_sendpage, + .backlog_rcv = udp_queue_rcv_skb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .get_port = udplite_v4_get_port, + .obj_size = sizeof(struct udp_sock), +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_udp_setsockopt, + .compat_getsockopt = compat_udp_getsockopt, +#endif + REF_PROTO_INUSE(udplite) +}; + +static struct inet_protosw udplite4_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_UDPLITE, + .prot = &udplite_prot, + .ops = &inet_dgram_ops, + .capability = -1, + .no_check = 0, /* must checksum (RFC 3828) */ + .flags = INET_PROTOSW_PERMANENT, +}; + +#ifdef CONFIG_PROC_FS +static struct file_operations udplite4_seq_fops; +static struct udp_seq_afinfo udplite4_seq_afinfo = { + .owner = THIS_MODULE, + .name = "udplite", + .family = AF_INET, + .hashtable = udplite_hash, + .seq_show = udp4_seq_show, + .seq_fops = &udplite4_seq_fops, +}; +#endif + +void __init udplite4_register(void) +{ + if (proto_register(&udplite_prot, 1)) + goto out_register_err; + + if (inet_add_protocol(&udplite_protocol, IPPROTO_UDPLITE) < 0) + goto out_unregister_proto; + + inet_register_protosw(&udplite4_protosw); + +#ifdef CONFIG_PROC_FS + if (udp_proc_register(&udplite4_seq_afinfo)) /* udplite4_proc_init() */ + printk(KERN_ERR "%s: Cannot register /proc!\n", __func__); +#endif + return; + +out_unregister_proto: + proto_unregister(&udplite_prot); +out_register_err: + printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); +} + +EXPORT_SYMBOL(udplite_hash); +EXPORT_SYMBOL(udplite_prot); +EXPORT_SYMBOL(udplite_get_port); diff --git a/net/ipv4/udplite_ipv4.c b/net/ipv4/udplite_ipv4.c deleted file mode 100644 index d49c6d68c8a9..000000000000 --- a/net/ipv4/udplite_ipv4.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * UDPLITE An implementation of the UDP-Lite protocol (RFC 3828). - * - * Version: $Id: udplite.c,v 1.25 2006/10/19 07:22:36 gerrit Exp $ - * - * Authors: Gerrit Renker - * - * Changes: - * Fixes: - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include "udp_impl.h" -DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly; - -struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; - -int udplite_get_port(struct sock *sk, unsigned short p, - int (*c)(const struct sock *, const struct sock *)) -{ - return __udp_lib_get_port(sk, p, udplite_hash, c); -} - -static int udplite_v4_get_port(struct sock *sk, unsigned short snum) -{ - return udplite_get_port(sk, snum, ipv4_rcv_saddr_equal); -} - -static int udplite_rcv(struct sk_buff *skb) -{ - return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); -} - -static void udplite_err(struct sk_buff *skb, u32 info) -{ - __udp4_lib_err(skb, info, udplite_hash); -} - -static struct net_protocol udplite_protocol = { - .handler = udplite_rcv, - .err_handler = udplite_err, - .no_policy = 1, -}; - -DEFINE_PROTO_INUSE(udplite) - -struct proto udplite_prot = { - .name = "UDP-Lite", - .owner = THIS_MODULE, - .close = udp_lib_close, - .connect = ip4_datagram_connect, - .disconnect = udp_disconnect, - .ioctl = udp_ioctl, - .init = udplite_sk_init, - .destroy = udp_destroy_sock, - .setsockopt = udp_setsockopt, - .getsockopt = udp_getsockopt, - .sendmsg = udp_sendmsg, - .recvmsg = udp_recvmsg, - .sendpage = udp_sendpage, - .backlog_rcv = udp_queue_rcv_skb, - .hash = udp_lib_hash, - .unhash = udp_lib_unhash, - .get_port = udplite_v4_get_port, - .obj_size = sizeof(struct udp_sock), -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_udp_setsockopt, - .compat_getsockopt = compat_udp_getsockopt, -#endif - REF_PROTO_INUSE(udplite) -}; - -static struct inet_protosw udplite4_protosw = { - .type = SOCK_DGRAM, - .protocol = IPPROTO_UDPLITE, - .prot = &udplite_prot, - .ops = &inet_dgram_ops, - .capability = -1, - .no_check = 0, /* must checksum (RFC 3828) */ - .flags = INET_PROTOSW_PERMANENT, -}; - -#ifdef CONFIG_PROC_FS -static struct file_operations udplite4_seq_fops; -static struct udp_seq_afinfo udplite4_seq_afinfo = { - .owner = THIS_MODULE, - .name = "udplite", - .family = AF_INET, - .hashtable = udplite_hash, - .seq_show = udp4_seq_show, - .seq_fops = &udplite4_seq_fops, -}; -#endif - -void __init udplite4_register(void) -{ - if (proto_register(&udplite_prot, 1)) - goto out_register_err; - - if (inet_add_protocol(&udplite_protocol, IPPROTO_UDPLITE) < 0) - goto out_unregister_proto; - - inet_register_protosw(&udplite4_protosw); - -#ifdef CONFIG_PROC_FS - if (udp_proc_register(&udplite4_seq_afinfo)) /* udplite4_proc_init() */ - printk(KERN_ERR "%s: Cannot register /proc!\n", __func__); -#endif - return; - -out_unregister_proto: - proto_unregister(&udplite_prot); -out_register_err: - printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); -} - -EXPORT_SYMBOL(udplite_hash); -EXPORT_SYMBOL(udplite_prot); -EXPORT_SYMBOL(udplite_get_port); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 107051f7c227..ae14617e607f 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ - route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp_ipv6.o \ + route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o @@ -17,7 +17,6 @@ ipv6-$(CONFIG_NETFILTER) += netfilter.o ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o ipv6-$(CONFIG_PROC_FS) += proc.o ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o -ipv6-$(CONFIG_IP_UDPLITE) += udplite_ipv6.o ipv6-objs += $(ipv6-y) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index afe9276d0420..730a861b8f41 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -813,16 +813,12 @@ static int __init init_ipv6_mibs(void) goto err_icmpmsg_mib; if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib)) < 0) goto err_udp_mib; -#ifdef CONFIG_IP_UDPLITE if (snmp_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib)) < 0) goto err_udplite_mib; -#endif return 0; -#ifdef CONFIG_IP_UDPLITE err_udplite_mib: -#endif snmp_mib_free((void **)udp_stats_in6); err_udp_mib: snmp_mib_free((void **)icmpv6msg_statistics); @@ -841,9 +837,7 @@ static void cleanup_ipv6_mibs(void) snmp_mib_free((void **)icmpv6_statistics); snmp_mib_free((void **)icmpv6msg_statistics); snmp_mib_free((void **)udp_stats_in6); -#ifdef CONFIG_IP_UDPLITE snmp_mib_free((void **)udplite_stats_in6); -#endif } static int inet6_net_init(struct net *net) @@ -888,11 +882,9 @@ static int __init inet6_init(void) if (err) goto out_unregister_tcp_proto; -#ifdef CONFIG_IP_UDPLITE err = proto_register(&udplitev6_prot, 1); if (err) goto out_unregister_udp_proto; -#endif err = proto_register(&rawv6_prot, 1); if (err) @@ -1063,10 +1055,8 @@ out_sock_register_fail: out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: -#ifdef CONFIG_IP_UDPLITE proto_unregister(&udplitev6_prot); out_unregister_udp_proto: -#endif proto_unregister(&udpv6_prot); out_unregister_tcp_proto: proto_unregister(&tcpv6_prot); @@ -1085,9 +1075,7 @@ static void __exit inet6_exit(void) ipv6_sysctl_unregister(); #endif udpv6_exit(); -#ifdef CONFIG_IP_UDPLITE udplitev6_exit(); -#endif tcpv6_exit(); /* Cleanup code parts. */ @@ -1117,9 +1105,7 @@ static void __exit inet6_exit(void) unregister_pernet_subsys(&inet6_net_ops); cleanup_ipv6_mibs(); proto_unregister(&rawv6_prot); -#ifdef CONFIG_IP_UDPLITE proto_unregister(&udplitev6_prot); -#endif proto_unregister(&udpv6_prot); proto_unregister(&tcpv6_prot); } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3bbfdff698d2..5eea6fa506e5 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -127,9 +127,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct sk_buff *pktopt; if (sk->sk_protocol != IPPROTO_UDP && -#ifdef CONFIG_IP_UDPLITE sk->sk_protocol != IPPROTO_UDPLITE && -#endif sk->sk_protocol != IPPROTO_TCP) break; @@ -169,7 +167,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } else { struct proto *prot = &udp_prot; - if (IS_PROTO_UDPLITE(sk->sk_protocol)) + if (sk->sk_protocol == IPPROTO_UDPLITE) prot = &udplite_prot; local_bh_disable(); sock_prot_inuse_add(sk->sk_prot, -1); @@ -734,9 +732,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case IPV6_ADDRFORM: if (sk->sk_protocol != IPPROTO_UDP && -#ifdef CONFIG_IP_UDPLITE sk->sk_protocol != IPPROTO_UDPLITE && -#endif sk->sk_protocol != IPPROTO_TCP) return -EINVAL; if (sk->sk_state != TCP_ESTABLISHED) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 2453f2229ef7..8a5be290c710 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -39,10 +39,8 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(&tcpv6_prot)); seq_printf(seq, "UDP6: inuse %d\n", sock_prot_inuse_get(&udpv6_prot)); -#ifdef CONFIG_IP_UDPLITE seq_printf(seq, "UDPLITE6: inuse %d\n", sock_prot_inuse_get(&udplitev6_prot)); -#endif seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", @@ -113,7 +111,6 @@ static struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_SENTINEL }; -#ifdef CONFIG_IP_UDPLITE static struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS), SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS), @@ -121,7 +118,6 @@ static struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS), SNMP_MIB_SENTINEL }; -#endif static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) { @@ -180,9 +176,7 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list); snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics); snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list); -#ifdef CONFIG_IP_UDPLITE snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list); -#endif } return 0; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c new file mode 100644 index 000000000000..53739de829db --- /dev/null +++ b/net/ipv6/udp.c @@ -0,0 +1,1065 @@ +/* + * UDP over IPv6 + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * Based on linux/ipv4/udp.c + * + * $Id: udp.c,v 1.65 2002/02/01 22:01:04 davem Exp $ + * + * Fixes: + * Hideaki YOSHIFUJI : sin6_scope_id support + * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which + * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind + * a single port at the same time. + * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data + * YOSHIFUJI Hideaki @USAGI: convert /proc/net/udp6 to seq_file. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "udp_impl.h" + +static inline int udp_v6_get_port(struct sock *sk, unsigned short snum) +{ + return udp_get_port(sk, snum, ipv6_rcv_saddr_equal); +} + +static struct sock *__udp6_lib_lookup(struct net *net, + struct in6_addr *saddr, __be16 sport, + struct in6_addr *daddr, __be16 dport, + int dif, struct hlist_head udptable[]) +{ + struct sock *sk, *result = NULL; + struct hlist_node *node; + unsigned short hnum = ntohs(dport); + int badness = -1; + + read_lock(&udp_hash_lock); + sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { + struct inet_sock *inet = inet_sk(sk); + + if (sk->sk_net == net && sk->sk_hash == hnum && + sk->sk_family == PF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + int score = 0; + if (inet->dport) { + if (inet->dport != sport) + continue; + score++; + } + if (!ipv6_addr_any(&np->rcv_saddr)) { + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + continue; + score++; + } + if (!ipv6_addr_any(&np->daddr)) { + if (!ipv6_addr_equal(&np->daddr, saddr)) + continue; + score++; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + continue; + score++; + } + if (score == 4) { + result = sk; + break; + } else if (score > badness) { + result = sk; + badness = score; + } + } + } + if (result) + sock_hold(result); + read_unlock(&udp_hash_lock); + return result; +} + +/* + * This should be easy, if there is something there we + * return it, otherwise we block. + */ + +int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct inet_sock *inet = inet_sk(sk); + struct sk_buff *skb; + unsigned int ulen, copied; + int peeked; + int err; + int is_udplite = IS_UDPLITE(sk); + + if (addr_len) + *addr_len=sizeof(struct sockaddr_in6); + + if (flags & MSG_ERRQUEUE) + return ipv6_recv_error(sk, msg, len); + +try_again: + skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), + &peeked, &err); + if (!skb) + goto out; + + ulen = skb->len - sizeof(struct udphdr); + copied = len; + if (copied > ulen) + copied = ulen; + else if (copied < ulen) + msg->msg_flags |= MSG_TRUNC; + + /* + * If checksum is needed at all, try to do it while copying the + * data. If the data is truncated, or if we only want a partial + * coverage checksum (UDP-Lite), do it before the copy. + */ + + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (udp_lib_checksum_complete(skb)) + goto csum_copy_err; + } + + if (skb_csum_unnecessary(skb)) + err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), + msg->msg_iov, copied ); + else { + err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); + if (err == -EINVAL) + goto csum_copy_err; + } + if (err) + goto out_free; + + if (!peeked) + UDP6_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); + + sock_recv_timestamp(msg, sk, skb); + + /* Copy the address. */ + if (msg->msg_name) { + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *) msg->msg_name; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = udp_hdr(skb)->source; + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = 0; + + if (skb->protocol == htons(ETH_P_IP)) + ipv6_addr_set(&sin6->sin6_addr, 0, 0, + htonl(0xffff), ip_hdr(skb)->saddr); + else { + ipv6_addr_copy(&sin6->sin6_addr, + &ipv6_hdr(skb)->saddr); + if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) + sin6->sin6_scope_id = IP6CB(skb)->iif; + } + + } + if (skb->protocol == htons(ETH_P_IP)) { + if (inet->cmsg_flags) + ip_cmsg_recv(msg, skb); + } else { + if (np->rxopt.all) + datagram_recv_ctl(sk, msg, skb); + } + + err = copied; + if (flags & MSG_TRUNC) + err = ulen; + +out_free: + lock_sock(sk); + skb_free_datagram(sk, skb); + release_sock(sk); +out: + return err; + +csum_copy_err: + lock_sock(sk); + if (!skb_kill_datagram(sk, skb, flags)) + UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + release_sock(sk); + + if (flags & MSG_DONTWAIT) + return -EAGAIN; + goto try_again; +} + +void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + int type, int code, int offset, __be32 info, + struct hlist_head udptable[] ) +{ + struct ipv6_pinfo *np; + struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; + struct in6_addr *saddr = &hdr->saddr; + struct in6_addr *daddr = &hdr->daddr; + struct udphdr *uh = (struct udphdr*)(skb->data+offset); + struct sock *sk; + int err; + + sk = __udp6_lib_lookup(skb->dev->nd_net, daddr, uh->dest, + saddr, uh->source, inet6_iif(skb), udptable); + if (sk == NULL) + return; + + np = inet6_sk(sk); + + if (!icmpv6_err_convert(type, code, &err) && !np->recverr) + goto out; + + if (sk->sk_state != TCP_ESTABLISHED && !np->recverr) + goto out; + + if (np->recverr) + ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); + + sk->sk_err = err; + sk->sk_error_report(sk); +out: + sock_put(sk); +} + +static __inline__ void udpv6_err(struct sk_buff *skb, + struct inet6_skb_parm *opt, int type, + int code, int offset, __be32 info ) +{ + __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash); +} + +int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) +{ + struct udp_sock *up = udp_sk(sk); + int rc; + int is_udplite = IS_UDPLITE(sk); + + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) + goto drop; + + /* + * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). + */ + if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + + if (up->pcrlen == 0) { /* full coverage was set */ + LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage" + " %d while full coverage %d requested\n", + UDP_SKB_CB(skb)->cscov, skb->len); + goto drop; + } + if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { + LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d " + "too small, need min %d\n", + UDP_SKB_CB(skb)->cscov, up->pcrlen); + goto drop; + } + } + + if (sk->sk_filter) { + if (udp_lib_checksum_complete(skb)) + goto drop; + } + + if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); + goto drop; + } + + return 0; +drop: + UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); + kfree_skb(skb); + return -1; +} + +static struct sock *udp_v6_mcast_next(struct sock *sk, + __be16 loc_port, struct in6_addr *loc_addr, + __be16 rmt_port, struct in6_addr *rmt_addr, + int dif) +{ + struct hlist_node *node; + struct sock *s = sk; + unsigned short num = ntohs(loc_port); + + sk_for_each_from(s, node) { + struct inet_sock *inet = inet_sk(s); + + if (s->sk_hash == num && s->sk_family == PF_INET6) { + struct ipv6_pinfo *np = inet6_sk(s); + if (inet->dport) { + if (inet->dport != rmt_port) + continue; + } + if (!ipv6_addr_any(&np->daddr) && + !ipv6_addr_equal(&np->daddr, rmt_addr)) + continue; + + if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif) + continue; + + if (!ipv6_addr_any(&np->rcv_saddr)) { + if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr)) + continue; + } + if (!inet6_mc_check(s, loc_addr, rmt_addr)) + continue; + return s; + } + } + return NULL; +} + +/* + * Note: called only from the BH handler context, + * so we don't need to lock the hashes. + */ +static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr, + struct in6_addr *daddr, struct hlist_head udptable[]) +{ + struct sock *sk, *sk2; + const struct udphdr *uh = udp_hdr(skb); + int dif; + + read_lock(&udp_hash_lock); + sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); + dif = inet6_iif(skb); + sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); + if (!sk) { + kfree_skb(skb); + goto out; + } + + sk2 = sk; + while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr, + uh->source, saddr, dif))) { + struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); + if (buff) { + bh_lock_sock_nested(sk2); + if (!sock_owned_by_user(sk2)) + udpv6_queue_rcv_skb(sk2, buff); + else + sk_add_backlog(sk2, buff); + bh_unlock_sock(sk2); + } + } + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + udpv6_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); +out: + read_unlock(&udp_hash_lock); + return 0; +} + +static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, + int proto) +{ + int err; + + UDP_SKB_CB(skb)->partial_cov = 0; + UDP_SKB_CB(skb)->cscov = skb->len; + + if (proto == IPPROTO_UDPLITE) { + err = udplite_checksum_init(skb, uh); + if (err) + return err; + } + + if (uh->check == 0) { + /* RFC 2460 section 8.1 says that we SHOULD log + this error. Well, it is reasonable. + */ + LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); + return 1; + } + if (skb->ip_summed == CHECKSUM_COMPLETE && + !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, + skb->len, proto, skb->csum)) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (!skb_csum_unnecessary(skb)) + skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len, proto, 0)); + + return 0; +} + +int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], + int proto) +{ + struct sock *sk; + struct udphdr *uh; + struct net_device *dev = skb->dev; + struct in6_addr *saddr, *daddr; + u32 ulen = 0; + + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto short_packet; + + saddr = &ipv6_hdr(skb)->saddr; + daddr = &ipv6_hdr(skb)->daddr; + uh = udp_hdr(skb); + + ulen = ntohs(uh->len); + if (ulen > skb->len) + goto short_packet; + + if (proto == IPPROTO_UDP) { + /* UDP validates ulen. */ + + /* Check for jumbo payload */ + if (ulen == 0) + ulen = skb->len; + + if (ulen < sizeof(*uh)) + goto short_packet; + + if (ulen < skb->len) { + if (pskb_trim_rcsum(skb, ulen)) + goto short_packet; + saddr = &ipv6_hdr(skb)->saddr; + daddr = &ipv6_hdr(skb)->daddr; + uh = udp_hdr(skb); + } + } + + if (udp6_csum_init(skb, uh, proto)) + goto discard; + + /* + * Multicast receive code + */ + if (ipv6_addr_is_multicast(daddr)) + return __udp6_lib_mcast_deliver(skb, saddr, daddr, udptable); + + /* Unicast */ + + /* + * check socket cache ... must talk to Alan about his plans + * for sock caches... i'll skip this for now. + */ + sk = __udp6_lib_lookup(skb->dev->nd_net, saddr, uh->source, + daddr, uh->dest, inet6_iif(skb), udptable); + + if (sk == NULL) { + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto discard; + + if (udp_lib_checksum_complete(skb)) + goto discard; + UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev); + + kfree_skb(skb); + return 0; + } + + /* deliver */ + + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + udpv6_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); + sock_put(sk); + return 0; + +short_packet: + LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n", + proto == IPPROTO_UDPLITE ? "-Lite" : "", + ulen, skb->len); + +discard: + UDP6_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + kfree_skb(skb); + return 0; +} + +static __inline__ int udpv6_rcv(struct sk_buff *skb) +{ + return __udp6_lib_rcv(skb, udp_hash, IPPROTO_UDP); +} + +/* + * Throw away all pending data and cancel the corking. Socket is locked. + */ +static void udp_v6_flush_pending_frames(struct sock *sk) +{ + struct udp_sock *up = udp_sk(sk); + + if (up->pending) { + up->len = 0; + up->pending = 0; + ip6_flush_pending_frames(sk); + } +} + +/* + * Sending + */ + +static int udp_v6_push_pending_frames(struct sock *sk) +{ + struct sk_buff *skb; + struct udphdr *uh; + struct udp_sock *up = udp_sk(sk); + struct inet_sock *inet = inet_sk(sk); + struct flowi *fl = &inet->cork.fl; + int err = 0; + int is_udplite = IS_UDPLITE(sk); + __wsum csum = 0; + + /* Grab the skbuff where UDP header space exists. */ + if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) + goto out; + + /* + * Create a UDP header + */ + uh = udp_hdr(skb); + uh->source = fl->fl_ip_sport; + uh->dest = fl->fl_ip_dport; + uh->len = htons(up->len); + uh->check = 0; + + if (is_udplite) + csum = udplite_csum_outgoing(sk, skb); + else + csum = udp_csum_outgoing(sk, skb); + + /* add protocol-dependent pseudo-header */ + uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, + up->len, fl->proto, csum ); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + err = ip6_push_pending_frames(sk); +out: + up->len = 0; + up->pending = 0; + if (!err) + UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); + return err; +} + +int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len) +{ + struct ipv6_txoptions opt_space; + struct udp_sock *up = udp_sk(sk); + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; + struct in6_addr *daddr, *final_p = NULL, final; + struct ipv6_txoptions *opt = NULL; + struct ip6_flowlabel *flowlabel = NULL; + struct flowi fl; + struct dst_entry *dst; + int addr_len = msg->msg_namelen; + int ulen = len; + int hlimit = -1; + int tclass = -1; + int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; + int err; + int connected = 0; + int is_udplite = IS_UDPLITE(sk); + int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); + + /* destination address check */ + if (sin6) { + if (addr_len < offsetof(struct sockaddr, sa_data)) + return -EINVAL; + + switch (sin6->sin6_family) { + case AF_INET6: + if (addr_len < SIN6_LEN_RFC2133) + return -EINVAL; + daddr = &sin6->sin6_addr; + break; + case AF_INET: + goto do_udp_sendmsg; + case AF_UNSPEC: + msg->msg_name = sin6 = NULL; + msg->msg_namelen = addr_len = 0; + daddr = NULL; + break; + default: + return -EINVAL; + } + } else if (!up->pending) { + if (sk->sk_state != TCP_ESTABLISHED) + return -EDESTADDRREQ; + daddr = &np->daddr; + } else + daddr = NULL; + + if (daddr) { + if (ipv6_addr_v4mapped(daddr)) { + struct sockaddr_in sin; + sin.sin_family = AF_INET; + sin.sin_port = sin6 ? sin6->sin6_port : inet->dport; + sin.sin_addr.s_addr = daddr->s6_addr32[3]; + msg->msg_name = &sin; + msg->msg_namelen = sizeof(sin); +do_udp_sendmsg: + if (__ipv6_only_sock(sk)) + return -ENETUNREACH; + return udp_sendmsg(iocb, sk, msg, len); + } + } + + if (up->pending == AF_INET) + return udp_sendmsg(iocb, sk, msg, len); + + /* Rough check on arithmetic overflow, + better check is made in ip6_append_data(). + */ + if (len > INT_MAX - sizeof(struct udphdr)) + return -EMSGSIZE; + + if (up->pending) { + /* + * There are pending frames. + * The socket lock must be held while it's corked. + */ + lock_sock(sk); + if (likely(up->pending)) { + if (unlikely(up->pending != AF_INET6)) { + release_sock(sk); + return -EAFNOSUPPORT; + } + dst = NULL; + goto do_append_data; + } + release_sock(sk); + } + ulen += sizeof(struct udphdr); + + memset(&fl, 0, sizeof(fl)); + + if (sin6) { + if (sin6->sin6_port == 0) + return -EINVAL; + + fl.fl_ip_dport = sin6->sin6_port; + daddr = &sin6->sin6_addr; + + if (np->sndflow) { + fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + if (flowlabel == NULL) + return -EINVAL; + daddr = &flowlabel->dst; + } + } + + /* + * Otherwise it will be difficult to maintain + * sk->sk_dst_cache. + */ + if (sk->sk_state == TCP_ESTABLISHED && + ipv6_addr_equal(daddr, &np->daddr)) + daddr = &np->daddr; + + if (addr_len >= sizeof(struct sockaddr_in6) && + sin6->sin6_scope_id && + ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) + fl.oif = sin6->sin6_scope_id; + } else { + if (sk->sk_state != TCP_ESTABLISHED) + return -EDESTADDRREQ; + + fl.fl_ip_dport = inet->dport; + daddr = &np->daddr; + fl.fl6_flowlabel = np->flow_label; + connected = 1; + } + + if (!fl.oif) + fl.oif = sk->sk_bound_dev_if; + + if (msg->msg_controllen) { + opt = &opt_space; + memset(opt, 0, sizeof(struct ipv6_txoptions)); + opt->tot_len = sizeof(*opt); + + err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); + if (err < 0) { + fl6_sock_release(flowlabel); + return err; + } + if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { + flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + if (flowlabel == NULL) + return -EINVAL; + } + if (!(opt->opt_nflen|opt->opt_flen)) + opt = NULL; + connected = 0; + } + if (opt == NULL) + opt = np->opt; + if (flowlabel) + opt = fl6_merge_options(&opt_space, flowlabel, opt); + opt = ipv6_fixup_options(&opt_space, opt); + + fl.proto = sk->sk_protocol; + ipv6_addr_copy(&fl.fl6_dst, daddr); + if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) + ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl.fl_ip_sport = inet->sport; + + /* merge ip6_build_xmit from ip6_output */ + if (opt && opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; + connected = 0; + } + + if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { + fl.oif = np->mcast_oif; + connected = 0; + } + + security_sk_classify_flow(sk, &fl); + + err = ip6_sk_dst_lookup(sk, &dst, &fl); + if (err) + goto out; + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + + if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + if (err == -EREMOTE) + err = ip6_dst_blackhole(sk, &dst, &fl); + if (err < 0) + goto out; + } + + if (hlimit < 0) { + if (ipv6_addr_is_multicast(&fl.fl6_dst)) + hlimit = np->mcast_hops; + else + hlimit = np->hop_limit; + if (hlimit < 0) + hlimit = dst_metric(dst, RTAX_HOPLIMIT); + if (hlimit < 0) + hlimit = ipv6_get_hoplimit(dst->dev); + } + + if (tclass < 0) { + tclass = np->tclass; + if (tclass < 0) + tclass = 0; + } + + if (msg->msg_flags&MSG_CONFIRM) + goto do_confirm; +back_from_confirm: + + lock_sock(sk); + if (unlikely(up->pending)) { + /* The socket is already corked while preparing it. */ + /* ... which is an evident application bug. --ANK */ + release_sock(sk); + + LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); + err = -EINVAL; + goto out; + } + + up->pending = AF_INET6; + +do_append_data: + up->len += ulen; + getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; + err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, + sizeof(struct udphdr), hlimit, tclass, opt, &fl, + (struct rt6_info*)dst, + corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); + if (err) + udp_v6_flush_pending_frames(sk); + else if (!corkreq) + err = udp_v6_push_pending_frames(sk); + else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) + up->pending = 0; + + if (dst) { + if (connected) { + ip6_dst_store(sk, dst, + ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? + &np->daddr : NULL, +#ifdef CONFIG_IPV6_SUBTREES + ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? + &np->saddr : +#endif + NULL); + } else { + dst_release(dst); + } + } + + if (err > 0) + err = np->recverr ? net_xmit_errno(err) : 0; + release_sock(sk); +out: + fl6_sock_release(flowlabel); + if (!err) + return len; + /* + * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting + * ENOBUFS might not be good (it's not tunable per se), but otherwise + * we don't have a good statistic (IpOutDiscards but it can be too many + * things). We could add another new stat but at least for now that + * seems like overkill. + */ + if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { + UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); + } + return err; + +do_confirm: + dst_confirm(dst); + if (!(msg->msg_flags&MSG_PROBE) || len) + goto back_from_confirm; + err = 0; + goto out; +} + +int udpv6_destroy_sock(struct sock *sk) +{ + lock_sock(sk); + udp_v6_flush_pending_frames(sk); + release_sock(sk); + + inet6_destroy_sock(sk); + + return 0; +} + +/* + * Socket option code for UDP + */ +int udpv6_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_setsockopt(sk, level, optname, optval, optlen, + udp_v6_push_pending_frames); + return ipv6_setsockopt(sk, level, optname, optval, optlen); +} + +#ifdef CONFIG_COMPAT +int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_setsockopt(sk, level, optname, optval, optlen, + udp_v6_push_pending_frames); + return compat_ipv6_setsockopt(sk, level, optname, optval, optlen); +} +#endif + +int udpv6_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_getsockopt(sk, level, optname, optval, optlen); + return ipv6_getsockopt(sk, level, optname, optval, optlen); +} + +#ifdef CONFIG_COMPAT +int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_getsockopt(sk, level, optname, optval, optlen); + return compat_ipv6_getsockopt(sk, level, optname, optval, optlen); +} +#endif + +static struct inet6_protocol udpv6_protocol = { + .handler = udpv6_rcv, + .err_handler = udpv6_err, + .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, +}; + +/* ------------------------------------------------------------------------ */ +#ifdef CONFIG_PROC_FS + +static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket) +{ + struct inet_sock *inet = inet_sk(sp); + struct ipv6_pinfo *np = inet6_sk(sp); + struct in6_addr *dest, *src; + __u16 destp, srcp; + + dest = &np->daddr; + src = &np->rcv_saddr; + destp = ntohs(inet->dport); + srcp = ntohs(inet->sport); + seq_printf(seq, + "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p\n", + bucket, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], destp, + sp->sk_state, + atomic_read(&sp->sk_wmem_alloc), + atomic_read(&sp->sk_rmem_alloc), + 0, 0L, 0, + sock_i_uid(sp), 0, + sock_i_ino(sp), + atomic_read(&sp->sk_refcnt), sp); +} + +int udp6_seq_show(struct seq_file *seq, void *v) +{ + if (v == SEQ_START_TOKEN) + seq_printf(seq, + " sl " + "local_address " + "remote_address " + "st tx_queue rx_queue tr tm->when retrnsmt" + " uid timeout inode\n"); + else + udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket); + return 0; +} + +static struct file_operations udp6_seq_fops; +static struct udp_seq_afinfo udp6_seq_afinfo = { + .owner = THIS_MODULE, + .name = "udp6", + .family = AF_INET6, + .hashtable = udp_hash, + .seq_show = udp6_seq_show, + .seq_fops = &udp6_seq_fops, +}; + +int __init udp6_proc_init(void) +{ + return udp_proc_register(&udp6_seq_afinfo); +} + +void udp6_proc_exit(void) { + udp_proc_unregister(&udp6_seq_afinfo); +} +#endif /* CONFIG_PROC_FS */ + +/* ------------------------------------------------------------------------ */ + +DEFINE_PROTO_INUSE(udpv6) + +struct proto udpv6_prot = { + .name = "UDPv6", + .owner = THIS_MODULE, + .close = udp_lib_close, + .connect = ip6_datagram_connect, + .disconnect = udp_disconnect, + .ioctl = udp_ioctl, + .destroy = udpv6_destroy_sock, + .setsockopt = udpv6_setsockopt, + .getsockopt = udpv6_getsockopt, + .sendmsg = udpv6_sendmsg, + .recvmsg = udpv6_recvmsg, + .backlog_rcv = udpv6_queue_rcv_skb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .get_port = udp_v6_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem = &sysctl_udp_wmem_min, + .sysctl_rmem = &sysctl_udp_rmem_min, + .obj_size = sizeof(struct udp6_sock), +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_udpv6_setsockopt, + .compat_getsockopt = compat_udpv6_getsockopt, +#endif + REF_PROTO_INUSE(udpv6) +}; + +static struct inet_protosw udpv6_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_UDP, + .prot = &udpv6_prot, + .ops = &inet6_dgram_ops, + .capability =-1, + .no_check = UDP_CSUM_DEFAULT, + .flags = INET_PROTOSW_PERMANENT, +}; + + +int __init udpv6_init(void) +{ + int ret; + + ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP); + if (ret) + goto out; + + ret = inet6_register_protosw(&udpv6_protosw); + if (ret) + goto out_udpv6_protocol; +out: + return ret; + +out_udpv6_protocol: + inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); + goto out; +} + +void udpv6_exit(void) +{ + inet6_unregister_protosw(&udpv6_protosw); + inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); +} diff --git a/net/ipv6/udp_ipv6.c b/net/ipv6/udp_ipv6.c deleted file mode 100644 index 55feac7ba717..000000000000 --- a/net/ipv6/udp_ipv6.c +++ /dev/null @@ -1,1065 +0,0 @@ -/* - * UDP over IPv6 - * Linux INET6 implementation - * - * Authors: - * Pedro Roque - * - * Based on linux/ipv4/udp.c - * - * $Id: udp.c,v 1.65 2002/02/01 22:01:04 davem Exp $ - * - * Fixes: - * Hideaki YOSHIFUJI : sin6_scope_id support - * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which - * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind - * a single port at the same time. - * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data - * YOSHIFUJI Hideaki @USAGI: convert /proc/net/udp6 to seq_file. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include "udp_impl.h" - -static inline int udp_v6_get_port(struct sock *sk, unsigned short snum) -{ - return udp_get_port(sk, snum, ipv6_rcv_saddr_equal); -} - -static struct sock *__udp6_lib_lookup(struct net *net, - struct in6_addr *saddr, __be16 sport, - struct in6_addr *daddr, __be16 dport, - int dif, struct hlist_head udptable[]) -{ - struct sock *sk, *result = NULL; - struct hlist_node *node; - unsigned short hnum = ntohs(dport); - int badness = -1; - - read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { - struct inet_sock *inet = inet_sk(sk); - - if (sk->sk_net == net && sk->sk_hash == hnum && - sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - int score = 0; - if (inet->dport) { - if (inet->dport != sport) - continue; - score++; - } - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) - continue; - score++; - } - if (!ipv6_addr_any(&np->daddr)) { - if (!ipv6_addr_equal(&np->daddr, saddr)) - continue; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score++; - } - if (score == 4) { - result = sk; - break; - } else if (score > badness) { - result = sk; - badness = score; - } - } - } - if (result) - sock_hold(result); - read_unlock(&udp_hash_lock); - return result; -} - -/* - * This should be easy, if there is something there we - * return it, otherwise we block. - */ - -int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct inet_sock *inet = inet_sk(sk); - struct sk_buff *skb; - unsigned int ulen, copied; - int peeked; - int err; - int is_udplite = IS_UDPLITE(sk); - - if (addr_len) - *addr_len=sizeof(struct sockaddr_in6); - - if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); - -try_again: - skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &err); - if (!skb) - goto out; - - ulen = skb->len - sizeof(struct udphdr); - copied = len; - if (copied > ulen) - copied = ulen; - else if (copied < ulen) - msg->msg_flags |= MSG_TRUNC; - - /* - * If checksum is needed at all, try to do it while copying the - * data. If the data is truncated, or if we only want a partial - * coverage checksum (UDP-Lite), do it before the copy. - */ - - if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { - if (udp_lib_checksum_complete(skb)) - goto csum_copy_err; - } - - if (skb_csum_unnecessary(skb)) - err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov, copied ); - else { - err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); - if (err == -EINVAL) - goto csum_copy_err; - } - if (err) - goto out_free; - - if (!peeked) - UDP6_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); - - sock_recv_timestamp(msg, sk, skb); - - /* Copy the address. */ - if (msg->msg_name) { - struct sockaddr_in6 *sin6; - - sin6 = (struct sockaddr_in6 *) msg->msg_name; - sin6->sin6_family = AF_INET6; - sin6->sin6_port = udp_hdr(skb)->source; - sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - - if (skb->protocol == htons(ETH_P_IP)) - ipv6_addr_set(&sin6->sin6_addr, 0, 0, - htonl(0xffff), ip_hdr(skb)->saddr); - else { - ipv6_addr_copy(&sin6->sin6_addr, - &ipv6_hdr(skb)->saddr); - if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6->sin6_scope_id = IP6CB(skb)->iif; - } - - } - if (skb->protocol == htons(ETH_P_IP)) { - if (inet->cmsg_flags) - ip_cmsg_recv(msg, skb); - } else { - if (np->rxopt.all) - datagram_recv_ctl(sk, msg, skb); - } - - err = copied; - if (flags & MSG_TRUNC) - err = ulen; - -out_free: - lock_sock(sk); - skb_free_datagram(sk, skb); - release_sock(sk); -out: - return err; - -csum_copy_err: - lock_sock(sk); - if (!skb_kill_datagram(sk, skb, flags)) - UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); - release_sock(sk); - - if (flags & MSG_DONTWAIT) - return -EAGAIN; - goto try_again; -} - -void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info, - struct hlist_head udptable[] ) -{ - struct ipv6_pinfo *np; - struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; - struct in6_addr *saddr = &hdr->saddr; - struct in6_addr *daddr = &hdr->daddr; - struct udphdr *uh = (struct udphdr*)(skb->data+offset); - struct sock *sk; - int err; - - sk = __udp6_lib_lookup(skb->dev->nd_net, daddr, uh->dest, - saddr, uh->source, inet6_iif(skb), udptable); - if (sk == NULL) - return; - - np = inet6_sk(sk); - - if (!icmpv6_err_convert(type, code, &err) && !np->recverr) - goto out; - - if (sk->sk_state != TCP_ESTABLISHED && !np->recverr) - goto out; - - if (np->recverr) - ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); - - sk->sk_err = err; - sk->sk_error_report(sk); -out: - sock_put(sk); -} - -static __inline__ void udpv6_err(struct sk_buff *skb, - struct inet6_skb_parm *opt, int type, - int code, int offset, __be32 info ) -{ - __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash); -} - -int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) -{ - struct udp_sock *up = udp_sk(sk); - int rc; - int is_udplite = IS_UDPLITE(sk); - - if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) - goto drop; - - /* - * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). - */ - if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { - - if (up->pcrlen == 0) { /* full coverage was set */ - LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage" - " %d while full coverage %d requested\n", - UDP_SKB_CB(skb)->cscov, skb->len); - goto drop; - } - if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { - LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d " - "too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); - goto drop; - } - } - - if (sk->sk_filter) { - if (udp_lib_checksum_complete(skb)) - goto drop; - } - - if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { - /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) - UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); - goto drop; - } - - return 0; -drop: - UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); - kfree_skb(skb); - return -1; -} - -static struct sock *udp_v6_mcast_next(struct sock *sk, - __be16 loc_port, struct in6_addr *loc_addr, - __be16 rmt_port, struct in6_addr *rmt_addr, - int dif) -{ - struct hlist_node *node; - struct sock *s = sk; - unsigned short num = ntohs(loc_port); - - sk_for_each_from(s, node) { - struct inet_sock *inet = inet_sk(s); - - if (s->sk_hash == num && s->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(s); - if (inet->dport) { - if (inet->dport != rmt_port) - continue; - } - if (!ipv6_addr_any(&np->daddr) && - !ipv6_addr_equal(&np->daddr, rmt_addr)) - continue; - - if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif) - continue; - - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr)) - continue; - } - if (!inet6_mc_check(s, loc_addr, rmt_addr)) - continue; - return s; - } - } - return NULL; -} - -/* - * Note: called only from the BH handler context, - * so we don't need to lock the hashes. - */ -static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr, - struct in6_addr *daddr, struct hlist_head udptable[]) -{ - struct sock *sk, *sk2; - const struct udphdr *uh = udp_hdr(skb); - int dif; - - read_lock(&udp_hash_lock); - sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); - dif = inet6_iif(skb); - sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); - if (!sk) { - kfree_skb(skb); - goto out; - } - - sk2 = sk; - while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr, - uh->source, saddr, dif))) { - struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); - if (buff) { - bh_lock_sock_nested(sk2); - if (!sock_owned_by_user(sk2)) - udpv6_queue_rcv_skb(sk2, buff); - else - sk_add_backlog(sk2, buff); - bh_unlock_sock(sk2); - } - } - bh_lock_sock_nested(sk); - if (!sock_owned_by_user(sk)) - udpv6_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); -out: - read_unlock(&udp_hash_lock); - return 0; -} - -static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, - int proto) -{ - int err; - - UDP_SKB_CB(skb)->partial_cov = 0; - UDP_SKB_CB(skb)->cscov = skb->len; - - if (IS_PROTO_UDPLITE(proto)) { - err = udplite_checksum_init(skb, uh); - if (err) - return err; - } - - if (uh->check == 0) { - /* RFC 2460 section 8.1 says that we SHOULD log - this error. Well, it is reasonable. - */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); - return 1; - } - if (skb->ip_summed == CHECKSUM_COMPLETE && - !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, - skb->len, proto, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - - if (!skb_csum_unnecessary(skb)) - skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - skb->len, proto, 0)); - - return 0; -} - -int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], - int proto) -{ - struct sock *sk; - struct udphdr *uh; - struct net_device *dev = skb->dev; - struct in6_addr *saddr, *daddr; - u32 ulen = 0; - - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto short_packet; - - saddr = &ipv6_hdr(skb)->saddr; - daddr = &ipv6_hdr(skb)->daddr; - uh = udp_hdr(skb); - - ulen = ntohs(uh->len); - if (ulen > skb->len) - goto short_packet; - - if (proto == IPPROTO_UDP) { - /* UDP validates ulen. */ - - /* Check for jumbo payload */ - if (ulen == 0) - ulen = skb->len; - - if (ulen < sizeof(*uh)) - goto short_packet; - - if (ulen < skb->len) { - if (pskb_trim_rcsum(skb, ulen)) - goto short_packet; - saddr = &ipv6_hdr(skb)->saddr; - daddr = &ipv6_hdr(skb)->daddr; - uh = udp_hdr(skb); - } - } - - if (udp6_csum_init(skb, uh, proto)) - goto discard; - - /* - * Multicast receive code - */ - if (ipv6_addr_is_multicast(daddr)) - return __udp6_lib_mcast_deliver(skb, saddr, daddr, udptable); - - /* Unicast */ - - /* - * check socket cache ... must talk to Alan about his plans - * for sock caches... i'll skip this for now. - */ - sk = __udp6_lib_lookup(skb->dev->nd_net, saddr, uh->source, - daddr, uh->dest, inet6_iif(skb), udptable); - - if (sk == NULL) { - if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto discard; - - if (udp_lib_checksum_complete(skb)) - goto discard; - UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, IS_PROTO_UDPLITE(proto)); - - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev); - - kfree_skb(skb); - return 0; - } - - /* deliver */ - - bh_lock_sock_nested(sk); - if (!sock_owned_by_user(sk)) - udpv6_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); - sock_put(sk); - return 0; - -short_packet: - LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n", - IS_PROTO_UDPLITE(proto) ? "-Lite" : "", - ulen, skb->len); - -discard: - UDP6_INC_STATS_BH(UDP_MIB_INERRORS, IS_PROTO_UDPLITE(proto)); - kfree_skb(skb); - return 0; -} - -static __inline__ int udpv6_rcv(struct sk_buff *skb) -{ - return __udp6_lib_rcv(skb, udp_hash, IPPROTO_UDP); -} - -/* - * Throw away all pending data and cancel the corking. Socket is locked. - */ -static void udp_v6_flush_pending_frames(struct sock *sk) -{ - struct udp_sock *up = udp_sk(sk); - - if (up->pending) { - up->len = 0; - up->pending = 0; - ip6_flush_pending_frames(sk); - } -} - -/* - * Sending - */ - -static int udp_v6_push_pending_frames(struct sock *sk) -{ - struct sk_buff *skb; - struct udphdr *uh; - struct udp_sock *up = udp_sk(sk); - struct inet_sock *inet = inet_sk(sk); - struct flowi *fl = &inet->cork.fl; - int err = 0; - int is_udplite = IS_UDPLITE(sk); - __wsum csum = 0; - - /* Grab the skbuff where UDP header space exists. */ - if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) - goto out; - - /* - * Create a UDP header - */ - uh = udp_hdr(skb); - uh->source = fl->fl_ip_sport; - uh->dest = fl->fl_ip_dport; - uh->len = htons(up->len); - uh->check = 0; - - if (is_udplite) - csum = udplite_csum_outgoing(sk, skb); - else - csum = udp_csum_outgoing(sk, skb); - - /* add protocol-dependent pseudo-header */ - uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, - up->len, fl->proto, csum ); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - - err = ip6_push_pending_frames(sk); -out: - up->len = 0; - up->pending = 0; - if (!err) - UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); - return err; -} - -int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) -{ - struct ipv6_txoptions opt_space; - struct udp_sock *up = udp_sk(sk); - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; - struct in6_addr *daddr, *final_p = NULL, final; - struct ipv6_txoptions *opt = NULL; - struct ip6_flowlabel *flowlabel = NULL; - struct flowi fl; - struct dst_entry *dst; - int addr_len = msg->msg_namelen; - int ulen = len; - int hlimit = -1; - int tclass = -1; - int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; - int err; - int connected = 0; - int is_udplite = IS_UDPLITE(sk); - int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); - - /* destination address check */ - if (sin6) { - if (addr_len < offsetof(struct sockaddr, sa_data)) - return -EINVAL; - - switch (sin6->sin6_family) { - case AF_INET6: - if (addr_len < SIN6_LEN_RFC2133) - return -EINVAL; - daddr = &sin6->sin6_addr; - break; - case AF_INET: - goto do_udp_sendmsg; - case AF_UNSPEC: - msg->msg_name = sin6 = NULL; - msg->msg_namelen = addr_len = 0; - daddr = NULL; - break; - default: - return -EINVAL; - } - } else if (!up->pending) { - if (sk->sk_state != TCP_ESTABLISHED) - return -EDESTADDRREQ; - daddr = &np->daddr; - } else - daddr = NULL; - - if (daddr) { - if (ipv6_addr_v4mapped(daddr)) { - struct sockaddr_in sin; - sin.sin_family = AF_INET; - sin.sin_port = sin6 ? sin6->sin6_port : inet->dport; - sin.sin_addr.s_addr = daddr->s6_addr32[3]; - msg->msg_name = &sin; - msg->msg_namelen = sizeof(sin); -do_udp_sendmsg: - if (__ipv6_only_sock(sk)) - return -ENETUNREACH; - return udp_sendmsg(iocb, sk, msg, len); - } - } - - if (up->pending == AF_INET) - return udp_sendmsg(iocb, sk, msg, len); - - /* Rough check on arithmetic overflow, - better check is made in ip6_append_data(). - */ - if (len > INT_MAX - sizeof(struct udphdr)) - return -EMSGSIZE; - - if (up->pending) { - /* - * There are pending frames. - * The socket lock must be held while it's corked. - */ - lock_sock(sk); - if (likely(up->pending)) { - if (unlikely(up->pending != AF_INET6)) { - release_sock(sk); - return -EAFNOSUPPORT; - } - dst = NULL; - goto do_append_data; - } - release_sock(sk); - } - ulen += sizeof(struct udphdr); - - memset(&fl, 0, sizeof(fl)); - - if (sin6) { - if (sin6->sin6_port == 0) - return -EINVAL; - - fl.fl_ip_dport = sin6->sin6_port; - daddr = &sin6->sin6_addr; - - if (np->sndflow) { - fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); - if (flowlabel == NULL) - return -EINVAL; - daddr = &flowlabel->dst; - } - } - - /* - * Otherwise it will be difficult to maintain - * sk->sk_dst_cache. - */ - if (sk->sk_state == TCP_ESTABLISHED && - ipv6_addr_equal(daddr, &np->daddr)) - daddr = &np->daddr; - - if (addr_len >= sizeof(struct sockaddr_in6) && - sin6->sin6_scope_id && - ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) - fl.oif = sin6->sin6_scope_id; - } else { - if (sk->sk_state != TCP_ESTABLISHED) - return -EDESTADDRREQ; - - fl.fl_ip_dport = inet->dport; - daddr = &np->daddr; - fl.fl6_flowlabel = np->flow_label; - connected = 1; - } - - if (!fl.oif) - fl.oif = sk->sk_bound_dev_if; - - if (msg->msg_controllen) { - opt = &opt_space; - memset(opt, 0, sizeof(struct ipv6_txoptions)); - opt->tot_len = sizeof(*opt); - - err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); - if (err < 0) { - fl6_sock_release(flowlabel); - return err; - } - if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); - if (flowlabel == NULL) - return -EINVAL; - } - if (!(opt->opt_nflen|opt->opt_flen)) - opt = NULL; - connected = 0; - } - if (opt == NULL) - opt = np->opt; - if (flowlabel) - opt = fl6_merge_options(&opt_space, flowlabel, opt); - opt = ipv6_fixup_options(&opt_space, opt); - - fl.proto = sk->sk_protocol; - ipv6_addr_copy(&fl.fl6_dst, daddr); - if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl_ip_sport = inet->sport; - - /* merge ip6_build_xmit from ip6_output */ - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - connected = 0; - } - - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { - fl.oif = np->mcast_oif; - connected = 0; - } - - security_sk_classify_flow(sk, &fl); - - err = ip6_sk_dst_lookup(sk, &dst, &fl); - if (err) - goto out; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { - if (err == -EREMOTE) - err = ip6_dst_blackhole(sk, &dst, &fl); - if (err < 0) - goto out; - } - - if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl.fl6_dst)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = dst_metric(dst, RTAX_HOPLIMIT); - if (hlimit < 0) - hlimit = ipv6_get_hoplimit(dst->dev); - } - - if (tclass < 0) { - tclass = np->tclass; - if (tclass < 0) - tclass = 0; - } - - if (msg->msg_flags&MSG_CONFIRM) - goto do_confirm; -back_from_confirm: - - lock_sock(sk); - if (unlikely(up->pending)) { - /* The socket is already corked while preparing it. */ - /* ... which is an evident application bug. --ANK */ - release_sock(sk); - - LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); - err = -EINVAL; - goto out; - } - - up->pending = AF_INET6; - -do_append_data: - up->len += ulen; - getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; - err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, - sizeof(struct udphdr), hlimit, tclass, opt, &fl, - (struct rt6_info*)dst, - corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); - if (err) - udp_v6_flush_pending_frames(sk); - else if (!corkreq) - err = udp_v6_push_pending_frames(sk); - else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) - up->pending = 0; - - if (dst) { - if (connected) { - ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? - &np->daddr : NULL, -#ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? - &np->saddr : -#endif - NULL); - } else { - dst_release(dst); - } - } - - if (err > 0) - err = np->recverr ? net_xmit_errno(err) : 0; - release_sock(sk); -out: - fl6_sock_release(flowlabel); - if (!err) - return len; - /* - * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting - * ENOBUFS might not be good (it's not tunable per se), but otherwise - * we don't have a good statistic (IpOutDiscards but it can be too many - * things). We could add another new stat but at least for now that - * seems like overkill. - */ - if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); - } - return err; - -do_confirm: - dst_confirm(dst); - if (!(msg->msg_flags&MSG_PROBE) || len) - goto back_from_confirm; - err = 0; - goto out; -} - -int udpv6_destroy_sock(struct sock *sk) -{ - lock_sock(sk); - udp_v6_flush_pending_frames(sk); - release_sock(sk); - - inet6_destroy_sock(sk); - - return 0; -} - -/* - * Socket option code for UDP - */ -int udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) -{ - if (IS_SOL_UDPFAMILY(level)) - return udp_lib_setsockopt(sk, level, optname, optval, optlen, - udp_v6_push_pending_frames); - return ipv6_setsockopt(sk, level, optname, optval, optlen); -} - -#ifdef CONFIG_COMPAT -int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) -{ - if (IS_SOL_UDPFAMILY(level)) - return udp_lib_setsockopt(sk, level, optname, optval, optlen, - udp_v6_push_pending_frames); - return compat_ipv6_setsockopt(sk, level, optname, optval, optlen); -} -#endif - -int udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - if (IS_SOL_UDPFAMILY(level)) - return udp_lib_getsockopt(sk, level, optname, optval, optlen); - return ipv6_getsockopt(sk, level, optname, optval, optlen); -} - -#ifdef CONFIG_COMPAT -int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - if (IS_SOL_UDPFAMILY(level)) - return udp_lib_getsockopt(sk, level, optname, optval, optlen); - return compat_ipv6_getsockopt(sk, level, optname, optval, optlen); -} -#endif - -static struct inet6_protocol udpv6_protocol = { - .handler = udpv6_rcv, - .err_handler = udpv6_err, - .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, -}; - -/* ------------------------------------------------------------------------ */ -#ifdef CONFIG_PROC_FS - -static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket) -{ - struct inet_sock *inet = inet_sk(sp); - struct ipv6_pinfo *np = inet6_sk(sp); - struct in6_addr *dest, *src; - __u16 destp, srcp; - - dest = &np->daddr; - src = &np->rcv_saddr; - destp = ntohs(inet->dport); - srcp = ntohs(inet->sport); - seq_printf(seq, - "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p\n", - bucket, - src->s6_addr32[0], src->s6_addr32[1], - src->s6_addr32[2], src->s6_addr32[3], srcp, - dest->s6_addr32[0], dest->s6_addr32[1], - dest->s6_addr32[2], dest->s6_addr32[3], destp, - sp->sk_state, - atomic_read(&sp->sk_wmem_alloc), - atomic_read(&sp->sk_rmem_alloc), - 0, 0L, 0, - sock_i_uid(sp), 0, - sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp); -} - -int udp6_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_printf(seq, - " sl " - "local_address " - "remote_address " - "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode\n"); - else - udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket); - return 0; -} - -static struct file_operations udp6_seq_fops; -static struct udp_seq_afinfo udp6_seq_afinfo = { - .owner = THIS_MODULE, - .name = "udp6", - .family = AF_INET6, - .hashtable = udp_hash, - .seq_show = udp6_seq_show, - .seq_fops = &udp6_seq_fops, -}; - -int __init udp6_proc_init(void) -{ - return udp_proc_register(&udp6_seq_afinfo); -} - -void udp6_proc_exit(void) { - udp_proc_unregister(&udp6_seq_afinfo); -} -#endif /* CONFIG_PROC_FS */ - -/* ------------------------------------------------------------------------ */ - -DEFINE_PROTO_INUSE(udpv6) - -struct proto udpv6_prot = { - .name = "UDPv6", - .owner = THIS_MODULE, - .close = udp_lib_close, - .connect = ip6_datagram_connect, - .disconnect = udp_disconnect, - .ioctl = udp_ioctl, - .destroy = udpv6_destroy_sock, - .setsockopt = udpv6_setsockopt, - .getsockopt = udpv6_getsockopt, - .sendmsg = udpv6_sendmsg, - .recvmsg = udpv6_recvmsg, - .backlog_rcv = udpv6_queue_rcv_skb, - .hash = udp_lib_hash, - .unhash = udp_lib_unhash, - .get_port = udp_v6_get_port, - .memory_allocated = &udp_memory_allocated, - .sysctl_mem = sysctl_udp_mem, - .sysctl_wmem = &sysctl_udp_wmem_min, - .sysctl_rmem = &sysctl_udp_rmem_min, - .obj_size = sizeof(struct udp6_sock), -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_udpv6_setsockopt, - .compat_getsockopt = compat_udpv6_getsockopt, -#endif - REF_PROTO_INUSE(udpv6) -}; - -static struct inet_protosw udpv6_protosw = { - .type = SOCK_DGRAM, - .protocol = IPPROTO_UDP, - .prot = &udpv6_prot, - .ops = &inet6_dgram_ops, - .capability =-1, - .no_check = UDP_CSUM_DEFAULT, - .flags = INET_PROTOSW_PERMANENT, -}; - - -int __init udpv6_init(void) -{ - int ret; - - ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP); - if (ret) - goto out; - - ret = inet6_register_protosw(&udpv6_protosw); - if (ret) - goto out_udpv6_protocol; -out: - return ret; - -out_udpv6_protocol: - inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); - goto out; -} - -void udpv6_exit(void) -{ - inet6_unregister_protosw(&udpv6_protosw); - inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); -} diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c new file mode 100644 index 000000000000..87d4202522ee --- /dev/null +++ b/net/ipv6/udplite.c @@ -0,0 +1,125 @@ +/* + * UDPLITEv6 An implementation of the UDP-Lite protocol over IPv6. + * See also net/ipv4/udplite.c + * + * Version: $Id: udplite.c,v 1.9 2006/10/19 08:28:10 gerrit Exp $ + * + * Authors: Gerrit Renker + * + * Changes: + * Fixes: + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include "udp_impl.h" + +DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly; + +static int udplitev6_rcv(struct sk_buff *skb) +{ + return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); +} + +static void udplitev6_err(struct sk_buff *skb, + struct inet6_skb_parm *opt, + int type, int code, int offset, __be32 info) +{ + __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash); +} + +static struct inet6_protocol udplitev6_protocol = { + .handler = udplitev6_rcv, + .err_handler = udplitev6_err, + .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, +}; + +static int udplite_v6_get_port(struct sock *sk, unsigned short snum) +{ + return udplite_get_port(sk, snum, ipv6_rcv_saddr_equal); +} + +DEFINE_PROTO_INUSE(udplitev6) + +struct proto udplitev6_prot = { + .name = "UDPLITEv6", + .owner = THIS_MODULE, + .close = udp_lib_close, + .connect = ip6_datagram_connect, + .disconnect = udp_disconnect, + .ioctl = udp_ioctl, + .init = udplite_sk_init, + .destroy = udpv6_destroy_sock, + .setsockopt = udpv6_setsockopt, + .getsockopt = udpv6_getsockopt, + .sendmsg = udpv6_sendmsg, + .recvmsg = udpv6_recvmsg, + .backlog_rcv = udpv6_queue_rcv_skb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .get_port = udplite_v6_get_port, + .obj_size = sizeof(struct udp6_sock), +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_udpv6_setsockopt, + .compat_getsockopt = compat_udpv6_getsockopt, +#endif + REF_PROTO_INUSE(udplitev6) +}; + +static struct inet_protosw udplite6_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_UDPLITE, + .prot = &udplitev6_prot, + .ops = &inet6_dgram_ops, + .capability = -1, + .no_check = 0, + .flags = INET_PROTOSW_PERMANENT, +}; + +int __init udplitev6_init(void) +{ + int ret; + + ret = inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); + if (ret) + goto out; + + ret = inet6_register_protosw(&udplite6_protosw); + if (ret) + goto out_udplitev6_protocol; +out: + return ret; + +out_udplitev6_protocol: + inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); + goto out; +} + +void udplitev6_exit(void) +{ + inet6_unregister_protosw(&udplite6_protosw); + inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); +} + +#ifdef CONFIG_PROC_FS +static struct file_operations udplite6_seq_fops; +static struct udp_seq_afinfo udplite6_seq_afinfo = { + .owner = THIS_MODULE, + .name = "udplite6", + .family = AF_INET6, + .hashtable = udplite_hash, + .seq_show = udp6_seq_show, + .seq_fops = &udplite6_seq_fops, +}; + +int __init udplite6_proc_init(void) +{ + return udp_proc_register(&udplite6_seq_afinfo); +} + +void udplite6_proc_exit(void) +{ + udp_proc_unregister(&udplite6_seq_afinfo); +} +#endif diff --git a/net/ipv6/udplite_ipv6.c b/net/ipv6/udplite_ipv6.c deleted file mode 100644 index 87d4202522ee..000000000000 --- a/net/ipv6/udplite_ipv6.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * UDPLITEv6 An implementation of the UDP-Lite protocol over IPv6. - * See also net/ipv4/udplite.c - * - * Version: $Id: udplite.c,v 1.9 2006/10/19 08:28:10 gerrit Exp $ - * - * Authors: Gerrit Renker - * - * Changes: - * Fixes: - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include "udp_impl.h" - -DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly; - -static int udplitev6_rcv(struct sk_buff *skb) -{ - return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); -} - -static void udplitev6_err(struct sk_buff *skb, - struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) -{ - __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash); -} - -static struct inet6_protocol udplitev6_protocol = { - .handler = udplitev6_rcv, - .err_handler = udplitev6_err, - .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, -}; - -static int udplite_v6_get_port(struct sock *sk, unsigned short snum) -{ - return udplite_get_port(sk, snum, ipv6_rcv_saddr_equal); -} - -DEFINE_PROTO_INUSE(udplitev6) - -struct proto udplitev6_prot = { - .name = "UDPLITEv6", - .owner = THIS_MODULE, - .close = udp_lib_close, - .connect = ip6_datagram_connect, - .disconnect = udp_disconnect, - .ioctl = udp_ioctl, - .init = udplite_sk_init, - .destroy = udpv6_destroy_sock, - .setsockopt = udpv6_setsockopt, - .getsockopt = udpv6_getsockopt, - .sendmsg = udpv6_sendmsg, - .recvmsg = udpv6_recvmsg, - .backlog_rcv = udpv6_queue_rcv_skb, - .hash = udp_lib_hash, - .unhash = udp_lib_unhash, - .get_port = udplite_v6_get_port, - .obj_size = sizeof(struct udp6_sock), -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_udpv6_setsockopt, - .compat_getsockopt = compat_udpv6_getsockopt, -#endif - REF_PROTO_INUSE(udplitev6) -}; - -static struct inet_protosw udplite6_protosw = { - .type = SOCK_DGRAM, - .protocol = IPPROTO_UDPLITE, - .prot = &udplitev6_prot, - .ops = &inet6_dgram_ops, - .capability = -1, - .no_check = 0, - .flags = INET_PROTOSW_PERMANENT, -}; - -int __init udplitev6_init(void) -{ - int ret; - - ret = inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); - if (ret) - goto out; - - ret = inet6_register_protosw(&udplite6_protosw); - if (ret) - goto out_udplitev6_protocol; -out: - return ret; - -out_udplitev6_protocol: - inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); - goto out; -} - -void udplitev6_exit(void) -{ - inet6_unregister_protosw(&udplite6_protosw); - inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); -} - -#ifdef CONFIG_PROC_FS -static struct file_operations udplite6_seq_fops; -static struct udp_seq_afinfo udplite6_seq_afinfo = { - .owner = THIS_MODULE, - .name = "udplite6", - .family = AF_INET6, - .hashtable = udplite_hash, - .seq_show = udp6_seq_show, - .seq_fops = &udplite6_seq_fops, -}; - -int __init udplite6_proc_init(void) -{ - return udp_proc_register(&udplite6_seq_afinfo); -} - -void udplite6_proc_exit(void) -{ - udp_proc_unregister(&udplite6_seq_afinfo); -} -#endif -- cgit v1.2.3-71-gd317 From e7ec2e3230633a858af1b0b359f6c4670dbeb997 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Mon, 10 Mar 2008 17:26:32 +0100 Subject: ssb: Add SPROM/invariants support for PCMCIA devices This adds support for reading/writing the SPROM invariants for PCMCIA based devices. Signed-off-by: Michael Buesch Signed-off-by: John W. Linville --- drivers/ssb/Kconfig | 6 + drivers/ssb/Makefile | 1 + drivers/ssb/main.c | 23 +- drivers/ssb/pci.c | 113 +--------- drivers/ssb/pcmcia.c | 518 ++++++++++++++++++++++++++++++++++++++++------ drivers/ssb/sprom.c | 133 ++++++++++++ drivers/ssb/ssb_private.h | 17 ++ include/linux/ssb/ssb.h | 4 +- 8 files changed, 648 insertions(+), 167 deletions(-) create mode 100644 drivers/ssb/sprom.c (limited to 'include/linux') diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig index f69ef0ba2613..0f7cce2560d1 100644 --- a/drivers/ssb/Kconfig +++ b/drivers/ssb/Kconfig @@ -20,6 +20,10 @@ config SSB If unsure, say N. +# Common SPROM support routines +config SSB_SPROM + bool + config SSB_PCIHOST_POSSIBLE bool depends on SSB && (PCI = y || PCI = SSB) @@ -28,6 +32,7 @@ config SSB_PCIHOST_POSSIBLE config SSB_PCIHOST bool "Support for SSB on PCI-bus host" depends on SSB_PCIHOST_POSSIBLE + select SSB_SPROM default y help Support for a Sonics Silicon Backplane on top @@ -48,6 +53,7 @@ config SSB_PCMCIAHOST_POSSIBLE config SSB_PCMCIAHOST bool "Support for SSB on PCMCIA-bus host (EXPERIMENTAL)" depends on SSB_PCMCIAHOST_POSSIBLE + select SSB_SPROM help Support for a Sonics Silicon Backplane on top of a PCMCIA device. diff --git a/drivers/ssb/Makefile b/drivers/ssb/Makefile index 910f35e32fc9..6f255e9c5af9 100644 --- a/drivers/ssb/Makefile +++ b/drivers/ssb/Makefile @@ -1,6 +1,7 @@ # core ssb-y += main.o scan.o ssb-$(CONFIG_SSB_EMBEDDED) += embedded.o +ssb-$(CONFIG_SSB_SPROM) += sprom.o # host support ssb-$(CONFIG_SSB_PCIHOST) += pci.o pcihost_wrapper.o diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c index 49d7bbb9bea7..e12371916444 100644 --- a/drivers/ssb/main.c +++ b/drivers/ssb/main.c @@ -69,6 +69,25 @@ found: } #endif /* CONFIG_SSB_PCIHOST */ +#ifdef CONFIG_SSB_PCMCIAHOST +struct ssb_bus *ssb_pcmcia_dev_to_bus(struct pcmcia_device *pdev) +{ + struct ssb_bus *bus; + + ssb_buses_lock(); + list_for_each_entry(bus, &buses, list) { + if (bus->bustype == SSB_BUSTYPE_PCMCIA && + bus->host_pcmcia == pdev) + goto found; + } + bus = NULL; +found: + ssb_buses_unlock(); + + return bus; +} +#endif /* CONFIG_SSB_PCMCIAHOST */ + int ssb_for_each_bus_call(unsigned long data, int (*func)(struct ssb_bus *bus, unsigned long data)) { @@ -398,7 +417,7 @@ void ssb_bus_unregister(struct ssb_bus *bus) list_del(&bus->list); ssb_buses_unlock(); - /* ssb_pcmcia_exit(bus); */ + ssb_pcmcia_exit(bus); ssb_pci_exit(bus); ssb_iounmap(bus); } @@ -663,7 +682,7 @@ out: err_dequeue: list_del(&bus->list); err_pcmcia_exit: -/* ssb_pcmcia_exit(bus); */ + ssb_pcmcia_exit(bus); err_pci_exit: ssb_pci_exit(bus); err_unmap: diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c index 1facc7620fc8..f1514b33cfae 100644 --- a/drivers/ssb/pci.c +++ b/drivers/ssb/pci.c @@ -227,7 +227,7 @@ static u8 ssb_sprom_crc(const u16 *sprom, u16 size) return crc; } -static int sprom_check_crc(const u16 *sprom, u16 size) +static int sprom_check_crc(const u16 *sprom, size_t size) { u8 crc; u8 expected_crc; @@ -242,12 +242,14 @@ static int sprom_check_crc(const u16 *sprom, u16 size) return 0; } -static void sprom_do_read(struct ssb_bus *bus, u16 *sprom) +static int sprom_do_read(struct ssb_bus *bus, u16 *sprom) { int i; for (i = 0; i < bus->sprom_size; i++) sprom[i] = ioread16(bus->mmio + SSB_SPROM_BASE + (i * 2)); + + return 0; } static int sprom_do_write(struct ssb_bus *bus, const u16 *sprom) @@ -660,71 +662,18 @@ const struct ssb_bus_ops ssb_pci_ops = { .write32 = ssb_pci_write32, }; -static int sprom2hex(const u16 *sprom, char *buf, size_t buf_len, u16 size) -{ - int i, pos = 0; - - for (i = 0; i < size; i++) - pos += snprintf(buf + pos, buf_len - pos - 1, - "%04X", swab16(sprom[i]) & 0xFFFF); - pos += snprintf(buf + pos, buf_len - pos - 1, "\n"); - - return pos + 1; -} - -static int hex2sprom(u16 *sprom, const char *dump, size_t len, u16 size) -{ - char tmp[5] = { 0 }; - int cnt = 0; - unsigned long parsed; - - if (len < size * 2) - return -EINVAL; - - while (cnt < size) { - memcpy(tmp, dump, 4); - dump += 4; - parsed = simple_strtoul(tmp, NULL, 16); - sprom[cnt++] = swab16((u16)parsed); - } - - return 0; -} - static ssize_t ssb_pci_attr_sprom_show(struct device *pcidev, struct device_attribute *attr, char *buf) { struct pci_dev *pdev = container_of(pcidev, struct pci_dev, dev); struct ssb_bus *bus; - u16 *sprom; - int err = -ENODEV; - ssize_t count = 0; bus = ssb_pci_dev_to_bus(pdev); if (!bus) - goto out; - err = -ENOMEM; - sprom = kcalloc(bus->sprom_size, sizeof(u16), GFP_KERNEL); - if (!sprom) - goto out; + return -ENODEV; - /* Use interruptible locking, as the SPROM write might - * be holding the lock for several seconds. So allow userspace - * to cancel operation. */ - err = -ERESTARTSYS; - if (mutex_lock_interruptible(&bus->pci_sprom_mutex)) - goto out_kfree; - sprom_do_read(bus, sprom); - mutex_unlock(&bus->pci_sprom_mutex); - - count = sprom2hex(sprom, buf, PAGE_SIZE, bus->sprom_size); - err = 0; - -out_kfree: - kfree(sprom); -out: - return err ? err : count; + return ssb_attr_sprom_show(bus, buf, sprom_do_read); } static ssize_t ssb_pci_attr_sprom_store(struct device *pcidev, @@ -733,55 +682,13 @@ static ssize_t ssb_pci_attr_sprom_store(struct device *pcidev, { struct pci_dev *pdev = container_of(pcidev, struct pci_dev, dev); struct ssb_bus *bus; - u16 *sprom; - int res = 0, err = -ENODEV; bus = ssb_pci_dev_to_bus(pdev); if (!bus) - goto out; - err = -ENOMEM; - sprom = kcalloc(bus->sprom_size, sizeof(u16), GFP_KERNEL); - if (!sprom) - goto out; - err = hex2sprom(sprom, buf, count, bus->sprom_size); - if (err) { - err = -EINVAL; - goto out_kfree; - } - err = sprom_check_crc(sprom, bus->sprom_size); - if (err) { - err = -EINVAL; - goto out_kfree; - } + return -ENODEV; - /* Use interruptible locking, as the SPROM write might - * be holding the lock for several seconds. So allow userspace - * to cancel operation. */ - err = -ERESTARTSYS; - if (mutex_lock_interruptible(&bus->pci_sprom_mutex)) - goto out_kfree; - err = ssb_devices_freeze(bus); - if (err == -EOPNOTSUPP) { - ssb_printk(KERN_ERR PFX "SPROM write: Could not freeze devices. " - "No suspend support. Is CONFIG_PM enabled?\n"); - goto out_unlock; - } - if (err) { - ssb_printk(KERN_ERR PFX "SPROM write: Could not freeze all devices\n"); - goto out_unlock; - } - res = sprom_do_write(bus, sprom); - err = ssb_devices_thaw(bus); - if (err) - ssb_printk(KERN_ERR PFX "SPROM write: Could not thaw all devices\n"); -out_unlock: - mutex_unlock(&bus->pci_sprom_mutex); -out_kfree: - kfree(sprom); -out: - if (res) - return res; - return err ? err : count; + return ssb_attr_sprom_store(bus, buf, count, + sprom_check_crc, sprom_do_write); } static DEVICE_ATTR(ssb_sprom, 0600, @@ -808,7 +715,7 @@ int ssb_pci_init(struct ssb_bus *bus) return 0; pdev = bus->host_pci; - mutex_init(&bus->pci_sprom_mutex); + mutex_init(&bus->sprom_mutex); err = device_create_file(&pdev->dev, &dev_attr_ssb_sprom); if (err) goto out; diff --git a/drivers/ssb/pcmcia.c b/drivers/ssb/pcmcia.c index 84b3a845a8a8..cd49f7c65531 100644 --- a/drivers/ssb/pcmcia.c +++ b/drivers/ssb/pcmcia.c @@ -3,7 +3,7 @@ * PCMCIA-Hostbus related functions * * Copyright 2006 Johannes Berg - * Copyright 2007 Michael Buesch + * Copyright 2007-2008 Michael Buesch * * Licensed under the GNU/GPL. See COPYING for details. */ @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -26,59 +27,132 @@ #define SSB_VERBOSE_PCMCIACORESWITCH_DEBUG 0 +/* PCMCIA configuration registers */ +#define SSB_PCMCIA_CORECTL 0x00 +#define SSB_PCMCIA_CORECTL_RESET 0x80 /* Core reset */ +#define SSB_PCMCIA_CORECTL_IRQEN 0x04 /* IRQ enable */ +#define SSB_PCMCIA_CORECTL_FUNCEN 0x01 /* Function enable */ +#define SSB_PCMCIA_CORECTL2 0x80 +#define SSB_PCMCIA_ADDRESS0 0x2E +#define SSB_PCMCIA_ADDRESS1 0x30 +#define SSB_PCMCIA_ADDRESS2 0x32 +#define SSB_PCMCIA_MEMSEG 0x34 +#define SSB_PCMCIA_SPROMCTL 0x36 +#define SSB_PCMCIA_SPROMCTL_IDLE 0 +#define SSB_PCMCIA_SPROMCTL_WRITE 1 +#define SSB_PCMCIA_SPROMCTL_READ 2 +#define SSB_PCMCIA_SPROMCTL_WRITEEN 4 +#define SSB_PCMCIA_SPROMCTL_WRITEDIS 7 +#define SSB_PCMCIA_SPROMCTL_DONE 8 +#define SSB_PCMCIA_SPROM_DATALO 0x38 +#define SSB_PCMCIA_SPROM_DATAHI 0x3A +#define SSB_PCMCIA_SPROM_ADDRLO 0x3C +#define SSB_PCMCIA_SPROM_ADDRHI 0x3E + +/* Hardware invariants CIS tuples */ +#define SSB_PCMCIA_CIS 0x80 +#define SSB_PCMCIA_CIS_ID 0x01 +#define SSB_PCMCIA_CIS_BOARDREV 0x02 +#define SSB_PCMCIA_CIS_PA 0x03 +#define SSB_PCMCIA_CIS_PA_PA0B0_LO 0 +#define SSB_PCMCIA_CIS_PA_PA0B0_HI 1 +#define SSB_PCMCIA_CIS_PA_PA0B1_LO 2 +#define SSB_PCMCIA_CIS_PA_PA0B1_HI 3 +#define SSB_PCMCIA_CIS_PA_PA0B2_LO 4 +#define SSB_PCMCIA_CIS_PA_PA0B2_HI 5 +#define SSB_PCMCIA_CIS_PA_ITSSI 6 +#define SSB_PCMCIA_CIS_PA_MAXPOW 7 +#define SSB_PCMCIA_CIS_OEMNAME 0x04 +#define SSB_PCMCIA_CIS_CCODE 0x05 +#define SSB_PCMCIA_CIS_ANTENNA 0x06 +#define SSB_PCMCIA_CIS_ANTGAIN 0x07 +#define SSB_PCMCIA_CIS_BFLAGS 0x08 +#define SSB_PCMCIA_CIS_LEDS 0x09 + +/* PCMCIA SPROM size. */ +#define SSB_PCMCIA_SPROM_SIZE 256 +#define SSB_PCMCIA_SPROM_SIZE_BYTES (SSB_PCMCIA_SPROM_SIZE * sizeof(u16)) + + +/* Write to a PCMCIA configuration register. */ +static int ssb_pcmcia_cfg_write(struct ssb_bus *bus, u8 offset, u8 value) +{ + conf_reg_t reg; + int res; + + memset(®, 0, sizeof(reg)); + reg.Offset = offset; + reg.Action = CS_WRITE; + reg.Value = value; + res = pcmcia_access_configuration_register(bus->host_pcmcia, ®); + if (unlikely(res != CS_SUCCESS)) + return -EBUSY; + + return 0; +} + +/* Read from a PCMCIA configuration register. */ +static int ssb_pcmcia_cfg_read(struct ssb_bus *bus, u8 offset, u8 *value) +{ + conf_reg_t reg; + int res; + + memset(®, 0, sizeof(reg)); + reg.Offset = offset; + reg.Action = CS_READ; + res = pcmcia_access_configuration_register(bus->host_pcmcia, ®); + if (unlikely(res != CS_SUCCESS)) + return -EBUSY; + *value = reg.Value; + + return 0; +} + int ssb_pcmcia_switch_coreidx(struct ssb_bus *bus, u8 coreidx) { - struct pcmcia_device *pdev = bus->host_pcmcia; int err; int attempts = 0; u32 cur_core; - conf_reg_t reg; u32 addr; u32 read_addr; + u8 val; addr = (coreidx * SSB_CORE_SIZE) + SSB_ENUM_BASE; while (1) { - reg.Action = CS_WRITE; - reg.Offset = 0x2E; - reg.Value = (addr & 0x0000F000) >> 12; - err = pcmcia_access_configuration_register(pdev, ®); - if (err != CS_SUCCESS) + err = ssb_pcmcia_cfg_write(bus, SSB_PCMCIA_ADDRESS0, + (addr & 0x0000F000) >> 12); + if (err) goto error; - reg.Offset = 0x30; - reg.Value = (addr & 0x00FF0000) >> 16; - err = pcmcia_access_configuration_register(pdev, ®); - if (err != CS_SUCCESS) + err = ssb_pcmcia_cfg_write(bus, SSB_PCMCIA_ADDRESS1, + (addr & 0x00FF0000) >> 16); + if (err) goto error; - reg.Offset = 0x32; - reg.Value = (addr & 0xFF000000) >> 24; - err = pcmcia_access_configuration_register(pdev, ®); - if (err != CS_SUCCESS) + err = ssb_pcmcia_cfg_write(bus, SSB_PCMCIA_ADDRESS2, + (addr & 0xFF000000) >> 24); + if (err) goto error; read_addr = 0; - reg.Action = CS_READ; - reg.Offset = 0x2E; - err = pcmcia_access_configuration_register(pdev, ®); - if (err != CS_SUCCESS) + err = ssb_pcmcia_cfg_read(bus, SSB_PCMCIA_ADDRESS0, &val); + if (err) goto error; - read_addr |= ((u32)(reg.Value & 0x0F)) << 12; - reg.Offset = 0x30; - err = pcmcia_access_configuration_register(pdev, ®); - if (err != CS_SUCCESS) + read_addr |= ((u32)(val & 0x0F)) << 12; + err = ssb_pcmcia_cfg_read(bus, SSB_PCMCIA_ADDRESS1, &val); + if (err) goto error; - read_addr |= ((u32)reg.Value) << 16; - reg.Offset = 0x32; - err = pcmcia_access_configuration_register(pdev, ®); - if (err != CS_SUCCESS) + read_addr |= ((u32)val) << 16; + err = ssb_pcmcia_cfg_read(bus, SSB_PCMCIA_ADDRESS2, &val); + if (err) goto error; - read_addr |= ((u32)reg.Value) << 24; + read_addr |= ((u32)val) << 24; cur_core = (read_addr - SSB_ENUM_BASE) / SSB_CORE_SIZE; if (cur_core == coreidx) break; + err = -ETIMEDOUT; if (attempts++ > SSB_BAR0_MAX_RETRIES) goto error; udelay(10); @@ -87,7 +161,7 @@ int ssb_pcmcia_switch_coreidx(struct ssb_bus *bus, return 0; error: ssb_printk(KERN_ERR PFX "Failed to switch to core %u\n", coreidx); - return -ENODEV; + return err; } int ssb_pcmcia_switch_core(struct ssb_bus *bus, @@ -112,27 +186,21 @@ int ssb_pcmcia_switch_core(struct ssb_bus *bus, int ssb_pcmcia_switch_segment(struct ssb_bus *bus, u8 seg) { int attempts = 0; - conf_reg_t reg; - int res; + int err; + u8 val; SSB_WARN_ON((seg != 0) && (seg != 1)); - reg.Offset = 0x34; - reg.Function = 0; while (1) { - reg.Action = CS_WRITE; - reg.Value = seg; - res = pcmcia_access_configuration_register(bus->host_pcmcia, ®); - if (unlikely(res != CS_SUCCESS)) + err = ssb_pcmcia_cfg_write(bus, SSB_PCMCIA_MEMSEG, seg); + if (err) goto error; - reg.Value = 0xFF; - reg.Action = CS_READ; - res = pcmcia_access_configuration_register(bus->host_pcmcia, ®); - if (unlikely(res != CS_SUCCESS)) + err = ssb_pcmcia_cfg_read(bus, SSB_PCMCIA_MEMSEG, &val); + if (err) goto error; - - if (reg.Value == seg) + if (val == seg) break; + err = -ETIMEDOUT; if (unlikely(attempts++ > SSB_BAR0_MAX_RETRIES)) goto error; udelay(10); @@ -142,7 +210,7 @@ int ssb_pcmcia_switch_segment(struct ssb_bus *bus, u8 seg) return 0; error: ssb_printk(KERN_ERR PFX "Failed to switch pcmcia segment\n"); - return -ENODEV; + return err; } static int select_core_and_segment(struct ssb_device *dev, @@ -276,18 +344,344 @@ const struct ssb_bus_ops ssb_pcmcia_ops = { .write32 = ssb_pcmcia_write32, }; -#include +static int ssb_pcmcia_sprom_command(struct ssb_bus *bus, u8 command) +{ + unsigned int i; + int err; + u8 value; + + err = ssb_pcmcia_cfg_write(bus, SSB_PCMCIA_SPROMCTL, command); + if (err) + return err; + for (i = 0; i < 1000; i++) { + err = ssb_pcmcia_cfg_read(bus, SSB_PCMCIA_SPROMCTL, &value); + if (err) + return err; + if (value & SSB_PCMCIA_SPROMCTL_DONE) + return 0; + udelay(10); + } + + return -ETIMEDOUT; +} + +/* offset is the 16bit word offset */ +static int ssb_pcmcia_sprom_read(struct ssb_bus *bus, u16 offset, u16 *value) +{ + int err; + u8 lo, hi; + + offset *= 2; /* Make byte offset */ + + err = ssb_pcmcia_cfg_write(bus, SSB_PCMCIA_SPROM_ADDRLO, + (offset & 0x00FF)); + if (err) + return err; + err = ssb_pcmcia_cfg_write(bus, SSB_PCMCIA_SPROM_ADDRHI, + (offset & 0xFF00) >> 8); + if (err) + return err; + err = ssb_pcmcia_sprom_command(bus, SSB_PCMCIA_SPROMCTL_READ); + if (err) + return err; + err = ssb_pcmcia_cfg_read(bus, SSB_PCMCIA_SPROM_DATALO, &lo); + if (err) + return err; + err = ssb_pcmcia_cfg_read(bus, SSB_PCMCIA_SPROM_DATAHI, &hi); + if (err) + return err; + *value = (lo | (((u16)hi) << 8)); + + return 0; +} + +/* offset is the 16bit word offset */ +static int ssb_pcmcia_sprom_write(struct ssb_bus *bus, u16 offset, u16 value) +{ + int err; + + offset *= 2; /* Make byte offset */ + + err = ssb_pcmcia_cfg_write(bus, SSB_PCMCIA_SPROM_ADDRLO, + (offset & 0x00FF)); + if (err) + return err; + err = ssb_pcmcia_cfg_write(bus, SSB_PCMCIA_SPROM_ADDRHI, + (offset & 0xFF00) >> 8); + if (err) + return err; + err = ssb_pcmcia_cfg_write(bus, SSB_PCMCIA_SPROM_DATALO, + (value & 0x00FF)); + if (err) + return err; + err = ssb_pcmcia_cfg_write(bus, SSB_PCMCIA_SPROM_DATAHI, + (value & 0xFF00) >> 8); + if (err) + return err; + err = ssb_pcmcia_sprom_command(bus, SSB_PCMCIA_SPROMCTL_WRITE); + if (err) + return err; + msleep(20); + + return 0; +} + +/* Read the SPROM image. bufsize is in 16bit words. */ +static int ssb_pcmcia_sprom_read_all(struct ssb_bus *bus, u16 *sprom) +{ + int err, i; + + for (i = 0; i < SSB_PCMCIA_SPROM_SIZE; i++) { + err = ssb_pcmcia_sprom_read(bus, i, &sprom[i]); + if (err) + return err; + } + + return 0; +} + +/* Write the SPROM image. size is in 16bit words. */ +static int ssb_pcmcia_sprom_write_all(struct ssb_bus *bus, const u16 *sprom) +{ + int i, err; + bool failed = 0; + size_t size = SSB_PCMCIA_SPROM_SIZE; + + ssb_printk(KERN_NOTICE PFX + "Writing SPROM. Do NOT turn off the power! " + "Please stand by...\n"); + err = ssb_pcmcia_sprom_command(bus, SSB_PCMCIA_SPROMCTL_WRITEEN); + if (err) { + ssb_printk(KERN_NOTICE PFX + "Could not enable SPROM write access.\n"); + return -EBUSY; + } + ssb_printk(KERN_NOTICE PFX "[ 0%%"); + msleep(500); + for (i = 0; i < size; i++) { + if (i == size / 4) + ssb_printk("25%%"); + else if (i == size / 2) + ssb_printk("50%%"); + else if (i == (size * 3) / 4) + ssb_printk("75%%"); + else if (i % 2) + ssb_printk("."); + err = ssb_pcmcia_sprom_write(bus, i, sprom[i]); + if (err) { + ssb_printk("\n" KERN_NOTICE PFX + "Failed to write to SPROM.\n"); + failed = 1; + break; + } + } + err = ssb_pcmcia_sprom_command(bus, SSB_PCMCIA_SPROMCTL_WRITEDIS); + if (err) { + ssb_printk("\n" KERN_NOTICE PFX + "Could not disable SPROM write access.\n"); + failed = 1; + } + msleep(500); + if (!failed) { + ssb_printk("100%% ]\n"); + ssb_printk(KERN_NOTICE PFX "SPROM written.\n"); + } + + return failed ? -EBUSY : 0; +} + +static int ssb_pcmcia_sprom_check_crc(const u16 *sprom, size_t size) +{ + //TODO + return 0; +} + +#define GOTO_ERROR_ON(condition, description) do { \ + if (unlikely(condition)) { \ + error_description = description; \ + goto error; \ + } \ + } while (0) + int ssb_pcmcia_get_invariants(struct ssb_bus *bus, struct ssb_init_invariants *iv) { - //TODO - random_ether_addr(iv->sprom.il0mac); + tuple_t tuple; + int res; + unsigned char buf[32]; + struct ssb_sprom *sprom = &iv->sprom; + struct ssb_boardinfo *bi = &iv->boardinfo; + const char *error_description; + + memset(sprom, 0xFF, sizeof(*sprom)); + sprom->revision = 1; + sprom->boardflags_lo = 0; + sprom->boardflags_hi = 0; + + /* First fetch the MAC address. */ + memset(&tuple, 0, sizeof(tuple)); + tuple.DesiredTuple = CISTPL_FUNCE; + tuple.TupleData = buf; + tuple.TupleDataMax = sizeof(buf); + res = pcmcia_get_first_tuple(bus->host_pcmcia, &tuple); + GOTO_ERROR_ON(res != CS_SUCCESS, "MAC first tpl"); + res = pcmcia_get_tuple_data(bus->host_pcmcia, &tuple); + GOTO_ERROR_ON(res != CS_SUCCESS, "MAC first tpl data"); + while (1) { + GOTO_ERROR_ON(tuple.TupleDataLen < 1, "MAC tpl < 1"); + if (tuple.TupleData[0] == CISTPL_FUNCE_LAN_NODE_ID) + break; + res = pcmcia_get_next_tuple(bus->host_pcmcia, &tuple); + GOTO_ERROR_ON(res != CS_SUCCESS, "MAC next tpl"); + res = pcmcia_get_tuple_data(bus->host_pcmcia, &tuple); + GOTO_ERROR_ON(res != CS_SUCCESS, "MAC next tpl data"); + } + GOTO_ERROR_ON(tuple.TupleDataLen != ETH_ALEN + 2, "MAC tpl size"); + memcpy(sprom->il0mac, &tuple.TupleData[2], ETH_ALEN); + + /* Fetch the vendor specific tuples. */ + memset(&tuple, 0, sizeof(tuple)); + tuple.DesiredTuple = SSB_PCMCIA_CIS; + tuple.TupleData = buf; + tuple.TupleDataMax = sizeof(buf); + res = pcmcia_get_first_tuple(bus->host_pcmcia, &tuple); + GOTO_ERROR_ON(res != CS_SUCCESS, "VEN first tpl"); + res = pcmcia_get_tuple_data(bus->host_pcmcia, &tuple); + GOTO_ERROR_ON(res != CS_SUCCESS, "VEN first tpl data"); + while (1) { + GOTO_ERROR_ON(tuple.TupleDataLen < 1, "VEN tpl < 1"); + switch (tuple.TupleData[0]) { + case SSB_PCMCIA_CIS_ID: + GOTO_ERROR_ON((tuple.TupleDataLen != 5) && + (tuple.TupleDataLen != 7), + "id tpl size"); + bi->vendor = tuple.TupleData[1] | + ((u16)tuple.TupleData[2] << 8); + break; + case SSB_PCMCIA_CIS_BOARDREV: + GOTO_ERROR_ON(tuple.TupleDataLen != 2, + "boardrev tpl size"); + sprom->board_rev = tuple.TupleData[1]; + break; + case SSB_PCMCIA_CIS_PA: + GOTO_ERROR_ON(tuple.TupleDataLen != 9, + "pa tpl size"); + sprom->pa0b0 = tuple.TupleData[1] | + ((u16)tuple.TupleData[2] << 8); + sprom->pa0b1 = tuple.TupleData[3] | + ((u16)tuple.TupleData[4] << 8); + sprom->pa0b2 = tuple.TupleData[5] | + ((u16)tuple.TupleData[6] << 8); + sprom->itssi_a = tuple.TupleData[7]; + sprom->itssi_bg = tuple.TupleData[7]; + sprom->maxpwr_a = tuple.TupleData[8]; + sprom->maxpwr_bg = tuple.TupleData[8]; + break; + case SSB_PCMCIA_CIS_OEMNAME: + /* We ignore this. */ + break; + case SSB_PCMCIA_CIS_CCODE: + GOTO_ERROR_ON(tuple.TupleDataLen != 2, + "ccode tpl size"); + sprom->country_code = tuple.TupleData[1]; + break; + case SSB_PCMCIA_CIS_ANTENNA: + GOTO_ERROR_ON(tuple.TupleDataLen != 2, + "ant tpl size"); + sprom->ant_available_a = tuple.TupleData[1]; + sprom->ant_available_bg = tuple.TupleData[1]; + break; + case SSB_PCMCIA_CIS_ANTGAIN: + GOTO_ERROR_ON(tuple.TupleDataLen != 2, + "antg tpl size"); + sprom->antenna_gain.ghz24.a0 = tuple.TupleData[1]; + sprom->antenna_gain.ghz24.a1 = tuple.TupleData[1]; + sprom->antenna_gain.ghz24.a2 = tuple.TupleData[1]; + sprom->antenna_gain.ghz24.a3 = tuple.TupleData[1]; + sprom->antenna_gain.ghz5.a0 = tuple.TupleData[1]; + sprom->antenna_gain.ghz5.a1 = tuple.TupleData[1]; + sprom->antenna_gain.ghz5.a2 = tuple.TupleData[1]; + sprom->antenna_gain.ghz5.a3 = tuple.TupleData[1]; + break; + case SSB_PCMCIA_CIS_BFLAGS: + GOTO_ERROR_ON(tuple.TupleDataLen != 3, + "bfl tpl size"); + sprom->boardflags_lo = tuple.TupleData[1] | + ((u16)tuple.TupleData[2] << 8); + break; + case SSB_PCMCIA_CIS_LEDS: + GOTO_ERROR_ON(tuple.TupleDataLen != 5, + "leds tpl size"); + sprom->gpio0 = tuple.TupleData[1]; + sprom->gpio1 = tuple.TupleData[2]; + sprom->gpio2 = tuple.TupleData[3]; + sprom->gpio3 = tuple.TupleData[4]; + break; + } + res = pcmcia_get_next_tuple(bus->host_pcmcia, &tuple); + if (res == CS_NO_MORE_ITEMS) + break; + GOTO_ERROR_ON(res != CS_SUCCESS, "VEN next tpl"); + res = pcmcia_get_tuple_data(bus->host_pcmcia, &tuple); + GOTO_ERROR_ON(res != CS_SUCCESS, "VEN next tpl data"); + } + return 0; +error: + ssb_printk(KERN_ERR PFX + "PCMCIA: Failed to fetch device invariants: %s\n", + error_description); + return -ENODEV; +} + +static ssize_t ssb_pcmcia_attr_sprom_show(struct device *pcmciadev, + struct device_attribute *attr, + char *buf) +{ + struct pcmcia_device *pdev = + container_of(pcmciadev, struct pcmcia_device, dev); + struct ssb_bus *bus; + + bus = ssb_pcmcia_dev_to_bus(pdev); + if (!bus) + return -ENODEV; + + return ssb_attr_sprom_show(bus, buf, + ssb_pcmcia_sprom_read_all); +} + +static ssize_t ssb_pcmcia_attr_sprom_store(struct device *pcmciadev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pcmcia_device *pdev = + container_of(pcmciadev, struct pcmcia_device, dev); + struct ssb_bus *bus; + + bus = ssb_pcmcia_dev_to_bus(pdev); + if (!bus) + return -ENODEV; + + return ssb_attr_sprom_store(bus, buf, count, + ssb_pcmcia_sprom_check_crc, + ssb_pcmcia_sprom_write_all); +} + +static DEVICE_ATTR(ssb_sprom, 0600, + ssb_pcmcia_attr_sprom_show, + ssb_pcmcia_attr_sprom_store); + +void ssb_pcmcia_exit(struct ssb_bus *bus) +{ + if (bus->bustype != SSB_BUSTYPE_PCMCIA) + return; + + device_remove_file(&bus->host_pcmcia->dev, &dev_attr_ssb_sprom); } int ssb_pcmcia_init(struct ssb_bus *bus) { - conf_reg_t reg; + u8 val, offset; int err; if (bus->bustype != SSB_BUSTYPE_PCMCIA) @@ -298,22 +692,26 @@ int ssb_pcmcia_init(struct ssb_bus *bus) ssb_pcmcia_switch_segment(bus, 0); /* Init IRQ routing */ - reg.Action = CS_READ; - reg.Function = 0; if (bus->chip_id == 0x4306) - reg.Offset = 0x00; + offset = SSB_PCMCIA_CORECTL; else - reg.Offset = 0x80; - err = pcmcia_access_configuration_register(bus->host_pcmcia, ®); - if (err != CS_SUCCESS) + offset = SSB_PCMCIA_CORECTL2; + err = ssb_pcmcia_cfg_read(bus, offset, &val); + if (err) goto error; - reg.Action = CS_WRITE; - reg.Value |= 0x04 | 0x01; - err = pcmcia_access_configuration_register(bus->host_pcmcia, ®); - if (err != CS_SUCCESS) + val |= SSB_PCMCIA_CORECTL_IRQEN | SSB_PCMCIA_CORECTL_FUNCEN; + err = ssb_pcmcia_cfg_write(bus, offset, val); + if (err) + goto error; + + bus->sprom_size = SSB_PCMCIA_SPROM_SIZE; + mutex_init(&bus->sprom_mutex); + err = device_create_file(&bus->host_pcmcia->dev, &dev_attr_ssb_sprom); + if (err) goto error; return 0; error: - return -ENODEV; + ssb_printk(KERN_ERR PFX "Failed to initialize PCMCIA host device\n"); + return err; } diff --git a/drivers/ssb/sprom.c b/drivers/ssb/sprom.c new file mode 100644 index 000000000000..3668edb39315 --- /dev/null +++ b/drivers/ssb/sprom.c @@ -0,0 +1,133 @@ +/* + * Sonics Silicon Backplane + * Common SPROM support routines + * + * Copyright (C) 2005-2008 Michael Buesch + * Copyright (C) 2005 Martin Langer + * Copyright (C) 2005 Stefano Brivio + * Copyright (C) 2005 Danny van Dyk + * Copyright (C) 2005 Andreas Jaggi + * + * Licensed under the GNU/GPL. See COPYING for details. + */ + +#include "ssb_private.h" + + +static int sprom2hex(const u16 *sprom, char *buf, size_t buf_len, + size_t sprom_size_words) +{ + int i, pos = 0; + + for (i = 0; i < sprom_size_words; i++) + pos += snprintf(buf + pos, buf_len - pos - 1, + "%04X", swab16(sprom[i]) & 0xFFFF); + pos += snprintf(buf + pos, buf_len - pos - 1, "\n"); + + return pos + 1; +} + +static int hex2sprom(u16 *sprom, const char *dump, size_t len, + size_t sprom_size_words) +{ + char tmp[5] = { 0 }; + int cnt = 0; + unsigned long parsed; + + if (len < sprom_size_words * 2) + return -EINVAL; + + while (cnt < sprom_size_words) { + memcpy(tmp, dump, 4); + dump += 4; + parsed = simple_strtoul(tmp, NULL, 16); + sprom[cnt++] = swab16((u16)parsed); + } + + return 0; +} + +/* Common sprom device-attribute show-handler */ +ssize_t ssb_attr_sprom_show(struct ssb_bus *bus, char *buf, + int (*sprom_read)(struct ssb_bus *bus, u16 *sprom)) +{ + u16 *sprom; + int err = -ENOMEM; + ssize_t count = 0; + size_t sprom_size_words = bus->sprom_size; + + sprom = kcalloc(sprom_size_words, sizeof(u16), GFP_KERNEL); + if (!sprom) + goto out; + + /* Use interruptible locking, as the SPROM write might + * be holding the lock for several seconds. So allow userspace + * to cancel operation. */ + err = -ERESTARTSYS; + if (mutex_lock_interruptible(&bus->sprom_mutex)) + goto out_kfree; + err = sprom_read(bus, sprom); + mutex_unlock(&bus->sprom_mutex); + + if (!err) + count = sprom2hex(sprom, buf, PAGE_SIZE, sprom_size_words); + +out_kfree: + kfree(sprom); +out: + return err ? err : count; +} + +/* Common sprom device-attribute store-handler */ +ssize_t ssb_attr_sprom_store(struct ssb_bus *bus, + const char *buf, size_t count, + int (*sprom_check_crc)(const u16 *sprom, size_t size), + int (*sprom_write)(struct ssb_bus *bus, const u16 *sprom)) +{ + u16 *sprom; + int res = 0, err = -ENOMEM; + size_t sprom_size_words = bus->sprom_size; + + sprom = kcalloc(bus->sprom_size, sizeof(u16), GFP_KERNEL); + if (!sprom) + goto out; + err = hex2sprom(sprom, buf, count, sprom_size_words); + if (err) { + err = -EINVAL; + goto out_kfree; + } + err = sprom_check_crc(sprom, sprom_size_words); + if (err) { + err = -EINVAL; + goto out_kfree; + } + + /* Use interruptible locking, as the SPROM write might + * be holding the lock for several seconds. So allow userspace + * to cancel operation. */ + err = -ERESTARTSYS; + if (mutex_lock_interruptible(&bus->sprom_mutex)) + goto out_kfree; + err = ssb_devices_freeze(bus); + if (err == -EOPNOTSUPP) { + ssb_printk(KERN_ERR PFX "SPROM write: Could not freeze devices. " + "No suspend support. Is CONFIG_PM enabled?\n"); + goto out_unlock; + } + if (err) { + ssb_printk(KERN_ERR PFX "SPROM write: Could not freeze all devices\n"); + goto out_unlock; + } + res = sprom_write(bus, sprom); + err = ssb_devices_thaw(bus); + if (err) + ssb_printk(KERN_ERR PFX "SPROM write: Could not thaw all devices\n"); +out_unlock: + mutex_unlock(&bus->sprom_mutex); +out_kfree: + kfree(sprom); +out: + if (res) + return res; + return err ? err : count; +} diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h index d03b20983b1e..a83bf7a4d80b 100644 --- a/drivers/ssb/ssb_private.h +++ b/drivers/ssb/ssb_private.h @@ -81,6 +81,7 @@ extern int ssb_pcmcia_switch_segment(struct ssb_bus *bus, u8 seg); extern int ssb_pcmcia_get_invariants(struct ssb_bus *bus, struct ssb_init_invariants *iv); +extern void ssb_pcmcia_exit(struct ssb_bus *bus); extern int ssb_pcmcia_init(struct ssb_bus *bus); extern const struct ssb_bus_ops ssb_pcmcia_ops; #else /* CONFIG_SSB_PCMCIAHOST */ @@ -99,6 +100,9 @@ static inline int ssb_pcmcia_switch_segment(struct ssb_bus *bus, { return 0; } +static inline void ssb_pcmcia_exit(struct ssb_bus *bus) +{ +} static inline int ssb_pcmcia_init(struct ssb_bus *bus) { return 0; @@ -113,6 +117,17 @@ extern int ssb_bus_scan(struct ssb_bus *bus, extern void ssb_iounmap(struct ssb_bus *ssb); +/* sprom.c */ +extern +ssize_t ssb_attr_sprom_show(struct ssb_bus *bus, char *buf, + int (*sprom_read)(struct ssb_bus *bus, u16 *sprom)); +extern +ssize_t ssb_attr_sprom_store(struct ssb_bus *bus, + const char *buf, size_t count, + int (*sprom_check_crc)(const u16 *sprom, size_t size), + int (*sprom_write)(struct ssb_bus *bus, const u16 *sprom)); + + /* core.c */ extern u32 ssb_calc_clock_rate(u32 plltype, u32 n, u32 m); extern int ssb_devices_freeze(struct ssb_bus *bus); @@ -120,6 +135,8 @@ extern int ssb_devices_thaw(struct ssb_bus *bus); extern struct ssb_bus *ssb_pci_dev_to_bus(struct pci_dev *pdev); int ssb_for_each_bus_call(unsigned long data, int (*func)(struct ssb_bus *bus, unsigned long data)); +extern struct ssb_bus *ssb_pcmcia_dev_to_bus(struct pcmcia_device *pdev); + /* b43_pci_bridge.c */ #ifdef CONFIG_SSB_B43_PCI_BRIDGE diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index b7c388972fcf..8644e03cf588 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -245,9 +245,9 @@ struct ssb_bus { /* Pointer to the PCMCIA device (only if bustype == SSB_BUSTYPE_PCMCIA). */ struct pcmcia_device *host_pcmcia; -#ifdef CONFIG_SSB_PCIHOST +#ifdef CONFIG_SSB_SPROM /* Mutex to protect the SPROM writing. */ - struct mutex pci_sprom_mutex; + struct mutex sprom_mutex; #endif /* ID information about the Chip. */ -- cgit v1.2.3-71-gd317 From 3e94794355724f77dc6cbb5ad956f7c72d8313a4 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 22 Feb 2008 19:55:15 +0900 Subject: smc91x: introduce platform data flags V2 This patch introduces struct smc91x_platdata and modifies the driver so bus width is checked during run time using SMC_nBIT() instead of SMC_CAN_USE_nBIT. V2 keeps static configuration lean using SMC_DYNAMIC_BUS_CONFIG. Signed-off-by: Magnus Damm Acked-by: Nicolas Pitre Signed-off-by: Jeff Garzik --- drivers/net/smc91x.c | 34 +++++++++++++++++++++++++----- drivers/net/smc91x.h | 57 +++++++++++++++++++++++++++++++------------------- include/linux/smc91x.h | 13 ++++++++++++ 3 files changed, 77 insertions(+), 27 deletions(-) create mode 100644 include/linux/smc91x.h (limited to 'include/linux') diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c index d0ef80ae018a..97bdb2a43bc8 100644 --- a/drivers/net/smc91x.c +++ b/drivers/net/smc91x.c @@ -1997,6 +1997,8 @@ err_out: static int smc_enable_device(struct platform_device *pdev) { + struct net_device *ndev = platform_get_drvdata(pdev); + struct smc_local *lp = netdev_priv(ndev); unsigned long flags; unsigned char ecor, ecsr; void __iomem *addr; @@ -2039,7 +2041,7 @@ static int smc_enable_device(struct platform_device *pdev) * Set the appropriate byte/word mode. */ ecsr = readb(addr + (ECSR << SMC_IO_SHIFT)) & ~ECSR_IOIS8; - if (!SMC_CAN_USE_16BIT) + if (!SMC_16BIT(lp)) ecsr |= ECSR_IOIS8; writeb(ecsr, addr + (ECSR << SMC_IO_SHIFT)); local_irq_restore(flags); @@ -2124,10 +2126,11 @@ static void smc_release_datacs(struct platform_device *pdev, struct net_device * */ static int smc_drv_probe(struct platform_device *pdev) { + struct smc91x_platdata *pd = pdev->dev.platform_data; + struct smc_local *lp; struct net_device *ndev; struct resource *res, *ires; unsigned int __iomem *addr; - unsigned long irq_flags = SMC_IRQ_FLAGS; int ret; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smc91x-regs"); @@ -2152,6 +2155,27 @@ static int smc_drv_probe(struct platform_device *pdev) } SET_NETDEV_DEV(ndev, &pdev->dev); + /* get configuration from platform data, only allow use of + * bus width if both SMC_CAN_USE_xxx and SMC91X_USE_xxx are set. + */ + + lp = netdev_priv(ndev); + lp->cfg.irq_flags = SMC_IRQ_FLAGS; + +#ifdef SMC_DYNAMIC_BUS_CONFIG + if (pd) + memcpy(&lp->cfg, pd, sizeof(lp->cfg)); + else { + lp->cfg.flags = SMC91X_USE_8BIT; + lp->cfg.flags |= SMC91X_USE_16BIT; + lp->cfg.flags |= SMC91X_USE_32BIT; + } + + lp->cfg.flags &= ~(SMC_CAN_USE_8BIT ? 0 : SMC91X_USE_8BIT); + lp->cfg.flags &= ~(SMC_CAN_USE_16BIT ? 0 : SMC91X_USE_16BIT); + lp->cfg.flags &= ~(SMC_CAN_USE_32BIT ? 0 : SMC91X_USE_32BIT); +#endif + ndev->dma = (unsigned char)-1; ires = platform_get_resource(pdev, IORESOURCE_IRQ, 0); @@ -2162,7 +2186,7 @@ static int smc_drv_probe(struct platform_device *pdev) ndev->irq = ires->start; if (SMC_IRQ_FLAGS == -1) - irq_flags = ires->flags & IRQF_TRIGGER_MASK; + lp->cfg.irq_flags = ires->flags & IRQF_TRIGGER_MASK; ret = smc_request_attrib(pdev); if (ret) @@ -2170,6 +2194,7 @@ static int smc_drv_probe(struct platform_device *pdev) #if defined(CONFIG_SA1100_ASSABET) NCR_0 |= NCR_ENET_OSC_EN; #endif + platform_set_drvdata(pdev, ndev); ret = smc_enable_device(pdev); if (ret) goto out_release_attrib; @@ -2188,8 +2213,7 @@ static int smc_drv_probe(struct platform_device *pdev) } #endif - platform_set_drvdata(pdev, ndev); - ret = smc_probe(ndev, addr, irq_flags); + ret = smc_probe(ndev, addr, lp->cfg.irq_flags); if (ret != 0) goto out_iounmap; diff --git a/drivers/net/smc91x.h b/drivers/net/smc91x.h index 92ff9c42367e..e044b4de1397 100644 --- a/drivers/net/smc91x.h +++ b/drivers/net/smc91x.h @@ -34,6 +34,7 @@ #ifndef _SMC91X_H_ #define _SMC91X_H_ +#include /* * Define your architecture specific bus configuration parameters here. @@ -481,6 +482,7 @@ static inline void LPD7_SMC_outsw (unsigned char* a, int r, #define RPC_LSA_DEFAULT RPC_LED_100_10 #define RPC_LSB_DEFAULT RPC_LED_TX_RX +#define SMC_DYNAMIC_BUS_CONFIG #endif @@ -526,8 +528,19 @@ struct smc_local { #endif void __iomem *base; void __iomem *datacs; + + struct smc91x_platdata cfg; }; +#ifdef SMC_DYNAMIC_BUS_CONFIG +#define SMC_8BIT(p) (((p)->cfg.flags & SMC91X_USE_8BIT) && SMC_CAN_USE_8BIT) +#define SMC_16BIT(p) (((p)->cfg.flags & SMC91X_USE_16BIT) && SMC_CAN_USE_16BIT) +#define SMC_32BIT(p) (((p)->cfg.flags & SMC91X_USE_32BIT) && SMC_CAN_USE_32BIT) +#else +#define SMC_8BIT(p) SMC_CAN_USE_8BIT +#define SMC_16BIT(p) SMC_CAN_USE_16BIT +#define SMC_32BIT(p) SMC_CAN_USE_32BIT +#endif #ifdef SMC_USE_PXA_DMA /* @@ -1108,41 +1121,41 @@ static const char * chip_ids[ 16 ] = { * * Enforce it on any 32-bit capable setup for now. */ -#define SMC_MUST_ALIGN_WRITE SMC_CAN_USE_32BIT +#define SMC_MUST_ALIGN_WRITE(lp) SMC_32BIT(lp) #define SMC_GET_PN(lp) \ - (SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, PN_REG(lp))) \ + (SMC_8BIT(lp) ? (SMC_inb(ioaddr, PN_REG(lp))) \ : (SMC_inw(ioaddr, PN_REG(lp)) & 0xFF)) #define SMC_SET_PN(lp, x) \ do { \ - if (SMC_MUST_ALIGN_WRITE) \ + if (SMC_MUST_ALIGN_WRITE(lp)) \ SMC_outl((x)<<16, ioaddr, SMC_REG(lp, 0, 2)); \ - else if (SMC_CAN_USE_8BIT) \ + else if (SMC_8BIT(lp)) \ SMC_outb(x, ioaddr, PN_REG(lp)); \ else \ SMC_outw(x, ioaddr, PN_REG(lp)); \ } while (0) #define SMC_GET_AR(lp) \ - (SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, AR_REG(lp))) \ + (SMC_8BIT(lp) ? (SMC_inb(ioaddr, AR_REG(lp))) \ : (SMC_inw(ioaddr, PN_REG(lp)) >> 8)) #define SMC_GET_TXFIFO(lp) \ - (SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, TXFIFO_REG(lp))) \ + (SMC_8BIT(lp) ? (SMC_inb(ioaddr, TXFIFO_REG(lp))) \ : (SMC_inw(ioaddr, TXFIFO_REG(lp)) & 0xFF)) #define SMC_GET_RXFIFO(lp) \ - (SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, RXFIFO_REG(lp))) \ + (SMC_8BIT(lp) ? (SMC_inb(ioaddr, RXFIFO_REG(lp))) \ : (SMC_inw(ioaddr, TXFIFO_REG(lp)) >> 8)) #define SMC_GET_INT(lp) \ - (SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, INT_REG(lp))) \ + (SMC_8BIT(lp) ? (SMC_inb(ioaddr, INT_REG(lp))) \ : (SMC_inw(ioaddr, INT_REG(lp)) & 0xFF)) #define SMC_ACK_INT(lp, x) \ do { \ - if (SMC_CAN_USE_8BIT) \ + if (SMC_8BIT(lp)) \ SMC_outb(x, ioaddr, INT_REG(lp)); \ else { \ unsigned long __flags; \ @@ -1155,12 +1168,12 @@ static const char * chip_ids[ 16 ] = { } while (0) #define SMC_GET_INT_MASK(lp) \ - (SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, IM_REG(lp))) \ + (SMC_8BIT(lp) ? (SMC_inb(ioaddr, IM_REG(lp))) \ : (SMC_inw(ioaddr, INT_REG(lp)) >> 8)) #define SMC_SET_INT_MASK(lp, x) \ do { \ - if (SMC_CAN_USE_8BIT) \ + if (SMC_8BIT(lp)) \ SMC_outb(x, ioaddr, IM_REG(lp)); \ else \ SMC_outw((x) << 8, ioaddr, INT_REG(lp)); \ @@ -1170,7 +1183,7 @@ static const char * chip_ids[ 16 ] = { #define SMC_SELECT_BANK(lp, x) \ do { \ - if (SMC_MUST_ALIGN_WRITE) \ + if (SMC_MUST_ALIGN_WRITE(lp)) \ SMC_outl((x)<<16, ioaddr, 12<> 16; \ @@ -1290,7 +1303,7 @@ static const char * chip_ids[ 16 ] = { #define SMC_PUSH_DATA(lp, p, l) \ do { \ - if (SMC_CAN_USE_32BIT) { \ + if (SMC_32BIT(lp)) { \ void *__ptr = (p); \ int __len = (l); \ void __iomem *__ioaddr = ioaddr; \ @@ -1308,15 +1321,15 @@ static const char * chip_ids[ 16 ] = { SMC_outw(*((u16 *)__ptr), ioaddr, \ DATA_REG(lp)); \ } \ - } else if (SMC_CAN_USE_16BIT) \ + } else if (SMC_16BIT(lp)) \ SMC_outsw(ioaddr, DATA_REG(lp), p, (l) >> 1); \ - else if (SMC_CAN_USE_8BIT) \ + else if (SMC_8BIT(lp)) \ SMC_outsb(ioaddr, DATA_REG(lp), p, l); \ } while (0) #define SMC_PULL_DATA(lp, p, l) \ do { \ - if (SMC_CAN_USE_32BIT) { \ + if (SMC_32BIT(lp)) { \ void *__ptr = (p); \ int __len = (l); \ void __iomem *__ioaddr = ioaddr; \ @@ -1343,9 +1356,9 @@ static const char * chip_ids[ 16 ] = { __ioaddr = lp->datacs; \ __len += 2; \ SMC_insl(__ioaddr, DATA_REG(lp), __ptr, __len>>2); \ - } else if (SMC_CAN_USE_16BIT) \ + } else if (SMC_16BIT(lp)) \ SMC_insw(ioaddr, DATA_REG(lp), p, (l) >> 1); \ - else if (SMC_CAN_USE_8BIT) \ + else if (SMC_8BIT(lp)) \ SMC_insb(ioaddr, DATA_REG(lp), p, l); \ } while (0) diff --git a/include/linux/smc91x.h b/include/linux/smc91x.h new file mode 100644 index 000000000000..8e0556b8781c --- /dev/null +++ b/include/linux/smc91x.h @@ -0,0 +1,13 @@ +#ifndef __SMC91X_H__ +#define __SMC91X_H__ + +#define SMC91X_USE_8BIT (1 << 0) +#define SMC91X_USE_16BIT (1 << 1) +#define SMC91X_USE_32BIT (1 << 2) + +struct smc91x_platdata { + unsigned long flags; + unsigned long irq_flags; /* IRQF_... */ +}; + +#endif /* __SMC91X_H__ */ -- cgit v1.2.3-71-gd317 From 82cc1a7a56872056af0ead6c7d695aa223f36695 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Fri, 21 Mar 2008 03:43:19 -0700 Subject: [NET]: Add per-connection option to set max TSO frame size Update: My mailer ate one of Jarek's feedback mails... Fixed the parameter in netif_set_gso_max_size() to be u32, not u16. Fixed the whitespace issue due to a patch import botch. Changed the types from u32 to unsigned int to be more consistent with other variables in the area. Also brought the patch up to the latest net-2.6.26 tree. Update: Made gso_max_size container 32 bits, not 16. Moved the location of gso_max_size within netdev to be less hotpath. Made more consistent names between the sock and netdev layers, and added a define for the max GSO size. Update: Respun for net-2.6.26 tree. Update: changed max_gso_frame_size and sk_gso_max_size from signed to unsigned - thanks Stephen! This patch adds the ability for device drivers to control the size of the TSO frames being sent to them, per TCP connection. By setting the netdevice's gso_max_size value, the socket layer will set the GSO frame size based on that value. This will propogate into the TCP layer, and send TSO's of that size to the hardware. This can be desirable to help tune the bursty nature of TSO on a per-adapter basis, where one may have 1 GbE and 10 GbE devices coexisting in a system, one running multiqueue and the other not, etc. This can also be desirable for devices that cannot support full 64 KB TSO's, but still want to benefit from some level of segmentation offloading. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++++++++++ include/net/sock.h | 2 ++ net/core/dev.c | 1 + net/core/sock.c | 6 ++++-- net/ipv4/tcp_output.c | 4 ++-- 5 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a2f003239c85..ced61f87660e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -724,6 +724,10 @@ struct net_device /* rtnetlink link ops */ const struct rtnl_link_ops *rtnl_link_ops; + /* for setting kernel sock attribute on TCP connection setup */ +#define GSO_MAX_SIZE 65536 + unsigned int gso_max_size; + /* The TX queue control structures */ unsigned int egress_subqueue_count; struct net_device_subqueue egress_subqueue[1]; @@ -1475,6 +1479,12 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); } +static inline void netif_set_gso_max_size(struct net_device *dev, + unsigned int size) +{ + dev->gso_max_size = size; +} + /* On bonding slaves other than the currently active slave, suppress * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and * ARP on active-backup slaves with arp_validate enabled. diff --git a/include/net/sock.h b/include/net/sock.h index 39112e75411c..8358fff002eb 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -151,6 +151,7 @@ struct sock_common { * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) + * @sk_gso_max_size: Maximum GSO segment size to build * @sk_lingertime: %SO_LINGER l_linger setting * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct @@ -237,6 +238,7 @@ struct sock { gfp_t sk_allocation; int sk_route_caps; int sk_gso_type; + unsigned int sk_gso_max_size; int sk_rcvlowat; unsigned long sk_flags; unsigned long sk_lingertime; diff --git a/net/core/dev.c b/net/core/dev.c index fcdf03cf3b3f..f973e38b81af 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4021,6 +4021,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, } dev->egress_subqueue_count = queue_count; + dev->gso_max_size = GSO_MAX_SIZE; dev->get_stats = internal_stats; netpoll_netdev_init(dev); diff --git a/net/core/sock.c b/net/core/sock.c index bb5236aee643..b1a6ed4d33c1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1095,10 +1095,12 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) if (sk->sk_route_caps & NETIF_F_GSO) sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; if (sk_can_gso(sk)) { - if (dst->header_len) + if (dst->header_len) { sk->sk_route_caps &= ~NETIF_F_GSO_MASK; - else + } else { sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; + sk->sk_gso_max_size = dst->dev->gso_max_size; + } } } EXPORT_SYMBOL_GPL(sk_setup_caps); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b4e11d834c9f..a627616314ba 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -998,7 +998,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) xmit_size_goal = mss_now; if (doing_tso) { - xmit_size_goal = (65535 - + xmit_size_goal = ((sk->sk_gso_max_size - 1) - inet_csk(sk)->icsk_af_ops->net_header_len - inet_csk(sk)->icsk_ext_hdr_len - tp->tcp_header_len); @@ -1282,7 +1282,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) limit = min(send_win, cong_win); /* If a full-sized TSO skb can be sent, do it. */ - if (limit >= 65536) + if (limit >= sk->sk_gso_max_size) goto send_now; if (sysctl_tcp_tso_win_divisor) { -- cgit v1.2.3-71-gd317 From ec3c0982a2dd1e671bad8e9d26c28dcba0039d87 Mon Sep 17 00:00:00 2001 From: Patrick McManus Date: Fri, 21 Mar 2008 16:33:01 -0700 Subject: [TCP]: TCP_DEFER_ACCEPT updates - process as established Change TCP_DEFER_ACCEPT implementation so that it transitions a connection to ESTABLISHED after handshake is complete instead of leaving it in SYN-RECV until some data arrvies. Place connection in accept queue when first data packet arrives from slow path. Benefits: - established connection is now reset if it never makes it to the accept queue - diagnostic state of established matches with the packet traces showing completed handshake - TCP_DEFER_ACCEPT timeouts are expressed in seconds and can now be enforced with reasonable accuracy instead of rounding up to next exponential back-off of syn-ack retry. Signed-off-by: Patrick McManus Signed-off-by: David S. Miller --- include/linux/tcp.h | 7 +++++++ include/net/request_sock.h | 4 ++-- include/net/tcp.h | 1 + net/ipv4/inet_connection_sock.c | 11 +++------- net/ipv4/tcp.c | 18 +++++++--------- net/ipv4/tcp_input.c | 46 +++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 8 +++++++ net/ipv4/tcp_minisocks.c | 32 +++++++++++++++++----------- net/ipv4/tcp_timer.c | 5 +++++ 9 files changed, 99 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 08027f1d7f31..d96d9b122304 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -239,6 +239,11 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) return (struct tcp_request_sock *)req; } +struct tcp_deferred_accept_info { + struct sock *listen_sk; + struct request_sock *request; +}; + struct tcp_sock { /* inet_connection_sock has to be the first member of tcp_sock */ struct inet_connection_sock inet_conn; @@ -374,6 +379,8 @@ struct tcp_sock { unsigned int keepalive_intvl; /* time interval between keep alive probes */ int linger2; + struct tcp_deferred_accept_info defer_tcp_accept; + unsigned long last_synq_overflow; u32 tso_deferred; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 040780add355..0369f98e9f3a 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -115,8 +115,8 @@ struct request_sock_queue { struct request_sock *rskq_accept_head; struct request_sock *rskq_accept_tail; rwlock_t syn_wait_lock; - u8 rskq_defer_accept; - /* 3 bytes hole, try to pack */ + u16 rskq_defer_accept; + /* 2 bytes hole, try to pack */ struct listen_sock *listen_opt; }; diff --git a/include/net/tcp.h b/include/net/tcp.h index 847e1634e1f4..67cc3956d29c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -139,6 +139,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_KEEPINTVL 32767 #define MAX_TCP_KEEPCNT 127 #define MAX_TCP_SYNCNT 127 +#define MAX_TCP_ACCEPT_DEFERRED 65535 #define TCP_SYNQ_INTERVAL (HZ/5) /* Period of SYNACK timer */ diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8a45be988709..cc1a1859a61b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -414,8 +414,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, struct inet_connection_sock *icsk = inet_csk(parent); struct request_sock_queue *queue = &icsk->icsk_accept_queue; struct listen_sock *lopt = queue->listen_opt; - int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; - int thresh = max_retries; + int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; unsigned long now = jiffies; struct request_sock **reqp, *req; int i, budget; @@ -451,9 +450,6 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, } } - if (queue->rskq_defer_accept) - max_retries = queue->rskq_defer_accept; - budget = 2 * (lopt->nr_table_entries / (timeout / interval)); i = lopt->clock_hand; @@ -461,9 +457,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, reqp=&lopt->syn_table[i]; while ((req = *reqp) != NULL) { if (time_after_eq(now, req->expires)) { - if ((req->retrans < (inet_rsk(req)->acked ? max_retries : thresh)) && - (inet_rsk(req)->acked || - !req->rsk_ops->rtx_syn_ack(parent, req))) { + if (req->retrans < thresh && + !req->rsk_ops->rtx_syn_ack(parent, req)) { unsigned long timeo; if (req->retrans++ == 0) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 071e83a894ad..e0fbc25ca816 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2105,15 +2105,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_DEFER_ACCEPT: - icsk->icsk_accept_queue.rskq_defer_accept = 0; - if (val > 0) { - /* Translate value in seconds to number of - * retransmits */ - while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && - val > ((TCP_TIMEOUT_INIT / HZ) << - icsk->icsk_accept_queue.rskq_defer_accept)) - icsk->icsk_accept_queue.rskq_defer_accept++; - icsk->icsk_accept_queue.rskq_defer_accept++; + if (val < 0) { + err = -EINVAL; + } else { + if (val > MAX_TCP_ACCEPT_DEFERRED) + val = MAX_TCP_ACCEPT_DEFERRED; + icsk->icsk_accept_queue.rskq_defer_accept = val; } break; @@ -2295,8 +2292,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = (val ? : sysctl_tcp_fin_timeout) / HZ; break; case TCP_DEFER_ACCEPT: - val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 : - ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1)); + val = icsk->icsk_accept_queue.rskq_defer_accept; break; case TCP_WINDOW_CLAMP: val = tp->window_clamp; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9cf446427cc2..6e46b4c0f28c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4451,6 +4451,49 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th) } } +static int tcp_defer_accept_check(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (tp->defer_tcp_accept.request) { + int queued_data = tp->rcv_nxt - tp->copied_seq; + int hasfin = !skb_queue_empty(&sk->sk_receive_queue) ? + tcp_hdr((struct sk_buff *) + sk->sk_receive_queue.prev)->fin : 0; + + if (queued_data && hasfin) + queued_data--; + + if (queued_data && + tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) { + if (sock_flag(sk, SOCK_KEEPOPEN)) { + inet_csk_reset_keepalive_timer(sk, + keepalive_time_when(tp)); + } else { + inet_csk_delete_keepalive_timer(sk); + } + + inet_csk_reqsk_queue_add( + tp->defer_tcp_accept.listen_sk, + tp->defer_tcp_accept.request, + sk); + + tp->defer_tcp_accept.listen_sk->sk_data_ready( + tp->defer_tcp_accept.listen_sk, 0); + + sock_put(tp->defer_tcp_accept.listen_sk); + sock_put(sk); + tp->defer_tcp_accept.listen_sk = NULL; + tp->defer_tcp_accept.request = NULL; + } else if (hasfin || + tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) { + tcp_reset(sk); + return -1; + } + } + return 0; +} + static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) { struct tcp_sock *tp = tcp_sk(sk); @@ -4811,6 +4854,9 @@ step5: tcp_data_snd_check(sk); tcp_ack_snd_check(sk); + + if (tcp_defer_accept_check(sk)) + return -1; return 0; csum_error: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0ba6e911c979..167a0f557531 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1920,6 +1920,14 @@ int tcp_v4_destroy_sock(struct sock *sk) sk->sk_sndmsg_page = NULL; } + if (tp->defer_tcp_accept.request) { + reqsk_free(tp->defer_tcp_accept.request); + sock_put(tp->defer_tcp_accept.listen_sk); + sock_put(sk); + tp->defer_tcp_accept.listen_sk = NULL; + tp->defer_tcp_accept.request = NULL; + } + atomic_dec(&tcp_sockets_allocated); return 0; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 8245247a6ceb..019c8c16e5cc 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -571,10 +571,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, does sequence test, SYN is truncated, and thus we consider it a bare ACK. - If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this - bare ACK. Otherwise, we create an established connection. Both - ends (listening sockets) accept the new incoming connection and try - to talk to each other. 8-) + Both ends (listening sockets) accept the new incoming + connection and try to talk to each other. 8-) Note: This case is both harmless, and rare. Possibility is about the same as us discovering intelligent life on another plant tomorrow. @@ -642,13 +640,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, if (!(flg & TCP_FLAG_ACK)) return NULL; - /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ - if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && - TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { - inet_rsk(req)->acked = 1; - return NULL; - } - /* OK, ACK is valid, create big socket and * feed this segment to it. It will repeat all * the tests. THIS SEGMENT MUST MOVE SOCKET TO @@ -687,7 +678,24 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, inet_csk_reqsk_queue_unlink(sk, req, prev); inet_csk_reqsk_queue_removed(sk, req); - inet_csk_reqsk_queue_add(sk, req, child); + if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && + TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { + + /* the accept queue handling is done is est recv slow + * path so lets make sure to start there + */ + tcp_sk(child)->pred_flags = 0; + sock_hold(sk); + sock_hold(child); + tcp_sk(child)->defer_tcp_accept.listen_sk = sk; + tcp_sk(child)->defer_tcp_accept.request = req; + + inet_csk_reset_keepalive_timer(child, + inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ); + } else { + inet_csk_reqsk_queue_add(sk, req, child); + } + return child; listen_overflow: diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 803d758a2b12..160d16f9f4fc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -481,6 +481,11 @@ static void tcp_keepalive_timer (unsigned long data) goto death; } + if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) { + tcp_send_active_reset(sk, GFP_ATOMIC); + goto death; + } + if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) goto out; -- cgit v1.2.3-71-gd317 From 0098b7273e968fb9989a6e1e4e4c024cd081fe0d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 22 Mar 2008 17:18:47 -0700 Subject: [NET]: NPROTO is redundant; it's equal to AF_MAX/PF_MAX. DaveM pointed out NPROTO exposed to userspace, so keep it around, just make sure it stays in sync. Signed-off-by: Rusty Russell Signed-off-by: David S. Miller --- include/linux/net.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index c414d90e647b..71f7dd559285 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -19,6 +19,7 @@ #define _LINUX_NET_H #include +#include #include struct poll_table_struct; @@ -26,7 +27,7 @@ struct pipe_inode_info; struct inode; struct net; -#define NPROTO 34 /* should be enough for now.. */ +#define NPROTO AF_MAX #define SYS_SOCKET 1 /* sys_socket(2) */ #define SYS_BIND 2 /* sys_bind(2) */ -- cgit v1.2.3-71-gd317 From 414f69d8a6ff0b30e7ea5ce10534b19f851e172e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 23 Mar 2008 22:04:31 -0700 Subject: [NET]: include/linux/atalk.h - remove duplicate include Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/atalk.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atalk.h b/include/linux/atalk.h index ced8a1ed080c..e9ebac2e2ecc 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -85,8 +85,6 @@ static inline struct atalk_sock *at_sk(struct sock *sk) return (struct atalk_sock *)sk; } -#include - struct ddpehdr { __be16 deh_len_hops; /* lower 10 bits are length, next 4 - hops */ __be16 deh_sum; -- cgit v1.2.3-71-gd317 From cc32e05416b4023a5466a2f66e3c02236a771c5b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 23 Mar 2008 22:05:44 -0700 Subject: [NET]: include/linux/igmp.h - remove duplicate include Removed duplicate #include Combined #ifdef __KERNEL__ blocks Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/igmp.h | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index f510e7e382a8..f5a1a0db2e8e 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -80,27 +80,6 @@ struct igmpv3_query { __be32 srcs[0]; }; -#ifdef __KERNEL__ -#include - -static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb) -{ - return (struct igmphdr *)skb_transport_header(skb); -} - -static inline struct igmpv3_report * - igmpv3_report_hdr(const struct sk_buff *skb) -{ - return (struct igmpv3_report *)skb_transport_header(skb); -} - -static inline struct igmpv3_query * - igmpv3_query_hdr(const struct sk_buff *skb) -{ - return (struct igmpv3_query *)skb_transport_header(skb); -} -#endif - #define IGMP_HOST_MEMBERSHIP_QUERY 0x11 /* From RFC1112 */ #define IGMP_HOST_MEMBERSHIP_REPORT 0x12 /* Ditto */ #define IGMP_DVMRP 0x13 /* DVMRP routing */ @@ -151,6 +130,23 @@ static inline struct igmpv3_query * #include #include +static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb) +{ + return (struct igmphdr *)skb_transport_header(skb); +} + +static inline struct igmpv3_report * + igmpv3_report_hdr(const struct sk_buff *skb) +{ + return (struct igmpv3_report *)skb_transport_header(skb); +} + +static inline struct igmpv3_query * + igmpv3_query_hdr(const struct sk_buff *skb) +{ + return (struct igmpv3_query *)skb_transport_header(skb); +} + extern int sysctl_igmp_max_memberships; extern int sysctl_igmp_max_msf; -- cgit v1.2.3-71-gd317 From 310afe86af8ddd96a06b75aa61ef1af233f80e89 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 23 Mar 2008 22:06:51 -0700 Subject: [NET]: include/linux/udp.h - remove duplicate include Remove duplicate #include Combine #ifdef __KERNEL__ blocks Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/udp.h | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 1e7b7cb5703b..581ca2c14c52 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -26,15 +26,6 @@ struct udphdr { __sum16 check; }; -#ifdef __KERNEL__ -#include - -static inline struct udphdr *udp_hdr(const struct sk_buff *skb) -{ - return (struct udphdr *)skb_transport_header(skb); -} -#endif - /* UDP socket options */ #define UDP_CORK 1 /* Never send partially complete segments */ #define UDP_ENCAP 100 /* Set the socket to accept encapsulated packets */ @@ -45,9 +36,14 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb) #define UDP_ENCAP_L2TPINUDP 3 /* rfc2661 */ #ifdef __KERNEL__ -#include - #include +#include + +static inline struct udphdr *udp_hdr(const struct sk_buff *skb) +{ + return (struct udphdr *)skb_transport_header(skb); +} + #define UDP_HTABLE_SIZE 128 struct udp_sock { -- cgit v1.2.3-71-gd317 From c8cdaf998df221b01134a051aba38c570105061b Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 10 Mar 2008 04:30:37 -0400 Subject: [IPV4,IPV6]: Share cork.rt between IPv4 and IPv6. Signed-off-by: YOSHIFUJI Hideaki --- include/linux/ipv6.h | 1 - include/net/inet_sock.h | 2 +- net/ipv4/ip_output.c | 14 ++++++-------- net/ipv6/ip6_output.c | 12 ++++++------ 4 files changed, 13 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 4aaefc349a4b..2102d8b67c01 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -315,7 +315,6 @@ struct ipv6_pinfo { struct sk_buff *pktoptions; struct { struct ipv6_txoptions *opt; - struct rt6_info *rt; int hop_limit; int tclass; } cork; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index b6db16d2766a..a42cd63d241a 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -136,7 +136,7 @@ struct inet_sock { unsigned int flags; unsigned int fragsize; struct ip_options *opt; - struct rtable *rt; + struct dst_entry *dst; int length; /* Total length of all frames */ __be32 addr; struct flowi fl; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 349fae58c1a3..913266cd9902 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -825,7 +825,7 @@ int ip_append_data(struct sock *sk, inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); - inet->cork.rt = rt; + inet->cork.dst = &rt->u.dst; inet->cork.length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; @@ -834,7 +834,7 @@ int ip_append_data(struct sock *sk, transhdrlen += exthdrlen; } } else { - rt = inet->cork.rt; + rt = (struct rtable *)inet->cork.dst; if (inet->cork.flags & IPCORK_OPT) opt = inet->cork.opt; @@ -1083,7 +1083,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, if (skb_queue_empty(&sk->sk_write_queue)) return -EINVAL; - rt = inet->cork.rt; + rt = (struct rtable *)inet->cork.dst; if (inet->cork.flags & IPCORK_OPT) opt = inet->cork.opt; @@ -1208,10 +1208,8 @@ static void ip_cork_release(struct inet_sock *inet) inet->cork.flags &= ~IPCORK_OPT; kfree(inet->cork.opt); inet->cork.opt = NULL; - if (inet->cork.rt) { - ip_rt_put(inet->cork.rt); - inet->cork.rt = NULL; - } + dst_release(inet->cork.dst); + inet->cork.dst = NULL; } /* @@ -1224,7 +1222,7 @@ int ip_push_pending_frames(struct sock *sk) struct sk_buff **tail_skb; struct inet_sock *inet = inet_sk(sk); struct ip_options *opt = NULL; - struct rtable *rt = inet->cork.rt; + struct rtable *rt = (struct rtable *)inet->cork.dst; struct iphdr *iph; __be16 df = 0; __u8 ttl; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 98762fde2b65..ed6482667a25 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1115,7 +1115,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, /* need source address above miyazawa*/ } dst_hold(&rt->u.dst); - np->cork.rt = rt; + inet->cork.dst = &rt->u.dst; inet->cork.fl = *fl; np->cork.hop_limit = hlimit; np->cork.tclass = tclass; @@ -1136,7 +1136,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, length += exthdrlen; transhdrlen += exthdrlen; } else { - rt = np->cork.rt; + rt = (struct rt6_info *)inet->cork.dst; fl = &inet->cork.fl; if (inet->cork.flags & IPCORK_OPT) opt = np->cork.opt; @@ -1381,9 +1381,9 @@ static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) inet->cork.flags &= ~IPCORK_OPT; kfree(np->cork.opt); np->cork.opt = NULL; - if (np->cork.rt) { - dst_release(&np->cork.rt->u.dst); - np->cork.rt = NULL; + if (inet->cork.dst) { + dst_release(inet->cork.dst); + inet->cork.dst = NULL; inet->cork.flags &= ~IPCORK_ALLFRAG; } memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); @@ -1398,7 +1398,7 @@ int ip6_push_pending_frames(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = np->cork.opt; - struct rt6_info *rt = np->cork.rt; + struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; struct flowi *fl = &inet->cork.fl; unsigned char proto = fl->proto; int err = 0; -- cgit v1.2.3-71-gd317 From 4725474584d6aa2f07b3d47442dfbc4f6544f65e Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 10 Mar 2008 04:41:33 -0400 Subject: [IPV6]: Convert cork.hop_limit and cork.tclass into u8 instead of int. Values of those fields are always between 0 and 255 (inclusive), so use u8 and save some memory on 32bit systems. Signed-off-by: YOSHIFUJI Hideaki --- include/linux/ipv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 2102d8b67c01..9b59e37afad9 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -315,8 +315,8 @@ struct ipv6_pinfo { struct sk_buff *pktoptions; struct { struct ipv6_txoptions *opt; - int hop_limit; - int tclass; + u8 hop_limit; + u8 tclass; } cork; }; -- cgit v1.2.3-71-gd317 From 1d5d236d309ab90fa6aedf712f586b3595721373 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 10 Mar 2008 10:56:55 -0400 Subject: [IPV6]: Use bitfields for hop_limit and mcast_hops. Save some bits for future extensions. Signed-off-by: YOSHIFUJI Hideaki --- include/linux/ipv6.h | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 9b59e37afad9..87ae4e389ce1 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -274,8 +274,29 @@ struct ipv6_pinfo { __be32 flow_label; __u32 frag_size; - __s16 hop_limit; - __s16 mcast_hops; + + /* + * Packed in 16bits. + * Omit one shift by by putting the signed field at MSB. + */ +#if defined(__BIG_ENDIAN_BITFIELD) + __s16 hop_limit:9; + __u16 __unused_1:7; +#else + __u16 __unused_1:7; + __s16 hop_limit:9; +#endif + +#if defined(__BIG_ENDIAN_BITFIELD) + /* Packed in 16bits. */ + __s16 mcast_hops:9; + __u16 __unused_2:6, + mc_loop:1; +#else + __u16 mc_loop:1, + __unused_2:6; + __s16 mcast_hops:9; +#endif int mcast_oif; /* pktoption flags */ @@ -298,8 +319,7 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u8 mc_loop:1, - recverr:1, + __u8 recverr:1, sndflow:1, pmtudisc:2, ipv6only:1; -- cgit v1.2.3-71-gd317 From 7cbca67c073263c179f605bdbbdc565ab29d801d Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 25 Mar 2008 09:37:42 +0900 Subject: [IPV6]: Support Source Address Selection API (RFC5014). Signed-off-by: YOSHIFUJI Hideaki --- include/linux/in6.h | 11 +++++++ include/linux/ipv6.h | 6 +++- include/net/addrconf.h | 1 + include/net/ip6_route.h | 9 ++++-- net/ipv6/addrconf.c | 17 +++++++++-- net/ipv6/fib6_rules.c | 12 +++++++- net/ipv6/ip6_output.c | 4 ++- net/ipv6/ipv6_sockglue.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/ndisc.c | 4 ++- net/ipv6/route.c | 11 ++++++- net/ipv6/xfrm6_policy.c | 2 +- net/sctp/ipv6.c | 4 ++- 12 files changed, 146 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index 2a61c82af115..f674000c6c99 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -249,4 +249,15 @@ struct in6_flowlabel_req * IP6T_SO_GET_REVISION_TARGET 69 */ +/* RFC5014: Source address selection */ +#define IPV6_ADDR_PREFERENCES 72 + +#define IPV6_PREFER_SRC_TMP 0x0001 +#define IPV6_PREFER_SRC_PUBLIC 0x0002 +#define IPV6_PREFER_SRC_PUBTMP_DEFAULT 0x0100 +#define IPV6_PREFER_SRC_COA 0x0004 +#define IPV6_PREFER_SRC_HOME 0x0400 +#define IPV6_PREFER_SRC_CGA 0x0008 +#define IPV6_PREFER_SRC_NONCGA 0x0800 + #endif diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 87ae4e389ce1..c9ba0da16ce9 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -322,7 +322,11 @@ struct ipv6_pinfo { __u8 recverr:1, sndflow:1, pmtudisc:2, - ipv6only:1; + ipv6only:1, + srcprefs:3; /* 001: prefer temporary address + * 010: prefer public address + * 100: prefer care-of address + */ __u8 tclass; __u32 dst_cookie; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index edcb4bbaab7d..c9276c72764d 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -78,6 +78,7 @@ extern struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, extern int ipv6_dev_get_saddr(struct net_device *dev, struct in6_addr *daddr, + unsigned int srcprefs, struct in6_addr *saddr); extern int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 5c3b67c86aef..3ae6799c2b14 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -30,9 +30,12 @@ struct route_info { #include #include -#define RT6_LOOKUP_F_IFACE 0x1 -#define RT6_LOOKUP_F_REACHABLE 0x2 -#define RT6_LOOKUP_F_HAS_SADDR 0x4 +#define RT6_LOOKUP_F_IFACE 0x00000001 +#define RT6_LOOKUP_F_REACHABLE 0x00000002 +#define RT6_LOOKUP_F_HAS_SADDR 0x00000004 +#define RT6_LOOKUP_F_SRCPREF_TMP 0x00000008 +#define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 +#define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 extern struct rt6_info *ip6_null_entry; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 787e90af166c..89954885dee1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -909,6 +909,7 @@ struct ipv6_saddr_dst { int ifindex; int scope; int label; + unsigned int prefs; }; static inline int ipv6_saddr_preferred(int type) @@ -984,9 +985,12 @@ static int ipv6_get_saddr_eval(struct ipv6_saddr_score *score, break; #ifdef CONFIG_IPV6_MIP6 case IPV6_SADDR_RULE_HOA: + { /* Rule 4: Prefer home address */ - ret = !!(score->ifa->flags & IFA_F_HOMEADDRESS); + int prefhome = !(dst->prefs & IPV6_PREFER_SRC_COA); + ret = !(score->ifa->flags & IFA_F_HOMEADDRESS) ^ prefhome; break; + } #endif case IPV6_SADDR_RULE_OIF: /* Rule 5: Prefer outgoing interface */ @@ -1000,11 +1004,16 @@ static int ipv6_get_saddr_eval(struct ipv6_saddr_score *score, break; #ifdef CONFIG_IPV6_PRIVACY case IPV6_SADDR_RULE_PRIVACY: + { /* Rule 7: Prefer public address * Note: prefer temprary address if use_tempaddr >= 2 */ - ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ (score->ifa->idev->cnf.use_tempaddr >= 2); + int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ? + !!(dst->prefs & IPV6_PREFER_SRC_TMP) : + score->ifa->idev->cnf.use_tempaddr >= 2; + ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp; break; + } #endif case IPV6_SADDR_RULE_ORCHID: /* Rule 8-: Prefer ORCHID vs ORCHID or @@ -1030,7 +1039,8 @@ out: } int ipv6_dev_get_saddr(struct net_device *dst_dev, - struct in6_addr *daddr, struct in6_addr *saddr) + struct in6_addr *daddr, unsigned int prefs, + struct in6_addr *saddr) { struct ipv6_saddr_score scores[2], *score = &scores[0], *hiscore = &scores[1]; @@ -1044,6 +1054,7 @@ int ipv6_dev_get_saddr(struct net_device *dst_dev, dst.ifindex = dst_dev ? dst_dev->ifindex : 0; dst.scope = __ipv6_addr_src_scope(dst_type); dst.label = ipv6_addr_label(daddr, dst_type, dst.ifindex); + dst.prefs = prefs; hiscore->rule = -1; hiscore->ifa = NULL; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 55137408f054..e7a7fe26cebf 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -84,8 +84,18 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if ((rule->flags & FIB_RULE_FIND_SADDR) && r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) { struct in6_addr saddr; + unsigned int srcprefs = 0; + + if (flags & RT6_LOOKUP_F_SRCPREF_TMP) + srcprefs |= IPV6_PREFER_SRC_TMP; + if (flags & RT6_LOOKUP_F_SRCPREF_PUBLIC) + srcprefs |= IPV6_PREFER_SRC_PUBLIC; + if (flags & RT6_LOOKUP_F_SRCPREF_COA) + srcprefs |= IPV6_PREFER_SRC_COA; + if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev, - &flp->fl6_dst, &saddr)) + &flp->fl6_dst, srcprefs, + &saddr)) goto again; if (!ipv6_prefix_equal(&saddr, &r->src.addr, r->src.plen)) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2a4f08c8a02d..d34aa61353bb 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -920,7 +920,9 @@ static int ip6_dst_lookup_tail(struct sock *sk, if (ipv6_addr_any(&fl->fl6_src)) { err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev, - &fl->fl6_dst, &fl->fl6_src); + &fl->fl6_dst, + sk ? inet6_sk(sk)->srcprefs : 0, + &fl->fl6_src); if (err) goto out_err_release; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 8e29fb1d1df6..dc6695cc5767 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -617,7 +617,67 @@ done: retv = xfrm_user_policy(sk, optname, optval, optlen); break; + case IPV6_ADDR_PREFERENCES: + { + unsigned int pref = 0; + unsigned int prefmask = ~0; + + retv = -EINVAL; + + /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */ + switch (val & (IPV6_PREFER_SRC_PUBLIC| + IPV6_PREFER_SRC_TMP| + IPV6_PREFER_SRC_PUBTMP_DEFAULT)) { + case IPV6_PREFER_SRC_PUBLIC: + pref |= IPV6_PREFER_SRC_PUBLIC; + break; + case IPV6_PREFER_SRC_TMP: + pref |= IPV6_PREFER_SRC_TMP; + break; + case IPV6_PREFER_SRC_PUBTMP_DEFAULT: + break; + case 0: + goto pref_skip_pubtmp; + default: + goto e_inval; + } + + prefmask &= ~(IPV6_PREFER_SRC_PUBLIC| + IPV6_PREFER_SRC_TMP); +pref_skip_pubtmp: + + /* check HOME/COA conflicts */ + switch (val & (IPV6_PREFER_SRC_HOME|IPV6_PREFER_SRC_COA)) { + case IPV6_PREFER_SRC_HOME: + break; + case IPV6_PREFER_SRC_COA: + pref |= IPV6_PREFER_SRC_COA; + case 0: + goto pref_skip_coa; + default: + goto e_inval; + } + + prefmask &= ~IPV6_PREFER_SRC_COA; +pref_skip_coa: + + /* check CGA/NONCGA conflicts */ + switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) { + case IPV6_PREFER_SRC_CGA: + case IPV6_PREFER_SRC_NONCGA: + case 0: + break; + default: + goto e_inval; + } + + np->srcprefs = (np->srcprefs & prefmask) | pref; + retv = 0; + + break; + } } + release_sock(sk); return retv; @@ -932,6 +992,24 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->sndflow; break; + case IPV6_ADDR_PREFERENCES: + val = 0; + + if (np->srcprefs & IPV6_PREFER_SRC_TMP) + val |= IPV6_PREFER_SRC_TMP; + else if (np->srcprefs & IPV6_PREFER_SRC_PUBLIC) + val |= IPV6_PREFER_SRC_PUBLIC; + else { + /* XXX: should we return system default? */ + val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT; + } + + if (np->srcprefs & IPV6_PREFER_SRC_COA) + val |= IPV6_PREFER_SRC_COA; + else + val |= IPV6_PREFER_SRC_HOME; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index e7d8e74704c1..3f68a6eae7b2 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -546,7 +546,9 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, override = 0; in6_ifa_put(ifp); } else { - if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr)) + if (ipv6_dev_get_saddr(dev, daddr, + inet6_sk(dev->nd_net->ipv6.ndisc_sk)->srcprefs, + &tmpaddr)) return; src_addr = &tmpaddr; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index aa3f08718e44..06faa46920e1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -782,6 +782,15 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, if (!ipv6_addr_any(&fl->fl6_src)) flags |= RT6_LOOKUP_F_HAS_SADDR; + else if (sk) { + unsigned int prefs = inet6_sk(sk)->srcprefs; + if (prefs & IPV6_PREFER_SRC_TMP) + flags |= RT6_LOOKUP_F_SRCPREF_TMP; + if (prefs & IPV6_PREFER_SRC_PUBLIC) + flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC; + if (prefs & IPV6_PREFER_SRC_COA) + flags |= RT6_LOOKUP_F_SRCPREF_COA; + } return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); } @@ -2162,7 +2171,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, else if (dst) { struct in6_addr saddr_buf; if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev, - dst, &saddr_buf) == 0) + dst, 0, &saddr_buf) == 0) NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index e96dafdc7032..d92d1fceb8cf 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -58,7 +58,7 @@ static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) return -EHOSTUNREACH; ipv6_dev_get_saddr(ip6_dst_idev(dst)->dev, - (struct in6_addr *)&daddr->a6, + (struct in6_addr *)&daddr->a6, 0, (struct in6_addr *)&saddr->a6); dst_release(dst); return 0; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 46c5b3c5cb99..dc71d0d83753 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -316,7 +316,9 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, if (!asoc) { ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, - &daddr->v6.sin6_addr, &saddr->v6.sin6_addr); + &daddr->v6.sin6_addr, + inet6_sk(asoc->base.sk)->srcprefs, + &saddr->v6.sin6_addr); SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: " NIP6_FMT "\n", NIP6(saddr->v6.sin6_addr)); return; -- cgit v1.2.3-71-gd317 From c346dca10840a874240c78efe3f39acf4312a1f2 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 25 Mar 2008 21:47:49 +0900 Subject: [NET] NETNS: Omit net_device->nd_net without CONFIG_NET_NS. Introduce per-net_device inlines: dev_net(), dev_net_set(). Without CONFIG_NET_NS, no namespace other than &init_net exists. Let's explicitly define them to help compiler optimizations. Signed-off-by: YOSHIFUJI Hideaki --- arch/ia64/hp/sim/simeth.c | 2 +- drivers/block/aoe/aoenet.c | 2 +- drivers/net/bonding/bond_3ad.c | 2 +- drivers/net/bonding/bond_alb.c | 2 +- drivers/net/bonding/bond_main.c | 6 ++--- drivers/net/hamradio/bpqether.c | 4 ++-- drivers/net/loopback.c | 2 +- drivers/net/macvlan.c | 2 +- drivers/net/pppoe.c | 6 ++--- drivers/net/veth.c | 2 +- drivers/net/via-velocity.c | 2 +- drivers/net/wan/dlci.c | 2 +- drivers/net/wan/hdlc.c | 4 ++-- drivers/net/wan/lapbether.c | 4 ++-- drivers/net/wan/syncppp.c | 2 +- drivers/s390/net/qeth_l3_main.c | 2 +- include/linux/inetdevice.h | 6 ++--- include/linux/netdevice.h | 25 ++++++++++++++++++++- net/8021q/vlan.c | 2 +- net/8021q/vlan_dev.c | 2 +- net/appletalk/aarp.c | 4 ++-- net/appletalk/ddp.c | 6 ++--- net/atm/clip.c | 2 +- net/atm/mpc.c | 2 +- net/ax25/af_ax25.c | 2 +- net/ax25/ax25_in.c | 2 +- net/bridge/br_notify.c | 2 +- net/bridge/br_stp_bpdu.c | 2 +- net/can/af_can.c | 4 ++-- net/can/bcm.c | 2 +- net/can/raw.c | 2 +- net/core/dev.c | 22 +++++++++---------- net/core/dst.c | 2 +- net/core/fib_rules.c | 2 +- net/core/neighbour.c | 12 +++++----- net/core/pktgen.c | 2 +- net/core/rtnetlink.c | 4 ++-- net/decnet/af_decnet.c | 2 +- net/decnet/dn_route.c | 2 +- net/econet/af_econet.c | 4 ++-- net/ipv4/arp.c | 14 ++++++------ net/ipv4/devinet.c | 10 ++++----- net/ipv4/fib_frontend.c | 12 +++++----- net/ipv4/icmp.c | 8 +++---- net/ipv4/igmp.c | 16 +++++++------- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_gre.c | 2 +- net/ipv4/ip_input.c | 6 ++--- net/ipv4/ip_options.c | 2 +- net/ipv4/ipconfig.c | 4 ++-- net/ipv4/ipmr.c | 2 +- net/ipv4/netfilter/ip_queue.c | 2 +- net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +- net/ipv4/raw.c | 4 ++-- net/ipv4/route.c | 28 +++++++++++------------ net/ipv4/tcp_ipv4.c | 6 ++--- net/ipv4/udp.c | 4 ++-- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/addrconf.c | 44 ++++++++++++++++++------------------- net/ipv6/icmp.c | 4 ++-- net/ipv6/ip6_output.c | 2 +- net/ipv6/mcast.c | 6 ++--- net/ipv6/ndisc.c | 24 ++++++++++---------- net/ipv6/netfilter/ip6_queue.c | 2 +- net/ipv6/proc.c | 2 +- net/ipv6/raw.c | 4 ++-- net/ipv6/reassembly.c | 2 +- net/ipv6/route.c | 40 ++++++++++++++++----------------- net/ipv6/tcp_ipv6.c | 10 ++++----- net/ipv6/udp.c | 4 ++-- net/ipv6/xfrm6_policy.c | 2 +- net/ipx/af_ipx.c | 4 ++-- net/irda/irlap_frame.c | 2 +- net/llc/llc_input.c | 2 +- net/netfilter/core.c | 2 +- net/netfilter/nfnetlink_queue.c | 2 +- net/netlabel/netlabel_unlabeled.c | 2 +- net/netrom/af_netrom.c | 2 +- net/packet/af_packet.c | 8 +++---- net/rose/af_rose.c | 2 +- net/sctp/protocol.c | 2 +- net/tipc/eth_media.c | 4 ++-- net/wireless/wext.c | 2 +- net/x25/af_x25.c | 2 +- net/x25/x25_dev.c | 2 +- net/xfrm/xfrm_policy.c | 4 ++-- security/selinux/netif.c | 2 +- 87 files changed, 251 insertions(+), 228 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c index 969fe9f443c4..3d47839a0c48 100644 --- a/arch/ia64/hp/sim/simeth.c +++ b/arch/ia64/hp/sim/simeth.c @@ -294,7 +294,7 @@ simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr) return NOTIFY_DONE; } - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE; diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 8460ef736d56..18d243c73eee 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -115,7 +115,7 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct aoe_hdr *h; u32 n; - if (ifp->nd_net != &init_net) + if (dev_net(ifp) != &init_net) goto exit; skb = skb_share_check(skb, GFP_ATOMIC); diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index cb3c6faa7888..457d81f73e39 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2429,7 +2429,7 @@ int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct pac struct slave *slave = NULL; int ret = NET_RX_DROP; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto out; if (!(dev->flags & IFF_MASTER)) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index b57bc9467dbe..b986dacf5d33 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -345,7 +345,7 @@ static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct struct arp_pkt *arp = (struct arp_pkt *)skb->data; int res = NET_RX_DROP; - if (bond_dev->nd_net != &init_net) + if (dev_net(bond_dev) != &init_net) goto out; if (!(bond_dev->flags & IFF_MASTER)) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5fc9d8d58ece..ac688fcb27d7 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2629,7 +2629,7 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack unsigned char *arp_ptr; __be32 sip, tip; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto out; if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER)) @@ -3470,7 +3470,7 @@ static int bond_netdev_event(struct notifier_block *this, unsigned long event, v { struct net_device *event_dev = (struct net_device *)ptr; - if (event_dev->nd_net != &init_net) + if (dev_net(event_dev) != &init_net) return NOTIFY_DONE; dprintk("event_dev: %s, event: %lx\n", @@ -3508,7 +3508,7 @@ static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, struct bonding *bond, *bond_next; struct vlan_entry *vlan, *vlan_next; - if (ifa->ifa_dev->dev->nd_net != &init_net) + if (dev_net(ifa->ifa_dev->dev) != &init_net) return NOTIFY_DONE; list_for_each_entry_safe(bond, bond_next, &bond_dev_list, bond_list) { diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 5ddf8b0c34f9..5f4b4c6c9f76 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -172,7 +172,7 @@ static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty struct ethhdr *eth; struct bpqdev *bpq; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto drop; if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) @@ -553,7 +553,7 @@ static int bpq_device_event(struct notifier_block *this,unsigned long event, voi { struct net_device *dev = (struct net_device *)ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (!dev_is_ethdev(dev)) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index f2a6e7132241..41b774baac4d 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -258,7 +258,7 @@ static __net_init int loopback_net_init(struct net *net) if (!dev) goto out; - dev->nd_net = net; + dev_net_set(dev, net); err = register_netdev(dev); if (err) goto out_free_netdev; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index f651a816b280..2056cfc624dc 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -402,7 +402,7 @@ static int macvlan_newlink(struct net_device *dev, if (!tb[IFLA_LINK]) return -EINVAL; - lowerdev = __dev_get_by_index(dev->nd_net, nla_get_u32(tb[IFLA_LINK])); + lowerdev = __dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK])); if (lowerdev == NULL) return -ENODEV; diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index ac0ac98b19cd..4fad4ddb3504 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -301,7 +301,7 @@ static int pppoe_device_event(struct notifier_block *this, { struct net_device *dev = (struct net_device *) ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; /* Only look at sockets that are using this specific device. */ @@ -392,7 +392,7 @@ static int pppoe_rcv(struct sk_buff *skb, if (!(skb = skb_share_check(skb, GFP_ATOMIC))) goto out; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto drop; if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) @@ -424,7 +424,7 @@ static int pppoe_disc_rcv(struct sk_buff *skb, struct pppoe_hdr *ph; struct pppox_sock *po; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto abort; if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index e2ad98bee6e7..31cd817f33f9 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -375,7 +375,7 @@ static int veth_newlink(struct net_device *dev, else snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); - peer = rtnl_create_link(dev->nd_net, ifname, &veth_link_ops, tbp); + peer = rtnl_create_link(dev_net(dev), ifname, &veth_link_ops, tbp); if (IS_ERR(peer)) return PTR_ERR(peer); diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index 1525e8a89844..ed1afaf683a4 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -3464,7 +3464,7 @@ static int velocity_netdev_event(struct notifier_block *nb, unsigned long notifi struct velocity_info *vptr; unsigned long flags; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; spin_lock_irqsave(&velocity_dev_list_lock, flags); diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c index 96b232446c0b..b14242768fad 100644 --- a/drivers/net/wan/dlci.c +++ b/drivers/net/wan/dlci.c @@ -517,7 +517,7 @@ static int dlci_dev_event(struct notifier_block *unused, { struct net_device *dev = (struct net_device *) ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (event == NETDEV_UNREGISTER) { diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c index 39951d0c34d6..9a83c9d5b8cf 100644 --- a/drivers/net/wan/hdlc.c +++ b/drivers/net/wan/hdlc.c @@ -68,7 +68,7 @@ static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev, { struct hdlc_device *hdlc = dev_to_hdlc(dev); - if (dev->nd_net != &init_net) { + if (dev_net(dev) != &init_net) { kfree_skb(skb); return 0; } @@ -105,7 +105,7 @@ static int hdlc_device_event(struct notifier_block *this, unsigned long event, unsigned long flags; int on; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (dev->get_stats != hdlc_get_stats) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index fb37b8095231..629c909e05f9 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -91,7 +91,7 @@ static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packe int len, err; struct lapbethdev *lapbeth; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto drop; if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) @@ -393,7 +393,7 @@ static int lapbeth_device_event(struct notifier_block *this, struct lapbethdev *lapbeth; struct net_device *dev = ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (!dev_is_ethdev(dev)) diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c index 61e24b7a45a3..29b4b94e4947 100644 --- a/drivers/net/wan/syncppp.c +++ b/drivers/net/wan/syncppp.c @@ -1444,7 +1444,7 @@ static void sppp_print_bytes (u_char *p, u16 len) static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev) { - if (dev->nd_net != &init_net) { + if (dev_net(dev) != &init_net) { kfree_skb(skb); return 0; } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index a856cb47fc78..21c439046b3c 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3250,7 +3250,7 @@ static int qeth_l3_ip_event(struct notifier_block *this, struct qeth_ipaddr *addr; struct qeth_card *card; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; QETH_DBF_TEXT(trace, 3, "ipevent"); diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index da05ab47ff2f..7009b0cdd06f 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -70,13 +70,13 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) ipv4_devconf_set((in_dev), NET_IPV4_CONF_ ## attr, (val)) #define IN_DEV_ANDCONF(in_dev, attr) \ - (IPV4_DEVCONF_ALL(in_dev->dev->nd_net, attr) && \ + (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr) && \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_ORCONF(in_dev, attr) \ - (IPV4_DEVCONF_ALL(in_dev->dev->nd_net, attr) || \ + (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr) || \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_MAXCONF(in_dev, attr) \ - (max(IPV4_DEVCONF_ALL(in_dev->dev->nd_net, attr), \ + (max(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr), \ IN_DEV_CONF_GET((in_dev), attr))) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ced61f87660e..d146be40f46c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -708,8 +708,10 @@ struct net_device void (*poll_controller)(struct net_device *dev); #endif +#ifdef CONFIG_NET_NS /* Network namespace this network device is inside */ struct net *nd_net; +#endif /* bridge stuff */ struct net_bridge_port *br_port; @@ -737,6 +739,27 @@ struct net_device #define NETDEV_ALIGN 32 #define NETDEV_ALIGN_CONST (NETDEV_ALIGN - 1) +/* + * Net namespace inlines + */ +static inline +struct net *dev_net(const struct net_device *dev) +{ +#ifdef CONFIG_NET_NS + return dev->nd_net; +#else + return &init_net; +#endif +} + +static inline +void dev_net_set(struct net_device *dev, const struct net *net) +{ +#ifdef CONFIG_NET_NS + dev->nd_dev = net; +#endif +} + /** * netdev_priv - access network device private data * @dev: network device @@ -813,7 +836,7 @@ static inline struct net_device *next_net_device(struct net_device *dev) struct list_head *lh; struct net *net; - net = dev->nd_net; + net = dev_net(dev); lh = dev->dev_list.next; return lh == &net->dev_base_head ? NULL : net_device_entry(lh); } diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index dbc81b965096..c35dc230365c 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -382,7 +382,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, int i, flgs; struct net_device *vlandev; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (!grp) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 1e5c9904571d..e536162b1ebc 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -153,7 +153,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, struct net_device_stats *stats; unsigned short vlan_TCI; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto err_free; skb = skb_share_check(skb, GFP_ATOMIC); diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 61166f66479f..25aa37ce9430 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -333,7 +333,7 @@ static int aarp_device_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; int ct; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (event == NETDEV_DOWN) { @@ -716,7 +716,7 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, struct atalk_addr sa, *ma, da; struct atalk_iface *ifa; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto out0; /* We only do Ethernet SNAP AARP. */ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 3be55c8ca4ef..44cd42f7786b 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -648,7 +648,7 @@ static int ddp_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (event == NETDEV_DOWN) @@ -1405,7 +1405,7 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, int origlen; __u16 len_hops; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto freeit; /* Don't mangle buffer if shared */ @@ -1493,7 +1493,7 @@ freeit: static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto freeit; /* Expand any short form frames */ diff --git a/net/atm/clip.c b/net/atm/clip.c index e82da6746723..6f8223ebf551 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -612,7 +612,7 @@ static int clip_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = arg; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (event == NETDEV_UNREGISTER) { diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 9c7f712fc7e9..9db332e7a6c0 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -964,7 +964,7 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned lo dev = (struct net_device *)dev_ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (dev->name == NULL || strncmp(dev->name, "lec", 3)) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 48bfcc741f25..ee9dd83e7561 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -116,7 +116,7 @@ static int ax25_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; /* Reject non AX.25 devices */ diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index d1be080dcb25..33790a8efbc8 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -451,7 +451,7 @@ int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, skb->sk = NULL; /* Initially we don't know who it's for */ skb->destructor = NULL; /* Who initializes this, dammit?! */ - if (dev->nd_net != &init_net) { + if (dev_net(dev) != &init_net) { kfree_skb(skb); return 0; } diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 07ac3ae68d8f..00644a544e3c 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -37,7 +37,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v struct net_bridge_port *p = dev->br_port; struct net_bridge *br; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; /* not a port of a bridge */ diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 0edbd2a1c3f3..8deab645ef75 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -142,7 +142,7 @@ int br_stp_rcv(struct sk_buff *skb, struct net_device *dev, struct net_bridge *br; const unsigned char *buf; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto err; if (!p) diff --git a/net/can/af_can.c b/net/can/af_can.c index 36b9f22ed83a..2759b76f731c 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -599,7 +599,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, struct dev_rcv_lists *d; int matches; - if (dev->type != ARPHRD_CAN || dev->nd_net != &init_net) { + if (dev->type != ARPHRD_CAN || dev_net(dev) != &init_net) { kfree_skb(skb); return 0; } @@ -710,7 +710,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, struct net_device *dev = (struct net_device *)data; struct dev_rcv_lists *d; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (dev->type != ARPHRD_CAN) diff --git a/net/can/bcm.c b/net/can/bcm.c index bd4282dae754..e9f99b2c6bc9 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1285,7 +1285,7 @@ static int bcm_notifier(struct notifier_block *nb, unsigned long msg, struct bcm_op *op; int notify_enodev = 0; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (dev->type != ARPHRD_CAN) diff --git a/net/can/raw.c b/net/can/raw.c index 94cd7f27c444..ead50c7c0d40 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -210,7 +210,7 @@ static int raw_notifier(struct notifier_block *nb, struct raw_sock *ro = container_of(nb, struct raw_sock, notifier); struct sock *sk = &ro->sk; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (dev->type != ARPHRD_CAN) diff --git a/net/core/dev.c b/net/core/dev.c index aebd08606040..812534828914 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -216,7 +216,7 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) /* Device list insertion */ static int list_netdevice(struct net_device *dev) { - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); ASSERT_RTNL(); @@ -852,8 +852,8 @@ int dev_alloc_name(struct net_device *dev, const char *name) struct net *net; int ret; - BUG_ON(!dev->nd_net); - net = dev->nd_net; + BUG_ON(!dev_net(dev)); + net = dev_net(dev); ret = __dev_alloc_name(net, name, buf); if (ret >= 0) strlcpy(dev->name, buf, IFNAMSIZ); @@ -877,9 +877,9 @@ int dev_change_name(struct net_device *dev, char *newname) struct net *net; ASSERT_RTNL(); - BUG_ON(!dev->nd_net); + BUG_ON(!dev_net(dev)); - net = dev->nd_net; + net = dev_net(dev); if (dev->flags & IFF_UP) return -EBUSY; @@ -2615,7 +2615,7 @@ static int ptype_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, "Type Device Function\n"); - else if (pt->dev == NULL || pt->dev->nd_net == seq_file_net(seq)) { + else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else @@ -3689,8 +3689,8 @@ int register_netdevice(struct net_device *dev) /* When net_device's are persistent, this will be fatal. */ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); - BUG_ON(!dev->nd_net); - net = dev->nd_net; + BUG_ON(!dev_net(dev)); + net = dev_net(dev); spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->_xmit_lock); @@ -4011,7 +4011,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev = (struct net_device *) (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; - dev->nd_net = &init_net; + dev_net_set(dev, &init_net); if (sizeof_priv) { dev->priv = ((char *)dev + @@ -4136,7 +4136,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* Get out if there is nothing todo */ err = 0; - if (dev->nd_net == net) + if (dev_net(dev) == net) goto out; /* Pick the destination device name, and ensure @@ -4187,7 +4187,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev_addr_discard(dev); /* Actually switch the network namespace */ - dev->nd_net = net; + dev_net_set(dev, net); /* Assign the new device name */ if (destname != dev->name) diff --git a/net/core/dst.c b/net/core/dst.c index 3a01a819ba47..694cd2a3f6d2 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -279,7 +279,7 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst->output = dst_discard; } else { - dst->dev = dst->dev->nd_net->loopback_dev; + dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); if (dst->neighbour && dst->neighbour->dev == dev) { diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 42ccaf5b8509..942be93a2eb0 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -618,7 +618,7 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); struct fib_rules_ops *ops; ASSERT_RTNL(); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 23c0a10c0c37..c978bd1cd659 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -388,7 +388,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, hash_val = tbl->hash(pkey, NULL); for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { if (!memcmp(n->primary_key, pkey, key_len) && - (net == n->dev->nd_net)) { + dev_net(n->dev) == net) { neigh_hold(n); NEIGH_CACHE_STAT_INC(tbl, hits); break; @@ -1298,7 +1298,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_parms *p, *ref; struct net *net; - net = dev->nd_net; + net = dev_net(dev); ref = lookup_neigh_params(tbl, net, 0); if (!ref) return NULL; @@ -2050,7 +2050,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, s_idx = 0; for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) { int lidx; - if (n->dev->nd_net != net) + if (dev_net(n->dev) != net) continue; lidx = idx++; if (lidx < s_idx) @@ -2155,7 +2155,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) n = tbl->hash_buckets[bucket]; while (n) { - if (n->dev->nd_net != net) + if (dev_net(n->dev) != net) goto next; if (state->neigh_sub_iter) { loff_t fakep = 0; @@ -2198,7 +2198,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, while (1) { while (n) { - if (n->dev->nd_net != net) + if (dev_net(n->dev) != net) goto next; if (state->neigh_sub_iter) { void *v = state->neigh_sub_iter(state, n, pos); @@ -2482,7 +2482,7 @@ static inline size_t neigh_nlmsg_size(void) static void __neigh_notify(struct neighbour *n, int type, int flags) { - struct net *net = n->dev->nd_net; + struct net *net = dev_net(n->dev); struct sk_buff *skb; int err = -ENOBUFS; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 20e63b302ba6..a803b442234c 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1874,7 +1874,7 @@ static int pktgen_device_event(struct notifier_block *unused, { struct net_device *dev = ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; /* It is OK that we do not hold the group lock right now, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2bd9c5f7627d..09250a0800f6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -972,7 +972,7 @@ struct net_device *rtnl_create_link(struct net *net, char *ifname, goto err_free; } - dev->nd_net = net; + dev_net_set(dev, net); dev->rtnl_link_ops = ops; if (tb[IFLA_MTU]) @@ -1198,7 +1198,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) { - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 23fd95a7ad15..3554fb3d251c 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2089,7 +2089,7 @@ static int dn_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; switch(event) { diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 9dc0abb50eaf..0a46b6c10e51 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -580,7 +580,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr; unsigned char padlen = 0; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto dump_it; if (dn == NULL) diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index bc0f6252613f..68d154480043 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -1064,7 +1064,7 @@ static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet struct sock *sk; struct ec_device *edev = dev->ec_ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto drop; if (skb->pkt_type == PACKET_OTHERHOST) @@ -1121,7 +1121,7 @@ static int econet_notifier(struct notifier_block *this, unsigned long msg, void struct net_device *dev = (struct net_device *)data; struct ec_device *edev; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; switch (msg) { diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 832473e30b36..3ce2e137e7bc 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -242,7 +242,7 @@ static int arp_constructor(struct neighbour *neigh) return -EINVAL; } - neigh->type = inet_addr_type(dev->nd_net, addr); + neigh->type = inet_addr_type(dev_net(dev), addr); parms = in_dev->arp_parms; __neigh_parms_put(neigh->parms); @@ -341,14 +341,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { default: case 0: /* By default announce any local IP */ - if (skb && inet_addr_type(dev->nd_net, ip_hdr(skb)->saddr) == RTN_LOCAL) + if (skb && inet_addr_type(dev_net(dev), ip_hdr(skb)->saddr) == RTN_LOCAL) saddr = ip_hdr(skb)->saddr; break; case 1: /* Restrict announcements of saddr in same subnet */ if (!skb) break; saddr = ip_hdr(skb)->saddr; - if (inet_addr_type(dev->nd_net, saddr) == RTN_LOCAL) { + if (inet_addr_type(dev_net(dev), saddr) == RTN_LOCAL) { /* saddr should be known to target */ if (inet_addr_onlink(in_dev, target, saddr)) break; @@ -424,7 +424,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) int flag = 0; /*unsigned long now; */ - if (ip_route_output_key(dev->nd_net, &rt, &fl) < 0) + if (ip_route_output_key(dev_net(dev), &rt, &fl) < 0) return 1; if (rt->u.dst.dev != dev) { NET_INC_STATS_BH(LINUX_MIB_ARPFILTER); @@ -477,7 +477,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) paddr = skb->rtable->rt_gateway; - if (arp_set_predefined(inet_addr_type(dev->nd_net, paddr), haddr, paddr, dev)) + if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev)) return 0; n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); @@ -709,7 +709,7 @@ static int arp_process(struct sk_buff *skb) u16 dev_type = dev->type; int addr_type; struct neighbour *n; - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); /* arp_rcv below verifies the ARP header and verifies the device * is ARP'able. @@ -858,7 +858,7 @@ static int arp_process(struct sk_buff *skb) n = __neigh_lookup(&arp_tbl, &sip, dev, 0); - if (IPV4_DEVCONF_ALL(dev->nd_net, ARP_ACCEPT)) { + if (IPV4_DEVCONF_ALL(dev_net(dev), ARP_ACCEPT)) { /* Unsolicited ARP is not accepted by default. It is possible, that this option should be enabled for some devices (strip is candidate) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 4a10dbbbe0a1..823c724a8593 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -165,7 +165,7 @@ static struct in_device *inetdev_init(struct net_device *dev) if (!in_dev) goto out; INIT_RCU_HEAD(&in_dev->rcu_head); - memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt, + memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt, sizeof(in_dev->cnf)); in_dev->cnf.sysctl = NULL; in_dev->dev = dev; @@ -872,7 +872,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) { __be32 addr = 0; struct in_device *in_dev; - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); @@ -974,7 +974,7 @@ __be32 inet_confirm_addr(struct in_device *in_dev, if (scope != RT_SCOPE_LINK) return confirm_addr_indev(in_dev, dst, local, scope); - net = in_dev->dev->nd_net; + net = dev_net(in_dev->dev); read_lock(&dev_base_lock); rcu_read_lock(); for_each_netdev(net, dev) { @@ -1203,7 +1203,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, int err = -ENOBUFS; struct net *net; - net = ifa->ifa_dev->dev->nd_net; + net = dev_net(ifa->ifa_dev->dev); skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); if (skb == NULL) goto errout; @@ -1517,7 +1517,7 @@ static void devinet_sysctl_register(struct in_device *idev) { neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4, NET_IPV4_NEIGH, "ipv4", NULL, NULL); - __devinet_sysctl_register(idev->dev->nd_net, idev->dev->name, + __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, idev->dev->ifindex, &idev->cnf); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 86ff2711fc95..0e4b34b07cb5 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -257,7 +257,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, if (in_dev == NULL) goto e_inval; - net = dev->nd_net; + net = dev_net(dev); if (fib_lookup(net, &fl, &res)) goto last_resort; if (res.type != RTN_UNICAST) @@ -674,7 +674,7 @@ out: static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) { - struct net *net = ifa->ifa_dev->dev->nd_net; + struct net *net = dev_net(ifa->ifa_dev->dev); struct fib_table *tb; struct fib_config cfg = { .fc_protocol = RTPROT_KERNEL, @@ -801,15 +801,15 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); /* Check, that this local address finally disappeared. */ - if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) { + if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { /* And the last, but not the least thing. We must flush stray FIB entries. First of all, we scan fib_info list searching for stray nexthop entries, then ignite fib_flush. */ - if (fib_sync_down_addr(dev->nd_net, ifa->ifa_local)) - fib_flush(dev->nd_net); + if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local)) + fib_flush(dev_net(dev)); } } #undef LOCAL_OK @@ -899,7 +899,7 @@ static void nl_fib_lookup_exit(struct net *net) static void fib_disable_ip(struct net_device *dev, int force) { if (fib_sync_down_dev(dev, force)) - fib_flush(dev->nd_net); + fib_flush(dev_net(dev)); rt_cache_flush(0); arp_ifdown(dev); } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index ff9a8e643fcc..f38f093ef751 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -351,7 +351,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, struct sock *sk; struct sk_buff *skb; - sk = icmp_sk(rt->u.dst.dev->nd_net); + sk = icmp_sk(dev_net(rt->u.dst.dev)); if (ip_append_data(sk, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, @@ -382,7 +382,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { struct ipcm_cookie ipc; struct rtable *rt = skb->rtable; - struct net *net = rt->u.dst.dev->nd_net; + struct net *net = dev_net(rt->u.dst.dev); struct sock *sk = icmp_sk(net); struct inet_sock *inet = inet_sk(sk); __be32 daddr; @@ -447,7 +447,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (!rt) goto out; - net = rt->u.dst.dev->nd_net; + net = dev_net(rt->u.dst.dev); sk = icmp_sk(net); /* @@ -677,7 +677,7 @@ static void icmp_unreach(struct sk_buff *skb) u32 info = 0; struct net *net; - net = skb->dst->dev->nd_net; + net = dev_net(skb->dst->dev); /* * Incomplete header ? diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6a4ee8da6994..682f632bfb77 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -130,12 +130,12 @@ */ #define IGMP_V1_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL(in_dev->dev->nd_net, FORCE_IGMP_VERSION) == 1 || \ + (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \ IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ ((in_dev)->mr_v1_seen && \ time_before(jiffies, (in_dev)->mr_v1_seen))) #define IGMP_V2_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL(in_dev->dev->nd_net, FORCE_IGMP_VERSION) == 2 || \ + (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \ IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ ((in_dev)->mr_v2_seen && \ time_before(jiffies, (in_dev)->mr_v2_seen))) @@ -1198,7 +1198,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); - if (in_dev->dev->nd_net != &init_net) + if (dev_net(in_dev->dev) != &init_net) return; for (im=in_dev->mc_list; im; im=im->next) { @@ -1280,7 +1280,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); - if (in_dev->dev->nd_net != &init_net) + if (dev_net(in_dev->dev) != &init_net) return; for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { @@ -1310,7 +1310,7 @@ void ip_mc_down(struct in_device *in_dev) ASSERT_RTNL(); - if (in_dev->dev->nd_net != &init_net) + if (dev_net(in_dev->dev) != &init_net) return; for (i=in_dev->mc_list; i; i=i->next) @@ -1333,7 +1333,7 @@ void ip_mc_init_dev(struct in_device *in_dev) { ASSERT_RTNL(); - if (in_dev->dev->nd_net != &init_net) + if (dev_net(in_dev->dev) != &init_net) return; in_dev->mc_tomb = NULL; @@ -1359,7 +1359,7 @@ void ip_mc_up(struct in_device *in_dev) ASSERT_RTNL(); - if (in_dev->dev->nd_net != &init_net) + if (dev_net(in_dev->dev) != &init_net) return; ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); @@ -1378,7 +1378,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev) ASSERT_RTNL(); - if (in_dev->dev->nd_net != &init_net) + if (dev_net(in_dev->dev) != &init_net) return; /* Deactivate timers */ diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8b448c4b9080..fcb60e76b234 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -571,7 +571,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); - net = skb->dev ? skb->dev->nd_net : skb->dst->dev->nd_net; + net = skb->dev ? dev_net(skb->dev) : dev_net(skb->dst->dev); /* Start by cleaning up the memory. */ if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh) ip_evictor(net); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f9ee84420cb3..50972b397a9a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1190,7 +1190,7 @@ static int ipgre_close(struct net_device *dev) struct ip_tunnel *t = netdev_priv(dev); if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { struct in_device *in_dev; - in_dev = inetdev_by_index(dev->nd_net, t->mlink); + in_dev = inetdev_by_index(dev_net(dev), t->mlink); if (in_dev) { ip_mc_dec_group(in_dev, t->parms.iph.daddr); in_dev_put(in_dev); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 2aeea5d15425..26685c83a146 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -172,7 +172,7 @@ int ip_call_ra_chain(struct sk_buff *skb) if (sk && inet_sk(sk)->num == protocol && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) && - sk->sk_net == dev->nd_net) { + sk->sk_net == dev_net(dev)) { if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { read_unlock(&ip_ra_lock); @@ -199,7 +199,7 @@ int ip_call_ra_chain(struct sk_buff *skb) static int ip_local_deliver_finish(struct sk_buff *skb) { - struct net *net = skb->dev->nd_net; + struct net *net = dev_net(skb->dev); __skb_pull(skb, ip_hdrlen(skb)); @@ -291,7 +291,7 @@ static inline int ip_rcv_options(struct sk_buff *skb) opt = &(IPCB(skb)->opt); opt->optlen = iph->ihl*4 - sizeof(struct iphdr); - if (ip_options_compile(dev->nd_net, opt, skb)) { + if (ip_options_compile(dev_net(dev), opt, skb)) { IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); goto drop; } diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 87cc1222c600..d107543d3f81 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -145,7 +145,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) __be32 addr; memcpy(&addr, sptr+soffset-1, 4); - if (inet_addr_type(skb->dst->dev->nd_net, addr) != RTN_LOCAL) { + if (inet_addr_type(dev_net(skb->dst->dev), addr) != RTN_LOCAL) { dopt->ts_needtime = 1; soffset += 8; } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 96138b128de8..08e8fb60d315 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -434,7 +434,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt unsigned char *sha, *tha; /* s for "source", t for "target" */ struct ic_device *d; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto drop; if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) @@ -854,7 +854,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str struct ic_device *d; int len, ext_len; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto drop; /* Perform verifications before taking the lock. */ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7d63d74ef62a..e54bc1364473 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1089,7 +1089,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v struct vif_device *v; int ct; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (event != NETDEV_UNREGISTER) diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index fe05da41d6ba..500998a2dec1 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -481,7 +481,7 @@ ipq_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; /* Drop any packets associated with the downed device */ diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index c6817b18366a..84c26dd27d81 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -120,7 +120,7 @@ static int masq_device_event(struct notifier_block *this, { const struct net_device *dev = ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (event == NETDEV_DOWN) { diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 3f68a937b602..8756d502a47f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -168,7 +168,7 @@ static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) if (hlist_empty(head)) goto out; - net = skb->dev->nd_net; + net = dev_net(skb->dev); sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); @@ -276,7 +276,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); if (raw_sk != NULL) { iph = (struct iphdr *)skb->data; - net = skb->dev->nd_net; + net = dev_net(skb->dev); while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, iph->daddr, iph->saddr, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2941ef21f203..7768d718e199 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -284,7 +284,7 @@ static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st) rcu_read_lock_bh(); r = rcu_dereference(rt_hash_table[st->bucket].chain); while (r) { - if (r->u.dst.dev->nd_net == st->p.net && + if (dev_net(r->u.dst.dev) == st->p.net && r->rt_genid == st->genid) return r; r = rcu_dereference(r->u.dst.rt_next); @@ -312,7 +312,7 @@ static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r) { while ((r = __rt_cache_get_next(st, r)) != NULL) { - if (r->u.dst.dev->nd_net != st->p.net) + if (dev_net(r->u.dst.dev) != st->p.net) continue; if (r->rt_genid == st->genid) break; @@ -680,7 +680,7 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) { - return rt1->u.dst.dev->nd_net == rt2->u.dst.dev->nd_net; + return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev); } /* @@ -1164,7 +1164,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, if (!in_dev) return; - net = dev->nd_net; + net = dev_net(dev); if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) || ipv4_is_zeronet(new_gw)) @@ -1195,7 +1195,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rth->fl.oif != ikeys[k] || rth->fl.iif != 0 || rth->rt_genid != atomic_read(&rt_genid) || - rth->u.dst.dev->nd_net != net) { + dev_net(rth->u.dst.dev) != net) { rthp = &rth->u.dst.rt_next; continue; } @@ -1454,7 +1454,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rth->rt_src == iph->saddr && rth->fl.iif == 0 && !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) && - rth->u.dst.dev->nd_net == net && + dev_net(rth->u.dst.dev) == net && rth->rt_genid == atomic_read(&rt_genid)) { unsigned short mtu = new_mtu; @@ -1530,9 +1530,9 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, { struct rtable *rt = (struct rtable *) dst; struct in_device *idev = rt->idev; - if (dev != dev->nd_net->loopback_dev && idev && idev->dev == dev) { + if (dev != dev_net(dev)->loopback_dev && idev && idev->dev == dev) { struct in_device *loopback_idev = - in_dev_get(dev->nd_net->loopback_dev); + in_dev_get(dev_net(dev)->loopback_dev); if (loopback_idev) { rt->idev = loopback_idev; in_dev_put(idev); @@ -1576,7 +1576,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) if (rt->fl.iif == 0) src = rt->rt_src; - else if (fib_lookup(rt->u.dst.dev->nd_net, &rt->fl, &res) == 0) { + else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) { src = FIB_RES_PREFSRC(res); fib_res_put(&res); } else @@ -1900,7 +1900,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, __be32 spec_dst; int err = -EINVAL; int free_res = 0; - struct net * net = dev->nd_net; + struct net * net = dev_net(dev); /* IP on this device is disabled. */ @@ -2071,7 +2071,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, int iif = dev->ifindex; struct net *net; - net = dev->nd_net; + net = dev_net(dev); tos &= IPTOS_RT_MASK; hash = rt_hash(daddr, saddr, iif); @@ -2084,7 +2084,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.oif == 0 && rth->fl.mark == skb->mark && rth->fl.fl4_tos == tos && - rth->u.dst.dev->nd_net == net && + dev_net(rth->u.dst.dev) == net && rth->rt_genid == atomic_read(&rt_genid)) { dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(in_hit); @@ -2486,7 +2486,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, rth->fl.mark == flp->mark && !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && - rth->u.dst.dev->nd_net == net && + dev_net(rth->u.dst.dev) == net && rth->rt_genid == atomic_read(&rt_genid)) { dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(out_hit); @@ -2795,7 +2795,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock_bh(); for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; rt = rcu_dereference(rt->u.dst.rt_next), idx++) { - if (rt->u.dst.dev->nd_net != net || idx < s_idx) + if (dev_net(rt->u.dst.dev) != net || idx < s_idx) continue; if (rt->rt_genid != atomic_read(&rt_genid)) continue; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 649d00a50cb1..28bece6f281b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -353,7 +353,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) return; } - sk = inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->daddr, th->dest, + sk = inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->daddr, th->dest, iph->saddr, th->source, inet_iif(skb)); if (!sk) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); @@ -1644,7 +1644,7 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = iph->tos; TCP_SKB_CB(skb)->sacked = 0; - sk = __inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->saddr, + sk = __inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); if (!sk) goto no_tcp_socket; @@ -1718,7 +1718,7 @@ do_time_wait: } switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { case TCP_TW_SYN: { - struct sock *sk2 = inet_lookup_listener(skb->dev->nd_net, + struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, iph->daddr, th->dest, inet_iif(skb)); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b37581dfd029..e2cd93481359 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -357,7 +357,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) int harderr; int err; - sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest, + sk = __udp4_lib_lookup(dev_net(skb->dev), iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex, udptable); if (sk == NULL) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); @@ -1181,7 +1181,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); - sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr, + sk = __udp4_lib_lookup(dev_net(skb->dev), saddr, uh->source, daddr, uh->dest, inet_iif(skb), udptable); if (sk != NULL) { diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 10ed70491434..c63de0a72aba 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -221,7 +221,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, xdst = (struct xfrm_dst *)dst; if (xdst->u.rt.idev->dev == dev) { struct in_device *loopback_idev = - in_dev_get(dev->nd_net->loopback_dev); + in_dev_get(dev_net(dev)->loopback_dev); BUG_ON(!loopback_idev); do { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 89954885dee1..d1de9ec74261 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -335,7 +335,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) rwlock_init(&ndev->lock); ndev->dev = dev; - memcpy(&ndev->cnf, dev->nd_net->ipv6.devconf_dflt, sizeof(ndev->cnf)); + memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); ndev->cnf.mtu6 = dev->mtu; ndev->cnf.sysctl = NULL; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); @@ -561,7 +561,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, write_lock(&addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ - if (ipv6_chk_same_addr(idev->dev->nd_net, addr, idev->dev)) { + if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) { ADBG(("ipv6_add_addr: already assigned\n")); err = -EEXIST; goto out; @@ -751,7 +751,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) { struct in6_addr prefix; struct rt6_info *rt; - struct net *net = ifp->idev->dev->nd_net; + struct net *net = dev_net(ifp->idev->dev); ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1); @@ -1044,7 +1044,7 @@ int ipv6_dev_get_saddr(struct net_device *dst_dev, { struct ipv6_saddr_score scores[2], *score = &scores[0], *hiscore = &scores[1]; - struct net *net = dst_dev->nd_net; + struct net *net = dev_net(dst_dev); struct ipv6_saddr_dst dst; struct net_device *dev; int dst_type; @@ -1217,7 +1217,7 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr, read_lock_bh(&addrconf_hash_lock); for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { - if (ifp->idev->dev->nd_net != net) + if (dev_net(ifp->idev->dev) != net) continue; if (ipv6_addr_equal(&ifp->addr, addr) && !(ifp->flags&IFA_F_TENTATIVE)) { @@ -1239,7 +1239,7 @@ int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, u8 hash = ipv6_addr_hash(addr); for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { - if (ifp->idev->dev->nd_net != net) + if (dev_net(ifp->idev->dev) != net) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { if (dev == NULL || ifp->idev->dev == dev) @@ -1257,7 +1257,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, struct in6_addr *addr, read_lock_bh(&addrconf_hash_lock); for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { - if (ifp->idev->dev->nd_net != net) + if (dev_net(ifp->idev->dev) != net) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { if (dev == NULL || ifp->idev->dev == dev || @@ -1559,7 +1559,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, .fc_expires = expires, .fc_dst_len = plen, .fc_flags = RTF_UP | flags, - .fc_nlinfo.nl_net = dev->nd_net, + .fc_nlinfo.nl_net = dev_net(dev), }; ipv6_addr_copy(&cfg.fc_dst, pfx); @@ -1586,7 +1586,7 @@ static void addrconf_add_mroute(struct net_device *dev) .fc_ifindex = dev->ifindex, .fc_dst_len = 8, .fc_flags = RTF_UP, - .fc_nlinfo.nl_net = dev->nd_net, + .fc_nlinfo.nl_net = dev_net(dev), }; ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); @@ -1603,7 +1603,7 @@ static void sit_route_add(struct net_device *dev) .fc_ifindex = dev->ifindex, .fc_dst_len = 96, .fc_flags = RTF_UP | RTF_NONEXTHOP, - .fc_nlinfo.nl_net = dev->nd_net, + .fc_nlinfo.nl_net = dev_net(dev), }; /* prefix length - 96 bits "::d.d.d.d" */ @@ -1704,7 +1704,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) if (pinfo->onlink) { struct rt6_info *rt; - rt = rt6_lookup(dev->nd_net, &pinfo->prefix, NULL, + rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL, dev->ifindex, 1); if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { @@ -1748,7 +1748,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) ok: - ifp = ipv6_get_ifaddr(dev->nd_net, &addr, dev, 1); + ifp = ipv6_get_ifaddr(dev_net(dev), &addr, dev, 1); if (ifp == NULL && valid_lft) { int max_addresses = in6_dev->cnf.max_addresses; @@ -2071,7 +2071,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) struct inet6_ifaddr * ifp; struct in6_addr addr; struct net_device *dev; - struct net *net = idev->dev->nd_net; + struct net *net = dev_net(idev->dev); int scope; ASSERT_RTNL(); @@ -2261,7 +2261,7 @@ ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev) static void ip6_tnl_add_linklocal(struct inet6_dev *idev) { struct net_device *link_dev; - struct net *net = idev->dev->nd_net; + struct net *net = dev_net(idev->dev); /* first try to inherit the link-local address from the link device */ if (idev->dev->iflink && @@ -2442,7 +2442,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) { struct inet6_dev *idev; struct inet6_ifaddr *ifa, **bifa; - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); int i; ASSERT_RTNL(); @@ -2771,7 +2771,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { ifa = inet6_addr_lst[state->bucket]; - while (ifa && ifa->idev->dev->nd_net != net) + while (ifa && dev_net(ifa->idev->dev) != net) ifa = ifa->lst_next; if (ifa) break; @@ -2787,7 +2787,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifad ifa = ifa->lst_next; try_again: if (ifa) { - if (ifa->idev->dev->nd_net != net) { + if (dev_net(ifa->idev->dev) != net) { ifa = ifa->lst_next; goto try_again; } @@ -2905,7 +2905,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) u8 hash = ipv6_addr_hash(addr); read_lock_bh(&addrconf_hash_lock); for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { - if (ifp->idev->dev->nd_net != net) + if (dev_net(ifp->idev->dev) != net) continue; if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && (ifp->flags & IFA_F_HOMEADDRESS)) { @@ -3469,7 +3469,7 @@ errout: static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) { struct sk_buff *skb; - struct net *net = ifa->idev->dev->nd_net; + struct net *net = dev_net(ifa->idev->dev); int err = -ENOBUFS; skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); @@ -3675,7 +3675,7 @@ cont: void inet6_ifinfo_notify(int event, struct inet6_dev *idev) { struct sk_buff *skb; - struct net *net = idev->dev->nd_net; + struct net *net = dev_net(idev->dev); int err = -ENOBUFS; skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC); @@ -3745,7 +3745,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo) { struct sk_buff *skb; - struct net *net = idev->dev->nd_net; + struct net *net = dev_net(idev->dev); int err = -ENOBUFS; skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC); @@ -4157,7 +4157,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev) NET_IPV6_NEIGH, "ipv6", &ndisc_ifinfo_sysctl_change, NULL); - __addrconf_sysctl_register(idev->dev->nd_net, idev->dev->name, + __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, idev->dev->ifindex, idev, &idev->cnf); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 86332417b402..50857662e6b7 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -306,7 +306,7 @@ static inline void mip6_addr_swap(struct sk_buff *skb) {} void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, struct net_device *dev) { - struct net *net = skb->dev->nd_net; + struct net *net = dev_net(skb->dev); struct inet6_dev *idev = NULL; struct ipv6hdr *hdr = ipv6_hdr(skb); struct sock *sk; @@ -507,7 +507,7 @@ EXPORT_SYMBOL(icmpv6_send); static void icmpv6_echo_reply(struct sk_buff *skb) { - struct net *net = skb->dev->nd_net; + struct net *net = dev_net(skb->dev); struct sock *sk; struct inet6_dev *idev; struct ipv6_pinfo *np; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d34aa61353bb..556300f0eba5 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -402,7 +402,7 @@ int ip6_forward(struct sk_buff *skb) struct dst_entry *dst = skb->dst; struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); - struct net *net = dst->dev->nd_net; + struct net *net = dev_net(dst->dev); if (ipv6_devconf.forwarding == 0) goto error; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 957ac7e9e929..0357de8e78c8 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1400,7 +1400,7 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) static struct sk_buff *mld_newpack(struct net_device *dev, int size) { - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); struct sock *sk = net->ipv6.igmp_sk; struct sk_buff *skb; struct mld2_report *pmr; @@ -1448,7 +1448,7 @@ static void mld_sendpack(struct sk_buff *skb) (struct mld2_report *)skb_transport_header(skb); int payload_len, mldlen; struct inet6_dev *idev = in6_dev_get(skb->dev); - struct net *net = skb->dev->nd_net; + struct net *net = dev_net(skb->dev); int err; struct flowi fl; @@ -1762,7 +1762,7 @@ static void mld_send_cr(struct inet6_dev *idev) static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) { - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); struct sock *sk = net->ipv6.igmp_sk; struct inet6_dev *idev; struct sk_buff *skb; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3f68a6eae7b2..79af57f586e8 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -447,7 +447,7 @@ static void __ndisc_send(struct net_device *dev, { struct flowi fl; struct dst_entry *dst; - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; struct sk_buff *skb; struct icmp6hdr *hdr; @@ -539,7 +539,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, }; /* for anycast or proxy, solicited_addr != src_addr */ - ifp = ipv6_get_ifaddr(dev->nd_net, solicited_addr, dev, 1); + ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1); if (ifp) { src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) @@ -547,7 +547,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, in6_ifa_put(ifp); } else { if (ipv6_dev_get_saddr(dev, daddr, - inet6_sk(dev->nd_net->ipv6.ndisc_sk)->srcprefs, + inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs, &tmpaddr)) return; src_addr = &tmpaddr; @@ -601,7 +601,7 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, * suppress the inclusion of the sllao. */ if (send_sllao) { - struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev->nd_net, saddr, + struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr, dev, 1); if (ifp) { if (ifp->flags & IFA_F_OPTIMISTIC) { @@ -639,7 +639,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; int probes = atomic_read(&neigh->probes); - if (skb && ipv6_chk_addr(dev->nd_net, &ipv6_hdr(skb)->saddr, dev, 1)) + if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1)) saddr = &ipv6_hdr(skb)->saddr; if ((probes -= neigh->parms->ucast_probes) < 0) { @@ -727,7 +727,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) inc = ipv6_addr_is_multicast(daddr); - ifp = ipv6_get_ifaddr(dev->nd_net, &msg->target, dev, 1); + ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); if (ifp) { if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { @@ -776,7 +776,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && - (pneigh = pneigh_lookup(&nd_tbl, dev->nd_net, + (pneigh = pneigh_lookup(&nd_tbl, dev_net(dev), &msg->target, dev, 0)) != NULL)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && @@ -886,7 +886,7 @@ static void ndisc_recv_na(struct sk_buff *skb) return; } } - ifp = ipv6_get_ifaddr(dev->nd_net, &msg->target, dev, 1); + ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); if (ifp) { if (ifp->flags & IFA_F_TENTATIVE) { addrconf_dad_failure(ifp); @@ -918,7 +918,7 @@ static void ndisc_recv_na(struct sk_buff *skb) */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp && - pneigh_lookup(&nd_tbl, dev->nd_net, &msg->target, dev, 0)) { + pneigh_lookup(&nd_tbl, dev_net(dev), &msg->target, dev, 0)) { /* XXX: idev->cnf.prixy_ndp */ goto out; } @@ -1008,7 +1008,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) struct sk_buff *skb; struct nlmsghdr *nlh; struct nduseroptmsg *ndmsg; - struct net *net = ra->dev->nd_net; + struct net *net = dev_net(ra->dev); int err; int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg) + (opt->nd_opt_len << 3)); @@ -1395,7 +1395,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, struct in6_addr *target) { struct net_device *dev = skb->dev; - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); struct sk_buff *buff; @@ -1597,7 +1597,7 @@ int ndisc_rcv(struct sk_buff *skb) static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); switch (event) { case NETDEV_CHANGEADDR: diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index cc2f9afcf808..a6d30626b47c 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -484,7 +484,7 @@ ipq_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; /* Drop any packets associated with the downed device */ diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 8a5be290c710..364dc332532c 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -214,7 +214,7 @@ int snmp6_register_dev(struct inet6_dev *idev) if (!idev || !idev->dev) return -EINVAL; - if (idev->dev->nd_net != &init_net) + if (dev_net(idev->dev) != &init_net) return 0; if (!proc_net_devsnmp6) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 548d0763f4d3..efb0047f6880 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -176,7 +176,7 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) if (sk == NULL) goto out; - net = skb->dev->nd_net; + net = dev_net(skb->dev); sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); while (sk) { @@ -363,7 +363,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, if (sk != NULL) { saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; - net = skb->dev->nd_net; + net = dev_net(skb->dev); while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr, IP6CB(skb)->iif))) { diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index f936d045a39d..4e1447634f36 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -600,7 +600,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - net = skb->dev->nd_net; + net = dev_net(skb->dev); if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh) ip6_evictor(net, ip6_dst_idev(skb->dst)); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 06faa46920e1..65053fba8c1a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -208,7 +208,7 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; struct net_device *loopback_dev = - dev->nd_net->loopback_dev; + dev_net(dev)->loopback_dev; if (dev != loopback_dev && idev != NULL && idev->dev == dev) { struct inet6_dev *loopback_idev = @@ -433,7 +433,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) RT6_TRACE("%s() => %p\n", __func__, match); - net = rt0->rt6i_dev->nd_net; + net = dev_net(rt0->rt6i_dev); return (match ? match : net->ipv6.ip6_null_entry); } @@ -441,7 +441,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, struct in6_addr *gwaddr) { - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; unsigned int pref; @@ -607,7 +607,7 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) int ip6_ins_rt(struct rt6_info *rt) { struct nl_info info = { - .nl_net = rt->rt6i_dev->nd_net, + .nl_net = dev_net(rt->rt6i_dev), }; return __ip6_ins_rt(rt, &info); } @@ -745,7 +745,7 @@ static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table * void ip6_route_input(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); - struct net *net = skb->dev->nd_net; + struct net *net = dev_net(skb->dev); int flags = RT6_LOOKUP_F_HAS_SADDR; struct flowi fl = { .iif = skb->dev->ifindex, @@ -928,7 +928,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, { struct rt6_info *rt; struct inet6_dev *idev = in6_dev_get(dev); - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); if (unlikely(idev == NULL)) return NULL; @@ -1252,7 +1252,7 @@ install_route: rt->rt6i_idev = idev; rt->rt6i_table = table; - cfg->fc_nlinfo.nl_net = dev->nd_net; + cfg->fc_nlinfo.nl_net = dev_net(dev); return __ip6_ins_rt(rt, &cfg->fc_nlinfo); @@ -1270,7 +1270,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) { int err; struct fib6_table *table; - struct net *net = rt->rt6i_dev->nd_net; + struct net *net = dev_net(rt->rt6i_dev); if (rt == net->ipv6.ip6_null_entry) return -ENOENT; @@ -1289,7 +1289,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) int ip6_del_rt(struct rt6_info *rt) { struct nl_info info = { - .nl_net = rt->rt6i_dev->nd_net, + .nl_net = dev_net(rt->rt6i_dev), }; return __ip6_del_rt(rt, &info); } @@ -1401,7 +1401,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, struct net_device *dev) { int flags = RT6_LOOKUP_F_HAS_SADDR; - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); struct ip6rd_flowi rdfl = { .fl = { .oif = dev->ifindex, @@ -1428,7 +1428,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, { struct rt6_info *rt, *nrt = NULL; struct netevent_redirect netevent; - struct net *net = neigh->dev->nd_net; + struct net *net = dev_net(neigh->dev); rt = ip6_route_redirect(dest, src, saddr, neigh->dev); @@ -1477,7 +1477,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, nrt->rt6i_nexthop = neigh_clone(neigh); /* Reset pmtu, it may be better */ nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); - nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(neigh->dev->nd_net, + nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), dst_mtu(&nrt->u.dst)); if (ip6_ins_rt(nrt)) @@ -1506,7 +1506,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, struct net_device *dev, u32 pmtu) { struct rt6_info *rt, *nrt; - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); int allfrag = 0; rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0); @@ -1583,7 +1583,7 @@ out: static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) { - struct net *net = ort->rt6i_dev->nd_net; + struct net *net = dev_net(ort->rt6i_dev); struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); if (rt) { @@ -1682,7 +1682,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d struct rt6_info *rt; struct fib6_table *table; - table = fib6_get_table(dev->nd_net, RT6_TABLE_DFLT); + table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); if (table == NULL) return NULL; @@ -1713,7 +1713,7 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, RTF_UP | RTF_EXPIRES | RTF_PREF(pref), .fc_nlinfo.pid = 0, .fc_nlinfo.nlh = NULL, - .fc_nlinfo.nl_net = dev->nd_net, + .fc_nlinfo.nl_net = dev_net(dev), }; ipv6_addr_copy(&cfg.fc_gateway, gwaddr); @@ -1862,7 +1862,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, const struct in6_addr *addr, int anycast) { - struct net *net = idev->dev->nd_net; + struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); if (rt == NULL) @@ -1939,7 +1939,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) { struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; struct inet6_dev *idev; - struct net *net = arg->dev->nd_net; + struct net *net = dev_net(arg->dev); /* In IPv6 pmtu discovery is not optional, so that RTAX_MTU lock cannot disable it. @@ -1983,7 +1983,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned mtu) .mtu = mtu, }; - fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg); + fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); } static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { @@ -2321,7 +2321,7 @@ static int ip6_route_dev_notify(struct notifier_block *this, unsigned long event, void *data) { struct net_device *dev = (struct net_device *)data; - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { net->ipv6.ip6_null_entry->u.dst.dev = dev; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8dd72966ff78..086deffff9c9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -321,7 +321,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct tcp_sock *tp; __u32 seq; - sk = inet6_lookup(skb->dev->nd_net, &tcp_hashinfo, &hdr->daddr, + sk = inet6_lookup(dev_net(skb->dev), &tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex); if (sk == NULL) { @@ -988,7 +988,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; struct flowi fl; - struct net *net = skb->dst->dev->nd_net; + struct net *net = dev_net(skb->dst->dev); struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(*th); #ifdef CONFIG_TCP_MD5SIG @@ -1093,7 +1093,7 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw, struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; struct flowi fl; - struct net *net = skb->dev->nd_net; + struct net *net = dev_net(skb->dev); struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(struct tcphdr); __be32 *topt; @@ -1739,7 +1739,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb)); TCP_SKB_CB(skb)->sacked = 0; - sk = __inet6_lookup(skb->dev->nd_net, &tcp_hashinfo, + sk = __inet6_lookup(dev_net(skb->dev), &tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb)); @@ -1822,7 +1822,7 @@ do_time_wait: { struct sock *sk2; - sk2 = inet6_lookup_listener(skb->dev->nd_net, &tcp_hashinfo, + sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb)); if (sk2 != NULL) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 593d3efadaf9..6683c04b427e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -235,7 +235,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct sock *sk; int err; - sk = __udp6_lib_lookup(skb->dev->nd_net, daddr, uh->dest, + sk = __udp6_lib_lookup(dev_net(skb->dev), daddr, uh->dest, saddr, uh->source, inet6_iif(skb), udptable); if (sk == NULL) return; @@ -483,7 +483,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], * check socket cache ... must talk to Alan about his plans * for sock caches... i'll skip this for now. */ - sk = __udp6_lib_lookup(skb->dev->nd_net, saddr, uh->source, + sk = __udp6_lib_lookup(dev_net(skb->dev), saddr, uh->source, daddr, uh->dest, inet6_iif(skb), udptable); if (sk == NULL) { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index d92d1fceb8cf..8f1e0543b3c4 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -247,7 +247,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, xdst = (struct xfrm_dst *)dst; if (xdst->u.rt6.rt6i_idev->dev == dev) { struct inet6_dev *loopback_idev = - in6_dev_get(dev->nd_net->loopback_dev); + in6_dev_get(dev_net(dev)->loopback_dev); BUG_ON(!loopback_idev); do { diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index c76a9523091b..81ae8735f5e3 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -335,7 +335,7 @@ static int ipxitf_device_event(struct notifier_block *notifier, struct net_device *dev = ptr; struct ipx_interface *i, *tmp; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (event != NETDEV_DOWN && event != NETDEV_UP) @@ -1636,7 +1636,7 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty u16 ipx_pktsize; int rc = 0; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto drop; /* Not ours */ diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index a38b231c8689..90894534f3cc 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -1326,7 +1326,7 @@ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, int command; __u8 control; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto out; /* FIXME: should we get our own field? */ diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index b9143d2a04e1..a69c5c427fe3 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -146,7 +146,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, int (*rcv)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto drop; /* diff --git a/net/netfilter/core.c b/net/netfilter/core.c index ec05684c56d7..292fa28146fb 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -168,7 +168,7 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, #ifdef CONFIG_NET_NS struct net *net; - net = indev == NULL ? outdev->nd_net : indev->nd_net; + net = indev == NULL ? dev_net(outdev) : dev_net(indev); if (net != &init_net) return 1; #endif diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 012cb6910820..81fb048add88 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -557,7 +557,7 @@ nfqnl_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; /* Drop any packets associated with the downed device */ diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 4478f2f6079d..a547c6320eb3 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -954,7 +954,7 @@ static int netlbl_unlhsh_netdev_handler(struct notifier_block *this, struct net_device *dev = ptr; struct netlbl_unlhsh_iface *iface = NULL; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; /* XXX - should this be a check for NETDEV_DOWN or _UNREGISTER? */ diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 972250c974f1..a270ebf9f765 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -106,7 +106,7 @@ static int nr_device_event(struct notifier_block *this, unsigned long event, voi { struct net_device *dev = (struct net_device *)ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (event != NETDEV_DOWN) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a56ed2120e07..baa290d3444a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -263,7 +263,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct if (skb->pkt_type == PACKET_LOOPBACK) goto out; - if (dev->nd_net != sk->sk_net) + if (dev_net(dev) != sk->sk_net) goto out; if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) @@ -451,7 +451,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet sk = pt->af_packet_priv; po = pkt_sk(sk); - if (dev->nd_net != sk->sk_net) + if (dev_net(dev) != sk->sk_net) goto drop; skb->dev = dev; @@ -568,7 +568,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe sk = pt->af_packet_priv; po = pkt_sk(sk); - if (dev->nd_net != sk->sk_net) + if (dev_net(dev) != sk->sk_net) goto drop; if (dev->header_ops) { @@ -1450,7 +1450,7 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void struct sock *sk; struct hlist_node *node; struct net_device *dev = data; - struct net *net = dev->nd_net; + struct net *net = dev_net(dev); read_lock(&net->packet.sklist_lock); sk_for_each(sk, node, &net->packet.sklist) { diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 4a31a81059ab..1a7f143cf741 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -197,7 +197,7 @@ static int rose_device_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = (struct net_device *)ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (event != NETDEV_DOWN) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index beea2fb18b15..2faa0d8839eb 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -630,7 +630,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, struct sctp_sockaddr_entry *temp; int found = 0; - if (ifa->ifa_dev->dev->nd_net != &init_net) + if (dev_net(ifa->ifa_dev->dev) != &init_net) return NOTIFY_DONE; switch (ev) { diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 3bbef2ab22ae..9cd35eec3e7f 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -101,7 +101,7 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev, struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv; u32 size; - if (dev->nd_net != &init_net) { + if (dev_net(dev) != &init_net) { kfree_skb(buf); return 0; } @@ -198,7 +198,7 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt, struct eth_bearer *eb_ptr = ð_bearers[0]; struct eth_bearer *stop = ð_bearers[MAX_ETH_BEARERS]; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; while ((eb_ptr->dev != dev)) { diff --git a/net/wireless/wext.c b/net/wireless/wext.c index 2c569b63e7d8..947188a5b937 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext.c @@ -1157,7 +1157,7 @@ static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len) struct sk_buff *skb; int err; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 339ca4a8e89e..7a46ea73fe2d 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -191,7 +191,7 @@ static int x25_device_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; struct x25_neigh *nb; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (dev->type == ARPHRD_X25 diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index f0679d283110..3ff206c0ae94 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -95,7 +95,7 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, struct sk_buff *nskb; struct x25_neigh *nb; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) goto drop; nskb = skb_copy(skb, GFP_ATOMIC); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8e588f20c60c..15d73e47cc2c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2079,7 +2079,7 @@ static int stale_bundle(struct dst_entry *dst) void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { - dst->dev = dev->nd_net->loopback_dev; + dst->dev = dev_net(dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); } @@ -2350,7 +2350,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void { struct net_device *dev = ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; switch (event) { diff --git a/security/selinux/netif.c b/security/selinux/netif.c index 013d3117a86b..9c8a82aa8baf 100644 --- a/security/selinux/netif.c +++ b/security/selinux/netif.c @@ -281,7 +281,7 @@ static int sel_netif_netdev_notifier_handler(struct notifier_block *this, { struct net_device *dev = ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; if (event == NETDEV_DOWN) -- cgit v1.2.3-71-gd317 From 3b1e0a655f8eba44ab1ee2a1068d169ccfb853b9 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 26 Mar 2008 02:26:21 +0900 Subject: [NET] NETNS: Omit sock->sk_net without CONFIG_NET_NS. Introduce per-sock inlines: sock_net(), sock_net_set() and per-inet_timewait_sock inlines: twsk_net(), twsk_net_set(). Without CONFIG_NET_NS, no namespace other than &init_net exists. Let's explicitly define them to help compiler optimizations. Signed-off-by: YOSHIFUJI Hideaki --- include/linux/ipv6.h | 4 ++-- include/net/inet_hashtables.h | 8 ++++---- include/net/inet_timewait_sock.h | 18 ++++++++++++++++++ include/net/route.h | 4 ++-- include/net/sock.h | 24 ++++++++++++++++++++++-- net/atm/svc.c | 2 +- net/ax25/af_ax25.c | 2 +- net/bluetooth/l2cap.c | 2 +- net/bluetooth/rfcomm/sock.c | 2 +- net/bluetooth/sco.c | 2 +- net/bridge/br_netlink.c | 4 ++-- net/core/fib_rules.c | 6 +++--- net/core/neighbour.c | 10 +++++----- net/core/rtnetlink.c | 12 ++++++------ net/core/sock.c | 10 +++++----- net/decnet/af_decnet.c | 2 +- net/decnet/dn_dev.c | 6 +++--- net/decnet/dn_fib.c | 4 ++-- net/decnet/dn_route.c | 4 ++-- net/decnet/dn_table.c | 2 +- net/ipv4/af_inet.c | 6 +++--- net/ipv4/devinet.c | 6 +++--- net/ipv4/fib_frontend.c | 8 ++++---- net/ipv4/fib_rules.c | 2 +- net/ipv4/igmp.c | 14 +++++++------- net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/inet_hashtables.c | 8 ++++---- net/ipv4/inet_timewait_sock.c | 2 +- net/ipv4/ip_input.c | 2 +- net/ipv4/ip_output.c | 4 ++-- net/ipv4/ip_sockglue.c | 6 +++--- net/ipv4/ipmr.c | 4 ++-- net/ipv4/netfilter/arp_tables.c | 16 ++++++++-------- net/ipv4/netfilter/ip_tables.c | 16 ++++++++-------- net/ipv4/raw.c | 12 ++++++------ net/ipv4/route.c | 4 ++-- net/ipv4/tcp_ipv4.c | 14 +++++++------- net/ipv4/udp.c | 16 ++++++++-------- net/ipv6/addrconf.c | 10 +++++----- net/ipv6/addrlabel.c | 6 +++--- net/ipv6/af_inet6.c | 4 ++-- net/ipv6/fib6_rules.c | 2 +- net/ipv6/icmp.c | 2 +- net/ipv6/inet6_hashtables.c | 4 ++-- net/ipv6/ip6_fib.c | 2 +- net/ipv6/ip6_output.c | 2 +- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/mcast.c | 12 ++++++------ net/ipv6/netfilter/ip6_tables.c | 16 ++++++++-------- net/ipv6/raw.c | 6 +++--- net/ipv6/route.c | 4 ++-- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 4 ++-- net/irda/af_irda.c | 2 +- net/llc/llc_conn.c | 2 +- net/netfilter/nf_sockopt.c | 2 +- net/netlink/af_netlink.c | 30 +++++++++++++++--------------- net/netrom/af_netrom.c | 2 +- net/packet/af_packet.c | 28 ++++++++++++++-------------- net/rose/af_rose.c | 2 +- net/sched/act_api.c | 4 ++-- net/sched/cls_api.c | 4 ++-- net/sched/sch_api.c | 10 +++++----- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 2 +- net/socket.c | 4 ++-- net/tipc/socket.c | 2 +- net/unix/af_unix.c | 20 ++++++++++---------- net/x25/af_x25.c | 2 +- 69 files changed, 253 insertions(+), 215 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c9ba0da16ce9..b90d3d461d4e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -481,7 +481,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ #define INET6_MATCH(__sk, __net, __hash, __saddr, __daddr, __ports, __dif)\ - (((__sk)->sk_hash == (__hash)) && ((__sk)->sk_net == (__net)) && \ + (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ @@ -489,7 +489,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #define INET6_TW_MATCH(__sk, __net, __hash, __saddr, __daddr, __ports, __dif) \ - (((__sk)->sk_hash == (__hash)) && ((__sk)->sk_net == (__net)) && \ + (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ (*((__portpair *)&(inet_twsk(__sk)->tw_dport)) == (__ports)) && \ ((__sk)->sk_family == PF_INET6) && \ (ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr))) && \ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index d99c1ba2ece0..5525227c5e92 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -314,25 +314,25 @@ typedef __u64 __bitwise __addrpair; ((__force __u64)(__be32)(__saddr))); #endif /* __BIG_ENDIAN */ #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ - (((__sk)->sk_hash == (__hash)) && ((__sk)->sk_net == (__net)) && \ + (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ - (((__sk)->sk_hash == (__hash)) && ((__sk)->sk_net == (__net)) && \ + (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ - (((__sk)->sk_hash == (__hash)) && ((__sk)->sk_net == (__net)) && \ + (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #define INET_TW_MATCH(__sk, __net, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ - (((__sk)->sk_hash == (__hash)) && ((__sk)->sk_net == (__net)) && \ + (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 296547bfb0b7..07fe0d1a4f03 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -207,4 +207,22 @@ extern void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo, const int timewait_len); extern void inet_twsk_deschedule(struct inet_timewait_sock *tw, struct inet_timewait_death_row *twdr); + +static inline +struct net *twsk_net(const struct inet_timewait_sock *twsk) +{ +#ifdef CONFIG_NET_NS + return twsk->tw_net; +#else + return &init_net; +#endif +} + +static inline +void twsk_net_set(struct inet_timewait_sock *twsk, const struct net *net) +{ +#ifdef CONFIG_NET_NS + twsk->tw_net = net; +#endif +} #endif /* _INET_TIMEWAIT_SOCK_ */ diff --git a/include/net/route.h b/include/net/route.h index 28dba925663c..c6338802e8f1 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -160,7 +160,7 @@ static inline int ip_route_connect(struct rtable **rp, __be32 dst, .dport = dport } } }; int err; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); if (!dst || !src) { err = __ip_route_output_key(net, rp, &fl); if (err) @@ -188,7 +188,7 @@ static inline int ip_route_newports(struct rtable **rp, u8 protocol, ip_rt_put(*rp); *rp = NULL; security_sk_classify_flow(sk, &fl); - return ip_route_output_flow(sk->sk_net, rp, &fl, sk, 0); + return ip_route_output_flow(sock_net(sk), rp, &fl, sk, 0); } return 0; } diff --git a/include/net/sock.h b/include/net/sock.h index b433b1ed203d..7e0d4a0c4d12 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -126,7 +126,9 @@ struct sock_common { atomic_t skc_refcnt; unsigned int skc_hash; struct proto *skc_prot; +#ifdef CONFIG_NET_NS struct net *skc_net; +#endif }; /** @@ -1345,6 +1347,24 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_e } #endif +static inline +struct net *sock_net(const struct sock *sk) +{ +#ifdef CONFIG_NET_NS + return sk->sk_net; +#else + return &init_net; +#endif +} + +static inline +void sock_net_set(struct sock *sk, const struct net *net) +{ +#ifdef CONFIG_NET_NS + sk->sk_net = net; +#endif +} + /* * Kernel sockets, f.e. rtnl or icmp_socket, are a part of a namespace. * They should not hold a referrence to a namespace in order to allow @@ -1353,8 +1373,8 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_e */ static inline void sk_change_net(struct sock *sk, struct net *net) { - put_net(sk->sk_net); - sk->sk_net = net; + put_net(sock_net(sk)); + sock_net_set(sk, net); } extern void sock_enable_timestamp(struct sock *sk); diff --git a/net/atm/svc.c b/net/atm/svc.c index daf9a48a7db0..de1e4f2f3a43 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -326,7 +326,7 @@ static int svc_accept(struct socket *sock,struct socket *newsock,int flags) lock_sock(sk); - error = svc_create(sk->sk_net, newsock,0); + error = svc_create(sock_net(sk), newsock,0); if (error) goto out; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index ee9dd83e7561..2712544cf0ca 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -869,7 +869,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) struct sock *sk; ax25_cb *ax25, *oax25; - sk = sk_alloc(osk->sk_net, PF_AX25, GFP_ATOMIC, osk->sk_prot); + sk = sk_alloc(sock_net(osk), PF_AX25, GFP_ATOMIC, osk->sk_prot); if (sk == NULL) return NULL; diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 34f8bf98bc05..6b995ac832f5 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1499,7 +1499,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd goto response; } - sk = l2cap_sock_alloc(parent->sk_net, NULL, BTPROTO_L2CAP, GFP_ATOMIC); + sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, GFP_ATOMIC); if (!sk) goto response; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index c46d51035e77..c103fa02893b 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -868,7 +868,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * goto done; } - sk = rfcomm_sock_alloc(parent->sk_net, NULL, BTPROTO_RFCOMM, GFP_ATOMIC); + sk = rfcomm_sock_alloc(sock_net(parent), NULL, BTPROTO_RFCOMM, GFP_ATOMIC); if (!sk) goto done; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index b91d3c81a73c..2a5953b4405d 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -803,7 +803,7 @@ static void sco_conn_ready(struct sco_conn *conn) bh_lock_sock(parent); - sk = sco_sock_alloc(parent->sk_net, NULL, BTPROTO_SCO, GFP_ATOMIC); + sk = sco_sock_alloc(sock_net(parent), NULL, BTPROTO_SCO, GFP_ATOMIC); if (!sk) { bh_unlock_sock(parent); goto done; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index f5d69336d97b..f155e6ce8a21 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -108,7 +108,7 @@ errout: */ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct net_device *dev; int idx; @@ -140,7 +140,7 @@ skip: */ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct nlattr *protinfo; struct net_device *dev; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 942be93a2eb0..540c07283e31 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -214,7 +214,7 @@ errout: static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *r, *last = NULL; @@ -352,7 +352,7 @@ errout: static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *tmp; @@ -534,7 +534,7 @@ skip: static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct fib_rules_ops *ops; int idx = 0, family; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c978bd1cd659..065fbac7ecd3 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1478,7 +1478,7 @@ int neigh_table_clear(struct neigh_table *tbl) static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *dst_attr; struct neigh_table *tbl; @@ -1544,7 +1544,7 @@ out: static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct neigh_table *tbl; @@ -1812,7 +1812,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct neigh_table *tbl; struct ndtmsg *ndtmsg; struct nlattr *tb[NDTA_MAX+1]; @@ -1937,7 +1937,7 @@ errout: static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); int family, tidx, nidx = 0; int tbl_skip = cb->args[0]; int neigh_skip = cb->args[1]; @@ -2037,7 +2037,7 @@ static void neigh_update_notify(struct neighbour *neigh) static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb) { - struct net * net = skb->sk->sk_net; + struct net * net = sock_net(skb->sk); struct neighbour *n; int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 09250a0800f6..da99ac0871bf 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -662,7 +662,7 @@ nla_put_failure: static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); int idx; int s_idx = cb->args[0]; struct net_device *dev; @@ -879,7 +879,7 @@ errout: static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; int err; @@ -921,7 +921,7 @@ errout: static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; struct net_device *dev; struct ifinfomsg *ifm; @@ -1000,7 +1000,7 @@ err: static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; struct net_device *dev; struct ifinfomsg *ifm; @@ -1132,7 +1132,7 @@ replay: static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; @@ -1227,7 +1227,7 @@ static int rtattr_max; static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); rtnl_doit_func doit; int sz_idx, kind; int min_len; diff --git a/net/core/sock.c b/net/core/sock.c index b1a6ed4d33c1..3ee95060dbd0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -372,7 +372,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); char devname[IFNAMSIZ]; int index; @@ -958,7 +958,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, */ sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); - sk->sk_net = get_net(net); + sock_net_set(sk, get_net(net)); } return sk; @@ -983,7 +983,7 @@ void sk_free(struct sock *sk) printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", __func__, atomic_read(&sk->sk_omem_alloc)); - put_net(sk->sk_net); + put_net(sock_net(sk)); sk_prot_free(sk->sk_prot_creator, sk); } @@ -1001,7 +1001,7 @@ void sk_release_kernel(struct sock *sk) sock_hold(sk); sock_release(sk->sk_socket); - sk->sk_net = get_net(&init_net); + sock_net_set(sk, get_net(&init_net)); sock_put(sk); } EXPORT_SYMBOL(sk_release_kernel); @@ -1017,7 +1017,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) sock_copy(newsk, sk); /* SANITY */ - get_net(newsk->sk_net); + get_net(sock_net(newsk)); sk_node_init(&newsk->sk_node); sock_lock_init(newsk); bh_lock_sock(newsk); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 3554fb3d251c..fc2efe899e91 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1094,7 +1094,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags) cb = DN_SKB_CB(skb); sk->sk_ack_backlog--; - newsk = dn_alloc_sock(sk->sk_net, newsock, sk->sk_allocation); + newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation); if (newsk == NULL) { release_sock(sk); kfree_skb(skb); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 1bbfce5f7a2d..2f0ac3c3eb71 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -625,7 +625,7 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = { static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; struct dn_dev *dn_db; struct ifaddrmsg *ifm; @@ -663,7 +663,7 @@ errout: static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; struct net_device *dev; struct dn_dev *dn_db; @@ -779,7 +779,7 @@ errout: static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); int idx, dn_idx = 0, skip_ndevs, skip_naddr; struct net_device *dev; struct dn_dev *dn_db; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 4aa9a423e606..27ea2e9b080a 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -504,7 +504,7 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta) static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; struct rtattr **rta = arg; struct rtmsg *r = NLMSG_DATA(nlh); @@ -524,7 +524,7 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void * static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; struct rtattr **rta = arg; struct rtmsg *r = NLMSG_DATA(nlh); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 0a46b6c10e51..2f665a516476 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1512,7 +1512,7 @@ rtattr_failure: */ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = in_skb->sk->sk_net; + struct net *net = sock_net(in_skb->sk); struct rtattr **rta = arg; struct rtmsg *rtm = NLMSG_DATA(nlh); struct dn_route *rt = NULL; @@ -1601,7 +1601,7 @@ out_free: */ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct dn_route *rt; int h, s_h; int idx, s_idx; diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index e09d915dbd77..3a2830ac89c2 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -463,7 +463,7 @@ static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb, int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); unsigned int h, s_h; unsigned int e = 0, s_e; struct dn_fib_table *tb; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 06cfb0bed631..5882a1316441 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -464,7 +464,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) goto out; - chk_addr_ret = inet_addr_type(sk->sk_net, addr->sin_addr.s_addr); + chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); /* Not specified by any standard per-se, however it breaks too * many applications when removed. It is unfortunate since @@ -802,7 +802,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; int err = 0; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); switch (cmd) { case SIOCGSTAMP: @@ -1132,7 +1132,7 @@ int inet_sk_rebuild_header(struct sock *sk) }; security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(sk->sk_net, &rt, &fl, sk, 0); + err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0); } if (!err) sk_setup_caps(sk, &rt->u.dst); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 823c724a8593..6848e4760f34 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -437,7 +437,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; struct in_device *in_dev; struct ifaddrmsg *ifm; @@ -552,7 +552,7 @@ errout: static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct in_ifaddr *ifa; ASSERT_RTNL(); @@ -1158,7 +1158,7 @@ nla_put_failure: static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); int idx, ip_idx; struct net_device *dev; struct in_device *in_dev; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 0e4b34b07cb5..0f1557a4ac7a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -583,7 +583,7 @@ errout: static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct fib_config cfg; struct fib_table *tb; int err; @@ -605,7 +605,7 @@ errout: static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct fib_config cfg; struct fib_table *tb; int err; @@ -627,7 +627,7 @@ errout: static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); unsigned int h, s_h; unsigned int e = 0, s_e; struct fib_table *tb; @@ -857,7 +857,7 @@ static void nl_fib_input(struct sk_buff *skb) struct fib_table *tb; u32 pid; - net = skb->sk->sk_net; + net = sock_net(skb->sk); nlh = nlmsg_hdr(skb); if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 19274d01afa4..1fb56876be54 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -137,7 +137,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct nlmsghdr *nlh, struct fib_rule_hdr *frh, struct nlattr **tb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); int err = -EINVAL; struct fib4_rule *rule4 = (struct fib4_rule *) rule; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 682f632bfb77..6250f4239b61 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1762,7 +1762,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) if (!ipv4_is_multicast(addr)) return -EINVAL; - if (sk->sk_net != &init_net) + if (sock_net(sk) != &init_net) return -EPROTONOSUPPORT; rtnl_lock(); @@ -1833,7 +1833,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) u32 ifindex; int ret = -EADDRNOTAVAIL; - if (sk->sk_net != &init_net) + if (sock_net(sk) != &init_net) return -EPROTONOSUPPORT; rtnl_lock(); @@ -1881,7 +1881,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (!ipv4_is_multicast(addr)) return -EINVAL; - if (sk->sk_net != &init_net) + if (sock_net(sk) != &init_net) return -EPROTONOSUPPORT; rtnl_lock(); @@ -2017,7 +2017,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) msf->imsf_fmode != MCAST_EXCLUDE) return -EINVAL; - if (sk->sk_net != &init_net) + if (sock_net(sk) != &init_net) return -EPROTONOSUPPORT; rtnl_lock(); @@ -2100,7 +2100,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, if (!ipv4_is_multicast(addr)) return -EINVAL; - if (sk->sk_net != &init_net) + if (sock_net(sk) != &init_net) return -EPROTONOSUPPORT; rtnl_lock(); @@ -2165,7 +2165,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!ipv4_is_multicast(addr)) return -EINVAL; - if (sk->sk_net != &init_net) + if (sock_net(sk) != &init_net) return -EPROTONOSUPPORT; rtnl_lock(); @@ -2252,7 +2252,7 @@ void ip_mc_drop_socket(struct sock *sk) if (inet->mc_list == NULL) return; - if (sk->sk_net != &init_net) + if (sock_net(sk) != &init_net) return; rtnl_lock(); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d13c5f12bb32..a7fcaf205644 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -85,7 +85,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) struct hlist_node *node; struct inet_bind_bucket *tb; int ret; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); local_bh_disable(); if (!snum) { @@ -333,7 +333,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, .dport = ireq->rmt_port } } }; security_req_classify_flow(req, &fl); - if (ip_route_output_flow(sk->sk_net, &rt, &fl, sk, 0)) { + if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 8cd1ad9b9111..1064111e5b96 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -139,7 +139,7 @@ static struct sock *inet_lookup_listener_slow(struct net *net, sk_for_each(sk, node, head) { const struct inet_sock *inet = inet_sk(sk); - if (sk->sk_net == net && inet->num == hnum && + if (sock_net(sk) == net && inet->num == hnum && !ipv6_only_sock(sk)) { const __be32 rcv_saddr = inet->rcv_saddr; int score = sk->sk_family == PF_INET ? 1 : 0; @@ -182,7 +182,7 @@ struct sock *__inet_lookup_listener(struct net *net, if (inet->num == hnum && !sk->sk_node.next && (!inet->rcv_saddr || inet->rcv_saddr == daddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && - !sk->sk_bound_dev_if && sk->sk_net == net) + !sk->sk_bound_dev_if && sock_net(sk) == net) goto sherry_cache; sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif); } @@ -254,7 +254,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); prefetch(head->chain.first); write_lock(lock); @@ -406,7 +406,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct inet_bind_hashbucket *head; struct inet_bind_bucket *tb; int ret; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); if (!snum) { int i, remaining, low, high, port; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 717c411a5c6b..f12bc24de46f 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -124,7 +124,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_hash = sk->sk_hash; tw->tw_ipv6only = 0; tw->tw_prot = sk->sk_prot_creator; - tw->tw_net = sk->sk_net; + twsk_net_set(tw, sock_net(sk)); atomic_set(&tw->tw_refcnt, 1); inet_twsk_dead_node_init(tw); __module_get(tw->tw_prot->owner); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 26685c83a146..4be00959b748 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -172,7 +172,7 @@ int ip_call_ra_chain(struct sk_buff *skb) if (sk && inet_sk(sk)->num == protocol && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) && - sk->sk_net == dev_net(dev)) { + sock_net(sk) == dev_net(dev)) { if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { read_unlock(&ip_ra_lock); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 913266cd9902..08349267ceb4 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -351,7 +351,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) * itself out. */ security_sk_classify_flow(sk, &fl); - if (ip_route_output_flow(sk->sk_net, &rt, &fl, sk, 0)) + if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) goto no_route; } sk_setup_caps(sk, &rt->u.dst); @@ -1382,7 +1382,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar .dport = tcp_hdr(skb)->source } }, .proto = sk->sk_protocol }; security_skb_classify_flow(skb, &fl); - if (ip_route_output_key(sk->sk_net, &rt, &fl)) + if (ip_route_output_key(sock_net(sk), &rt, &fl)) return; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b854431047a4..d6e76f5229cc 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -449,7 +449,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, struct ip_options * opt = NULL; if (optlen > 40 || optlen < 0) goto e_inval; - err = ip_options_get_from_user(sk->sk_net, &opt, + err = ip_options_get_from_user(sock_net(sk), &opt, optval, optlen); if (err) break; @@ -590,13 +590,13 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = 0; break; } - dev = ip_dev_find(sk->sk_net, mreq.imr_address.s_addr); + dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr); if (dev) { mreq.imr_ifindex = dev->ifindex; dev_put(dev); } } else - dev = __dev_get_by_index(sk->sk_net, mreq.imr_ifindex); + dev = __dev_get_by_index(sock_net(sk), mreq.imr_ifindex); err = -EADDRNOTAVAIL; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index e54bc1364473..11700a4dcd95 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -849,7 +849,7 @@ static void mrtsock_destruct(struct sock *sk) { rtnl_lock(); if (sk == mroute_socket) { - IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)--; + IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--; write_lock_bh(&mrt_lock); mroute_socket=NULL; @@ -898,7 +898,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt mroute_socket=sk; write_unlock_bh(&mrt_lock); - IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)++; + IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++; } rtnl_unlock(); return ret; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 756bc0e1a7c6..1563f29b5117 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1496,11 +1496,11 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, switch (cmd) { case ARPT_SO_SET_REPLACE: - ret = compat_do_replace(sk->sk_net, user, len); + ret = compat_do_replace(sock_net(sk), user, len); break; case ARPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sk->sk_net, user, len, 1); + ret = do_add_counters(sock_net(sk), user, len, 1); break; default: @@ -1644,10 +1644,10 @@ static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, switch (cmd) { case ARPT_SO_GET_INFO: - ret = get_info(sk->sk_net, user, len, 1); + ret = get_info(sock_net(sk), user, len, 1); break; case ARPT_SO_GET_ENTRIES: - ret = compat_get_entries(sk->sk_net, user, len); + ret = compat_get_entries(sock_net(sk), user, len); break; default: ret = do_arpt_get_ctl(sk, cmd, user, len); @@ -1665,11 +1665,11 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned switch (cmd) { case ARPT_SO_SET_REPLACE: - ret = do_replace(sk->sk_net, user, len); + ret = do_replace(sock_net(sk), user, len); break; case ARPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sk->sk_net, user, len, 0); + ret = do_add_counters(sock_net(sk), user, len, 0); break; default: @@ -1689,11 +1689,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len switch (cmd) { case ARPT_SO_GET_INFO: - ret = get_info(sk->sk_net, user, len, 0); + ret = get_info(sock_net(sk), user, len, 0); break; case ARPT_SO_GET_ENTRIES: - ret = get_entries(sk->sk_net, user, len); + ret = get_entries(sock_net(sk), user, len); break; case ARPT_SO_GET_REVISION_TARGET: { diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 85a75e186b4b..a819d191e1aa 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1852,11 +1852,11 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, switch (cmd) { case IPT_SO_SET_REPLACE: - ret = compat_do_replace(sk->sk_net, user, len); + ret = compat_do_replace(sock_net(sk), user, len); break; case IPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sk->sk_net, user, len, 1); + ret = do_add_counters(sock_net(sk), user, len, 1); break; default: @@ -1963,10 +1963,10 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) switch (cmd) { case IPT_SO_GET_INFO: - ret = get_info(sk->sk_net, user, len, 1); + ret = get_info(sock_net(sk), user, len, 1); break; case IPT_SO_GET_ENTRIES: - ret = compat_get_entries(sk->sk_net, user, len); + ret = compat_get_entries(sock_net(sk), user, len); break; default: ret = do_ipt_get_ctl(sk, cmd, user, len); @@ -1985,11 +1985,11 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) switch (cmd) { case IPT_SO_SET_REPLACE: - ret = do_replace(sk->sk_net, user, len); + ret = do_replace(sock_net(sk), user, len); break; case IPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sk->sk_net, user, len, 0); + ret = do_add_counters(sock_net(sk), user, len, 0); break; default: @@ -2010,11 +2010,11 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) switch (cmd) { case IPT_SO_GET_INFO: - ret = get_info(sk->sk_net, user, len, 0); + ret = get_info(sock_net(sk), user, len, 0); break; case IPT_SO_GET_ENTRIES: - ret = get_entries(sk->sk_net, user, len); + ret = get_entries(sock_net(sk), user, len); break; case IPT_SO_GET_REVISION_MATCH: diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 8756d502a47f..be19a4048d7c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -117,7 +117,7 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, sk_for_each_from(sk, node) { struct inet_sock *inet = inet_sk(sk); - if (sk->sk_net == net && inet->num == num && + if (sock_net(sk) == net && inet->num == num && !(inet->daddr && inet->daddr != raddr) && !(inet->rcv_saddr && inet->rcv_saddr != laddr) && !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) @@ -499,7 +499,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { - err = ip_cmsg_send(sk->sk_net, msg, &ipc); + err = ip_cmsg_send(sock_net(sk), msg, &ipc); if (err) goto out; if (ipc.opt) @@ -553,7 +553,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(sk->sk_net, &rt, &fl, sk, 1); + err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1); } if (err) goto done; @@ -620,7 +620,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) goto out; - chk_addr_ret = inet_addr_type(sk->sk_net, addr->sin_addr.s_addr); + chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); ret = -EADDRNOTAVAIL; if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -856,7 +856,7 @@ static struct sock *raw_get_first(struct seq_file *seq) struct hlist_node *node; sk_for_each(sk, node, &state->h->ht[state->bucket]) - if (sk->sk_net == state->p.net) + if (sock_net(sk) == state->p.net) goto found; } sk = NULL; @@ -872,7 +872,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) sk = sk_next(sk); try_again: ; - } while (sk && sk->sk_net != state->p.net); + } while (sk && sock_net(sk) != state->p.net); if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { sk = sk_head(&state->h->ht[state->bucket]); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7768d718e199..194f5cca3121 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2689,7 +2689,7 @@ nla_put_failure: static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = in_skb->sk->sk_net; + struct net *net = sock_net(in_skb->sk); struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; struct rtable *rt = NULL; @@ -2785,7 +2785,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) int idx, s_idx; struct net *net; - net = skb->sk->sk_net; + net = sock_net(skb->sk); s_h = cb->args[0]; if (s_h < 0) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 28bece6f281b..46847e600a46 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1486,7 +1486,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req) return tcp_check_req(sk, skb, req, prev); - nsk = inet_lookup_established(sk->sk_net, &tcp_hashinfo, iph->saddr, + nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); if (nsk) { @@ -1974,7 +1974,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) while (1) { while (req) { if (req->rsk_ops->family == st->family && - req->sk->sk_net == net) { + sock_net(req->sk) == net) { cur = req; goto out; } @@ -1998,7 +1998,7 @@ get_req: } get_sk: sk_for_each_from(sk, node) { - if (sk->sk_family == st->family && sk->sk_net == net) { + if (sk->sk_family == st->family && sock_net(sk) == net) { cur = sk; goto out; } @@ -2049,7 +2049,7 @@ static void *established_get_first(struct seq_file *seq) read_lock_bh(lock); sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family || - sk->sk_net != net) { + sock_net(sk) != net) { continue; } rc = sk; @@ -2059,7 +2059,7 @@ static void *established_get_first(struct seq_file *seq) inet_twsk_for_each(tw, node, &tcp_hashinfo.ehash[st->bucket].twchain) { if (tw->tw_family != st->family || - tw->tw_net != net) { + twsk_net(tw) != net) { continue; } rc = tw; @@ -2086,7 +2086,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) tw = cur; tw = tw_next(tw); get_tw: - while (tw && (tw->tw_family != st->family || tw->tw_net != net)) { + while (tw && (tw->tw_family != st->family || twsk_net(tw) != net)) { tw = tw_next(tw); } if (tw) { @@ -2107,7 +2107,7 @@ get_tw: sk = sk_next(sk); sk_for_each_from(sk, node) { - if (sk->sk_family == st->family && sk->sk_net == net) + if (sk->sk_family == st->family && sock_net(sk) == net) goto found; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e2cd93481359..76d52d37d6ac 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -137,7 +137,7 @@ static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, struct hlist_node *node; sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) - if (sk->sk_net == net && sk->sk_hash == num) + if (sock_net(sk) == net && sk->sk_hash == num) return 1; return 0; } @@ -158,7 +158,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, struct hlist_head *head; struct sock *sk2; int error = 1; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); write_lock_bh(&udp_hash_lock); @@ -218,7 +218,7 @@ gotit: sk_for_each(sk2, node, head) if (sk2->sk_hash == snum && sk2 != sk && - sk2->sk_net == net && + sock_net(sk2) == net && (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && @@ -269,7 +269,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { struct inet_sock *inet = inet_sk(sk); - if (sk->sk_net == net && sk->sk_hash == hnum && + if (sock_net(sk) == net && sk->sk_hash == hnum && !ipv6_only_sock(sk)) { int score = (sk->sk_family == PF_INET ? 1 : 0); if (inet->rcv_saddr) { @@ -607,7 +607,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { - err = ip_cmsg_send(sk->sk_net, msg, &ipc); + err = ip_cmsg_send(sock_net(sk), msg, &ipc); if (err) return err; if (ipc.opt) @@ -656,7 +656,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { .sport = inet->sport, .dport = dport } } }; security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(sk->sk_net, &rt, &fl, sk, 1); + err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1); if (err) { if (err == -ENETUNREACH) IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); @@ -1511,7 +1511,7 @@ static struct sock *udp_get_first(struct seq_file *seq) for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { struct hlist_node *node; sk_for_each(sk, node, state->hashtable + state->bucket) { - if (sk->sk_net != net) + if (sock_net(sk) != net) continue; if (sk->sk_family == state->family) goto found; @@ -1531,7 +1531,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) sk = sk_next(sk); try_again: ; - } while (sk && (sk->sk_net != net || sk->sk_family != state->family)); + } while (sk && (sock_net(sk) != net || sk->sk_family != state->family)); if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { sk = sk_head(state->hashtable + state->bucket); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d1de9ec74261..f2c90f145cbb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3054,7 +3054,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { static int inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; @@ -3112,7 +3112,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, static int inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; @@ -3322,7 +3322,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, struct inet6_ifaddr *ifa; struct ifmcaddr6 *ifmca; struct ifacaddr6 *ifaca; - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; @@ -3418,7 +3418,7 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = in_skb->sk->sk_net; + struct net *net = sock_net(in_skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *addr = NULL; @@ -3645,7 +3645,7 @@ nla_put_failure: static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); int idx, err; int s_idx = cb->args[0]; struct net_device *dev; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index de371b5997fe..9bfa8846f262 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -364,7 +364,7 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = { static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ifaddrlblmsg *ifal; struct nlattr *tb[IFAL_MAX+1]; struct in6_addr *pfx; @@ -452,7 +452,7 @@ static int ip6addrlbl_fill(struct sk_buff *skb, static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ip6addrlbl_entry *p; struct hlist_node *pos; int idx = 0, s_idx = cb->args[0]; @@ -490,7 +490,7 @@ static inline int ip6addrlbl_msgsize(void) static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = in_skb->sk->sk_net; + struct net *net = sock_net(in_skb->sk); struct ifaddrlblmsg *ifal; struct nlattr *tb[IFAL_MAX+1]; struct in6_addr *addr; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index f52bdaed8a1b..12f04e9d3e88 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -245,7 +245,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); __be32 v4addr = 0; unsigned short snum; int addr_type = 0; @@ -438,7 +438,7 @@ EXPORT_SYMBOL(inet6_getname); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); switch(cmd) { diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index e7a7fe26cebf..cac580749ebe 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -154,7 +154,7 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct nlattr **tb) { int err = -EINVAL; - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct fib6_rule *rule6 = (struct fib6_rule *) rule; if (rule->action == FR_ACT_TO_TBL) { diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 50857662e6b7..63309d10df3a 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -163,7 +163,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type, struct flowi *fl) { struct dst_entry *dst; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); int res = 0; /* Informational messages are not limited. */ diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index c0c8d2d17682..21c467675412 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -105,7 +105,7 @@ struct sock *inet6_lookup_listener(struct net *net, read_lock(&hashinfo->lhash_lock); sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { - if (sk->sk_net == net && inet_sk(sk)->num == hnum && + if (sock_net(sk) == net && inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); @@ -172,7 +172,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); prefetch(head->chain.first); write_lock(lock); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index b0814b0082e7..b3f6e03c454c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -346,7 +346,7 @@ end: static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); unsigned int h, s_h; unsigned int e = 0, s_e; struct rt6_rtnl_dump_arg arg; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 556300f0eba5..a8b4da25b0a7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -910,7 +910,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, struct dst_entry **dst, struct flowi *fl) { int err; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); if (*dst == NULL) *dst = ip6_route_output(net, sk, fl); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index dc6695cc5767..d3d93d752e10 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -107,7 +107,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { struct ipv6_pinfo *np = inet6_sk(sk); - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); int val, valbool; int retv = -ENOPROTOOPT; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 0357de8e78c8..20a3d8e2f6c6 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -181,7 +181,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr) struct net_device *dev = NULL; struct ipv6_mc_socklist *mc_lst; struct ipv6_pinfo *np = inet6_sk(sk); - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); int err; if (!ipv6_addr_is_multicast(addr)) @@ -255,7 +255,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc_lst, **lnk; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); write_lock_bh(&ipv6_sk_mc_lock); for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) { @@ -327,7 +327,7 @@ void ipv6_sock_mc_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc_lst; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); write_lock_bh(&ipv6_sk_mc_lock); while ((mc_lst = np->ipv6_mc_list) != NULL) { @@ -365,7 +365,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, struct inet6_dev *idev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *psl; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); int i, j, rv; int leavegroup = 0; int pmclocked = 0; @@ -505,7 +505,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) struct inet6_dev *idev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *newpsl, *psl; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); int leavegroup = 0; int i, err; @@ -598,7 +598,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, struct net_device *dev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *psl; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index af1ec7ba757c..70ef0d276cc0 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1879,11 +1879,11 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, switch (cmd) { case IP6T_SO_SET_REPLACE: - ret = compat_do_replace(sk->sk_net, user, len); + ret = compat_do_replace(sock_net(sk), user, len); break; case IP6T_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sk->sk_net, user, len, 1); + ret = do_add_counters(sock_net(sk), user, len, 1); break; default: @@ -1990,10 +1990,10 @@ compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) switch (cmd) { case IP6T_SO_GET_INFO: - ret = get_info(sk->sk_net, user, len, 1); + ret = get_info(sock_net(sk), user, len, 1); break; case IP6T_SO_GET_ENTRIES: - ret = compat_get_entries(sk->sk_net, user, len); + ret = compat_get_entries(sock_net(sk), user, len); break; default: ret = do_ip6t_get_ctl(sk, cmd, user, len); @@ -2012,11 +2012,11 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) switch (cmd) { case IP6T_SO_SET_REPLACE: - ret = do_replace(sk->sk_net, user, len); + ret = do_replace(sock_net(sk), user, len); break; case IP6T_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sk->sk_net, user, len, 0); + ret = do_add_counters(sock_net(sk), user, len, 0); break; default: @@ -2037,11 +2037,11 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) switch (cmd) { case IP6T_SO_GET_INFO: - ret = get_info(sk->sk_net, user, len, 0); + ret = get_info(sock_net(sk), user, len, 0); break; case IP6T_SO_GET_ENTRIES: - ret = get_entries(sk->sk_net, user, len); + ret = get_entries(sock_net(sk), user, len); break; case IP6T_SO_GET_REVISION_MATCH: diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index efb0047f6880..12c7a1560977 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -76,7 +76,7 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, if (inet_sk(sk)->num == num) { struct ipv6_pinfo *np = inet6_sk(sk); - if (sk->sk_net != net) + if (sock_net(sk) != net) continue; if (!ipv6_addr_any(&np->daddr) && @@ -280,7 +280,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!sk->sk_bound_dev_if) goto out; - dev = dev_get_by_index(sk->sk_net, sk->sk_bound_dev_if); + dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out; @@ -293,7 +293,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { err = -EADDRNOTAVAIL; - if (!ipv6_chk_addr(sk->sk_net, &addr->sin6_addr, + if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr, dev, 0)) { if (dev) dev_put(dev); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 65053fba8c1a..ac4428371432 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2020,7 +2020,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; cfg->fc_nlinfo.nlh = nlh; - cfg->fc_nlinfo.nl_net = skb->sk->sk_net; + cfg->fc_nlinfo.nl_net = sock_net(skb->sk); if (tb[RTA_GATEWAY]) { nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); @@ -2216,7 +2216,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = in_skb->sk->sk_net; + struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; struct rt6_info *rt; struct sk_buff *skb; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 086deffff9c9..323c7e06ef43 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1218,7 +1218,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) if (req) return tcp_check_req(sk, skb, req, prev); - nsk = __inet6_lookup_established(sk->sk_net, &tcp_hashinfo, + nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb)); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6683c04b427e..db266ff297e5 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -70,7 +70,7 @@ static struct sock *__udp6_lib_lookup(struct net *net, sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { struct inet_sock *inet = inet_sk(sk); - if (sk->sk_net == net && sk->sk_hash == hnum && + if (sock_net(sk) == net && sk->sk_hash == hnum && sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); int score = 0; @@ -323,7 +323,7 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, sk_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); - if (s->sk_net != sk->sk_net) + if (sock_net(s) != sock_net(sk)) continue; if (s->sk_hash == num && s->sk_family == PF_INET6) { diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 6f21a53cb3e7..ae54b20d0470 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -837,7 +837,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) IRDA_DEBUG(2, "%s()\n", __func__); - err = irda_create(sk->sk_net, newsock, sk->sk_protocol); + err = irda_create(sock_net(sk), newsock, sk->sk_protocol); if (err) return err; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 5ebfd93ff5e7..5c6d89c6d51d 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -700,7 +700,7 @@ static struct sock *llc_create_incoming_sock(struct sock *sk, struct llc_addr *saddr, struct llc_addr *daddr) { - struct sock *newsk = llc_sk_alloc(sk->sk_net, sk->sk_family, GFP_ATOMIC, + struct sock *newsk = llc_sk_alloc(sock_net(sk), sk->sk_family, GFP_ATOMIC, sk->sk_prot); struct llc_sock *newllc, *llc = llc_sk(sk); diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index 3dd4b3c76d81..69d699f95f4c 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -65,7 +65,7 @@ static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, int pf, { struct nf_sockopt_ops *ops; - if (sk->sk_net != &init_net) + if (sock_net(sk) != &init_net) return ERR_PTR(-ENOPROTOOPT); if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 86bd8660a8f2..712a7bff8560 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -228,7 +228,7 @@ static inline struct sock *netlink_lookup(struct net *net, int protocol, read_lock(&nl_table_lock); head = nl_pid_hashfn(hash, pid); sk_for_each(sk, node, head) { - if ((sk->sk_net == net) && (nlk_sk(sk)->pid == pid)) { + if (sock_net(sk) == net && (nlk_sk(sk)->pid == pid)) { sock_hold(sk); goto found; } @@ -348,7 +348,7 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 pid) head = nl_pid_hashfn(hash, pid); len = 0; sk_for_each(osk, node, head) { - if ((osk->sk_net == net) && (nlk_sk(osk)->pid == pid)) + if (sock_net(osk) == net && (nlk_sk(osk)->pid == pid)) break; len++; } @@ -486,7 +486,7 @@ static int netlink_release(struct socket *sock) if (nlk->pid && !nlk->subscriptions) { struct netlink_notify n = { - .net = sk->sk_net, + .net = sock_net(sk), .protocol = sk->sk_protocol, .pid = nlk->pid, }; @@ -518,7 +518,7 @@ static int netlink_release(struct socket *sock) static int netlink_autobind(struct socket *sock) { struct sock *sk = sock->sk; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; struct sock *osk; @@ -532,7 +532,7 @@ retry: netlink_table_grab(); head = nl_pid_hashfn(hash, pid); sk_for_each(osk, node, head) { - if ((osk->sk_net != net)) + if (sock_net(osk) != net) continue; if (nlk_sk(osk)->pid == pid) { /* Bind collision, search negative pid values. */ @@ -611,7 +611,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sock *sk = sock->sk; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; @@ -720,7 +720,7 @@ static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) struct sock *sock; struct netlink_sock *nlk; - sock = netlink_lookup(ssk->sk_net, ssk->sk_protocol, pid); + sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid); if (!sock) return ERR_PTR(-ECONNREFUSED); @@ -962,7 +962,7 @@ static inline int do_one_broadcast(struct sock *sk, !test_bit(p->group - 1, nlk->groups)) goto out; - if ((sk->sk_net != p->net)) + if (sock_net(sk) != p->net) goto out; if (p->failure) { @@ -1006,7 +1006,7 @@ out: int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, u32 group, gfp_t allocation) { - struct net *net = ssk->sk_net; + struct net *net = sock_net(ssk); struct netlink_broadcast_data info; struct hlist_node *node; struct sock *sk; @@ -1064,7 +1064,7 @@ static inline int do_one_set_err(struct sock *sk, if (sk == p->exclude_sk) goto out; - if (sk->sk_net != p->exclude_sk->sk_net) + if (sock_net(sk) != sock_net(p->exclude_sk)) goto out; if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || @@ -1601,7 +1601,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, atomic_inc(&skb->users); cb->skb = skb; - sk = netlink_lookup(ssk->sk_net, ssk->sk_protocol, NETLINK_CB(skb).pid); + sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid); if (sk == NULL) { netlink_destroy_callback(cb); return -ECONNREFUSED; @@ -1643,7 +1643,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) if (!skb) { struct sock *sk; - sk = netlink_lookup(in_skb->sk->sk_net, + sk = netlink_lookup(sock_net(in_skb->sk), in_skb->sk->sk_protocol, NETLINK_CB(in_skb).pid); if (sk) { @@ -1758,7 +1758,7 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) for (j = 0; j <= hash->mask; j++) { sk_for_each(s, node, &hash->table[j]) { - if (iter->p.net != s->sk_net) + if (sock_net(s) != iter->p.net) continue; if (off == pos) { iter->link = i; @@ -1794,7 +1794,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) s = v; do { s = sk_next(s); - } while (s && (iter->p.net != s->sk_net)); + } while (s && (sock_net(s) != iter->p.net)); if (s) return s; @@ -1806,7 +1806,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) for (; j <= hash->mask; j++) { s = sk_head(&hash->table[j]); - while (s && (iter->p.net != s->sk_net)) + while (s && sock_net(s) != iter->p.net) s = sk_next(s); if (s) { iter->link = i; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index a270ebf9f765..4bae8b998cab 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -466,7 +466,7 @@ static struct sock *nr_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - sk = sk_alloc(osk->sk_net, PF_NETROM, GFP_ATOMIC, osk->sk_prot); + sk = sk_alloc(sock_net(osk), PF_NETROM, GFP_ATOMIC, osk->sk_prot); if (sk == NULL) return NULL; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index baa290d3444a..25070240d4ae 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -263,7 +263,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct if (skb->pkt_type == PACKET_LOOPBACK) goto out; - if (dev_net(dev) != sk->sk_net) + if (dev_net(dev) != sock_net(sk)) goto out; if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) @@ -337,7 +337,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, */ saddr->spkt_device[13] = 0; - dev = dev_get_by_name(sk->sk_net, saddr->spkt_device); + dev = dev_get_by_name(sock_net(sk), saddr->spkt_device); err = -ENODEV; if (dev == NULL) goto out_unlock; @@ -451,7 +451,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet sk = pt->af_packet_priv; po = pkt_sk(sk); - if (dev_net(dev) != sk->sk_net) + if (dev_net(dev) != sock_net(sk)) goto drop; skb->dev = dev; @@ -568,7 +568,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe sk = pt->af_packet_priv; po = pkt_sk(sk); - if (dev_net(dev) != sk->sk_net) + if (dev_net(dev) != sock_net(sk)) goto drop; if (dev->header_ops) { @@ -728,7 +728,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, } - dev = dev_get_by_index(sk->sk_net, ifindex); + dev = dev_get_by_index(sock_net(sk), ifindex); err = -ENXIO; if (dev == NULL) goto out_unlock; @@ -800,7 +800,7 @@ static int packet_release(struct socket *sock) if (!sk) return 0; - net = sk->sk_net; + net = sock_net(sk); po = pkt_sk(sk); write_lock_bh(&net->packet.sklist_lock); @@ -914,7 +914,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int add return -EINVAL; strlcpy(name,uaddr->sa_data,sizeof(name)); - dev = dev_get_by_name(sk->sk_net, name); + dev = dev_get_by_name(sock_net(sk), name); if (dev) { err = packet_do_bind(sk, dev, pkt_sk(sk)->num); dev_put(dev); @@ -941,7 +941,7 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len if (sll->sll_ifindex) { err = -ENODEV; - dev = dev_get_by_index(sk->sk_net, sll->sll_ifindex); + dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex); if (dev == NULL) goto out; } @@ -1135,7 +1135,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - dev = dev_get_by_index(sk->sk_net, pkt_sk(sk)->ifindex); + dev = dev_get_by_index(sock_net(sk), pkt_sk(sk)->ifindex); if (dev) { strlcpy(uaddr->sa_data, dev->name, 15); dev_put(dev); @@ -1160,7 +1160,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, sll->sll_family = AF_PACKET; sll->sll_ifindex = po->ifindex; sll->sll_protocol = po->num; - dev = dev_get_by_index(sk->sk_net, po->ifindex); + dev = dev_get_by_index(sock_net(sk), po->ifindex); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; @@ -1212,7 +1212,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) rtnl_lock(); err = -ENODEV; - dev = __dev_get_by_index(sk->sk_net, mreq->mr_ifindex); + dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex); if (!dev) goto done; @@ -1266,7 +1266,7 @@ static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) if (--ml->count == 0) { struct net_device *dev; *mlp = ml->next; - dev = dev_get_by_index(sk->sk_net, ml->ifindex); + dev = dev_get_by_index(sock_net(sk), ml->ifindex); if (dev) { packet_dev_mc(dev, ml, -1); dev_put(dev); @@ -1294,7 +1294,7 @@ static void packet_flush_mclist(struct sock *sk) struct net_device *dev; po->mclist = ml->next; - if ((dev = dev_get_by_index(sk->sk_net, ml->ifindex)) != NULL) { + if ((dev = dev_get_by_index(sock_net(sk), ml->ifindex)) != NULL) { packet_dev_mc(dev, ml, -1); dev_put(dev); } @@ -1540,7 +1540,7 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCSIFFLAGS: - if (sk->sk_net != &init_net) + if (sock_net(sk) != &init_net) return -ENOIOCTLCMD; return inet_dgram_ops.ioctl(sock, cmd, arg); #endif diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 1a7f143cf741..92d85c38e4d2 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -551,7 +551,7 @@ static struct sock *rose_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - sk = sk_alloc(osk->sk_net, PF_ROSE, GFP_ATOMIC, &rose_proto); + sk = sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto); if (sk == NULL) return NULL; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 0b8eb235bc13..74e662cbb2c5 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -951,7 +951,7 @@ done: static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ACT_MAX + 1]; u32 pid = skb ? NETLINK_CB(skb).pid : 0; int ret = 0, ovr = 0; @@ -1029,7 +1029,7 @@ find_dump_kind(struct nlmsghdr *n) static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 0fbedcabf111..1086df7478bc 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -118,7 +118,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; struct tcmsg *t; u32 protocol; @@ -389,7 +389,7 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); int t; int s_t; struct net_device *dev; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 7e3c048ba9b1..15b91a9ee8e8 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -605,7 +605,7 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct tcmsg *tcm = NLMSG_DATA(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; @@ -674,7 +674,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct tcmsg *tcm; struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; @@ -893,7 +893,7 @@ err_out: static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); int idx, q_idx; int s_idx, s_q_idx; struct net_device *dev; @@ -945,7 +945,7 @@ done: static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct tcmsg *tcm = NLMSG_DATA(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; @@ -1139,7 +1139,7 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); int t; int s_t; struct net_device *dev; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index dc71d0d83753..036bfcc8d15b 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -636,7 +636,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; - newsk = sk_alloc(sk->sk_net, PF_INET6, GFP_KERNEL, sk->sk_prot); + newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot); if (!newsk) goto out; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 2faa0d8839eb..5aea91137fbb 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -554,7 +554,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, { struct inet_sock *inet = inet_sk(sk); struct inet_sock *newinet; - struct sock *newsk = sk_alloc(sk->sk_net, PF_INET, GFP_KERNEL, + struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL, sk->sk_prot); if (!newsk) diff --git a/net/socket.c b/net/socket.c index 9d3fbfbc8535..79e5382fd110 100644 --- a/net/socket.c +++ b/net/socket.c @@ -857,7 +857,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) sock = file->private_data; sk = sock->sk; - net = sk->sk_net; + net = sock_net(sk); if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { err = dev_ioctl(net, cmd, argp); } else @@ -1375,7 +1375,7 @@ asmlinkage long sys_listen(int fd, int backlog) sock = sockfd_lookup_light(fd, &err, &fput_needed); if (sock) { - somaxconn = sock->sk->sk_net->sysctl_somaxconn; + somaxconn = sock_net(sock->sk)->sysctl_somaxconn; if ((unsigned)backlog > somaxconn) backlog = somaxconn; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3220d5cb5b5d..ae45df060e3a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1375,7 +1375,7 @@ static int accept(struct socket *sock, struct socket *newsock, int flags) } buf = skb_peek(&sock->sk->sk_receive_queue); - res = tipc_create(sock->sk->sk_net, newsock, 0); + res = tipc_create(sock_net(sock->sk), newsock, 0); if (!res) { struct tipc_sock *new_tsock = tipc_sk(newsock->sk); struct tipc_portid id; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ae584356852c..cb9d0cb5f270 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -252,7 +252,7 @@ static struct sock *__unix_find_socket_byname(struct net *net, sk_for_each(s, node, &unix_socket_table[hash ^ type]) { struct unix_sock *u = unix_sk(s); - if (s->sk_net != net) + if (sock_net(s) != net) continue; if (u->addr->len == len && @@ -289,7 +289,7 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->dentry; - if (s->sk_net != net) + if (sock_net(s) != net) continue; if(dentry && dentry->d_inode == i) @@ -654,7 +654,7 @@ static int unix_release(struct socket *sock) static int unix_autobind(struct socket *sock) { struct sock *sk = sock->sk; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); static u32 ordernum = 1; struct unix_address * addr; @@ -758,7 +758,7 @@ fail: static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; struct dentry * dentry = NULL; @@ -899,7 +899,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) { struct sock *sk = sock->sk; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr; struct sock *other; unsigned hash; @@ -996,7 +996,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, { struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; struct sock *sk = sock->sk; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk), *newu, *otheru; struct sock *newsk = NULL; struct sock *other = NULL; @@ -1025,7 +1025,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, err = -ENOMEM; /* create new sock for complete connection */ - newsk = unix_create1(sk->sk_net, NULL); + newsk = unix_create1(sock_net(sk), NULL); if (newsk == NULL) goto out; @@ -1312,7 +1312,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, { struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr=msg->msg_name; struct sock *other = NULL; @@ -2022,7 +2022,7 @@ static struct sock *unix_seq_idx(struct unix_iter_state *iter, loff_t pos) struct sock *s; for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) { - if (s->sk_net != iter->p.net) + if (sock_net(s) != iter->p.net) continue; if (off == pos) return s; @@ -2050,7 +2050,7 @@ static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) sk = first_unix_socket(&iter->i); else sk = next_unix_socket(&iter->i, sk); - while (sk && (sk->sk_net != iter->p.net)) + while (sk && (sock_net(sk) != iter->p.net)) sk = next_unix_socket(&iter->i, sk); return sk; } diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 7a46ea73fe2d..6ba67c523c16 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -549,7 +549,7 @@ static struct sock *x25_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) goto out; - if ((sk = x25_alloc_socket(osk->sk_net)) == NULL) + if ((sk = x25_alloc_socket(sock_net(osk))) == NULL) goto out; x25 = x25_sk(sk); -- cgit v1.2.3-71-gd317 From 1218854afa6f659be90b748cf1bc7badee954a35 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 26 Mar 2008 02:36:06 +0900 Subject: [NET] NETNS: Omit seq_net_private->net without CONFIG_NET_NS. Without CONFIG_NET_NS, no namespace other than &init_net exists, no need to store net in seq_net_private. Signed-off-by: YOSHIFUJI Hideaki --- fs/proc/proc_net.c | 6 +++--- include/linux/seq_file.h | 7 +++++++ net/core/neighbour.c | 8 ++++---- net/ipv4/fib_hash.c | 5 ++--- net/ipv4/fib_trie.c | 13 ++++++------- net/ipv4/raw.c | 4 ++-- net/ipv4/route.c | 29 +++++++++++++++-------------- net/ipv6/addrconf.c | 4 ++-- net/ipv6/mcast.c | 4 ++-- net/netfilter/x_tables.c | 4 ++-- net/netlink/af_netlink.c | 6 +++--- net/unix/af_unix.c | 10 +++++----- 12 files changed, 53 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 4caa5f774fb7..13cd7835d0df 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -44,7 +44,9 @@ int seq_open_net(struct inode *ino, struct file *f, put_net(net); return -ENOMEM; } +#ifdef CONFIG_NET_NS p->net = net; +#endif return 0; } EXPORT_SYMBOL_GPL(seq_open_net); @@ -52,12 +54,10 @@ EXPORT_SYMBOL_GPL(seq_open_net); int seq_release_net(struct inode *ino, struct file *f) { struct seq_file *seq; - struct seq_net_private *p; seq = f->private_data; - p = seq->private; - put_net(p->net); + put_net(seq_file_net(seq)); seq_release_private(ino, f); return 0; } diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 67c2563961f3..d870a8253769 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -5,6 +5,7 @@ #include #include #include +#include struct seq_operations; struct file; @@ -64,7 +65,9 @@ extern struct list_head *seq_list_next(void *v, struct list_head *head, struct net; struct seq_net_private { +#ifdef CONFIG_NET_NS struct net *net; +#endif }; int seq_open_net(struct inode *, struct file *, @@ -72,7 +75,11 @@ int seq_open_net(struct inode *, struct file *, int seq_release_net(struct inode *, struct file *); static inline struct net *seq_file_net(struct seq_file *seq) { +#ifdef CONFIG_NET_NS return ((struct seq_net_private *)seq->private)->net; +#else + return &init_net; +#endif } #endif diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 065fbac7ecd3..b8d491fb4b42 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2145,7 +2145,7 @@ EXPORT_SYMBOL(__neigh_for_each_release); static struct neighbour *neigh_get_first(struct seq_file *seq) { struct neigh_seq_state *state = seq->private; - struct net *net = state->p.net; + struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; struct neighbour *n = NULL; int bucket = state->bucket; @@ -2186,7 +2186,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, loff_t *pos) { struct neigh_seq_state *state = seq->private; - struct net *net = state->p.net; + struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; if (state->neigh_sub_iter) { @@ -2246,7 +2246,7 @@ static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) { struct neigh_seq_state *state = seq->private; - struct net * net = state->p.net; + struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; struct pneigh_entry *pn = NULL; int bucket = state->bucket; @@ -2269,7 +2269,7 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, loff_t *pos) { struct neigh_seq_state *state = seq->private; - struct net * net = state->p.net; + struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; pn = pn->next; diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 8d58d85dfac6..02088deb0461 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -821,7 +821,7 @@ static struct fib_alias *fib_get_first(struct seq_file *seq) struct fib_table *main_table; struct fn_hash *table; - main_table = fib_get_table(iter->p.net, RT_TABLE_MAIN); + main_table = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); table = (struct fn_hash *)main_table->tb_data; iter->bucket = 0; @@ -959,11 +959,10 @@ static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos) static void *fib_seq_start(struct seq_file *seq, loff_t *pos) __acquires(fib_hash_lock) { - struct fib_iter_state *iter = seq->private; void *v = NULL; read_lock(&fib_hash_lock); - if (fib_get_table(iter->p.net, RT_TABLE_MAIN)) + if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN)) v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; return v; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ce6cb34e28e1..9e491e70e855 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2279,9 +2279,10 @@ static const struct file_operations fib_triestat_fops = { .release = fib_triestat_seq_release, }; -static struct node *fib_trie_get_idx(struct fib_trie_iter *iter, loff_t pos) +static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) { - struct net *net = iter->p.net; + struct fib_trie_iter *iter = seq->private; + struct net *net = seq_file_net(seq); loff_t idx = 0; unsigned int h; @@ -2309,16 +2310,14 @@ static struct node *fib_trie_get_idx(struct fib_trie_iter *iter, loff_t pos) static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { - struct fib_trie_iter *iter = seq->private; - rcu_read_lock(); - return fib_trie_get_idx(iter, *pos); + return fib_trie_get_idx(seq, *pos); } static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct fib_trie_iter *iter = seq->private; - struct net *net = iter->p.net; + struct net *net = seq_file_net(seq); struct fib_table *tb = iter->tb; struct hlist_node *tb_node; unsigned int h; @@ -2513,7 +2512,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) struct fib_table *tb; rcu_read_lock(); - tb = fib_get_table(iter->p.net, RT_TABLE_MAIN); + tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); if (!tb) return NULL; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index be19a4048d7c..25dc8b38cac3 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -856,7 +856,7 @@ static struct sock *raw_get_first(struct seq_file *seq) struct hlist_node *node; sk_for_each(sk, node, &state->h->ht[state->bucket]) - if (sock_net(sk) == state->p.net) + if (sock_net(sk) == seq_file_net(seq)) goto found; } sk = NULL; @@ -872,7 +872,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) sk = sk_next(sk); try_again: ; - } while (sk && sock_net(sk) != state->p.net); + } while (sk && sock_net(sk) != seq_file_net(seq)); if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { sk = sk_head(&state->h->ht[state->bucket]); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 194f5cca3121..eab8d75e5222 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -276,15 +276,16 @@ struct rt_cache_iter_state { int genid; }; -static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st) +static struct rtable *rt_cache_get_first(struct seq_file *seq) { + struct rt_cache_iter_state *st = seq->private; struct rtable *r = NULL; for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { rcu_read_lock_bh(); r = rcu_dereference(rt_hash_table[st->bucket].chain); while (r) { - if (dev_net(r->u.dst.dev) == st->p.net && + if (dev_net(r->u.dst.dev) == seq_file_net(seq) && r->rt_genid == st->genid) return r; r = rcu_dereference(r->u.dst.rt_next); @@ -294,9 +295,10 @@ static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st) return r; } -static struct rtable *__rt_cache_get_next(struct rt_cache_iter_state *st, +static struct rtable *__rt_cache_get_next(struct seq_file *seq, struct rtable *r) { + struct rt_cache_iter_state *st = seq->private; r = r->u.dst.rt_next; while (!r) { rcu_read_unlock_bh(); @@ -308,11 +310,12 @@ static struct rtable *__rt_cache_get_next(struct rt_cache_iter_state *st, return rcu_dereference(r); } -static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, +static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r) { - while ((r = __rt_cache_get_next(st, r)) != NULL) { - if (dev_net(r->u.dst.dev) != st->p.net) + struct rt_cache_iter_state *st = seq->private; + while ((r = __rt_cache_get_next(seq, r)) != NULL) { + if (dev_net(r->u.dst.dev) != seq_file_net(seq)) continue; if (r->rt_genid == st->genid) break; @@ -320,12 +323,12 @@ static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, return r; } -static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos) +static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos) { - struct rtable *r = rt_cache_get_first(st); + struct rtable *r = rt_cache_get_first(seq); if (r) - while (pos && (r = rt_cache_get_next(st, r))) + while (pos && (r = rt_cache_get_next(seq, r))) --pos; return pos ? NULL : r; } @@ -333,9 +336,8 @@ static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t po static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) { struct rt_cache_iter_state *st = seq->private; - if (*pos) - return rt_cache_get_idx(st, *pos - 1); + return rt_cache_get_idx(seq, *pos - 1); st->genid = atomic_read(&rt_genid); return SEQ_START_TOKEN; } @@ -343,12 +345,11 @@ static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct rtable *r; - struct rt_cache_iter_state *st = seq->private; if (v == SEQ_START_TOKEN) - r = rt_cache_get_first(st); + r = rt_cache_get_first(seq); else - r = rt_cache_get_next(st, v); + r = rt_cache_get_next(seq, v); ++*pos; return r; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f2c90f145cbb..ac5d4f4b6312 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2766,7 +2766,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) { struct inet6_ifaddr *ifa = NULL; struct if6_iter_state *state = seq->private; - struct net *net = state->p.net; + struct net *net = seq_file_net(seq); for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { ifa = inet6_addr_lst[state->bucket]; @@ -2782,7 +2782,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa) { struct if6_iter_state *state = seq->private; - struct net *net = state->p.net; + struct net *net = seq_file_net(seq); ifa = ifa->lst_next; try_again: diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 20a3d8e2f6c6..d810cff818cf 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2355,7 +2355,7 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) { struct ifmcaddr6 *im = NULL; struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); - struct net *net = state->p.net; + struct net *net = seq_file_net(seq); state->idev = NULL; for_each_netdev(net, state->dev) { @@ -2486,7 +2486,7 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) struct ip6_sf_list *psf = NULL; struct ifmcaddr6 *im = NULL; struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); - struct net *net = state->p.net; + struct net *net = seq_file_net(seq); state->idev = NULL; state->im = NULL; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index a6792089fcf9..0bd95680a494 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -727,7 +727,7 @@ struct xt_names_priv { static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos) { struct xt_names_priv *priv = seq->private; - struct net *net = priv->p.net; + struct net *net = seq_file_net(seq); int af = priv->af; mutex_lock(&xt[af].mutex); @@ -737,7 +737,7 @@ static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos) static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct xt_names_priv *priv = seq->private; - struct net *net = priv->p.net; + struct net *net = seq_file_net(seq); int af = priv->af; return seq_list_next(v, &net->xt.tables[af], pos); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 712a7bff8560..1d16d95dfaaf 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1758,7 +1758,7 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) for (j = 0; j <= hash->mask; j++) { sk_for_each(s, node, &hash->table[j]) { - if (sock_net(s) != iter->p.net) + if (sock_net(s) != seq_file_net(seq)) continue; if (off == pos) { iter->link = i; @@ -1794,7 +1794,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) s = v; do { s = sk_next(s); - } while (s && (sock_net(s) != iter->p.net)); + } while (s && sock_net(s) != seq_file_net(seq)); if (s) return s; @@ -1806,7 +1806,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) for (; j <= hash->mask; j++) { s = sk_head(&hash->table[j]); - while (s && sock_net(s) != iter->p.net) + while (s && sock_net(s) != seq_file_net(seq)) s = sk_next(s); if (s) { iter->link = i; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index cb9d0cb5f270..4a4793051bcb 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2016,13 +2016,14 @@ struct unix_iter_state { struct seq_net_private p; int i; }; -static struct sock *unix_seq_idx(struct unix_iter_state *iter, loff_t pos) +static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos) { + struct unix_iter_state *iter = seq->private; loff_t off = 0; struct sock *s; for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) { - if (sock_net(s) != iter->p.net) + if (sock_net(s) != seq_file_net(seq)) continue; if (off == pos) return s; @@ -2035,9 +2036,8 @@ static struct sock *unix_seq_idx(struct unix_iter_state *iter, loff_t pos) static void *unix_seq_start(struct seq_file *seq, loff_t *pos) __acquires(unix_table_lock) { - struct unix_iter_state *iter = seq->private; spin_lock(&unix_table_lock); - return *pos ? unix_seq_idx(iter, *pos - 1) : ((void *) 1); + return *pos ? unix_seq_idx(seq, *pos - 1) : ((void *) 1); } static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) @@ -2050,7 +2050,7 @@ static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) sk = first_unix_socket(&iter->i); else sk = next_unix_socket(&iter->i, sk); - while (sk && (sock_net(sk) != iter->p.net)) + while (sk && (sock_net(sk) != seq_file_net(seq))) sk = next_unix_socket(&iter->i, sk); return sk; } -- cgit v1.2.3-71-gd317 From b8beedd25d3913d45b8330a08ab88fdf90eb54b8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Mar 2008 20:09:33 -0700 Subject: [NETFILTER]: Add nf_inet_addr_cmp() Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 9 +++++++++ include/net/netfilter/nf_conntrack_tuple.h | 15 +++------------ 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index f0680c2bee73..89e6c72ad295 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -61,6 +61,15 @@ union nf_inet_addr { #ifdef __KERNEL__ #ifdef CONFIG_NETFILTER +static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, + const union nf_inet_addr *a2) +{ + return a1->all[0] == a2->all[0] && + a1->all[1] == a2->all[1] && + a1->all[2] == a2->all[2] && + a1->all[3] == a2->all[3]; +} + extern void netfilter_init(void); /* Largest hook number + 1 */ diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index d9b53dd601ad..168c91754d89 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -163,10 +163,7 @@ struct nf_conntrack_tuple_hash static inline int __nf_ct_tuple_src_equal(const struct nf_conntrack_tuple *t1, const struct nf_conntrack_tuple *t2) { - return (t1->src.u3.all[0] == t2->src.u3.all[0] && - t1->src.u3.all[1] == t2->src.u3.all[1] && - t1->src.u3.all[2] == t2->src.u3.all[2] && - t1->src.u3.all[3] == t2->src.u3.all[3] && + return (nf_inet_addr_cmp(&t1->src.u3, &t2->src.u3) && t1->src.u.all == t2->src.u.all && t1->src.l3num == t2->src.l3num); } @@ -174,10 +171,7 @@ static inline int __nf_ct_tuple_src_equal(const struct nf_conntrack_tuple *t1, static inline int __nf_ct_tuple_dst_equal(const struct nf_conntrack_tuple *t1, const struct nf_conntrack_tuple *t2) { - return (t1->dst.u3.all[0] == t2->dst.u3.all[0] && - t1->dst.u3.all[1] == t2->dst.u3.all[1] && - t1->dst.u3.all[2] == t2->dst.u3.all[2] && - t1->dst.u3.all[3] == t2->dst.u3.all[3] && + return (nf_inet_addr_cmp(&t1->dst.u3, &t2->dst.u3) && t1->dst.u.all == t2->dst.u.all && t1->dst.protonum == t2->dst.protonum); } @@ -192,10 +186,7 @@ static inline int nf_ct_tuple_equal(const struct nf_conntrack_tuple *t1, static inline int nf_ct_tuple_mask_equal(const struct nf_conntrack_tuple_mask *m1, const struct nf_conntrack_tuple_mask *m2) { - return (m1->src.u3.all[0] == m2->src.u3.all[0] && - m1->src.u3.all[1] == m2->src.u3.all[1] && - m1->src.u3.all[2] == m2->src.u3.all[2] && - m1->src.u3.all[3] == m2->src.u3.all[3] && + return (nf_inet_addr_cmp(&m1->src.u3, &m2->src.u3) && m1->src.u.all == m2->src.u.all); } -- cgit v1.2.3-71-gd317 From 2a6cfb22ae002330d445f734668d9158db9e90de Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Mar 2008 20:16:54 -0700 Subject: [NETFILTER]: nf_conntrack_sip: adjust dptr and datalen after packet mangling After mangling the packet, the pointer to the data and the length of the data portion may change and need to be adjusted. Use double data pointers and a pointer to the length everywhere and add a helper function to the NAT helper for performing the adjustments. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_sip.h | 6 +- net/ipv4/netfilter/nf_nat_sip.c | 91 ++++++++++++++++-------------- net/netfilter/nf_conntrack_sip.c | 14 +++-- 3 files changed, 60 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 8e5ce1ca7bfc..9d0dbfb26300 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -24,11 +24,13 @@ enum sip_header_pos { extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, - const char **dptr); + const char **dptr, + unsigned int *datalen); extern unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp, - const char *dptr); + const char **dptr, + unsigned int *datalen); extern int ct_sip_get_info(const struct nf_conn *ct, const char *dptr, size_t dlen, unsigned int *matchoff, diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 84d8b4982cdf..e77122e65283 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -60,15 +60,35 @@ static void addr_map_init(const struct nf_conn *ct, struct addr_map *map) } } +static unsigned int mangle_packet(struct sk_buff *skb, + const char **dptr, unsigned int *datalen, + unsigned int matchoff, unsigned int matchlen, + const char *buffer, unsigned int buflen) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen, + buffer, buflen)) + return 0; + + /* Reload data pointer and adjust datalen value */ + *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); + *datalen += buflen - matchlen; + return 1; +} + static int map_sip_addr(struct sk_buff *skb, enum ip_conntrack_info ctinfo, - struct nf_conn *ct, const char **dptr, size_t dlen, + struct nf_conn *ct, + const char **dptr, unsigned int *datalen, enum sip_header_pos pos, struct addr_map *map) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned int matchlen, matchoff, addrlen; char *addr; - if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0) + if (ct_sip_get_info(ct, *dptr, *datalen, &matchoff, &matchlen, + pos) <= 0) return 1; if ((matchlen == map->addr[dir].srciplen || @@ -84,26 +104,19 @@ static int map_sip_addr(struct sk_buff *skb, enum ip_conntrack_info ctinfo, } else return 1; - if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, - matchoff, matchlen, addr, addrlen)) - return 0; - *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); - return 1; - + return mangle_packet(skb, dptr, datalen, matchoff, matchlen, + addr, addrlen); } static unsigned int ip_nat_sip(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, - const char **dptr) + const char **dptr, unsigned int *datalen) { enum sip_header_pos pos; struct addr_map map; - int dataoff, datalen; - dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); - datalen = skb->len - dataoff; - if (datalen < sizeof("SIP/2.0") - 1) + if (*datalen < sizeof("SIP/2.0") - 1) return NF_ACCEPT; addr_map_init(ct, &map); @@ -115,7 +128,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, * The "userinfo" and "@" components of the SIP URI MUST NOT * be present. */ - if (datalen >= sizeof("REGISTER") - 1 && + if (*datalen >= sizeof("REGISTER") - 1 && strncmp(*dptr, "REGISTER", sizeof("REGISTER") - 1) == 0) pos = POS_REG_REQ_URI; else @@ -136,51 +149,45 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, static unsigned int mangle_sip_packet(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, - const char **dptr, size_t dlen, + const char **dptr, unsigned int *datalen, char *buffer, int bufflen, enum sip_header_pos pos) { unsigned int matchlen, matchoff; - if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0) - return 0; - - if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, - matchoff, matchlen, buffer, bufflen)) + if (ct_sip_get_info(ct, *dptr, *datalen, &matchoff, &matchlen, + pos) <= 0) return 0; - /* We need to reload this. Thanks Patrick. */ - *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); - return 1; + return mangle_packet(skb, dptr, datalen, matchoff, matchlen, + buffer, bufflen); } static int mangle_content_len(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, - const char *dptr) + const char **dptr, unsigned int *datalen) { - unsigned int dataoff, matchoff, matchlen; + unsigned int matchoff, matchlen; char buffer[sizeof("65536")]; int bufflen; - dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); - /* Get actual SDP length */ - if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff, + if (ct_sip_get_info(ct, *dptr, *datalen, &matchoff, &matchlen, POS_SDP_HEADER) > 0) { /* since ct_sip_get_info() give us a pointer passing 'v=' we need to add 2 bytes in this count. */ - int c_len = skb->len - dataoff - matchoff + 2; + int c_len = *datalen - matchoff + 2; /* Now, update SDP length */ - if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff, + if (ct_sip_get_info(ct, *dptr, *datalen, &matchoff, &matchlen, POS_CONTENT) > 0) { bufflen = sprintf(buffer, "%u", c_len); - return nf_nat_mangle_udp_packet(skb, ct, ctinfo, - matchoff, matchlen, - buffer, bufflen); + return mangle_packet(skb, dptr, datalen, + matchoff, matchlen, + buffer, bufflen); } } return 0; @@ -190,30 +197,28 @@ static unsigned int mangle_sdp(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, __be32 newip, u_int16_t port, - const char *dptr) + const char **dptr, unsigned int *datalen) { char buffer[sizeof("nnn.nnn.nnn.nnn")]; - unsigned int dataoff, bufflen; - - dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); + unsigned int bufflen; /* Mangle owner and contact info. */ bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip)); - if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, + if (!mangle_sip_packet(skb, ctinfo, ct, dptr, datalen, buffer, bufflen, POS_OWNER_IP4)) return 0; - if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, + if (!mangle_sip_packet(skb, ctinfo, ct, dptr, datalen, buffer, bufflen, POS_CONNECTION_IP4)) return 0; /* Mangle media port. */ bufflen = sprintf(buffer, "%u", port); - if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, + if (!mangle_sip_packet(skb, ctinfo, ct, dptr, datalen, buffer, bufflen, POS_MEDIA)) return 0; - return mangle_content_len(skb, ctinfo, ct, dptr); + return mangle_content_len(skb, ctinfo, ct, dptr, datalen); } static void ip_nat_sdp_expect(struct nf_conn *ct, @@ -242,7 +247,7 @@ static void ip_nat_sdp_expect(struct nf_conn *ct, static unsigned int ip_nat_sdp(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp, - const char *dptr) + const char **dptr, unsigned int *datalen) { struct nf_conn *ct = exp->master; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); @@ -275,7 +280,7 @@ static unsigned int ip_nat_sdp(struct sk_buff *skb, if (port == 0) return NF_DROP; - if (!mangle_sdp(skb, ctinfo, ct, newip, port, dptr)) { + if (!mangle_sdp(skb, ctinfo, ct, newip, port, dptr, datalen)) { nf_ct_unexpect_related(exp); return NF_DROP; } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 016e1c1aafe4..fa0d5599ff24 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -39,13 +39,15 @@ MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session"); unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, - const char **dptr) __read_mostly; + const char **dptr, + unsigned int *datalen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_hook); unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp, - const char *dptr) __read_mostly; + const char **dptr, + unsigned int *datalen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sdp_hook); static int digits_len(const struct nf_conn *, const char *, const char *, int *); @@ -369,7 +371,7 @@ static int set_expected_rtp(struct sk_buff *skb, enum ip_conntrack_info ctinfo, union nf_inet_addr *addr, __be16 port, - const char *dptr) + const char **dptr, unsigned int *datalen) { struct nf_conntrack_expect *exp; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); @@ -386,7 +388,7 @@ static int set_expected_rtp(struct sk_buff *skb, nf_nat_sdp = rcu_dereference(nf_nat_sdp_hook); if (nf_nat_sdp && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp(skb, ctinfo, exp, dptr); + ret = nf_nat_sdp(skb, ctinfo, exp, dptr, datalen); else { if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; @@ -429,7 +431,7 @@ static int sip_help(struct sk_buff *skb, nf_nat_sip = rcu_dereference(nf_nat_sip_hook); if (nf_nat_sip && ct->status & IPS_NAT_MASK) { - if (!nf_nat_sip(skb, ctinfo, ct, &dptr)) { + if (!nf_nat_sip(skb, ctinfo, ct, &dptr, &datalen)) { ret = NF_DROP; goto out; } @@ -466,7 +468,7 @@ static int sip_help(struct sk_buff *skb, goto out; } ret = set_expected_rtp(skb, ct, ctinfo, &addr, - htons(port), dptr); + htons(port), &dptr, &datalen); } } out: -- cgit v1.2.3-71-gd317 From 212440a7d04a12ee13787afecc6c86c7fc4e6184 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Mar 2008 20:17:13 -0700 Subject: [NETFILTER]: nf_conntrack_sip: remove redundant function arguments The conntrack reference and ctinfo can be derived from the packet. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_sip.h | 7 ++--- net/ipv4/netfilter/nf_nat_sip.c | 49 +++++++++++++++--------------- net/netfilter/nf_conntrack_sip.c | 24 ++++++--------- 3 files changed, 37 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 9d0dbfb26300..b94de3d60303 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -22,15 +22,12 @@ enum sip_header_pos { }; extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - struct nf_conn *ct, const char **dptr, unsigned int *datalen); extern unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - struct nf_conntrack_expect *exp, const char **dptr, - unsigned int *datalen); + unsigned int *datalen, + struct nf_conntrack_expect *exp); extern int ct_sip_get_info(const struct nf_conn *ct, const char *dptr, size_t dlen, unsigned int *matchoff, diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index e77122e65283..acaa7d4569fa 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -78,11 +78,12 @@ static unsigned int mangle_packet(struct sk_buff *skb, return 1; } -static int map_sip_addr(struct sk_buff *skb, enum ip_conntrack_info ctinfo, - struct nf_conn *ct, +static int map_sip_addr(struct sk_buff *skb, const char **dptr, unsigned int *datalen, enum sip_header_pos pos, struct addr_map *map) { + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned int matchlen, matchoff, addrlen; char *addr; @@ -109,10 +110,10 @@ static int map_sip_addr(struct sk_buff *skb, enum ip_conntrack_info ctinfo, } static unsigned int ip_nat_sip(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - struct nf_conn *ct, const char **dptr, unsigned int *datalen) { + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum sip_header_pos pos; struct addr_map map; @@ -134,25 +135,25 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, else pos = POS_REQ_URI; - if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, pos, &map)) + if (!map_sip_addr(skb, dptr, datalen, pos, &map)) return NF_DROP; } - if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_FROM, &map) || - !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_TO, &map) || - !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_VIA, &map) || - !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map)) + if (!map_sip_addr(skb, dptr, datalen, POS_FROM, &map) || + !map_sip_addr(skb, dptr, datalen, POS_TO, &map) || + !map_sip_addr(skb, dptr, datalen, POS_VIA, &map) || + !map_sip_addr(skb, dptr, datalen, POS_CONTACT, &map)) return NF_DROP; return NF_ACCEPT; } static unsigned int mangle_sip_packet(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - struct nf_conn *ct, const char **dptr, unsigned int *datalen, char *buffer, int bufflen, enum sip_header_pos pos) { + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); unsigned int matchlen, matchoff; if (ct_sip_get_info(ct, *dptr, *datalen, &matchoff, &matchlen, @@ -164,10 +165,10 @@ static unsigned int mangle_sip_packet(struct sk_buff *skb, } static int mangle_content_len(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - struct nf_conn *ct, const char **dptr, unsigned int *datalen) { + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); unsigned int matchoff, matchlen; char buffer[sizeof("65536")]; int bufflen; @@ -204,21 +205,21 @@ static unsigned int mangle_sdp(struct sk_buff *skb, /* Mangle owner and contact info. */ bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip)); - if (!mangle_sip_packet(skb, ctinfo, ct, dptr, datalen, - buffer, bufflen, POS_OWNER_IP4)) + if (!mangle_sip_packet(skb, dptr, datalen, buffer, bufflen, + POS_OWNER_IP4)) return 0; - if (!mangle_sip_packet(skb, ctinfo, ct, dptr, datalen, - buffer, bufflen, POS_CONNECTION_IP4)) + if (!mangle_sip_packet(skb, dptr, datalen, buffer, bufflen, + POS_CONNECTION_IP4)) return 0; /* Mangle media port. */ bufflen = sprintf(buffer, "%u", port); - if (!mangle_sip_packet(skb, ctinfo, ct, dptr, datalen, - buffer, bufflen, POS_MEDIA)) + if (!mangle_sip_packet(skb, dptr, datalen, buffer, bufflen, + POS_MEDIA)) return 0; - return mangle_content_len(skb, ctinfo, ct, dptr, datalen); + return mangle_content_len(skb, dptr, datalen); } static void ip_nat_sdp_expect(struct nf_conn *ct, @@ -245,11 +246,11 @@ static void ip_nat_sdp_expect(struct nf_conn *ct, /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. */ static unsigned int ip_nat_sdp(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - struct nf_conntrack_expect *exp, - const char **dptr, unsigned int *datalen) + const char **dptr, unsigned int *datalen, + struct nf_conntrack_expect *exp) { - struct nf_conn *ct = exp->master; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); __be32 newip; u_int16_t port; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index fa0d5599ff24..38e1e7a05334 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -37,17 +37,14 @@ module_param(sip_timeout, uint, 0600); MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session"); unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - struct nf_conn *ct, const char **dptr, unsigned int *datalen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_hook); unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - struct nf_conntrack_expect *exp, const char **dptr, - unsigned int *datalen) __read_mostly; + unsigned int *datalen, + struct nf_conntrack_expect *exp) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sdp_hook); static int digits_len(const struct nf_conn *, const char *, const char *, int *); @@ -367,13 +364,12 @@ int ct_sip_get_info(const struct nf_conn *ct, EXPORT_SYMBOL_GPL(ct_sip_get_info); static int set_expected_rtp(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - union nf_inet_addr *addr, - __be16 port, - const char **dptr, unsigned int *datalen) + const char **dptr, unsigned int *datalen, + union nf_inet_addr *addr, __be16 port) { struct nf_conntrack_expect *exp; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); int family = ct->tuplehash[!dir].tuple.src.l3num; int ret; @@ -388,7 +384,7 @@ static int set_expected_rtp(struct sk_buff *skb, nf_nat_sdp = rcu_dereference(nf_nat_sdp_hook); if (nf_nat_sdp && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp(skb, ctinfo, exp, dptr, datalen); + ret = nf_nat_sdp(skb, dptr, datalen, exp); else { if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; @@ -431,7 +427,7 @@ static int sip_help(struct sk_buff *skb, nf_nat_sip = rcu_dereference(nf_nat_sip_hook); if (nf_nat_sip && ct->status & IPS_NAT_MASK) { - if (!nf_nat_sip(skb, ctinfo, ct, &dptr, &datalen)) { + if (!nf_nat_sip(skb, &dptr, &datalen)) { ret = NF_DROP; goto out; } @@ -467,8 +463,8 @@ static int sip_help(struct sk_buff *skb, ret = NF_DROP; goto out; } - ret = set_expected_rtp(skb, ct, ctinfo, &addr, - htons(port), &dptr, &datalen); + ret = set_expected_rtp(skb, &dptr, &datalen, + &addr, htons(port)); } } out: -- cgit v1.2.3-71-gd317 From 3e9b4600b4e71beaa9d943251bfe9c25f6a97b8c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Mar 2008 20:17:55 -0700 Subject: [NETFILTER]: nf_conntrack_sip: add seperate SDP header parsing function SDP and SIP headers are quite different, SIP can have continuation lines, leading and trailing whitespace after the colon and is mostly case-insensitive while SDP headers always begin on a new line and are followed by an equal sign and the value, without any whitespace. Introduce new SDP header parsing function and convert all users that used the SIP header parsing function. This will allow to properly deal with the special SIP cases in the SIP header parsing function later. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_sip.h | 49 +++++++-- net/ipv4/netfilter/nf_nat_sip.c | 69 ++++++------- net/netfilter/nf_conntrack_sip.c | 159 +++++++++++++++++------------ 3 files changed, 169 insertions(+), 108 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index b94de3d60303..9131cbc9b9de 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -13,12 +13,42 @@ enum sip_header_pos { POS_VIA, POS_CONTACT, POS_CONTENT, - POS_MEDIA, - POS_OWNER_IP4, - POS_CONNECTION_IP4, - POS_OWNER_IP6, - POS_CONNECTION_IP6, - POS_SDP_HEADER, +}; + +struct sip_header { + const char *name; + const char *cname; + const char *search; + unsigned int len; + unsigned int clen; + unsigned int slen; + int (*match_len)(const struct nf_conn *ct, + const char *dptr, const char *limit, + int *shift); +}; + +#define __SIP_HDR(__name, __cname, __search, __match) \ +{ \ + .name = (__name), \ + .len = sizeof(__name) - 1, \ + .cname = (__cname), \ + .clen = (__cname) ? sizeof(__cname) - 1 : 0, \ + .search = (__search), \ + .slen = (__search) ? sizeof(__search) - 1 : 0, \ + .match_len = (__match), \ +} + +#define SDP_HDR(__name, __search, __match) \ + __SIP_HDR(__name, NULL, __search, __match) + +enum sdp_header_types { + SDP_HDR_UNSPEC, + SDP_HDR_VERSION, + SDP_HDR_OWNER_IP4, + SDP_HDR_CONNECTION_IP4, + SDP_HDR_OWNER_IP6, + SDP_HDR_CONNECTION_IP6, + SDP_HDR_MEDIA, }; extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, @@ -36,5 +66,12 @@ extern int ct_sip_lnlen(const char *line, const char *limit); extern const char *ct_sip_search(const char *needle, const char *haystack, size_t needle_len, size_t haystack_len, int case_sensitive); + +extern int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + enum sdp_header_types type, + enum sdp_header_types term, + unsigned int *matchoff, unsigned int *matchlen); + #endif /* __KERNEL__ */ #endif /* __NF_CONNTRACK_SIP_H__ */ diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index dd1b2d86deee..aa8a4f492baf 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -147,51 +147,46 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, return NF_ACCEPT; } -static unsigned int mangle_sip_packet(struct sk_buff *skb, - const char **dptr, unsigned int *datalen, - char *buffer, int bufflen, - enum sip_header_pos pos) +static int mangle_content_len(struct sk_buff *skb, + const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - unsigned int matchlen, matchoff; + unsigned int matchoff, matchlen; + char buffer[sizeof("65536")]; + int buflen, c_len; + /* Get actual SDP length */ + if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen, + SDP_HDR_VERSION, SDP_HDR_UNSPEC, + &matchoff, &matchlen) <= 0) + return 0; + c_len = *datalen - matchoff + strlen("v="); + + /* Now, update SDP length */ if (ct_sip_get_info(ct, *dptr, *datalen, &matchoff, &matchlen, - pos) <= 0) + POS_CONTENT) <= 0) return 0; + buflen = sprintf(buffer, "%u", c_len); return mangle_packet(skb, dptr, datalen, matchoff, matchlen, - buffer, bufflen); + buffer, buflen); } -static int mangle_content_len(struct sk_buff *skb, - const char **dptr, unsigned int *datalen) +static unsigned mangle_sdp_packet(struct sk_buff *skb, + const char **dptr, unsigned int *datalen, + enum sdp_header_types type, + char *buffer, int buflen) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - unsigned int matchoff, matchlen; - char buffer[sizeof("65536")]; - int bufflen; + unsigned int matchlen, matchoff; - /* Get actual SDP length */ - if (ct_sip_get_info(ct, *dptr, *datalen, &matchoff, - &matchlen, POS_SDP_HEADER) > 0) { - - /* since ct_sip_get_info() give us a pointer passing 'v=' - we need to add 2 bytes in this count. */ - int c_len = *datalen - matchoff + 2; - - /* Now, update SDP length */ - if (ct_sip_get_info(ct, *dptr, *datalen, &matchoff, - &matchlen, POS_CONTENT) > 0) { - - bufflen = sprintf(buffer, "%u", c_len); - return mangle_packet(skb, dptr, datalen, - matchoff, matchlen, - buffer, bufflen); - } - } - return 0; + if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen, type, SDP_HDR_UNSPEC, + &matchoff, &matchlen) <= 0) + return 0; + return mangle_packet(skb, dptr, datalen, matchoff, matchlen, + buffer, buflen); } static unsigned int mangle_sdp(struct sk_buff *skb, @@ -205,18 +200,18 @@ static unsigned int mangle_sdp(struct sk_buff *skb, /* Mangle owner and contact info. */ bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip)); - if (!mangle_sip_packet(skb, dptr, datalen, buffer, bufflen, - POS_OWNER_IP4)) + if (!mangle_sdp_packet(skb, dptr, datalen, SDP_HDR_OWNER_IP4, + buffer, bufflen)) return 0; - if (!mangle_sip_packet(skb, dptr, datalen, buffer, bufflen, - POS_CONNECTION_IP4)) + if (!mangle_sdp_packet(skb, dptr, datalen, SDP_HDR_CONNECTION_IP4, + buffer, bufflen)) return 0; /* Mangle media port. */ bufflen = sprintf(buffer, "%u", port); - if (!mangle_sip_packet(skb, dptr, datalen, buffer, bufflen, - POS_MEDIA)) + if (!mangle_sdp_packet(skb, dptr, datalen, SDP_HDR_MEDIA, + buffer, bufflen)) return 0; return mangle_content_len(skb, dptr, datalen); diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index cf19a7082a75..801fcb3c749f 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -124,66 +124,6 @@ static const struct sip_header_nfo ct_sip_hdrs[] = { .ln_strlen = sizeof(":") - 1, .match_len = skp_digits_len }, - [POS_MEDIA] = { /* SDP media info */ - .case_sensitive = 1, - .lname = "\nm=", - .lnlen = sizeof("\nm=") - 1, - .sname = "\rm=", - .snlen = sizeof("\rm=") - 1, - .ln_str = "audio ", - .ln_strlen = sizeof("audio ") - 1, - .match_len = digits_len - }, - [POS_OWNER_IP4] = { /* SDP owner address*/ - .case_sensitive = 1, - .lname = "\no=", - .lnlen = sizeof("\no=") - 1, - .sname = "\ro=", - .snlen = sizeof("\ro=") - 1, - .ln_str = "IN IP4 ", - .ln_strlen = sizeof("IN IP4 ") - 1, - .match_len = epaddr_len - }, - [POS_CONNECTION_IP4] = {/* SDP connection info */ - .case_sensitive = 1, - .lname = "\nc=", - .lnlen = sizeof("\nc=") - 1, - .sname = "\rc=", - .snlen = sizeof("\rc=") - 1, - .ln_str = "IN IP4 ", - .ln_strlen = sizeof("IN IP4 ") - 1, - .match_len = epaddr_len - }, - [POS_OWNER_IP6] = { /* SDP owner address*/ - .case_sensitive = 1, - .lname = "\no=", - .lnlen = sizeof("\no=") - 1, - .sname = "\ro=", - .snlen = sizeof("\ro=") - 1, - .ln_str = "IN IP6 ", - .ln_strlen = sizeof("IN IP6 ") - 1, - .match_len = epaddr_len - }, - [POS_CONNECTION_IP6] = {/* SDP connection info */ - .case_sensitive = 1, - .lname = "\nc=", - .lnlen = sizeof("\nc=") - 1, - .sname = "\rc=", - .snlen = sizeof("\rc=") - 1, - .ln_str = "IN IP6 ", - .ln_strlen = sizeof("IN IP6 ") - 1, - .match_len = epaddr_len - }, - [POS_SDP_HEADER] = { /* SDP version header */ - .case_sensitive = 1, - .lname = "\nv=", - .lnlen = sizeof("\nv=") - 1, - .sname = "\rv=", - .snlen = sizeof("\rv=") - 1, - .ln_str = "=", - .ln_strlen = sizeof("=") - 1, - .match_len = digits_len - } }; /* get line length until first CR or LF seen. */ @@ -363,6 +303,92 @@ int ct_sip_get_info(const struct nf_conn *ct, } EXPORT_SYMBOL_GPL(ct_sip_get_info); +/* SDP header parsing: a SDP session description contains an ordered set of + * headers, starting with a section containing general session parameters, + * optionally followed by multiple media descriptions. + * + * SDP headers always start at the beginning of a line. According to RFC 2327: + * "The sequence CRLF (0x0d0a) is used to end a record, although parsers should + * be tolerant and also accept records terminated with a single newline + * character". We handle both cases. + */ +static const struct sip_header ct_sdp_hdrs[] = { + [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len), + [SDP_HDR_OWNER_IP4] = SDP_HDR("o=", "IN IP4 ", epaddr_len), + [SDP_HDR_CONNECTION_IP4] = SDP_HDR("c=", "IN IP4 ", epaddr_len), + [SDP_HDR_OWNER_IP6] = SDP_HDR("o=", "IN IP6 ", epaddr_len), + [SDP_HDR_CONNECTION_IP6] = SDP_HDR("c=", "IN IP6 ", epaddr_len), + [SDP_HDR_MEDIA] = SDP_HDR("m=", "audio ", digits_len), +}; + +/* Linear string search within SDP header values */ +static const char *ct_sdp_header_search(const char *dptr, const char *limit, + const char *needle, unsigned int len) +{ + for (limit -= len; dptr < limit; dptr++) { + if (*dptr == '\r' || *dptr == '\n') + break; + if (strncmp(dptr, needle, len) == 0) + return dptr; + } + return NULL; +} + +/* Locate a SDP header (optionally a substring within the header value), + * optionally stopping at the first occurence of the term header, parse + * it and return the offset and length of the data we're interested in. + */ +int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + enum sdp_header_types type, + enum sdp_header_types term, + unsigned int *matchoff, unsigned int *matchlen) +{ + const struct sip_header *hdr = &ct_sdp_hdrs[type]; + const struct sip_header *thdr = &ct_sdp_hdrs[term]; + const char *start = dptr, *limit = dptr + datalen; + int shift = 0; + + for (dptr += dataoff; dptr < limit; dptr++) { + /* Find beginning of line */ + if (*dptr != '\r' && *dptr != '\n') + continue; + if (++dptr >= limit) + break; + if (*(dptr - 1) == '\r' && *dptr == '\n') { + if (++dptr >= limit) + break; + } + + if (term != SDP_HDR_UNSPEC && + limit - dptr >= thdr->len && + strnicmp(dptr, thdr->name, thdr->len) == 0) + break; + else if (limit - dptr >= hdr->len && + strnicmp(dptr, hdr->name, hdr->len) == 0) + dptr += hdr->len; + else + continue; + + *matchoff = dptr - start; + if (hdr->search) { + dptr = ct_sdp_header_search(dptr, limit, hdr->search, + hdr->slen); + if (!dptr) + return -1; + dptr += hdr->slen; + } + + *matchlen = hdr->match_len(ct, dptr, limit, &shift); + if (!*matchlen) + return -1; + *matchoff = dptr - start + shift; + return 1; + } + return 0; +} +EXPORT_SYMBOL_GPL(ct_sip_get_sdp_header); + static int set_expected_rtp(struct sk_buff *skb, const char **dptr, unsigned int *datalen, union nf_inet_addr *addr, __be16 port) @@ -408,7 +434,7 @@ static int sip_help(struct sk_buff *skb, int ret = NF_ACCEPT; unsigned int matchoff, matchlen; u_int16_t port; - enum sip_header_pos pos; + enum sdp_header_types type; typeof(nf_nat_sip_hook) nf_nat_sip; /* No Data ? */ @@ -446,8 +472,10 @@ static int sip_help(struct sk_buff *skb, goto out; } /* Get address and port from SDP packet. */ - pos = family == AF_INET ? POS_CONNECTION_IP4 : POS_CONNECTION_IP6; - if (ct_sip_get_info(ct, dptr, datalen, &matchoff, &matchlen, pos) > 0) { + type = family == AF_INET ? SDP_HDR_CONNECTION_IP4 : + SDP_HDR_CONNECTION_IP6; + if (ct_sip_get_sdp_header(ct, dptr, 0, datalen, type, SDP_HDR_UNSPEC, + &matchoff, &matchlen) > 0) { /* We'll drop only if there are parse problems. */ if (!parse_addr(ct, dptr + matchoff, NULL, &addr, @@ -455,8 +483,9 @@ static int sip_help(struct sk_buff *skb, ret = NF_DROP; goto out; } - if (ct_sip_get_info(ct, dptr, datalen, &matchoff, &matchlen, - POS_MEDIA) > 0) { + if (ct_sip_get_sdp_header(ct, dptr, 0, datalen, + SDP_HDR_MEDIA, SDP_HDR_UNSPEC, + &matchoff, &matchlen) > 0) { port = simple_strtoul(dptr + matchoff, NULL, 10); if (port < 1024) { -- cgit v1.2.3-71-gd317 From ac3677406d4e36e86b1eb5a453997a3b3e0c089a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Mar 2008 20:18:40 -0700 Subject: [NETFILTER]: nf_conntrack_sip: kill request URI "header" definitions The request URI is not a header and needs to be treated differently than real SIP headers. Add a seperate function for parsing it and get rid of the POS_REQ_URI/POS_REG_REQ_URI definitions. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_sip.h | 5 ++- net/ipv4/netfilter/nf_nat_sip.c | 46 +++++++++++---------- net/netfilter/nf_conntrack_sip.c | 64 +++++++++++++++++++++++------- 3 files changed, 77 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 9131cbc9b9de..480b26f40ce4 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -6,8 +6,6 @@ #define SIP_TIMEOUT 3600 enum sip_header_pos { - POS_REG_REQ_URI, - POS_REQ_URI, POS_FROM, POS_TO, POS_VIA, @@ -59,6 +57,9 @@ extern unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, unsigned int *datalen, struct nf_conntrack_expect *exp); +extern int ct_sip_parse_request(const struct nf_conn *ct, + const char *dptr, unsigned int datalen, + unsigned int *matchoff, unsigned int *matchlen); extern int ct_sip_get_info(const struct nf_conn *ct, const char *dptr, size_t dlen, unsigned int *matchoff, unsigned int *matchlen, enum sip_header_pos pos); diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index aa8a4f492baf..60151b5901a5 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -78,20 +78,17 @@ static unsigned int mangle_packet(struct sk_buff *skb, return 1; } -static int map_sip_addr(struct sk_buff *skb, - const char **dptr, unsigned int *datalen, - enum sip_header_pos pos, struct addr_map *map) +static int map_addr(struct sk_buff *skb, + const char **dptr, unsigned int *datalen, + unsigned int matchoff, unsigned int matchlen, + struct addr_map *map) { enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn *ct __maybe_unused = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - unsigned int matchlen, matchoff, addrlen; + unsigned int addrlen; char *addr; - if (ct_sip_get_info(ct, *dptr, *datalen, &matchoff, &matchlen, - pos) <= 0) - return 1; - if ((matchlen == map->addr[dir].srciplen || matchlen == map->addr[dir].srclen) && strncmp(*dptr + matchoff, map->addr[dir].src, matchlen) == 0) { @@ -109,13 +106,27 @@ static int map_sip_addr(struct sk_buff *skb, addr, addrlen); } +static int map_sip_addr(struct sk_buff *skb, + const char **dptr, unsigned int *datalen, + enum sip_header_pos pos, struct addr_map *map) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + unsigned int matchlen, matchoff; + + if (ct_sip_get_info(ct, *dptr, *datalen, &matchoff, &matchlen, + pos) <= 0) + return 1; + return map_addr(skb, dptr, datalen, matchoff, matchlen, map); +} + static unsigned int ip_nat_sip(struct sk_buff *skb, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - enum sip_header_pos pos; struct addr_map map; + unsigned int matchoff, matchlen; if (*datalen < strlen("SIP/2.0")) return NF_ACCEPT; @@ -124,18 +135,9 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, /* Basic rules: requests and responses. */ if (strnicmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) { - /* 10.2: Constructing the REGISTER Request: - * - * The "userinfo" and "@" components of the SIP URI MUST NOT - * be present. - */ - if (*datalen >= strlen("REGISTER") && - strnicmp(*dptr, "REGISTER", strlen("REGISTER")) == 0) - pos = POS_REG_REQ_URI; - else - pos = POS_REQ_URI; - - if (!map_sip_addr(skb, dptr, datalen, pos, &map)) + if (ct_sip_parse_request(ct, *dptr, *datalen, + &matchoff, &matchlen) > 0 && + !map_addr(skb, dptr, datalen, matchoff, matchlen, &map)) return NF_DROP; } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 801fcb3c749f..bb4396155681 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -65,20 +65,6 @@ struct sip_header_nfo { }; static const struct sip_header_nfo ct_sip_hdrs[] = { - [POS_REG_REQ_URI] = { /* SIP REGISTER request URI */ - .lname = "sip:", - .lnlen = sizeof("sip:") - 1, - .ln_str = ":", - .ln_strlen = sizeof(":") - 1, - .match_len = epaddr_len, - }, - [POS_REQ_URI] = { /* SIP request URI */ - .lname = "sip:", - .lnlen = sizeof("sip:") - 1, - .ln_str = "@", - .ln_strlen = sizeof("@") - 1, - .match_len = epaddr_len, - }, [POS_FROM] = { /* SIP From header */ .lname = "From:", .lnlen = sizeof("From:") - 1, @@ -164,6 +150,18 @@ const char *ct_sip_search(const char *needle, const char *haystack, } EXPORT_SYMBOL_GPL(ct_sip_search); +static int string_len(const struct nf_conn *ct, const char *dptr, + const char *limit, int *shift) +{ + int len = 0; + + while (dptr < limit && isalpha(*dptr)) { + dptr++; + len++; + } + return len; +} + static int digits_len(const struct nf_conn *ct, const char *dptr, const char *limit, int *shift) { @@ -258,6 +256,44 @@ static int skp_epaddr_len(const struct nf_conn *ct, const char *dptr, return epaddr_len(ct, dptr, limit, shift); } +/* Parse a SIP request line of the form: + * + * Request-Line = Method SP Request-URI SP SIP-Version CRLF + * + * and return the offset and length of the address contained in the Request-URI. + */ +int ct_sip_parse_request(const struct nf_conn *ct, + const char *dptr, unsigned int datalen, + unsigned int *matchoff, unsigned int *matchlen) +{ + const char *start = dptr, *limit = dptr + datalen; + unsigned int mlen; + int shift = 0; + + /* Skip method and following whitespace */ + mlen = string_len(ct, dptr, limit, NULL); + if (!mlen) + return 0; + dptr += mlen; + if (++dptr >= limit) + return 0; + + /* Find SIP URI */ + limit -= strlen("sip:"); + for (; dptr < limit; dptr++) { + if (*dptr == '\r' || *dptr == '\n') + return -1; + if (strnicmp(dptr, "sip:", strlen("sip:")) == 0) + break; + } + *matchlen = skp_epaddr_len(ct, dptr, limit, &shift); + if (!*matchlen) + return 0; + *matchoff = dptr - start + shift; + return 1; +} +EXPORT_SYMBOL_GPL(ct_sip_parse_request); + /* Returns 0 if not found, -1 error parsing. */ int ct_sip_get_info(const struct nf_conn *ct, const char *dptr, size_t dlen, -- cgit v1.2.3-71-gd317 From ea45f12a2766dae54e5426a23e8f4bafdbe2782e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Mar 2008 20:18:57 -0700 Subject: [NETFILTER]: nf_conntrack_sip: parse SIP headers properly Introduce new function for SIP header parsing that properly deals with continuation lines and whitespace in headers and use it. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_sip.h | 30 ++-- net/ipv4/netfilter/nf_nat_sip.c | 18 +- net/netfilter/nf_conntrack_sip.c | 271 ++++++++++++++--------------- 3 files changed, 151 insertions(+), 168 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 480b26f40ce4..ccc701422963 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -5,14 +5,6 @@ #define SIP_PORT 5060 #define SIP_TIMEOUT 3600 -enum sip_header_pos { - POS_FROM, - POS_TO, - POS_VIA, - POS_CONTACT, - POS_CONTENT, -}; - struct sip_header { const char *name; const char *cname; @@ -36,9 +28,20 @@ struct sip_header { .match_len = (__match), \ } +#define SIP_HDR(__name, __cname, __search, __match) \ + __SIP_HDR(__name, __cname, __search, __match) + #define SDP_HDR(__name, __search, __match) \ __SIP_HDR(__name, NULL, __search, __match) +enum sip_header_types { + SIP_HDR_FROM, + SIP_HDR_TO, + SIP_HDR_CONTACT, + SIP_HDR_VIA, + SIP_HDR_CONTENT_LENGTH, +}; + enum sdp_header_types { SDP_HDR_UNSPEC, SDP_HDR_VERSION, @@ -60,13 +63,10 @@ extern unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, extern int ct_sip_parse_request(const struct nf_conn *ct, const char *dptr, unsigned int datalen, unsigned int *matchoff, unsigned int *matchlen); -extern int ct_sip_get_info(const struct nf_conn *ct, const char *dptr, - size_t dlen, unsigned int *matchoff, - unsigned int *matchlen, enum sip_header_pos pos); -extern int ct_sip_lnlen(const char *line, const char *limit); -extern const char *ct_sip_search(const char *needle, const char *haystack, - size_t needle_len, size_t haystack_len, - int case_sensitive); +extern int ct_sip_get_header(const struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + enum sip_header_types type, + unsigned int *matchoff, unsigned int *matchlen); extern int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr, unsigned int dataoff, unsigned int datalen, diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 60151b5901a5..c13e43862361 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -108,14 +108,14 @@ static int map_addr(struct sk_buff *skb, static int map_sip_addr(struct sk_buff *skb, const char **dptr, unsigned int *datalen, - enum sip_header_pos pos, struct addr_map *map) + enum sip_header_types type, struct addr_map *map) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); unsigned int matchlen, matchoff; - if (ct_sip_get_info(ct, *dptr, *datalen, &matchoff, &matchlen, - pos) <= 0) + if (ct_sip_get_header(ct, *dptr, 0, *datalen, type, + &matchoff, &matchlen) <= 0) return 1; return map_addr(skb, dptr, datalen, matchoff, matchlen, map); } @@ -141,10 +141,10 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, return NF_DROP; } - if (!map_sip_addr(skb, dptr, datalen, POS_FROM, &map) || - !map_sip_addr(skb, dptr, datalen, POS_TO, &map) || - !map_sip_addr(skb, dptr, datalen, POS_VIA, &map) || - !map_sip_addr(skb, dptr, datalen, POS_CONTACT, &map)) + if (!map_sip_addr(skb, dptr, datalen, SIP_HDR_FROM, &map) || + !map_sip_addr(skb, dptr, datalen, SIP_HDR_TO, &map) || + !map_sip_addr(skb, dptr, datalen, SIP_HDR_VIA, &map) || + !map_sip_addr(skb, dptr, datalen, SIP_HDR_CONTACT, &map)) return NF_DROP; return NF_ACCEPT; } @@ -166,8 +166,8 @@ static int mangle_content_len(struct sk_buff *skb, c_len = *datalen - matchoff + strlen("v="); /* Now, update SDP length */ - if (ct_sip_get_info(ct, *dptr, *datalen, &matchoff, &matchlen, - POS_CONTENT) <= 0) + if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CONTENT_LENGTH, + &matchoff, &matchlen) <= 0) return 0; buflen = sprintf(buffer, "%u", c_len); diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index bb4396155681..cbc91598acee 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -47,109 +47,6 @@ unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, struct nf_conntrack_expect *exp) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sdp_hook); -static int digits_len(const struct nf_conn *, const char *, const char *, int *); -static int epaddr_len(const struct nf_conn *, const char *, const char *, int *); -static int skp_digits_len(const struct nf_conn *, const char *, const char *, int *); -static int skp_epaddr_len(const struct nf_conn *, const char *, const char *, int *); - -struct sip_header_nfo { - const char *lname; - const char *sname; - const char *ln_str; - size_t lnlen; - size_t snlen; - size_t ln_strlen; - int case_sensitive; - int (*match_len)(const struct nf_conn *, const char *, - const char *, int *); -}; - -static const struct sip_header_nfo ct_sip_hdrs[] = { - [POS_FROM] = { /* SIP From header */ - .lname = "From:", - .lnlen = sizeof("From:") - 1, - .sname = "\r\nf:", - .snlen = sizeof("\r\nf:") - 1, - .ln_str = "sip:", - .ln_strlen = sizeof("sip:") - 1, - .match_len = skp_epaddr_len, - }, - [POS_TO] = { /* SIP To header */ - .lname = "To:", - .lnlen = sizeof("To:") - 1, - .sname = "\r\nt:", - .snlen = sizeof("\r\nt:") - 1, - .ln_str = "sip:", - .ln_strlen = sizeof("sip:") - 1, - .match_len = skp_epaddr_len - }, - [POS_VIA] = { /* SIP Via header */ - .lname = "Via:", - .lnlen = sizeof("Via:") - 1, - .sname = "\r\nv:", - .snlen = sizeof("\r\nv:") - 1, /* rfc3261 "\r\n" */ - .ln_str = "UDP ", - .ln_strlen = sizeof("UDP ") - 1, - .match_len = epaddr_len, - }, - [POS_CONTACT] = { /* SIP Contact header */ - .lname = "Contact:", - .lnlen = sizeof("Contact:") - 1, - .sname = "\r\nm:", - .snlen = sizeof("\r\nm:") - 1, - .ln_str = "sip:", - .ln_strlen = sizeof("sip:") - 1, - .match_len = skp_epaddr_len - }, - [POS_CONTENT] = { /* SIP Content length header */ - .lname = "Content-Length:", - .lnlen = sizeof("Content-Length:") - 1, - .sname = "\r\nl:", - .snlen = sizeof("\r\nl:") - 1, - .ln_str = ":", - .ln_strlen = sizeof(":") - 1, - .match_len = skp_digits_len - }, -}; - -/* get line length until first CR or LF seen. */ -int ct_sip_lnlen(const char *line, const char *limit) -{ - const char *k = line; - - while ((line < limit) && (*line == '\r' || *line == '\n')) - line++; - - while (line < limit) { - if (*line == '\r' || *line == '\n') - break; - line++; - } - return line - k; -} -EXPORT_SYMBOL_GPL(ct_sip_lnlen); - -/* Linear string search, case sensitive. */ -const char *ct_sip_search(const char *needle, const char *haystack, - size_t needle_len, size_t haystack_len, - int case_sensitive) -{ - const char *limit = haystack + (haystack_len - needle_len); - - while (haystack < limit) { - if (case_sensitive) { - if (strncmp(haystack, needle, needle_len) == 0) - return haystack; - } else { - if (strnicmp(haystack, needle, needle_len) == 0) - return haystack; - } - haystack++; - } - return NULL; -} -EXPORT_SYMBOL_GPL(ct_sip_search); - static int string_len(const struct nf_conn *ct, const char *dptr, const char *limit, int *shift) { @@ -173,16 +70,6 @@ static int digits_len(const struct nf_conn *ct, const char *dptr, return len; } -/* get digits length, skipping blank spaces. */ -static int skp_digits_len(const struct nf_conn *ct, const char *dptr, - const char *limit, int *shift) -{ - for (; dptr < limit && *dptr == ' '; dptr++) - (*shift)++; - - return digits_len(ct, dptr, limit, shift); -} - static int parse_addr(const struct nf_conn *ct, const char *cp, const char **endp, union nf_inet_addr *addr, const char *limit) @@ -294,50 +181,146 @@ int ct_sip_parse_request(const struct nf_conn *ct, } EXPORT_SYMBOL_GPL(ct_sip_parse_request); -/* Returns 0 if not found, -1 error parsing. */ -int ct_sip_get_info(const struct nf_conn *ct, - const char *dptr, size_t dlen, - unsigned int *matchoff, - unsigned int *matchlen, - enum sip_header_pos pos) +/* SIP header parsing: SIP headers are located at the beginning of a line, but + * may span several lines, in which case the continuation lines begin with a + * whitespace character. RFC 2543 allows lines to be terminated with CR, LF or + * CRLF, RFC 3261 allows only CRLF, we support both. + * + * Headers are followed by (optionally) whitespace, a colon, again (optionally) + * whitespace and the values. Whitespace in this context means any amount of + * tabs, spaces and continuation lines, which are treated as a single whitespace + * character. + */ +static const struct sip_header ct_sip_hdrs[] = { + [SIP_HDR_FROM] = SIP_HDR("From", "f", "sip:", skp_epaddr_len), + [SIP_HDR_TO] = SIP_HDR("To", "t", "sip:", skp_epaddr_len), + [SIP_HDR_CONTACT] = SIP_HDR("Contact", "m", "sip:", skp_epaddr_len), + [SIP_HDR_VIA] = SIP_HDR("Via", "v", "UDP ", epaddr_len), + [SIP_HDR_CONTENT_LENGTH] = SIP_HDR("Content-Length", "l", NULL, digits_len), +}; + +static const char *sip_follow_continuation(const char *dptr, const char *limit) { - const struct sip_header_nfo *hnfo = &ct_sip_hdrs[pos]; - const char *limit, *aux, *k = dptr; - int shift = 0; + /* Walk past newline */ + if (++dptr >= limit) + return NULL; + + /* Skip '\n' in CR LF */ + if (*(dptr - 1) == '\r' && *dptr == '\n') { + if (++dptr >= limit) + return NULL; + } + + /* Continuation line? */ + if (*dptr != ' ' && *dptr != '\t') + return NULL; + + /* skip leading whitespace */ + for (; dptr < limit; dptr++) { + if (*dptr != ' ' && *dptr != '\t') + break; + } + return dptr; +} + +static const char *sip_skip_whitespace(const char *dptr, const char *limit) +{ + for (; dptr < limit; dptr++) { + if (*dptr == ' ') + continue; + if (*dptr != '\r' && *dptr != '\n') + break; + dptr = sip_follow_continuation(dptr, limit); + if (dptr == NULL) + return NULL; + } + return dptr; +} - limit = dptr + (dlen - hnfo->lnlen); +/* Search within a SIP header value, dealing with continuation lines */ +static const char *ct_sip_header_search(const char *dptr, const char *limit, + const char *needle, unsigned int len) +{ + for (limit -= len; dptr < limit; dptr++) { + if (*dptr == '\r' || *dptr == '\n') { + dptr = sip_follow_continuation(dptr, limit); + if (dptr == NULL) + break; + continue; + } - while (dptr < limit) { - if ((strncmp(dptr, hnfo->lname, hnfo->lnlen) != 0) && - (hnfo->sname == NULL || - strncmp(dptr, hnfo->sname, hnfo->snlen) != 0)) { - dptr++; + if (strnicmp(dptr, needle, len) == 0) + return dptr; + } + return NULL; +} + +int ct_sip_get_header(const struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + enum sip_header_types type, + unsigned int *matchoff, unsigned int *matchlen) +{ + const struct sip_header *hdr = &ct_sip_hdrs[type]; + const char *start = dptr, *limit = dptr + datalen; + int shift = 0; + + for (dptr += dataoff; dptr < limit; dptr++) { + /* Find beginning of line */ + if (*dptr != '\r' && *dptr != '\n') continue; + if (++dptr >= limit) + break; + if (*(dptr - 1) == '\r' && *dptr == '\n') { + if (++dptr >= limit) + break; } - aux = ct_sip_search(hnfo->ln_str, dptr, hnfo->ln_strlen, - ct_sip_lnlen(dptr, limit), - hnfo->case_sensitive); - if (!aux) { - pr_debug("'%s' not found in '%s'.\n", hnfo->ln_str, - hnfo->lname); - return -1; + + /* Skip continuation lines */ + if (*dptr == ' ' || *dptr == '\t') + continue; + + /* Find header. Compact headers must be followed by a + * non-alphabetic character to avoid mismatches. */ + if (limit - dptr >= hdr->len && + strnicmp(dptr, hdr->name, hdr->len) == 0) + dptr += hdr->len; + else if (hdr->cname && limit - dptr >= hdr->clen + 1 && + strnicmp(dptr, hdr->cname, hdr->clen) == 0 && + !isalpha(*(dptr + hdr->clen + 1))) + dptr += hdr->clen; + else + continue; + + /* Find and skip colon */ + dptr = sip_skip_whitespace(dptr, limit); + if (dptr == NULL) + break; + if (*dptr != ':' || ++dptr >= limit) + break; + + /* Skip whitespace after colon */ + dptr = sip_skip_whitespace(dptr, limit); + if (dptr == NULL) + break; + + *matchoff = dptr - start; + if (hdr->search) { + dptr = ct_sip_header_search(dptr, limit, hdr->search, + hdr->slen); + if (!dptr) + return -1; + dptr += hdr->slen; } - aux += hnfo->ln_strlen; - *matchlen = hnfo->match_len(ct, aux, limit, &shift); + *matchlen = hdr->match_len(ct, dptr, limit, &shift); if (!*matchlen) return -1; - - *matchoff = (aux - k) + shift; - - pr_debug("%s match succeeded! - len: %u\n", hnfo->lname, - *matchlen); + *matchoff = dptr - start + shift; return 1; } - pr_debug("%s header not found.\n", hnfo->lname); return 0; } -EXPORT_SYMBOL_GPL(ct_sip_get_info); +EXPORT_SYMBOL_GPL(ct_sip_get_header); /* SDP header parsing: a SDP session description contains an ordered set of * headers, starting with a section containing general session parameters, -- cgit v1.2.3-71-gd317 From 05e3ced297fe755093140e7487e292fb7603316e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Mar 2008 20:19:13 -0700 Subject: [NETFILTER]: nf_conntrack_sip: introduce SIP-URI parsing helper Introduce a helper function to parse a SIP-URI in a header value, optionally iterating through all headers of this kind. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_sip.h | 5 ++ net/netfilter/nf_conntrack_sip.c | 107 +++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index ccc701422963..87bc6f79efc4 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -67,6 +67,11 @@ extern int ct_sip_get_header(const struct nf_conn *ct, const char *dptr, unsigned int dataoff, unsigned int datalen, enum sip_header_types type, unsigned int *matchoff, unsigned int *matchlen); +extern int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, + unsigned int *dataoff, unsigned int datalen, + enum sip_header_types type, int *in_header, + unsigned int *matchoff, unsigned int *matchlen, + union nf_inet_addr *addr, __be16 *port); extern int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr, unsigned int dataoff, unsigned int datalen, diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index cbc91598acee..a74d76a97312 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -190,6 +190,9 @@ EXPORT_SYMBOL_GPL(ct_sip_parse_request); * whitespace and the values. Whitespace in this context means any amount of * tabs, spaces and continuation lines, which are treated as a single whitespace * character. + * + * Some headers may appear multiple times. A comma seperated list of values is + * equivalent to multiple headers. */ static const struct sip_header ct_sip_hdrs[] = { [SIP_HDR_FROM] = SIP_HDR("From", "f", "sip:", skp_epaddr_len), @@ -322,6 +325,110 @@ int ct_sip_get_header(const struct nf_conn *ct, const char *dptr, } EXPORT_SYMBOL_GPL(ct_sip_get_header); +/* Get next header field in a list of comma seperated values */ +static int ct_sip_next_header(const struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + enum sip_header_types type, + unsigned int *matchoff, unsigned int *matchlen) +{ + const struct sip_header *hdr = &ct_sip_hdrs[type]; + const char *start = dptr, *limit = dptr + datalen; + int shift = 0; + + dptr += dataoff; + + dptr = ct_sip_header_search(dptr, limit, ",", strlen(",")); + if (!dptr) + return 0; + + dptr = ct_sip_header_search(dptr, limit, hdr->search, hdr->slen); + if (!dptr) + return 0; + dptr += hdr->slen; + + *matchoff = dptr - start; + *matchlen = hdr->match_len(ct, dptr, limit, &shift); + if (!*matchlen) + return -1; + *matchoff += shift; + return 1; +} + +/* Walk through headers until a parsable one is found or no header of the + * given type is left. */ +static int ct_sip_walk_headers(const struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + enum sip_header_types type, int *in_header, + unsigned int *matchoff, unsigned int *matchlen) +{ + int ret; + + if (in_header && *in_header) { + while (1) { + ret = ct_sip_next_header(ct, dptr, dataoff, datalen, + type, matchoff, matchlen); + if (ret > 0) + return ret; + if (ret == 0) + break; + dataoff += *matchoff; + } + *in_header = 0; + } + + while (1) { + ret = ct_sip_get_header(ct, dptr, dataoff, datalen, + type, matchoff, matchlen); + if (ret > 0) + break; + if (ret == 0) + return ret; + dataoff += *matchoff; + } + + if (in_header) + *in_header = 1; + return 1; +} + +/* Locate a SIP header, parse the URI and return the offset and length of + * the address as well as the address and port themselves. A stream of + * headers can be parsed by handing in a non-NULL datalen and in_header + * pointer. + */ +int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, + unsigned int *dataoff, unsigned int datalen, + enum sip_header_types type, int *in_header, + unsigned int *matchoff, unsigned int *matchlen, + union nf_inet_addr *addr, __be16 *port) +{ + const char *c, *limit = dptr + datalen; + unsigned int p; + int ret; + + ret = ct_sip_walk_headers(ct, dptr, dataoff ? *dataoff : 0, datalen, + type, in_header, matchoff, matchlen); + WARN_ON(ret < 0); + if (ret == 0) + return ret; + + if (!parse_addr(ct, dptr + *matchoff, &c, addr, limit)) + return -1; + if (*c == ':') { + c++; + p = simple_strtoul(c, (char **)&c, 10); + if (p < 1024 || p > 65535) + return -1; + *port = htons(p); + } else + *port = htons(SIP_PORT); + + if (dataoff) + *dataoff = c - dptr; + return 1; +} +EXPORT_SYMBOL_GPL(ct_sip_parse_header_uri); + /* SDP header parsing: a SDP session description contains an ordered set of * headers, starting with a section containing general session parameters, * optionally followed by multiple media descriptions. -- cgit v1.2.3-71-gd317 From 624f8b7bba98c27a1464f5f858c4a861d5d3e8d7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Mar 2008 20:19:30 -0700 Subject: [NETFILTER]: nf_nat_sip: get rid of text based header translation Use the URI parsing helper to get the numerical addresses and get rid of the text based header translation. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_sip.h | 3 +- net/ipv4/netfilter/nf_nat_sip.c | 100 +++++++++++------------------ net/netfilter/nf_conntrack_sip.c | 27 ++++++-- 3 files changed, 62 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 87bc6f79efc4..68a0d6a41733 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -62,7 +62,8 @@ extern unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, extern int ct_sip_parse_request(const struct nf_conn *ct, const char *dptr, unsigned int datalen, - unsigned int *matchoff, unsigned int *matchlen); + unsigned int *matchoff, unsigned int *matchlen, + union nf_inet_addr *addr, __be16 *port); extern int ct_sip_get_header(const struct nf_conn *ct, const char *dptr, unsigned int dataoff, unsigned int datalen, enum sip_header_types type, diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index c13e43862361..5b4a5cd23f39 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -26,39 +26,6 @@ MODULE_AUTHOR("Christian Hentschel "); MODULE_DESCRIPTION("SIP NAT helper"); MODULE_ALIAS("ip_nat_sip"); -struct addr_map { - struct { - char src[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; - char dst[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; - unsigned int srclen, srciplen; - unsigned int dstlen, dstiplen; - } addr[IP_CT_DIR_MAX]; -}; - -static void addr_map_init(const struct nf_conn *ct, struct addr_map *map) -{ - const struct nf_conntrack_tuple *t; - enum ip_conntrack_dir dir; - unsigned int n; - - for (dir = 0; dir < IP_CT_DIR_MAX; dir++) { - t = &ct->tuplehash[dir].tuple; - - n = sprintf(map->addr[dir].src, "%u.%u.%u.%u", - NIPQUAD(t->src.u3.ip)); - map->addr[dir].srciplen = n; - n += sprintf(map->addr[dir].src + n, ":%u", - ntohs(t->src.u.udp.port)); - map->addr[dir].srclen = n; - - n = sprintf(map->addr[dir].dst, "%u.%u.%u.%u", - NIPQUAD(t->dst.u3.ip)); - map->addr[dir].dstiplen = n; - n += sprintf(map->addr[dir].dst + n, ":%u", - ntohs(t->dst.u.udp.port)); - map->addr[dir].dstlen = n; - } -} static unsigned int mangle_packet(struct sk_buff *skb, const char **dptr, unsigned int *datalen, @@ -81,43 +48,51 @@ static unsigned int mangle_packet(struct sk_buff *skb, static int map_addr(struct sk_buff *skb, const char **dptr, unsigned int *datalen, unsigned int matchoff, unsigned int matchlen, - struct addr_map *map) + union nf_inet_addr *addr, __be16 port) { enum ip_conntrack_info ctinfo; - struct nf_conn *ct __maybe_unused = nf_ct_get(skb, &ctinfo); + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - unsigned int addrlen; - char *addr; - - if ((matchlen == map->addr[dir].srciplen || - matchlen == map->addr[dir].srclen) && - strncmp(*dptr + matchoff, map->addr[dir].src, matchlen) == 0) { - addr = map->addr[!dir].dst; - addrlen = map->addr[!dir].dstlen; - } else if ((matchlen == map->addr[dir].dstiplen || - matchlen == map->addr[dir].dstlen) && - strncmp(*dptr + matchoff, map->addr[dir].dst, matchlen) == 0) { - addr = map->addr[!dir].src; - addrlen = map->addr[!dir].srclen; + char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; + unsigned int buflen; + __be32 newaddr; + __be16 newport; + + if (ct->tuplehash[dir].tuple.src.u3.ip == addr->ip && + ct->tuplehash[dir].tuple.src.u.udp.port == port) { + newaddr = ct->tuplehash[!dir].tuple.dst.u3.ip; + newport = ct->tuplehash[!dir].tuple.dst.u.udp.port; + } else if (ct->tuplehash[dir].tuple.dst.u3.ip == addr->ip && + ct->tuplehash[dir].tuple.dst.u.udp.port == port) { + newaddr = ct->tuplehash[!dir].tuple.src.u3.ip; + newport = ct->tuplehash[!dir].tuple.src.u.udp.port; } else return 1; + if (newaddr == addr->ip && newport == port) + return 1; + + buflen = sprintf(buffer, "%u.%u.%u.%u:%u", + NIPQUAD(newaddr), ntohs(newport)); + return mangle_packet(skb, dptr, datalen, matchoff, matchlen, - addr, addrlen); + buffer, buflen); } static int map_sip_addr(struct sk_buff *skb, const char **dptr, unsigned int *datalen, - enum sip_header_types type, struct addr_map *map) + enum sip_header_types type) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); unsigned int matchlen, matchoff; + union nf_inet_addr addr; + __be16 port; - if (ct_sip_get_header(ct, *dptr, 0, *datalen, type, - &matchoff, &matchlen) <= 0) + if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL, + &matchoff, &matchlen, &addr, &port) <= 0) return 1; - return map_addr(skb, dptr, datalen, matchoff, matchlen, map); + return map_addr(skb, dptr, datalen, matchoff, matchlen, &addr, port); } static unsigned int ip_nat_sip(struct sk_buff *skb, @@ -125,26 +100,27 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct addr_map map; unsigned int matchoff, matchlen; + union nf_inet_addr addr; + __be16 port; if (*datalen < strlen("SIP/2.0")) return NF_ACCEPT; - addr_map_init(ct, &map); - /* Basic rules: requests and responses. */ if (strnicmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) { if (ct_sip_parse_request(ct, *dptr, *datalen, - &matchoff, &matchlen) > 0 && - !map_addr(skb, dptr, datalen, matchoff, matchlen, &map)) + &matchoff, &matchlen, + &addr, &port) > 0 && + !map_addr(skb, dptr, datalen, matchoff, matchlen, + &addr, port)) return NF_DROP; } - if (!map_sip_addr(skb, dptr, datalen, SIP_HDR_FROM, &map) || - !map_sip_addr(skb, dptr, datalen, SIP_HDR_TO, &map) || - !map_sip_addr(skb, dptr, datalen, SIP_HDR_VIA, &map) || - !map_sip_addr(skb, dptr, datalen, SIP_HDR_CONTACT, &map)) + if (!map_sip_addr(skb, dptr, datalen, SIP_HDR_FROM) || + !map_sip_addr(skb, dptr, datalen, SIP_HDR_TO) || + !map_sip_addr(skb, dptr, datalen, SIP_HDR_VIA) || + !map_sip_addr(skb, dptr, datalen, SIP_HDR_CONTACT)) return NF_DROP; return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index a74d76a97312..f20fa2d94c0a 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -151,10 +151,12 @@ static int skp_epaddr_len(const struct nf_conn *ct, const char *dptr, */ int ct_sip_parse_request(const struct nf_conn *ct, const char *dptr, unsigned int datalen, - unsigned int *matchoff, unsigned int *matchlen) + unsigned int *matchoff, unsigned int *matchlen, + union nf_inet_addr *addr, __be16 *port) { - const char *start = dptr, *limit = dptr + datalen; + const char *start = dptr, *limit = dptr + datalen, *end; unsigned int mlen; + unsigned int p; int shift = 0; /* Skip method and following whitespace */ @@ -173,10 +175,25 @@ int ct_sip_parse_request(const struct nf_conn *ct, if (strnicmp(dptr, "sip:", strlen("sip:")) == 0) break; } - *matchlen = skp_epaddr_len(ct, dptr, limit, &shift); - if (!*matchlen) + if (!skp_epaddr_len(ct, dptr, limit, &shift)) return 0; - *matchoff = dptr - start + shift; + dptr += shift; + + if (!parse_addr(ct, dptr, &end, addr, limit)) + return -1; + if (end < limit && *end == ':') { + end++; + p = simple_strtoul(end, (char **)&end, 10); + if (p < 1024 || p > 65535) + return -1; + *port = htons(p); + } else + *port = htons(SIP_PORT); + + if (end == dptr) + return 0; + *matchoff = dptr - start; + *matchlen = end - dptr; return 1; } EXPORT_SYMBOL_GPL(ct_sip_parse_request); -- cgit v1.2.3-71-gd317 From 30f33e6dee80c6ded917f978e4f377d1069d519d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Mar 2008 20:22:20 -0700 Subject: [NETFILTER]: nf_conntrack_sip: support method specific request/response handling Add support for per-method request/response handlers and perform SDP parsing for INVITE/UPDATE requests and for all informational and successful responses. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_sip.h | 20 ++++++ net/netfilter/nf_conntrack_sip.c | 107 ++++++++++++++++++++++++++--- 2 files changed, 117 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 68a0d6a41733..da93e80804c2 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -5,6 +5,25 @@ #define SIP_PORT 5060 #define SIP_TIMEOUT 3600 +struct sip_handler { + const char *method; + unsigned int len; + int (*request)(struct sk_buff *skb, + const char **dptr, unsigned int *datalen, + unsigned int cseq); + int (*response)(struct sk_buff *skb, + const char **dptr, unsigned int *datalen, + unsigned int cseq, unsigned int code); +}; + +#define SIP_HANDLER(__method, __request, __response) \ +{ \ + .method = (__method), \ + .len = sizeof(__method) - 1, \ + .request = (__request), \ + .response = (__response), \ +} + struct sip_header { const char *name; const char *cname; @@ -35,6 +54,7 @@ struct sip_header { __SIP_HDR(__name, NULL, __search, __match) enum sip_header_types { + SIP_HDR_CSEQ, SIP_HDR_FROM, SIP_HDR_TO, SIP_HDR_CONTACT, diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 96bedb52bd4b..1be949febab7 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -212,6 +212,7 @@ EXPORT_SYMBOL_GPL(ct_sip_parse_request); * equivalent to multiple headers. */ static const struct sip_header ct_sip_hdrs[] = { + [SIP_HDR_CSEQ] = SIP_HDR("CSeq", NULL, NULL, digits_len), [SIP_HDR_FROM] = SIP_HDR("From", "f", "sip:", skp_epaddr_len), [SIP_HDR_TO] = SIP_HDR("To", "t", "sip:", skp_epaddr_len), [SIP_HDR_CONTACT] = SIP_HDR("Contact", "m", "sip:", skp_epaddr_len), @@ -566,7 +567,8 @@ static int set_expected_rtp(struct sk_buff *skb, } static int process_sdp(struct sk_buff *skb, - const char **dptr, unsigned int *datalen) + const char **dptr, unsigned int *datalen, + unsigned int cseq) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); @@ -600,6 +602,96 @@ static int process_sdp(struct sk_buff *skb, return set_expected_rtp(skb, dptr, datalen, &addr, htons(port)); } +static int process_invite_response(struct sk_buff *skb, + const char **dptr, unsigned int *datalen, + unsigned int cseq, unsigned int code) +{ + if ((code >= 100 && code <= 199) || + (code >= 200 && code <= 299)) + return process_sdp(skb, dptr, datalen, cseq); + + return NF_ACCEPT; +} + +static int process_update_response(struct sk_buff *skb, + const char **dptr, unsigned int *datalen, + unsigned int cseq, unsigned int code) +{ + if ((code >= 100 && code <= 199) || + (code >= 200 && code <= 299)) + return process_sdp(skb, dptr, datalen, cseq); + + return NF_ACCEPT; +} + +static const struct sip_handler sip_handlers[] = { + SIP_HANDLER("INVITE", process_sdp, process_invite_response), + SIP_HANDLER("UPDATE", process_sdp, process_update_response), +}; + +static int process_sip_response(struct sk_buff *skb, + const char **dptr, unsigned int *datalen) +{ + static const struct sip_handler *handler; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + unsigned int matchoff, matchlen; + unsigned int code, cseq, dataoff, i; + + if (*datalen < strlen("SIP/2.0 200")) + return NF_ACCEPT; + code = simple_strtoul(*dptr + strlen("SIP/2.0 "), NULL, 10); + if (!code) + return NF_DROP; + + if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ, + &matchoff, &matchlen) <= 0) + return NF_DROP; + cseq = simple_strtoul(*dptr + matchoff, NULL, 10); + if (!cseq) + return NF_DROP; + dataoff = matchoff + matchlen + 1; + + for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { + handler = &sip_handlers[i]; + if (handler->response == NULL) + continue; + if (*datalen < dataoff + handler->len || + strnicmp(*dptr + dataoff, handler->method, handler->len)) + continue; + return handler->response(skb, dptr, datalen, cseq, code); + } + return NF_ACCEPT; +} + +static int process_sip_request(struct sk_buff *skb, + const char **dptr, unsigned int *datalen) +{ + static const struct sip_handler *handler; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + unsigned int matchoff, matchlen; + unsigned int cseq, i; + + for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { + handler = &sip_handlers[i]; + if (handler->request == NULL) + continue; + if (*datalen < handler->len || + strnicmp(*dptr, handler->method, handler->len)) + continue; + + if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ, + &matchoff, &matchlen) <= 0) + return NF_DROP; + cseq = simple_strtoul(*dptr + matchoff, NULL, 10); + if (!cseq) + return NF_DROP; + + return handler->request(skb, dptr, datalen, cseq); + } + return NF_ACCEPT; +} static int sip_help(struct sk_buff *skb, unsigned int protoff, @@ -634,15 +726,10 @@ static int sip_help(struct sk_buff *skb, if (datalen < strlen("SIP/2.0 200")) return NF_ACCEPT; - /* RTP info only in some SDP pkts */ - if (strnicmp(dptr, "INVITE", strlen("INVITE")) != 0 && - strnicmp(dptr, "UPDATE", strlen("UPDATE")) != 0 && - strnicmp(dptr, "SIP/2.0 180", strlen("SIP/2.0 180")) != 0 && - strnicmp(dptr, "SIP/2.0 183", strlen("SIP/2.0 183")) != 0 && - strnicmp(dptr, "SIP/2.0 200", strlen("SIP/2.0 200")) != 0) - return NF_ACCEPT; - - return process_sdp(skb, &dptr, &datalen); + if (strnicmp(dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) + return process_sip_request(skb, &dptr, &datalen); + else + return process_sip_response(skb, &dptr, &datalen); } static struct nf_conntrack_helper sip[MAX_PORTS][2] __read_mostly; -- cgit v1.2.3-71-gd317 From 2bbb21168a90c788e12fe722eb66f27e611e7df7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Mar 2008 20:24:24 -0700 Subject: [NETFILTER]: nf_conntrack_sip: introduce URI and header parameter parsing helpers Introduce URI and header parameter parsing helpers. These are needed by the conntrack helper to parse expiration values in Contact: header parameters and by the NAT helper to properly update the Via-header rport=, received= and maddr= parameters. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_sip.h | 10 ++++++ net/netfilter/nf_conntrack_sip.c | 58 ++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index da93e80804c2..87e402825dba 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -93,6 +93,16 @@ extern int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, enum sip_header_types type, int *in_header, unsigned int *matchoff, unsigned int *matchlen, union nf_inet_addr *addr, __be16 *port); +extern int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + const char *name, + unsigned int *matchoff, unsigned int *matchlen, + union nf_inet_addr *addr); +extern int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr, + unsigned int off, unsigned int datalen, + const char *name, + unsigned int *matchoff, unsigned int *matchen, + unsigned int *val); extern int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr, unsigned int dataoff, unsigned int datalen, diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 8e7e5b465ffb..126f30842d60 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -448,6 +448,64 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, } EXPORT_SYMBOL_GPL(ct_sip_parse_header_uri); +/* Parse address from header parameter and return address, offset and length */ +int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + const char *name, + unsigned int *matchoff, unsigned int *matchlen, + union nf_inet_addr *addr) +{ + const char *limit = dptr + datalen; + const char *start, *end; + + limit = ct_sip_header_search(dptr + dataoff, limit, ",", strlen(",")); + if (!limit) + limit = dptr + datalen; + + start = ct_sip_header_search(dptr + dataoff, limit, name, strlen(name)); + if (!start) + return 0; + + start += strlen(name); + if (!parse_addr(ct, start, &end, addr, limit)) + return 0; + *matchoff = start - dptr; + *matchlen = end - start; + return 1; +} +EXPORT_SYMBOL_GPL(ct_sip_parse_address_param); + +/* Parse numerical header parameter and return value, offset and length */ +int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + const char *name, + unsigned int *matchoff, unsigned int *matchlen, + unsigned int *val) +{ + const char *limit = dptr + datalen; + const char *start; + char *end; + + limit = ct_sip_header_search(dptr + dataoff, limit, ",", strlen(",")); + if (!limit) + limit = dptr + datalen; + + start = ct_sip_header_search(dptr + dataoff, limit, name, strlen(name)); + if (!start) + return 0; + + start += strlen(name); + *val = simple_strtoul(start, &end, 0); + if (start == end) + return 0; + if (matchoff && matchlen) { + *matchoff = start - dptr; + *matchlen = end - start; + } + return 1; +} +EXPORT_SYMBOL_GPL(ct_sip_parse_numerical_param); + /* SDP header parsing: a SDP session description contains an ordered set of * headers, starting with a section containing general session parameters, * optionally followed by multiple media descriptions. -- cgit v1.2.3-71-gd317 From 0f32a40fc91a9ebbbf66e826ac2a829ab37d9cf8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Mar 2008 20:25:13 -0700 Subject: [NETFILTER]: nf_conntrack_sip: create signalling expectations Create expectations for incoming signalling connections when seeing a REGISTER request. This is needed when the registrar uses a different source port number for signalling messages and for receiving incoming calls from other endpoints than the registrar. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_sip.h | 18 +++ include/net/netfilter/nf_conntrack.h | 4 +- net/ipv4/netfilter/nf_nat_sip.c | 111 +++++++++++--- net/netfilter/nf_conntrack_sip.c | 232 +++++++++++++++++++++++++++-- 4 files changed, 332 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 87e402825dba..7cc84ed0c5da 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -5,6 +5,17 @@ #define SIP_PORT 5060 #define SIP_TIMEOUT 3600 +struct nf_ct_sip_master { + unsigned int register_cseq; +}; + +enum sip_expectation_classes { + SIP_EXPECT_SIGNALLING, + SIP_EXPECT_AUDIO, + __SIP_EXPECT_MAX +}; +#define SIP_EXPECT_MAX (__SIP_EXPECT_MAX - 1) + struct sip_handler { const char *method; unsigned int len; @@ -59,6 +70,7 @@ enum sip_header_types { SIP_HDR_TO, SIP_HDR_CONTACT, SIP_HDR_VIA, + SIP_HDR_EXPIRES, SIP_HDR_CONTENT_LENGTH, }; @@ -75,6 +87,12 @@ enum sdp_header_types { extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, const char **dptr, unsigned int *datalen); +extern unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, + const char **dptr, + unsigned int *datalen, + struct nf_conntrack_expect *exp, + unsigned int matchoff, + unsigned int matchlen); extern unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, const char **dptr, unsigned int *datalen, diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 922877133598..4a4f870d2a5e 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -46,6 +46,7 @@ union nf_conntrack_expect_proto { #include #include #include +#include /* per conntrack: application helper private data */ union nf_conntrack_help { @@ -54,6 +55,7 @@ union nf_conntrack_help { struct nf_ct_pptp_master ct_pptp_info; struct nf_ct_h323_master ct_h323_info; struct nf_ct_sane_master ct_sane_info; + struct nf_ct_sip_master ct_sip_info; }; #include @@ -76,7 +78,7 @@ do { \ struct nf_conntrack_helper; /* Must be kept in sync with the classes defined by helpers */ -#define NF_CT_MAX_EXPECT_CLASSES 1 +#define NF_CT_MAX_EXPECT_CLASSES 2 /* nf_conn feature for connections that have a helper */ struct nf_conn_help { diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index b443618a857f..4b85e21a2a4a 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -205,6 +205,91 @@ next: return NF_ACCEPT; } +/* Handles expected signalling connections and media streams */ +static void ip_nat_sip_expected(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + struct nf_nat_range range; + + /* This must be a fresh one. */ + BUG_ON(ct->status & IPS_NAT_DONE_MASK); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); + range.min = range.max = exp->saved_proto; + range.min_ip = range.max_ip = exp->saved_ip; + nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); + + /* Change src to where master sends to, but only if the connection + * actually came from the same source. */ + if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == + ct->master->tuplehash[exp->dir].tuple.src.u3.ip) { + range.flags = IP_NAT_RANGE_MAP_IPS; + range.min_ip = range.max_ip + = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; + nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); + } +} + +static unsigned int ip_nat_sip_expect(struct sk_buff *skb, + const char **dptr, unsigned int *datalen, + struct nf_conntrack_expect *exp, + unsigned int matchoff, + unsigned int matchlen) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + __be32 newip; + u_int16_t port; + char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; + unsigned buflen; + + /* Connection will come from reply */ + if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip) + newip = exp->tuple.dst.u3.ip; + else + newip = ct->tuplehash[!dir].tuple.dst.u3.ip; + + /* If the signalling port matches the connection's source port in the + * original direction, try to use the destination port in the opposite + * direction. */ + if (exp->tuple.dst.u.udp.port == + ct->tuplehash[dir].tuple.src.u.udp.port) + port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port); + else + port = ntohs(exp->tuple.dst.u.udp.port); + + exp->saved_ip = exp->tuple.dst.u3.ip; + exp->tuple.dst.u3.ip = newip; + exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port; + exp->dir = !dir; + exp->expectfn = ip_nat_sip_expected; + + for (; port != 0; port++) { + exp->tuple.dst.u.udp.port = htons(port); + if (nf_ct_expect_related(exp) == 0) + break; + } + + if (port == 0) + return NF_DROP; + + if (exp->tuple.dst.u3.ip != exp->saved_ip || + exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { + buflen = sprintf(buffer, "%u.%u.%u.%u:%u", + NIPQUAD(newip), port); + if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, + buffer, buflen)) + goto err; + } + return NF_ACCEPT; + +err: + nf_ct_unexpect_related(exp); + return NF_DROP; +} + static int mangle_content_len(struct sk_buff *skb, const char **dptr, unsigned int *datalen) { @@ -275,27 +360,6 @@ static unsigned int mangle_sdp(struct sk_buff *skb, return mangle_content_len(skb, dptr, datalen); } -static void ip_nat_sdp_expect(struct nf_conn *ct, - struct nf_conntrack_expect *exp) -{ - struct nf_nat_range range; - - /* This must be a fresh one. */ - BUG_ON(ct->status & IPS_NAT_DONE_MASK); - - /* For DST manip, map port here to where it's expected. */ - range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); - range.min = range.max = exp->saved_proto; - range.min_ip = range.max_ip = exp->saved_ip; - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); - - /* Change src to where master sends to */ - range.flags = IP_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); -} - /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. */ static unsigned int ip_nat_sdp(struct sk_buff *skb, @@ -322,7 +386,7 @@ static unsigned int ip_nat_sdp(struct sk_buff *skb, /* When you see the packet, we need to NAT it the same as the this one. */ - exp->expectfn = ip_nat_sdp_expect; + exp->expectfn = ip_nat_sip_expected; /* Try to get same port: if not, try to change it. */ for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) { @@ -344,6 +408,7 @@ static unsigned int ip_nat_sdp(struct sk_buff *skb, static void __exit nf_nat_sip_fini(void) { rcu_assign_pointer(nf_nat_sip_hook, NULL); + rcu_assign_pointer(nf_nat_sip_expect_hook, NULL); rcu_assign_pointer(nf_nat_sdp_hook, NULL); synchronize_rcu(); } @@ -351,8 +416,10 @@ static void __exit nf_nat_sip_fini(void) static int __init nf_nat_sip_init(void) { BUG_ON(nf_nat_sip_hook != NULL); + BUG_ON(nf_nat_sip_expect_hook != NULL); BUG_ON(nf_nat_sdp_hook != NULL); rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); + rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect); rcu_assign_pointer(nf_nat_sdp_hook, ip_nat_sdp); return 0; } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 126f30842d60..043aa557e7a8 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -37,11 +37,24 @@ static unsigned int sip_timeout __read_mostly = SIP_TIMEOUT; module_param(sip_timeout, uint, 0600); MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session"); +static int sip_direct_signalling __read_mostly = 1; +module_param(sip_direct_signalling, int, 0600); +MODULE_PARM_DESC(sip_direct_signalling, "expect incoming calls from registrar " + "only (default 1)"); + unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, const char **dptr, unsigned int *datalen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_hook); +unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, + const char **dptr, + unsigned int *datalen, + struct nf_conntrack_expect *exp, + unsigned int matchoff, + unsigned int matchlen) __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook); + unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, const char **dptr, unsigned int *datalen, @@ -218,6 +231,7 @@ static const struct sip_header ct_sip_hdrs[] = { [SIP_HDR_TO] = SIP_HDR("To", "t", "sip:", skp_epaddr_len), [SIP_HDR_CONTACT] = SIP_HDR("Contact", "m", "sip:", skp_epaddr_len), [SIP_HDR_VIA] = SIP_HDR("Via", "v", "UDP ", epaddr_len), + [SIP_HDR_EXPIRES] = SIP_HDR("Expires", NULL, NULL, digits_len), [SIP_HDR_CONTENT_LENGTH] = SIP_HDR("Content-Length", "l", NULL, digits_len), }; @@ -592,18 +606,50 @@ int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr, } EXPORT_SYMBOL_GPL(ct_sip_get_sdp_header); -static void flush_expectations(struct nf_conn *ct) +static int refresh_signalling_expectation(struct nf_conn *ct, + union nf_inet_addr *addr, + __be16 port, + unsigned int expires) { struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_expect *exp; struct hlist_node *n, *next; + int found = 0; spin_lock_bh(&nf_conntrack_lock); hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { + if (exp->class != SIP_EXPECT_SIGNALLING || + !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) || + exp->tuple.dst.u.udp.port != port) + continue; + if (!del_timer(&exp->timeout)) + continue; + exp->flags &= ~NF_CT_EXPECT_INACTIVE; + exp->timeout.expires = jiffies + expires * HZ; + add_timer(&exp->timeout); + found = 1; + break; + } + spin_unlock_bh(&nf_conntrack_lock); + return found; +} + +static void flush_expectations(struct nf_conn *ct, bool media) +{ + struct nf_conn_help *help = nfct_help(ct); + struct nf_conntrack_expect *exp; + struct hlist_node *n, *next; + + spin_lock_bh(&nf_conntrack_lock); + hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { + if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media) + continue; if (!del_timer(&exp->timeout)) continue; nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); + if (!media) + break; } spin_unlock_bh(&nf_conntrack_lock); } @@ -623,7 +669,7 @@ static int set_expected_rtp(struct sk_buff *skb, exp = nf_ct_expect_alloc(ct); if (exp == NULL) return NF_DROP; - nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, family, + nf_ct_expect_init(exp, SIP_EXPECT_AUDIO, family, &ct->tuplehash[!dir].tuple.src.u3, addr, IPPROTO_UDP, NULL, &port); @@ -688,7 +734,7 @@ static int process_invite_response(struct sk_buff *skb, (code >= 200 && code <= 299)) return process_sdp(skb, dptr, datalen, cseq); else { - flush_expectations(ct); + flush_expectations(ct, true); return NF_ACCEPT; } } @@ -704,7 +750,7 @@ static int process_update_response(struct sk_buff *skb, (code >= 200 && code <= 299)) return process_sdp(skb, dptr, datalen, cseq); else { - flush_expectations(ct); + flush_expectations(ct, true); return NF_ACCEPT; } } @@ -720,7 +766,7 @@ static int process_prack_response(struct sk_buff *skb, (code >= 200 && code <= 299)) return process_sdp(skb, dptr, datalen, cseq); else { - flush_expectations(ct); + flush_expectations(ct, true); return NF_ACCEPT; } } @@ -732,7 +778,165 @@ static int process_bye_request(struct sk_buff *skb, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - flush_expectations(ct); + flush_expectations(ct, true); + return NF_ACCEPT; +} + +/* Parse a REGISTER request and create a permanent expectation for incoming + * signalling connections. The expectation is marked inactive and is activated + * when receiving a response indicating success from the registrar. + */ +static int process_register_request(struct sk_buff *skb, + const char **dptr, unsigned int *datalen, + unsigned int cseq) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + unsigned int matchoff, matchlen; + struct nf_conntrack_expect *exp; + union nf_inet_addr *saddr, daddr; + __be16 port; + unsigned int expires = 0; + int ret; + typeof(nf_nat_sip_expect_hook) nf_nat_sip_expect; + + /* Expected connections can not register again. */ + if (ct->status & IPS_EXPECTED) + return NF_ACCEPT; + + /* We must check the expiration time: a value of zero signals the + * registrar to release the binding. We'll remove our expectation + * when receiving the new bindings in the response, but we don't + * want to create new ones. + * + * The expiration time may be contained in Expires: header, the + * Contact: header parameters or the URI parameters. + */ + if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_EXPIRES, + &matchoff, &matchlen) > 0) + expires = simple_strtoul(*dptr + matchoff, NULL, 10); + + ret = ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, + SIP_HDR_CONTACT, NULL, + &matchoff, &matchlen, &daddr, &port); + if (ret < 0) + return NF_DROP; + else if (ret == 0) + return NF_ACCEPT; + + /* We don't support third-party registrations */ + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, &daddr)) + return NF_ACCEPT; + + if (ct_sip_parse_numerical_param(ct, *dptr, + matchoff + matchlen, *datalen, + "expires=", NULL, NULL, &expires) < 0) + return NF_DROP; + + if (expires == 0) { + ret = NF_ACCEPT; + goto store_cseq; + } + + exp = nf_ct_expect_alloc(ct); + if (!exp) + return NF_DROP; + + saddr = NULL; + if (sip_direct_signalling) + saddr = &ct->tuplehash[!dir].tuple.src.u3; + + nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, family, saddr, &daddr, + IPPROTO_UDP, NULL, &port); + exp->timeout.expires = sip_timeout * HZ; + exp->helper = nfct_help(ct)->helper; + exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; + + nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook); + if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK) + ret = nf_nat_sip_expect(skb, dptr, datalen, exp, + matchoff, matchlen); + else { + if (nf_ct_expect_related(exp) != 0) + ret = NF_DROP; + else + ret = NF_ACCEPT; + } + nf_ct_expect_put(exp); + +store_cseq: + if (ret == NF_ACCEPT) + help->help.ct_sip_info.register_cseq = cseq; + return ret; +} + +static int process_register_response(struct sk_buff *skb, + const char **dptr, unsigned int *datalen, + unsigned int cseq, unsigned int code) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + union nf_inet_addr addr; + __be16 port; + unsigned int matchoff, matchlen, dataoff = 0; + unsigned int expires = 0; + int in_contact = 0, ret; + + /* According to RFC 3261, "UAs MUST NOT send a new registration until + * they have received a final response from the registrar for the + * previous one or the previous REGISTER request has timed out". + * + * However, some servers fail to detect retransmissions and send late + * responses, so we store the sequence number of the last valid + * request and compare it here. + */ + if (help->help.ct_sip_info.register_cseq != cseq) + return NF_ACCEPT; + + if (code >= 100 && code <= 199) + return NF_ACCEPT; + if (code < 200 || code > 299) + goto flush; + + if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_EXPIRES, + &matchoff, &matchlen) > 0) + expires = simple_strtoul(*dptr + matchoff, NULL, 10); + + while (1) { + unsigned int c_expires = expires; + + ret = ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen, + SIP_HDR_CONTACT, &in_contact, + &matchoff, &matchlen, + &addr, &port); + if (ret < 0) + return NF_DROP; + else if (ret == 0) + break; + + /* We don't support third-party registrations */ + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &addr)) + continue; + + ret = ct_sip_parse_numerical_param(ct, *dptr, + matchoff + matchlen, + *datalen, "expires=", + NULL, NULL, &c_expires); + if (ret < 0) + return NF_DROP; + if (c_expires == 0) + break; + if (refresh_signalling_expectation(ct, &addr, port, c_expires)) + return NF_ACCEPT; + } + +flush: + flush_expectations(ct, false); return NF_ACCEPT; } @@ -742,6 +946,7 @@ static const struct sip_handler sip_handlers[] = { SIP_HANDLER("ACK", process_sdp, NULL), SIP_HANDLER("PRACK", process_sdp, process_prack_response), SIP_HANDLER("BYE", process_bye_request, NULL), + SIP_HANDLER("REGISTER", process_register_request, process_register_response), }; static int process_sip_response(struct sk_buff *skb, @@ -853,9 +1058,15 @@ static int sip_help(struct sk_buff *skb, static struct nf_conntrack_helper sip[MAX_PORTS][2] __read_mostly; static char sip_names[MAX_PORTS][2][sizeof("sip-65535")] __read_mostly; -static const struct nf_conntrack_expect_policy sip_exp_policy = { - .max_expected = 2, - .timeout = 3 * 60, +static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = { + [SIP_EXPECT_SIGNALLING] = { + .max_expected = 1, + .timeout = 3 * 60, + }, + [SIP_EXPECT_AUDIO] = { + .max_expected = IP_CT_DIR_MAX, + .timeout = 3 * 60, + }, }; static void nf_conntrack_sip_fini(void) @@ -887,7 +1098,8 @@ static int __init nf_conntrack_sip_init(void) for (j = 0; j < 2; j++) { sip[i][j].tuple.dst.protonum = IPPROTO_UDP; sip[i][j].tuple.src.u.udp.port = htons(ports[i]); - sip[i][j].expect_policy = &sip_exp_policy; + sip[i][j].expect_policy = sip_exp_policy; + sip[i][j].expect_class_max = SIP_EXPECT_MAX; sip[i][j].me = THIS_MODULE; sip[i][j].help = sip_help; -- cgit v1.2.3-71-gd317 From a9c1d35917c0c95c8f95a8e497fb91e301419693 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Mar 2008 20:25:49 -0700 Subject: [NETFILTER]: nf_conntrack_sip: create RTCP expectations Create expectations for the RTCP connections in addition to RTP connections. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_sip.h | 3 +- net/ipv4/netfilter/nf_nat_sip.c | 42 +++++++++++++--------- net/netfilter/nf_conntrack_sip.c | 58 +++++++++++++++++++----------- 3 files changed, 66 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 7cc84ed0c5da..6ddf95f51fb5 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -96,7 +96,8 @@ extern unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, extern unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, const char **dptr, unsigned int *datalen, - struct nf_conntrack_expect *exp); + struct nf_conntrack_expect *rtp_exp, + struct nf_conntrack_expect *rtcp_exp); extern int ct_sip_parse_request(const struct nf_conn *ct, const char *dptr, unsigned int datalen, diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 4b85e21a2a4a..f73ab4883b75 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -364,7 +364,8 @@ static unsigned int mangle_sdp(struct sk_buff *skb, Mangle it, and change the expectation to match the new version. */ static unsigned int ip_nat_sdp(struct sk_buff *skb, const char **dptr, unsigned int *datalen, - struct nf_conntrack_expect *exp) + struct nf_conntrack_expect *rtp_exp, + struct nf_conntrack_expect *rtcp_exp) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); @@ -375,31 +376,40 @@ static unsigned int ip_nat_sdp(struct sk_buff *skb, /* Connection will come from reply */ if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip) - newip = exp->tuple.dst.u3.ip; + newip = rtp_exp->tuple.dst.u3.ip; else newip = ct->tuplehash[!dir].tuple.dst.u3.ip; - exp->saved_ip = exp->tuple.dst.u3.ip; - exp->tuple.dst.u3.ip = newip; - exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port; - exp->dir = !dir; - - /* When you see the packet, we need to NAT it the same as the - this one. */ - exp->expectfn = ip_nat_sip_expected; - - /* Try to get same port: if not, try to change it. */ - for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) { - exp->tuple.dst.u.udp.port = htons(port); - if (nf_ct_expect_related(exp) == 0) + rtp_exp->saved_ip = rtp_exp->tuple.dst.u3.ip; + rtp_exp->tuple.dst.u3.ip = newip; + rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port; + rtp_exp->dir = !dir; + rtp_exp->expectfn = ip_nat_sip_expected; + + rtcp_exp->saved_ip = rtcp_exp->tuple.dst.u3.ip; + rtcp_exp->tuple.dst.u3.ip = newip; + rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port; + rtcp_exp->dir = !dir; + rtcp_exp->expectfn = ip_nat_sip_expected; + + /* Try to get same pair of ports: if not, try to change them. */ + for (port = ntohs(rtp_exp->tuple.dst.u.udp.port); + port != 0; port += 2) { + rtp_exp->tuple.dst.u.udp.port = htons(port); + if (nf_ct_expect_related(rtp_exp) != 0) + continue; + rtcp_exp->tuple.dst.u.udp.port = htons(port + 1); + if (nf_ct_expect_related(rtcp_exp) == 0) break; + nf_ct_unexpect_related(rtp_exp); } if (port == 0) return NF_DROP; if (!mangle_sdp(skb, ctinfo, ct, newip, port, dptr, datalen)) { - nf_ct_unexpect_related(exp); + nf_ct_unexpect_related(rtp_exp); + nf_ct_unexpect_related(rtcp_exp); return NF_DROP; } return NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 813aa8c67e4c..217262e23403 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -63,7 +63,9 @@ EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook); unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, const char **dptr, unsigned int *datalen, - struct nf_conntrack_expect *exp) __read_mostly; + struct nf_conntrack_expect *rtp_exp, + struct nf_conntrack_expect *rtcp_exp) + __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sdp_hook); static int string_len(const struct nf_conn *ct, const char *dptr, @@ -659,18 +661,20 @@ static void flush_expectations(struct nf_conn *ct, bool media) spin_unlock_bh(&nf_conntrack_lock); } -static int set_expected_rtp(struct sk_buff *skb, - const char **dptr, unsigned int *datalen, - union nf_inet_addr *daddr, __be16 port) +static int set_expected_rtp_rtcp(struct sk_buff *skb, + const char **dptr, unsigned int *datalen, + union nf_inet_addr *daddr, __be16 port) { - struct nf_conntrack_expect *exp; + struct nf_conntrack_expect *exp, *rtp_exp, *rtcp_exp; enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); union nf_inet_addr *saddr; struct nf_conntrack_tuple tuple; int family = ct->tuplehash[!dir].tuple.src.l3num; - int skip_expect = 0, ret; + int skip_expect = 0, ret = NF_DROP; + u_int16_t base_port; + __be16 rtp_port, rtcp_port; typeof(nf_nat_sdp_hook) nf_nat_sdp; saddr = NULL; @@ -704,23 +708,37 @@ static int set_expected_rtp(struct sk_buff *skb, if (skip_expect) return NF_ACCEPT; - exp = nf_ct_expect_alloc(ct); - if (exp == NULL) - return NF_DROP; - nf_ct_expect_init(exp, SIP_EXPECT_AUDIO, family, saddr, daddr, - IPPROTO_UDP, NULL, &port); + base_port = ntohs(tuple.dst.u.udp.port) & ~1; + rtp_port = htons(base_port); + rtcp_port = htons(base_port + 1); + + rtp_exp = nf_ct_expect_alloc(ct); + if (rtp_exp == NULL) + goto err1; + nf_ct_expect_init(rtp_exp, SIP_EXPECT_AUDIO, family, saddr, daddr, + IPPROTO_UDP, NULL, &rtp_port); + + rtcp_exp = nf_ct_expect_alloc(ct); + if (rtcp_exp == NULL) + goto err2; + nf_ct_expect_init(rtcp_exp, SIP_EXPECT_AUDIO, family, saddr, daddr, + IPPROTO_UDP, NULL, &rtcp_port); nf_nat_sdp = rcu_dereference(nf_nat_sdp_hook); if (nf_nat_sdp && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp(skb, dptr, datalen, exp); + ret = nf_nat_sdp(skb, dptr, datalen, rtp_exp, rtcp_exp); else { - if (nf_ct_expect_related(exp) != 0) - ret = NF_DROP; - else - ret = NF_ACCEPT; + if (nf_ct_expect_related(rtp_exp) == 0) { + if (nf_ct_expect_related(rtcp_exp) != 0) + nf_ct_unexpect_related(rtp_exp); + else + ret = NF_ACCEPT; + } } - nf_ct_expect_put(exp); - + nf_ct_expect_put(rtcp_exp); +err2: + nf_ct_expect_put(rtp_exp); +err1: return ret; } @@ -758,7 +776,7 @@ static int process_sdp(struct sk_buff *skb, if (port < 1024 || port > 65535) return NF_DROP; - return set_expected_rtp(skb, dptr, datalen, &addr, htons(port)); + return set_expected_rtp_rtcp(skb, dptr, datalen, &addr, htons(port)); } static int process_invite_response(struct sk_buff *skb, const char **dptr, unsigned int *datalen, @@ -1101,7 +1119,7 @@ static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1 .timeout = 3 * 60, }, [SIP_EXPECT_AUDIO] = { - .max_expected = IP_CT_DIR_MAX, + .max_expected = 2 * IP_CT_DIR_MAX, .timeout = 3 * 60, }, }; -- cgit v1.2.3-71-gd317 From 4ab9e64e5e3c0516577818804aaf13a630d67bc9 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Mar 2008 20:26:08 -0700 Subject: [NETFILTER]: nf_nat_sip: split up SDP mangling The SDP connection addresses may be contained in the payload multiple times (in the session description and/or once per media description), currently only the session description is properly updated. Split up SDP mangling so the function setting up expectations only updates the media port, update connection addresses from media descriptions while parsing them and at the end update the session description when the final addresses are known. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_sip.h | 25 ++++- net/ipv4/netfilter/nf_nat_sip.c | 121 ++++++++++++++++-------- net/netfilter/nf_conntrack_sip.c | 142 +++++++++++++++++++++++------ 3 files changed, 219 insertions(+), 69 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 6ddf95f51fb5..eca3ad3f28dc 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -93,11 +93,26 @@ extern unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, struct nf_conntrack_expect *exp, unsigned int matchoff, unsigned int matchlen); -extern unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, - const char **dptr, - unsigned int *datalen, - struct nf_conntrack_expect *rtp_exp, - struct nf_conntrack_expect *rtcp_exp); +extern unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, + const char **dptr, + unsigned int dataoff, + unsigned int *datalen, + enum sdp_header_types type, + enum sdp_header_types term, + const union nf_inet_addr *addr); +extern unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, + const char **dptr, + unsigned int dataoff, + unsigned int *datalen, + const union nf_inet_addr *addr); +extern unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, + const char **dptr, + unsigned int *datalen, + struct nf_conntrack_expect *rtp_exp, + struct nf_conntrack_expect *rtcp_exp, + unsigned int mediaoff, + unsigned int medialen, + union nf_inet_addr *rtp_addr); extern int ct_sip_parse_request(const struct nf_conn *ct, const char *dptr, unsigned int datalen, diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index f73ab4883b75..4429069d9b42 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -316,45 +316,77 @@ static int mangle_content_len(struct sk_buff *skb, buffer, buflen); } -static unsigned mangle_sdp_packet(struct sk_buff *skb, - const char **dptr, unsigned int *datalen, +static unsigned mangle_sdp_packet(struct sk_buff *skb, const char **dptr, + unsigned int dataoff, unsigned int *datalen, enum sdp_header_types type, + enum sdp_header_types term, char *buffer, int buflen) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); unsigned int matchlen, matchoff; - if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen, type, SDP_HDR_UNSPEC, + if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term, &matchoff, &matchlen) <= 0) return 0; return mangle_packet(skb, dptr, datalen, matchoff, matchlen, buffer, buflen); } -static unsigned int mangle_sdp(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - struct nf_conn *ct, - __be32 newip, u_int16_t port, - const char **dptr, unsigned int *datalen) +static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, + unsigned int dataoff, + unsigned int *datalen, + enum sdp_header_types type, + enum sdp_header_types term, + const union nf_inet_addr *addr) { char buffer[sizeof("nnn.nnn.nnn.nnn")]; - unsigned int bufflen; + unsigned int buflen; - /* Mangle owner and contact info. */ - bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip)); - if (!mangle_sdp_packet(skb, dptr, datalen, SDP_HDR_OWNER_IP4, - buffer, bufflen)) + buflen = sprintf(buffer, NIPQUAD_FMT, NIPQUAD(addr->ip)); + if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, + buffer, buflen)) return 0; - if (!mangle_sdp_packet(skb, dptr, datalen, SDP_HDR_CONNECTION_IP4, - buffer, bufflen)) + return mangle_content_len(skb, dptr, datalen); +} + +static unsigned int ip_nat_sdp_port(struct sk_buff *skb, + const char **dptr, + unsigned int *datalen, + unsigned int matchoff, + unsigned int matchlen, + u_int16_t port) +{ + char buffer[sizeof("nnnnn")]; + unsigned int buflen; + + buflen = sprintf(buffer, "%u", port); + if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, + buffer, buflen)) return 0; - /* Mangle media port. */ - bufflen = sprintf(buffer, "%u", port); - if (!mangle_sdp_packet(skb, dptr, datalen, SDP_HDR_MEDIA, - buffer, bufflen)) + return mangle_content_len(skb, dptr, datalen); +} + +static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, + unsigned int dataoff, + unsigned int *datalen, + const union nf_inet_addr *addr) +{ + char buffer[sizeof("nnn.nnn.nnn.nnn")]; + unsigned int buflen; + + /* Mangle session description owner and contact addresses */ + buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(addr->ip)); + if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, + SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, + buffer, buflen)) + return 0; + + if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, + SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, + buffer, buflen)) return 0; return mangle_content_len(skb, dptr, datalen); @@ -362,32 +394,35 @@ static unsigned int mangle_sdp(struct sk_buff *skb, /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. */ -static unsigned int ip_nat_sdp(struct sk_buff *skb, - const char **dptr, unsigned int *datalen, - struct nf_conntrack_expect *rtp_exp, - struct nf_conntrack_expect *rtcp_exp) +static unsigned int ip_nat_sdp_media(struct sk_buff *skb, + const char **dptr, + unsigned int *datalen, + struct nf_conntrack_expect *rtp_exp, + struct nf_conntrack_expect *rtcp_exp, + unsigned int mediaoff, + unsigned int medialen, + union nf_inet_addr *rtp_addr) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - __be32 newip; u_int16_t port; /* Connection will come from reply */ if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip) - newip = rtp_exp->tuple.dst.u3.ip; + rtp_addr->ip = rtp_exp->tuple.dst.u3.ip; else - newip = ct->tuplehash[!dir].tuple.dst.u3.ip; + rtp_addr->ip = ct->tuplehash[!dir].tuple.dst.u3.ip; rtp_exp->saved_ip = rtp_exp->tuple.dst.u3.ip; - rtp_exp->tuple.dst.u3.ip = newip; + rtp_exp->tuple.dst.u3.ip = rtp_addr->ip; rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port; rtp_exp->dir = !dir; rtp_exp->expectfn = ip_nat_sip_expected; rtcp_exp->saved_ip = rtcp_exp->tuple.dst.u3.ip; - rtcp_exp->tuple.dst.u3.ip = newip; + rtcp_exp->tuple.dst.u3.ip = rtp_addr->ip; rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port; rtcp_exp->dir = !dir; rtcp_exp->expectfn = ip_nat_sip_expected; @@ -405,21 +440,29 @@ static unsigned int ip_nat_sdp(struct sk_buff *skb, } if (port == 0) - return NF_DROP; + goto err1; + + /* Update media port. */ + if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && + !ip_nat_sdp_port(skb, dptr, datalen, mediaoff, medialen, port)) + goto err2; - if (!mangle_sdp(skb, ctinfo, ct, newip, port, dptr, datalen)) { - nf_ct_unexpect_related(rtp_exp); - nf_ct_unexpect_related(rtcp_exp); - return NF_DROP; - } return NF_ACCEPT; + +err2: + nf_ct_unexpect_related(rtp_exp); + nf_ct_unexpect_related(rtcp_exp); +err1: + return NF_DROP; } static void __exit nf_nat_sip_fini(void) { rcu_assign_pointer(nf_nat_sip_hook, NULL); rcu_assign_pointer(nf_nat_sip_expect_hook, NULL); - rcu_assign_pointer(nf_nat_sdp_hook, NULL); + rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL); + rcu_assign_pointer(nf_nat_sdp_session_hook, NULL); + rcu_assign_pointer(nf_nat_sdp_media_hook, NULL); synchronize_rcu(); } @@ -427,10 +470,14 @@ static int __init nf_nat_sip_init(void) { BUG_ON(nf_nat_sip_hook != NULL); BUG_ON(nf_nat_sip_expect_hook != NULL); - BUG_ON(nf_nat_sdp_hook != NULL); + BUG_ON(nf_nat_sdp_addr_hook != NULL); + BUG_ON(nf_nat_sdp_session_hook != NULL); + BUG_ON(nf_nat_sdp_media_hook != NULL); rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect); - rcu_assign_pointer(nf_nat_sdp_hook, ip_nat_sdp); + rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); + rcu_assign_pointer(nf_nat_sdp_session_hook, ip_nat_sdp_session); + rcu_assign_pointer(nf_nat_sdp_media_hook, ip_nat_sdp_media); return 0; } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 217262e23403..f929add324f3 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -60,13 +60,34 @@ unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, unsigned int matchlen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook); -unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, - const char **dptr, - unsigned int *datalen, - struct nf_conntrack_expect *rtp_exp, - struct nf_conntrack_expect *rtcp_exp) - __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_hook); +unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, + const char **dptr, + unsigned int dataoff, + unsigned int *datalen, + enum sdp_header_types type, + enum sdp_header_types term, + const union nf_inet_addr *addr) + __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook); + +unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, + const char **dptr, + unsigned int dataoff, + unsigned int *datalen, + const union nf_inet_addr *addr) + __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook); + +unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, + const char **dptr, + unsigned int *datalen, + struct nf_conntrack_expect *rtp_exp, + struct nf_conntrack_expect *rtcp_exp, + unsigned int mediaoff, + unsigned int medialen, + union nf_inet_addr *rtp_addr) + __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_sdp_media_hook); static int string_len(const struct nf_conn *ct, const char *dptr, const char *limit, int *shift) @@ -613,6 +634,26 @@ int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr, } EXPORT_SYMBOL_GPL(ct_sip_get_sdp_header); +static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + enum sdp_header_types type, + enum sdp_header_types term, + unsigned int *matchoff, unsigned int *matchlen, + union nf_inet_addr *addr) +{ + int ret; + + ret = ct_sip_get_sdp_header(ct, dptr, dataoff, datalen, type, term, + matchoff, matchlen); + if (ret <= 0) + return ret; + + if (!parse_addr(ct, dptr + *matchoff, NULL, addr, + dptr + *matchoff + *matchlen)) + return -1; + return 1; +} + static int refresh_signalling_expectation(struct nf_conn *ct, union nf_inet_addr *addr, __be16 port, @@ -663,7 +704,8 @@ static void flush_expectations(struct nf_conn *ct, bool media) static int set_expected_rtp_rtcp(struct sk_buff *skb, const char **dptr, unsigned int *datalen, - union nf_inet_addr *daddr, __be16 port) + union nf_inet_addr *daddr, __be16 port, + unsigned int mediaoff, unsigned int medialen) { struct nf_conntrack_expect *exp, *rtp_exp, *rtcp_exp; enum ip_conntrack_info ctinfo; @@ -675,7 +717,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, int skip_expect = 0, ret = NF_DROP; u_int16_t base_port; __be16 rtp_port, rtcp_port; - typeof(nf_nat_sdp_hook) nf_nat_sdp; + typeof(nf_nat_sdp_media_hook) nf_nat_sdp_media; saddr = NULL; if (sip_direct_media) { @@ -724,9 +766,10 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, nf_ct_expect_init(rtcp_exp, SIP_EXPECT_AUDIO, family, saddr, daddr, IPPROTO_UDP, NULL, &rtcp_port); - nf_nat_sdp = rcu_dereference(nf_nat_sdp_hook); - if (nf_nat_sdp && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp(skb, dptr, datalen, rtp_exp, rtcp_exp); + nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook); + if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK) + ret = nf_nat_sdp_media(skb, dptr, datalen, rtp_exp, rtcp_exp, + mediaoff, medialen, daddr); else { if (nf_ct_expect_related(rtp_exp) == 0) { if (nf_ct_expect_related(rtcp_exp) != 0) @@ -750,33 +793,78 @@ static int process_sdp(struct sk_buff *skb, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; unsigned int matchoff, matchlen; - union nf_inet_addr addr; + unsigned int mediaoff, medialen; + unsigned int sdpoff; + unsigned int caddr_len, maddr_len; + union nf_inet_addr caddr, maddr, rtp_addr; unsigned int port; - enum sdp_header_types type; + enum sdp_header_types c_hdr; + int ret; + typeof(nf_nat_sdp_addr_hook) nf_nat_sdp_addr; + typeof(nf_nat_sdp_session_hook) nf_nat_sdp_session; - /* Get address and port from SDP packet. */ - type = family == AF_INET ? SDP_HDR_CONNECTION_IP4 : - SDP_HDR_CONNECTION_IP6; + c_hdr = family == AF_INET ? SDP_HDR_CONNECTION_IP4 : + SDP_HDR_CONNECTION_IP6; + /* Find beginning of session description */ if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen, - type, SDP_HDR_UNSPEC, + SDP_HDR_VERSION, SDP_HDR_UNSPEC, &matchoff, &matchlen) <= 0) return NF_ACCEPT; - - /* We'll drop only if there are parse problems. */ - if (!parse_addr(ct, *dptr + matchoff, NULL, &addr, *dptr + *datalen)) - return NF_DROP; - - if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen, + sdpoff = matchoff; + + /* The connection information is contained in the session description + * and/or once per media description. The first media description marks + * the end of the session description. */ + caddr_len = 0; + if (ct_sip_parse_sdp_addr(ct, *dptr, sdpoff, *datalen, + c_hdr, SDP_HDR_MEDIA, + &matchoff, &matchlen, &caddr) > 0) + caddr_len = matchlen; + + if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, SDP_HDR_MEDIA, SDP_HDR_UNSPEC, - &matchoff, &matchlen) <= 0) + &mediaoff, &medialen) <= 0) return NF_ACCEPT; - port = simple_strtoul(*dptr + matchoff, NULL, 10); + port = simple_strtoul(*dptr + mediaoff, NULL, 10); if (port < 1024 || port > 65535) return NF_DROP; - return set_expected_rtp_rtcp(skb, dptr, datalen, &addr, htons(port)); + /* The media description overrides the session description. */ + maddr_len = 0; + if (ct_sip_parse_sdp_addr(ct, *dptr, mediaoff, *datalen, + c_hdr, SDP_HDR_MEDIA, + &matchoff, &matchlen, &maddr) > 0) { + maddr_len = matchlen; + memcpy(&rtp_addr, &maddr, sizeof(rtp_addr)); + } else if (caddr_len) + memcpy(&rtp_addr, &caddr, sizeof(rtp_addr)); + else + return NF_DROP; + + ret = set_expected_rtp_rtcp(skb, dptr, datalen, &rtp_addr, htons(port), + mediaoff, medialen); + if (ret != NF_ACCEPT) + return ret; + + /* Update media connection address if present */ + if (maddr_len) { + nf_nat_sdp_addr = rcu_dereference(nf_nat_sdp_addr_hook); + if (nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) { + ret = nf_nat_sdp_addr(skb, dptr, mediaoff, datalen, + c_hdr, SDP_HDR_MEDIA, &rtp_addr); + if (ret != NF_ACCEPT) + return ret; + } + } + + /* Update session connection and owner addresses */ + nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook); + if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK) + ret = nf_nat_sdp_session(skb, dptr, sdpoff, datalen, &rtp_addr); + + return ret; } static int process_invite_response(struct sk_buff *skb, const char **dptr, unsigned int *datalen, -- cgit v1.2.3-71-gd317 From 0d0ab0378d67517a4f4ae3497706c13d9dd24af1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Mar 2008 20:26:24 -0700 Subject: [NETFILTER]: nf_conntrack_sip: support multiple media channels Add support for multiple media channels and use it to create expectations for video streams when present. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_sip.h | 14 ++++ include/net/netfilter/nf_conntrack.h | 2 +- net/netfilter/nf_conntrack_sip.c | 121 +++++++++++++++++++++-------- 3 files changed, 105 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index eca3ad3f28dc..71fa3eb5f485 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -12,10 +12,24 @@ struct nf_ct_sip_master { enum sip_expectation_classes { SIP_EXPECT_SIGNALLING, SIP_EXPECT_AUDIO, + SIP_EXPECT_VIDEO, __SIP_EXPECT_MAX }; #define SIP_EXPECT_MAX (__SIP_EXPECT_MAX - 1) +struct sdp_media_type { + const char *name; + unsigned int len; + enum sip_expectation_classes class; +}; + +#define SDP_MEDIA_TYPE(__name, __class) \ +{ \ + .name = (__name), \ + .len = sizeof(__name) - 1, \ + .class = (__class), \ +} + struct sip_handler { const char *method; unsigned int len; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 4a4f870d2a5e..a3567a7a6d67 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -78,7 +78,7 @@ do { \ struct nf_conntrack_helper; /* Must be kept in sync with the classes defined by helpers */ -#define NF_CT_MAX_EXPECT_CLASSES 2 +#define NF_CT_MAX_EXPECT_CLASSES 3 /* nf_conn feature for connections that have a helper */ struct nf_conn_help { diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index f929add324f3..f40a525732d1 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -112,6 +112,21 @@ static int digits_len(const struct nf_conn *ct, const char *dptr, return len; } +/* get media type + port length */ +static int media_len(const struct nf_conn *ct, const char *dptr, + const char *limit, int *shift) +{ + int len = string_len(ct, dptr, limit, shift); + + dptr += len; + if (dptr >= limit || *dptr != ' ') + return 0; + len++; + dptr++; + + return len + digits_len(ct, dptr, limit, shift); +} + static int parse_addr(const struct nf_conn *ct, const char *cp, const char **endp, union nf_inet_addr *addr, const char *limit) @@ -563,7 +578,7 @@ static const struct sip_header ct_sdp_hdrs[] = { [SDP_HDR_CONNECTION_IP4] = SDP_HDR("c=", "IN IP4 ", epaddr_len), [SDP_HDR_OWNER_IP6] = SDP_HDR("o=", "IN IP6 ", epaddr_len), [SDP_HDR_CONNECTION_IP6] = SDP_HDR("c=", "IN IP6 ", epaddr_len), - [SDP_HDR_MEDIA] = SDP_HDR("m=", "audio ", digits_len), + [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len), }; /* Linear string search within SDP header values */ @@ -705,6 +720,7 @@ static void flush_expectations(struct nf_conn *ct, bool media) static int set_expected_rtp_rtcp(struct sk_buff *skb, const char **dptr, unsigned int *datalen, union nf_inet_addr *daddr, __be16 port, + enum sip_expectation_classes class, unsigned int mediaoff, unsigned int medialen) { struct nf_conntrack_expect *exp, *rtp_exp, *rtcp_exp; @@ -743,7 +759,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, exp = __nf_ct_expect_find(&tuple); if (exp && exp->master != ct && nfct_help(exp->master)->helper == nfct_help(ct)->helper && - exp->class == SIP_EXPECT_AUDIO) + exp->class == class) skip_expect = 1; rcu_read_unlock(); @@ -757,13 +773,13 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, rtp_exp = nf_ct_expect_alloc(ct); if (rtp_exp == NULL) goto err1; - nf_ct_expect_init(rtp_exp, SIP_EXPECT_AUDIO, family, saddr, daddr, + nf_ct_expect_init(rtp_exp, class, family, saddr, daddr, IPPROTO_UDP, NULL, &rtp_port); rtcp_exp = nf_ct_expect_alloc(ct); if (rtcp_exp == NULL) goto err2; - nf_ct_expect_init(rtcp_exp, SIP_EXPECT_AUDIO, family, saddr, daddr, + nf_ct_expect_init(rtcp_exp, class, family, saddr, daddr, IPPROTO_UDP, NULL, &rtcp_port); nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook); @@ -785,6 +801,28 @@ err1: return ret; } +static const struct sdp_media_type sdp_media_types[] = { + SDP_MEDIA_TYPE("audio ", SIP_EXPECT_AUDIO), + SDP_MEDIA_TYPE("video ", SIP_EXPECT_VIDEO), +}; + +static const struct sdp_media_type *sdp_media_type(const char *dptr, + unsigned int matchoff, + unsigned int matchlen) +{ + const struct sdp_media_type *t; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(sdp_media_types); i++) { + t = &sdp_media_types[i]; + if (matchlen < t->len || + strncmp(dptr + matchoff, t->name, t->len)) + continue; + return t; + } + return NULL; +} + static int process_sdp(struct sk_buff *skb, const char **dptr, unsigned int *datalen, unsigned int cseq) @@ -796,13 +834,16 @@ static int process_sdp(struct sk_buff *skb, unsigned int mediaoff, medialen; unsigned int sdpoff; unsigned int caddr_len, maddr_len; + unsigned int i; union nf_inet_addr caddr, maddr, rtp_addr; unsigned int port; enum sdp_header_types c_hdr; - int ret; + const struct sdp_media_type *t; + int ret = NF_ACCEPT; typeof(nf_nat_sdp_addr_hook) nf_nat_sdp_addr; typeof(nf_nat_sdp_session_hook) nf_nat_sdp_session; + nf_nat_sdp_addr = rcu_dereference(nf_nat_sdp_addr_hook); c_hdr = family == AF_INET ? SDP_HDR_CONNECTION_IP4 : SDP_HDR_CONNECTION_IP6; @@ -822,41 +863,55 @@ static int process_sdp(struct sk_buff *skb, &matchoff, &matchlen, &caddr) > 0) caddr_len = matchlen; - if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, - SDP_HDR_MEDIA, SDP_HDR_UNSPEC, - &mediaoff, &medialen) <= 0) - return NF_ACCEPT; + mediaoff = sdpoff; + for (i = 0; i < ARRAY_SIZE(sdp_media_types); ) { + if (ct_sip_get_sdp_header(ct, *dptr, mediaoff, *datalen, + SDP_HDR_MEDIA, SDP_HDR_UNSPEC, + &mediaoff, &medialen) <= 0) + break; - port = simple_strtoul(*dptr + mediaoff, NULL, 10); - if (port < 1024 || port > 65535) - return NF_DROP; + /* Get media type and port number. A media port value of zero + * indicates an inactive stream. */ + t = sdp_media_type(*dptr, mediaoff, medialen); + if (!t) { + mediaoff += medialen; + continue; + } + mediaoff += t->len; + medialen -= t->len; - /* The media description overrides the session description. */ - maddr_len = 0; - if (ct_sip_parse_sdp_addr(ct, *dptr, mediaoff, *datalen, - c_hdr, SDP_HDR_MEDIA, - &matchoff, &matchlen, &maddr) > 0) { - maddr_len = matchlen; - memcpy(&rtp_addr, &maddr, sizeof(rtp_addr)); - } else if (caddr_len) - memcpy(&rtp_addr, &caddr, sizeof(rtp_addr)); - else - return NF_DROP; + port = simple_strtoul(*dptr + mediaoff, NULL, 10); + if (port == 0) + continue; + if (port < 1024 || port > 65535) + return NF_DROP; - ret = set_expected_rtp_rtcp(skb, dptr, datalen, &rtp_addr, htons(port), - mediaoff, medialen); - if (ret != NF_ACCEPT) - return ret; + /* The media description overrides the session description. */ + maddr_len = 0; + if (ct_sip_parse_sdp_addr(ct, *dptr, mediaoff, *datalen, + c_hdr, SDP_HDR_MEDIA, + &matchoff, &matchlen, &maddr) > 0) { + maddr_len = matchlen; + memcpy(&rtp_addr, &maddr, sizeof(rtp_addr)); + } else if (caddr_len) + memcpy(&rtp_addr, &caddr, sizeof(rtp_addr)); + else + return NF_DROP; + + ret = set_expected_rtp_rtcp(skb, dptr, datalen, + &rtp_addr, htons(port), t->class, + mediaoff, medialen); + if (ret != NF_ACCEPT) + return ret; - /* Update media connection address if present */ - if (maddr_len) { - nf_nat_sdp_addr = rcu_dereference(nf_nat_sdp_addr_hook); - if (nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) { + /* Update media connection address if present */ + if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) { ret = nf_nat_sdp_addr(skb, dptr, mediaoff, datalen, c_hdr, SDP_HDR_MEDIA, &rtp_addr); if (ret != NF_ACCEPT) return ret; } + i++; } /* Update session connection and owner addresses */ @@ -1210,6 +1265,10 @@ static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1 .max_expected = 2 * IP_CT_DIR_MAX, .timeout = 3 * 60, }, + [SIP_EXPECT_VIDEO] = { + .max_expected = 2 * IP_CT_DIR_MAX, + .timeout = 3 * 60, + }, }; static void nf_conntrack_sip_fini(void) -- cgit v1.2.3-71-gd317 From c7f485abd618e0d249bdd1abdc586bd10fee1954 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Mar 2008 20:26:43 -0700 Subject: [NETFILTER]: nf_conntrack_sip: RTP routing optimization Optimize call routing between NATed endpoints: when an external registrar sends a media description that contains an existing RTP expectation from a different SNATed connection, the gatekeeper is trying to route the call directly between the two endpoints. We assume both endpoints can reach each other directly and "un-NAT" the addresses, which makes the media stream go between the two endpoints directly. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_sip.h | 6 +++ net/ipv4/netfilter/nf_nat_sip.c | 3 ++ net/netfilter/nf_conntrack_sip.c | 59 +++++++++++++++++++++++++----- 3 files changed, 58 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 71fa3eb5f485..5da04e586a3f 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -114,6 +114,12 @@ extern unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, enum sdp_header_types type, enum sdp_header_types term, const union nf_inet_addr *addr); +extern unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, + const char **dptr, + unsigned int *datalen, + unsigned int matchoff, + unsigned int matchlen, + u_int16_t port); extern unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, const char **dptr, unsigned int dataoff, diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 4429069d9b42..bcddccddf768 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -461,6 +461,7 @@ static void __exit nf_nat_sip_fini(void) rcu_assign_pointer(nf_nat_sip_hook, NULL); rcu_assign_pointer(nf_nat_sip_expect_hook, NULL); rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL); + rcu_assign_pointer(nf_nat_sdp_port_hook, NULL); rcu_assign_pointer(nf_nat_sdp_session_hook, NULL); rcu_assign_pointer(nf_nat_sdp_media_hook, NULL); synchronize_rcu(); @@ -471,11 +472,13 @@ static int __init nf_nat_sip_init(void) BUG_ON(nf_nat_sip_hook != NULL); BUG_ON(nf_nat_sip_expect_hook != NULL); BUG_ON(nf_nat_sdp_addr_hook != NULL); + BUG_ON(nf_nat_sdp_port_hook != NULL); BUG_ON(nf_nat_sdp_session_hook != NULL); BUG_ON(nf_nat_sdp_media_hook != NULL); rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect); rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); + rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port); rcu_assign_pointer(nf_nat_sdp_session_hook, ip_nat_sdp_session); rcu_assign_pointer(nf_nat_sdp_media_hook, ip_nat_sdp_media); return 0; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index f40a525732d1..57de22c770a3 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -70,6 +70,14 @@ unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook); +unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, + const char **dptr, + unsigned int *datalen, + unsigned int matchoff, + unsigned int matchlen, + u_int16_t port) __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook); + unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, const char **dptr, unsigned int dataoff, @@ -730,9 +738,10 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, union nf_inet_addr *saddr; struct nf_conntrack_tuple tuple; int family = ct->tuplehash[!dir].tuple.src.l3num; - int skip_expect = 0, ret = NF_DROP; + int direct_rtp = 0, skip_expect = 0, ret = NF_DROP; u_int16_t base_port; __be16 rtp_port, rtcp_port; + typeof(nf_nat_sdp_port_hook) nf_nat_sdp_port; typeof(nf_nat_sdp_media_hook) nf_nat_sdp_media; saddr = NULL; @@ -746,6 +755,14 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, * to register it since we can see the same media description multiple * times on different connections in case multiple endpoints receive * the same call. + * + * RTP optimization: if we find a matching media channel expectation + * and both the expectation and this connection are SNATed, we assume + * both sides can reach each other directly and use the final + * destination address from the expectation. We still need to keep + * the NATed expectations for media that might arrive from the + * outside, and additionally need to expect the direct RTP stream + * in case it passes through us even without NAT. */ memset(&tuple, 0, sizeof(tuple)); if (saddr) @@ -756,20 +773,42 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, tuple.dst.u.udp.port = port; rcu_read_lock(); - exp = __nf_ct_expect_find(&tuple); - if (exp && exp->master != ct && - nfct_help(exp->master)->helper == nfct_help(ct)->helper && - exp->class == class) - skip_expect = 1; - rcu_read_unlock(); + do { + exp = __nf_ct_expect_find(&tuple); - if (skip_expect) - return NF_ACCEPT; + if (!exp || exp->master == ct || + nfct_help(exp->master)->helper != nfct_help(ct)->helper || + exp->class != class) + break; + + if (exp->tuple.src.l3num == AF_INET && !direct_rtp && + (exp->saved_ip != exp->tuple.dst.u3.ip || + exp->saved_proto.udp.port != exp->tuple.dst.u.udp.port) && + ct->status & IPS_NAT_MASK) { + daddr->ip = exp->saved_ip; + tuple.dst.u3.ip = exp->saved_ip; + tuple.dst.u.udp.port = exp->saved_proto.udp.port; + direct_rtp = 1; + } else + skip_expect = 1; + } while (!skip_expect); + rcu_read_unlock(); base_port = ntohs(tuple.dst.u.udp.port) & ~1; rtp_port = htons(base_port); rtcp_port = htons(base_port + 1); + if (direct_rtp) { + nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook); + if (nf_nat_sdp_port && + !nf_nat_sdp_port(skb, dptr, datalen, + mediaoff, medialen, ntohs(rtp_port))) + goto err1; + } + + if (skip_expect) + return NF_ACCEPT; + rtp_exp = nf_ct_expect_alloc(ct); if (rtp_exp == NULL) goto err1; @@ -783,7 +822,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, IPPROTO_UDP, NULL, &rtcp_port); nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook); - if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK) + if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp) ret = nf_nat_sdp_media(skb, dptr, datalen, rtp_exp, rtcp_exp, mediaoff, medialen, daddr); else { -- cgit v1.2.3-71-gd317 From 9c2f5746b9cd536f0007709196d85a7e7d0070fa Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 26 Mar 2008 00:47:14 -0700 Subject: [NETNS]: Compilation fix for include/linux/netdevice.h. Commit commit c346dca10840a874240c78efe3f39acf4312a1f2 ([NET] NETNS: Omit net_device->nd_net without CONFIG_NET_NS) breaks compilation with CONFIG_NET_NS set. Fix the typo. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d146be40f46c..06ca84d71db4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -756,7 +756,7 @@ static inline void dev_net_set(struct net_device *dev, const struct net *net) { #ifdef CONFIG_NET_NS - dev->nd_dev = net; + dev->nd_net = net; #endif } -- cgit v1.2.3-71-gd317 From f5aa23fd49063745f85644dd7a9330acd706add6 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 26 Mar 2008 00:48:17 -0700 Subject: [NETNS]: Compilation warnings under CONFIG_NET_NS. Recent commits from YOSHIFUJI Hideaki have been introduced a several compilation warnings 'assignment discards qualifiers from pointer target type' due to extra const modifier in the inline call parameters of {dev|sock|twsk}_net_set. Drop it. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- include/net/inet_timewait_sock.h | 2 +- include/net/sock.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 06ca84d71db4..15fa84a15c27 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -753,7 +753,7 @@ struct net *dev_net(const struct net_device *dev) } static inline -void dev_net_set(struct net_device *dev, const struct net *net) +void dev_net_set(struct net_device *dev, struct net *net) { #ifdef CONFIG_NET_NS dev->nd_net = net; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 07fe0d1a4f03..95c660c9719b 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -219,7 +219,7 @@ struct net *twsk_net(const struct inet_timewait_sock *twsk) } static inline -void twsk_net_set(struct inet_timewait_sock *twsk, const struct net *net) +void twsk_net_set(struct inet_timewait_sock *twsk, struct net *net) { #ifdef CONFIG_NET_NS twsk->tw_net = net; diff --git a/include/net/sock.h b/include/net/sock.h index 7e0d4a0c4d12..1c9d059223ee 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1358,7 +1358,7 @@ struct net *sock_net(const struct sock *sk) } static inline -void sock_net_set(struct sock *sk, const struct net *net) +void sock_net_set(struct sock *sk, struct net *net) { #ifdef CONFIG_NET_NS sk->sk_net = net; -- cgit v1.2.3-71-gd317 From 67727184f28c38d06013c6659560bb046c1d9f9c Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 26 Mar 2008 16:27:22 -0700 Subject: [VLAN]: Reduce memory consumed by vlan_groups Currently each vlan_groupd contains 8 pointers on arrays with 512 pointers on struct net_device each :) Such a construction "in many cases ... wastes memory". My proposal is to allow for some of these arrays pointers be NULL, meaning that there are no devices in it. When a new device is added to the vlan_group, the appropriate array is allocated. The check in vlan_group_get_device's is safe, since the pointer vg->vlan_devices_arrays[x] can only switch from NULL to not-NULL. The vlan_group_prealloc_vid() is guarded with rtnl lock and is also safe. I've checked (I hope that) all the places, that use these arrays and found, that the register_vlan_dev is the only place, that can put a vlan device on an empty vlan_group. Rough calculations shows, that after the patch a setup with a single vlan dev (or up to 512 vlans with sequential vids) will occupy approximately 8 times less memory. The question I have is - does this patch makes sense, or a totally new structures are required to store the vlan_devs? Signed-off-by: Pavel Emelyanov Signed-off-by: Patrick McHardy --- include/linux/if_vlan.h | 2 +- net/8021q/vlan.c | 36 +++++++++++++++++++++++------------- 2 files changed, 24 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 79504b22a932..edd55af7ebd6 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -93,7 +93,7 @@ static inline struct net_device *vlan_group_get_device(struct vlan_group *vg, { struct net_device **array; array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; - return array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN]; + return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL; } static inline void vlan_group_set_device(struct vlan_group *vg, diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index c35dc230365c..694be86e4490 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -106,29 +106,35 @@ static void vlan_group_free(struct vlan_group *grp) static struct vlan_group *vlan_group_alloc(int ifindex) { struct vlan_group *grp; - unsigned int size; - unsigned int i; grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL); if (!grp) return NULL; - size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN; - - for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) { - grp->vlan_devices_arrays[i] = kzalloc(size, GFP_KERNEL); - if (!grp->vlan_devices_arrays[i]) - goto err; - } - grp->real_dev_ifindex = ifindex; hlist_add_head_rcu(&grp->hlist, &vlan_group_hash[vlan_grp_hashfn(ifindex)]); return grp; +} -err: - vlan_group_free(grp); - return NULL; +static int vlan_group_prealloc_vid(struct vlan_group *vg, int vid) +{ + struct net_device **array; + unsigned int size; + + ASSERT_RTNL(); + + array = vg->vlan_devices_arrays[vid / VLAN_GROUP_ARRAY_PART_LEN]; + if (array != NULL) + return 0; + + size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN; + array = kzalloc(size, GFP_KERNEL); + if (array == NULL) + return -ENOBUFS; + + vg->vlan_devices_arrays[vid / VLAN_GROUP_ARRAY_PART_LEN] = array; + return 0; } static void vlan_rcu_free(struct rcu_head *rcu) @@ -247,6 +253,10 @@ int register_vlan_dev(struct net_device *dev) return -ENOBUFS; } + err = vlan_group_prealloc_vid(grp, vlan_id); + if (err < 0) + goto out_free_group; + err = register_netdevice(dev); if (err < 0) goto out_free_group; -- cgit v1.2.3-71-gd317 From 0e5f8be1388093edc324a78ebf241170b258eba3 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 27 Mar 2008 14:25:53 -0700 Subject: [NETNS]: Compile NET /proc support only if CONFIG_NET is set. This fix broken compilation for 'allnoconfig'. This was introduced by Introduced by commit 1218854afa6f659be90b748cf1bc7badee954a35 ("[NET] NETNS: Omit seq_net_private->net without CONFIG_NET_NS.") Signed-off-by: Denis V. Lunev Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- fs/proc/proc_net.c | 2 ++ include/linux/seq_file.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 13cd7835d0df..7034facf8b8f 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -51,6 +51,7 @@ int seq_open_net(struct inode *ino, struct file *f, } EXPORT_SYMBOL_GPL(seq_open_net); +#ifdef CONFIG_NET int seq_release_net(struct inode *ino, struct file *f) { struct seq_file *seq; @@ -218,3 +219,4 @@ int __init proc_net_init(void) return register_pernet_subsys(&proc_net_ns_ops); } +#endif /* CONFIG_NET */ diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index d870a8253769..5da70c3f4417 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -63,6 +63,7 @@ extern struct list_head *seq_list_start_head(struct list_head *head, extern struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos); +#ifdef CONFIG_NET struct net; struct seq_net_private { #ifdef CONFIG_NET_NS @@ -81,6 +82,7 @@ static inline struct net *seq_file_net(struct seq_file *seq) return &init_net; #endif } +#endif /* CONFIG_NET */ #endif #endif -- cgit v1.2.3-71-gd317 From 0dde3e16485dca16eb682dd59da1a598bf62e284 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 27 Mar 2008 17:43:41 -0700 Subject: [NET]: uninline skb_put, de-bloats a lot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allyesconfig (v2.6.24-mm1): ~500 files changed ... 869 funcs, 198 +, 111003 -, diff: -110805 --- skb_put skb_put | +104 Without number of debug related CONFIGs (v2.6.25-rc2-mm1): -60744 855 funcs, 861 +, 61605 -, diff: -60744 --- skb_put skb_put | +57 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/skbuff.h | 21 +-------------------- net/core/skbuff.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7beb239d2ee0..f085955cb5a7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -892,6 +892,7 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) /* * Add data to an sk_buff */ +extern unsigned char *skb_put(struct sk_buff *skb, unsigned int len); static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) { unsigned char *tmp = skb_tail_pointer(skb); @@ -901,26 +902,6 @@ static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) return tmp; } -/** - * skb_put - add data to a buffer - * @skb: buffer to use - * @len: amount of data to add - * - * This function extends the used data area of the buffer. If this would - * exceed the total buffer size the kernel will panic. A pointer to the - * first byte of the extra data is returned. - */ -static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len) -{ - unsigned char *tmp = skb_tail_pointer(skb); - SKB_LINEAR_ASSERT(skb); - skb->tail += len; - skb->len += len; - if (unlikely(skb->tail > skb->end)) - skb_over_panic(skb, len, current_text_addr()); - return tmp; -} - static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) { skb->data -= len; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0d0fd28a9041..3402eca768f8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -857,6 +857,27 @@ free_skb: return err; } +/** + * skb_put - add data to a buffer + * @skb: buffer to use + * @len: amount of data to add + * + * This function extends the used data area of the buffer. If this would + * exceed the total buffer size the kernel will panic. A pointer to the + * first byte of the extra data is returned. + */ +unsigned char *skb_put(struct sk_buff *skb, unsigned int len) +{ + unsigned char *tmp = skb_tail_pointer(skb); + SKB_LINEAR_ASSERT(skb); + skb->tail += len; + skb->len += len; + if (unlikely(skb->tail > skb->end)) + skb_over_panic(skb, len, __builtin_return_address(0)); + return tmp; +} +EXPORT_SYMBOL(skb_put); + /* Trims skb to length len. It can change skb pointers. */ -- cgit v1.2.3-71-gd317 From 6be8ac2fdc5e69dec53913a42312a92dbfbd4907 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 27 Mar 2008 17:47:24 -0700 Subject: [NET]: uninline skb_pull, de-bloats a lot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allyesconfig (v2.6.24-mm1): -28162 354 funcs, 3005 +, 31167 -, diff: -28162 --- skb_pull Without number of debug related CONFIGs (v2.6.25-rc2-mm1): -9697 338 funcs, 221 +, 9918 -, diff: -9697 --- skb_pull skb_pull | +44 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/skbuff.h | 16 +--------------- net/core/skbuff.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f085955cb5a7..6d6cde7b243c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -927,6 +927,7 @@ static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len) return skb->data; } +extern unsigned char *skb_pull(struct sk_buff *skb, unsigned int len); static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len) { skb->len -= len; @@ -934,21 +935,6 @@ static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len) return skb->data += len; } -/** - * skb_pull - remove data from the start of a buffer - * @skb: buffer to use - * @len: amount of data to remove - * - * This function removes data from the start of a buffer, returning - * the memory to the headroom. A pointer to the next data in the buffer - * is returned. Once the data has been pulled future pushes will overwrite - * the old data. - */ -static inline unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) -{ - return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); -} - extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta); static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3402eca768f8..cf489b6329e8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -878,6 +878,22 @@ unsigned char *skb_put(struct sk_buff *skb, unsigned int len) } EXPORT_SYMBOL(skb_put); +/** + * skb_pull - remove data from the start of a buffer + * @skb: buffer to use + * @len: amount of data to remove + * + * This function removes data from the start of a buffer, returning + * the memory to the headroom. A pointer to the next data in the buffer + * is returned. Once the data has been pulled future pushes will overwrite + * the old data. + */ +unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) +{ + return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); +} +EXPORT_SYMBOL(skb_pull); + /* Trims skb to length len. It can change skb pointers. */ -- cgit v1.2.3-71-gd317 From f58518e678e5eef430c8d5cdcc7cd28d285f1980 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 27 Mar 2008 17:51:31 -0700 Subject: [NET]: uninline dev_alloc_skb, de-bloats a lot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allyesconfig (v2.6.24-mm1): -23668 392 funcs, 104 +, 23772 -, diff: -23668 --- dev_alloc_skb Without many debug CONFIGs (v2.6.25-rc2-mm1): -12178 382 funcs, 157 +, 12335 -, diff: -12178 --- dev_alloc_skb dev_alloc_skb | +37 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/skbuff.h | 17 +---------------- net/core/skbuff.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6d6cde7b243c..01a11b0c0291 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1272,22 +1272,7 @@ static inline struct sk_buff *__dev_alloc_skb(unsigned int length, return skb; } -/** - * dev_alloc_skb - allocate an skbuff for receiving - * @length: length to allocate - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has unspecified headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned if there is no free memory. Although this function - * allocates memory it can be called from an interrupt. - */ -static inline struct sk_buff *dev_alloc_skb(unsigned int length) -{ - return __dev_alloc_skb(length, GFP_ATOMIC); -} +extern struct sk_buff *dev_alloc_skb(unsigned int length); extern struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, gfp_t gfp_mask); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index cf489b6329e8..0daf5c0e5b8d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -263,6 +263,24 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, return skb; } +/** + * dev_alloc_skb - allocate an skbuff for receiving + * @length: length to allocate + * + * Allocate a new &sk_buff and assign it a usage count of one. The + * buffer has unspecified headroom built in. Users should allocate + * the headroom they think they need without accounting for the + * built in space. The built in space is used for optimisations. + * + * %NULL is returned if there is no free memory. Although this function + * allocates memory it can be called from an interrupt. + */ +struct sk_buff *dev_alloc_skb(unsigned int length) +{ + return __dev_alloc_skb(length, GFP_ATOMIC); +} +EXPORT_SYMBOL(dev_alloc_skb); + static void skb_drop_list(struct sk_buff **listp) { struct sk_buff *list = *listp; -- cgit v1.2.3-71-gd317 From c2aa270ad73d385bd6cdebf5d741bdf18a3e17ad Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 27 Mar 2008 17:52:40 -0700 Subject: [NET]: uninline skb_push, de-bloats a lot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allyesconfig (v2.6.24-mm1): -21593 356 funcs, 2418 +, 24011 -, diff: -21593 --- skb_push Without many debug related CONFIGs (v2.6.25-rc2-mm1): -13890 341 funcs, 189 +, 14079 -, diff: -13890 --- skb_push skb_push | +46 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/skbuff.h | 19 +------------------ net/core/skbuff.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 01a11b0c0291..1baf4d43bb2d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -902,6 +902,7 @@ static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) return tmp; } +extern unsigned char *skb_push(struct sk_buff *skb, unsigned int len); static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) { skb->data -= len; @@ -909,24 +910,6 @@ static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) return skb->data; } -/** - * skb_push - add data to the start of a buffer - * @skb: buffer to use - * @len: amount of data to add - * - * This function extends the used data area of the buffer at the buffer - * start. If this would exceed the total buffer headroom the kernel will - * panic. A pointer to the first byte of the extra data is returned. - */ -static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len) -{ - skb->data -= len; - skb->len += len; - if (unlikely(skb->datahead)) - skb_under_panic(skb, len, current_text_addr()); - return skb->data; -} - extern unsigned char *skb_pull(struct sk_buff *skb, unsigned int len); static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0daf5c0e5b8d..a37127b5899c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -896,6 +896,25 @@ unsigned char *skb_put(struct sk_buff *skb, unsigned int len) } EXPORT_SYMBOL(skb_put); +/** + * skb_push - add data to the start of a buffer + * @skb: buffer to use + * @len: amount of data to add + * + * This function extends the used data area of the buffer at the buffer + * start. If this would exceed the total buffer headroom the kernel will + * panic. A pointer to the first byte of the extra data is returned. + */ +unsigned char *skb_push(struct sk_buff *skb, unsigned int len) +{ + skb->data -= len; + skb->len += len; + if (unlikely(skb->datahead)) + skb_under_panic(skb, len, __builtin_return_address(0)); + return skb->data; +} +EXPORT_SYMBOL(skb_push); + /** * skb_pull - remove data from the start of a buffer * @skb: buffer to use -- cgit v1.2.3-71-gd317 From 419ae74ecc9494e58928a5c6652f4c072f3ca744 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 27 Mar 2008 17:54:01 -0700 Subject: [NET]: uninline skb_trim, de-bloats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allyesconfig (v2.6.24-mm1): -10976 209 funcs, 123 +, 11099 -, diff: -10976 --- skb_trim Without number of debug related CONFIGs (v2.6.25-rc2-mm1): -7360 192 funcs, 131 +, 7491 -, diff: -7360 --- skb_trim skb_trim | +42 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/skbuff.h | 16 +--------------- net/core/skbuff.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1baf4d43bb2d..ff72145d5d9e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1158,21 +1158,7 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len) skb_set_tail_pointer(skb, len); } -/** - * skb_trim - remove end from a buffer - * @skb: buffer to alter - * @len: new length - * - * Cut the length of a buffer down by removing data from the tail. If - * the buffer is already under the length specified it is not modified. - * The skb must be linear. - */ -static inline void skb_trim(struct sk_buff *skb, unsigned int len) -{ - if (skb->len > len) - __skb_trim(skb, len); -} - +extern void skb_trim(struct sk_buff *skb, unsigned int len); static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a37127b5899c..86e5682728be 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -931,6 +931,22 @@ unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) } EXPORT_SYMBOL(skb_pull); +/** + * skb_trim - remove end from a buffer + * @skb: buffer to alter + * @len: new length + * + * Cut the length of a buffer down by removing data from the tail. If + * the buffer is already under the length specified it is not modified. + * The skb must be linear. + */ +void skb_trim(struct sk_buff *skb, unsigned int len) +{ + if (skb->len > len) + __skb_trim(skb, len); +} +EXPORT_SYMBOL(skb_trim); + /* Trims skb to length len. It can change skb pointers. */ -- cgit v1.2.3-71-gd317 From 1567ca7eec7664b8be3b07755ac59dc1b1ec76cb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 28 Mar 2008 15:53:11 -0700 Subject: [NET]: Protect device namespace inlines with CONFIG_NET Include sites should not be bothered by whether CONFIG_NET is set or not when trying to include benign files like linux/etherdevice.h et al. From a report by Stephen Rothwell. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3b54f8a2c055..8576ca928dae 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -741,6 +741,7 @@ struct net_device #define NETDEV_ALIGN 32 #define NETDEV_ALIGN_CONST (NETDEV_ALIGN - 1) +#ifdef CONFIG_NET /* * Net namespace inlines */ @@ -761,6 +762,7 @@ void dev_net_set(struct net_device *dev, struct net *net) dev->nd_net = net; #endif } +#endif /** * netdev_priv - access network device private data -- cgit v1.2.3-71-gd317 From 095d911201b0741e7f326d269a005dba55985acf Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 28 Mar 2008 16:39:58 -0700 Subject: [LIB]: Drop the pcounter itself. The knock-out. The pcounter abstraction is not used any longer in the kernel. Not sure whether this should go via netdev tree, but as far as I remember it was added via this one, and besides Eric thinks that Andrew shouldn't mind this. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/pcounter.h | 74 ------------------------------------------------ lib/Makefile | 1 - lib/pcounter.c | 58 ------------------------------------- 3 files changed, 133 deletions(-) delete mode 100644 include/linux/pcounter.h delete mode 100644 lib/pcounter.c (limited to 'include/linux') diff --git a/include/linux/pcounter.h b/include/linux/pcounter.h deleted file mode 100644 index a82d9f2628ca..000000000000 --- a/include/linux/pcounter.h +++ /dev/null @@ -1,74 +0,0 @@ -#ifndef __LINUX_PCOUNTER_H -#define __LINUX_PCOUNTER_H -/* - * Using a dynamic percpu 'int' variable has a cost : - * 1) Extra dereference - * Current per_cpu_ptr() implementation uses an array per 'percpu variable'. - * 2) memory cost of NR_CPUS*(32+sizeof(void *)) instead of num_possible_cpus()*4 - * - * This pcounter implementation is an abstraction to be able to use - * either a static or a dynamic per cpu variable. - * One dynamic per cpu variable gets a fast & cheap implementation, we can - * change pcounter implementation too. - */ -struct pcounter { -#ifdef CONFIG_SMP - void (*add)(struct pcounter *self, int inc); - int (*getval)(const struct pcounter *self, int cpu); - int *per_cpu_values; -#else - int val; -#endif -}; - -#ifdef CONFIG_SMP -#include - -#define DEFINE_PCOUNTER(NAME) \ -static DEFINE_PER_CPU(int, NAME##_pcounter_values); \ -static void NAME##_pcounter_add(struct pcounter *self, int val) \ -{ \ - __get_cpu_var(NAME##_pcounter_values) += val; \ -} \ -static int NAME##_pcounter_getval(const struct pcounter *self, int cpu) \ -{ \ - return per_cpu(NAME##_pcounter_values, cpu); \ -} \ - -#define PCOUNTER_MEMBER_INITIALIZER(NAME, MEMBER) \ - MEMBER = { \ - .add = NAME##_pcounter_add, \ - .getval = NAME##_pcounter_getval, \ - } - - -static inline void pcounter_add(struct pcounter *self, int inc) -{ - self->add(self, inc); -} - -extern int pcounter_getval(const struct pcounter *self); -extern int pcounter_alloc(struct pcounter *self); -extern void pcounter_free(struct pcounter *self); - - -#else /* CONFIG_SMP */ - -static inline void pcounter_add(struct pcounter *self, int inc) -{ - self->val += inc; -} - -static inline int pcounter_getval(const struct pcounter *self) -{ - return self->val; -} - -#define DEFINE_PCOUNTER(NAME) -#define PCOUNTER_MEMBER_INITIALIZER(NAME, MEMBER) -#define pcounter_alloc(self) 0 -#define pcounter_free(self) - -#endif /* CONFIG_SMP */ - -#endif /* __LINUX_PCOUNTER_H */ diff --git a/lib/Makefile b/lib/Makefile index 23de261a4c83..4d059d469554 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -61,7 +61,6 @@ obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o obj-$(CONFIG_SMP) += percpu_counter.o -obj-$(CONFIG_SMP) += pcounter.o obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o diff --git a/lib/pcounter.c b/lib/pcounter.c deleted file mode 100644 index 9b56807da93b..000000000000 --- a/lib/pcounter.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Define default pcounter functions - * Note that often used pcounters use dedicated functions to get a speed increase. - * (see DEFINE_PCOUNTER/REF_PCOUNTER_MEMBER) - */ - -#include -#include -#include -#include - -static void pcounter_dyn_add(struct pcounter *self, int inc) -{ - per_cpu_ptr(self->per_cpu_values, smp_processor_id())[0] += inc; -} - -static int pcounter_dyn_getval(const struct pcounter *self, int cpu) -{ - return per_cpu_ptr(self->per_cpu_values, cpu)[0]; -} - -int pcounter_getval(const struct pcounter *self) -{ - int res = 0, cpu; - - for_each_possible_cpu(cpu) - res += self->getval(self, cpu); - - return res; -} -EXPORT_SYMBOL_GPL(pcounter_getval); - -int pcounter_alloc(struct pcounter *self) -{ - int rc = 0; - if (self->add == NULL) { - self->per_cpu_values = alloc_percpu(int); - if (self->per_cpu_values != NULL) { - self->add = pcounter_dyn_add; - self->getval = pcounter_dyn_getval; - } else - rc = 1; - } - return rc; -} -EXPORT_SYMBOL_GPL(pcounter_alloc); - -void pcounter_free(struct pcounter *self) -{ - if (self->per_cpu_values != NULL) { - free_percpu(self->per_cpu_values); - self->per_cpu_values = NULL; - self->getval = NULL; - self->add = NULL; - } -} -EXPORT_SYMBOL_GPL(pcounter_free); - -- cgit v1.2.3-71-gd317 From 9307b570a745da4f2d83195f5337927e98221bb2 Mon Sep 17 00:00:00 2001 From: "S.Caglar Onur" Date: Fri, 28 Mar 2008 14:41:24 -0700 Subject: drivers/net/arcnet/arcnet.c: use time_* macros The functions time_before, time_before_eq, time_after, and time_after_eq are more robust for comparing jiffies against other values. So use the time_after() macro, defined in linux/jiffies.h, which deals with wrapping correctly. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: S.Caglar Onur Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- drivers/net/arcnet/arcnet.c | 5 +++-- include/linux/arcdevice.h | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index c59c8067de99..bdc4c0bb56d9 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -940,7 +940,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) /* is the RECON info empty or old? */ if (!lp->first_recon || !lp->last_recon || - jiffies - lp->last_recon > HZ * 10) { + time_after(jiffies, lp->last_recon + HZ * 10)) { if (lp->network_down) BUGMSG(D_NORMAL, "reconfiguration detected: cabling restored?\n"); lp->first_recon = lp->last_recon = jiffies; @@ -974,7 +974,8 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) lp->num_recons = 1; } } - } else if (lp->network_down && jiffies - lp->last_recon > HZ * 10) { + } else if (lp->network_down && + time_after(jiffies, lp->last_recon + HZ * 10)) { if (lp->network_down) BUGMSG(D_NORMAL, "cabling restored?\n"); lp->first_recon = lp->last_recon = 0; diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index fde675872c56..a1916078fd08 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -283,8 +283,8 @@ struct arcnet_local { int next_buf, first_free_buf; /* network "reconfiguration" handling */ - time_t first_recon, /* time of "first" RECON message to count */ - last_recon; /* time of most recent RECON */ + unsigned long first_recon; /* time of "first" RECON message to count */ + unsigned long last_recon; /* time of most recent RECON */ int num_recons; /* number of RECONs between first and last. */ bool network_down; /* do we think the network is down? */ -- cgit v1.2.3-71-gd317 From 3edf8fa5ccf10688a9280b5cbca8ed3947c42866 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 31 Mar 2008 00:28:14 -0700 Subject: [NET]: Fix allnoconfig build on powerpc and avr32 As reported by Haavard Skinnemoen and Stephen Rothwell: > allnoconfig fails with > > include/linux/netdevice.h:843: error: implicit declaration of function 'dev_net' > > which seems to be because the definition of dev_net is inside #ifdef > CONFIG_NET, while next_net_device, which calls it, is not. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8576ca928dae..993758f924be 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -827,6 +827,7 @@ struct packet_type { extern rwlock_t dev_base_lock; /* Device list lock */ +#ifdef CONFIG_NET #define for_each_netdev(net, d) \ list_for_each_entry(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_safe(net, d, n) \ @@ -850,6 +851,7 @@ static inline struct net_device *first_net_device(struct net *net) return list_empty(&net->dev_base_head) ? NULL : net_device_entry(net->dev_base_head.next); } +#endif extern int netdev_boot_setup_check(struct net_device *dev); extern unsigned long netdev_boot_base(const char *prefix, int unit); -- cgit v1.2.3-71-gd317 From c0f39322c335412339dec16ebfd2a05ceba5ebcf Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 2 Apr 2008 00:10:28 -0700 Subject: [NETNS]: Do not include net/net_namespace.h from seq_file.h Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/linux/seq_file.h | 22 ---------------------- include/linux/seq_file_net.h | 27 +++++++++++++++++++++++++++ include/net/net_namespace.h | 2 ++ 3 files changed, 29 insertions(+), 22 deletions(-) create mode 100644 include/linux/seq_file_net.h (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 5da70c3f4417..1da1e6208a0a 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -5,7 +5,6 @@ #include #include #include -#include struct seq_operations; struct file; @@ -63,26 +62,5 @@ extern struct list_head *seq_list_start_head(struct list_head *head, extern struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos); -#ifdef CONFIG_NET -struct net; -struct seq_net_private { -#ifdef CONFIG_NET_NS - struct net *net; -#endif -}; - -int seq_open_net(struct inode *, struct file *, - const struct seq_operations *, int); -int seq_release_net(struct inode *, struct file *); -static inline struct net *seq_file_net(struct seq_file *seq) -{ -#ifdef CONFIG_NET_NS - return ((struct seq_net_private *)seq->private)->net; -#else - return &init_net; -#endif -} -#endif /* CONFIG_NET */ - #endif #endif diff --git a/include/linux/seq_file_net.h b/include/linux/seq_file_net.h new file mode 100644 index 000000000000..4ac52542a563 --- /dev/null +++ b/include/linux/seq_file_net.h @@ -0,0 +1,27 @@ +#ifndef __SEQ_FILE_NET_H__ +#define __SEQ_FILE_NET_H__ + +#include + +struct net; +extern struct net init_net; + +struct seq_net_private { +#ifdef CONFIG_NET_NS + struct net *net; +#endif +}; + +int seq_open_net(struct inode *, struct file *, + const struct seq_operations *, int); +int seq_release_net(struct inode *, struct file *); +static inline struct net *seq_file_net(struct seq_file *seq) +{ +#ifdef CONFIG_NET_NS + return ((struct seq_net_private *)seq->private)->net; +#else + return &init_net; +#endif +} + +#endif diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 4a37037b1d17..6c9a48a46685 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -61,6 +61,8 @@ struct net { #ifdef CONFIG_NET +#include + /* Init's network namespace */ extern struct net init_net; #define INIT_NET_NS(net_ns) .net_ns = &init_net, -- cgit v1.2.3-71-gd317 From fadf6bf06069138f8e97c9a963be38348ba2708b Mon Sep 17 00:00:00 2001 From: "Templin, Fred L" Date: Tue, 11 Mar 2008 18:35:59 -0400 Subject: [IPV6] SIT: Add PRL management for ISATAP. This patch updates the Linux the Intra-Site Automatic Tunnel Addressing Protocol (ISATAP) implementation. It places the ISATAP potential router list (PRL) in the kernel and adds three new private ioctls for PRL management. [Add several changes of structure name, constant names etc. - yoshfuji] Signed-off-by: Fred L. Templin Signed-off-by: YOSHIFUJI Hideaki --- include/linux/if_tunnel.h | 18 ++++- include/linux/skbuff.h | 3 +- include/net/ipip.h | 7 ++ include/net/ndisc.h | 9 +++ net/ipv6/ndisc.c | 24 ++++++ net/ipv6/route.c | 2 - net/ipv6/sit.c | 186 +++++++++++++++++++++++++++++++++++----------- 7 files changed, 199 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index 228eb4eb3129..f20c224d544c 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -7,6 +7,9 @@ #define SIOCADDTUNNEL (SIOCDEVPRIVATE + 1) #define SIOCDELTUNNEL (SIOCDEVPRIVATE + 2) #define SIOCCHGTUNNEL (SIOCDEVPRIVATE + 3) +#define SIOCADDPRL (SIOCDEVPRIVATE + 5) +#define SIOCDELPRL (SIOCDEVPRIVATE + 6) +#define SIOCCHGPRL (SIOCDEVPRIVATE + 7) #define GRE_CSUM __constant_htons(0x8000) #define GRE_ROUTING __constant_htons(0x4000) @@ -17,9 +20,6 @@ #define GRE_FLAGS __constant_htons(0x00F8) #define GRE_VERSION __constant_htons(0x0007) -/* i_flags values for SIT mode */ -#define SIT_ISATAP 0x0001 - struct ip_tunnel_parm { char name[IFNAMSIZ]; @@ -31,4 +31,16 @@ struct ip_tunnel_parm struct iphdr iph; }; +/* SIT-mode i_flags */ +#define SIT_ISATAP 0x0001 + +struct ip_tunnel_prl { + __be32 addr; + __u16 flags; + __u16 __reserved; +}; + +/* PRL flags */ +#define PRL_DEFAULT 0x0001 + #endif /* _IF_TUNNEL_H_ */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ff72145d5d9e..e10e55c9b081 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -313,7 +313,8 @@ struct sk_buff { __u16 tc_verd; /* traffic control verdict */ #endif #endif - /* 2 byte hole */ + __u8 ndisc_nodetype:2; + /* 14 bit hole */ #ifdef CONFIG_NET_DMA dma_cookie_t dma_cookie; diff --git a/include/net/ipip.h b/include/net/ipip.h index 549e132bca9c..205536a014e8 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -24,6 +24,13 @@ struct ip_tunnel int mlink; struct ip_tunnel_parm parms; + struct ip_tunnel_prl_entry *prl; /* potential router list */ +}; + +struct ip_tunnel_prl_entry +{ + struct ip_tunnel_prl_entry *next; + struct ip_tunnel_prl entry; }; #define IPTUNNEL_XMIT() do { \ diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 5aedf324de66..9f2bae68d28c 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -11,6 +11,15 @@ #define NDISC_NEIGHBOUR_ADVERTISEMENT 136 #define NDISC_REDIRECT 137 +/* + * Router type: cross-layer information from link-layer to + * IPv6 layer reported by certain link types (e.g., RFC4214). + */ +#define NDISC_NODETYPE_UNSPEC 0 /* unspecified (default) */ +#define NDISC_NODETYPE_HOST 1 /* host or unauthorized router */ +#define NDISC_NODETYPE_NODEFAULT 2 /* non-default router */ +#define NDISC_NODETYPE_DEFAULT 3 /* default router */ + /* * ndisc options */ diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 510aa747a404..53b546019fd5 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1092,6 +1092,12 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } + if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) { + ND_PRINTK2(KERN_WARNING + "ICMPv6 RA: from host or unauthorized router\n"); + return; + } + /* * set the RA_RECV flag in the interface */ @@ -1115,6 +1121,10 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } + /* skip link-specific parameters from interior routers */ + if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) + goto skip_linkparms; + if (in6_dev->if_flags & IF_RS_SENT) { /* * flag that an RA was received after an RS was sent @@ -1229,6 +1239,8 @@ skip_defrtr: } } +skip_linkparms: + /* * Process options. */ @@ -1268,6 +1280,10 @@ skip_defrtr: } #endif + /* skip link-specific ndopts from interior routers */ + if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) + goto out; + if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) { struct nd_opt_hdr *p; for (p = ndopts.nd_opts_pi; @@ -1331,6 +1347,14 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) int optlen; u8 *lladdr = NULL; + switch (skb->ndisc_nodetype) { + case NDISC_NODETYPE_HOST: + case NDISC_NODETYPE_NODEFAULT: + ND_PRINTK2(KERN_WARNING + "ICMPv6 Redirect: from host or unauthorized router\n"); + return; + } + if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK2(KERN_WARNING "ICMPv6 Redirect: source address is not link-local.\n"); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index cd82b6db35ff..f17b2f61891e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1699,8 +1699,6 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d return rt; } -EXPORT_SYMBOL(rt6_get_dflt_router); - struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1b8196c8d145..4786419ade0e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -16,7 +16,7 @@ * Changes: * Roger Venning : 6to4 support * Nate Thompson : 6to4 support - * Fred L. Templin : isatap support + * Fred Templin : isatap support */ #include @@ -197,6 +197,119 @@ failed: return NULL; } +static struct ip_tunnel_prl_entry * +ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) +{ + struct ip_tunnel_prl_entry *p = (struct ip_tunnel_prl_entry *)NULL; + + for (p = t->prl; p; p = p->next) + if (p->entry.addr == addr) + break; + return p; + +} + +static int +ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) +{ + struct ip_tunnel_prl_entry *p; + + for (p = t->prl; p; p = p->next) { + if (p->entry.addr == a->addr) { + if (chg) { + p->entry = *a; + return 0; + } + return -EEXIST; + } + } + + if (chg) + return -ENXIO; + + p = kzalloc(sizeof(struct ip_tunnel_prl_entry), GFP_KERNEL); + if (!p) + return -ENOBUFS; + + p->entry = *a; + p->next = t->prl; + t->prl = p; + return 0; +} + +static int +ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) +{ + struct ip_tunnel_prl_entry *x, **p; + + if (a) { + for (p = &t->prl; *p; p = &(*p)->next) { + if ((*p)->entry.addr == a->addr) { + x = *p; + *p = x->next; + kfree(x); + return 0; + } + } + return -ENXIO; + } else { + while (t->prl) { + x = t->prl; + t->prl = t->prl->next; + kfree(x); + } + } + return 0; +} + +/* copied directly from anycast.c */ +static int +ipip6_onlink(struct in6_addr *addr, struct net_device *dev) +{ + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + int onlink; + + onlink = 0; + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) { + read_lock_bh(&idev->lock); + for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) { + onlink = ipv6_prefix_equal(addr, &ifa->addr, + ifa->prefix_len); + if (onlink) + break; + } + read_unlock_bh(&idev->lock); + } + rcu_read_unlock(); + return onlink; +} + +static int +isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t) +{ + struct ip_tunnel_prl_entry *p = ipip6_tunnel_locate_prl(t, iph->saddr); + int ok = 1; + + if (p) { + if (p->entry.flags & PRL_DEFAULT) + skb->ndisc_nodetype = NDISC_NODETYPE_DEFAULT; + else + skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT; + } else { + struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr; + if (ipv6_addr_is_isatap(addr6) && + (addr6->s6_addr32[3] == iph->saddr) && + ipip6_onlink(addr6, t->dev)) + skb->ndisc_nodetype = NDISC_NODETYPE_HOST; + else + ok = 0; + } + return ok; +} + static void ipip6_tunnel_uninit(struct net_device *dev) { if (dev == ipip6_fb_tunnel_dev) { @@ -206,6 +319,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev) dev_put(dev); } else { ipip6_tunnel_unlink(netdev_priv(dev)); + ipip6_tunnel_del_prl(netdev_priv(dev), 0); dev_put(dev); } } @@ -365,48 +479,6 @@ static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) IP6_ECN_set_ce(ipv6_hdr(skb)); } -/* ISATAP (RFC4214) - check source address */ -static int -isatap_srcok(struct sk_buff *skb, struct iphdr *iph, struct net_device *dev) -{ - struct neighbour *neigh; - struct dst_entry *dst; - struct rt6_info *rt; - struct flowi fl; - struct in6_addr *addr6; - struct in6_addr rtr; - struct ipv6hdr *iph6; - int ok = 0; - - /* from onlink default router */ - ipv6_addr_set(&rtr, htonl(0xFE800000), 0, 0, 0); - ipv6_isatap_eui64(rtr.s6_addr + 8, iph->saddr); - if ((rt = rt6_get_dflt_router(&rtr, dev))) { - dst_release(&rt->u.dst); - return 1; - } - - iph6 = ipv6_hdr(skb); - memset(&fl, 0, sizeof(fl)); - fl.proto = iph6->nexthdr; - ipv6_addr_copy(&fl.fl6_dst, &iph6->saddr); - fl.oif = dev->ifindex; - security_skb_classify_flow(skb, &fl); - - dst = ip6_route_output(&init_net, NULL, &fl); - if (!dst->error && (dst->dev == dev) && (neigh = dst->neighbour)) { - - addr6 = (struct in6_addr*)&neigh->primary_key; - - /* from correct previous hop */ - if (ipv6_addr_is_isatap(addr6) && - (addr6->s6_addr32[3] == iph->saddr)) - ok = 1; - } - dst_release(dst); - return ok; -} - static int ipip6_rcv(struct sk_buff *skb) { struct iphdr *iph; @@ -427,7 +499,7 @@ static int ipip6_rcv(struct sk_buff *skb) skb->pkt_type = PACKET_HOST; if ((tunnel->dev->priv_flags & IFF_ISATAP) && - !isatap_srcok(skb, iph, tunnel->dev)) { + !isatap_chksrc(skb, iph, tunnel)) { tunnel->stat.rx_errors++; read_unlock(&ipip6_lock); kfree_skb(skb); @@ -707,6 +779,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) { int err = 0; struct ip_tunnel_parm p; + struct ip_tunnel_prl prl; struct ip_tunnel *t; switch (cmd) { @@ -806,6 +879,31 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = 0; break; + case SIOCADDPRL: + case SIOCDELPRL: + case SIOCCHGPRL: + err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + goto done; + err = -EINVAL; + if (dev == ipip6_fb_tunnel_dev) + goto done; + err = -EFAULT; + if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) + goto done; + err = -ENOENT; + if (!(t = netdev_priv(dev))) + goto done; + + ipip6_tunnel_unlink(t); + if (cmd == SIOCDELPRL) + err = ipip6_tunnel_del_prl(t, &prl); + else + err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); + ipip6_tunnel_link(t); + netdev_state_change(dev); + break; + default: err = -EINVAL; } -- cgit v1.2.3-71-gd317 From 300aaeeaab5f447fcf40e911afe96df3de28f0db Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 24 Mar 2008 18:28:39 +0900 Subject: [IPV6] SIT: Add SIOCGETPRL ioctl to get/dump PRL. Signed-off-by: YOSHIFUJI Hideaki --- include/linux/if_tunnel.h | 4 ++ include/net/ipip.h | 5 ++- net/ipv6/sit.c | 96 ++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 95 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index f20c224d544c..f1fbe9c930d7 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -7,6 +7,7 @@ #define SIOCADDTUNNEL (SIOCDEVPRIVATE + 1) #define SIOCDELTUNNEL (SIOCDEVPRIVATE + 2) #define SIOCCHGTUNNEL (SIOCDEVPRIVATE + 3) +#define SIOCGETPRL (SIOCDEVPRIVATE + 4) #define SIOCADDPRL (SIOCDEVPRIVATE + 5) #define SIOCDELPRL (SIOCDEVPRIVATE + 6) #define SIOCCHGPRL (SIOCDEVPRIVATE + 7) @@ -38,6 +39,9 @@ struct ip_tunnel_prl { __be32 addr; __u16 flags; __u16 __reserved; + __u32 datalen; + __u32 __reserved2; + void __user *data; }; /* PRL flags */ diff --git a/include/net/ipip.h b/include/net/ipip.h index 205536a014e8..633ed4def8e3 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -24,13 +24,16 @@ struct ip_tunnel int mlink; struct ip_tunnel_parm parms; + struct ip_tunnel_prl_entry *prl; /* potential router list */ + unsigned int prl_count; /* # of entries in PRL */ }; struct ip_tunnel_prl_entry { struct ip_tunnel_prl_entry *next; - struct ip_tunnel_prl entry; + __be32 addr; + u16 flags; }; #define IPTUNNEL_XMIT() do { \ diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 84c1ed246afb..08a483a8de50 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -203,12 +203,73 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) struct ip_tunnel_prl_entry *p = (struct ip_tunnel_prl_entry *)NULL; for (p = t->prl; p; p = p->next) - if (p->entry.addr == addr) + if (p->addr == addr) break; return p; } +static int ipip6_tunnel_get_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) +{ + struct ip_tunnel_prl *kp; + struct ip_tunnel_prl_entry *prl; + unsigned int cmax, c = 0, ca, len; + int ret = 0; + + cmax = a->datalen / sizeof(*a); + if (cmax > 1 && a->addr != htonl(INADDR_ANY)) + cmax = 1; + + /* For simple GET or for root users, + * we try harder to allocate. + */ + kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ? + kcalloc(cmax, sizeof(*kp), GFP_KERNEL) : + NULL; + + read_lock(&ipip6_lock); + + ca = t->prl_count < cmax ? t->prl_count : cmax; + + if (!kp) { + /* We don't try hard to allocate much memory for + * non-root users. + * For root users, retry allocating enough memory for + * the answer. + */ + kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC); + if (!kp) { + ret = -ENOMEM; + goto out; + } + } + + c = 0; + for (prl = t->prl; prl; prl = prl->next) { + if (c > cmax) + break; + if (a->addr != htonl(INADDR_ANY) && prl->addr != a->addr) + continue; + kp[c].addr = prl->addr; + kp[c].flags = prl->flags; + c++; + if (a->addr != htonl(INADDR_ANY)) + break; + } +out: + read_unlock(&ipip6_lock); + + len = sizeof(*kp) * c; + ret = len ? copy_to_user(a->data, kp, len) : 0; + + kfree(kp); + if (ret) + return -EFAULT; + + a->datalen = len; + return 0; +} + static int ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) { @@ -221,7 +282,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) write_lock(&ipip6_lock); for (p = t->prl; p; p = p->next) { - if (p->entry.addr == a->addr) { + if (p->addr == a->addr) { if (chg) goto update; err = -EEXIST; @@ -242,8 +303,10 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) p->next = t->prl; t->prl = p; + t->prl_count++; update: - p->entry = *a; + p->addr = a->addr; + p->flags = a->flags; out: write_unlock(&ipip6_lock); return err; @@ -259,10 +322,11 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) if (a && a->addr != htonl(INADDR_ANY)) { for (p = &t->prl; *p; p = &(*p)->next) { - if ((*p)->entry.addr == a->addr) { + if ((*p)->addr == a->addr) { x = *p; *p = x->next; kfree(x); + t->prl_count--; goto out; } } @@ -272,6 +336,7 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) x = t->prl; t->prl = t->prl->next; kfree(x); + t->prl_count--; } } out: @@ -313,7 +378,7 @@ isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t) read_lock(&ipip6_lock); p = __ipip6_tunnel_locate_prl(t, iph->saddr); if (p) { - if (p->entry.flags & PRL_DEFAULT) + if (p->flags & PRL_DEFAULT) skb->ndisc_nodetype = NDISC_NODETYPE_DEFAULT; else skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT; @@ -899,11 +964,12 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = 0; break; + case SIOCGETPRL: case SIOCADDPRL: case SIOCDELPRL: case SIOCCHGPRL: err = -EPERM; - if (!capable(CAP_NET_ADMIN)) + if (cmd != SIOCGETPRL && !capable(CAP_NET_ADMIN)) goto done; err = -EINVAL; if (dev == ipip6_fb_tunnel_dev) @@ -915,11 +981,23 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) if (!(t = netdev_priv(dev))) goto done; - if (cmd == SIOCDELPRL) + switch (cmd) { + case SIOCGETPRL: + err = ipip6_tunnel_get_prl(t, &prl); + if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, + &prl, sizeof(prl))) + err = -EFAULT; + break; + case SIOCDELPRL: err = ipip6_tunnel_del_prl(t, &prl); - else + break; + case SIOCADDPRL: + case SIOCCHGPRL: err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); - netdev_state_change(dev); + break; + } + if (cmd != SIOCGETPRL) + netdev_state_change(dev); break; default: -- cgit v1.2.3-71-gd317 From de357cc01334a468e4d5b7ba66a17b0d3ca9d63e Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Sat, 15 Mar 2008 23:59:18 -0400 Subject: [IPV6] NDISC: Don't rely on node-type hint from L2 unless required. Signed-off-by: YOSHIFUJI Hideaki --- include/linux/skbuff.h | 2 ++ net/ipv6/Kconfig | 4 ++++ net/ipv6/ndisc.c | 10 ++++++++++ 3 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e10e55c9b081..e517701c25ba 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -313,7 +313,9 @@ struct sk_buff { __u16 tc_verd; /* traffic control verdict */ #endif #endif +#ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; +#endif /* 14 bit hole */ #ifdef CONFIG_NET_DMA diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 47263e45bacb..7d2e7f0941ac 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -168,6 +168,7 @@ config IPV6_SIT tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)" depends on IPV6 select INET_TUNNEL + select IPV6_NDISC_NODETYPE default y ---help--- Tunneling means encapsulating data of one protocol type within @@ -178,6 +179,9 @@ config IPV6_SIT Saying M here will produce a module called sit.ko. If unsure, say Y. +config IPV6_NDISC_NODETYPE + bool + config IPV6_TUNNEL tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)" select INET6_TUNNEL diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 16273e11e53d..c400b874097a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1092,11 +1092,13 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } +#ifdef CONFIG_IPV6_NDISC_NODETYPE if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) { ND_PRINTK2(KERN_WARNING "ICMPv6 RA: from host or unauthorized router\n"); return; } +#endif /* * set the RA_RECV flag in the interface @@ -1121,9 +1123,11 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } +#ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific parameters from interior routers */ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) goto skip_linkparms; +#endif if (in6_dev->if_flags & IF_RS_SENT) { /* @@ -1239,7 +1243,9 @@ skip_defrtr: } } +#ifdef CONFIG_IPV6_NDISC_NODETYPE skip_linkparms: +#endif /* * Process options. @@ -1286,9 +1292,11 @@ skip_linkparms: } #endif +#ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific ndopts from interior routers */ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) goto out; +#endif if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) { struct nd_opt_hdr *p; @@ -1353,6 +1361,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) int optlen; u8 *lladdr = NULL; +#ifdef CONFIG_IPV6_NDISC_NODETYPE switch (skb->ndisc_nodetype) { case NDISC_NODETYPE_HOST: case NDISC_NODETYPE_NODEFAULT: @@ -1360,6 +1369,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) "ICMPv6 Redirect: from host or unauthorized router\n"); return; } +#endif if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK2(KERN_WARNING -- cgit v1.2.3-71-gd317 From a4aa834a9165150252c5cd953faab4de29d51b87 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 3 Apr 2008 13:04:33 -0700 Subject: [NETNS]: Declare init_net even without CONFIG_NET defined. This does not look good, but there is no other choice. The compilation without CONFIG_NET is broken and can not be fixed with ease. After that there is no need for the following commits: 1567ca7eec7664b8be3b07755ac59dc1b1ec76cb 3edf8fa5ccf10688a9280b5cbca8ed3947c42866 2d38f9a4f8d2ebdc799f03eecf82345825495711 Revert them. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ---- include/net/net_namespace.h | 3 ++- lib/kobject_uevent.c | 2 -- 3 files changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c36c76caf20b..8b17ed40dea2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -741,7 +741,6 @@ struct net_device #define NETDEV_ALIGN 32 #define NETDEV_ALIGN_CONST (NETDEV_ALIGN - 1) -#ifdef CONFIG_NET /* * Net namespace inlines */ @@ -762,7 +761,6 @@ void dev_net_set(struct net_device *dev, struct net *net) dev->nd_net = net; #endif } -#endif /** * netdev_priv - access network device private data @@ -827,7 +825,6 @@ struct packet_type { extern rwlock_t dev_base_lock; /* Device list lock */ -#ifdef CONFIG_NET #define for_each_netdev(net, d) \ list_for_each_entry(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_safe(net, d, n) \ @@ -851,7 +848,6 @@ static inline struct net_device *first_net_device(struct net *net) return list_empty(&net->dev_base_head) ? NULL : net_device_entry(net->dev_base_head.next); } -#endif extern int netdev_boot_setup_check(struct net_device *dev); extern unsigned long netdev_boot_base(const char *prefix, int unit); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 6c9a48a46685..0ab62ed2fdef 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -60,11 +60,12 @@ struct net { }; -#ifdef CONFIG_NET #include /* Init's network namespace */ extern struct net init_net; + +#ifdef CONFIG_NET #define INIT_NET_NS(net_ns) .net_ns = &init_net, extern struct net *copy_net_ns(unsigned long flags, struct net *net_ns); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 0d56dad319ad..b06185ed1895 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -19,12 +19,10 @@ #include #include -#ifdef CONFIG_NET #include #include #include #include -#endif u64 uevent_seqnum; -- cgit v1.2.3-71-gd317 From 2e8046271f68198dd37451017c1a4a2432e4ec68 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 3 Apr 2008 09:22:09 +0900 Subject: [IPV4] MROUTE: Move PIM definitions to . Signed-off-by: YOSHIFUJI Hideaki --- include/linux/Kbuild | 1 + include/linux/mroute.h | 22 +--------------------- include/linux/pim.h | 29 +++++++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 21 deletions(-) create mode 100644 include/linux/pim.h (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 9cdd12a9e843..84736acb4b99 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -289,6 +289,7 @@ unifdef-y += parport.h unifdef-y += patchkey.h unifdef-y += pci.h unifdef-y += personality.h +unifdef-y += pim.h unifdef-y += pktcdvd.h unifdef-y += pmu.h unifdef-y += poll.h diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 35a8277ec1bd..c41b4217ae3b 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -3,6 +3,7 @@ #include #include +#include /* * Based on the MROUTING 3.5 defines primarily to keep @@ -210,27 +211,6 @@ struct mfc_cache #define IGMPMSG_WHOLEPKT 3 /* For PIM Register processing */ #ifdef __KERNEL__ - -#define PIM_V1_VERSION __constant_htonl(0x10000000) -#define PIM_V1_REGISTER 1 - -#define PIM_VERSION 2 -#define PIM_REGISTER 1 - -#define PIM_NULL_REGISTER __constant_htonl(0x40000000) - -/* PIMv2 register message header layout (ietf-draft-idmr-pimvsm-v2-00.ps */ - -struct pimreghdr -{ - __u8 type; - __u8 reserved; - __be16 csum; - __be32 flags; -}; - -extern int pim_rcv_v1(struct sk_buff *); - struct rtmsg; extern int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait); #endif diff --git a/include/linux/pim.h b/include/linux/pim.h new file mode 100644 index 000000000000..6f689dc85503 --- /dev/null +++ b/include/linux/pim.h @@ -0,0 +1,29 @@ +#ifndef __LINUX_PIM_H +#define __LINUX_PIM_H + +#include + +/* Message types - V1 */ +#define PIM_V1_VERSION __constant_htonl(0x10000000) +#define PIM_V1_REGISTER 1 + +/* Message types - V2 */ +#define PIM_VERSION 2 +#define PIM_REGISTER 1 + +#if defined(__KERNEL__) +#define PIM_NULL_REGISTER __constant_htonl(0x40000000) + +/* PIMv2 register message header layout (ietf-draft-idmr-pimvsm-v2-00.ps */ +struct pimreghdr +{ + __u8 type; + __u8 reserved; + __be16 csum; + __be32 flags; +}; + +struct sk_buff; +extern int pim_rcv_v1(struct sk_buff *); +#endif +#endif -- cgit v1.2.3-71-gd317 From 80a9492a33dd7d852465625022d56ff76d62174d Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 3 Apr 2008 09:22:52 +0900 Subject: [IPV4] MROUTE: Adjust include files for user-space. needs . Avoid including in user-space, which conflicts with standard . Add basic struct and constant in . Signed-off-by: YOSHIFUJI Hideaki --- include/linux/mroute.h | 3 +++ include/linux/pim.h | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index c41b4217ae3b..de4decfa1bfc 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -2,7 +2,10 @@ #define __LINUX_MROUTE_H #include +#include +#ifdef __KERNEL__ #include +#endif #include /* diff --git a/include/linux/pim.h b/include/linux/pim.h index 6f689dc85503..236ffd317394 100644 --- a/include/linux/pim.h +++ b/include/linux/pim.h @@ -3,6 +3,22 @@ #include +#ifndef __KERNEL__ +struct pim { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 pim_type:4, /* PIM message type */ + pim_ver:4; /* PIM version */ +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 pim_ver:4; /* PIM version */ + pim_type:4; /* PIM message type */ +#endif + __u8 pim_rsv; /* Reserved */ + __be16 pim_cksum; /* Checksum */ +}; + +#define PIM_MINLEN 8 +#endif + /* Message types - V1 */ #define PIM_V1_VERSION __constant_htonl(0x10000000) #define PIM_V1_REGISTER 1 -- cgit v1.2.3-71-gd317 From 7bc570c8b4f75ddb3fd5dbeb38127cdc4acbcc9c Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 3 Apr 2008 09:22:53 +0900 Subject: [IPV6] MROUTE: Support multicast forwarding. Based on ancient patch by Mickael Hoerdt , which is available at . Signed-off-by: YOSHIFUJI Hideaki --- include/linux/Kbuild | 1 + include/linux/ipv6.h | 5 + include/linux/mroute6.h | 227 ++++++++ net/ipv6/Kconfig | 7 + net/ipv6/Makefile | 2 + net/ipv6/addrconf.c | 15 +- net/ipv6/af_inet6.c | 6 + net/ipv6/ip6_input.c | 87 ++- net/ipv6/ip6_output.c | 6 +- net/ipv6/ip6mr.c | 1384 ++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/ipv6_sockglue.c | 7 + net/ipv6/raw.c | 7 +- net/ipv6/route.c | 30 +- 13 files changed, 1754 insertions(+), 30 deletions(-) create mode 100644 include/linux/mroute6.h create mode 100644 net/ipv6/ip6mr.c (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 84736acb4b99..29ab9b95d376 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -261,6 +261,7 @@ unifdef-y += mempolicy.h unifdef-y += mii.h unifdef-y += mman.h unifdef-y += mroute.h +unifdef-y += mroute6.h unifdef-y += msdos_fs.h unifdef-y += msg.h unifdef-y += nbd.h diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index b90d3d461d4e..f53e4764fc05 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -159,6 +159,9 @@ struct ipv6_devconf { __s32 accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD __s32 optimistic_dad; +#endif +#ifdef CONFIG_IPV6_MROUTE + __s32 mc_forwarding; #endif void *sysctl; }; @@ -190,6 +193,7 @@ enum { DEVCONF_PROXY_NDP, DEVCONF_OPTIMISTIC_DAD, DEVCONF_ACCEPT_SOURCE_ROUTE, + DEVCONF_MC_FORWARDING, DEVCONF_MAX }; @@ -230,6 +234,7 @@ struct inet6_skb_parm { #endif #define IP6SKB_XFRM_TRANSFORMED 1 +#define IP6SKB_FORWARDED 2 }; #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h new file mode 100644 index 000000000000..b92190304e0b --- /dev/null +++ b/include/linux/mroute6.h @@ -0,0 +1,227 @@ +#ifndef __LINUX_MROUTE6_H +#define __LINUX_MROUTE6_H + +#include +#include + +/* + * Based on the MROUTING 3.5 defines primarily to keep + * source compatibility with BSD. + * + * See the pim6sd code for the original history. + * + * Protocol Independent Multicast (PIM) data structures included + * Carlos Picoto (cap@di.fc.ul.pt) + * + */ + +#define MRT6_BASE 200 +#define MRT6_INIT (MRT6_BASE) /* Activate the kernel mroute code */ +#define MRT6_DONE (MRT6_BASE+1) /* Shutdown the kernel mroute */ +#define MRT6_ADD_MIF (MRT6_BASE+2) /* Add a virtual interface */ +#define MRT6_DEL_MIF (MRT6_BASE+3) /* Delete a virtual interface */ +#define MRT6_ADD_MFC (MRT6_BASE+4) /* Add a multicast forwarding entry */ +#define MRT6_DEL_MFC (MRT6_BASE+5) /* Delete a multicast forwarding entry */ +#define MRT6_VERSION (MRT6_BASE+6) /* Get the kernel multicast version */ + +#define SIOCGETMIFCNT_IN6 SIOCPROTOPRIVATE /* IP protocol privates */ +#define SIOCGETSGCNT_IN6 (SIOCPROTOPRIVATE+1) +#define SIOCGETRPF (SIOCPROTOPRIVATE+2) + +#define MAXMIFS 32 +typedef unsigned long mifbitmap_t; /* User mode code depends on this lot */ +typedef unsigned short mifi_t; +#define ALL_MIFS ((mifi_t)(-1)) + +#ifndef IF_SETSIZE +#define IF_SETSIZE 256 +#endif + +typedef __u32 if_mask; +#define NIFBITS (sizeof(if_mask) * 8) /* bits per mask */ + +#if !defined(__KERNEL__) && !defined(DIV_ROUND_UP) +#define DIV_ROUND_UP(x,y) (((x) + ((y) - 1)) / (y)) +#endif + +typedef struct if_set { + if_mask ifs_bits[DIV_ROUND_UP(IF_SETSIZE, NIFBITS)]; +} if_set; + +#define IF_SET(n, p) ((p)->ifs_bits[(n)/NIFBITS] |= (1 << ((n) % NIFBITS))) +#define IF_CLR(n, p) ((p)->ifs_bits[(n)/NIFBITS] &= ~(1 << ((n) % NIFBITS))) +#define IF_ISSET(n, p) ((p)->ifs_bits[(n)/NIFBITS] & (1 << ((n) % NIFBITS))) +#define IF_COPY(f, t) bcopy(f, t, sizeof(*(f))) +#define IF_ZERO(p) bzero(p, sizeof(*(p))) + +/* + * Passed by mrouted for an MRT_ADD_MIF - again we use the + * mrouted 3.6 structures for compatibility + */ + +struct mif6ctl { + mifi_t mif6c_mifi; /* Index of MIF */ + unsigned char mif6c_flags; /* MIFF_ flags */ + unsigned char vifc_threshold; /* ttl limit */ + u_short mif6c_pifi; /* the index of the physical IF */ + unsigned int vifc_rate_limit; /* Rate limiter values (NI) */ +}; + +#define MIFF_REGISTER 0x1 /* register vif */ + +/* + * Cache manipulation structures for mrouted and PIMd + */ + +struct mf6cctl +{ + struct sockaddr_in6 mf6cc_origin; /* Origin of mcast */ + struct sockaddr_in6 mf6cc_mcastgrp; /* Group in question */ + mifi_t mf6cc_parent; /* Where it arrived */ + struct if_set mf6cc_ifset; /* Where it is going */ +}; + +/* + * Group count retrieval for pim6sd + */ + +struct sioc_sg_req6 +{ + struct sockaddr_in6 src; + struct sockaddr_in6 grp; + unsigned long pktcnt; + unsigned long bytecnt; + unsigned long wrong_if; +}; + +/* + * To get vif packet counts + */ + +struct sioc_mif_req6 +{ + mifi_t mifi; /* Which iface */ + unsigned long icount; /* In packets */ + unsigned long ocount; /* Out packets */ + unsigned long ibytes; /* In bytes */ + unsigned long obytes; /* Out bytes */ +}; + +/* + * That's all usermode folks + */ + +#ifdef __KERNEL__ + +#include /* for struct sk_buff_head */ + +struct net_device; +struct inet6_dev *ipv6_find_idev(struct net_device *dev); + +#ifdef CONFIG_IPV6_MROUTE +static inline int ip6_mroute_opt(int opt) +{ + return (opt >= MRT6_BASE) && (opt <= MRT6_BASE + 10); +} +#else +static inline int ip6_mroute_opt(int opt) +{ + return 0; +} +#endif + +struct sock; + +extern int ip6_mroute_setsockopt(struct sock *, int, char __user *, int); +extern int ip6_mroute_getsockopt(struct sock *, int, char __user *, int __user *); +extern int ip6_mr_input(struct sk_buff *skb); +extern int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg); +extern void ip6_mr_init(void); + +struct mif_device +{ + struct net_device *dev; /* Device we are using */ + unsigned long bytes_in,bytes_out; + unsigned long pkt_in,pkt_out; /* Statistics */ + unsigned long rate_limit; /* Traffic shaping (NI) */ + unsigned char threshold; /* TTL threshold */ + unsigned short flags; /* Control flags */ + int link; /* Physical interface index */ +}; + +#define VIFF_STATIC 0x8000 + +struct mfc6_cache +{ + struct mfc6_cache *next; /* Next entry on cache line */ + struct in6_addr mf6c_mcastgrp; /* Group the entry belongs to */ + struct in6_addr mf6c_origin; /* Source of packet */ + mifi_t mf6c_parent; /* Source interface */ + int mfc_flags; /* Flags on line */ + + union { + struct { + unsigned long expires; + struct sk_buff_head unresolved; /* Unresolved buffers */ + } unres; + struct { + unsigned long last_assert; + int minvif; + int maxvif; + unsigned long bytes; + unsigned long pkt; + unsigned long wrong_if; + unsigned char ttls[MAXMIFS]; /* TTL thresholds */ + } res; + } mfc_un; +}; + +#define MFC_STATIC 1 +#define MFC_NOTIFY 2 + +#define MFC6_LINES 64 + +#define MFC6_HASH(a, g) (((__force u32)(a)->s6_addr32[0] ^ \ + (__force u32)(a)->s6_addr32[1] ^ \ + (__force u32)(a)->s6_addr32[2] ^ \ + (__force u32)(a)->s6_addr32[3] ^ \ + (__force u32)(g)->s6_addr32[0] ^ \ + (__force u32)(g)->s6_addr32[1] ^ \ + (__force u32)(g)->s6_addr32[2] ^ \ + (__force u32)(g)->s6_addr32[3]) % MFC6_LINES) + +#define MFC_ASSERT_THRESH (3*HZ) /* Maximal freq. of asserts */ + +#endif + +#ifdef __KERNEL__ +struct rtmsg; +extern int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait); + +#ifdef CONFIG_IPV6_MROUTE +extern struct sock *mroute6_socket; +extern int ip6mr_sk_done(struct sock *sk); +#else +#define mroute6_socket NULL +static inline int ip6mr_sk_done(struct sock *sk) { return 0; } +#endif +#endif + +/* + * Structure used to communicate from kernel to multicast router. + * We'll overlay the structure onto an MLD header (not an IPv6 heder like igmpmsg{} + * used for IPv4 implementation). This is because this structure will be passed via an + * IPv6 raw socket, on wich an application will only receiver the payload i.e the data after + * the IPv6 header and all the extension headers. (See section 3 of RFC 3542) + */ + +struct mrt6msg { +#define MRT6MSG_NOCACHE 1 + __u8 im6_mbz; /* must be zero */ + __u8 im6_msgtype; /* what type of message */ + __u16 im6_mif; /* mif rec'd on */ + __u32 im6_pad; /* padding for 64 bit arch */ + struct in6_addr im6_src, im6_dst; +}; + +#endif diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 47263e45bacb..9a2ea81e499f 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -209,3 +209,10 @@ config IPV6_SUBTREES If unsure, say N. +config IPV6_MROUTE + bool "IPv6: multicast routing (EXPERIMENTAL)" + depends on IPV6 && EXPERIMENTAL + ---help--- + Experimental support for IPv6 multicast forwarding. + If unsure, say N. + diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index ae14617e607f..686934acfac1 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -11,6 +11,8 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o +ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o + ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o ipv6-$(CONFIG_NETFILTER) += netfilter.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 11037615bc73..dbc51af69017 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -412,7 +412,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) return ndev; } -static struct inet6_dev * ipv6_find_idev(struct net_device *dev) +struct inet6_dev * ipv6_find_idev(struct net_device *dev) { struct inet6_dev *idev; @@ -3547,6 +3547,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, #ifdef CONFIG_IPV6_OPTIMISTIC_DAD array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad; #endif +#ifdef CONFIG_IPV6_MROUTE + array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding; +#endif } static inline size_t inet6_if_nlmsg_size(void) @@ -4094,6 +4097,16 @@ static struct addrconf_sysctl_table .proc_handler = &proc_dointvec, }, +#endif +#ifdef CONFIG_IPV6_MROUTE + { + .ctl_name = CTL_UNNUMBERED, + .procname = "mc_forwarding", + .data = &ipv6_devconf.mc_forwarding, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif { .ctl_name = 0, /* sentinel */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 1731b0abf7f5..3c6aafb02183 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -61,6 +61,9 @@ #include #include +#ifdef CONFIG_IPV6_MROUTE +#include +#endif MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); @@ -953,6 +956,9 @@ static int __init inet6_init(void) err = icmpv6_init(); if (err) goto icmp_fail; +#ifdef CONFIG_IPV6_MROUTE + ip6_mr_init(); +#endif err = ndisc_init(); if (err) goto ndisc_fail; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 43a617e2268b..09a3201e408a 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -236,36 +237,84 @@ int ip6_mc_input(struct sk_buff *skb) hdr = ipv6_hdr(skb); deliver = ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL); +#ifdef CONFIG_IPV6_MROUTE /* - * IPv6 multicast router mode isnt currently supported. + * IPv6 multicast router mode is now supported ;) */ -#if 0 - if (ipv6_config.multicast_route) { - int addr_type; - - addr_type = ipv6_addr_type(&hdr->daddr); - - if (!(addr_type & (IPV6_ADDR_LOOPBACK | IPV6_ADDR_LINKLOCAL))) { - struct sk_buff *skb2; - struct dst_entry *dst; + if (ipv6_devconf.mc_forwarding && + likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { + /* + * Okay, we try to forward - split and duplicate + * packets. + */ + struct sk_buff *skb2; + struct inet6_skb_parm *opt = IP6CB(skb); + + /* Check for MLD */ + if (unlikely(opt->ra)) { + /* Check if this is a mld message */ + u8 *ptr = skb_network_header(skb) + opt->ra; + struct icmp6hdr *icmp6; + u8 nexthdr = hdr->nexthdr; + int offset; + + /* Check if the value of Router Alert + * is for MLD (0x0000). + */ + if ((ptr[2] | ptr[3]) == 0) { + if (!ipv6_ext_hdr(nexthdr)) { + /* BUG */ + goto discard; + } + offset = ipv6_skip_exthdr(skb, sizeof(*hdr), + &nexthdr); + if (offset < 0) + goto discard; + + if (nexthdr != IPPROTO_ICMPV6) + goto discard; + + if (!pskb_may_pull(skb, (skb_network_header(skb) + + offset + 1 - skb->data))) + goto discard; + + icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); + + switch (icmp6->icmp6_type) { + case ICMPV6_MGM_QUERY: + case ICMPV6_MGM_REPORT: + case ICMPV6_MGM_REDUCTION: + case ICMPV6_MLD2_REPORT: + break; + default: + /* Bogus */ + goto discard; + } + deliver = 1; + goto out; + } + /* unknown RA - process it normally */ + } - dst = skb->dst; + if (deliver) + skb2 = skb_clone(skb, GFP_ATOMIC); + else { + skb2 = skb; + skb = NULL; + } - if (deliver) { - skb2 = skb_clone(skb, GFP_ATOMIC); - dst_output(skb2); - } else { - dst_output(skb); - return 0; - } + if (skb2) { + skb2->dev = skb2->dst->dev; + ip6_mr_input(skb2); } } #endif - +out: if (likely(deliver)) { ip6_input(skb); return 0; } +discard: /* discard */ kfree_skb(skb); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a8b4da25b0a7..c0dbe549cc42 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -55,6 +55,7 @@ #include #include #include +#include static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); @@ -137,8 +138,9 @@ static int ip6_output2(struct sk_buff *skb) struct inet6_dev *idev = ip6_dst_idev(skb->dst); if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) && - ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, - &ipv6_hdr(skb)->saddr)) { + ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || + ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, + &ipv6_hdr(skb)->saddr))) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); /* Do not check for IFF_ALLMULTI; multicast routing diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c new file mode 100644 index 000000000000..1bdf3c177d58 --- /dev/null +++ b/net/ipv6/ip6mr.c @@ -0,0 +1,1384 @@ +/* + * Linux IPv6 multicast routing support for BSD pim6sd + * Based on net/ipv4/ipmr.c. + * + * (c) 2004 Mickael Hoerdt, + * LSIIT Laboratory, Strasbourg, France + * (c) 2004 Jean-Philippe Andriot, + * 6WIND, Paris, France + * Copyright (C)2007,2008 USAGI/WIDE Project + * YOSHIFUJI Hideaki + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +struct sock *mroute6_socket; + + +/* Big lock, protecting vif table, mrt cache and mroute socket state. + Note that the changes are semaphored via rtnl_lock. + */ + +static DEFINE_RWLOCK(mrt_lock); + +/* + * Multicast router control variables + */ + +static struct mif_device vif6_table[MAXMIFS]; /* Devices */ +static int maxvif; + +#define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL) + +static struct mfc6_cache *mfc6_cache_array[MFC_LINES]; /* Forwarding cache */ + +static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */ +static atomic_t cache_resolve_queue_len; /* Size of unresolved */ + +/* Special spinlock for queue of unresolved entries */ +static DEFINE_SPINLOCK(mfc_unres_lock); + +/* We return to original Alan's scheme. Hash table of resolved + entries is changed only in process context and protected + with weak lock mrt_lock. Queue of unresolved entries is protected + with strong spinlock mfc_unres_lock. + + In this case data path is free of exclusive locks at all. + */ + +static struct kmem_cache *mrt_cachep __read_mostly; + +static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache); +static int ip6mr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); +static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm); + +static struct timer_list ipmr_expire_timer; + + +#ifdef CONFIG_PROC_FS + +struct ipmr_mfc_iter { + struct mfc6_cache **cache; + int ct; +}; + + +static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) +{ + struct mfc6_cache *mfc; + + it->cache = mfc6_cache_array; + read_lock(&mrt_lock); + for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++) + for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next) + if (pos-- == 0) + return mfc; + read_unlock(&mrt_lock); + + it->cache = &mfc_unres_queue; + spin_lock_bh(&mfc_unres_lock); + for (mfc = mfc_unres_queue; mfc; mfc = mfc->next) + if (pos-- == 0) + return mfc; + spin_unlock_bh(&mfc_unres_lock); + + it->cache = NULL; + return NULL; +} + + + + +/* + * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif + */ + +struct ipmr_vif_iter { + int ct; +}; + +static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter, + loff_t pos) +{ + for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) { + if (!MIF_EXISTS(iter->ct)) + continue; + if (pos-- == 0) + return &vif6_table[iter->ct]; + } + return NULL; +} + +static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(mrt_lock) +{ + read_lock(&mrt_lock); + return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1) + : SEQ_START_TOKEN); +} + +static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct ipmr_vif_iter *iter = seq->private; + + ++*pos; + if (v == SEQ_START_TOKEN) + return ip6mr_vif_seq_idx(iter, 0); + + while (++iter->ct < maxvif) { + if (!MIF_EXISTS(iter->ct)) + continue; + return &vif6_table[iter->ct]; + } + return NULL; +} + +static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) + __releases(mrt_lock) +{ + read_unlock(&mrt_lock); +} + +static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) +{ + if (v == SEQ_START_TOKEN) { + seq_puts(seq, + "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); + } else { + const struct mif_device *vif = v; + const char *name = vif->dev ? vif->dev->name : "none"; + + seq_printf(seq, + "%2Zd %-10s %8ld %7ld %8ld %7ld %05X\n", + vif - vif6_table, + name, vif->bytes_in, vif->pkt_in, + vif->bytes_out, vif->pkt_out, + vif->flags); + } + return 0; +} + +static struct seq_operations ip6mr_vif_seq_ops = { + .start = ip6mr_vif_seq_start, + .next = ip6mr_vif_seq_next, + .stop = ip6mr_vif_seq_stop, + .show = ip6mr_vif_seq_show, +}; + +static int ip6mr_vif_open(struct inode *inode, struct file *file) +{ + return seq_open_private(file, &ip6mr_vif_seq_ops, + sizeof(struct ipmr_vif_iter)); +} + +static struct file_operations ip6mr_vif_fops = { + .owner = THIS_MODULE, + .open = ip6mr_vif_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) +{ + return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1) + : SEQ_START_TOKEN); +} + +static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct mfc6_cache *mfc = v; + struct ipmr_mfc_iter *it = seq->private; + + ++*pos; + + if (v == SEQ_START_TOKEN) + return ipmr_mfc_seq_idx(seq->private, 0); + + if (mfc->next) + return mfc->next; + + if (it->cache == &mfc_unres_queue) + goto end_of_list; + + BUG_ON(it->cache != mfc6_cache_array); + + while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) { + mfc = mfc6_cache_array[it->ct]; + if (mfc) + return mfc; + } + + /* exhausted cache_array, show unresolved */ + read_unlock(&mrt_lock); + it->cache = &mfc_unres_queue; + it->ct = 0; + + spin_lock_bh(&mfc_unres_lock); + mfc = mfc_unres_queue; + if (mfc) + return mfc; + + end_of_list: + spin_unlock_bh(&mfc_unres_lock); + it->cache = NULL; + + return NULL; +} + +static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) +{ + struct ipmr_mfc_iter *it = seq->private; + + if (it->cache == &mfc_unres_queue) + spin_unlock_bh(&mfc_unres_lock); + else if (it->cache == mfc6_cache_array) + read_unlock(&mrt_lock); +} + +static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) +{ + int n; + + if (v == SEQ_START_TOKEN) { + seq_puts(seq, + "Group " + "Origin " + "Iif Pkts Bytes Wrong Oifs\n"); + } else { + const struct mfc6_cache *mfc = v; + const struct ipmr_mfc_iter *it = seq->private; + + seq_printf(seq, + NIP6_FMT " " NIP6_FMT " %-3d %8ld %8ld %8ld", + NIP6(mfc->mf6c_mcastgrp), NIP6(mfc->mf6c_origin), + mfc->mf6c_parent, + mfc->mfc_un.res.pkt, + mfc->mfc_un.res.bytes, + mfc->mfc_un.res.wrong_if); + + if (it->cache != &mfc_unres_queue) { + for (n = mfc->mfc_un.res.minvif; + n < mfc->mfc_un.res.maxvif; n++) { + if (MIF_EXISTS(n) && + mfc->mfc_un.res.ttls[n] < 255) + seq_printf(seq, + " %2d:%-3d", + n, mfc->mfc_un.res.ttls[n]); + } + } + seq_putc(seq, '\n'); + } + return 0; +} + +static struct seq_operations ipmr_mfc_seq_ops = { + .start = ipmr_mfc_seq_start, + .next = ipmr_mfc_seq_next, + .stop = ipmr_mfc_seq_stop, + .show = ipmr_mfc_seq_show, +}; + +static int ipmr_mfc_open(struct inode *inode, struct file *file) +{ + return seq_open_private(file, &ipmr_mfc_seq_ops, + sizeof(struct ipmr_mfc_iter)); +} + +static struct file_operations ip6mr_mfc_fops = { + .owner = THIS_MODULE, + .open = ipmr_mfc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif + +/* + * Delete a VIF entry + */ + +static int mif6_delete(int vifi) +{ + struct mif_device *v; + struct net_device *dev; + if (vifi < 0 || vifi >= maxvif) + return -EADDRNOTAVAIL; + + v = &vif6_table[vifi]; + + write_lock_bh(&mrt_lock); + dev = v->dev; + v->dev = NULL; + + if (!dev) { + write_unlock_bh(&mrt_lock); + return -EADDRNOTAVAIL; + } + + if (vifi + 1 == maxvif) { + int tmp; + for (tmp = vifi - 1; tmp >= 0; tmp--) { + if (MIF_EXISTS(tmp)) + break; + } + maxvif = tmp + 1; + } + + write_unlock_bh(&mrt_lock); + + dev_set_allmulti(dev, -1); + + if (v->flags & MIFF_REGISTER) + unregister_netdevice(dev); + + dev_put(dev); + return 0; +} + +/* Destroy an unresolved cache entry, killing queued skbs + and reporting error to netlink readers. + */ + +static void ip6mr_destroy_unres(struct mfc6_cache *c) +{ + struct sk_buff *skb; + + atomic_dec(&cache_resolve_queue_len); + + while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { + if (ipv6_hdr(skb)->version == 0) { + struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); + nlh->nlmsg_type = NLMSG_ERROR; + nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + skb_trim(skb, nlh->nlmsg_len); + ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; + rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); + } else + kfree_skb(skb); + } + + kmem_cache_free(mrt_cachep, c); +} + + +/* Single timer process for all the unresolved queue. */ + +static void ipmr_do_expire_process(unsigned long dummy) +{ + unsigned long now = jiffies; + unsigned long expires = 10 * HZ; + struct mfc6_cache *c, **cp; + + cp = &mfc_unres_queue; + + while ((c = *cp) != NULL) { + if (time_after(c->mfc_un.unres.expires, now)) { + /* not yet... */ + unsigned long interval = c->mfc_un.unres.expires - now; + if (interval < expires) + expires = interval; + cp = &c->next; + continue; + } + + *cp = c->next; + ip6mr_destroy_unres(c); + } + + if (atomic_read(&cache_resolve_queue_len)) + mod_timer(&ipmr_expire_timer, jiffies + expires); +} + +static void ipmr_expire_process(unsigned long dummy) +{ + if (!spin_trylock(&mfc_unres_lock)) { + mod_timer(&ipmr_expire_timer, jiffies + 1); + return; + } + + if (atomic_read(&cache_resolve_queue_len)) + ipmr_do_expire_process(dummy); + + spin_unlock(&mfc_unres_lock); +} + +/* Fill oifs list. It is called under write locked mrt_lock. */ + +static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls) +{ + int vifi; + + cache->mfc_un.res.minvif = MAXVIFS; + cache->mfc_un.res.maxvif = 0; + memset(cache->mfc_un.res.ttls, 255, MAXVIFS); + + for (vifi = 0; vifi < maxvif; vifi++) { + if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) { + cache->mfc_un.res.ttls[vifi] = ttls[vifi]; + if (cache->mfc_un.res.minvif > vifi) + cache->mfc_un.res.minvif = vifi; + if (cache->mfc_un.res.maxvif <= vifi) + cache->mfc_un.res.maxvif = vifi + 1; + } + } +} + +static int mif6_add(struct mif6ctl *vifc, int mrtsock) +{ + int vifi = vifc->mif6c_mifi; + struct mif_device *v = &vif6_table[vifi]; + struct net_device *dev; + + /* Is vif busy ? */ + if (MIF_EXISTS(vifi)) + return -EADDRINUSE; + + switch (vifc->mif6c_flags) { + case 0: + dev = dev_get_by_index(&init_net, vifc->mif6c_pifi); + if (!dev) + return -EADDRNOTAVAIL; + dev_put(dev); + break; + default: + return -EINVAL; + } + + dev_set_allmulti(dev, 1); + + /* + * Fill in the VIF structures + */ + v->rate_limit = vifc->vifc_rate_limit; + v->flags = vifc->mif6c_flags; + if (!mrtsock) + v->flags |= VIFF_STATIC; + v->threshold = vifc->vifc_threshold; + v->bytes_in = 0; + v->bytes_out = 0; + v->pkt_in = 0; + v->pkt_out = 0; + v->link = dev->ifindex; + if (v->flags & MIFF_REGISTER) + v->link = dev->iflink; + + /* And finish update writing critical data */ + write_lock_bh(&mrt_lock); + dev_hold(dev); + v->dev = dev; + if (vifi + 1 > maxvif) + maxvif = vifi + 1; + write_unlock_bh(&mrt_lock); + return 0; +} + +static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp) +{ + int line = MFC6_HASH(mcastgrp, origin); + struct mfc6_cache *c; + + for (c = mfc6_cache_array[line]; c; c = c->next) { + if (ipv6_addr_equal(&c->mf6c_origin, origin) && + ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) + break; + } + return c; +} + +/* + * Allocate a multicast cache entry + */ +static struct mfc6_cache *ip6mr_cache_alloc(void) +{ + struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_KERNEL); + if (c == NULL) + return NULL; + memset(c, 0, sizeof(*c)); + c->mfc_un.res.minvif = MAXVIFS; + return c; +} + +static struct mfc6_cache *ip6mr_cache_alloc_unres(void) +{ + struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_ATOMIC); + if (c == NULL) + return NULL; + memset(c, 0, sizeof(*c)); + skb_queue_head_init(&c->mfc_un.unres.unresolved); + c->mfc_un.unres.expires = jiffies + 10 * HZ; + return c; +} + +/* + * A cache entry has gone into a resolved state from queued + */ + +static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c) +{ + struct sk_buff *skb; + + /* + * Play the pending entries through our router + */ + + while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { + if (ipv6_hdr(skb)->version == 0) { + int err; + struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); + + if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { + nlh->nlmsg_len = skb->tail - (u8 *)nlh; + } else { + nlh->nlmsg_type = NLMSG_ERROR; + nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + skb_trim(skb, nlh->nlmsg_len); + ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; + } + err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); + } else + ip6_mr_forward(skb, c); + } +} + +/* + * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd + * expects the following bizarre scheme. + * + * Called under mrt_lock. + */ + +static int ip6mr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) +{ + struct sk_buff *skb; + struct mrt6msg *msg; + int ret; + + skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC); + + if (!skb) + return -ENOBUFS; + + /* I suppose that internal messages + * do not require checksums */ + + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* + * Copy the IP header + */ + + skb_put(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr)); + + /* + * Add our header + */ + skb_put(skb, sizeof(*msg)); + skb_reset_transport_header(skb); + msg = (struct mrt6msg *)skb_transport_header(skb); + + msg->im6_mbz = 0; + msg->im6_msgtype = assert; + msg->im6_mif = vifi; + msg->im6_pad = 0; + ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); + ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); + + skb->dst = dst_clone(pkt->dst); + skb->ip_summed = CHECKSUM_UNNECESSARY; + + skb_pull(skb, sizeof(struct ipv6hdr)); + + if (mroute6_socket == NULL) { + kfree_skb(skb); + return -EINVAL; + } + + /* + * Deliver to user space multicast routing algorithms + */ + if ((ret = sock_queue_rcv_skb(mroute6_socket, skb)) < 0) { + if (net_ratelimit()) + printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n"); + kfree_skb(skb); + } + + return ret; +} + +/* + * Queue a packet for resolution. It gets locked cache entry! + */ + +static int +ip6mr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) +{ + int err; + struct mfc6_cache *c; + + spin_lock_bh(&mfc_unres_lock); + for (c = mfc_unres_queue; c; c = c->next) { + if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && + ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) + break; + } + + if (c == NULL) { + /* + * Create a new entry if allowable + */ + + if (atomic_read(&cache_resolve_queue_len) >= 10 || + (c = ip6mr_cache_alloc_unres()) == NULL) { + spin_unlock_bh(&mfc_unres_lock); + + kfree_skb(skb); + return -ENOBUFS; + } + + /* + * Fill in the new cache entry + */ + c->mf6c_parent = -1; + c->mf6c_origin = ipv6_hdr(skb)->saddr; + c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr; + + /* + * Reflect first query at pim6sd + */ + if ((err = ip6mr_cache_report(skb, vifi, MRT6MSG_NOCACHE)) < 0) { + /* If the report failed throw the cache entry + out - Brad Parker + */ + spin_unlock_bh(&mfc_unres_lock); + + kmem_cache_free(mrt_cachep, c); + kfree_skb(skb); + return err; + } + + atomic_inc(&cache_resolve_queue_len); + c->next = mfc_unres_queue; + mfc_unres_queue = c; + + ipmr_do_expire_process(1); + } + + /* + * See if we can append the packet + */ + if (c->mfc_un.unres.unresolved.qlen > 3) { + kfree_skb(skb); + err = -ENOBUFS; + } else { + skb_queue_tail(&c->mfc_un.unres.unresolved, skb); + err = 0; + } + + spin_unlock_bh(&mfc_unres_lock); + return err; +} + +/* + * MFC6 cache manipulation by user space + */ + +static int ip6mr_mfc_delete(struct mf6cctl *mfc) +{ + int line; + struct mfc6_cache *c, **cp; + + line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); + + for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) { + if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && + ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { + write_lock_bh(&mrt_lock); + *cp = c->next; + write_unlock_bh(&mrt_lock); + + kmem_cache_free(mrt_cachep, c); + return 0; + } + } + return -ENOENT; +} + +static int ip6mr_device_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + struct mif_device *v; + int ct; + + if (dev_net(dev) != &init_net) + return NOTIFY_DONE; + + if (event != NETDEV_UNREGISTER) + return NOTIFY_DONE; + + v = &vif6_table[0]; + for (ct = 0; ct < maxvif; ct++, v++) { + if (v->dev == dev) + mif6_delete(ct); + } + return NOTIFY_DONE; +} + +static struct notifier_block ip6_mr_notifier = { + .notifier_call = ip6mr_device_event +}; + +/* + * Setup for IP multicast routing + */ + +void __init ip6_mr_init(void) +{ + mrt_cachep = kmem_cache_create("ip6_mrt_cache", + sizeof(struct mfc6_cache), + 0, SLAB_HWCACHE_ALIGN, + NULL); + if (!mrt_cachep) + panic("cannot allocate ip6_mrt_cache"); + + setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); + register_netdevice_notifier(&ip6_mr_notifier); +#ifdef CONFIG_PROC_FS + proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops); + proc_net_fops_create(&init_net, "ip6_mr_cache", 0, &ip6mr_mfc_fops); +#endif +} + + +static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) +{ + int line; + struct mfc6_cache *uc, *c, **cp; + unsigned char ttls[MAXVIFS]; + int i; + + memset(ttls, 255, MAXVIFS); + for (i = 0; i < MAXVIFS; i++) { + if (IF_ISSET(i, &mfc->mf6cc_ifset)) + ttls[i] = 1; + + } + + line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); + + for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) { + if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && + ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) + break; + } + + if (c != NULL) { + write_lock_bh(&mrt_lock); + c->mf6c_parent = mfc->mf6cc_parent; + ip6mr_update_thresholds(c, ttls); + if (!mrtsock) + c->mfc_flags |= MFC_STATIC; + write_unlock_bh(&mrt_lock); + return 0; + } + + if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) + return -EINVAL; + + c = ip6mr_cache_alloc(); + if (c == NULL) + return -ENOMEM; + + c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; + c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; + c->mf6c_parent = mfc->mf6cc_parent; + ip6mr_update_thresholds(c, ttls); + if (!mrtsock) + c->mfc_flags |= MFC_STATIC; + + write_lock_bh(&mrt_lock); + c->next = mfc6_cache_array[line]; + mfc6_cache_array[line] = c; + write_unlock_bh(&mrt_lock); + + /* + * Check to see if we resolved a queued list. If so we + * need to send on the frames and tidy up. + */ + spin_lock_bh(&mfc_unres_lock); + for (cp = &mfc_unres_queue; (uc = *cp) != NULL; + cp = &uc->next) { + if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && + ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { + *cp = uc->next; + if (atomic_dec_and_test(&cache_resolve_queue_len)) + del_timer(&ipmr_expire_timer); + break; + } + } + spin_unlock_bh(&mfc_unres_lock); + + if (uc) { + ip6mr_cache_resolve(uc, c); + kmem_cache_free(mrt_cachep, uc); + } + return 0; +} + +/* + * Close the multicast socket, and clear the vif tables etc + */ + +static void mroute_clean_tables(struct sock *sk) +{ + int i; + + /* + * Shut down all active vif entries + */ + for (i = 0; i < maxvif; i++) { + if (!(vif6_table[i].flags & VIFF_STATIC)) + mif6_delete(i); + } + + /* + * Wipe the cache + */ + for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) { + struct mfc6_cache *c, **cp; + + cp = &mfc6_cache_array[i]; + while ((c = *cp) != NULL) { + if (c->mfc_flags & MFC_STATIC) { + cp = &c->next; + continue; + } + write_lock_bh(&mrt_lock); + *cp = c->next; + write_unlock_bh(&mrt_lock); + + kmem_cache_free(mrt_cachep, c); + } + } + + if (atomic_read(&cache_resolve_queue_len) != 0) { + struct mfc6_cache *c; + + spin_lock_bh(&mfc_unres_lock); + while (mfc_unres_queue != NULL) { + c = mfc_unres_queue; + mfc_unres_queue = c->next; + spin_unlock_bh(&mfc_unres_lock); + + ip6mr_destroy_unres(c); + + spin_lock_bh(&mfc_unres_lock); + } + spin_unlock_bh(&mfc_unres_lock); + } +} + +static int ip6mr_sk_init(struct sock *sk) +{ + int err = 0; + + rtnl_lock(); + write_lock_bh(&mrt_lock); + if (likely(mroute6_socket == NULL)) + mroute6_socket = sk; + else + err = -EADDRINUSE; + write_unlock_bh(&mrt_lock); + + rtnl_unlock(); + + return err; +} + +int ip6mr_sk_done(struct sock *sk) +{ + int err = 0; + + rtnl_lock(); + if (sk == mroute6_socket) { + write_lock_bh(&mrt_lock); + mroute6_socket = NULL; + write_unlock_bh(&mrt_lock); + + mroute_clean_tables(sk); + } else + err = -EACCES; + rtnl_unlock(); + + return err; +} + +/* + * Socket options and virtual interface manipulation. The whole + * virtual interface system is a complete heap, but unfortunately + * that's how BSD mrouted happens to think. Maybe one day with a proper + * MOSPF/PIM router set up we can clean this up. + */ + +int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen) +{ + int ret; + struct mif6ctl vif; + struct mf6cctl mfc; + mifi_t mifi; + + if (optname != MRT6_INIT) { + if (sk != mroute6_socket && !capable(CAP_NET_ADMIN)) + return -EACCES; + } + + switch (optname) { + case MRT6_INIT: + if (sk->sk_type != SOCK_RAW || + inet_sk(sk)->num != IPPROTO_ICMPV6) + return -EOPNOTSUPP; + if (optlen < sizeof(int)) + return -EINVAL; + + return ip6mr_sk_init(sk); + + case MRT6_DONE: + return ip6mr_sk_done(sk); + + case MRT6_ADD_MIF: + if (optlen < sizeof(vif)) + return -EINVAL; + if (copy_from_user(&vif, optval, sizeof(vif))) + return -EFAULT; + if (vif.mif6c_mifi >= MAXVIFS) + return -ENFILE; + rtnl_lock(); + ret = mif6_add(&vif, sk == mroute6_socket); + rtnl_unlock(); + return ret; + + case MRT6_DEL_MIF: + if (optlen < sizeof(mifi_t)) + return -EINVAL; + if (copy_from_user(&mifi, optval, sizeof(mifi_t))) + return -EFAULT; + rtnl_lock(); + ret = mif6_delete(mifi); + rtnl_unlock(); + return ret; + + /* + * Manipulate the forwarding caches. These live + * in a sort of kernel/user symbiosis. + */ + case MRT6_ADD_MFC: + case MRT6_DEL_MFC: + if (optlen < sizeof(mfc)) + return -EINVAL; + if (copy_from_user(&mfc, optval, sizeof(mfc))) + return -EFAULT; + rtnl_lock(); + if (optname == MRT6_DEL_MFC) + ret = ip6mr_mfc_delete(&mfc); + else + ret = ip6mr_mfc_add(&mfc, sk == mroute6_socket); + rtnl_unlock(); + return ret; + + /* + * Spurious command, or MRT_VERSION which you cannot + * set. + */ + default: + return -ENOPROTOOPT; + } +} + +/* + * Getsock opt support for the multicast routing system. + */ + +int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, + int __user *optlen) +{ + int olr; + int val; + + switch (optname) { + case MRT6_VERSION: + val = 0x0305; + break; + default: + return -ENOPROTOOPT; + } + + if (get_user(olr, optlen)) + return -EFAULT; + + olr = min_t(int, olr, sizeof(int)); + if (olr < 0) + return -EINVAL; + + if (put_user(olr, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, olr)) + return -EFAULT; + return 0; +} + +/* + * The IP multicast ioctl support routines. + */ + +int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) +{ + struct sioc_sg_req6 sr; + struct sioc_mif_req6 vr; + struct mif_device *vif; + struct mfc6_cache *c; + + switch (cmd) { + case SIOCGETMIFCNT_IN6: + if (copy_from_user(&vr, arg, sizeof(vr))) + return -EFAULT; + if (vr.mifi >= maxvif) + return -EINVAL; + read_lock(&mrt_lock); + vif = &vif6_table[vr.mifi]; + if (MIF_EXISTS(vr.mifi)) { + vr.icount = vif->pkt_in; + vr.ocount = vif->pkt_out; + vr.ibytes = vif->bytes_in; + vr.obytes = vif->bytes_out; + read_unlock(&mrt_lock); + + if (copy_to_user(arg, &vr, sizeof(vr))) + return -EFAULT; + return 0; + } + read_unlock(&mrt_lock); + return -EADDRNOTAVAIL; + case SIOCGETSGCNT_IN6: + if (copy_from_user(&sr, arg, sizeof(sr))) + return -EFAULT; + + read_lock(&mrt_lock); + c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr); + if (c) { + sr.pktcnt = c->mfc_un.res.pkt; + sr.bytecnt = c->mfc_un.res.bytes; + sr.wrong_if = c->mfc_un.res.wrong_if; + read_unlock(&mrt_lock); + + if (copy_to_user(arg, &sr, sizeof(sr))) + return -EFAULT; + return 0; + } + read_unlock(&mrt_lock); + return -EADDRNOTAVAIL; + default: + return -ENOIOCTLCMD; + } +} + + +static inline int ip6mr_forward2_finish(struct sk_buff *skb) +{ + /* XXX stats */ + return dst_output(skb); +} + +/* + * Processing handlers for ip6mr_forward + */ + +static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) +{ + struct ipv6hdr *ipv6h; + struct mif_device *vif = &vif6_table[vifi]; + struct net_device *dev; + struct dst_entry *dst; + struct flowi fl; + + if (vif->dev == NULL) + goto out_free; + + ipv6h = ipv6_hdr(skb); + + fl = (struct flowi) { + .oif = vif->link, + .nl_u = { .ip6_u = + { .daddr = ipv6h->daddr, } + } + }; + + dst = ip6_route_output(&init_net, NULL, &fl); + if (!dst) + goto out_free; + + dst_release(skb->dst); + skb->dst = dst; + + /* + * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally + * not only before forwarding, but after forwarding on all output + * interfaces. It is clear, if mrouter runs a multicasting + * program, it should receive packets not depending to what interface + * program is joined. + * If we will not make it, the program will have to join on all + * interfaces. On the other hand, multihoming host (or router, but + * not mrouter) cannot join to more than one interface - it will + * result in receiving multiple packets. + */ + dev = vif->dev; + skb->dev = dev; + vif->pkt_out++; + vif->bytes_out += skb->len; + + /* We are about to write */ + /* XXX: extension headers? */ + if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev))) + goto out_free; + + ipv6h = ipv6_hdr(skb); + ipv6h->hop_limit--; + + IP6CB(skb)->flags |= IP6SKB_FORWARDED; + + return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev, + ip6mr_forward2_finish); + +out_free: + kfree_skb(skb); + return 0; +} + +static int ip6mr_find_vif(struct net_device *dev) +{ + int ct; + for (ct = maxvif - 1; ct >= 0; ct--) { + if (vif6_table[ct].dev == dev) + break; + } + return ct; +} + +static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) +{ + int psend = -1; + int vif, ct; + + vif = cache->mf6c_parent; + cache->mfc_un.res.pkt++; + cache->mfc_un.res.bytes += skb->len; + + vif6_table[vif].pkt_in++; + vif6_table[vif].bytes_in += skb->len; + + /* + * Forward the frame + */ + for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { + if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { + if (psend != -1) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2) + ip6mr_forward2(skb2, cache, psend); + } + psend = ct; + } + } + if (psend != -1) { + ip6mr_forward2(skb, cache, psend); + return 0; + } + + kfree_skb(skb); + return 0; +} + + +/* + * Multicast packets for forwarding arrive here + */ + +int ip6_mr_input(struct sk_buff *skb) +{ + struct mfc6_cache *cache; + + read_lock(&mrt_lock); + cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); + + /* + * No usable cache entry + */ + if (cache == NULL) { + int vif; + + vif = ip6mr_find_vif(skb->dev); + if (vif >= 0) { + int err = ip6mr_cache_unresolved(vif, skb); + read_unlock(&mrt_lock); + + return err; + } + read_unlock(&mrt_lock); + kfree_skb(skb); + return -ENODEV; + } + + ip6_mr_forward(skb, cache); + + read_unlock(&mrt_lock); + + return 0; +} + + +static int +ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm) +{ + int ct; + struct rtnexthop *nhp; + struct net_device *dev = vif6_table[c->mf6c_parent].dev; + u8 *b = skb->tail; + struct rtattr *mp_head; + + if (dev) + RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); + + mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); + + for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { + if (c->mfc_un.res.ttls[ct] < 255) { + if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) + goto rtattr_failure; + nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); + nhp->rtnh_flags = 0; + nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; + nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex; + nhp->rtnh_len = sizeof(*nhp); + } + } + mp_head->rta_type = RTA_MULTIPATH; + mp_head->rta_len = skb->tail - (u8 *)mp_head; + rtm->rtm_type = RTN_MULTICAST; + return 1; + +rtattr_failure: + nlmsg_trim(skb, b); + return -EMSGSIZE; +} + +int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) +{ + int err; + struct mfc6_cache *cache; + struct rt6_info *rt = (struct rt6_info *)skb->dst; + + read_lock(&mrt_lock); + cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr); + + if (!cache) { + struct sk_buff *skb2; + struct ipv6hdr *iph; + struct net_device *dev; + int vif; + + if (nowait) { + read_unlock(&mrt_lock); + return -EAGAIN; + } + + dev = skb->dev; + if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) { + read_unlock(&mrt_lock); + return -ENODEV; + } + + /* really correct? */ + skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); + if (!skb2) { + read_unlock(&mrt_lock); + return -ENOMEM; + } + + skb_reset_transport_header(skb2); + + skb_put(skb2, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb2); + + iph = ipv6_hdr(skb2); + iph->version = 0; + iph->priority = 0; + iph->flow_lbl[0] = 0; + iph->flow_lbl[1] = 0; + iph->flow_lbl[2] = 0; + iph->payload_len = 0; + iph->nexthdr = IPPROTO_NONE; + iph->hop_limit = 0; + ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr); + ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr); + + err = ip6mr_cache_unresolved(vif, skb2); + read_unlock(&mrt_lock); + + return err; + } + + if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) + cache->mfc_flags |= MFC_NOTIFY; + + err = ip6mr_fill_mroute(skb, cache, rtm); + read_unlock(&mrt_lock); + return err; +} + diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 4195ac92345e..99624109c010 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +119,9 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, valbool = (val!=0); + if (ip6_mroute_opt(optname)) + return ip6_mroute_setsockopt(sk, optname, optval, optlen); + lock_sock(sk); switch (optname) { @@ -790,6 +794,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, int len; int val; + if (ip6_mroute_opt(optname)) + return ip6_mroute_getsockopt(sk, optname, optval, optlen); + if (get_user(len, optlen)) return -EFAULT; switch (optname) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index aae6cedf1709..088b80b4ce74 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -53,6 +53,7 @@ #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) #include #endif +#include #include #include @@ -1135,7 +1136,11 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) } default: +#ifdef CONFIG_IPV6_MROUTE + return ip6mr_ioctl(sk, cmd, (void __user *)arg); +#else return -ENOIOCTLCMD; +#endif } } @@ -1143,7 +1148,7 @@ static void rawv6_close(struct sock *sk, long timeout) { if (inet_sk(sk)->num == IPPROTO_RAW) ip6_ra_control(sk, -1, NULL); - + ip6mr_sk_done(sk); sk_common_release(sk); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index cd82b6db35ff..3c314d5f46c6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -2106,7 +2107,7 @@ static inline size_t rt6_nlmsg_size(void) static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, int iif, int type, u32 pid, u32 seq, - int prefix, unsigned int flags) + int prefix, int nowait, unsigned int flags) { struct rtmsg *rtm; struct nlmsghdr *nlh; @@ -2166,9 +2167,24 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, } else if (rtm->rtm_src_len) NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); #endif - if (iif) - NLA_PUT_U32(skb, RTA_IIF, iif); - else if (dst) { + if (iif) { +#ifdef CONFIG_IPV6_MROUTE + if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { + int err = ip6mr_get_route(skb, rtm, nowait); + if (err <= 0) { + if (!nowait) { + if (err == 0) + return 0; + goto nla_put_failure; + } else { + if (err == -EMSGSIZE) + goto nla_put_failure; + } + } + } else +#endif + NLA_PUT_U32(skb, RTA_IIF, iif); + } else if (dst) { struct in6_addr saddr_buf; if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev, dst, 0, &saddr_buf) == 0) @@ -2211,7 +2227,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, - prefix, NLM_F_MULTI); + prefix, 0, NLM_F_MULTI); } static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) @@ -2277,7 +2293,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, - nlh->nlmsg_seq, 0, 0); + nlh->nlmsg_seq, 0, 0, 0); if (err < 0) { kfree_skb(skb); goto errout; @@ -2303,7 +2319,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) goto errout; err = rt6_fill_node(skb, rt, NULL, NULL, 0, - event, info->pid, seq, 0, 0); + event, info->pid, seq, 0, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); -- cgit v1.2.3-71-gd317 From 14fb64e1f449ef6666f1c3a3fa4e13aec669b98d Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 3 Apr 2008 09:22:54 +0900 Subject: [IPV6] MROUTE: Support PIM-SM (SSM). Based on ancient patch by Mickael Hoerdt , which is available at . Signed-off-by: YOSHIFUJI Hideaki --- include/linux/mroute6.h | 4 + net/ipv6/Kconfig | 7 ++ net/ipv6/ip6mr.c | 275 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 285 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index b92190304e0b..f6469fb90840 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -23,6 +23,8 @@ #define MRT6_ADD_MFC (MRT6_BASE+4) /* Add a multicast forwarding entry */ #define MRT6_DEL_MFC (MRT6_BASE+5) /* Delete a multicast forwarding entry */ #define MRT6_VERSION (MRT6_BASE+6) /* Get the kernel multicast version */ +#define MRT6_ASSERT (MRT6_BASE+7) /* Activate PIM assert mode */ +#define MRT6_PIM (MRT6_BASE+8) /* enable PIM code */ #define SIOCGETMIFCNT_IN6 SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT_IN6 (SIOCPROTOPRIVATE+1) @@ -217,6 +219,8 @@ static inline int ip6mr_sk_done(struct sock *sk) { return 0; } struct mrt6msg { #define MRT6MSG_NOCACHE 1 +#define MRT6MSG_WRONGMIF 2 +#define MRT6MSG_WHOLEPKT 3 /* used for use level encap */ __u8 im6_mbz; /* must be zero */ __u8 im6_msgtype; /* what type of message */ __u16 im6_mif; /* mif rec'd on */ diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 9a2ea81e499f..82f987b4ef84 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -216,3 +216,10 @@ config IPV6_MROUTE Experimental support for IPv6 multicast forwarding. If unsure, say N. +config IPV6_PIMSM_V2 + bool "IPv6: PIM-SM version 2 support (EXPERIMENTAL)" + depends on IPV6_MROUTE + ---help--- + Support for IPv6 PIM multicast routing protocol PIM-SMv2. + If unsure, say N. + diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 1bdf3c177d58..2b70774be61f 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include @@ -75,6 +76,13 @@ static int maxvif; #define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL) +static int mroute_do_assert; /* Set in PIM assert */ +#ifdef CONFIG_IPV6_PIMSM_V2 +static int mroute_do_pim; +#else +#define mroute_do_pim 0 +#endif + static struct mfc6_cache *mfc6_cache_array[MFC_LINES]; /* Forwarding cache */ static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */ @@ -97,6 +105,10 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache); static int ip6mr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm); +#ifdef CONFIG_IPV6_PIMSM_V2 +static struct inet6_protocol pim6_protocol; +#endif + static struct timer_list ipmr_expire_timer; @@ -339,6 +351,132 @@ static struct file_operations ip6mr_mfc_fops = { }; #endif +#ifdef CONFIG_IPV6_PIMSM_V2 +static int reg_vif_num = -1; + +static int pim6_rcv(struct sk_buff *skb) +{ + struct pimreghdr *pim; + struct ipv6hdr *encap; + struct net_device *reg_dev = NULL; + + if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) + goto drop; + + pim = (struct pimreghdr *)skb_transport_header(skb); + if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) || + (pim->flags & PIM_NULL_REGISTER) || + (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && + (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))) + goto drop; + + /* check if the inner packet is destined to mcast group */ + encap = (struct ipv6hdr *)(skb_transport_header(skb) + + sizeof(*pim)); + + if (!ipv6_addr_is_multicast(&encap->daddr) || + encap->payload_len == 0 || + ntohs(encap->payload_len) + sizeof(*pim) > skb->len) + goto drop; + + read_lock(&mrt_lock); + if (reg_vif_num >= 0) + reg_dev = vif6_table[reg_vif_num].dev; + if (reg_dev) + dev_hold(reg_dev); + read_unlock(&mrt_lock); + + if (reg_dev == NULL) + goto drop; + + skb->mac_header = skb->network_header; + skb_pull(skb, (u8 *)encap - skb->data); + skb_reset_network_header(skb); + skb->dev = reg_dev; + skb->protocol = htons(ETH_P_IP); + skb->ip_summed = 0; + skb->pkt_type = PACKET_HOST; + dst_release(skb->dst); + ((struct net_device_stats *)netdev_priv(reg_dev))->rx_bytes += skb->len; + ((struct net_device_stats *)netdev_priv(reg_dev))->rx_packets++; + skb->dst = NULL; + nf_reset(skb); + netif_rx(skb); + dev_put(reg_dev); + return 0; + drop: + kfree_skb(skb); + return 0; +} + +static struct inet6_protocol pim6_protocol = { + .handler = pim6_rcv, +}; + +/* Service routines creating virtual interfaces: PIMREG */ + +static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) +{ + read_lock(&mrt_lock); + ((struct net_device_stats *)netdev_priv(dev))->tx_bytes += skb->len; + ((struct net_device_stats *)netdev_priv(dev))->tx_packets++; + ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT); + read_unlock(&mrt_lock); + kfree_skb(skb); + return 0; +} + +static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) +{ + return (struct net_device_stats *)netdev_priv(dev); +} + +static void reg_vif_setup(struct net_device *dev) +{ + dev->type = ARPHRD_PIMREG; + dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; + dev->flags = IFF_NOARP; + dev->hard_start_xmit = reg_vif_xmit; + dev->get_stats = reg_vif_get_stats; + dev->destructor = free_netdev; +} + +static struct net_device *ip6mr_reg_vif(void) +{ + struct net_device *dev; + struct inet6_dev *in_dev; + + dev = alloc_netdev(sizeof(struct net_device_stats), "pim6reg", + reg_vif_setup); + + if (dev == NULL) + return NULL; + + if (register_netdevice(dev)) { + free_netdev(dev); + return NULL; + } + dev->iflink = 0; + + in_dev = ipv6_find_idev(dev); + if (!in_dev) + goto failure; + + if (dev_open(dev)) + goto failure; + + return dev; + +failure: + /* allow the register to be completed before unregistering. */ + rtnl_unlock(); + rtnl_lock(); + + unregister_netdevice(dev); + return NULL; +} +#endif + /* * Delete a VIF entry */ @@ -361,6 +499,11 @@ static int mif6_delete(int vifi) return -EADDRNOTAVAIL; } +#ifdef CONFIG_IPV6_PIMSM_V2 + if (vifi == reg_vif_num) + reg_vif_num = -1; +#endif + if (vifi + 1 == maxvif) { int tmp; for (tmp = vifi - 1; tmp >= 0; tmp--) { @@ -480,6 +623,19 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) return -EADDRINUSE; switch (vifc->mif6c_flags) { +#ifdef CONFIG_IPV6_PIMSM_V2 + case MIFF_REGISTER: + /* + * Special Purpose VIF in PIM + * All the packets will be sent to the daemon + */ + if (reg_vif_num >= 0) + return -EADDRINUSE; + dev = ip6mr_reg_vif(); + if (!dev) + return -ENOBUFS; + break; +#endif case 0: dev = dev_get_by_index(&init_net, vifc->mif6c_pifi); if (!dev) @@ -512,6 +668,10 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) write_lock_bh(&mrt_lock); dev_hold(dev); v->dev = dev; +#ifdef CONFIG_IPV6_PIMSM_V2 + if (v->flags & MIFF_REGISTER) + reg_vif_num = vifi; +#endif if (vifi + 1 > maxvif) maxvif = vifi + 1; write_unlock_bh(&mrt_lock); @@ -599,7 +759,13 @@ static int ip6mr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) struct mrt6msg *msg; int ret; - skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC); +#ifdef CONFIG_IPV6_PIMSM_V2 + if (assert == MRT6MSG_WHOLEPKT) + skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt) + +sizeof(*msg)); + else +#endif + skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC); if (!skb) return -ENOBUFS; @@ -609,6 +775,29 @@ static int ip6mr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) skb->ip_summed = CHECKSUM_UNNECESSARY; +#ifdef CONFIG_IPV6_PIMSM_V2 + if (assert == MRT6MSG_WHOLEPKT) { + /* Ugly, but we have no choice with this interface. + Duplicate old header, fix length etc. + And all this only to mangle msg->im6_msgtype and + to set msg->im6_mbz to "mbz" :-) + */ + skb_push(skb, -skb_network_offset(pkt)); + + skb_push(skb, sizeof(*msg)); + skb_reset_transport_header(skb); + msg = (struct mrt6msg *)skb_transport_header(skb); + msg->im6_mbz = 0; + msg->im6_msgtype = MRT6MSG_WHOLEPKT; + msg->im6_mif = reg_vif_num; + msg->im6_pad = 0; + ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); + ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } else +#endif + { /* * Copy the IP header */ @@ -635,6 +824,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) skb->ip_summed = CHECKSUM_UNNECESSARY; skb_pull(skb, sizeof(struct ipv6hdr)); + } if (mroute6_socket == NULL) { kfree_skb(skb); @@ -1033,6 +1223,44 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int rtnl_unlock(); return ret; + /* + * Control PIM assert (to activate pim will activate assert) + */ + case MRT6_ASSERT: + { + int v; + if (get_user(v, (int __user *)optval)) + return -EFAULT; + mroute_do_assert = !!v; + return 0; + } + +#ifdef CONFIG_IPV6_PIMSM_V2 + case MRT6_PIM: + { + int v, ret; + if (get_user(v, (int __user *)optval)) + return -EFAULT; + v = !!v; + rtnl_lock(); + ret = 0; + if (v != mroute_do_pim) { + mroute_do_pim = v; + mroute_do_assert = v; + if (mroute_do_pim) + ret = inet6_add_protocol(&pim6_protocol, + IPPROTO_PIM); + else + ret = inet6_del_protocol(&pim6_protocol, + IPPROTO_PIM); + if (ret < 0) + ret = -EAGAIN; + } + rtnl_unlock(); + return ret; + } + +#endif /* * Spurious command, or MRT_VERSION which you cannot * set. @@ -1056,6 +1284,14 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, case MRT6_VERSION: val = 0x0305; break; +#ifdef CONFIG_IPV6_PIMSM_V2 + case MRT6_PIM: + val = mroute_do_pim; + break; +#endif + case MRT6_ASSERT: + val = mroute_do_assert; + break; default: return -ENOPROTOOPT; } @@ -1151,6 +1387,18 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) if (vif->dev == NULL) goto out_free; +#ifdef CONFIG_IPV6_PIMSM_V2 + if (vif->flags & MIFF_REGISTER) { + vif->pkt_out++; + vif->bytes_out += skb->len; + ((struct net_device_stats *)netdev_priv(vif->dev))->tx_bytes += skb->len; + ((struct net_device_stats *)netdev_priv(vif->dev))->tx_packets++; + ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT); + kfree_skb(skb); + return 0; + } +#endif + ipv6h = ipv6_hdr(skb); fl = (struct flowi) { @@ -1220,6 +1468,30 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) cache->mfc_un.res.pkt++; cache->mfc_un.res.bytes += skb->len; + /* + * Wrong interface: drop packet and (maybe) send PIM assert. + */ + if (vif6_table[vif].dev != skb->dev) { + int true_vifi; + + cache->mfc_un.res.wrong_if++; + true_vifi = ip6mr_find_vif(skb->dev); + + if (true_vifi >= 0 && mroute_do_assert && + /* pimsm uses asserts, when switching from RPT to SPT, + so that we cannot check that packet arrived on an oif. + It is bad, but otherwise we would need to move pretty + large chunk of pimd to kernel. Ough... --ANK + */ + (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && + time_after(jiffies, + cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { + cache->mfc_un.res.last_assert = jiffies; + ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF); + } + goto dont_forward; + } + vif6_table[vif].pkt_in++; vif6_table[vif].bytes_in += skb->len; @@ -1241,6 +1513,7 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) return 0; } +dont_forward: kfree_skb(skb); return 0; } -- cgit v1.2.3-71-gd317 From 12802d058a003048104fe405a8d283b94ac50801 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 3 Apr 2008 09:22:56 +0900 Subject: [IPV6]: Comment MRT6_xxx sockopts in include/linux/in6.h. Signed-off-by: YOSHIFUJI Hideaki --- include/linux/in6.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index f674000c6c99..e6aa8de2b939 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -260,4 +260,19 @@ struct in6_flowlabel_req #define IPV6_PREFER_SRC_CGA 0x0008 #define IPV6_PREFER_SRC_NONCGA 0x0800 +/* + * Multicast Routing: + * see include/linux/mroute6.h. + * + * MRT6_INIT 200 + * MRT6_DONE 201 + * MRT6_ADD_MIF 202 + * MRT6_DEL_MIF 203 + * MRT6_ADD_MFC 204 + * MRT6_DEL_MFC 205 + * MRT6_VERSION 206 + * MRT6_ASSERT 207 + * MRT6_PIM 208 + * (reserved) 209 + */ #endif -- cgit v1.2.3-71-gd317 From 8fe2b65a18e49bfde56a59ed4ab3fc7aa0c2f325 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Sun, 30 Mar 2008 00:10:50 +0100 Subject: ssb: Turn suspend/resume upside down Turn the SSB bus suspend mechanism upside down. Instead of deciding by an internal reference count when to suspend/resume, let the parent bus call us in their suspend/resume routine. Signed-off-by: Michael Buesch Signed-off-by: John W. Linville --- drivers/net/wireless/b43/pcmcia.c | 10 ++-- drivers/ssb/driver_chipcommon.c | 2 +- drivers/ssb/main.c | 81 +++++++++++++------------------ drivers/ssb/pcihost_wrapper.c | 10 ++++ drivers/ssb/pcmcia.c | 34 +++++++++---- drivers/ssb/ssb_private.h | 5 ++ include/linux/ssb/ssb.h | 10 ++-- include/linux/ssb/ssb_driver_chipcommon.h | 3 +- 8 files changed, 88 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c index 371e4a119511..b8aa16307f79 100644 --- a/drivers/net/wireless/b43/pcmcia.c +++ b/drivers/net/wireless/b43/pcmcia.c @@ -43,14 +43,16 @@ MODULE_DEVICE_TABLE(pcmcia, b43_pcmcia_tbl); #ifdef CONFIG_PM static int b43_pcmcia_suspend(struct pcmcia_device *dev) { - //TODO - return 0; + struct ssb_bus *ssb = dev->priv; + + return ssb_bus_suspend(ssb); } static int b43_pcmcia_resume(struct pcmcia_device *dev) { - //TODO - return 0; + struct ssb_bus *ssb = dev->priv; + + return ssb_bus_resume(ssb); } #else /* CONFIG_PM */ # define b43_pcmcia_suspend NULL diff --git a/drivers/ssb/driver_chipcommon.c b/drivers/ssb/driver_chipcommon.c index 45b672a69003..571f4fd55236 100644 --- a/drivers/ssb/driver_chipcommon.c +++ b/drivers/ssb/driver_chipcommon.c @@ -251,7 +251,7 @@ void ssb_chipcommon_init(struct ssb_chipcommon *cc) calc_fast_powerup_delay(cc); } -void ssb_chipco_suspend(struct ssb_chipcommon *cc, pm_message_t state) +void ssb_chipco_suspend(struct ssb_chipcommon *cc) { if (!cc->dev) return; diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c index 2fcfd73b3b6e..c0cbdba07aee 100644 --- a/drivers/ssb/main.c +++ b/drivers/ssb/main.c @@ -120,35 +120,12 @@ static void ssb_device_put(struct ssb_device *dev) put_device(dev->dev); } -static int ssb_bus_resume(struct ssb_bus *bus) -{ - int err; - - ssb_pci_xtal(bus, SSB_GPIO_XTAL | SSB_GPIO_PLL, 1); - err = ssb_pcmcia_init(bus); - if (err) { - /* No need to disable XTAL, as we don't have one on PCMCIA. */ - return err; - } - ssb_chipco_resume(&bus->chipco); - - return 0; -} - static int ssb_device_resume(struct device *dev) { struct ssb_device *ssb_dev = dev_to_ssb_dev(dev); struct ssb_driver *ssb_drv; - struct ssb_bus *bus; int err = 0; - bus = ssb_dev->bus; - if (bus->suspend_cnt == bus->nr_devices) { - err = ssb_bus_resume(bus); - if (err) - return err; - } - bus->suspend_cnt--; if (dev->driver) { ssb_drv = drv_to_ssb_drv(dev->driver); if (ssb_drv && ssb_drv->resume) @@ -160,27 +137,10 @@ out: return err; } -static void ssb_bus_suspend(struct ssb_bus *bus, pm_message_t state) -{ - ssb_chipco_suspend(&bus->chipco, state); - ssb_pci_xtal(bus, SSB_GPIO_XTAL | SSB_GPIO_PLL, 0); - - /* Reset HW state information in memory, so that HW is - * completely reinitialized on resume. */ - bus->mapped_device = NULL; -#ifdef CONFIG_SSB_DRIVER_PCICORE - bus->pcicore.setup_done = 0; -#endif -#ifdef CONFIG_SSB_DEBUG - bus->powered_up = 0; -#endif -} - static int ssb_device_suspend(struct device *dev, pm_message_t state) { struct ssb_device *ssb_dev = dev_to_ssb_dev(dev); struct ssb_driver *ssb_drv; - struct ssb_bus *bus; int err = 0; if (dev->driver) { @@ -190,17 +150,44 @@ static int ssb_device_suspend(struct device *dev, pm_message_t state) if (err) goto out; } +out: + return err; +} + +int ssb_bus_resume(struct ssb_bus *bus) +{ + int err; - bus = ssb_dev->bus; - bus->suspend_cnt++; - if (bus->suspend_cnt == bus->nr_devices) { - /* All devices suspended. Shutdown the bus. */ - ssb_bus_suspend(bus, state); + /* Reset HW state information in memory, so that HW is + * completely reinitialized. */ + bus->mapped_device = NULL; +#ifdef CONFIG_SSB_DRIVER_PCICORE + bus->pcicore.setup_done = 0; +#endif + + err = ssb_bus_powerup(bus, 0); + if (err) + return err; + err = ssb_pcmcia_hardware_setup(bus); + if (err) { + ssb_bus_may_powerdown(bus); + return err; } + ssb_chipco_resume(&bus->chipco); + ssb_bus_may_powerdown(bus); -out: - return err; + return 0; +} +EXPORT_SYMBOL(ssb_bus_resume); + +int ssb_bus_suspend(struct ssb_bus *bus) +{ + ssb_chipco_suspend(&bus->chipco); + ssb_pci_xtal(bus, SSB_GPIO_XTAL | SSB_GPIO_PLL, 0); + + return 0; } +EXPORT_SYMBOL(ssb_bus_suspend); #ifdef CONFIG_SSB_PCIHOST int ssb_devices_freeze(struct ssb_bus *bus) diff --git a/drivers/ssb/pcihost_wrapper.c b/drivers/ssb/pcihost_wrapper.c index 82a10abef640..e82db4aaa050 100644 --- a/drivers/ssb/pcihost_wrapper.c +++ b/drivers/ssb/pcihost_wrapper.c @@ -18,6 +18,12 @@ #ifdef CONFIG_PM static int ssb_pcihost_suspend(struct pci_dev *dev, pm_message_t state) { + struct ssb_bus *ssb = pci_get_drvdata(dev); + int err; + + err = ssb_bus_suspend(ssb); + if (err) + return err; pci_save_state(dev); pci_disable_device(dev); pci_set_power_state(dev, pci_choose_state(dev, state)); @@ -27,6 +33,7 @@ static int ssb_pcihost_suspend(struct pci_dev *dev, pm_message_t state) static int ssb_pcihost_resume(struct pci_dev *dev) { + struct ssb_bus *ssb = pci_get_drvdata(dev); int err; pci_set_power_state(dev, 0); @@ -34,6 +41,9 @@ static int ssb_pcihost_resume(struct pci_dev *dev) if (err) return err; pci_restore_state(dev); + err = ssb_bus_resume(ssb); + if (err) + return err; return 0; } diff --git a/drivers/ssb/pcmcia.c b/drivers/ssb/pcmcia.c index d674cef7210d..dcaf2412bea7 100644 --- a/drivers/ssb/pcmcia.c +++ b/drivers/ssb/pcmcia.c @@ -684,6 +684,29 @@ static int ssb_pcmcia_cor_setup(struct ssb_bus *bus, u8 cor) return 0; } +/* Initialize the PCMCIA hardware. This is called on Init and Resume. */ +int ssb_pcmcia_hardware_setup(struct ssb_bus *bus) +{ + int err; + + if (bus->bustype != SSB_BUSTYPE_PCMCIA) + return 0; + + /* Switch segment to a known state and sync + * bus->mapped_pcmcia_seg with hardware state. */ + ssb_pcmcia_switch_segment(bus, 0); + /* Init the COR register. */ + err = ssb_pcmcia_cor_setup(bus, CISREG_COR); + if (err) + return err; + /* Some cards also need this register to get poked. */ + err = ssb_pcmcia_cor_setup(bus, CISREG_COR + 0x80); + if (err) + return err; + + return 0; +} + void ssb_pcmcia_exit(struct ssb_bus *bus) { if (bus->bustype != SSB_BUSTYPE_PCMCIA) @@ -699,16 +722,7 @@ int ssb_pcmcia_init(struct ssb_bus *bus) if (bus->bustype != SSB_BUSTYPE_PCMCIA) return 0; - /* Switch segment to a known state and sync - * bus->mapped_pcmcia_seg with hardware state. */ - ssb_pcmcia_switch_segment(bus, 0); - - /* Init the COR register. */ - err = ssb_pcmcia_cor_setup(bus, CISREG_COR); - if (err) - goto error; - /* Some cards also need this register to get poked. */ - err = ssb_pcmcia_cor_setup(bus, CISREG_COR + 0x80); + err = ssb_pcmcia_hardware_setup(bus); if (err) goto error; diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h index a83bf7a4d80b..ebc32d8fe15f 100644 --- a/drivers/ssb/ssb_private.h +++ b/drivers/ssb/ssb_private.h @@ -81,6 +81,7 @@ extern int ssb_pcmcia_switch_segment(struct ssb_bus *bus, u8 seg); extern int ssb_pcmcia_get_invariants(struct ssb_bus *bus, struct ssb_init_invariants *iv); +extern int ssb_pcmcia_hardware_setup(struct ssb_bus *bus); extern void ssb_pcmcia_exit(struct ssb_bus *bus); extern int ssb_pcmcia_init(struct ssb_bus *bus); extern const struct ssb_bus_ops ssb_pcmcia_ops; @@ -100,6 +101,10 @@ static inline int ssb_pcmcia_switch_segment(struct ssb_bus *bus, { return 0; } +static inline int ssb_pcmcia_hardware_setup(struct ssb_bus *bus) +{ + return 0; +} static inline void ssb_pcmcia_exit(struct ssb_bus *bus) { } diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 8644e03cf588..a8ca396f810a 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -260,9 +260,6 @@ struct ssb_bus { struct ssb_device devices[SSB_MAX_NR_CORES]; u8 nr_devices; - /* Reference count. Number of suspended devices. */ - u8 suspend_cnt; - /* Software ID number for this bus. */ unsigned int busnumber; @@ -334,6 +331,13 @@ extern int ssb_bus_pcmciabus_register(struct ssb_bus *bus, extern void ssb_bus_unregister(struct ssb_bus *bus); +/* Suspend a SSB bus. + * Call this from the parent bus suspend routine. */ +extern int ssb_bus_suspend(struct ssb_bus *bus); +/* Resume a SSB bus. + * Call this from the parent bus resume routine. */ +extern int ssb_bus_resume(struct ssb_bus *bus); + extern u32 ssb_clockspeed(struct ssb_bus *bus); /* Is the device enabled in hardware? */ diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h index b548a54ff1f5..7d7e03dcf77c 100644 --- a/include/linux/ssb/ssb_driver_chipcommon.h +++ b/include/linux/ssb/ssb_driver_chipcommon.h @@ -367,8 +367,7 @@ static inline bool ssb_chipco_available(struct ssb_chipcommon *cc) extern void ssb_chipcommon_init(struct ssb_chipcommon *cc); -#include -extern void ssb_chipco_suspend(struct ssb_chipcommon *cc, pm_message_t state); +extern void ssb_chipco_suspend(struct ssb_chipcommon *cc); extern void ssb_chipco_resume(struct ssb_chipcommon *cc); extern void ssb_chipco_get_clockcpu(struct ssb_chipcommon *cc, -- cgit v1.2.3-71-gd317 From d625a29ba649a4df6027520ffc378f23c0e6883e Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Wed, 2 Apr 2008 19:46:56 +0200 Subject: ssb: Add support for block-I/O This adds support for block based I/O to SSB. This is needed in order to efficiently support PIO data transfers to the card. The block-I/O support is only compiled, if it's selected by the weird driver that needs it. So there's no overhead for sane devices. Signed-off-by: Michael Buesch Signed-off-by: John W. Linville --- drivers/ssb/Kconfig | 5 ++ drivers/ssb/main.c | 102 +++++++++++++++++++++++++++++++++++++++++ drivers/ssb/pci.c | 70 ++++++++++++++++++++++++++++ drivers/ssb/pcmcia.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/ssb/ssb.h | 19 ++++++++ 5 files changed, 315 insertions(+) (limited to 'include/linux') diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig index 0f7cce2560d1..cd845b8acd17 100644 --- a/drivers/ssb/Kconfig +++ b/drivers/ssb/Kconfig @@ -24,6 +24,11 @@ config SSB config SSB_SPROM bool +# Support for Block-I/O. SELECT this from the driver that needs it. +config SSB_BLOCKIO + bool + depends on SSB + config SSB_PCIHOST_POSSIBLE bool depends on SSB && (PCI = y || PCI = SSB) diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c index 3e58db7ef608..19ddd2bd1057 100644 --- a/drivers/ssb/main.c +++ b/drivers/ssb/main.c @@ -555,6 +555,55 @@ static u32 ssb_ssb_read32(struct ssb_device *dev, u16 offset) return readl(bus->mmio + offset); } +#ifdef CONFIG_SSB_BLOCKIO +static void ssb_ssb_block_read(struct ssb_device *dev, void *buffer, + size_t count, u16 offset, u8 reg_width) +{ + struct ssb_bus *bus = dev->bus; + void __iomem *addr; + + offset += dev->core_index * SSB_CORE_SIZE; + addr = bus->mmio + offset; + + switch (reg_width) { + case sizeof(u8): { + u8 *buf = buffer; + + while (count) { + *buf = __raw_readb(addr); + buf++; + count--; + } + break; + } + case sizeof(u16): { + __le16 *buf = buffer; + + SSB_WARN_ON(count & 1); + while (count) { + *buf = (__force __le16)__raw_readw(addr); + buf++; + count -= 2; + } + break; + } + case sizeof(u32): { + __le32 *buf = buffer; + + SSB_WARN_ON(count & 3); + while (count) { + *buf = (__force __le32)__raw_readl(addr); + buf++; + count -= 4; + } + break; + } + default: + SSB_WARN_ON(1); + } +} +#endif /* CONFIG_SSB_BLOCKIO */ + static void ssb_ssb_write8(struct ssb_device *dev, u16 offset, u8 value) { struct ssb_bus *bus = dev->bus; @@ -579,6 +628,55 @@ static void ssb_ssb_write32(struct ssb_device *dev, u16 offset, u32 value) writel(value, bus->mmio + offset); } +#ifdef CONFIG_SSB_BLOCKIO +static void ssb_ssb_block_write(struct ssb_device *dev, const void *buffer, + size_t count, u16 offset, u8 reg_width) +{ + struct ssb_bus *bus = dev->bus; + void __iomem *addr; + + offset += dev->core_index * SSB_CORE_SIZE; + addr = bus->mmio + offset; + + switch (reg_width) { + case sizeof(u8): { + const u8 *buf = buffer; + + while (count) { + __raw_writeb(*buf, addr); + buf++; + count--; + } + break; + } + case sizeof(u16): { + const __le16 *buf = buffer; + + SSB_WARN_ON(count & 1); + while (count) { + __raw_writew((__force u16)(*buf), addr); + buf++; + count -= 2; + } + break; + } + case sizeof(u32): { + const __le32 *buf = buffer; + + SSB_WARN_ON(count & 3); + while (count) { + __raw_writel((__force u32)(*buf), addr); + buf++; + count -= 4; + } + break; + } + default: + SSB_WARN_ON(1); + } +} +#endif /* CONFIG_SSB_BLOCKIO */ + /* Ops for the plain SSB bus without a host-device (no PCI or PCMCIA). */ static const struct ssb_bus_ops ssb_ssb_ops = { .read8 = ssb_ssb_read8, @@ -587,6 +685,10 @@ static const struct ssb_bus_ops ssb_ssb_ops = { .write8 = ssb_ssb_write8, .write16 = ssb_ssb_write16, .write32 = ssb_ssb_write32, +#ifdef CONFIG_SSB_BLOCKIO + .block_read = ssb_ssb_block_read, + .block_write = ssb_ssb_block_write, +#endif }; static int ssb_fetch_invariants(struct ssb_bus *bus, diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c index f1514b33cfae..904b1a8d0885 100644 --- a/drivers/ssb/pci.c +++ b/drivers/ssb/pci.c @@ -613,6 +613,41 @@ static u32 ssb_pci_read32(struct ssb_device *dev, u16 offset) return ioread32(bus->mmio + offset); } +#ifdef CONFIG_SSB_BLOCKIO +static void ssb_pci_block_read(struct ssb_device *dev, void *buffer, + size_t count, u16 offset, u8 reg_width) +{ + struct ssb_bus *bus = dev->bus; + void __iomem *addr = bus->mmio + offset; + + if (unlikely(ssb_pci_assert_buspower(bus))) + goto error; + if (unlikely(bus->mapped_device != dev)) { + if (unlikely(ssb_pci_switch_core(bus, dev))) + goto error; + } + switch (reg_width) { + case sizeof(u8): + ioread8_rep(addr, buffer, count); + break; + case sizeof(u16): + SSB_WARN_ON(count & 1); + ioread16_rep(addr, buffer, count >> 1); + break; + case sizeof(u32): + SSB_WARN_ON(count & 3); + ioread32_rep(addr, buffer, count >> 2); + break; + default: + SSB_WARN_ON(1); + } + + return; +error: + memset(buffer, 0xFF, count); +} +#endif /* CONFIG_SSB_BLOCKIO */ + static void ssb_pci_write8(struct ssb_device *dev, u16 offset, u8 value) { struct ssb_bus *bus = dev->bus; @@ -652,6 +687,37 @@ static void ssb_pci_write32(struct ssb_device *dev, u16 offset, u32 value) iowrite32(value, bus->mmio + offset); } +#ifdef CONFIG_SSB_BLOCKIO +static void ssb_pci_block_write(struct ssb_device *dev, const void *buffer, + size_t count, u16 offset, u8 reg_width) +{ + struct ssb_bus *bus = dev->bus; + void __iomem *addr = bus->mmio + offset; + + if (unlikely(ssb_pci_assert_buspower(bus))) + return; + if (unlikely(bus->mapped_device != dev)) { + if (unlikely(ssb_pci_switch_core(bus, dev))) + return; + } + switch (reg_width) { + case sizeof(u8): + iowrite8_rep(addr, buffer, count); + break; + case sizeof(u16): + SSB_WARN_ON(count & 1); + iowrite16_rep(addr, buffer, count >> 1); + break; + case sizeof(u32): + SSB_WARN_ON(count & 3); + iowrite32_rep(addr, buffer, count >> 2); + break; + default: + SSB_WARN_ON(1); + } +} +#endif /* CONFIG_SSB_BLOCKIO */ + /* Not "static", as it's used in main.c */ const struct ssb_bus_ops ssb_pci_ops = { .read8 = ssb_pci_read8, @@ -660,6 +726,10 @@ const struct ssb_bus_ops ssb_pci_ops = { .write8 = ssb_pci_write8, .write16 = ssb_pci_write16, .write32 = ssb_pci_write32, +#ifdef CONFIG_SSB_BLOCKIO + .block_read = ssb_pci_block_read, + .block_write = ssb_pci_block_write, +#endif }; static ssize_t ssb_pci_attr_sprom_show(struct device *pcidev, diff --git a/drivers/ssb/pcmcia.c b/drivers/ssb/pcmcia.c index dcaf2412bea7..24c2a46c1476 100644 --- a/drivers/ssb/pcmcia.c +++ b/drivers/ssb/pcmcia.c @@ -285,6 +285,64 @@ static u32 ssb_pcmcia_read32(struct ssb_device *dev, u16 offset) return (lo | (hi << 16)); } +#ifdef CONFIG_SSB_BLOCKIO +static void ssb_pcmcia_block_read(struct ssb_device *dev, void *buffer, + size_t count, u16 offset, u8 reg_width) +{ + struct ssb_bus *bus = dev->bus; + unsigned long flags; + void __iomem *addr = bus->mmio + offset; + int err; + + spin_lock_irqsave(&bus->bar_lock, flags); + err = select_core_and_segment(dev, &offset); + if (unlikely(err)) { + memset(buffer, 0xFF, count); + goto unlock; + } + switch (reg_width) { + case sizeof(u8): { + u8 *buf = buffer; + + while (count) { + *buf = __raw_readb(addr); + buf++; + count--; + } + break; + } + case sizeof(u16): { + __le16 *buf = buffer; + + SSB_WARN_ON(count & 1); + while (count) { + *buf = (__force __le16)__raw_readw(addr); + buf++; + count -= 2; + } + break; + } + case sizeof(u32): { + __le16 *buf = buffer; + + SSB_WARN_ON(count & 3); + while (count) { + *buf = (__force __le16)__raw_readw(addr); + buf++; + *buf = (__force __le16)__raw_readw(addr + 2); + buf++; + count -= 4; + } + break; + } + default: + SSB_WARN_ON(1); + } +unlock: + spin_unlock_irqrestore(&bus->bar_lock, flags); +} +#endif /* CONFIG_SSB_BLOCKIO */ + static void ssb_pcmcia_write8(struct ssb_device *dev, u16 offset, u8 value) { struct ssb_bus *bus = dev->bus; @@ -329,6 +387,63 @@ static void ssb_pcmcia_write32(struct ssb_device *dev, u16 offset, u32 value) spin_unlock_irqrestore(&bus->bar_lock, flags); } +#ifdef CONFIG_SSB_BLOCKIO +static void ssb_pcmcia_block_write(struct ssb_device *dev, const void *buffer, + size_t count, u16 offset, u8 reg_width) +{ + struct ssb_bus *bus = dev->bus; + unsigned long flags; + void __iomem *addr = bus->mmio + offset; + int err; + + spin_lock_irqsave(&bus->bar_lock, flags); + err = select_core_and_segment(dev, &offset); + if (unlikely(err)) + goto unlock; + switch (reg_width) { + case sizeof(u8): { + const u8 *buf = buffer; + + while (count) { + __raw_writeb(*buf, addr); + buf++; + count--; + } + break; + } + case sizeof(u16): { + const __le16 *buf = buffer; + + SSB_WARN_ON(count & 1); + while (count) { + __raw_writew((__force u16)(*buf), addr); + buf++; + count -= 2; + } + break; + } + case sizeof(u32): { + const __le16 *buf = buffer; + + SSB_WARN_ON(count & 3); + while (count) { + __raw_writew((__force u16)(*buf), addr); + buf++; + __raw_writew((__force u16)(*buf), addr + 2); + buf++; + count -= 4; + } + break; + } + default: + SSB_WARN_ON(1); + } +unlock: + mmiowb(); + spin_unlock_irqrestore(&bus->bar_lock, flags); +} +#endif /* CONFIG_SSB_BLOCKIO */ + /* Not "static", as it's used in main.c */ const struct ssb_bus_ops ssb_pcmcia_ops = { .read8 = ssb_pcmcia_read8, @@ -337,6 +452,10 @@ const struct ssb_bus_ops ssb_pcmcia_ops = { .write8 = ssb_pcmcia_write8, .write16 = ssb_pcmcia_write16, .write32 = ssb_pcmcia_write32, +#ifdef CONFIG_SSB_BLOCKIO + .block_read = ssb_pcmcia_block_read, + .block_write = ssb_pcmcia_block_write, +#endif }; static int ssb_pcmcia_sprom_command(struct ssb_bus *bus, u8 command) diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index a8ca396f810a..9f95afd0e9e3 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -78,6 +78,12 @@ struct ssb_bus_ops { void (*write8)(struct ssb_device *dev, u16 offset, u8 value); void (*write16)(struct ssb_device *dev, u16 offset, u16 value); void (*write32)(struct ssb_device *dev, u16 offset, u32 value); +#ifdef CONFIG_SSB_BLOCKIO + void (*block_read)(struct ssb_device *dev, void *buffer, + size_t count, u16 offset, u8 reg_width); + void (*block_write)(struct ssb_device *dev, const void *buffer, + size_t count, u16 offset, u8 reg_width); +#endif }; @@ -374,6 +380,19 @@ static inline void ssb_write32(struct ssb_device *dev, u16 offset, u32 value) { dev->ops->write32(dev, offset, value); } +#ifdef CONFIG_SSB_BLOCKIO +static inline void ssb_block_read(struct ssb_device *dev, void *buffer, + size_t count, u16 offset, u8 reg_width) +{ + dev->ops->block_read(dev, buffer, count, offset, reg_width); +} + +static inline void ssb_block_write(struct ssb_device *dev, const void *buffer, + size_t count, u16 offset, u8 reg_width) +{ + dev->ops->block_write(dev, buffer, count, offset, reg_width); +} +#endif /* CONFIG_SSB_BLOCKIO */ /* Translation (routing) bits that need to be ORed to DMA -- cgit v1.2.3-71-gd317 From b715631fad3ed320b85d386a84a6fb0b3f86b0b9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 10 Apr 2008 01:33:47 -0700 Subject: socket: sk_filter minor cleanups Some minor style cleanups: * Move __KERNEL__ definitions to one place in filter.h * Use const for sk_filter_len * Line wrapping * Put EXPORT_SYMBOL next to function definition Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/filter.h | 31 +++++++++++++++---------------- net/core/filter.c | 5 ++--- 2 files changed, 17 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index ddfa0372a3b7..bfc5d319b946 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -37,21 +37,6 @@ struct sock_fprog /* Required for SO_ATTACH_FILTER. */ struct sock_filter __user *filter; }; -#ifdef __KERNEL__ -struct sk_filter -{ - atomic_t refcnt; - unsigned int len; /* Number of filter blocks */ - struct rcu_head rcu; - struct sock_filter insns[0]; -}; - -static inline unsigned int sk_filter_len(struct sk_filter *fp) -{ - return fp->len*sizeof(struct sock_filter) + sizeof(*fp); -} -#endif - /* * Instruction classes */ @@ -141,10 +126,24 @@ static inline unsigned int sk_filter_len(struct sk_filter *fp) #define SKF_LL_OFF (-0x200000) #ifdef __KERNEL__ +struct sk_filter +{ + atomic_t refcnt; + unsigned int len; /* Number of filter blocks */ + struct rcu_head rcu; + struct sock_filter insns[0]; +}; + +static inline unsigned int sk_filter_len(const struct sk_filter *fp) +{ + return fp->len * sizeof(struct sock_filter) + sizeof(*fp); +} + struct sk_buff; struct sock; -extern unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen); +extern unsigned int sk_run_filter(struct sk_buff *skb, + struct sock_filter *filter, int flen); extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); extern int sk_detach_filter(struct sock *sk); extern int sk_chk_filter(struct sock_filter *filter, int flen); diff --git a/net/core/filter.c b/net/core/filter.c index e0a06942c025..85a5febab567 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -275,6 +275,7 @@ load_b: return 0; } +EXPORT_SYMBOL(sk_run_filter); /** * sk_chk_filter - verify socket filter code @@ -385,6 +386,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen) return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL; } +EXPORT_SYMBOL(sk_chk_filter); /** * sk_filter_rcu_release: Release a socket filter by rcu_head @@ -467,6 +469,3 @@ int sk_detach_filter(struct sock *sk) rcu_read_unlock_bh(); return ret; } - -EXPORT_SYMBOL(sk_chk_filter); -EXPORT_SYMBOL(sk_run_filter); -- cgit v1.2.3-71-gd317 From 43db6d65e0ef943a361cb91f8baa49132009227b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 10 Apr 2008 01:43:09 -0700 Subject: socket: sk_filter deinline The sk_filter function is too big to be inlined. This saves 2296 bytes of text on allyesconfig. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/filter.h | 1 + include/net/sock.h | 35 ----------------------------------- net/core/filter.c | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index bfc5d319b946..673e5677ebcc 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -142,6 +142,7 @@ static inline unsigned int sk_filter_len(const struct sk_filter *fp) struct sk_buff; struct sock; +extern int sk_filter(struct sock *sk, struct sk_buff *skb); extern unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen); extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); diff --git a/include/net/sock.h b/include/net/sock.h index f4fdd101c9a2..09255eae93e9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -927,41 +927,6 @@ extern void sk_common_release(struct sock *sk); /* Initialise core socket variables */ extern void sock_init_data(struct socket *sock, struct sock *sk); -/** - * sk_filter - run a packet through a socket filter - * @sk: sock associated with &sk_buff - * @skb: buffer to filter - * @needlock: set to 1 if the sock is not locked by caller. - * - * Run the filter code and then cut skb->data to correct size returned by - * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller - * than pkt_len we keep whole skb->data. This is the socket level - * wrapper to sk_run_filter. It returns 0 if the packet should - * be accepted or -EPERM if the packet should be tossed. - * - */ - -static inline int sk_filter(struct sock *sk, struct sk_buff *skb) -{ - int err; - struct sk_filter *filter; - - err = security_sock_rcv_skb(sk, skb); - if (err) - return err; - - rcu_read_lock_bh(); - filter = rcu_dereference(sk->sk_filter); - if (filter) { - unsigned int pkt_len = sk_run_filter(skb, filter->insns, - filter->len); - err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; - } - rcu_read_unlock_bh(); - - return err; -} - /** * sk_filter_release: Release a socket filter * @sk: socket diff --git a/net/core/filter.c b/net/core/filter.c index 85a5febab567..bbb53c69857c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -63,6 +63,41 @@ static inline void *load_pointer(struct sk_buff *skb, int k, } } +/** + * sk_filter - run a packet through a socket filter + * @sk: sock associated with &sk_buff + * @skb: buffer to filter + * @needlock: set to 1 if the sock is not locked by caller. + * + * Run the filter code and then cut skb->data to correct size returned by + * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller + * than pkt_len we keep whole skb->data. This is the socket level + * wrapper to sk_run_filter. It returns 0 if the packet should + * be accepted or -EPERM if the packet should be tossed. + * + */ +int sk_filter(struct sock *sk, struct sk_buff *skb) +{ + int err; + struct sk_filter *filter; + + err = security_sock_rcv_skb(sk, skb); + if (err) + return err; + + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); + if (filter) { + unsigned int pkt_len = sk_run_filter(skb, filter->insns, + filter->len); + err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; + } + rcu_read_unlock_bh(); + + return err; +} +EXPORT_SYMBOL(sk_filter); + /** * sk_run_filter - run a filter on a socket * @skb: buffer to run the filter on -- cgit v1.2.3-71-gd317 From 4738c1db1593687713869fa69e733eebc7b0d6d8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Apr 2008 02:02:28 -0700 Subject: [SKFILTER]: Add SKF_ADF_NLATTR instruction SKF_ADF_NLATTR searches for a netlink attribute, which avoids manually parsing and walking attributes. It takes the offset at which to start searching in the 'A' register and the attribute type in the 'X' register and returns the offset in the 'A' register. When the attribute is not found it returns zero. A top-level attribute can be located using a filter like this (example for nfnetlink, using struct nfgenmsg): ... { /* A = offset of first attribute */ .code = BPF_LD | BPF_IMM, .k = sizeof(struct nlmsghdr) + sizeof(struct nfgenmsg) }, { /* X = CTA_PROTOINFO */ .code = BPF_LDX | BPF_IMM, .k = CTA_PROTOINFO, }, { /* A = netlink attribute offset */ .code = BPF_LD | BPF_B | BPF_ABS, .k = SKF_AD_OFF + SKF_AD_NLATTR }, { /* Exit if not found */ .code = BPF_JMP | BPF_JEQ | BPF_K, .k = 0, .jt = }, ... A nested attribute below the CTA_PROTOINFO attribute would then be parsed like this: ... { /* A += sizeof(struct nlattr) */ .code = BPF_ALU | BPF_ADD | BPF_K, .k = sizeof(struct nlattr), }, { /* X = CTA_PROTOINFO_TCP */ .code = BPF_LDX | BPF_IMM, .k = CTA_PROTOINFO_TCP, }, { /* A = netlink attribute offset */ .code = BPF_LD | BPF_B | BPF_ABS, .k = SKF_AD_OFF + SKF_AD_NLATTR }, ... The data of an attribute can be loaded into 'A' like this: ... { /* X = A (attribute offset) */ .code = BPF_MISC | BPF_TAX, }, { /* A = skb->data[X + k] */ .code = BPF_LD | BPF_B | BPF_IND, .k = sizeof(struct nlattr), }, ... Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/filter.h | 3 ++- net/core/filter.c | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 673e5677ebcc..b6ea9aa9e853 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -121,7 +121,8 @@ struct sock_fprog /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_PROTOCOL 0 #define SKF_AD_PKTTYPE 4 #define SKF_AD_IFINDEX 8 -#define SKF_AD_MAX 12 +#define SKF_AD_NLATTR 12 +#define SKF_AD_MAX 16 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/net/core/filter.c b/net/core/filter.c index bbb53c69857c..f5f3cf603064 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -303,6 +304,22 @@ load_b: case SKF_AD_IFINDEX: A = skb->dev->ifindex; continue; + case SKF_AD_NLATTR: { + struct nlattr *nla; + + if (skb_is_nonlinear(skb)) + return 0; + if (A > skb->len - sizeof(struct nlattr)) + return 0; + + nla = nla_find((struct nlattr *)&skb->data[A], + skb->len - A, X); + if (nla) + A = (void *)nla - (void *)skb->data; + else + A = 0; + continue; + } default: return 0; } -- cgit v1.2.3-71-gd317 From f3ee4010e84452aa133e5163e6cfabc52b194e94 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 10 Apr 2008 15:42:11 +0900 Subject: [IPV6]: Define constants for link-local multicast addresses. - Define link-local all-node / all-router multicast addresses. - Remove ipv6_addr_all_nodes() and ipv6_addr_all_routers(). Signed-off-by: YOSHIFUJI Hideaki --- include/linux/in6.h | 8 ++++++++ include/net/addrconf.h | 11 ----------- net/ipv6/addrconf.c | 25 +++++++------------------ net/ipv6/mcast.c | 23 ++++++++--------------- net/ipv6/ndisc.c | 5 +---- 5 files changed, 24 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index e6aa8de2b939..bc492048c349 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -48,6 +48,14 @@ extern const struct in6_addr in6addr_any; #define IN6ADDR_ANY_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } } extern const struct in6_addr in6addr_loopback; #define IN6ADDR_LOOPBACK_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } } +#ifdef __KERNEL__ +extern const struct in6_addr in6addr_linklocal_allnodes; +#define IN6ADDR_LINKLOCAL_ALLNODES_INIT \ + { { { 0xff,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } } +extern const struct in6_addr in6addr_linklocal_allrouters; +#define IN6ADDR_LINKLOCAL_ALLROUTERS_INIT \ + { { { 0xff,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2 } } } +#endif struct sockaddr_in6 { unsigned short int sin6_family; /* AF_INET6 */ diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 92af23d66eb9..0a2f0372df31 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -205,17 +205,6 @@ static inline void addrconf_addr_solict_mult(const struct in6_addr *addr, htonl(0xFF000000) | addr->s6_addr32[3]); } - -static inline void ipv6_addr_all_nodes(struct in6_addr *addr) -{ - ipv6_addr_set(addr, htonl(0xFF020000), 0, 0, htonl(0x1)); -} - -static inline void ipv6_addr_all_routers(struct in6_addr *addr) -{ - ipv6_addr_set(addr, htonl(0xFF020000), 0, 0, htonl(0x2)); -} - static inline int ipv6_addr_is_multicast(const struct in6_addr *addr) { return (addr->s6_addr32[0] & htonl(0xFF000000)) == htonl(0xFF000000); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4048c2b73b0b..7df04d294924 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -222,6 +222,8 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; +const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; /* Check if a valid qdisc is available */ static inline int addrconf_qdisc_ok(struct net_device *dev) @@ -321,7 +323,6 @@ EXPORT_SYMBOL(in6_dev_finish_destroy); static struct inet6_dev * ipv6_add_dev(struct net_device *dev) { struct inet6_dev *ndev; - struct in6_addr maddr; ASSERT_RTNL(); @@ -406,8 +407,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) rcu_assign_pointer(dev->ip6_ptr, ndev); /* Join all-node multicast group */ - ipv6_addr_all_nodes(&maddr); - ipv6_dev_mc_inc(dev, &maddr); + ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); return ndev; } @@ -433,18 +433,15 @@ static void dev_forward_change(struct inet6_dev *idev) { struct net_device *dev; struct inet6_ifaddr *ifa; - struct in6_addr addr; if (!idev) return; dev = idev->dev; if (dev && (dev->flags & IFF_MULTICAST)) { - ipv6_addr_all_routers(&addr); - if (idev->cnf.forwarding) - ipv6_dev_mc_inc(dev, &addr); + ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); else - ipv6_dev_mc_dec(dev, &addr); + ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters); } for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) { if (ifa->flags&IFA_F_TENTATIVE) @@ -2654,8 +2651,6 @@ static void addrconf_rs_timer(unsigned long data) spin_lock(&ifp->lock); if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) { - struct in6_addr all_routers; - /* The wait after the last probe can be shorter */ addrconf_mod_timer(ifp, AC_RS, (ifp->probes == ifp->idev->cnf.rtr_solicits) ? @@ -2663,9 +2658,7 @@ static void addrconf_rs_timer(unsigned long data) ifp->idev->cnf.rtr_solicit_interval); spin_unlock(&ifp->lock); - ipv6_addr_all_routers(&all_routers); - - ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers); + ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); } else { spin_unlock(&ifp->lock); /* @@ -2806,16 +2799,12 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) ifp->idev->cnf.rtr_solicits > 0 && (dev->flags&IFF_LOOPBACK) == 0 && (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { - struct in6_addr all_routers; - - ipv6_addr_all_routers(&all_routers); - /* * If a host as already performed a random delay * [...] as part of DAD [...] there is no need * to delay again before sending the first RS */ - ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers); + ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); spin_lock_bh(&ifp->lock); ifp->probes = 1; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 0a0132a1c443..c2dc2e2b6c07 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1766,10 +1766,9 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) struct inet6_dev *idev; struct sk_buff *skb; struct icmp6hdr *hdr; - struct in6_addr *snd_addr; + const struct in6_addr *snd_addr; struct in6_addr *addrp; struct in6_addr addr_buf; - struct in6_addr all_routers; int err, len, payload_len, full_len; u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, @@ -1780,11 +1779,10 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) IP6_INC_STATS(__in6_dev_get(dev), IPSTATS_MIB_OUTREQUESTS); rcu_read_unlock(); - snd_addr = addr; - if (type == ICMPV6_MGM_REDUCTION) { - snd_addr = &all_routers; - ipv6_addr_all_routers(&all_routers); - } + if (type == ICMPV6_MGM_REDUCTION) + snd_addr = &in6addr_linklocal_allrouters; + else + snd_addr = addr; len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr); payload_len = len + sizeof(ra); @@ -2309,24 +2307,19 @@ void ipv6_mc_init_dev(struct inet6_dev *idev) void ipv6_mc_destroy_dev(struct inet6_dev *idev) { struct ifmcaddr6 *i; - struct in6_addr maddr; /* Deactivate timers */ ipv6_mc_down(idev); /* Delete all-nodes address. */ - ipv6_addr_all_nodes(&maddr); - /* We cannot call ipv6_dev_mc_dec() directly, our caller in * addrconf.c has NULL'd out dev->ip6_ptr so in6_dev_get() will * fail. */ - __ipv6_dev_mc_dec(idev, &maddr); + __ipv6_dev_mc_dec(idev, &in6addr_linklocal_allnodes); - if (idev->cnf.forwarding) { - ipv6_addr_all_routers(&maddr); - __ipv6_dev_mc_dec(idev, &maddr); - } + if (idev->cnf.forwarding) + __ipv6_dev_mc_dec(idev, &in6addr_linklocal_allrouters); write_lock_bh(&idev->lock); while ((i = idev->mc_list) != NULL) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 5b9ad5e2f56d..2c74885f8355 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -818,10 +818,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) is_router = !!idev->cnf.forwarding; if (dad) { - struct in6_addr maddr; - - ipv6_addr_all_nodes(&maddr); - ndisc_send_na(dev, NULL, &maddr, &msg->target, + ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target, is_router, 0, (ifp != NULL), 1); goto out; } -- cgit v1.2.3-71-gd317 From 03e1ad7b5d871d4189b1da3125c2f12d1b5f7d0b Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Sat, 12 Apr 2008 19:07:52 -0700 Subject: LSM: Make the Labeled IPsec hooks more stack friendly The xfrm_get_policy() and xfrm_add_pol_expire() put some rather large structs on the stack to work around the LSM API. This patch attempts to fix that problem by changing the LSM API to require only the relevant "security" pointers instead of the entire SPD entry; we do this for all of the security_xfrm_policy*() functions to keep things consistent. Signed-off-by: Paul Moore Acked-by: James Morris Signed-off-by: David S. Miller --- include/linux/security.h | 48 ++++++++++++++++++++--------------------- net/key/af_key.c | 23 ++++++++++---------- net/xfrm/xfrm_policy.c | 24 +++++++++++++-------- net/xfrm/xfrm_user.c | 33 ++++++++++++++-------------- security/dummy.c | 14 ++++++------ security/security.c | 21 +++++++++--------- security/selinux/include/xfrm.h | 13 +++++------ security/selinux/xfrm.c | 39 ++++++++++++++------------------- 8 files changed, 109 insertions(+), 106 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index c673dfd4dffc..f5eb9ff47ac5 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -910,24 +910,24 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Security hooks for XFRM operations. * * @xfrm_policy_alloc_security: - * @xp contains the xfrm_policy being added to Security Policy Database - * used by the XFRM system. + * @ctxp is a pointer to the xfrm_sec_ctx being added to Security Policy + * Database used by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level policy update program (e.g., setkey). * Allocate a security structure to the xp->security field; the security * field is initialized to NULL when the xfrm_policy is allocated. * Return 0 if operation was successful (memory to allocate, legal context) * @xfrm_policy_clone_security: - * @old contains an existing xfrm_policy in the SPD. - * @new contains a new xfrm_policy being cloned from old. - * Allocate a security structure to the new->security field - * that contains the information from the old->security field. + * @old_ctx contains an existing xfrm_sec_ctx. + * @new_ctxp contains a new xfrm_sec_ctx being cloned from old. + * Allocate a security structure in new_ctxp that contains the + * information from the old_ctx structure. * Return 0 if operation was successful (memory to allocate). * @xfrm_policy_free_security: - * @xp contains the xfrm_policy + * @ctx contains the xfrm_sec_ctx * Deallocate xp->security. * @xfrm_policy_delete_security: - * @xp contains the xfrm_policy. + * @ctx contains the xfrm_sec_ctx. * Authorize deletion of xp->security. * @xfrm_state_alloc_security: * @x contains the xfrm_state being added to the Security Association @@ -947,7 +947,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @x contains the xfrm_state. * Authorize deletion of x->security. * @xfrm_policy_lookup: - * @xp contains the xfrm_policy for which the access control is being + * @ctx contains the xfrm_sec_ctx for which the access control is being * checked. * @fl_secid contains the flow security label that is used to authorize * access to the policy xp. @@ -1454,17 +1454,17 @@ struct security_operations { #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM - int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, + int (*xfrm_policy_alloc_security) (struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx); - int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new); - void (*xfrm_policy_free_security) (struct xfrm_policy *xp); - int (*xfrm_policy_delete_security) (struct xfrm_policy *xp); + int (*xfrm_policy_clone_security) (struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctx); + void (*xfrm_policy_free_security) (struct xfrm_sec_ctx *ctx); + int (*xfrm_policy_delete_security) (struct xfrm_sec_ctx *ctx); int (*xfrm_state_alloc_security) (struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx, u32 secid); void (*xfrm_state_free_security) (struct xfrm_state *x); int (*xfrm_state_delete_security) (struct xfrm_state *x); - int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 fl_secid, u8 dir); + int (*xfrm_policy_lookup)(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall); @@ -2562,16 +2562,16 @@ static inline void security_inet_conn_established(struct sock *sk, #ifdef CONFIG_SECURITY_NETWORK_XFRM -int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx); -int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new); -void security_xfrm_policy_free(struct xfrm_policy *xp); -int security_xfrm_policy_delete(struct xfrm_policy *xp); +int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx); +int security_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctxp); +void security_xfrm_policy_free(struct xfrm_sec_ctx *ctx); +int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx); int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx); int security_xfrm_state_alloc_acquire(struct xfrm_state *x, struct xfrm_sec_ctx *polsec, u32 secid); int security_xfrm_state_delete(struct xfrm_state *x); void security_xfrm_state_free(struct xfrm_state *x); -int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir); +int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); int security_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid); @@ -2579,21 +2579,21 @@ void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl); #else /* CONFIG_SECURITY_NETWORK_XFRM */ -static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) +static inline int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx) { return 0; } -static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) +static inline int security_xfrm_policy_clone(struct xfrm_sec_ctx *old, struct xfrm_sec_ctx **new_ctxp) { return 0; } -static inline void security_xfrm_policy_free(struct xfrm_policy *xp) +static inline void security_xfrm_policy_free(struct xfrm_sec_ctx *ctx) { } -static inline int security_xfrm_policy_delete(struct xfrm_policy *xp) +static inline int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) { return 0; } @@ -2619,7 +2619,7 @@ static inline int security_xfrm_state_delete(struct xfrm_state *x) return 0; } -static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) +static inline int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) { return 0; } diff --git a/net/key/af_key.c b/net/key/af_key.c index 6db58924368a..1fb0fe42a72e 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2292,7 +2292,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h goto out; } - err = security_xfrm_policy_alloc(xp, uctx); + err = security_xfrm_policy_alloc(&xp->security, uctx); kfree(uctx); if (err) @@ -2352,10 +2352,11 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg int err; struct sadb_address *sa; struct sadb_x_policy *pol; - struct xfrm_policy *xp, tmp; + struct xfrm_policy *xp; struct xfrm_selector sel; struct km_event c; struct sadb_x_sec_ctx *sec_ctx; + struct xfrm_sec_ctx *pol_ctx; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || @@ -2385,25 +2386,23 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg sel.dport_mask = htons(0xffff); sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; - memset(&tmp, 0, sizeof(struct xfrm_policy)); - if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); if (!uctx) return -ENOMEM; - err = security_xfrm_policy_alloc(&tmp, uctx); + err = security_xfrm_policy_alloc(&pol_ctx, uctx); kfree(uctx); - if (err) return err; - } - - xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir-1, - &sel, tmp.security, 1, &err); - security_xfrm_policy_free(&tmp); + } else + pol_ctx = NULL; + xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, + pol->sadb_x_policy_dir - 1, &sel, pol_ctx, + 1, &err); + security_xfrm_policy_free(pol_ctx); if (xp == NULL) return -ENOENT; @@ -3298,7 +3297,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, if ((*dir = verify_sec_ctx_len(p))) goto out; uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); - *dir = security_xfrm_policy_alloc(xp, uctx); + *dir = security_xfrm_policy_alloc(&xp->security, uctx); kfree(uctx); if (*dir) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 15d73e47cc2c..ab4d0e598a2c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -263,7 +263,7 @@ void xfrm_policy_destroy(struct xfrm_policy *policy) list_del(&policy->bytype); write_unlock_bh(&xfrm_policy_lock); - security_xfrm_policy_free(policy); + security_xfrm_policy_free(policy->security); kfree(policy); } EXPORT_SYMBOL(xfrm_policy_destroy); @@ -676,7 +676,8 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, xfrm_sec_ctx_match(ctx, pol->security)) { xfrm_pol_hold(pol); if (delete) { - *err = security_xfrm_policy_delete(pol); + *err = security_xfrm_policy_delete( + pol->security); if (*err) { write_unlock_bh(&xfrm_policy_lock); return pol; @@ -718,7 +719,8 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, if (pol->type == type && pol->index == id) { xfrm_pol_hold(pol); if (delete) { - *err = security_xfrm_policy_delete(pol); + *err = security_xfrm_policy_delete( + pol->security); if (*err) { write_unlock_bh(&xfrm_policy_lock); return pol; @@ -756,7 +758,7 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) &xfrm_policy_inexact[dir], bydst) { if (pol->type != type) continue; - err = security_xfrm_policy_delete(pol); + err = security_xfrm_policy_delete(pol->security); if (err) { xfrm_audit_policy_delete(pol, 0, audit_info->loginuid, @@ -770,7 +772,8 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) bydst) { if (pol->type != type) continue; - err = security_xfrm_policy_delete(pol); + err = security_xfrm_policy_delete( + pol->security); if (err) { xfrm_audit_policy_delete(pol, 0, audit_info->loginuid, @@ -931,7 +934,8 @@ static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, match = xfrm_selector_match(sel, fl, family); if (match) - ret = security_xfrm_policy_lookup(pol, fl->secid, dir); + ret = security_xfrm_policy_lookup(pol->security, fl->secid, + dir); return ret; } @@ -1048,8 +1052,9 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc int err = 0; if (match) { - err = security_xfrm_policy_lookup(pol, fl->secid, - policy_to_flow_dir(dir)); + err = security_xfrm_policy_lookup(pol->security, + fl->secid, + policy_to_flow_dir(dir)); if (!err) xfrm_pol_hold(pol); else if (err == -ESRCH) @@ -1138,7 +1143,8 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) if (newp) { newp->selector = old->selector; - if (security_xfrm_policy_clone(old, newp)) { + if (security_xfrm_policy_clone(old->security, + &newp->security)) { kfree(newp); return NULL; /* ENOMEM */ } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5578c909fcf6..ecf9d67daef5 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -959,7 +959,7 @@ static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct nlattr **attrs return 0; uctx = nla_data(rt); - return security_xfrm_policy_alloc(pol, uctx); + return security_xfrm_policy_alloc(&pol->security, uctx); } static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, @@ -1143,7 +1143,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, NETLINK_CB(skb).sid); if (err) { - security_xfrm_policy_free(xp); + security_xfrm_policy_free(xp->security); kfree(xp); return err; } @@ -1337,22 +1337,23 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, xp = xfrm_policy_byid(type, p->dir, p->index, delete, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; - struct xfrm_policy tmp; + struct xfrm_sec_ctx *ctx; err = verify_sec_ctx_len(attrs); if (err) return err; - memset(&tmp, 0, sizeof(struct xfrm_policy)); if (rt) { struct xfrm_user_sec_ctx *uctx = nla_data(rt); - if ((err = security_xfrm_policy_alloc(&tmp, uctx))) + err = security_xfrm_policy_alloc(&ctx, uctx); + if (err) return err; - } - xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, + } else + ctx = NULL; + xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, ctx, delete, &err); - security_xfrm_policy_free(&tmp); + security_xfrm_policy_free(ctx); } if (xp == NULL) return -ENOENT; @@ -1572,26 +1573,26 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, xp = xfrm_policy_byid(type, p->dir, p->index, 0, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; - struct xfrm_policy tmp; + struct xfrm_sec_ctx *ctx; err = verify_sec_ctx_len(attrs); if (err) return err; - memset(&tmp, 0, sizeof(struct xfrm_policy)); if (rt) { struct xfrm_user_sec_ctx *uctx = nla_data(rt); - if ((err = security_xfrm_policy_alloc(&tmp, uctx))) + err = security_xfrm_policy_alloc(&ctx, uctx); + if (err) return err; - } - xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, - 0, &err); - security_xfrm_policy_free(&tmp); + } else + ctx = NULL; + xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, ctx, 0, &err); + security_xfrm_policy_free(ctx); } - if (xp == NULL) return -ENOENT; + read_lock(&xp->lock); if (xp->dead) { read_unlock(&xp->lock); diff --git a/security/dummy.c b/security/dummy.c index 78d8f92310a4..480366f9c41d 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -876,22 +876,23 @@ static inline void dummy_req_classify_flow(const struct request_sock *req, #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM -static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp, - struct xfrm_user_sec_ctx *sec_ctx) +static int dummy_xfrm_policy_alloc_security(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *sec_ctx) { return 0; } -static inline int dummy_xfrm_policy_clone_security(struct xfrm_policy *old, struct xfrm_policy *new) +static inline int dummy_xfrm_policy_clone_security(struct xfrm_sec_ctx *old_ctx, + struct xfrm_sec_ctx **new_ctxp) { return 0; } -static void dummy_xfrm_policy_free_security(struct xfrm_policy *xp) +static void dummy_xfrm_policy_free_security(struct xfrm_sec_ctx *ctx) { } -static int dummy_xfrm_policy_delete_security(struct xfrm_policy *xp) +static int dummy_xfrm_policy_delete_security(struct xfrm_sec_ctx *ctx) { return 0; } @@ -911,7 +912,8 @@ static int dummy_xfrm_state_delete_security(struct xfrm_state *x) return 0; } -static int dummy_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) +static int dummy_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, + u32 sk_sid, u8 dir) { return 0; } diff --git a/security/security.c b/security/security.c index b1387a6b416d..c9ff7d18c2f4 100644 --- a/security/security.c +++ b/security/security.c @@ -1014,26 +1014,27 @@ void security_inet_conn_established(struct sock *sk, #ifdef CONFIG_SECURITY_NETWORK_XFRM -int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) +int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx) { - return security_ops->xfrm_policy_alloc_security(xp, sec_ctx); + return security_ops->xfrm_policy_alloc_security(ctxp, sec_ctx); } EXPORT_SYMBOL(security_xfrm_policy_alloc); -int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) +int security_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, + struct xfrm_sec_ctx **new_ctxp) { - return security_ops->xfrm_policy_clone_security(old, new); + return security_ops->xfrm_policy_clone_security(old_ctx, new_ctxp); } -void security_xfrm_policy_free(struct xfrm_policy *xp) +void security_xfrm_policy_free(struct xfrm_sec_ctx *ctx) { - security_ops->xfrm_policy_free_security(xp); + security_ops->xfrm_policy_free_security(ctx); } EXPORT_SYMBOL(security_xfrm_policy_free); -int security_xfrm_policy_delete(struct xfrm_policy *xp) +int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) { - return security_ops->xfrm_policy_delete_security(xp); + return security_ops->xfrm_policy_delete_security(ctx); } int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) @@ -1065,9 +1066,9 @@ void security_xfrm_state_free(struct xfrm_state *x) security_ops->xfrm_state_free_security(x); } -int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) +int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) { - return security_ops->xfrm_policy_lookup(xp, fl_secid, dir); + return security_ops->xfrm_policy_lookup(ctx, fl_secid, dir); } int security_xfrm_state_pol_flow_match(struct xfrm_state *x, diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 36b0510efa7b..289e24b39e3e 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -7,16 +7,17 @@ #ifndef _SELINUX_XFRM_H_ #define _SELINUX_XFRM_H_ -int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, - struct xfrm_user_sec_ctx *sec_ctx); -int selinux_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new); -void selinux_xfrm_policy_free(struct xfrm_policy *xp); -int selinux_xfrm_policy_delete(struct xfrm_policy *xp); +int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *sec_ctx); +int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, + struct xfrm_sec_ctx **new_ctxp); +void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx); +int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx); int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx, u32 secid); void selinux_xfrm_state_free(struct xfrm_state *x); int selinux_xfrm_state_delete(struct xfrm_state *x); -int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir); +int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 7e158205d081..874d17c83c61 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -77,20 +77,18 @@ static inline int selinux_authorizable_xfrm(struct xfrm_state *x) * LSM hook implementation that authorizes that a flow can use * a xfrm policy rule. */ -int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) +int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) { int rc; u32 sel_sid; - struct xfrm_sec_ctx *ctx; /* Context sid is either set to label or ANY_ASSOC */ - if ((ctx = xp->security)) { + if (ctx) { if (!selinux_authorizable_ctx(ctx)) return -EINVAL; sel_sid = ctx->ctx_sid; - } - else + } else /* * All flows should be treated as polmatch'ing an * otherwise applicable "non-labeled" policy. This @@ -103,7 +101,7 @@ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) NULL); if (rc == -EACCES) - rc = -ESRCH; + return -ESRCH; return rc; } @@ -287,15 +285,14 @@ out2: * LSM hook implementation that allocs and transfers uctx spec to * xfrm_policy. */ -int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, - struct xfrm_user_sec_ctx *uctx) +int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *uctx) { int err; - BUG_ON(!xp); BUG_ON(!uctx); - err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, 0); + err = selinux_xfrm_sec_ctx_alloc(ctxp, uctx, 0); if (err == 0) atomic_inc(&selinux_xfrm_refcount); @@ -307,32 +304,29 @@ int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, * LSM hook implementation that copies security data structure from old to * new for policy cloning. */ -int selinux_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) +int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, + struct xfrm_sec_ctx **new_ctxp) { - struct xfrm_sec_ctx *old_ctx, *new_ctx; - - old_ctx = old->security; + struct xfrm_sec_ctx *new_ctx; if (old_ctx) { - new_ctx = new->security = kmalloc(sizeof(*new_ctx) + - old_ctx->ctx_len, - GFP_KERNEL); - + new_ctx = kmalloc(sizeof(*old_ctx) + old_ctx->ctx_len, + GFP_KERNEL); if (!new_ctx) return -ENOMEM; memcpy(new_ctx, old_ctx, sizeof(*new_ctx)); memcpy(new_ctx->ctx_str, old_ctx->ctx_str, new_ctx->ctx_len); + *new_ctxp = new_ctx; } return 0; } /* - * LSM hook implementation that frees xfrm_policy security information. + * LSM hook implementation that frees xfrm_sec_ctx security information. */ -void selinux_xfrm_policy_free(struct xfrm_policy *xp) +void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx) { - struct xfrm_sec_ctx *ctx = xp->security; if (ctx) kfree(ctx); } @@ -340,10 +334,9 @@ void selinux_xfrm_policy_free(struct xfrm_policy *xp) /* * LSM hook implementation that authorizes deletion of labeled policies. */ -int selinux_xfrm_policy_delete(struct xfrm_policy *xp) +int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) { struct task_security_struct *tsec = current->security; - struct xfrm_sec_ctx *ctx = xp->security; int rc = 0; if (ctx) { -- cgit v1.2.3-71-gd317 From cee8947338d46bccece54c752bf6cd4043035f05 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Sun, 13 Apr 2008 23:21:16 -0700 Subject: [IPV6] MROUTE: Do not call ipv6_find_idev() directly. Since NETDEV_REGISTER notifier chain is responsible for creating inet6_dev{}, we do not need to call ipv6_find_idev() directly here. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/mroute6.h | 3 --- net/ipv6/addrconf.c | 2 +- net/ipv6/ip6mr.c | 5 ----- 3 files changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index f6469fb90840..e7989593142b 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -117,9 +117,6 @@ struct sioc_mif_req6 #include /* for struct sk_buff_head */ -struct net_device; -struct inet6_dev *ipv6_find_idev(struct net_device *dev); - #ifdef CONFIG_IPV6_MROUTE static inline int ip6_mroute_opt(int opt) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e93fa62089f8..9d49ed2578d7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -412,7 +412,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) return ndev; } -struct inet6_dev * ipv6_find_idev(struct net_device *dev) +static struct inet6_dev * ipv6_find_idev(struct net_device *dev) { struct inet6_dev *idev; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 94ede696da2a..6e2e3c957a31 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -435,7 +435,6 @@ static void reg_vif_setup(struct net_device *dev) static struct net_device *ip6mr_reg_vif(void) { struct net_device *dev; - struct inet6_dev *in_dev; dev = alloc_netdev(sizeof(struct net_device_stats), "pim6reg", reg_vif_setup); @@ -449,10 +448,6 @@ static struct net_device *ip6mr_reg_vif(void) } dev->iflink = 0; - in_dev = ipv6_find_idev(dev); - if (!in_dev) - goto failure; - if (dev_open(dev)) goto failure; -- cgit v1.2.3-71-gd317 From f525c06d12b72cddb085df7f6f348c3c5a39b3ce Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 14 Apr 2008 00:04:12 -0700 Subject: [SKB]: __skb_dequeue = skb_peek + __skb_unlink By rearranging the order of declarations, __skb_dequeue() is expressed in terms of * skb_peek() and * __skb_unlink(), thus in effect mirroring the analogue implementation of __skb_dequeue_tail(). Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/linux/skbuff.h | 47 ++++++++++++++++------------------------------- 1 file changed, 16 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e517701c25ba..c2116200580a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -734,35 +734,6 @@ static inline void __skb_queue_tail(struct sk_buff_head *list, next->prev = prev->next = newsk; } - -/** - * __skb_dequeue - remove from the head of the queue - * @list: list to dequeue from - * - * Remove the head of the list. This function does not take any locks - * so must be used with appropriate locks held only. The head item is - * returned or %NULL if the list is empty. - */ -extern struct sk_buff *skb_dequeue(struct sk_buff_head *list); -static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) -{ - struct sk_buff *next, *prev, *result; - - prev = (struct sk_buff *) list; - next = prev->next; - result = NULL; - if (next != prev) { - result = next; - next = next->next; - list->qlen--; - next->prev = prev; - prev->next = next; - result->next = result->prev = NULL; - } - return result; -} - - /* * Insert a packet on a list. */ @@ -803,8 +774,22 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) prev->next = next; } - -/* XXX: more streamlined implementation */ +/** + * __skb_dequeue - remove from the head of the queue + * @list: list to dequeue from + * + * Remove the head of the list. This function does not take any locks + * so must be used with appropriate locks held only. The head item is + * returned or %NULL if the list is empty. + */ +extern struct sk_buff *skb_dequeue(struct sk_buff_head *list); +static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) +{ + struct sk_buff *skb = skb_peek(list); + if (skb) + __skb_unlink(skb, list); + return skb; +} /** * __skb_dequeue_tail - remove from the tail of the queue -- cgit v1.2.3-71-gd317 From bf299275882624b1908521ee8074df85160e9679 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 14 Apr 2008 00:04:51 -0700 Subject: [SKB]: __skb_queue_after(prev) = __skb_insert(prev, prev->next) By reordering, __skb_queue_after() is expressed in terms of __skb_insert(). Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/linux/skbuff.h | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c2116200580a..bb107ab675fc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -663,11 +663,21 @@ static inline void skb_queue_head_init_class(struct sk_buff_head *list, } /* - * Insert an sk_buff at the start of a list. + * Insert an sk_buff on a list. * * The "__skb_xxxx()" functions are the non-atomic ones that * can only be called with interrupts disabled. */ +extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); +static inline void __skb_insert(struct sk_buff *newsk, + struct sk_buff *prev, struct sk_buff *next, + struct sk_buff_head *list) +{ + newsk->next = next; + newsk->prev = prev; + next->prev = prev->next = newsk; + list->qlen++; +} /** * __skb_queue_after - queue a buffer at the list head @@ -684,13 +694,7 @@ static inline void __skb_queue_after(struct sk_buff_head *list, struct sk_buff *prev, struct sk_buff *newsk) { - struct sk_buff *next; - list->qlen++; - - next = prev->next; - newsk->next = next; - newsk->prev = prev; - next->prev = prev->next = newsk; + __skb_insert(newsk, prev, prev->next, list); } /** @@ -734,20 +738,6 @@ static inline void __skb_queue_tail(struct sk_buff_head *list, next->prev = prev->next = newsk; } -/* - * Insert a packet on a list. - */ -extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); -static inline void __skb_insert(struct sk_buff *newsk, - struct sk_buff *prev, struct sk_buff *next, - struct sk_buff_head *list) -{ - newsk->next = next; - newsk->prev = prev; - next->prev = prev->next = newsk; - list->qlen++; -} - /* * Place a packet after a given packet in a list. */ -- cgit v1.2.3-71-gd317 From 7de6c033367ab86f39c7723392caf73325cbf286 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 14 Apr 2008 00:05:09 -0700 Subject: [SKB]: __skb_append = __skb_queue_after This expresses __skb_append in terms of __skb_queue_after, exploiting that __skb_append(old, new, list) = __skb_queue_after(list, old, new). Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 +++--------- include/net/tcp.h | 2 +- net/core/skbuff.c | 2 +- net/ipv4/tcp_input.c | 2 +- 4 files changed, 6 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bb107ab675fc..83c851846829 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -697,6 +697,9 @@ static inline void __skb_queue_after(struct sk_buff_head *list, __skb_insert(newsk, prev, prev->next, list); } +extern void skb_append(struct sk_buff *old, struct sk_buff *newsk, + struct sk_buff_head *list); + /** * __skb_queue_head - queue a buffer at the list head * @list: list to use @@ -738,15 +741,6 @@ static inline void __skb_queue_tail(struct sk_buff_head *list, next->prev = prev->next = newsk; } -/* - * Place a packet after a given packet in a list. - */ -extern void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); -static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) -{ - __skb_insert(newsk, old, old->next, list); -} - /* * remove sk_buff from list. _Must_ be called atomically, and with * the list known.. diff --git a/include/net/tcp.h b/include/net/tcp.h index 58d82822414d..2ab350eca02e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1247,7 +1247,7 @@ static inline void tcp_insert_write_queue_after(struct sk_buff *skb, struct sk_buff *buff, struct sock *sk) { - __skb_append(skb, buff, &sk->sk_write_queue); + __skb_queue_after(&sk->sk_write_queue, skb, buff); } /* Insert skb between prev and next on the write queue of sk. */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e4259215607f..4cd12d99b12e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1860,7 +1860,7 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head unsigned long flags; spin_lock_irqsave(&list->lock, flags); - __skb_append(old, newsk, list); + __skb_queue_after(list, old, newsk); spin_unlock_irqrestore(&list->lock, flags); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6e46b4c0f28c..743611956045 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3968,7 +3968,7 @@ drop: u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (seq == TCP_SKB_CB(skb1)->end_seq) { - __skb_append(skb1, skb, &tp->out_of_order_queue); + __skb_queue_after(&tp->out_of_order_queue, skb1, skb); if (!tp->rx_opt.num_sacks || tp->selective_acks[0].end_seq != seq) -- cgit v1.2.3-71-gd317 From f5572855ec492334d8c3ec0e0e86c31865d5cf07 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 14 Apr 2008 00:05:28 -0700 Subject: [SKB]: __skb_queue_tail = __skb_insert before This expresses __skb_queue_tail() in terms of __skb_insert(), using __skb_insert_before() as auxiliary function. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/linux/skbuff.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 83c851846829..11fd9f2c4093 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -700,6 +700,13 @@ static inline void __skb_queue_after(struct sk_buff_head *list, extern void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); +static inline void __skb_queue_before(struct sk_buff_head *list, + struct sk_buff *next, + struct sk_buff *newsk) +{ + __skb_insert(newsk, next->prev, next, list); +} + /** * __skb_queue_head - queue a buffer at the list head * @list: list to use @@ -731,14 +738,7 @@ extern void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) { - struct sk_buff *prev, *next; - - list->qlen++; - next = (struct sk_buff *)list; - prev = next->prev; - newsk->next = next; - newsk->prev = prev; - next->prev = prev->next = newsk; + __skb_queue_before(list, (struct sk_buff *)list, newsk); } /* -- cgit v1.2.3-71-gd317 From 666953df353194bef76086fa3f126241cbac3e3a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 14 Apr 2008 09:56:02 +0200 Subject: [NETFILTER]: ip_tables: per-netns FILTER/MANGLE/RAW tables for real Commit 9335f047fe61587ec82ff12fbb1220bcfdd32006 aka "[NETFILTER]: ip_tables: per-netns FILTER, MANGLE, RAW" added per-netns _view_ of iptables rules. They were shown to user, but ignored by filtering code. Now that it's possible to at least ping loopback, per-netns tables can affect filtering decisions. netns is taken in case of PRE_ROUTING, LOCAL_IN -- from in device, POST_ROUTING, LOCAL_OUT -- from out device, FORWARD -- from in device which should be equal to out device's netns. This code is relatively new, so BUG_ON was plugged. Wrappers were added to a) keep code the same from CONFIG_NET_NS=n users (overwhelming majority), b) consolidate code in one place -- similar changes will be done in ipv6 and arp netfilter code. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 54 ++++++++++++++++++++++++++++++++++++- net/ipv4/netfilter/iptable_filter.c | 19 ++++++++++--- net/ipv4/netfilter/iptable_mangle.c | 49 ++++++++++++++++++++++++++++----- net/ipv4/netfilter/iptable_raw.c | 6 +++-- 4 files changed, 115 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 89e6c72ad295..66bc52060fd6 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -6,11 +6,13 @@ #include #include #include +#include #include #include #include #include #include +#include #endif #include @@ -76,7 +78,6 @@ extern void netfilter_init(void); #define NF_MAX_HOOKS 8 struct sk_buff; -struct net_device; typedef unsigned int nf_hookfn(unsigned int hooknum, struct sk_buff *skb, @@ -320,5 +321,56 @@ extern void (*nf_ct_destroy)(struct nf_conntrack *); static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} #endif +static inline struct net *nf_pre_routing_net(const struct net_device *in, + const struct net_device *out) +{ +#ifdef CONFIG_NET_NS + return in->nd_net; +#else + return &init_net; +#endif +} + +static inline struct net *nf_local_in_net(const struct net_device *in, + const struct net_device *out) +{ +#ifdef CONFIG_NET_NS + return in->nd_net; +#else + return &init_net; +#endif +} + +static inline struct net *nf_forward_net(const struct net_device *in, + const struct net_device *out) +{ +#ifdef CONFIG_NET_NS + BUG_ON(in->nd_net != out->nd_net); + return in->nd_net; +#else + return &init_net; +#endif +} + +static inline struct net *nf_local_out_net(const struct net_device *in, + const struct net_device *out) +{ +#ifdef CONFIG_NET_NS + return out->nd_net; +#else + return &init_net; +#endif +} + +static inline struct net *nf_post_routing_net(const struct net_device *in, + const struct net_device *out) +{ +#ifdef CONFIG_NET_NS + return out->nd_net; +#else + return &init_net; +#endif +} + #endif /*__KERNEL__*/ #endif /*__LINUX_NETFILTER_H*/ diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 69f3d7e6e96f..7fcf60adbbed 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -62,6 +62,17 @@ static struct xt_table packet_filter = { }; /* The work comes in here from netfilter.c. */ +static unsigned int +ipt_local_in_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(skb, hook, in, out, + nf_local_in_net(in, out)->ipv4.iptable_filter); +} + static unsigned int ipt_hook(unsigned int hook, struct sk_buff *skb, @@ -69,7 +80,8 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter); + return ipt_do_table(skb, hook, in, out, + nf_forward_net(in, out)->ipv4.iptable_filter); } static unsigned int @@ -88,12 +100,13 @@ ipt_local_out_hook(unsigned int hook, return NF_ACCEPT; } - return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter); + return ipt_do_table(skb, hook, in, out, + nf_local_out_net(in, out)->ipv4.iptable_filter); } static struct nf_hook_ops ipt_ops[] __read_mostly = { { - .hook = ipt_hook, + .hook = ipt_local_in_hook, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_IN, diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index c55a210853a7..ba827035b691 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -74,13 +74,47 @@ static struct xt_table packet_mangler = { /* The work comes in here from netfilter.c. */ static unsigned int -ipt_route_hook(unsigned int hook, +ipt_pre_routing_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(skb, hook, in, out, + nf_pre_routing_net(in, out)->ipv4.iptable_mangle); +} + +static unsigned int +ipt_post_routing_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(skb, hook, in, out, + nf_post_routing_net(in, out)->ipv4.iptable_mangle); +} + +static unsigned int +ipt_local_in_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(skb, hook, in, out, + nf_local_in_net(in, out)->ipv4.iptable_mangle); +} + +static unsigned int +ipt_forward_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle); + return ipt_do_table(skb, hook, in, out, + nf_forward_net(in, out)->ipv4.iptable_mangle); } static unsigned int @@ -112,7 +146,8 @@ ipt_local_hook(unsigned int hook, daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle); + ret = ipt_do_table(skb, hook, in, out, + nf_local_out_net(in, out)->ipv4.iptable_mangle); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { iph = ip_hdr(skb); @@ -130,21 +165,21 @@ ipt_local_hook(unsigned int hook, static struct nf_hook_ops ipt_ops[] __read_mostly = { { - .hook = ipt_route_hook, + .hook = ipt_pre_routing_hook, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_MANGLE, }, { - .hook = ipt_route_hook, + .hook = ipt_local_in_hook, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_MANGLE, }, { - .hook = ipt_route_hook, + .hook = ipt_forward_hook, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_FORWARD, @@ -158,7 +193,7 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { .priority = NF_IP_PRI_MANGLE, }, { - .hook = ipt_route_hook, + .hook = ipt_post_routing_hook, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_POST_ROUTING, diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index e41fe8ca4e1c..4b689742d58b 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -52,7 +52,8 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw); + return ipt_do_table(skb, hook, in, out, + nf_pre_routing_net(in, out)->ipv4.iptable_raw); } static unsigned int @@ -70,7 +71,8 @@ ipt_local_hook(unsigned int hook, "packet.\n"); return NF_ACCEPT; } - return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw); + return ipt_do_table(skb, hook, in, out, + nf_local_out_net(in, out)->ipv4.iptable_raw); } /* 'raw' is the very first table. */ -- cgit v1.2.3-71-gd317 From b9f61b160336da5eaaacb0cb41ebe32169e3bde5 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 14 Apr 2008 09:56:04 +0200 Subject: [NETFILTER]: xt_sctp: simplify xt_sctp.h The use of xt_sctp.h flagged up -Wshadow warnings in userspace, which prompted me to look at it and clean it up. Basic operations have been directly replaced by library calls (memcpy, memset is both available in the kernel and userspace, and usually faster than a self-made loop). The is_set and is_clear functions now use a processing time shortcut, too. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_sctp.h | 84 ++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_sctp.h b/include/linux/netfilter/xt_sctp.h index dd5a4fd4cfd3..32000ba6ecef 100644 --- a/include/linux/netfilter/xt_sctp.h +++ b/include/linux/netfilter/xt_sctp.h @@ -37,68 +37,54 @@ struct xt_sctp_info { #define SCTP_CHUNKMAP_SET(chunkmap, type) \ do { \ - chunkmap[type / bytes(u_int32_t)] |= \ + (chunkmap)[type / bytes(u_int32_t)] |= \ 1 << (type % bytes(u_int32_t)); \ } while (0) #define SCTP_CHUNKMAP_CLEAR(chunkmap, type) \ do { \ - chunkmap[type / bytes(u_int32_t)] &= \ + (chunkmap)[type / bytes(u_int32_t)] &= \ ~(1 << (type % bytes(u_int32_t))); \ } while (0) #define SCTP_CHUNKMAP_IS_SET(chunkmap, type) \ ({ \ - (chunkmap[type / bytes (u_int32_t)] & \ + ((chunkmap)[type / bytes (u_int32_t)] & \ (1 << (type % bytes (u_int32_t)))) ? 1: 0; \ }) -#define SCTP_CHUNKMAP_RESET(chunkmap) \ - do { \ - int i; \ - for (i = 0; i < ARRAY_SIZE(chunkmap); i++) \ - chunkmap[i] = 0; \ - } while (0) - -#define SCTP_CHUNKMAP_SET_ALL(chunkmap) \ - do { \ - int i; \ - for (i = 0; i < ARRAY_SIZE(chunkmap); i++) \ - chunkmap[i] = ~0; \ - } while (0) - -#define SCTP_CHUNKMAP_COPY(destmap, srcmap) \ - do { \ - int i; \ - for (i = 0; i < ARRAY_SIZE(srcmap); i++) \ - destmap[i] = srcmap[i]; \ - } while (0) - -#define SCTP_CHUNKMAP_IS_CLEAR(chunkmap) \ -({ \ - int i; \ - int flag = 1; \ - for (i = 0; i < ARRAY_SIZE(chunkmap); i++) { \ - if (chunkmap[i]) { \ - flag = 0; \ - break; \ - } \ - } \ - flag; \ -}) - -#define SCTP_CHUNKMAP_IS_ALL_SET(chunkmap) \ -({ \ - int i; \ - int flag = 1; \ - for (i = 0; i < ARRAY_SIZE(chunkmap); i++) { \ - if (chunkmap[i] != ~0) { \ - flag = 0; \ - break; \ - } \ - } \ - flag; \ -}) +#define SCTP_CHUNKMAP_RESET(chunkmap) \ + memset((chunkmap), 0, sizeof(chunkmap)) + +#define SCTP_CHUNKMAP_SET_ALL(chunkmap) \ + memset((chunkmap), ~0U, sizeof(chunkmap)) + +#define SCTP_CHUNKMAP_COPY(destmap, srcmap) \ + memcpy((destmap), (srcmap), sizeof(srcmap)) + +#define SCTP_CHUNKMAP_IS_CLEAR(chunkmap) \ + __sctp_chunkmap_is_clear((chunkmap), ARRAY_SIZE(chunkmap)) +static inline bool +__sctp_chunkmap_is_clear(const u_int32_t *chunkmap, unsigned int n) +{ + unsigned int i; + for (i = 0; i < n; ++i) + if (chunkmap[i]) + return false; + return true; +} + +#define SCTP_CHUNKMAP_IS_ALL_SET(chunkmap) \ + __sctp_chunkmap_is_all_set((chunkmap), ARRAY_SIZE(chunkmap)) +static inline bool +__sctp_chunkmap_is_all_set(const u_int32_t *chunkmap, unsigned int n) +{ + unsigned int i; + for (i = 0; i < n; ++i) + if (chunkmap[i] != ~0U) + return false; + return true; +} #endif /* _XT_SCTP_H_ */ -- cgit v1.2.3-71-gd317 From 5452e425adfdfc4647b618e303f73d48f2405b0e Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 14 Apr 2008 11:15:35 +0200 Subject: [NETFILTER]: annotate {arp,ip,ip6,x}tables with const Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 4 ++-- net/ipv4/netfilter/arp_tables.c | 31 ++++++++++++++++--------------- net/ipv4/netfilter/arpt_mangle.c | 2 +- net/ipv4/netfilter/ip_tables.c | 31 ++++++++++++++++--------------- net/ipv6/netfilter/ip6_tables.c | 29 +++++++++++++++-------------- net/netfilter/x_tables.c | 18 +++++++++--------- 6 files changed, 59 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index b2c62cc618f5..2326296b6f25 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -430,13 +430,13 @@ extern int xt_compat_add_offset(int af, unsigned int offset, short delta); extern void xt_compat_flush_offsets(int af); extern short xt_compat_calc_jump(int af, unsigned int offset); -extern int xt_compat_match_offset(struct xt_match *match); +extern int xt_compat_match_offset(const struct xt_match *match); extern int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, unsigned int *size); extern int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr, unsigned int *size); -extern int xt_compat_target_offset(struct xt_target *target); +extern int xt_compat_target_offset(const struct xt_target *target); extern void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, unsigned int *size); extern int xt_compat_target_to_user(struct xt_entry_target *t, diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 1563f29b5117..10cc442330c3 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -59,7 +59,7 @@ do { \ #endif static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, - char *hdr_addr, int len) + const char *hdr_addr, int len) { int i, ret; @@ -80,8 +80,8 @@ static inline int arp_packet_match(const struct arphdr *arphdr, const char *outdev, const struct arpt_arp *arpinfo) { - char *arpptr = (char *)(arphdr + 1); - char *src_devaddr, *tgt_devaddr; + const char *arpptr = (char *)(arphdr + 1); + const char *src_devaddr, *tgt_devaddr; __be32 src_ipaddr, tgt_ipaddr; int i, ret; @@ -226,12 +226,12 @@ unsigned int arpt_do_table(struct sk_buff *skb, { static const char nulldevname[IFNAMSIZ]; unsigned int verdict = NF_DROP; - struct arphdr *arp; + const struct arphdr *arp; bool hotdrop = false; struct arpt_entry *e, *back; const char *indev, *outdev; void *table_base; - struct xt_table_info *private; + const struct xt_table_info *private; if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) return NF_DROP; @@ -352,7 +352,7 @@ static int mark_source_chains(struct xt_table_info *newinfo, e->counters.pcnt = pos; for (;;) { - struct arpt_standard_target *t + const struct arpt_standard_target *t = (void *)arpt_get_target(e); int visited = e->comefrom & (1 << hook); @@ -437,7 +437,7 @@ static int mark_source_chains(struct xt_table_info *newinfo, static inline int check_entry(struct arpt_entry *e, const char *name) { - struct arpt_entry_target *t; + const struct arpt_entry_target *t; if (!arp_checkentry(&e->arp)) { duprintf("arp_tables: arp check failed %p %s.\n", e, name); @@ -710,7 +710,7 @@ static inline struct xt_counters *alloc_counters(struct arpt_table *table) { unsigned int countersize; struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -737,7 +737,7 @@ static int copy_entries_to_user(unsigned int total_size, unsigned int off, num; struct arpt_entry *e; struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; int ret = 0; void *loc_cpu_entry; @@ -872,7 +872,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) "arptable_%s", name); if (t && !IS_ERR(t)) { struct arpt_getinfo info; - struct xt_table_info *private = t->private; + const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT if (compat) { @@ -927,7 +927,8 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, t = xt_find_table_lock(net, NF_ARP, get.name); if (t && !IS_ERR(t)) { - struct xt_table_info *private = t->private; + const struct xt_table_info *private = t->private; + duprintf("t->private->number = %u\n", private->number); if (get.size == private->size) @@ -1087,11 +1088,11 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; - char *name; + const char *name; int size; void *ptmp; struct arpt_table *t; - struct xt_table_info *private; + const struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; #ifdef CONFIG_COMPAT @@ -1558,7 +1559,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, void __user *userptr) { struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; @@ -1609,7 +1610,7 @@ static int compat_get_entries(struct net *net, xt_compat_lock(NF_ARP); t = xt_find_table_lock(net, NF_ARP, get.name); if (t && !IS_ERR(t)) { - struct xt_table_info *private = t->private; + const struct xt_table_info *private = t->private; struct xt_table_info info; duprintf("t->private->number = %u\n", private->number); diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index 3f4222b0a803..3e732c827fc2 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -15,7 +15,7 @@ target(struct sk_buff *skb, const void *targinfo) { const struct arpt_mangle *mangle = targinfo; - struct arphdr *arp; + const struct arphdr *arp; unsigned char *arpptr; int pln, hln; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a819d191e1aa..aa124b50cb4a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -296,7 +296,7 @@ static void trace_packet(struct sk_buff *skb, struct ipt_entry *e) { void *table_base; - struct ipt_entry *root; + const struct ipt_entry *root; char *hookname, *chainname, *comment; unsigned int rulenum = 0; @@ -327,7 +327,7 @@ ipt_do_table(struct sk_buff *skb, { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); u_int16_t offset; - struct iphdr *ip; + const struct iphdr *ip; u_int16_t datalen; bool hotdrop = false; /* Initializing verdict to NF_DROP keeps gcc happy. */ @@ -926,7 +926,7 @@ static struct xt_counters * alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -953,9 +953,9 @@ copy_entries_to_user(unsigned int total_size, unsigned int off, num; struct ipt_entry *e; struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; int ret = 0; - void *loc_cpu_entry; + const void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -975,8 +975,8 @@ copy_entries_to_user(unsigned int total_size, /* ... then go back and fix counters and names */ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ unsigned int i; - struct ipt_entry_match *m; - struct ipt_entry_target *t; + const struct ipt_entry_match *m; + const struct ipt_entry_target *t; e = (struct ipt_entry *)(loc_cpu_entry + off); if (copy_to_user(userptr + off @@ -1116,7 +1116,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) "iptable_%s", name); if (t && !IS_ERR(t)) { struct ipt_getinfo info; - struct xt_table_info *private = t->private; + const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT if (compat) { @@ -1172,7 +1172,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len) t = xt_find_table_lock(net, AF_INET, get.name); if (t && !IS_ERR(t)) { - struct xt_table_info *private = t->private; + const struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", private->number); if (get.size == private->size) ret = copy_entries_to_user(private->size, @@ -1337,11 +1337,11 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; - char *name; + const char *name; int size; void *ptmp; struct xt_table *t; - struct xt_table_info *private; + const struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; #ifdef CONFIG_COMPAT @@ -1878,11 +1878,11 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; - void *loc_cpu_entry; + const void *loc_cpu_entry; unsigned int i = 0; counters = alloc_counters(table); @@ -1929,7 +1929,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); if (t && !IS_ERR(t)) { - struct xt_table_info *private = t->private; + const struct xt_table_info *private = t->private; struct xt_table_info info; duprintf("t->private->number = %u\n", private->number); ret = compat_table_info(private, &info); @@ -2130,7 +2130,8 @@ icmp_match(const struct sk_buff *skb, unsigned int protoff, bool *hotdrop) { - struct icmphdr _icmph, *ic; + const struct icmphdr *ic; + struct icmphdr _icmph; const struct ipt_icmp *icmpinfo = matchinfo; /* Must not be a fragment. */ diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 70ef0d276cc0..782183f63366 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -325,7 +325,7 @@ static void trace_packet(struct sk_buff *skb, struct ip6t_entry *e) { void *table_base; - struct ip6t_entry *root; + const struct ip6t_entry *root; char *hookname, *chainname, *comment; unsigned int rulenum = 0; @@ -952,7 +952,7 @@ static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -979,9 +979,9 @@ copy_entries_to_user(unsigned int total_size, unsigned int off, num; struct ip6t_entry *e; struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; int ret = 0; - void *loc_cpu_entry; + const void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -1001,8 +1001,8 @@ copy_entries_to_user(unsigned int total_size, /* ... then go back and fix counters and names */ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ unsigned int i; - struct ip6t_entry_match *m; - struct ip6t_entry_target *t; + const struct ip6t_entry_match *m; + const struct ip6t_entry_target *t; e = (struct ip6t_entry *)(loc_cpu_entry + off); if (copy_to_user(userptr + off @@ -1142,7 +1142,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) "ip6table_%s", name); if (t && !IS_ERR(t)) { struct ip6t_getinfo info; - struct xt_table_info *private = t->private; + const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT if (compat) { @@ -1225,7 +1225,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table *t; struct xt_table_info *oldinfo; struct xt_counters *counters; - void *loc_cpu_old_entry; + const void *loc_cpu_old_entry; ret = 0; counters = vmalloc_node(num_counters * sizeof(struct xt_counters), @@ -1369,9 +1369,9 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int size; void *ptmp; struct xt_table *t; - struct xt_table_info *private; + const struct xt_table_info *private; int ret = 0; - void *loc_cpu_entry; + const void *loc_cpu_entry; #ifdef CONFIG_COMPAT struct compat_xt_counters_info compat_tmp; @@ -1905,11 +1905,11 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; - void *loc_cpu_entry; + const void *loc_cpu_entry; unsigned int i = 0; counters = alloc_counters(table); @@ -1956,7 +1956,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); if (t && !IS_ERR(t)) { - struct xt_table_info *private = t->private; + const struct xt_table_info *private = t->private; struct xt_table_info info; duprintf("t->private->number = %u\n", private->number); ret = compat_table_info(private, &info); @@ -2155,7 +2155,8 @@ icmp6_match(const struct sk_buff *skb, unsigned int protoff, bool *hotdrop) { - struct icmp6hdr _icmph, *ic; + const struct icmp6hdr *ic; + struct icmp6hdr _icmph; const struct ip6t_icmp *icmpinfo = matchinfo; /* Must not be a fragment. */ diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 0bd95680a494..f52f7f810ac4 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -58,7 +58,7 @@ static struct xt_af *xt; #define duprintf(format, args...) #endif -static const char *xt_prefix[NPROTO] = { +static const char *const xt_prefix[NPROTO] = { [AF_INET] = "ip", [AF_INET6] = "ip6", [NF_ARP] = "arp", @@ -248,7 +248,7 @@ EXPORT_SYMBOL_GPL(xt_request_find_target); static int match_revfn(int af, const char *name, u8 revision, int *bestp) { - struct xt_match *m; + const struct xt_match *m; int have_rev = 0; list_for_each_entry(m, &xt[af].match, list) { @@ -264,7 +264,7 @@ static int match_revfn(int af, const char *name, u8 revision, int *bestp) static int target_revfn(int af, const char *name, u8 revision, int *bestp) { - struct xt_target *t; + const struct xt_target *t; int have_rev = 0; list_for_each_entry(t, &xt[af].target, list) { @@ -385,7 +385,7 @@ short xt_compat_calc_jump(int af, unsigned int offset) } EXPORT_SYMBOL_GPL(xt_compat_calc_jump); -int xt_compat_match_offset(struct xt_match *match) +int xt_compat_match_offset(const struct xt_match *match) { u_int16_t csize = match->compatsize ? : match->matchsize; return XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(csize); @@ -395,7 +395,7 @@ EXPORT_SYMBOL_GPL(xt_compat_match_offset); int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, unsigned int *size) { - struct xt_match *match = m->u.kernel.match; + const struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; int pad, off = xt_compat_match_offset(match); u_int16_t msize = cm->u.user.match_size; @@ -422,7 +422,7 @@ EXPORT_SYMBOL_GPL(xt_compat_match_from_user); int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr, unsigned int *size) { - struct xt_match *match = m->u.kernel.match; + const struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match __user *cm = *dstptr; int off = xt_compat_match_offset(match); u_int16_t msize = m->u.user.match_size - off; @@ -479,7 +479,7 @@ int xt_check_target(const struct xt_target *target, unsigned short family, EXPORT_SYMBOL_GPL(xt_check_target); #ifdef CONFIG_COMPAT -int xt_compat_target_offset(struct xt_target *target) +int xt_compat_target_offset(const struct xt_target *target) { u_int16_t csize = target->compatsize ? : target->targetsize; return XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(csize); @@ -489,7 +489,7 @@ EXPORT_SYMBOL_GPL(xt_compat_target_offset); void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, unsigned int *size) { - struct xt_target *target = t->u.kernel.target; + const struct xt_target *target = t->u.kernel.target; struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; int pad, off = xt_compat_target_offset(target); u_int16_t tsize = ct->u.user.target_size; @@ -515,7 +515,7 @@ EXPORT_SYMBOL_GPL(xt_compat_target_from_user); int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr, unsigned int *size) { - struct xt_target *target = t->u.kernel.target; + const struct xt_target *target = t->u.kernel.target; struct compat_xt_entry_target __user *ct = *dstptr; int off = xt_compat_target_offset(target); u_int16_t tsize = t->u.user.target_size - off; -- cgit v1.2.3-71-gd317 From 4abff0775d5e4feb20b21371e1c63a1b30fc2140 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 14 Apr 2008 11:15:43 +0200 Subject: [NETFILTER]: remove arpt_table indirection macro Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_arp/arp_tables.h | 11 +++++------ net/ipv4/netfilter/arp_tables.c | 27 +++++++++++++-------------- net/ipv4/netfilter/arptable_filter.c | 2 +- 3 files changed, 19 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index db223ca92c8b..102c4134a713 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -24,7 +24,6 @@ #define ARPT_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN #define ARPT_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN #define arpt_target xt_target -#define arpt_table xt_table #define ARPT_DEV_ADDR_LEN_MAX 16 @@ -271,15 +270,15 @@ struct arpt_error xt_register_target(tgt); }) #define arpt_unregister_target(tgt) xt_unregister_target(tgt) -extern struct arpt_table *arpt_register_table(struct net *net, - struct arpt_table *table, - const struct arpt_replace *repl); -extern void arpt_unregister_table(struct arpt_table *table); +extern struct xt_table *arpt_register_table(struct net *net, + struct xt_table *table, + const struct arpt_replace *repl); +extern void arpt_unregister_table(struct xt_table *table); extern unsigned int arpt_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct arpt_table *table); + struct xt_table *table); #define ARPT_ALIGN(s) XT_ALIGN(s) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 10cc442330c3..34c42c831b18 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -222,7 +222,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct arpt_table *table) + struct xt_table *table) { static const char nulldevname[IFNAMSIZ]; unsigned int verdict = NF_DROP; @@ -706,7 +706,7 @@ static void get_counters(const struct xt_table_info *t, } } -static inline struct xt_counters *alloc_counters(struct arpt_table *table) +static inline struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; @@ -731,13 +731,13 @@ static inline struct xt_counters *alloc_counters(struct arpt_table *table) } static int copy_entries_to_user(unsigned int total_size, - struct arpt_table *table, + struct xt_table *table, void __user *userptr) { unsigned int off, num; struct arpt_entry *e; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + struct xt_table_info *private = table->private; int ret = 0; void *loc_cpu_entry; @@ -851,7 +851,7 @@ static int compat_table_info(const struct xt_table_info *info, static int get_info(struct net *net, void __user *user, int *len, int compat) { char name[ARPT_TABLE_MAXNAMELEN]; - struct arpt_table *t; + struct xt_table *t; int ret; if (*len != sizeof(struct arpt_getinfo)) { @@ -911,7 +911,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, { int ret; struct arpt_get_entries get; - struct arpt_table *t; + struct xt_table *t; if (*len < sizeof(get)) { duprintf("get_entries: %u < %Zu\n", *len, sizeof(get)); @@ -954,7 +954,7 @@ static int __do_replace(struct net *net, const char *name, void __user *counters_ptr) { int ret; - struct arpt_table *t; + struct xt_table *t; struct xt_table_info *oldinfo; struct xt_counters *counters; void *loc_cpu_old_entry; @@ -1091,7 +1091,7 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, const char *name; int size; void *ptmp; - struct arpt_table *t; + struct xt_table *t; const struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; @@ -1555,7 +1555,7 @@ out: } static int compat_copy_entries_to_user(unsigned int total_size, - struct arpt_table *table, + struct xt_table *table, void __user *userptr) { struct xt_counters *counters; @@ -1593,7 +1593,7 @@ static int compat_get_entries(struct net *net, { int ret; struct compat_arpt_get_entries get; - struct arpt_table *t; + struct xt_table *t; if (*len < sizeof(get)) { duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get)); @@ -1723,9 +1723,8 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len return ret; } -struct arpt_table *arpt_register_table(struct net *net, - struct arpt_table *table, - const struct arpt_replace *repl) +struct xt_table *arpt_register_table(struct net *net, struct xt_table *table, + const struct arpt_replace *repl) { int ret; struct xt_table_info *newinfo; @@ -1767,7 +1766,7 @@ out: return ERR_PTR(ret); } -void arpt_unregister_table(struct arpt_table *table) +void arpt_unregister_table(struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 629e4951a9b1..9f6526c87757 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -45,7 +45,7 @@ static struct .term = ARPT_ERROR_INIT, }; -static struct arpt_table packet_filter = { +static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), -- cgit v1.2.3-71-gd317 From 95eea855af69bfd54a7b73546190e76046ca2e07 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 14 Apr 2008 11:15:43 +0200 Subject: [NETFILTER]: remove arpt_target indirection macro Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_arp/arp_tables.h | 1 - net/ipv4/netfilter/arp_tables.c | 8 ++++---- net/ipv4/netfilter/arpt_mangle.c | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 102c4134a713..782b83e5bdb9 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -23,7 +23,6 @@ #define ARPT_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN #define ARPT_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN -#define arpt_target xt_target #define ARPT_DEV_ADDR_LEN_MAX 16 diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 34c42c831b18..d55f3b42eba5 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -457,7 +457,7 @@ static inline int check_entry(struct arpt_entry *e, const char *name) static inline int check_target(struct arpt_entry *e, const char *name) { struct arpt_entry_target *t; - struct arpt_target *target; + struct xt_target *target; int ret; t = arpt_get_target(e); @@ -480,7 +480,7 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, unsigned int *i) { struct arpt_entry_target *t; - struct arpt_target *target; + struct xt_target *target; int ret; ret = check_entry(e, name); @@ -1784,7 +1784,7 @@ void arpt_unregister_table(struct xt_table *table) } /* The built-in targets: standard (NULL) and error. */ -static struct arpt_target arpt_standard_target __read_mostly = { +static struct xt_target arpt_standard_target __read_mostly = { .name = ARPT_STANDARD_TARGET, .targetsize = sizeof(int), .family = NF_ARP, @@ -1795,7 +1795,7 @@ static struct arpt_target arpt_standard_target __read_mostly = { #endif }; -static struct arpt_target arpt_error_target __read_mostly = { +static struct xt_target arpt_error_target __read_mostly = { .name = ARPT_ERROR_TARGET, .target = arpt_error, .targetsize = ARPT_FUNCTION_MAXNAMELEN, diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index 3e732c827fc2..f9c102ab891b 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -73,7 +73,7 @@ checkentry(const char *tablename, const void *e, const struct xt_target *target, return true; } -static struct arpt_target arpt_mangle_reg __read_mostly = { +static struct xt_target arpt_mangle_reg __read_mostly = { .name = "mangle", .target = target, .targetsize = sizeof(struct arpt_mangle), -- cgit v1.2.3-71-gd317 From 3bb0362d2f53fa54a17b88c96b43fc093e47699b Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 14 Apr 2008 11:15:44 +0200 Subject: [NETFILTER]: remove arpt_(un)register_target indirection macros Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter_arp/arp_tables.h | 5 ----- net/ipv4/netfilter/arpt_mangle.c | 8 +++----- 2 files changed, 3 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 782b83e5bdb9..dd9c97f2d436 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -264,11 +264,6 @@ struct arpt_error .target.errorname = "ERROR", \ } -#define arpt_register_target(tgt) \ -({ (tgt)->family = NF_ARP; \ - xt_register_target(tgt); }) -#define arpt_unregister_target(tgt) xt_unregister_target(tgt) - extern struct xt_table *arpt_register_table(struct net *net, struct xt_table *table, const struct arpt_replace *repl); diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index f9c102ab891b..a385959d2655 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -75,6 +75,7 @@ checkentry(const char *tablename, const void *e, const struct xt_target *target, static struct xt_target arpt_mangle_reg __read_mostly = { .name = "mangle", + .family = NF_ARP, .target = target, .targetsize = sizeof(struct arpt_mangle), .checkentry = checkentry, @@ -83,15 +84,12 @@ static struct xt_target arpt_mangle_reg __read_mostly = { static int __init arpt_mangle_init(void) { - if (arpt_register_target(&arpt_mangle_reg)) - return -EINVAL; - - return 0; + return xt_register_target(&arpt_mangle_reg); } static void __exit arpt_mangle_fini(void) { - arpt_unregister_target(&arpt_mangle_reg); + xt_unregister_target(&arpt_mangle_reg); } module_init(arpt_mangle_init); -- cgit v1.2.3-71-gd317 From d63a650736f566a1f9e9434725d2089597c0d2cc Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 20 Mar 2008 15:15:53 +0100 Subject: [NETFILTER]: Add partial checksum validation helper Move the UDP-Lite conntrack checksum validation to a generic helper similar to nf_checksum() and make it fall back to nf_checksum() in case the full packet is to be checksummed and hardware checksums are available. This is to be used by DCCP conntrack, which also needs to verify partial checksums. Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 22 ++++++++++++++++ net/ipv4/netfilter.c | 37 +++++++++++++++++++++----- net/ipv6/netfilter.c | 42 +++++++++++++++++++++++++----- net/netfilter/nf_conntrack_proto_udplite.c | 33 +++++------------------ 4 files changed, 94 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 66bc52060fd6..e4c66593b5c6 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -234,6 +234,11 @@ struct nf_afinfo { unsigned short family; __sum16 (*checksum)(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); + __sum16 (*checksum_partial)(struct sk_buff *skb, + unsigned int hook, + unsigned int dataoff, + unsigned int len, + u_int8_t protocol); int (*route)(struct dst_entry **dst, struct flowi *fl); void (*saveroute)(const struct sk_buff *skb, struct nf_queue_entry *entry); @@ -263,6 +268,23 @@ nf_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, return csum; } +static inline __sum16 +nf_checksum_partial(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, unsigned int len, + u_int8_t protocol, unsigned short family) +{ + const struct nf_afinfo *afinfo; + __sum16 csum = 0; + + rcu_read_lock(); + afinfo = nf_get_afinfo(family); + if (afinfo) + csum = afinfo->checksum_partial(skb, hook, dataoff, len, + protocol); + rcu_read_unlock(); + return csum; +} + extern int nf_register_afinfo(const struct nf_afinfo *afinfo); extern void nf_unregister_afinfo(const struct nf_afinfo *afinfo); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 9a904c6c0dc8..f8edacdf991d 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -182,21 +182,44 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, } return csum; } - EXPORT_SYMBOL(nf_ip_checksum); +static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, unsigned int len, + u_int8_t protocol) +{ + const struct iphdr *iph = ip_hdr(skb); + __sum16 csum = 0; + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + if (len == skb->len - dataoff) + return nf_ip_checksum(skb, hook, dataoff, protocol); + /* fall through */ + case CHECKSUM_NONE: + skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, + skb->len - dataoff, 0); + skb->ip_summed = CHECKSUM_NONE; + csum = __skb_checksum_complete_head(skb, dataoff + len); + if (!csum) + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + return csum; +} + static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) { return ip_route_output_key(&init_net, (struct rtable **)dst, fl); } static const struct nf_afinfo nf_ip_afinfo = { - .family = AF_INET, - .checksum = nf_ip_checksum, - .route = nf_ip_route, - .saveroute = nf_ip_saveroute, - .reroute = nf_ip_reroute, - .route_key_size = sizeof(struct ip_rt_info), + .family = AF_INET, + .checksum = nf_ip_checksum, + .checksum_partial = nf_ip_checksum_partial, + .route = nf_ip_route, + .saveroute = nf_ip_saveroute, + .reroute = nf_ip_reroute, + .route_key_size = sizeof(struct ip_rt_info), }; static int ipv4_netfilter_init(void) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index aed51bcc66b4..8c6c5e71f210 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -121,16 +121,44 @@ __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, } return csum; } - EXPORT_SYMBOL(nf_ip6_checksum); +static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, unsigned int len, + u_int8_t protocol) +{ + struct ipv6hdr *ip6h = ipv6_hdr(skb); + __wsum hsum; + __sum16 csum = 0; + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + if (len == skb->len - dataoff) + return nf_ip6_checksum(skb, hook, dataoff, protocol); + /* fall through */ + case CHECKSUM_NONE: + hsum = skb_checksum(skb, 0, dataoff, 0); + skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr, + &ip6h->daddr, + skb->len - dataoff, + protocol, + csum_sub(0, hsum))); + skb->ip_summed = CHECKSUM_NONE; + csum = __skb_checksum_complete_head(skb, dataoff + len); + if (!csum) + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + return csum; +}; + static const struct nf_afinfo nf_ip6_afinfo = { - .family = AF_INET6, - .checksum = nf_ip6_checksum, - .route = nf_ip6_route, - .saveroute = nf_ip6_saveroute, - .reroute = nf_ip6_reroute, - .route_key_size = sizeof(struct ip6_rt_info), + .family = AF_INET6, + .checksum = nf_ip6_checksum, + .checksum_partial = nf_ip6_checksum_partial, + .route = nf_ip6_route, + .saveroute = nf_ip6_saveroute, + .reroute = nf_ip6_reroute, + .route_key_size = sizeof(struct ip6_rt_info), }; int __init ipv6_netfilter_init(void) diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 9dd03c7aeac6..c3eaee6afffd 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -127,32 +127,13 @@ static int udplite_error(struct sk_buff *skb, unsigned int dataoff, } /* Checksum invalid? Ignore. */ - if (nf_conntrack_checksum && !skb_csum_unnecessary(skb) && - hooknum == NF_INET_PRE_ROUTING) { - if (pf == PF_INET) { - struct iphdr *iph = ip_hdr(skb); - - skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - udplen, IPPROTO_UDPLITE, 0); - } else { - struct ipv6hdr *ipv6h = ipv6_hdr(skb); - __wsum hsum = skb_checksum(skb, 0, dataoff, 0); - - skb->csum = ~csum_unfold( - csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - udplen, IPPROTO_UDPLITE, - csum_sub(0, hsum))); - } - - skb->ip_summed = CHECKSUM_NONE; - if (__skb_checksum_complete_head(skb, dataoff + cscov)) { - if (LOG_INVALID(IPPROTO_UDPLITE)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: bad UDPLite " - "checksum "); - return -NF_ACCEPT; - } - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, + pf)) { + if (LOG_INVALID(IPPROTO_UDPLITE)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: bad UDPLite checksum "); + return -NF_ACCEPT; } return NF_ACCEPT; -- cgit v1.2.3-71-gd317 From 2bc780499aa33311ec0f3e42624dfaa7be0ade5e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 20 Mar 2008 15:15:55 +0100 Subject: [NETFILTER]: nf_conntrack: add DCCP protocol support Add DCCP conntrack helper. Thanks to Gerrit Renker for review and testing. Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_dccp.h | 40 ++ include/linux/netfilter/nfnetlink_conntrack.h | 8 + include/net/netfilter/nf_conntrack.h | 2 + include/net/netfilter/nf_conntrack_tuple.h | 6 + net/netfilter/Kconfig | 10 + net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_proto_dccp.c | 816 ++++++++++++++++++++++++++ 7 files changed, 883 insertions(+) create mode 100644 include/linux/netfilter/nf_conntrack_dccp.h create mode 100644 net/netfilter/nf_conntrack_proto_dccp.c (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_dccp.h b/include/linux/netfilter/nf_conntrack_dccp.h new file mode 100644 index 000000000000..40dcc82058d1 --- /dev/null +++ b/include/linux/netfilter/nf_conntrack_dccp.h @@ -0,0 +1,40 @@ +#ifndef _NF_CONNTRACK_DCCP_H +#define _NF_CONNTRACK_DCCP_H + +/* Exposed to userspace over nfnetlink */ +enum ct_dccp_states { + CT_DCCP_NONE, + CT_DCCP_REQUEST, + CT_DCCP_RESPOND, + CT_DCCP_PARTOPEN, + CT_DCCP_OPEN, + CT_DCCP_CLOSEREQ, + CT_DCCP_CLOSING, + CT_DCCP_TIMEWAIT, + CT_DCCP_IGNORE, + CT_DCCP_INVALID, + __CT_DCCP_MAX +}; +#define CT_DCCP_MAX (__CT_DCCP_MAX - 1) + +enum ct_dccp_roles { + CT_DCCP_ROLE_CLIENT, + CT_DCCP_ROLE_SERVER, + __CT_DCCP_ROLE_MAX +}; +#define CT_DCCP_ROLE_MAX (__CT_DCCP_ROLE_MAX - 1) + +#ifdef __KERNEL__ +#include + +struct nf_ct_dccp { + u_int8_t role[IP_CT_DIR_MAX]; + u_int8_t state; + u_int8_t last_pkt; + u_int8_t last_dir; + u_int64_t handshake_seq; +}; + +#endif /* __KERNEL__ */ + +#endif /* _NF_CONNTRACK_DCCP_H */ diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index e3e1533aba2d..0a383ac083cb 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -80,6 +80,7 @@ enum ctattr_l4proto { enum ctattr_protoinfo { CTA_PROTOINFO_UNSPEC, CTA_PROTOINFO_TCP, + CTA_PROTOINFO_DCCP, __CTA_PROTOINFO_MAX }; #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1) @@ -95,6 +96,13 @@ enum ctattr_protoinfo_tcp { }; #define CTA_PROTOINFO_TCP_MAX (__CTA_PROTOINFO_TCP_MAX - 1) +enum ctattr_protoinfo_dccp { + CTA_PROTOINFO_DCCP_UNSPEC, + CTA_PROTOINFO_DCCP_STATE, + __CTA_PROTOINFO_DCCP_MAX, +}; +#define CTA_PROTOINFO_DCCP_MAX (__CTA_PROTOINFO_DCCP_MAX - 1) + enum ctattr_counters { CTA_COUNTERS_UNSPEC, CTA_COUNTERS_PACKETS, /* old 64bit counters */ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index a3567a7a6d67..bb9fc852e973 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -30,6 +31,7 @@ /* per conntrack: protocol private data */ union nf_conntrack_proto { /* insert conntrack proto private data here */ + struct nf_ct_dccp dccp; struct ip_ct_sctp sctp; struct ip_ct_tcp tcp; struct ip_ct_icmp icmp; diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index 168c91754d89..bdeec3461384 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -39,6 +39,9 @@ union nf_conntrack_man_proto struct { __be16 id; } icmp; + struct { + __be16 port; + } dccp; struct { __be16 port; } sctp; @@ -77,6 +80,9 @@ struct nf_conntrack_tuple struct { u_int8_t type, code; } icmp; + struct { + __be16 port; + } dccp; struct { __be16 port; } sctp; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index daf5b881064d..c1fc0f1a641c 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -86,6 +86,16 @@ config NF_CONNTRACK_EVENTS If unsure, say `N'. +config NF_CT_PROTO_DCCP + tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' + depends on EXPERIMENTAL && NF_CONNTRACK + depends on NETFILTER_ADVANCED + help + With this option enabled, the layer 3 independent connection + tracking code will be able to do state tracking on DCCP connections. + + If unsure, say 'N'. + config NF_CT_PROTO_GRE tristate depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index ea7508387f95..5c4b183f6422 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o # SCTP protocol connection tracking +obj-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c new file mode 100644 index 000000000000..db88c5bcc5fd --- /dev/null +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -0,0 +1,816 @@ +/* + * DCCP connection tracking protocol helper + * + * Copyright (c) 2005, 2006, 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static DEFINE_RWLOCK(dccp_lock); + +static int nf_ct_dccp_loose __read_mostly = 1; + +/* Timeouts are based on values from RFC4340: + * + * - REQUEST: + * + * 8.1.2. Client Request + * + * A client MAY give up on its DCCP-Requests after some time + * (3 minutes, for example). + * + * - RESPOND: + * + * 8.1.3. Server Response + * + * It MAY also leave the RESPOND state for CLOSED after a timeout of + * not less than 4MSL (8 minutes); + * + * - PARTOPEN: + * + * 8.1.5. Handshake Completion + * + * If the client remains in PARTOPEN for more than 4MSL (8 minutes), + * it SHOULD reset the connection with Reset Code 2, "Aborted". + * + * - OPEN: + * + * The DCCP timestamp overflows after 11.9 hours. If the connection + * stays idle this long the sequence number won't be recognized + * as valid anymore. + * + * - CLOSEREQ/CLOSING: + * + * 8.3. Termination + * + * The retransmission timer should initially be set to go off in two + * round-trip times and should back off to not less than once every + * 64 seconds ... + * + * - TIMEWAIT: + * + * 4.3. States + * + * A server or client socket remains in this state for 2MSL (4 minutes) + * after the connection has been town down, ... + */ + +#define DCCP_MSL (2 * 60 * HZ) + +static unsigned int dccp_timeout[CT_DCCP_MAX + 1] __read_mostly = { + [CT_DCCP_REQUEST] = 2 * DCCP_MSL, + [CT_DCCP_RESPOND] = 4 * DCCP_MSL, + [CT_DCCP_PARTOPEN] = 4 * DCCP_MSL, + [CT_DCCP_OPEN] = 12 * 3600 * HZ, + [CT_DCCP_CLOSEREQ] = 64 * HZ, + [CT_DCCP_CLOSING] = 64 * HZ, + [CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL, +}; + +static const char * const dccp_state_names[] = { + [CT_DCCP_NONE] = "NONE", + [CT_DCCP_REQUEST] = "REQUEST", + [CT_DCCP_RESPOND] = "RESPOND", + [CT_DCCP_PARTOPEN] = "PARTOPEN", + [CT_DCCP_OPEN] = "OPEN", + [CT_DCCP_CLOSEREQ] = "CLOSEREQ", + [CT_DCCP_CLOSING] = "CLOSING", + [CT_DCCP_TIMEWAIT] = "TIMEWAIT", + [CT_DCCP_IGNORE] = "IGNORE", + [CT_DCCP_INVALID] = "INVALID", +}; + +#define sNO CT_DCCP_NONE +#define sRQ CT_DCCP_REQUEST +#define sRS CT_DCCP_RESPOND +#define sPO CT_DCCP_PARTOPEN +#define sOP CT_DCCP_OPEN +#define sCR CT_DCCP_CLOSEREQ +#define sCG CT_DCCP_CLOSING +#define sTW CT_DCCP_TIMEWAIT +#define sIG CT_DCCP_IGNORE +#define sIV CT_DCCP_INVALID + +/* + * DCCP state transistion table + * + * The assumption is the same as for TCP tracking: + * + * We are the man in the middle. All the packets go through us but might + * get lost in transit to the destination. It is assumed that the destination + * can't receive segments we haven't seen. + * + * The following states exist: + * + * NONE: Initial state, expecting Request + * REQUEST: Request seen, waiting for Response from server + * RESPOND: Response from server seen, waiting for Ack from client + * PARTOPEN: Ack after Response seen, waiting for packet other than Response, + * Reset or Sync from server + * OPEN: Packet other than Response, Reset or Sync seen + * CLOSEREQ: CloseReq from server seen, expecting Close from client + * CLOSING: Close seen, expecting Reset + * TIMEWAIT: Reset seen + * IGNORE: Not determinable whether packet is valid + * + * Some states exist only on one side of the connection: REQUEST, RESPOND, + * PARTOPEN, CLOSEREQ. For the other side these states are equivalent to + * the one it was in before. + * + * Packets are marked as ignored (sIG) if we don't know if they're valid + * (for example a reincarnation of a connection we didn't notice is dead + * already) and the server may send back a connection closing Reset or a + * Response. They're also used for Sync/SyncAck packets, which we don't + * care about. + */ +static const u_int8_t +dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = { + [CT_DCCP_ROLE_CLIENT] = { + [DCCP_PKT_REQUEST] = { + /* + * sNO -> sRQ Regular Request + * sRQ -> sRQ Retransmitted Request or reincarnation + * sRS -> sRS Retransmitted Request (apparently Response + * got lost after we saw it) or reincarnation + * sPO -> sIG Ignore, conntrack might be out of sync + * sOP -> sIG Ignore, conntrack might be out of sync + * sCR -> sIG Ignore, conntrack might be out of sync + * sCG -> sIG Ignore, conntrack might be out of sync + * sTW -> sRQ Reincarnation + * + * sNO, sRQ, sRS, sPO. sOP, sCR, sCG, sTW, */ + sRQ, sRQ, sRS, sIG, sIG, sIG, sIG, sRQ, + }, + [DCCP_PKT_RESPONSE] = { + /* + * sNO -> sIV Invalid + * sRQ -> sIG Ignore, might be response to ignored Request + * sRS -> sIG Ignore, might be response to ignored Request + * sPO -> sIG Ignore, might be response to ignored Request + * sOP -> sIG Ignore, might be response to ignored Request + * sCR -> sIG Ignore, might be response to ignored Request + * sCG -> sIG Ignore, might be response to ignored Request + * sTW -> sIV Invalid, reincarnation in reverse direction + * goes through sRQ + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIV, + }, + [DCCP_PKT_ACK] = { + /* + * sNO -> sIV No connection + * sRQ -> sIV No connection + * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.) + * sPO -> sPO Retransmitted Ack for Response, remain in PARTOPEN + * sOP -> sOP Regular ACK, remain in OPEN + * sCR -> sCR Ack in CLOSEREQ MAY be processed (8.3.) + * sCG -> sCG Ack in CLOSING MAY be processed (8.3.) + * sTW -> sIV + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV + }, + [DCCP_PKT_DATA] = { + /* + * sNO -> sIV No connection + * sRQ -> sIV No connection + * sRS -> sIV No connection + * sPO -> sIV MUST use DataAck in PARTOPEN state (8.1.5.) + * sOP -> sOP Regular Data packet + * sCR -> sCR Data in CLOSEREQ MAY be processed (8.3.) + * sCG -> sCG Data in CLOSING MAY be processed (8.3.) + * sTW -> sIV + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIV, sIV, sIV, sIV, sOP, sCR, sCG, sIV, + }, + [DCCP_PKT_DATAACK] = { + /* + * sNO -> sIV No connection + * sRQ -> sIV No connection + * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.) + * sPO -> sPO Remain in PARTOPEN state + * sOP -> sOP Regular DataAck packet in OPEN state + * sCR -> sCR DataAck in CLOSEREQ MAY be processed (8.3.) + * sCG -> sCG DataAck in CLOSING MAY be processed (8.3.) + * sTW -> sIV + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV + }, + [DCCP_PKT_CLOSEREQ] = { + /* + * CLOSEREQ may only be sent by the server. + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV + }, + [DCCP_PKT_CLOSE] = { + /* + * sNO -> sIV No connection + * sRQ -> sIV No connection + * sRS -> sIV No connection + * sPO -> sCG Client-initiated close + * sOP -> sCG Client-initiated close + * sCR -> sCG Close in response to CloseReq (8.3.) + * sCG -> sCG Retransmit + * sTW -> sIV Late retransmit, already in TIME_WAIT + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIV, sIV, sIV, sCG, sCG, sCG, sIV, sIV + }, + [DCCP_PKT_RESET] = { + /* + * sNO -> sIV No connection + * sRQ -> sTW Sync received or timeout, SHOULD send Reset (8.1.1.) + * sRS -> sTW Response received without Request + * sPO -> sTW Timeout, SHOULD send Reset (8.1.5.) + * sOP -> sTW Connection reset + * sCR -> sTW Connection reset + * sCG -> sTW Connection reset + * sTW -> sIG Ignore (don't refresh timer) + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIV, sTW, sTW, sTW, sTW, sTW, sTW, sIG + }, + [DCCP_PKT_SYNC] = { + /* + * We currently ignore Sync packets + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, + }, + [DCCP_PKT_SYNCACK] = { + /* + * We currently ignore SyncAck packets + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, + }, + }, + [CT_DCCP_ROLE_SERVER] = { + [DCCP_PKT_REQUEST] = { + /* + * sNO -> sIV Invalid + * sRQ -> sIG Ignore, conntrack might be out of sync + * sRS -> sIG Ignore, conntrack might be out of sync + * sPO -> sIG Ignore, conntrack might be out of sync + * sOP -> sIG Ignore, conntrack might be out of sync + * sCR -> sIG Ignore, conntrack might be out of sync + * sCG -> sIG Ignore, conntrack might be out of sync + * sTW -> sRQ Reincarnation, must reverse roles + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIV, sIG, sIG, sIG, sIG, sIG, sIG, sRQ + }, + [DCCP_PKT_RESPONSE] = { + /* + * sNO -> sIV Response without Request + * sRQ -> sRS Response to clients Request + * sRS -> sRS Retransmitted Response (8.1.3. SHOULD NOT) + * sPO -> sIG Response to an ignored Request or late retransmit + * sOP -> sIG Ignore, might be response to ignored Request + * sCR -> sIG Ignore, might be response to ignored Request + * sCG -> sIG Ignore, might be response to ignored Request + * sTW -> sIV Invalid, Request from client in sTW moves to sRQ + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIV, sRS, sRS, sIG, sIG, sIG, sIG, sIV + }, + [DCCP_PKT_ACK] = { + /* + * sNO -> sIV No connection + * sRQ -> sIV No connection + * sRS -> sIV No connection + * sPO -> sOP Enter OPEN state (8.1.5.) + * sOP -> sOP Regular Ack in OPEN state + * sCR -> sIV Waiting for Close from client + * sCG -> sCG Ack in CLOSING MAY be processed (8.3.) + * sTW -> sIV + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV + }, + [DCCP_PKT_DATA] = { + /* + * sNO -> sIV No connection + * sRQ -> sIV No connection + * sRS -> sIV No connection + * sPO -> sOP Enter OPEN state (8.1.5.) + * sOP -> sOP Regular Data packet in OPEN state + * sCR -> sIV Waiting for Close from client + * sCG -> sCG Data in CLOSING MAY be processed (8.3.) + * sTW -> sIV + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV + }, + [DCCP_PKT_DATAACK] = { + /* + * sNO -> sIV No connection + * sRQ -> sIV No connection + * sRS -> sIV No connection + * sPO -> sOP Enter OPEN state (8.1.5.) + * sOP -> sOP Regular DataAck in OPEN state + * sCR -> sIV Waiting for Close from client + * sCG -> sCG Data in CLOSING MAY be processed (8.3.) + * sTW -> sIV + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV + }, + [DCCP_PKT_CLOSEREQ] = { + /* + * sNO -> sIV No connection + * sRQ -> sIV No connection + * sRS -> sIV No connection + * sPO -> sOP -> sCR Move directly to CLOSEREQ (8.1.5.) + * sOP -> sCR CloseReq in OPEN state + * sCR -> sCR Retransmit + * sCG -> sCR Simultaneous close, client sends another Close + * sTW -> sIV Already closed + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIV, sIV, sIV, sCR, sCR, sCR, sCR, sIV + }, + [DCCP_PKT_CLOSE] = { + /* + * sNO -> sIV No connection + * sRQ -> sIV No connection + * sRS -> sIV No connection + * sPO -> sOP -> sCG Move direcly to CLOSING + * sOP -> sCG Move to CLOSING + * sCR -> sIV Close after CloseReq is invalid + * sCG -> sCG Retransmit + * sTW -> sIV Already closed + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIV, sIV, sIV, sCG, sCG, sIV, sCG, sIV + }, + [DCCP_PKT_RESET] = { + /* + * sNO -> sIV No connection + * sRQ -> sTW Reset in response to Request + * sRS -> sTW Timeout, SHOULD send Reset (8.1.3.) + * sPO -> sTW Timeout, SHOULD send Reset (8.1.3.) + * sOP -> sTW + * sCR -> sTW + * sCG -> sTW + * sTW -> sIG Ignore (don't refresh timer) + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW, sTW */ + sIV, sTW, sTW, sTW, sTW, sTW, sTW, sTW, sIG + }, + [DCCP_PKT_SYNC] = { + /* + * We currently ignore Sync packets + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, + }, + [DCCP_PKT_SYNCACK] = { + /* + * We currently ignore SyncAck packets + * + * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ + sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, + }, + }, +}; + +static int dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ + struct dccp_hdr _hdr, *dh; + + dh = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (dh == NULL) + return 0; + + tuple->src.u.dccp.port = dh->dccph_sport; + tuple->dst.u.dccp.port = dh->dccph_dport; + return 1; +} + +static int dccp_invert_tuple(struct nf_conntrack_tuple *inv, + const struct nf_conntrack_tuple *tuple) +{ + inv->src.u.dccp.port = tuple->dst.u.dccp.port; + inv->dst.u.dccp.port = tuple->src.u.dccp.port; + return 1; +} + +static int dccp_new(struct nf_conn *ct, const struct sk_buff *skb, + unsigned int dataoff) +{ + int pf = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + struct dccp_hdr _dh, *dh; + const char *msg; + u_int8_t state; + + dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh); + BUG_ON(dh == NULL); + + state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE]; + switch (state) { + default: + if (nf_ct_dccp_loose == 0) { + msg = "nf_ct_dccp: not picking up existing connection "; + goto out_invalid; + } + case CT_DCCP_REQUEST: + break; + case CT_DCCP_INVALID: + msg = "nf_ct_dccp: invalid state transition "; + goto out_invalid; + } + + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; + ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; + ct->proto.dccp.state = CT_DCCP_NONE; + return 1; + +out_invalid: + if (LOG_INVALID(IPPROTO_DCCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg); + return 0; +} + +static u64 dccp_ack_seq(const struct dccp_hdr *dh) +{ + const struct dccp_hdr_ack_bits *dhack; + + dhack = (void *)dh + __dccp_basic_hdr_len(dh); + return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) + + ntohl(dhack->dccph_ack_nr_low); +} + +static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info ctinfo, + int pf, unsigned int hooknum) +{ + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct dccp_hdr _dh, *dh; + u_int8_t type, old_state, new_state; + enum ct_dccp_roles role; + + dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh); + BUG_ON(dh == NULL); + type = dh->dccph_type; + + if (type == DCCP_PKT_RESET && + !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { + /* Tear down connection immediately if only reply is a RESET */ + if (del_timer(&ct->timeout)) + ct->timeout.function((unsigned long)ct); + return NF_ACCEPT; + } + + write_lock_bh(&dccp_lock); + + role = ct->proto.dccp.role[dir]; + old_state = ct->proto.dccp.state; + new_state = dccp_state_table[role][type][old_state]; + + switch (new_state) { + case CT_DCCP_REQUEST: + if (old_state == CT_DCCP_TIMEWAIT && + role == CT_DCCP_ROLE_SERVER) { + /* Reincarnation in the reverse direction: reopen and + * reverse client/server roles. */ + ct->proto.dccp.role[dir] = CT_DCCP_ROLE_CLIENT; + ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_SERVER; + } + break; + case CT_DCCP_RESPOND: + if (old_state == CT_DCCP_REQUEST) + ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); + break; + case CT_DCCP_PARTOPEN: + if (old_state == CT_DCCP_RESPOND && + type == DCCP_PKT_ACK && + dccp_ack_seq(dh) == ct->proto.dccp.handshake_seq) + set_bit(IPS_ASSURED_BIT, &ct->status); + break; + case CT_DCCP_IGNORE: + /* + * Connection tracking might be out of sync, so we ignore + * packets that might establish a new connection and resync + * if the server responds with a valid Response. + */ + if (ct->proto.dccp.last_dir == !dir && + ct->proto.dccp.last_pkt == DCCP_PKT_REQUEST && + type == DCCP_PKT_RESPONSE) { + ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_CLIENT; + ct->proto.dccp.role[dir] = CT_DCCP_ROLE_SERVER; + ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); + new_state = CT_DCCP_RESPOND; + break; + } + ct->proto.dccp.last_dir = dir; + ct->proto.dccp.last_pkt = type; + + write_unlock_bh(&dccp_lock); + if (LOG_INVALID(IPPROTO_DCCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_dccp: invalid packet ignored "); + return NF_ACCEPT; + case CT_DCCP_INVALID: + write_unlock_bh(&dccp_lock); + if (LOG_INVALID(IPPROTO_DCCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_dccp: invalid state transition "); + return -NF_ACCEPT; + } + + ct->proto.dccp.last_dir = dir; + ct->proto.dccp.last_pkt = type; + ct->proto.dccp.state = new_state; + write_unlock_bh(&dccp_lock); + nf_ct_refresh_acct(ct, ctinfo, skb, dccp_timeout[new_state]); + + return NF_ACCEPT; +} + +static int dccp_error(struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, int pf, + unsigned int hooknum) +{ + struct dccp_hdr _dh, *dh; + unsigned int dccp_len = skb->len - dataoff; + unsigned int cscov; + const char *msg; + + dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh); + if (dh == NULL) { + msg = "nf_ct_dccp: short packet "; + goto out_invalid; + } + + if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) || + dh->dccph_doff * 4 > dccp_len) { + msg = "nf_ct_dccp: truncated/malformed packet "; + goto out_invalid; + } + + cscov = dccp_len; + if (dh->dccph_cscov) { + cscov = (dh->dccph_cscov - 1) * 4; + if (cscov > dccp_len) { + msg = "nf_ct_dccp: bad checksum coverage "; + goto out_invalid; + } + } + + if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP, + pf)) { + msg = "nf_ct_dccp: bad checksum "; + goto out_invalid; + } + + if (dh->dccph_type >= DCCP_PKT_INVALID) { + msg = "nf_ct_dccp: reserved packet type "; + goto out_invalid; + } + + return NF_ACCEPT; + +out_invalid: + if (LOG_INVALID(IPPROTO_DCCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg); + return -NF_ACCEPT; +} + +static int dccp_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.dccp.port), + ntohs(tuple->dst.u.dccp.port)); +} + +static int dccp_print_conntrack(struct seq_file *s, const struct nf_conn *ct) +{ + return seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]); +} + +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, + const struct nf_conn *ct) +{ + struct nlattr *nest_parms; + + read_lock_bh(&dccp_lock); + nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state); + nla_nest_end(skb, nest_parms); + read_unlock_bh(&dccp_lock); + return 0; + +nla_put_failure: + read_unlock_bh(&dccp_lock); + return -1; +} + +static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { + [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, +}; + +static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) +{ + struct nlattr *attr = cda[CTA_PROTOINFO_DCCP]; + struct nlattr *tb[CTA_PROTOINFO_DCCP_MAX + 1]; + int err; + + if (!attr) + return 0; + + err = nla_parse_nested(tb, CTA_PROTOINFO_DCCP_MAX, attr, + dccp_nla_policy); + if (err < 0) + return err; + + if (!tb[CTA_PROTOINFO_DCCP_STATE] || + nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) + return -EINVAL; + + write_lock_bh(&dccp_lock); + ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); + write_unlock_bh(&dccp_lock); + return 0; +} +#endif + +#ifdef CONFIG_SYSCTL +static unsigned int dccp_sysctl_table_users; +static struct ctl_table_header *dccp_sysctl_header; +static ctl_table dccp_sysctl_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_dccp_timeout_request", + .data = &dccp_timeout[CT_DCCP_REQUEST], + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_dccp_timeout_respond", + .data = &dccp_timeout[CT_DCCP_RESPOND], + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_dccp_timeout_partopen", + .data = &dccp_timeout[CT_DCCP_PARTOPEN], + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_dccp_timeout_open", + .data = &dccp_timeout[CT_DCCP_OPEN], + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_dccp_timeout_closereq", + .data = &dccp_timeout[CT_DCCP_CLOSEREQ], + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_dccp_timeout_closing", + .data = &dccp_timeout[CT_DCCP_CLOSING], + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_dccp_timeout_timewait", + .data = &dccp_timeout[CT_DCCP_TIMEWAIT], + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_dccp_loose", + .data = &nf_ct_dccp_loose, + .maxlen = sizeof(nf_ct_dccp_loose), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .ctl_name = 0, + } +}; +#endif /* CONFIG_SYSCTL */ + +static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = { + .l3proto = AF_INET, + .l4proto = IPPROTO_DCCP, + .name = "dccp", + .pkt_to_tuple = dccp_pkt_to_tuple, + .invert_tuple = dccp_invert_tuple, + .new = dccp_new, + .packet = dccp_packet, + .error = dccp_error, + .print_tuple = dccp_print_tuple, + .print_conntrack = dccp_print_conntrack, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .to_nlattr = dccp_to_nlattr, + .from_nlattr = nlattr_to_dccp, + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nla_policy = nf_ct_port_nla_policy, +#endif +#ifdef CONFIG_SYSCTL + .ctl_table_users = &dccp_sysctl_table_users, + .ctl_table_header = &dccp_sysctl_header, + .ctl_table = dccp_sysctl_table, +#endif +}; + +static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { + .l3proto = AF_INET6, + .l4proto = IPPROTO_DCCP, + .name = "dccp", + .pkt_to_tuple = dccp_pkt_to_tuple, + .invert_tuple = dccp_invert_tuple, + .new = dccp_new, + .packet = dccp_packet, + .error = dccp_error, + .print_tuple = dccp_print_tuple, + .print_conntrack = dccp_print_conntrack, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .to_nlattr = dccp_to_nlattr, + .from_nlattr = nlattr_to_dccp, + .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nla_policy = nf_ct_port_nla_policy, +#endif +#ifdef CONFIG_SYSCTL + .ctl_table_users = &dccp_sysctl_table_users, + .ctl_table_header = &dccp_sysctl_header, + .ctl_table = dccp_sysctl_table, +#endif +}; + +static int __init nf_conntrack_proto_dccp_init(void) +{ + int err; + + err = nf_conntrack_l4proto_register(&dccp_proto4); + if (err < 0) + goto err1; + + err = nf_conntrack_l4proto_register(&dccp_proto6); + if (err < 0) + goto err2; + return 0; + +err2: + nf_conntrack_l4proto_unregister(&dccp_proto4); +err1: + return err; +} + +static void __exit nf_conntrack_proto_dccp_fini(void) +{ + nf_conntrack_l4proto_unregister(&dccp_proto6); + nf_conntrack_l4proto_unregister(&dccp_proto4); +} + +module_init(nf_conntrack_proto_dccp_init); +module_exit(nf_conntrack_proto_dccp_fini); + +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("DCCP connection tracking protocol helper"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3-71-gd317 From dd13b010368f85dfa59364ba87bfe8ae930b2832 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 14 Apr 2008 11:15:52 +0200 Subject: [NETFILTER]: nf_nat: kill helper and seq_adjust hooks Connection tracking helpers (specifically FTP) need to be called before NAT sequence numbers adjustments are performed to be able to compare them against previously seen ones. We've introduced two new hooks around 2.6.11 to maintain this ordering when NAT modules were changed to get called from conntrack helpers directly. The cost of netfilter hooks is quite high and sequence number adjustments are only rarely needed however. Add a RCU-protected sequence number adjustment function pointer and call it from IPv4 conntrack after calling the helper. Signed-off-by: Patrick McHardy --- include/linux/netfilter_ipv4.h | 2 - include/net/netfilter/nf_nat_helper.h | 3 ++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 56 +++++++++++++------------- net/ipv4/netfilter/nf_nat_core.c | 5 +++ net/ipv4/netfilter/nf_nat_helper.c | 1 - net/ipv4/netfilter/nf_nat_standalone.c | 35 ---------------- 6 files changed, 35 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 9a10092e358c..650318b0c405 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -62,8 +62,6 @@ enum nf_ip_hook_priorities { NF_IP_PRI_FILTER = 0, NF_IP_PRI_NAT_SRC = 100, NF_IP_PRI_SELINUX_LAST = 225, - NF_IP_PRI_CONNTRACK_HELPER = INT_MAX - 2, - NF_IP_PRI_NAT_SEQ_ADJUST = INT_MAX - 1, NF_IP_PRI_CONNTRACK_CONFIRM = INT_MAX, NF_IP_PRI_LAST = INT_MAX, }; diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index 58dd22687949..237a961f40e1 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -24,6 +24,9 @@ extern int nf_nat_mangle_udp_packet(struct sk_buff *skb, extern int nf_nat_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo); +extern int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo); /* Setup NAT on this expected conntrack so it follows master, but goes * to port ct->master->saved_proto. */ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index a65b845c5f15..41e79613eb0a 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -23,6 +23,12 @@ #include #include #include +#include + +int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo); +EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook); static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) @@ -100,36 +106,42 @@ static unsigned int ipv4_confirm(unsigned int hooknum, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) -{ - /* We've seen it coming out the other side: confirm it */ - return nf_conntrack_confirm(skb); -} - -static unsigned int ipv4_conntrack_help(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; + unsigned int ret; /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) - return NF_ACCEPT; + goto out; help = nfct_help(ct); if (!help) - return NF_ACCEPT; + goto out; + /* rcu_read_lock()ed by nf_hook_slow */ helper = rcu_dereference(help->helper); if (!helper) - return NF_ACCEPT; - return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), - ct, ctinfo); + goto out; + + ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), + ct, ctinfo); + if (ret != NF_ACCEPT) + return ret; + + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { + typeof(nf_nat_seq_adjust_hook) seq_adjust; + + seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); + if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) + return NF_DROP; + } +out: + /* We've seen it coming out the other side: confirm it */ + return nf_conntrack_confirm(skb); } static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, @@ -210,20 +222,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_CONNTRACK, }, - { - .hook = ipv4_conntrack_help, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_HELPER, - }, - { - .hook = ipv4_conntrack_help, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_CONNTRACK_HELPER, - }, { .hook = ipv4_confirm, .owner = THIS_MODULE, diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 9320c7ac5729..25c3efe4207e 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -618,6 +618,9 @@ static int __init nf_nat_init(void) nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); + + BUG_ON(nf_nat_seq_adjust_hook != NULL); + rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); return 0; cleanup_extend: @@ -644,6 +647,8 @@ static void __exit nf_nat_cleanup(void) nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size); nf_ct_l3proto_put(l3proto); nf_ct_extend_unregister(&nat_extend); + rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); + synchronize_net(); } MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 2fca727aa8ba..11976ea29884 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -416,7 +416,6 @@ nf_nat_seq_adjust(struct sk_buff *skb, return 1; } -EXPORT_SYMBOL(nf_nat_seq_adjust); /* Setup NAT on this expected conntrack so it follows master. */ /* If we fail to get a free NAT slot, we'll get dropped on confirm */ diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index a366b5865b9c..b7dd695691a0 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -245,25 +245,6 @@ nf_nat_local_fn(unsigned int hooknum, return ret; } -static unsigned int -nf_nat_adjust(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - - ct = nf_ct_get(skb, &ctinfo); - if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { - pr_debug("nf_nat_standalone: adjusting sequence number\n"); - if (!nf_nat_seq_adjust(skb, ct, ctinfo)) - return NF_DROP; - } - return NF_ACCEPT; -} - /* We must be after connection tracking and before packet filtering. */ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { @@ -283,14 +264,6 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_NAT_SRC, }, - /* After conntrack, adjust sequence number */ - { - .hook = nf_nat_adjust, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_NAT_SEQ_ADJUST, - }, /* Before packet filtering, change destination */ { .hook = nf_nat_local_fn, @@ -307,14 +280,6 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC, }, - /* After conntrack, adjust sequence number */ - { - .hook = nf_nat_adjust, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_NAT_SEQ_ADJUST, - }, }; static int __init nf_nat_standalone_init(void) -- cgit v1.2.3-71-gd317 From e7bfd0a1a6c8f82977253dab19be9d9979c1ec1b Mon Sep 17 00:00:00 2001 From: Peter Warasin Date: Mon, 14 Apr 2008 11:15:54 +0200 Subject: [NETFILTER]: bridge: add ebt_nflog watcher This patch adds the ebtables nflog watcher to the kernel in order to allow ebtables log through the nfnetlink_log backend. Signed-off-by: Peter Warasin Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebt_nflog.h | 21 +++++++++ net/bridge/netfilter/Kconfig | 14 ++++++ net/bridge/netfilter/Makefile | 1 + net/bridge/netfilter/ebt_nflog.c | 74 ++++++++++++++++++++++++++++++ 4 files changed, 110 insertions(+) create mode 100644 include/linux/netfilter_bridge/ebt_nflog.h create mode 100644 net/bridge/netfilter/ebt_nflog.c (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebt_nflog.h b/include/linux/netfilter_bridge/ebt_nflog.h new file mode 100644 index 000000000000..052817849b83 --- /dev/null +++ b/include/linux/netfilter_bridge/ebt_nflog.h @@ -0,0 +1,21 @@ +#ifndef __LINUX_BRIDGE_EBT_NFLOG_H +#define __LINUX_BRIDGE_EBT_NFLOG_H + +#define EBT_NFLOG_MASK 0x0 + +#define EBT_NFLOG_PREFIX_SIZE 64 +#define EBT_NFLOG_WATCHER "nflog" + +#define EBT_NFLOG_DEFAULT_GROUP 0x1 +#define EBT_NFLOG_DEFAULT_THRESHOLD 1 + +struct ebt_nflog_info { + u_int32_t len; + u_int16_t group; + u_int16_t threshold; + u_int16_t flags; + u_int16_t pad; + char prefix[EBT_NFLOG_PREFIX_SIZE]; +}; + +#endif /* __LINUX_BRIDGE_EBT_NFLOG_H */ diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 4a3e2bf892c7..7beeefa0f9c0 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -212,4 +212,18 @@ config BRIDGE_EBT_ULOG To compile it as a module, choose M here. If unsure, say N. +config BRIDGE_EBT_NFLOG + tristate "ebt: nflog support" + depends on BRIDGE_NF_EBTABLES + help + This option enables the nflog watcher, which allows to LOG + messages through the netfilter logging API, which can use + either the old LOG target, the old ULOG target or nfnetlink_log + as backend. + + This option adds the ulog watcher, that you can use in any rule + in any ebtables table. + + To compile it as a module, choose M here. If unsure, say N. + endmenu diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index 905087e0d485..83715d73a503 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -30,3 +30,4 @@ obj-$(CONFIG_BRIDGE_EBT_SNAT) += ebt_snat.o # watchers obj-$(CONFIG_BRIDGE_EBT_LOG) += ebt_log.o obj-$(CONFIG_BRIDGE_EBT_ULOG) += ebt_ulog.o +obj-$(CONFIG_BRIDGE_EBT_NFLOG) += ebt_nflog.o diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c new file mode 100644 index 000000000000..8e799aa9e560 --- /dev/null +++ b/net/bridge/netfilter/ebt_nflog.c @@ -0,0 +1,74 @@ +/* + * ebt_nflog + * + * Author: + * Peter Warasin + * + * February, 2008 + * + * Based on: + * xt_NFLOG.c, (C) 2006 by Patrick McHardy + * ebt_ulog.c, (C) 2004 by Bart De Schuymer + * + */ + +#include +#include +#include +#include +#include + +static void ebt_nflog(const struct sk_buff *skb, + unsigned int hooknr, + const struct net_device *in, + const struct net_device *out, + const void *data, unsigned int datalen) +{ + struct ebt_nflog_info *info = (struct ebt_nflog_info *)data; + struct nf_loginfo li; + + li.type = NF_LOG_TYPE_ULOG; + li.u.ulog.copy_len = info->len; + li.u.ulog.group = info->group; + li.u.ulog.qthreshold = info->threshold; + + nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, "%s", info->prefix); +} + +static int ebt_nflog_check(const char *tablename, + unsigned int hookmask, + const struct ebt_entry *e, + void *data, unsigned int datalen) +{ + struct ebt_nflog_info *info = (struct ebt_nflog_info *)data; + + if (datalen != EBT_ALIGN(sizeof(struct ebt_nflog_info))) + return -EINVAL; + if (info->flags & ~EBT_NFLOG_MASK) + return -EINVAL; + info->prefix[EBT_NFLOG_PREFIX_SIZE - 1] = '\0'; + return 0; +} + +static struct ebt_watcher nflog __read_mostly = { + .name = EBT_NFLOG_WATCHER, + .watcher = ebt_nflog, + .check = ebt_nflog_check, + .me = THIS_MODULE, +}; + +static int __init ebt_nflog_init(void) +{ + return ebt_register_watcher(&nflog); +} + +static void __exit ebt_nflog_fini(void) +{ + ebt_unregister_watcher(&nflog); +} + +module_init(ebt_nflog_init); +module_exit(ebt_nflog_fini); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Peter Warasin "); +MODULE_DESCRIPTION("ebtables NFLOG netfilter logging module"); -- cgit v1.2.3-71-gd317 From 99971e70fdc1862e120f3319fc0a4dba8c728acf Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 15 Apr 2008 00:27:58 -0700 Subject: [WANPIPE]: Forgotten bits of Sangoma drivers removal. Robert P. J. Day spotted that my removal of the Sangoma drivers missed a few bits. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- Documentation/networking/00-INDEX | 2 - Documentation/networking/wan-router.txt | 621 -------------------------------- include/linux/Kbuild | 1 - include/linux/if_wanpipe.h | 124 ------- 4 files changed, 748 deletions(-) delete mode 100644 Documentation/networking/wan-router.txt delete mode 100644 include/linux/if_wanpipe.h (limited to 'include/linux') diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index c485ee028bd9..1634c6dcecae 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -100,8 +100,6 @@ tuntap.txt - TUN/TAP device driver, allowing user space Rx/Tx of packets. vortex.txt - info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) Ethernet cards. -wan-router.txt - - WAN router documentation wavelan.txt - AT&T GIS (nee NCR) WaveLAN card: An Ethernet-like radio transceiver x25.txt diff --git a/Documentation/networking/wan-router.txt b/Documentation/networking/wan-router.txt deleted file mode 100644 index bc2ab419a74a..000000000000 --- a/Documentation/networking/wan-router.txt +++ /dev/null @@ -1,621 +0,0 @@ ------------------------------------------------------------------------------- -Linux WAN Router Utilities Package ------------------------------------------------------------------------------- -Version 2.2.1 -Mar 28, 2001 -Author: Nenad Corbic -Copyright (c) 1995-2001 Sangoma Technologies Inc. ------------------------------------------------------------------------------- - -INTRODUCTION - -Wide Area Networks (WANs) are used to interconnect Local Area Networks (LANs) -and/or stand-alone hosts over vast distances with data transfer rates -significantly higher than those achievable with commonly used dial-up -connections. - -Usually an external device called `WAN router' sitting on your local network -or connected to your machine's serial port provides physical connection to -WAN. Although router's job may be as simple as taking your local network -traffic, converting it to WAN format and piping it through the WAN link, these -devices are notoriously expensive, with prices as much as 2 - 5 times higher -then the price of a typical PC box. - -Alternatively, considering robustness and multitasking capabilities of Linux, -an internal router can be built (most routers use some sort of stripped down -Unix-like operating system anyway). With a number of relatively inexpensive WAN -interface cards available on the market, a perfectly usable router can be -built for less than half a price of an external router. Yet a Linux box -acting as a router can still be used for other purposes, such as fire-walling, -running FTP, WWW or DNS server, etc. - -This kernel module introduces the notion of a WAN Link Driver (WLD) to Linux -operating system and provides generic hardware-independent services for such -drivers. Why can existing Linux network device interface not be used for -this purpose? Well, it can. However, there are a few key differences between -a typical network interface (e.g. Ethernet) and a WAN link. - -Many WAN protocols, such as X.25 and frame relay, allow for multiple logical -connections (known as `virtual circuits' in X.25 terminology) over a single -physical link. Each such virtual circuit may (and almost always does) lead -to a different geographical location and, therefore, different network. As a -result, it is the virtual circuit, not the physical link, that represents a -route and, therefore, a network interface in Linux terms. - -To further complicate things, virtual circuits are usually volatile in nature -(excluding so called `permanent' virtual circuits or PVCs). With almost no -time required to set up and tear down a virtual circuit, it is highly desirable -to implement on-demand connections in order to minimize network charges. So -unlike a typical network driver, the WAN driver must be able to handle multiple -network interfaces and cope as multiple virtual circuits come into existence -and go away dynamically. - -Last, but not least, WAN configuration is much more complex than that of say -Ethernet and may well amount to several dozens of parameters. Some of them -are "link-wide" while others are virtual circuit-specific. The same holds -true for WAN statistics which is by far more extensive and extremely useful -when troubleshooting WAN connections. Extending the ifconfig utility to suit -these needs may be possible, but does not seem quite reasonable. Therefore, a -WAN configuration utility and corresponding application programmer's interface -is needed for this purpose. - -Most of these problems are taken care of by this module. Its goal is to -provide a user with more-or-less standard look and feel for all WAN devices and -assist a WAN device driver writer by providing common services, such as: - - o User-level interface via /proc file system - o Centralized configuration - o Device management (setup, shutdown, etc.) - o Network interface management (dynamic creation/destruction) - o Protocol encapsulation/decapsulation - -To ba able to use the Linux WAN Router you will also need a WAN Tools package -available from - - ftp.sangoma.com/pub/linux/current_wanpipe/wanpipe-X.Y.Z.tgz - -where vX.Y.Z represent the wanpipe version number. - -For technical questions and/or comments please e-mail to ncorbic@sangoma.com. -For general inquiries please contact Sangoma Technologies Inc. by - - Hotline: 1-800-388-2475 (USA and Canada, toll free) - Phone: (905) 474-1990 ext: 106 - Fax: (905) 474-9223 - E-mail: dm@sangoma.com (David Mandelstam) - WWW: http://www.sangoma.com - - -INSTALLATION - -Please read the WanpipeForLinux.pdf manual on how to -install the WANPIPE tools and drivers properly. - - -After installing wanpipe package: /usr/local/wanrouter/doc. -On the ftp.sangoma.com : /linux/current_wanpipe/doc - - -COPYRIGHT AND LICENSING INFORMATION - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; either version 2, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 675 Mass -Ave, Cambridge, MA 02139, USA. - - - -ACKNOWLEDGEMENTS - -This product is based on the WANPIPE(tm) Multiprotocol WAN Router developed -by Sangoma Technologies Inc. for Linux 2.0.x and 2.2.x. Success of the WANPIPE -together with the next major release of Linux kernel in summer 1996 commanded -adequate changes to the WANPIPE code to take full advantage of new Linux -features. - -Instead of continuing developing proprietary interface tied to Sangoma WAN -cards, we decided to separate all hardware-independent code into a separate -module and defined two levels of interfaces - one for user-level applications -and another for kernel-level WAN drivers. WANPIPE is now implemented as a -WAN driver compliant with the WAN Link Driver interface. Also a general -purpose WAN configuration utility and a set of shell scripts was developed to -support WAN router at the user level. - -Many useful ideas concerning hardware-independent interface implementation -were given by Mike McLagan and his implementation -of the Frame Relay router and drivers for Sangoma cards (dlci/sdla). - -With the new implementation of the APIs being incorporated into the WANPIPE, -a special thank goes to Alan Cox in providing insight into BSD sockets. - -Special thanks to all the WANPIPE users who performed field-testing, reported -bugs and made valuable comments and suggestions that help us to improve this -product. - - - -NEW IN THIS RELEASE - - o Updated the WANCFG utility - Calls the pppconfig to configure the PPPD - for async connections. - - o Added the PPPCONFIG utility - Used to configure the PPPD daemon for the - WANPIPE Async PPP and standard serial port. - The wancfg calls the pppconfig to configure - the pppd. - - o Fixed the PCI autodetect feature. - The SLOT 0 was used as an autodetect option - however, some high end PC's slot numbers start - from 0. - - o This release has been tested with the new backupd - daemon release. - - -PRODUCT COMPONENTS AND RELATED FILES - -/etc: (or user defined) - wanpipe1.conf default router configuration file - -/lib/modules/X.Y.Z/misc: - wanrouter.o router kernel loadable module - af_wanpipe.o wanpipe api socket module - -/lib/modules/X.Y.Z/net: - sdladrv.o Sangoma SDLA support module - wanpipe.o Sangoma WANPIPE(tm) driver module - -/proc/net/wanrouter - Config reads current router configuration - Status reads current router status - {name} reads WAN driver statistics - -/usr/sbin: - wanrouter wanrouter start-up script - wanconfig wanrouter configuration utility - sdladump WANPIPE adapter memory dump utility - fpipemon Monitor for Frame Relay - cpipemon Monitor for Cisco HDLC - ppipemon Monitor for PPP - xpipemon Monitor for X25 - wpkbdmon WANPIPE keyboard led monitor/debugger - -/usr/local/wanrouter: - README this file - COPYING GNU General Public License - Setup installation script - Filelist distribution definition file - wanrouter.rc meta-configuration file - (used by the Setup and wanrouter script) - -/usr/local/wanrouter/doc: - wanpipeForLinux.pdf WAN Router User's Manual - -/usr/local/wanrouter/patches: - wanrouter-v2213.gz patch for Linux kernels 2.2.11 up to 2.2.13. - wanrouter-v2214.gz patch for Linux kernel 2.2.14. - wanrouter-v2215.gz patch for Linux kernels 2.2.15 to 2.2.17. - wanrouter-v2218.gz patch for Linux kernels 2.2.18 and up. - wanrouter-v240.gz patch for Linux kernel 2.4.0. - wanrouter-v242.gz patch for Linux kernel 2.4.2 and up. - wanrouter-v2034.gz patch for Linux kernel 2.0.34 - wanrouter-v2036.gz patch for Linux kernel 2.0.36 and up. - -/usr/local/wanrouter/patches/kdrivers: - Sources of the latest WANPIPE device drivers. - These are used to UPGRADE the linux kernel to the newest - version if the kernel source has already been patched with - WANPIPE drivers. - -/usr/local/wanrouter/samples: - interface sample interface configuration file - wanpipe1.cpri CHDLC primary port - wanpipe2.csec CHDLC secondary port - wanpipe1.fr Frame Relay protocol - wanpipe1.ppp PPP protocol ) - wanpipe1.asy CHDLC ASYNC protocol - wanpipe1.x25 X25 protocol - wanpipe1.stty Sync TTY driver (Used by Kernel PPPD daemon) - wanpipe1.atty Async TTY driver (Used by Kernel PPPD daemon) - wanrouter.rc sample meta-configuration file - -/usr/local/wanrouter/util: - * wan-tools utilities source code - -/usr/local/wanrouter/api/x25: - * x25 api sample programs. -/usr/local/wanrouter/api/chdlc: - * chdlc api sample programs. -/usr/local/wanrouter/api/fr: - * fr api sample programs. -/usr/local/wanrouter/config/wancfg: - wancfg WANPIPE GUI configuration program. - Creates wanpipe#.conf files. -/usr/local/wanrouter/config/cfgft1: - cfgft1 GUI CSU/DSU configuration program. - -/usr/include/linux: - wanrouter.h router API definitions - wanpipe.h WANPIPE API definitions - sdladrv.h SDLA support module API definitions - sdlasfm.h SDLA firmware module definitions - if_wanpipe.h WANPIPE Socket definitions - sdlapci.h WANPIPE PCI definitions - - -/usr/src/linux/net/wanrouter: - * wanrouter source code - -/var/log: - wanrouter wanrouter start-up log (created by the Setup script) - -/var/lock: (or /var/lock/subsys for RedHat) - wanrouter wanrouter lock file (created by the Setup script) - -/usr/local/wanrouter/firmware: - fr514.sfm Frame relay firmware for Sangoma S508/S514 card - cdual514.sfm Dual Port Cisco HDLC firmware for Sangoma S508/S514 card - ppp514.sfm PPP Firmware for Sangoma S508 and S514 cards - x25_508.sfm X25 Firmware for Sangoma S508 card. - - -REVISION HISTORY - -1.0.0 December 31, 1996 Initial version - -1.0.1 January 30, 1997 Status and statistics can be read via /proc - filesystem entries. - -1.0.2 April 30, 1997 Added UDP management via monitors. - -1.0.3 June 3, 1997 UDP management for multiple boards using Frame - Relay and PPP - Enabled continuous transmission of Configure - Request Packet for PPP (for 508 only) - Connection Timeout for PPP changed from 900 to 0 - Flow Control Problem fixed for Frame Relay - -1.0.4 July 10, 1997 S508/FT1 monitoring capability in fpipemon and - ppipemon utilities. - Configurable TTL for UDP packets. - Multicast and Broadcast IP source addresses are - silently discarded. - -1.0.5 July 28, 1997 Configurable T391,T392,N391,N392,N393 for Frame - Relay in router.conf. - Configurable Memory Address through router.conf - for Frame Relay, PPP and X.25. (commenting this - out enables auto-detection). - Fixed freeing up received buffers using kfree() - for Frame Relay and X.25. - Protect sdla_peek() by calling save_flags(), - cli() and restore_flags(). - Changed number of Trace elements from 32 to 20 - Added DLCI specific data monitoring in FPIPEMON. -2.0.0 Nov 07, 1997 Implemented protection of RACE conditions by - critical flags for FRAME RELAY and PPP. - DLCI List interrupt mode implemented. - IPX support in FRAME RELAY and PPP. - IPX Server Support (MARS) - More driver specific stats included in FPIPEMON - and PIPEMON. - -2.0.1 Nov 28, 1997 Bug Fixes for version 2.0.0. - Protection of "enable_irq()" while - "disable_irq()" has been enabled from any other - routine (for Frame Relay, PPP and X25). - Added additional Stats for Fpipemon and Ppipemon - Improved Load Sharing for multiple boards - -2.0.2 Dec 09, 1997 Support for PAP and CHAP for ppp has been - implemented. - -2.0.3 Aug 15, 1998 New release supporting Cisco HDLC, CIR for Frame - relay, Dynamic IP assignment for PPP and Inverse - Arp support for Frame-relay. Man Pages are - included for better support and a new utility - for configuring FT1 cards. - -2.0.4 Dec 09, 1998 Dual Port support for Cisco HDLC. - Support for HDLC (LAPB) API. - Supports BiSync Streaming code for S502E - and S503 cards. - Support for Streaming HDLC API. - Provides a BSD socket interface for - creating applications using BiSync - streaming. - -2.0.5 Aug 04, 1999 CHDLC initialization bug fix. - PPP interrupt driven driver: - Fix to the PPP line hangup problem. - New PPP firmware - Added comments to the startup SYSTEM ERROR messages - Xpipemon debugging application for the X25 protocol - New USER_MANUAL.txt - Fixed the odd boundary 4byte writes to the board. - BiSync Streaming code has been taken out. - Available as a patch. - Streaming HDLC API has been taken out. - Available as a patch. - -2.0.6 Aug 17, 1999 Increased debugging in statup scripts - Fixed installation bugs from 2.0.5 - Kernel patch works for both 2.2.10 and 2.2.11 kernels. - There is no functional difference between the two packages - -2.0.7 Aug 26, 1999 o Merged X25API code into WANPIPE. - o Fixed a memory leak for X25API - o Updated the X25API code for 2.2.X kernels. - o Improved NEM handling. - -2.1.0 Oct 25, 1999 o New code for S514 PCI Card - o New CHDLC and Frame Relay drivers - o PPP and X25 are not supported in this release - -2.1.1 Nov 30, 1999 o PPP support for S514 PCI Cards - -2.1.3 Apr 06, 2000 o Socket based x25api - o Socket based chdlc api - o Socket based fr api - o Dual Port Receive only CHDLC support. - o Asynchronous CHDLC support (Secondary Port) - o cfgft1 GUI csu/dsu configurator - o wancfg GUI configuration file - configurator. - o Architectural directory changes. - -beta-2.1.4 Jul 2000 o Dynamic interface configuration: - Network interfaces reflect the state - of protocol layer. If the protocol becomes - disconnected, driver will bring down - the interface. Once the protocol reconnects - the interface will be brought up. - - Note: This option is turned off by default. - - o Dynamic wanrouter setup using 'wanconfig': - wanconfig utility can be used to - shutdown,restart,start or reconfigure - a virtual circuit dynamically. - - Frame Relay: Each DLCI can be: - created,stopped,restarted and reconfigured - dynamically using wanconfig. - - ex: wanconfig card wanpipe1 dev wp1_fr16 up - - o Wanrouter startup via command line arguments: - wanconfig also supports wanrouter startup via command line - arguments. Thus, there is no need to create a wanpipe#.conf - configuration file. - - o Socket based x25api update/bug fixes. - Added support for LCN numbers greater than 255. - Option to pass up modem messages. - Provided a PCI IRQ check, so a single S514 - card is guaranteed to have a non-sharing interrupt. - - o Fixes to the wancfg utility. - o New FT1 debugging support via *pipemon utilities. - o Frame Relay ARP support Enabled. - -beta3-2.1.4 Jul 2000 o X25 M_BIT Problem fix. - o Added the Multi-Port PPP - Updated utilities for the Multi-Port PPP. - -2.1.4 Aut 2000 - o In X25API: - Maximum packet an application can send - to the driver has been extended to 4096 bytes. - - Fixed the x25 startup bug. Enable - communications only after all interfaces - come up. HIGH SVC/PVC is used to calculate - the number of channels. - Enable protocol only after all interfaces - are enabled. - - o Added an extra state to the FT1 config, kernel module. - o Updated the pipemon debuggers. - - o Blocked the Multi-Port PPP from running on kernels - 2.2.16 or greater, due to syncppp kernel module - change. - -beta1-2.1.5 Nov 15 2000 - o Fixed the MultiPort PPP Support for kernels 2.2.16 and above. - 2.2.X kernels only - - o Secured the driver UDP debugging calls - - All illegal network debugging calls are reported to - the log. - - Defined a set of allowed commands, all other denied. - - o Cpipemon - - Added set FT1 commands to the cpipemon. Thus CSU/DSU - configuration can be performed using cpipemon. - All systems that cannot run cfgft1 GUI utility should - use cpipemon to configure the on board CSU/DSU. - - - o Keyboard Led Monitor/Debugger - - A new utility /usr/sbin/wpkbdmon uses keyboard leds - to convey operational statistic information of the - Sangoma WANPIPE cards. - NUM_LOCK = Line State (On=connected, Off=disconnected) - CAPS_LOCK = Tx data (On=transmitting, Off=no tx data) - SCROLL_LOCK = Rx data (On=receiving, Off=no rx data - - o Hardware probe on module load and dynamic device allocation - - During WANPIPE module load, all Sangoma cards are probed - and found information is printed in the /var/log/messages. - - If no cards are found, the module load fails. - - Appropriate number of devices are dynamically loaded - based on the number of Sangoma cards found. - - Note: The kernel configuration option - CONFIG_WANPIPE_CARDS has been taken out. - - o Fixed the Frame Relay and Chdlc network interfaces so they are - compatible with libpcap libraries. Meaning, tcpdump, snort, - ethereal, and all other packet sniffers and debuggers work on - all WANPIPE network interfaces. - - Set the network interface encoding type to ARPHRD_PPP. - This tell the sniffers that data obtained from the - network interface is in pure IP format. - Fix for 2.2.X kernels only. - - o True interface encoding option for Frame Relay and CHDLC - - The above fix sets the network interface encoding - type to ARPHRD_PPP, however some customers use - the encoding interface type to determine the - protocol running. Therefore, the TURE ENCODING - option will set the interface type back to the - original value. - - NOTE: If this option is used with Frame Relay and CHDLC - libpcap library support will be broken. - i.e. tcpdump will not work. - Fix for 2.2.x Kernels only. - - o Ethernet Bridgind over Frame Relay - - The Frame Relay bridging has been developed by - Kristian Hoffmann and Mark Wells. - - The Linux kernel bridge is used to send ethernet - data over the frame relay links. - For 2.2.X Kernels only. - - o Added extensive 2.0.X support. Most new features of - 2.1.5 for protocols Frame Relay, PPP and CHDLC are - supported under 2.0.X kernels. - -beta1-2.2.0 Dec 30 2000 - o Updated drivers for 2.4.X kernels. - o Updated drivers for SMP support. - o X25API is now able to share PCI interrupts. - o Took out a general polling routine that was used - only by X25API. - o Added appropriate locks to the dynamic reconfiguration - code. - o Fixed a bug in the keyboard debug monitor. - -beta2-2.2.0 Jan 8 2001 - o Patches for 2.4.0 kernel - o Patches for 2.2.18 kernel - o Minor updates to PPP and CHLDC drivers. - Note: No functional difference. - -beta3-2.2.9 Jan 10 2001 - o I missed the 2.2.18 kernel patches in beta2-2.2.0 - release. They are included in this release. - -Stable Release -2.2.0 Feb 01 2001 - o Bug fix in wancfg GUI configurator. - The edit function didn't work properly. - - -bata1-2.2.1 Feb 09 2001 - o WANPIPE TTY Driver emulation. - Two modes of operation Sync and Async. - Sync: Using the PPPD daemon, kernel SyncPPP layer - and the Wanpipe sync TTY driver: a PPP protocol - connection can be established via Sangoma adapter, over - a T1 leased line. - - The 2.4.0 kernel PPP layer supports MULTILINK - protocol, that can be used to bundle any number of Sangoma - adapters (T1 lines) into one, under a single IP address. - Thus, efficiently obtaining multiple T1 throughput. - - NOTE: The remote side must also implement MULTILINK PPP - protocol. - - Async:Using the PPPD daemon, kernel AsyncPPP layer - and the WANPIPE async TTY driver: a PPP protocol - connection can be established via Sangoma adapter and - a modem, over a telephone line. - - Thus, the WANPIPE async TTY driver simulates a serial - TTY driver that would normally be used to interface the - MODEM to the linux kernel. - - o WANPIPE PPP Backup Utility - This utility will monitor the state of the PPP T1 line. - In case of failure, a dial up connection will be established - via pppd daemon, ether via a serial tty driver (serial port), - or a WANPIPE async TTY driver (in case serial port is unavailable). - - Furthermore, while in dial up mode, the primary PPP T1 link - will be monitored for signs of life. - - If the PPP T1 link comes back to life, the dial up connection - will be shutdown and T1 line re-established. - - - o New Setup installation script. - Option to UPGRADE device drivers if the kernel source has - already been patched with WANPIPE. - - Option to COMPILE WANPIPE modules against the currently - running kernel, thus no need for manual kernel and module - re-compilation. - - o Updates and Bug Fixes to wancfg utility. - -bata2-2.2.1 Feb 20 2001 - - o Bug fixes to the CHDLC device drivers. - The driver had compilation problems under kernels - 2.2.14 or lower. - - o Bug fixes to the Setup installation script. - The device drivers compilation options didn't work - properly. - - o Update to the wpbackupd daemon. - Optimized the cross-over times, between the primary - link and the backup dialup. - -beta3-2.2.1 Mar 02 2001 - o Patches for 2.4.2 kernel. - - o Bug fixes to util/ make files. - o Bug fixes to the Setup installation script. - - o Took out the backupd support and made it into - as separate package. - -beta4-2.2.1 Mar 12 2001 - - o Fix to the Frame Relay Device driver. - IPSAC sends a packet of zero length - header to the frame relay driver. The - driver tries to push its own 2 byte header - into the packet, which causes the driver to - crash. - - o Fix the WANPIPE re-configuration code. - Bug was found by trying to run the cfgft1 while the - interface was already running. - - o Updates to cfgft1. - Writes a wanpipe#.cfgft1 configuration file - once the CSU/DSU is configured. This file can - holds the current CSU/DSU configuration. - - - ->>>>>> END OF README <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - diff --git a/include/linux/Kbuild b/include/linux/Kbuild index e56b739d8e23..b3d9ccde0c27 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -231,7 +231,6 @@ unifdef-y += if_pppol2tp.h unifdef-y += if_pppox.h unifdef-y += if_tr.h unifdef-y += if_vlan.h -unifdef-y += if_wanpipe.h unifdef-y += igmp.h unifdef-y += inet_diag.h unifdef-y += in.h diff --git a/include/linux/if_wanpipe.h b/include/linux/if_wanpipe.h deleted file mode 100644 index e594ca6069e5..000000000000 --- a/include/linux/if_wanpipe.h +++ /dev/null @@ -1,124 +0,0 @@ -/***************************************************************************** -* if_wanpipe.h Header file for the Sangoma AF_WANPIPE Socket -* -* Author: Nenad Corbic -* -* Copyright: (c) 2000 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* -* Jan 28, 2000 Nenad Corbic Initial Version -* -*****************************************************************************/ - -#ifndef __LINUX_IF_WAN_PACKET_H -#define __LINUX_IF_WAN_PACKET_H - -struct wan_sockaddr_ll -{ - unsigned short sll_family; - unsigned short sll_protocol; - int sll_ifindex; - unsigned short sll_hatype; - unsigned char sll_pkttype; - unsigned char sll_halen; - unsigned char sll_addr[8]; - unsigned char sll_device[14]; - unsigned char sll_card[14]; -}; - -typedef struct -{ - unsigned char free; - unsigned char state_sk; - int rcvbuf; - int sndbuf; - int rmem; - int wmem; - int sk_count; - unsigned char bound; - char name[14]; - unsigned char d_state; - unsigned char svc; - unsigned short lcn; - unsigned char mbox; - unsigned char cmd_busy; - unsigned char command; - unsigned poll; - unsigned poll_cnt; - int rblock; -} wan_debug_hdr_t; - -#define MAX_NUM_DEBUG 10 -#define X25_PROT 0x16 -#define PVC_PROT 0x17 - -typedef struct -{ - wan_debug_hdr_t debug[MAX_NUM_DEBUG]; -}wan_debug_t; - -#define SIOC_WANPIPE_GET_CALL_DATA (SIOCPROTOPRIVATE + 0) -#define SIOC_WANPIPE_SET_CALL_DATA (SIOCPROTOPRIVATE + 1) -#define SIOC_WANPIPE_ACCEPT_CALL (SIOCPROTOPRIVATE + 2) -#define SIOC_WANPIPE_CLEAR_CALL (SIOCPROTOPRIVATE + 3) -#define SIOC_WANPIPE_RESET_CALL (SIOCPROTOPRIVATE + 4) -#define SIOC_WANPIPE_DEBUG (SIOCPROTOPRIVATE + 5) -#define SIOC_WANPIPE_SET_NONBLOCK (SIOCPROTOPRIVATE + 6) -#define SIOC_WANPIPE_CHECK_TX (SIOCPROTOPRIVATE + 7) -#define SIOC_WANPIPE_SOCK_STATE (SIOCPROTOPRIVATE + 8) - -/* Packet types */ - -#define WAN_PACKET_HOST 0 /* To us */ -#define WAN_PACKET_BROADCAST 1 /* To all */ -#define WAN_PACKET_MULTICAST 2 /* To group */ -#define WAN_PACKET_OTHERHOST 3 /* To someone else */ -#define WAN_PACKET_OUTGOING 4 /* Outgoing of any type */ -/* These ones are invisible by user level */ -#define WAN_PACKET_LOOPBACK 5 /* MC/BRD frame looped back */ -#define WAN_PACKET_FASTROUTE 6 /* Fastrouted frame */ - - -/* X25 specific */ -#define WAN_PACKET_DATA 7 -#define WAN_PACKET_CMD 8 -#define WAN_PACKET_ASYNC 9 -#define WAN_PACKET_ERR 10 - -/* Packet socket options */ - -#define WAN_PACKET_ADD_MEMBERSHIP 1 -#define WAN_PACKET_DROP_MEMBERSHIP 2 - -#define WAN_PACKET_MR_MULTICAST 0 -#define WAN_PACKET_MR_PROMISC 1 -#define WAN_PACKET_MR_ALLMULTI 2 - -#ifdef __KERNEL__ - -/* Private wanpipe socket structures. */ -struct wanpipe_opt -{ - void *mbox; /* Mail box */ - void *card; /* Card bouded to */ - struct net_device *dev; /* Bounded device */ - unsigned short lcn; /* Binded LCN */ - unsigned char svc; /* 0=pvc, 1=svc */ - unsigned char timer; /* flag for delayed transmit*/ - struct timer_list tx_timer; - unsigned poll_cnt; - unsigned char force; /* Used to force sock release */ - atomic_t packet_sent; - unsigned short num; -}; - -#define wp_sk(__sk) ((struct wanpipe_opt *)(__sk)->sk_protinfo) - -#endif - -#endif -- cgit v1.2.3-71-gd317 From 31efdf0530b6351b0658d35a602a0f2d6bc2ed6f Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 15 Apr 2008 00:30:16 -0700 Subject: [ISDN] include/linux/isdn.h: remove dead code This patch remove the usage of a nonexisting kconfig variable. Reported-by: Robert P. J. Day Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- include/linux/isdn.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/isdn.h b/include/linux/isdn.h index 9cb2855bb170..44cd663c53b6 100644 --- a/include/linux/isdn.h +++ b/include/linux/isdn.h @@ -16,14 +16,8 @@ #include -#ifdef CONFIG_COBALT_MICRO_SERVER -/* Save memory */ -#define ISDN_MAX_DRIVERS 2 -#define ISDN_MAX_CHANNELS 8 -#else #define ISDN_MAX_DRIVERS 32 #define ISDN_MAX_CHANNELS 64 -#endif /* New ioctl-codes */ #define IIOCNETAIF _IO('I',1) -- cgit v1.2.3-71-gd317 From a9fde2607895667823e9d1172fc193087125ef68 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 16 Apr 2008 00:48:04 -0700 Subject: [VLAN]: Tag vlan_group_device with net device, not ifindex. Currently vlan group is searched using one key - the ifindex. We'll have to lookup the vlan_group by two keys - ifindex and net. Turning the vlan_group lookup key to struct net_device pointer will make this process easier. Besides, this will eliminate one more place in the networking, that assumes that indexes are unique in the kernel. Signed-off-by: Pavel Emelyanov Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 4 +++- net/8021q/vlan.c | 22 +++++++++++----------- 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index edd55af7ebd6..15ace02b7b24 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -81,7 +81,9 @@ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); #define VLAN_GROUP_ARRAY_PART_LEN (VLAN_GROUP_ARRAY_LEN/VLAN_GROUP_ARRAY_SPLIT_PARTS) struct vlan_group { - int real_dev_ifindex; /* The ifindex of the ethernet(like) device the vlan is attached to. */ + struct net_device *real_dev; /* The ethernet(like) device + * the vlan is attached to. + */ unsigned int nr_vlans; struct hlist_node hlist; /* linked list */ struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS]; diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 5975ec3be7f3..cf8d810a130d 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -65,14 +65,14 @@ static inline unsigned int vlan_grp_hashfn(unsigned int idx) } /* Must be invoked with RCU read lock (no preempt) */ -static struct vlan_group *__vlan_find_group(int real_dev_ifindex) +static struct vlan_group *__vlan_find_group(struct net_device *real_dev) { struct vlan_group *grp; struct hlist_node *n; - int hash = vlan_grp_hashfn(real_dev_ifindex); + int hash = vlan_grp_hashfn(real_dev->ifindex); hlist_for_each_entry_rcu(grp, n, &vlan_group_hash[hash], hlist) { - if (grp->real_dev_ifindex == real_dev_ifindex) + if (grp->real_dev == real_dev) return grp; } @@ -86,7 +86,7 @@ static struct vlan_group *__vlan_find_group(int real_dev_ifindex) struct net_device *__find_vlan_dev(struct net_device *real_dev, unsigned short VID) { - struct vlan_group *grp = __vlan_find_group(real_dev->ifindex); + struct vlan_group *grp = __vlan_find_group(real_dev); if (grp) return vlan_group_get_device(grp, VID); @@ -103,7 +103,7 @@ static void vlan_group_free(struct vlan_group *grp) kfree(grp); } -static struct vlan_group *vlan_group_alloc(int ifindex) +static struct vlan_group *vlan_group_alloc(struct net_device *real_dev) { struct vlan_group *grp; @@ -111,9 +111,9 @@ static struct vlan_group *vlan_group_alloc(int ifindex) if (!grp) return NULL; - grp->real_dev_ifindex = ifindex; + grp->real_dev = real_dev; hlist_add_head_rcu(&grp->hlist, - &vlan_group_hash[vlan_grp_hashfn(ifindex)]); + &vlan_group_hash[vlan_grp_hashfn(real_dev->ifindex)]); return grp; } @@ -151,7 +151,7 @@ void unregister_vlan_dev(struct net_device *dev) ASSERT_RTNL(); - grp = __vlan_find_group(real_dev->ifindex); + grp = __vlan_find_group(real_dev); BUG_ON(!grp); vlan_proc_rem_dev(dev); @@ -246,9 +246,9 @@ int register_vlan_dev(struct net_device *dev) struct vlan_group *grp, *ngrp = NULL; int err; - grp = __vlan_find_group(real_dev->ifindex); + grp = __vlan_find_group(real_dev); if (!grp) { - ngrp = grp = vlan_group_alloc(real_dev->ifindex); + ngrp = grp = vlan_group_alloc(real_dev); if (!grp) return -ENOBUFS; } @@ -412,7 +412,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, goto out; } - grp = __vlan_find_group(dev->ifindex); + grp = __vlan_find_group(dev); if (!grp) goto out; -- cgit v1.2.3-71-gd317 From f3005d7f4abe03ad41af33b1548602cd086d86a2 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 16 Apr 2008 02:02:18 -0700 Subject: [NETNS]: Add netns refcnt debug for network devices. dev_set_net is called for - just allocated devices - devices moving from one namespace to another release_net has proper check inside to distinguish these cases. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- net/core/dev.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8b17ed40dea2..7c1d4466583b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -758,7 +758,8 @@ static inline void dev_net_set(struct net_device *dev, struct net *net) { #ifdef CONFIG_NET_NS - dev->nd_net = net; + release_net(dev->nd_net); + dev->nd_net = hold_net(net); #endif } diff --git a/net/core/dev.c b/net/core/dev.c index 7aa01125287e..77530e9a34fc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4042,6 +4042,8 @@ EXPORT_SYMBOL(alloc_netdev_mq); */ void free_netdev(struct net_device *dev) { + release_net(dev_net(dev)); + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { kfree((char *)dev - dev->padded); -- cgit v1.2.3-71-gd317 From 9d9326d3bc0ea9a8bbe40bf3e5e66c7b9858caa0 Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Wed, 9 Apr 2008 19:38:13 -0500 Subject: phy: Change mii_bus id field to a string Having the id field be an int was making more complex bus topologies excessively difficult. For now, just convert it to a string, and change all instances of "bus->id = val" to snprintf(id, MII_BUS_ID_LEN, "%x", val). Signed-off-by: Andy Fleming Signed-off-by: Jeff Garzik --- arch/powerpc/platforms/82xx/ep8248e.c | 2 +- arch/powerpc/platforms/pasemi/gpio_mdio.c | 2 +- arch/powerpc/sysdev/fsl_soc.c | 5 +++-- drivers/net/au1000_eth.c | 6 +++--- drivers/net/bfin_mac.c | 2 +- drivers/net/cpmac.c | 5 ++--- drivers/net/fec_mpc52xx.c | 2 +- drivers/net/fec_mpc52xx_phy.c | 2 +- drivers/net/fs_enet/fs_enet-main.c | 4 ++-- drivers/net/fs_enet/mii-bitbang.c | 4 ++-- drivers/net/fs_enet/mii-fec.c | 4 ++-- drivers/net/gianfar_mii.c | 2 +- drivers/net/macb.c | 2 +- drivers/net/pasemi_mac.c | 2 +- drivers/net/phy/fixed.c | 2 +- drivers/net/sb1250-mac.c | 2 +- drivers/net/ucc_geth.c | 2 +- drivers/net/ucc_geth.h | 2 +- drivers/net/ucc_geth_mii.c | 2 +- include/linux/fsl_devices.h | 2 +- include/linux/phy.h | 12 ++++++++---- 21 files changed, 36 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c index ba93d8ae9b0c..d5770fdf7f09 100644 --- a/arch/powerpc/platforms/82xx/ep8248e.c +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -138,7 +138,7 @@ static int __devinit ep8248e_mdio_probe(struct of_device *ofdev, bus->name = "ep8248e-mdio-bitbang"; bus->dev = &ofdev->dev; - bus->id = res.start; + snprintf(bus->id, MII_BUS_ID_SIZE, "%x", res.start); return mdiobus_register(bus); } diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c index b46542990cf8..ab6955412ba4 100644 --- a/arch/powerpc/platforms/pasemi/gpio_mdio.c +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -241,7 +241,7 @@ static int __devinit gpio_mdio_probe(struct of_device *ofdev, new_bus->reset = &gpio_mdio_reset; prop = of_get_property(np, "reg", NULL); - new_bus->id = *prop; + snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", *prop); new_bus->priv = priv; new_bus->phy_mask = 0; diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 2c5388ce902a..3581416905ea 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -341,7 +341,7 @@ static int __init gfar_of_init(void) goto unreg; } - gfar_data.bus_id = 0; + snprintf(gfar_data.bus_id, MII_BUS_ID_SIZE, "0"); gfar_data.phy_id = fixed_link[0]; } else { phy = of_find_node_by_phandle(*ph); @@ -362,7 +362,8 @@ static int __init gfar_of_init(void) } gfar_data.phy_id = *id; - gfar_data.bus_id = res.start; + snprintf(gfar_data.bus_id, MII_BUS_ID_SIZE, "%x", + res.start); of_node_put(phy); of_node_put(mdio); diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c index 504b7ce2747d..3634b5fd7919 100644 --- a/drivers/net/au1000_eth.c +++ b/drivers/net/au1000_eth.c @@ -701,7 +701,7 @@ static struct net_device * au1000_probe(int port_num) aup->mii_bus.write = mdiobus_write; aup->mii_bus.reset = mdiobus_reset; aup->mii_bus.name = "au1000_eth_mii"; - aup->mii_bus.id = aup->mac_id; + snprintf(aup->mii_bus.id, MII_BUS_ID_SIZE, "%x", aup->mac_id); aup->mii_bus.irq = kmalloc(sizeof(int)*PHY_MAX_ADDR, GFP_KERNEL); for(i = 0; i < PHY_MAX_ADDR; ++i) aup->mii_bus.irq[i] = PHY_POLL; @@ -709,11 +709,11 @@ static struct net_device * au1000_probe(int port_num) /* if known, set corresponding PHY IRQs */ #if defined(AU1XXX_PHY_STATIC_CONFIG) # if defined(AU1XXX_PHY0_IRQ) - if (AU1XXX_PHY0_BUSID == aup->mii_bus.id) + if (AU1XXX_PHY0_BUSID == aup->mac_id) aup->mii_bus.irq[AU1XXX_PHY0_ADDR] = AU1XXX_PHY0_IRQ; # endif # if defined(AU1XXX_PHY1_IRQ) - if (AU1XXX_PHY1_BUSID == aup->mii_bus.id) + if (AU1XXX_PHY1_BUSID == aup->mac_id) aup->mii_bus.irq[AU1XXX_PHY1_ADDR] = AU1XXX_PHY1_IRQ; # endif #endif diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c index 26b2dd5016cd..717dcc1aa1e9 100644 --- a/drivers/net/bfin_mac.c +++ b/drivers/net/bfin_mac.c @@ -969,7 +969,7 @@ static int __init bf537mac_probe(struct net_device *dev) lp->mii_bus.write = mdiobus_write; lp->mii_bus.reset = mdiobus_reset; lp->mii_bus.name = "bfin_mac_mdio"; - lp->mii_bus.id = 0; + snprintf(lp->mii_bus.id, MII_BUS_ID_SIZE, "0"); lp->mii_bus.irq = kmalloc(sizeof(int)*PHY_MAX_ADDR, GFP_KERNEL); for (i = 0; i < PHY_MAX_ADDR; ++i) lp->mii_bus.irq[i] = PHY_POLL; diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c index c85194f2cd2d..9da7ff437031 100644 --- a/drivers/net/cpmac.c +++ b/drivers/net/cpmac.c @@ -987,7 +987,7 @@ static int external_switch; static int __devinit cpmac_probe(struct platform_device *pdev) { int rc, phy_id, i; - int mdio_bus_id = cpmac_mii.id; + char *mdio_bus_id = "0"; struct resource *mem; struct cpmac_priv *priv; struct net_device *dev; @@ -1008,8 +1008,6 @@ static int __devinit cpmac_probe(struct platform_device *pdev) if (external_switch || dumb_switch) { struct fixed_phy_status status = {}; - mdio_bus_id = 0; - /* * FIXME: this should be in the platform code! * Since there is not platform code at all (that is, @@ -1143,6 +1141,7 @@ int __devinit cpmac_init(void) } cpmac_mii.phy_mask = ~(mask | 0x80000000); + snprintf(cpmac_mii.id, MII_BUS_ID_SIZE, "0"); res = mdiobus_register(&cpmac_mii); if (res) diff --git a/drivers/net/fec_mpc52xx.c b/drivers/net/fec_mpc52xx.c index 58b71e60204e..43b5f30743c2 100644 --- a/drivers/net/fec_mpc52xx.c +++ b/drivers/net/fec_mpc52xx.c @@ -198,7 +198,7 @@ static int mpc52xx_fec_init_phy(struct net_device *dev) struct phy_device *phydev; char phy_id[BUS_ID_SIZE]; - snprintf(phy_id, BUS_ID_SIZE, PHY_ID_FMT, + snprintf(phy_id, BUS_ID_SIZE, "%x:%02x", (unsigned int)dev->base_addr, priv->phy_addr); priv->link = PHY_DOWN; diff --git a/drivers/net/fec_mpc52xx_phy.c b/drivers/net/fec_mpc52xx_phy.c index 6a3ac4ea97e9..956836fc5ec0 100644 --- a/drivers/net/fec_mpc52xx_phy.c +++ b/drivers/net/fec_mpc52xx_phy.c @@ -124,7 +124,7 @@ static int mpc52xx_fec_mdio_probe(struct of_device *of, const struct of_device_i goto out_free; } - bus->id = res.start; + snprintf(bus->id, MII_BUS_ID_SIZE, "%x", res.start); bus->priv = priv; bus->dev = dev; diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c index 940e2041ba38..67b4b0728fce 100644 --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -1178,7 +1178,7 @@ static int __devinit find_phy(struct device_node *np, data = of_get_property(np, "fixed-link", NULL); if (data) { - snprintf(fpi->bus_id, 16, PHY_ID_FMT, 0, *data); + snprintf(fpi->bus_id, 16, "%x:%02x", 0, *data); return 0; } @@ -1202,7 +1202,7 @@ static int __devinit find_phy(struct device_node *np, if (!data || len != 4) goto out_put_mdio; - snprintf(fpi->bus_id, 16, PHY_ID_FMT, res.start, *data); + snprintf(fpi->bus_id, 16, "%x:%02x", res.start, *data); out_put_mdio: of_node_put(mdionode); diff --git a/drivers/net/fs_enet/mii-bitbang.c b/drivers/net/fs_enet/mii-bitbang.c index b8e4a736a130..1620030cd33c 100644 --- a/drivers/net/fs_enet/mii-bitbang.c +++ b/drivers/net/fs_enet/mii-bitbang.c @@ -130,7 +130,7 @@ static int __devinit fs_mii_bitbang_init(struct mii_bus *bus, * we get is an int, and the odds of multiple bitbang mdio buses * is low enough that it's not worth going too crazy. */ - bus->id = res.start; + snprintf(bus->id, MII_BUS_ID_SIZE, "%x", res.start); data = of_get_property(np, "fsl,mdio-pin", &len); if (!data || len != 4) @@ -307,7 +307,7 @@ static int __devinit fs_enet_mdio_probe(struct device *dev) return -ENOMEM; new_bus->name = "BB MII Bus", - new_bus->id = pdev->id; + snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", pdev->id); new_bus->phy_mask = ~0x9; pdata = (struct fs_mii_bb_platform_info *)pdev->dev.platform_data; diff --git a/drivers/net/fs_enet/mii-fec.c b/drivers/net/fs_enet/mii-fec.c index a89cf15090b8..ba75efc9f5b5 100644 --- a/drivers/net/fs_enet/mii-fec.c +++ b/drivers/net/fs_enet/mii-fec.c @@ -196,7 +196,7 @@ static int __devinit fs_enet_mdio_probe(struct of_device *ofdev, if (ret) return ret; - new_bus->id = res.start; + snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", res.start); fec->fecp = ioremap(res.start, res.end - res.start + 1); if (!fec->fecp) @@ -309,7 +309,7 @@ static int __devinit fs_enet_fec_mdio_probe(struct device *dev) new_bus->read = &fs_enet_fec_mii_read, new_bus->write = &fs_enet_fec_mii_write, new_bus->reset = &fs_enet_fec_mii_reset, - new_bus->id = pdev->id; + snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", pdev->id); pdata = (struct fs_mii_fec_platform_info *)pdev->dev.platform_data; diff --git a/drivers/net/gianfar_mii.c b/drivers/net/gianfar_mii.c index 24327629bf03..b8898927236a 100644 --- a/drivers/net/gianfar_mii.c +++ b/drivers/net/gianfar_mii.c @@ -173,7 +173,7 @@ int gfar_mdio_probe(struct device *dev) new_bus->read = &gfar_mdio_read, new_bus->write = &gfar_mdio_write, new_bus->reset = &gfar_mdio_reset, - new_bus->id = pdev->id; + snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", pdev->id); pdata = (struct gianfar_mdio_data *)pdev->dev.platform_data; diff --git a/drivers/net/macb.c b/drivers/net/macb.c index 489c7c3b90d9..d513bb8a4902 100644 --- a/drivers/net/macb.c +++ b/drivers/net/macb.c @@ -246,7 +246,7 @@ static int macb_mii_init(struct macb *bp) bp->mii_bus.read = &macb_mdio_read; bp->mii_bus.write = &macb_mdio_write; bp->mii_bus.reset = &macb_mdio_reset; - bp->mii_bus.id = bp->pdev->id; + snprintf(bp->mii_bus.id, MII_BUS_ID_SIZE, "%x", bp->pdev->id); bp->mii_bus.priv = bp; bp->mii_bus.dev = &bp->dev->dev; pdata = bp->pdev->dev.platform_data; diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c index 2e39e0285d8f..bcd7f9814ed8 100644 --- a/drivers/net/pasemi_mac.c +++ b/drivers/net/pasemi_mac.c @@ -1012,7 +1012,7 @@ static int pasemi_mac_phy_init(struct net_device *dev) goto err; phy_id = *prop; - snprintf(mac->phy_id, BUS_ID_SIZE, PHY_ID_FMT, (int)r.start, phy_id); + snprintf(mac->phy_id, BUS_ID_SIZE, "%x:%02x", (int)r.start, phy_id); of_node_put(phy_dn); diff --git a/drivers/net/phy/fixed.c b/drivers/net/phy/fixed.c index ca9b040f9ad9..4e07956a483b 100644 --- a/drivers/net/phy/fixed.c +++ b/drivers/net/phy/fixed.c @@ -213,7 +213,7 @@ static int __init fixed_mdio_bus_init(void) goto err_pdev; } - fmb->mii_bus.id = 0; + snprintf(fmb->mii_bus.id, MII_BUS_ID_SIZE, "0"); fmb->mii_bus.name = "Fixed MDIO Bus"; fmb->mii_bus.dev = &pdev->dev; fmb->mii_bus.read = &fixed_mdio_read; diff --git a/drivers/net/sb1250-mac.c b/drivers/net/sb1250-mac.c index 7b53d658e337..888b7dec9866 100644 --- a/drivers/net/sb1250-mac.c +++ b/drivers/net/sb1250-mac.c @@ -2374,7 +2374,7 @@ static int sbmac_init(struct platform_device *pldev, long long base) dev->name, base, print_mac(mac, eaddr)); sc->mii_bus.name = sbmac_mdio_string; - sc->mii_bus.id = idx; + snprintf(sc->mii_bus.id, MII_BUS_ID_SIZE, "%x", idx); sc->mii_bus.priv = sc; sc->mii_bus.read = sbmac_mii_read; sc->mii_bus.write = sbmac_mii_write; diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c index 0ee4c168e4c0..29a4d650e8a8 100644 --- a/drivers/net/ucc_geth.c +++ b/drivers/net/ucc_geth.c @@ -3954,7 +3954,7 @@ static int ucc_geth_probe(struct of_device* ofdev, const struct of_device_id *ma if (err) return -1; - ug_info->mdio_bus = res.start; + snprintf(ug_info->mdio_bus, MII_BUS_ID_SIZE, "%x", res.start); } /* get the phy interface type, or default to MII */ diff --git a/drivers/net/ucc_geth.h b/drivers/net/ucc_geth.h index 4fb95b3af948..9f8b7580a3a4 100644 --- a/drivers/net/ucc_geth.h +++ b/drivers/net/ucc_geth.h @@ -1156,7 +1156,7 @@ struct ucc_geth_info { u16 pausePeriod; u16 extensionField; u8 phy_address; - u32 mdio_bus; + char mdio_bus[MII_BUS_ID_SIZE]; u8 weightfactor[NUM_TX_QUEUES]; u8 interruptcoalescingmaxvalue[NUM_RX_QUEUES]; u8 l2qt[UCC_GETH_VLAN_PRIORITY_MAX]; diff --git a/drivers/net/ucc_geth_mii.c b/drivers/net/ucc_geth_mii.c index c69e654d539f..e4d3f330bac3 100644 --- a/drivers/net/ucc_geth_mii.c +++ b/drivers/net/ucc_geth_mii.c @@ -157,7 +157,7 @@ static int uec_mdio_probe(struct of_device *ofdev, const struct of_device_id *ma if (err) goto reg_map_fail; - new_bus->id = res.start; + snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", res.start); new_bus->irq = kmalloc(32 * sizeof(int), GFP_KERNEL); diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 1831b196c70a..2cad5c67397e 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -50,7 +50,7 @@ struct gianfar_platform_data { u32 device_flags; /* board specific information */ u32 board_flags; - u32 bus_id; + char bus_id[MII_BUS_ID_SIZE]; u32 phy_id; u8 mac_addr[6]; phy_interface_t interface; diff --git a/include/linux/phy.h b/include/linux/phy.h index 5e43ae751412..6509f377bb10 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -63,8 +63,6 @@ typedef enum { PHY_INTERFACE_MODE_RTBI } phy_interface_t; -#define MII_BUS_MAX 4 - #define PHY_INIT_TIMEOUT 100000 #define PHY_STATE_TIME 1 @@ -74,13 +72,19 @@ typedef enum { #define PHY_MAX_ADDR 32 /* Used when trying to connect to a specific phy (mii bus id:phy device id) */ -#define PHY_ID_FMT "%x:%02x" +#define PHY_ID_FMT "%s:%02x" + +/* + * Need to be a little smaller than phydev->dev.bus_id to leave room + * for the ":%02x" + */ +#define MII_BUS_ID_SIZE (BUS_ID_SIZE - 3) /* The Bus class for PHYs. Devices which provide access to * PHYs should register using this structure */ struct mii_bus { const char *name; - int id; + char id[MII_BUS_ID_SIZE]; void *priv; int (*read)(struct mii_bus *bus, int phy_id, int regnum); int (*write)(struct mii_bus *bus, int phy_id, int regnum, u16 val); -- cgit v1.2.3-71-gd317 From c5e38a949bfa11d10f73927fbf4fe66b73bc3001 Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Wed, 9 Apr 2008 19:38:27 -0500 Subject: phy: Clean up header style Multi-line comments weren't all CodingStyle compliant Signed-off-by: Andy Fleming Signed-off-by: Jeff Garzik --- include/linux/phy.h | 48 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 6509f377bb10..2d838448415c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -39,7 +39,8 @@ SUPPORTED_1000baseT_Half | \ SUPPORTED_1000baseT_Full) -/* Set phydev->irq to PHY_POLL if interrupts are not supported, +/* + * Set phydev->irq to PHY_POLL if interrupts are not supported, * or not desired for this PHY. Set to PHY_IGNORE_INTERRUPT if * the attached driver handles the interrupt */ @@ -80,8 +81,10 @@ typedef enum { */ #define MII_BUS_ID_SIZE (BUS_ID_SIZE - 3) -/* The Bus class for PHYs. Devices which provide access to - * PHYs should register using this structure */ +/* + * The Bus class for PHYs. Devices which provide access to + * PHYs should register using this structure + */ struct mii_bus { const char *name; char id[MII_BUS_ID_SIZE]; @@ -90,8 +93,10 @@ struct mii_bus { int (*write)(struct mii_bus *bus, int phy_id, int regnum, u16 val); int (*reset)(struct mii_bus *bus); - /* A lock to ensure that only one thing can read/write - * the MDIO bus at a time */ + /* + * A lock to ensure that only one thing can read/write + * the MDIO bus at a time + */ struct mutex mdio_lock; struct device *dev; @@ -102,8 +107,10 @@ struct mii_bus { /* Phy addresses to be ignored when probing */ u32 phy_mask; - /* Pointer to an array of interrupts, each PHY's - * interrupt at the index matching its address */ + /* + * Pointer to an array of interrupts, each PHY's + * interrupt at the index matching its address + */ int *irq; }; @@ -255,7 +262,8 @@ struct phy_device { /* Bus address of the PHY (0-32) */ int addr; - /* forced speed & duplex (no autoneg) + /* + * forced speed & duplex (no autoneg) * partner speed & duplex & pause (autoneg) */ int speed; @@ -278,8 +286,10 @@ struct phy_device { int link_timeout; - /* Interrupt number for this PHY - * -1 means no interrupt */ + /* + * Interrupt number for this PHY + * -1 means no interrupt + */ int irq; /* private data pointer */ @@ -329,22 +339,28 @@ struct phy_driver { u32 features; u32 flags; - /* Called to initialize the PHY, - * including after a reset */ + /* + * Called to initialize the PHY, + * including after a reset + */ int (*config_init)(struct phy_device *phydev); - /* Called during discovery. Used to set - * up device-specific structures, if any */ + /* + * Called during discovery. Used to set + * up device-specific structures, if any + */ int (*probe)(struct phy_device *phydev); /* PHY Power Management */ int (*suspend)(struct phy_device *phydev); int (*resume)(struct phy_device *phydev); - /* Configures the advertisement and resets + /* + * Configures the advertisement and resets * autonegotiation if phydev->autoneg is on, * forces the speed to the current settings in phydev - * if phydev->autoneg is off */ + * if phydev->autoneg is off + */ int (*config_aneg)(struct phy_device *phydev); /* Determines the negotiated speed and duplex */ -- cgit v1.2.3-71-gd317 From cac1f3c8a80f3fc0b4489d1d3ba29214677ffab2 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 15 Apr 2008 12:49:21 -0400 Subject: phylib: factor out get_phy_id from within get_phy_device We were already doing what amounts to a get_phy_id from within get_phy_device, and rather than duplicate this for the TBIPA probing, we might as well just factor it out and make it available instead. Signed-off-by: Paul Gortmaker Acked-by: Andy Fleming Signed-off-by: Jeff Garzik --- drivers/net/phy/phy_device.c | 38 +++++++++++++++++++++++++++++--------- include/linux/phy.h | 1 + 2 files changed, 30 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index f4c4fd85425f..8b1121b02f98 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -86,35 +86,55 @@ struct phy_device* phy_device_create(struct mii_bus *bus, int addr, int phy_id) EXPORT_SYMBOL(phy_device_create); /** - * get_phy_device - reads the specified PHY device and returns its @phy_device struct + * get_phy_id - reads the specified addr for its ID. * @bus: the target MII bus * @addr: PHY address on the MII bus + * @phy_id: where to store the ID retrieved. * * Description: Reads the ID registers of the PHY at @addr on the - * @bus, then allocates and returns the phy_device to represent it. + * @bus, stores it in @phy_id and returns zero on success. */ -struct phy_device * get_phy_device(struct mii_bus *bus, int addr) +int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id) { int phy_reg; - u32 phy_id; - struct phy_device *dev = NULL; /* Grab the bits from PHYIR1, and put them * in the upper half */ phy_reg = bus->read(bus, addr, MII_PHYSID1); if (phy_reg < 0) - return ERR_PTR(phy_reg); + return -EIO; - phy_id = (phy_reg & 0xffff) << 16; + *phy_id = (phy_reg & 0xffff) << 16; /* Grab the bits from PHYIR2, and put them in the lower half */ phy_reg = bus->read(bus, addr, MII_PHYSID2); if (phy_reg < 0) - return ERR_PTR(phy_reg); + return -EIO; + + *phy_id |= (phy_reg & 0xffff); + + return 0; +} + +/** + * get_phy_device - reads the specified PHY device and returns its @phy_device struct + * @bus: the target MII bus + * @addr: PHY address on the MII bus + * + * Description: Reads the ID registers of the PHY at @addr on the + * @bus, then allocates and returns the phy_device to represent it. + */ +struct phy_device * get_phy_device(struct mii_bus *bus, int addr) +{ + struct phy_device *dev = NULL; + u32 phy_id; + int r; - phy_id |= (phy_reg & 0xffff); + r = get_phy_id(bus, addr, &phy_id); + if (r) + return ERR_PTR(r); /* If the phy_id is all Fs, there is no device there */ if (0xffffffff == phy_id) diff --git a/include/linux/phy.h b/include/linux/phy.h index 2d838448415c..779cbcd65f62 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -381,6 +381,7 @@ struct phy_driver { int phy_read(struct phy_device *phydev, u16 regnum); int phy_write(struct phy_device *phydev, u16 regnum, u16 val); +int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id); struct phy_device* get_phy_device(struct mii_bus *bus, int addr); int phy_clear_interrupt(struct phy_device *phydev); int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); -- cgit v1.2.3-71-gd317