From 2f268f129c2d1a05d297fe3ee34d393f862d2b22 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Wed, 25 Sep 2013 09:20:07 +0200 Subject: net: add adj_list to save only neighbours Currently, we distinguish neighbours (first-level linked devices) from non-neighbours by the neighbour bool in the netdev_adjacent. This could be quite time-consuming in case we would like to traverse *only* through neighbours - cause we'd have to traverse through all devices and check for this flag, and in a (quite common) scenario where we have lots of vlans on top of bridge, which is on top of a bond - the bonding would have to go through all those vlans to get its upper neighbour linked devices. This situation is really unpleasant, cause there are already a lot of cases when a device with slaves needs to go through them in hot path. To fix this, introduce a new upper/lower device lists structure - adj_list, which contains only the neighbours. It works always in pair with the all_adj_list structure (renamed from upper/lower_dev_list), i.e. both of them contain the same links, only that all_adj_list contains also non-neighbour device links. It's really a small change visible, currently, only for __netdev_adjacent_dev_insert/remove(), and doesn't change the main linked logic at all. Also, add some comments a fix a name collision in netdev_for_each_upper_dev_rcu() and rework the naming by the following rules: netdev_(all_)(upper|lower)_* If "all_" is present, then we work with the whole list of upper/lower devices, otherwise - only with direct neighbours. Uninline functions - to get better stack traces. CC: "David S. Miller" CC: Eric Dumazet CC: Jiri Pirko CC: Alexander Duyck CC: Cong Wang Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller --- include/linux/netdevice.h | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3de49aca4519..514045c704a8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1143,8 +1143,18 @@ struct net_device { struct list_head dev_list; struct list_head napi_list; struct list_head unreg_list; - struct list_head upper_dev_list; /* List of upper devices */ - struct list_head lower_dev_list; + + /* directly linked devices, like slaves for bonding */ + struct { + struct list_head upper; + struct list_head lower; + } adj_list; + + /* all linked devices, *including* neighbours */ + struct { + struct list_head upper; + struct list_head lower; + } all_adj_list; /* currently active device features */ @@ -2813,15 +2823,15 @@ extern int bpf_jit_enable; extern bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); extern bool netdev_has_any_upper_dev(struct net_device *dev); -extern struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, - struct list_head **iter); +extern struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, + struct list_head **iter); /* iterate through upper list, must be called under RCU read lock */ -#define netdev_for_each_upper_dev_rcu(dev, upper, iter) \ - for (iter = &(dev)->upper_dev_list, \ - upper = netdev_upper_get_next_dev_rcu(dev, &(iter)); \ - upper; \ - upper = netdev_upper_get_next_dev_rcu(dev, &(iter))) +#define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \ + for (iter = &(dev)->all_adj_list.upper, \ + updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)); \ + updev; \ + updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter))) extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev); extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); -- cgit v1.2.3-71-gd317 From 402dae9614557296e84543008a8e582c28fb1db3 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Wed, 25 Sep 2013 09:20:09 +0200 Subject: net: add netdev_adjacent->private and allow to use it Currently, even though we can access any linked device, we can't attach anything to it, which is vital to properly manage them. To fix this, add a new void *private to netdev_adjacent and functions setting/getting it (per link), so that we can save, per example, bonding's slave structures there, per slave device. netdev_master_upper_dev_link_private(dev, upper_dev, private) links dev to upper dev and populates the neighbour link only with private. netdev_lower_dev_get_private{,_rcu}() returns the private, if found. CC: "David S. Miller" CC: Eric Dumazet CC: Jiri Pirko CC: Alexander Duyck Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++ net/core/dev.c | 68 +++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 64 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 514045c704a8..75d5beac463b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2839,8 +2839,15 @@ extern int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev); extern int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev); +extern int netdev_master_upper_dev_link_private(struct net_device *dev, + struct net_device *upper_dev, + void *private); extern void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); +extern void *netdev_lower_dev_get_private_rcu(struct net_device *dev, + struct net_device *lower_dev); +extern void *netdev_lower_dev_get_private(struct net_device *dev, + struct net_device *lower_dev); extern int skb_checksum_help(struct sk_buff *skb); extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); diff --git a/net/core/dev.c b/net/core/dev.c index 9290f09cdf26..c69ab74fb201 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4376,6 +4376,9 @@ struct netdev_adjacent { /* counter for the number of times this device was added to us */ u16 ref_nr; + /* private field for the users */ + void *private; + struct list_head list; struct rcu_head rcu; }; @@ -4510,7 +4513,7 @@ EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, struct list_head *dev_list, - bool master) + void *private, bool master) { struct netdev_adjacent *adj; @@ -4528,6 +4531,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->dev = adj_dev; adj->master = master; adj->ref_nr = 1; + adj->private = private; dev_hold(adj_dev); pr_debug("dev_hold for %s, because of link added from %s to %s\n", @@ -4574,15 +4578,17 @@ int __netdev_adjacent_dev_link_lists(struct net_device *dev, struct net_device *upper_dev, struct list_head *up_list, struct list_head *down_list, - bool master) + void *private, bool master) { int ret; - ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, master); + ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, + master); if (ret) return ret; - ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, false); + ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, + false); if (ret) { __netdev_adjacent_dev_remove(dev, upper_dev, up_list); return ret; @@ -4597,7 +4603,7 @@ int __netdev_adjacent_dev_link(struct net_device *dev, return __netdev_adjacent_dev_link_lists(dev, upper_dev, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower, - false); + NULL, false); } void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, @@ -4619,7 +4625,7 @@ void __netdev_adjacent_dev_unlink(struct net_device *dev, int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, struct net_device *upper_dev, - bool master) + void *private, bool master) { int ret = __netdev_adjacent_dev_link(dev, upper_dev); @@ -4629,7 +4635,7 @@ int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, &dev->adj_list.upper, &upper_dev->adj_list.lower, - master); + private, master); if (ret) { __netdev_adjacent_dev_unlink(dev, upper_dev); return ret; @@ -4648,7 +4654,8 @@ void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, } static int __netdev_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev, bool master) + struct net_device *upper_dev, bool master, + void *private) { struct netdev_adjacent *i, *j, *to_i, *to_j; int ret = 0; @@ -4668,7 +4675,8 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (master && netdev_master_upper_dev_get(dev)) return -EBUSY; - ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master); + ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, + master); if (ret) return ret; @@ -4759,7 +4767,7 @@ rollback_mesh: int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev) { - return __netdev_upper_dev_link(dev, upper_dev, false); + return __netdev_upper_dev_link(dev, upper_dev, false, NULL); } EXPORT_SYMBOL(netdev_upper_dev_link); @@ -4777,10 +4785,18 @@ EXPORT_SYMBOL(netdev_upper_dev_link); int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev) { - return __netdev_upper_dev_link(dev, upper_dev, true); + return __netdev_upper_dev_link(dev, upper_dev, true, NULL); } EXPORT_SYMBOL(netdev_master_upper_dev_link); +int netdev_master_upper_dev_link_private(struct net_device *dev, + struct net_device *upper_dev, + void *private) +{ + return __netdev_upper_dev_link(dev, upper_dev, true, private); +} +EXPORT_SYMBOL(netdev_master_upper_dev_link_private); + /** * netdev_upper_dev_unlink - Removes a link to upper device * @dev: device @@ -4818,6 +4834,36 @@ void netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); +void *netdev_lower_dev_get_private_rcu(struct net_device *dev, + struct net_device *lower_dev) +{ + struct netdev_adjacent *lower; + + if (!lower_dev) + return NULL; + lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower); + if (!lower) + return NULL; + + return lower->private; +} +EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu); + +void *netdev_lower_dev_get_private(struct net_device *dev, + struct net_device *lower_dev) +{ + struct netdev_adjacent *lower; + + if (!lower_dev) + return NULL; + lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower); + if (!lower) + return NULL; + + return lower->private; +} +EXPORT_SYMBOL(netdev_lower_dev_get_private); + static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; -- cgit v1.2.3-71-gd317 From 31088a113c2a948856ed2047d8c21c217b13e85d Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Wed, 25 Sep 2013 09:20:12 +0200 Subject: net: add for_each iterators through neighbour lower link's private Add a possibility to iterate through netdev_adjacent's private, currently only for lower neighbours. Add both RCU and RTNL/other locking variants of iterators, and make the non-rcu variant to be safe from removal. CC: "David S. Miller" CC: Eric Dumazet CC: Jiri Pirko CC: Alexander Duyck Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller --- include/linux/netdevice.h | 17 ++++++++++++++ net/core/dev.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 76 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 75d5beac463b..168974e40cf5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2833,6 +2833,23 @@ extern struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *d updev; \ updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter))) +extern void *netdev_lower_get_next_private(struct net_device *dev, + struct list_head **iter); +extern void *netdev_lower_get_next_private_rcu(struct net_device *dev, + struct list_head **iter); + +#define netdev_for_each_lower_private(dev, priv, iter) \ + for (iter = (dev)->adj_list.lower.next, \ + priv = netdev_lower_get_next_private(dev, &(iter)); \ + priv; \ + priv = netdev_lower_get_next_private(dev, &(iter))) + +#define netdev_for_each_lower_private_rcu(dev, priv, iter) \ + for (iter = &(dev)->adj_list.lower, \ + priv = netdev_lower_get_next_private_rcu(dev, &(iter)); \ + priv; \ + priv = netdev_lower_get_next_private_rcu(dev, &(iter))) + extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev); extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); extern int netdev_upper_dev_link(struct net_device *dev, diff --git a/net/core/dev.c b/net/core/dev.c index c69ab74fb201..0aa844aae40b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4466,7 +4466,8 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev) } EXPORT_SYMBOL(netdev_master_upper_dev_get); -/* netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list +/** + * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list * @dev: device * @iter: list_head ** of the current position * @@ -4491,6 +4492,63 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, } EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu); +/** + * netdev_lower_get_next_private - Get the next ->private from the + * lower neighbour list + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next netdev_adjacent->private from the dev's lower neighbour + * list, starting from iter position. The caller must hold either hold the + * RTNL lock or its own locking that guarantees that the neighbour lower + * list will remain unchainged. + */ +void *netdev_lower_get_next_private(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *lower; + + lower = list_entry(*iter, struct netdev_adjacent, list); + + if (&lower->list == &dev->adj_list.lower) + return NULL; + + if (iter) + *iter = lower->list.next; + + return lower->private; +} +EXPORT_SYMBOL(netdev_lower_get_next_private); + +/** + * netdev_lower_get_next_private_rcu - Get the next ->private from the + * lower neighbour list, RCU + * variant + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next netdev_adjacent->private from the dev's lower neighbour + * list, starting from iter position. The caller must hold RCU read lock. + */ +void *netdev_lower_get_next_private_rcu(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *lower; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); + + if (&lower->list == &dev->adj_list.lower) + return NULL; + + if (iter) + *iter = &lower->list; + + return lower->private; +} +EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); + /** * netdev_master_upper_dev_get_rcu - Get master upper device * @dev: device -- cgit v1.2.3-71-gd317 From b6ccba4c681fdaf0070e580bf951badf7edc860b Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Wed, 25 Sep 2013 09:20:23 +0200 Subject: net: add a possibility to get private from netdev_adjacent->list It will be useful to get first/last element. CC: "David S. Miller" CC: Eric Dumazet CC: Jiri Pirko CC: Alexander Duyck Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 168974e40cf5..b4cfb63f264e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2850,6 +2850,7 @@ extern void *netdev_lower_get_next_private_rcu(struct net_device *dev, priv; \ priv = netdev_lower_get_next_private_rcu(dev, &(iter))) +extern void *netdev_adjacent_get_private(struct list_head *adj_list); extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev); extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); extern int netdev_upper_dev_link(struct net_device *dev, diff --git a/net/core/dev.c b/net/core/dev.c index 0aa844aae40b..acc11810805f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4466,6 +4466,16 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev) } EXPORT_SYMBOL(netdev_master_upper_dev_get); +void *netdev_adjacent_get_private(struct list_head *adj_list) +{ + struct netdev_adjacent *adj; + + adj = list_entry(adj_list, struct netdev_adjacent, list); + + return adj->private; +} +EXPORT_SYMBOL(netdev_adjacent_get_private); + /** * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list * @dev: device -- cgit v1.2.3-71-gd317 From a0f4ecf3494c9869d20f606e7e2b2f50f0e67a7f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 26 Sep 2013 14:48:15 -0700 Subject: netfilter: Remove extern from function prototypes There are a mix of function prototypes with and without extern in the kernel sources. Standardize on not using extern for function prototypes. Function prototypes don't need to be written with extern. extern is assumed by the compiler. Its use is as unnecessary as using auto to declare automatic/local variables in a block. Signed-off-by: Joe Perches --- include/linux/netfilter.h | 10 +- include/linux/netfilter/nf_conntrack_common.h | 2 +- include/linux/netfilter/nf_conntrack_h323.h | 14 +-- include/linux/netfilter/nf_conntrack_proto_gre.h | 4 +- include/linux/netfilter/nf_conntrack_sip.h | 57 +++++----- include/linux/netfilter/nfnetlink.h | 28 ++--- include/linux/netfilter/nfnetlink_acct.h | 6 +- include/linux/netfilter/x_tables.h | 128 +++++++++++------------ include/linux/netfilter_bridge.h | 4 +- include/linux/netfilter_ipv4.h | 6 +- include/linux/netfilter_ipv6.h | 10 +- 11 files changed, 133 insertions(+), 136 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 708fe72ab913..61223c52414f 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -35,7 +35,7 @@ static inline void nf_inet_addr_mask(const union nf_inet_addr *a1, result->all[3] = a1->all[3] & mask->all[3]; } -extern int netfilter_init(void); +int netfilter_init(void); /* Largest hook number + 1 */ #define NF_MAX_HOOKS 8 @@ -208,7 +208,7 @@ int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, /* Call this before modifying an existing packet: ensures it is modifiable and linear to the point you care about (writable_len). Returns true or false. */ -extern int skb_make_writable(struct sk_buff *skb, unsigned int writable_len); +int skb_make_writable(struct sk_buff *skb, unsigned int writable_len); struct flowi; struct nf_queue_entry; @@ -269,8 +269,8 @@ nf_checksum_partial(struct sk_buff *skb, unsigned int hook, return csum; } -extern int nf_register_afinfo(const struct nf_afinfo *afinfo); -extern void nf_unregister_afinfo(const struct nf_afinfo *afinfo); +int nf_register_afinfo(const struct nf_afinfo *afinfo); +void nf_unregister_afinfo(const struct nf_afinfo *afinfo); #include extern void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); @@ -315,7 +315,7 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu; -extern void nf_ct_attach(struct sk_buff *, const struct sk_buff *); +void nf_ct_attach(struct sk_buff *, const struct sk_buff *); extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu; struct nf_conn; diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 127d0b90604f..275505792664 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -23,6 +23,6 @@ struct ip_conntrack_stat { }; /* call to create an explicit dependency on nf_conntrack. */ -extern void need_conntrack(void); +void need_conntrack(void); #endif /* _NF_CONNTRACK_COMMON_H */ diff --git a/include/linux/netfilter/nf_conntrack_h323.h b/include/linux/netfilter/nf_conntrack_h323.h index f381020eee92..858d9b214053 100644 --- a/include/linux/netfilter/nf_conntrack_h323.h +++ b/include/linux/netfilter/nf_conntrack_h323.h @@ -29,13 +29,13 @@ struct nf_ct_h323_master { struct nf_conn; -extern int get_h225_addr(struct nf_conn *ct, unsigned char *data, - TransportAddress *taddr, - union nf_inet_addr *addr, __be16 *port); -extern void nf_conntrack_h245_expect(struct nf_conn *new, - struct nf_conntrack_expect *this); -extern void nf_conntrack_q931_expect(struct nf_conn *new, - struct nf_conntrack_expect *this); +int get_h225_addr(struct nf_conn *ct, unsigned char *data, + TransportAddress *taddr, union nf_inet_addr *addr, + __be16 *port); +void nf_conntrack_h245_expect(struct nf_conn *new, + struct nf_conntrack_expect *this); +void nf_conntrack_q931_expect(struct nf_conn *new, + struct nf_conntrack_expect *this); extern int (*set_h245_addr_hook) (struct sk_buff *skb, unsigned int protoff, unsigned char **data, int dataoff, H245_TransportAddress *taddr, diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index 6a0664c0c451..ec2ffaf418c8 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -87,8 +87,8 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, /* delete keymap entries */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct); -extern void nf_ct_gre_keymap_flush(struct net *net); -extern void nf_nat_need_gre(void); +void nf_ct_gre_keymap_flush(struct net *net); +void nf_nat_need_gre(void); #endif /* __KERNEL__ */ #endif /* _CONNTRACK_PROTO_GRE_H */ diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index ba7f571a2b1c..5cac0207b95d 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -157,35 +157,34 @@ extern unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int medialen, union nf_inet_addr *rtp_addr); -extern int ct_sip_parse_request(const struct nf_conn *ct, - const char *dptr, unsigned int datalen, - unsigned int *matchoff, unsigned int *matchlen, - union nf_inet_addr *addr, __be16 *port); -extern int ct_sip_get_header(const struct nf_conn *ct, const char *dptr, - unsigned int dataoff, unsigned int datalen, - enum sip_header_types type, - unsigned int *matchoff, unsigned int *matchlen); -extern int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, - unsigned int *dataoff, unsigned int datalen, - enum sip_header_types type, int *in_header, - unsigned int *matchoff, unsigned int *matchlen, - union nf_inet_addr *addr, __be16 *port); -extern int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr, - unsigned int dataoff, unsigned int datalen, - const char *name, - unsigned int *matchoff, unsigned int *matchlen, - union nf_inet_addr *addr, bool delim); -extern int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr, - unsigned int off, unsigned int datalen, - const char *name, - unsigned int *matchoff, unsigned int *matchen, - unsigned int *val); - -extern int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr, - unsigned int dataoff, unsigned int datalen, - enum sdp_header_types type, - enum sdp_header_types term, - unsigned int *matchoff, unsigned int *matchlen); +int ct_sip_parse_request(const struct nf_conn *ct, const char *dptr, + unsigned int datalen, unsigned int *matchoff, + unsigned int *matchlen, union nf_inet_addr *addr, + __be16 *port); +int ct_sip_get_header(const struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + enum sip_header_types type, unsigned int *matchoff, + unsigned int *matchlen); +int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, + unsigned int *dataoff, unsigned int datalen, + enum sip_header_types type, int *in_header, + unsigned int *matchoff, unsigned int *matchlen, + union nf_inet_addr *addr, __be16 *port); +int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + const char *name, unsigned int *matchoff, + unsigned int *matchlen, union nf_inet_addr *addr, + bool delim); +int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr, + unsigned int off, unsigned int datalen, + const char *name, unsigned int *matchoff, + unsigned int *matchen, unsigned int *val); + +int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + enum sdp_header_types type, + enum sdp_header_types term, + unsigned int *matchoff, unsigned int *matchlen); #endif /* __KERNEL__ */ #endif /* __NF_CONNTRACK_SIP_H__ */ diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index cadb7402d7a7..4f68cd7141d2 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -25,20 +25,20 @@ struct nfnetlink_subsystem { const struct nfnl_callback *cb; /* callback for individual types */ }; -extern int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); -extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); - -extern int nfnetlink_has_listeners(struct net *net, unsigned int group); -extern struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size, - u32 dst_portid, gfp_t gfp_mask); -extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, - unsigned int group, int echo, gfp_t flags); -extern int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error); -extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net, - u32 portid, int flags); - -extern void nfnl_lock(__u8 subsys_id); -extern void nfnl_unlock(__u8 subsys_id); +int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); +int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); + +int nfnetlink_has_listeners(struct net *net, unsigned int group); +struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size, + u32 dst_portid, gfp_t gfp_mask); +int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, + unsigned int group, int echo, gfp_t flags); +int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error); +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, + int flags); + +void nfnl_lock(__u8 subsys_id); +void nfnl_unlock(__u8 subsys_id); #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \ MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys)) diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h index bb4bbc9b7a18..b2e85e59f760 100644 --- a/include/linux/netfilter/nfnetlink_acct.h +++ b/include/linux/netfilter/nfnetlink_acct.h @@ -6,8 +6,8 @@ struct nf_acct; -extern struct nf_acct *nfnl_acct_find_get(const char *filter_name); -extern void nfnl_acct_put(struct nf_acct *acct); -extern void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct); +struct nf_acct *nfnl_acct_find_get(const char *filter_name); +void nfnl_acct_put(struct nf_acct *acct); +void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct); #endif /* _NFNL_ACCT_H */ diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index dd49566315c6..a3e215bb0241 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -229,50 +229,48 @@ struct xt_table_info { #define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \ + nr_cpu_ids * sizeof(char *)) -extern int xt_register_target(struct xt_target *target); -extern void xt_unregister_target(struct xt_target *target); -extern int xt_register_targets(struct xt_target *target, unsigned int n); -extern void xt_unregister_targets(struct xt_target *target, unsigned int n); - -extern int xt_register_match(struct xt_match *target); -extern void xt_unregister_match(struct xt_match *target); -extern int xt_register_matches(struct xt_match *match, unsigned int n); -extern void xt_unregister_matches(struct xt_match *match, unsigned int n); - -extern int xt_check_match(struct xt_mtchk_param *, - unsigned int size, u_int8_t proto, bool inv_proto); -extern int xt_check_target(struct xt_tgchk_param *, - unsigned int size, u_int8_t proto, bool inv_proto); - -extern struct xt_table *xt_register_table(struct net *net, - const struct xt_table *table, - struct xt_table_info *bootstrap, - struct xt_table_info *newinfo); -extern void *xt_unregister_table(struct xt_table *table); - -extern struct xt_table_info *xt_replace_table(struct xt_table *table, - unsigned int num_counters, - struct xt_table_info *newinfo, - int *error); - -extern struct xt_match *xt_find_match(u8 af, const char *name, u8 revision); -extern struct xt_target *xt_find_target(u8 af, const char *name, u8 revision); -extern struct xt_match *xt_request_find_match(u8 af, const char *name, - u8 revision); -extern struct xt_target *xt_request_find_target(u8 af, const char *name, - u8 revision); -extern int xt_find_revision(u8 af, const char *name, u8 revision, - int target, int *err); - -extern struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, - const char *name); -extern void xt_table_unlock(struct xt_table *t); - -extern int xt_proto_init(struct net *net, u_int8_t af); -extern void xt_proto_fini(struct net *net, u_int8_t af); - -extern struct xt_table_info *xt_alloc_table_info(unsigned int size); -extern void xt_free_table_info(struct xt_table_info *info); +int xt_register_target(struct xt_target *target); +void xt_unregister_target(struct xt_target *target); +int xt_register_targets(struct xt_target *target, unsigned int n); +void xt_unregister_targets(struct xt_target *target, unsigned int n); + +int xt_register_match(struct xt_match *target); +void xt_unregister_match(struct xt_match *target); +int xt_register_matches(struct xt_match *match, unsigned int n); +void xt_unregister_matches(struct xt_match *match, unsigned int n); + +int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, + bool inv_proto); +int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, + bool inv_proto); + +struct xt_table *xt_register_table(struct net *net, + const struct xt_table *table, + struct xt_table_info *bootstrap, + struct xt_table_info *newinfo); +void *xt_unregister_table(struct xt_table *table); + +struct xt_table_info *xt_replace_table(struct xt_table *table, + unsigned int num_counters, + struct xt_table_info *newinfo, + int *error); + +struct xt_match *xt_find_match(u8 af, const char *name, u8 revision); +struct xt_target *xt_find_target(u8 af, const char *name, u8 revision); +struct xt_match *xt_request_find_match(u8 af, const char *name, u8 revision); +struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision); +int xt_find_revision(u8 af, const char *name, u8 revision, int target, + int *err); + +struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, + const char *name); +void xt_table_unlock(struct xt_table *t); + +int xt_proto_init(struct net *net, u_int8_t af); +void xt_proto_fini(struct net *net, u_int8_t af); + +struct xt_table_info *xt_alloc_table_info(unsigned int size); +void xt_free_table_info(struct xt_table_info *info); /** * xt_recseq - recursive seqcount for netfilter use @@ -353,8 +351,8 @@ static inline unsigned long ifname_compare_aligned(const char *_a, return ret; } -extern struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *); -extern void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *); +struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *); +void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *); #ifdef CONFIG_COMPAT #include @@ -414,25 +412,25 @@ struct _compat_xt_align { #define COMPAT_XT_ALIGN(s) __ALIGN_KERNEL((s), __alignof__(struct _compat_xt_align)) -extern void xt_compat_lock(u_int8_t af); -extern void xt_compat_unlock(u_int8_t af); - -extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta); -extern void xt_compat_flush_offsets(u_int8_t af); -extern void xt_compat_init_offsets(u_int8_t af, unsigned int number); -extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset); - -extern int xt_compat_match_offset(const struct xt_match *match); -extern int xt_compat_match_from_user(struct xt_entry_match *m, - void **dstptr, unsigned int *size); -extern int xt_compat_match_to_user(const struct xt_entry_match *m, - void __user **dstptr, unsigned int *size); - -extern int xt_compat_target_offset(const struct xt_target *target); -extern void xt_compat_target_from_user(struct xt_entry_target *t, - void **dstptr, unsigned int *size); -extern int xt_compat_target_to_user(const struct xt_entry_target *t, - void __user **dstptr, unsigned int *size); +void xt_compat_lock(u_int8_t af); +void xt_compat_unlock(u_int8_t af); + +int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta); +void xt_compat_flush_offsets(u_int8_t af); +void xt_compat_init_offsets(u_int8_t af, unsigned int number); +int xt_compat_calc_jump(u_int8_t af, unsigned int offset); + +int xt_compat_match_offset(const struct xt_match *match); +int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, + unsigned int *size); +int xt_compat_match_to_user(const struct xt_entry_match *m, + void __user **dstptr, unsigned int *size); + +int xt_compat_target_offset(const struct xt_target *target); +void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, + unsigned int *size); +int xt_compat_target_to_user(const struct xt_entry_target *t, + void __user **dstptr, unsigned int *size); #endif /* CONFIG_COMPAT */ #endif /* _X_TABLES_H */ diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index dfb4d9e52bcb..8ab1c278b66d 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -25,7 +25,7 @@ enum nf_br_hook_priorities { #define BRNF_PPPoE 0x20 /* Only used in br_forward.c */ -extern int nf_bridge_copy_header(struct sk_buff *skb); +int nf_bridge_copy_header(struct sk_buff *skb); static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb) { if (skb->nf_bridge && @@ -53,7 +53,7 @@ static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) return 0; } -extern int br_handle_frame_finish(struct sk_buff *skb); +int br_handle_frame_finish(struct sk_buff *skb); /* Only used in br_device.c */ static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) { diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index dfaf116b3e81..6e4591bb54d4 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -6,7 +6,7 @@ #include -extern int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type); -extern __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, - unsigned int dataoff, u_int8_t protocol); +int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type); +__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, u_int8_t protocol); #endif /*__LINUX_IP_NETFILTER_H*/ diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 2d4df6ce043e..64dad1cc1a4b 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -11,12 +11,12 @@ #ifdef CONFIG_NETFILTER -extern int ip6_route_me_harder(struct sk_buff *skb); -extern __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, - unsigned int dataoff, u_int8_t protocol); +int ip6_route_me_harder(struct sk_buff *skb); +__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, u_int8_t protocol); -extern int ipv6_netfilter_init(void); -extern void ipv6_netfilter_fini(void); +int ipv6_netfilter_init(void); +void ipv6_netfilter_fini(void); /* * Hook functions for ipv6 to allow xt_* modules to be built-in even -- cgit v1.2.3-71-gd317 From 7965bd4d71ef7cf1db00afb9e406ddfc13443c13 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 26 Sep 2013 14:48:15 -0700 Subject: net.h/skbuff.h: Remove extern from function prototypes There are a mix of function prototypes with and without extern in the kernel sources. Standardize on not using extern for function prototypes. Function prototypes don't need to be written with extern. extern is assumed by the compiler. Its use is as unnecessary as using auto to declare automatic/local variables in a block. Signed-off-by: Joe Perches --- include/linux/net.h | 82 ++++++++-------- include/linux/skbuff.h | 250 ++++++++++++++++++++++--------------------------- 2 files changed, 148 insertions(+), 184 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 4f27575ce1d6..ca9ec8540905 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -195,27 +195,23 @@ enum { SOCK_WAKE_URG, }; -extern int sock_wake_async(struct socket *sk, int how, int band); -extern int sock_register(const struct net_proto_family *fam); -extern void sock_unregister(int family); -extern int __sock_create(struct net *net, int family, int type, int proto, - struct socket **res, int kern); -extern int sock_create(int family, int type, int proto, - struct socket **res); -extern int sock_create_kern(int family, int type, int proto, - struct socket **res); -extern int sock_create_lite(int family, int type, int proto, - struct socket **res); -extern void sock_release(struct socket *sock); -extern int sock_sendmsg(struct socket *sock, struct msghdr *msg, - size_t len); -extern int sock_recvmsg(struct socket *sock, struct msghdr *msg, - size_t size, int flags); -extern struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname); -extern struct socket *sockfd_lookup(int fd, int *err); -extern struct socket *sock_from_file(struct file *file, int *err); +int sock_wake_async(struct socket *sk, int how, int band); +int sock_register(const struct net_proto_family *fam); +void sock_unregister(int family); +int __sock_create(struct net *net, int family, int type, int proto, + struct socket **res, int kern); +int sock_create(int family, int type, int proto, struct socket **res); +int sock_create_kern(int family, int type, int proto, struct socket **res); +int sock_create_lite(int family, int type, int proto, struct socket **res); +void sock_release(struct socket *sock); +int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len); +int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags); +struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname); +struct socket *sockfd_lookup(int fd, int *err); +struct socket *sock_from_file(struct file *file, int *err); #define sockfd_put(sock) fput(sock->file) -extern int net_ratelimit(void); +int net_ratelimit(void); #define net_ratelimited_function(function, ...) \ do { \ @@ -243,32 +239,28 @@ do { \ #define net_random() prandom_u32() #define net_srandom(seed) prandom_seed((__force u32)(seed)) -extern int kernel_sendmsg(struct socket *sock, struct msghdr *msg, - struct kvec *vec, size_t num, size_t len); -extern int kernel_recvmsg(struct socket *sock, struct msghdr *msg, - struct kvec *vec, size_t num, - size_t len, int flags); +int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, + size_t num, size_t len); +int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, + size_t num, size_t len, int flags); -extern int kernel_bind(struct socket *sock, struct sockaddr *addr, - int addrlen); -extern int kernel_listen(struct socket *sock, int backlog); -extern int kernel_accept(struct socket *sock, struct socket **newsock, - int flags); -extern int kernel_connect(struct socket *sock, struct sockaddr *addr, - int addrlen, int flags); -extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr, - int *addrlen); -extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr, - int *addrlen); -extern int kernel_getsockopt(struct socket *sock, int level, int optname, - char *optval, int *optlen); -extern int kernel_setsockopt(struct socket *sock, int level, int optname, - char *optval, unsigned int optlen); -extern int kernel_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags); -extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); -extern int kernel_sock_shutdown(struct socket *sock, - enum sock_shutdown_cmd how); +int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen); +int kernel_listen(struct socket *sock, int backlog); +int kernel_accept(struct socket *sock, struct socket **newsock, int flags); +int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, + int flags); +int kernel_getsockname(struct socket *sock, struct sockaddr *addr, + int *addrlen); +int kernel_getpeername(struct socket *sock, struct sockaddr *addr, + int *addrlen); +int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, + int *optlen); +int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, + unsigned int optlen); +int kernel_sendpage(struct socket *sock, struct page *page, int offset, + size_t size, int flags); +int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); +int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how); #define MODULE_ALIAS_NETPROTO(proto) \ MODULE_ALIAS("net-pf-" __stringify(proto)) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2ddb48d9312c..6d56840e561e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -585,8 +585,8 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) skb->_skb_refdst = (unsigned long)dst; } -extern void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, - bool force); +void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, + bool force); /** * skb_dst_set_noref - sets skb dst, hopefully, without taking reference @@ -634,20 +634,20 @@ static inline struct rtable *skb_rtable(const struct sk_buff *skb) return (struct rtable *)skb_dst(skb); } -extern void kfree_skb(struct sk_buff *skb); -extern void kfree_skb_list(struct sk_buff *segs); -extern void skb_tx_error(struct sk_buff *skb); -extern void consume_skb(struct sk_buff *skb); -extern void __kfree_skb(struct sk_buff *skb); +void kfree_skb(struct sk_buff *skb); +void kfree_skb_list(struct sk_buff *segs); +void skb_tx_error(struct sk_buff *skb); +void consume_skb(struct sk_buff *skb); +void __kfree_skb(struct sk_buff *skb); extern struct kmem_cache *skbuff_head_cache; -extern void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); -extern bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, - bool *fragstolen, int *delta_truesize); +void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); +bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, + bool *fragstolen, int *delta_truesize); -extern struct sk_buff *__alloc_skb(unsigned int size, - gfp_t priority, int flags, int node); -extern struct sk_buff *build_skb(void *data, unsigned int frag_size); +struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags, + int node); +struct sk_buff *build_skb(void *data, unsigned int frag_size); static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) { @@ -660,41 +660,33 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); } -extern struct sk_buff *__alloc_skb_head(gfp_t priority, int node); +struct sk_buff *__alloc_skb_head(gfp_t priority, int node); static inline struct sk_buff *alloc_skb_head(gfp_t priority) { return __alloc_skb_head(priority, -1); } -extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); -extern int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); -extern struct sk_buff *skb_clone(struct sk_buff *skb, - gfp_t priority); -extern struct sk_buff *skb_copy(const struct sk_buff *skb, - gfp_t priority); -extern struct sk_buff *__pskb_copy(struct sk_buff *skb, - int headroom, gfp_t gfp_mask); - -extern int pskb_expand_head(struct sk_buff *skb, - int nhead, int ntail, - gfp_t gfp_mask); -extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, - unsigned int headroom); -extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb, - int newheadroom, int newtailroom, - gfp_t priority); -extern int skb_to_sgvec(struct sk_buff *skb, - struct scatterlist *sg, int offset, - int len); -extern int skb_cow_data(struct sk_buff *skb, int tailbits, - struct sk_buff **trailer); -extern int skb_pad(struct sk_buff *skb, int pad); +struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); +int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); +struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); +struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority); +struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask); + +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask); +struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, + unsigned int headroom); +struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, + int newtailroom, gfp_t priority); +int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, + int len); +int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); +int skb_pad(struct sk_buff *skb, int pad); #define dev_kfree_skb(a) consume_skb(a) -extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, - int getfrag(void *from, char *to, int offset, - int len,int odd, struct sk_buff *skb), - void *from, int length); +int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length); struct skb_seq_state { __u32 lower_offset; @@ -706,18 +698,17 @@ struct skb_seq_state { __u8 *frag_data; }; -extern void skb_prepare_seq_read(struct sk_buff *skb, - unsigned int from, unsigned int to, - struct skb_seq_state *st); -extern unsigned int skb_seq_read(unsigned int consumed, const u8 **data, - struct skb_seq_state *st); -extern void skb_abort_seq_read(struct skb_seq_state *st); +void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, + unsigned int to, struct skb_seq_state *st); +unsigned int skb_seq_read(unsigned int consumed, const u8 **data, + struct skb_seq_state *st); +void skb_abort_seq_read(struct skb_seq_state *st); -extern unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, - unsigned int to, struct ts_config *config, - struct ts_state *state); +unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, + unsigned int to, struct ts_config *config, + struct ts_state *state); -extern void __skb_get_rxhash(struct sk_buff *skb); +void __skb_get_rxhash(struct sk_buff *skb); static inline __u32 skb_get_rxhash(struct sk_buff *skb) { if (!skb->l4_rxhash) @@ -1095,7 +1086,8 @@ static inline void skb_queue_head_init_class(struct sk_buff_head *list, * The "__skb_xxxx()" functions are the non-atomic ones that * can only be called with interrupts disabled. */ -extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); +void skb_insert(struct sk_buff *old, struct sk_buff *newsk, + struct sk_buff_head *list); static inline void __skb_insert(struct sk_buff *newsk, struct sk_buff *prev, struct sk_buff *next, struct sk_buff_head *list) @@ -1201,8 +1193,8 @@ static inline void __skb_queue_after(struct sk_buff_head *list, __skb_insert(newsk, prev, prev->next, list); } -extern void skb_append(struct sk_buff *old, struct sk_buff *newsk, - struct sk_buff_head *list); +void skb_append(struct sk_buff *old, struct sk_buff *newsk, + struct sk_buff_head *list); static inline void __skb_queue_before(struct sk_buff_head *list, struct sk_buff *next, @@ -1221,7 +1213,7 @@ static inline void __skb_queue_before(struct sk_buff_head *list, * * A buffer cannot be placed on two lists at the same time. */ -extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); +void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) { @@ -1238,7 +1230,7 @@ static inline void __skb_queue_head(struct sk_buff_head *list, * * A buffer cannot be placed on two lists at the same time. */ -extern void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); +void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) { @@ -1249,7 +1241,7 @@ static inline void __skb_queue_tail(struct sk_buff_head *list, * remove sk_buff from list. _Must_ be called atomically, and with * the list known.. */ -extern void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list); +void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list); static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { struct sk_buff *next, *prev; @@ -1270,7 +1262,7 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) * so must be used with appropriate locks held only. The head item is * returned or %NULL if the list is empty. */ -extern struct sk_buff *skb_dequeue(struct sk_buff_head *list); +struct sk_buff *skb_dequeue(struct sk_buff_head *list); static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) { struct sk_buff *skb = skb_peek(list); @@ -1287,7 +1279,7 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) * so must be used with appropriate locks held only. The tail item is * returned or %NULL if the list is empty. */ -extern struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); +struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) { struct sk_buff *skb = skb_peek_tail(list); @@ -1373,8 +1365,8 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i, skb_shinfo(skb)->nr_frags = i + 1; } -extern void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, - int off, int size, unsigned int truesize); +void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, + int size, unsigned int truesize); #define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) #define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frag_list(skb)) @@ -1418,7 +1410,7 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) /* * Add data to an sk_buff */ -extern unsigned char *skb_put(struct sk_buff *skb, unsigned int len); +unsigned char *skb_put(struct sk_buff *skb, unsigned int len); static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) { unsigned char *tmp = skb_tail_pointer(skb); @@ -1428,7 +1420,7 @@ static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) return tmp; } -extern unsigned char *skb_push(struct sk_buff *skb, unsigned int len); +unsigned char *skb_push(struct sk_buff *skb, unsigned int len); static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) { skb->data -= len; @@ -1436,7 +1428,7 @@ static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) return skb->data; } -extern unsigned char *skb_pull(struct sk_buff *skb, unsigned int len); +unsigned char *skb_pull(struct sk_buff *skb, unsigned int len); static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len) { skb->len -= len; @@ -1449,7 +1441,7 @@ static inline unsigned char *skb_pull_inline(struct sk_buff *skb, unsigned int l return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); } -extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta); +unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta); static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len) { @@ -1753,7 +1745,7 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) #define NET_SKB_PAD max(32, L1_CACHE_BYTES) #endif -extern int ___pskb_trim(struct sk_buff *skb, unsigned int len); +int ___pskb_trim(struct sk_buff *skb, unsigned int len); static inline void __skb_trim(struct sk_buff *skb, unsigned int len) { @@ -1765,7 +1757,7 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len) skb_set_tail_pointer(skb, len); } -extern void skb_trim(struct sk_buff *skb, unsigned int len); +void skb_trim(struct sk_buff *skb, unsigned int len); static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) { @@ -1838,7 +1830,7 @@ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask) * the list and one reference dropped. This function does not take the * list lock and the caller must hold the relevant locks to use it. */ -extern void skb_queue_purge(struct sk_buff_head *list); +void skb_queue_purge(struct sk_buff_head *list); static inline void __skb_queue_purge(struct sk_buff_head *list) { struct sk_buff *skb; @@ -1850,11 +1842,10 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) #define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) #define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE -extern void *netdev_alloc_frag(unsigned int fragsz); +void *netdev_alloc_frag(unsigned int fragsz); -extern struct sk_buff *__netdev_alloc_skb(struct net_device *dev, - unsigned int length, - gfp_t gfp_mask); +struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, + gfp_t gfp_mask); /** * netdev_alloc_skb - allocate an skbuff for rx on a specific device @@ -2342,60 +2333,42 @@ static inline void skb_frag_add_head(struct sk_buff *skb, struct sk_buff *frag) #define skb_walk_frags(skb, iter) \ for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) -extern struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, - int *peeked, int *off, int *err); -extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, - int noblock, int *err); -extern unsigned int datagram_poll(struct file *file, struct socket *sock, - struct poll_table_struct *wait); -extern int skb_copy_datagram_iovec(const struct sk_buff *from, - int offset, struct iovec *to, - int size); -extern int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, - int hlen, - struct iovec *iov); -extern int skb_copy_datagram_from_iovec(struct sk_buff *skb, - int offset, - const struct iovec *from, - int from_offset, - int len); -extern int zerocopy_sg_from_iovec(struct sk_buff *skb, - const struct iovec *frm, - int offset, - size_t count); -extern int skb_copy_datagram_const_iovec(const struct sk_buff *from, - int offset, - const struct iovec *to, - int to_offset, - int size); -extern void skb_free_datagram(struct sock *sk, struct sk_buff *skb); -extern void skb_free_datagram_locked(struct sock *sk, - struct sk_buff *skb); -extern int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, - unsigned int flags); -extern __wsum skb_checksum(const struct sk_buff *skb, int offset, - int len, __wsum csum); -extern int skb_copy_bits(const struct sk_buff *skb, int offset, - void *to, int len); -extern int skb_store_bits(struct sk_buff *skb, int offset, - const void *from, int len); -extern __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, - int offset, u8 *to, int len, - __wsum csum); -extern int skb_splice_bits(struct sk_buff *skb, - unsigned int offset, - struct pipe_inode_info *pipe, - unsigned int len, - unsigned int flags); -extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); -extern void skb_split(struct sk_buff *skb, - struct sk_buff *skb1, const u32 len); -extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, - int shiftlen); -extern void skb_scrub_packet(struct sk_buff *skb, bool xnet); - -extern struct sk_buff *skb_segment(struct sk_buff *skb, - netdev_features_t features); +struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, + int *peeked, int *off, int *err); +struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, + int *err); +unsigned int datagram_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait); +int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, + struct iovec *to, int size); +int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen, + struct iovec *iov); +int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, + const struct iovec *from, int from_offset, + int len); +int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *frm, + int offset, size_t count); +int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset, + const struct iovec *to, int to_offset, + int size); +void skb_free_datagram(struct sock *sk, struct sk_buff *skb); +void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb); +int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); +__wsum skb_checksum(const struct sk_buff *skb, int offset, int len, + __wsum csum); +int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); +int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); +__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, + int len, __wsum csum); +int skb_splice_bits(struct sk_buff *skb, unsigned int offset, + struct pipe_inode_info *pipe, unsigned int len, + unsigned int flags); +void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); +void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); +int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); +void skb_scrub_packet(struct sk_buff *skb, bool xnet); + +struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) @@ -2440,7 +2413,7 @@ static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb, memcpy(skb->data + offset, from, len); } -extern void skb_init(void); +void skb_init(void); static inline ktime_t skb_get_ktime(const struct sk_buff *skb) { @@ -2483,12 +2456,12 @@ static inline ktime_t net_invalid_timestamp(void) return ktime_set(0, 0); } -extern void skb_timestamping_init(void); +void skb_timestamping_init(void); #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING -extern void skb_clone_tx_timestamp(struct sk_buff *skb); -extern bool skb_defer_rx_timestamp(struct sk_buff *skb); +void skb_clone_tx_timestamp(struct sk_buff *skb); +bool skb_defer_rx_timestamp(struct sk_buff *skb); #else /* CONFIG_NETWORK_PHY_TIMESTAMPING */ @@ -2529,8 +2502,8 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, * generates a software time stamp (otherwise), then queues the clone * to the error queue of the socket. Errors are silently ignored. */ -extern void skb_tstamp_tx(struct sk_buff *orig_skb, - struct skb_shared_hwtstamps *hwtstamps); +void skb_tstamp_tx(struct sk_buff *orig_skb, + struct skb_shared_hwtstamps *hwtstamps); static inline void sw_tx_timestamp(struct sk_buff *skb) { @@ -2562,8 +2535,8 @@ static inline void skb_tx_timestamp(struct sk_buff *skb) */ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked); -extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); -extern __sum16 __skb_checksum_complete(struct sk_buff *skb); +__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); +__sum16 __skb_checksum_complete(struct sk_buff *skb); static inline int skb_csum_unnecessary(const struct sk_buff *skb) { @@ -2593,7 +2566,7 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb) } #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) -extern void nf_conntrack_destroy(struct nf_conntrack *nfct); +void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) { if (nfct && atomic_dec_and_test(&nfct->use)) @@ -2732,9 +2705,8 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) return skb->queue_mapping != 0; } -extern u16 __skb_tx_hash(const struct net_device *dev, - const struct sk_buff *skb, - unsigned int num_tx_queues); +u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, + unsigned int num_tx_queues); #ifdef CONFIG_XFRM static inline struct sec_path *skb_sec_path(struct sk_buff *skb) @@ -2788,7 +2760,7 @@ static inline bool skb_is_gso_v6(const struct sk_buff *skb) return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; } -extern void __skb_warn_lro_forwarding(const struct sk_buff *skb); +void __skb_warn_lro_forwarding(const struct sk_buff *skb); static inline bool skb_warn_if_lro(const struct sk_buff *skb) { -- cgit v1.2.3-71-gd317 From f629d208d27a22f495b7734eede585b5d207e912 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 26 Sep 2013 14:48:15 -0700 Subject: [networking]device.h: Remove extern from function prototypes There are a mix of function prototypes with and without extern in the kernel sources. Standardize on not using extern for function prototypes. Function prototypes don't need to be written with extern. extern is assumed by the compiler. Its use is as unnecessary as using auto to declare automatic/local variables in a block. Signed-off-by: Joe Perches --- include/linux/etherdevice.h | 35 ++-- include/linux/fcdevice.h | 2 +- include/linux/fddidevice.h | 7 +- include/linux/hippidevice.h | 10 +- include/linux/inetdevice.h | 28 +-- include/linux/netdevice.h | 432 +++++++++++++++++++++----------------------- 6 files changed, 248 insertions(+), 266 deletions(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index d8b512496e50..fc4a9aa7dd82 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -28,27 +28,24 @@ #include #ifdef __KERNEL__ -extern __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); +__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); extern const struct header_ops eth_header_ops; -extern int eth_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, - const void *daddr, const void *saddr, unsigned len); -extern int eth_rebuild_header(struct sk_buff *skb); -extern int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); -extern int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); -extern void eth_header_cache_update(struct hh_cache *hh, - const struct net_device *dev, - const unsigned char *haddr); -extern int eth_prepare_mac_addr_change(struct net_device *dev, void *p); -extern void eth_commit_mac_addr_change(struct net_device *dev, void *p); -extern int eth_mac_addr(struct net_device *dev, void *p); -extern int eth_change_mtu(struct net_device *dev, int new_mtu); -extern int eth_validate_addr(struct net_device *dev); - - - -extern struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, +int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, + const void *daddr, const void *saddr, unsigned len); +int eth_rebuild_header(struct sk_buff *skb); +int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); +int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, + __be16 type); +void eth_header_cache_update(struct hh_cache *hh, const struct net_device *dev, + const unsigned char *haddr); +int eth_prepare_mac_addr_change(struct net_device *dev, void *p); +void eth_commit_mac_addr_change(struct net_device *dev, void *p); +int eth_mac_addr(struct net_device *dev, void *p); +int eth_change_mtu(struct net_device *dev, int new_mtu); +int eth_validate_addr(struct net_device *dev); + +struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, unsigned int rxqs); #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1) #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count) diff --git a/include/linux/fcdevice.h b/include/linux/fcdevice.h index e460ef831984..5009fa16b5d8 100644 --- a/include/linux/fcdevice.h +++ b/include/linux/fcdevice.h @@ -27,7 +27,7 @@ #include #ifdef __KERNEL__ -extern struct net_device *alloc_fcdev(int sizeof_priv); +struct net_device *alloc_fcdev(int sizeof_priv); #endif #endif /* _LINUX_FCDEVICE_H */ diff --git a/include/linux/fddidevice.h b/include/linux/fddidevice.h index 155bafd9e886..9a79f0106da1 100644 --- a/include/linux/fddidevice.h +++ b/include/linux/fddidevice.h @@ -25,10 +25,9 @@ #include #ifdef __KERNEL__ -extern __be16 fddi_type_trans(struct sk_buff *skb, - struct net_device *dev); -extern int fddi_change_mtu(struct net_device *dev, int new_mtu); -extern struct net_device *alloc_fddidev(int sizeof_priv); +__be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev); +int fddi_change_mtu(struct net_device *dev, int new_mtu); +struct net_device *alloc_fddidev(int sizeof_priv); #endif #endif /* _LINUX_FDDIDEVICE_H */ diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index f148e4908410..8ec23fb0b412 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -31,11 +31,11 @@ struct hippi_cb { __u32 ifield; }; -extern __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev); -extern int hippi_change_mtu(struct net_device *dev, int new_mtu); -extern int hippi_mac_addr(struct net_device *dev, void *p); -extern int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p); -extern struct net_device *alloc_hippi_dev(int sizeof_priv); +__be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev); +int hippi_change_mtu(struct net_device *dev, int new_mtu); +int hippi_mac_addr(struct net_device *dev, void *p); +int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p); +struct net_device *alloc_hippi_dev(int sizeof_priv); #endif #endif /* _LINUX_HIPPIDEVICE_H */ diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 79640e015a86..0d678aefe69d 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -147,25 +147,27 @@ struct in_ifaddr { unsigned long ifa_tstamp; /* updated timestamp */ }; -extern int register_inetaddr_notifier(struct notifier_block *nb); -extern int unregister_inetaddr_notifier(struct notifier_block *nb); +int register_inetaddr_notifier(struct notifier_block *nb); +int unregister_inetaddr_notifier(struct notifier_block *nb); -extern void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, - struct ipv4_devconf *devconf); +void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv4_devconf *devconf); -extern struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref); +struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref); static inline struct net_device *ip_dev_find(struct net *net, __be32 addr) { return __ip_dev_find(net, addr, true); } -extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); -extern int devinet_ioctl(struct net *net, unsigned int cmd, void __user *); -extern void devinet_init(void); -extern struct in_device *inetdev_by_index(struct net *, int); -extern __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); -extern __be32 inet_confirm_addr(struct in_device *in_dev, __be32 dst, __be32 local, int scope); -extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask); +int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); +int devinet_ioctl(struct net *net, unsigned int cmd, void __user *); +void devinet_init(void); +struct in_device *inetdev_by_index(struct net *, int); +__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); +__be32 inet_confirm_addr(struct in_device *in_dev, __be32 dst, __be32 local, + int scope); +struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, + __be32 mask); static __inline__ int inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) { @@ -218,7 +220,7 @@ static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev) return rtnl_dereference(dev->ip_ptr); } -extern void in_dev_finish_destroy(struct in_device *idev); +void in_dev_finish_destroy(struct in_device *idev); static inline void in_dev_put(struct in_device *idev) { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b4cfb63f264e..5f01af3927ca 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -60,8 +60,8 @@ struct wireless_dev; #define SET_ETHTOOL_OPS(netdev,ops) \ ( (netdev)->ethtool_ops = (ops) ) -extern void netdev_set_default_ethtool_ops(struct net_device *dev, - const struct ethtool_ops *ops); +void netdev_set_default_ethtool_ops(struct net_device *dev, + const struct ethtool_ops *ops); /* hardware address assignment types */ #define NET_ADDR_PERM 0 /* address is permanent (default) */ @@ -298,7 +298,7 @@ struct netdev_boot_setup { }; #define NETDEV_BOOT_SETUP_MAX 8 -extern int __init netdev_boot_setup(char *str); +int __init netdev_boot_setup(char *str); /* * Structure for NAPI scheduling similar to tasklet but with weighting @@ -394,7 +394,7 @@ enum rx_handler_result { typedef enum rx_handler_result rx_handler_result_t; typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb); -extern void __napi_schedule(struct napi_struct *n); +void __napi_schedule(struct napi_struct *n); static inline bool napi_disable_pending(struct napi_struct *n) { @@ -445,8 +445,8 @@ static inline bool napi_reschedule(struct napi_struct *napi) * * Mark NAPI processing as complete. */ -extern void __napi_complete(struct napi_struct *n); -extern void napi_complete(struct napi_struct *n); +void __napi_complete(struct napi_struct *n); +void napi_complete(struct napi_struct *n); /** * napi_by_id - lookup a NAPI by napi_id @@ -455,7 +455,7 @@ extern void napi_complete(struct napi_struct *n); * lookup @napi_id in napi_hash table * must be called under rcu_read_lock() */ -extern struct napi_struct *napi_by_id(unsigned int napi_id); +struct napi_struct *napi_by_id(unsigned int napi_id); /** * napi_hash_add - add a NAPI to global hashtable @@ -463,7 +463,7 @@ extern struct napi_struct *napi_by_id(unsigned int napi_id); * * generate a new napi_id and store a @napi under it in napi_hash */ -extern void napi_hash_add(struct napi_struct *napi); +void napi_hash_add(struct napi_struct *napi); /** * napi_hash_del - remove a NAPI from global table @@ -472,7 +472,7 @@ extern void napi_hash_add(struct napi_struct *napi); * Warning: caller must observe rcu grace period * before freeing memory containing @napi */ -extern void napi_hash_del(struct napi_struct *napi); +void napi_hash_del(struct napi_struct *napi); /** * napi_disable - prevent NAPI from scheduling @@ -664,8 +664,8 @@ static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table, extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; #ifdef CONFIG_RFS_ACCEL -extern bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, - u32 flow_id, u16 filter_id); +bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, + u16 filter_id); #endif /* This structure contains an instance of an RX queue. */ @@ -1497,9 +1497,9 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, f(dev, &dev->_tx[i], arg); } -extern struct netdev_queue *netdev_pick_tx(struct net_device *dev, - struct sk_buff *skb); -extern u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb); +struct netdev_queue *netdev_pick_tx(struct net_device *dev, + struct sk_buff *skb); +u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb); /* * Net namespace inlines @@ -1683,8 +1683,8 @@ struct packet_offload { #define NETDEV_CHANGEUPPER 0x0015 #define NETDEV_RESEND_IGMP 0x0016 -extern int register_netdevice_notifier(struct notifier_block *nb); -extern int unregister_netdevice_notifier(struct notifier_block *nb); +int register_netdevice_notifier(struct notifier_block *nb); +int unregister_netdevice_notifier(struct notifier_block *nb); struct netdev_notifier_info { struct net_device *dev; @@ -1707,9 +1707,9 @@ netdev_notifier_info_to_dev(const struct netdev_notifier_info *info) return info->dev; } -extern int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, - struct netdev_notifier_info *info); -extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); +int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, + struct netdev_notifier_info *info); +int call_netdevice_notifiers(unsigned long val, struct net_device *dev); extern rwlock_t dev_base_lock; /* Device list lock */ @@ -1764,54 +1764,52 @@ static inline struct net_device *first_net_device_rcu(struct net *net) return lh == &net->dev_base_head ? NULL : net_device_entry(lh); } -extern int netdev_boot_setup_check(struct net_device *dev); -extern unsigned long netdev_boot_base(const char *prefix, int unit); -extern struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, - const char *hwaddr); -extern struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); -extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type); -extern void dev_add_pack(struct packet_type *pt); -extern void dev_remove_pack(struct packet_type *pt); -extern void __dev_remove_pack(struct packet_type *pt); -extern void dev_add_offload(struct packet_offload *po); -extern void dev_remove_offload(struct packet_offload *po); -extern void __dev_remove_offload(struct packet_offload *po); - -extern struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags, - unsigned short mask); -extern struct net_device *dev_get_by_name(struct net *net, const char *name); -extern struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); -extern struct net_device *__dev_get_by_name(struct net *net, const char *name); -extern int dev_alloc_name(struct net_device *dev, const char *name); -extern int dev_open(struct net_device *dev); -extern int dev_close(struct net_device *dev); -extern void dev_disable_lro(struct net_device *dev); -extern int dev_loopback_xmit(struct sk_buff *newskb); -extern int dev_queue_xmit(struct sk_buff *skb); -extern int register_netdevice(struct net_device *dev); -extern void unregister_netdevice_queue(struct net_device *dev, - struct list_head *head); -extern void unregister_netdevice_many(struct list_head *head); +int netdev_boot_setup_check(struct net_device *dev); +unsigned long netdev_boot_base(const char *prefix, int unit); +struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, + const char *hwaddr); +struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); +struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type); +void dev_add_pack(struct packet_type *pt); +void dev_remove_pack(struct packet_type *pt); +void __dev_remove_pack(struct packet_type *pt); +void dev_add_offload(struct packet_offload *po); +void dev_remove_offload(struct packet_offload *po); +void __dev_remove_offload(struct packet_offload *po); + +struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags, + unsigned short mask); +struct net_device *dev_get_by_name(struct net *net, const char *name); +struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); +struct net_device *__dev_get_by_name(struct net *net, const char *name); +int dev_alloc_name(struct net_device *dev, const char *name); +int dev_open(struct net_device *dev); +int dev_close(struct net_device *dev); +void dev_disable_lro(struct net_device *dev); +int dev_loopback_xmit(struct sk_buff *newskb); +int dev_queue_xmit(struct sk_buff *skb); +int register_netdevice(struct net_device *dev); +void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); +void unregister_netdevice_many(struct list_head *head); static inline void unregister_netdevice(struct net_device *dev) { unregister_netdevice_queue(dev, NULL); } -extern int netdev_refcnt_read(const struct net_device *dev); -extern void free_netdev(struct net_device *dev); -extern void synchronize_net(void); -extern int init_dummy_netdev(struct net_device *dev); +int netdev_refcnt_read(const struct net_device *dev); +void free_netdev(struct net_device *dev); +void synchronize_net(void); +int init_dummy_netdev(struct net_device *dev); -extern struct net_device *dev_get_by_index(struct net *net, int ifindex); -extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); -extern struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); -extern int netdev_get_name(struct net *net, char *name, int ifindex); -extern int dev_restart(struct net_device *dev); +struct net_device *dev_get_by_index(struct net *net, int ifindex); +struct net_device *__dev_get_by_index(struct net *net, int ifindex); +struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); +int netdev_get_name(struct net *net, char *name, int ifindex); +int dev_restart(struct net_device *dev); #ifdef CONFIG_NETPOLL_TRAP -extern int netpoll_trap(void); +int netpoll_trap(void); #endif -extern int skb_gro_receive(struct sk_buff **head, - struct sk_buff *skb); +int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb); static inline unsigned int skb_gro_offset(const struct sk_buff *skb) { @@ -1883,7 +1881,7 @@ static inline int dev_parse_header(const struct sk_buff *skb, } typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len); -extern int register_gifconf(unsigned int family, gifconf_func_t * gifconf); +int register_gifconf(unsigned int family, gifconf_func_t *gifconf); static inline int unregister_gifconf(unsigned int family) { return register_gifconf(family, NULL); @@ -1954,7 +1952,7 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd, DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); -extern void __netif_schedule(struct Qdisc *q); +void __netif_schedule(struct Qdisc *q); static inline void netif_schedule_queue(struct netdev_queue *txq) { @@ -2274,8 +2272,8 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) } #ifdef CONFIG_XPS -extern int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, - u16 index); +int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, + u16 index); #else static inline int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, @@ -2306,12 +2304,10 @@ static inline bool netif_is_multiqueue(const struct net_device *dev) return dev->num_tx_queues > 1; } -extern int netif_set_real_num_tx_queues(struct net_device *dev, - unsigned int txq); +int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq); #ifdef CONFIG_RPS -extern int netif_set_real_num_rx_queues(struct net_device *dev, - unsigned int rxq); +int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq); #else static inline int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) @@ -2338,28 +2334,27 @@ static inline int netif_copy_real_num_queues(struct net_device *to_dev, } #define DEFAULT_MAX_NUM_RSS_QUEUES (8) -extern int netif_get_num_default_rss_queues(void); +int netif_get_num_default_rss_queues(void); /* Use this variant when it is known for sure that it * is executing from hardware interrupt context or with hardware interrupts * disabled. */ -extern void dev_kfree_skb_irq(struct sk_buff *skb); +void dev_kfree_skb_irq(struct sk_buff *skb); /* Use this variant in places where it could be invoked * from either hardware interrupt or other context, with hardware interrupts * either disabled or enabled. */ -extern void dev_kfree_skb_any(struct sk_buff *skb); +void dev_kfree_skb_any(struct sk_buff *skb); -extern int netif_rx(struct sk_buff *skb); -extern int netif_rx_ni(struct sk_buff *skb); -extern int netif_receive_skb(struct sk_buff *skb); -extern gro_result_t napi_gro_receive(struct napi_struct *napi, - struct sk_buff *skb); -extern void napi_gro_flush(struct napi_struct *napi, bool flush_old); -extern struct sk_buff * napi_get_frags(struct napi_struct *napi); -extern gro_result_t napi_gro_frags(struct napi_struct *napi); +int netif_rx(struct sk_buff *skb); +int netif_rx_ni(struct sk_buff *skb); +int netif_receive_skb(struct sk_buff *skb); +gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); +void napi_gro_flush(struct napi_struct *napi, bool flush_old); +struct sk_buff *napi_get_frags(struct napi_struct *napi); +gro_result_t napi_gro_frags(struct napi_struct *napi); static inline void napi_free_frags(struct napi_struct *napi) { @@ -2367,40 +2362,35 @@ static inline void napi_free_frags(struct napi_struct *napi) napi->skb = NULL; } -extern int netdev_rx_handler_register(struct net_device *dev, - rx_handler_func_t *rx_handler, - void *rx_handler_data); -extern void netdev_rx_handler_unregister(struct net_device *dev); - -extern bool dev_valid_name(const char *name); -extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); -extern int dev_ethtool(struct net *net, struct ifreq *); -extern unsigned int dev_get_flags(const struct net_device *); -extern int __dev_change_flags(struct net_device *, unsigned int flags); -extern int dev_change_flags(struct net_device *, unsigned int); -extern void __dev_notify_flags(struct net_device *, unsigned int old_flags); -extern int dev_change_name(struct net_device *, const char *); -extern int dev_set_alias(struct net_device *, const char *, size_t); -extern int dev_change_net_namespace(struct net_device *, - struct net *, const char *); -extern int dev_set_mtu(struct net_device *, int); -extern void dev_set_group(struct net_device *, int); -extern int dev_set_mac_address(struct net_device *, - struct sockaddr *); -extern int dev_change_carrier(struct net_device *, - bool new_carrier); -extern int dev_get_phys_port_id(struct net_device *dev, - struct netdev_phys_port_id *ppid); -extern int dev_hard_start_xmit(struct sk_buff *skb, - struct net_device *dev, - struct netdev_queue *txq); -extern int dev_forward_skb(struct net_device *dev, - struct sk_buff *skb); +int netdev_rx_handler_register(struct net_device *dev, + rx_handler_func_t *rx_handler, + void *rx_handler_data); +void netdev_rx_handler_unregister(struct net_device *dev); + +bool dev_valid_name(const char *name); +int dev_ioctl(struct net *net, unsigned int cmd, void __user *); +int dev_ethtool(struct net *net, struct ifreq *); +unsigned int dev_get_flags(const struct net_device *); +int __dev_change_flags(struct net_device *, unsigned int flags); +int dev_change_flags(struct net_device *, unsigned int); +void __dev_notify_flags(struct net_device *, unsigned int old_flags); +int dev_change_name(struct net_device *, const char *); +int dev_set_alias(struct net_device *, const char *, size_t); +int dev_change_net_namespace(struct net_device *, struct net *, const char *); +int dev_set_mtu(struct net_device *, int); +void dev_set_group(struct net_device *, int); +int dev_set_mac_address(struct net_device *, struct sockaddr *); +int dev_change_carrier(struct net_device *, bool new_carrier); +int dev_get_phys_port_id(struct net_device *dev, + struct netdev_phys_port_id *ppid); +int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq); +int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); extern int netdev_budget; /* Called by rtnetlink.c:rtnl_unlock() */ -extern void netdev_run_todo(void); +void netdev_run_todo(void); /** * dev_put - release reference to device @@ -2433,9 +2423,9 @@ static inline void dev_hold(struct net_device *dev) * kind of lower layer not just hardware media. */ -extern void linkwatch_init_dev(struct net_device *dev); -extern void linkwatch_fire_event(struct net_device *dev); -extern void linkwatch_forget_dev(struct net_device *dev); +void linkwatch_init_dev(struct net_device *dev); +void linkwatch_fire_event(struct net_device *dev); +void linkwatch_forget_dev(struct net_device *dev); /** * netif_carrier_ok - test if carrier present @@ -2448,13 +2438,13 @@ static inline bool netif_carrier_ok(const struct net_device *dev) return !test_bit(__LINK_STATE_NOCARRIER, &dev->state); } -extern unsigned long dev_trans_start(struct net_device *dev); +unsigned long dev_trans_start(struct net_device *dev); -extern void __netdev_watchdog_up(struct net_device *dev); +void __netdev_watchdog_up(struct net_device *dev); -extern void netif_carrier_on(struct net_device *dev); +void netif_carrier_on(struct net_device *dev); -extern void netif_carrier_off(struct net_device *dev); +void netif_carrier_off(struct net_device *dev); /** * netif_dormant_on - mark device as dormant. @@ -2522,9 +2512,9 @@ static inline bool netif_device_present(struct net_device *dev) return test_bit(__LINK_STATE_PRESENT, &dev->state); } -extern void netif_device_detach(struct net_device *dev); +void netif_device_detach(struct net_device *dev); -extern void netif_device_attach(struct net_device *dev); +void netif_device_attach(struct net_device *dev); /* * Network interface message level settings @@ -2733,98 +2723,93 @@ static inline void netif_addr_unlock_bh(struct net_device *dev) /* These functions live elsewhere (drivers/net/net_init.c, but related) */ -extern void ether_setup(struct net_device *dev); +void ether_setup(struct net_device *dev); /* Support for loadable net-drivers */ -extern struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, - void (*setup)(struct net_device *), - unsigned int txqs, unsigned int rxqs); +struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, + void (*setup)(struct net_device *), + unsigned int txqs, unsigned int rxqs); #define alloc_netdev(sizeof_priv, name, setup) \ alloc_netdev_mqs(sizeof_priv, name, setup, 1, 1) #define alloc_netdev_mq(sizeof_priv, name, setup, count) \ alloc_netdev_mqs(sizeof_priv, name, setup, count, count) -extern int register_netdev(struct net_device *dev); -extern void unregister_netdev(struct net_device *dev); +int register_netdev(struct net_device *dev); +void unregister_netdev(struct net_device *dev); /* General hardware address lists handling functions */ -extern int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int addr_len, unsigned char addr_type); -extern void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int addr_len, unsigned char addr_type); -extern int __hw_addr_sync(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int addr_len); -extern void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int addr_len); -extern void __hw_addr_flush(struct netdev_hw_addr_list *list); -extern void __hw_addr_init(struct netdev_hw_addr_list *list); +int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len, unsigned char addr_type); +void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len, unsigned char addr_type); +int __hw_addr_sync(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, int addr_len); +void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, int addr_len); +void __hw_addr_flush(struct netdev_hw_addr_list *list); +void __hw_addr_init(struct netdev_hw_addr_list *list); /* Functions used for device addresses handling */ -extern int dev_addr_add(struct net_device *dev, const unsigned char *addr, - unsigned char addr_type); -extern int dev_addr_del(struct net_device *dev, const unsigned char *addr, - unsigned char addr_type); -extern int dev_addr_add_multiple(struct net_device *to_dev, - struct net_device *from_dev, - unsigned char addr_type); -extern int dev_addr_del_multiple(struct net_device *to_dev, - struct net_device *from_dev, - unsigned char addr_type); -extern void dev_addr_flush(struct net_device *dev); -extern int dev_addr_init(struct net_device *dev); +int dev_addr_add(struct net_device *dev, const unsigned char *addr, + unsigned char addr_type); +int dev_addr_del(struct net_device *dev, const unsigned char *addr, + unsigned char addr_type); +int dev_addr_add_multiple(struct net_device *to_dev, + struct net_device *from_dev, unsigned char addr_type); +int dev_addr_del_multiple(struct net_device *to_dev, + struct net_device *from_dev, unsigned char addr_type); +void dev_addr_flush(struct net_device *dev); +int dev_addr_init(struct net_device *dev); /* Functions used for unicast addresses handling */ -extern int dev_uc_add(struct net_device *dev, const unsigned char *addr); -extern int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr); -extern int dev_uc_del(struct net_device *dev, const unsigned char *addr); -extern int dev_uc_sync(struct net_device *to, struct net_device *from); -extern int dev_uc_sync_multiple(struct net_device *to, struct net_device *from); -extern void dev_uc_unsync(struct net_device *to, struct net_device *from); -extern void dev_uc_flush(struct net_device *dev); -extern void dev_uc_init(struct net_device *dev); +int dev_uc_add(struct net_device *dev, const unsigned char *addr); +int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr); +int dev_uc_del(struct net_device *dev, const unsigned char *addr); +int dev_uc_sync(struct net_device *to, struct net_device *from); +int dev_uc_sync_multiple(struct net_device *to, struct net_device *from); +void dev_uc_unsync(struct net_device *to, struct net_device *from); +void dev_uc_flush(struct net_device *dev); +void dev_uc_init(struct net_device *dev); /* Functions used for multicast addresses handling */ -extern int dev_mc_add(struct net_device *dev, const unsigned char *addr); -extern int dev_mc_add_global(struct net_device *dev, const unsigned char *addr); -extern int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr); -extern int dev_mc_del(struct net_device *dev, const unsigned char *addr); -extern int dev_mc_del_global(struct net_device *dev, const unsigned char *addr); -extern int dev_mc_sync(struct net_device *to, struct net_device *from); -extern int dev_mc_sync_multiple(struct net_device *to, struct net_device *from); -extern void dev_mc_unsync(struct net_device *to, struct net_device *from); -extern void dev_mc_flush(struct net_device *dev); -extern void dev_mc_init(struct net_device *dev); +int dev_mc_add(struct net_device *dev, const unsigned char *addr); +int dev_mc_add_global(struct net_device *dev, const unsigned char *addr); +int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr); +int dev_mc_del(struct net_device *dev, const unsigned char *addr); +int dev_mc_del_global(struct net_device *dev, const unsigned char *addr); +int dev_mc_sync(struct net_device *to, struct net_device *from); +int dev_mc_sync_multiple(struct net_device *to, struct net_device *from); +void dev_mc_unsync(struct net_device *to, struct net_device *from); +void dev_mc_flush(struct net_device *dev); +void dev_mc_init(struct net_device *dev); /* Functions used for secondary unicast and multicast support */ -extern void dev_set_rx_mode(struct net_device *dev); -extern void __dev_set_rx_mode(struct net_device *dev); -extern int dev_set_promiscuity(struct net_device *dev, int inc); -extern int dev_set_allmulti(struct net_device *dev, int inc); -extern void netdev_state_change(struct net_device *dev); -extern void netdev_notify_peers(struct net_device *dev); -extern void netdev_features_change(struct net_device *dev); +void dev_set_rx_mode(struct net_device *dev); +void __dev_set_rx_mode(struct net_device *dev); +int dev_set_promiscuity(struct net_device *dev, int inc); +int dev_set_allmulti(struct net_device *dev, int inc); +void netdev_state_change(struct net_device *dev); +void netdev_notify_peers(struct net_device *dev); +void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ -extern void dev_load(struct net *net, const char *name); -extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, - struct rtnl_link_stats64 *storage); -extern void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, - const struct net_device_stats *netdev_stats); +void dev_load(struct net *net, const char *name); +struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, + struct rtnl_link_stats64 *storage); +void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, + const struct net_device_stats *netdev_stats); extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; extern int weight_p; extern int bpf_jit_enable; -extern bool netdev_has_upper_dev(struct net_device *dev, - struct net_device *upper_dev); -extern bool netdev_has_any_upper_dev(struct net_device *dev); -extern struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, - struct list_head **iter); +bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); +bool netdev_has_any_upper_dev(struct net_device *dev); +struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, + struct list_head **iter); /* iterate through upper list, must be called under RCU read lock */ #define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \ @@ -2833,10 +2818,10 @@ extern struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *d updev; \ updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter))) -extern void *netdev_lower_get_next_private(struct net_device *dev, - struct list_head **iter); -extern void *netdev_lower_get_next_private_rcu(struct net_device *dev, - struct list_head **iter); +void *netdev_lower_get_next_private(struct net_device *dev, + struct list_head **iter); +void *netdev_lower_get_next_private_rcu(struct net_device *dev, + struct list_head **iter); #define netdev_for_each_lower_private(dev, priv, iter) \ for (iter = (dev)->adj_list.lower.next, \ @@ -2850,27 +2835,26 @@ extern void *netdev_lower_get_next_private_rcu(struct net_device *dev, priv; \ priv = netdev_lower_get_next_private_rcu(dev, &(iter))) -extern void *netdev_adjacent_get_private(struct list_head *adj_list); -extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev); -extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); -extern int netdev_upper_dev_link(struct net_device *dev, +void *netdev_adjacent_get_private(struct list_head *adj_list); +struct net_device *netdev_master_upper_dev_get(struct net_device *dev); +struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); +int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev); +int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev); -extern int netdev_master_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev); -extern int netdev_master_upper_dev_link_private(struct net_device *dev, - struct net_device *upper_dev, - void *private); -extern void netdev_upper_dev_unlink(struct net_device *dev, - struct net_device *upper_dev); -extern void *netdev_lower_dev_get_private_rcu(struct net_device *dev, - struct net_device *lower_dev); -extern void *netdev_lower_dev_get_private(struct net_device *dev, - struct net_device *lower_dev); -extern int skb_checksum_help(struct sk_buff *skb); -extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb, - netdev_features_t features, bool tx_path); -extern struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, - netdev_features_t features); +int netdev_master_upper_dev_link_private(struct net_device *dev, + struct net_device *upper_dev, + void *private); +void netdev_upper_dev_unlink(struct net_device *dev, + struct net_device *upper_dev); +void *netdev_lower_dev_get_private_rcu(struct net_device *dev, + struct net_device *lower_dev); +void *netdev_lower_dev_get_private(struct net_device *dev, + struct net_device *lower_dev); +int skb_checksum_help(struct sk_buff *skb); +struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path); +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features); static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) @@ -2892,30 +2876,30 @@ static inline bool can_checksum_protocol(netdev_features_t features, } #ifdef CONFIG_BUG -extern void netdev_rx_csum_fault(struct net_device *dev); +void netdev_rx_csum_fault(struct net_device *dev); #else static inline void netdev_rx_csum_fault(struct net_device *dev) { } #endif /* rx skb timestamps */ -extern void net_enable_timestamp(void); -extern void net_disable_timestamp(void); +void net_enable_timestamp(void); +void net_disable_timestamp(void); #ifdef CONFIG_PROC_FS -extern int __init dev_proc_init(void); +int __init dev_proc_init(void); #else #define dev_proc_init() 0 #endif -extern int netdev_class_create_file(struct class_attribute *class_attr); -extern void netdev_class_remove_file(struct class_attribute *class_attr); +int netdev_class_create_file(struct class_attribute *class_attr); +void netdev_class_remove_file(struct class_attribute *class_attr); extern struct kobj_ns_type_operations net_ns_type_operations; -extern const char *netdev_drivername(const struct net_device *dev); +const char *netdev_drivername(const struct net_device *dev); -extern void linkwatch_run_queue(void); +void linkwatch_run_queue(void); static inline netdev_features_t netdev_get_wanted_features( struct net_device *dev) @@ -3007,22 +2991,22 @@ static inline const char *netdev_name(const struct net_device *dev) return dev->name; } -extern __printf(3, 4) +__printf(3, 4) int netdev_printk(const char *level, const struct net_device *dev, const char *format, ...); -extern __printf(2, 3) +__printf(2, 3) int netdev_emerg(const struct net_device *dev, const char *format, ...); -extern __printf(2, 3) +__printf(2, 3) int netdev_alert(const struct net_device *dev, const char *format, ...); -extern __printf(2, 3) +__printf(2, 3) int netdev_crit(const struct net_device *dev, const char *format, ...); -extern __printf(2, 3) +__printf(2, 3) int netdev_err(const struct net_device *dev, const char *format, ...); -extern __printf(2, 3) +__printf(2, 3) int netdev_warn(const struct net_device *dev, const char *format, ...); -extern __printf(2, 3) +__printf(2, 3) int netdev_notice(const struct net_device *dev, const char *format, ...); -extern __printf(2, 3) +__printf(2, 3) int netdev_info(const struct net_device *dev, const char *format, ...); #define MODULE_ALIAS_NETDEV(device) \ -- cgit v1.2.3-71-gd317 From a528c219df2e865e178c538c7178961dfed5a13c Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 25 Sep 2013 12:02:44 +0200 Subject: dev: update __dev_notify_flags() to send rtnl msg This patch only prepares the next one, there is no functional change. Now, __dev_notify_flags() can also be used to notify flags changes via rtnetlink. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 +++- net/core/dev.c | 11 ++++++----- net/core/rtnetlink.c | 3 +-- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b4cfb63f264e..f44f99a69977 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2378,7 +2378,9 @@ extern int dev_ethtool(struct net *net, struct ifreq *); extern unsigned int dev_get_flags(const struct net_device *); extern int __dev_change_flags(struct net_device *, unsigned int flags); extern int dev_change_flags(struct net_device *, unsigned int); -extern void __dev_notify_flags(struct net_device *, unsigned int old_flags); +void __dev_notify_flags(struct net_device *, + unsigned int old_flags, + unsigned int gchanges); extern int dev_change_name(struct net_device *, const char *); extern int dev_set_alias(struct net_device *, const char *, size_t); extern int dev_change_net_namespace(struct net_device *, diff --git a/net/core/dev.c b/net/core/dev.c index 25ab6fe80da2..594a6b0ab3da 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5235,10 +5235,14 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags) return ret; } -void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) +void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, + unsigned int gchanges) { unsigned int changes = dev->flags ^ old_flags; + if (gchanges) + rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges); + if (changes & IFF_UP) { if (dev->flags & IFF_UP) call_netdevice_notifiers(NETDEV_UP, dev); @@ -5274,10 +5278,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags) return ret; changes = old_flags ^ dev->flags; - if (changes) - rtmsg_ifinfo(RTM_NEWLINK, dev, changes); - - __dev_notify_flags(dev, old_flags); + __dev_notify_flags(dev, old_flags, changes); return ret; } EXPORT_SYMBOL(dev_change_flags); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2a0e21de3060..4aedf03da052 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1647,9 +1647,8 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) } dev->rtnl_link_state = RTNL_LINK_INITIALIZED; - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); - __dev_notify_flags(dev, old_flags); + __dev_notify_flags(dev, old_flags, ~0U); return 0; } EXPORT_SYMBOL(rtnl_configure_link); -- cgit v1.2.3-71-gd317 From 35b8dcf8c3a0be1feb1c8b29b22e1685ba0c2e14 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 30 Apr 2013 23:02:43 +0200 Subject: netfilter: ipset: Rename simple macro names to avoid namespace issues. Reported-by: David Laight Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 3 + net/netfilter/ipset/ip_set_bitmap_gen.h | 47 +++++---- net/netfilter/ipset/ip_set_bitmap_ip.c | 10 +- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 10 +- net/netfilter/ipset/ip_set_bitmap_port.c | 10 +- net/netfilter/ipset/ip_set_hash_gen.h | 147 +++++++++++++++------------- net/netfilter/ipset/ip_set_hash_ip.c | 10 +- net/netfilter/ipset/ip_set_hash_ipport.c | 12 +-- net/netfilter/ipset/ip_set_hash_ipportip.c | 12 +-- net/netfilter/ipset/ip_set_hash_ipportnet.c | 16 +-- net/netfilter/ipset/ip_set_hash_net.c | 14 +-- net/netfilter/ipset/ip_set_hash_netiface.c | 14 +-- net/netfilter/ipset/ip_set_hash_netport.c | 16 +-- net/netfilter/ipset/ip_set_list_set.c | 10 +- 14 files changed, 169 insertions(+), 162 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 9ac9fbde7b61..f900f33a5f3d 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -398,4 +398,7 @@ bitmap_bytes(u32 a, u32 b) { .bytes = ULLONG_MAX, .packets = ULLONG_MAX, \ .timeout = (map)->timeout } +#define IPSET_CONCAT(a, b) a##b +#define IPSET_TOKEN(a, b) IPSET_CONCAT(a, b) + #endif /*_IP_SET_H */ diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index f6af97cf8d3e..d39905e7e881 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -8,31 +8,28 @@ #ifndef __IP_SET_BITMAP_IP_GEN_H #define __IP_SET_BITMAP_IP_GEN_H -#define CONCAT(a, b) a##b -#define TOKEN(a,b) CONCAT(a, b) - -#define mtype_do_test TOKEN(MTYPE, _do_test) -#define mtype_gc_test TOKEN(MTYPE, _gc_test) -#define mtype_is_filled TOKEN(MTYPE, _is_filled) -#define mtype_do_add TOKEN(MTYPE, _do_add) -#define mtype_do_del TOKEN(MTYPE, _do_del) -#define mtype_do_list TOKEN(MTYPE, _do_list) -#define mtype_do_head TOKEN(MTYPE, _do_head) -#define mtype_adt_elem TOKEN(MTYPE, _adt_elem) -#define mtype_add_timeout TOKEN(MTYPE, _add_timeout) -#define mtype_gc_init TOKEN(MTYPE, _gc_init) -#define mtype_kadt TOKEN(MTYPE, _kadt) -#define mtype_uadt TOKEN(MTYPE, _uadt) -#define mtype_destroy TOKEN(MTYPE, _destroy) -#define mtype_flush TOKEN(MTYPE, _flush) -#define mtype_head TOKEN(MTYPE, _head) -#define mtype_same_set TOKEN(MTYPE, _same_set) -#define mtype_elem TOKEN(MTYPE, _elem) -#define mtype_test TOKEN(MTYPE, _test) -#define mtype_add TOKEN(MTYPE, _add) -#define mtype_del TOKEN(MTYPE, _del) -#define mtype_list TOKEN(MTYPE, _list) -#define mtype_gc TOKEN(MTYPE, _gc) +#define mtype_do_test IPSET_TOKEN(MTYPE, _do_test) +#define mtype_gc_test IPSET_TOKEN(MTYPE, _gc_test) +#define mtype_is_filled IPSET_TOKEN(MTYPE, _is_filled) +#define mtype_do_add IPSET_TOKEN(MTYPE, _do_add) +#define mtype_do_del IPSET_TOKEN(MTYPE, _do_del) +#define mtype_do_list IPSET_TOKEN(MTYPE, _do_list) +#define mtype_do_head IPSET_TOKEN(MTYPE, _do_head) +#define mtype_adt_elem IPSET_TOKEN(MTYPE, _adt_elem) +#define mtype_add_timeout IPSET_TOKEN(MTYPE, _add_timeout) +#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) +#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) +#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy) +#define mtype_flush IPSET_TOKEN(MTYPE, _flush) +#define mtype_head IPSET_TOKEN(MTYPE, _head) +#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) +#define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_test IPSET_TOKEN(MTYPE, _test) +#define mtype_add IPSET_TOKEN(MTYPE, _add) +#define mtype_del IPSET_TOKEN(MTYPE, _del) +#define mtype_list IPSET_TOKEN(MTYPE, _list) +#define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype MTYPE #define ext_timeout(e, m) \ diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index f1a8128bef01..c2f89b1c1d92 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -25,12 +25,12 @@ #include #include -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counter support added */ +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 1 /* Counter support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("bitmap:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:ip"); #define MTYPE bitmap_ip @@ -401,8 +401,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = NFPROTO_IPV4, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_ip_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 3b30e0bef890..1d6551cc590e 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -25,12 +25,12 @@ #include #include -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counter support added */ +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 1 /* Counter support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:ip,mac"); #define MTYPE bitmap_ipmac @@ -460,8 +460,8 @@ static struct ip_set_type bitmap_ipmac_type = { .features = IPSET_TYPE_IP | IPSET_TYPE_MAC, .dimension = IPSET_DIM_TWO, .family = NFPROTO_IPV4, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_ipmac_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 8207d1fda528..b22048965d2a 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -20,12 +20,12 @@ #include #include -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counter support added */ +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 1 /* Counter support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("bitmap:port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:port"); #define MTYPE bitmap_port @@ -333,8 +333,8 @@ static struct ip_set_type bitmap_port_type = { .features = IPSET_TYPE_PORT, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_port_create, .create_policy = { [IPSET_ATTR_PORT] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 09a21dd5f120..68b9ccebabaa 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -17,9 +17,6 @@ #define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1) -#define CONCAT(a, b) a##b -#define TOKEN(a, b) CONCAT(a, b) - /* Hashing which uses arrays to resolve clashing. The hash table is resized * (doubled) when searching becomes too long. * Internally jhash is used with the assumption that the size of the @@ -222,41 +219,41 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #undef HKEY -#define mtype_data_equal TOKEN(MTYPE, _data_equal) +#define mtype_data_equal IPSET_TOKEN(MTYPE, _data_equal) #ifdef IP_SET_HASH_WITH_NETS -#define mtype_do_data_match TOKEN(MTYPE, _do_data_match) +#define mtype_do_data_match IPSET_TOKEN(MTYPE, _do_data_match) #else #define mtype_do_data_match(d) 1 #endif -#define mtype_data_set_flags TOKEN(MTYPE, _data_set_flags) -#define mtype_data_reset_flags TOKEN(MTYPE, _data_reset_flags) -#define mtype_data_netmask TOKEN(MTYPE, _data_netmask) -#define mtype_data_list TOKEN(MTYPE, _data_list) -#define mtype_data_next TOKEN(MTYPE, _data_next) -#define mtype_elem TOKEN(MTYPE, _elem) -#define mtype_add_cidr TOKEN(MTYPE, _add_cidr) -#define mtype_del_cidr TOKEN(MTYPE, _del_cidr) -#define mtype_ahash_memsize TOKEN(MTYPE, _ahash_memsize) -#define mtype_flush TOKEN(MTYPE, _flush) -#define mtype_destroy TOKEN(MTYPE, _destroy) -#define mtype_gc_init TOKEN(MTYPE, _gc_init) -#define mtype_same_set TOKEN(MTYPE, _same_set) -#define mtype_kadt TOKEN(MTYPE, _kadt) -#define mtype_uadt TOKEN(MTYPE, _uadt) +#define mtype_data_set_flags IPSET_TOKEN(MTYPE, _data_set_flags) +#define mtype_data_reset_flags IPSET_TOKEN(MTYPE, _data_reset_flags) +#define mtype_data_netmask IPSET_TOKEN(MTYPE, _data_netmask) +#define mtype_data_list IPSET_TOKEN(MTYPE, _data_list) +#define mtype_data_next IPSET_TOKEN(MTYPE, _data_next) +#define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr) +#define mtype_del_cidr IPSET_TOKEN(MTYPE, _del_cidr) +#define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize) +#define mtype_flush IPSET_TOKEN(MTYPE, _flush) +#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy) +#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) +#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) +#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) #define mtype MTYPE -#define mtype_elem TOKEN(MTYPE, _elem) -#define mtype_add TOKEN(MTYPE, _add) -#define mtype_del TOKEN(MTYPE, _del) -#define mtype_test_cidrs TOKEN(MTYPE, _test_cidrs) -#define mtype_test TOKEN(MTYPE, _test) -#define mtype_expire TOKEN(MTYPE, _expire) -#define mtype_resize TOKEN(MTYPE, _resize) -#define mtype_head TOKEN(MTYPE, _head) -#define mtype_list TOKEN(MTYPE, _list) -#define mtype_gc TOKEN(MTYPE, _gc) -#define mtype_variant TOKEN(MTYPE, _variant) -#define mtype_data_match TOKEN(MTYPE, _data_match) +#define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_add IPSET_TOKEN(MTYPE, _add) +#define mtype_del IPSET_TOKEN(MTYPE, _del) +#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) +#define mtype_test IPSET_TOKEN(MTYPE, _test) +#define mtype_expire IPSET_TOKEN(MTYPE, _expire) +#define mtype_resize IPSET_TOKEN(MTYPE, _resize) +#define mtype_head IPSET_TOKEN(MTYPE, _head) +#define mtype_list IPSET_TOKEN(MTYPE, _list) +#define mtype_gc IPSET_TOKEN(MTYPE, _gc) +#define mtype_variant IPSET_TOKEN(MTYPE, _variant) +#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) #ifndef HKEY_DATALEN #define HKEY_DATALEN sizeof(struct mtype_elem) @@ -941,13 +938,13 @@ nla_put_failure: } static int -TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt); +IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt); static int -TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); +IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); static const struct ip_set_type_variant mtype_variant = { .kadt = mtype_kadt, @@ -967,7 +964,7 @@ static const struct ip_set_type_variant mtype_variant = { #ifdef IP_SET_EMIT_CREATE static int -TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) +IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) { u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; u32 cadt_flags = 0; @@ -1045,9 +1042,9 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) set->data = h; if (set->family == NFPROTO_IPV4) - set->variant = &TOKEN(HTYPE, 4_variant); + set->variant = &IPSET_TOKEN(HTYPE, 4_variant); else - set->variant = &TOKEN(HTYPE, 6_variant); + set->variant = &IPSET_TOKEN(HTYPE, 6_variant); if (tb[IPSET_ATTR_CADT_FLAGS]) cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); @@ -1058,64 +1055,74 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; if (set->family == NFPROTO_IPV4) { - h->dsize = - sizeof(struct TOKEN(HTYPE, 4ct_elem)); + h->dsize = sizeof(struct + IPSET_TOKEN(HTYPE, 4ct_elem)); h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 4ct_elem), - timeout); + offsetof(struct + IPSET_TOKEN(HTYPE, 4ct_elem), + timeout); h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 4ct_elem), - counter); - TOKEN(HTYPE, 4_gc_init)(set, - TOKEN(HTYPE, 4_gc)); + offsetof(struct + IPSET_TOKEN(HTYPE, 4ct_elem), + counter); + IPSET_TOKEN(HTYPE, 4_gc_init)(set, + IPSET_TOKEN(HTYPE, 4_gc)); } else { - h->dsize = - sizeof(struct TOKEN(HTYPE, 6ct_elem)); + h->dsize = sizeof(struct + IPSET_TOKEN(HTYPE, 6ct_elem)); h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 6ct_elem), - timeout); + offsetof(struct + IPSET_TOKEN(HTYPE, 6ct_elem), + timeout); h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 6ct_elem), - counter); - TOKEN(HTYPE, 6_gc_init)(set, - TOKEN(HTYPE, 6_gc)); + offsetof(struct + IPSET_TOKEN(HTYPE, 6ct_elem), + counter); + IPSET_TOKEN(HTYPE, 6_gc_init)(set, + IPSET_TOKEN(HTYPE, 6_gc)); } } else { if (set->family == NFPROTO_IPV4) { h->dsize = - sizeof(struct TOKEN(HTYPE, 4c_elem)); + sizeof(struct + IPSET_TOKEN(HTYPE, 4c_elem)); h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 4c_elem), - counter); + offsetof(struct + IPSET_TOKEN(HTYPE, 4c_elem), + counter); } else { h->dsize = - sizeof(struct TOKEN(HTYPE, 6c_elem)); + sizeof(struct + IPSET_TOKEN(HTYPE, 6c_elem)); h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 6c_elem), - counter); + offsetof(struct + IPSET_TOKEN(HTYPE, 6c_elem), + counter); } } } else if (tb[IPSET_ATTR_TIMEOUT]) { h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; if (set->family == NFPROTO_IPV4) { - h->dsize = sizeof(struct TOKEN(HTYPE, 4t_elem)); + h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem)); h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 4t_elem), + offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem), timeout); - TOKEN(HTYPE, 4_gc_init)(set, TOKEN(HTYPE, 4_gc)); + IPSET_TOKEN(HTYPE, 4_gc_init)(set, + IPSET_TOKEN(HTYPE, 4_gc)); } else { - h->dsize = sizeof(struct TOKEN(HTYPE, 6t_elem)); + h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem)); h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 6t_elem), + offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem), timeout); - TOKEN(HTYPE, 6_gc_init)(set, TOKEN(HTYPE, 6_gc)); + IPSET_TOKEN(HTYPE, 6_gc_init)(set, + IPSET_TOKEN(HTYPE, 6_gc)); } } else { if (set->family == NFPROTO_IPV4) - h->dsize = sizeof(struct TOKEN(HTYPE, 4_elem)); + h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)); else - h->dsize = sizeof(struct TOKEN(HTYPE, 6_elem)); + h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)); } pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index c74e6e14cd93..de44fca687c3 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -23,12 +23,12 @@ #include #include -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counters support */ +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 1 /* Counters support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:ip", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip"); /* Type specific function prefix */ @@ -304,8 +304,8 @@ static struct ip_set_type hash_ip_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 7a2d2bd98d04..b514ff4e83ae 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -24,13 +24,13 @@ #include #include -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -#define REVISION_MAX 2 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +#define IPSET_TYPE_REV_MAX 2 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:ip,port", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port"); /* Type specific function prefix */ @@ -396,8 +396,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ipport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 34e8a1acce42..d05070d74604 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -24,13 +24,13 @@ #include #include -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -#define REVISION_MAX 2 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +#define IPSET_TYPE_REV_MAX 2 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:ip,port,ip", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip,port,ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port,ip"); /* Type specific function prefix */ @@ -414,8 +414,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ipportip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 9a80d8bc9f18..7d1dede4ab6d 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -24,15 +24,15 @@ #include #include -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -/* 2 Range as input support for IPv4 added */ -/* 3 nomatch flag support added */ -#define REVISION_MAX 4 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Range as input support for IPv4 added */ +/* 3 nomatch flag support added */ +#define IPSET_TYPE_REV_MAX 4 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:ip,port,net", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port,net"); /* Type specific function prefix */ @@ -574,8 +574,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ipportnet_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 223e9f546d0f..9cb9ef43d941 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -22,14 +22,14 @@ #include #include -#define REVISION_MIN 0 -/* 1 Range as input support for IPv4 added */ -/* 2 nomatch flag support added */ -#define REVISION_MAX 3 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 Range as input support for IPv4 added */ +/* 2 nomatch flag support added */ +#define IPSET_TYPE_REV_MAX 3 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:net", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net"); /* Type specific function prefix */ @@ -406,8 +406,8 @@ static struct ip_set_type hash_net_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_net_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 7d798d5d5cd3..2310fc29e89e 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -23,14 +23,14 @@ #include #include -#define REVISION_MIN 0 -/* 1 nomatch flag support added */ -/* 2 /0 support added */ -#define REVISION_MAX 3 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 nomatch flag support added */ +/* 2 /0 support added */ +#define IPSET_TYPE_REV_MAX 3 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:net,iface", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net,iface"); /* Interface name rbtree */ @@ -645,8 +645,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = { IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_netiface_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 09d6690bee6f..1601d489fdbc 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -23,15 +23,15 @@ #include #include -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -/* 2 Range as input support for IPv4 added */ -/* 3 nomatch flag support added */ -#define REVISION_MAX 4 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Range as input support for IPv4 added */ +/* 3 nomatch flag support added */ +#define IPSET_TYPE_REV_MAX 4 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:net,port", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:net,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net,port"); /* Type specific function prefix */ @@ -518,8 +518,8 @@ static struct ip_set_type hash_netport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_netport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 68299ee15847..a9e301f6e093 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -15,12 +15,12 @@ #include #include -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 1 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("list:set", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("list:set", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_list:set"); /* Member elements */ @@ -703,8 +703,8 @@ static struct ip_set_type list_set_type __read_mostly = { .features = IPSET_TYPE_NAME | IPSET_DUMP_LAST, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = list_set_create, .create_policy = { [IPSET_ATTR_SIZE] = { .type = NLA_U32 }, -- cgit v1.2.3-71-gd317 From b8cd97865c903e032db85e5a4f2783928c56f2bd Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 2 May 2013 10:52:27 +0200 Subject: netfilter: ipset: Use fix sized type for timeout in the extension part Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index f900f33a5f3d..69aa60487f05 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -67,7 +67,7 @@ enum ip_set_offset { #define SET_WITH_COUNTER(s) ((s)->extensions & IPSET_EXT_COUNTER) struct ip_set_ext { - unsigned long timeout; + u32 timeout; u64 packets; u64 bytes; }; -- cgit v1.2.3-71-gd317 From a04d8b6bd9113f3e7f0c216dcaa3c1ad498f2a96 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 30 Sep 2013 09:05:54 +0200 Subject: netfilter: ipset: Prepare ipset to support multiple networks for hash types In order to support hash:net,net, hash:net,port,net etc. types, arrays are introduced for the book-keeping of existing cidr sizes and network numbers in a set. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 2 + net/netfilter/ipset/ip_set_hash_gen.h | 78 +++++++++++++++-------------- net/netfilter/ipset/ip_set_hash_ipportnet.c | 4 +- net/netfilter/ipset/ip_set_hash_net.c | 4 +- net/netfilter/ipset/ip_set_hash_netiface.c | 4 +- net/netfilter/ipset/ip_set_hash_netport.c | 4 +- 6 files changed, 50 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 69aa60487f05..56012a3431b2 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -398,6 +398,8 @@ bitmap_bytes(u32 a, u32 b) { .bytes = ULLONG_MAX, .packets = ULLONG_MAX, \ .timeout = (map)->timeout } +#define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b)) + #define IPSET_CONCAT(a, b) a##b #define IPSET_TOKEN(a, b) IPSET_CONCAT(a, b) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 68b9ccebabaa..a8332404d9b4 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -77,10 +77,14 @@ struct htable { #define hbucket(h, i) (&((h)->bucket[i])) +#ifndef IPSET_NET_COUNT +#define IPSET_NET_COUNT 1 +#endif + /* Book-keeping of the prefixes added to the set */ struct net_prefixes { - u8 cidr; /* the different cidr values in the set */ - u32 nets; /* number of elements per cidr */ + u32 nets[IPSET_NET_COUNT]; /* number of elements per cidr */ + u8 cidr[IPSET_NET_COUNT]; /* the different cidr values in the set */ }; /* Compute the hash table size */ @@ -165,13 +169,13 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128) #ifdef IP_SET_HASH_WITH_MULTI -#define NETS_LENGTH(family) (SET_HOST_MASK(family) + 1) +#define NLEN(family) (SET_HOST_MASK(family) + 1) #else -#define NETS_LENGTH(family) SET_HOST_MASK(family) +#define NLEN(family) SET_HOST_MASK(family) #endif #else -#define NETS_LENGTH(family) 0 +#define NLEN(family) 0 #endif /* IP_SET_HASH_WITH_NETS */ #define ext_timeout(e, h) \ @@ -296,49 +300,49 @@ struct htype { /* Network cidr size book keeping when the hash stores different * sized networks */ static void -mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length) +mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) { int i, j; /* Add in increasing prefix order, so larger cidr first */ - for (i = 0, j = -1; i < nets_length && h->nets[i].nets; i++) { + for (i = 0, j = -1; i < nets_length && h->nets[i].nets[n]; i++) { if (j != -1) continue; - else if (h->nets[i].cidr < cidr) + else if (h->nets[i].cidr[n] < cidr) j = i; - else if (h->nets[i].cidr == cidr) { - h->nets[i].nets++; + else if (h->nets[i].cidr[n] == cidr) { + h->nets[i].nets[n]++; return; } } if (j != -1) { for (; i > j; i--) { - h->nets[i].cidr = h->nets[i - 1].cidr; - h->nets[i].nets = h->nets[i - 1].nets; + h->nets[i].cidr[n] = h->nets[i - 1].cidr[n]; + h->nets[i].nets[n] = h->nets[i - 1].nets[n]; } } - h->nets[i].cidr = cidr; - h->nets[i].nets = 1; + h->nets[i].cidr[n] = cidr; + h->nets[i].nets[n] = 1; } static void -mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length) +mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) { u8 i, j, net_end = nets_length - 1; for (i = 0; i < nets_length; i++) { - if (h->nets[i].cidr != cidr) + if (h->nets[i].cidr[n] != cidr) continue; - if (h->nets[i].nets > 1 || i == net_end || - h->nets[i + 1].nets == 0) { - h->nets[i].nets--; + if (h->nets[i].nets[n] > 1 || i == net_end || + h->nets[i + 1].nets[n] == 0) { + h->nets[i].nets[n]--; return; } - for (j = i; j < net_end && h->nets[j].nets; j++) { - h->nets[j].cidr = h->nets[j + 1].cidr; - h->nets[j].nets = h->nets[j + 1].nets; + for (j = i; j < net_end && h->nets[j].nets[n]; j++) { + h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; + h->nets[j].nets[n] = h->nets[j + 1].nets[n]; } - h->nets[j].nets = 0; + h->nets[j].nets[n] = 0; return; } } @@ -382,8 +386,7 @@ mtype_flush(struct ip_set *set) } } #ifdef IP_SET_HASH_WITH_NETS - memset(h->nets, 0, sizeof(struct net_prefixes) - * NETS_LENGTH(set->family)); + memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family)); #endif h->elements = 0; } @@ -459,7 +462,7 @@ mtype_expire(struct htype *h, u8 nets_length, size_t dsize) pr_debug("expired %u/%u\n", i, j); #ifdef IP_SET_HASH_WITH_NETS mtype_del_cidr(h, CIDR(data->cidr), - nets_length); + nets_length, 0); #endif if (j != n->pos - 1) /* Not last one */ @@ -494,7 +497,7 @@ mtype_gc(unsigned long ul_set) pr_debug("called\n"); write_lock_bh(&set->lock); - mtype_expire(h, NETS_LENGTH(set->family), h->dsize); + mtype_expire(h, NLEN(set->family), h->dsize); write_unlock_bh(&set->lock); h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; @@ -523,8 +526,7 @@ mtype_resize(struct ip_set *set, bool retried) if (SET_WITH_TIMEOUT(set) && !retried) { i = h->elements; write_lock_bh(&set->lock); - mtype_expire(set->data, NETS_LENGTH(set->family), - h->dsize); + mtype_expire(set->data, NLEN(set->family), h->dsize); write_unlock_bh(&set->lock); if (h->elements < i) return 0; @@ -607,7 +609,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) /* FIXME: when set is full, we slow down here */ - mtype_expire(h, NETS_LENGTH(set->family), h->dsize); + mtype_expire(h, NLEN(set->family), h->dsize); if (h->elements >= h->maxelem) { if (net_ratelimit()) @@ -645,8 +647,8 @@ reuse_slot: /* Fill out reused slot */ data = ahash_data(n, j, h->dsize); #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(data->cidr), NETS_LENGTH(set->family)); - mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); + mtype_del_cidr(h, CIDR(data->cidr), NLEN(set->family), 0); + mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif } else { /* Use/create a new slot */ @@ -659,7 +661,7 @@ reuse_slot: } data = ahash_data(n, n->pos++, h->dsize); #ifdef IP_SET_HASH_WITH_NETS - mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); + mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif h->elements++; } @@ -711,7 +713,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, n->pos--; h->elements--; #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); + mtype_del_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif if (n->pos + AHASH_INIT_SIZE < n->size) { void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) @@ -760,11 +762,11 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, struct mtype_elem *data; int i, j = 0; u32 key, multi = 0; - u8 nets_length = NETS_LENGTH(set->family); + u8 nets_length = NLEN(set->family); pr_debug("test by nets\n"); - for (; j < nets_length && h->nets[j].nets && !multi; j++) { - mtype_data_netmask(d, h->nets[j].cidr); + for (; j < nets_length && h->nets[j].nets[0] && !multi; j++) { + mtype_data_netmask(d, h->nets[j].cidr[0]); key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { @@ -839,7 +841,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) size_t memsize; t = rcu_dereference_bh_nfnl(h->table); - memsize = mtype_ahash_memsize(h, t, NETS_LENGTH(set->family)); + memsize = mtype_ahash_memsize(h, t, NLEN(set->family)); nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index f111558c4597..6ce5a8e2c44e 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -170,7 +170,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1 + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); @@ -454,7 +454,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1 + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 0a64dad156d9..ec1c7dc10489 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -143,7 +143,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); @@ -338,7 +338,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net6_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 846371b08630..814b4e31a7f8 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -265,7 +265,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK, + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); @@ -534,7 +534,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK, + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index d98a685cd916..3bd923d3f179 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -162,7 +162,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1 + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); @@ -407,7 +407,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1, + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); -- cgit v1.2.3-71-gd317 From f925f7056920213889c5e61445f9529f1a86ae41 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 6 Sep 2013 22:31:40 +0200 Subject: netfilter: ipset: Rename extension offset ids to extension ids Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 16 ++++++++-------- net/netfilter/ipset/ip_set_bitmap_gen.h | 4 ++-- net/netfilter/ipset/ip_set_bitmap_ip.c | 10 +++++----- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 10 +++++----- net/netfilter/ipset/ip_set_bitmap_port.c | 10 +++++----- net/netfilter/ipset/ip_set_hash_gen.h | 22 +++++++++++----------- net/netfilter/ipset/ip_set_list_set.c | 14 +++++++------- 7 files changed, 43 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 56012a3431b2..b4db7912bf0d 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -56,20 +56,20 @@ enum ip_set_extension { IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER), }; -/* Extension offsets */ -enum ip_set_offset { - IPSET_OFFSET_TIMEOUT = 0, - IPSET_OFFSET_COUNTER, - IPSET_OFFSET_MAX, -}; - #define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT) #define SET_WITH_COUNTER(s) ((s)->extensions & IPSET_EXT_COUNTER) +/* Extension id, in size order */ +enum ip_set_ext_id { + IPSET_EXT_ID_COUNTER = 0, + IPSET_EXT_ID_TIMEOUT, + IPSET_EXT_ID_MAX, +}; + struct ip_set_ext { - u32 timeout; u64 packets; u64 bytes; + u32 timeout; }; struct ip_set; diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index d39905e7e881..889a929b908f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -33,9 +33,9 @@ #define mtype MTYPE #define ext_timeout(e, m) \ - (unsigned long *)((e) + (m)->offset[IPSET_OFFSET_TIMEOUT]) + (unsigned long *)((e) + (m)->offset[IPSET_EXT_ID_TIMEOUT]) #define ext_counter(e, m) \ - (struct ip_set_counter *)((e) + (m)->offset[IPSET_OFFSET_COUNTER]) + (struct ip_set_counter *)((e) + (m)->offset[IPSET_EXT_ID_COUNTER]) #define get_ext(map, id) ((map)->extensions + (map)->dsize * (id)) static void diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index ce99d2652d47..2ee210ef49a2 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -45,7 +45,7 @@ struct bitmap_ip { u32 hosts; /* number of hosts in a subnet */ size_t memsize; /* members size */ size_t dsize; /* extensions struct size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ u8 netmask; /* subnet netmask */ u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ @@ -342,9 +342,9 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_ipct_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipct_elem, counter); if (!init_map_ip(set, map, first_ip, last_ip, @@ -360,7 +360,7 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) bitmap_ip_gc_init(set, bitmap_ip_gc); } else { map->dsize = sizeof(struct bitmap_ipc_elem); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipc_elem, counter); if (!init_map_ip(set, map, first_ip, last_ip, @@ -371,7 +371,7 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) } } else if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_ipt_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipt_elem, timeout); if (!init_map_ip(set, map, first_ip, last_ip, diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 6d5bad93026b..e711875ea452 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -52,7 +52,7 @@ struct bitmap_ipmac { struct timer_list gc; /* garbage collector */ size_t memsize; /* members size */ size_t dsize; /* size of element */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ }; /* ADT structure for generic function args */ @@ -405,9 +405,9 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_ipmacct_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipmacct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipmacct_elem, counter); if (!init_map_ipmac(set, map, first_ip, last_ip, @@ -421,7 +421,7 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); } else { map->dsize = sizeof(struct bitmap_ipmacc_elem); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipmacc_elem, counter); if (!init_map_ipmac(set, map, first_ip, last_ip, @@ -432,7 +432,7 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], } } else if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_ipmact_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipmact_elem, timeout); if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index b22048965d2a..bebc137a5737 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -39,7 +39,7 @@ struct bitmap_port { u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ size_t dsize; /* extensions struct size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ }; @@ -282,9 +282,9 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_portct_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_portct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_portct_elem, counter); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); @@ -297,7 +297,7 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) bitmap_port_gc_init(set, bitmap_port_gc); } else { map->dsize = sizeof(struct bitmap_portc_elem); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_portc_elem, counter); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); @@ -306,7 +306,7 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) } } else if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_portt_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_portt_elem, timeout); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index a8332404d9b4..e4db9250f337 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -179,9 +179,9 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #endif /* IP_SET_HASH_WITH_NETS */ #define ext_timeout(e, h) \ -(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_TIMEOUT]) +(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_TIMEOUT]) #define ext_counter(e, h) \ -(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_COUNTER]) +(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_COUNTER]) #endif /* _IP_SET_HASH_GEN_H */ @@ -278,7 +278,7 @@ struct htype { u32 initval; /* random jhash init value */ u32 timeout; /* timeout value, if enabled */ size_t dsize; /* data struct size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ struct timer_list gc; /* garbage collection when timeout enabled */ struct mtype_elem next; /* temporary storage for uadd */ #ifdef IP_SET_HASH_WITH_MULTI @@ -1059,11 +1059,11 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) if (set->family == NFPROTO_IPV4) { h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4ct_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = + h->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 4ct_elem), timeout); - h->offset[IPSET_OFFSET_COUNTER] = + h->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 4ct_elem), counter); @@ -1072,11 +1072,11 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) } else { h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6ct_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = + h->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 6ct_elem), timeout); - h->offset[IPSET_OFFSET_COUNTER] = + h->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 6ct_elem), counter); @@ -1088,7 +1088,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4c_elem)); - h->offset[IPSET_OFFSET_COUNTER] = + h->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 4c_elem), counter); @@ -1096,7 +1096,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6c_elem)); - h->offset[IPSET_OFFSET_COUNTER] = + h->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 6c_elem), counter); @@ -1107,14 +1107,14 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) set->extensions |= IPSET_EXT_TIMEOUT; if (set->family == NFPROTO_IPV4) { h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = + h->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem), timeout); IPSET_TOKEN(HTYPE, 4_gc_init)(set, IPSET_TOKEN(HTYPE, 4_gc)); } else { h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = + h->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem), timeout); IPSET_TOKEN(HTYPE, 6_gc_init)(set, diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index a9e301f6e093..0ed19b5e675e 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -59,7 +59,7 @@ struct set_adt_elem { /* Type structure */ struct list_set { size_t dsize; /* element size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ u32 size; /* size of set list array */ u32 timeout; /* timeout value */ struct timer_list gc; /* garbage collection */ @@ -73,9 +73,9 @@ list_set_elem(const struct list_set *map, u32 id) } #define ext_timeout(e, m) \ -(unsigned long *)((void *)(e) + (m)->offset[IPSET_OFFSET_TIMEOUT]) +(unsigned long *)((void *)(e) + (m)->offset[IPSET_EXT_ID_TIMEOUT]) #define ext_counter(e, m) \ -(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_OFFSET_COUNTER]) +(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_EXT_ID_COUNTER]) static int list_set_ktest(struct ip_set *set, const struct sk_buff *skb, @@ -667,9 +667,9 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (!map) return -ENOMEM; set->extensions |= IPSET_EXT_TIMEOUT; - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct setct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct setct_elem, counter); list_set_gc_init(set, list_set_gc); } else { @@ -677,7 +677,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) sizeof(struct setc_elem), 0); if (!map) return -ENOMEM; - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct setc_elem, counter); } } else if (tb[IPSET_ATTR_TIMEOUT]) { @@ -686,7 +686,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (!map) return -ENOMEM; set->extensions |= IPSET_EXT_TIMEOUT; - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct sett_elem, timeout); list_set_gc_init(set, list_set_gc); } else { -- cgit v1.2.3-71-gd317 From ca134ce86451f3f5ac45ffbf1494a1f42110bf93 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 7 Sep 2013 00:10:07 +0200 Subject: netfilter: ipset: Move extension data to set structure Default timeout and extension offsets are moved to struct set, because all set types supports all extensions and it makes possible to generalize extension support. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 29 ++++-- include/linux/netfilter/ipset/ip_set_timeout.h | 4 +- net/netfilter/ipset/ip_set_bitmap_gen.h | 59 ++++++------ net/netfilter/ipset/ip_set_bitmap_ip.c | 45 +++++---- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 64 ++++++------- net/netfilter/ipset/ip_set_bitmap_port.c | 44 ++++----- net/netfilter/ipset/ip_set_hash_gen.h | 127 ++++++++++++------------- net/netfilter/ipset/ip_set_hash_ip.c | 8 +- net/netfilter/ipset/ip_set_hash_ipport.c | 10 +- net/netfilter/ipset/ip_set_hash_ipportip.c | 10 +- net/netfilter/ipset/ip_set_hash_ipportnet.c | 14 +-- net/netfilter/ipset/ip_set_hash_net.c | 9 +- net/netfilter/ipset/ip_set_hash_netiface.c | 8 +- net/netfilter/ipset/ip_set_hash_netport.c | 8 +- net/netfilter/ipset/ip_set_list_set.c | 116 ++++++++++------------ 15 files changed, 266 insertions(+), 289 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index b4db7912bf0d..992a2f58dbd3 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -72,6 +72,16 @@ struct ip_set_ext { u32 timeout; }; +struct ip_set_counter { + atomic64_t bytes; + atomic64_t packets; +}; + +#define ext_timeout(e, s) \ +(unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT]) +#define ext_counter(e, s) \ +(struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER]) + struct ip_set; typedef int (*ipset_adtfn)(struct ip_set *set, void *value, @@ -179,15 +189,16 @@ struct ip_set { u8 revision; /* Extensions */ u8 extensions; + /* Default timeout value, if enabled */ + u32 timeout; + /* Element data size */ + size_t dsize; + /* Offsets to extensions in elements */ + size_t offset[IPSET_EXT_ID_MAX]; /* The type specific data */ void *data; }; -struct ip_set_counter { - atomic64_t bytes; - atomic64_t packets; -}; - static inline void ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter) { @@ -390,13 +401,13 @@ bitmap_bytes(u32 a, u32 b) #include -#define IP_SET_INIT_KEXT(skb, opt, map) \ +#define IP_SET_INIT_KEXT(skb, opt, set) \ { .bytes = (skb)->len, .packets = 1, \ - .timeout = ip_set_adt_opt_timeout(opt, map) } + .timeout = ip_set_adt_opt_timeout(opt, set) } -#define IP_SET_INIT_UEXT(map) \ +#define IP_SET_INIT_UEXT(set) \ { .bytes = ULLONG_MAX, .packets = ULLONG_MAX, \ - .timeout = (map)->timeout } + .timeout = (set)->timeout } #define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b)) diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 3aac04167ca7..83c2f9e0886c 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -23,8 +23,8 @@ /* Set is defined with timeout support: timeout value may be 0 */ #define IPSET_NO_TIMEOUT UINT_MAX -#define ip_set_adt_opt_timeout(opt, map) \ -((opt)->ext.timeout != IPSET_NO_TIMEOUT ? (opt)->ext.timeout : (map)->timeout) +#define ip_set_adt_opt_timeout(opt, set) \ +((opt)->ext.timeout != IPSET_NO_TIMEOUT ? (opt)->ext.timeout : (set)->timeout) static inline unsigned int ip_set_timeout_uget(struct nlattr *tb) diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 889a929b908f..f32ddbca3923 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -32,11 +32,7 @@ #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype MTYPE -#define ext_timeout(e, m) \ - (unsigned long *)((e) + (m)->offset[IPSET_EXT_ID_TIMEOUT]) -#define ext_counter(e, m) \ - (struct ip_set_counter *)((e) + (m)->offset[IPSET_EXT_ID_COUNTER]) -#define get_ext(map, id) ((map)->extensions + (map)->dsize * (id)) +#define get_ext(set, map, id) ((map)->extensions + (set)->dsize * (id)) static void mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) @@ -46,7 +42,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) init_timer(&map->gc); map->gc.data = (unsigned long) set; map->gc.function = gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } @@ -59,7 +55,7 @@ mtype_destroy(struct ip_set *set) del_timer_sync(&map->gc); ip_set_free(map->members); - if (map->dsize) + if (set->dsize) ip_set_free(map->extensions); kfree(map); @@ -88,9 +84,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->memsize + - map->dsize * map->elements)) || + set->dsize * map->elements)) || (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) || (SET_WITH_COUNTER(set) && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(IPSET_FLAG_WITH_COUNTERS)))) @@ -108,16 +104,16 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - void *x = get_ext(map, e->id); - int ret = mtype_do_test(e, map); + void *x = get_ext(set, map, e->id); + int ret = mtype_do_test(e, map, set->dsize); if (ret <= 0) return ret; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, map))) + ip_set_timeout_expired(ext_timeout(x, set))) return 0; if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(x, map), ext, mext, flags); + ip_set_update_counter(ext_counter(x, set), ext, mext, flags); return 1; } @@ -127,12 +123,12 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - void *x = get_ext(map, e->id); - int ret = mtype_do_add(e, map, flags); + void *x = get_ext(set, map, e->id); + int ret = mtype_do_add(e, map, flags, set->dsize); if (ret == IPSET_ADD_FAILED) { if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, map))) + ip_set_timeout_expired(ext_timeout(x, set))) ret = 0; else if (!(flags & IPSET_FLAG_EXIST)) return -IPSET_ERR_EXIST; @@ -140,13 +136,13 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_TIMEOUT(set)) #ifdef IP_SET_BITMAP_STORED_TIMEOUT - mtype_add_timeout(ext_timeout(x, map), e, ext, map, ret); + mtype_add_timeout(ext_timeout(x, set), e, ext, set, map, ret); #else - ip_set_timeout_set(ext_timeout(x, map), ext->timeout); + ip_set_timeout_set(ext_timeout(x, set), ext->timeout); #endif if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(x, map), ext); + ip_set_init_counter(ext_counter(x, set), ext); return 0; } @@ -156,11 +152,11 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - const void *x = get_ext(map, e->id); + const void *x = get_ext(set, map, e->id); if (mtype_do_del(e, map) || (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, map)))) + ip_set_timeout_expired(ext_timeout(x, set)))) return -IPSET_ERR_EXIST; return 0; @@ -180,13 +176,13 @@ mtype_list(const struct ip_set *set, return -EMSGSIZE; for (; cb->args[2] < map->elements; cb->args[2]++) { id = cb->args[2]; - x = get_ext(map, id); + x = get_ext(set, map, id); if (!test_bit(id, map->members) || (SET_WITH_TIMEOUT(set) && #ifdef IP_SET_BITMAP_STORED_TIMEOUT mtype_is_filled((const struct mtype_elem *) x) && #endif - ip_set_timeout_expired(ext_timeout(x, map)))) + ip_set_timeout_expired(ext_timeout(x, set)))) continue; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { @@ -196,23 +192,24 @@ mtype_list(const struct ip_set *set, } else goto nla_put_failure; } - if (mtype_do_list(skb, map, id)) + if (mtype_do_list(skb, map, id, set->dsize)) goto nla_put_failure; if (SET_WITH_TIMEOUT(set)) { #ifdef IP_SET_BITMAP_STORED_TIMEOUT if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(ip_set_timeout_stored(map, id, - ext_timeout(x, map))))) + ext_timeout(x, set), + set->dsize)))) goto nla_put_failure; #else if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(ip_set_timeout_get( - ext_timeout(x, map))))) + ext_timeout(x, set))))) goto nla_put_failure; #endif } if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(x, map))) + ip_set_put_counter(skb, ext_counter(x, set))) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -245,14 +242,14 @@ mtype_gc(unsigned long ul_set) * but adding/deleting new entries is locked out */ read_lock_bh(&set->lock); for (id = 0; id < map->elements; id++) - if (mtype_gc_test(id, map)) { - x = get_ext(map, id); - if (ip_set_timeout_expired(ext_timeout(x, map))) + if (mtype_gc_test(id, map, set->dsize)) { + x = get_ext(set, map, id); + if (ip_set_timeout_expired(ext_timeout(x, set))) clear_bit(id, map->members); } read_unlock_bh(&set->lock); - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 2ee210ef49a2..363022edb8fb 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -44,10 +44,7 @@ struct bitmap_ip { u32 elements; /* number of max elements in the set */ u32 hosts; /* number of hosts in a subnet */ size_t memsize; /* members size */ - size_t dsize; /* extensions struct size */ - size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ u8 netmask; /* subnet netmask */ - u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ }; @@ -65,20 +62,21 @@ ip_to_id(const struct bitmap_ip *m, u32 ip) /* Common functions */ static inline int -bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map) +bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, + struct bitmap_ip *map, size_t dsize) { return !!test_bit(e->id, map->members); } static inline int -bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map) +bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map, size_t dsize) { return !!test_bit(id, map->members); } static inline int bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map, - u32 flags) + u32 flags, size_t dsize) { return !!test_and_set_bit(e->id, map->members); } @@ -90,7 +88,8 @@ bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map) } static inline int -bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id) +bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id, + size_t dsize) { return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip + id * map->hosts)); @@ -113,7 +112,7 @@ bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb, struct bitmap_ip *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ip_adt_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip; ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); @@ -133,7 +132,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; u32 ip = 0, ip_to = 0; struct bitmap_ip_adt_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret = 0; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -200,7 +199,7 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b) return x->first_ip == y->first_ip && x->last_ip == y->last_ip && x->netmask == y->netmask && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -240,8 +239,8 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, map->members = ip_set_alloc(map->memsize); if (!map->members) return false; - if (map->dsize) { - map->extensions = ip_set_alloc(map->dsize * elements); + if (set->dsize) { + map->extensions = ip_set_alloc(set->dsize * elements); if (!map->extensions) { kfree(map->members); return false; @@ -252,7 +251,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, map->elements = elements; map->hosts = hosts; map->netmask = netmask; - map->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; set->data = map; set->family = NFPROTO_IPV4; @@ -341,10 +340,10 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipct_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_ipct_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipct_elem, timeout); - map->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipct_elem, counter); if (!init_map_ip(set, map, first_ip, last_ip, @@ -353,14 +352,14 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) return -ENOMEM; } - map->timeout = ip_set_timeout_uget( + set->timeout = ip_set_timeout_uget( tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_ip_gc_init(set, bitmap_ip_gc); } else { - map->dsize = sizeof(struct bitmap_ipc_elem); - map->offset[IPSET_EXT_ID_COUNTER] = + set->dsize = sizeof(struct bitmap_ipc_elem); + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipc_elem, counter); if (!init_map_ip(set, map, first_ip, last_ip, @@ -370,8 +369,8 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) } } } else if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipt_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_ipt_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipt_elem, timeout); if (!init_map_ip(set, map, first_ip, last_ip, @@ -380,12 +379,12 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) return -ENOMEM; } - map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_ip_gc_init(set, bitmap_ip_gc); } else { - map->dsize = 0; + set->dsize = 0; if (!init_map_ip(set, map, first_ip, last_ip, elements, hosts, netmask)) { kfree(map); diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index e711875ea452..74576cb19264 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -48,11 +48,8 @@ struct bitmap_ipmac { u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ - u32 timeout; /* timeout value */ - struct timer_list gc; /* garbage collector */ size_t memsize; /* members size */ - size_t dsize; /* size of element */ - size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ + struct timer_list gc; /* garbage collector */ }; /* ADT structure for generic function args */ @@ -82,13 +79,13 @@ get_elem(void *extensions, u16 id, size_t dsize) static inline int bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, - const struct bitmap_ipmac *map) + const struct bitmap_ipmac *map, size_t dsize) { const struct bitmap_ipmac_elem *elem; if (!test_bit(e->id, map->members)) return 0; - elem = get_elem(map->extensions, e->id, map->dsize); + elem = get_elem(map->extensions, e->id, dsize); if (elem->filled == MAC_FILLED) return e->ether == NULL || ether_addr_equal(e->ether, elem->ether); @@ -97,13 +94,13 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, } static inline int -bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map) +bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize) { const struct bitmap_ipmac_elem *elem; if (!test_bit(id, map->members)) return 0; - elem = get_elem(map->extensions, id, map->dsize); + elem = get_elem(map->extensions, id, dsize); /* Timer not started for the incomplete elements */ return elem->filled == MAC_FILLED; } @@ -117,13 +114,13 @@ bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem) static inline int bitmap_ipmac_add_timeout(unsigned long *timeout, const struct bitmap_ipmac_adt_elem *e, - const struct ip_set_ext *ext, + const struct ip_set_ext *ext, struct ip_set *set, struct bitmap_ipmac *map, int mode) { u32 t = ext->timeout; if (mode == IPSET_ADD_START_STORED_TIMEOUT) { - if (t == map->timeout) + if (t == set->timeout) /* Timeout was not specified, get stored one */ t = *timeout; ip_set_timeout_set(timeout, t); @@ -142,11 +139,11 @@ bitmap_ipmac_add_timeout(unsigned long *timeout, static inline int bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, - struct bitmap_ipmac *map, u32 flags) + struct bitmap_ipmac *map, u32 flags, size_t dsize) { struct bitmap_ipmac_elem *elem; - elem = get_elem(map->extensions, e->id, map->dsize); + elem = get_elem(map->extensions, e->id, dsize); if (test_and_set_bit(e->id, map->members)) { if (elem->filled == MAC_FILLED) { if (e->ether && (flags & IPSET_FLAG_EXIST)) @@ -179,10 +176,11 @@ bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e, } static inline unsigned long -ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout) +ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout, + size_t dsize) { const struct bitmap_ipmac_elem *elem = - get_elem(map->extensions, id, map->dsize); + get_elem(map->extensions, id, dsize); return elem->filled == MAC_FILLED ? ip_set_timeout_get(timeout) : *timeout; @@ -190,10 +188,10 @@ ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout) static inline int bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map, - u32 id) + u32 id, size_t dsize) { const struct bitmap_ipmac_elem *elem = - get_elem(map->extensions, id, map->dsize); + get_elem(map->extensions, id, dsize); return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip + id)) || @@ -216,7 +214,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ipmac_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip; /* MAC can be src only */ @@ -245,7 +243,7 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], const struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ipmac_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0; int ret = 0; @@ -285,7 +283,7 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b) return x->first_ip == y->first_ip && x->last_ip == y->last_ip && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -330,11 +328,11 @@ static bool init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, u32 first_ip, u32 last_ip, u32 elements) { - map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize); + map->members = ip_set_alloc((last_ip - first_ip + 1) * set->dsize); if (!map->members) return false; - if (map->dsize) { - map->extensions = ip_set_alloc(map->dsize * elements); + if (set->dsize) { + map->extensions = ip_set_alloc(set->dsize * elements); if (!map->extensions) { kfree(map->members); return false; @@ -343,7 +341,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, map->first_ip = first_ip; map->last_ip = last_ip; map->elements = elements; - map->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; set->data = map; set->family = NFPROTO_IPV4; @@ -404,10 +402,10 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipmacct_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_ipmacct_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipmacct_elem, timeout); - map->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipmacct_elem, counter); if (!init_map_ipmac(set, map, first_ip, last_ip, @@ -415,13 +413,13 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], kfree(map); return -ENOMEM; } - map->timeout = ip_set_timeout_uget( + set->timeout = ip_set_timeout_uget( tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); } else { - map->dsize = sizeof(struct bitmap_ipmacc_elem); - map->offset[IPSET_EXT_ID_COUNTER] = + set->dsize = sizeof(struct bitmap_ipmacc_elem); + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipmacc_elem, counter); if (!init_map_ipmac(set, map, first_ip, last_ip, @@ -431,19 +429,19 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], } } } else if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipmact_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_ipmact_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipmact_elem, timeout); if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { kfree(map); return -ENOMEM; } - map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); } else { - map->dsize = sizeof(struct bitmap_ipmac_elem); + set->dsize = sizeof(struct bitmap_ipmac_elem); if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { kfree(map); diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index bebc137a5737..71da31935499 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -38,9 +38,6 @@ struct bitmap_port { u16 last_port; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ - size_t dsize; /* extensions struct size */ - size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ - u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ }; @@ -59,20 +56,20 @@ port_to_id(const struct bitmap_port *m, u16 port) static inline int bitmap_port_do_test(const struct bitmap_port_adt_elem *e, - const struct bitmap_port *map) + const struct bitmap_port *map, size_t dsize) { return !!test_bit(e->id, map->members); } static inline int -bitmap_port_gc_test(u16 id, const struct bitmap_port *map) +bitmap_port_gc_test(u16 id, const struct bitmap_port *map, size_t dsize) { return !!test_bit(id, map->members); } static inline int bitmap_port_do_add(const struct bitmap_port_adt_elem *e, - struct bitmap_port *map, u32 flags) + struct bitmap_port *map, u32 flags, size_t dsize) { return !!test_and_set_bit(e->id, map->members); } @@ -85,7 +82,8 @@ bitmap_port_do_del(const struct bitmap_port_adt_elem *e, } static inline int -bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id) +bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id, + size_t dsize) { return nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port + id)); @@ -106,7 +104,7 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb, struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_port_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); __be16 __port; u16 port = 0; @@ -131,7 +129,7 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_port_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port; /* wraparound */ u16 port_to; int ret = 0; @@ -191,7 +189,7 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b) return x->first_port == y->first_port && x->last_port == y->last_port && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -230,8 +228,8 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, map->members = ip_set_alloc(map->memsize); if (!map->members) return false; - if (map->dsize) { - map->extensions = ip_set_alloc(map->dsize * map->elements); + if (set->dsize) { + map->extensions = ip_set_alloc(set->dsize * map->elements); if (!map->extensions) { kfree(map->members); return false; @@ -239,7 +237,7 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, } map->first_port = first_port; map->last_port = last_port; - map->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; set->data = map; set->family = NFPROTO_UNSPEC; @@ -281,23 +279,23 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_portct_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_portct_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_portct_elem, timeout); - map->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_portct_elem, counter); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); return -ENOMEM; } - map->timeout = + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_port_gc_init(set, bitmap_port_gc); } else { - map->dsize = sizeof(struct bitmap_portc_elem); - map->offset[IPSET_EXT_ID_COUNTER] = + set->dsize = sizeof(struct bitmap_portc_elem); + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_portc_elem, counter); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); @@ -305,19 +303,19 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) } } } else if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_portt_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_portt_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_portt_elem, timeout); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); return -ENOMEM; } - map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_port_gc_init(set, bitmap_port_gc); } else { - map->dsize = 0; + set->dsize = 0; if (!init_map_port(set, map, first_port, last_port)) { kfree(map); return -ENOMEM; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index e4db9250f337..0cb840e1f8ae 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -178,11 +178,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define NLEN(family) 0 #endif /* IP_SET_HASH_WITH_NETS */ -#define ext_timeout(e, h) \ -(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_TIMEOUT]) -#define ext_counter(e, h) \ -(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_COUNTER]) - #endif /* _IP_SET_HASH_GEN_H */ /* Family dependent templates */ @@ -276,9 +271,6 @@ struct htype { u32 maxelem; /* max elements in the hash */ u32 elements; /* current element (vs timeout) */ u32 initval; /* random jhash init value */ - u32 timeout; /* timeout value, if enabled */ - size_t dsize; /* data struct size */ - size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ struct timer_list gc; /* garbage collection when timeout enabled */ struct mtype_elem next; /* temporary storage for uadd */ #ifdef IP_SET_HASH_WITH_MULTI @@ -351,7 +343,7 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) /* Calculate the actual memory size of the set data */ static size_t mtype_ahash_memsize(const struct htype *h, const struct htable *t, - u8 nets_length) + u8 nets_length, size_t dsize) { u32 i; size_t memsize = sizeof(*h) @@ -362,7 +354,7 @@ mtype_ahash_memsize(const struct htype *h, const struct htable *t, + jhash_size(t->htable_bits) * sizeof(struct hbucket); for (i = 0; i < jhash_size(t->htable_bits); i++) - memsize += t->bucket[i].size * h->dsize; + memsize += t->bucket[i].size * dsize; return memsize; } @@ -417,10 +409,10 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) init_timer(&h->gc); h->gc.data = (unsigned long) set; h->gc.function = gc; - h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&h->gc); pr_debug("gc initialized, run in every %u\n", - IPSET_GC_PERIOD(h->timeout)); + IPSET_GC_PERIOD(set->timeout)); } static bool @@ -431,7 +423,7 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) /* Resizing changes htable_bits, so we ignore it */ return x->maxelem == y->maxelem && - x->timeout == y->timeout && + a->timeout == b->timeout && #ifdef IP_SET_HASH_WITH_NETMASK x->netmask == y->netmask && #endif @@ -444,7 +436,7 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) /* Delete expired elements from the hashtable */ static void -mtype_expire(struct htype *h, u8 nets_length, size_t dsize) +mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) { struct htable *t; struct hbucket *n; @@ -458,7 +450,7 @@ mtype_expire(struct htype *h, u8 nets_length, size_t dsize) n = hbucket(t, i); for (j = 0; j < n->pos; j++) { data = ahash_data(n, j, dsize); - if (ip_set_timeout_expired(ext_timeout(data, h))) { + if (ip_set_timeout_expired(ext_timeout(data, set))) { pr_debug("expired %u/%u\n", i, j); #ifdef IP_SET_HASH_WITH_NETS mtype_del_cidr(h, CIDR(data->cidr), @@ -497,10 +489,10 @@ mtype_gc(unsigned long ul_set) pr_debug("called\n"); write_lock_bh(&set->lock); - mtype_expire(h, NLEN(set->family), h->dsize); + mtype_expire(set, h, NLEN(set->family), set->dsize); write_unlock_bh(&set->lock); - h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&h->gc); } @@ -526,7 +518,7 @@ mtype_resize(struct ip_set *set, bool retried) if (SET_WITH_TIMEOUT(set) && !retried) { i = h->elements; write_lock_bh(&set->lock); - mtype_expire(set->data, NLEN(set->family), h->dsize); + mtype_expire(set, set->data, NLEN(set->family), set->dsize); write_unlock_bh(&set->lock); if (h->elements < i) return 0; @@ -553,13 +545,13 @@ retry: for (i = 0; i < jhash_size(orig->htable_bits); i++) { n = hbucket(orig, i); for (j = 0; j < n->pos; j++) { - data = ahash_data(n, j, h->dsize); + data = ahash_data(n, j, set->dsize); #ifdef IP_SET_HASH_WITH_NETS flags = 0; mtype_data_reset_flags(data, &flags); #endif m = hbucket(t, HKEY(data, h->initval, htable_bits)); - ret = hbucket_elem_add(m, AHASH_MAX(h), h->dsize); + ret = hbucket_elem_add(m, AHASH_MAX(h), set->dsize); if (ret < 0) { #ifdef IP_SET_HASH_WITH_NETS mtype_data_reset_flags(data, &flags); @@ -570,8 +562,8 @@ retry: goto retry; return ret; } - d = ahash_data(m, m->pos++, h->dsize); - memcpy(d, data, h->dsize); + d = ahash_data(m, m->pos++, set->dsize); + memcpy(d, data, set->dsize); #ifdef IP_SET_HASH_WITH_NETS mtype_data_reset_flags(d, &flags); #endif @@ -609,7 +601,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) /* FIXME: when set is full, we slow down here */ - mtype_expire(h, NLEN(set->family), h->dsize); + mtype_expire(set, h, NLEN(set->family), set->dsize); if (h->elements >= h->maxelem) { if (net_ratelimit()) @@ -623,11 +615,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, key = HKEY(value, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (mtype_data_equal(data, d, &multi)) { if (flag_exist || (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h)))) { + ip_set_timeout_expired(ext_timeout(data, set)))) { /* Just the extensions could be overwritten */ j = i; goto reuse_slot; @@ -638,14 +630,14 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, } /* Reuse first timed out entry */ if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h)) && + ip_set_timeout_expired(ext_timeout(data, set)) && j != AHASH_MAX(h) + 1) j = i; } reuse_slot: if (j != AHASH_MAX(h) + 1) { /* Fill out reused slot */ - data = ahash_data(n, j, h->dsize); + data = ahash_data(n, j, set->dsize); #ifdef IP_SET_HASH_WITH_NETS mtype_del_cidr(h, CIDR(data->cidr), NLEN(set->family), 0); mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); @@ -653,13 +645,13 @@ reuse_slot: } else { /* Use/create a new slot */ TUNE_AHASH_MAX(h, multi); - ret = hbucket_elem_add(n, AHASH_MAX(h), h->dsize); + ret = hbucket_elem_add(n, AHASH_MAX(h), set->dsize); if (ret != 0) { if (ret == -EAGAIN) mtype_data_next(&h->next, d); goto out; } - data = ahash_data(n, n->pos++, h->dsize); + data = ahash_data(n, n->pos++, set->dsize); #ifdef IP_SET_HASH_WITH_NETS mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif @@ -670,9 +662,9 @@ reuse_slot: mtype_data_set_flags(data, flags); #endif if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(data, h), ext->timeout); + ip_set_timeout_set(ext_timeout(data, set), ext->timeout); if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(data, h), ext); + ip_set_init_counter(ext_counter(data, set), ext); out: rcu_read_unlock_bh(); @@ -699,16 +691,16 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, key = HKEY(value, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (!mtype_data_equal(data, d, &multi)) continue; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h))) + ip_set_timeout_expired(ext_timeout(data, set))) goto out; if (i != n->pos - 1) /* Not last one */ - memcpy(data, ahash_data(n, n->pos - 1, h->dsize), - h->dsize); + memcpy(data, ahash_data(n, n->pos - 1, set->dsize), + set->dsize); n->pos--; h->elements--; @@ -717,14 +709,14 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, #endif if (n->pos + AHASH_INIT_SIZE < n->size) { void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) - * h->dsize, + * set->dsize, GFP_ATOMIC); if (!tmp) { ret = 0; goto out; } n->size -= AHASH_INIT_SIZE; - memcpy(tmp, n->value, n->size * h->dsize); + memcpy(tmp, n->value, n->size * set->dsize); kfree(n->value); n->value = tmp; } @@ -742,8 +734,7 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext, struct ip_set_ext *mext, struct ip_set *set, u32 flags) { if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(data, - (struct htype *)(set->data)), + ip_set_update_counter(ext_counter(data, set), ext, mext, flags); return mtype_do_data_match(data); } @@ -770,12 +761,12 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (!mtype_data_equal(data, d, &multi)) continue; if (SET_WITH_TIMEOUT(set)) { if (!ip_set_timeout_expired( - ext_timeout(data, h))) + ext_timeout(data, set))) return mtype_data_match(data, ext, mext, set, flags); @@ -818,10 +809,10 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (mtype_data_equal(data, d, &multi) && !(SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h)))) { + ip_set_timeout_expired(ext_timeout(data, set)))) { ret = mtype_data_match(data, ext, mext, set, flags); goto out; } @@ -841,7 +832,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) size_t memsize; t = rcu_dereference_bh_nfnl(h->table); - memsize = mtype_ahash_memsize(h, t, NLEN(set->family)); + memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize); nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) @@ -858,7 +849,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || ((set->extensions & IPSET_EXT_TIMEOUT) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout))) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) || ((set->extensions & IPSET_EXT_COUNTER) && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(IPSET_FLAG_WITH_COUNTERS)))) @@ -894,9 +885,9 @@ mtype_list(const struct ip_set *set, n = hbucket(t, cb->args[2]); pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n); for (i = 0; i < n->pos; i++) { - e = ahash_data(n, i, h->dsize); + e = ahash_data(n, i, set->dsize); if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, h))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; pr_debug("list hash %lu hbucket %p i %u, data %p\n", cb->args[2], n, i, e); @@ -913,10 +904,10 @@ mtype_list(const struct ip_set *set, if (SET_WITH_TIMEOUT(set) && nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(ip_set_timeout_get( - ext_timeout(e, h))))) + ext_timeout(e, set))))) goto nla_put_failure; if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, h))) + ip_set_put_counter(skb, ext_counter(e, set))) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -1026,7 +1017,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) h->netmask = netmask; #endif get_random_bytes(&h->initval, sizeof(h->initval)); - h->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; hbits = htable_bits(hashsize); hsize = htable_size(hbits); @@ -1053,30 +1044,30 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { - h->timeout = + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; if (set->family == NFPROTO_IPV4) { - h->dsize = sizeof(struct + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4ct_elem)); - h->offset[IPSET_EXT_ID_TIMEOUT] = + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 4ct_elem), timeout); - h->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 4ct_elem), counter); IPSET_TOKEN(HTYPE, 4_gc_init)(set, IPSET_TOKEN(HTYPE, 4_gc)); } else { - h->dsize = sizeof(struct + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6ct_elem)); - h->offset[IPSET_EXT_ID_TIMEOUT] = + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 6ct_elem), timeout); - h->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 6ct_elem), counter); @@ -1085,36 +1076,36 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) } } else { if (set->family == NFPROTO_IPV4) { - h->dsize = + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4c_elem)); - h->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 4c_elem), counter); } else { - h->dsize = + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6c_elem)); - h->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 6c_elem), counter); } } } else if (tb[IPSET_ATTR_TIMEOUT]) { - h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; if (set->family == NFPROTO_IPV4) { - h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem)); - h->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem)); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem), timeout); IPSET_TOKEN(HTYPE, 4_gc_init)(set, IPSET_TOKEN(HTYPE, 4_gc)); } else { - h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem)); - h->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem)); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem), timeout); IPSET_TOKEN(HTYPE, 6_gc_init)(set, @@ -1122,9 +1113,9 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) } } else { if (set->family == NFPROTO_IPV4) - h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)); + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)); else - h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)); + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)); } pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 260c9a80d8a5..bbde7c304622 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -99,7 +99,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); __be32 ip; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip); @@ -118,7 +118,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, hosts; int ret = 0; @@ -253,7 +253,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); hash_ip6_netmask(&e.ip, h->netmask); @@ -270,7 +270,7 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (unlikely(!tb[IPSET_ATTR_IP] || diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 64caad35a391..dd175d6f3965 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -116,10 +116,9 @@ hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -136,7 +135,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip, ip_to = 0, p = 0, port, port_to; bool with_ports = false; int ret; @@ -306,10 +305,9 @@ hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -326,7 +324,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 2873bbc20d7a..87a2cfab2568 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -120,10 +120,9 @@ hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -141,7 +140,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip, ip_to = 0, p = 0, port, port_to; bool with_ports = false; int ret; @@ -319,10 +318,9 @@ hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -340,7 +338,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 6ce5a8e2c44e..0b9a28d7c740 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -172,7 +172,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_ipportnet4_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -195,7 +195,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, p = 0, port, port_to; u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; bool with_ports = false; @@ -306,9 +306,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], : port; for (; p <= port_to; p++) { e.port = htons(p); - ip2 = retried - && ip == ntohl(h->next.ip) - && p == ntohs(h->next.port) + ip2 = retried && + ip == ntohl(h->next.ip) && + p == ntohs(h->next.port) ? ntohl(h->next.ip2) : ip2_from; while (!after(ip2, ip2_to)) { e.ip2 = htonl(ip2); @@ -456,7 +456,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_ipportnet6_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -479,7 +479,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; u8 cidr; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index ec1c7dc10489..1d4caa50dacb 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -145,7 +145,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_net4_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (e.cidr == 0) return -EINVAL; @@ -165,7 +165,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, last; int ret; @@ -340,7 +340,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_net6_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (e.cidr == 0) return -EINVAL; @@ -357,10 +357,9 @@ static int hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net6_elem e = { .cidr = HOST_MASK }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (unlikely(!tb[IPSET_ATTR_IP] || diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 814b4e31a7f8..2f0ffe35c408 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -268,7 +268,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); int ret; if (e.cidr == 0) @@ -319,7 +319,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, last; char iface[IFNAMSIZ]; int ret; @@ -537,7 +537,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); int ret; if (e.cidr == 0) @@ -584,7 +584,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); char iface[IFNAMSIZ]; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 3bd923d3f179..cab236625f97 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -164,7 +164,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netport4_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -186,7 +186,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to, p = 0, ip = 0, ip_to = 0, last; bool with_ports = false; u8 cidr; @@ -409,7 +409,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netport6_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -431,7 +431,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; u8 cidr; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 0ed19b5e675e..f22d05d6e366 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -58,24 +58,13 @@ struct set_adt_elem { /* Type structure */ struct list_set { - size_t dsize; /* element size */ - size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ u32 size; /* size of set list array */ - u32 timeout; /* timeout value */ struct timer_list gc; /* garbage collection */ struct set_elem members[0]; /* the set members */ }; -static inline struct set_elem * -list_set_elem(const struct list_set *map, u32 id) -{ - return (struct set_elem *)((void *)map->members + id * map->dsize); -} - -#define ext_timeout(e, m) \ -(unsigned long *)((void *)(e) + (m)->offset[IPSET_EXT_ID_TIMEOUT]) -#define ext_counter(e, m) \ -(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_EXT_ID_COUNTER]) +#define list_set_elem(set, map, id) \ + (struct set_elem *)((void *)(map)->members + (id) * (set)->dsize) static int list_set_ktest(struct ip_set *set, const struct sk_buff *skb, @@ -92,16 +81,16 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE) opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; ret = ip_set_test(e->id, skb, par, opt); if (ret > 0) { if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(e, map), + ip_set_update_counter(ext_counter(e, set), ext, &opt->ext, cmdflags); return ret; @@ -121,11 +110,11 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb, int ret; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; ret = ip_set_add(e->id, skb, par, opt); if (ret == 0) @@ -145,11 +134,11 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb, int ret; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; ret = ip_set_del(e->id, skb, par, opt); if (ret == 0) @@ -163,8 +152,7 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - struct list_set *map = set->data; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); switch (adt) { case IPSET_TEST: @@ -188,10 +176,10 @@ id_eq(const struct ip_set *set, u32 i, ip_set_id_t id) if (i >= map->size) return 0; - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); return !!(e->id == id && !(SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map)))); + ip_set_timeout_expired(ext_timeout(e, set)))); } static int @@ -199,28 +187,29 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, const struct ip_set_ext *ext) { struct list_set *map = set->data; - struct set_elem *e = list_set_elem(map, i); + struct set_elem *e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { if (i == map->size - 1) /* Last element replaced: e.g. add new,before,last */ ip_set_put_byindex(e->id); else { - struct set_elem *x = list_set_elem(map, map->size - 1); + struct set_elem *x = list_set_elem(set, map, + map->size - 1); /* Last element pushed off */ if (x->id != IPSET_INVALID_ID) ip_set_put_byindex(x->id); - memmove(list_set_elem(map, i + 1), e, - map->dsize * (map->size - (i + 1))); + memmove(list_set_elem(set, map, i + 1), e, + set->dsize * (map->size - (i + 1))); } } e->id = d->id; if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(e, map), ext->timeout); + ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(e, map), ext); + ip_set_init_counter(ext_counter(e, set), ext); return 0; } @@ -228,16 +217,16 @@ static int list_set_del(struct ip_set *set, u32 i) { struct list_set *map = set->data; - struct set_elem *e = list_set_elem(map, i); + struct set_elem *e = list_set_elem(set, map, i); ip_set_put_byindex(e->id); if (i < map->size - 1) - memmove(e, list_set_elem(map, i + 1), - map->dsize * (map->size - (i + 1))); + memmove(e, list_set_elem(set, map, i + 1), + set->dsize * (map->size - (i + 1))); /* Last element */ - e = list_set_elem(map, map->size - 1); + e = list_set_elem(set, map, map->size - 1); e->id = IPSET_INVALID_ID; return 0; } @@ -250,9 +239,9 @@ set_cleanup_entries(struct ip_set *set) u32 i; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) list_set_del(set, i); } } @@ -268,11 +257,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, int ret; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; else if (e->id != d->id) continue; @@ -301,11 +290,11 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* Check already added element */ for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto insert; else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; else if (e->id != d->id) continue; @@ -320,9 +309,9 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, return -IPSET_ERR_EXIST; /* Update extensions */ if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(e, map), ext->timeout); + ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(e, map), ext); + ip_set_init_counter(ext_counter(e, set), ext); /* Set is already added to the list */ ip_set_put_byindex(d->id); return 0; @@ -330,7 +319,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, insert: ret = -IPSET_ERR_LIST_FULL; for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) ret = d->before != 0 ? -IPSET_ERR_REF_EXIST : list_set_add(set, i, d, ext); @@ -355,12 +344,12 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, u32 i; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return d->before != 0 ? -IPSET_ERR_REF_EXIST : -IPSET_ERR_EXIST; else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; else if (e->id != d->id) continue; @@ -383,10 +372,9 @@ static int list_set_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - struct list_set *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct set_adt_elem e = { .refid = IPSET_INVALID_ID }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); struct ip_set *s; int ret = 0; @@ -454,7 +442,7 @@ list_set_flush(struct ip_set *set) u32 i; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { ip_set_put_byindex(e->id); e->id = IPSET_INVALID_ID; @@ -486,13 +474,13 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) || (SET_WITH_COUNTER(set) && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(IPSET_FLAG_WITH_COUNTERS))) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, - htonl(sizeof(*map) + map->size * map->dsize))) + htonl(sizeof(*map) + map->size * set->dsize))) goto nla_put_failure; ipset_nest_end(skb, nested); @@ -515,11 +503,11 @@ list_set_list(const struct ip_set *set, return -EMSGSIZE; for (; cb->args[2] < map->size; cb->args[2]++) { i = cb->args[2]; - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto finish; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { @@ -535,10 +523,10 @@ list_set_list(const struct ip_set *set, if (SET_WITH_TIMEOUT(set) && nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(ip_set_timeout_get( - ext_timeout(e, map))))) + ext_timeout(e, set))))) goto nla_put_failure; if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, map))) + ip_set_put_counter(skb, ext_counter(e, set))) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -565,7 +553,7 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b) const struct list_set *y = b->data; return x->size == y->size && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -594,7 +582,7 @@ list_set_gc(unsigned long ul_set) set_cleanup_entries(set); write_unlock_bh(&set->lock); - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } @@ -606,7 +594,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) init_timer(&map->gc); map->gc.data = (unsigned long) set; map->gc.function = gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } @@ -625,12 +613,12 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize, return NULL; map->size = size; - map->dsize = dsize; - map->timeout = timeout; + set->dsize = dsize; + set->timeout = timeout; set->data = map; for (i = 0; i < size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); e->id = IPSET_INVALID_ID; } @@ -667,9 +655,9 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (!map) return -ENOMEM; set->extensions |= IPSET_EXT_TIMEOUT; - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct setct_elem, timeout); - map->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct setct_elem, counter); list_set_gc_init(set, list_set_gc); } else { @@ -677,7 +665,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) sizeof(struct setc_elem), 0); if (!map) return -ENOMEM; - map->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct setc_elem, counter); } } else if (tb[IPSET_ATTR_TIMEOUT]) { @@ -686,7 +674,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (!map) return -ENOMEM; set->extensions |= IPSET_EXT_TIMEOUT; - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct sett_elem, timeout); list_set_gc_init(set, list_set_gc); } else { -- cgit v1.2.3-71-gd317 From 03c8b234e61a9a3aab8d970b3bf681934ecfe443 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 7 Sep 2013 00:43:52 +0200 Subject: netfilter: ipset: Generalize extensions support Get rid of the structure based extensions and introduce a blob for the extensions. Thus we can support more extension types easily. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 13 +++++ net/netfilter/ipset/ip_set_bitmap_ip.c | 81 +++---------------------- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 91 +++-------------------------- net/netfilter/ipset/ip_set_bitmap_port.c | 71 ++-------------------- net/netfilter/ipset/ip_set_core.c | 46 +++++++++++++++ net/netfilter/ipset/ip_set_hash_gen.h | 86 ++++----------------------- net/netfilter/ipset/ip_set_hash_ip.c | 36 +----------- net/netfilter/ipset/ip_set_hash_ipport.c | 54 +---------------- net/netfilter/ipset/ip_set_hash_ipportip.c | 60 +------------------ net/netfilter/ipset/ip_set_hash_ipportnet.c | 66 +-------------------- net/netfilter/ipset/ip_set_hash_net.c | 54 +---------------- net/netfilter/ipset/ip_set_hash_netiface.c | 68 +-------------------- net/netfilter/ipset/ip_set_hash_netport.c | 60 +------------------ net/netfilter/ipset/ip_set_list_set.c | 81 ++++--------------------- 14 files changed, 118 insertions(+), 749 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 992a2f58dbd3..66d6bd404d64 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -66,6 +66,17 @@ enum ip_set_ext_id { IPSET_EXT_ID_MAX, }; +/* Extension type */ +struct ip_set_ext_type { + enum ip_set_extension type; + enum ipset_cadt_flags flag; + /* Size and minimal alignment */ + u8 len; + u8 align; +}; + +extern const struct ip_set_ext_type ip_set_extensions[]; + struct ip_set_ext { u64 packets; u64 bytes; @@ -283,6 +294,8 @@ extern void *ip_set_alloc(size_t size); extern void ip_set_free(void *members); extern int ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr); extern int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr); +extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], + size_t len); extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext); diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 363022edb8fb..94d985457c51 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -208,25 +208,6 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b) struct bitmap_ip_elem { }; -/* Timeout variant */ - -struct bitmap_ipt_elem { - unsigned long timeout; -}; - -/* Plain variant with counter */ - -struct bitmap_ipc_elem { - struct ip_set_counter counter; -}; - -/* Timeout variant with counter */ - -struct bitmap_ipct_elem { - unsigned long timeout; - struct ip_set_counter counter; -}; - #include "ip_set_bitmap_gen.h" /* Create bitmap:ip type of sets */ @@ -263,7 +244,7 @@ static int bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { struct bitmap_ip *map; - u32 first_ip = 0, last_ip = 0, hosts, cadt_flags = 0; + u32 first_ip = 0, last_ip = 0, hosts; u64 elements; u8 netmask = 32; int ret; @@ -335,61 +316,15 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) map->memsize = bitmap_bytes(0, elements - 1); set->variant = &bitmap_ip; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_ipct_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_ipct_elem, timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_ipct_elem, counter); - - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } - - set->timeout = ip_set_timeout_uget( - tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - - bitmap_ip_gc_init(set, bitmap_ip_gc); - } else { - set->dsize = sizeof(struct bitmap_ipc_elem); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_ipc_elem, counter); - - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_ipt_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_ipt_elem, timeout); - - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } - + set->dsize = ip_set_elem_len(set, tb, 0); + if (!init_map_ip(set, map, first_ip, last_ip, + elements, hosts, netmask)) { + kfree(map); + return -ENOMEM; + } + if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - bitmap_ip_gc_init(set, bitmap_ip_gc); - } else { - set->dsize = 0; - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } } return 0; } diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 74576cb19264..654a97bedfe9 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -289,37 +289,6 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b) /* Plain variant */ -/* Timeout variant */ - -struct bitmap_ipmact_elem { - struct { - unsigned char ether[ETH_ALEN]; - unsigned char filled; - } __attribute__ ((aligned)); - unsigned long timeout; -}; - -/* Plain variant with counter */ - -struct bitmap_ipmacc_elem { - struct { - unsigned char ether[ETH_ALEN]; - unsigned char filled; - } __attribute__ ((aligned)); - struct ip_set_counter counter; -}; - -/* Timeout variant with counter */ - -struct bitmap_ipmacct_elem { - struct { - unsigned char ether[ETH_ALEN]; - unsigned char filled; - } __attribute__ ((aligned)); - unsigned long timeout; - struct ip_set_counter counter; -}; - #include "ip_set_bitmap_gen.h" /* Create bitmap:ip,mac type of sets */ @@ -328,7 +297,7 @@ static bool init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, u32 first_ip, u32 last_ip, u32 elements) { - map->members = ip_set_alloc((last_ip - first_ip + 1) * set->dsize); + map->members = ip_set_alloc(map->memsize); if (!map->members) return false; if (set->dsize) { @@ -353,7 +322,7 @@ static int bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { - u32 first_ip = 0, last_ip = 0, cadt_flags = 0; + u32 first_ip = 0, last_ip = 0; u64 elements; struct bitmap_ipmac *map; int ret; @@ -397,57 +366,15 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], map->memsize = bitmap_bytes(0, elements - 1); set->variant = &bitmap_ipmac; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_ipmacct_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_ipmacct_elem, timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_ipmacct_elem, counter); - - if (!init_map_ipmac(set, map, first_ip, last_ip, - elements)) { - kfree(map); - return -ENOMEM; - } - set->timeout = ip_set_timeout_uget( - tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); - } else { - set->dsize = sizeof(struct bitmap_ipmacc_elem); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_ipmacc_elem, counter); - - if (!init_map_ipmac(set, map, first_ip, last_ip, - elements)) { - kfree(map); - return -ENOMEM; - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_ipmact_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_ipmact_elem, timeout); - - if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { - kfree(map); - return -ENOMEM; - } + set->dsize = ip_set_elem_len(set, tb, + sizeof(struct bitmap_ipmac_elem)); + if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { + kfree(map); + return -ENOMEM; + } + if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); - } else { - set->dsize = sizeof(struct bitmap_ipmac_elem); - - if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { - kfree(map); - return -ENOMEM; - } - set->variant = &bitmap_ipmac; } return 0; } diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 71da31935499..1ef2f3186b80 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -198,25 +198,6 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b) struct bitmap_port_elem { }; -/* Timeout variant */ - -struct bitmap_portt_elem { - unsigned long timeout; -}; - -/* Plain variant with counter */ - -struct bitmap_portc_elem { - struct ip_set_counter counter; -}; - -/* Timeout variant with counter */ - -struct bitmap_portct_elem { - unsigned long timeout; - struct ip_set_counter counter; -}; - #include "ip_set_bitmap_gen.h" /* Create bitmap:ip type of sets */ @@ -250,7 +231,6 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { struct bitmap_port *map; u16 first_port, last_port; - u32 cadt_flags = 0; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) || @@ -274,53 +254,14 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) map->elements = last_port - first_port + 1; map->memsize = map->elements * sizeof(unsigned long); set->variant = &bitmap_port; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_portct_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_portct_elem, timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_portct_elem, counter); - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - - set->timeout = - ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - bitmap_port_gc_init(set, bitmap_port_gc); - } else { - set->dsize = sizeof(struct bitmap_portc_elem); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_portc_elem, counter); - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_portt_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_portt_elem, timeout); - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - + set->dsize = ip_set_elem_len(set, tb, 0); + if (!init_map_port(set, map, first_port, last_port)) { + kfree(map); + return -ENOMEM; + } + if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; bitmap_port_gc_init(set, bitmap_port_gc); - } else { - set->dsize = 0; - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - } return 0; } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 428c30a8586f..f35afed3814f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -315,6 +315,52 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); +/* ipset data extension types, in size order */ + +const struct ip_set_ext_type ip_set_extensions[] = { + [IPSET_EXT_ID_COUNTER] = { + .type = IPSET_EXT_COUNTER, + .flag = IPSET_FLAG_WITH_COUNTERS, + .len = sizeof(struct ip_set_counter), + .align = __alignof__(struct ip_set_counter), + }, + [IPSET_EXT_ID_TIMEOUT] = { + .type = IPSET_EXT_TIMEOUT, + .len = sizeof(unsigned long), + .align = __alignof__(unsigned long), + }, +}; +EXPORT_SYMBOL_GPL(ip_set_extensions); + +static inline bool +add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[]) +{ + return ip_set_extensions[id].flag ? + (flags & ip_set_extensions[id].flag) : + !!tb[IPSET_ATTR_TIMEOUT]; +} + +size_t +ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len) +{ + enum ip_set_ext_id id; + size_t offset = 0; + u32 cadt_flags = 0; + + if (tb[IPSET_ATTR_CADT_FLAGS]) + cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + for (id = 0; id < IPSET_EXT_ID_MAX; id++) { + if (!add_extension(id, cadt_flags, tb)) + continue; + offset += ALIGN(len + offset, ip_set_extensions[id].align); + set->offset[id] = offset; + set->extensions |= ip_set_extensions[id].type; + offset += ip_set_extensions[id].len; + } + return len + offset; +} +EXPORT_SYMBOL_GPL(ip_set_elem_len); + int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 0cb840e1f8ae..3999f1719f69 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -960,7 +960,6 @@ static int IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) { u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; - u32 cadt_flags = 0; u8 hbits; #ifdef IP_SET_HASH_WITH_NETMASK u8 netmask; @@ -1034,88 +1033,23 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) rcu_assign_pointer(h->table, t); set->data = h; - if (set->family == NFPROTO_IPV4) + if (set->family == NFPROTO_IPV4) { set->variant = &IPSET_TOKEN(HTYPE, 4_variant); - else + set->dsize = ip_set_elem_len(set, tb, + sizeof(struct IPSET_TOKEN(HTYPE, 4_elem))); + } else { set->variant = &IPSET_TOKEN(HTYPE, 6_variant); - - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - set->timeout = - ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - if (set->family == NFPROTO_IPV4) { - set->dsize = sizeof(struct - IPSET_TOKEN(HTYPE, 4ct_elem)); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct - IPSET_TOKEN(HTYPE, 4ct_elem), - timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct - IPSET_TOKEN(HTYPE, 4ct_elem), - counter); - IPSET_TOKEN(HTYPE, 4_gc_init)(set, - IPSET_TOKEN(HTYPE, 4_gc)); - } else { - set->dsize = sizeof(struct - IPSET_TOKEN(HTYPE, 6ct_elem)); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct - IPSET_TOKEN(HTYPE, 6ct_elem), - timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct - IPSET_TOKEN(HTYPE, 6ct_elem), - counter); - IPSET_TOKEN(HTYPE, 6_gc_init)(set, - IPSET_TOKEN(HTYPE, 6_gc)); - } - } else { - if (set->family == NFPROTO_IPV4) { - set->dsize = - sizeof(struct - IPSET_TOKEN(HTYPE, 4c_elem)); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct - IPSET_TOKEN(HTYPE, 4c_elem), - counter); - } else { - set->dsize = - sizeof(struct - IPSET_TOKEN(HTYPE, 6c_elem)); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct - IPSET_TOKEN(HTYPE, 6c_elem), - counter); - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { + set->dsize = ip_set_elem_len(set, tb, + sizeof(struct IPSET_TOKEN(HTYPE, 6_elem))); + } + if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - if (set->family == NFPROTO_IPV4) { - set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem)); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem), - timeout); + if (set->family == NFPROTO_IPV4) IPSET_TOKEN(HTYPE, 4_gc_init)(set, IPSET_TOKEN(HTYPE, 4_gc)); - } else { - set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem)); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem), - timeout); + else IPSET_TOKEN(HTYPE, 6_gc_init)(set, IPSET_TOKEN(HTYPE, 6_gc)); - } - } else { - if (set->family == NFPROTO_IPV4) - set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)); - else - set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)); } pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index bbde7c304622..a111ffe40b46 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -35,7 +35,7 @@ MODULE_ALIAS("ip_set_hash:ip"); #define HTYPE hash_ip #define IP_SET_HASH_WITH_NETMASK -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ip4_elem { @@ -43,22 +43,6 @@ struct hash_ip4_elem { __be32 ip; }; -struct hash_ip4t_elem { - __be32 ip; - unsigned long timeout; -}; - -struct hash_ip4c_elem { - __be32 ip; - struct ip_set_counter counter; -}; - -struct hash_ip4ct_elem { - __be32 ip; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -178,29 +162,13 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ /* Member elements */ struct hash_ip6_elem { union nf_inet_addr ip; }; -struct hash_ip6t_elem { - union nf_inet_addr ip; - unsigned long timeout; -}; - -struct hash_ip6c_elem { - union nf_inet_addr ip; - struct ip_set_counter counter; -}; - -struct hash_ip6ct_elem { - union nf_inet_addr ip; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index dd175d6f3965..5dc735c4dac2 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -36,7 +36,7 @@ MODULE_ALIAS("ip_set_hash:ip,port"); /* Type specific function prefix */ #define HTYPE hash_ipport -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ipport4_elem { @@ -46,31 +46,6 @@ struct hash_ipport4_elem { u8 padding; }; -struct hash_ipport4t_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipport4c_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipport4ct_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -221,7 +196,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_ipport6_elem { union nf_inet_addr ip; @@ -230,31 +205,6 @@ struct hash_ipport6_elem { u8 padding; }; -struct hash_ipport6t_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipport6c_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipport6ct_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 87a2cfab2568..8c43dc7811cb 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -36,7 +36,7 @@ MODULE_ALIAS("ip_set_hash:ip,port,ip"); /* Type specific function prefix */ #define HTYPE hash_ipportip -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ipportip4_elem { @@ -47,34 +47,6 @@ struct hash_ipportip4_elem { u8 padding; }; -struct hash_ipportip4t_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipportip4c_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipportip4ct_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - static inline bool hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1, const struct hash_ipportip4_elem *ip2, @@ -230,7 +202,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_ipportip6_elem { union nf_inet_addr ip; @@ -240,34 +212,6 @@ struct hash_ipportip6_elem { u8 padding; }; -struct hash_ipportip6t_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipportip6c_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipportip6ct_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 0b9a28d7c740..34890452366c 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -46,7 +46,7 @@ MODULE_ALIAS("ip_set_hash:ip,port,net"); #define IP_SET_HASH_WITH_PROTO #define IP_SET_HASH_WITH_NETS -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ipportnet4_elem { @@ -58,37 +58,6 @@ struct hash_ipportnet4_elem { u8 proto; }; -struct hash_ipportnet4t_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - unsigned long timeout; -}; - -struct hash_ipportnet4c_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; -}; - -struct hash_ipportnet4ct_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -328,7 +297,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_ipportnet6_elem { union nf_inet_addr ip; @@ -339,37 +308,6 @@ struct hash_ipportnet6_elem { u8 proto; }; -struct hash_ipportnet6t_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - unsigned long timeout; -}; - -struct hash_ipportnet6c_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; -}; - -struct hash_ipportnet6ct_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 1d4caa50dacb..d5598557f4a9 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -36,7 +36,7 @@ MODULE_ALIAS("ip_set_hash:net"); #define HTYPE hash_net #define IP_SET_HASH_WITH_NETS -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_net4_elem { @@ -46,31 +46,6 @@ struct hash_net4_elem { u8 cidr; }; -struct hash_net4t_elem { - __be32 ip; - u16 padding0; - u8 nomatch; - u8 cidr; - unsigned long timeout; -}; - -struct hash_net4c_elem { - __be32 ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; -}; - -struct hash_net4ct_elem { - __be32 ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -228,7 +203,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_net6_elem { union nf_inet_addr ip; @@ -237,31 +212,6 @@ struct hash_net6_elem { u8 cidr; }; -struct hash_net6t_elem { - union nf_inet_addr ip; - u16 padding0; - u8 nomatch; - u8 cidr; - unsigned long timeout; -}; - -struct hash_net6c_elem { - union nf_inet_addr ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; -}; - -struct hash_net6ct_elem { - union nf_inet_addr ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 2f0ffe35c408..26703e9e5082 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -134,7 +134,7 @@ iface_add(struct rb_root *root, const char **iface) #define STREQ(a, b) (strcmp(a, b) == 0) -/* IPv4 variants */ +/* IPv4 variant */ struct hash_netiface4_elem_hashed { __be32 ip; @@ -144,7 +144,7 @@ struct hash_netiface4_elem_hashed { u8 elem; }; -/* Member elements without timeout */ +/* Member elements */ struct hash_netiface4_elem { __be32 ip; u8 physdev; @@ -154,37 +154,6 @@ struct hash_netiface4_elem { const char *iface; }; -struct hash_netiface4t_elem { - __be32 ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - unsigned long timeout; -}; - -struct hash_netiface4c_elem { - __be32 ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; -}; - -struct hash_netiface4ct_elem { - __be32 ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -399,7 +368,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_netiface6_elem_hashed { union nf_inet_addr ip; @@ -418,37 +387,6 @@ struct hash_netiface6_elem { const char *iface; }; -struct hash_netiface6t_elem { - union nf_inet_addr ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - unsigned long timeout; -}; - -struct hash_netiface6c_elem { - union nf_inet_addr ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; -}; - -struct hash_netiface6ct_elem { - union nf_inet_addr ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index cab236625f97..45b6e91b0636 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -45,7 +45,7 @@ MODULE_ALIAS("ip_set_hash:net,port"); */ #define IP_SET_HASH_WITH_NETS_PACKED -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_netport4_elem { @@ -56,34 +56,6 @@ struct hash_netport4_elem { u8 nomatch:1; }; -struct hash_netport4t_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - unsigned long timeout; -}; - -struct hash_netport4c_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; -}; - -struct hash_netport4ct_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -287,7 +259,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_netport6_elem { union nf_inet_addr ip; @@ -297,34 +269,6 @@ struct hash_netport6_elem { u8 nomatch:1; }; -struct hash_netport6t_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - unsigned long timeout; -}; - -struct hash_netport6c_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; -}; - -struct hash_netport6ct_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index f22d05d6e366..7fd11c79aff4 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -28,28 +28,6 @@ struct set_elem { ip_set_id_t id; }; -struct sett_elem { - struct { - ip_set_id_t id; - } __attribute__ ((aligned)); - unsigned long timeout; -}; - -struct setc_elem { - struct { - ip_set_id_t id; - } __attribute__ ((aligned)); - struct ip_set_counter counter; -}; - -struct setct_elem { - struct { - ip_set_id_t id; - } __attribute__ ((aligned)); - struct ip_set_counter counter; - unsigned long timeout; -}; - struct set_adt_elem { ip_set_id_t id; ip_set_id_t refid; @@ -600,21 +578,18 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) /* Create list:set type of sets */ -static struct list_set * -init_list_set(struct ip_set *set, u32 size, size_t dsize, - unsigned long timeout) +static bool +init_list_set(struct ip_set *set, u32 size) { struct list_set *map; struct set_elem *e; u32 i; - map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL); + map = kzalloc(sizeof(*map) + size * set->dsize, GFP_KERNEL); if (!map) - return NULL; + return false; map->size = size; - set->dsize = dsize; - set->timeout = timeout; set->data = map; for (i = 0; i < size; i++) { @@ -622,15 +597,13 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize, e->id = IPSET_INVALID_ID; } - return map; + return true; } static int list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { - struct list_set *map; - u32 size = IP_SET_LIST_DEFAULT_SIZE, cadt_flags = 0; - unsigned long timeout = 0; + u32 size = IP_SET_LIST_DEFAULT_SIZE; if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || @@ -642,45 +615,13 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (size < IP_SET_LIST_MIN_SIZE) size = IP_SET_LIST_MIN_SIZE; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (tb[IPSET_ATTR_TIMEOUT]) - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->variant = &set_variant; - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - map = init_list_set(set, size, - sizeof(struct setct_elem), timeout); - if (!map) - return -ENOMEM; - set->extensions |= IPSET_EXT_TIMEOUT; - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct setct_elem, timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct setct_elem, counter); - list_set_gc_init(set, list_set_gc); - } else { - map = init_list_set(set, size, - sizeof(struct setc_elem), 0); - if (!map) - return -ENOMEM; - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct setc_elem, counter); - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - map = init_list_set(set, size, - sizeof(struct sett_elem), timeout); - if (!map) - return -ENOMEM; - set->extensions |= IPSET_EXT_TIMEOUT; - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct sett_elem, timeout); + set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem)); + if (!init_list_set(set, size)) + return -ENOMEM; + if (tb[IPSET_ATTR_TIMEOUT]) { + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); list_set_gc_init(set, list_set_gc); - } else { - map = init_list_set(set, size, sizeof(struct set_elem), 0); - if (!map) - return -ENOMEM; } return 0; } -- cgit v1.2.3-71-gd317 From 40cd63bf33b21ef4d43776b1d49c605f876fe32c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 9 Sep 2013 14:44:29 +0200 Subject: netfilter: ipset: Support extensions which need a per data destroy function Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 22 +++++++--- net/netfilter/ipset/ip_set_bitmap_gen.h | 38 ++++++++++++++---- net/netfilter/ipset/ip_set_hash_gen.h | 71 +++++++++++++++++++++------------ net/netfilter/ipset/ip_set_list_set.c | 19 ++++++--- 4 files changed, 107 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 66d6bd404d64..6372ee224fe8 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -49,11 +49,13 @@ enum ip_set_feature { /* Set extensions */ enum ip_set_extension { - IPSET_EXT_NONE = 0, - IPSET_EXT_BIT_TIMEOUT = 1, + IPSET_EXT_BIT_TIMEOUT = 0, IPSET_EXT_TIMEOUT = (1 << IPSET_EXT_BIT_TIMEOUT), - IPSET_EXT_BIT_COUNTER = 2, + IPSET_EXT_BIT_COUNTER = 1, IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER), + /* Mark set with an extension which needs to call destroy */ + IPSET_EXT_BIT_DESTROY = 7, + IPSET_EXT_DESTROY = (1 << IPSET_EXT_BIT_DESTROY), }; #define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT) @@ -68,6 +70,8 @@ enum ip_set_ext_id { /* Extension type */ struct ip_set_ext_type { + /* Destroy extension private data (can be NULL) */ + void (*destroy)(void *ext); enum ip_set_extension type; enum ipset_cadt_flags flag; /* Size and minimal alignment */ @@ -88,13 +92,21 @@ struct ip_set_counter { atomic64_t packets; }; +struct ip_set; + +static inline void +ip_set_ext_destroy(struct ip_set *set, void *data) +{ + /* Check that the extension is enabled for the set and + * call it's destroy function for its extension part in data. + */ +} + #define ext_timeout(e, s) \ (unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT]) #define ext_counter(e, s) \ (struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER]) -struct ip_set; - typedef int (*ipset_adtfn)(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 cmdflags); diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index f32ddbca3923..4515fe8b83dd 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -12,6 +12,7 @@ #define mtype_gc_test IPSET_TOKEN(MTYPE, _gc_test) #define mtype_is_filled IPSET_TOKEN(MTYPE, _is_filled) #define mtype_do_add IPSET_TOKEN(MTYPE, _do_add) +#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup) #define mtype_do_del IPSET_TOKEN(MTYPE, _do_del) #define mtype_do_list IPSET_TOKEN(MTYPE, _do_list) #define mtype_do_head IPSET_TOKEN(MTYPE, _do_head) @@ -46,6 +47,17 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) add_timer(&map->gc); } +static void +mtype_ext_cleanup(struct ip_set *set) +{ + struct mtype *map = set->data; + u32 id; + + for (id = 0; id < map->elements; id++) + if (test_bit(id, map->members)) + ip_set_ext_destroy(set, get_ext(set, map, id)); +} + static void mtype_destroy(struct ip_set *set) { @@ -55,8 +67,11 @@ mtype_destroy(struct ip_set *set) del_timer_sync(&map->gc); ip_set_free(map->members); - if (set->dsize) + if (set->dsize) { + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set); ip_set_free(map->extensions); + } kfree(map); set->data = NULL; @@ -67,6 +82,8 @@ mtype_flush(struct ip_set *set) { struct mtype *map = set->data; + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set); memset(map->members, 0, map->memsize); } @@ -132,6 +149,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, ret = 0; else if (!(flags & IPSET_FLAG_EXIST)) return -IPSET_ERR_EXIST; + /* Element is re-added, cleanup extensions */ + ip_set_ext_destroy(set, x); } if (SET_WITH_TIMEOUT(set)) @@ -152,11 +171,14 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - const void *x = get_ext(set, map, e->id); + void *x = get_ext(set, map, e->id); - if (mtype_do_del(e, map) || - (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, set)))) + if (mtype_do_del(e, map)) + return -IPSET_ERR_EXIST; + + ip_set_ext_destroy(set, x); + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(x, set))) return -IPSET_ERR_EXIST; return 0; @@ -235,7 +257,7 @@ mtype_gc(unsigned long ul_set) { struct ip_set *set = (struct ip_set *) ul_set; struct mtype *map = set->data; - const void *x; + void *x; u32 id; /* We run parallel with other readers (test element) @@ -244,8 +266,10 @@ mtype_gc(unsigned long ul_set) for (id = 0; id < map->elements; id++) if (mtype_gc_test(id, map, set->dsize)) { x = get_ext(set, map, id); - if (ip_set_timeout_expired(ext_timeout(x, set))) + if (ip_set_timeout_expired(ext_timeout(x, set))) { clear_bit(id, map->members); + ip_set_ext_destroy(set, x); + } } read_unlock_bh(&set->lock); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 3999f1719f69..3c26e5b946f5 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -117,23 +117,6 @@ htable_bits(u32 hashsize) return bits; } -/* Destroy the hashtable part of the set */ -static void -ahash_destroy(struct htable *t) -{ - struct hbucket *n; - u32 i; - - for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = hbucket(t, i); - if (n->size) - /* FIXME: use slab cache */ - kfree(n->value); - } - - ip_set_free(t); -} - static int hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) { @@ -192,6 +175,8 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #undef mtype_data_next #undef mtype_elem +#undef mtype_ahash_destroy +#undef mtype_ext_cleanup #undef mtype_add_cidr #undef mtype_del_cidr #undef mtype_ahash_memsize @@ -230,6 +215,8 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define mtype_data_list IPSET_TOKEN(MTYPE, _data_list) #define mtype_data_next IPSET_TOKEN(MTYPE, _data_next) #define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_ahash_destroy IPSET_TOKEN(MTYPE, _ahash_destroy) +#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup) #define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr) #define mtype_del_cidr IPSET_TOKEN(MTYPE, _del_cidr) #define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize) @@ -359,6 +346,19 @@ mtype_ahash_memsize(const struct htype *h, const struct htable *t, return memsize; } +/* Get the ith element from the array block n */ +#define ahash_data(n, i, dsize) \ + ((struct mtype_elem *)((n)->value + ((i) * (dsize)))) + +static void +mtype_ext_cleanup(struct ip_set *set, struct hbucket *n) +{ + int i; + + for (i = 0; i < n->pos; i++) + ip_set_ext_destroy(set, ahash_data(n, i, set->dsize)); +} + /* Flush a hash type of set: destroy all elements */ static void mtype_flush(struct ip_set *set) @@ -372,6 +372,8 @@ mtype_flush(struct ip_set *set) for (i = 0; i < jhash_size(t->htable_bits); i++) { n = hbucket(t, i); if (n->size) { + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set, n); n->size = n->pos = 0; /* FIXME: use slab cache */ kfree(n->value); @@ -383,6 +385,26 @@ mtype_flush(struct ip_set *set) h->elements = 0; } +/* Destroy the hashtable part of the set */ +static void +mtype_ahash_destroy(struct ip_set *set, struct htable *t) +{ + struct hbucket *n; + u32 i; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = hbucket(t, i); + if (n->size) { + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set, n); + /* FIXME: use slab cache */ + kfree(n->value); + } + } + + ip_set_free(t); +} + /* Destroy a hash type of set */ static void mtype_destroy(struct ip_set *set) @@ -392,7 +414,7 @@ mtype_destroy(struct ip_set *set) if (set->extensions & IPSET_EXT_TIMEOUT) del_timer_sync(&h->gc); - ahash_destroy(rcu_dereference_bh_nfnl(h->table)); + mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table)); #ifdef IP_SET_HASH_WITH_RBTREE rbtree_destroy(&h->rbtree); #endif @@ -430,10 +452,6 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) a->extensions == b->extensions; } -/* Get the ith element from the array block n */ -#define ahash_data(n, i, dsize) \ - ((struct mtype_elem *)((n)->value + ((i) * (dsize)))) - /* Delete expired elements from the hashtable */ static void mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) @@ -456,6 +474,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) mtype_del_cidr(h, CIDR(data->cidr), nets_length, 0); #endif + ip_set_ext_destroy(set, data); if (j != n->pos - 1) /* Not last one */ memcpy(data, @@ -557,7 +576,7 @@ retry: mtype_data_reset_flags(data, &flags); #endif read_unlock_bh(&set->lock); - ahash_destroy(t); + mtype_ahash_destroy(set, t); if (ret == -EAGAIN) goto retry; return ret; @@ -578,7 +597,7 @@ retry: pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, orig->htable_bits, orig, t->htable_bits, t); - ahash_destroy(orig); + mtype_ahash_destroy(set, orig); return 0; } @@ -642,6 +661,7 @@ reuse_slot: mtype_del_cidr(h, CIDR(data->cidr), NLEN(set->family), 0); mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif + ip_set_ext_destroy(set, data); } else { /* Use/create a new slot */ TUNE_AHASH_MAX(h, multi); @@ -707,6 +727,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, #ifdef IP_SET_HASH_WITH_NETS mtype_del_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif + ip_set_ext_destroy(set, data); if (n->pos + AHASH_INIT_SIZE < n->size) { void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) * set->dsize, @@ -1033,7 +1054,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) rcu_assign_pointer(h->table, t); set->data = h; - if (set->family == NFPROTO_IPV4) { + if (set->family == NFPROTO_IPV4) { set->variant = &IPSET_TOKEN(HTYPE, 4_variant); set->dsize = ip_set_elem_len(set, tb, sizeof(struct IPSET_TOKEN(HTYPE, 4_elem))); diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 7fd11c79aff4..e44986af1fc2 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -168,16 +168,19 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, struct set_elem *e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { - if (i == map->size - 1) + if (i == map->size - 1) { /* Last element replaced: e.g. add new,before,last */ ip_set_put_byindex(e->id); - else { + ip_set_ext_destroy(set, e); + } else { struct set_elem *x = list_set_elem(set, map, map->size - 1); /* Last element pushed off */ - if (x->id != IPSET_INVALID_ID) + if (x->id != IPSET_INVALID_ID) { ip_set_put_byindex(x->id); + ip_set_ext_destroy(set, x); + } memmove(list_set_elem(set, map, i + 1), e, set->dsize * (map->size - (i + 1))); } @@ -198,6 +201,7 @@ list_set_del(struct ip_set *set, u32 i) struct set_elem *e = list_set_elem(set, map, i); ip_set_put_byindex(e->id); + ip_set_ext_destroy(set, e); if (i < map->size - 1) memmove(e, list_set_elem(set, map, i + 1), @@ -266,14 +270,14 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, bool flag_exist = flags & IPSET_FLAG_EXIST; u32 i, ret = 0; + if (SET_WITH_TIMEOUT(set)) + set_cleanup_entries(set); + /* Check already added element */ for (i = 0; i < map->size; i++) { e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto insert; - else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, set))) - continue; else if (e->id != d->id) continue; @@ -286,6 +290,8 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* Can't re-add */ return -IPSET_ERR_EXIST; /* Update extensions */ + ip_set_ext_destroy(set, e); + if (SET_WITH_TIMEOUT(set)) ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) @@ -423,6 +429,7 @@ list_set_flush(struct ip_set *set) e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { ip_set_put_byindex(e->id); + ip_set_ext_destroy(set, e); e->id = IPSET_INVALID_ID; } } -- cgit v1.2.3-71-gd317 From 68b63f08d22f23161c43cd2417104aa213ff877f Mon Sep 17 00:00:00 2001 From: Oliver Smith Date: Sun, 22 Sep 2013 20:56:30 +0200 Subject: netfilter: ipset: Support comments for ipset entries in the core. This adds the core support for having comments on ipset entries. The comments are stored as standard null-terminated strings in dynamically allocated memory after being passed to the kernel. As a result of this, code has been added to the generic destroy function to iterate all extensions and call that extension's destroy task if the set has that extension activated, and if such a task is defined. Signed-off-by: Oliver Smith Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 51 +++++++++++++++++++---- include/linux/netfilter/ipset/ip_set_comment.h | 57 ++++++++++++++++++++++++++ include/uapi/linux/netfilter/ipset/ip_set.h | 8 +++- net/netfilter/ipset/ip_set_core.c | 14 +++++++ 4 files changed, 121 insertions(+), 9 deletions(-) create mode 100644 include/linux/netfilter/ipset/ip_set_comment.h (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 6372ee224fe8..407f84df6a47 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -53,6 +53,8 @@ enum ip_set_extension { IPSET_EXT_TIMEOUT = (1 << IPSET_EXT_BIT_TIMEOUT), IPSET_EXT_BIT_COUNTER = 1, IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER), + IPSET_EXT_BIT_COMMENT = 2, + IPSET_EXT_COMMENT = (1 << IPSET_EXT_BIT_COMMENT), /* Mark set with an extension which needs to call destroy */ IPSET_EXT_BIT_DESTROY = 7, IPSET_EXT_DESTROY = (1 << IPSET_EXT_BIT_DESTROY), @@ -60,11 +62,13 @@ enum ip_set_extension { #define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT) #define SET_WITH_COUNTER(s) ((s)->extensions & IPSET_EXT_COUNTER) +#define SET_WITH_COMMENT(s) ((s)->extensions & IPSET_EXT_COMMENT) /* Extension id, in size order */ enum ip_set_ext_id { IPSET_EXT_ID_COUNTER = 0, IPSET_EXT_ID_TIMEOUT, + IPSET_EXT_ID_COMMENT, IPSET_EXT_ID_MAX, }; @@ -85,6 +89,7 @@ struct ip_set_ext { u64 packets; u64 bytes; u32 timeout; + char *comment; }; struct ip_set_counter { @@ -92,20 +97,19 @@ struct ip_set_counter { atomic64_t packets; }; -struct ip_set; +struct ip_set_comment { + char *str; +}; -static inline void -ip_set_ext_destroy(struct ip_set *set, void *data) -{ - /* Check that the extension is enabled for the set and - * call it's destroy function for its extension part in data. - */ -} +struct ip_set; #define ext_timeout(e, s) \ (unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT]) #define ext_counter(e, s) \ (struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER]) +#define ext_comment(e, s) \ +(struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT]) + typedef int (*ipset_adtfn)(struct ip_set *set, void *value, const struct ip_set_ext *ext, @@ -222,6 +226,36 @@ struct ip_set { void *data; }; +static inline void +ip_set_ext_destroy(struct ip_set *set, void *data) +{ + /* Check that the extension is enabled for the set and + * call it's destroy function for its extension part in data. + */ + if (SET_WITH_COMMENT(set)) + ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy( + ext_comment(data, set)); +} + +static inline int +ip_set_put_flags(struct sk_buff *skb, struct ip_set *set) +{ + u32 cadt_flags = 0; + + if (SET_WITH_TIMEOUT(set)) + if (unlikely(nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(set->timeout)))) + return -EMSGSIZE; + if (SET_WITH_COUNTER(set)) + cadt_flags |= IPSET_FLAG_WITH_COUNTERS; + if (SET_WITH_COMMENT(set)) + cadt_flags |= IPSET_FLAG_WITH_COMMENT; + + if (!cadt_flags) + return 0; + return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags)); +} + static inline void ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter) { @@ -425,6 +459,7 @@ bitmap_bytes(u32 a, u32 b) } #include +#include #define IP_SET_INIT_KEXT(skb, opt, set) \ { .bytes = (skb)->len, .packets = 1, \ diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h new file mode 100644 index 000000000000..21217ea008d7 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -0,0 +1,57 @@ +#ifndef _IP_SET_COMMENT_H +#define _IP_SET_COMMENT_H + +/* Copyright (C) 2013 Oliver Smith + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifdef __KERNEL__ + +static inline char* +ip_set_comment_uget(struct nlattr *tb) +{ + return nla_data(tb); +} + +static inline void +ip_set_init_comment(struct ip_set_comment *comment, + const struct ip_set_ext *ext) +{ + size_t len = ext->comment ? strlen(ext->comment) : 0; + + if (unlikely(comment->str)) { + kfree(comment->str); + comment->str = NULL; + } + if (!len) + return; + if (unlikely(len > IPSET_MAX_COMMENT_SIZE)) + len = IPSET_MAX_COMMENT_SIZE; + comment->str = kzalloc(len + 1, GFP_ATOMIC); + if (unlikely(!comment->str)) + return; + strlcpy(comment->str, ext->comment, len + 1); +} + +static inline int +ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment) +{ + if (!comment->str) + return 0; + return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str); +} + +static inline void +ip_set_comment_free(struct ip_set_comment *comment) +{ + if (unlikely(!comment->str)) + return; + kfree(comment->str); + comment->str = NULL; +} + +#endif +#endif diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 2b61ac44dcc1..25d3b2f79c02 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -10,12 +10,14 @@ #ifndef _UAPI_IP_SET_H #define _UAPI_IP_SET_H - #include /* The protocol version */ #define IPSET_PROTOCOL 6 +/* The maximum permissible comment length we will accept over netlink */ +#define IPSET_MAX_COMMENT_SIZE 255 + /* The max length of strings including NUL: set and type identifiers */ #define IPSET_MAXNAMELEN 32 @@ -110,6 +112,7 @@ enum { IPSET_ATTR_IFACE, IPSET_ATTR_BYTES, IPSET_ATTR_PACKETS, + IPSET_ATTR_COMMENT, __IPSET_ATTR_ADT_MAX, }; #define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1) @@ -140,6 +143,7 @@ enum ipset_errno { IPSET_ERR_IPADDR_IPV4, IPSET_ERR_IPADDR_IPV6, IPSET_ERR_COUNTER, + IPSET_ERR_COMMENT, /* Type specific error codes */ IPSET_ERR_TYPE_SPECIFIC = 4352, @@ -176,6 +180,8 @@ enum ipset_cadt_flags { IPSET_FLAG_NOMATCH = (1 << IPSET_FLAG_BIT_NOMATCH), IPSET_FLAG_BIT_WITH_COUNTERS = 3, IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS), + IPSET_FLAG_BIT_WITH_COMMENT = 4, + IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT), IPSET_FLAG_CADT_MAX = 15, }; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index f35afed3814f..3bf9a3d29dff 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -315,6 +315,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); +typedef void (*destroyer)(void *); /* ipset data extension types, in size order */ const struct ip_set_ext_type ip_set_extensions[] = { @@ -329,6 +330,13 @@ const struct ip_set_ext_type ip_set_extensions[] = { .len = sizeof(unsigned long), .align = __alignof__(unsigned long), }, + [IPSET_EXT_ID_COMMENT] = { + .type = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY, + .flag = IPSET_FLAG_WITH_COMMENT, + .len = sizeof(struct ip_set_comment), + .align = __alignof__(struct ip_set_comment), + .destroy = (destroyer) ip_set_comment_free, + }, }; EXPORT_SYMBOL_GPL(ip_set_extensions); @@ -380,6 +388,12 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], ext->packets = be64_to_cpu(nla_get_be64( tb[IPSET_ATTR_PACKETS])); } + if (tb[IPSET_ATTR_COMMENT]) { + if (!(set->extensions & IPSET_EXT_COMMENT)) + return -IPSET_ERR_COMMENT; + ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]); + } + return 0; } EXPORT_SYMBOL_GPL(ip_set_get_extensions); -- cgit v1.2.3-71-gd317 From 3fd986b3d99e3847f1cce6fc36043d0f16508e1d Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 25 Sep 2013 17:44:35 +0200 Subject: netfilter: ipset: Use a common function at listing the extensions Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 21 +++++++++++++++++++++ net/netfilter/ipset/ip_set_bitmap_gen.h | 29 ++++++++++------------------- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 11 ----------- net/netfilter/ipset/ip_set_hash_gen.h | 11 +---------- net/netfilter/ipset/ip_set_list_set.c | 11 +---------- 5 files changed, 33 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 407f84df6a47..da2a45acf74c 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -461,6 +461,27 @@ bitmap_bytes(u32 a, u32 b) #include #include +static inline int +ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, + const void *e, bool active) +{ + if (SET_WITH_TIMEOUT(set)) { + unsigned long *timeout = ext_timeout(e, set); + + if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(active ? ip_set_timeout_get(timeout) + : *timeout))) + return -EMSGSIZE; + } + if (SET_WITH_COUNTER(set) && + ip_set_put_counter(skb, ext_counter(e, set))) + return -EMSGSIZE; + if (SET_WITH_COMMENT(set) && + ip_set_put_comment(skb, ext_comment(e, set))) + return -EMSGSIZE; + return 0; +} + #define IP_SET_INIT_KEXT(skb, opt, set) \ { .bytes = (skb)->len, .packets = 1, \ .timeout = ip_set_adt_opt_timeout(opt, set) } diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 6167fc9d0efe..a13e15be7911 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -183,6 +183,14 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, return 0; } +#ifndef IP_SET_BITMAP_STORED_TIMEOUT +static inline bool +mtype_is_filled(const struct mtype_elem *x) +{ + return true; +} +#endif + static int mtype_list(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb) @@ -215,25 +223,8 @@ mtype_list(const struct ip_set *set, } if (mtype_do_list(skb, map, id, set->dsize)) goto nla_put_failure; - if (SET_WITH_TIMEOUT(set)) { -#ifdef IP_SET_BITMAP_STORED_TIMEOUT - if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_stored(map, id, - ext_timeout(x, set), - set->dsize)))) - goto nla_put_failure; -#else - if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get( - ext_timeout(x, set))))) - goto nla_put_failure; -#endif - } - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(x, set))) - goto nla_put_failure; - if (SET_WITH_COMMENT(set) && - ip_set_put_comment(skb, ext_comment(x, set))) + if (ip_set_put_extensions(skb, set, x, + mtype_is_filled((const struct mtype_elem *) x))) goto nla_put_failure; ipset_nest_end(skb, nested); } diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index fb4d163dea82..87a218f8ab5f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -176,17 +176,6 @@ bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e, return !test_and_clear_bit(e->id, map->members); } -static inline unsigned long -ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout, - size_t dsize) -{ - const struct bitmap_ipmac_elem *elem = - get_elem(map->extensions, id, dsize); - - return elem->filled == MAC_FILLED ? ip_set_timeout_get(timeout) : - *timeout; -} - static inline int bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map, u32 id, size_t dsize) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 1cffeb977605..b4add206c603 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -961,16 +961,7 @@ mtype_list(const struct ip_set *set, } if (mtype_data_list(skb, e)) goto nla_put_failure; - if (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get( - ext_timeout(e, set))))) - goto nla_put_failure; - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, set))) - goto nla_put_failure; - if (SET_WITH_COMMENT(set) && - ip_set_put_comment(skb, ext_comment(e, set))) + if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; ipset_nest_end(skb, nested); } diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index e23f33c14435..ba4232e451f4 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -512,16 +512,7 @@ list_set_list(const struct ip_set *set, if (nla_put_string(skb, IPSET_ATTR_NAME, ip_set_name_byindex(e->id))) goto nla_put_failure; - if (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get( - ext_timeout(e, set))))) - goto nla_put_failure; - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, set))) - goto nla_put_failure; - if (SET_WITH_COMMENT(set) && - ip_set_put_comment(skb, ext_comment(e, set))) + if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; ipset_nest_end(skb, nested); } -- cgit v1.2.3-71-gd317 From 1785e8f473082aa60d62c7165856cf6484077b99 Mon Sep 17 00:00:00 2001 From: Vitaly Lavrov Date: Mon, 30 Sep 2013 17:07:02 +0200 Subject: netfiler: ipset: Add net namespace for ipset This patch adds netns support for ipset. Major changes were made in ip_set_core.c and ip_set.h. Global variables are moved to per net namespace. Added initialization code and the destruction of the network namespace ipset subsystem. In the prototypes of public functions ip_set_* added parameter "struct net*". The remaining corrections related to the change prototypes of public functions ip_set_*. The patch for git://git.netfilter.org/ipset.git commit 6a4ec96c0b8caac5c35474e40e319704d92ca347 Signed-off-by: Vitaly Lavrov Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 16 +- net/netfilter/ipset/ip_set_bitmap_ip.c | 3 +- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 2 +- net/netfilter/ipset/ip_set_bitmap_port.c | 3 +- net/netfilter/ipset/ip_set_core.c | 288 +++++++++++++++++++----------- net/netfilter/ipset/ip_set_hash_gen.h | 3 +- net/netfilter/ipset/ip_set_list_set.c | 31 ++-- net/netfilter/xt_set.c | 40 ++--- net/sched/em_ipset.c | 7 +- 9 files changed, 245 insertions(+), 148 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index da2a45acf74c..7967516adc0d 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -184,7 +184,8 @@ struct ip_set_type { u8 revision_min, revision_max; /* Create set */ - int (*create)(struct ip_set *set, struct nlattr *tb[], u32 flags); + int (*create)(struct net *net, struct ip_set *set, + struct nlattr *tb[], u32 flags); /* Attribute policies */ const struct nla_policy create_policy[IPSET_ATTR_CREATE_MAX + 1]; @@ -316,12 +317,13 @@ ip_set_init_counter(struct ip_set_counter *counter, } /* register and unregister set references */ -extern ip_set_id_t ip_set_get_byname(const char *name, struct ip_set **set); -extern void ip_set_put_byindex(ip_set_id_t index); -extern const char *ip_set_name_byindex(ip_set_id_t index); -extern ip_set_id_t ip_set_nfnl_get(const char *name); -extern ip_set_id_t ip_set_nfnl_get_byindex(ip_set_id_t index); -extern void ip_set_nfnl_put(ip_set_id_t index); +extern ip_set_id_t ip_set_get_byname(struct net *net, + const char *name, struct ip_set **set); +extern void ip_set_put_byindex(struct net *net, ip_set_id_t index); +extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index); +extern ip_set_id_t ip_set_nfnl_get(struct net *net, const char *name); +extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index); +extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index); /* API for iptables set match, and SET target */ diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index faac124e2645..6f1f9f494808 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -242,7 +242,8 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, } static int -bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], + u32 flags) { struct bitmap_ip *map; u32 first_ip = 0, last_ip = 0, hosts; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 87a218f8ab5f..740eabededd9 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -309,7 +309,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, } static int -bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], +bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], u32 flags) { u32 first_ip = 0, last_ip = 0; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 407a63caee6b..e7603c5b53d7 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -228,7 +228,8 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, } static int -bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], + u32 flags) { struct bitmap_port *map; u16 first_port, last_port; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 3bf9a3d29dff..dc9284bdd2dd 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include @@ -27,8 +29,17 @@ static LIST_HEAD(ip_set_type_list); /* all registered set types */ static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */ -static struct ip_set * __rcu *ip_set_list; /* all individual sets */ -static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */ +struct ip_set_net { + struct ip_set * __rcu *ip_set_list; /* all individual sets */ + ip_set_id_t ip_set_max; /* max number of sets */ + int is_deleted; /* deleted by ip_set_net_exit */ +}; +static int ip_set_net_id __read_mostly; + +static inline struct ip_set_net *ip_set_pernet(struct net *net) +{ + return net_generic(net, ip_set_net_id); +} #define IP_SET_INC 64 #define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0) @@ -45,8 +56,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); /* When the nfnl mutex is held: */ #define nfnl_dereference(p) \ rcu_dereference_protected(p, 1) -#define nfnl_set(id) \ - nfnl_dereference(ip_set_list)[id] +#define nfnl_set(inst, id) \ + nfnl_dereference((inst)->ip_set_list)[id] /* * The set types are implemented in modules and registered set types @@ -434,13 +445,14 @@ __ip_set_put(struct ip_set *set) */ static inline struct ip_set * -ip_set_rcu_get(ip_set_id_t index) +ip_set_rcu_get(struct net *net, ip_set_id_t index) { struct ip_set *set; + struct ip_set_net *inst = ip_set_pernet(net); rcu_read_lock(); /* ip_set_list itself needs to be protected */ - set = rcu_dereference(ip_set_list)[index]; + set = rcu_dereference(inst->ip_set_list)[index]; rcu_read_unlock(); return set; @@ -450,7 +462,8 @@ int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(index); + struct ip_set *set = ip_set_rcu_get( + dev_net(par->in ? par->in : par->out), index); int ret = 0; BUG_ON(set == NULL); @@ -488,7 +501,8 @@ int ip_set_add(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(index); + struct ip_set *set = ip_set_rcu_get( + dev_net(par->in ? par->in : par->out), index); int ret; BUG_ON(set == NULL); @@ -510,7 +524,8 @@ int ip_set_del(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(index); + struct ip_set *set = ip_set_rcu_get( + dev_net(par->in ? par->in : par->out), index); int ret = 0; BUG_ON(set == NULL); @@ -534,14 +549,15 @@ EXPORT_SYMBOL_GPL(ip_set_del); * */ ip_set_id_t -ip_set_get_byname(const char *name, struct ip_set **set) +ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) { ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; + struct ip_set_net *inst = ip_set_pernet(net); rcu_read_lock(); - for (i = 0; i < ip_set_max; i++) { - s = rcu_dereference(ip_set_list)[i]; + for (i = 0; i < inst->ip_set_max; i++) { + s = rcu_dereference(inst->ip_set_list)[i]; if (s != NULL && STREQ(s->name, name)) { __ip_set_get(s); index = i; @@ -561,17 +577,26 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname); * to be valid, after calling this function. * */ -void -ip_set_put_byindex(ip_set_id_t index) + +static inline void +__ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index) { struct ip_set *set; rcu_read_lock(); - set = rcu_dereference(ip_set_list)[index]; + set = rcu_dereference(inst->ip_set_list)[index]; if (set != NULL) __ip_set_put(set); rcu_read_unlock(); } + +void +ip_set_put_byindex(struct net *net, ip_set_id_t index) +{ + struct ip_set_net *inst = ip_set_pernet(net); + + __ip_set_put_byindex(inst, index); +} EXPORT_SYMBOL_GPL(ip_set_put_byindex); /* @@ -582,9 +607,9 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex); * */ const char * -ip_set_name_byindex(ip_set_id_t index) +ip_set_name_byindex(struct net *net, ip_set_id_t index) { - const struct ip_set *set = ip_set_rcu_get(index); + const struct ip_set *set = ip_set_rcu_get(net, index); BUG_ON(set == NULL); BUG_ON(set->ref == 0); @@ -606,14 +631,15 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex); * The nfnl mutex is used in the function. */ ip_set_id_t -ip_set_nfnl_get(const char *name) +ip_set_nfnl_get(struct net *net, const char *name) { ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; + struct ip_set_net *inst = ip_set_pernet(net); nfnl_lock(NFNL_SUBSYS_IPSET); - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL && STREQ(s->name, name)) { __ip_set_get(s); index = i; @@ -633,15 +659,16 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get); * The nfnl mutex is used in the function. */ ip_set_id_t -ip_set_nfnl_get_byindex(ip_set_id_t index) +ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) { struct ip_set *set; + struct ip_set_net *inst = ip_set_pernet(net); - if (index > ip_set_max) + if (index > inst->ip_set_max) return IPSET_INVALID_ID; nfnl_lock(NFNL_SUBSYS_IPSET); - set = nfnl_set(index); + set = nfnl_set(inst, index); if (set) __ip_set_get(set); else @@ -660,13 +687,17 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex); * The nfnl mutex is used in the function. */ void -ip_set_nfnl_put(ip_set_id_t index) +ip_set_nfnl_put(struct net *net, ip_set_id_t index) { struct ip_set *set; + struct ip_set_net *inst = ip_set_pernet(net); + nfnl_lock(NFNL_SUBSYS_IPSET); - set = nfnl_set(index); - if (set != NULL) - __ip_set_put(set); + if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */ + set = nfnl_set(inst, index); + if (set != NULL) + __ip_set_put(set); + } nfnl_unlock(NFNL_SUBSYS_IPSET); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); @@ -724,14 +755,14 @@ static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static struct ip_set * -find_set_and_id(const char *name, ip_set_id_t *id) +find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id) { struct ip_set *set = NULL; ip_set_id_t i; *id = IPSET_INVALID_ID; - for (i = 0; i < ip_set_max; i++) { - set = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + set = nfnl_set(inst, i); if (set != NULL && STREQ(set->name, name)) { *id = i; break; @@ -741,22 +772,23 @@ find_set_and_id(const char *name, ip_set_id_t *id) } static inline struct ip_set * -find_set(const char *name) +find_set(struct ip_set_net *inst, const char *name) { ip_set_id_t id; - return find_set_and_id(name, &id); + return find_set_and_id(inst, name, &id); } static int -find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set) +find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, + struct ip_set **set) { struct ip_set *s; ip_set_id_t i; *index = IPSET_INVALID_ID; - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s == NULL) { if (*index == IPSET_INVALID_ID) *index = i; @@ -785,6 +817,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct net *net = sock_net(ctnl); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *clash = NULL; ip_set_id_t index = IPSET_INVALID_ID; struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {}; @@ -843,7 +877,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, goto put_out; } - ret = set->type->create(set, tb, flags); + ret = set->type->create(net, set, tb, flags); if (ret != 0) goto put_out; @@ -854,7 +888,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, * by the nfnl mutex. Find the first free index in ip_set_list * and check clashing. */ - ret = find_free_id(set->name, &index, &clash); + ret = find_free_id(inst, set->name, &index, &clash); if (ret == -EEXIST) { /* If this is the same set and requested, ignore error */ if ((flags & IPSET_FLAG_EXIST) && @@ -867,9 +901,9 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, goto cleanup; } else if (ret == -IPSET_ERR_MAX_SETS) { struct ip_set **list, **tmp; - ip_set_id_t i = ip_set_max + IP_SET_INC; + ip_set_id_t i = inst->ip_set_max + IP_SET_INC; - if (i < ip_set_max || i == IPSET_INVALID_ID) + if (i < inst->ip_set_max || i == IPSET_INVALID_ID) /* Wraparound */ goto cleanup; @@ -877,14 +911,14 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, if (!list) goto cleanup; /* nfnl mutex is held, both lists are valid */ - tmp = nfnl_dereference(ip_set_list); - memcpy(list, tmp, sizeof(struct ip_set *) * ip_set_max); - rcu_assign_pointer(ip_set_list, list); + tmp = nfnl_dereference(inst->ip_set_list); + memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max); + rcu_assign_pointer(inst->ip_set_list, list); /* Make sure all current packets have passed through */ synchronize_net(); /* Use new list */ - index = ip_set_max; - ip_set_max = i; + index = inst->ip_set_max; + inst->ip_set_max = i; kfree(tmp); ret = 0; } else if (ret) @@ -894,7 +928,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, * Finally! Add our shiny new set to the list, and be done. */ pr_debug("create: '%s' created with index %u!\n", set->name, index); - nfnl_set(index) = set; + nfnl_set(inst, index) = set; return ret; @@ -917,12 +951,12 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static void -ip_set_destroy_set(ip_set_id_t index) +ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index) { - struct ip_set *set = nfnl_set(index); + struct ip_set *set = nfnl_set(inst, index); pr_debug("set: %s\n", set->name); - nfnl_set(index) = NULL; + nfnl_set(inst, index) = NULL; /* Must call it without holding any lock */ set->variant->destroy(set); @@ -935,6 +969,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *s; ip_set_id_t i; int ret = 0; @@ -954,21 +989,22 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, */ read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL && s->ref) { ret = -IPSET_ERR_BUSY; goto out; } } read_unlock_bh(&ip_set_ref_lock); - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL) - ip_set_destroy_set(i); + ip_set_destroy_set(inst, i); } } else { - s = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &i); + s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), + &i); if (s == NULL) { ret = -ENOENT; goto out; @@ -978,7 +1014,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, } read_unlock_bh(&ip_set_ref_lock); - ip_set_destroy_set(i); + ip_set_destroy_set(inst, i); } return 0; out: @@ -1003,6 +1039,7 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *s; ip_set_id_t i; @@ -1010,13 +1047,13 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, return -IPSET_ERR_PROTOCOL; if (!attr[IPSET_ATTR_SETNAME]) { - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL) ip_set_flush_set(s); } } else { - s = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (s == NULL) return -ENOENT; @@ -1042,6 +1079,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set, *s; const char *name2; ip_set_id_t i; @@ -1052,7 +1090,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_SETNAME2] == NULL)) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1063,8 +1101,8 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, } name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL && STREQ(s->name, name2)) { ret = -IPSET_ERR_EXIST_SETNAME2; goto out; @@ -1091,6 +1129,7 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *from, *to; ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; @@ -1100,11 +1139,13 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_SETNAME2] == NULL)) return -IPSET_ERR_PROTOCOL; - from = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &from_id); + from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), + &from_id); if (from == NULL) return -ENOENT; - to = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id); + to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]), + &to_id); if (to == NULL) return -IPSET_ERR_EXIST_SETNAME2; @@ -1121,8 +1162,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, write_lock_bh(&ip_set_ref_lock); swap(from->ref, to->ref); - nfnl_set(from_id) = to; - nfnl_set(to_id) = from; + nfnl_set(inst, from_id) = to; + nfnl_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); return 0; @@ -1141,9 +1182,10 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, static int ip_set_dump_done(struct netlink_callback *cb) { + struct ip_set_net *inst = (struct ip_set_net *)cb->data; if (cb->args[2]) { - pr_debug("release set %s\n", nfnl_set(cb->args[1])->name); - ip_set_put_byindex((ip_set_id_t) cb->args[1]); + pr_debug("release set %s\n", nfnl_set(inst, cb->args[1])->name); + __ip_set_put_byindex(inst, (ip_set_id_t) cb->args[1]); } return 0; } @@ -1169,6 +1211,7 @@ dump_init(struct netlink_callback *cb) struct nlattr *attr = (void *)nlh + min_len; u32 dump_type; ip_set_id_t index; + struct ip_set_net *inst = (struct ip_set_net *)cb->data; /* Second pass, so parser can't fail */ nla_parse(cda, IPSET_ATTR_CMD_MAX, @@ -1182,7 +1225,7 @@ dump_init(struct netlink_callback *cb) if (cda[IPSET_ATTR_SETNAME]) { struct ip_set *set; - set = find_set_and_id(nla_data(cda[IPSET_ATTR_SETNAME]), + set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), &index); if (set == NULL) return -ENOENT; @@ -1210,6 +1253,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0; u32 dump_type, dump_flags; int ret = 0; + struct ip_set_net *inst = (struct ip_set_net *)cb->data; if (!cb->args[0]) { ret = dump_init(cb); @@ -1223,18 +1267,18 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) } } - if (cb->args[1] >= ip_set_max) + if (cb->args[1] >= inst->ip_set_max) goto out; dump_type = DUMP_TYPE(cb->args[0]); dump_flags = DUMP_FLAGS(cb->args[0]); - max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max; + max = dump_type == DUMP_ONE ? cb->args[1] + 1 : inst->ip_set_max; dump_last: pr_debug("args[0]: %u %u args[1]: %ld\n", dump_type, dump_flags, cb->args[1]); for (; cb->args[1] < max; cb->args[1]++) { index = (ip_set_id_t) cb->args[1]; - set = nfnl_set(index); + set = nfnl_set(inst, index); if (set == NULL) { if (dump_type == DUMP_ONE) { ret = -ENOENT; @@ -1312,8 +1356,8 @@ next_set: release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[2]) { - pr_debug("release set %s\n", nfnl_set(index)->name); - ip_set_put_byindex(index); + pr_debug("release set %s\n", nfnl_set(inst, index)->name); + __ip_set_put_byindex(inst, index); cb->args[2] = 0; } out: @@ -1331,6 +1375,8 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; @@ -1338,6 +1384,7 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, struct netlink_dump_control c = { .dump = ip_set_dump_start, .done = ip_set_dump_done, + .data = (void *)inst }; return netlink_dump_start(ctnl, skb, nlh, &c); } @@ -1416,6 +1463,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; const struct nlattr *nla; @@ -1434,7 +1482,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_LINENO] == NULL)))) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1470,6 +1518,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; const struct nlattr *nla; @@ -1488,7 +1537,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_LINENO] == NULL)))) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1524,6 +1573,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; int ret = 0; @@ -1534,7 +1584,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, !flag_nested(attr[IPSET_ATTR_DATA]))) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1559,6 +1609,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); const struct ip_set *set; struct sk_buff *skb2; struct nlmsghdr *nlh2; @@ -1568,7 +1619,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_SETNAME] == NULL)) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1793,8 +1844,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) unsigned int *op; void *data; int copylen = *len, ret = 0; + struct net *net = sock_net(sk); + struct ip_set_net *inst = ip_set_pernet(net); - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (optval != SO_IP_SET) return -EBADF; @@ -1843,7 +1896,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; nfnl_lock(NFNL_SUBSYS_IPSET); - find_set_and_id(req_get->set.name, &id); + find_set_and_id(inst, req_get->set.name, &id); req_get->set.index = id; nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; @@ -1858,10 +1911,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; nfnl_lock(NFNL_SUBSYS_IPSET); - find_set_and_id(req_get->set.name, &id); + find_set_and_id(inst, req_get->set.name, &id); req_get->set.index = id; if (id != IPSET_INVALID_ID) - req_get->family = nfnl_set(id)->family; + req_get->family = nfnl_set(inst, id)->family; nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } @@ -1870,12 +1923,12 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) struct ip_set *set; if (*len != sizeof(struct ip_set_req_get_set) || - req_get->set.index >= ip_set_max) { + req_get->set.index >= inst->ip_set_max) { ret = -EINVAL; goto done; } nfnl_lock(NFNL_SUBSYS_IPSET); - set = nfnl_set(req_get->set.index); + set = nfnl_set(inst, req_get->set.index); strncpy(req_get->set.name, set ? set->name : "", IPSET_MAXNAMELEN); nfnl_unlock(NFNL_SUBSYS_IPSET); @@ -1904,49 +1957,82 @@ static struct nf_sockopt_ops so_set __read_mostly = { .owner = THIS_MODULE, }; -static int __init -ip_set_init(void) +static int __net_init +ip_set_net_init(struct net *net) { + struct ip_set_net *inst = ip_set_pernet(net); + struct ip_set **list; - int ret; - if (max_sets) - ip_set_max = max_sets; - if (ip_set_max >= IPSET_INVALID_ID) - ip_set_max = IPSET_INVALID_ID - 1; + inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX; + if (inst->ip_set_max >= IPSET_INVALID_ID) + inst->ip_set_max = IPSET_INVALID_ID - 1; - list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL); + list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL); if (!list) return -ENOMEM; + inst->is_deleted = 0; + rcu_assign_pointer(inst->ip_set_list, list); + pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL); + return 0; +} + +static void __net_exit +ip_set_net_exit(struct net *net) +{ + struct ip_set_net *inst = ip_set_pernet(net); + + struct ip_set *set = NULL; + ip_set_id_t i; + + inst->is_deleted = 1; /* flag for ip_set_nfnl_put */ + + for (i = 0; i < inst->ip_set_max; i++) { + set = nfnl_set(inst, i); + if (set != NULL) + ip_set_destroy_set(inst, i); + } + kfree(rcu_dereference_protected(inst->ip_set_list, 1)); +} + +static struct pernet_operations ip_set_net_ops = { + .init = ip_set_net_init, + .exit = ip_set_net_exit, + .id = &ip_set_net_id, + .size = sizeof(struct ip_set_net) +}; + - rcu_assign_pointer(ip_set_list, list); - ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); +static int __init +ip_set_init(void) +{ + int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); if (ret != 0) { pr_err("ip_set: cannot register with nfnetlink.\n"); - kfree(list); return ret; } ret = nf_register_sockopt(&so_set); if (ret != 0) { pr_err("SO_SET registry failed: %d\n", ret); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - kfree(list); return ret; } - - pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL); + ret = register_pernet_subsys(&ip_set_net_ops); + if (ret) { + pr_err("ip_set: cannot register pernet_subsys.\n"); + nf_unregister_sockopt(&so_set); + nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + return ret; + } return 0; } static void __exit ip_set_fini(void) { - struct ip_set **list = rcu_dereference_protected(ip_set_list, 1); - - /* There can't be any existing set */ + unregister_pernet_subsys(&ip_set_net_ops); nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - kfree(list); pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index b4add206c603..6a80dbd30df7 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -1011,7 +1011,8 @@ static const struct ip_set_type_variant mtype_variant = { #ifdef IP_SET_EMIT_CREATE static int -IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) +IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, + struct nlattr *tb[], u32 flags) { u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; u8 hbits; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index ba4232e451f4..ec6f6d15dded 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -39,6 +39,7 @@ struct set_adt_elem { struct list_set { u32 size; /* size of set list array */ struct timer_list gc; /* garbage collection */ + struct net *net; /* namespace */ struct set_elem members[0]; /* the set members */ }; @@ -171,7 +172,7 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, if (e->id != IPSET_INVALID_ID) { if (i == map->size - 1) { /* Last element replaced: e.g. add new,before,last */ - ip_set_put_byindex(e->id); + ip_set_put_byindex(map->net, e->id); ip_set_ext_destroy(set, e); } else { struct set_elem *x = list_set_elem(set, map, @@ -179,7 +180,7 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, /* Last element pushed off */ if (x->id != IPSET_INVALID_ID) { - ip_set_put_byindex(x->id); + ip_set_put_byindex(map->net, x->id); ip_set_ext_destroy(set, x); } memmove(list_set_elem(set, map, i + 1), e, @@ -205,7 +206,7 @@ list_set_del(struct ip_set *set, u32 i) struct list_set *map = set->data; struct set_elem *e = list_set_elem(set, map, i); - ip_set_put_byindex(e->id); + ip_set_put_byindex(map->net, e->id); ip_set_ext_destroy(set, e); if (i < map->size - 1) @@ -307,7 +308,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_COMMENT(set)) ip_set_init_comment(ext_comment(e, set), ext); /* Set is already added to the list */ - ip_set_put_byindex(d->id); + ip_set_put_byindex(map->net, d->id); return 0; } insert: @@ -366,6 +367,7 @@ static int list_set_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { + struct list_set *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct set_adt_elem e = { .refid = IPSET_INVALID_ID }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -385,7 +387,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - e.id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s); + e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s); if (e.id == IPSET_INVALID_ID) return -IPSET_ERR_NAME; /* "Loop detection" */ @@ -405,7 +407,8 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], } if (tb[IPSET_ATTR_NAMEREF]) { - e.refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]), + e.refid = ip_set_get_byname(map->net, + nla_data(tb[IPSET_ATTR_NAMEREF]), &s); if (e.refid == IPSET_INVALID_ID) { ret = -IPSET_ERR_NAMEREF; @@ -421,9 +424,9 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], finish: if (e.refid != IPSET_INVALID_ID) - ip_set_put_byindex(e.refid); + ip_set_put_byindex(map->net, e.refid); if (adt != IPSET_ADD || ret) - ip_set_put_byindex(e.id); + ip_set_put_byindex(map->net, e.id); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -438,7 +441,7 @@ list_set_flush(struct ip_set *set) for (i = 0; i < map->size; i++) { e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { - ip_set_put_byindex(e->id); + ip_set_put_byindex(map->net, e->id); ip_set_ext_destroy(set, e); e->id = IPSET_INVALID_ID; } @@ -510,7 +513,7 @@ list_set_list(const struct ip_set *set, goto nla_put_failure; } if (nla_put_string(skb, IPSET_ATTR_NAME, - ip_set_name_byindex(e->id))) + ip_set_name_byindex(map->net, e->id))) goto nla_put_failure; if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; @@ -587,7 +590,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) /* Create list:set type of sets */ static bool -init_list_set(struct ip_set *set, u32 size) +init_list_set(struct net *net, struct ip_set *set, u32 size) { struct list_set *map; struct set_elem *e; @@ -598,6 +601,7 @@ init_list_set(struct ip_set *set, u32 size) return false; map->size = size; + map->net = net; set->data = map; for (i = 0; i < size; i++) { @@ -609,7 +613,8 @@ init_list_set(struct ip_set *set, u32 size) } static int -list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[], + u32 flags) { u32 size = IP_SET_LIST_DEFAULT_SIZE; @@ -625,7 +630,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) set->variant = &set_variant; set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem)); - if (!init_list_set(set, size)) + if (!init_list_set(net, set, size)) return -ENOMEM; if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 2095488684ff..e7c4e0e01ff5 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -81,7 +81,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) struct xt_set_info_match_v0 *info = par->matchinfo; ip_set_id_t index; - index = ip_set_nfnl_get_byindex(info->match_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->match_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find set indentified by id %u to match\n", @@ -91,7 +91,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) { pr_warning("Protocol error: set match dimension " "is over the limit!\n"); - ip_set_nfnl_put(info->match_set.index); + ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; } @@ -106,7 +106,7 @@ set_match_v0_destroy(const struct xt_mtdtor_param *par) { struct xt_set_info_match_v0 *info = par->matchinfo; - ip_set_nfnl_put(info->match_set.index); + ip_set_nfnl_put(par->net, info->match_set.index); } /* Revision 1 match */ @@ -131,7 +131,7 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par) struct xt_set_info_match_v1 *info = par->matchinfo; ip_set_id_t index; - index = ip_set_nfnl_get_byindex(info->match_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->match_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find set indentified by id %u to match\n", @@ -141,7 +141,7 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par) if (info->match_set.dim > IPSET_DIM_MAX) { pr_warning("Protocol error: set match dimension " "is over the limit!\n"); - ip_set_nfnl_put(info->match_set.index); + ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; } @@ -153,7 +153,7 @@ set_match_v1_destroy(const struct xt_mtdtor_param *par) { struct xt_set_info_match_v1 *info = par->matchinfo; - ip_set_nfnl_put(info->match_set.index); + ip_set_nfnl_put(par->net, info->match_set.index); } /* Revision 3 match */ @@ -228,7 +228,7 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) ip_set_id_t index; if (info->add_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->add_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find add_set index %u as target\n", info->add_set.index); @@ -237,12 +237,12 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) } if (info->del_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->del_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find del_set index %u as target\n", info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; } } @@ -251,9 +251,9 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) pr_warning("Protocol error: SET target dimension " "is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); return -ERANGE; } @@ -270,9 +270,9 @@ set_target_v0_destroy(const struct xt_tgdtor_param *par) const struct xt_set_info_target_v0 *info = par->targinfo; if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); } /* Revision 1 target */ @@ -301,7 +301,7 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) ip_set_id_t index; if (info->add_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->add_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find add_set index %u as target\n", info->add_set.index); @@ -310,12 +310,12 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) } if (info->del_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->del_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find del_set index %u as target\n", info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; } } @@ -324,9 +324,9 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) pr_warning("Protocol error: SET target dimension " "is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); return -ERANGE; } @@ -339,9 +339,9 @@ set_target_v1_destroy(const struct xt_tgdtor_param *par) const struct xt_set_info_target_v1 *info = par->targinfo; if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); } /* Revision 2 target */ diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index 938b7cbf5627..1ac41d3de5c3 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -24,11 +24,12 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len, { struct xt_set_info *set = data; ip_set_id_t index; + struct net *net = qdisc_dev(tp->q)->nd_net; if (data_len != sizeof(*set)) return -EINVAL; - index = ip_set_nfnl_get_byindex(set->index); + index = ip_set_nfnl_get_byindex(net, set->index); if (index == IPSET_INVALID_ID) return -ENOENT; @@ -37,7 +38,7 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len, if (em->data) return 0; - ip_set_nfnl_put(index); + ip_set_nfnl_put(net, index); return -ENOMEM; } @@ -45,7 +46,7 @@ static void em_ipset_destroy(struct tcf_proto *p, struct tcf_ematch *em) { const struct xt_set_info *set = (const void *) em->data; if (set) { - ip_set_nfnl_put(set->index); + ip_set_nfnl_put(qdisc_dev(p->q)->nd_net, set->index); kfree((void *) em->data); } } -- cgit v1.2.3-71-gd317 From 36a8f39e05ccc308a5619a7edb5ad6e15ee82ff6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Sep 2013 01:21:32 -0700 Subject: net: skb_is_gso_v6() requires skb_is_gso() bnx2x makes a dangerous use of skb_is_gso_v6(). It should first make sure skb is a gso packet Signed-off-by: Eric Dumazet Cc: Eilon Greenstein Acked-by: Dmitry Kravkov Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 18 ++++++++++-------- include/linux/skbuff.h | 1 + 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 61726af1de6e..0c7fb1ed1261 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3256,14 +3256,16 @@ static u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb) if (prot == IPPROTO_TCP) rc |= XMIT_CSUM_TCP; - if (skb_is_gso_v6(skb)) { - rc |= (XMIT_GSO_V6 | XMIT_CSUM_TCP); - if (rc & XMIT_CSUM_ENC) - rc |= XMIT_GSO_ENC_V6; - } else if (skb_is_gso(skb)) { - rc |= (XMIT_GSO_V4 | XMIT_CSUM_TCP); - if (rc & XMIT_CSUM_ENC) - rc |= XMIT_GSO_ENC_V4; + if (skb_is_gso(skb)) { + if (skb_is_gso_v6(skb)) { + rc |= (XMIT_GSO_V6 | XMIT_CSUM_TCP); + if (rc & XMIT_CSUM_ENC) + rc |= XMIT_GSO_ENC_V6; + } else { + rc |= (XMIT_GSO_V4 | XMIT_CSUM_TCP); + if (rc & XMIT_CSUM_ENC) + rc |= XMIT_GSO_ENC_V4; + } } return rc; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6d56840e561e..d72d71efa7a3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2755,6 +2755,7 @@ static inline bool skb_is_gso(const struct sk_buff *skb) return skb_shinfo(skb)->gso_size; } +/* Note: Should be called only if skb_is_gso(skb) is true */ static inline bool skb_is_gso_v6(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; -- cgit v1.2.3-71-gd317 From 5eb7906b47dcd906b3ffd811e689e0de4a6b1b6a Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Thu, 29 Aug 2013 15:03:14 +0300 Subject: ieee80211: fix vht cap definitions VHT_CAP_BEAMFORMER_ANTENNAS cap is actually defined in the draft as VHT_CAP_BEAMFORMEE_STS_MAX, and its size is 3 bits long. VHT_CAP_SOUNDING_DIMENSIONS is also 3 bits long. Fix the definitions and change the cap masking accordingly. Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 4 ++-- net/mac80211/vht.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a5b598a79bec..7c1e1ebc0e23 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1391,8 +1391,8 @@ struct ieee80211_vht_operation { #define IEEE80211_VHT_CAP_RXSTBC_MASK 0x00000700 #define IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE 0x00000800 #define IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE 0x00001000 -#define IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX 0x00006000 -#define IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX 0x00030000 +#define IEEE80211_VHT_CAP_BEAMFORMEE_STS_MAX 0x0000e000 +#define IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX 0x00070000 #define IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE 0x00080000 #define IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE 0x00100000 #define IEEE80211_VHT_CAP_VHT_TXOP_PS 0x00200000 diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 97c289414e32..de0112785aae 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -185,13 +185,13 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) { vht_cap->cap |= cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | - IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX | IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX); } if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE) vht_cap->cap |= cap_info & - IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE; + (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | + IEEE80211_VHT_CAP_BEAMFORMEE_STS_MAX); if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE) vht_cap->cap |= cap_info & -- cgit v1.2.3-71-gd317 From 180cf72f56fab2810e00497c087c7126bfe53c85 Mon Sep 17 00:00:00 2001 From: "holger@eitzenberger.org" Date: Mon, 30 Sep 2013 17:07:28 +0200 Subject: netfilter: nf_ct_sip: consolidate NAT hook functions There are currently seven different NAT hooks used in both nf_conntrack_sip and nf_nat_sip, each of the hooks is exported in nf_conntrack_sip, then set from the nf_nat_sip NAT helper. And because each of them is exported there is quite some overhead introduced due of this. By introducing nf_nat_sip_hooks I am able to reduce both text/data somewhat. For nf_conntrack_sip e. g. I get text data bss dec old 15243 5256 32 20531 new 15010 5192 32 20234 Signed-off-by: Holger Eitzenberger Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_sip.h | 107 +++++++++++++----------- net/netfilter/nf_conntrack_sip.c | 127 ++++++++--------------------- net/netfilter/nf_nat_sip.c | 35 ++++---- 3 files changed, 107 insertions(+), 162 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index ba7f571a2b1c..4cb71551f611 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -107,55 +107,64 @@ enum sdp_header_types { SDP_HDR_MEDIA, }; -extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen); -extern void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, - unsigned int protoff, s16 off); -extern unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - struct nf_conntrack_expect *exp, - unsigned int matchoff, - unsigned int matchlen); -extern unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int sdpoff, - enum sdp_header_types type, - enum sdp_header_types term, - const union nf_inet_addr *addr); -extern unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int matchoff, - unsigned int matchlen, - u_int16_t port); -extern unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int sdpoff, - const union nf_inet_addr *addr); -extern unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - struct nf_conntrack_expect *rtp_exp, - struct nf_conntrack_expect *rtcp_exp, - unsigned int mediaoff, - unsigned int medialen, - union nf_inet_addr *rtp_addr); +struct nf_nat_sip_hooks { + unsigned int (*msg)(struct sk_buff *skb, + unsigned int protoff, + unsigned int dataoff, + const char **dptr, + unsigned int *datalen); + + void (*seq_adjust)(struct sk_buff *skb, + unsigned int protoff, s16 off); + + unsigned int (*expect)(struct sk_buff *skb, + unsigned int protoff, + unsigned int dataoff, + const char **dptr, + unsigned int *datalen, + struct nf_conntrack_expect *exp, + unsigned int matchoff, + unsigned int matchlen); + + unsigned int (*sdp_addr)(struct sk_buff *skb, + unsigned int protoff, + unsigned int dataoff, + const char **dptr, + unsigned int *datalen, + unsigned int sdpoff, + enum sdp_header_types type, + enum sdp_header_types term, + const union nf_inet_addr *addr); + + unsigned int (*sdp_port)(struct sk_buff *skb, + unsigned int protoff, + unsigned int dataoff, + const char **dptr, + unsigned int *datalen, + unsigned int matchoff, + unsigned int matchlen, + u_int16_t port); + + unsigned int (*sdp_session)(struct sk_buff *skb, + unsigned int protoff, + unsigned int dataoff, + const char **dptr, + unsigned int *datalen, + unsigned int sdpoff, + const union nf_inet_addr *addr); + + unsigned int (*sdp_media)(struct sk_buff *skb, + unsigned int protoff, + unsigned int dataoff, + const char **dptr, + unsigned int *datalen, + struct nf_conntrack_expect *rtp_exp, + struct nf_conntrack_expect *rtcp_exp, + unsigned int mediaoff, + unsigned int medialen, + union nf_inet_addr *rtp_addr); +}; +extern const struct nf_nat_sip_hooks *nf_nat_sip_hooks; extern int ct_sip_parse_request(const struct nf_conn *ct, const char *dptr, unsigned int datalen, diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 5ed8c441dffd..466410eaa482 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -52,66 +52,8 @@ module_param(sip_direct_media, int, 0600); MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling " "endpoints only (default 1)"); -unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, const char **dptr, - unsigned int *datalen) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sip_hook); - -void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, unsigned int protoff, - s16 off) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook); - -unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - struct nf_conntrack_expect *exp, - unsigned int matchoff, - unsigned int matchlen) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook); - -unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int sdpoff, - enum sdp_header_types type, - enum sdp_header_types term, - const union nf_inet_addr *addr) - __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook); - -unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int matchoff, - unsigned int matchlen, - u_int16_t port) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook); - -unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int sdpoff, - const union nf_inet_addr *addr) - __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook); - -unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - struct nf_conntrack_expect *rtp_exp, - struct nf_conntrack_expect *rtcp_exp, - unsigned int mediaoff, - unsigned int medialen, - union nf_inet_addr *rtp_addr) - __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_media_hook); +const struct nf_nat_sip_hooks *nf_nat_sip_hooks; +EXPORT_SYMBOL_GPL(nf_nat_sip_hooks); static int string_len(const struct nf_conn *ct, const char *dptr, const char *limit, int *shift) @@ -914,8 +856,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, int direct_rtp = 0, skip_expect = 0, ret = NF_DROP; u_int16_t base_port; __be16 rtp_port, rtcp_port; - typeof(nf_nat_sdp_port_hook) nf_nat_sdp_port; - typeof(nf_nat_sdp_media_hook) nf_nat_sdp_media; + const struct nf_nat_sip_hooks *hooks; saddr = NULL; if (sip_direct_media) { @@ -972,9 +913,9 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, rtcp_port = htons(base_port + 1); if (direct_rtp) { - nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook); - if (nf_nat_sdp_port && - !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen, + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && + !hooks->sdp_port(skb, protoff, dataoff, dptr, datalen, mediaoff, medialen, ntohs(rtp_port))) goto err1; } @@ -996,10 +937,10 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, nf_ct_expect_init(rtcp_exp, class, nf_ct_l3num(ct), saddr, daddr, IPPROTO_UDP, NULL, &rtcp_port); - nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook); - if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp) - ret = nf_nat_sdp_media(skb, protoff, dataoff, dptr, datalen, - rtp_exp, rtcp_exp, + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && ct->status & IPS_NAT_MASK && !direct_rtp) + ret = hooks->sdp_media(skb, protoff, dataoff, dptr, + datalen, rtp_exp, rtcp_exp, mediaoff, medialen, daddr); else { if (nf_ct_expect_related(rtp_exp) == 0) { @@ -1053,13 +994,12 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, unsigned int caddr_len, maddr_len; unsigned int i; union nf_inet_addr caddr, maddr, rtp_addr; + const struct nf_nat_sip_hooks *hooks; unsigned int port; const struct sdp_media_type *t; int ret = NF_ACCEPT; - typeof(nf_nat_sdp_addr_hook) nf_nat_sdp_addr; - typeof(nf_nat_sdp_session_hook) nf_nat_sdp_session; - nf_nat_sdp_addr = rcu_dereference(nf_nat_sdp_addr_hook); + hooks = rcu_dereference(nf_nat_sip_hooks); /* Find beginning of session description */ if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen, @@ -1127,10 +1067,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, } /* Update media connection address if present */ - if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) { - ret = nf_nat_sdp_addr(skb, protoff, dataoff, + if (maddr_len && hooks && ct->status & IPS_NAT_MASK) { + ret = hooks->sdp_addr(skb, protoff, dataoff, dptr, datalen, mediaoff, - SDP_HDR_CONNECTION, SDP_HDR_MEDIA, + SDP_HDR_CONNECTION, + SDP_HDR_MEDIA, &rtp_addr); if (ret != NF_ACCEPT) { nf_ct_helper_log(skb, ct, "cannot mangle SDP"); @@ -1141,10 +1082,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, } /* Update session connection and owner addresses */ - nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook); - if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp_session(skb, protoff, dataoff, - dptr, datalen, sdpoff, &rtp_addr); + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && ct->status & IPS_NAT_MASK) + ret = hooks->sdp_session(skb, protoff, dataoff, + dptr, datalen, sdpoff, + &rtp_addr); return ret; } @@ -1244,11 +1186,11 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, unsigned int matchoff, matchlen; struct nf_conntrack_expect *exp; union nf_inet_addr *saddr, daddr; + const struct nf_nat_sip_hooks *hooks; __be16 port; u8 proto; unsigned int expires = 0; int ret; - typeof(nf_nat_sip_expect_hook) nf_nat_sip_expect; /* Expected connections can not register again. */ if (ct->status & IPS_EXPECTED) @@ -1311,10 +1253,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, exp->helper = nfct_help(ct)->helper; exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; - nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook); - if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK) - ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen, - exp, matchoff, matchlen); + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && ct->status & IPS_NAT_MASK) + ret = hooks->expect(skb, protoff, dataoff, dptr, datalen, + exp, matchoff, matchlen); else { if (nf_ct_expect_related(exp) != 0) { nf_ct_helper_log(skb, ct, "cannot add expectation"); @@ -1517,7 +1459,7 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, unsigned int protoff, unsigned int dataoff, const char **dptr, unsigned int *datalen) { - typeof(nf_nat_sip_hook) nf_nat_sip; + const struct nf_nat_sip_hooks *hooks; int ret; if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) @@ -1526,9 +1468,9 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, ret = process_sip_response(skb, protoff, dataoff, dptr, datalen); if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { - nf_nat_sip = rcu_dereference(nf_nat_sip_hook); - if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff, - dptr, datalen)) { + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && !hooks->msg(skb, protoff, dataoff, + dptr, datalen)) { nf_ct_helper_log(skb, ct, "cannot NAT SIP message"); ret = NF_DROP; } @@ -1548,7 +1490,6 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, s16 diff, tdiff = 0; int ret = NF_ACCEPT; bool term; - typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) @@ -1612,9 +1553,11 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, } if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { - nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook); - if (nf_nat_sip_seq_adjust) - nf_nat_sip_seq_adjust(skb, protoff, tdiff); + const struct nf_nat_sip_hooks *hooks; + + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks) + hooks->seq_adjust(skb, protoff, tdiff); } return ret; diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index f9790405b7ff..b4d691db955e 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -625,33 +625,26 @@ static struct nf_ct_helper_expectfn sip_nat = { static void __exit nf_nat_sip_fini(void) { - RCU_INIT_POINTER(nf_nat_sip_hook, NULL); - RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL); - RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL); + RCU_INIT_POINTER(nf_nat_sip_hooks, NULL); + nf_ct_helper_expectfn_unregister(&sip_nat); synchronize_rcu(); } +static const struct nf_nat_sip_hooks sip_hooks = { + .msg = nf_nat_sip, + .seq_adjust = nf_nat_sip_seq_adjust, + .expect = nf_nat_sip_expect, + .sdp_addr = nf_nat_sdp_addr, + .sdp_port = nf_nat_sdp_port, + .sdp_session = nf_nat_sdp_session, + .sdp_media = nf_nat_sdp_media, +}; + static int __init nf_nat_sip_init(void) { - BUG_ON(nf_nat_sip_hook != NULL); - BUG_ON(nf_nat_sip_seq_adjust_hook != NULL); - BUG_ON(nf_nat_sip_expect_hook != NULL); - BUG_ON(nf_nat_sdp_addr_hook != NULL); - BUG_ON(nf_nat_sdp_port_hook != NULL); - BUG_ON(nf_nat_sdp_session_hook != NULL); - BUG_ON(nf_nat_sdp_media_hook != NULL); - RCU_INIT_POINTER(nf_nat_sip_hook, nf_nat_sip); - RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, nf_nat_sip_seq_adjust); - RCU_INIT_POINTER(nf_nat_sip_expect_hook, nf_nat_sip_expect); - RCU_INIT_POINTER(nf_nat_sdp_addr_hook, nf_nat_sdp_addr); - RCU_INIT_POINTER(nf_nat_sdp_port_hook, nf_nat_sdp_port); - RCU_INIT_POINTER(nf_nat_sdp_session_hook, nf_nat_sdp_session); - RCU_INIT_POINTER(nf_nat_sdp_media_hook, nf_nat_sdp_media); + BUG_ON(nf_nat_sip_hooks != NULL); + RCU_INIT_POINTER(nf_nat_sip_hooks, &sip_hooks); nf_ct_helper_expectfn_register(&sip_nat); return 0; } -- cgit v1.2.3-71-gd317 From 4bcef89f0c6ca1eb4f1a789c2a226f4c02656a4b Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 28 Sep 2013 23:15:27 +0200 Subject: ssb: provide phy address for Gigabit Ethernet driver Add a function to provide the phy address which should be used to the Gigabit Ethernet driver connected to ssb. Signed-off-by: Hauke Mehrtens Reviewed-by: Nithin Nayak Sujir Signed-off-by: David S. Miller --- include/linux/ssb/ssb_driver_gige.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb_driver_gige.h b/include/linux/ssb/ssb_driver_gige.h index 86a12b0cb239..0688472500bb 100644 --- a/include/linux/ssb/ssb_driver_gige.h +++ b/include/linux/ssb/ssb_driver_gige.h @@ -108,6 +108,16 @@ static inline int ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr) return 0; } +/* Get the device phy address */ +static inline int ssb_gige_get_phyaddr(struct pci_dev *pdev) +{ + struct ssb_gige *dev = pdev_to_ssb_gige(pdev); + if (!dev) + return -ENODEV; + + return dev->dev->bus->sprom.et0phyaddr; +} + extern int ssb_gige_pcibios_plat_dev_init(struct ssb_device *sdev, struct pci_dev *pdev); extern int ssb_gige_map_irq(struct ssb_device *sdev, @@ -174,6 +184,10 @@ static inline int ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr) { return -ENODEV; } +static inline int ssb_gige_get_phyaddr(struct pci_dev *pdev) +{ + return -ENODEV; +} #endif /* CONFIG_SSB_DRIVER_GIGE */ #endif /* LINUX_SSB_DRIVER_GIGE_H_ */ -- cgit v1.2.3-71-gd317 From 0b3d8e087bbee2a4e3f479d538a7edd3f1d2950c Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Wed, 2 Oct 2013 05:58:32 +0400 Subject: include/linux/skbuff.h: move CONFIG_XFRM check inside the skb_sec_path() And thus we have only one function definition Signed-off-by: Denis Kirjanov Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 71b1d9402fd3..1cd32f96055e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2708,17 +2708,14 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, unsigned int num_tx_queues); -#ifdef CONFIG_XFRM static inline struct sec_path *skb_sec_path(struct sk_buff *skb) { +#ifdef CONFIG_XFRM return skb->sp; -} #else -static inline struct sec_path *skb_sec_path(struct sk_buff *skb) -{ return NULL; -} #endif +} /* Keeps track of mac header offset relative to skb->head. * It is useful for TSO of Tunneling protocol. e.g. GRE. -- cgit v1.2.3-71-gd317 From 5080546682bae3d32734b18e281091684f0ebbe4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Oct 2013 04:29:50 -0700 Subject: inet: consolidate INET_TW_MATCH TCP listener refactoring, part 2 : We can use a generic lookup, sockets being in whatever state, if we are sure all relevant fields are at the same place in all socket types (ESTABLISH, TIME_WAIT, SYN_RECV) This patch removes these macros : inet_addrpair, inet_addrpair, tw_addrpair, tw_portpair And adds : sk_portpair, sk_addrpair, sk_daddr, sk_rcv_saddr Then, INET_TW_MATCH() is really the same than INET_MATCH() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ipv6.h | 4 ++-- include/net/inet_hashtables.h | 26 ++++++++------------------ include/net/inet_sock.h | 2 -- include/net/inet_timewait_sock.h | 8 -------- include/net/sock.h | 4 ++++ net/ipv4/inet_connection_sock.c | 11 +++++------ net/ipv6/udp.c | 6 ++---- 7 files changed, 21 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 28ea38439313..b7f1f3bb346d 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -370,7 +370,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #endif /* IS_ENABLED(CONFIG_IPV6) */ #define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \ - ((inet_sk(__sk)->inet_portpair == (__ports)) && \ + (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ @@ -379,7 +379,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) net_eq(sock_net(__sk), (__net))) #define INET6_TW_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \ - ((inet_twsk(__sk)->tw_portpair == (__ports)) && \ + (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr)) && \ ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_rcv_saddr, (__daddr)) && \ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 594dfeead70f..10d6838378c3 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -302,35 +302,25 @@ static inline struct sock *inet_lookup_listener(struct net *net, ((__force __u64)(__be32)(__saddr))); #endif /* __BIG_ENDIAN */ #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \ - ((inet_sk(__sk)->inet_portpair == (__ports)) && \ - (inet_sk(__sk)->inet_addrpair == (__cookie)) && \ + (((__sk)->sk_portpair == (__ports)) && \ + ((__sk)->sk_addrpair == (__cookie)) && \ (!(__sk)->sk_bound_dev_if || \ ((__sk)->sk_bound_dev_if == (__dif))) && \ net_eq(sock_net(__sk), (__net))) -#define INET_TW_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)\ - ((inet_twsk(__sk)->tw_portpair == (__ports)) && \ - (inet_twsk(__sk)->tw_addrpair == (__cookie)) && \ - (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif))) && \ - net_eq(sock_net(__sk), (__net))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \ - ((inet_sk(__sk)->inet_portpair == (__ports)) && \ - (inet_sk(__sk)->inet_daddr == (__saddr)) && \ - (inet_sk(__sk)->inet_rcv_saddr == (__daddr)) && \ - (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif))) && \ - net_eq(sock_net(__sk), (__net))) -#define INET_TW_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \ - ((inet_twsk(__sk)->tw_portpair == (__ports)) && \ - (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ - (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ + (((__sk)->sk_portpair == (__ports)) && \ + ((__sk)->sk_daddr == (__saddr)) && \ + ((__sk)->sk_rcv_saddr == (__daddr)) && \ (!(__sk)->sk_bound_dev_if || \ ((__sk)->sk_bound_dev_if == (__dif))) && \ net_eq(sock_net(__sk), (__net))) #endif /* 64-bit arch */ +#define INET_TW_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)\ + INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) + /* * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need * not check it for lookups anymore, thanks Alexey. -DaveM diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index f3141773c14d..6d9a7e6eb5a4 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -146,10 +146,8 @@ struct inet_sock { /* Socket demultiplex comparisons on incoming packets. */ #define inet_daddr sk.__sk_common.skc_daddr #define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr -#define inet_addrpair sk.__sk_common.skc_addrpair #define inet_dport sk.__sk_common.skc_dport #define inet_num sk.__sk_common.skc_num -#define inet_portpair sk.__sk_common.skc_portpair __be32 inet_saddr; __s16 uc_ttl; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 828200ab1125..48fd3561722c 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -112,10 +112,8 @@ struct inet_timewait_sock { #define tw_net __tw_common.skc_net #define tw_daddr __tw_common.skc_daddr #define tw_rcv_saddr __tw_common.skc_rcv_saddr -#define tw_addrpair __tw_common.skc_addrpair #define tw_dport __tw_common.skc_dport #define tw_num __tw_common.skc_num -#define tw_portpair __tw_common.skc_portpair int tw_timeout; volatile unsigned char tw_substate; @@ -189,12 +187,6 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) return (struct inet_timewait_sock *)sk; } -static inline __be32 sk_rcv_saddr(const struct sock *sk) -{ -/* both inet_sk() and inet_twsk() store rcv_saddr in skc_rcv_saddr */ - return sk->__sk_common.skc_rcv_saddr; -} - void inet_twsk_put(struct inet_timewait_sock *tw); int inet_twsk_unhash(struct inet_timewait_sock *tw); diff --git a/include/net/sock.h b/include/net/sock.h index f0a44cc1ff9d..e3bf213be625 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -300,6 +300,10 @@ struct sock { #define sk_dontcopy_begin __sk_common.skc_dontcopy_begin #define sk_dontcopy_end __sk_common.skc_dontcopy_end #define sk_hash __sk_common.skc_hash +#define sk_portpair __sk_common.skc_portpair +#define sk_addrpair __sk_common.skc_addrpair +#define sk_daddr __sk_common.skc_daddr +#define sk_rcv_saddr __sk_common.skc_rcv_saddr #define sk_family __sk_common.skc_family #define sk_state __sk_common.skc_state #define sk_reuse __sk_common.skc_reuse diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7ac7aa11130e..56e82a4027b4 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -71,17 +71,16 @@ int inet_csk_bind_conflict(const struct sock *sk, (!reuseport || !sk2->sk_reuseport || (sk2->sk_state != TCP_TIME_WAIT && !uid_eq(uid, sock_i_uid(sk2))))) { - const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || - sk2_rcv_saddr == sk_rcv_saddr(sk)) + + if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr || + sk2->sk_rcv_saddr == sk->sk_rcv_saddr) break; } if (!relax && reuse && sk2->sk_reuse && sk2->sk_state != TCP_LISTEN) { - const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || - sk2_rcv_saddr == sk_rcv_saddr(sk)) + if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr || + sk2->sk_rcv_saddr == sk->sk_rcv_saddr) break; } } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 72b7eaaf3ca0..8119791e8a95 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -57,8 +57,6 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - __be32 sk1_rcv_saddr = sk_rcv_saddr(sk); - __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); @@ -67,8 +65,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) /* if both are mapped, treat as IPv4 */ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) return (!sk2_ipv6only && - (!sk1_rcv_saddr || !sk2_rcv_saddr || - sk1_rcv_saddr == sk2_rcv_saddr)); + (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr || + sk->sk_rcv_saddr == sk2->sk_rcv_saddr)); if (addr_type2 == IPV6_ADDR_ANY && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) -- cgit v1.2.3-71-gd317 From 5cde282938915f36a2e6769b51c24c4159654859 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 5 Oct 2013 19:26:05 -0700 Subject: net: Separate the close_list and the unreg_list v2 Separate the unreg_list and the close_list in dev_close_many preventing dev_close_many from permuting the unreg_list. The permutations of the unreg_list have resulted in cases where the loopback device is accessed it has been freed in code such as dst_ifdown. Resulting in subtle memory corruption. This is the second bug from sharing the storage between the close_list and the unreg_list. The issues that crop up with sharing are apparently too subtle to show up in normal testing or usage, so let's forget about being clever and use two separate lists. v2: Make all callers pass in a close_list to dev_close_many Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 25 ++++++++++++++----------- net/sched/sch_generic.c | 6 +++--- 3 files changed, 18 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f5cd464271bf..6d77e0f3cc10 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1143,6 +1143,7 @@ struct net_device { struct list_head dev_list; struct list_head napi_list; struct list_head unreg_list; + struct list_head close_list; /* directly linked devices, like slaves for bonding */ struct { diff --git a/net/core/dev.c b/net/core/dev.c index c25db20a4246..fa0b2b06c1a6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1307,7 +1307,7 @@ static int __dev_close_many(struct list_head *head) ASSERT_RTNL(); might_sleep(); - list_for_each_entry(dev, head, unreg_list) { + list_for_each_entry(dev, head, close_list) { call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); clear_bit(__LINK_STATE_START, &dev->state); @@ -1323,7 +1323,7 @@ static int __dev_close_many(struct list_head *head) dev_deactivate_many(head); - list_for_each_entry(dev, head, unreg_list) { + list_for_each_entry(dev, head, close_list) { const struct net_device_ops *ops = dev->netdev_ops; /* @@ -1351,7 +1351,7 @@ static int __dev_close(struct net_device *dev) /* Temporarily disable netpoll until the interface is down */ netpoll_rx_disable(dev); - list_add(&dev->unreg_list, &single); + list_add(&dev->close_list, &single); retval = __dev_close_many(&single); list_del(&single); @@ -1362,21 +1362,20 @@ static int __dev_close(struct net_device *dev) static int dev_close_many(struct list_head *head) { struct net_device *dev, *tmp; - LIST_HEAD(tmp_list); - list_for_each_entry_safe(dev, tmp, head, unreg_list) + /* Remove the devices that don't need to be closed */ + list_for_each_entry_safe(dev, tmp, head, close_list) if (!(dev->flags & IFF_UP)) - list_move(&dev->unreg_list, &tmp_list); + list_del_init(&dev->close_list); __dev_close_many(head); - list_for_each_entry(dev, head, unreg_list) { + list_for_each_entry_safe(dev, tmp, head, close_list) { rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); call_netdevice_notifiers(NETDEV_DOWN, dev); + list_del_init(&dev->close_list); } - /* rollback_registered_many needs the complete original list */ - list_splice(&tmp_list, head); return 0; } @@ -1397,7 +1396,7 @@ int dev_close(struct net_device *dev) /* Block netpoll rx while the interface is going down */ netpoll_rx_disable(dev); - list_add(&dev->unreg_list, &single); + list_add(&dev->close_list, &single); dev_close_many(&single); list_del(&single); @@ -5439,6 +5438,7 @@ static void net_set_todo(struct net_device *dev) static void rollback_registered_many(struct list_head *head) { struct net_device *dev, *tmp; + LIST_HEAD(close_head); BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -5461,7 +5461,9 @@ static void rollback_registered_many(struct list_head *head) } /* If device is running, close it first. */ - dev_close_many(head); + list_for_each_entry(dev, head, unreg_list) + list_add_tail(&dev->close_list, &close_head); + dev_close_many(&close_head); list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ @@ -6257,6 +6259,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); + INIT_LIST_HEAD(&dev->close_list); INIT_LIST_HEAD(&dev->link_watch_list); INIT_LIST_HEAD(&dev->adj_list.upper); INIT_LIST_HEAD(&dev->adj_list.lower); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e7121d29c4bd..7fc899a943a8 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -829,7 +829,7 @@ void dev_deactivate_many(struct list_head *head) struct net_device *dev; bool sync_needed = false; - list_for_each_entry(dev, head, unreg_list) { + list_for_each_entry(dev, head, close_list) { netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); if (dev_ingress_queue(dev)) @@ -848,7 +848,7 @@ void dev_deactivate_many(struct list_head *head) synchronize_net(); /* Wait for outstanding qdisc_run calls. */ - list_for_each_entry(dev, head, unreg_list) + list_for_each_entry(dev, head, close_list) while (some_qdisc_is_busy(dev)) yield(); } @@ -857,7 +857,7 @@ void dev_deactivate(struct net_device *dev) { LIST_HEAD(single); - list_add(&dev->unreg_list, &single); + list_add(&dev->close_list, &single); dev_deactivate_many(&single); list_del(&single); } -- cgit v1.2.3-71-gd317 From efe4208f47f907b86f528788da711e8ab9dea44d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Oct 2013 15:42:29 -0700 Subject: ipv6: make lookups simpler and faster TCP listener refactoring, part 4 : To speed up inet lookups, we moved IPv4 addresses from inet to struct sock_common Now is time to do the same for IPv6, because it permits us to have fast lookups for all kind of sockets, including upcoming SYN_RECV. Getting IPv6 addresses in TCP lookups currently requires two extra cache lines, plus a dereference (and memory stall). inet6_sk(sk) does the dereference of inet_sk(__sk)->pinet6 This patch is way bigger than its IPv4 counter part, because for IPv4, we could add aliases (inet_daddr, inet_rcv_saddr), while on IPv6, it's not doable easily. inet6_sk(sk)->daddr becomes sk->sk_v6_daddr inet6_sk(sk)->rcv_saddr becomes sk->sk_v6_rcv_saddr And timewait socket also have tw->tw_v6_daddr & tw->tw_v6_rcv_saddr at the same offset. We get rid of INET6_TW_MATCH() as INET6_MATCH() is now the generic macro. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ipv6.h | 46 +++----------------- include/net/inet6_hashtables.h | 5 +-- include/net/inet_timewait_sock.h | 4 +- include/net/ip.h | 2 +- include/net/ip6_checksum.h | 2 +- include/net/sock.h | 9 ++++ net/dccp/ipv6.c | 24 +++++------ net/dccp/ipv6.h | 1 - net/dccp/minisocks.c | 7 +--- net/ipv4/inet_diag.c | 35 +++++++--------- net/ipv4/ping.c | 15 ++++--- net/ipv4/tcp_metrics.c | 10 ++--- net/ipv4/tcp_minisocks.c | 7 +--- net/ipv4/tcp_probe.c | 29 +++++-------- net/ipv4/tcp_timer.c | 3 +- net/ipv6/af_inet6.c | 10 ++--- net/ipv6/datagram.c | 25 ++++++----- net/ipv6/inet6_connection_sock.c | 7 ++-- net/ipv6/inet6_hashtables.c | 58 +++++++++----------------- net/ipv6/ipv6_sockglue.c | 7 ++-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 4 +- net/ipv6/ping.c | 2 +- net/ipv6/raw.c | 17 ++++---- net/ipv6/tcp_ipv6.c | 44 ++++++++++--------- net/ipv6/udp.c | 48 ++++++++++----------- net/l2tp/l2tp_core.c | 10 ++--- net/l2tp/l2tp_debugfs.c | 5 ++- net/l2tp/l2tp_ip6.c | 16 +++---- net/l2tp/l2tp_netlink.c | 4 +- net/l2tp/l2tp_ppp.c | 12 +++--- net/netfilter/xt_TPROXY.c | 2 +- net/netfilter/xt_socket.c | 2 +- net/sctp/ipv6.c | 22 +++++----- net/sunrpc/svcsock.c | 2 +- security/lsm_audit.c | 5 +-- 35 files changed, 213 insertions(+), 288 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index b7f1f3bb346d..35f6c1b562c4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -141,8 +141,6 @@ struct ipv6_fl_socklist; */ struct ipv6_pinfo { struct in6_addr saddr; - struct in6_addr rcv_saddr; - struct in6_addr daddr; struct in6_pktinfo sticky_pktinfo; const struct in6_addr *daddr_cache; #ifdef CONFIG_IPV6_SUBTREES @@ -256,22 +254,10 @@ struct tcp6_sock { extern int inet6_sk_rebuild_header(struct sock *sk); -struct inet6_timewait_sock { - struct in6_addr tw_v6_daddr; - struct in6_addr tw_v6_rcv_saddr; -}; - struct tcp6_timewait_sock { struct tcp_timewait_sock tcp6tw_tcp; - struct inet6_timewait_sock tcp6tw_inet6; }; -static inline struct inet6_timewait_sock *inet6_twsk(const struct sock *sk) -{ - return (struct inet6_timewait_sock *)(((u8 *)sk) + - inet_twsk(sk)->tw_ipv6_offset); -} - #if IS_ENABLED(CONFIG_IPV6) static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) { @@ -321,21 +307,11 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, #define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only) #define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk)) -static inline u16 inet6_tw_offset(const struct proto *prot) -{ - return prot->twsk_prot->twsk_obj_size - - sizeof(struct inet6_timewait_sock); -} - -static inline struct in6_addr *__inet6_rcv_saddr(const struct sock *sk) +static inline const struct in6_addr *inet6_rcv_saddr(const struct sock *sk) { - return likely(sk->sk_state != TCP_TIME_WAIT) ? - &inet6_sk(sk)->rcv_saddr : &inet6_twsk(sk)->tw_v6_rcv_saddr; -} - -static inline struct in6_addr *inet6_rcv_saddr(const struct sock *sk) -{ - return sk->sk_family == AF_INET6 ? __inet6_rcv_saddr(sk) : NULL; + if (sk->sk_family == AF_INET6) + return &sk->sk_v6_rcv_saddr; + return NULL; } static inline int inet_v6_ipv6only(const struct sock *sk) @@ -363,7 +339,6 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) return NULL; } -#define __inet6_rcv_saddr(__sk) NULL #define inet6_rcv_saddr(__sk) NULL #define tcp_twsk_ipv6only(__sk) 0 #define inet_v6_ipv6only(__sk) 0 @@ -372,19 +347,10 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \ (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ - ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ - ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ + ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \ + ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \ (!(__sk)->sk_bound_dev_if || \ ((__sk)->sk_bound_dev_if == (__dif))) && \ net_eq(sock_net(__sk), (__net))) -#define INET6_TW_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \ - (((__sk)->sk_portpair == (__ports)) && \ - ((__sk)->sk_family == AF_INET6) && \ - ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr)) && \ - ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_rcv_saddr, (__daddr)) && \ - (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif))) && \ - net_eq(sock_net(__sk), (__net))) - #endif /* _IPV6_H */ diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index f52fa88feb64..a105d1a2fc00 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -43,9 +43,8 @@ static inline unsigned int inet6_ehashfn(struct net *net, static inline int inet6_sk_ehashfn(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - const struct in6_addr *laddr = &np->rcv_saddr; - const struct in6_addr *faddr = &np->daddr; + const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr; + const struct in6_addr *faddr = &sk->sk_v6_daddr; const __u16 lport = inet->inet_num; const __be16 fport = inet->inet_dport; struct net *net = sock_net(sk); diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index de9e3ab7d43d..b647c6270eb7 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -116,7 +116,9 @@ struct inet_timewait_sock { #define tw_prot __tw_common.skc_prot #define tw_net __tw_common.skc_net #define tw_daddr __tw_common.skc_daddr +#define tw_v6_daddr __tw_common.skc_v6_daddr #define tw_rcv_saddr __tw_common.skc_rcv_saddr +#define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr #define tw_dport __tw_common.skc_dport #define tw_num __tw_common.skc_num @@ -133,7 +135,7 @@ struct inet_timewait_sock { tw_transparent : 1, tw_pad : 6, /* 6 bits hole */ tw_tos : 8, - tw_ipv6_offset : 16; + tw_pad2 : 16 /* 16 bits hole */ kmemcheck_bitfield_end(flags); u32 tw_ttd; struct inet_bind_bucket *tw_tb; diff --git a/include/net/ip.h b/include/net/ip.h index b39ebe5339ac..217bc5bfc6c6 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -374,7 +374,7 @@ static __inline__ void inet_reset_saddr(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); memset(&np->saddr, 0, sizeof(np->saddr)); - memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr)); + memset(&sk->sk_v6_rcv_saddr, 0, sizeof(sk->sk_v6_rcv_saddr)); } #endif } diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h index 7686e3f5033d..1944406949ba 100644 --- a/include/net/ip6_checksum.h +++ b/include/net/ip6_checksum.h @@ -70,7 +70,7 @@ static inline void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); - __tcp_v6_send_check(skb, &np->saddr, &np->daddr); + __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr); } int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto); diff --git a/include/net/sock.h b/include/net/sock.h index 3f3e48c4704d..7e50df5c71d4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -191,6 +191,12 @@ struct sock_common { #ifdef CONFIG_NET_NS struct net *skc_net; #endif + +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr skc_v6_daddr; + struct in6_addr skc_v6_rcv_saddr; +#endif + /* * fields between dontcopy_begin/dontcopy_end * are not copied in sock_copy() @@ -314,6 +320,9 @@ struct sock { #define sk_bind_node __sk_common.skc_bind_node #define sk_prot __sk_common.skc_prot #define sk_net __sk_common.skc_net +#define sk_v6_daddr __sk_common.skc_v6_daddr +#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr + socket_lock_t sk_lock; struct sk_buff_head sk_receive_queue; /* diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6cf9f7782ad4..7f075b83128a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -67,7 +67,7 @@ static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb) struct dccp_hdr *dh = dccp_hdr(skb); dccp_csum_outgoing(skb); - dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr); + dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr); } static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb) @@ -467,11 +467,11 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr); + ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr); ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); - newnp->rcv_saddr = newnp->saddr; + newsk->sk_v6_rcv_saddr = newnp->saddr; inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; newsk->sk_backlog_rcv = dccp_v4_do_rcv; @@ -538,9 +538,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - newnp->daddr = ireq6->rmt_addr; + newsk->sk_v6_daddr = ireq6->rmt_addr; newnp->saddr = ireq6->loc_addr; - newnp->rcv_saddr = ireq6->loc_addr; + newsk->sk_v6_rcv_saddr = ireq6->loc_addr; newsk->sk_bound_dev_if = ireq6->iif; /* Now IPv6 options... @@ -885,7 +885,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, return -EINVAL; } - np->daddr = usin->sin6_addr; + sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; /* @@ -915,16 +915,16 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, goto failure; } ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); + ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &sk->sk_v6_rcv_saddr); return err; } - if (!ipv6_addr_any(&np->rcv_saddr)) - saddr = &np->rcv_saddr; + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) + saddr = &sk->sk_v6_rcv_saddr; fl6.flowi6_proto = IPPROTO_DCCP; - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; fl6.saddr = saddr ? *saddr : np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.fl6_dport = usin->sin6_port; @@ -941,7 +941,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (saddr == NULL) { saddr = &fl6.saddr; - np->rcv_saddr = *saddr; + sk->sk_v6_rcv_saddr = *saddr; } /* set the source address */ @@ -963,7 +963,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, goto late_failure; dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32, - np->daddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, inet->inet_sport, inet->inet_dport); err = dccp_connect(sk); diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h index 6eef81fdbe56..6604fc3fe953 100644 --- a/net/dccp/ipv6.h +++ b/net/dccp/ipv6.h @@ -30,7 +30,6 @@ struct dccp6_request_sock { struct dccp6_timewait_sock { struct inet_timewait_sock inet; - struct inet6_timewait_sock tw6; }; #endif /* _DCCP_IPV6_H */ diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 662071b249cc..32e80d96d4c0 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -56,12 +56,9 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); - struct inet6_timewait_sock *tw6; - tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); - tw6 = inet6_twsk((struct sock *)tw); - tw6->tw_v6_daddr = np->daddr; - tw6->tw_v6_rcv_saddr = np->rcv_saddr; + tw->tw_v6_daddr = sk->sk_v6_daddr; + tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_ipv6only = np->ipv6only; } #endif diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 8e1e40653357..ecc179d676e4 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -121,13 +121,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, #if IS_ENABLED(CONFIG_IPV6) if (r->idiag_family == AF_INET6) { - const struct ipv6_pinfo *np = inet6_sk(sk); - *(struct in6_addr *)r->id.idiag_src = np->rcv_saddr; - *(struct in6_addr *)r->id.idiag_dst = np->daddr; + *(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr; + *(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr; if (ext & (1 << (INET_DIAG_TCLASS - 1))) - if (nla_put_u8(skb, INET_DIAG_TCLASS, np->tclass) < 0) + if (nla_put_u8(skb, INET_DIAG_TCLASS, + inet6_sk(sk)->tclass) < 0) goto errout; } #endif @@ -255,11 +255,8 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->idiag_inode = 0; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == AF_INET6) { - const struct inet6_timewait_sock *tw6 = - inet6_twsk((struct sock *)tw); - - *(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr; - *(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr; + *(struct in6_addr *)r->id.idiag_src = tw->tw_v6_rcv_saddr; + *(struct in6_addr *)r->id.idiag_dst = tw->tw_v6_daddr; } #endif @@ -273,10 +270,11 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) - return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, - skb, r, portid, seq, nlmsg_flags, - unlh); - return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh); + return inet_twsk_diag_fill(inet_twsk(sk), skb, r, portid, seq, + nlmsg_flags, unlh); + + return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, + nlmsg_flags, unlh); } int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, @@ -489,10 +487,9 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) entry.family = sk->sk_family; #if IS_ENABLED(CONFIG_IPV6) if (entry.family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - entry.saddr = np->rcv_saddr.s6_addr32; - entry.daddr = np->daddr.s6_addr32; + entry.saddr = sk->sk_v6_rcv_saddr.s6_addr32; + entry.daddr = sk->sk_v6_daddr.s6_addr32; } else #endif { @@ -649,10 +646,8 @@ static int inet_twsk_diag_dump(struct sock *sk, entry.family = tw->tw_family; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == AF_INET6) { - struct inet6_timewait_sock *tw6 = - inet6_twsk((struct sock *)tw); - entry.saddr = tw6->tw_v6_rcv_saddr.s6_addr32; - entry.daddr = tw6->tw_v6_daddr.s6_addr32; + entry.saddr = tw->tw_v6_rcv_saddr.s6_addr32; + entry.daddr = tw->tw_v6_daddr.s6_addr32; } else #endif { diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index a62610443152..ccefc07beacd 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -202,15 +202,14 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) #if IS_ENABLED(CONFIG_IPV6) } else if (skb->protocol == htons(ETH_P_IPV6) && sk->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk, (int) isk->inet_num, - &inet6_sk(sk)->rcv_saddr, + &sk->sk_v6_rcv_saddr, sk->sk_bound_dev_if); - if (!ipv6_addr_any(&np->rcv_saddr) && - !ipv6_addr_equal(&np->rcv_saddr, + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) && + !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, &ipv6_hdr(skb)->daddr)) continue; #endif @@ -362,7 +361,7 @@ static void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) } else if (saddr->sa_family == AF_INET6) { struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr; struct ipv6_pinfo *np = inet6_sk(sk); - np->rcv_saddr = np->saddr = addr->sin6_addr; + sk->sk_v6_rcv_saddr = np->saddr = addr->sin6_addr; #endif } } @@ -376,7 +375,7 @@ static void ping_clear_saddr(struct sock *sk, int dif) #if IS_ENABLED(CONFIG_IPV6) } else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr)); + memset(&sk->sk_v6_rcv_saddr, 0, sizeof(sk->sk_v6_rcv_saddr)); memset(&np->saddr, 0, sizeof(np->saddr)); #endif } @@ -418,7 +417,7 @@ int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) err = 0; if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) || (sk->sk_family == AF_INET6 && - !ipv6_addr_any(&inet6_sk(sk)->rcv_saddr))) + !ipv6_addr_any(&sk->sk_v6_rcv_saddr))) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; if (snum) @@ -429,7 +428,7 @@ int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) - memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr)); + memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr)); #endif sk_dst_reset(sk); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 52f3c6b971d2..27535fd5ea10 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -240,7 +240,6 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw) { - struct inet6_timewait_sock *tw6; struct tcp_metrics_block *tm; struct inetpeer_addr addr; unsigned int hash; @@ -253,9 +252,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock hash = (__force unsigned int) addr.addr.a4; break; case AF_INET6: - tw6 = inet6_twsk((struct sock *)tw); - *(struct in6_addr *)addr.addr.a6 = tw6->tw_v6_daddr; - hash = ipv6_addr_hash(&tw6->tw_v6_daddr); + *(struct in6_addr *)addr.addr.a6 = tw->tw_v6_daddr; + hash = ipv6_addr_hash(&tw->tw_v6_daddr); break; default: return NULL; @@ -289,8 +287,8 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, hash = (__force unsigned int) addr.addr.a4; break; case AF_INET6: - *(struct in6_addr *)addr.addr.a6 = inet6_sk(sk)->daddr; - hash = ipv6_addr_hash(&inet6_sk(sk)->daddr); + *(struct in6_addr *)addr.addr.a6 = sk->sk_v6_daddr; + hash = ipv6_addr_hash(&sk->sk_v6_daddr); break; default: return NULL; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 58a3e69aef64..97b684159861 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -293,12 +293,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - struct inet6_timewait_sock *tw6; - tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); - tw6 = inet6_twsk((struct sock *)tw); - tw6->tw_v6_daddr = np->daddr; - tw6->tw_v6_rcv_saddr = np->rcv_saddr; + tw->tw_v6_daddr = sk->sk_v6_daddr; + tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_tclass = np->tclass; tw->tw_ipv6only = np->ipv6only; } diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 611beab38a00..8b97d71e193b 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -101,22 +101,6 @@ static inline int tcp_probe_avail(void) si4.sin_addr.s_addr = inet->inet_##mem##addr; \ } while (0) \ -#if IS_ENABLED(CONFIG_IPV6) -#define tcp_probe_copy_fl_to_si6(inet, si6, mem) \ - do { \ - struct ipv6_pinfo *pi6 = inet->pinet6; \ - si6.sin6_family = AF_INET6; \ - si6.sin6_port = inet->inet_##mem##port; \ - si6.sin6_addr = pi6->mem##addr; \ - si6.sin6_flowinfo = 0; /* No need here. */ \ - si6.sin6_scope_id = 0; /* No need here. */ \ - } while (0) -#else -#define tcp_probe_copy_fl_to_si6(fl, si6, mem) \ - do { \ - memset(&si6, 0, sizeof(si6)); \ - } while (0) -#endif /* * Hook inserted to be called before each receive packet. @@ -147,8 +131,17 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d); break; case AF_INET6: - tcp_probe_copy_fl_to_si6(inet, p->src.v6, s); - tcp_probe_copy_fl_to_si6(inet, p->dst.v6, d); + memset(&p->src.v6, 0, sizeof(p->src.v6)); + memset(&p->dst.v6, 0, sizeof(p->dst.v6)); +#if IS_ENABLED(CONFIG_IPV6) + p->src.v6.sin6_family = AF_INET6; + p->src.v6.sin6_port = inet->inet_sport; + p->src.v6.sin6_addr = inet6_sk(sk)->saddr; + + p->dst.v6.sin6_family = AF_INET6; + p->dst.v6.sin6_port = inet->inet_dport; + p->dst.v6.sin6_addr = sk->sk_v6_daddr; +#endif break; default: BUG(); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 4b85e6f636c9..af07b5b23ebf 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -374,9 +374,8 @@ void tcp_retransmit_timer(struct sock *sk) } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"), - &np->daddr, + &sk->sk_v6_daddr, ntohs(inet->inet_dport), inet->inet_num, tp->snd_una, tp->snd_nxt); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4966b124dc2e..a2cb07cd3850 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -364,7 +364,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) inet->inet_rcv_saddr = v4addr; inet->inet_saddr = v4addr; - np->rcv_saddr = addr->sin6_addr; + sk->sk_v6_rcv_saddr = addr->sin6_addr; if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; @@ -461,14 +461,14 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, peer == 1) return -ENOTCONN; sin->sin6_port = inet->inet_dport; - sin->sin6_addr = np->daddr; + sin->sin6_addr = sk->sk_v6_daddr; if (np->sndflow) sin->sin6_flowinfo = np->flow_label; } else { - if (ipv6_addr_any(&np->rcv_saddr)) + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) sin->sin6_addr = np->saddr; else - sin->sin6_addr = np->rcv_saddr; + sin->sin6_addr = sk->sk_v6_rcv_saddr; sin->sin6_port = inet->inet_sport; } @@ -655,7 +655,7 @@ int inet6_sk_rebuild_header(struct sock *sk) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = sk->sk_protocol; - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; fl6.saddr = np->saddr; fl6.flowlabel = np->flow_label; fl6.flowi6_oif = sk->sk_bound_dev_if; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 48b6bd2a9a14..a454b0ff57c7 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -107,16 +107,16 @@ ipv4_connected: if (err) goto out; - ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr); + ipv6_addr_set_v4mapped(inet->inet_daddr, &sk->sk_v6_daddr); if (ipv6_addr_any(&np->saddr) || ipv6_mapped_addr_any(&np->saddr)) ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - if (ipv6_addr_any(&np->rcv_saddr) || - ipv6_mapped_addr_any(&np->rcv_saddr)) { + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) || + ipv6_mapped_addr_any(&sk->sk_v6_rcv_saddr)) { ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, - &np->rcv_saddr); + &sk->sk_v6_rcv_saddr); if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); } @@ -145,7 +145,7 @@ ipv4_connected: } } - np->daddr = *daddr; + sk->sk_v6_daddr = *daddr; np->flow_label = fl6.flowlabel; inet->inet_dport = usin->sin6_port; @@ -156,7 +156,7 @@ ipv4_connected: */ fl6.flowi6_proto = sk->sk_protocol; - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; fl6.saddr = np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; @@ -183,16 +183,16 @@ ipv4_connected: if (ipv6_addr_any(&np->saddr)) np->saddr = fl6.saddr; - if (ipv6_addr_any(&np->rcv_saddr)) { - np->rcv_saddr = fl6.saddr; + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + sk->sk_v6_rcv_saddr = fl6.saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); } ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl6.daddr, &np->daddr) ? - &np->daddr : NULL, + ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? + &sk->sk_v6_daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_equal(&fl6.saddr, &np->saddr) ? &np->saddr : @@ -883,11 +883,10 @@ EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl); void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp, __u16 destp, int bucket) { - struct ipv6_pinfo *np = inet6_sk(sp); const struct in6_addr *dest, *src; - dest = &np->daddr; - src = &np->rcv_saddr; + dest = &sp->sk_v6_daddr; + src = &sp->sk_v6_rcv_saddr; seq_printf(seq, "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n", diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index e4311cbc8b4e..b7400b480e74 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -165,11 +165,10 @@ EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add); void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) { - struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; sin6->sin6_family = AF_INET6; - sin6->sin6_addr = np->daddr; + sin6->sin6_addr = sk->sk_v6_daddr; sin6->sin6_port = inet_sk(sk)->inet_dport; /* We do not store received flowlabel for TCP */ sin6->sin6_flowinfo = 0; @@ -203,7 +202,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = sk->sk_protocol; - fl6->daddr = np->daddr; + fl6->daddr = sk->sk_v6_daddr; fl6->saddr = np->saddr; fl6->flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl6->flowlabel); @@ -245,7 +244,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) skb_dst_set_noref(skb, dst); /* Restore final destination back after routing done */ - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); rcu_read_unlock(); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 46440777e1c5..842d833dfc18 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -89,30 +89,16 @@ begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) continue; - if (sk->sk_state == TCP_TIME_WAIT) { - if (!INET6_TW_MATCH(sk, net, saddr, daddr, ports, dif)) - continue; - } else { - if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif)) - continue; - } + if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif)) + continue; if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) goto out; - if (sk->sk_state == TCP_TIME_WAIT) { - if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr, - ports, dif))) { - sock_gen_put(sk); - goto begin; - } - } else { - if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, - ports, dif))) { - sock_put(sk); - goto begin; - } - goto found; + if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) { + sock_gen_put(sk); + goto begin; } + goto found; } if (get_nulls_value(node) != slot) goto begin; @@ -133,11 +119,10 @@ static inline int compute_score(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum && sk->sk_family == PF_INET6) { - const struct ipv6_pinfo *np = inet6_sk(sk); score = 1; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; score++; } @@ -229,9 +214,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - const struct in6_addr *daddr = &np->rcv_saddr; - const struct in6_addr *saddr = &np->daddr; + const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr; + const struct in6_addr *saddr = &sk->sk_v6_daddr; const int dif = sk->sk_bound_dev_if; const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); struct net *net = sock_net(sk); @@ -250,23 +234,19 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, if (sk2->sk_hash != hash) continue; - if (sk2->sk_state == TCP_TIME_WAIT) { - if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr, - ports, dif))) { + if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) { + if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); if (twsk_unique(sk, sk2, twp)) - goto unique; - else - goto not_unique; + break; } - } - if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) goto not_unique; + } } -unique: /* Must record num and sport now. Otherwise we will see - * in hash table socket with a funny identity. */ + * in hash table socket with a funny identity. + */ inet->inet_num = lport; inet->inet_sport = htons(lport); sk->sk_hash = hash; @@ -299,9 +279,9 @@ not_unique: static inline u32 inet6_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32, - np->daddr.s6_addr32, + + return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, inet->inet_dport); } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d1e2e8ef29c5..4919a8e6063e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -174,7 +174,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } if (ipv6_only_sock(sk) || - !ipv6_addr_v4mapped(&np->daddr)) { + !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) { retv = -EADDRNOTAVAIL; break; } @@ -1011,7 +1011,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : np->sticky_pktinfo.ipi6_ifindex; - src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr; + src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { @@ -1026,7 +1026,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : np->sticky_pktinfo.ipi6_ifindex; - src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr; + src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : + np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index d6e4dd8b58df..54b75ead5a69 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -297,9 +297,9 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; struct nf_conn *ct; - tuple.src.u3.in6 = inet6->rcv_saddr; + tuple.src.u3.in6 = sk->sk_v6_rcv_saddr; tuple.src.u.tcp.port = inet->inet_sport; - tuple.dst.u3.in6 = inet6->daddr; + tuple.dst.u3.in6 = sk->sk_v6_daddr; tuple.dst.u.tcp.port = inet->inet_dport; tuple.dst.protonum = sk->sk_protocol; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 18f19df4189f..8815e31a87fe 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -116,7 +116,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; } if (!iif) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a4ed2416399e..3c00842b0079 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -77,20 +77,19 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, sk_for_each_from(sk) if (inet_sk(sk)->inet_num == num) { - struct ipv6_pinfo *np = inet6_sk(sk); if (!net_eq(sock_net(sk), net)) continue; - if (!ipv6_addr_any(&np->daddr) && - !ipv6_addr_equal(&np->daddr, rmt_addr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr) && + !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) continue; if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) continue; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)) goto found; if (is_multicast && inet6_mc_check(sk, loc_addr, rmt_addr)) @@ -302,7 +301,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) } inet->inet_rcv_saddr = inet->inet_saddr = v4addr; - np->rcv_saddr = addr->sin6_addr; + sk->sk_v6_rcv_saddr = addr->sin6_addr; if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; err = 0; @@ -804,8 +803,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, * sk->sk_dst_cache. */ if (sk->sk_state == TCP_ESTABLISHED && - ipv6_addr_equal(daddr, &np->daddr)) - daddr = &np->daddr; + ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) + daddr = &sk->sk_v6_daddr; if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && @@ -816,7 +815,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, return -EDESTADDRREQ; proto = inet->inet_num; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; fl6.flowlabel = np->flow_label; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 528e61afaf5e..541dfc40c7b3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -192,13 +192,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } if (tp->rx_opt.ts_recent_stamp && - !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) { + !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; tp->write_seq = 0; } - np->daddr = usin->sin6_addr; + sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; /* @@ -237,17 +237,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } else { ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, - &np->rcv_saddr); + &sk->sk_v6_rcv_saddr); } return err; } - if (!ipv6_addr_any(&np->rcv_saddr)) - saddr = &np->rcv_saddr; + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) + saddr = &sk->sk_v6_rcv_saddr; fl6.flowi6_proto = IPPROTO_TCP; - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; fl6.saddr = saddr ? *saddr : np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; @@ -266,7 +266,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (saddr == NULL) { saddr = &fl6.saddr; - np->rcv_saddr = *saddr; + sk->sk_v6_rcv_saddr = *saddr; } /* set the source address */ @@ -279,7 +279,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, rt = (struct rt6_info *) dst; if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && - ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) + ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr)) tcp_fetch_timewait_stamp(sk, dst); icsk->icsk_ext_hdr_len = 0; @@ -298,7 +298,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (!tp->write_seq && likely(!tp->repair)) tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, - np->daddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, inet->inet_sport, inet->inet_dport); @@ -515,7 +515,7 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk, struct sock *addr_sk) { - return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr); + return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr); } static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk, @@ -621,7 +621,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, if (sk) { saddr = &inet6_sk(sk)->saddr; - daddr = &inet6_sk(sk)->daddr; + daddr = &sk->sk_v6_daddr; } else if (req) { saddr = &inet6_rsk(req)->loc_addr; daddr = &inet6_rsk(req)->rmt_addr; @@ -1116,11 +1116,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr); + ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr); ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); - newnp->rcv_saddr = newnp->saddr; + newsk->sk_v6_rcv_saddr = newnp->saddr; inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; newsk->sk_backlog_rcv = tcp_v4_do_rcv; @@ -1185,9 +1185,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - newnp->daddr = treq->rmt_addr; + newsk->sk_v6_daddr = treq->rmt_addr; newnp->saddr = treq->loc_addr; - newnp->rcv_saddr = treq->loc_addr; + newsk->sk_v6_rcv_saddr = treq->loc_addr; newsk->sk_bound_dev_if = treq->iif; /* Now IPv6 options... @@ -1244,13 +1244,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ - if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) { + if ((key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr)) != NULL) { /* We're using one, so create a matching key * on the newsk structure. If we fail to get * memory, then we end up not copying the key * across. Shucks. */ - tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr, + tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, AF_INET6, key->key, key->keylen, sk_gfp_atomic(sk, GFP_ATOMIC)); } @@ -1758,10 +1758,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) const struct inet_sock *inet = inet_sk(sp); const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); - const struct ipv6_pinfo *np = inet6_sk(sp); - dest = &np->daddr; - src = &np->rcv_saddr; + dest = &sp->sk_v6_daddr; + src = &sp->sk_v6_rcv_saddr; destp = ntohs(inet->inet_dport); srcp = ntohs(inet->inet_sport); @@ -1810,11 +1809,10 @@ static void get_timewait6_sock(struct seq_file *seq, { const struct in6_addr *dest, *src; __u16 destp, srcp; - const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); s32 delta = tw->tw_ttd - inet_tw_time_stamp(); - dest = &tw6->tw_v6_daddr; - src = &tw6->tw_v6_rcv_saddr; + dest = &tw->tw_v6_daddr; + src = &tw->tw_v6_rcv_saddr; destp = ntohs(tw->tw_dport); srcp = ntohs(tw->tw_sport); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 37532478e3ba..b496de19a341 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -55,11 +55,10 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { - const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); - int addr_type = ipv6_addr_type(sk_rcv_saddr6); + int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; /* if both are mapped, treat as IPv4 */ @@ -77,7 +76,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) return 1; if (sk2_rcv_saddr6 && - ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) + ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6)) return 1; return 0; @@ -105,7 +104,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) unsigned int hash2_nulladdr = udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum); unsigned int hash2_partial = - udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0); + udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0); /* precompute partial secondary hash */ udp_sk(sk)->udp_portaddr_hash = hash2_partial; @@ -115,7 +114,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) static void udp_v6_rehash(struct sock *sk) { u16 new_hash = udp6_portaddr_hash(sock_net(sk), - &inet6_sk(sk)->rcv_saddr, + &sk->sk_v6_rcv_saddr, inet_sk(sk)->inet_num); udp_lib_rehash(sk, new_hash); @@ -131,7 +130,6 @@ static inline int compute_score(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); score = 0; @@ -140,13 +138,13 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score++; } - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; score++; } - if (!ipv6_addr_any(&np->daddr)) { - if (!ipv6_addr_equal(&np->daddr, saddr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) return -1; score++; } @@ -169,10 +167,9 @@ static inline int compute_score2(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; score = 0; if (inet->inet_dport) { @@ -180,8 +177,8 @@ static inline int compute_score2(struct sock *sk, struct net *net, return -1; score++; } - if (!ipv6_addr_any(&np->daddr)) { - if (!ipv6_addr_equal(&np->daddr, saddr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) return -1; score++; } @@ -549,7 +546,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; - if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) { + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); } @@ -690,20 +687,19 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, if (udp_sk(s)->udp_port_hash == num && s->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(s); if (inet->inet_dport) { if (inet->inet_dport != rmt_port) continue; } - if (!ipv6_addr_any(&np->daddr) && - !ipv6_addr_equal(&np->daddr, rmt_addr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr) && + !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) continue; if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif) continue; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)) continue; } if (!inet6_mc_check(s, loc_addr, rmt_addr)) @@ -1063,7 +1059,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, } else if (!up->pending) { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; } else daddr = NULL; @@ -1133,8 +1129,8 @@ do_udp_sendmsg: * sk->sk_dst_cache. */ if (sk->sk_state == TCP_ESTABLISHED && - ipv6_addr_equal(daddr, &np->daddr)) - daddr = &np->daddr; + ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) + daddr = &sk->sk_v6_daddr; if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && @@ -1145,7 +1141,7 @@ do_udp_sendmsg: return -EDESTADDRREQ; fl6.fl6_dport = inet->inet_dport; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; fl6.flowlabel = np->flow_label; connected = 1; } @@ -1261,8 +1257,8 @@ do_append_data: if (dst) { if (connected) { ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl6.daddr, &np->daddr) ? - &np->daddr : NULL, + ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? + &sk->sk_v6_daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_equal(&fl6.saddr, &np->saddr) ? &np->saddr : diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index b076e8309bc2..9af77d9c0ec9 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1181,7 +1181,7 @@ static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb, !(skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { __wsum csum = skb_checksum(skb, 0, udp_len, 0); skb->ip_summed = CHECKSUM_UNNECESSARY; - uh->check = csum_ipv6_magic(&np->saddr, &np->daddr, udp_len, + uh->check = csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, udp_len, IPPROTO_UDP, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; @@ -1189,7 +1189,7 @@ static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb, skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, + uh->check = ~csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, udp_len, IPPROTO_UDP, 0); } } @@ -1713,13 +1713,13 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 struct ipv6_pinfo *np = inet6_sk(sk); if (ipv6_addr_v4mapped(&np->saddr) && - ipv6_addr_v4mapped(&np->daddr)) { + ipv6_addr_v4mapped(&sk->sk_v6_daddr)) { struct inet_sock *inet = inet_sk(sk); tunnel->v4mapped = true; inet->inet_saddr = np->saddr.s6_addr32[3]; - inet->inet_rcv_saddr = np->rcv_saddr.s6_addr32[3]; - inet->inet_daddr = np->daddr.s6_addr32[3]; + inet->inet_rcv_saddr = sk->sk_v6_rcv_saddr.s6_addr32[3]; + inet->inet_daddr = sk->sk_v6_daddr.s6_addr32[3]; } else { tunnel->v4mapped = false; } diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 072d7202e182..2d6760a2ae34 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -127,9 +127,10 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) #if IS_ENABLED(CONFIG_IPV6) if (tunnel->sock->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(tunnel->sock); + const struct ipv6_pinfo *np = inet6_sk(tunnel->sock); + seq_printf(m, " from %pI6c to %pI6c\n", - &np->saddr, &np->daddr); + &np->saddr, &tunnel->sock->sk_v6_daddr); } else #endif seq_printf(m, " from %pI4 to %pI4\n", diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index b8a6039314e8..cfd65304be60 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -63,7 +63,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, struct sock *sk; sk_for_each_bound(sk, &l2tp_ip6_bind_table) { - struct in6_addr *addr = inet6_rcv_saddr(sk); + const struct in6_addr *addr = inet6_rcv_saddr(sk); struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk); if (l2tp == NULL) @@ -331,7 +331,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) rcu_read_unlock(); inet->inet_rcv_saddr = inet->inet_saddr = v4addr; - np->rcv_saddr = addr->l2tp_addr; + sk->sk_v6_rcv_saddr = addr->l2tp_addr; np->saddr = addr->l2tp_addr; l2tp_ip6_sk(sk)->conn_id = addr->l2tp_conn_id; @@ -421,14 +421,14 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, if (!lsk->peer_conn_id) return -ENOTCONN; lsa->l2tp_conn_id = lsk->peer_conn_id; - lsa->l2tp_addr = np->daddr; + lsa->l2tp_addr = sk->sk_v6_daddr; if (np->sndflow) lsa->l2tp_flowinfo = np->flow_label; } else { - if (ipv6_addr_any(&np->rcv_saddr)) + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) lsa->l2tp_addr = np->saddr; else - lsa->l2tp_addr = np->rcv_saddr; + lsa->l2tp_addr = sk->sk_v6_rcv_saddr; lsa->l2tp_conn_id = lsk->conn_id; } @@ -537,8 +537,8 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, * sk->sk_dst_cache. */ if (sk->sk_state == TCP_ESTABLISHED && - ipv6_addr_equal(daddr, &np->daddr)) - daddr = &np->daddr; + ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) + daddr = &sk->sk_v6_daddr; if (addr_len >= sizeof(struct sockaddr_in6) && lsa->l2tp_scope_id && @@ -548,7 +548,7 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; fl6.flowlabel = np->flow_label; } diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 0825ff26e113..be446d517bc9 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -306,8 +306,8 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla if (np) { if (nla_put(skb, L2TP_ATTR_IP6_SADDR, sizeof(np->saddr), &np->saddr) || - nla_put(skb, L2TP_ATTR_IP6_DADDR, sizeof(np->daddr), - &np->daddr)) + nla_put(skb, L2TP_ATTR_IP6_DADDR, sizeof(sk->sk_v6_daddr), + &sk->sk_v6_daddr)) goto nla_put_failure; } else #endif diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 5ebee2ded9e9..f0a7adaef2ea 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -906,8 +906,8 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, #if IS_ENABLED(CONFIG_IPV6) } else if ((tunnel->version == 2) && (tunnel->sock->sk_family == AF_INET6)) { - struct ipv6_pinfo *np = inet6_sk(tunnel->sock); struct sockaddr_pppol2tpin6 sp; + len = sizeof(sp); memset(&sp, 0, len); sp.sa_family = AF_PPPOX; @@ -920,13 +920,13 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, sp.pppol2tp.d_session = session->peer_session_id; sp.pppol2tp.addr.sin6_family = AF_INET6; sp.pppol2tp.addr.sin6_port = inet->inet_dport; - memcpy(&sp.pppol2tp.addr.sin6_addr, &np->daddr, - sizeof(np->daddr)); + memcpy(&sp.pppol2tp.addr.sin6_addr, &tunnel->sock->sk_v6_daddr, + sizeof(tunnel->sock->sk_v6_daddr)); memcpy(uaddr, &sp, len); } else if ((tunnel->version == 3) && (tunnel->sock->sk_family == AF_INET6)) { - struct ipv6_pinfo *np = inet6_sk(tunnel->sock); struct sockaddr_pppol2tpv3in6 sp; + len = sizeof(sp); memset(&sp, 0, len); sp.sa_family = AF_PPPOX; @@ -939,8 +939,8 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, sp.pppol2tp.d_session = session->peer_session_id; sp.pppol2tp.addr.sin6_family = AF_INET6; sp.pppol2tp.addr.sin6_port = inet->inet_dport; - memcpy(&sp.pppol2tp.addr.sin6_addr, &np->daddr, - sizeof(np->daddr)); + memcpy(&sp.pppol2tp.addr.sin6_addr, &tunnel->sock->sk_v6_daddr, + sizeof(tunnel->sock->sk_v6_daddr)); memcpy(uaddr, &sp, len); #endif } else if (tunnel->version == 3) { diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 5d8a3a3cd5a7..ef8a926752a9 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -200,7 +200,7 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, in->ifindex); if (sk) { int connected = (sk->sk_state == TCP_ESTABLISHED); - int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr); + int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr); /* NOTE: we return listeners even if bound to * 0.0.0.0, those are filtered out in diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 06df2b9110f5..3dd0e374bc2b 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -370,7 +370,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) */ wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && sk->sk_state != TCP_TIME_WAIT && - ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)); + ipv6_addr_any(&sk->sk_v6_rcv_saddr)); /* Ignore non-transparent sockets, if XT_SOCKET_TRANSPARENT is used */ diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e7b2d4fe2b6a..f6334aa19151 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -426,20 +426,20 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk) { addr->v6.sin6_family = AF_INET6; addr->v6.sin6_port = 0; - addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr; + addr->v6.sin6_addr = sk->sk_v6_rcv_saddr; } /* Initialize sk->sk_rcv_saddr from sctp_addr. */ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk) { if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) { - inet6_sk(sk)->rcv_saddr.s6_addr32[0] = 0; - inet6_sk(sk)->rcv_saddr.s6_addr32[1] = 0; - inet6_sk(sk)->rcv_saddr.s6_addr32[2] = htonl(0x0000ffff); - inet6_sk(sk)->rcv_saddr.s6_addr32[3] = + sk->sk_v6_rcv_saddr.s6_addr32[0] = 0; + sk->sk_v6_rcv_saddr.s6_addr32[1] = 0; + sk->sk_v6_rcv_saddr.s6_addr32[2] = htonl(0x0000ffff); + sk->sk_v6_rcv_saddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; } else { - inet6_sk(sk)->rcv_saddr = addr->v6.sin6_addr; + sk->sk_v6_rcv_saddr = addr->v6.sin6_addr; } } @@ -447,12 +447,12 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk) static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk) { if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) { - inet6_sk(sk)->daddr.s6_addr32[0] = 0; - inet6_sk(sk)->daddr.s6_addr32[1] = 0; - inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff); - inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; + sk->sk_v6_daddr.s6_addr32[0] = 0; + sk->sk_v6_daddr.s6_addr32[1] = 0; + sk->sk_v6_daddr.s6_addr32[2] = htonl(0x0000ffff); + sk->sk_v6_daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; } else { - inet6_sk(sk)->daddr = addr->v6.sin6_addr; + sk->sk_v6_daddr = addr->v6.sin6_addr; } } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 9c9caaa5e0d3..0045c7cf1e9e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -294,7 +294,7 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) case PF_INET6: len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n", proto_name, - &inet6_sk(sk)->rcv_saddr, + &sk->sk_v6_rcv_saddr, inet_sk(sk)->inet_num); break; default: diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 8d8d97dbb389..80554fcf9fcc 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -304,12 +304,11 @@ static void dump_common_audit_data(struct audit_buffer *ab, } case AF_INET6: { struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *inet6 = inet6_sk(sk); - print_ipv6_addr(ab, &inet6->rcv_saddr, + print_ipv6_addr(ab, &sk->sk_v6_rcv_saddr, inet->inet_sport, "laddr", "lport"); - print_ipv6_addr(ab, &inet6->daddr, + print_ipv6_addr(ab, &sk->sk_v6_daddr, inet->inet_dport, "faddr", "fport"); break; -- cgit v1.2.3-71-gd317 From 634fb979e8f3a70f04c1f2f519d0cd1142eb5c1a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2013 15:21:29 -0700 Subject: inet: includes a sock_common in request_sock TCP listener refactoring, part 5 : We want to be able to insert request sockets (SYN_RECV) into main ehash table instead of the per listener hash table to allow RCU lookups and remove listener lock contention. This patch includes the needed struct sock_common in front of struct request_sock This means there is no more inet6_request_sock IPv6 specific structure. Following inet_request_sock fields were renamed as they became macros to reference fields from struct sock_common. Prefix ir_ was chosen to avoid name collisions. loc_port -> ir_loc_port loc_addr -> ir_loc_addr rmt_addr -> ir_rmt_addr rmt_port -> ir_rmt_port iif -> ir_iif Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ipv6.h | 26 ++--------------- include/net/inet_sock.h | 16 +++++----- include/net/request_sock.h | 1 + include/net/tcp.h | 4 +-- net/dccp/ipv4.c | 18 ++++++------ net/dccp/ipv6.c | 63 ++++++++++++++++++++-------------------- net/dccp/ipv6.h | 1 - net/dccp/minisocks.c | 4 +-- net/dccp/output.c | 4 +-- net/ipv4/inet_connection_sock.c | 23 ++++++++------- net/ipv4/inet_diag.c | 22 +++++++------- net/ipv4/syncookies.c | 12 ++++---- net/ipv4/tcp_ipv4.c | 38 ++++++++++++------------ net/ipv4/tcp_metrics.c | 8 +++-- net/ipv4/tcp_output.c | 4 +-- net/ipv6/inet6_connection_sock.c | 26 ++++++++--------- net/ipv6/syncookies.c | 24 +++++++-------- net/ipv6/tcp_ipv6.c | 61 +++++++++++++++++++------------------- net/netlabel/netlabel_kapi.c | 2 +- 19 files changed, 169 insertions(+), 188 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 35f6c1b562c4..a80a63cfb70c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -115,16 +115,8 @@ static inline int inet6_iif(const struct sk_buff *skb) return IP6CB(skb)->iif; } -struct inet6_request_sock { - struct in6_addr loc_addr; - struct in6_addr rmt_addr; - struct sk_buff *pktopts; - int iif; -}; - struct tcp6_request_sock { struct tcp_request_sock tcp6rsk_tcp; - struct inet6_request_sock tcp6rsk_inet6; }; struct ipv6_mc_socklist; @@ -264,26 +256,12 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) return inet_sk(__sk)->pinet6; } -static inline struct inet6_request_sock * - inet6_rsk(const struct request_sock *rsk) -{ - return (struct inet6_request_sock *)(((u8 *)rsk) + - inet_rsk(rsk)->inet6_rsk_offset); -} - -static inline u32 inet6_rsk_offset(struct request_sock *rsk) -{ - return rsk->rsk_ops->obj_size - sizeof(struct inet6_request_sock); -} - static inline struct request_sock *inet6_reqsk_alloc(struct request_sock_ops *ops) { struct request_sock *req = reqsk_alloc(ops); - if (req != NULL) { - inet_rsk(req)->inet6_rsk_offset = inet6_rsk_offset(req); - inet6_rsk(req)->pktopts = NULL; - } + if (req) + inet_rsk(req)->pktopts = NULL; return req; } diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 6d9a7e6eb5a4..f91204442efa 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -70,13 +70,14 @@ struct ip_options_data { struct inet_request_sock { struct request_sock req; -#if IS_ENABLED(CONFIG_IPV6) - u16 inet6_rsk_offset; -#endif - __be16 loc_port; - __be32 loc_addr; - __be32 rmt_addr; - __be16 rmt_port; +#define ir_loc_addr req.__req_common.skc_rcv_saddr +#define ir_rmt_addr req.__req_common.skc_daddr +#define ir_loc_port req.__req_common.skc_num +#define ir_rmt_port req.__req_common.skc_dport +#define ir_v6_rmt_addr req.__req_common.skc_v6_daddr +#define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr +#define ir_iif req.__req_common.skc_bound_dev_if + kmemcheck_bitfield_begin(flags); u16 snd_wscale : 4, rcv_wscale : 4, @@ -88,6 +89,7 @@ struct inet_request_sock { no_srccheck: 1; kmemcheck_bitfield_end(flags); struct ip_options_rcu *opt; + struct sk_buff *pktopts; }; static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 65c3e5164a5c..7f830ff67f08 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -48,6 +48,7 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req); /* struct request_sock - mini sock to represent a connection request */ struct request_sock { + struct sock_common __req_common; struct request_sock *dl_next; u16 mss; u8 num_retrans; /* number of retransmits */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 39bbfa1602b2..24a06161d174 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1109,8 +1109,8 @@ static inline void tcp_openreq_init(struct request_sock *req, ireq->wscale_ok = rx_opt->wscale_ok; ireq->acked = 0; ireq->ecn_ok = 0; - ireq->rmt_port = tcp_hdr(skb)->source; - ireq->loc_port = tcp_hdr(skb)->dest; + ireq->ir_rmt_port = tcp_hdr(skb)->source; + ireq->ir_loc_port = tcp_hdr(skb)->dest; } void tcp_enter_memory_pressure(struct sock *sk); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ebc54fef85a5..720c36225ed9 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -409,9 +409,9 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, newinet = inet_sk(newsk); ireq = inet_rsk(req); - newinet->inet_daddr = ireq->rmt_addr; - newinet->inet_rcv_saddr = ireq->loc_addr; - newinet->inet_saddr = ireq->loc_addr; + newinet->inet_daddr = ireq->ir_rmt_addr; + newinet->inet_rcv_saddr = ireq->ir_loc_addr; + newinet->inet_saddr = ireq->ir_loc_addr; newinet->inet_opt = ireq->opt; ireq->opt = NULL; newinet->mc_index = inet_iif(skb); @@ -516,10 +516,10 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req) const struct inet_request_sock *ireq = inet_rsk(req); struct dccp_hdr *dh = dccp_hdr(skb); - dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr, - ireq->rmt_addr); - err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, - ireq->rmt_addr, + dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr, + ireq->ir_rmt_addr); + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, ireq->opt); err = net_xmit_eval(err); } @@ -641,8 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; ireq = inet_rsk(req); - ireq->loc_addr = ip_hdr(skb)->daddr; - ireq->rmt_addr = ip_hdr(skb)->saddr; + ireq->ir_loc_addr = ip_hdr(skb)->daddr; + ireq->ir_rmt_addr = ip_hdr(skb)->saddr; /* * Step 3: Process LISTEN state diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 7f075b83128a..5cc5b24a956e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -216,7 +216,7 @@ out: static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) { - struct inet6_request_sock *ireq6 = inet6_rsk(req); + struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; struct in6_addr *final_p, final; @@ -226,12 +226,12 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_DCCP; - fl6.daddr = ireq6->rmt_addr; - fl6.saddr = ireq6->loc_addr; + fl6.daddr = ireq->ir_v6_rmt_addr; + fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowlabel = 0; - fl6.flowi6_oif = ireq6->iif; - fl6.fl6_dport = inet_rsk(req)->rmt_port; - fl6.fl6_sport = inet_rsk(req)->loc_port; + fl6.flowi6_oif = ireq->ir_iif; + fl6.fl6_dport = ireq->ir_rmt_port; + fl6.fl6_sport = ireq->ir_loc_port; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); @@ -249,9 +249,9 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) struct dccp_hdr *dh = dccp_hdr(skb); dh->dccph_checksum = dccp_v6_csum_finish(skb, - &ireq6->loc_addr, - &ireq6->rmt_addr); - fl6.daddr = ireq6->rmt_addr; + &ireq->ir_v6_loc_addr, + &ireq->ir_v6_rmt_addr); + fl6.daddr = ireq->ir_v6_rmt_addr; err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); err = net_xmit_eval(err); } @@ -264,8 +264,7 @@ done: static void dccp_v6_reqsk_destructor(struct request_sock *req) { dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); - if (inet6_rsk(req)->pktopts != NULL) - kfree_skb(inet6_rsk(req)->pktopts); + kfree_skb(inet_rsk(req)->pktopts); } static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) @@ -359,7 +358,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { struct request_sock *req; struct dccp_request_sock *dreq; - struct inet6_request_sock *ireq6; + struct inet_request_sock *ireq; struct ipv6_pinfo *np = inet6_sk(sk); const __be32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); @@ -398,22 +397,22 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; - ireq6 = inet6_rsk(req); - ireq6->rmt_addr = ipv6_hdr(skb)->saddr; - ireq6->loc_addr = ipv6_hdr(skb)->daddr; + ireq = inet_rsk(req); + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { atomic_inc(&skb->users); - ireq6->pktopts = skb; + ireq->pktopts = skb; } - ireq6->iif = sk->sk_bound_dev_if; + ireq->ir_iif = sk->sk_bound_dev_if; /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && - ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL) - ireq6->iif = inet6_iif(skb); + ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq->ir_iif = inet6_iif(skb); /* * Step 3: Process LISTEN state @@ -446,7 +445,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, struct request_sock *req, struct dst_entry *dst) { - struct inet6_request_sock *ireq6 = inet6_rsk(req); + struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct inet_sock *newinet; struct dccp6_sock *newdp6; @@ -505,12 +504,12 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_DCCP; - fl6.daddr = ireq6->rmt_addr; + fl6.daddr = ireq->ir_v6_rmt_addr; final_p = fl6_update_dst(&fl6, np->opt, &final); - fl6.saddr = ireq6->loc_addr; + fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.fl6_dport = inet_rsk(req)->rmt_port; - fl6.fl6_sport = inet_rsk(req)->loc_port; + fl6.fl6_dport = ireq->ir_rmt_port; + fl6.fl6_sport = ireq->ir_loc_port; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); @@ -538,10 +537,10 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - newsk->sk_v6_daddr = ireq6->rmt_addr; - newnp->saddr = ireq6->loc_addr; - newsk->sk_v6_rcv_saddr = ireq6->loc_addr; - newsk->sk_bound_dev_if = ireq6->iif; + newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; + newnp->saddr = ireq->ir_v6_loc_addr; + newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; + newsk->sk_bound_dev_if = ireq->ir_iif; /* Now IPv6 options... @@ -554,10 +553,10 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; - if (ireq6->pktopts != NULL) { - newnp->pktoptions = skb_clone(ireq6->pktopts, GFP_ATOMIC); - consume_skb(ireq6->pktopts); - ireq6->pktopts = NULL; + if (ireq->pktopts != NULL) { + newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); + consume_skb(ireq->pktopts); + ireq->pktopts = NULL; if (newnp->pktoptions) skb_set_owner_r(newnp->pktoptions, newsk); } diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h index 6604fc3fe953..af259e15e7f0 100644 --- a/net/dccp/ipv6.h +++ b/net/dccp/ipv6.h @@ -25,7 +25,6 @@ struct dccp6_sock { struct dccp6_request_sock { struct dccp_request_sock dccp; - struct inet6_request_sock inet6; }; struct dccp6_timewait_sock { diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 32e80d96d4c0..66afbcec2941 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -266,8 +266,8 @@ int dccp_reqsk_init(struct request_sock *req, { struct dccp_request_sock *dreq = dccp_rsk(req); - inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; - inet_rsk(req)->loc_port = dccp_hdr(skb)->dccph_dport; + inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport; + inet_rsk(req)->ir_loc_port = dccp_hdr(skb)->dccph_dport; inet_rsk(req)->acked = 0; dreq->dreq_timestamp_echo = 0; diff --git a/net/dccp/output.c b/net/dccp/output.c index d17fc90a74b6..9bf195d1b87a 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -424,8 +424,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, /* Build and checksum header */ dh = dccp_zeroed_hdr(skb, dccp_header_size); - dh->dccph_sport = inet_rsk(req)->loc_port; - dh->dccph_dport = inet_rsk(req)->rmt_port; + dh->dccph_sport = inet_rsk(req)->ir_loc_port; + dh->dccph_dport = inet_rsk(req)->ir_rmt_port; dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; dh->dccph_type = DCCP_PKT_RESPONSE; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 56e82a4027b4..2ffd931d652f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -412,8 +412,8 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, flags, - (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, + ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -448,8 +448,8 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), - (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, + ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -495,9 +495,9 @@ struct request_sock *inet_csk_search_req(const struct sock *sk, prev = &req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); - if (ireq->rmt_port == rport && - ireq->rmt_addr == raddr && - ireq->loc_addr == laddr && + if (ireq->ir_rmt_port == rport && + ireq->ir_rmt_addr == raddr && + ireq->ir_loc_addr == laddr && AF_INET_FAMILY(req->rsk_ops->family)) { WARN_ON(req->sk); *prevp = prev; @@ -514,7 +514,8 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, { struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, + const u32 h = inet_synq_hash(inet_rsk(req)->ir_rmt_addr, + inet_rsk(req)->ir_rmt_port, lopt->hash_rnd, lopt->nr_table_entries); reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); @@ -674,9 +675,9 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, newsk->sk_state = TCP_SYN_RECV; newicsk->icsk_bind_hash = NULL; - inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port; - inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port); - inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port; + inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port; + inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->ir_loc_port); + inet_sk(newsk)->inet_sport = inet_rsk(req)->ir_loc_port; newsk->sk_write_space = sk_stream_write_space; newicsk->icsk_retransmits = 0; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index ecc179d676e4..41e1c3ea8b51 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -679,12 +679,12 @@ static inline void inet_diag_req_addrs(const struct sock *sk, #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) { if (req->rsk_ops->family == AF_INET6) { - entry->saddr = inet6_rsk(req)->loc_addr.s6_addr32; - entry->daddr = inet6_rsk(req)->rmt_addr.s6_addr32; + entry->saddr = ireq->ir_v6_loc_addr.s6_addr32; + entry->daddr = ireq->ir_v6_rmt_addr.s6_addr32; } else if (req->rsk_ops->family == AF_INET) { - ipv6_addr_set_v4mapped(ireq->loc_addr, + ipv6_addr_set_v4mapped(ireq->ir_loc_addr, &entry->saddr_storage); - ipv6_addr_set_v4mapped(ireq->rmt_addr, + ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, &entry->daddr_storage); entry->saddr = entry->saddr_storage.s6_addr32; entry->daddr = entry->daddr_storage.s6_addr32; @@ -692,8 +692,8 @@ static inline void inet_diag_req_addrs(const struct sock *sk, } else #endif { - entry->saddr = &ireq->loc_addr; - entry->daddr = &ireq->rmt_addr; + entry->saddr = &ireq->ir_loc_addr; + entry->daddr = &ireq->ir_rmt_addr; } } @@ -728,9 +728,9 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, tmo = 0; r->id.idiag_sport = inet->inet_sport; - r->id.idiag_dport = ireq->rmt_port; - r->id.idiag_src[0] = ireq->loc_addr; - r->id.idiag_dst[0] = ireq->rmt_addr; + r->id.idiag_dport = ireq->ir_rmt_port; + r->id.idiag_src[0] = ireq->ir_loc_addr; + r->id.idiag_dst[0] = ireq->ir_rmt_addr; r->idiag_expires = jiffies_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; @@ -789,13 +789,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, if (reqnum < s_reqnum) continue; - if (r->id.idiag_dport != ireq->rmt_port && + if (r->id.idiag_dport != ireq->ir_rmt_port && r->id.idiag_dport) continue; if (bc) { inet_diag_req_addrs(sk, req, &entry); - entry.dport = ntohs(ireq->rmt_port); + entry.dport = ntohs(ireq->ir_rmt_port); if (!inet_diag_bc_run(bc, &entry)) continue; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 15e024105f91..984e21cf3c50 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -304,10 +304,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; req->mss = mss; - ireq->loc_port = th->dest; - ireq->rmt_port = th->source; - ireq->loc_addr = ip_hdr(skb)->daddr; - ireq->rmt_addr = ip_hdr(skb)->saddr; + ireq->ir_loc_port = th->dest; + ireq->ir_rmt_port = th->source; + ireq->ir_loc_addr = ip_hdr(skb)->daddr; + ireq->ir_rmt_addr = ip_hdr(skb)->saddr; ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; @@ -347,8 +347,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), - (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, - ireq->loc_addr, th->source, th->dest); + (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr, + ireq->ir_loc_addr, th->source, th->dest); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e4695dde1af6..114d1b748cbb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -835,11 +835,11 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, skb = tcp_make_synack(sk, dst, req, NULL); if (skb) { - __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); + __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); skb_set_queue_mapping(skb, queue_mapping); - err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, - ireq->rmt_addr, + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, ireq->opt); err = net_xmit_eval(err); if (!tcp_rsk(req)->snt_synack && !err) @@ -972,7 +972,7 @@ static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, { union tcp_md5_addr *addr; - addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr; + addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr; return tcp_md5_do_lookup(sk, addr, AF_INET); } @@ -1149,8 +1149,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, saddr = inet_sk(sk)->inet_saddr; daddr = inet_sk(sk)->inet_daddr; } else if (req) { - saddr = inet_rsk(req)->loc_addr; - daddr = inet_rsk(req)->rmt_addr; + saddr = inet_rsk(req)->ir_loc_addr; + daddr = inet_rsk(req)->ir_rmt_addr; } else { const struct iphdr *iph = ip_hdr(skb); saddr = iph->saddr; @@ -1366,8 +1366,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, kfree_skb(skb_synack); return -1; } - err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr, - ireq->rmt_addr, ireq->opt); + err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, ireq->opt); err = net_xmit_eval(err); if (!err) tcp_rsk(req)->snt_synack = tcp_time_stamp; @@ -1502,8 +1502,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init(req, &tmp_opt, skb); ireq = inet_rsk(req); - ireq->loc_addr = daddr; - ireq->rmt_addr = saddr; + ireq->ir_loc_addr = daddr; + ireq->ir_rmt_addr = saddr; ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(skb); @@ -1578,15 +1578,15 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL); if (skb_synack) { - __tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr); + __tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr); skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb)); } else goto drop_and_free; if (likely(!do_fastopen)) { int err; - err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr, - ireq->rmt_addr, ireq->opt); + err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, ireq->opt); err = net_xmit_eval(err); if (err || want_cookie) goto drop_and_free; @@ -1644,9 +1644,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp = tcp_sk(newsk); newinet = inet_sk(newsk); ireq = inet_rsk(req); - newinet->inet_daddr = ireq->rmt_addr; - newinet->inet_rcv_saddr = ireq->loc_addr; - newinet->inet_saddr = ireq->loc_addr; + newinet->inet_daddr = ireq->ir_rmt_addr; + newinet->inet_rcv_saddr = ireq->ir_loc_addr; + newinet->inet_saddr = ireq->ir_loc_addr; inet_opt = ireq->opt; rcu_assign_pointer(newinet->inet_opt, inet_opt); ireq->opt = NULL; @@ -2548,10 +2548,10 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK%n", i, - ireq->loc_addr, + ireq->ir_loc_addr, ntohs(inet_sk(sk)->inet_sport), - ireq->rmt_addr, - ntohs(ireq->rmt_port), + ireq->ir_rmt_addr, + ntohs(ireq->ir_rmt_port), TCP_SYN_RECV, 0, 0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 8fcc2cb9dba4..4a2a84110dfb 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -215,13 +215,15 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, addr.family = req->rsk_ops->family; switch (addr.family) { case AF_INET: - addr.addr.a4 = inet_rsk(req)->rmt_addr; + addr.addr.a4 = inet_rsk(req)->ir_rmt_addr; hash = (__force unsigned int) addr.addr.a4; break; +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - *(struct in6_addr *)addr.addr.a6 = inet6_rsk(req)->rmt_addr; - hash = ipv6_addr_hash(&inet6_rsk(req)->rmt_addr); + *(struct in6_addr *)addr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr; + hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr); break; +#endif default: return NULL; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c6f01f2cdb32..faec81353522 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2734,8 +2734,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, th->syn = 1; th->ack = 1; TCP_ECN_make_synack(req, th); - th->source = ireq->loc_port; - th->dest = ireq->rmt_port; + th->source = ireq->ir_loc_port; + th->dest = ireq->ir_rmt_port; /* Setting of flags are superfluous here for callers (and ECE is * not even correctly set) */ diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index b7400b480e74..1317c569b58f 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -70,20 +70,20 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, struct flowi6 *fl6, const struct request_sock *req) { - struct inet6_request_sock *treq = inet6_rsk(req); + struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = IPPROTO_TCP; - fl6->daddr = treq->rmt_addr; + fl6->daddr = ireq->ir_v6_rmt_addr; final_p = fl6_update_dst(fl6, np->opt, &final); - fl6->saddr = treq->loc_addr; - fl6->flowi6_oif = treq->iif; + fl6->saddr = ireq->ir_v6_loc_addr; + fl6->flowi6_oif = ireq->ir_iif; fl6->flowi6_mark = sk->sk_mark; - fl6->fl6_dport = inet_rsk(req)->rmt_port; - fl6->fl6_sport = inet_rsk(req)->loc_port; + fl6->fl6_dport = ireq->ir_rmt_port; + fl6->fl6_sport = ireq->ir_loc_port; security_req_classify_flow(req, flowi6_to_flowi(fl6)); dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); @@ -129,13 +129,13 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk, lopt->nr_table_entries)]; (req = *prev) != NULL; prev = &req->dl_next) { - const struct inet6_request_sock *treq = inet6_rsk(req); + const struct inet_request_sock *ireq = inet_rsk(req); - if (inet_rsk(req)->rmt_port == rport && + if (ireq->ir_rmt_port == rport && req->rsk_ops->family == AF_INET6 && - ipv6_addr_equal(&treq->rmt_addr, raddr) && - ipv6_addr_equal(&treq->loc_addr, laddr) && - (!treq->iif || treq->iif == iif)) { + ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) && + ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) && + (!ireq->ir_iif || ireq->ir_iif == iif)) { WARN_ON(req->sk != NULL); *prevp = prev; return req; @@ -153,8 +153,8 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk, { struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr, - inet_rsk(req)->rmt_port, + const u32 h = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr, + inet_rsk(req)->ir_rmt_port, lopt->hash_rnd, lopt->nr_table_entries); reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index d703218a653b..bc5698f9e4cd 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -150,7 +150,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tcp_opt; struct inet_request_sock *ireq; - struct inet6_request_sock *ireq6; struct tcp_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -187,7 +186,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ireq = inet_rsk(req); - ireq6 = inet6_rsk(req); treq = tcp_rsk(req); treq->listener = NULL; @@ -195,22 +193,22 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out_free; req->mss = mss; - ireq->rmt_port = th->source; - ireq->loc_port = th->dest; - ireq6->rmt_addr = ipv6_hdr(skb)->saddr; - ireq6->loc_addr = ipv6_hdr(skb)->daddr; + ireq->ir_rmt_port = th->source; + ireq->ir_loc_port = th->dest; + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { atomic_inc(&skb->users); - ireq6->pktopts = skb; + ireq->pktopts = skb; } - ireq6->iif = sk->sk_bound_dev_if; + ireq->ir_iif = sk->sk_bound_dev_if; /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && - ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL) - ireq6->iif = inet6_iif(skb); + ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq->ir_iif = inet6_iif(skb); req->expires = 0UL; req->num_retrans = 0; @@ -234,12 +232,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; - fl6.daddr = ireq6->rmt_addr; + fl6.daddr = ireq->ir_v6_rmt_addr; final_p = fl6_update_dst(&fl6, np->opt, &final); - fl6.saddr = ireq6->loc_addr; + fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; - fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 541dfc40c7b3..db234d609b33 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -465,7 +465,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, u16 queue_mapping) { - struct inet6_request_sock *treq = inet6_rsk(req); + struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff * skb; int err = -ENOMEM; @@ -477,9 +477,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, skb = tcp_make_synack(sk, dst, req, NULL); if (skb) { - __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); + __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, + &ireq->ir_v6_rmt_addr); - fl6->daddr = treq->rmt_addr; + fl6->daddr = ireq->ir_v6_rmt_addr; skb_set_queue_mapping(skb, queue_mapping); err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); err = net_xmit_eval(err); @@ -502,7 +503,7 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) static void tcp_v6_reqsk_destructor(struct request_sock *req) { - kfree_skb(inet6_rsk(req)->pktopts); + kfree_skb(inet_rsk(req)->pktopts); } #ifdef CONFIG_TCP_MD5SIG @@ -521,7 +522,7 @@ static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk, static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk, struct request_sock *req) { - return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr); + return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr); } static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, @@ -623,8 +624,8 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, saddr = &inet6_sk(sk)->saddr; daddr = &sk->sk_v6_daddr; } else if (req) { - saddr = &inet6_rsk(req)->loc_addr; - daddr = &inet6_rsk(req)->rmt_addr; + saddr = &inet_rsk(req)->ir_v6_loc_addr; + daddr = &inet_rsk(req)->ir_v6_rmt_addr; } else { const struct ipv6hdr *ip6h = ipv6_hdr(skb); saddr = &ip6h->saddr; @@ -949,7 +950,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tmp_opt; struct request_sock *req; - struct inet6_request_sock *treq; + struct inet_request_sock *ireq; struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); __u32 isn = TCP_SKB_CB(skb)->when; @@ -994,25 +995,25 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb); - treq = inet6_rsk(req); - treq->rmt_addr = ipv6_hdr(skb)->saddr; - treq->loc_addr = ipv6_hdr(skb)->daddr; + ireq = inet_rsk(req); + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, skb, sock_net(sk)); - treq->iif = sk->sk_bound_dev_if; + ireq->ir_iif = sk->sk_bound_dev_if; /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && - ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) - treq->iif = inet6_iif(skb); + ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq->ir_iif = inet6_iif(skb); if (!isn) { if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { atomic_inc(&skb->users); - treq->pktopts = skb; + ireq->pktopts = skb; } if (want_cookie) { @@ -1051,7 +1052,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) * to the moment of synflood. */ LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n", - &treq->rmt_addr, ntohs(tcp_hdr(skb)->source)); + &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source)); goto drop_and_release; } @@ -1086,7 +1087,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { - struct inet6_request_sock *treq; + struct inet_request_sock *ireq; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; struct inet_sock *newinet; @@ -1151,7 +1152,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, return newsk; } - treq = inet6_rsk(req); + ireq = inet_rsk(req); if (sk_acceptq_is_full(sk)) goto out_overflow; @@ -1185,10 +1186,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - newsk->sk_v6_daddr = treq->rmt_addr; - newnp->saddr = treq->loc_addr; - newsk->sk_v6_rcv_saddr = treq->loc_addr; - newsk->sk_bound_dev_if = treq->iif; + newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; + newnp->saddr = ireq->ir_v6_loc_addr; + newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; + newsk->sk_bound_dev_if = ireq->ir_iif; /* Now IPv6 options... @@ -1203,11 +1204,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; - if (treq->pktopts != NULL) { - newnp->pktoptions = skb_clone(treq->pktopts, + if (ireq->pktopts != NULL) { + newnp->pktoptions = skb_clone(ireq->pktopts, sk_gfp_atomic(sk, GFP_ATOMIC)); - consume_skb(treq->pktopts); - treq->pktopts = NULL; + consume_skb(ireq->pktopts); + ireq->pktopts = NULL; if (newnp->pktoptions) skb_set_owner_r(newnp->pktoptions, newsk); } @@ -1722,8 +1723,8 @@ static void get_openreq6(struct seq_file *seq, const struct sock *sk, struct request_sock *req, int i, kuid_t uid) { int ttd = req->expires - jiffies; - const struct in6_addr *src = &inet6_rsk(req)->loc_addr; - const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr; + const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; + const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; if (ttd < 0) ttd = 0; @@ -1734,10 +1735,10 @@ static void get_openreq6(struct seq_file *seq, i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], - ntohs(inet_rsk(req)->loc_port), + ntohs(inet_rsk(req)->ir_loc_port), dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], - ntohs(inet_rsk(req)->rmt_port), + ntohs(inet_rsk(req)->ir_rmt_port), TCP_SYN_RECV, 0,0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 96a458e12f60..dce1bebf7aec 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -817,7 +817,7 @@ int netlbl_req_setattr(struct request_sock *req, switch (req->rsk_ops->family) { case AF_INET: entry = netlbl_domhsh_getentry_af4(secattr->domain, - inet_rsk(req)->rmt_addr); + inet_rsk(req)->ir_rmt_addr); if (entry == NULL) { ret_val = -ENOENT; goto req_setattr_return; -- cgit v1.2.3-71-gd317 From 795aa6ef6a1aba99050735eadd0c2341b789b53b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Oct 2013 09:21:55 +0200 Subject: netfilter: pass hook ops to hookfn Pass the hook ops to the hookfn to allow for generic hook functions. This change is required by nf_tables. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 3 +- net/bridge/br_netfilter.c | 22 +++++++++----- net/bridge/netfilter/ebtable_filter.c | 16 ++++++---- net/bridge/netfilter/ebtable_nat.c | 16 ++++++---- net/decnet/netfilter/dn_rtmsg.c | 2 +- net/ipv4/netfilter/arptable_filter.c | 5 +-- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- net/ipv4/netfilter/ipt_SYNPROXY.c | 2 +- net/ipv4/netfilter/iptable_filter.c | 7 +++-- net/ipv4/netfilter/iptable_mangle.c | 10 +++--- net/ipv4/netfilter/iptable_nat.c | 26 ++++++++-------- net/ipv4/netfilter/iptable_raw.c | 6 ++-- net/ipv4/netfilter/iptable_security.c | 7 +++-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 12 ++++---- net/ipv4/netfilter/nf_defrag_ipv4.c | 6 ++-- net/ipv6/netfilter/ip6t_SYNPROXY.c | 2 +- net/ipv6/netfilter/ip6table_filter.c | 5 +-- net/ipv6/netfilter/ip6table_mangle.c | 10 +++--- net/ipv6/netfilter/ip6table_nat.c | 27 +++++++++-------- net/ipv6/netfilter/ip6table_raw.c | 5 +-- net/ipv6/netfilter/ip6table_security.c | 5 +-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 14 +++++---- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 6 ++-- net/netfilter/core.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 42 +++++++++++++------------- security/selinux/hooks.c | 10 +++--- 26 files changed, 148 insertions(+), 122 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 61223c52414f..fef7e67f7101 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -42,7 +42,8 @@ int netfilter_init(void); struct sk_buff; -typedef unsigned int nf_hookfn(unsigned int hooknum, +struct nf_hook_ops; +typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index f87736270eaa..878f008afefa 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -619,7 +619,7 @@ bad: /* Replicate the checks that IPv6 does on packet reception and pass the packet * to ip6tables, which doesn't support NAT, so things are fairly simple. */ -static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, +static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -669,7 +669,8 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, * receiving device) to make netfilter happy, the REDIRECT * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ -static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -691,7 +692,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, return NF_ACCEPT; nf_bridge_pull_encap_header_rcsum(skb); - return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); + return br_nf_pre_routing_ipv6(ops, skb, in, out, okfn); } if (!brnf_call_iptables && !br->nf_call_iptables) @@ -727,7 +728,8 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, * took place when the packet entered the bridge), but we * register an IPv4 PRE_ROUTING 'sabotage' hook that will * prevent this from happening. */ -static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -765,7 +767,8 @@ static int br_nf_forward_finish(struct sk_buff *skb) * but we are still able to filter on the 'real' indev/outdev * because of the physdev module. For ARP, indev and outdev are the * bridge ports. */ -static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -818,7 +821,8 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, return NF_STOLEN; } -static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -878,7 +882,8 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) #endif /* PF_BRIDGE/POST_ROUTING ********************************************/ -static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -923,7 +928,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, /* IP/SABOTAGE *****************************************************/ /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING * for the second time. */ -static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb, +static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 94b2b700cff8..bb2da7b706e7 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -60,17 +60,21 @@ static const struct ebt_table frame_filter = }; static unsigned int -ebt_in_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_filter); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(in)->xt.frame_filter); } static unsigned int -ebt_out_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_filter); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(out)->xt.frame_filter); } static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 322555acdd40..bd238f1f105b 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -60,17 +60,21 @@ static struct ebt_table frame_nat = }; static unsigned int -ebt_nat_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in - , const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_nat); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(in)->xt.frame_nat); } static unsigned int -ebt_nat_out(unsigned int hook, struct sk_buff *skb, const struct net_device *in - , const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_nat); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(out)->xt.frame_nat); } static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 2a7efe388344..e83015cecfa7 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -87,7 +87,7 @@ static void dnrmg_send_peer(struct sk_buff *skb) } -static unsigned int dnrmg_hook(unsigned int hook, +static unsigned int dnrmg_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index a865f6f94013..802ddecb30b8 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -27,13 +27,14 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c */ static unsigned int -arptable_filter_hook(unsigned int hook, struct sk_buff *skb, +arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? in : out); - return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter); + return arpt_do_table(skb, ops->hooknum, in, out, + net->ipv4.arptable_filter); } static struct nf_hook_ops *arpfilter_ops __read_mostly; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 0b732efd32e2..a2e2b61cd7da 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -483,7 +483,7 @@ static void arp_print(struct arp_payload *payload) #endif static unsigned int -arp_mangle(unsigned int hook, +arp_mangle(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index b6346bf2fde3..01cffeaa0085 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -297,7 +297,7 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static unsigned int ipv4_synproxy_hook(unsigned int hooknum, +static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 50af5b45c050..e08a74a243a8 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -33,20 +33,21 @@ static const struct xt_table packet_filter = { }; static unsigned int -iptable_filter_hook(unsigned int hook, struct sk_buff *skb, +iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net; - if (hook == NF_INET_LOCAL_OUT && + if (ops->hooknum == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* root is playing with raw sockets. */ return NF_ACCEPT; net = dev_net((in != NULL) ? in : out); - return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter); + return ipt_do_table(skb, ops->hooknum, in, out, + net->ipv4.iptable_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 0d8cd82e0fad..6a5079c34bb3 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -79,19 +79,19 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) /* The work comes in here from netfilter.c. */ static unsigned int -iptable_mangle_hook(unsigned int hook, +iptable_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (hook == NF_INET_LOCAL_OUT) + if (ops->hooknum == NF_INET_LOCAL_OUT) return ipt_mangle_out(skb, out); - if (hook == NF_INET_POST_ROUTING) - return ipt_do_table(skb, hook, in, out, + if (ops->hooknum == NF_INET_POST_ROUTING) + return ipt_do_table(skb, ops->hooknum, in, out, dev_net(out)->ipv4.iptable_mangle); /* PREROUTING/INPUT/FORWARD: */ - return ipt_do_table(skb, hook, in, out, + return ipt_do_table(skb, ops->hooknum, in, out, dev_net(in)->ipv4.iptable_mangle); } diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 683bfaffed65..ee2886126e3d 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -61,7 +61,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, } static unsigned int -nf_nat_ipv4_fn(unsigned int hooknum, +nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -71,7 +71,7 @@ nf_nat_ipv4_fn(unsigned int hooknum, enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; /* maniptype == SRC for postrouting. */ - enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); /* We never see fragments: conntrack defrags on pre-routing * and local-out, and nf_nat_out protects post-routing. @@ -108,7 +108,7 @@ nf_nat_ipv4_fn(unsigned int hooknum, case IP_CT_RELATED_REPLY: if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, - hooknum)) + ops->hooknum)) return NF_DROP; else return NF_ACCEPT; @@ -121,14 +121,14 @@ nf_nat_ipv4_fn(unsigned int hooknum, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = nf_nat_rule_find(skb, hooknum, in, out, ct); + ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } break; @@ -137,11 +137,11 @@ nf_nat_ipv4_fn(unsigned int hooknum, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } - return nf_nat_packet(ct, ctinfo, hooknum, skb); + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); oif_changed: nf_ct_kill_acct(ct, ctinfo, skb); @@ -149,7 +149,7 @@ oif_changed: } static unsigned int -nf_nat_ipv4_in(unsigned int hooknum, +nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -158,7 +158,7 @@ nf_nat_ipv4_in(unsigned int hooknum, unsigned int ret; __be32 daddr = ip_hdr(skb)->daddr; - ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && daddr != ip_hdr(skb)->daddr) skb_dst_drop(skb); @@ -167,7 +167,7 @@ nf_nat_ipv4_in(unsigned int hooknum, } static unsigned int -nf_nat_ipv4_out(unsigned int hooknum, +nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -185,7 +185,7 @@ nf_nat_ipv4_out(unsigned int hooknum, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && @@ -207,7 +207,7 @@ nf_nat_ipv4_out(unsigned int hooknum, } static unsigned int -nf_nat_ipv4_local_fn(unsigned int hooknum, +nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -223,7 +223,7 @@ nf_nat_ipv4_local_fn(unsigned int hooknum, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 1f82aea11df6..b2f7e8f98316 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -20,20 +20,20 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int -iptable_raw_hook(unsigned int hook, struct sk_buff *skb, +iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net; - if (hook == NF_INET_LOCAL_OUT && + if (ops->hooknum == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* root is playing with raw sockets. */ return NF_ACCEPT; net = dev_net((in != NULL) ? in : out); - return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw); + return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.iptable_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index f867a8d38bf7..c86647ed2078 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -37,21 +37,22 @@ static const struct xt_table security_table = { }; static unsigned int -iptable_security_hook(unsigned int hook, struct sk_buff *skb, +iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net; - if (hook == NF_INET_LOCAL_OUT && + if (ops->hooknum == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* Somebody is playing with raw sockets. */ return NF_ACCEPT; net = dev_net((in != NULL) ? in : out); - return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security); + return ipt_do_table(skb, ops->hooknum, in, out, + net->ipv4.iptable_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 86f5b34a4ed1..ecd8bec411c9 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -92,7 +92,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv4_helper(unsigned int hooknum, +static unsigned int ipv4_helper(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -121,7 +121,7 @@ static unsigned int ipv4_helper(unsigned int hooknum, ct, ctinfo); } -static unsigned int ipv4_confirm(unsigned int hooknum, +static unsigned int ipv4_confirm(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -147,16 +147,16 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv4_conntrack_in(unsigned int hooknum, +static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(in), PF_INET, ops->hooknum, skb); } -static unsigned int ipv4_conntrack_local(unsigned int hooknum, +static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -166,7 +166,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum, if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(out), PF_INET, ops->hooknum, skb); } /* Connection tracking may drop packets, but never alters them, so diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 742815518b0f..12e13bd82b5b 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -60,7 +60,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, return IP_DEFRAG_CONNTRACK_OUT + zone; } -static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, +static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -83,7 +83,9 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, #endif /* Gather fragments. */ if (ip_is_fragment(ip_hdr(skb))) { - enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); + enum ip_defrag_users user = + nf_ct_defrag_user(ops->hooknum, skb); + if (nf_ct_ipv4_gather_frags(skb, user)) return NF_STOLEN; } diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 2748b042da72..bf9f612c1bc2 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -312,7 +312,7 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static unsigned int ipv6_synproxy_hook(unsigned int hooknum, +static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 29b44b14c5ea..ca7f6c128086 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -32,13 +32,14 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_filter_hook(unsigned int hook, struct sk_buff *skb, +ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? in : out); - return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter); + return ip6t_do_table(skb, ops->hooknum, in, out, + net->ipv6.ip6table_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index c705907ae6ab..307bbb782d14 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -76,17 +76,17 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb, +ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (hook == NF_INET_LOCAL_OUT) + if (ops->hooknum == NF_INET_LOCAL_OUT) return ip6t_mangle_out(skb, out); - if (hook == NF_INET_POST_ROUTING) - return ip6t_do_table(skb, hook, in, out, + if (ops->hooknum == NF_INET_POST_ROUTING) + return ip6t_do_table(skb, ops->hooknum, in, out, dev_net(out)->ipv6.ip6table_mangle); /* INPUT/FORWARD */ - return ip6t_do_table(skb, hook, in, out, + return ip6t_do_table(skb, ops->hooknum, in, out, dev_net(in)->ipv6.ip6table_mangle); } diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 9b076d2d3a7b..84c7f33d0cf8 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -63,7 +63,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, } static unsigned int -nf_nat_ipv6_fn(unsigned int hooknum, +nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -72,7 +72,7 @@ nf_nat_ipv6_fn(unsigned int hooknum, struct nf_conn *ct; enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; - enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); __be16 frag_off; int hdrlen; u8 nexthdr; @@ -111,7 +111,8 @@ nf_nat_ipv6_fn(unsigned int hooknum, if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, - hooknum, hdrlen)) + ops->hooknum, + hdrlen)) return NF_DROP; else return NF_ACCEPT; @@ -124,14 +125,14 @@ nf_nat_ipv6_fn(unsigned int hooknum, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = nf_nat_rule_find(skb, hooknum, in, out, ct); + ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } break; @@ -140,11 +141,11 @@ nf_nat_ipv6_fn(unsigned int hooknum, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } - return nf_nat_packet(ct, ctinfo, hooknum, skb); + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); oif_changed: nf_ct_kill_acct(ct, ctinfo, skb); @@ -152,7 +153,7 @@ oif_changed: } static unsigned int -nf_nat_ipv6_in(unsigned int hooknum, +nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -161,7 +162,7 @@ nf_nat_ipv6_in(unsigned int hooknum, unsigned int ret; struct in6_addr daddr = ipv6_hdr(skb)->daddr; - ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) skb_dst_drop(skb); @@ -170,7 +171,7 @@ nf_nat_ipv6_in(unsigned int hooknum, } static unsigned int -nf_nat_ipv6_out(unsigned int hooknum, +nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -187,7 +188,7 @@ nf_nat_ipv6_out(unsigned int hooknum, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && @@ -209,7 +210,7 @@ nf_nat_ipv6_out(unsigned int hooknum, } static unsigned int -nf_nat_ipv6_local_fn(unsigned int hooknum, +nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -224,7 +225,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 9a626d86720f..5274740acecc 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -19,13 +19,14 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_raw_hook(unsigned int hook, struct sk_buff *skb, +ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? in : out); - return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw); + return ip6t_do_table(skb, ops->hooknum, in, out, + net->ipv6.ip6table_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index ce88d1d7e525..ab3b0219ecfa 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -36,14 +36,15 @@ static const struct xt_table security_table = { }; static unsigned int -ip6table_security_hook(unsigned int hook, struct sk_buff *skb, +ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? in : out); - return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security); + return ip6t_do_table(skb, ops->hooknum, in, out, + net->ipv6.ip6table_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 54b75ead5a69..486545eb42ce 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv6_helper(unsigned int hooknum, +static unsigned int ipv6_helper(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -133,7 +133,7 @@ static unsigned int ipv6_helper(unsigned int hooknum, return helper->help(skb, protoff, ct, ctinfo); } -static unsigned int ipv6_confirm(unsigned int hooknum, +static unsigned int ipv6_confirm(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -219,16 +219,17 @@ static unsigned int __ipv6_conntrack_in(struct net *net, return nf_conntrack_in(net, PF_INET6, hooknum, skb); } -static unsigned int ipv6_conntrack_in(unsigned int hooknum, +static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn); + return __ipv6_conntrack_in(dev_net(in), ops->hooknum, skb, in, out, + okfn); } -static unsigned int ipv6_conntrack_local(unsigned int hooknum, +static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -239,7 +240,8 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn); + return __ipv6_conntrack_in(dev_net(out), ops->hooknum, skb, in, out, + okfn); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index aacd121fe8c5..ec483aa3f60f 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -52,7 +52,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, } -static unsigned int ipv6_defrag(unsigned int hooknum, +static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -66,7 +66,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, return NF_ACCEPT; #endif - reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); + reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb)); /* queued */ if (reasm == NULL) return NF_STOLEN; @@ -75,7 +75,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, if (reasm == skb) return NF_ACCEPT; - nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, + nf_ct_frag6_output(ops->hooknum, reasm, (struct net_device *)in, (struct net_device *)out, okfn); return NF_STOLEN; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 593b16ea45e0..1fbab0cdd302 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -146,7 +146,7 @@ unsigned int nf_iterate(struct list_head *head, /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ repeat: - verdict = (*elemp)->hook(hook, skb, indev, outdev, okfn); + verdict = (*elemp)->hook(*elemp, skb, indev, outdev, okfn); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 74fd00c27210..34fda62f40f6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1239,11 +1239,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET); + return ip_vs_out(ops->hooknum, skb, AF_INET); } /* @@ -1251,11 +1251,11 @@ ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET); + return ip_vs_out(ops->hooknum, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1266,11 +1266,11 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET6); + return ip_vs_out(ops->hooknum, skb, AF_INET6); } /* @@ -1278,11 +1278,11 @@ ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET6); + return ip_vs_out(ops->hooknum, skb, AF_INET6); } #endif @@ -1733,12 +1733,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET); + return ip_vs_in(ops->hooknum, skb, AF_INET); } /* @@ -1746,11 +1746,11 @@ ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET); + return ip_vs_in(ops->hooknum, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1760,7 +1760,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, * Copy info from first fragment, to the rest of them. */ static unsigned int -ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_preroute_frag6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -1792,12 +1792,12 @@ ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb, * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET6); + return ip_vs_in(ops->hooknum, skb, AF_INET6); } /* @@ -1805,11 +1805,11 @@ ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET6); + return ip_vs_in(ops->hooknum, skb, AF_INET6); } #endif @@ -1825,7 +1825,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb, * and send them to ip_vs_in_icmp. */ static unsigned int -ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, +ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { @@ -1842,12 +1842,12 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp(skb, &r, hooknum); + return ip_vs_in_icmp(skb, &r, ops->hooknum); } #ifdef CONFIG_IP_VS_IPV6 static unsigned int -ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { @@ -1866,7 +1866,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr); + return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr); } #endif diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 568c7699abf1..3f224d7795f5 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4668,7 +4668,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, return NF_ACCEPT; } -static unsigned int selinux_ipv4_forward(unsigned int hooknum, +static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -4678,7 +4678,7 @@ static unsigned int selinux_ipv4_forward(unsigned int hooknum, } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static unsigned int selinux_ipv6_forward(unsigned int hooknum, +static unsigned int selinux_ipv6_forward(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -4710,7 +4710,7 @@ static unsigned int selinux_ip_output(struct sk_buff *skb, return NF_ACCEPT; } -static unsigned int selinux_ipv4_output(unsigned int hooknum, +static unsigned int selinux_ipv4_output(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -4837,7 +4837,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, return NF_ACCEPT; } -static unsigned int selinux_ipv4_postroute(unsigned int hooknum, +static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -4847,7 +4847,7 @@ static unsigned int selinux_ipv4_postroute(unsigned int hooknum, } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static unsigned int selinux_ipv6_postroute(unsigned int hooknum, +static unsigned int selinux_ipv6_postroute(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, -- cgit v1.2.3-71-gd317 From 96518518cc417bb0a8c80b9fb736202e28acdf96 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 14 Oct 2013 11:00:02 +0200 Subject: netfilter: add nftables This patch adds nftables which is the intended successor of iptables. This packet filtering framework reuses the existing netfilter hooks, the connection tracking system, the NAT subsystem, the transparent proxying engine, the logging infrastructure and the userspace packet queueing facilities. In a nutshell, nftables provides a pseudo-state machine with 4 general purpose registers of 128 bits and 1 specific purpose register to store verdicts. This pseudo-machine comes with an extensible instruction set, a.k.a. "expressions" in the nftables jargon. The expressions included in this patch provide the basic functionality, they are: * bitwise: to perform bitwise operations. * byteorder: to change from host/network endianess. * cmp: to compare data with the content of the registers. * counter: to enable counters on rules. * ct: to store conntrack keys into register. * exthdr: to match IPv6 extension headers. * immediate: to load data into registers. * limit: to limit matching based on packet rate. * log: to log packets. * meta: to match metainformation that usually comes with the skbuff. * nat: to perform Network Address Translation. * payload: to fetch data from the packet payload and store it into registers. * reject (IPv4 only): to explicitly close connection, eg. TCP RST. Using this instruction-set, the userspace utility 'nft' can transform the rules expressed in human-readable text representation (using a new syntax, inspired by tcpdump) to nftables bytecode. nftables also inherits the table, chain and rule objects from iptables, but in a more configurable way, and it also includes the original datatype-agnostic set infrastructure with mapping support. This set infrastructure is enhanced in the follow up patch (netfilter: nf_tables: add netlink set API). This patch includes the following components: * the netlink API: net/netfilter/nf_tables_api.c and include/uapi/netfilter/nf_tables.h * the packet filter core: net/netfilter/nf_tables_core.c * the expressions (described above): net/netfilter/nft_*.c * the filter tables: arp, IPv4, IPv6 and bridge: net/ipv4/netfilter/nf_tables_ipv4.c net/ipv6/netfilter/nf_tables_ipv6.c net/ipv4/netfilter/nf_tables_arp.c net/bridge/netfilter/nf_tables_bridge.c * the NAT table (IPv4 only): net/ipv4/netfilter/nf_table_nat_ipv4.c * the route table (similar to mangle): net/ipv4/netfilter/nf_table_route_ipv4.c net/ipv6/netfilter/nf_table_route_ipv6.c * internal definitions under: include/net/netfilter/nf_tables.h include/net/netfilter/nf_tables_core.h * It also includes an skeleton expression: net/netfilter/nft_expr_template.c and the preliminary implementation of the meta target net/netfilter/nft_meta_target.c It also includes a change in struct nf_hook_ops to add a new pointer to store private data to the hook, that is used to store the rule list per chain. This patch is based on the patch from Patrick McHardy, plus merged accumulated cleanups, fixes and small enhancements to the nftables code that has been done since 2009, which are: From Patrick McHardy: * nf_tables: adjust netlink handler function signatures * nf_tables: only retry table lookup after successful table module load * nf_tables: fix event notification echo and avoid unnecessary messages * nft_ct: add l3proto support * nf_tables: pass expression context to nft_validate_data_load() * nf_tables: remove redundant definition * nft_ct: fix maxattr initialization * nf_tables: fix invalid event type in nf_tables_getrule() * nf_tables: simplify nft_data_init() usage * nf_tables: build in more core modules * nf_tables: fix double lookup expression unregistation * nf_tables: move expression initialization to nf_tables_core.c * nf_tables: build in payload module * nf_tables: use NFPROTO constants * nf_tables: rename pid variables to portid * nf_tables: save 48 bits per rule * nf_tables: introduce chain rename * nf_tables: check for duplicate names on chain rename * nf_tables: remove ability to specify handles for new rules * nf_tables: return error for rule change request * nf_tables: return error for NLM_F_REPLACE without rule handle * nf_tables: include NLM_F_APPEND/NLM_F_REPLACE flags in rule notification * nf_tables: fix NLM_F_MULTI usage in netlink notifications * nf_tables: include NLM_F_APPEND in rule dumps From Pablo Neira Ayuso: * nf_tables: fix stack overflow in nf_tables_newrule * nf_tables: nft_ct: fix compilation warning * nf_tables: nft_ct: fix crash with invalid packets * nft_log: group and qthreshold are 2^16 * nf_tables: nft_meta: fix socket uid,gid handling * nft_counter: allow to restore counters * nf_tables: fix module autoload * nf_tables: allow to remove all rules placed in one chain * nf_tables: use 64-bits rule handle instead of 16-bits * nf_tables: fix chain after rule deletion * nf_tables: improve deletion performance * nf_tables: add missing code in route chain type * nf_tables: rise maximum number of expressions from 12 to 128 * nf_tables: don't delete table if in use * nf_tables: fix basechain release From Tomasz Bursztyka: * nf_tables: Add support for changing users chain's name * nf_tables: Change chain's name to be fixed sized * nf_tables: Add support for replacing a rule by another one * nf_tables: Update uapi nftables netlink header documentation From Florian Westphal: * nft_log: group is u16, snaplen u32 From Phil Oester: * nf_tables: operational limit match Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 11 +- include/net/netfilter/nf_tables.h | 301 ++++ include/net/netfilter/nf_tables_core.h | 25 + include/uapi/linux/netfilter/Kbuild | 1 + include/uapi/linux/netfilter/nf_conntrack_common.h | 4 + include/uapi/linux/netfilter/nf_tables.h | 582 +++++++ include/uapi/linux/netfilter/nfnetlink.h | 5 +- net/bridge/netfilter/Kconfig | 3 + net/bridge/netfilter/Makefile | 2 + net/bridge/netfilter/nf_tables_bridge.c | 37 + net/ipv4/netfilter/Kconfig | 16 + net/ipv4/netfilter/Makefile | 5 + net/ipv4/netfilter/nf_table_nat_ipv4.c | 409 +++++ net/ipv4/netfilter/nf_table_route_ipv4.c | 97 ++ net/ipv4/netfilter/nf_tables_ipv4.c | 59 + net/ipv4/netfilter/nft_reject_ipv4.c | 117 ++ net/ipv6/netfilter/Kconfig | 8 + net/ipv6/netfilter/Makefile | 4 + net/ipv6/netfilter/nf_table_route_ipv6.c | 93 ++ net/ipv6/netfilter/nf_tables_ipv6.c | 57 + net/netfilter/Kconfig | 37 + net/netfilter/Makefile | 16 + net/netfilter/nf_tables_api.c | 1760 ++++++++++++++++++++ net/netfilter/nf_tables_core.c | 152 ++ net/netfilter/nft_bitwise.c | 140 ++ net/netfilter/nft_byteorder.c | 167 ++ net/netfilter/nft_cmp.c | 146 ++ net/netfilter/nft_counter.c | 107 ++ net/netfilter/nft_ct.c | 252 +++ net/netfilter/nft_expr_template.c | 88 + net/netfilter/nft_exthdr.c | 127 ++ net/netfilter/nft_hash.c | 348 ++++ net/netfilter/nft_immediate.c | 113 ++ net/netfilter/nft_limit.c | 113 ++ net/netfilter/nft_log.c | 140 ++ net/netfilter/nft_meta.c | 222 +++ net/netfilter/nft_meta_target.c | 117 ++ net/netfilter/nft_payload.c | 137 ++ net/netfilter/nft_set.c | 381 +++++ 39 files changed, 6393 insertions(+), 6 deletions(-) create mode 100644 include/net/netfilter/nf_tables.h create mode 100644 include/net/netfilter/nf_tables_core.h create mode 100644 include/uapi/linux/netfilter/nf_tables.h create mode 100644 net/bridge/netfilter/nf_tables_bridge.c create mode 100644 net/ipv4/netfilter/nf_table_nat_ipv4.c create mode 100644 net/ipv4/netfilter/nf_table_route_ipv4.c create mode 100644 net/ipv4/netfilter/nf_tables_ipv4.c create mode 100644 net/ipv4/netfilter/nft_reject_ipv4.c create mode 100644 net/ipv6/netfilter/nf_table_route_ipv6.c create mode 100644 net/ipv6/netfilter/nf_tables_ipv6.c create mode 100644 net/netfilter/nf_tables_api.c create mode 100644 net/netfilter/nf_tables_core.c create mode 100644 net/netfilter/nft_bitwise.c create mode 100644 net/netfilter/nft_byteorder.c create mode 100644 net/netfilter/nft_cmp.c create mode 100644 net/netfilter/nft_counter.c create mode 100644 net/netfilter/nft_ct.c create mode 100644 net/netfilter/nft_expr_template.c create mode 100644 net/netfilter/nft_exthdr.c create mode 100644 net/netfilter/nft_hash.c create mode 100644 net/netfilter/nft_immediate.c create mode 100644 net/netfilter/nft_limit.c create mode 100644 net/netfilter/nft_log.c create mode 100644 net/netfilter/nft_meta.c create mode 100644 net/netfilter/nft_meta_target.c create mode 100644 net/netfilter/nft_payload.c create mode 100644 net/netfilter/nft_set.c (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index fef7e67f7101..2077489f9887 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -53,12 +53,13 @@ struct nf_hook_ops { struct list_head list; /* User fills in from here down. */ - nf_hookfn *hook; - struct module *owner; - u_int8_t pf; - unsigned int hooknum; + nf_hookfn *hook; + struct module *owner; + void *priv; + u_int8_t pf; + unsigned int hooknum; /* Hooks are ordered in ascending priority. */ - int priority; + int priority; }; struct nf_sockopt_ops { diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h new file mode 100644 index 000000000000..d26dfa345f49 --- /dev/null +++ b/include/net/netfilter/nf_tables.h @@ -0,0 +1,301 @@ +#ifndef _NET_NF_TABLES_H +#define _NET_NF_TABLES_H + +#include +#include +#include +#include + +struct nft_pktinfo { + struct sk_buff *skb; + const struct net_device *in; + const struct net_device *out; + u8 hooknum; + u8 nhoff; + u8 thoff; +}; + +struct nft_data { + union { + u32 data[4]; + struct { + u32 verdict; + struct nft_chain *chain; + }; + }; +} __attribute__((aligned(__alignof__(u64)))); + +static inline int nft_data_cmp(const struct nft_data *d1, + const struct nft_data *d2, + unsigned int len) +{ + return memcmp(d1->data, d2->data, len); +} + +static inline void nft_data_copy(struct nft_data *dst, + const struct nft_data *src) +{ + BUILD_BUG_ON(__alignof__(*dst) != __alignof__(u64)); + *(u64 *)&dst->data[0] = *(u64 *)&src->data[0]; + *(u64 *)&dst->data[2] = *(u64 *)&src->data[2]; +} + +static inline void nft_data_debug(const struct nft_data *data) +{ + pr_debug("data[0]=%x data[1]=%x data[2]=%x data[3]=%x\n", + data->data[0], data->data[1], + data->data[2], data->data[3]); +} + +/** + * struct nft_ctx - nf_tables rule context + * + * @afi: address family info + * @table: the table the chain is contained in + * @chain: the chain the rule is contained in + */ +struct nft_ctx { + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; +}; + +enum nft_data_types { + NFT_DATA_VALUE, + NFT_DATA_VERDICT, +}; + +struct nft_data_desc { + enum nft_data_types type; + unsigned int len; +}; + +extern int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla); +extern void nft_data_uninit(const struct nft_data *data, + enum nft_data_types type); +extern int nft_data_dump(struct sk_buff *skb, int attr, + const struct nft_data *data, + enum nft_data_types type, unsigned int len); + +static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg) +{ + return reg == NFT_REG_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE; +} + +extern int nft_validate_input_register(enum nft_registers reg); +extern int nft_validate_output_register(enum nft_registers reg); +extern int nft_validate_data_load(const struct nft_ctx *ctx, + enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type); + +/** + * struct nft_expr_ops - nf_tables expression operations + * + * @eval: Expression evaluation function + * @init: initialization function + * @destroy: destruction function + * @dump: function to dump parameters + * @list: used internally + * @name: Identifier + * @owner: module reference + * @policy: netlink attribute policy + * @maxattr: highest netlink attribute number + * @size: full expression size, including private data size + */ +struct nft_expr; +struct nft_expr_ops { + void (*eval)(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt); + int (*init)(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]); + void (*destroy)(const struct nft_expr *expr); + int (*dump)(struct sk_buff *skb, + const struct nft_expr *expr); + + struct list_head list; + const char *name; + struct module *owner; + const struct nla_policy *policy; + unsigned int maxattr; + unsigned int size; +}; + +#define NFT_EXPR_SIZE(size) (sizeof(struct nft_expr) + \ + ALIGN(size, __alignof__(struct nft_expr))) + +/** + * struct nft_expr - nf_tables expression + * + * @ops: expression ops + * @data: expression private data + */ +struct nft_expr { + const struct nft_expr_ops *ops; + unsigned char data[]; +}; + +static inline void *nft_expr_priv(const struct nft_expr *expr) +{ + return (void *)expr->data; +} + +/** + * struct nft_rule - nf_tables rule + * + * @list: used internally + * @rcu_head: used internally for rcu + * @handle: rule handle + * @dlen: length of expression data + * @data: expression data + */ +struct nft_rule { + struct list_head list; + struct rcu_head rcu_head; + u64 handle:48, + dlen:16; + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_expr)))); +}; + +static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) +{ + return (struct nft_expr *)&rule->data[0]; +} + +static inline struct nft_expr *nft_expr_next(const struct nft_expr *expr) +{ + return ((void *)expr) + expr->ops->size; +} + +static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule) +{ + return (struct nft_expr *)&rule->data[rule->dlen]; +} + +/* + * The last pointer isn't really necessary, but the compiler isn't able to + * determine that the result of nft_expr_last() is always the same since it + * can't assume that the dlen value wasn't changed within calls in the loop. + */ +#define nft_rule_for_each_expr(expr, last, rule) \ + for ((expr) = nft_expr_first(rule), (last) = nft_expr_last(rule); \ + (expr) != (last); \ + (expr) = nft_expr_next(expr)) + +enum nft_chain_flags { + NFT_BASE_CHAIN = 0x1, + NFT_CHAIN_BUILTIN = 0x2, +}; + +/** + * struct nft_chain - nf_tables chain + * + * @rules: list of rules in the chain + * @list: used internally + * @rcu_head: used internally + * @handle: chain handle + * @flags: bitmask of enum nft_chain_flags + * @use: number of jump references to this chain + * @level: length of longest path to this chain + * @name: name of the chain + */ +struct nft_chain { + struct list_head rules; + struct list_head list; + struct rcu_head rcu_head; + u64 handle; + u8 flags; + u16 use; + u16 level; + char name[NFT_CHAIN_MAXNAMELEN]; +}; + +/** + * struct nft_base_chain - nf_tables base chain + * + * @ops: netfilter hook ops + * @chain: the chain + */ +struct nft_base_chain { + struct nf_hook_ops ops; + struct nft_chain chain; +}; + +static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain) +{ + return container_of(chain, struct nft_base_chain, chain); +} + +extern unsigned int nft_do_chain(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)); + +enum nft_table_flags { + NFT_TABLE_BUILTIN = 0x1, +}; + +/** + * struct nft_table - nf_tables table + * + * @list: used internally + * @chains: chains in the table + * @sets: sets in the table + * @hgenerator: handle generator state + * @use: number of chain references to this table + * @flags: table flag (see enum nft_table_flags) + * @name: name of the table + */ +struct nft_table { + struct list_head list; + struct list_head chains; + struct list_head sets; + u64 hgenerator; + u32 use; + u16 flags; + char name[]; +}; + +/** + * struct nft_af_info - nf_tables address family info + * + * @list: used internally + * @family: address family + * @nhooks: number of hooks in this family + * @owner: module owner + * @tables: used internally + * @hooks: hookfn overrides for packet validation + */ +struct nft_af_info { + struct list_head list; + int family; + unsigned int nhooks; + struct module *owner; + struct list_head tables; + nf_hookfn *hooks[NF_MAX_HOOKS]; +}; + +extern int nft_register_afinfo(struct nft_af_info *); +extern void nft_unregister_afinfo(struct nft_af_info *); + +extern int nft_register_table(struct nft_table *, int family); +extern void nft_unregister_table(struct nft_table *, int family); + +extern int nft_register_expr(struct nft_expr_ops *); +extern void nft_unregister_expr(struct nft_expr_ops *); + +#define MODULE_ALIAS_NFT_FAMILY(family) \ + MODULE_ALIAS("nft-afinfo-" __stringify(family)) + +#define MODULE_ALIAS_NFT_TABLE(family, name) \ + MODULE_ALIAS("nft-table-" __stringify(family) "-" name) + +#define MODULE_ALIAS_NFT_EXPR(name) \ + MODULE_ALIAS("nft-expr-" name) + +#endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h new file mode 100644 index 000000000000..283396c916e0 --- /dev/null +++ b/include/net/netfilter/nf_tables_core.h @@ -0,0 +1,25 @@ +#ifndef _NET_NF_TABLES_CORE_H +#define _NET_NF_TABLES_CORE_H + +extern int nf_tables_core_module_init(void); +extern void nf_tables_core_module_exit(void); + +extern int nft_immediate_module_init(void); +extern void nft_immediate_module_exit(void); + +extern int nft_cmp_module_init(void); +extern void nft_cmp_module_exit(void); + +extern int nft_lookup_module_init(void); +extern void nft_lookup_module_exit(void); + +extern int nft_bitwise_module_init(void); +extern void nft_bitwise_module_exit(void); + +extern int nft_byteorder_module_init(void); +extern void nft_byteorder_module_exit(void); + +extern int nft_payload_module_init(void); +extern void nft_payload_module_exit(void); + +#endif /* _NET_NF_TABLES_CORE_H */ diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 174915420d3f..6ce0b7f566a7 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -5,6 +5,7 @@ header-y += nf_conntrack_ftp.h header-y += nf_conntrack_sctp.h header-y += nf_conntrack_tcp.h header-y += nf_conntrack_tuple_common.h +header-y += nf_tables.h header-y += nf_nat.h header-y += nfnetlink.h header-y += nfnetlink_acct.h diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 8dd803818ebe..319f47128db8 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -25,6 +25,10 @@ enum ip_conntrack_info { IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1 }; +#define NF_CT_STATE_INVALID_BIT (1 << 0) +#define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1)) +#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_NUMBER + 1)) + /* Bitset representing status of connection. */ enum ip_conntrack_status { /* It's an expected connection: bit 0 set. This bit never changed */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h new file mode 100644 index 000000000000..ec6d84a8ed1e --- /dev/null +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -0,0 +1,582 @@ +#ifndef _LINUX_NF_TABLES_H +#define _LINUX_NF_TABLES_H + +#define NFT_CHAIN_MAXNAMELEN 32 + +enum nft_registers { + NFT_REG_VERDICT, + NFT_REG_1, + NFT_REG_2, + NFT_REG_3, + NFT_REG_4, + __NFT_REG_MAX +}; +#define NFT_REG_MAX (__NFT_REG_MAX - 1) + +/** + * enum nft_verdicts - nf_tables internal verdicts + * + * @NFT_CONTINUE: continue evaluation of the current rule + * @NFT_BREAK: terminate evaluation of the current rule + * @NFT_JUMP: push the current chain on the jump stack and jump to a chain + * @NFT_GOTO: jump to a chain without pushing the current chain on the jump stack + * @NFT_RETURN: return to the topmost chain on the jump stack + * + * The nf_tables verdicts share their numeric space with the netfilter verdicts. + */ +enum nft_verdicts { + NFT_CONTINUE = -1, + NFT_BREAK = -2, + NFT_JUMP = -3, + NFT_GOTO = -4, + NFT_RETURN = -5, +}; + +/** + * enum nf_tables_msg_types - nf_tables netlink message types + * + * @NFT_MSG_NEWTABLE: create a new table (enum nft_table_attributes) + * @NFT_MSG_GETTABLE: get a table (enum nft_table_attributes) + * @NFT_MSG_DELTABLE: delete a table (enum nft_table_attributes) + * @NFT_MSG_NEWCHAIN: create a new chain (enum nft_chain_attributes) + * @NFT_MSG_GETCHAIN: get a chain (enum nft_chain_attributes) + * @NFT_MSG_DELCHAIN: delete a chain (enum nft_chain_attributes) + * @NFT_MSG_NEWRULE: create a new rule (enum nft_rule_attributes) + * @NFT_MSG_GETRULE: get a rule (enum nft_rule_attributes) + * @NFT_MSG_DELRULE: delete a rule (enum nft_rule_attributes) + */ +enum nf_tables_msg_types { + NFT_MSG_NEWTABLE, + NFT_MSG_GETTABLE, + NFT_MSG_DELTABLE, + NFT_MSG_NEWCHAIN, + NFT_MSG_GETCHAIN, + NFT_MSG_DELCHAIN, + NFT_MSG_NEWRULE, + NFT_MSG_GETRULE, + NFT_MSG_DELRULE, + NFT_MSG_MAX, +}; + +enum nft_list_attributes { + NFTA_LIST_UNPEC, + NFTA_LIST_ELEM, + __NFTA_LIST_MAX +}; +#define NFTA_LIST_MAX (__NFTA_LIST_MAX - 1) + +/** + * enum nft_hook_attributes - nf_tables netfilter hook netlink attributes + * + * @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32) + * @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32) + */ +enum nft_hook_attributes { + NFTA_HOOK_UNSPEC, + NFTA_HOOK_HOOKNUM, + NFTA_HOOK_PRIORITY, + __NFTA_HOOK_MAX +}; +#define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1) + +/** + * enum nft_table_attributes - nf_tables table netlink attributes + * + * @NFTA_TABLE_NAME: name of the table (NLA_STRING) + */ +enum nft_table_attributes { + NFTA_TABLE_UNSPEC, + NFTA_TABLE_NAME, + __NFTA_TABLE_MAX +}; +#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) + +/** + * enum nft_chain_attributes - nf_tables chain netlink attributes + * + * @NFTA_CHAIN_TABLE: name of the table containing the chain (NLA_STRING) + * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64) + * @NFTA_CHAIN_NAME: name of the chain (NLA_STRING) + * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes) + */ +enum nft_chain_attributes { + NFTA_CHAIN_UNSPEC, + NFTA_CHAIN_TABLE, + NFTA_CHAIN_HANDLE, + NFTA_CHAIN_NAME, + NFTA_CHAIN_HOOK, + __NFTA_CHAIN_MAX +}; +#define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) + +/** + * enum nft_rule_attributes - nf_tables rule netlink attributes + * + * @NFTA_RULE_TABLE: name of the table containing the rule (NLA_STRING) + * @NFTA_RULE_CHAIN: name of the chain containing the rule (NLA_STRING) + * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64) + * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes) + */ +enum nft_rule_attributes { + NFTA_RULE_UNSPEC, + NFTA_RULE_TABLE, + NFTA_RULE_CHAIN, + NFTA_RULE_HANDLE, + NFTA_RULE_EXPRESSIONS, + __NFTA_RULE_MAX +}; +#define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) + +enum nft_data_attributes { + NFTA_DATA_UNSPEC, + NFTA_DATA_VALUE, + NFTA_DATA_VERDICT, + __NFTA_DATA_MAX +}; +#define NFTA_DATA_MAX (__NFTA_DATA_MAX - 1) + +/** + * enum nft_verdict_attributes - nf_tables verdict netlink attributes + * + * @NFTA_VERDICT_CODE: nf_tables verdict (NLA_U32: enum nft_verdicts) + * @NFTA_VERDICT_CHAIN: jump target chain name (NLA_STRING) + */ +enum nft_verdict_attributes { + NFTA_VERDICT_UNSPEC, + NFTA_VERDICT_CODE, + NFTA_VERDICT_CHAIN, + __NFTA_VERDICT_MAX +}; +#define NFTA_VERDICT_MAX (__NFTA_VERDICT_MAX - 1) + +/** + * enum nft_expr_attributes - nf_tables expression netlink attributes + * + * @NFTA_EXPR_NAME: name of the expression type (NLA_STRING) + * @NFTA_EXPR_DATA: type specific data (NLA_NESTED) + */ +enum nft_expr_attributes { + NFTA_EXPR_UNSPEC, + NFTA_EXPR_NAME, + NFTA_EXPR_DATA, + __NFTA_EXPR_MAX +}; +#define NFTA_EXPR_MAX (__NFTA_EXPR_MAX - 1) + +/** + * enum nft_immediate_attributes - nf_tables immediate expression netlink attributes + * + * @NFTA_IMMEDIATE_DREG: destination register to load data into (NLA_U32) + * @NFTA_IMMEDIATE_DATA: data to load (NLA_NESTED: nft_data_attributes) + */ +enum nft_immediate_attributes { + NFTA_IMMEDIATE_UNSPEC, + NFTA_IMMEDIATE_DREG, + NFTA_IMMEDIATE_DATA, + __NFTA_IMMEDIATE_MAX +}; +#define NFTA_IMMEDIATE_MAX (__NFTA_IMMEDIATE_MAX - 1) + +/** + * enum nft_bitwise_attributes - nf_tables bitwise expression netlink attributes + * + * @NFTA_BITWISE_SREG: source register (NLA_U32: nft_registers) + * @NFTA_BITWISE_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_BITWISE_LEN: length of operands (NLA_U32) + * @NFTA_BITWISE_MASK: mask value (NLA_NESTED: nft_data_attributes) + * @NFTA_BITWISE_XOR: xor value (NLA_NESTED: nft_data_attributes) + * + * The bitwise expression performs the following operation: + * + * dreg = (sreg & mask) ^ xor + * + * which allow to express all bitwise operations: + * + * mask xor + * NOT: 1 1 + * OR: 0 x + * XOR: 1 x + * AND: x 0 + */ +enum nft_bitwise_attributes { + NFTA_BITWISE_UNSPEC, + NFTA_BITWISE_SREG, + NFTA_BITWISE_DREG, + NFTA_BITWISE_LEN, + NFTA_BITWISE_MASK, + NFTA_BITWISE_XOR, + __NFTA_BITWISE_MAX +}; +#define NFTA_BITWISE_MAX (__NFTA_BITWISE_MAX - 1) + +/** + * enum nft_byteorder_ops - nf_tables byteorder operators + * + * @NFT_BYTEORDER_NTOH: network to host operator + * @NFT_BYTEORDER_HTON: host to network opertaor + */ +enum nft_byteorder_ops { + NFT_BYTEORDER_NTOH, + NFT_BYTEORDER_HTON, +}; + +/** + * enum nft_byteorder_attributes - nf_tables byteorder expression netlink attributes + * + * @NFTA_BYTEORDER_SREG: source register (NLA_U32: nft_registers) + * @NFTA_BYTEORDER_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_BYTEORDER_OP: operator (NLA_U32: enum nft_byteorder_ops) + * @NFTA_BYTEORDER_LEN: length of the data (NLA_U32) + * @NFTA_BYTEORDER_SIZE: data size in bytes (NLA_U32: 2 or 4) + */ +enum nft_byteorder_attributes { + NFTA_BYTEORDER_UNSPEC, + NFTA_BYTEORDER_SREG, + NFTA_BYTEORDER_DREG, + NFTA_BYTEORDER_OP, + NFTA_BYTEORDER_LEN, + NFTA_BYTEORDER_SIZE, + __NFTA_BYTEORDER_MAX +}; +#define NFTA_BYTEORDER_MAX (__NFTA_BYTEORDER_MAX - 1) + +/** + * enum nft_cmp_ops - nf_tables relational operator + * + * @NFT_CMP_EQ: equal + * @NFT_CMP_NEQ: not equal + * @NFT_CMP_LT: less than + * @NFT_CMP_LTE: less than or equal to + * @NFT_CMP_GT: greater than + * @NFT_CMP_GTE: greater than or equal to + */ +enum nft_cmp_ops { + NFT_CMP_EQ, + NFT_CMP_NEQ, + NFT_CMP_LT, + NFT_CMP_LTE, + NFT_CMP_GT, + NFT_CMP_GTE, +}; + +/** + * enum nft_cmp_attributes - nf_tables cmp expression netlink attributes + * + * @NFTA_CMP_SREG: source register of data to compare (NLA_U32: nft_registers) + * @NFTA_CMP_OP: cmp operation (NLA_U32: nft_cmp_ops) + * @NFTA_CMP_DATA: data to compare against (NLA_NESTED: nft_data_attributes) + */ +enum nft_cmp_attributes { + NFTA_CMP_UNSPEC, + NFTA_CMP_SREG, + NFTA_CMP_OP, + NFTA_CMP_DATA, + __NFTA_CMP_MAX +}; +#define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) + +enum nft_set_elem_flags { + NFT_SE_INTERVAL_END = 0x1, +}; + +enum nft_set_elem_attributes { + NFTA_SE_UNSPEC, + NFTA_SE_KEY, + NFTA_SE_DATA, + NFTA_SE_FLAGS, + __NFTA_SE_MAX +}; +#define NFTA_SE_MAX (__NFTA_SE_MAX - 1) + +enum nft_set_flags { + NFT_SET_INTERVAL = 0x1, + NFT_SET_MAP = 0x2, +}; + +enum nft_set_attributes { + NFTA_SET_UNSPEC, + NFTA_SET_FLAGS, + NFTA_SET_SREG, + NFTA_SET_DREG, + NFTA_SET_KLEN, + NFTA_SET_DLEN, + NFTA_SET_ELEMENTS, + __NFTA_SET_MAX +}; +#define NFTA_SET_MAX (__NFTA_SET_MAX - 1) + +enum nft_hash_flags { + NFT_HASH_MAP = 0x1, +}; + +enum nft_hash_elem_attributes { + NFTA_HE_UNSPEC, + NFTA_HE_KEY, + NFTA_HE_DATA, + __NFTA_HE_MAX +}; +#define NFTA_HE_MAX (__NFTA_HE_MAX - 1) + +enum nft_hash_attributes { + NFTA_HASH_UNSPEC, + NFTA_HASH_FLAGS, + NFTA_HASH_SREG, + NFTA_HASH_DREG, + NFTA_HASH_KLEN, + NFTA_HASH_ELEMENTS, + __NFTA_HASH_MAX +}; +#define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) + +/** + * enum nft_payload_bases - nf_tables payload expression offset bases + * + * @NFT_PAYLOAD_LL_HEADER: link layer header + * @NFT_PAYLOAD_NETWORK_HEADER: network header + * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header + */ +enum nft_payload_bases { + NFT_PAYLOAD_LL_HEADER, + NFT_PAYLOAD_NETWORK_HEADER, + NFT_PAYLOAD_TRANSPORT_HEADER, +}; + +/** + * enum nft_payload_attributes - nf_tables payload expression netlink attributes + * + * @NFTA_PAYLOAD_DREG: destination register to load data into (NLA_U32: nft_registers) + * @NFTA_PAYLOAD_BASE: payload base (NLA_U32: nft_payload_bases) + * @NFTA_PAYLOAD_OFFSET: payload offset relative to base (NLA_U32) + * @NFTA_PAYLOAD_LEN: payload length (NLA_U32) + */ +enum nft_payload_attributes { + NFTA_PAYLOAD_UNSPEC, + NFTA_PAYLOAD_DREG, + NFTA_PAYLOAD_BASE, + NFTA_PAYLOAD_OFFSET, + NFTA_PAYLOAD_LEN, + __NFTA_PAYLOAD_MAX +}; +#define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1) + +/** + * enum nft_exthdr_attributes - nf_tables IPv6 extension header expression netlink attributes + * + * @NFTA_EXTHDR_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_EXTHDR_TYPE: extension header type (NLA_U8) + * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32) + * @NFTA_EXTHDR_LEN: extension header length (NLA_U32) + */ +enum nft_exthdr_attributes { + NFTA_EXTHDR_UNSPEC, + NFTA_EXTHDR_DREG, + NFTA_EXTHDR_TYPE, + NFTA_EXTHDR_OFFSET, + NFTA_EXTHDR_LEN, + __NFTA_EXTHDR_MAX +}; +#define NFTA_EXTHDR_MAX (__NFTA_EXTHDR_MAX - 1) + +/** + * enum nft_meta_keys - nf_tables meta expression keys + * + * @NFT_META_LEN: packet length (skb->len) + * @NFT_META_PROTOCOL: packet ethertype protocol (skb->protocol), invalid in OUTPUT + * @NFT_META_PRIORITY: packet priority (skb->priority) + * @NFT_META_MARK: packet mark (skb->mark) + * @NFT_META_IIF: packet input interface index (dev->ifindex) + * @NFT_META_OIF: packet output interface index (dev->ifindex) + * @NFT_META_IIFNAME: packet input interface name (dev->name) + * @NFT_META_OIFNAME: packet output interface name (dev->name) + * @NFT_META_IIFTYPE: packet input interface type (dev->type) + * @NFT_META_OIFTYPE: packet output interface type (dev->type) + * @NFT_META_SKUID: originating socket UID (fsuid) + * @NFT_META_SKGID: originating socket GID (fsgid) + * @NFT_META_NFTRACE: packet nftrace bit + * @NFT_META_RTCLASSID: realm value of packet's route (skb->dst->tclassid) + * @NFT_META_SECMARK: packet secmark (skb->secmark) + */ +enum nft_meta_keys { + NFT_META_LEN, + NFT_META_PROTOCOL, + NFT_META_PRIORITY, + NFT_META_MARK, + NFT_META_IIF, + NFT_META_OIF, + NFT_META_IIFNAME, + NFT_META_OIFNAME, + NFT_META_IIFTYPE, + NFT_META_OIFTYPE, + NFT_META_SKUID, + NFT_META_SKGID, + NFT_META_NFTRACE, + NFT_META_RTCLASSID, + NFT_META_SECMARK, +}; + +/** + * enum nft_meta_attributes - nf_tables meta expression netlink attributes + * + * @NFTA_META_DREG: destination register (NLA_U32) + * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys) + */ +enum nft_meta_attributes { + NFTA_META_UNSPEC, + NFTA_META_DREG, + NFTA_META_KEY, + __NFTA_META_MAX +}; +#define NFTA_META_MAX (__NFTA_META_MAX - 1) + +/** + * enum nft_ct_keys - nf_tables ct expression keys + * + * @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info) + * @NFT_CT_DIRECTION: conntrack direction (enum ip_conntrack_dir) + * @NFT_CT_STATUS: conntrack status (bitmask of enum ip_conntrack_status) + * @NFT_CT_MARK: conntrack mark value + * @NFT_CT_SECMARK: conntrack secmark value + * @NFT_CT_EXPIRATION: relative conntrack expiration time in ms + * @NFT_CT_HELPER: connection tracking helper assigned to conntrack + * @NFT_CT_L3PROTOCOL: conntrack layer 3 protocol + * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address) + * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address) + * @NFT_CT_PROTOCOL: conntrack layer 4 protocol + * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source + * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination + */ +enum nft_ct_keys { + NFT_CT_STATE, + NFT_CT_DIRECTION, + NFT_CT_STATUS, + NFT_CT_MARK, + NFT_CT_SECMARK, + NFT_CT_EXPIRATION, + NFT_CT_HELPER, + NFT_CT_L3PROTOCOL, + NFT_CT_SRC, + NFT_CT_DST, + NFT_CT_PROTOCOL, + NFT_CT_PROTO_SRC, + NFT_CT_PROTO_DST, +}; + +/** + * enum nft_ct_attributes - nf_tables ct expression netlink attributes + * + * @NFTA_CT_DREG: destination register (NLA_U32) + * @NFTA_CT_KEY: conntrack data item to load (NLA_U32: nft_ct_keys) + * @NFTA_CT_DIRECTION: direction in case of directional keys (NLA_U8) + */ +enum nft_ct_attributes { + NFTA_CT_UNSPEC, + NFTA_CT_DREG, + NFTA_CT_KEY, + NFTA_CT_DIRECTION, + __NFTA_CT_MAX +}; +#define NFTA_CT_MAX (__NFTA_CT_MAX - 1) + +/** + * enum nft_limit_attributes - nf_tables limit expression netlink attributes + * + * @NFTA_LIMIT_RATE: refill rate (NLA_U64) + * @NFTA_LIMIT_UNIT: refill unit (NLA_U64) + */ +enum nft_limit_attributes { + NFTA_LIMIT_UNSPEC, + NFTA_LIMIT_RATE, + NFTA_LIMIT_UNIT, + __NFTA_LIMIT_MAX +}; +#define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1) + +/** + * enum nft_counter_attributes - nf_tables counter expression netlink attributes + * + * @NFTA_COUNTER_BYTES: number of bytes (NLA_U64) + * @NFTA_COUNTER_PACKETS: number of packets (NLA_U64) + */ +enum nft_counter_attributes { + NFTA_COUNTER_UNSPEC, + NFTA_COUNTER_BYTES, + NFTA_COUNTER_PACKETS, + __NFTA_COUNTER_MAX +}; +#define NFTA_COUNTER_MAX (__NFTA_COUNTER_MAX - 1) + +/** + * enum nft_log_attributes - nf_tables log expression netlink attributes + * + * @NFTA_LOG_GROUP: netlink group to send messages to (NLA_U32) + * @NFTA_LOG_PREFIX: prefix to prepend to log messages (NLA_STRING) + * @NFTA_LOG_SNAPLEN: length of payload to include in netlink message (NLA_U32) + * @NFTA_LOG_QTHRESHOLD: queue threshold (NLA_U32) + */ +enum nft_log_attributes { + NFTA_LOG_UNSPEC, + NFTA_LOG_GROUP, + NFTA_LOG_PREFIX, + NFTA_LOG_SNAPLEN, + NFTA_LOG_QTHRESHOLD, + __NFTA_LOG_MAX +}; +#define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1) + +/** + * enum nft_reject_types - nf_tables reject expression reject types + * + * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable + * @NFT_REJECT_TCP_RST: reject using TCP RST + */ +enum nft_reject_types { + NFT_REJECT_ICMP_UNREACH, + NFT_REJECT_TCP_RST, +}; + +/** + * enum nft_reject_attributes - nf_tables reject expression netlink attributes + * + * @NFTA_REJECT_TYPE: packet type to use (NLA_U32: nft_reject_types) + * @NFTA_REJECT_ICMP_CODE: ICMP code to use (NLA_U8) + */ +enum nft_reject_attributes { + NFTA_REJECT_UNSPEC, + NFTA_REJECT_TYPE, + NFTA_REJECT_ICMP_CODE, + __NFTA_REJECT_MAX +}; +#define NFTA_REJECT_MAX (__NFTA_REJECT_MAX - 1) + +/** + * enum nft_nat_types - nf_tables nat expression NAT types + * + * @NFT_NAT_SNAT: source NAT + * @NFT_NAT_DNAT: destination NAT + */ +enum nft_nat_types { + NFT_NAT_SNAT, + NFT_NAT_DNAT, +}; + +/** + * enum nft_nat_attributes - nf_tables nat expression netlink attributes + * + * @NFTA_NAT_TYPE: NAT type (NLA_U32: nft_nat_types) + * @NFTA_NAT_ADDR_MIN: source register of address range start (NLA_U32: nft_registers) + * @NFTA_NAT_ADDR_MAX: source register of address range end (NLA_U32: nft_registers) + * @NFTA_NAT_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_NAT_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) + */ +enum nft_nat_attributes { + NFTA_NAT_UNSPEC, + NFTA_NAT_TYPE, + NFTA_NAT_ADDR_MIN, + NFTA_NAT_ADDR_MAX, + NFTA_NAT_PROTO_MIN, + NFTA_NAT_PROTO_MAX, + __NFTA_NAT_MAX +}; +#define NFTA_NAT_MAX (__NFTA_NAT_MAX - 1) + +#endif /* _LINUX_NF_TABLES_H */ diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 4a4efafad5f4..d276c3bd55b8 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -18,6 +18,8 @@ enum nfnetlink_groups { #define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_DESTROY, #define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY + NFNLGRP_NFTABLES, +#define NFNLGRP_NFTABLES NFNLGRP_NFTABLES __NFNLGRP_MAX, }; #define NFNLGRP_MAX (__NFNLGRP_MAX - 1) @@ -51,6 +53,7 @@ struct nfgenmsg { #define NFNL_SUBSYS_ACCT 7 #define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 #define NFNL_SUBSYS_CTHELPER 9 -#define NFNL_SUBSYS_COUNT 10 +#define NFNL_SUBSYS_NFTABLES 10 +#define NFNL_SUBSYS_COUNT 11 #endif /* _UAPI_NFNETLINK_H */ diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index a9aff9c7d027..68f8128147be 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -1,6 +1,9 @@ # # Bridge netfilter configuration # +# +config NF_TABLES_BRIDGE + tristate "Ethernet Bridge nf_tables support" menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index 0718699540b0..ea7629f58b3d 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -2,6 +2,8 @@ # Makefile for the netfilter modules for Link Layer filtering on a bridge. # +obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o + obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o # tables diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c new file mode 100644 index 000000000000..bc5c21c911c0 --- /dev/null +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include + +static struct nft_af_info nft_af_bridge __read_mostly = { + .family = NFPROTO_BRIDGE, + .nhooks = NF_BR_NUMHOOKS, + .owner = THIS_MODULE, +}; + +static int __init nf_tables_bridge_init(void) +{ + return nft_register_afinfo(&nft_af_bridge); +} + +static void __exit nf_tables_bridge_exit(void) +{ + nft_unregister_afinfo(&nft_af_bridge); +} + +module_init(nf_tables_bridge_init); +module_exit(nf_tables_bridge_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 1657e39b291f..eb1d56ece361 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -36,6 +36,22 @@ config NF_CONNTRACK_PROC_COMPAT If unsure, say Y. +config NF_TABLES_IPV4 + depends on NF_TABLES + tristate "IPv4 nf_tables support" + +config NFT_REJECT_IPV4 + depends on NF_TABLES_IPV4 + tristate "nf_tables IPv4 reject support" + +config NF_TABLE_ROUTE_IPV4 + depends on NF_TABLES_IPV4 + tristate "IPv4 nf_tables route table support" + +config NF_TABLE_NAT_IPV4 + depends on NF_TABLES_IPV4 + tristate "IPv4 nf_tables nat table support" + config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 3622b248b6dd..b2f01cd2cd65 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -27,6 +27,11 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o # NAT protocols (nf_nat) obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o +obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o +obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o +obj-$(CONFIG_NF_TABLE_ROUTE_IPV4) += nf_table_route_ipv4.o +obj-$(CONFIG_NF_TABLE_NAT_IPV4) += nf_table_nat_ipv4.o + # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o diff --git a/net/ipv4/netfilter/nf_table_nat_ipv4.c b/net/ipv4/netfilter/nf_table_nat_ipv4.c new file mode 100644 index 000000000000..2a6f184c10bd --- /dev/null +++ b/net/ipv4/netfilter/nf_table_nat_ipv4.c @@ -0,0 +1,409 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_nat { + enum nft_registers sreg_addr_min:8; + enum nft_registers sreg_addr_max:8; + enum nft_registers sreg_proto_min:8; + enum nft_registers sreg_proto_max:8; + enum nf_nat_manip_type type; +}; + +static void nft_nat_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo); + struct nf_nat_range range; + + memset(&range, 0, sizeof(range)); + if (priv->sreg_addr_min) { + range.min_addr.ip = data[priv->sreg_addr_min].data[0]; + range.max_addr.ip = data[priv->sreg_addr_max].data[0]; + range.flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (priv->sreg_proto_min) { + range.min_proto.all = data[priv->sreg_proto_min].data[0]; + range.max_proto.all = data[priv->sreg_proto_max].data[0]; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + data[NFT_REG_VERDICT].verdict = + nf_nat_setup_info(ct, &range, priv->type); +} + +static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { + [NFTA_NAT_ADDR_MIN] = { .type = NLA_U32 }, + [NFTA_NAT_ADDR_MAX] = { .type = NLA_U32 }, + [NFTA_NAT_PROTO_MIN] = { .type = NLA_U32 }, + [NFTA_NAT_PROTO_MAX] = { .type = NLA_U32 }, + [NFTA_NAT_TYPE] = { .type = NLA_U32 }, +}; + +static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_nat *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_NAT_TYPE] == NULL) + return -EINVAL; + + switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) { + case NFT_NAT_SNAT: + priv->type = NF_NAT_MANIP_SRC; + break; + case NFT_NAT_DNAT: + priv->type = NF_NAT_MANIP_DST; + break; + default: + return -EINVAL; + } + + if (tb[NFTA_NAT_ADDR_MIN]) { + priv->sreg_addr_min = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MIN])); + err = nft_validate_input_register(priv->sreg_addr_min); + if (err < 0) + return err; + } + + if (tb[NFTA_NAT_ADDR_MAX]) { + priv->sreg_addr_max = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MAX])); + err = nft_validate_input_register(priv->sreg_addr_max); + if (err < 0) + return err; + } else + priv->sreg_addr_max = priv->sreg_addr_min; + + if (tb[NFTA_NAT_PROTO_MIN]) { + priv->sreg_proto_min = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MIN])); + err = nft_validate_input_register(priv->sreg_proto_min); + if (err < 0) + return err; + } + + if (tb[NFTA_NAT_PROTO_MAX]) { + priv->sreg_proto_max = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MAX])); + err = nft_validate_input_register(priv->sreg_proto_max); + if (err < 0) + return err; + } else + priv->sreg_proto_max = priv->sreg_proto_min; + + return 0; +} + +static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + + switch (priv->type) { + case NF_NAT_MANIP_SRC: + if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT))) + goto nla_put_failure; + break; + case NF_NAT_MANIP_DST: + if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT))) + goto nla_put_failure; + break; + } + + if (nla_put_be32(skb, NFTA_NAT_ADDR_MIN, htonl(priv->sreg_addr_min))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NAT_ADDR_MAX, htonl(priv->sreg_addr_max))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NAT_PROTO_MIN, htonl(priv->sreg_proto_min))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NAT_PROTO_MAX, htonl(priv->sreg_proto_max))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_nat_ops __read_mostly = { + .name = "nat", + .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), + .owner = THIS_MODULE, + .eval = nft_nat_eval, + .init = nft_nat_init, + .dump = nft_nat_dump, + .policy = nft_nat_policy, + .maxattr = NFTA_NAT_MAX, +}; + +/* + * NAT table + */ + +static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_nat *nat; + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + unsigned int ret; + + if (ct == NULL || nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); + + nat = nfct_nat(ct); + if (nat == NULL) { + /* Conntrack module was loaded late, can't add extension. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) + return NF_ACCEPT; + } + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED + IP_CT_IS_REPLY: + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { + if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, + ops->hooknum)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall through */ + case IP_CT_NEW: + if (nf_nat_initialized(ct, maniptype)) + break; + + ret = nft_do_chain(ops, skb, in, out, okfn); + if (ret != NF_ACCEPT) + return ret; + if (!nf_nat_initialized(ct, maniptype)) { + ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + if (ret != NF_ACCEPT) + return ret; + } + default: + break; + } + + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); +} + +static unsigned int nf_nat_prerouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + __be32 daddr = ip_hdr(skb)->daddr; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + ip_hdr(skb)->daddr != daddr) { + skb_dst_drop(skb); + } + return ret; +} + +static unsigned int nf_nat_postrouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo __maybe_unused; + const struct nf_conn *ct __maybe_unused; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.src.u3.ip != + ct->tuplehash[!dir].tuple.dst.u3.ip || + ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all) + return nf_xfrm_me_harder(skb, AF_INET) == 0 ? + ret : NF_DROP; + } +#endif + return ret; +} + +static unsigned int nf_nat_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.dst.u3.ip != + ct->tuplehash[!dir].tuple.src.u3.ip) { + if (ip_route_me_harder(skb, RTN_UNSPEC)) + ret = NF_DROP; + } +#ifdef CONFIG_XFRM + else if (ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) + if (nf_xfrm_me_harder(skb, AF_INET)) + ret = NF_DROP; +#endif + } + return ret; +} + +static struct nft_base_chain nf_chain_nat_prerouting __read_mostly = { + .chain = { + .name = "PREROUTING", + .rules = LIST_HEAD_INIT(nf_chain_nat_prerouting.chain.rules), + .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, + }, + .ops = { + .hook = nf_nat_prerouting, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_NAT_DST, + .priv = &nf_chain_nat_prerouting.chain, + }, +}; + +static struct nft_base_chain nf_chain_nat_postrouting __read_mostly = { + .chain = { + .name = "POSTROUTING", + .rules = LIST_HEAD_INIT(nf_chain_nat_postrouting.chain.rules), + .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, + }, + .ops = { + .hook = nf_nat_postrouting, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_NAT_SRC, + .priv = &nf_chain_nat_postrouting.chain, + }, +}; + +static struct nft_base_chain nf_chain_nat_output __read_mostly = { + .chain = { + .name = "OUTPUT", + .rules = LIST_HEAD_INIT(nf_chain_nat_output.chain.rules), + .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, + }, + .ops = { + .hook = nf_nat_output, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_NAT_DST, + .priv = &nf_chain_nat_output.chain, + }, +}; + +static struct nft_base_chain nf_chain_nat_input __read_mostly = { + .chain = { + .name = "INPUT", + .rules = LIST_HEAD_INIT(nf_chain_nat_input.chain.rules), + .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, + }, + .ops = { + .hook = nf_nat_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_NAT_SRC, + .priv = &nf_chain_nat_input.chain, + }, +}; + + +static struct nft_table nf_table_nat_ipv4 __read_mostly = { + .name = "nat", + .chains = LIST_HEAD_INIT(nf_table_nat_ipv4.chains), +}; + +static int __init nf_table_nat_init(void) +{ + int err; + + list_add_tail(&nf_chain_nat_prerouting.chain.list, + &nf_table_nat_ipv4.chains); + list_add_tail(&nf_chain_nat_postrouting.chain.list, + &nf_table_nat_ipv4.chains); + list_add_tail(&nf_chain_nat_output.chain.list, + &nf_table_nat_ipv4.chains); + list_add_tail(&nf_chain_nat_input.chain.list, + &nf_table_nat_ipv4.chains); + + err = nft_register_table(&nf_table_nat_ipv4, NFPROTO_IPV4); + if (err < 0) + goto err1; + + err = nft_register_expr(&nft_nat_ops); + if (err < 0) + goto err2; + + return 0; + +err2: + nft_unregister_table(&nf_table_nat_ipv4, NFPROTO_IPV4); +err1: + return err; +} + +static void __exit nf_table_nat_exit(void) +{ + nft_unregister_expr(&nft_nat_ops); + nft_unregister_table(&nf_table_nat_ipv4, AF_INET); +} + +module_init(nf_table_nat_init); +module_exit(nf_table_nat_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_TABLE(AF_INET, "nat"); +MODULE_ALIAS_NFT_EXPR("nat"); diff --git a/net/ipv4/netfilter/nf_table_route_ipv4.c b/net/ipv4/netfilter/nf_table_route_ipv4.c new file mode 100644 index 000000000000..4f257a1ed661 --- /dev/null +++ b/net/ipv4/netfilter/nf_table_route_ipv4.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + u32 mark; + __be32 saddr, daddr; + u_int8_t tos; + const struct iphdr *iph; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + return NF_ACCEPT; + + mark = skb->mark; + iph = ip_hdr(skb); + saddr = iph->saddr; + daddr = iph->daddr; + tos = iph->tos; + + ret = nft_do_chain(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_QUEUE) { + iph = ip_hdr(skb); + + if (iph->saddr != saddr || + iph->daddr != daddr || + skb->mark != mark || + iph->tos != tos) + if (ip_route_me_harder(skb, RTN_UNSPEC)) + ret = NF_DROP; + } + return ret; +} + +static struct nft_base_chain nf_chain_route_output __read_mostly = { + .chain = { + .name = "OUTPUT", + .rules = LIST_HEAD_INIT(nf_chain_route_output.chain.rules), + .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, + }, + .ops = { + .hook = nf_route_table_hook, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_MANGLE, + .priv = &nf_chain_route_output.chain, + }, +}; + +static struct nft_table nf_table_route_ipv4 __read_mostly = { + .name = "route", + .chains = LIST_HEAD_INIT(nf_table_route_ipv4.chains), +}; + +static int __init nf_table_route_init(void) +{ + list_add_tail(&nf_chain_route_output.chain.list, + &nf_table_route_ipv4.chains); + return nft_register_table(&nf_table_route_ipv4, NFPROTO_IPV4); +} + +static void __exit nf_table_route_exit(void) +{ + nft_unregister_table(&nf_table_route_ipv4, NFPROTO_IPV4); +} + +module_init(nf_table_route_init); +module_exit(nf_table_route_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_TABLE(AF_INET, "route"); diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c new file mode 100644 index 000000000000..63d0a3bf53d3 --- /dev/null +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include + +static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + if (unlikely(skb->len < sizeof(struct iphdr) || + ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) { + if (net_ratelimit()) + pr_info("nf_tables_ipv4: ignoring short SOCK_RAW " + "packet\n"); + return NF_ACCEPT; + } + + return nft_do_chain(ops, skb, in, out, okfn); +} + +static struct nft_af_info nft_af_ipv4 __read_mostly = { + .family = NFPROTO_IPV4, + .nhooks = NF_INET_NUMHOOKS, + .owner = THIS_MODULE, + .hooks = { + [NF_INET_LOCAL_OUT] = nft_ipv4_output, + }, +}; + +static int __init nf_tables_ipv4_init(void) +{ + return nft_register_afinfo(&nft_af_ipv4); +} + +static void __exit nf_tables_ipv4_exit(void) +{ + nft_unregister_afinfo(&nft_af_ipv4); +} + +module_init(nf_tables_ipv4_init); +module_exit(nf_tables_ipv4_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_FAMILY(AF_INET); diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c new file mode 100644 index 000000000000..b4ee8d3bb1e4 --- /dev/null +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_reject { + enum nft_reject_types type:8; + u8 icmp_code; +}; + +static void nft_reject_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_reject *priv = nft_expr_priv(expr); + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + icmp_send(pkt->skb, ICMP_DEST_UNREACH, priv->icmp_code, 0); + break; + case NFT_REJECT_TCP_RST: + break; + } + + data[NFT_REG_VERDICT].verdict = NF_DROP; +} + +static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = { + [NFTA_REJECT_TYPE] = { .type = NLA_U32 }, + [NFTA_REJECT_ICMP_CODE] = { .type = NLA_U8 }, +}; + +static int nft_reject_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_reject *priv = nft_expr_priv(expr); + + if (tb[NFTA_REJECT_TYPE] == NULL) + return -EINVAL; + + priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + if (tb[NFTA_REJECT_ICMP_CODE] == NULL) + return -EINVAL; + priv->icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]); + case NFT_REJECT_TCP_RST: + break; + default: + return -EINVAL; + } + + return 0; +} + +static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_reject *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_REJECT_TYPE, priv->type)) + goto nla_put_failure; + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) + goto nla_put_failure; + break; + } + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops reject_ops __read_mostly = { + .name = "reject", + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .owner = THIS_MODULE, + .eval = nft_reject_eval, + .init = nft_reject_init, + .dump = nft_reject_dump, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, +}; + +static int __init nft_reject_module_init(void) +{ + return nft_register_expr(&reject_ops); +} + +static void __exit nft_reject_module_exit(void) +{ + nft_unregister_expr(&reject_ops); +} + +module_init(nft_reject_module_init); +module_exit(nft_reject_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("reject"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index a7f842b29b67..5677e38eeca3 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,6 +25,14 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. +config NF_TABLES_IPV6 + depends on NF_TABLES + tristate "IPv6 nf_tables support" + +config NF_TABLE_ROUTE_IPV6 + depends on NF_TABLES_IPV6 + tristate "IPv6 nf_tables route table support" + config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 2b53738f798c..956af4492d10 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -23,6 +23,10 @@ obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o +# nf_tables +obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o +obj-$(CONFIG_NF_TABLE_ROUTE_IPV6) += nf_table_route_ipv6.o + # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o diff --git a/net/ipv6/netfilter/nf_table_route_ipv6.c b/net/ipv6/netfilter/nf_table_route_ipv6.c new file mode 100644 index 000000000000..48ac65c7b398 --- /dev/null +++ b/net/ipv6/netfilter/nf_table_route_ipv6.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + struct in6_addr saddr, daddr; + u_int8_t hop_limit; + u32 mark, flowlabel; + + /* save source/dest address, mark, hoplimit, flowlabel, priority */ + memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); + memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); + mark = skb->mark; + hop_limit = ipv6_hdr(skb)->hop_limit; + + /* flowlabel and prio (includes version, which shouldn't change either */ + flowlabel = *((u32 *)ipv6_hdr(skb)); + + ret = nft_do_chain(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_QUEUE && + (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || + memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || + skb->mark != mark || + ipv6_hdr(skb)->hop_limit != hop_limit || + flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) + return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; + + return ret; +} + +static struct nft_base_chain nf_chain_route_output __read_mostly = { + .chain = { + .name = "OUTPUT", + .rules = LIST_HEAD_INIT(nf_chain_route_output.chain.rules), + .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, + }, + .ops = { + .hook = nf_route_table_hook, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP6_PRI_MANGLE, + .priv = &nf_chain_route_output.chain, + }, +}; + +static struct nft_table nf_table_route_ipv6 __read_mostly = { + .name = "route", + .chains = LIST_HEAD_INIT(nf_table_route_ipv6.chains), +}; + +static int __init nf_table_route_init(void) +{ + list_add_tail(&nf_chain_route_output.chain.list, + &nf_table_route_ipv6.chains); + return nft_register_table(&nf_table_route_ipv6, NFPROTO_IPV6); +} + +static void __exit nf_table_route_exit(void) +{ + nft_unregister_table(&nf_table_route_ipv6, NFPROTO_IPV6); +} + +module_init(nf_table_route_init); +module_exit(nf_table_route_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_TABLE(AF_INET6, "route"); diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c new file mode 100644 index 000000000000..e0717cea4913 --- /dev/null +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include + +static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + if (unlikely(skb->len < sizeof(struct ipv6hdr))) { + if (net_ratelimit()) + pr_info("nf_tables_ipv6: ignoring short SOCK_RAW " + "packet\n"); + return NF_ACCEPT; + } + + return nft_do_chain(ops, skb, in, out, okfn); +} + +static struct nft_af_info nft_af_ipv6 __read_mostly = { + .family = NFPROTO_IPV6, + .nhooks = NF_INET_NUMHOOKS, + .owner = THIS_MODULE, + .hooks = { + [NF_INET_LOCAL_OUT] = nft_ipv6_output, + }, +}; + +static int __init nf_tables_ipv6_init(void) +{ + return nft_register_afinfo(&nft_af_ipv6); +} + +static void __exit nf_tables_ipv6_exit(void) +{ + nft_unregister_afinfo(&nft_af_ipv6); +} + +module_init(nf_tables_ipv6_init); +module_exit(nf_tables_ipv6_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_FAMILY(AF_INET6); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 6e839b6dff2b..c271e1af93b5 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -413,6 +413,43 @@ config NETFILTER_SYNPROXY endif # NF_CONNTRACK +config NF_TABLES + depends on NETFILTER_NETLINK + tristate "Netfilter nf_tables support" + +config NFT_EXTHDR + depends on NF_TABLES + tristate "Netfilter nf_tables IPv6 exthdr module" + +config NFT_META + depends on NF_TABLES + tristate "Netfilter nf_tables meta module" + +config NFT_CT + depends on NF_TABLES + depends on NF_CONNTRACK + tristate "Netfilter nf_tables conntrack module" + +config NFT_SET + depends on NF_TABLES + tristate "Netfilter nf_tables set module" + +config NFT_HASH + depends on NF_TABLES + tristate "Netfilter nf_tables hash module" + +config NFT_COUNTER + depends on NF_TABLES + tristate "Netfilter nf_tables counter module" + +config NFT_LOG + depends on NF_TABLES + tristate "Netfilter nf_tables log module" + +config NFT_LIMIT + depends on NF_TABLES + tristate "Netfilter nf_tables limit module" + config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index c3a0a12907f6..1ca3f3932826 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -64,6 +64,22 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o # SYNPROXY obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o +# nf_tables +nf_tables-objs += nf_tables_core.o nf_tables_api.o +nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o +nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o + +obj-$(CONFIG_NF_TABLES) += nf_tables.o +obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o +obj-$(CONFIG_NFT_META) += nft_meta.o +obj-$(CONFIG_NFT_CT) += nft_ct.o +obj-$(CONFIG_NFT_LIMIT) += nft_limit.o +#nf_tables-objs += nft_meta_target.o +obj-$(CONFIG_NFT_SET) += nft_set.o +obj-$(CONFIG_NFT_HASH) += nft_hash.o +obj-$(CONFIG_NFT_COUNTER) += nft_counter.o +obj-$(CONFIG_NFT_LOG) += nft_log.o + # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c new file mode 100644 index 000000000000..7d59c89c6c75 --- /dev/null +++ b/net/netfilter/nf_tables_api.c @@ -0,0 +1,1760 @@ +/* + * Copyright (c) 2007, 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static LIST_HEAD(nf_tables_afinfo); +static LIST_HEAD(nf_tables_expressions); + +/** + * nft_register_afinfo - register nf_tables address family info + * + * @afi: address family info to register + * + * Register the address family for use with nf_tables. Returns zero on + * success or a negative errno code otherwise. + */ +int nft_register_afinfo(struct nft_af_info *afi) +{ + INIT_LIST_HEAD(&afi->tables); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_tail(&afi->list, &nf_tables_afinfo); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return 0; +} +EXPORT_SYMBOL_GPL(nft_register_afinfo); + +/** + * nft_unregister_afinfo - unregister nf_tables address family info + * + * @afi: address family info to unregister + * + * Unregister the address family for use with nf_tables. + */ +void nft_unregister_afinfo(struct nft_af_info *afi) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del(&afi->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_afinfo); + +static struct nft_af_info *nft_afinfo_lookup(int family) +{ + struct nft_af_info *afi; + + list_for_each_entry(afi, &nf_tables_afinfo, list) { + if (afi->family == family) + return afi; + } + return NULL; +} + +static struct nft_af_info *nf_tables_afinfo_lookup(int family, bool autoload) +{ + struct nft_af_info *afi; + + afi = nft_afinfo_lookup(family); + if (afi != NULL) + return afi; +#ifdef CONFIG_MODULES + if (autoload) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-afinfo-%u", family); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + afi = nft_afinfo_lookup(family); + if (afi != NULL) + return ERR_PTR(-EAGAIN); + } +#endif + return ERR_PTR(-EAFNOSUPPORT); +} + +/* + * Tables + */ + +static struct nft_table *nft_table_lookup(const struct nft_af_info *afi, + const struct nlattr *nla) +{ + struct nft_table *table; + + list_for_each_entry(table, &afi->tables, list) { + if (!nla_strcmp(nla, table->name)) + return table; + } + return NULL; +} + +static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi, + const struct nlattr *nla, + bool autoload) +{ + struct nft_table *table; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + table = nft_table_lookup(afi, nla); + if (table != NULL) + return table; + +#ifdef CONFIG_MODULES + if (autoload) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-table-%u-%*.s", afi->family, + nla_len(nla)-1, (const char *)nla_data(nla)); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (nft_table_lookup(afi, nla)) + return ERR_PTR(-EAGAIN); + } +#endif + return ERR_PTR(-ENOENT); +} + +static inline u64 nf_tables_alloc_handle(struct nft_table *table) +{ + return ++table->hgenerator; +} + +static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { + [NFTA_TABLE_NAME] = { .type = NLA_STRING }, +}; + +static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, + int event, u32 flags, int family, + const struct nft_table *table) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_TABLE_NAME, table->name)) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_table_notify(const struct sk_buff *oskb, + const struct nlmsghdr *nlh, + const struct nft_table *table, + int event, int family) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + u32 seq = nlh ? nlh->nlmsg_seq : 0; + struct net *net = oskb ? sock_net(oskb->sk) : &init_net; + bool report; + int err; + + report = nlh ? nlmsg_report(nlh) : false; + if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_table_info(skb, portid, seq, event, 0, + family, table); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static int nf_tables_dump_tables(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + unsigned int idx = 0, s_idx = cb->args[0]; + int family = nfmsg->nfgen_family; + + list_for_each_entry(afi, &nf_tables_afinfo, list) { + if (family != NFPROTO_UNSPEC && family != afi->family) + continue; + + list_for_each_entry(table, &afi->tables, list) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (nf_tables_fill_table_info(skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWTABLE, + NLM_F_MULTI, + afi->family, table) < 0) + goto done; +cont: + idx++; + } + } +done: + cb->args[0] = idx; + return skb->len; +} + +static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + struct sk_buff *skb2; + int family = nfmsg->nfgen_family; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_tables, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0, + family, table); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + +err: + kfree_skb(skb2); + return err; +} + +static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nlattr *name; + struct nft_af_info *afi; + struct nft_table *table; + int family = nfmsg->nfgen_family; + + afi = nf_tables_afinfo_lookup(family, true); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + name = nla[NFTA_TABLE_NAME]; + table = nf_tables_table_lookup(afi, name, false); + if (IS_ERR(table)) { + if (PTR_ERR(table) != -ENOENT) + return PTR_ERR(table); + table = NULL; + } + + if (table != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + return 0; + } + + table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL); + if (table == NULL) + return -ENOMEM; + + nla_strlcpy(table->name, name, nla_len(name)); + INIT_LIST_HEAD(&table->chains); + + list_add_tail(&table->list, &afi->tables); + nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); + return 0; +} + +static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + struct nft_af_info *afi; + struct nft_table *table; + int family = nfmsg->nfgen_family; + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + if (table->flags & NFT_TABLE_BUILTIN) + return -EOPNOTSUPP; + + if (table->use) + return -EBUSY; + + list_del(&table->list); + nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family); + kfree(table); + return 0; +} + +static struct nft_table *__nf_tables_table_lookup(const struct nft_af_info *afi, + const char *name) +{ + struct nft_table *table; + + list_for_each_entry(table, &afi->tables, list) { + if (!strcmp(name, table->name)) + return table; + } + + return ERR_PTR(-ENOENT); +} + +static int nf_tables_chain_notify(const struct sk_buff *oskb, + const struct nlmsghdr *nlh, + const struct nft_table *table, + const struct nft_chain *chain, + int event, int family); + +/** + * nft_register_table - register a built-in table + * + * @table: the table to register + * @family: protocol family to register table with + * + * Register a built-in table for use with nf_tables. Returns zero on + * success or a negative errno code otherwise. + */ +int nft_register_table(struct nft_table *table, int family) +{ + struct nft_af_info *afi; + struct nft_table *t; + struct nft_chain *chain; + int err; + + nfnl_lock(NFNL_SUBSYS_NFTABLES); +again: + afi = nf_tables_afinfo_lookup(family, true); + if (IS_ERR(afi)) { + err = PTR_ERR(afi); + if (err == -EAGAIN) + goto again; + goto err; + } + + t = __nf_tables_table_lookup(afi, table->name); + if (IS_ERR(t)) { + err = PTR_ERR(t); + if (err != -ENOENT) + goto err; + t = NULL; + } + + if (t != NULL) { + err = -EEXIST; + goto err; + } + + table->flags |= NFT_TABLE_BUILTIN; + list_add_tail(&table->list, &afi->tables); + nf_tables_table_notify(NULL, NULL, table, NFT_MSG_NEWTABLE, family); + list_for_each_entry(chain, &table->chains, list) + nf_tables_chain_notify(NULL, NULL, table, chain, + NFT_MSG_NEWCHAIN, family); + err = 0; +err: + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return err; +} +EXPORT_SYMBOL_GPL(nft_register_table); + +/** + * nft_unregister_table - unregister a built-in table + * + * @table: the table to unregister + * @family: protocol family to unregister table with + * + * Unregister a built-in table for use with nf_tables. + */ +void nft_unregister_table(struct nft_table *table, int family) +{ + struct nft_chain *chain; + + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del(&table->list); + list_for_each_entry(chain, &table->chains, list) + nf_tables_chain_notify(NULL, NULL, table, chain, + NFT_MSG_DELCHAIN, family); + nf_tables_table_notify(NULL, NULL, table, NFT_MSG_DELTABLE, family); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_table); + +/* + * Chains + */ + +static struct nft_chain * +nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle) +{ + struct nft_chain *chain; + + list_for_each_entry(chain, &table->chains, list) { + if (chain->handle == handle) + return chain; + } + + return ERR_PTR(-ENOENT); +} + +static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table, + const struct nlattr *nla) +{ + struct nft_chain *chain; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + list_for_each_entry(chain, &table->chains, list) { + if (!nla_strcmp(nla, chain->name)) + return chain; + } + + return ERR_PTR(-ENOENT); +} + +static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { + [NFTA_CHAIN_TABLE] = { .type = NLA_STRING }, + [NFTA_CHAIN_HANDLE] = { .type = NLA_U64 }, + [NFTA_CHAIN_NAME] = { .type = NLA_STRING, + .len = NFT_CHAIN_MAXNAMELEN - 1 }, + [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { + [NFTA_HOOK_HOOKNUM] = { .type = NLA_U32 }, + [NFTA_HOOK_PRIORITY] = { .type = NLA_U32 }, +}; + +static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, + int event, u32 flags, int family, + const struct nft_table *table, + const struct nft_chain *chain) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name)) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle))) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name)) + goto nla_put_failure; + + if (chain->flags & NFT_BASE_CHAIN) { + const struct nf_hook_ops *ops = &nft_base_chain(chain)->ops; + struct nlattr *nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); + if (nest == NULL) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority))) + goto nla_put_failure; + nla_nest_end(skb, nest); + } + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_chain_notify(const struct sk_buff *oskb, + const struct nlmsghdr *nlh, + const struct nft_table *table, + const struct nft_chain *chain, + int event, int family) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + struct net *net = oskb ? sock_net(oskb->sk) : &init_net; + u32 seq = nlh ? nlh->nlmsg_seq : 0; + bool report; + int err; + + report = nlh ? nlmsg_report(nlh) : false; + if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family, + table, chain); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static int nf_tables_dump_chains(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + unsigned int idx = 0, s_idx = cb->args[0]; + int family = nfmsg->nfgen_family; + + list_for_each_entry(afi, &nf_tables_afinfo, list) { + if (family != NFPROTO_UNSPEC && family != afi->family) + continue; + + list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry(chain, &table->chains, list) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWCHAIN, + NLM_F_MULTI, + afi->family, table, chain) < 0) + goto done; +cont: + idx++; + } + } + } +done: + cb->args[0] = idx; + return skb->len; +} + + +static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + struct sk_buff *skb2; + int family = nfmsg->nfgen_family; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_chains, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0, + family, table, chain); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + +err: + kfree_skb(skb2); + return err; +} + +static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nlattr * uninitialized_var(name); + const struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + struct nft_base_chain *basechain; + struct nlattr *ha[NFTA_HOOK_MAX + 1]; + int family = nfmsg->nfgen_family; + u64 handle = 0; + int err; + bool create; + + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; + + afi = nf_tables_afinfo_lookup(family, true); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], create); + if (IS_ERR(table)) + return PTR_ERR(table); + + if (table->use == UINT_MAX) + return -EOVERFLOW; + + chain = NULL; + name = nla[NFTA_CHAIN_NAME]; + + if (nla[NFTA_CHAIN_HANDLE]) { + handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE])); + chain = nf_tables_chain_lookup_byhandle(table, handle); + if (IS_ERR(chain)) + return PTR_ERR(chain); + } else { + chain = nf_tables_chain_lookup(table, name); + if (IS_ERR(chain)) { + if (PTR_ERR(chain) != -ENOENT) + return PTR_ERR(chain); + chain = NULL; + } + } + + if (chain != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + + if (nla[NFTA_CHAIN_HANDLE] && name && + !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]))) + return -EEXIST; + + if (nla[NFTA_CHAIN_HANDLE] && name) + nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); + + goto notify; + } + + if (nla[NFTA_CHAIN_HOOK]) { + struct nf_hook_ops *ops; + + err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], + nft_hook_policy); + if (err < 0) + return err; + if (ha[NFTA_HOOK_HOOKNUM] == NULL || + ha[NFTA_HOOK_PRIORITY] == NULL) + return -EINVAL; + if (ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])) >= afi->nhooks) + return -EINVAL; + + basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); + if (basechain == NULL) + return -ENOMEM; + chain = &basechain->chain; + + ops = &basechain->ops; + ops->pf = family; + ops->owner = afi->owner; + ops->hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); + ops->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); + ops->priv = chain; + ops->hook = nft_do_chain; + if (afi->hooks[ops->hooknum]) + ops->hook = afi->hooks[ops->hooknum]; + + chain->flags |= NFT_BASE_CHAIN; + } else { + chain = kzalloc(sizeof(*chain), GFP_KERNEL); + if (chain == NULL) + return -ENOMEM; + } + + INIT_LIST_HEAD(&chain->rules); + chain->handle = nf_tables_alloc_handle(table); + nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); + + list_add_tail(&chain->list, &table->chains); + table->use++; +notify: + nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN, + family); + return 0; +} + +static void nf_tables_rcu_chain_destroy(struct rcu_head *head) +{ + struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head); + + BUG_ON(chain->use > 0); + + if (chain->flags & NFT_BASE_CHAIN) + kfree(nft_base_chain(chain)); + else + kfree(chain); +} + +static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + int family = nfmsg->nfgen_family; + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + if (chain->flags & NFT_CHAIN_BUILTIN) + return -EOPNOTSUPP; + + if (!list_empty(&chain->rules)) + return -EBUSY; + + list_del(&chain->list); + table->use--; + + if (chain->flags & NFT_BASE_CHAIN) + nf_unregister_hook(&nft_base_chain(chain)->ops); + + nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN, + family); + + /* Make sure all rule references are gone before this is released */ + call_rcu(&chain->rcu_head, nf_tables_rcu_chain_destroy); + return 0; +} + +static void nft_ctx_init(struct nft_ctx *ctx, + const struct nft_af_info *afi, + const struct nft_table *table, + const struct nft_chain *chain) +{ + ctx->afi = afi; + ctx->table = table; + ctx->chain = chain; +} + +/* + * Expressions + */ + +/** + * nft_register_expr - register nf_tables expr operations + * @ops: expr operations + * + * Registers the expr operations for use with nf_tables. Returns zero on + * success or a negative errno code otherwise. + */ +int nft_register_expr(struct nft_expr_ops *ops) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_tail(&ops->list, &nf_tables_expressions); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return 0; +} +EXPORT_SYMBOL_GPL(nft_register_expr); + +/** + * nft_unregister_expr - unregister nf_tables expr operations + * @ops: expr operations + * + * Unregisters the expr operations for use with nf_tables. + */ +void nft_unregister_expr(struct nft_expr_ops *ops) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del(&ops->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_expr); + +static const struct nft_expr_ops *__nft_expr_ops_get(struct nlattr *nla) +{ + const struct nft_expr_ops *ops; + + list_for_each_entry(ops, &nf_tables_expressions, list) { + if (!nla_strcmp(nla, ops->name)) + return ops; + } + return NULL; +} + +static const struct nft_expr_ops *nft_expr_ops_get(struct nlattr *nla) +{ + const struct nft_expr_ops *ops; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + ops = __nft_expr_ops_get(nla); + if (ops != NULL && try_module_get(ops->owner)) + return ops; + +#ifdef CONFIG_MODULES + if (ops == NULL) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-expr-%.*s", + nla_len(nla), (char *)nla_data(nla)); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (__nft_expr_ops_get(nla)) + return ERR_PTR(-EAGAIN); + } +#endif + return ERR_PTR(-ENOENT); +} + +static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = { + [NFTA_EXPR_NAME] = { .type = NLA_STRING }, + [NFTA_EXPR_DATA] = { .type = NLA_NESTED }, +}; + +static int nf_tables_fill_expr_info(struct sk_buff *skb, + const struct nft_expr *expr) +{ + if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->name)) + goto nla_put_failure; + + if (expr->ops->dump) { + struct nlattr *data = nla_nest_start(skb, NFTA_EXPR_DATA); + if (data == NULL) + goto nla_put_failure; + if (expr->ops->dump(skb, expr) < 0) + goto nla_put_failure; + nla_nest_end(skb, data); + } + + return skb->len; + +nla_put_failure: + return -1; +}; + +struct nft_expr_info { + const struct nft_expr_ops *ops; + struct nlattr *tb[NFTA_EXPR_MAX + 1]; +}; + +static int nf_tables_expr_parse(const struct nlattr *nla, + struct nft_expr_info *info) +{ + const struct nft_expr_ops *ops; + int err; + + err = nla_parse_nested(info->tb, NFTA_EXPR_MAX, nla, nft_expr_policy); + if (err < 0) + return err; + + ops = nft_expr_ops_get(info->tb[NFTA_EXPR_NAME]); + if (IS_ERR(ops)) + return PTR_ERR(ops); + info->ops = ops; + return 0; +} + +static int nf_tables_newexpr(const struct nft_ctx *ctx, + struct nft_expr_info *info, + struct nft_expr *expr) +{ + const struct nft_expr_ops *ops = info->ops; + int err; + + expr->ops = ops; + if (ops->init) { + struct nlattr *ma[ops->maxattr + 1]; + + if (info->tb[NFTA_EXPR_DATA]) { + err = nla_parse_nested(ma, ops->maxattr, + info->tb[NFTA_EXPR_DATA], + ops->policy); + if (err < 0) + goto err1; + } else + memset(ma, 0, sizeof(ma[0]) * (ops->maxattr + 1)); + + err = ops->init(ctx, expr, (const struct nlattr **)ma); + if (err < 0) + goto err1; + } + + info->ops = NULL; + return 0; + +err1: + expr->ops = NULL; + return err; +} + +static void nf_tables_expr_destroy(struct nft_expr *expr) +{ + if (expr->ops->destroy) + expr->ops->destroy(expr); + module_put(expr->ops->owner); +} + +/* + * Rules + */ + +static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain, + u64 handle) +{ + struct nft_rule *rule; + + // FIXME: this sucks + list_for_each_entry(rule, &chain->rules, list) { + if (handle == rule->handle) + return rule; + } + + return ERR_PTR(-ENOENT); +} + +static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain, + const struct nlattr *nla) +{ + if (nla == NULL) + return ERR_PTR(-EINVAL); + + return __nf_tables_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla))); +} + +static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { + [NFTA_RULE_TABLE] = { .type = NLA_STRING }, + [NFTA_RULE_CHAIN] = { .type = NLA_STRING, + .len = NFT_CHAIN_MAXNAMELEN - 1 }, + [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, + [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, +}; + +static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, + int event, u32 flags, int family, + const struct nft_table *table, + const struct nft_chain *chain, + const struct nft_rule *rule) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + const struct nft_expr *expr, *next; + struct nlattr *list; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), + flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_RULE_TABLE, table->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name)) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle))) + goto nla_put_failure; + + list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS); + if (list == NULL) + goto nla_put_failure; + nft_rule_for_each_expr(expr, next, rule) { + struct nlattr *elem = nla_nest_start(skb, NFTA_LIST_ELEM); + if (elem == NULL) + goto nla_put_failure; + if (nf_tables_fill_expr_info(skb, expr) < 0) + goto nla_put_failure; + nla_nest_end(skb, elem); + } + nla_nest_end(skb, list); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_rule_notify(const struct sk_buff *oskb, + const struct nlmsghdr *nlh, + const struct nft_table *table, + const struct nft_chain *chain, + const struct nft_rule *rule, + int event, u32 flags, int family) +{ + struct sk_buff *skb; + u32 portid = NETLINK_CB(oskb).portid; + struct net *net = oskb ? sock_net(oskb->sk) : &init_net; + u32 seq = nlh->nlmsg_seq; + bool report; + int err; + + report = nlmsg_report(nlh); + if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_rule_info(skb, portid, seq, event, flags, + family, table, chain, rule); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static int nf_tables_dump_rules(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + const struct nft_rule *rule; + unsigned int idx = 0, s_idx = cb->args[0]; + int family = nfmsg->nfgen_family; + + list_for_each_entry(afi, &nf_tables_afinfo, list) { + if (family != NFPROTO_UNSPEC && family != afi->family) + continue; + + list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry(chain, &table->chains, list) { + list_for_each_entry(rule, &chain->rules, list) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWRULE, + NLM_F_MULTI | NLM_F_APPEND, + afi->family, table, chain, rule) < 0) + goto done; +cont: + idx++; + } + } + } + } +done: + cb->args[0] = idx; + return skb->len; +} + +static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + const struct nft_rule *rule; + struct sk_buff *skb2; + int family = nfmsg->nfgen_family; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_rules, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, + family, table, chain, rule); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + +err: + kfree_skb(skb2); + return err; +} + +static void nf_tables_rcu_rule_destroy(struct rcu_head *head) +{ + struct nft_rule *rule = container_of(head, struct nft_rule, rcu_head); + struct nft_expr *expr; + + /* + * Careful: some expressions might not be initialized in case this + * is called on error from nf_tables_newrule(). + */ + expr = nft_expr_first(rule); + while (expr->ops && expr != nft_expr_last(rule)) { + nf_tables_expr_destroy(expr); + expr = nft_expr_next(expr); + } + kfree(rule); +} + +static void nf_tables_rule_destroy(struct nft_rule *rule) +{ + call_rcu(&rule->rcu_head, nf_tables_rcu_rule_destroy); +} + +#define NFT_RULE_MAXEXPRS 128 + +static struct nft_expr_info *info; + +static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + struct nft_rule *rule, *old_rule = NULL; + struct nft_expr *expr; + struct nft_ctx ctx; + struct nlattr *tmp; + unsigned int size, i, n; + int err, rem; + bool create; + u64 handle; + + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; + + afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, create); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], create); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + if (nla[NFTA_RULE_HANDLE]) { + handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE])); + rule = __nf_tables_rule_lookup(chain, handle); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + old_rule = rule; + else + return -EOPNOTSUPP; + } else { + if (!create || nlh->nlmsg_flags & NLM_F_REPLACE) + return -EINVAL; + handle = nf_tables_alloc_handle(table); + } + + n = 0; + size = 0; + if (nla[NFTA_RULE_EXPRESSIONS]) { + nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) { + err = -EINVAL; + if (nla_type(tmp) != NFTA_LIST_ELEM) + goto err1; + if (n == NFT_RULE_MAXEXPRS) + goto err1; + err = nf_tables_expr_parse(tmp, &info[n]); + if (err < 0) + goto err1; + size += info[n].ops->size; + n++; + } + } + + err = -ENOMEM; + rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL); + if (rule == NULL) + goto err1; + + rule->handle = handle; + rule->dlen = size; + + nft_ctx_init(&ctx, afi, table, chain); + expr = nft_expr_first(rule); + for (i = 0; i < n; i++) { + err = nf_tables_newexpr(&ctx, &info[i], expr); + if (err < 0) + goto err2; + expr = nft_expr_next(expr); + } + + /* Register hook when first rule is inserted into a base chain */ + if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN) { + err = nf_register_hook(&nft_base_chain(chain)->ops); + if (err < 0) + goto err2; + } + + if (nlh->nlmsg_flags & NLM_F_REPLACE) { + list_replace_rcu(&old_rule->list, &rule->list); + nf_tables_rule_destroy(old_rule); + } else if (nlh->nlmsg_flags & NLM_F_APPEND) + list_add_tail_rcu(&rule->list, &chain->rules); + else + list_add_rcu(&rule->list, &chain->rules); + + nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE, + nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), + nfmsg->nfgen_family); + return 0; + +err2: + nf_tables_rule_destroy(rule); +err1: + for (i = 0; i < n; i++) { + if (info[i].ops != NULL) + module_put(info[i].ops->owner); + } + return err; +} + +static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + struct nft_chain *chain; + struct nft_rule *rule, *tmp; + int family = nfmsg->nfgen_family; + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + if (nla[NFTA_RULE_HANDLE]) { + rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + /* List removal must be visible before destroying expressions */ + list_del_rcu(&rule->list); + + nf_tables_rule_notify(skb, nlh, table, chain, rule, + NFT_MSG_DELRULE, 0, family); + nf_tables_rule_destroy(rule); + } else { + /* Remove all rules in this chain */ + list_for_each_entry_safe(rule, tmp, &chain->rules, list) { + list_del_rcu(&rule->list); + + nf_tables_rule_notify(skb, nlh, table, chain, rule, + NFT_MSG_DELRULE, 0, family); + nf_tables_rule_destroy(rule); + } + } + + /* Unregister hook when last rule from base chain is deleted */ + if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN) + nf_unregister_hook(&nft_base_chain(chain)->ops); + + return 0; +} + +static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { + [NFT_MSG_NEWTABLE] = { + .call = nf_tables_newtable, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_table_policy, + }, + [NFT_MSG_GETTABLE] = { + .call = nf_tables_gettable, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_table_policy, + }, + [NFT_MSG_DELTABLE] = { + .call = nf_tables_deltable, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_table_policy, + }, + [NFT_MSG_NEWCHAIN] = { + .call = nf_tables_newchain, + .attr_count = NFTA_CHAIN_MAX, + .policy = nft_chain_policy, + }, + [NFT_MSG_GETCHAIN] = { + .call = nf_tables_getchain, + .attr_count = NFTA_CHAIN_MAX, + .policy = nft_chain_policy, + }, + [NFT_MSG_DELCHAIN] = { + .call = nf_tables_delchain, + .attr_count = NFTA_CHAIN_MAX, + .policy = nft_chain_policy, + }, + [NFT_MSG_NEWRULE] = { + .call = nf_tables_newrule, + .attr_count = NFTA_RULE_MAX, + .policy = nft_rule_policy, + }, + [NFT_MSG_GETRULE] = { + .call = nf_tables_getrule, + .attr_count = NFTA_RULE_MAX, + .policy = nft_rule_policy, + }, + [NFT_MSG_DELRULE] = { + .call = nf_tables_delrule, + .attr_count = NFTA_RULE_MAX, + .policy = nft_rule_policy, + }, +}; + +static const struct nfnetlink_subsystem nf_tables_subsys = { + .name = "nf_tables", + .subsys_id = NFNL_SUBSYS_NFTABLES, + .cb_count = NFT_MSG_MAX, + .cb = nf_tables_cb, +}; + +/** + * nft_validate_input_register - validate an expressions' input register + * + * @reg: the register number + * + * Validate that the input register is one of the general purpose + * registers. + */ +int nft_validate_input_register(enum nft_registers reg) +{ + if (reg <= NFT_REG_VERDICT) + return -EINVAL; + if (reg > NFT_REG_MAX) + return -ERANGE; + return 0; +} +EXPORT_SYMBOL_GPL(nft_validate_input_register); + +/** + * nft_validate_output_register - validate an expressions' output register + * + * @reg: the register number + * + * Validate that the output register is one of the general purpose + * registers or the verdict register. + */ +int nft_validate_output_register(enum nft_registers reg) +{ + if (reg < NFT_REG_VERDICT) + return -EINVAL; + if (reg > NFT_REG_MAX) + return -ERANGE; + return 0; +} +EXPORT_SYMBOL_GPL(nft_validate_output_register); + +/** + * nft_validate_data_load - validate an expressions' data load + * + * @ctx: context of the expression performing the load + * @reg: the destination register number + * @data: the data to load + * @type: the data type + * + * Validate that a data load uses the appropriate data type for + * the destination register. A value of NULL for the data means + * that its runtime gathered data, which is always of type + * NFT_DATA_VALUE. + */ +int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type) +{ + switch (reg) { + case NFT_REG_VERDICT: + if (data == NULL || type != NFT_DATA_VERDICT) + return -EINVAL; + // FIXME: do loop detection + return 0; + default: + if (data != NULL && type != NFT_DATA_VALUE) + return -EINVAL; + return 0; + } +} +EXPORT_SYMBOL_GPL(nft_validate_data_load); + +static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = { + [NFTA_VERDICT_CODE] = { .type = NLA_U32 }, + [NFTA_VERDICT_CHAIN] = { .type = NLA_STRING, + .len = NFT_CHAIN_MAXNAMELEN - 1 }, +}; + +static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla) +{ + struct nlattr *tb[NFTA_VERDICT_MAX + 1]; + struct nft_chain *chain; + int err; + + err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy); + if (err < 0) + return err; + + if (!tb[NFTA_VERDICT_CODE]) + return -EINVAL; + data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); + + switch (data->verdict) { + case NF_ACCEPT: + case NF_DROP: + case NF_QUEUE: + case NFT_CONTINUE: + case NFT_BREAK: + case NFT_RETURN: + desc->len = sizeof(data->verdict); + break; + case NFT_JUMP: + case NFT_GOTO: + if (!tb[NFTA_VERDICT_CHAIN]) + return -EINVAL; + chain = nf_tables_chain_lookup(ctx->table, + tb[NFTA_VERDICT_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + if (chain->flags & NFT_BASE_CHAIN) + return -EOPNOTSUPP; + + if (ctx->chain->level + 1 > chain->level) { + if (ctx->chain->level + 1 == 16) + return -EMLINK; + chain->level = ctx->chain->level + 1; + } + chain->use++; + data->chain = chain; + desc->len = sizeof(data); + break; + default: + return -EINVAL; + } + + desc->type = NFT_DATA_VERDICT; + return 0; +} + +static void nft_verdict_uninit(const struct nft_data *data) +{ + switch (data->verdict) { + case NFT_JUMP: + case NFT_GOTO: + data->chain->use--; + break; + } +} + +static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_DATA_VERDICT); + if (!nest) + goto nla_put_failure; + + if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict))) + goto nla_put_failure; + + switch (data->verdict) { + case NFT_JUMP: + case NFT_GOTO: + if (nla_put_string(skb, NFTA_VERDICT_CHAIN, data->chain->name)) + goto nla_put_failure; + } + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + +static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla) +{ + unsigned int len; + + len = nla_len(nla); + if (len == 0) + return -EINVAL; + if (len > sizeof(data->data)) + return -EOVERFLOW; + + nla_memcpy(data->data, nla, sizeof(data->data)); + desc->type = NFT_DATA_VALUE; + desc->len = len; + return 0; +} + +static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data, + unsigned int len) +{ + return nla_put(skb, NFTA_DATA_VALUE, len, data->data); +} + +static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { + [NFTA_DATA_VALUE] = { .type = NLA_BINARY, + .len = FIELD_SIZEOF(struct nft_data, data) }, + [NFTA_DATA_VERDICT] = { .type = NLA_NESTED }, +}; + +/** + * nft_data_init - parse nf_tables data netlink attributes + * + * @ctx: context of the expression using the data + * @data: destination struct nft_data + * @desc: data description + * @nla: netlink attribute containing data + * + * Parse the netlink data attributes and initialize a struct nft_data. + * The type and length of data are returned in the data description. + * + * The caller can indicate that it only wants to accept data of type + * NFT_DATA_VALUE by passing NULL for the ctx argument. + */ +int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla) +{ + struct nlattr *tb[NFTA_DATA_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy); + if (err < 0) + return err; + + if (tb[NFTA_DATA_VALUE]) + return nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]); + if (tb[NFTA_DATA_VERDICT] && ctx != NULL) + return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]); + return -EINVAL; +} +EXPORT_SYMBOL_GPL(nft_data_init); + +/** + * nft_data_uninit - release a nft_data item + * + * @data: struct nft_data to release + * @type: type of data + * + * Release a nft_data item. NFT_DATA_VALUE types can be silently discarded, + * all others need to be released by calling this function. + */ +void nft_data_uninit(const struct nft_data *data, enum nft_data_types type) +{ + switch (type) { + case NFT_DATA_VALUE: + return; + case NFT_DATA_VERDICT: + return nft_verdict_uninit(data); + default: + WARN_ON(1); + } +} +EXPORT_SYMBOL_GPL(nft_data_uninit); + +int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, + enum nft_data_types type, unsigned int len) +{ + struct nlattr *nest; + int err; + + nest = nla_nest_start(skb, attr); + if (nest == NULL) + return -1; + + switch (type) { + case NFT_DATA_VALUE: + err = nft_value_dump(skb, data, len); + break; + case NFT_DATA_VERDICT: + err = nft_verdict_dump(skb, data); + break; + default: + err = -EINVAL; + WARN_ON(1); + } + + nla_nest_end(skb, nest); + return err; +} +EXPORT_SYMBOL_GPL(nft_data_dump); + +static int __init nf_tables_module_init(void) +{ + int err; + + info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS, + GFP_KERNEL); + if (info == NULL) { + err = -ENOMEM; + goto err1; + } + + err = nf_tables_core_module_init(); + if (err < 0) + goto err2; + + err = nfnetlink_subsys_register(&nf_tables_subsys); + if (err < 0) + goto err3; + + pr_info("nf_tables: (c) 2007-2009 Patrick McHardy \n"); + return 0; +err3: + nf_tables_core_module_exit(); +err2: + kfree(info); +err1: + return err; +} + +static void __exit nf_tables_module_exit(void) +{ + nfnetlink_subsys_unregister(&nf_tables_subsys); + nf_tables_core_module_exit(); + kfree(info); +} + +module_init(nf_tables_module_init); +module_exit(nf_tables_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c new file mode 100644 index 000000000000..bc7fb85d4002 --- /dev/null +++ b/net/netfilter/nf_tables_core.c @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NFT_JUMP_STACK_SIZE 16 + +unsigned int nft_do_chain(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + const struct nft_chain *chain = ops->priv; + const struct nft_rule *rule; + const struct nft_expr *expr, *last; + struct nft_data data[NFT_REG_MAX + 1]; + const struct nft_pktinfo pkt = { + .skb = skb, + .in = in, + .out = out, + .hooknum = ops->hooknum, + }; + unsigned int stackptr = 0; + struct { + const struct nft_chain *chain; + const struct nft_rule *rule; + } jumpstack[NFT_JUMP_STACK_SIZE]; + +do_chain: + rule = list_entry(&chain->rules, struct nft_rule, list); +next_rule: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + list_for_each_entry_continue_rcu(rule, &chain->rules, list) { + nft_rule_for_each_expr(expr, last, rule) { + expr->ops->eval(expr, data, &pkt); + if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE) + break; + } + + switch (data[NFT_REG_VERDICT].verdict) { + case NFT_BREAK: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + /* fall through */ + case NFT_CONTINUE: + continue; + } + break; + } + + switch (data[NFT_REG_VERDICT].verdict) { + case NF_ACCEPT: + case NF_DROP: + case NF_QUEUE: + return data[NFT_REG_VERDICT].verdict; + case NFT_JUMP: + BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); + jumpstack[stackptr].chain = chain; + jumpstack[stackptr].rule = rule; + stackptr++; + /* fall through */ + case NFT_GOTO: + chain = data[NFT_REG_VERDICT].chain; + goto do_chain; + case NFT_RETURN: + case NFT_CONTINUE: + break; + default: + WARN_ON(1); + } + + if (stackptr > 0) { + stackptr--; + chain = jumpstack[stackptr].chain; + rule = jumpstack[stackptr].rule; + goto next_rule; + } + + return NF_ACCEPT; +} +EXPORT_SYMBOL_GPL(nft_do_chain); + +int __init nf_tables_core_module_init(void) +{ + int err; + + err = nft_immediate_module_init(); + if (err < 0) + goto err1; + + err = nft_cmp_module_init(); + if (err < 0) + goto err2; + + err = nft_lookup_module_init(); + if (err < 0) + goto err3; + + err = nft_bitwise_module_init(); + if (err < 0) + goto err4; + + err = nft_byteorder_module_init(); + if (err < 0) + goto err5; + + err = nft_payload_module_init(); + if (err < 0) + goto err6; + + return 0; + +err6: + nft_byteorder_module_exit(); +err5: + nft_bitwise_module_exit(); +err4: + nft_lookup_module_exit(); +err3: + nft_cmp_module_exit(); +err2: + nft_immediate_module_exit(); +err1: + return err; +} + +void nf_tables_core_module_exit(void) +{ + nft_payload_module_exit(); + nft_byteorder_module_exit(); + nft_bitwise_module_exit(); + nft_lookup_module_exit(); + nft_cmp_module_exit(); + nft_immediate_module_exit(); +} diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c new file mode 100644 index 000000000000..0f7501506367 --- /dev/null +++ b/net/netfilter/nft_bitwise.c @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_bitwise { + enum nft_registers sreg:8; + enum nft_registers dreg:8; + u8 len; + struct nft_data mask; + struct nft_data xor; +}; + +static void nft_bitwise_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_bitwise *priv = nft_expr_priv(expr); + const struct nft_data *src = &data[priv->sreg]; + struct nft_data *dst = &data[priv->dreg]; + unsigned int i; + + for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) { + dst->data[i] = (src->data[i] & priv->mask.data[i]) ^ + priv->xor.data[i]; + } +} + +static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = { + [NFTA_BITWISE_SREG] = { .type = NLA_U32 }, + [NFTA_BITWISE_DREG] = { .type = NLA_U32 }, + [NFTA_BITWISE_LEN] = { .type = NLA_U32 }, + [NFTA_BITWISE_MASK] = { .type = NLA_NESTED }, + [NFTA_BITWISE_XOR] = { .type = NLA_NESTED }, +}; + +static int nft_bitwise_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_bitwise *priv = nft_expr_priv(expr); + struct nft_data_desc d1, d2; + int err; + + if (tb[NFTA_BITWISE_SREG] == NULL || + tb[NFTA_BITWISE_DREG] == NULL || + tb[NFTA_BITWISE_LEN] == NULL || + tb[NFTA_BITWISE_MASK] == NULL || + tb[NFTA_BITWISE_XOR] == NULL) + return -EINVAL; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN])); + + err = nft_data_init(NULL, &priv->mask, &d1, tb[NFTA_BITWISE_MASK]); + if (err < 0) + return err; + if (d1.len != priv->len) + return -EINVAL; + + err = nft_data_init(NULL, &priv->xor, &d2, tb[NFTA_BITWISE_XOR]); + if (err < 0) + return err; + if (d2.len != priv->len) + return -EINVAL; + + return 0; +} + +static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_bitwise *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_BITWISE_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BITWISE_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(priv->len))) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_BITWISE_MASK, &priv->mask, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_BITWISE_XOR, &priv->xor, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_bitwise_ops __read_mostly = { + .name = "bitwise", + .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), + .owner = THIS_MODULE, + .eval = nft_bitwise_eval, + .init = nft_bitwise_init, + .dump = nft_bitwise_dump, + .policy = nft_bitwise_policy, + .maxattr = NFTA_BITWISE_MAX, +}; + +int __init nft_bitwise_module_init(void) +{ + return nft_register_expr(&nft_bitwise_ops); +} + +void nft_bitwise_module_exit(void) +{ + nft_unregister_expr(&nft_bitwise_ops); +} diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c new file mode 100644 index 000000000000..8b0657a4d17b --- /dev/null +++ b/net/netfilter/nft_byteorder.c @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_byteorder { + enum nft_registers sreg:8; + enum nft_registers dreg:8; + enum nft_byteorder_ops op:8; + u8 len; + u8 size; +}; + +static void nft_byteorder_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_byteorder *priv = nft_expr_priv(expr); + struct nft_data *src = &data[priv->sreg], *dst = &data[priv->dreg]; + union { u32 u32; u16 u16; } *s, *d; + unsigned int i; + + s = (void *)src->data; + d = (void *)dst->data; + + switch (priv->size) { + case 4: + switch (priv->op) { + case NFT_BYTEORDER_NTOH: + for (i = 0; i < priv->len / 4; i++) + d[i].u32 = ntohl((__force __be32)s[i].u32); + break; + case NFT_BYTEORDER_HTON: + for (i = 0; i < priv->len / 4; i++) + d[i].u32 = (__force __u32)htonl(s[i].u32); + break; + } + break; + case 2: + switch (priv->op) { + case NFT_BYTEORDER_NTOH: + for (i = 0; i < priv->len / 2; i++) + d[i].u16 = ntohs((__force __be16)s[i].u16); + break; + case NFT_BYTEORDER_HTON: + for (i = 0; i < priv->len / 2; i++) + d[i].u16 = (__force __u16)htons(s[i].u16); + break; + } + break; + } +} + +static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = { + [NFTA_BYTEORDER_SREG] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_DREG] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_OP] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_LEN] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_SIZE] = { .type = NLA_U32 }, +}; + +static int nft_byteorder_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_byteorder *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_BYTEORDER_SREG] == NULL || + tb[NFTA_BYTEORDER_DREG] == NULL || + tb[NFTA_BYTEORDER_LEN] == NULL || + tb[NFTA_BYTEORDER_SIZE] == NULL || + tb[NFTA_BYTEORDER_OP] == NULL) + return -EINVAL; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP])); + switch (priv->op) { + case NFT_BYTEORDER_NTOH: + case NFT_BYTEORDER_HTON: + break; + default: + return -EINVAL; + } + + priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN])); + if (priv->len == 0 || priv->len > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE])); + switch (priv->size) { + case 2: + case 4: + break; + default: + return -EINVAL; + } + + return 0; +} + +static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_byteorder *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_BYTEORDER_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->len))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(priv->size))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_byteorder_ops __read_mostly = { + .name = "byteorder", + .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), + .owner = THIS_MODULE, + .eval = nft_byteorder_eval, + .init = nft_byteorder_init, + .dump = nft_byteorder_dump, + .policy = nft_byteorder_policy, + .maxattr = NFTA_BYTEORDER_MAX, +}; + +int __init nft_byteorder_module_init(void) +{ + return nft_register_expr(&nft_byteorder_ops); +} + +void nft_byteorder_module_exit(void) +{ + nft_unregister_expr(&nft_byteorder_ops); +} diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c new file mode 100644 index 000000000000..e734d670120a --- /dev/null +++ b/net/netfilter/nft_cmp.c @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_cmp_expr { + struct nft_data data; + enum nft_registers sreg:8; + u8 len; + enum nft_cmp_ops op:8; +}; + +static void nft_cmp_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_cmp_expr *priv = nft_expr_priv(expr); + int d; + + d = nft_data_cmp(&data[priv->sreg], &priv->data, priv->len); + switch (priv->op) { + case NFT_CMP_EQ: + if (d != 0) + goto mismatch; + break; + case NFT_CMP_NEQ: + if (d == 0) + goto mismatch; + break; + case NFT_CMP_LT: + if (d == 0) + goto mismatch; + case NFT_CMP_LTE: + if (d > 0) + goto mismatch; + break; + case NFT_CMP_GT: + if (d == 0) + goto mismatch; + case NFT_CMP_GTE: + if (d < 0) + goto mismatch; + break; + } + return; + +mismatch: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = { + [NFTA_CMP_SREG] = { .type = NLA_U32 }, + [NFTA_CMP_OP] = { .type = NLA_U32 }, + [NFTA_CMP_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_cmp_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc; + int err; + + if (tb[NFTA_CMP_SREG] == NULL || + tb[NFTA_CMP_OP] == NULL || + tb[NFTA_CMP_DATA] == NULL) + return -EINVAL; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); + switch (priv->op) { + case NFT_CMP_EQ: + case NFT_CMP_NEQ: + case NFT_CMP_LT: + case NFT_CMP_LTE: + case NFT_CMP_GT: + case NFT_CMP_GTE: + break; + default: + return -EINVAL; + } + + err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]); + if (err < 0) + return err; + + priv->len = desc.len; + return 0; +} + +static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_cmp_expr *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op))) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_cmp_ops __read_mostly = { + .name = "cmp", + .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)), + .owner = THIS_MODULE, + .eval = nft_cmp_eval, + .init = nft_cmp_init, + .dump = nft_cmp_dump, + .policy = nft_cmp_policy, + .maxattr = NFTA_CMP_MAX, +}; + +int __init nft_cmp_module_init(void) +{ + return nft_register_expr(&nft_cmp_ops); +} + +void nft_cmp_module_exit(void) +{ + nft_unregister_expr(&nft_cmp_ops); +} diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c new file mode 100644 index 000000000000..33c5d36819bb --- /dev/null +++ b/net/netfilter/nft_counter.c @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_counter { + seqlock_t lock; + u64 bytes; + u64 packets; +}; + +static void nft_counter_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_counter *priv = nft_expr_priv(expr); + + write_seqlock_bh(&priv->lock); + priv->bytes += pkt->skb->len; + priv->packets++; + write_sequnlock_bh(&priv->lock); +} + +static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_counter *priv = nft_expr_priv(expr); + unsigned int seq; + u64 bytes; + u64 packets; + + do { + seq = read_seqbegin(&priv->lock); + bytes = priv->bytes; + packets = priv->packets; + } while (read_seqretry(&priv->lock, seq)); + + if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(bytes))) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(packets))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { + [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 }, + [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, +}; + +static int nft_counter_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_counter *priv = nft_expr_priv(expr); + + if (tb[NFTA_COUNTER_PACKETS]) + priv->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + if (tb[NFTA_COUNTER_BYTES]) + priv->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); + + seqlock_init(&priv->lock); + return 0; +} + +static struct nft_expr_ops nft_counter_ops __read_mostly = { + .name = "counter", + .size = NFT_EXPR_SIZE(sizeof(struct nft_counter)), + .policy = nft_counter_policy, + .maxattr = NFTA_COUNTER_MAX, + .owner = THIS_MODULE, + .eval = nft_counter_eval, + .init = nft_counter_init, + .dump = nft_counter_dump, +}; + +static int __init nft_counter_module_init(void) +{ + return nft_register_expr(&nft_counter_ops); +} + +static void __exit nft_counter_module_exit(void) +{ + nft_unregister_expr(&nft_counter_ops); +} + +module_init(nft_counter_module_init); +module_exit(nft_counter_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("counter"); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c new file mode 100644 index 000000000000..a1756d678226 --- /dev/null +++ b/net/netfilter/nft_ct.c @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_ct { + enum nft_ct_keys key:8; + enum ip_conntrack_dir dir:8; + enum nft_registers dreg:8; + uint8_t family; +}; + +static void nft_ct_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_ct *priv = nft_expr_priv(expr); + struct nft_data *dest = &data[priv->dreg]; + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + const struct nf_conn_help *help; + const struct nf_conntrack_tuple *tuple; + const struct nf_conntrack_helper *helper; + long diff; + unsigned int state; + + ct = nf_ct_get(pkt->skb, &ctinfo); + + switch (priv->key) { + case NFT_CT_STATE: + if (ct == NULL) + state = NF_CT_STATE_INVALID_BIT; + else if (nf_ct_is_untracked(ct)) + state = NF_CT_STATE_UNTRACKED_BIT; + else + state = NF_CT_STATE_BIT(ctinfo); + dest->data[0] = state; + return; + } + + if (ct == NULL) + goto err; + + switch (priv->key) { + case NFT_CT_DIRECTION: + dest->data[0] = CTINFO2DIR(ctinfo); + return; + case NFT_CT_STATUS: + dest->data[0] = ct->status; + return; +#ifdef CONFIG_NF_CONNTRACK_MARK + case NFT_CT_MARK: + dest->data[0] = ct->mark; + return; +#endif +#ifdef CONFIG_NF_CONNTRACK_SECMARK + case NFT_CT_SECMARK: + dest->data[0] = ct->secmark; + return; +#endif + case NFT_CT_EXPIRATION: + diff = (long)jiffies - (long)ct->timeout.expires; + if (diff < 0) + diff = 0; + dest->data[0] = jiffies_to_msecs(diff); + return; + case NFT_CT_HELPER: + if (ct->master == NULL) + goto err; + help = nfct_help(ct->master); + if (help == NULL) + goto err; + helper = rcu_dereference(help->helper); + if (helper == NULL) + goto err; + if (strlen(helper->name) >= sizeof(dest->data)) + goto err; + strncpy((char *)dest->data, helper->name, sizeof(dest->data)); + return; + } + + tuple = &ct->tuplehash[priv->dir].tuple; + switch (priv->key) { + case NFT_CT_L3PROTOCOL: + dest->data[0] = nf_ct_l3num(ct); + return; + case NFT_CT_SRC: + memcpy(dest->data, tuple->src.u3.all, + nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); + return; + case NFT_CT_DST: + memcpy(dest->data, tuple->dst.u3.all, + nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); + return; + case NFT_CT_PROTOCOL: + dest->data[0] = nf_ct_protonum(ct); + return; + case NFT_CT_PROTO_SRC: + dest->data[0] = (__force __u16)tuple->src.u.all; + return; + case NFT_CT_PROTO_DST: + dest->data[0] = (__force __u16)tuple->dst.u.all; + return; + } + return; +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { + [NFTA_CT_DREG] = { .type = NLA_U32 }, + [NFTA_CT_KEY] = { .type = NLA_U32 }, + [NFTA_CT_DIRECTION] = { .type = NLA_U8 }, +}; + +static int nft_ct_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_ct *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_CT_DREG] == NULL || + tb[NFTA_CT_KEY] == NULL) + return -EINVAL; + + priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); + if (tb[NFTA_CT_DIRECTION] != NULL) { + priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]); + switch (priv->dir) { + case IP_CT_DIR_ORIGINAL: + case IP_CT_DIR_REPLY: + break; + default: + return -EINVAL; + } + } + + switch (priv->key) { + case NFT_CT_STATE: + case NFT_CT_DIRECTION: + case NFT_CT_STATUS: +#ifdef CONFIG_NF_CONNTRACK_MARK + case NFT_CT_MARK: +#endif +#ifdef CONFIG_NF_CONNTRACK_SECMARK + case NFT_CT_SECMARK: +#endif + case NFT_CT_EXPIRATION: + case NFT_CT_HELPER: + if (tb[NFTA_CT_DIRECTION] != NULL) + return -EINVAL; + break; + case NFT_CT_PROTOCOL: + case NFT_CT_SRC: + case NFT_CT_DST: + case NFT_CT_PROTO_SRC: + case NFT_CT_PROTO_DST: + if (tb[NFTA_CT_DIRECTION] == NULL) + return -EINVAL; + break; + default: + return -EOPNOTSUPP; + } + + err = nf_ct_l3proto_try_module_get(ctx->afi->family); + if (err < 0) + return err; + priv->family = ctx->afi->family; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + goto err1; + + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + goto err1; + return 0; + +err1: + nf_ct_l3proto_module_put(ctx->afi->family); + return err; +} + +static void nft_ct_destroy(const struct nft_expr *expr) +{ + struct nft_ct *priv = nft_expr_priv(expr); + + nf_ct_l3proto_module_put(priv->family); +} + +static int nft_ct_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_ct *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_CT_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key))) + goto nla_put_failure; + if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_ct_ops __read_mostly = { + .name = "ct", + .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), + .owner = THIS_MODULE, + .eval = nft_ct_eval, + .init = nft_ct_init, + .destroy = nft_ct_destroy, + .dump = nft_ct_dump, + .policy = nft_ct_policy, + .maxattr = NFTA_CT_MAX, +}; + +static int __init nft_ct_module_init(void) +{ + return nft_register_expr(&nft_ct_ops); +} + +static void __exit nft_ct_module_exit(void) +{ + nft_unregister_expr(&nft_ct_ops); +} + +module_init(nft_ct_module_init); +module_exit(nft_ct_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("ct"); diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c new file mode 100644 index 000000000000..9fc8eb308193 --- /dev/null +++ b/net/netfilter/nft_expr_template.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include + +struct nft_template { + +}; + +static void nft_template_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_template *priv = nft_expr_priv(expr); + +} + +static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = { + [NFTA_TEMPLATE_ATTR] = { .type = NLA_U32 }, +}; + +static int nft_template_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr *tb[]) +{ + struct nft_template *priv = nft_expr_priv(expr); + + return 0; +} + +static void nft_template_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_template *priv = nft_expr_priv(expr); + +} + +static int nft_template_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_template *priv = nft_expr_priv(expr); + + NLA_PUT_BE32(skb, NFTA_TEMPLATE_ATTR, priv->field); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops template_ops __read_mostly = { + .name = "template", + .size = NFT_EXPR_SIZE(sizeof(struct nft_template)), + .owner = THIS_MODULE, + .eval = nft_template_eval, + .init = nft_template_init, + .destroy = nft_template_destroy, + .dump = nft_template_dump, + .policy = nft_template_policy, + .maxattr = NFTA_TEMPLATE_MAX, +}; + +static int __init nft_template_module_init(void) +{ + return nft_register_expr(&template_ops); +} + +static void __exit nft_template_module_exit(void) +{ + nft_unregister_expr(&template_ops); +} + +module_init(nft_template_module_init); +module_exit(nft_template_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("template"); diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c new file mode 100644 index 000000000000..21c6a6b7b662 --- /dev/null +++ b/net/netfilter/nft_exthdr.c @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +// FIXME: +#include + +struct nft_exthdr { + u8 type; + u8 offset; + u8 len; + enum nft_registers dreg:8; +}; + +static void nft_exthdr_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_exthdr *priv = nft_expr_priv(expr); + struct nft_data *dest = &data[priv->dreg]; + unsigned int offset; + int err; + + err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL); + if (err < 0) + goto err; + offset += priv->offset; + + if (skb_copy_bits(pkt->skb, offset, dest->data, priv->len) < 0) + goto err; + return; +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { + [NFTA_EXTHDR_DREG] = { .type = NLA_U32 }, + [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 }, + [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, + [NFTA_EXTHDR_LEN] = { .type = NLA_U32 }, +}; + +static int nft_exthdr_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_exthdr *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_EXTHDR_DREG] == NULL || + tb[NFTA_EXTHDR_TYPE] == NULL || + tb[NFTA_EXTHDR_OFFSET] == NULL || + tb[NFTA_EXTHDR_LEN] == NULL) + return -EINVAL; + + priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); + priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN])); + if (priv->len == 0 || + priv->len > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_EXTHDR_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); +} + +static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_exthdr *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_EXTHDR_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops exthdr_ops __read_mostly = { + .name = "exthdr", + .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), + .owner = THIS_MODULE, + .eval = nft_exthdr_eval, + .init = nft_exthdr_init, + .dump = nft_exthdr_dump, + .policy = nft_exthdr_policy, + .maxattr = NFTA_EXTHDR_MAX, +}; + +static int __init nft_exthdr_module_init(void) +{ + return nft_register_expr(&exthdr_ops); +} + +static void __exit nft_exthdr_module_exit(void) +{ + nft_unregister_expr(&exthdr_ops); +} + +module_init(nft_exthdr_module_init); +module_exit(nft_exthdr_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("exthdr"); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c new file mode 100644 index 000000000000..67cc502881f1 --- /dev/null +++ b/net/netfilter/nft_hash.c @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_hash { + struct hlist_head *hash; + unsigned int hsize; + enum nft_registers sreg:8; + enum nft_registers dreg:8; + u8 klen; + u8 dlen; + u16 flags; +}; + +struct nft_hash_elem { + struct hlist_node hnode; + struct nft_data key; + struct nft_data data[]; +}; + +static u32 nft_hash_rnd __read_mostly; +static bool nft_hash_rnd_initted __read_mostly; + +static unsigned int nft_hash_data(const struct nft_data *data, + unsigned int hsize, unsigned int len) +{ + unsigned int h; + + // FIXME: can we reasonably guarantee the upper bits are fixed? + h = jhash2(data->data, len >> 2, nft_hash_rnd); + return ((u64)h * hsize) >> 32; +} + +static void nft_hash_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_hash *priv = nft_expr_priv(expr); + const struct nft_hash_elem *elem; + const struct nft_data *key = &data[priv->sreg]; + unsigned int h; + + h = nft_hash_data(key, priv->hsize, priv->klen); + hlist_for_each_entry(elem, &priv->hash[h], hnode) { + if (nft_data_cmp(&elem->key, key, priv->klen)) + continue; + if (priv->flags & NFT_HASH_MAP) + nft_data_copy(&data[priv->dreg], elem->data); + return; + } + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static void nft_hash_elem_destroy(const struct nft_expr *expr, + struct nft_hash_elem *elem) +{ + const struct nft_hash *priv = nft_expr_priv(expr); + + nft_data_uninit(&elem->key, NFT_DATA_VALUE); + if (priv->flags & NFT_HASH_MAP) + nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg)); + kfree(elem); +} + +static const struct nla_policy nft_he_policy[NFTA_HE_MAX + 1] = { + [NFTA_HE_KEY] = { .type = NLA_NESTED }, + [NFTA_HE_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_hash_elem_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr *nla, + struct nft_hash_elem **new) +{ + struct nft_hash *priv = nft_expr_priv(expr); + struct nlattr *tb[NFTA_HE_MAX + 1]; + struct nft_hash_elem *elem; + struct nft_data_desc d1, d2; + unsigned int size; + int err; + + err = nla_parse_nested(tb, NFTA_HE_MAX, nla, nft_he_policy); + if (err < 0) + return err; + + if (tb[NFTA_HE_KEY] == NULL) + return -EINVAL; + size = sizeof(*elem); + + if (priv->flags & NFT_HASH_MAP) { + if (tb[NFTA_HE_DATA] == NULL) + return -EINVAL; + size += sizeof(elem->data[0]); + } else { + if (tb[NFTA_HE_DATA] != NULL) + return -EINVAL; + } + + elem = kzalloc(size, GFP_KERNEL); + if (elem == NULL) + return -ENOMEM; + + err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_HE_KEY]); + if (err < 0) + goto err1; + err = -EINVAL; + if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen) + goto err2; + + if (tb[NFTA_HE_DATA] != NULL) { + err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_HE_DATA]); + if (err < 0) + goto err2; + err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type); + if (err < 0) + goto err3; + } + + *new = elem; + return 0; + +err3: + nft_data_uninit(elem->data, d2.type); +err2: + nft_data_uninit(&elem->key, d1.type); +err1: + kfree(elem); + return err; +} + +static int nft_hash_elem_dump(struct sk_buff *skb, const struct nft_expr *expr, + const struct nft_hash_elem *elem) + +{ + const struct nft_hash *priv = nft_expr_priv(expr); + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_LIST_ELEM); + if (nest == NULL) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_HE_KEY, &elem->key, + NFT_DATA_VALUE, priv->klen) < 0) + goto nla_put_failure; + + if (priv->flags & NFT_HASH_MAP) { + if (nft_data_dump(skb, NFTA_HE_DATA, elem->data, + NFT_DATA_VALUE, priv->dlen) < 0) + goto nla_put_failure; + } + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + +static void nft_hash_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + const struct nft_hash *priv = nft_expr_priv(expr); + const struct hlist_node *next; + struct nft_hash_elem *elem; + unsigned int i; + + for (i = 0; i < priv->hsize; i++) { + hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) { + hlist_del(&elem->hnode); + nft_hash_elem_destroy(expr, elem); + } + } + kfree(priv->hash); +} + +static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { + [NFTA_HASH_FLAGS] = { .type = NLA_U32 }, + [NFTA_HASH_SREG] = { .type = NLA_U32 }, + [NFTA_HASH_DREG] = { .type = NLA_U32 }, + [NFTA_HASH_KLEN] = { .type = NLA_U32 }, + [NFTA_HASH_ELEMENTS] = { .type = NLA_NESTED }, +}; + +static int nft_hash_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_hash *priv = nft_expr_priv(expr); + struct nft_hash_elem *elem, *uninitialized_var(new); + const struct nlattr *nla; + unsigned int cnt, i; + unsigned int h; + int err, rem; + + if (unlikely(!nft_hash_rnd_initted)) { + get_random_bytes(&nft_hash_rnd, 4); + nft_hash_rnd_initted = true; + } + + if (tb[NFTA_HASH_SREG] == NULL || + tb[NFTA_HASH_KLEN] == NULL || + tb[NFTA_HASH_ELEMENTS] == NULL) + return -EINVAL; + + if (tb[NFTA_HASH_FLAGS] != NULL) { + priv->flags = ntohl(nla_get_be32(tb[NFTA_HASH_FLAGS])); + if (priv->flags & ~NFT_HASH_MAP) + return -EINVAL; + } + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_HASH_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + if (tb[NFTA_HASH_DREG] != NULL) { + if (!(priv->flags & NFT_HASH_MAP)) + return -EINVAL; + priv->dreg = ntohl(nla_get_be32(tb[NFTA_HASH_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + } + + priv->klen = ntohl(nla_get_be32(tb[NFTA_HASH_KLEN])); + if (priv->klen == 0) + return -EINVAL; + + cnt = 0; + nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) { + if (nla_type(nla) != NFTA_LIST_ELEM) + return -EINVAL; + cnt++; + } + + /* Aim for a load factor of 0.75 */ + cnt = cnt * 4 / 3; + + priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL); + if (priv->hash == NULL) + return -ENOMEM; + priv->hsize = cnt; + + for (i = 0; i < cnt; i++) + INIT_HLIST_HEAD(&priv->hash[i]); + + err = -ENOMEM; + nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) { + err = nft_hash_elem_init(ctx, expr, nla, &new); + if (err < 0) + goto err1; + + h = nft_hash_data(&new->key, priv->hsize, priv->klen); + hlist_for_each_entry(elem, &priv->hash[h], hnode) { + if (nft_data_cmp(&elem->key, &new->key, priv->klen)) + continue; + nft_hash_elem_destroy(expr, new); + err = -EEXIST; + goto err1; + } + hlist_add_head(&new->hnode, &priv->hash[h]); + } + return 0; + +err1: + nft_hash_destroy(ctx, expr); + return err; +} + +static int nft_hash_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_hash *priv = nft_expr_priv(expr); + const struct nft_hash_elem *elem; + struct nlattr *list; + unsigned int i; + + if (priv->flags) + if (nla_put_be32(skb, NFTA_HASH_FLAGS, htonl(priv->flags))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (priv->flags & NFT_HASH_MAP) + if (nla_put_be32(skb, NFTA_HASH_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_KLEN, htonl(priv->klen))) + goto nla_put_failure; + + list = nla_nest_start(skb, NFTA_HASH_ELEMENTS); + if (list == NULL) + goto nla_put_failure; + + for (i = 0; i < priv->hsize; i++) { + hlist_for_each_entry(elem, &priv->hash[i], hnode) { + if (nft_hash_elem_dump(skb, expr, elem) < 0) + goto nla_put_failure; + } + } + + nla_nest_end(skb, list); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_hash_ops __read_mostly = { + .name = "hash", + .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)), + .owner = THIS_MODULE, + .eval = nft_hash_eval, + .init = nft_hash_init, + .destroy = nft_hash_destroy, + .dump = nft_hash_dump, + .policy = nft_hash_policy, + .maxattr = NFTA_HASH_MAX, +}; + +static int __init nft_hash_module_init(void) +{ + return nft_register_expr(&nft_hash_ops); +} + +static void __exit nft_hash_module_exit(void) +{ + nft_unregister_expr(&nft_hash_ops); +} + +module_init(nft_hash_module_init); +module_exit(nft_hash_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("hash"); diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c new file mode 100644 index 000000000000..3bf42c3cc49a --- /dev/null +++ b/net/netfilter/nft_immediate.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_immediate_expr { + struct nft_data data; + enum nft_registers dreg:8; + u8 dlen; +}; + +static void nft_immediate_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + nft_data_copy(&data[priv->dreg], &priv->data); +} + +static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = { + [NFTA_IMMEDIATE_DREG] = { .type = NLA_U32 }, + [NFTA_IMMEDIATE_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_immediate_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_immediate_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc; + int err; + + if (tb[NFTA_IMMEDIATE_DREG] == NULL || + tb[NFTA_IMMEDIATE_DATA] == NULL) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]); + if (err < 0) + return err; + priv->dlen = desc.len; + + err = nft_validate_data_load(ctx, priv->dreg, &priv->data, desc.type); + if (err < 0) + goto err1; + + return 0; + +err1: + nft_data_uninit(&priv->data, desc.type); + return err; +} + +static void nft_immediate_destroy(const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg)); +} + +static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_IMMEDIATE_DREG, htonl(priv->dreg))) + goto nla_put_failure; + + return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &priv->data, + nft_dreg_to_type(priv->dreg), priv->dlen); + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_imm_ops __read_mostly = { + .name = "immediate", + .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), + .owner = THIS_MODULE, + .eval = nft_immediate_eval, + .init = nft_immediate_init, + .destroy = nft_immediate_destroy, + .dump = nft_immediate_dump, + .policy = nft_immediate_policy, + .maxattr = NFTA_IMMEDIATE_MAX, +}; + +int __init nft_immediate_module_init(void) +{ + return nft_register_expr(&nft_imm_ops); +} + +void nft_immediate_module_exit(void) +{ + nft_unregister_expr(&nft_imm_ops); +} diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c new file mode 100644 index 000000000000..e0e3fc8aebc3 --- /dev/null +++ b/net/netfilter/nft_limit.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_SPINLOCK(limit_lock); + +struct nft_limit { + u64 tokens; + u64 rate; + u64 unit; + unsigned long stamp; +}; + +static void nft_limit_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_limit *priv = nft_expr_priv(expr); + + spin_lock_bh(&limit_lock); + if (time_after_eq(jiffies, priv->stamp)) { + priv->tokens = priv->rate; + priv->stamp = jiffies + priv->unit * HZ; + } + + if (priv->tokens >= 1) { + priv->tokens--; + spin_unlock_bh(&limit_lock); + return; + } + spin_unlock_bh(&limit_lock); + + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { + [NFTA_LIMIT_RATE] = { .type = NLA_U64 }, + [NFTA_LIMIT_UNIT] = { .type = NLA_U64 }, +}; + +static int nft_limit_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_limit *priv = nft_expr_priv(expr); + + if (tb[NFTA_LIMIT_RATE] == NULL || + tb[NFTA_LIMIT_UNIT] == NULL) + return -EINVAL; + + priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); + priv->unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); + priv->stamp = jiffies + priv->unit * HZ; + priv->tokens = priv->rate; + return 0; +} + +static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_limit *priv = nft_expr_priv(expr); + + if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate))) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_limit_ops __read_mostly = { + .name = "limit", + .size = NFT_EXPR_SIZE(sizeof(struct nft_limit)), + .owner = THIS_MODULE, + .eval = nft_limit_eval, + .init = nft_limit_init, + .dump = nft_limit_dump, + .policy = nft_limit_policy, + .maxattr = NFTA_LIMIT_MAX, +}; + +static int __init nft_limit_module_init(void) +{ + return nft_register_expr(&nft_limit_ops); +} + +static void __exit nft_limit_module_exit(void) +{ + nft_unregister_expr(&nft_limit_ops); +} + +module_init(nft_limit_module_init); +module_exit(nft_limit_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("limit"); diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c new file mode 100644 index 000000000000..da495c3b1e7e --- /dev/null +++ b/net/netfilter/nft_log.c @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const char *nft_log_null_prefix = ""; + +struct nft_log { + struct nf_loginfo loginfo; + char *prefix; + int family; +}; + +static void nft_log_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_log *priv = nft_expr_priv(expr); + struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); + + nf_log_packet(net, priv->family, pkt->hooknum, pkt->skb, pkt->in, + pkt->out, &priv->loginfo, "%s", priv->prefix); +} + +static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { + [NFTA_LOG_GROUP] = { .type = NLA_U16 }, + [NFTA_LOG_PREFIX] = { .type = NLA_STRING }, + [NFTA_LOG_SNAPLEN] = { .type = NLA_U32 }, + [NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 }, +}; + +static int nft_log_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_log *priv = nft_expr_priv(expr); + struct nf_loginfo *li = &priv->loginfo; + const struct nlattr *nla; + + priv->family = ctx->afi->family; + + nla = tb[NFTA_LOG_PREFIX]; + if (nla != NULL) { + priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL); + if (priv->prefix == NULL) + return -ENOMEM; + nla_strlcpy(priv->prefix, nla, nla_len(nla) + 1); + } else + priv->prefix = (char *)nft_log_null_prefix; + + li->type = NF_LOG_TYPE_ULOG; + if (tb[NFTA_LOG_GROUP] != NULL) + li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP])); + + if (tb[NFTA_LOG_SNAPLEN] != NULL) + li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN])); + if (tb[NFTA_LOG_QTHRESHOLD] != NULL) { + li->u.ulog.qthreshold = + ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD])); + } + + return 0; +} + +static void nft_log_destroy(const struct nft_expr *expr) +{ + struct nft_log *priv = nft_expr_priv(expr); + + if (priv->prefix != nft_log_null_prefix) + kfree(priv->prefix); +} + +static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_log *priv = nft_expr_priv(expr); + const struct nf_loginfo *li = &priv->loginfo; + + if (priv->prefix != nft_log_null_prefix) + if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix)) + goto nla_put_failure; + if (li->u.ulog.group) + if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group))) + goto nla_put_failure; + if (li->u.ulog.copy_len) + if (nla_put_be32(skb, NFTA_LOG_SNAPLEN, + htonl(li->u.ulog.copy_len))) + goto nla_put_failure; + if (li->u.ulog.qthreshold) + if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD, + htons(li->u.ulog.qthreshold))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_log_ops __read_mostly = { + .name = "log", + .size = NFT_EXPR_SIZE(sizeof(struct nft_log)), + .owner = THIS_MODULE, + .eval = nft_log_eval, + .init = nft_log_init, + .destroy = nft_log_destroy, + .dump = nft_log_dump, + .policy = nft_log_policy, + .maxattr = NFTA_LOG_MAX, +}; + +static int __init nft_log_module_init(void) +{ + return nft_register_expr(&nft_log_ops); +} + +static void __exit nft_log_module_exit(void) +{ + nft_unregister_expr(&nft_log_ops); +} + +module_init(nft_log_module_init); +module_exit(nft_log_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("log"); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c new file mode 100644 index 000000000000..96735aa2f039 --- /dev/null +++ b/net/netfilter/nft_meta.c @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for TCP_TIME_WAIT */ +#include + +struct nft_meta { + enum nft_meta_keys key:8; + enum nft_registers dreg:8; +}; + +static void nft_meta_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + const struct net_device *in = pkt->in, *out = pkt->out; + struct nft_data *dest = &data[priv->dreg]; + + switch (priv->key) { + case NFT_META_LEN: + dest->data[0] = skb->len; + break; + case NFT_META_PROTOCOL: + *(__be16 *)dest->data = skb->protocol; + break; + case NFT_META_PRIORITY: + dest->data[0] = skb->priority; + break; + case NFT_META_MARK: + dest->data[0] = skb->mark; + break; + case NFT_META_IIF: + if (in == NULL) + goto err; + dest->data[0] = in->ifindex; + break; + case NFT_META_OIF: + if (out == NULL) + goto err; + dest->data[0] = out->ifindex; + break; + case NFT_META_IIFNAME: + if (in == NULL) + goto err; + strncpy((char *)dest->data, in->name, sizeof(dest->data)); + break; + case NFT_META_OIFNAME: + if (out == NULL) + goto err; + strncpy((char *)dest->data, out->name, sizeof(dest->data)); + break; + case NFT_META_IIFTYPE: + if (in == NULL) + goto err; + *(u16 *)dest->data = in->type; + break; + case NFT_META_OIFTYPE: + if (out == NULL) + goto err; + *(u16 *)dest->data = out->type; + break; + case NFT_META_SKUID: + if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) + goto err; + + read_lock_bh(&skb->sk->sk_callback_lock); + if (skb->sk->sk_socket == NULL || + skb->sk->sk_socket->file == NULL) { + read_unlock_bh(&skb->sk->sk_callback_lock); + goto err; + } + + dest->data[0] = + from_kuid_munged(&init_user_ns, + skb->sk->sk_socket->file->f_cred->fsuid); + read_unlock_bh(&skb->sk->sk_callback_lock); + break; + case NFT_META_SKGID: + if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) + goto err; + + read_lock_bh(&skb->sk->sk_callback_lock); + if (skb->sk->sk_socket == NULL || + skb->sk->sk_socket->file == NULL) { + read_unlock_bh(&skb->sk->sk_callback_lock); + goto err; + } + dest->data[0] = + from_kgid_munged(&init_user_ns, + skb->sk->sk_socket->file->f_cred->fsgid); + read_unlock_bh(&skb->sk->sk_callback_lock); + break; +#ifdef CONFIG_NET_CLS_ROUTE + case NFT_META_RTCLASSID: { + const struct dst_entry *dst = skb_dst(skb); + + if (dst == NULL) + goto err; + dest->data[0] = dst->tclassid; + break; + } +#endif +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: + dest->data[0] = skb->secmark; + break; +#endif + default: + WARN_ON(1); + goto err; + } + return; + +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { + [NFTA_META_DREG] = { .type = NLA_U32 }, + [NFTA_META_KEY] = { .type = NLA_U32 }, +}; + +static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_META_DREG] == NULL || + tb[NFTA_META_KEY] == NULL) + return -EINVAL; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_LEN: + case NFT_META_PROTOCOL: + case NFT_META_PRIORITY: + case NFT_META_MARK: + case NFT_META_IIF: + case NFT_META_OIF: + case NFT_META_IIFNAME: + case NFT_META_OIFNAME: + case NFT_META_IIFTYPE: + case NFT_META_OIFTYPE: + case NFT_META_SKUID: + case NFT_META_SKGID: +#ifdef CONFIG_NET_CLS_ROUTE + case NFT_META_RTCLASSID: +#endif +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: +#endif + break; + default: + return -EOPNOTSUPP; + } + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); +} + +static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_meta_ops __read_mostly = { + .name = "meta", + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .owner = THIS_MODULE, + .eval = nft_meta_eval, + .init = nft_meta_init, + .dump = nft_meta_dump, + .policy = nft_meta_policy, + .maxattr = NFTA_META_MAX, +}; + +static int __init nft_meta_module_init(void) +{ + return nft_register_expr(&nft_meta_ops); +} + +static void __exit nft_meta_module_exit(void) +{ + nft_unregister_expr(&nft_meta_ops); +} + +module_init(nft_meta_module_init); +module_exit(nft_meta_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("meta"); diff --git a/net/netfilter/nft_meta_target.c b/net/netfilter/nft_meta_target.c new file mode 100644 index 000000000000..71177df75ffb --- /dev/null +++ b/net/netfilter/nft_meta_target.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_meta { + enum nft_meta_keys key; +}; + +static void nft_meta_eval(const struct nft_expr *expr, + struct nft_data *nfres, + struct nft_data *data, + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *meta = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + u32 val = data->data[0]; + + switch (meta->key) { + case NFT_META_MARK: + skb->mark = val; + break; + case NFT_META_PRIORITY: + skb->priority = val; + break; + case NFT_META_NFTRACE: + skb->nf_trace = val; + break; +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: + skb->secmark = val; + break; +#endif + default: + WARN_ON(1); + } +} + +static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { + [NFTA_META_KEY] = { .type = NLA_U32 }, +}; + +static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[]) +{ + struct nft_meta *meta = nft_expr_priv(expr); + + if (tb[NFTA_META_KEY] == NULL) + return -EINVAL; + + meta->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (meta->key) { + case NFT_META_MARK: + case NFT_META_PRIORITY: + case NFT_META_NFTRACE: +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: +#endif + break; + default: + return -EINVAL; + } + + return 0; +} + +static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_meta *meta = nft_expr_priv(expr); + + NLA_PUT_BE32(skb, NFTA_META_KEY, htonl(meta->key)); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops meta_target __read_mostly = { + .name = "meta", + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .owner = THIS_MODULE, + .eval = nft_meta_eval, + .init = nft_meta_init, + .dump = nft_meta_dump, + .policy = nft_meta_policy, + .maxattr = NFTA_META_MAX, +}; + +static int __init nft_meta_target_init(void) +{ + return nft_register_expr(&meta_target); +} + +static void __exit nft_meta_target_exit(void) +{ + nft_unregister_expr(&meta_target); +} + +module_init(nft_meta_target_init); +module_exit(nft_meta_target_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("meta"); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c new file mode 100644 index 000000000000..329f134b3f89 --- /dev/null +++ b/net/netfilter/nft_payload.c @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_payload { + enum nft_payload_bases base:8; + u8 offset; + u8 len; + enum nft_registers dreg:8; +}; + +static void nft_payload_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_payload *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + struct nft_data *dest = &data[priv->dreg]; + int offset; + + switch (priv->base) { + case NFT_PAYLOAD_LL_HEADER: + if (!skb_mac_header_was_set(skb)) + goto err; + offset = skb_mac_header(skb) - skb->data; + break; + case NFT_PAYLOAD_NETWORK_HEADER: + offset = skb_network_offset(skb); + break; + case NFT_PAYLOAD_TRANSPORT_HEADER: + offset = skb_transport_offset(skb); + break; + default: + BUG(); + } + offset += priv->offset; + + if (skb_copy_bits(skb, offset, dest->data, priv->len) < 0) + goto err; + return; +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { + [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, +}; + +static int nft_payload_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_payload *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_PAYLOAD_DREG] == NULL || + tb[NFTA_PAYLOAD_BASE] == NULL || + tb[NFTA_PAYLOAD_OFFSET] == NULL || + tb[NFTA_PAYLOAD_LEN] == NULL) + return -EINVAL; + + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + switch (priv->base) { + case NFT_PAYLOAD_LL_HEADER: + case NFT_PAYLOAD_NETWORK_HEADER: + case NFT_PAYLOAD_TRANSPORT_HEADER: + break; + default: + return -EOPNOTSUPP; + } + + priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + if (priv->len == 0 || + priv->len > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); +} + +static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_payload *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_PAYLOAD_DREG, htonl(priv->dreg)) || + nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) || + nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) || + nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_payload_ops __read_mostly = { + .name = "payload", + .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)), + .owner = THIS_MODULE, + .eval = nft_payload_eval, + .init = nft_payload_init, + .dump = nft_payload_dump, + .policy = nft_payload_policy, + .maxattr = NFTA_PAYLOAD_MAX, +}; + +int __init nft_payload_module_init(void) +{ + return nft_register_expr(&nft_payload_ops); +} + +void nft_payload_module_exit(void) +{ + nft_unregister_expr(&nft_payload_ops); +} diff --git a/net/netfilter/nft_set.c b/net/netfilter/nft_set.c new file mode 100644 index 000000000000..7b7c8354c327 --- /dev/null +++ b/net/netfilter/nft_set.c @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_set { + struct rb_root root; + enum nft_registers sreg:8; + enum nft_registers dreg:8; + u8 klen; + u8 dlen; + u16 flags; +}; + +struct nft_set_elem { + struct rb_node node; + enum nft_set_elem_flags flags; + struct nft_data key; + struct nft_data data[]; +}; + +static void nft_set_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_set *priv = nft_expr_priv(expr); + const struct rb_node *parent = priv->root.rb_node; + const struct nft_set_elem *elem, *interval = NULL; + const struct nft_data *key = &data[priv->sreg]; + int d; + + while (parent != NULL) { + elem = rb_entry(parent, struct nft_set_elem, node); + + d = nft_data_cmp(&elem->key, key, priv->klen); + if (d < 0) { + parent = parent->rb_left; + interval = elem; + } else if (d > 0) + parent = parent->rb_right; + else { +found: + if (elem->flags & NFT_SE_INTERVAL_END) + goto out; + if (priv->flags & NFT_SET_MAP) + nft_data_copy(&data[priv->dreg], elem->data); + return; + } + } + + if (priv->flags & NFT_SET_INTERVAL && interval != NULL) { + elem = interval; + goto found; + } +out: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static void nft_set_elem_destroy(const struct nft_expr *expr, + struct nft_set_elem *elem) +{ + const struct nft_set *priv = nft_expr_priv(expr); + + nft_data_uninit(&elem->key, NFT_DATA_VALUE); + if (priv->flags & NFT_SET_MAP) + nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg)); + kfree(elem); +} + +static const struct nla_policy nft_se_policy[NFTA_SE_MAX + 1] = { + [NFTA_SE_KEY] = { .type = NLA_NESTED }, + [NFTA_SE_DATA] = { .type = NLA_NESTED }, + [NFTA_SE_FLAGS] = { .type = NLA_U32 }, +}; + +static int nft_set_elem_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr *nla, + struct nft_set_elem **new) +{ + struct nft_set *priv = nft_expr_priv(expr); + struct nlattr *tb[NFTA_SE_MAX + 1]; + struct nft_set_elem *elem; + struct nft_data_desc d1, d2; + enum nft_set_elem_flags flags = 0; + unsigned int size; + int err; + + err = nla_parse_nested(tb, NFTA_SE_MAX, nla, nft_se_policy); + if (err < 0) + return err; + + if (tb[NFTA_SE_KEY] == NULL) + return -EINVAL; + + if (tb[NFTA_SE_FLAGS] != NULL) { + flags = ntohl(nla_get_be32(tb[NFTA_SE_FLAGS])); + if (flags & ~NFT_SE_INTERVAL_END) + return -EINVAL; + } + + size = sizeof(*elem); + if (priv->flags & NFT_SET_MAP) { + if (tb[NFTA_SE_DATA] == NULL && !(flags & NFT_SE_INTERVAL_END)) + return -EINVAL; + size += sizeof(elem->data[0]); + } else { + if (tb[NFTA_SE_DATA] != NULL) + return -EINVAL; + } + + elem = kzalloc(size, GFP_KERNEL); + if (elem == NULL) + return -ENOMEM; + elem->flags = flags; + + err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_SE_KEY]); + if (err < 0) + goto err1; + err = -EINVAL; + if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen) + goto err2; + + if (tb[NFTA_SE_DATA] != NULL) { + err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_SE_DATA]); + if (err < 0) + goto err2; + err = -EINVAL; + if (priv->dreg != NFT_REG_VERDICT && d2.len != priv->dlen) + goto err2; + err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type); + if (err < 0) + goto err3; + } + + *new = elem; + return 0; + +err3: + nft_data_uninit(elem->data, d2.type); +err2: + nft_data_uninit(&elem->key, d1.type); +err1: + kfree(elem); + return err; +} + +static int nft_set_elem_dump(struct sk_buff *skb, const struct nft_expr *expr, + const struct nft_set_elem *elem) + +{ + const struct nft_set *priv = nft_expr_priv(expr); + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_LIST_ELEM); + if (nest == NULL) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_SE_KEY, &elem->key, + NFT_DATA_VALUE, priv->klen) < 0) + goto nla_put_failure; + + if (priv->flags & NFT_SET_MAP && !(elem->flags & NFT_SE_INTERVAL_END)) { + if (nft_data_dump(skb, NFTA_SE_DATA, elem->data, + nft_dreg_to_type(priv->dreg), priv->dlen) < 0) + goto nla_put_failure; + } + + if (elem->flags){ + if (nla_put_be32(skb, NFTA_SE_FLAGS, htonl(elem->flags))) + goto nla_put_failure; + } + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + +static void nft_set_destroy(const struct nft_expr *expr) +{ + struct nft_set *priv = nft_expr_priv(expr); + struct nft_set_elem *elem; + struct rb_node *node; + + while ((node = priv->root.rb_node) != NULL) { + rb_erase(node, &priv->root); + elem = rb_entry(node, struct nft_set_elem, node); + nft_set_elem_destroy(expr, elem); + } +} + +static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { + [NFTA_SET_FLAGS] = { .type = NLA_U32 }, + [NFTA_SET_SREG] = { .type = NLA_U32 }, + [NFTA_SET_DREG] = { .type = NLA_U32 }, + [NFTA_SET_KLEN] = { .type = NLA_U32 }, + [NFTA_SET_DLEN] = { .type = NLA_U32 }, + [NFTA_SET_ELEMENTS] = { .type = NLA_NESTED }, +}; + +static int nft_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_set *priv = nft_expr_priv(expr); + struct nft_set_elem *elem, *uninitialized_var(new); + struct rb_node *parent, **p; + const struct nlattr *nla; + int err, rem, d; + + if (tb[NFTA_SET_SREG] == NULL || + tb[NFTA_SET_KLEN] == NULL || + tb[NFTA_SET_ELEMENTS] == NULL) + return -EINVAL; + + priv->root = RB_ROOT; + + if (tb[NFTA_SET_FLAGS] != NULL) { + priv->flags = ntohl(nla_get_be32(tb[NFTA_SET_FLAGS])); + if (priv->flags & ~(NFT_SET_INTERVAL | NFT_SET_MAP)) + return -EINVAL; + } + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_SET_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + if (tb[NFTA_SET_DREG] != NULL) { + if (!(priv->flags & NFT_SET_MAP)) + return -EINVAL; + if (tb[NFTA_SET_DLEN] == NULL) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_SET_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + if (priv->dreg == NFT_REG_VERDICT) + priv->dlen = FIELD_SIZEOF(struct nft_data, data); + else { + priv->dlen = ntohl(nla_get_be32(tb[NFTA_SET_DLEN])); + if (priv->dlen == 0 || + priv->dlen > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + } + } else { + if (priv->flags & NFT_SET_MAP) + return -EINVAL; + if (tb[NFTA_SET_DLEN] != NULL) + return -EINVAL; + } + + priv->klen = ntohl(nla_get_be32(tb[NFTA_SET_KLEN])); + if (priv->klen == 0 || + priv->klen > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + nla_for_each_nested(nla, tb[NFTA_SET_ELEMENTS], rem) { + err = -EINVAL; + if (nla_type(nla) != NFTA_LIST_ELEM) + goto err1; + + err = nft_set_elem_init(ctx, expr, nla, &new); + if (err < 0) + goto err1; + + parent = NULL; + p = &priv->root.rb_node; + while (*p != NULL) { + parent = *p; + elem = rb_entry(parent, struct nft_set_elem, node); + d = nft_data_cmp(&elem->key, &new->key, priv->klen); + if (d < 0) + p = &parent->rb_left; + else if (d > 0) + p = &parent->rb_right; + else { + err = -EEXIST; + goto err2; + } + } + rb_link_node(&new->node, parent, p); + rb_insert_color(&new->node, &priv->root); + } + + return 0; + +err2: + nft_set_elem_destroy(expr, new); +err1: + nft_set_destroy(expr); + return err; +} + +static int nft_set_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_set *priv = nft_expr_priv(expr); + const struct nft_set_elem *elem; + struct rb_node *node; + struct nlattr *list; + + if (priv->flags) { + if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(priv->flags))) + goto nla_put_failure; + } + + if (nla_put_be32(skb, NFTA_SET_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_SET_KLEN, htonl(priv->klen))) + goto nla_put_failure; + + if (priv->flags & NFT_SET_MAP) { + if (nla_put_be32(skb, NFTA_SET_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_SET_DLEN, htonl(priv->dlen))) + goto nla_put_failure; + } + + list = nla_nest_start(skb, NFTA_SET_ELEMENTS); + if (list == NULL) + goto nla_put_failure; + + for (node = rb_first(&priv->root); node; node = rb_next(node)) { + elem = rb_entry(node, struct nft_set_elem, node); + if (nft_set_elem_dump(skb, expr, elem) < 0) + goto nla_put_failure; + } + + nla_nest_end(skb, list); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_set_ops __read_mostly = { + .name = "set", + .size = NFT_EXPR_SIZE(sizeof(struct nft_set)), + .owner = THIS_MODULE, + .eval = nft_set_eval, + .init = nft_set_init, + .destroy = nft_set_destroy, + .dump = nft_set_dump, + .policy = nft_set_policy, + .maxattr = NFTA_SET_MAX, +}; + +static int __init nft_set_module_init(void) +{ + return nft_register_expr(&nft_set_ops); +} + +static void __exit nft_set_module_exit(void) +{ + nft_unregister_expr(&nft_set_ops); +} + +module_init(nft_set_module_init); +module_exit(nft_set_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("set"); -- cgit v1.2.3-71-gd317 From 0628b123c96d126e617beb3b4fd63b874d0e4f17 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Oct 2013 11:05:33 +0200 Subject: netfilter: nfnetlink: add batch support and use it from nf_tables This patch adds a batch support to nfnetlink. Basically, it adds two new control messages: * NFNL_MSG_BATCH_BEGIN, that indicates the beginning of a batch, the nfgenmsg->res_id indicates the nfnetlink subsystem ID. * NFNL_MSG_BATCH_END, that results in the invocation of the ss->commit callback function. If not specified or an error ocurred in the batch, the ss->abort function is invoked instead. The end message represents the commit operation in nftables, the lack of end message results in an abort. This patch also adds the .call_batch function that is only called from the batch receival path. This patch adds atomic rule updates and dumps based on bitmask generations. This allows to atomically commit a set of rule-set updates incrementally without altering the internal state of existing nf_tables expressions/matches/targets. The idea consists of using a generation cursor of 1 bit and a bitmask of 2 bits per rule. Assuming the gencursor is 0, then the genmask (expressed as a bitmask) can be interpreted as: 00 active in the present, will be active in the next generation. 01 inactive in the present, will be active in the next generation. 10 active in the present, will be deleted in the next generation. ^ gencursor Once you invoke the transition to the next generation, the global gencursor is updated: 00 active in the present, will be active in the next generation. 01 active in the present, needs to zero its future, it becomes 00. 10 inactive in the present, delete now. ^ gencursor If a dump is in progress and nf_tables enters a new generation, the dump will stop and return -EBUSY to let userspace know that it has to retry again. In order to invalidate dumps, a global genctr counter is increased everytime nf_tables enters a new generation. This new operation can be used from the user-space utility that controls the firewall, eg. nft -f restore The rule updates contained in `file' will be applied atomically. cat file ----- add filter INPUT ip saddr 1.1.1.1 counter accept #1 del filter INPUT ip daddr 2.2.2.2 counter drop #2 -EOF- Note that the rule 1 will be inactive until the transition to the next generation, the rule 2 will be evicted in the next generation. There is a penalty during the rule update due to the branch misprediction in the packet matching framework. But that should be quickly resolved once the iteration over the commit list that contain rules that require updates is finished. Event notification happens once the rule-set update has been committed. So we skip notifications is case the rule-set update is aborted, which can happen in case that the rule-set is tested to apply correctly. This patch squashed the following patches from Pablo: * nf_tables: atomic rule updates and dumps * nf_tables: get rid of per rule list_head for commits * nf_tables: use per netns commit list * nfnetlink: add batch support and use it from nf_tables * nf_tables: all rule updates are transactional * nf_tables: attach replacement rule after stale one * nf_tables: do not allow deletion/replacement of stale rules * nf_tables: remove unused NFTA_RULE_FLAGS Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 5 + include/net/netfilter/nf_tables.h | 25 +++- include/net/netns/nftables.h | 3 + include/uapi/linux/netfilter/nfnetlink.h | 4 + net/netfilter/nf_tables_api.c | 202 ++++++++++++++++++++++++++++--- net/netfilter/nf_tables_core.c | 10 ++ net/netfilter/nfnetlink.c | 175 +++++++++++++++++++++++++- 7 files changed, 401 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 4f68cd7141d2..28c74367e900 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -14,6 +14,9 @@ struct nfnl_callback { int (*call_rcu)(struct sock *nl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]); + int (*call_batch)(struct sock *nl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]); const struct nla_policy *policy; /* netlink attribute policy */ const u_int16_t attr_count; /* number of nlattr's */ }; @@ -23,6 +26,8 @@ struct nfnetlink_subsystem { __u8 subsys_id; /* nfnetlink subsystem ID */ __u8 cb_count; /* number of callbacks */ const struct nfnl_callback *cb; /* callback for individual types */ + int (*commit)(struct sk_buff *skb); + int (*abort)(struct sk_buff *skb); }; int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d3272e943aac..975ad3c573c7 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -323,18 +323,39 @@ static inline void *nft_expr_priv(const struct nft_expr *expr) * @list: used internally * @rcu_head: used internally for rcu * @handle: rule handle + * @genmask: generation mask * @dlen: length of expression data * @data: expression data */ struct nft_rule { struct list_head list; struct rcu_head rcu_head; - u64 handle:48, + u64 handle:46, + genmask:2, dlen:16; unsigned char data[] __attribute__((aligned(__alignof__(struct nft_expr)))); }; +/** + * struct nft_rule_trans - nf_tables rule update in transaction + * + * @list: used internally + * @rule: rule that needs to be updated + * @chain: chain that this rule belongs to + * @table: table for which this chain applies + * @nlh: netlink header of the message that contain this update + * @family: family expressesed as AF_* + */ +struct nft_rule_trans { + struct list_head list; + struct nft_rule *rule; + const struct nft_chain *chain; + const struct nft_table *table; + const struct nlmsghdr *nlh; + u8 family; +}; + static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) { return (struct nft_expr *)&rule->data[0]; @@ -370,6 +391,7 @@ enum nft_chain_flags { * @rules: list of rules in the chain * @list: used internally * @rcu_head: used internally + * @net: net namespace that this chain belongs to * @handle: chain handle * @flags: bitmask of enum nft_chain_flags * @use: number of jump references to this chain @@ -380,6 +402,7 @@ struct nft_chain { struct list_head rules; struct list_head list; struct rcu_head rcu_head; + struct net *net; u64 handle; u8 flags; u16 use; diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index a98b1c5d9913..08a4248a12b5 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -7,9 +7,12 @@ struct nft_af_info; struct netns_nftables { struct list_head af_info; + struct list_head commit_list; struct nft_af_info *ipv4; struct nft_af_info *ipv6; struct nft_af_info *bridge; + u8 gencursor; + u8 genctr; }; #endif diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 288959404d54..596ddd45253c 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -57,4 +57,8 @@ struct nfgenmsg { #define NFNL_SUBSYS_NFT_COMPAT 11 #define NFNL_SUBSYS_COUNT 12 +/* Reserved control nfnetlink messages */ +#define NFNL_MSG_BATCH_BEGIN NLMSG_MIN_TYPE +#define NFNL_MSG_BATCH_END NLMSG_MIN_TYPE+1 + #endif /* _UAPI_NFNETLINK_H */ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0f140663ec71..79e1418a6043 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -978,6 +978,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&chain->rules); chain->handle = nf_tables_alloc_handle(table); + chain->net = net; nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); if (!(table->flags & NFT_TABLE_F_DORMANT) && @@ -1371,6 +1372,41 @@ err: return err; } +static inline bool +nft_rule_is_active(struct net *net, const struct nft_rule *rule) +{ + return (rule->genmask & (1 << net->nft.gencursor)) == 0; +} + +static inline int gencursor_next(struct net *net) +{ + return net->nft.gencursor+1 == 1 ? 1 : 0; +} + +static inline int +nft_rule_is_active_next(struct net *net, const struct nft_rule *rule) +{ + return (rule->genmask & (1 << gencursor_next(net))) == 0; +} + +static inline void +nft_rule_activate_next(struct net *net, struct nft_rule *rule) +{ + /* Now inactive, will be active in the future */ + rule->genmask = (1 << net->nft.gencursor); +} + +static inline void +nft_rule_disactivate_next(struct net *net, struct nft_rule *rule) +{ + rule->genmask = (1 << gencursor_next(net)); +} + +static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) +{ + rule->genmask = 0; +} + static int nf_tables_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { @@ -1382,6 +1418,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb, unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + u8 genctr = ACCESS_ONCE(net->nft.genctr); + u8 gencursor = ACCESS_ONCE(net->nft.gencursor); list_for_each_entry(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) @@ -1390,6 +1428,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb, list_for_each_entry(table, &afi->tables, list) { list_for_each_entry(chain, &table->chains, list) { list_for_each_entry(rule, &chain->rules, list) { + if (!nft_rule_is_active(net, rule)) + goto cont; if (idx < s_idx) goto cont; if (idx > s_idx) @@ -1408,6 +1448,10 @@ cont: } } done: + /* Invalidate this dump, a transition to the new generation happened */ + if (gencursor != net->nft.gencursor || genctr != net->nft.genctr) + return -EBUSY; + cb->args[0] = idx; return skb->len; } @@ -1492,6 +1536,25 @@ static void nf_tables_rule_destroy(struct nft_rule *rule) static struct nft_expr_info *info; +static struct nft_rule_trans * +nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx) +{ + struct nft_rule_trans *rupd; + + rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL); + if (rupd == NULL) + return NULL; + + rupd->chain = ctx->chain; + rupd->table = ctx->table; + rupd->rule = rule; + rupd->family = ctx->afi->family; + rupd->nlh = ctx->nlh; + list_add_tail(&rupd->list, &ctx->net->nft.commit_list); + + return rupd; +} + static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1502,6 +1565,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *old_rule = NULL; + struct nft_rule_trans *repl = NULL; struct nft_expr *expr; struct nft_ctx ctx; struct nlattr *tmp; @@ -1576,6 +1640,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (rule == NULL) goto err1; + nft_rule_activate_next(net, rule); + rule->handle = handle; rule->dlen = size; @@ -1589,8 +1655,18 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, } if (nlh->nlmsg_flags & NLM_F_REPLACE) { - list_replace_rcu(&old_rule->list, &rule->list); - nf_tables_rule_destroy(old_rule); + if (nft_rule_is_active_next(net, old_rule)) { + repl = nf_tables_trans_add(old_rule, &ctx); + if (repl == NULL) { + err = -ENOMEM; + goto err2; + } + nft_rule_disactivate_next(net, old_rule); + list_add_tail(&rule->list, &old_rule->list); + } else { + err = -ENOENT; + goto err2; + } } else if (nlh->nlmsg_flags & NLM_F_APPEND) if (old_rule) list_add_rcu(&rule->list, &old_rule->list); @@ -1603,11 +1679,20 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, list_add_rcu(&rule->list, &chain->rules); } - nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE, - nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), - nfmsg->nfgen_family); + if (nf_tables_trans_add(rule, &ctx) == NULL) { + err = -ENOMEM; + goto err3; + } return 0; +err3: + list_del_rcu(&rule->list); + if (repl) { + list_del_rcu(&repl->rule->list); + list_del(&repl->list); + nft_rule_clear(net, repl->rule); + kfree(repl); + } err2: nf_tables_rule_destroy(rule); err1: @@ -1618,6 +1703,19 @@ err1: return err; } +static int +nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) +{ + /* You cannot delete the same rule twice */ + if (nft_rule_is_active_next(ctx->net, rule)) { + if (nf_tables_trans_add(rule, ctx) == NULL) + return -ENOMEM; + nft_rule_disactivate_next(ctx->net, rule); + return 0; + } + return -ENOENT; +} + static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1628,7 +1726,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *tmp; - int family = nfmsg->nfgen_family; + int family = nfmsg->nfgen_family, err = 0; + struct nft_ctx ctx; afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) @@ -1642,31 +1741,95 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(chain)) return PTR_ERR(chain); + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + if (nla[NFTA_RULE_HANDLE]) { rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) return PTR_ERR(rule); - /* List removal must be visible before destroying expressions */ - list_del_rcu(&rule->list); - - nf_tables_rule_notify(skb, nlh, table, chain, rule, - NFT_MSG_DELRULE, 0, family); - nf_tables_rule_destroy(rule); + err = nf_tables_delrule_one(&ctx, rule); } else { /* Remove all rules in this chain */ list_for_each_entry_safe(rule, tmp, &chain->rules, list) { - list_del_rcu(&rule->list); + err = nf_tables_delrule_one(&ctx, rule); + if (err < 0) + break; + } + } + + return err; +} + +static int nf_tables_commit(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct nft_rule_trans *rupd, *tmp; - nf_tables_rule_notify(skb, nlh, table, chain, rule, - NFT_MSG_DELRULE, 0, family); - nf_tables_rule_destroy(rule); + /* Bump generation counter, invalidate any dump in progress */ + net->nft.genctr++; + + /* A new generation has just started */ + net->nft.gencursor = gencursor_next(net); + + /* Make sure all packets have left the previous generation before + * purging old rules. + */ + synchronize_rcu(); + + list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { + /* Delete this rule from the dirty list */ + list_del(&rupd->list); + + /* This rule was inactive in the past and just became active. + * Clear the next bit of the genmask since its meaning has + * changed, now it is the future. + */ + if (nft_rule_is_active(net, rupd->rule)) { + nft_rule_clear(net, rupd->rule); + nf_tables_rule_notify(skb, rupd->nlh, rupd->table, + rupd->chain, rupd->rule, + NFT_MSG_NEWRULE, 0, + rupd->family); + kfree(rupd); + continue; } + + /* This rule is in the past, get rid of it */ + list_del_rcu(&rupd->rule->list); + nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain, + rupd->rule, NFT_MSG_DELRULE, 0, + rupd->family); + nf_tables_rule_destroy(rupd->rule); + kfree(rupd); } return 0; } +static int nf_tables_abort(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct nft_rule_trans *rupd, *tmp; + + list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { + /* Delete all rules from the dirty list */ + list_del(&rupd->list); + + if (!nft_rule_is_active_next(net, rupd->rule)) { + nft_rule_clear(net, rupd->rule); + kfree(rupd); + continue; + } + + /* This rule is inactive, get rid of it */ + list_del_rcu(&rupd->rule->list); + nf_tables_rule_destroy(rupd->rule); + kfree(rupd); + } + return 0; +} + /* * Sets */ @@ -2634,7 +2797,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_chain_policy, }, [NFT_MSG_NEWRULE] = { - .call = nf_tables_newrule, + .call_batch = nf_tables_newrule, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, @@ -2644,7 +2807,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_rule_policy, }, [NFT_MSG_DELRULE] = { - .call = nf_tables_delrule, + .call_batch = nf_tables_delrule, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, @@ -2685,6 +2848,8 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .subsys_id = NFNL_SUBSYS_NFTABLES, .cb_count = NFT_MSG_MAX, .cb = nf_tables_cb, + .commit = nf_tables_commit, + .abort = nf_tables_abort, }; /* @@ -3056,6 +3221,7 @@ EXPORT_SYMBOL_GPL(nft_data_dump); static int nf_tables_init_net(struct net *net) { INIT_LIST_HEAD(&net->nft.af_info); + INIT_LIST_HEAD(&net->nft.commit_list); return 0; } diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 3c13007d80df..d581ef660248 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -88,12 +88,22 @@ nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) struct nft_data data[NFT_REG_MAX + 1]; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; + /* + * Cache cursor to avoid problems in case that the cursor is updated + * while traversing the ruleset. + */ + unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor); do_chain: rule = list_entry(&chain->rules, struct nft_rule, list); next_rule: data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; list_for_each_entry_continue_rcu(rule, &chain->rules, list) { + + /* This rule is not active, skip. */ + if (unlikely(rule->genmask & (1 << gencursor))) + continue; + nft_rule_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, data); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 572d87dc116f..027f16af51a0 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -147,9 +147,6 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) const struct nfnetlink_subsystem *ss; int type, err; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - /* All the messages must at least contain nfgenmsg */ if (nlmsg_len(nlh) < sizeof(struct nfgenmsg)) return 0; @@ -217,9 +214,179 @@ replay: } } +static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, + u_int16_t subsys_id) +{ + struct sk_buff *nskb, *oskb = skb; + struct net *net = sock_net(skb->sk); + const struct nfnetlink_subsystem *ss; + const struct nfnl_callback *nc; + bool success = true, done = false; + int err; + + if (subsys_id >= NFNL_SUBSYS_COUNT) + return netlink_ack(skb, nlh, -EINVAL); +replay: + nskb = netlink_skb_clone(oskb, GFP_KERNEL); + if (!nskb) + return netlink_ack(oskb, nlh, -ENOMEM); + + nskb->sk = oskb->sk; + skb = nskb; + + nfnl_lock(subsys_id); + ss = rcu_dereference_protected(table[subsys_id].subsys, + lockdep_is_held(&table[subsys_id].mutex)); + if (!ss) { +#ifdef CONFIG_MODULES + nfnl_unlock(subsys_id); + request_module("nfnetlink-subsys-%d", subsys_id); + nfnl_lock(subsys_id); + ss = rcu_dereference_protected(table[subsys_id].subsys, + lockdep_is_held(&table[subsys_id].mutex)); + if (!ss) +#endif + { + nfnl_unlock(subsys_id); + kfree_skb(nskb); + return netlink_ack(skb, nlh, -EOPNOTSUPP); + } + } + + if (!ss->commit || !ss->abort) { + nfnl_unlock(subsys_id); + kfree_skb(nskb); + return netlink_ack(skb, nlh, -EOPNOTSUPP); + } + + while (skb->len >= nlmsg_total_size(0)) { + int msglen, type; + + nlh = nlmsg_hdr(skb); + err = 0; + + if (nlh->nlmsg_len < NLMSG_HDRLEN) { + err = -EINVAL; + goto ack; + } + + /* Only requests are handled by the kernel */ + if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) { + err = -EINVAL; + goto ack; + } + + type = nlh->nlmsg_type; + if (type == NFNL_MSG_BATCH_BEGIN) { + /* Malformed: Batch begin twice */ + success = false; + goto done; + } else if (type == NFNL_MSG_BATCH_END) { + done = true; + goto done; + } else if (type < NLMSG_MIN_TYPE) { + err = -EINVAL; + goto ack; + } + + /* We only accept a batch with messages for the same + * subsystem. + */ + if (NFNL_SUBSYS_ID(type) != subsys_id) { + err = -EINVAL; + goto ack; + } + + nc = nfnetlink_find_client(type, ss); + if (!nc) { + err = -EINVAL; + goto ack; + } + + { + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); + u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); + struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; + struct nlattr *attr = (void *)nlh + min_len; + int attrlen = nlh->nlmsg_len - min_len; + + err = nla_parse(cda, ss->cb[cb_id].attr_count, + attr, attrlen, ss->cb[cb_id].policy); + if (err < 0) + goto ack; + + if (nc->call_batch) { + err = nc->call_batch(net->nfnl, skb, nlh, + (const struct nlattr **)cda); + } + + /* The lock was released to autoload some module, we + * have to abort and start from scratch using the + * original skb. + */ + if (err == -EAGAIN) { + ss->abort(skb); + nfnl_unlock(subsys_id); + kfree_skb(nskb); + goto replay; + } + } +ack: + if (nlh->nlmsg_flags & NLM_F_ACK || err) { + /* We don't stop processing the batch on errors, thus, + * userspace gets all the errors that the batch + * triggers. + */ + netlink_ack(skb, nlh, err); + if (err) + success = false; + } + + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; + skb_pull(skb, msglen); + } +done: + if (success && done) + ss->commit(skb); + else + ss->abort(skb); + + nfnl_unlock(subsys_id); + kfree_skb(nskb); +} + static void nfnetlink_rcv(struct sk_buff *skb) { - netlink_rcv_skb(skb, &nfnetlink_rcv_msg); + struct nlmsghdr *nlh = nlmsg_hdr(skb); + struct net *net = sock_net(skb->sk); + int msglen; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return netlink_ack(skb, nlh, -EPERM); + + if (nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < nlh->nlmsg_len) + return; + + if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) { + struct nfgenmsg *nfgenmsg; + + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; + + if (nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) + return; + + nfgenmsg = nlmsg_data(nlh); + skb_pull(skb, msglen); + nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id); + } else { + netlink_rcv_skb(skb, &nfnetlink_rcv_msg); + } } #ifdef CONFIG_MODULES -- cgit v1.2.3-71-gd317 From 5930e8d0ab3689f1e239566443ca8f53e45e01cc Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 15 Oct 2013 16:55:22 +0200 Subject: net/mlx4: Fix typo, move similar defs to same location Small code cleanup: 1. change MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN to MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN 2. put MLX4_SET_PORT_PRIO2TC and MLX4_SET_PORT_SCHEDULER in the same union with the other MLX4_SET_PORT_yyy Signed-off-by: Or Gerlitz Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +- drivers/net/ethernet/mellanox/mlx4/fw.c | 2 +- include/linux/mlx4/cmd.h | 6 ++---- include/linux/mlx4/device.h | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index fa37b7a61213..85d91665d400 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1733,7 +1733,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) /* Unregister Mac address for the port */ mlx4_en_put_qp(priv); - if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN)) + if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN)) mdev->mac_removed[priv->port] = 1; /* Free RX Rings */ diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 0d63daa2f422..a377484cf544 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -652,7 +652,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) QUERY_DEV_CAP_RSVD_LKEY_OFFSET); MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC); if (field & 1<<6) - dev_cap->flags2 |= MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN; + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN; MLX4_GET(dev_cap->max_icm_sz, outbox, QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET); if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS) diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index cd1fdf75103b..8df61bc5da00 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -154,10 +154,6 @@ enum { MLX4_CMD_QUERY_IF_STAT = 0X54, MLX4_CMD_SET_IF_STAT = 0X55, - /* set port opcode modifiers */ - MLX4_SET_PORT_PRIO2TC = 0x8, - MLX4_SET_PORT_SCHEDULER = 0x9, - /* register/delete flow steering network rules */ MLX4_QP_FLOW_STEERING_ATTACH = 0x65, MLX4_QP_FLOW_STEERING_DETACH = 0x66, @@ -182,6 +178,8 @@ enum { MLX4_SET_PORT_VLAN_TABLE = 0x3, MLX4_SET_PORT_PRIO_MAP = 0x4, MLX4_SET_PORT_GID_TABLE = 0x5, + MLX4_SET_PORT_PRIO2TC = 0x8, + MLX4_SET_PORT_SCHEDULER = 0x9, }; enum { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 24ce6bdd540e..9ad0c18495ad 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -155,7 +155,7 @@ enum { MLX4_DEV_CAP_FLAG2_RSS_TOP = 1LL << 1, MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2, MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3, - MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN = 1LL << 4, + MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN = 1LL << 4, MLX4_DEV_CAP_FLAG2_TS = 1LL << 5, MLX4_DEV_CAP_FLAG2_VLAN_CONTROL = 1LL << 6, MLX4_DEV_CAP_FLAG2_FSM = 1LL << 7, -- cgit v1.2.3-71-gd317 From 400dfd3ae899849b27d398ca7894e1b44430887f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Oct 2013 16:27:07 -0700 Subject: net: refactor sk_page_frag_refill() While working on virtio_net new allocation strategy to increase payload/truesize ratio, we found that refactoring sk_page_frag_refill() was needed. This patch splits sk_page_frag_refill() into two parts, adding skb_page_frag_refill() which can be used without a socket. While we are at it, add a minimum frag size of 32 for sk_page_frag_refill() Michael will either use netdev_alloc_frag() from softirq context, or skb_page_frag_refill() from process context in refill_work() (GFP_KERNEL allocations) Signed-off-by: Eric Dumazet Cc: Michael Dalton Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ net/core/sock.c | 27 +++++++++++++++++++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1cd32f96055e..ba74474836c0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2062,6 +2062,8 @@ static inline void skb_frag_set_page(struct sk_buff *skb, int f, __skb_frag_set_page(&skb_shinfo(skb)->frags[f], page); } +bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); + /** * skb_frag_dma_map - maps a paged fragment via the DMA API * @dev: the device to map the fragment to diff --git a/net/core/sock.c b/net/core/sock.c index fd6afa267475..440afdca1e8f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1847,7 +1847,17 @@ EXPORT_SYMBOL(sock_alloc_send_skb); /* On 32bit arches, an skb frag is limited to 2^15 */ #define SKB_FRAG_PAGE_ORDER get_order(32768) -bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) +/** + * skb_page_frag_refill - check that a page_frag contains enough room + * @sz: minimum size of the fragment we want to get + * @pfrag: pointer to page_frag + * @prio: priority for memory allocation + * + * Note: While this allocator tries to use high order pages, there is + * no guarantee that allocations succeed. Therefore, @sz MUST be + * less or equal than PAGE_SIZE. + */ +bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio) { int order; @@ -1856,16 +1866,16 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) pfrag->offset = 0; return true; } - if (pfrag->offset < pfrag->size) + if (pfrag->offset + sz <= pfrag->size) return true; put_page(pfrag->page); } /* We restrict high order allocations to users that can afford to wait */ - order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0; + order = (prio & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0; do { - gfp_t gfp = sk->sk_allocation; + gfp_t gfp = prio; if (order) gfp |= __GFP_COMP | __GFP_NOWARN; @@ -1877,6 +1887,15 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) } } while (--order >= 0); + return false; +} +EXPORT_SYMBOL(skb_page_frag_refill); + +bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) +{ + if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation))) + return true; + sk_enter_memory_pressure(sk); sk_stream_moderate_sndbuf(sk); return false; -- cgit v1.2.3-71-gd317 From 7cc7c5e54b7128195a1403747a63971c3c3f8e25 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 14 Oct 2013 21:49:21 +0100 Subject: net: Delete trailing semi-colon from definition of netdev_WARN() Macro definitions should not normally end with a semi-colon, as this makes it dangerous to use them an if...else statement. Happily this has not happened yet. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2e53b44454ad..27f62f746621 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3049,7 +3049,7 @@ do { \ * file/line information and a backtrace. */ #define netdev_WARN(dev, format, args...) \ - WARN(1, "netdevice: %s\n" format, netdev_name(dev), ##args); + WARN(1, "netdevice: %s\n" format, netdev_name(dev), ##args) /* netif printk helpers, similar to netdev_printk */ -- cgit v1.2.3-71-gd317 From 3347c960295583eee3fd58e5c539fb1972fbc005 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 19 Oct 2013 11:42:56 -0700 Subject: ipv4: gso: make inet_gso_segment() stackable In order to support GSO on IPIP, we need to make inet_gso_segment() stackable. It should not assume network header starts right after mac header. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 +++++-- net/core/dev.c | 2 ++ net/ipv4/af_inet.c | 25 ++++++++++++++++++------- 3 files changed, 25 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ba74474836c0..cad1e0c5cc04 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2722,9 +2722,12 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb) /* Keeps track of mac header offset relative to skb->head. * It is useful for TSO of Tunneling protocol. e.g. GRE. * For non-tunnel skb it points to skb_mac_header() and for - * tunnel skb it points to outer mac header. */ + * tunnel skb it points to outer mac header. + * Keeps track of level of encapsulation of network headers. + */ struct skb_gso_cb { - int mac_offset; + int mac_offset; + int encap_level; }; #define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb) diff --git a/net/core/dev.c b/net/core/dev.c index 1b6eadf69289..0918aadc20fd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2377,6 +2377,8 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, } SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); + SKB_GSO_CB(skb)->encap_level = 0; + skb_reset_mac_header(skb); skb_reset_mac_len(skb); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4f8cd4fc451d..5783ab5b5ef8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1273,16 +1273,17 @@ out: } static struct sk_buff *inet_gso_segment(struct sk_buff *skb, - netdev_features_t features) + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; + unsigned int offset = 0; struct iphdr *iph; + bool tunnel; int proto; + int nhoff; int ihl; int id; - unsigned int offset = 0; - bool tunnel; if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_TCPV4 | @@ -1296,6 +1297,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, 0))) goto out; + skb_reset_network_header(skb); + nhoff = skb_network_header(skb) - skb_mac_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) goto out; @@ -1312,7 +1315,10 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, goto out; __skb_pull(skb, ihl); - tunnel = !!skb->encapsulation; + tunnel = SKB_GSO_CB(skb)->encap_level > 0; + if (tunnel) + features = skb->dev->hw_enc_features & netif_skb_features(skb); + SKB_GSO_CB(skb)->encap_level += ihl; skb_reset_transport_header(skb); @@ -1327,18 +1333,23 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, skb = segs; do { - iph = ip_hdr(skb); + iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); if (!tunnel && proto == IPPROTO_UDP) { iph->id = htons(id); iph->frag_off = htons(offset >> 3); if (skb->next != NULL) iph->frag_off |= htons(IP_MF); - offset += (skb->len - skb->mac_len - iph->ihl * 4); + offset += skb->len - nhoff - ihl; } else { iph->id = htons(id++); } - iph->tot_len = htons(skb->len - skb->mac_len); + iph->tot_len = htons(skb->len - nhoff); ip_send_check(iph); + if (tunnel) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + skb->network_header = (u8 *)iph - skb->head; } while ((skb = skb->next)); out: -- cgit v1.2.3-71-gd317 From cb32f511a70be8967ac9025cf49c44324ced9a39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 19 Oct 2013 11:42:57 -0700 Subject: ipip: add GSO/TSO support Now inet_gso_segment() is stackable, its relatively easy to implement GSO/TSO support for IPIP Performance results, when segmentation is done after tunnel device (as no NIC is yet enabled for TSO IPIP support) : Before patch : lpq83:~# ./netperf -H 7.7.9.84 -Cc MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 7.7.9.84 () port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 87380 16384 16384 10.00 3357.88 5.09 3.70 2.983 2.167 After patch : lpq83:~# ./netperf -H 7.7.9.84 -Cc MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 7.7.9.84 () port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 87380 16384 16384 10.00 7710.19 4.52 6.62 1.152 1.687 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 2 ++ include/linux/skbuff.h | 6 ++++-- net/core/ethtool.c | 1 + net/ipv4/af_inet.c | 9 +++++++++ net/ipv4/gre_offload.c | 3 ++- net/ipv4/ipip.c | 11 ++++++----- net/ipv4/tcp_offload.c | 1 + net/ipv4/udp_offload.c | 1 + net/ipv6/ip6_offload.c | 1 + net/ipv6/udp_offload.c | 1 + net/mpls/mpls_gso.c | 1 + 11 files changed, 29 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index a2a89a5c7be5..8dad68cede1c 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -42,6 +42,7 @@ enum { NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */ NETIF_F_FSO_BIT, /* ... FCoE segmentation */ NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ + NETIF_F_GSO_IPIP_BIT, /* ... IPIP tunnel with TSO */ NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */ NETIF_F_GSO_MPLS_BIT, /* ... MPLS segmentation */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ @@ -107,6 +108,7 @@ enum { #define NETIF_F_RXFCS __NETIF_F(RXFCS) #define NETIF_F_RXALL __NETIF_F(RXALL) #define NETIF_F_GSO_GRE __NETIF_F(GSO_GRE) +#define NETIF_F_GSO_IPIP __NETIF_F(GSO_IPIP) #define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) #define NETIF_F_GSO_MPLS __NETIF_F(GSO_MPLS) #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cad1e0c5cc04..60729134d253 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -318,9 +318,11 @@ enum { SKB_GSO_GRE = 1 << 6, - SKB_GSO_UDP_TUNNEL = 1 << 7, + SKB_GSO_IPIP = 1 << 7, - SKB_GSO_MPLS = 1 << 8, + SKB_GSO_UDP_TUNNEL = 1 << 8, + + SKB_GSO_MPLS = 1 << 9, }; #if BITS_PER_LONG > 32 diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 78e9d9223e40..8cab7744790e 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -81,6 +81,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", + [NETIF_F_GSO_IPIP_BIT] = "tx-ipip-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation", diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5783ab5b5ef8..4049906010f7 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1291,6 +1291,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_IPIP | SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | SKB_GSO_MPLS | @@ -1656,6 +1657,13 @@ static struct packet_offload ip_packet_offload __read_mostly = { }, }; +static const struct net_offload ipip_offload = { + .callbacks = { + .gso_send_check = inet_gso_send_check, + .gso_segment = inet_gso_segment, + }, +}; + static int __init ipv4_offload_init(void) { /* @@ -1667,6 +1675,7 @@ static int __init ipv4_offload_init(void) pr_crit("%s: Cannot add TCP protocol offload\n", __func__); dev_add_offload(&ip_packet_offload); + inet_add_offload(&ipip_offload, IPPROTO_IPIP); return 0; } diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 55e6bfb3a289..e5d436188464 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -39,7 +39,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | - SKB_GSO_GRE))) + SKB_GSO_GRE | + SKB_GSO_IPIP))) goto out; if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 7f80fb4b82d3..fe3e9f7f1f0b 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -220,17 +220,17 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb->protocol != htons(ETH_P_IP))) goto tx_error; - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } + skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP); + if (IS_ERR(skb)) + goto out; ip_tunnel_xmit(skb, dev, tiph, tiph->protocol); return NETDEV_TX_OK; tx_error: - dev->stats.tx_errors++; dev_kfree_skb(skb); +out: + dev->stats.tx_errors++; return NETDEV_TX_OK; } @@ -275,6 +275,7 @@ static const struct net_device_ops ipip_netdev_ops = { #define IPIP_FEATURES (NETIF_F_SG | \ NETIF_F_FRAGLIST | \ NETIF_F_HIGHDMA | \ + NETIF_F_GSO_SOFTWARE | \ NETIF_F_HW_CSUM) static void ipip_tunnel_setup(struct net_device *dev) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 8e3113f46ec1..dfc96b00673e 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -56,6 +56,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | SKB_GSO_GRE | + SKB_GSO_IPIP | SKB_GSO_MPLS | SKB_GSO_UDP_TUNNEL | 0) || diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index f35eccaa855e..83206de2bc76 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -52,6 +52,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | + SKB_GSO_IPIP | SKB_GSO_GRE | SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index b405fba91c72..5c2fc1d04196 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -96,6 +96,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_IPIP | SKB_GSO_UDP_TUNNEL | SKB_GSO_MPLS | SKB_GSO_TCPV6 | diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 60559511bd9c..f63780ff3732 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -64,6 +64,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE | + SKB_GSO_IPIP | SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 1bec1219ab81..851cd880b0c0 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -33,6 +33,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_IPIP | SKB_GSO_MPLS))) goto out; -- cgit v1.2.3-71-gd317 From c4b2c0c5f647aa1093e8f9097a30c17ce0f94d4d Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 19 Oct 2013 21:48:53 +0200 Subject: static_key: WARN on usage before jump_label_init was called Usage of the static key primitives to toggle a branch must not be used before jump_label_init() is called from init/main.c. jump_label_init reorganizes and wires up the jump_entries so usage before that could have unforeseen consequences. Following primitives are now checked for correct use: * static_key_slow_inc * static_key_slow_dec * static_key_slow_dec_deferred * jump_label_rate_limit The x86 architecture already checks this by testing if the default_nop was already replaced with an optimal nop or with a branch instruction. It will panic then. Other architectures don't check for this. Because we need to relax this check for the x86 arch to allow code to transition from default_nop to the enabled state and other architectures did not check for this at all this patch introduces checking on the static_key primitives in a non-arch dependent manner. All checked functions are considered slow-path so the additional check does no harm to performance. The warnings are best observed with earlyprintk. Based on a patch from Andi Kleen. Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Andi Kleen Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/jump_label.h | 10 ++++++++++ include/linux/jump_label_ratelimit.h | 2 ++ init/main.c | 7 +++++++ kernel/jump_label.c | 5 +++++ 4 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index a5079072da66..e96be7245717 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -48,6 +48,13 @@ #include #include +#include + +extern bool static_key_initialized; + +#define STATIC_KEY_CHECK_USE() WARN(!static_key_initialized, \ + "%s used before call to jump_label_init", \ + __func__) #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) @@ -128,6 +135,7 @@ struct static_key { static __always_inline void jump_label_init(void) { + static_key_initialized = true; } static __always_inline bool static_key_false(struct static_key *key) @@ -146,11 +154,13 @@ static __always_inline bool static_key_true(struct static_key *key) static inline void static_key_slow_inc(struct static_key *key) { + STATIC_KEY_CHECK_USE(); atomic_inc(&key->enabled); } static inline void static_key_slow_dec(struct static_key *key) { + STATIC_KEY_CHECK_USE(); atomic_dec(&key->enabled); } diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h index 113788389b3d..089f70f83e97 100644 --- a/include/linux/jump_label_ratelimit.h +++ b/include/linux/jump_label_ratelimit.h @@ -23,12 +23,14 @@ struct static_key_deferred { }; static inline void static_key_slow_dec_deferred(struct static_key_deferred *key) { + STATIC_KEY_CHECK_USE(); static_key_slow_dec(&key->key); } static inline void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) { + STATIC_KEY_CHECK_USE(); } #endif /* HAVE_JUMP_LABEL */ #endif /* _LINUX_JUMP_LABEL_RATELIMIT_H */ diff --git a/init/main.c b/init/main.c index af310afbef28..27bbec1a5b35 100644 --- a/init/main.c +++ b/init/main.c @@ -135,6 +135,13 @@ static char *static_command_line; static char *execute_command; static char *ramdisk_execute_command; +/* + * Used to generate warnings if static_key manipulation functions are used + * before jump_label_init is called. + */ +bool static_key_initialized __read_mostly = false; +EXPORT_SYMBOL_GPL(static_key_initialized); + /* * If set, this is an indication to the drivers that reset the underlying * device before going ahead with the initialization otherwise driver might diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 297a9247a3b3..9019f15deab2 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -58,6 +58,7 @@ static void jump_label_update(struct static_key *key, int enable); void static_key_slow_inc(struct static_key *key) { + STATIC_KEY_CHECK_USE(); if (atomic_inc_not_zero(&key->enabled)) return; @@ -103,12 +104,14 @@ static void jump_label_update_timeout(struct work_struct *work) void static_key_slow_dec(struct static_key *key) { + STATIC_KEY_CHECK_USE(); __static_key_slow_dec(key, 0, NULL); } EXPORT_SYMBOL_GPL(static_key_slow_dec); void static_key_slow_dec_deferred(struct static_key_deferred *key) { + STATIC_KEY_CHECK_USE(); __static_key_slow_dec(&key->key, key->timeout, &key->work); } EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred); @@ -116,6 +119,7 @@ EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred); void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) { + STATIC_KEY_CHECK_USE(); key->timeout = rl; INIT_DELAYED_WORK(&key->work, jump_label_update_timeout); } @@ -212,6 +216,7 @@ void __init jump_label_init(void) key->next = NULL; #endif } + static_key_initialized = true; jump_label_unlock(); } -- cgit v1.2.3-71-gd317 From a48e42920ff38bc90bbf75143fff4555723d4540 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 19 Oct 2013 21:48:55 +0200 Subject: net: introduce new macro net_get_random_once net_get_random_once is a new macro which handles the initialization of secret keys. It is possible to call it in the fast path. Only the initialization depends on the spinlock and is rather slow. Otherwise it should get used just before the key is used to delay the entropy extration as late as possible to get better randomness. It returns true if the key got initialized. The usage of static_keys for net_get_random_once is a bit uncommon so it needs some further explanation why this actually works: === In the simple non-HAVE_JUMP_LABEL case we actually have === no constrains to use static_key_(true|false) on keys initialized with STATIC_KEY_INIT_(FALSE|TRUE). So this path just expands in favor of the likely case that the initialization is already done. The key is initialized like this: ___done_key = { .enabled = ATOMIC_INIT(0) } The check if (!static_key_true(&___done_key)) \ expands into (pseudo code) if (!likely(___done_key > 0)) , so we take the fast path as soon as ___done_key is increased from the helper function. === If HAVE_JUMP_LABELs are available this depends === on patching of jumps into the prepared NOPs, which is done in jump_label_init at boot-up time (from start_kernel). It is forbidden and dangerous to use net_get_random_once in functions which are called before that! At compilation time NOPs are generated at the call sites of net_get_random_once. E.g. net/ipv6/inet6_hashtable.c:inet6_ehashfn (we need to call net_get_random_once two times in inet6_ehashfn, so two NOPs): 71: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 76: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) Both will be patched to the actual jumps to the end of the function to call __net_get_random_once at boot time as explained above. arch_static_branch is optimized and inlined for false as return value and actually also returns false in case the NOP is placed in the instruction stream. So in the fast case we get a "return false". But because we initialize ___done_key with (enabled != (entries & 1)) this call-site will get patched up at boot thus returning true. The final check looks like this: if (!static_key_true(&___done_key)) \ ___ret = __net_get_random_once(buf, \ expands to if (!!static_key_false(&___done_key)) \ ___ret = __net_get_random_once(buf, \ So we get true at boot time and as soon as static_key_slow_inc is called on the key it will invert the logic and return false for the fast path. static_key_slow_inc will change the branch because it got initialized with .enabled == 0. After static_key_slow_inc is called on the key the branch is replaced with a nop again. === Misc: === The helper defers the increment into a workqueue so we don't have problems calling this code from atomic sections. A seperate boolean (___done) guards the case where we enter net_get_random_once again before the increment happend. Cc: Ingo Molnar Cc: Steven Rostedt Cc: Jason Baron Cc: Peter Zijlstra Cc: Eric Dumazet Cc: "David S. Miller" Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/net.h | 25 +++++++++++++++++++++++++ net/core/utils.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index ca9ec8540905..a489705f6fa3 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -239,6 +239,31 @@ do { \ #define net_random() prandom_u32() #define net_srandom(seed) prandom_seed((__force u32)(seed)) +bool __net_get_random_once(void *buf, int nbytes, bool *done, + struct static_key *done_key); + +#ifdef HAVE_JUMP_LABEL +#define ___NET_RANDOM_STATIC_KEY_INIT ((struct static_key) \ + { .enabled = ATOMIC_INIT(0), .entries = (void *)1 }) +#else /* !HAVE_JUMP_LABEL */ +#define ___NET_RANDOM_STATIC_KEY_INIT STATIC_KEY_INIT_FALSE +#endif /* HAVE_JUMP_LABEL */ + +/* BE CAREFUL: this function is not interrupt safe */ +#define net_get_random_once(buf, nbytes) \ + ({ \ + bool ___ret = false; \ + static bool ___done = false; \ + static struct static_key ___done_key = \ + ___NET_RANDOM_STATIC_KEY_INIT; \ + if (!static_key_true(&___done_key)) \ + ___ret = __net_get_random_once(buf, \ + nbytes, \ + &___done, \ + &___done_key); \ + ___ret; \ + }) + int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, diff --git a/net/core/utils.c b/net/core/utils.c index aa88e23fc87a..bf09371e19b1 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -338,3 +338,51 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, csum_unfold(*sum))); } EXPORT_SYMBOL(inet_proto_csum_replace16); + +struct __net_random_once_work { + struct work_struct work; + struct static_key *key; +}; + +static void __net_random_once_deferred(struct work_struct *w) +{ + struct __net_random_once_work *work = + container_of(w, struct __net_random_once_work, work); + if (!static_key_enabled(work->key)) + static_key_slow_inc(work->key); + kfree(work); +} + +static void __net_random_once_disable_jump(struct static_key *key) +{ + struct __net_random_once_work *w; + + w = kmalloc(sizeof(*w), GFP_ATOMIC); + if (!w) + return; + + INIT_WORK(&w->work, __net_random_once_deferred); + w->key = key; + schedule_work(&w->work); +} + +bool __net_get_random_once(void *buf, int nbytes, bool *done, + struct static_key *done_key) +{ + static DEFINE_SPINLOCK(lock); + + spin_lock_bh(&lock); + if (*done) { + spin_unlock_bh(&lock); + return false; + } + + get_random_bytes(buf, nbytes); + *done = true; + spin_unlock_bh(&lock); + + __net_random_once_disable_jump(done_key); + + return true; +} +EXPORT_SYMBOL(__net_get_random_once); -- cgit v1.2.3-71-gd317 From c68c7f5a88328fbcd992c68e99ebd6bf7d49e9d2 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 20 Oct 2013 06:26:02 +0200 Subject: net: fix build warnings because of net_get_random_once merge This patch fixes the following warning: In file included from include/linux/skbuff.h:27:0, from include/linux/netfilter.h:5, from include/net/netns/netfilter.h:5, from include/net/net_namespace.h:20, from include/linux/init_task.h:14, from init/init_task.c:1: include/linux/net.h:243:14: warning: 'struct static_key' declared inside parameter list [enabled by default] struct static_key *done_key); on x86_64 allnoconfig, um defconfig and ia64 allmodconfig and maybe others as well. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/net.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index a489705f6fa3..aca446b46754 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -24,6 +24,7 @@ #include /* For O_CLOEXEC and O_NONBLOCK */ #include #include +#include #include struct poll_table_struct; -- cgit v1.2.3-71-gd317 From 61c1db7fae21ed33c614356a43bf6580c5e53118 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 20 Oct 2013 20:47:30 -0700 Subject: ipv6: sit: add GSO/TSO support Now ipv6_gso_segment() is stackable, its relatively easy to implement GSO/TSO support for SIT tunnels Performance results, when segmentation is done after tunnel device (as no NIC is yet enabled for TSO SIT support) : Before patch : lpq84:~# ./netperf -H 2002:af6:1153:: -Cc MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to 2002:af6:1153:: () port 0 AF_INET6 Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 87380 16384 16384 10.00 3168.31 4.81 4.64 2.988 2.877 After patch : lpq84:~# ./netperf -H 2002:af6:1153:: -Cc MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to 2002:af6:1153:: () port 0 AF_INET6 Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 87380 16384 16384 10.00 5525.00 7.76 5.17 2.763 1.840 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 2 ++ include/linux/skbuff.h | 6 ++++-- net/core/ethtool.c | 1 + net/ipv4/af_inet.c | 1 + net/ipv4/tcp_offload.c | 1 + net/ipv6/ip6_offload.c | 11 +++++++++++ net/ipv6/sit.c | 28 +++++++++++++++++++--------- net/ipv6/udp_offload.c | 1 + 8 files changed, 40 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 8dad68cede1c..b05a4b501ab5 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -43,6 +43,7 @@ enum { NETIF_F_FSO_BIT, /* ... FCoE segmentation */ NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ NETIF_F_GSO_IPIP_BIT, /* ... IPIP tunnel with TSO */ + NETIF_F_GSO_SIT_BIT, /* ... SIT tunnel with TSO */ NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */ NETIF_F_GSO_MPLS_BIT, /* ... MPLS segmentation */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ @@ -109,6 +110,7 @@ enum { #define NETIF_F_RXALL __NETIF_F(RXALL) #define NETIF_F_GSO_GRE __NETIF_F(GSO_GRE) #define NETIF_F_GSO_IPIP __NETIF_F(GSO_IPIP) +#define NETIF_F_GSO_SIT __NETIF_F(GSO_SIT) #define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) #define NETIF_F_GSO_MPLS __NETIF_F(GSO_MPLS) #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 60729134d253..2c154976394b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -320,9 +320,11 @@ enum { SKB_GSO_IPIP = 1 << 7, - SKB_GSO_UDP_TUNNEL = 1 << 8, + SKB_GSO_SIT = 1 << 8, - SKB_GSO_MPLS = 1 << 9, + SKB_GSO_UDP_TUNNEL = 1 << 9, + + SKB_GSO_MPLS = 1 << 10, }; #if BITS_PER_LONG > 32 diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 8cab7744790e..862989898f61 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -82,6 +82,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", [NETIF_F_GSO_IPIP_BIT] = "tx-ipip-segmentation", + [NETIF_F_GSO_SIT_BIT] = "tx-sit-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation", diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 24a53fc275b0..f4a159e705c0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1265,6 +1265,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_GRE | SKB_GSO_IPIP | + SKB_GSO_SIT | SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | SKB_GSO_MPLS | diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index dfc96b00673e..a7a5583eab04 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -57,6 +57,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, SKB_GSO_TCPV6 | SKB_GSO_GRE | SKB_GSO_IPIP | + SKB_GSO_SIT | SKB_GSO_MPLS | SKB_GSO_UDP_TUNNEL | 0) || diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index f9b33d82bb9d..4b851692b1f6 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -98,6 +98,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_GRE | SKB_GSO_IPIP | + SKB_GSO_SIT | SKB_GSO_UDP_TUNNEL | SKB_GSO_MPLS | SKB_GSO_TCPV6 | @@ -276,6 +277,13 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { }, }; +static const struct net_offload sit_offload = { + .callbacks = { + .gso_send_check = ipv6_gso_send_check, + .gso_segment = ipv6_gso_segment, + }, +}; + static int __init ipv6_offload_init(void) { @@ -287,6 +295,9 @@ static int __init ipv6_offload_init(void) pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__); dev_add_offload(&ipv6_packet_offload); + + inet_add_offload(&sit_offload, IPPROTO_IPV6); + return 0; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 19269453a8ea..3a9038dd818d 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -933,10 +933,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, ttl = iph6->hop_limit; tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } + skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT); + if (IS_ERR(skb)) + goto out; err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); @@ -946,8 +945,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tx_error_icmp: dst_link_failure(skb); tx_error: - dev->stats.tx_errors++; dev_kfree_skb(skb); +out: + dev->stats.tx_errors++; return NETDEV_TX_OK; } @@ -956,13 +956,15 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } + skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP); + if (IS_ERR(skb)) + goto out; ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP); return NETDEV_TX_OK; +out: + dev->stats.tx_errors++; + return NETDEV_TX_OK; } static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, @@ -1292,6 +1294,12 @@ static void ipip6_dev_free(struct net_device *dev) free_netdev(dev); } +#define SIT_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_GSO_SOFTWARE | \ + NETIF_F_HW_CSUM) + static void ipip6_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipip6_netdev_ops; @@ -1305,6 +1313,8 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->iflink = 0; dev->addr_len = 4; dev->features |= NETIF_F_LLTX; + dev->features |= SIT_FEATURES; + dev->hw_features |= SIT_FEATURES; } static int ipip6_tunnel_init(struct net_device *dev) diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index f63780ff3732..08e23b0bf302 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -65,6 +65,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE | SKB_GSO_IPIP | + SKB_GSO_SIT | SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; -- cgit v1.2.3-71-gd317 From 93302880d8a3e5dc6b7da3f9825beb839152c940 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 18 Oct 2013 11:41:55 +0200 Subject: netfilter: ipset: Use netlink callback dump args only Instead of cb->data, use callback dump args only and introduce symbolic names instead of plain numbers at accessing the argument members. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 10 +++++ net/netfilter/ipset/ip_set_bitmap_gen.h | 11 +++--- net/netfilter/ipset/ip_set_core.c | 70 ++++++++++++++++----------------- net/netfilter/ipset/ip_set_hash_gen.h | 20 +++++----- net/netfilter/ipset/ip_set_list_set.c | 11 +++--- 5 files changed, 68 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 7967516adc0d..c7174b816674 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -316,6 +316,16 @@ ip_set_init_counter(struct ip_set_counter *counter, atomic64_set(&(counter)->packets, (long long)(ext->packets)); } +/* Netlink CB args */ +enum { + IPSET_CB_NET = 0, + IPSET_CB_DUMP, + IPSET_CB_INDEX, + IPSET_CB_ARG0, + IPSET_CB_ARG1, + IPSET_CB_ARG2, +}; + /* register and unregister set references */ extern ip_set_id_t ip_set_get_byname(struct net *net, const char *name, struct ip_set **set); diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index a13e15be7911..f2c7d83dc23f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -198,13 +198,14 @@ mtype_list(const struct ip_set *set, struct mtype *map = set->data; struct nlattr *adt, *nested; void *x; - u32 id, first = cb->args[2]; + u32 id, first = cb->args[IPSET_CB_ARG0]; adt = ipset_nest_start(skb, IPSET_ATTR_ADT); if (!adt) return -EMSGSIZE; - for (; cb->args[2] < map->elements; cb->args[2]++) { - id = cb->args[2]; + for (; cb->args[IPSET_CB_ARG0] < map->elements; + cb->args[IPSET_CB_ARG0]++) { + id = cb->args[IPSET_CB_ARG0]; x = get_ext(set, map, id); if (!test_bit(id, map->members) || (SET_WITH_TIMEOUT(set) && @@ -231,14 +232,14 @@ mtype_list(const struct ip_set *set, ipset_nest_end(skb, adt); /* Set listing finished */ - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return 0; nla_put_failure: nla_nest_cancel(skb, nested); if (unlikely(id == first)) { - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return -EMSGSIZE; } ipset_nest_end(skb, adt); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index dc9284bdd2dd..bac7e01df67f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1182,10 +1182,12 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, static int ip_set_dump_done(struct netlink_callback *cb) { - struct ip_set_net *inst = (struct ip_set_net *)cb->data; - if (cb->args[2]) { - pr_debug("release set %s\n", nfnl_set(inst, cb->args[1])->name); - __ip_set_put_byindex(inst, (ip_set_id_t) cb->args[1]); + struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET]; + if (cb->args[IPSET_CB_ARG0]) { + pr_debug("release set %s\n", + nfnl_set(inst, cb->args[IPSET_CB_INDEX])->name); + __ip_set_put_byindex(inst, + (ip_set_id_t) cb->args[IPSET_CB_INDEX]); } return 0; } @@ -1203,7 +1205,7 @@ dump_attrs(struct nlmsghdr *nlh) } static int -dump_init(struct netlink_callback *cb) +dump_init(struct netlink_callback *cb, struct ip_set_net *inst) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); @@ -1211,15 +1213,15 @@ dump_init(struct netlink_callback *cb) struct nlattr *attr = (void *)nlh + min_len; u32 dump_type; ip_set_id_t index; - struct ip_set_net *inst = (struct ip_set_net *)cb->data; /* Second pass, so parser can't fail */ nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_setname_policy); - /* cb->args[0] : dump single set/all sets - * [1] : set index - * [..]: type specific + /* cb->args[IPSET_CB_NET]: net namespace + * [IPSET_CB_DUMP]: dump single set/all sets + * [IPSET_CB_INDEX]: set index + * [IPSET_CB_ARG0]: type specific */ if (cda[IPSET_ATTR_SETNAME]) { @@ -1231,7 +1233,7 @@ dump_init(struct netlink_callback *cb) return -ENOENT; dump_type = DUMP_ONE; - cb->args[1] = index; + cb->args[IPSET_CB_INDEX] = index; } else dump_type = DUMP_ALL; @@ -1239,7 +1241,8 @@ dump_init(struct netlink_callback *cb) u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]); dump_type |= (f << 16); } - cb->args[0] = dump_type; + cb->args[IPSET_CB_NET] = (unsigned long)inst; + cb->args[IPSET_CB_DUMP] = dump_type; return 0; } @@ -1251,12 +1254,12 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) struct ip_set *set = NULL; struct nlmsghdr *nlh = NULL; unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0; + struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk)); u32 dump_type, dump_flags; int ret = 0; - struct ip_set_net *inst = (struct ip_set_net *)cb->data; - if (!cb->args[0]) { - ret = dump_init(cb); + if (!cb->args[IPSET_CB_DUMP]) { + ret = dump_init(cb, inst); if (ret < 0) { nlh = nlmsg_hdr(cb->skb); /* We have to create and send the error message @@ -1267,17 +1270,18 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) } } - if (cb->args[1] >= inst->ip_set_max) + if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max) goto out; - dump_type = DUMP_TYPE(cb->args[0]); - dump_flags = DUMP_FLAGS(cb->args[0]); - max = dump_type == DUMP_ONE ? cb->args[1] + 1 : inst->ip_set_max; + dump_type = DUMP_TYPE(cb->args[IPSET_CB_DUMP]); + dump_flags = DUMP_FLAGS(cb->args[IPSET_CB_DUMP]); + max = dump_type == DUMP_ONE ? cb->args[IPSET_CB_INDEX] + 1 + : inst->ip_set_max; dump_last: - pr_debug("args[0]: %u %u args[1]: %ld\n", - dump_type, dump_flags, cb->args[1]); - for (; cb->args[1] < max; cb->args[1]++) { - index = (ip_set_id_t) cb->args[1]; + pr_debug("dump type, flag: %u %u index: %ld\n", + dump_type, dump_flags, cb->args[IPSET_CB_INDEX]); + for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) { + index = (ip_set_id_t) cb->args[IPSET_CB_INDEX]; set = nfnl_set(inst, index); if (set == NULL) { if (dump_type == DUMP_ONE) { @@ -1294,7 +1298,7 @@ dump_last: !!(set->type->features & IPSET_DUMP_LAST))) continue; pr_debug("List set: %s\n", set->name); - if (!cb->args[2]) { + if (!cb->args[IPSET_CB_ARG0]) { /* Start listing: make sure set won't be destroyed */ pr_debug("reference set\n"); __ip_set_get(set); @@ -1311,7 +1315,7 @@ dump_last: goto nla_put_failure; if (dump_flags & IPSET_FLAG_LIST_SETNAME) goto next_set; - switch (cb->args[2]) { + switch (cb->args[IPSET_CB_ARG0]) { case 0: /* Core header data */ if (nla_put_string(skb, IPSET_ATTR_TYPENAME, @@ -1331,7 +1335,7 @@ dump_last: read_lock_bh(&set->lock); ret = set->variant->list(set, skb, cb); read_unlock_bh(&set->lock); - if (!cb->args[2]) + if (!cb->args[IPSET_CB_ARG0]) /* Set is done, proceed with next one */ goto next_set; goto release_refcount; @@ -1340,8 +1344,8 @@ dump_last: /* If we dump all sets, continue with dumping last ones */ if (dump_type == DUMP_ALL) { dump_type = DUMP_LAST; - cb->args[0] = dump_type | (dump_flags << 16); - cb->args[1] = 0; + cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16); + cb->args[IPSET_CB_INDEX] = 0; goto dump_last; } goto out; @@ -1350,15 +1354,15 @@ nla_put_failure: ret = -EFAULT; next_set: if (dump_type == DUMP_ONE) - cb->args[1] = IPSET_INVALID_ID; + cb->args[IPSET_CB_INDEX] = IPSET_INVALID_ID; else - cb->args[1]++; + cb->args[IPSET_CB_INDEX]++; release_refcount: /* If there was an error or set is done, release set */ - if (ret || !cb->args[2]) { + if (ret || !cb->args[IPSET_CB_ARG0]) { pr_debug("release set %s\n", nfnl_set(inst, index)->name); __ip_set_put_byindex(inst, index); - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; } out: if (nlh) { @@ -1375,8 +1379,6 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); - if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; @@ -1384,7 +1386,6 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, struct netlink_dump_control c = { .dump = ip_set_dump_start, .done = ip_set_dump_done, - .data = (void *)inst }; return netlink_dump_start(ctnl, skb, nlh, &c); } @@ -1961,7 +1962,6 @@ static int __net_init ip_set_net_init(struct net *net) { struct ip_set_net *inst = ip_set_pernet(net); - struct ip_set **list; inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 6a80dbd30df7..2f80c74c871f 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -931,7 +931,7 @@ mtype_list(const struct ip_set *set, struct nlattr *atd, *nested; const struct hbucket *n; const struct mtype_elem *e; - u32 first = cb->args[2]; + u32 first = cb->args[IPSET_CB_ARG0]; /* We assume that one hash bucket fills into one page */ void *incomplete; int i; @@ -940,20 +940,22 @@ mtype_list(const struct ip_set *set, if (!atd) return -EMSGSIZE; pr_debug("list hash set %s\n", set->name); - for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) { + for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits); + cb->args[IPSET_CB_ARG0]++) { incomplete = skb_tail_pointer(skb); - n = hbucket(t, cb->args[2]); - pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n); + n = hbucket(t, cb->args[IPSET_CB_ARG0]); + pr_debug("cb->arg bucket: %lu, t %p n %p\n", + cb->args[IPSET_CB_ARG0], t, n); for (i = 0; i < n->pos; i++) { e = ahash_data(n, i, set->dsize); if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; pr_debug("list hash %lu hbucket %p i %u, data %p\n", - cb->args[2], n, i, e); + cb->args[IPSET_CB_ARG0], n, i, e); nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { - if (cb->args[2] == first) { + if (cb->args[IPSET_CB_ARG0] == first) { nla_nest_cancel(skb, atd); return -EMSGSIZE; } else @@ -968,16 +970,16 @@ mtype_list(const struct ip_set *set, } ipset_nest_end(skb, atd); /* Set listing finished */ - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return 0; nla_put_failure: nlmsg_trim(skb, incomplete); - if (unlikely(first == cb->args[2])) { + if (unlikely(first == cb->args[IPSET_CB_ARG0])) { pr_warning("Can't list set %s: one bucket does not fit into " "a message. Please report it!\n", set->name); - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return -EMSGSIZE; } ipset_nest_end(skb, atd); diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index ec6f6d15dded..3e2317f3cf68 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -490,14 +490,15 @@ list_set_list(const struct ip_set *set, { const struct list_set *map = set->data; struct nlattr *atd, *nested; - u32 i, first = cb->args[2]; + u32 i, first = cb->args[IPSET_CB_ARG0]; const struct set_elem *e; atd = ipset_nest_start(skb, IPSET_ATTR_ADT); if (!atd) return -EMSGSIZE; - for (; cb->args[2] < map->size; cb->args[2]++) { - i = cb->args[2]; + for (; cb->args[IPSET_CB_ARG0] < map->size; + cb->args[IPSET_CB_ARG0]++) { + i = cb->args[IPSET_CB_ARG0]; e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto finish; @@ -522,13 +523,13 @@ list_set_list(const struct ip_set *set, finish: ipset_nest_end(skb, atd); /* Set listing finished */ - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return 0; nla_put_failure: nla_nest_cancel(skb, nested); if (unlikely(i == first)) { - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return -EMSGSIZE; } ipset_nest_end(skb, atd); -- cgit v1.2.3-71-gd317 From f84be2bd96a108b09c8440263fa3adb3fb225fa3 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 23 Oct 2013 20:05:27 +0200 Subject: net: make net_get_random_once irq safe I initial build non irq safe version of net_get_random_once because I would liked to have the freedom to defer even the extraction process of get_random_bytes until the nonblocking pool is fully seeded. I don't think this is a good idea anymore and thus this patch makes net_get_random_once irq safe. Now someone using net_get_random_once does not need to care from where it is called. Cc: David S. Miller Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/net.h | 1 - net/core/utils.c | 7 ++++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index aca446b46754..b292a0435571 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -250,7 +250,6 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done, #define ___NET_RANDOM_STATIC_KEY_INIT STATIC_KEY_INIT_FALSE #endif /* HAVE_JUMP_LABEL */ -/* BE CAREFUL: this function is not interrupt safe */ #define net_get_random_once(buf, nbytes) \ ({ \ bool ___ret = false; \ diff --git a/net/core/utils.c b/net/core/utils.c index bf09371e19b1..2f737bf90b3f 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -370,16 +370,17 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done, struct static_key *done_key) { static DEFINE_SPINLOCK(lock); + unsigned long flags; - spin_lock_bh(&lock); + spin_lock_irqsave(&lock, flags); if (*done) { - spin_unlock_bh(&lock); + spin_unlock_irqrestore(&lock, flags); return false; } get_random_bytes(buf, nbytes); *done = true; - spin_unlock_bh(&lock); + spin_unlock_irqrestore(&lock, flags); __net_random_once_disable_jump(done_key); -- cgit v1.2.3-71-gd317 From 7f29405403d7c17f539c099987972b862e7e5255 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 23 Oct 2013 16:02:42 -0700 Subject: net: fix rtnl notification in atomic context commit 991fb3f74c "dev: always advertise rx_flags changes via netlink" introduced rtnl notification from __dev_set_promiscuity(), which can be called in atomic context. Steps to reproduce: ip tuntap add dev tap1 mode tap ifconfig tap1 up tcpdump -nei tap1 & ip tuntap del dev tap1 mode tap [ 271.627994] device tap1 left promiscuous mode [ 271.639897] BUG: sleeping function called from invalid context at mm/slub.c:940 [ 271.664491] in_atomic(): 1, irqs_disabled(): 0, pid: 3394, name: ip [ 271.677525] INFO: lockdep is turned off. [ 271.690503] CPU: 0 PID: 3394 Comm: ip Tainted: G W 3.12.0-rc3+ #73 [ 271.703996] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012 [ 271.731254] ffffffff81a58506 ffff8807f0d57a58 ffffffff817544e5 ffff88082fa0f428 [ 271.760261] ffff8808071f5f40 ffff8807f0d57a88 ffffffff8108bad1 ffffffff81110ff8 [ 271.790683] 0000000000000010 00000000000000d0 00000000000000d0 ffff8807f0d57af8 [ 271.822332] Call Trace: [ 271.838234] [] dump_stack+0x55/0x76 [ 271.854446] [] __might_sleep+0x181/0x240 [ 271.870836] [] ? rcu_irq_exit+0x68/0xb0 [ 271.887076] [] kmem_cache_alloc_node+0x4e/0x2a0 [ 271.903368] [] ? vprintk_emit+0x1dc/0x5a0 [ 271.919716] [] ? __alloc_skb+0x57/0x2a0 [ 271.936088] [] ? vprintk_emit+0x1e0/0x5a0 [ 271.952504] [] __alloc_skb+0x57/0x2a0 [ 271.968902] [] rtmsg_ifinfo+0x52/0x100 [ 271.985302] [] __dev_notify_flags+0xad/0xc0 [ 272.001642] [] __dev_set_promiscuity+0x8c/0x1c0 [ 272.017917] [] ? packet_notifier+0x5/0x380 [ 272.033961] [] dev_set_promiscuity+0x29/0x50 [ 272.049855] [] packet_dev_mc+0x87/0xc0 [ 272.065494] [] packet_notifier+0x1b2/0x380 [ 272.080915] [] ? packet_notifier+0x5/0x380 [ 272.096009] [] notifier_call_chain+0x66/0x150 [ 272.110803] [] __raw_notifier_call_chain+0xe/0x10 [ 272.125468] [] raw_notifier_call_chain+0x16/0x20 [ 272.139984] [] call_netdevice_notifiers_info+0x40/0x70 [ 272.154523] [] call_netdevice_notifiers+0x16/0x20 [ 272.168552] [] rollback_registered_many+0x145/0x240 [ 272.182263] [] rollback_registered+0x31/0x40 [ 272.195369] [] unregister_netdevice_queue+0x58/0x90 [ 272.208230] [] __tun_detach+0x140/0x340 [ 272.220686] [] tun_chr_close+0x36/0x60 Signed-off-by: Alexei Starovoitov Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 4 ++-- include/linux/rtnetlink.h | 2 +- net/core/dev.c | 16 ++++++++-------- net/core/rtnetlink.c | 9 +++++---- 4 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2daa066c6cdd..a141f406cb98 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1213,7 +1213,7 @@ static int bond_master_upper_dev_link(struct net_device *bond_dev, if (err) return err; slave_dev->flags |= IFF_SLAVE; - rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE); + rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); return 0; } @@ -1222,7 +1222,7 @@ static void bond_upper_dev_unlink(struct net_device *bond_dev, { netdev_upper_dev_unlink(slave_dev, bond_dev); slave_dev->flags &= ~IFF_SLAVE; - rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE); + rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); } /* enslave device to bond device */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index f28544b2f9af..939428ad25ac 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -15,7 +15,7 @@ extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error); -extern void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change); +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags); /* RTNL is used as a global lock for all changes to network configuration */ extern void rtnl_lock(void); diff --git a/net/core/dev.c b/net/core/dev.c index bdffd654edc4..0054c8c75f50 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1203,7 +1203,7 @@ void netdev_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { call_netdevice_notifiers(NETDEV_CHANGE, dev); - rtmsg_ifinfo(RTM_NEWLINK, dev, 0); + rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); } } EXPORT_SYMBOL(netdev_state_change); @@ -1293,7 +1293,7 @@ int dev_open(struct net_device *dev) if (ret < 0) return ret; - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); call_netdevice_notifiers(NETDEV_UP, dev); return ret; @@ -1371,7 +1371,7 @@ static int dev_close_many(struct list_head *head) __dev_close_many(head); list_for_each_entry_safe(dev, tmp, head, close_list) { - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); call_netdevice_notifiers(NETDEV_DOWN, dev); list_del_init(&dev->close_list); } @@ -5258,7 +5258,7 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, unsigned int changes = dev->flags ^ old_flags; if (gchanges) - rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges); + rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC); if (changes & IFF_UP) { if (dev->flags & IFF_UP) @@ -5490,7 +5490,7 @@ static void rollback_registered_many(struct list_head *head) if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); /* * Flush the unicast and multicast chains @@ -5889,7 +5889,7 @@ int register_netdevice(struct net_device *dev) */ if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL); out: return ret; @@ -6501,7 +6501,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char call_netdevice_notifiers(NETDEV_UNREGISTER, dev); rcu_barrier(); call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); - rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); /* * Flush the unicast and multicast chains @@ -6540,7 +6540,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char * Prevent userspace races by waiting until the network * device is fully setup before sending notifications. */ - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL); synchronize_net(); err = 0; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4aedf03da052..cf67144d3e3c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1984,14 +1984,15 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change) +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, + gfp_t flags) { struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; size_t if_info_size; - skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL); + skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), flags); if (skb == NULL) goto errout; @@ -2002,7 +2003,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change) kfree_skb(skb); goto errout; } - rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags); return; errout: if (err < 0) @@ -2716,7 +2717,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_JOIN: break; default: - rtmsg_ifinfo(RTM_NEWLINK, dev, 0); + rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); break; } return NOTIFY_DONE; -- cgit v1.2.3-71-gd317 From 8f2535b92d685c68db4bc699dd78462a646f6ef9 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Mon, 14 Oct 2013 19:08:27 -0700 Subject: mac80211: process the CSA frame for mesh accordingly Process the CSA frame according to the procedures define in IEEE Std 802.11-2012 section 10.9.8.4.3 as follow: * The mesh channel switch parameters element (MCSP) must be availabe. * If the MCSP's TTL is 1, drop the frame but still process the CSA. * If the MCSP's precedence value is less than or equal to the current precedence value, drop the frame and do not process the CSA. * The CSA frame is forwarded after TTL is decremented by 1 and the initiator field is set to 0. Transmit restrict field and others are maintained as is. * No beacon or probe response frame are handled here. Also, introduce the debug message used for mesh CSA purpose. Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 20 +++++++++++ net/mac80211/Kconfig | 11 ++++++ net/mac80211/debug.h | 10 ++++++ net/mac80211/ieee80211_i.h | 4 +++ net/mac80211/mesh.c | 83 ++++++++++++++++++++++++++++++++++++++++++++-- net/mac80211/util.c | 9 +++++ 6 files changed, 134 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 7c1e1ebc0e23..8c3b26a21574 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -696,6 +696,18 @@ struct ieee80211_sec_chan_offs_ie { u8 sec_chan_offs; } __packed; +/** + * struct ieee80211_mesh_chansw_params_ie - mesh channel switch parameters IE + * + * This structure represents the "Mesh Channel Switch Paramters element" + */ +struct ieee80211_mesh_chansw_params_ie { + u8 mesh_ttl; + u8 mesh_flags; + __le16 mesh_reason; + __le16 mesh_pre_value; +} __packed; + /** * struct ieee80211_wide_bw_chansw_ie - wide bandwidth channel switch IE */ @@ -750,6 +762,14 @@ enum mesh_config_capab_flags { IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL = 0x40, }; +/** + * mesh channel switch parameters element's flag indicator + * + */ +#define WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT BIT(0) +#define WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR BIT(1) +#define WLAN_EID_CHAN_SWITCH_PARAM_REASON BIT(2) + /** * struct ieee80211_rann_ie * diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index dc31ec3db404..97b5dcad5025 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -259,6 +259,17 @@ config MAC80211_MESH_SYNC_DEBUG Do not select this option. +config MAC80211_MESH_CSA_DEBUG + bool "Verbose mesh channel switch debugging" + depends on MAC80211_DEBUG_MENU + depends on MAC80211_MESH + ---help--- + Selecting this option causes mac80211 to print out very verbose mesh + channel switch debugging messages (when mac80211 is taking part in a + mesh network). + + Do not select this option. + config MAC80211_MESH_PS_DEBUG bool "Verbose mesh powersave debugging" depends on MAC80211_DEBUG_MENU diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h index 4ccc5ed6237d..493d68061f0c 100644 --- a/net/mac80211/debug.h +++ b/net/mac80211/debug.h @@ -44,6 +44,12 @@ #define MAC80211_MESH_SYNC_DEBUG 0 #endif +#ifdef CONFIG_MAC80211_MESH_CSA_DEBUG +#define MAC80211_MESH_CSA_DEBUG 1 +#else +#define MAC80211_MESH_CSA_DEBUG 0 +#endif + #ifdef CONFIG_MAC80211_MESH_PS_DEBUG #define MAC80211_MESH_PS_DEBUG 1 #else @@ -157,6 +163,10 @@ do { \ _sdata_dbg(MAC80211_MESH_SYNC_DEBUG, \ sdata, fmt, ##__VA_ARGS__) +#define mcsa_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_MESH_CSA_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + #define mps_dbg(sdata, fmt, ...) \ _sdata_dbg(MAC80211_MESH_PS_DEBUG, \ sdata, fmt, ##__VA_ARGS__) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index cbaea32bccf1..4ebbcc6f67e0 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -603,6 +603,9 @@ struct ieee80211_if_mesh { int ps_peers_light_sleep; int ps_peers_deep_sleep; struct ps_data ps; + /* Channel Switching Support */ + bool chsw_init; + u16 pre_value; }; #ifdef CONFIG_MAC80211_MESH @@ -1252,6 +1255,7 @@ struct ieee802_11_elems { const struct ieee80211_timeout_interval_ie *timeout_int; const u8 *opmode_notif; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; + const struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie; /* length of them, respectively */ u8 ssid_len; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 707ac61d63e5..0a3ccaa275f9 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -920,6 +920,82 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, stype, mgmt, &elems, rx_status); } +static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) +{ + struct ieee80211_mgmt *mgmt_fwd; + struct sk_buff *skb; + struct ieee80211_local *local = sdata->local; + u8 *pos = mgmt->u.action.u.chan_switch.variable; + size_t offset_ttl; + + skb = dev_alloc_skb(local->tx_headroom + len); + if (!skb) + return -ENOMEM; + skb_reserve(skb, local->tx_headroom); + mgmt_fwd = (struct ieee80211_mgmt *) skb_put(skb, len); + + /* offset_ttl is based on whether the secondary channel + * offset is available or not. Substract 1 from the mesh TTL + * and disable the initiator flag before forwarding. + */ + offset_ttl = (len < 42) ? 7 : 10; + *(pos + offset_ttl) -= 1; + *(pos + offset_ttl + 1) &= ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR; + + memcpy(mgmt_fwd, mgmt, len); + eth_broadcast_addr(mgmt_fwd->da); + memcpy(mgmt_fwd->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt_fwd->bssid, sdata->vif.addr, ETH_ALEN); + + ieee80211_tx_skb(sdata, skb); + return 0; +} + +static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct ieee802_11_elems elems; + u16 pre_value; + bool block_tx, fwd_csa = true; + size_t baselen; + u8 *pos, ttl; + + if (mgmt->u.action.u.measurement.action_code != + WLAN_ACTION_SPCT_CHL_SWITCH) + return; + + pos = mgmt->u.action.u.chan_switch.variable; + baselen = offsetof(struct ieee80211_mgmt, + u.action.u.chan_switch.variable); + ieee802_11_parse_elems(pos, len - baselen, false, &elems); + + ttl = elems.mesh_chansw_params_ie->mesh_ttl; + if (!--ttl) + fwd_csa = false; + + pre_value = le16_to_cpu(elems.mesh_chansw_params_ie->mesh_pre_value); + if (ifmsh->pre_value >= pre_value) + return; + + ifmsh->pre_value = pre_value; + + /* forward or re-broadcast the CSA frame */ + if (fwd_csa) { + if (mesh_fwd_csa_frame(sdata, mgmt, len) < 0) + mcsa_dbg(sdata, "Failed to forward the CSA frame"); + } + + /* block the Tx only after forwarding the CSA frame if required */ + block_tx = elems.mesh_chansw_params_ie->mesh_flags & + WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT; + if (block_tx) + ieee80211_stop_queues_by_reason(&sdata->local->hw, + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_CSA); +} + static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, @@ -939,6 +1015,9 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, if (mesh_action_is_path_sel(mgmt)) mesh_rx_path_sel_frame(sdata, mgmt, len); break; + case WLAN_CATEGORY_SPECTRUM_MGMT: + mesh_rx_csa_frame(sdata, mgmt, len); + break; } } @@ -1056,13 +1135,11 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) (unsigned long) sdata); ifmsh->accepting_plinks = true; - ifmsh->preq_id = 0; - ifmsh->sn = 0; - ifmsh->num_gates = 0; atomic_set(&ifmsh->mpaths, 0); mesh_rmc_init(sdata); ifmsh->last_preq = jiffies; ifmsh->next_perr = jiffies; + ifmsh->chsw_init = false; /* Allocate all mesh structures when creating the first mesh interface. */ if (!mesh_allocated) ieee80211s_init(); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 65ebe0c5e835..523783cedf6e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -740,6 +740,7 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, case WLAN_EID_TIMEOUT_INTERVAL: case WLAN_EID_SECONDARY_CHANNEL_OFFSET: case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: + case WLAN_EID_CHAN_SWITCH_PARAM: /* * not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible * that if the content gets bigger it might be needed more than once @@ -905,6 +906,14 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, } elems->sec_chan_offs = (void *)pos; break; + case WLAN_EID_CHAN_SWITCH_PARAM: + if (elen != + sizeof(*elems->mesh_chansw_params_ie)) { + elem_parse_failed = true; + break; + } + elems->mesh_chansw_params_ie = (void *)pos; + break; case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: if (!action || elen != sizeof(*elems->wide_bw_chansw_ie)) { -- cgit v1.2.3-71-gd317 From 5d9efa7ee99eed58388f186c13cf2e2a87e9ceb4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 28 Oct 2013 20:07:50 -0400 Subject: ipv6: Remove privacy config option. The code for privacy extentions is very mature, and making it configurable only gives marginal memory/code savings in exchange for obfuscation and hard to read code via CPP ifdef'ery. Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 -- include/net/if_inet6.h | 5 +---- net/ipv6/Kconfig | 18 ------------------ net/ipv6/addrconf.c | 41 +++-------------------------------------- 4 files changed, 4 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index a80a63cfb70c..5d89d1b808a6 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -21,13 +21,11 @@ struct ipv6_devconf { __s32 force_mld_version; __s32 mldv1_unsolicited_report_interval; __s32 mldv2_unsolicited_report_interval; -#ifdef CONFIG_IPV6_PRIVACY __s32 use_tempaddr; __s32 temp_valid_lft; __s32 temp_prefered_lft; __s32 regen_max_retry; __s32 max_desync_factor; -#endif __s32 max_addresses; __s32 accept_ra_defrtr; __s32 accept_ra_pinfo; diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 02ef7727bb55..76d54270f2e2 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -66,11 +66,10 @@ struct inet6_ifaddr { struct hlist_node addr_lst; struct list_head if_list; -#ifdef CONFIG_IPV6_PRIVACY struct list_head tmp_list; struct inet6_ifaddr *ifpub; int regen_count; -#endif + bool tokenized; struct rcu_head rcu; @@ -192,11 +191,9 @@ struct inet6_dev { __u32 if_flags; int dead; -#ifdef CONFIG_IPV6_PRIVACY u8 rndid[8]; struct timer_list regen_timer; struct list_head tempaddr_list; -#endif struct in6_addr token; diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index e1a8d903e366..d92e5586783e 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -21,24 +21,6 @@ menuconfig IPV6 if IPV6 -config IPV6_PRIVACY - bool "IPv6: Privacy Extensions (RFC 3041) support" - ---help--- - Privacy Extensions for Stateless Address Autoconfiguration in IPv6 - support. With this option, additional periodically-altered - pseudo-random global-scope unicast address(es) will be assigned to - your interface(s). - - We use our standard pseudo-random algorithm to generate the - randomized interface identifier, instead of one described in RFC 3041. - - By default the kernel does not generate temporary addresses. - To use temporary addresses, do - - echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr - - See for details. - config IPV6_ROUTER_PREF bool "IPv6: Router Preference (RFC 4191) support" ---help--- diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cd3fb301da38..542d09561ed6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -83,11 +83,7 @@ #include #include #include - -#ifdef CONFIG_IPV6_PRIVACY #include -#endif - #include #include @@ -124,11 +120,9 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) } #endif -#ifdef CONFIG_IPV6_PRIVACY static void __ipv6_regen_rndid(struct inet6_dev *idev); static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); static void ipv6_regen_rndid(unsigned long data); -#endif static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static int ipv6_count_addresses(struct inet6_dev *idev); @@ -183,13 +177,11 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, -#ifdef CONFIG_IPV6_PRIVACY .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, .regen_max_retry = REGEN_MAX_RETRY, .max_desync_factor = MAX_DESYNC_FACTOR, -#endif .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, .accept_ra_pinfo = 1, @@ -221,13 +213,11 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, -#ifdef CONFIG_IPV6_PRIVACY .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, .regen_max_retry = REGEN_MAX_RETRY, .max_desync_factor = MAX_DESYNC_FACTOR, -#endif .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, .accept_ra_pinfo = 1, @@ -371,7 +361,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) } #endif -#ifdef CONFIG_IPV6_PRIVACY INIT_LIST_HEAD(&ndev->tempaddr_list); setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); if ((dev->flags&IFF_LOOPBACK) || @@ -384,7 +373,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) in6_dev_hold(ndev); ipv6_regen_rndid((unsigned long) ndev); } -#endif + ndev->token = in6addr_any; if (netif_running(dev) && addrconf_qdisc_ok(dev)) @@ -865,12 +854,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, /* Add to inet6_dev unicast addr list. */ ipv6_link_dev_addr(idev, ifa); -#ifdef CONFIG_IPV6_PRIVACY if (ifa->flags&IFA_F_TEMPORARY) { list_add(&ifa->tmp_list, &idev->tempaddr_list); in6_ifa_hold(ifa); } -#endif in6_ifa_hold(ifa); write_unlock(&idev->lock); @@ -913,7 +900,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) spin_unlock_bh(&addrconf_hash_lock); write_lock_bh(&idev->lock); -#ifdef CONFIG_IPV6_PRIVACY + if (ifp->flags&IFA_F_TEMPORARY) { list_del(&ifp->tmp_list); if (ifp->ifpub) { @@ -922,7 +909,6 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } __in6_ifa_put(ifp); } -#endif list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) { if (ifa == ifp) { @@ -1013,7 +999,6 @@ out: in6_ifa_put(ifp); } -#ifdef CONFIG_IPV6_PRIVACY static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift) { struct inet6_dev *idev = ifp->idev; @@ -1116,7 +1101,6 @@ retry: out: return ret; } -#endif /* * Choose an appropriate source address (RFC3484) @@ -1131,9 +1115,7 @@ enum { #endif IPV6_SADDR_RULE_OIF, IPV6_SADDR_RULE_LABEL, -#ifdef CONFIG_IPV6_PRIVACY IPV6_SADDR_RULE_PRIVACY, -#endif IPV6_SADDR_RULE_ORCHID, IPV6_SADDR_RULE_PREFIX, IPV6_SADDR_RULE_MAX @@ -1247,7 +1229,6 @@ static int ipv6_get_saddr_eval(struct net *net, &score->ifa->addr, score->addr_type, score->ifa->idev->dev->ifindex) == dst->label; break; -#ifdef CONFIG_IPV6_PRIVACY case IPV6_SADDR_RULE_PRIVACY: { /* Rule 7: Prefer public address @@ -1259,7 +1240,6 @@ static int ipv6_get_saddr_eval(struct net *net, ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp; break; } -#endif case IPV6_SADDR_RULE_ORCHID: /* Rule 8-: Prefer ORCHID vs ORCHID or * non-ORCHID vs non-ORCHID @@ -1588,7 +1568,6 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) if (dad_failed) ipv6_ifa_notify(0, ifp); in6_ifa_put(ifp); -#ifdef CONFIG_IPV6_PRIVACY } else if (ifp->flags&IFA_F_TEMPORARY) { struct inet6_ifaddr *ifpub; spin_lock_bh(&ifp->lock); @@ -1602,7 +1581,6 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) spin_unlock_bh(&ifp->lock); } ipv6_del_addr(ifp); -#endif } else ipv6_del_addr(ifp); } @@ -1851,7 +1829,6 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) return err; } -#ifdef CONFIG_IPV6_PRIVACY /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ static void __ipv6_regen_rndid(struct inet6_dev *idev) { @@ -1919,7 +1896,6 @@ static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmp if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) __ipv6_regen_rndid(idev); } -#endif /* * Add prefix route. @@ -2207,9 +2183,7 @@ ok: if (ifp) { int flags; unsigned long now; -#ifdef CONFIG_IPV6_PRIVACY struct inet6_ifaddr *ift; -#endif u32 stored_lft; /* update lifetime (RFC2462 5.5.3 e) */ @@ -2250,7 +2224,6 @@ ok: } else spin_unlock(&ifp->lock); -#ifdef CONFIG_IPV6_PRIVACY read_lock_bh(&in6_dev->lock); /* update all temporary addresses in the list */ list_for_each_entry(ift, &in6_dev->tempaddr_list, @@ -2315,7 +2288,7 @@ ok: } else { read_unlock_bh(&in6_dev->lock); } -#endif + in6_ifa_put(ifp); addrconf_verify(0); } @@ -2995,7 +2968,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (!how) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); -#ifdef CONFIG_IPV6_PRIVACY if (how && del_timer(&idev->regen_timer)) in6_dev_put(idev); @@ -3015,7 +2987,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) in6_ifa_put(ifa); write_lock_bh(&idev->lock); } -#endif while (!list_empty(&idev->addr_list)) { ifa = list_first_entry(&idev->addr_list, @@ -3528,7 +3499,6 @@ restart: in6_ifa_put(ifp); goto restart; } -#ifdef CONFIG_IPV6_PRIVACY } else if ((ifp->flags&IFA_F_TEMPORARY) && !(ifp->flags&IFA_F_TENTATIVE)) { unsigned long regen_advance = ifp->idev->cnf.regen_max_retry * @@ -3556,7 +3526,6 @@ restart: } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next)) next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ; spin_unlock(&ifp->lock); -#endif } else { /* ifp->prefered_lft <= ifp->valid_lft */ if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) @@ -4128,13 +4097,11 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval); array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] = jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval); -#ifdef CONFIG_IPV6_PRIVACY array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr; array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft; array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft; array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry; array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor; -#endif array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses; array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr; array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; @@ -4828,7 +4795,6 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, -#ifdef CONFIG_IPV6_PRIVACY { .procname = "use_tempaddr", .data = &ipv6_devconf.use_tempaddr, @@ -4864,7 +4830,6 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec, }, -#endif { .procname = "max_addresses", .data = &ipv6_devconf.max_addresses, -- cgit v1.2.3-71-gd317 From 80c33ddd31d0e801953e02b7b003f395c1920e4e Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Sat, 21 Sep 2013 05:05:39 +0000 Subject: net: add might_sleep() call to napi_disable napi_disable uses an msleep() call to wait for outstanding napi work to be finished after setting the disable bit. It does not always sleep incase there was no outstanding work. This resulted in a rare bug in ixgbe_down operation where a napi_disable call took place inside of a local_bh_disable()d context. In order to enable easier detection of future sleep while atomic BUGs, this patch adds a might_sleep() call, so that every use of napi_disable during atomic context will be visible. Signed-off-by: Jacob Keller Cc: Eliezer Tamir Cc: Alexander Duyck Cc: Hyong-Youb Kim Cc: Amir Vadai Cc: Dmitry Kravkov Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 27f62f746621..cb1d918ecdf1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -483,6 +483,7 @@ void napi_hash_del(struct napi_struct *napi); */ static inline void napi_disable(struct napi_struct *n) { + might_sleep(); set_bit(NAPI_STATE_DISABLE, &n->state); while (test_and_set_bit(NAPI_STATE_SCHED, &n->state)) msleep(1); -- cgit v1.2.3-71-gd317 From f6701d5f73c5c2f4ca37634514631fb056291f89 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 1 Nov 2013 11:16:39 +0100 Subject: net: cdc_ncm: add include protection to cdc_ncm.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes it a lot easier to test modified versions Cc: Alexey Orishko Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- include/linux/usb/cdc_ncm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index cc25b70af33c..89f0bbc2cf83 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -36,6 +36,9 @@ * SUCH DAMAGE. */ +#ifndef __LINUX_USB_CDC_NCM_H +#define __LINUX_USB_CDC_NCM_H + #define CDC_NCM_COMM_ALTSETTING_NCM 0 #define CDC_NCM_COMM_ALTSETTING_MBIM 1 @@ -133,3 +136,5 @@ extern void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); extern struct sk_buff *cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign); extern int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); extern int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset); + +#endif /* __LINUX_USB_CDC_NCM_H */ -- cgit v1.2.3-71-gd317 From 3e515665a76ad8f60a1c05968cc6a5b2f2701171 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 1 Nov 2013 11:16:40 +0100 Subject: net: cdc_ncm: remove redundant "intf" field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is always a duplicate of the "control" field. It causes confusion wrt intf_data updates and cleanups. Cc: Alexey Orishko Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 4 +--- include/linux/usb/cdc_ncm.h | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 38566bffca98..bfab8796730f 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -382,7 +382,6 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ len = intf->cur_altsetting->extralen; ctx->udev = dev->udev; - ctx->intf = intf; /* parse through descriptors associated with control interface */ while ((len > 0) && (buf[0] > 2) && (buf[0] <= len)) { @@ -489,7 +488,6 @@ advance: usb_set_intfdata(ctx->data, dev); usb_set_intfdata(ctx->control, dev); - usb_set_intfdata(ctx->intf, dev); if (ctx->ether_desc) { temp = usbnet_get_ethernet_addr(dev, ctx->ether_desc->iMACAddress); @@ -562,7 +560,7 @@ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf) ctx->control = NULL; } - usb_set_intfdata(ctx->intf, NULL); + usb_set_intfdata(intf, NULL); cdc_ncm_free(ctx); } EXPORT_SYMBOL_GPL(cdc_ncm_unbind); diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 89f0bbc2cf83..c14e00fb1667 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -103,7 +103,6 @@ struct cdc_ncm_ctx { struct usb_host_endpoint *in_ep; struct usb_host_endpoint *out_ep; struct usb_host_endpoint *status_ep; - struct usb_interface *intf; struct usb_interface *control; struct usb_interface *data; -- cgit v1.2.3-71-gd317 From ff1632aa8581b7103ac2af1ea3cb4a415eb9d6ad Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 1 Nov 2013 11:16:41 +0100 Subject: net: cdc_ncm: remove redundant endpoint pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to duplicate stuff already in the common usbnet struct. We still need to keep our special find_endpoints function because we need explicit control over the selected altsetting. Cc: Alexey Orishko Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 38 +++++++++++++++++++------------------- include/linux/usb/cdc_ncm.h | 3 --- 2 files changed, 19 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index bfab8796730f..a989bd5290a6 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -292,9 +292,9 @@ max_dgram_err: } static void -cdc_ncm_find_endpoints(struct cdc_ncm_ctx *ctx, struct usb_interface *intf) +cdc_ncm_find_endpoints(struct usbnet *dev, struct usb_interface *intf) { - struct usb_host_endpoint *e; + struct usb_host_endpoint *e, *in = NULL, *out = NULL; u8 ep; for (ep = 0; ep < intf->cur_altsetting->desc.bNumEndpoints; ep++) { @@ -303,18 +303,18 @@ cdc_ncm_find_endpoints(struct cdc_ncm_ctx *ctx, struct usb_interface *intf) switch (e->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) { case USB_ENDPOINT_XFER_INT: if (usb_endpoint_dir_in(&e->desc)) { - if (ctx->status_ep == NULL) - ctx->status_ep = e; + if (!dev->status) + dev->status = e; } break; case USB_ENDPOINT_XFER_BULK: if (usb_endpoint_dir_in(&e->desc)) { - if (ctx->in_ep == NULL) - ctx->in_ep = e; + if (!in) + in = e; } else { - if (ctx->out_ep == NULL) - ctx->out_ep = e; + if (!out) + out = e; } break; @@ -322,6 +322,14 @@ cdc_ncm_find_endpoints(struct cdc_ncm_ctx *ctx, struct usb_interface *intf) break; } } + if (in && !dev->in) + dev->in = usb_rcvbulkpipe(dev->udev, + in->desc.bEndpointAddress & + USB_ENDPOINT_NUMBER_MASK); + if (out && !dev->out) + dev->out = usb_sndbulkpipe(dev->udev, + out->desc.bEndpointAddress & + USB_ENDPOINT_NUMBER_MASK); } static void cdc_ncm_free(struct cdc_ncm_ctx *ctx) @@ -477,11 +485,9 @@ advance: if (temp) goto error2; - cdc_ncm_find_endpoints(ctx, ctx->data); - cdc_ncm_find_endpoints(ctx, ctx->control); - - if ((ctx->in_ep == NULL) || (ctx->out_ep == NULL) || - (ctx->status_ep == NULL)) + cdc_ncm_find_endpoints(dev, ctx->data); + cdc_ncm_find_endpoints(dev, ctx->control); + if (!dev->in || !dev->out || !dev->status) goto error2; dev->net->ethtool_ops = &cdc_ncm_ethtool_ops; @@ -496,12 +502,6 @@ advance: dev_info(&dev->udev->dev, "MAC-Address: %pM\n", dev->net->dev_addr); } - - dev->in = usb_rcvbulkpipe(dev->udev, - ctx->in_ep->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); - dev->out = usb_sndbulkpipe(dev->udev, - ctx->out_ep->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); - dev->status = ctx->status_ep; dev->rx_urb_size = ctx->rx_max; /* cdc_ncm_setup will override dwNtbOutMaxSize if it is diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index c14e00fb1667..36e1e153ca2d 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -100,9 +100,6 @@ struct cdc_ncm_ctx { struct net_device *netdev; struct usb_device *udev; - struct usb_host_endpoint *in_ep; - struct usb_host_endpoint *out_ep; - struct usb_host_endpoint *status_ep; struct usb_interface *control; struct usb_interface *data; -- cgit v1.2.3-71-gd317 From bed6f762123fc53c63efef386531dd877cba2468 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 1 Nov 2013 11:16:42 +0100 Subject: net: cdc_ncm: remove redundant netdev field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Too many pointers back and forth are likely to confuse developers, creating subtle bugs whenever we forget to syncronize them all. As a usbnet driver, we should stick with the standard struct usbnet fields as much as possible. The netdevice is one such field. Cc: Greg Suarez Cc: Alexey Orishko Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_mbim.c | 2 +- drivers/net/usb/cdc_ncm.c | 73 +++++++++++++++++++++++---------------------- include/linux/usb/cdc_ncm.h | 3 +- 3 files changed, 39 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 253472a8a983..af76aaf08b6b 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -175,7 +175,7 @@ static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb } spin_lock_bh(&ctx->mtx); - skb_out = cdc_ncm_fill_tx_frame(ctx, skb, sign); + skb_out = cdc_ncm_fill_tx_frame(dev, skb, sign); spin_unlock_bh(&ctx->mtx); return skb_out; diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index a989bd5290a6..e39e7678a798 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -80,8 +80,9 @@ cdc_ncm_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info) usb_make_path(dev->udev, info->bus_info, sizeof(info->bus_info)); } -static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) +static u8 cdc_ncm_setup(struct usbnet *dev) { + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; u32 val; u8 flags; u8 iface_no; @@ -90,7 +91,6 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) u16 ntb_fmt_supported; u32 min_dgram_size; u32 min_hdr_size; - struct usbnet *dev = netdev_priv(ctx->netdev); iface_no = ctx->control->cur_altsetting->desc.bInterfaceNumber; @@ -285,8 +285,8 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) } max_dgram_err: - if (ctx->netdev->mtu != (ctx->max_datagram_size - eth_hlen)) - ctx->netdev->mtu = ctx->max_datagram_size - eth_hlen; + if (dev->net->mtu != (ctx->max_datagram_size - eth_hlen)) + dev->net->mtu = ctx->max_datagram_size - eth_hlen; return 0; } @@ -375,11 +375,10 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ hrtimer_init(&ctx->tx_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ctx->tx_timer.function = &cdc_ncm_tx_timer_cb; - ctx->bh.data = (unsigned long)ctx; + ctx->bh.data = (unsigned long)dev; ctx->bh.func = cdc_ncm_txpath_bh; atomic_set(&ctx->stop, 0); spin_lock_init(&ctx->mtx); - ctx->netdev = dev->net; /* store ctx pointer in device data field */ dev->data[0] = (unsigned long)ctx; @@ -477,7 +476,7 @@ advance: goto error2; /* initialize data interface */ - if (cdc_ncm_setup(ctx)) + if (cdc_ncm_setup(dev)) goto error2; /* configure data interface */ @@ -669,9 +668,9 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ } struct sk_buff * -cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign) +cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) { - struct usbnet *dev = netdev_priv(ctx->netdev); + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; struct usb_cdc_ncm_nth16 *nth16; struct usb_cdc_ncm_ndp16 *ndp16; struct sk_buff *skb_out; @@ -695,7 +694,7 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign) if (skb_out == NULL) { if (skb != NULL) { dev_kfree_skb_any(skb); - ctx->netdev->stats.tx_dropped++; + dev->net->stats.tx_dropped++; } goto exit_no_skb; } @@ -733,12 +732,12 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign) /* won't fit, MTU problem? */ dev_kfree_skb_any(skb); skb = NULL; - ctx->netdev->stats.tx_dropped++; + dev->net->stats.tx_dropped++; } else { /* no room for skb - store for later */ if (ctx->tx_rem_skb != NULL) { dev_kfree_skb_any(ctx->tx_rem_skb); - ctx->netdev->stats.tx_dropped++; + dev->net->stats.tx_dropped++; } ctx->tx_rem_skb = skb; ctx->tx_rem_sign = sign; @@ -771,7 +770,7 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign) if (skb != NULL) { dev_kfree_skb_any(skb); skb = NULL; - ctx->netdev->stats.tx_dropped++; + dev->net->stats.tx_dropped++; } ctx->tx_curr_frame_num = n; @@ -814,7 +813,7 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign) /* return skb */ ctx->tx_curr_skb = NULL; - ctx->netdev->stats.tx_packets += ctx->tx_curr_frame_num; + dev->net->stats.tx_packets += ctx->tx_curr_frame_num; return skb_out; exit_no_skb: @@ -846,18 +845,19 @@ static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *timer) static void cdc_ncm_txpath_bh(unsigned long param) { - struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)param; + struct usbnet *dev = (struct usbnet *)param; + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; spin_lock_bh(&ctx->mtx); if (ctx->tx_timer_pending != 0) { ctx->tx_timer_pending--; cdc_ncm_tx_timeout_start(ctx); spin_unlock_bh(&ctx->mtx); - } else if (ctx->netdev != NULL) { + } else if (dev->net != NULL) { spin_unlock_bh(&ctx->mtx); - netif_tx_lock_bh(ctx->netdev); - usbnet_start_xmit(NULL, ctx->netdev); - netif_tx_unlock_bh(ctx->netdev); + netif_tx_lock_bh(dev->net); + usbnet_start_xmit(NULL, dev->net); + netif_tx_unlock_bh(dev->net); } else { spin_unlock_bh(&ctx->mtx); } @@ -880,7 +880,7 @@ cdc_ncm_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) goto error; spin_lock_bh(&ctx->mtx); - skb_out = cdc_ncm_fill_tx_frame(ctx, skb, cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN)); + skb_out = cdc_ncm_fill_tx_frame(dev, skb, cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN)); spin_unlock_bh(&ctx->mtx); return skb_out; @@ -1047,9 +1047,10 @@ error: } static void -cdc_ncm_speed_change(struct cdc_ncm_ctx *ctx, +cdc_ncm_speed_change(struct usbnet *dev, struct usb_cdc_speed_change *data) { + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; uint32_t rx_speed = le32_to_cpu(data->DLBitRRate); uint32_t tx_speed = le32_to_cpu(data->ULBitRate); @@ -1063,18 +1064,18 @@ cdc_ncm_speed_change(struct cdc_ncm_ctx *ctx, if ((tx_speed > 1000000) && (rx_speed > 1000000)) { printk(KERN_INFO KBUILD_MODNAME - ": %s: %u mbit/s downlink " - "%u mbit/s uplink\n", - ctx->netdev->name, - (unsigned int)(rx_speed / 1000000U), - (unsigned int)(tx_speed / 1000000U)); + ": %s: %u mbit/s downlink " + "%u mbit/s uplink\n", + dev->net->name, + (unsigned int)(rx_speed / 1000000U), + (unsigned int)(tx_speed / 1000000U)); } else { printk(KERN_INFO KBUILD_MODNAME - ": %s: %u kbit/s downlink " - "%u kbit/s uplink\n", - ctx->netdev->name, - (unsigned int)(rx_speed / 1000U), - (unsigned int)(tx_speed / 1000U)); + ": %s: %u kbit/s downlink " + "%u kbit/s uplink\n", + dev->net->name, + (unsigned int)(rx_speed / 1000U), + (unsigned int)(tx_speed / 1000U)); } } } @@ -1091,7 +1092,7 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) /* test for split data in 8-byte chunks */ if (test_and_clear_bit(EVENT_STS_SPLIT, &dev->flags)) { - cdc_ncm_speed_change(ctx, + cdc_ncm_speed_change(dev, (struct usb_cdc_speed_change *)urb->transfer_buffer); return; } @@ -1108,8 +1109,8 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) ctx->connected = le16_to_cpu(event->wValue); printk(KERN_INFO KBUILD_MODNAME ": %s: network connection:" - " %sconnected\n", - ctx->netdev->name, ctx->connected ? "" : "dis"); + " %sconnected\n", + dev->net->name, ctx->connected ? "" : "dis"); usbnet_link_change(dev, ctx->connected, 0); if (!ctx->connected) @@ -1121,8 +1122,8 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) sizeof(struct usb_cdc_speed_change))) set_bit(EVENT_STS_SPLIT, &dev->flags); else - cdc_ncm_speed_change(ctx, - (struct usb_cdc_speed_change *) &event[1]); + cdc_ncm_speed_change(dev, + (struct usb_cdc_speed_change *)&event[1]); break; default: diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 36e1e153ca2d..5c47bd9620d5 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -98,7 +98,6 @@ struct cdc_ncm_ctx { const struct usb_cdc_union_desc *union_desc; const struct usb_cdc_ether_desc *ether_desc; - struct net_device *netdev; struct usb_device *udev; struct usb_interface *control; struct usb_interface *data; @@ -129,7 +128,7 @@ struct cdc_ncm_ctx { extern u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf); extern int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); extern void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); -extern struct sk_buff *cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign); +extern struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); extern int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); extern int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset); -- cgit v1.2.3-71-gd317 From de5bee2720776989060b9686e6a89e938a346345 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 1 Nov 2013 11:16:43 +0100 Subject: net: cdc_ncm: remove unused udev field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already use the usbnet udev field everywhere this could have been used. Cc: Alexey Orishko Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 2 -- include/linux/usb/cdc_ncm.h | 1 - 2 files changed, 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index e39e7678a798..9cdd762916dc 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -388,8 +388,6 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ buf = intf->cur_altsetting->extra; len = intf->cur_altsetting->extralen; - ctx->udev = dev->udev; - /* parse through descriptors associated with control interface */ while ((len > 0) && (buf[0] > 2) && (buf[0] <= len)) { diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 5c47bd9620d5..059dcc93c4d8 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -98,7 +98,6 @@ struct cdc_ncm_ctx { const struct usb_cdc_union_desc *union_desc; const struct usb_cdc_ether_desc *ether_desc; - struct usb_device *udev; struct usb_interface *control; struct usb_interface *data; -- cgit v1.2.3-71-gd317 From f3028c524a7cd4d97b034fc1f35dcaecb5d6f9d6 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 1 Nov 2013 11:16:44 +0100 Subject: net: cdc_ncm: remove tx_speed and rx_speed fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These fields are only used to prevent printing the same speeds multiple times if we receive multiple identical speed notifications. The value of these printk's is questionable, and even more so when we filter out some of the notifications sent us by the firmware. If we are going to print any of these, then we should print them all. Removing little used fields is a bonus. Cc: Alexey Orishko Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 37 ++++++++++++++----------------------- include/linux/usb/cdc_ncm.h | 2 -- 2 files changed, 14 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 9cdd762916dc..fc36a99f8183 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -510,7 +510,6 @@ advance: ctx->tx_max % usb_maxpacket(dev->udev, dev->out, 1) == 0) ctx->tx_max++; - ctx->tx_speed = ctx->rx_speed = 0; return 0; error2: @@ -1048,7 +1047,6 @@ static void cdc_ncm_speed_change(struct usbnet *dev, struct usb_cdc_speed_change *data) { - struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; uint32_t rx_speed = le32_to_cpu(data->DLBitRRate); uint32_t tx_speed = le32_to_cpu(data->ULBitRate); @@ -1056,25 +1054,20 @@ cdc_ncm_speed_change(struct usbnet *dev, * Currently the USB-NET API does not support reporting the actual * device speed. Do print it instead. */ - if ((tx_speed != ctx->tx_speed) || (rx_speed != ctx->rx_speed)) { - ctx->tx_speed = tx_speed; - ctx->rx_speed = rx_speed; - - if ((tx_speed > 1000000) && (rx_speed > 1000000)) { - printk(KERN_INFO KBUILD_MODNAME - ": %s: %u mbit/s downlink " - "%u mbit/s uplink\n", - dev->net->name, - (unsigned int)(rx_speed / 1000000U), - (unsigned int)(tx_speed / 1000000U)); - } else { - printk(KERN_INFO KBUILD_MODNAME - ": %s: %u kbit/s downlink " - "%u kbit/s uplink\n", - dev->net->name, - (unsigned int)(rx_speed / 1000U), - (unsigned int)(tx_speed / 1000U)); - } + if ((tx_speed > 1000000) && (rx_speed > 1000000)) { + printk(KERN_INFO KBUILD_MODNAME + ": %s: %u mbit/s downlink " + "%u mbit/s uplink\n", + dev->net->name, + (unsigned int)(rx_speed / 1000000U), + (unsigned int)(tx_speed / 1000000U)); + } else { + printk(KERN_INFO KBUILD_MODNAME + ": %s: %u kbit/s downlink " + "%u kbit/s uplink\n", + dev->net->name, + (unsigned int)(rx_speed / 1000U), + (unsigned int)(tx_speed / 1000U)); } } @@ -1111,8 +1104,6 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) dev->net->name, ctx->connected ? "" : "dis"); usbnet_link_change(dev, ctx->connected, 0); - if (!ctx->connected) - ctx->tx_speed = ctx->rx_speed = 0; break; case USB_CDC_NOTIFY_SPEED_CHANGE: diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 059dcc93c4d8..f14af3dd0cce 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -110,8 +110,6 @@ struct cdc_ncm_ctx { u32 tx_timer_pending; u32 tx_curr_frame_num; - u32 rx_speed; - u32 tx_speed; u32 rx_max; u32 tx_max; u32 max_datagram_size; -- cgit v1.2.3-71-gd317 From 6a9612e2cb22b3fd6a7304dcbf2b4ee1cf2104b2 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 1 Nov 2013 11:16:45 +0100 Subject: net: cdc_ncm: remove ncm_parm field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moving the call to cdc_ncm_setup() after the endpoint setup removes the last remaining reference to ncm_parm outside cdc_ncm_setup. Collecting all the ncm_parm based calculations in cdc_ncm_setup improves readability. Cc: Alexey Orishko Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 46 ++++++++++++++++++++++----------------------- include/linux/usb/cdc_ncm.h | 1 - 2 files changed, 23 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index fc36a99f8183..4de3a5423e87 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -83,6 +83,7 @@ cdc_ncm_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info) static u8 cdc_ncm_setup(struct usbnet *dev) { struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + struct usb_cdc_ncm_ntb_parameters ncm_parm; u32 val; u8 flags; u8 iface_no; @@ -97,22 +98,22 @@ static u8 cdc_ncm_setup(struct usbnet *dev) err = usbnet_read_cmd(dev, USB_CDC_GET_NTB_PARAMETERS, USB_TYPE_CLASS | USB_DIR_IN |USB_RECIP_INTERFACE, - 0, iface_no, &ctx->ncm_parm, - sizeof(ctx->ncm_parm)); + 0, iface_no, &ncm_parm, + sizeof(ncm_parm)); if (err < 0) { pr_debug("failed GET_NTB_PARAMETERS\n"); return 1; } /* read correct set of parameters according to device mode */ - ctx->rx_max = le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize); - ctx->tx_max = le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize); - ctx->tx_remainder = le16_to_cpu(ctx->ncm_parm.wNdpOutPayloadRemainder); - ctx->tx_modulus = le16_to_cpu(ctx->ncm_parm.wNdpOutDivisor); - ctx->tx_ndp_modulus = le16_to_cpu(ctx->ncm_parm.wNdpOutAlignment); + ctx->rx_max = le32_to_cpu(ncm_parm.dwNtbInMaxSize); + ctx->tx_max = le32_to_cpu(ncm_parm.dwNtbOutMaxSize); + ctx->tx_remainder = le16_to_cpu(ncm_parm.wNdpOutPayloadRemainder); + ctx->tx_modulus = le16_to_cpu(ncm_parm.wNdpOutDivisor); + ctx->tx_ndp_modulus = le16_to_cpu(ncm_parm.wNdpOutAlignment); /* devices prior to NCM Errata shall set this field to zero */ - ctx->tx_max_datagrams = le16_to_cpu(ctx->ncm_parm.wNtbOutMaxDatagrams); - ntb_fmt_supported = le16_to_cpu(ctx->ncm_parm.bmNtbFormatsSupported); + ctx->tx_max_datagrams = le16_to_cpu(ncm_parm.wNtbOutMaxDatagrams); + ntb_fmt_supported = le16_to_cpu(ncm_parm.bmNtbFormatsSupported); eth_hlen = ETH_HLEN; min_dgram_size = CDC_NCM_MIN_DATAGRAM_SIZE; @@ -153,7 +154,7 @@ static u8 cdc_ncm_setup(struct usbnet *dev) } /* inform device about NTB input size changes */ - if (ctx->rx_max != le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)) { + if (ctx->rx_max != le32_to_cpu(ncm_parm.dwNtbInMaxSize)) { __le32 dwNtbInMaxSize = cpu_to_le32(ctx->rx_max); err = usbnet_write_cmd(dev, USB_CDC_SET_NTB_INPUT_SIZE, @@ -171,6 +172,14 @@ static u8 cdc_ncm_setup(struct usbnet *dev) pr_debug("Using default maximum transmit length=%d\n", CDC_NCM_NTB_MAX_SIZE_TX); ctx->tx_max = CDC_NCM_NTB_MAX_SIZE_TX; + + /* Adding a pad byte here simplifies the handling in + * cdc_ncm_fill_tx_frame, by making tx_max always + * represent the real skb max size. + */ + if (ctx->tx_max % usb_maxpacket(dev->udev, dev->out, 1) == 0) + ctx->tx_max++; + } /* @@ -473,10 +482,6 @@ advance: if (temp) goto error2; - /* initialize data interface */ - if (cdc_ncm_setup(dev)) - goto error2; - /* configure data interface */ temp = usb_set_interface(dev->udev, iface_no, data_altsetting); if (temp) @@ -487,6 +492,10 @@ advance: if (!dev->in || !dev->out || !dev->status) goto error2; + /* initialize data interface */ + if (cdc_ncm_setup(dev)) + goto error2; + dev->net->ethtool_ops = &cdc_ncm_ethtool_ops; usb_set_intfdata(ctx->data, dev); @@ -501,15 +510,6 @@ advance: dev->rx_urb_size = ctx->rx_max; - /* cdc_ncm_setup will override dwNtbOutMaxSize if it is - * outside the sane range. Adding a pad byte here if necessary - * simplifies the handling in cdc_ncm_fill_tx_frame, making - * tx_max always represent the real skb max size. - */ - if (ctx->tx_max != le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize) && - ctx->tx_max % usb_maxpacket(dev->udev, dev->out, 1) == 0) - ctx->tx_max++; - return 0; error2: diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index f14af3dd0cce..89b52a0fe4b9 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -88,7 +88,6 @@ #define cdc_ncm_data_intf_is_mbim(x) ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB) struct cdc_ncm_ctx { - struct usb_cdc_ncm_ntb_parameters ncm_parm; struct hrtimer tx_timer; struct tasklet_struct bh; -- cgit v1.2.3-71-gd317 From 832922362e1308aaef95a43383577d56f51fbc3c Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 1 Nov 2013 11:16:47 +0100 Subject: net: cdc_ncm: remove descriptor pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit header_desc was completely unused and union_desc was never used outside cdc_ncm_bind_common. Cc: Alexey Orishko Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 12 ++++++------ include/linux/usb/cdc_ncm.h | 4 +--- 2 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index b8de8ddd6d98..435fcc75d706 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -372,6 +372,7 @@ static const struct ethtool_ops cdc_ncm_ethtool_ops = { int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting) { + const struct usb_cdc_union_desc *union_desc = NULL; struct cdc_ncm_ctx *ctx; struct usb_driver *driver; u8 *buf; @@ -406,16 +407,15 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ switch (buf[2]) { case USB_CDC_UNION_TYPE: - if (buf[0] < sizeof(*(ctx->union_desc))) + if (buf[0] < sizeof(*union_desc)) break; - ctx->union_desc = - (const struct usb_cdc_union_desc *)buf; + union_desc = (const struct usb_cdc_union_desc *)buf; ctx->control = usb_ifnum_to_if(dev->udev, - ctx->union_desc->bMasterInterface0); + union_desc->bMasterInterface0); ctx->data = usb_ifnum_to_if(dev->udev, - ctx->union_desc->bSlaveInterface0); + union_desc->bSlaveInterface0); break; case USB_CDC_ETHERNET_TYPE: @@ -458,7 +458,7 @@ advance: } /* some buggy devices have an IAD but no CDC Union */ - if (!ctx->union_desc && intf->intf_assoc && intf->intf_assoc->bInterfaceCount == 2) { + if (!union_desc && intf->intf_assoc && intf->intf_assoc->bInterfaceCount == 2) { ctx->control = intf; ctx->data = usb_ifnum_to_if(dev->udev, intf->cur_altsetting->desc.bInterfaceNumber + 1); dev_dbg(&intf->dev, "CDC Union missing - got slave from IAD\n"); diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 89b52a0fe4b9..cad54ad4ad12 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -92,9 +92,7 @@ struct cdc_ncm_ctx { struct tasklet_struct bh; const struct usb_cdc_ncm_desc *func_desc; - const struct usb_cdc_mbim_desc *mbim_desc; - const struct usb_cdc_header_desc *header_desc; - const struct usb_cdc_union_desc *union_desc; + const struct usb_cdc_mbim_desc *mbim_desc; const struct usb_cdc_ether_desc *ether_desc; struct usb_interface *control; -- cgit v1.2.3-71-gd317 From 6dd13e83ce37f716e36085cb8b58779da1e98f6d Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 1 Nov 2013 11:16:57 +0100 Subject: net: cdc_ncm: drop "extern" from header declarations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Alexey Orishko Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- include/linux/usb/cdc_ncm.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index cad54ad4ad12..2300f7492927 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -119,11 +119,11 @@ struct cdc_ncm_ctx { u16 connected; }; -extern u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf); -extern int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); -extern void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); -extern struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); -extern int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); -extern int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset); +u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf); +int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); +void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); +struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); +int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); +int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset); #endif /* __LINUX_USB_CDC_NCM_H */ -- cgit v1.2.3-71-gd317 From 6e95fcaa42e5078ac265964deebed597f9eae07a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Oct 2013 11:50:49 +0100 Subject: lib: crc32: add functionality to combine two crc32{, c}s in GF(2) This patch adds a combinator to merge two or more crc32{,c}s into a new one. This is useful for checksum computations of fragmented skbs that use crc32/crc32c as checksums. The arithmetics for combining both in the GF(2) was taken and slightly modified from zlib. Only passing two crcs is insufficient as two crcs and the length of the second piece is needed for merging. The code is made generic, so that only polynomials need to be passed for crc32_le resp. crc32c_le. Signed-off-by: Daniel Borkmann Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/crc32.h | 40 +++++++++++++++++++++++++ lib/crc32.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 68267b64bb98..7d275c4fc011 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -11,8 +11,48 @@ extern u32 crc32_le(u32 crc, unsigned char const *p, size_t len); extern u32 crc32_be(u32 crc, unsigned char const *p, size_t len); +/** + * crc32_le_combine - Combine two crc32 check values into one. For two + * sequences of bytes, seq1 and seq2 with lengths len1 + * and len2, crc32_le() check values were calculated + * for each, crc1 and crc2. + * + * @crc1: crc32 of the first block + * @crc2: crc32 of the second block + * @len2: length of the second block + * + * Return: The crc32_le() check value of seq1 and seq2 concatenated, + * requiring only crc1, crc2, and len2. Note: If seq_full denotes + * the concatenated memory area of seq1 with seq2, and crc_full + * the crc32_le() value of seq_full, then crc_full == + * crc32_le_combine(crc1, crc2, len2) when crc_full was seeded + * with the same initializer as crc1, and crc2 seed was 0. See + * also crc32_combine_test(). + */ +extern u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2); + extern u32 __crc32c_le(u32 crc, unsigned char const *p, size_t len); +/** + * __crc32c_le_combine - Combine two crc32c check values into one. For two + * sequences of bytes, seq1 and seq2 with lengths len1 + * and len2, __crc32c_le() check values were calculated + * for each, crc1 and crc2. + * + * @crc1: crc32c of the first block + * @crc2: crc32c of the second block + * @len2: length of the second block + * + * Return: The __crc32c_le() check value of seq1 and seq2 concatenated, + * requiring only crc1, crc2, and len2. Note: If seq_full denotes + * the concatenated memory area of seq1 with seq2, and crc_full + * the __crc32c_le() value of seq_full, then crc_full == + * __crc32c_le_combine(crc1, crc2, len2) when crc_full was + * seeded with the same initializer as crc1, and crc2 seed + * was 0. See also crc32c_combine_test(). + */ +extern u32 __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2); + #define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length) /* diff --git a/lib/crc32.c b/lib/crc32.c index 429d61ce6aa0..595205cddc30 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -49,6 +49,30 @@ MODULE_AUTHOR("Matt Domsch "); MODULE_DESCRIPTION("Various CRC32 calculations"); MODULE_LICENSE("GPL"); +#define GF2_DIM 32 + +static u32 gf2_matrix_times(u32 *mat, u32 vec) +{ + u32 sum = 0; + + while (vec) { + if (vec & 1) + sum ^= *mat; + vec >>= 1; + mat++; + } + + return sum; +} + +static void gf2_matrix_square(u32 *square, u32 *mat) +{ + int i; + + for (i = 0; i < GF2_DIM; i++) + square[i] = gf2_matrix_times(mat, mat[i]); +} + #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 /* implements slicing-by-4 or slicing-by-8 algorithm */ @@ -130,6 +154,52 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) } #endif +/* For conditions of distribution and use, see copyright notice in zlib.h */ +static u32 crc32_generic_combine(u32 crc1, u32 crc2, size_t len2, + u32 polynomial) +{ + u32 even[GF2_DIM]; /* Even-power-of-two zeros operator */ + u32 odd[GF2_DIM]; /* Odd-power-of-two zeros operator */ + u32 row; + int i; + + if (len2 <= 0) + return crc1; + + /* Put operator for one zero bit in odd */ + odd[0] = polynomial; + row = 1; + for (i = 1; i < GF2_DIM; i++) { + odd[i] = row; + row <<= 1; + } + + gf2_matrix_square(even, odd); /* Put operator for two zero bits in even */ + gf2_matrix_square(odd, even); /* Put operator for four zero bits in odd */ + + /* Apply len2 zeros to crc1 (first square will put the operator for one + * zero byte, eight zero bits, in even). + */ + do { + /* Apply zeros operator for this bit of len2 */ + gf2_matrix_square(even, odd); + if (len2 & 1) + crc1 = gf2_matrix_times(even, crc1); + len2 >>= 1; + /* If no more bits set, then done */ + if (len2 == 0) + break; + /* Another iteration of the loop with odd and even swapped */ + gf2_matrix_square(odd, even); + if (len2 & 1) + crc1 = gf2_matrix_times(odd, crc1); + len2 >>= 1; + } while (len2 != 0); + + crc1 ^= crc2; + return crc1; +} + /** * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II * CRC32/CRC32C @@ -200,8 +270,19 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE); } #endif +u32 __pure crc32_le_combine(u32 crc1, u32 crc2, size_t len2) +{ + return crc32_generic_combine(crc1, crc2, len2, CRCPOLY_LE); +} + +u32 __pure __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2) +{ + return crc32_generic_combine(crc1, crc2, len2, CRC32C_POLY_LE); +} EXPORT_SYMBOL(crc32_le); +EXPORT_SYMBOL(crc32_le_combine); EXPORT_SYMBOL(__crc32c_le); +EXPORT_SYMBOL(__crc32c_le_combine); /** * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 -- cgit v1.2.3-71-gd317 From 2817a336d4d533fb8b68719723cd60ea7dd7c09e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Oct 2013 11:50:51 +0100 Subject: net: skb_checksum: allow custom update/combine for walking skb Currently, skb_checksum walks over 1) linearized, 2) frags[], and 3) frag_list data and calculats the one's complement, a 32 bit result suitable for feeding into itself or csum_tcpudp_magic(), but unsuitable for SCTP as we're calculating CRC32c there. Hence, in order to not re-implement the very same function in SCTP (and maybe other protocols) over and over again, use an update() + combine() callback internally to allow for walking over the skb with different algorithms. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/skbuff.h | 13 ++++++++++--- include/net/checksum.h | 6 ++++++ net/core/skbuff.c | 31 +++++++++++++++++++++---------- 3 files changed, 37 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2c154976394b..44727b5d4981 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2360,8 +2360,6 @@ int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset, void skb_free_datagram(struct sock *sk, struct sk_buff *skb); void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); -__wsum skb_checksum(const struct sk_buff *skb, int offset, int len, - __wsum csum); int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, @@ -2373,9 +2371,18 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); - struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); +struct skb_checksum_ops { + __wsum (*update)(const void *mem, int len, __wsum wsum); + __wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len); +}; + +__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, + __wsum csum, const struct skb_checksum_ops *ops); +__wsum skb_checksum(const struct sk_buff *skb, int offset, int len, + __wsum csum); + static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) { diff --git a/include/net/checksum.h b/include/net/checksum.h index 8f59ca50477c..15f33fde826e 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -78,6 +78,12 @@ csum_block_add(__wsum csum, __wsum csum2, int offset) return csum_add(csum, (__force __wsum)sum); } +static inline __wsum +csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len) +{ + return csum_block_add(csum, csum2, offset); +} + static inline __wsum csum_block_sub(__wsum csum, __wsum csum2, int offset) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0ab32faa520f..31aab5376cb4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1928,9 +1928,8 @@ fault: EXPORT_SYMBOL(skb_store_bits); /* Checksum skb data. */ - -__wsum skb_checksum(const struct sk_buff *skb, int offset, - int len, __wsum csum) +__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, + __wsum csum, const struct skb_checksum_ops *ops) { int start = skb_headlen(skb); int i, copy = start - offset; @@ -1941,7 +1940,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, if (copy > 0) { if (copy > len) copy = len; - csum = csum_partial(skb->data + offset, copy, csum); + csum = ops->update(skb->data + offset, copy, csum); if ((len -= copy) == 0) return csum; offset += copy; @@ -1962,10 +1961,10 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, if (copy > len) copy = len; vaddr = kmap_atomic(skb_frag_page(frag)); - csum2 = csum_partial(vaddr + frag->page_offset + - offset - start, copy, 0); + csum2 = ops->update(vaddr + frag->page_offset + + offset - start, copy, 0); kunmap_atomic(vaddr); - csum = csum_block_add(csum, csum2, pos); + csum = ops->combine(csum, csum2, pos, copy); if (!(len -= copy)) return csum; offset += copy; @@ -1984,9 +1983,9 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, __wsum csum2; if (copy > len) copy = len; - csum2 = skb_checksum(frag_iter, offset - start, - copy, 0); - csum = csum_block_add(csum, csum2, pos); + csum2 = __skb_checksum(frag_iter, offset - start, + copy, 0, ops); + csum = ops->combine(csum, csum2, pos, copy); if ((len -= copy) == 0) return csum; offset += copy; @@ -1998,6 +1997,18 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, return csum; } +EXPORT_SYMBOL(__skb_checksum); + +__wsum skb_checksum(const struct sk_buff *skb, int offset, + int len, __wsum csum) +{ + const struct skb_checksum_ops ops = { + .update = csum_partial, + .combine = csum_block_add_ext, + }; + + return __skb_checksum(skb, offset, len, csum, &ops); +} EXPORT_SYMBOL(skb_checksum); /* Both of above in one bottle. */ -- cgit v1.2.3-71-gd317 From 74d332c13b2148ae934ea94dac1745ae92efe8e5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Oct 2013 13:10:44 -0700 Subject: net: extend net_device allocation to vmalloc() Joby Poriyath provided a xen-netback patch to reduce the size of xenvif structure as some netdev allocation could fail under memory pressure/fragmentation. This patch is handling the problem at the core level, allowing any netdev structures to use vmalloc() if kmalloc() failed. As vmalloc() adds overhead on a critical network path, add __GFP_REPEAT to kzalloc() flags to do this fallback only when really needed. Signed-off-by: Eric Dumazet Reported-by: Joby Poriyath Cc: Ben Hutchings Signed-off-by: David S. Miller --- Documentation/networking/netdevices.txt | 10 +++++----- include/linux/netdevice.h | 1 + net/core/dev.c | 22 +++++++++++++++++----- net/core/net-sysfs.c | 2 +- 4 files changed, 24 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt index c7ecc7080494..0b1cf6b2a592 100644 --- a/Documentation/networking/netdevices.txt +++ b/Documentation/networking/netdevices.txt @@ -10,12 +10,12 @@ network devices. struct net_device allocation rules ================================== Network device structures need to persist even after module is unloaded and -must be allocated with kmalloc. If device has registered successfully, -it will be freed on last use by free_netdev. This is required to handle the -pathologic case cleanly (example: rmmod mydriver padded; + + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} + /** * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -6239,7 +6249,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, /* ensure 32-byte alignment of whole construct */ alloc_size += NETDEV_ALIGN - 1; - p = kzalloc(alloc_size, GFP_KERNEL); + p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!p) + p = vzalloc(alloc_size); if (!p) return NULL; @@ -6248,7 +6260,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) - goto free_p; + goto free_dev; if (dev_addr_init(dev)) goto free_pcpu; @@ -6301,8 +6313,8 @@ free_pcpu: kfree(dev->_rx); #endif -free_p: - kfree(p); +free_dev: + netdev_freemem(dev); return NULL; } EXPORT_SYMBOL(alloc_netdev_mqs); @@ -6339,7 +6351,7 @@ void free_netdev(struct net_device *dev) /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { - kfree((char *)dev - dev->padded); + netdev_freemem(dev); return; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index d954b56b4e47..d03f2c9750fa 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1263,7 +1263,7 @@ static void netdev_release(struct device *d) BUG_ON(dev->reg_state != NETREG_RELEASED); kfree(dev->ifalias); - kfree((char *)dev - dev->padded); + netdev_freemem(dev); } static const void *net_namespace(struct device *d) -- cgit v1.2.3-71-gd317 From acddd5dd44d4fd9b45dd5ee69cd8b183052b1cdc Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 3 Nov 2013 10:03:18 +0200 Subject: net/mlx4_core: Fix reg/unreg vlan/mac to conform to the firmware spec The functions mlx4_register_vlan, mlx4_unregister_vlan, mlx4_register_mac, mlx4_unregister_mac all made illegal use of the out_param in multifunc mode to pass the port number. The firmware spec specifies that the port number should be passed in bits 8..15 of the input-modifier field for ALLOC_RES and FREE_RES (sections 20.15.1 and 20.15.2). For MAC register/unregister, this patch contains workarounds so that guests running previous kernels continue to work on a new Hypervisor, and guests running the new kernel will continue to work on old hypervisors. Vlan registeration capability is still not operational in multifunction mode, since the vlan wrapper functions are not implemented in this patch. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/port.c | 45 +++++++++++++++------- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 28 ++++++++------ include/linux/mlx4/device.h | 1 + 3 files changed, 49 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index d3d3106f588f..9433c1f6b0d4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -178,13 +178,24 @@ EXPORT_SYMBOL_GPL(__mlx4_register_mac); int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) { u64 out_param = 0; - int err; + int err = -EINVAL; if (mlx4_is_mfunc(dev)) { - set_param_l(&out_param, port); - err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC, - RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, - MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) { + err = mlx4_cmd_imm(dev, mac, &out_param, + ((u32) port) << 8 | (u32) RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + } + if (err && err == -EINVAL && mlx4_is_slave(dev)) { + /* retry using old REG_MAC format */ + set_param_l(&out_param, port); + err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (!err) + dev->flags |= MLX4_FLAG_OLD_REG_MAC; + } if (err) return err; @@ -231,10 +242,18 @@ void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac) u64 out_param = 0; if (mlx4_is_mfunc(dev)) { - set_param_l(&out_param, port); - (void) mlx4_cmd_imm(dev, mac, &out_param, RES_MAC, - RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, - MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) { + (void) mlx4_cmd_imm(dev, mac, &out_param, + ((u32) port) << 8 | (u32) RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + } else { + /* use old unregister mac format */ + set_param_l(&out_param, port); + (void) mlx4_cmd_imm(dev, mac, &out_param, RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + } return; } __mlx4_unregister_mac(dev, port, mac); @@ -374,8 +393,8 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index) return -EINVAL; if (mlx4_is_mfunc(dev)) { - set_param_l(&out_param, port); - err = mlx4_cmd_imm(dev, vlan, &out_param, RES_VLAN, + err = mlx4_cmd_imm(dev, vlan, &out_param, + ((u32) port) << 8 | (u32) RES_VLAN, RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (!err) @@ -418,8 +437,8 @@ void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index) u64 out_param = 0; if (mlx4_is_mfunc(dev)) { - set_param_l(&out_param, port); - (void) mlx4_cmd_imm(dev, index, &out_param, RES_VLAN, + (void) mlx4_cmd_imm(dev, index, &out_param, + ((u32) port) << 8 | (u32) RES_VLAN, RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index dd6876321116..a5aa3be554fe 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1443,7 +1443,7 @@ static void rem_slave_macs(struct mlx4_dev *dev, int slave) } static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, - u64 in_param, u64 *out_param) + u64 in_param, u64 *out_param, int in_port) { int err = -EINVAL; int port; @@ -1452,7 +1452,7 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (op != RES_OP_RESERVE_AND_MAP) return err; - port = get_param_l(out_param); + port = !in_port ? get_param_l(out_param) : in_port; mac = in_param; err = __mlx4_register_mac(dev, port, mac); @@ -1470,7 +1470,7 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, } static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, - u64 in_param, u64 *out_param) + u64 in_param, u64 *out_param, int port) { return 0; } @@ -1528,7 +1528,7 @@ int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave, int err; int alop = vhcr->op_modifier; - switch (vhcr->in_modifier) { + switch (vhcr->in_modifier & 0xFF) { case RES_QP: err = qp_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); @@ -1556,12 +1556,14 @@ int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave, case RES_MAC: err = mac_alloc_res(dev, slave, vhcr->op_modifier, alop, - vhcr->in_param, &vhcr->out_param); + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); break; case RES_VLAN: err = vlan_alloc_res(dev, slave, vhcr->op_modifier, alop, - vhcr->in_param, &vhcr->out_param); + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); break; case RES_COUNTER: @@ -1730,14 +1732,14 @@ static int srq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, } static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, - u64 in_param, u64 *out_param) + u64 in_param, u64 *out_param, int in_port) { int port; int err = 0; switch (op) { case RES_OP_RESERVE_AND_MAP: - port = get_param_l(out_param); + port = !in_port ? get_param_l(out_param) : in_port; mac_del_from_slave(dev, slave, in_param, port); __mlx4_unregister_mac(dev, port, in_param); break; @@ -1751,7 +1753,7 @@ static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, } static int vlan_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, - u64 in_param, u64 *out_param) + u64 in_param, u64 *out_param, int port) { return 0; } @@ -1803,7 +1805,7 @@ int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave, int err = -EINVAL; int alop = vhcr->op_modifier; - switch (vhcr->in_modifier) { + switch (vhcr->in_modifier & 0xFF) { case RES_QP: err = qp_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param); @@ -1831,12 +1833,14 @@ int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave, case RES_MAC: err = mac_free_res(dev, slave, vhcr->op_modifier, alop, - vhcr->in_param, &vhcr->out_param); + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); break; case RES_VLAN: err = vlan_free_res(dev, slave, vhcr->op_modifier, alop, - vhcr->in_param, &vhcr->out_param); + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); break; case RES_COUNTER: diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 9ad0c18495ad..297a16309f00 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -54,6 +54,7 @@ enum { MLX4_FLAG_MASTER = 1 << 2, MLX4_FLAG_SLAVE = 1 << 3, MLX4_FLAG_SRIOV = 1 << 4, + MLX4_FLAG_OLD_REG_MAC = 1 << 6, }; enum { -- cgit v1.2.3-71-gd317 From 2009d0059c084288f060b1ffe3d14229588acb67 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 3 Nov 2013 10:03:19 +0200 Subject: net/mlx4_en: Use vlan id instead of vlan index for unregistration Use of vlan_index created problems unregistering vlans on guests. In addition, tools delete vlan by tag, not by index, lets follow that. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 6 +---- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 +- drivers/net/ethernet/mellanox/mlx4/port.c | 27 ++++++++++++---------- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 2 +- include/linux/mlx4/device.h | 2 +- 6 files changed, 20 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index ae8eb4c4fb6c..887d62576f54 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1687,7 +1687,7 @@ static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if (NO_INDX != vp_oper->vlan_idx) { __mlx4_unregister_vlan(&priv->dev, - port, vp_oper->vlan_idx); + port, vp_oper->state.default_vlan); vp_oper->vlan_idx = NO_INDX; } if (NO_INDX != vp_oper->mac_idx) { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 85d91665d400..b5554121aca4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -417,7 +417,6 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; - int idx; en_dbg(HW, priv, "Killing VID:%d\n", vid); @@ -425,10 +424,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, /* Remove VID from port VLAN filter */ mutex_lock(&mdev->state_lock); - if (!mlx4_find_cached_vlan(mdev->dev, priv->port, vid, &idx)) - mlx4_unregister_vlan(mdev->dev, priv->port, idx); - else - en_dbg(HW, priv, "could not find vid %d in cache\n", vid); + mlx4_unregister_vlan(mdev->dev, priv->port, vid); if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 348bb8c7d9a7..f2ad4f61f58c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1111,7 +1111,7 @@ int mlx4_change_port_types(struct mlx4_dev *dev, void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table); void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table); -void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index); +void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz); diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 9433c1f6b0d4..caaa15470395 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -406,23 +406,26 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index) } EXPORT_SYMBOL_GPL(mlx4_register_vlan); -void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index) +void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) { struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; + int index; - if (index < MLX4_VLAN_REGULAR) { - mlx4_warn(dev, "Trying to free special vlan index %d\n", index); - return; + mutex_lock(&table->mutex); + if (mlx4_find_cached_vlan(dev, port, vlan, &index)) { + mlx4_warn(dev, "vlan 0x%x is not in the vlan table\n", vlan); + goto out; } - mutex_lock(&table->mutex); - if (!table->refs[index]) { - mlx4_warn(dev, "No vlan entry for index %d\n", index); + if (index < MLX4_VLAN_REGULAR) { + mlx4_warn(dev, "Trying to free special vlan index %d\n", index); goto out; } + if (--table->refs[index]) { - mlx4_dbg(dev, "Have more references for index %d," - "no need to modify vlan table\n", index); + mlx4_dbg(dev, "Have %d more references for index %d," + "no need to modify vlan table\n", table->refs[index], + index); goto out; } table->entries[index] = 0; @@ -432,19 +435,19 @@ out: mutex_unlock(&table->mutex); } -void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index) +void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) { u64 out_param = 0; if (mlx4_is_mfunc(dev)) { - (void) mlx4_cmd_imm(dev, index, &out_param, + (void) mlx4_cmd_imm(dev, vlan, &out_param, ((u32) port) << 8 | (u32) RES_VLAN, RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); return; } - __mlx4_unregister_vlan(dev, port, index); + __mlx4_unregister_vlan(dev, port, vlan); } EXPORT_SYMBOL_GPL(mlx4_unregister_vlan); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index a5aa3be554fe..993a2ef13866 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4085,7 +4085,7 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_VLAN && !errors && NO_INDX != work->orig_vlan_ix) __mlx4_unregister_vlan(&work->priv->dev, work->port, - work->orig_vlan_ix); + work->orig_vlan_id); out: kfree(work); return; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 297a16309f00..e2e92885bdc1 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1079,7 +1079,7 @@ int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, u8 *pg, u16 *ratelimit); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); -void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index); +void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, int npages, u64 iova, u32 *lkey, u32 *rkey); -- cgit v1.2.3-71-gd317 From 5a0d0a6161aecbbc76e4c1d2b82e4c7cef88bb29 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 3 Nov 2013 10:03:23 +0200 Subject: mlx4: Structures and init/teardown for VF resource quotas This is step #1 for implementing SRIOV resource quotas for VFs. Quotas are implemented per resource type for VFs and the PF, to prevent any entity from simply grabbing all the resources for itself and leaving the other entities unable to obtain such resources. Resources which are allocated using quotas: QPs, CQs, SRQs, MPTs, MTTs, MAC, VLAN, and Counters. The quota system works as follows: Each entity (VF or PF) is given a max number of a given resource (its quota), and a guaranteed minimum number for each resource (starvation prevention). For QPs, CQs, SRQs, MPTs and MTTs: 50% of the available quantity for the resource is divided equally among the PF and all the active VFs (i.e., the number of VFs in the mlx4_core module parameter "num_vfs"). This 50% represents the "guaranteed minimum" pool. The other 50% is the "free pool", allocated on a first-come-first-serve basis. For each VF/PF, resources are first allocated from its "guaranteed-minimum" pool. When that pool is exhausted, the driver attempts to allocate from the resource "free-pool". The quota (i.e., max) for the VFs and the PF is: The free-pool amount (50% of the real max) + the guaranteed minimum For MACs: Guarantee 2 MACs per VF/PF per port. As a result, since we have only 128 MACs per port, reduce the allowable number of VFs from 64 to 63. Any remaining MACs are put into a free pool. For VLANs: For the PF, the per-port quota is 128 and guarantee is 64 (to allow the PF to register at least a VLAN per VF in VST mode). For the VFs, the per-port quota is 64 and the guarantee is 0. We assume that VGT VFs are trusted not to abuse the VLAN resource. For Counters: For all functions (PF and VFs), the quota is 128 and the guarantee is 0. In this patch, we define the needed structures, which are added to the resource-tracker struct. In addition, we do initialization for the resource quota, and adjust the query_device response to use quotas rather than resource maxima. As part of the implementation, we introduce a new field in mlx4_dev: quotas. This field holds the resource quotas used to report maxima to the upper layers (ib_core, via query_device). The HCA maxima of these values are passed to the VFs (via QUERY_HCA) so that they may continue to use these in handling QPs, CQs, SRQs and MPTs. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 8 +- drivers/net/ethernet/mellanox/mlx4/fw.c | 11 +- drivers/net/ethernet/mellanox/mlx4/main.c | 32 +++-- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 17 +++ drivers/net/ethernet/mellanox/mlx4/qp.c | 3 +- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 157 ++++++++++++++++++++- include/linux/mlx4/device.h | 17 +++ 7 files changed, 222 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f0612645de99..7567437dbd34 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -177,18 +177,18 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_mr_size = ~0ull; props->page_size_cap = dev->dev->caps.page_size_cap; - props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps; + props->max_qp = dev->dev->quotas.qp; props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; props->max_sge = min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg); - props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs; + props->max_cq = dev->dev->quotas.cq; props->max_cqe = dev->dev->caps.max_cqes; - props->max_mr = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws; + props->max_mr = dev->dev->quotas.mpt; props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds; props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma; props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma; props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; - props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs; + props->max_srq = dev->dev->quotas.srq; props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; props->max_srq_sge = dev->dev->caps.max_srq_sge; props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index c151e7a6710a..f8c88c3ad9fc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -177,6 +177,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { + struct mlx4_priv *priv = mlx4_priv(dev); u8 field; u32 size; int err = 0; @@ -250,13 +251,13 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, field = 0; /* protected FMR support not available as yet */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FMR_OFFSET); - size = dev->caps.num_qps; + size = priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); - size = dev->caps.num_srqs; + size = priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); - size = dev->caps.num_cqs; + size = priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); size = dev->caps.num_eqs; @@ -265,10 +266,10 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, size = dev->caps.reserved_eqs; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); - size = dev->caps.num_mpts; + size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); - size = dev->caps.num_mtts; + size = priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); size = dev->caps.num_mgms + dev->caps.num_amgms; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 179d26709c94..7d2628dfdc29 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -562,13 +562,17 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) } dev->caps.num_ports = func_cap.num_ports; - dev->caps.num_qps = func_cap.qp_quota; - dev->caps.num_srqs = func_cap.srq_quota; - dev->caps.num_cqs = func_cap.cq_quota; - dev->caps.num_eqs = func_cap.max_eq; - dev->caps.reserved_eqs = func_cap.reserved_eq; - dev->caps.num_mpts = func_cap.mpt_quota; - dev->caps.num_mtts = func_cap.mtt_quota; + dev->quotas.qp = func_cap.qp_quota; + dev->quotas.srq = func_cap.srq_quota; + dev->quotas.cq = func_cap.cq_quota; + dev->quotas.mpt = func_cap.mpt_quota; + dev->quotas.mtt = func_cap.mtt_quota; + dev->caps.num_qps = 1 << hca_param.log_num_qps; + dev->caps.num_srqs = 1 << hca_param.log_num_srqs; + dev->caps.num_cqs = 1 << hca_param.log_num_cqs; + dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; + dev->caps.num_eqs = func_cap.max_eq; + dev->caps.reserved_eqs = func_cap.reserved_eq; dev->caps.num_pds = MLX4_NUM_PDS; dev->caps.num_mgms = 0; dev->caps.num_amgms = 0; @@ -2102,9 +2106,15 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) "aborting.\n"); return err; } - if (num_vfs > MLX4_MAX_NUM_VF) { - printk(KERN_ERR "There are more VF's (%d) than allowed(%d)\n", - num_vfs, MLX4_MAX_NUM_VF); + + /* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS + * per port, we must limit the number of VFs to 63 (since their are + * 128 MACs) + */ + if (num_vfs >= MLX4_MAX_NUM_VF) { + dev_err(&pdev->dev, + "Requested more VF's (%d) than allowed (%d)\n", + num_vfs, MLX4_MAX_NUM_VF - 1); return -EINVAL; } @@ -2322,6 +2332,8 @@ slave_start: if (err) goto err_steer; + mlx4_init_quotas(dev); + for (port = 1; port <= dev->caps.num_ports; port++) { err = mlx4_init_port_info(dev, port); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 97941269bc14..e7eb86ecc6ea 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -504,12 +504,27 @@ struct slave_list { struct list_head res_list[MLX4_NUM_OF_RESOURCE_TYPE]; }; +struct resource_allocator { + union { + int res_reserved; + int res_port_rsvd[MLX4_MAX_PORTS]; + }; + union { + int res_free; + int res_port_free[MLX4_MAX_PORTS]; + }; + int *quota; + int *allocated; + int *guaranteed; +}; + struct mlx4_resource_tracker { spinlock_t lock; /* tree for each resources */ struct rb_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE]; /* num_of_slave's lists, one per slave */ struct slave_list *slave_list; + struct resource_allocator res_alloc[MLX4_NUM_OF_RESOURCE_TYPE]; }; #define SLAVE_EVENT_EQ_SIZE 128 @@ -1253,4 +1268,6 @@ static inline spinlock_t *mlx4_tlock(struct mlx4_dev *dev) void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work); +void mlx4_init_quotas(struct mlx4_dev *dev); + #endif /* MLX4_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index e891b058c1be..2715e61dbb74 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -480,8 +480,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) */ err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps, - (1 << 23) - 1, dev->phys_caps.base_sqpn + 8 + - 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev), + (1 << 23) - 1, mlx4_num_reserved_sqps(dev), reserved_from_top); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 35863889bec0..cc5d6d0aad16 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -284,10 +284,59 @@ static const char *ResourceType(enum mlx4_resource rt) } static void rem_slave_vlans(struct mlx4_dev *dev, int slave); +static inline void initialize_res_quotas(struct mlx4_dev *dev, + struct resource_allocator *res_alloc, + enum mlx4_resource res_type, + int vf, int num_instances) +{ + res_alloc->guaranteed[vf] = num_instances / (2 * (dev->num_vfs + 1)); + res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf]; + if (vf == mlx4_master_func_num(dev)) { + res_alloc->res_free = num_instances; + if (res_type == RES_MTT) { + /* reserved mtts will be taken out of the PF allocation */ + res_alloc->res_free += dev->caps.reserved_mtts; + res_alloc->guaranteed[vf] += dev->caps.reserved_mtts; + res_alloc->quota[vf] += dev->caps.reserved_mtts; + } + } +} + +void mlx4_init_quotas(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int pf; + + /* quotas for VFs are initialized in mlx4_slave_cap */ + if (mlx4_is_slave(dev)) + return; + + if (!mlx4_is_mfunc(dev)) { + dev->quotas.qp = dev->caps.num_qps - dev->caps.reserved_qps - + mlx4_num_reserved_sqps(dev); + dev->quotas.cq = dev->caps.num_cqs - dev->caps.reserved_cqs; + dev->quotas.srq = dev->caps.num_srqs - dev->caps.reserved_srqs; + dev->quotas.mtt = dev->caps.num_mtts - dev->caps.reserved_mtts; + dev->quotas.mpt = dev->caps.num_mpts - dev->caps.reserved_mrws; + return; + } + + pf = mlx4_master_func_num(dev); + dev->quotas.qp = + priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[pf]; + dev->quotas.cq = + priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[pf]; + dev->quotas.srq = + priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[pf]; + dev->quotas.mtt = + priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[pf]; + dev->quotas.mpt = + priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf]; +} int mlx4_init_resource_tracker(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - int i; + int i, j; int t; priv->mfunc.master.res_tracker.slave_list = @@ -308,8 +357,104 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) for (i = 0 ; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) priv->mfunc.master.res_tracker.res_tree[i] = RB_ROOT; + for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { + struct resource_allocator *res_alloc = + &priv->mfunc.master.res_tracker.res_alloc[i]; + res_alloc->quota = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + res_alloc->guaranteed = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + if (i == RES_MAC || i == RES_VLAN) + res_alloc->allocated = kzalloc(MLX4_MAX_PORTS * + (dev->num_vfs + 1) * sizeof(int), + GFP_KERNEL); + else + res_alloc->allocated = kzalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + + if (!res_alloc->quota || !res_alloc->guaranteed || + !res_alloc->allocated) + goto no_mem_err; + + for (t = 0; t < dev->num_vfs + 1; t++) { + switch (i) { + case RES_QP: + initialize_res_quotas(dev, res_alloc, RES_QP, + t, dev->caps.num_qps - + dev->caps.reserved_qps - + mlx4_num_reserved_sqps(dev)); + break; + case RES_CQ: + initialize_res_quotas(dev, res_alloc, RES_CQ, + t, dev->caps.num_cqs - + dev->caps.reserved_cqs); + break; + case RES_SRQ: + initialize_res_quotas(dev, res_alloc, RES_SRQ, + t, dev->caps.num_srqs - + dev->caps.reserved_srqs); + break; + case RES_MPT: + initialize_res_quotas(dev, res_alloc, RES_MPT, + t, dev->caps.num_mpts - + dev->caps.reserved_mrws); + break; + case RES_MTT: + initialize_res_quotas(dev, res_alloc, RES_MTT, + t, dev->caps.num_mtts - + dev->caps.reserved_mtts); + break; + case RES_MAC: + if (t == mlx4_master_func_num(dev)) { + res_alloc->quota[t] = MLX4_MAX_MAC_NUM; + res_alloc->guaranteed[t] = 2; + for (j = 0; j < MLX4_MAX_PORTS; j++) + res_alloc->res_port_free[j] = MLX4_MAX_MAC_NUM; + } else { + res_alloc->quota[t] = MLX4_MAX_MAC_NUM; + res_alloc->guaranteed[t] = 2; + } + break; + case RES_VLAN: + if (t == mlx4_master_func_num(dev)) { + res_alloc->quota[t] = MLX4_MAX_VLAN_NUM; + res_alloc->guaranteed[t] = MLX4_MAX_VLAN_NUM / 2; + for (j = 0; j < MLX4_MAX_PORTS; j++) + res_alloc->res_port_free[j] = + res_alloc->quota[t]; + } else { + res_alloc->quota[t] = MLX4_MAX_VLAN_NUM / 2; + res_alloc->guaranteed[t] = 0; + } + break; + case RES_COUNTER: + res_alloc->quota[t] = dev->caps.max_counters; + res_alloc->guaranteed[t] = 0; + if (t == mlx4_master_func_num(dev)) + res_alloc->res_free = res_alloc->quota[t]; + break; + default: + break; + } + if (i == RES_MAC || i == RES_VLAN) { + for (j = 0; j < MLX4_MAX_PORTS; j++) + res_alloc->res_port_rsvd[j] += + res_alloc->guaranteed[t]; + } else { + res_alloc->res_reserved += res_alloc->guaranteed[t]; + } + } + } spin_lock_init(&priv->mfunc.master.res_tracker.lock); - return 0 ; + return 0; + +no_mem_err: + for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { + kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated); + priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL; + kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed); + priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL; + kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota); + priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL; + } + return -ENOMEM; } void mlx4_free_resource_tracker(struct mlx4_dev *dev, @@ -333,6 +478,14 @@ void mlx4_free_resource_tracker(struct mlx4_dev *dev, } if (type != RES_TR_FREE_SLAVES_ONLY) { + for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { + kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated); + priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL; + kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed); + priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL; + kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota); + priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL; + } kfree(priv->mfunc.master.res_tracker.slave_list); priv->mfunc.master.res_tracker.slave_list = NULL; } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e2e92885bdc1..f6f59271f857 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -641,12 +641,23 @@ struct mlx4_counter { __be64 tx_bytes; }; +struct mlx4_quotas { + int qp; + int cq; + int srq; + int mpt; + int mtt; + int counter; + int xrcd; +}; + struct mlx4_dev { struct pci_dev *pdev; unsigned long flags; unsigned long num_slaves; struct mlx4_caps caps; struct mlx4_phys_caps phys_caps; + struct mlx4_quotas quotas; struct radix_tree_root qp_table_tree; u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; @@ -772,6 +783,12 @@ static inline int mlx4_is_master(struct mlx4_dev *dev) return dev->flags & MLX4_FLAG_MASTER; } +static inline int mlx4_num_reserved_sqps(struct mlx4_dev *dev) +{ + return dev->phys_caps.base_sqpn + 8 + + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev); +} + static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn) { return (qpn < dev->phys_caps.base_sqpn + 8 + -- cgit v1.2.3-71-gd317 From f8e617e100d7369a0108f96abf4414e9fb82ced7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 1 Nov 2013 14:07:47 +0800 Subject: net: introduce skb_coalesce_rx_frag() Sometimes we need to coalesce the rx frags to avoid frag list. One example is virtio-net driver which tries to use small frags for both MTU sized packet and GSO packet. So this patch introduce skb_coalesce_rx_frag() to do this. Cc: Rusty Russell Cc: Michael S. Tsirkin Cc: Michael Dalton Cc: Eric Dumazet Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +++ net/core/skbuff.c | 12 ++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 44727b5d4981..2e153b69d318 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1372,6 +1372,9 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i, void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size, unsigned int truesize); +void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, + unsigned int truesize); + #define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) #define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frag_list(skb)) #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e4115597b38b..3735fad5616e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -476,6 +476,18 @@ void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, } EXPORT_SYMBOL(skb_add_rx_frag); +void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, + unsigned int truesize) +{ + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + skb_frag_size_add(frag, size); + skb->len += size; + skb->data_len += size; + skb->truesize += truesize; +} +EXPORT_SYMBOL(skb_coalesce_rx_frag); + static void skb_drop_list(struct sk_buff **listp) { kfree_skb_list(*listp); -- cgit v1.2.3-71-gd317 From 2f69702c4db5f1c3149fd17fe30bdeb87cba9698 Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Mon, 4 Nov 2013 09:50:47 +0100 Subject: net: cdc_ncm: Export cdc_ncm_{tx, rx}_fixup functions for re-use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some drivers implementing NCM-like protocols, may re-use those functions, as is the case in the huawei_cdc_ncm driver. Export them via EXPORT_SYMBOL_GPL, in accordance with how other functions have been exported. Signed-off-by: Enrico Mioso Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 6 ++++-- include/linux/usb/cdc_ncm.h | 3 +++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 11c703337577..1763195bd599 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -830,7 +830,7 @@ static void cdc_ncm_txpath_bh(unsigned long param) } } -static struct sk_buff * +struct sk_buff * cdc_ncm_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { struct sk_buff *skb_out; @@ -857,6 +857,7 @@ error: return NULL; } +EXPORT_SYMBOL_GPL(cdc_ncm_tx_fixup); /* verify NTB header and return offset of first NDP, or negative error */ int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in) @@ -943,7 +944,7 @@ error: } EXPORT_SYMBOL_GPL(cdc_ncm_rx_verify_ndp16); -static int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in) +int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in) { struct sk_buff *skb; struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; @@ -1019,6 +1020,7 @@ err_ndp: error: return 0; } +EXPORT_SYMBOL_GPL(cdc_ncm_rx_fixup); static void cdc_ncm_speed_change(struct usbnet *dev, diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 2300f7492927..c3fa80745996 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -125,5 +125,8 @@ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset); +struct sk_buff * +cdc_ncm_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags); +int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in); #endif /* __LINUX_USB_CDC_NCM_H */ -- cgit v1.2.3-71-gd317 From d32435391974e39c35ade4d115f17c538a96a708 Mon Sep 17 00:00:00 2001 From: Eyal Perry Date: Wed, 6 Nov 2013 15:37:23 +0200 Subject: net/vlan: Provide read access to the vlan egress map Provide a method for read-only access to the vlan device egress mapping. Do this by refactoring vlan_dev_get_egress_qos_mask() such that now it receives as an argument the skb priority instead of pointer to the skb. Such an access is needed for the IBoE stack where the control plane goes through the network stack. This is an add-on step on top of commit d4a968658c "net/route: export symbol ip_tos2prio" which allowed the RDMA-CM to use ip_tos2prio. Signed-off-by: Eyal Perry Signed-off-by: Hadar Hen Zion Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 9 ++++++++- net/8021q/vlan_dev.c | 18 ++++++++++++------ 2 files changed, 20 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 715c343f7c00..f3088a0112cf 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -88,7 +88,8 @@ extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, __be16 vlan_proto, u16 vlan_id); extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); extern u16 vlan_dev_vlan_id(const struct net_device *dev); - +extern u16 vlan_dev_get_egress_qos_mask(struct net_device *dev, + u32 skprio); extern bool vlan_do_receive(struct sk_buff **skb); extern struct sk_buff *vlan_untag(struct sk_buff *skb); @@ -121,6 +122,12 @@ static inline u16 vlan_dev_vlan_id(const struct net_device *dev) return 0; } +static inline u16 vlan_dev_get_egress_qos_mask(struct net_device *dev, + u32 skprio) +{ + return 0; +} + static inline bool vlan_do_receive(struct sk_buff **skb) { return false; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 09bf1c38805b..13904a414929 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -69,15 +69,15 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb) } static inline u16 -vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb) +__vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio) { struct vlan_priority_tci_mapping *mp; smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */ - mp = vlan_dev_priv(dev)->egress_priority_map[(skb->priority & 0xF)]; + mp = vlan_dev_priv(dev)->egress_priority_map[(skprio & 0xF)]; while (mp) { - if (mp->priority == skb->priority) { + if (mp->priority == skprio) { return mp->vlan_qos; /* This should already be shifted * to mask correctly with the * VLAN's TCI */ @@ -87,6 +87,12 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb) return 0; } +u16 vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio) +{ + return __vlan_dev_get_egress_qos_mask(dev, skprio); +} +EXPORT_SYMBOL(vlan_dev_get_egress_qos_mask); + /* * Create the VLAN header for an arbitrary protocol layer * @@ -111,7 +117,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN); vlan_tci = vlan->vlan_id; - vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); + vlan_tci |= __vlan_dev_get_egress_qos_mask(dev, skb->priority); vhdr->h_vlan_TCI = htons(vlan_tci); /* @@ -168,7 +174,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, vlan->flags & VLAN_FLAG_REORDER_HDR) { u16 vlan_tci; vlan_tci = vlan->vlan_id; - vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); + vlan_tci |= __vlan_dev_get_egress_qos_mask(dev, skb->priority); skb = __vlan_hwaccel_put_tag(skb, vlan->vlan_proto, vlan_tci); } @@ -253,7 +259,7 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, np->vlan_qos = vlan_qos; /* Before inserting this element in hash table, make sure all its fields * are committed to memory. - * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask() + * coupled with smp_rmb() in __vlan_dev_get_egress_qos_mask() */ smp_wmb(); vlan->egress_priority_map[skb_prio & 0xF] = np; -- cgit v1.2.3-71-gd317 From df42153c59a38a65c6f7440d5c70d87d1c24438d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 7 Nov 2013 10:48:49 +0300 Subject: net: make ndev->irq signed for error handling There is a bug in cpsw_probe() where we do: ndev->irq = platform_get_irq(pdev, 0); if (ndev->irq < 0) { The problem is that "ndev->irq" is unsigned so the error handling doesn't work. I have changed it to a regular int. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e6353cafbf05..b6f6efbcfc74 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1132,7 +1132,7 @@ struct net_device { unsigned long mem_end; /* shared mem end */ unsigned long mem_start; /* shared mem start */ unsigned long base_addr; /* device I/O address */ - unsigned int irq; /* device IRQ number */ + int irq; /* device IRQ number */ /* * Some hardware also needs these fields, but they are not -- cgit v1.2.3-71-gd317 From a6cc0cfa72e0b6d9f2c8fd858aacc32313c4f272 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 6 Nov 2013 09:54:46 -0800 Subject: net: Add layer 2 hardware acceleration operations for macvlan devices Add a operations structure that allows a network interface to export the fact that it supports package forwarding in hardware between physical interfaces and other mac layer devices assigned to it (such as macvlans). This operaions structure can be used by virtual mac devices to bypass software switching so that forwarding can be done in hardware more efficiently. Signed-off-by: John Fastabend Signed-off-by: Neil Horman CC: Andy Gospodarek CC: "David S. Miller" Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 36 +++++++++++++++++++++++++++++++++++- include/linux/if_macvlan.h | 1 + include/linux/netdev_features.h | 2 ++ include/linux/netdevice.h | 36 +++++++++++++++++++++++++++++++++++- include/uapi/linux/if.h | 1 + net/core/dev.c | 18 +++++++++++++----- net/core/ethtool.c | 1 + net/sched/sch_generic.c | 2 +- 8 files changed, 89 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index cc9845ec91c1..af4aaa5893ff 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -297,7 +297,13 @@ netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, int ret; const struct macvlan_dev *vlan = netdev_priv(dev); - ret = macvlan_queue_xmit(skb, dev); + if (vlan->fwd_priv) { + skb->dev = vlan->lowerdev; + ret = dev_hard_start_xmit(skb, skb->dev, NULL, vlan->fwd_priv); + } else { + ret = macvlan_queue_xmit(skb, dev); + } + if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { struct macvlan_pcpu_stats *pcpu_stats; @@ -347,6 +353,21 @@ static int macvlan_open(struct net_device *dev) goto hash_add; } + if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD) { + vlan->fwd_priv = + lowerdev->netdev_ops->ndo_dfwd_add_station(lowerdev, dev); + + /* If we get a NULL pointer back, or if we get an error + * then we should just fall through to the non accelerated path + */ + if (IS_ERR_OR_NULL(vlan->fwd_priv)) { + vlan->fwd_priv = NULL; + } else { + dev->features &= ~NETIF_F_LLTX; + return 0; + } + } + err = -EBUSY; if (macvlan_addr_busy(vlan->port, dev->dev_addr)) goto out; @@ -367,6 +388,11 @@ hash_add: del_unicast: dev_uc_del(lowerdev, dev->dev_addr); out: + if (vlan->fwd_priv) { + lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev, + vlan->fwd_priv); + vlan->fwd_priv = NULL; + } return err; } @@ -375,6 +401,13 @@ static int macvlan_stop(struct net_device *dev) struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; + if (vlan->fwd_priv) { + lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev, + vlan->fwd_priv); + vlan->fwd_priv = NULL; + return 0; + } + dev_uc_unsync(lowerdev, dev); dev_mc_unsync(lowerdev, dev); @@ -833,6 +866,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, if (err < 0) goto destroy_port; + dev->priv_flags |= IFF_MACVLAN; err = netdev_upper_dev_link(lowerdev, dev); if (err) goto destroy_port; diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index ddd33fd5904d..c2702856295e 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -61,6 +61,7 @@ struct macvlan_dev { struct hlist_node hlist; struct macvlan_port *port; struct net_device *lowerdev; + void *fwd_priv; struct macvlan_pcpu_stats __percpu *pcpu_stats; DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ); diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index b05a4b501ab5..1005ebf17575 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -62,6 +62,7 @@ enum { NETIF_F_HW_VLAN_STAG_TX_BIT, /* Transmit VLAN STAG HW acceleration */ NETIF_F_HW_VLAN_STAG_RX_BIT, /* Receive VLAN STAG HW acceleration */ NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */ + NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */ /* * Add your fresh new feature above and remember to update @@ -116,6 +117,7 @@ enum { #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) +#define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b6f6efbcfc74..15fa01c9a3bf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -962,6 +962,25 @@ struct netdev_phys_port_id { * Called by vxlan to notify the driver about a UDP port and socket * address family that vxlan is not listening to anymore. The operation * is protected by the vxlan_net->sock_lock. + * + * void* (*ndo_dfwd_add_station)(struct net_device *pdev, + * struct net_device *dev) + * Called by upper layer devices to accelerate switching or other + * station functionality into hardware. 'pdev is the lowerdev + * to use for the offload and 'dev' is the net device that will + * back the offload. Returns a pointer to the private structure + * the upper layer will maintain. + * void (*ndo_dfwd_del_station)(struct net_device *pdev, void *priv) + * Called by upper layer device to delete the station created + * by 'ndo_dfwd_add_station'. 'pdev' is the net device backing + * the station and priv is the structure returned by the add + * operation. + * netdev_tx_t (*ndo_dfwd_start_xmit)(struct sk_buff *skb, + * struct net_device *dev, + * void *priv); + * Callback to use for xmit over the accelerated station. This + * is used in place of ndo_start_xmit on accelerated net + * devices. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1098,6 +1117,15 @@ struct net_device_ops { void (*ndo_del_vxlan_port)(struct net_device *dev, sa_family_t sa_family, __be16 port); + + void* (*ndo_dfwd_add_station)(struct net_device *pdev, + struct net_device *dev); + void (*ndo_dfwd_del_station)(struct net_device *pdev, + void *priv); + + netdev_tx_t (*ndo_dfwd_start_xmit) (struct sk_buff *skb, + struct net_device *dev, + void *priv); }; /* @@ -1195,6 +1223,7 @@ struct net_device { /* Management operations */ const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; + const struct forwarding_accel_ops *fwd_ops; /* Hardware header description */ const struct header_ops *header_ops; @@ -2388,7 +2417,7 @@ int dev_change_carrier(struct net_device *, bool new_carrier); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_port_id *ppid); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, - struct netdev_queue *txq); + struct netdev_queue *txq, void *accel_priv); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); extern int netdev_budget; @@ -2967,6 +2996,11 @@ static inline void netif_set_gso_max_size(struct net_device *dev, dev->gso_max_size = size; } +static inline bool netif_is_macvlan(struct net_device *dev) +{ + return dev->priv_flags & IFF_MACVLAN; +} + static inline bool netif_is_bond_master(struct net_device *dev) { return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING; diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h index 1ec407b01e46..d758163b0e43 100644 --- a/include/uapi/linux/if.h +++ b/include/uapi/linux/if.h @@ -83,6 +83,7 @@ #define IFF_SUPP_NOFCS 0x80000 /* device supports sending custom FCS */ #define IFF_LIVE_ADDR_CHANGE 0x100000 /* device supports hardware address * change when it's running */ +#define IFF_MACVLAN 0x200000 /* Macvlan device */ #define IF_GET_IFACE 0x0001 /* for querying only */ diff --git a/net/core/dev.c b/net/core/dev.c index 0e6136546a8c..8ffc52e01ece 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2538,7 +2538,7 @@ static inline int skb_needs_linearize(struct sk_buff *skb, } int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, - struct netdev_queue *txq) + struct netdev_queue *txq, void *accel_priv) { const struct net_device_ops *ops = dev->netdev_ops; int rc = NETDEV_TX_OK; @@ -2604,9 +2604,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, dev_queue_xmit_nit(skb, dev); skb_len = skb->len; - rc = ops->ndo_start_xmit(skb, dev); + if (accel_priv) + rc = ops->ndo_dfwd_start_xmit(skb, dev, accel_priv); + else + rc = ops->ndo_start_xmit(skb, dev); + trace_net_dev_xmit(skb, rc, dev, skb_len); - if (rc == NETDEV_TX_OK) + if (rc == NETDEV_TX_OK && txq) txq_trans_update(txq); return rc; } @@ -2622,7 +2626,10 @@ gso: dev_queue_xmit_nit(nskb, dev); skb_len = nskb->len; - rc = ops->ndo_start_xmit(nskb, dev); + if (accel_priv) + rc = ops->ndo_dfwd_start_xmit(nskb, dev, accel_priv); + else + rc = ops->ndo_start_xmit(nskb, dev); trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) @@ -2647,6 +2654,7 @@ out_kfree_skb: out: return rc; } +EXPORT_SYMBOL_GPL(dev_hard_start_xmit); static void qdisc_pkt_len_init(struct sk_buff *skb) { @@ -2854,7 +2862,7 @@ int dev_queue_xmit(struct sk_buff *skb) if (!netif_xmit_stopped(txq)) { __this_cpu_inc(xmit_recursion); - rc = dev_hard_start_xmit(skb, dev, txq); + rc = dev_hard_start_xmit(skb, dev, txq, NULL); __this_cpu_dec(xmit_recursion); if (dev_xmit_complete(rc)) { HARD_TX_UNLOCK(dev, txq); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 862989898f61..30071dec287a 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -96,6 +96,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_LOOPBACK_BIT] = "loopback", [NETIF_F_RXFCS_BIT] = "rx-fcs", [NETIF_F_RXALL_BIT] = "rx-all", + [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", }; static int ethtool_get_features(struct net_device *dev, void __user *useraddr) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 7fc899a943a8..922a09406ba7 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -126,7 +126,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) - ret = dev_hard_start_xmit(skb, dev, txq); + ret = dev_hard_start_xmit(skb, dev, txq, NULL); HARD_TX_UNLOCK(dev, txq); -- cgit v1.2.3-71-gd317 From 6e7136ed7793fa4948b0192dcd6862d12a50d67c Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 7 Nov 2013 12:19:53 +0200 Subject: net/mlx4_core: ICM pages are allocated on device NUMA node This is done to optimize FW/HW access to host memory. Signed-off-by: Yevgeny Petrilin Signed-off-by: Eugenia Emantayev Reviewed-by: Hadar Hen Zion Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/icm.c | 42 ++++++++++++++++++++++--------- drivers/net/ethernet/mellanox/mlx4/main.c | 1 + include/linux/mlx4/device.h | 1 + 3 files changed, 32 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 31d02649be41..5fbf4924c272 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -93,13 +93,17 @@ void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent) kfree(icm); } -static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask) +static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, + gfp_t gfp_mask, int node) { struct page *page; - page = alloc_pages(gfp_mask, order); - if (!page) - return -ENOMEM; + page = alloc_pages_node(node, gfp_mask, order); + if (!page) { + page = alloc_pages(gfp_mask, order); + if (!page) + return -ENOMEM; + } sg_set_page(mem, page, PAGE_SIZE << order, 0); return 0; @@ -130,9 +134,15 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, /* We use sg_set_buf for coherent allocs, which assumes low memory */ BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM)); - icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); - if (!icm) - return NULL; + icm = kmalloc_node(sizeof(*icm), + gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN), + dev->numa_node); + if (!icm) { + icm = kmalloc(sizeof(*icm), + gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); + if (!icm) + return NULL; + } icm->refcount = 0; INIT_LIST_HEAD(&icm->chunk_list); @@ -141,10 +151,17 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, while (npages > 0) { if (!chunk) { - chunk = kmalloc(sizeof *chunk, - gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); - if (!chunk) - goto fail; + chunk = kmalloc_node(sizeof(*chunk), + gfp_mask & ~(__GFP_HIGHMEM | + __GFP_NOWARN), + dev->numa_node); + if (!chunk) { + chunk = kmalloc(sizeof(*chunk), + gfp_mask & ~(__GFP_HIGHMEM | + __GFP_NOWARN)); + if (!chunk) + goto fail; + } sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN); chunk->npages = 0; @@ -161,7 +178,8 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, cur_order, gfp_mask); else ret = mlx4_alloc_icm_pages(&chunk->mem[chunk->npages], - cur_order, gfp_mask); + cur_order, gfp_mask, + dev->numa_node); if (ret) { if (--cur_order < 0) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 7d2628dfdc29..5789ea2c934d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2191,6 +2191,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) mutex_init(&priv->bf_mutex); dev->rev_id = pdev->revision; + dev->numa_node = dev_to_node(&pdev->dev); /* Detect if this device is a virtual function */ if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { /* When acting as pf, we normally skip vfs unless explicitly diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index f6f59271f857..4cf0b0153639 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -662,6 +662,7 @@ struct mlx4_dev { u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; int num_vfs; + int numa_node; int oper_log_mgm_entry_size; u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; -- cgit v1.2.3-71-gd317 From 163561a4e2f8af44e96453bc10c7a4f9bcc736e1 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 7 Nov 2013 12:19:54 +0200 Subject: net/mlx4_en: Datapath structures are allocated per NUMA node For each RX/TX ring and its CQ, allocation is done on a NUMA node that corresponds to the core that the data structure should operate on. The assumption is that the core number is reflected by the ring index. The affected allocations are the ring/CQ data structures, the TX/RX info and the shared HW/SW buffer. For TX rings, each core has rings of all UPs. Signed-off-by: Yevgeny Petrilin Signed-off-by: Eugenia Emantayev Reviewed-by: Hadar Hen Zion Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 17 ++++++++++--- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 12 ++++++--- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 23 +++++++++++------ drivers/net/ethernet/mellanox/mlx4/en_tx.c | 34 +++++++++++++++++--------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 6 ++--- drivers/net/ethernet/mellanox/mlx4/pd.c | 11 ++++++--- include/linux/mlx4/device.h | 2 +- 7 files changed, 71 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index d203f11b9edf..3a098cc4d349 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -45,16 +45,20 @@ static void mlx4_en_cq_event(struct mlx4_cq *cq, enum mlx4_event event) int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq, - int entries, int ring, enum cq_type mode) + int entries, int ring, enum cq_type mode, + int node) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_cq *cq; int err; - cq = kzalloc(sizeof(*cq), GFP_KERNEL); + cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, node); if (!cq) { - en_err(priv, "Failed to allocate CQ structure\n"); - return -ENOMEM; + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) { + en_err(priv, "Failed to allocate CQ structure\n"); + return -ENOMEM; + } } cq->size = entries; @@ -64,8 +68,13 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, cq->is_tx = mode; spin_lock_init(&cq->lock); + /* Allocate HW buffers on provided NUMA node. + * dev->numa_node is used in mtt range allocation flow. + */ + set_dev_node(&mdev->dev->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres, cq->buf_size, 2 * PAGE_SIZE); + set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); if (err) goto err_cq; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index f430788cc4fe..e72d8a112a6b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1895,6 +1895,7 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) struct mlx4_en_port_profile *prof = priv->prof; int i; int err; + int node; err = mlx4_qp_reserve_range(priv->mdev->dev, priv->tx_ring_num, 256, &priv->base_tx_qpn); if (err) { @@ -1904,23 +1905,26 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) /* Create tx Rings */ for (i = 0; i < priv->tx_ring_num; i++) { + node = cpu_to_node(i % num_online_cpus()); if (mlx4_en_create_cq(priv, &priv->tx_cq[i], - prof->tx_ring_size, i, TX)) + prof->tx_ring_size, i, TX, node)) goto err; if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], priv->base_tx_qpn + i, - prof->tx_ring_size, TXBB_SIZE)) + prof->tx_ring_size, TXBB_SIZE, node)) goto err; } /* Create rx Rings */ for (i = 0; i < priv->rx_ring_num; i++) { + node = cpu_to_node(i % num_online_cpus()); if (mlx4_en_create_cq(priv, &priv->rx_cq[i], - prof->rx_ring_size, i, RX)) + prof->rx_ring_size, i, RX, node)) goto err; if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i], - prof->rx_ring_size, priv->stride)) + prof->rx_ring_size, priv->stride, + node)) goto err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 1c45f88776c5..07a1d0fbae47 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -320,17 +320,20 @@ static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv, int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, - u32 size, u16 stride) + u32 size, u16 stride, int node) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rx_ring *ring; int err = -ENOMEM; int tmp; - ring = kzalloc(sizeof(*ring), GFP_KERNEL); + ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node); if (!ring) { - en_err(priv, "Failed to allocate RX ring structure\n"); - return -ENOMEM; + ring = kzalloc(sizeof(*ring), GFP_KERNEL); + if (!ring) { + en_err(priv, "Failed to allocate RX ring structure\n"); + return -ENOMEM; + } } ring->prod = 0; @@ -343,17 +346,23 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * sizeof(struct mlx4_en_rx_alloc)); - ring->rx_info = vmalloc(tmp); + ring->rx_info = vmalloc_node(tmp, node); if (!ring->rx_info) { - err = -ENOMEM; - goto err_ring; + ring->rx_info = vmalloc(tmp); + if (!ring->rx_info) { + err = -ENOMEM; + goto err_ring; + } } en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n", ring->rx_info, tmp); + /* Allocate HW buffers on provided NUMA node */ + set_dev_node(&mdev->dev->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); + set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); if (err) goto err_info; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index d4e4cf30a720..f54ebd5a1702 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -55,17 +55,20 @@ MODULE_PARM_DESC(inline_thold, "threshold for using inline data"); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring, int qpn, u32 size, - u16 stride) + u16 stride, int node) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_ring *ring; int tmp; int err; - ring = kzalloc(sizeof(*ring), GFP_KERNEL); + ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node); if (!ring) { - en_err(priv, "Failed allocating TX ring\n"); - return -ENOMEM; + ring = kzalloc(sizeof(*ring), GFP_KERNEL); + if (!ring) { + en_err(priv, "Failed allocating TX ring\n"); + return -ENOMEM; + } } ring->size = size; @@ -75,24 +78,33 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, inline_thold = min(inline_thold, MAX_INLINE); tmp = size * sizeof(struct mlx4_en_tx_info); - ring->tx_info = vmalloc(tmp); + ring->tx_info = vmalloc_node(tmp, node); if (!ring->tx_info) { - err = -ENOMEM; - goto err_ring; + ring->tx_info = vmalloc(tmp); + if (!ring->tx_info) { + err = -ENOMEM; + goto err_ring; + } } en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n", ring->tx_info, tmp); - ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL); + ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node); if (!ring->bounce_buf) { - err = -ENOMEM; - goto err_info; + ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL); + if (!ring->bounce_buf) { + err = -ENOMEM; + goto err_info; + } } ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE); + /* Allocate HW buffers on provided NUMA node */ + set_dev_node(&mdev->dev->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); + set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); if (err) { en_err(priv, "Failed allocating hwq resources\n"); goto err_bounce; @@ -118,7 +130,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, } ring->qp.event = mlx4_en_sqp_event; - err = mlx4_bf_alloc(mdev->dev, &ring->bf); + err = mlx4_bf_alloc(mdev->dev, &ring->bf, node); if (err) { en_dbg(DRV, priv, "working without blueflame (%d)", err); ring->bf.uar = &mdev->priv_uar; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index b2547ae07dfa..f3758de59c05 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -705,7 +705,7 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv); int mlx4_en_alloc_resources(struct mlx4_en_priv *priv); int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq, - int entries, int ring, enum cq_type mode); + int entries, int ring, enum cq_type mode, int node); void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq); int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, int cq_idx); @@ -719,7 +719,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring, - int qpn, u32 size, u16 stride); + int qpn, u32 size, u16 stride, int node); void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring); int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, @@ -730,7 +730,7 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, - u32 size, u16 stride); + u32 size, u16 stride, int node); void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, u16 stride); diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c index 00f223acada7..84cfb40bf451 100644 --- a/drivers/net/ethernet/mellanox/mlx4/pd.c +++ b/drivers/net/ethernet/mellanox/mlx4/pd.c @@ -168,7 +168,7 @@ void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar) } EXPORT_SYMBOL_GPL(mlx4_uar_free); -int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf) +int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_uar *uar; @@ -186,10 +186,13 @@ int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf) err = -ENOMEM; goto out; } - uar = kmalloc(sizeof *uar, GFP_KERNEL); + uar = kmalloc_node(sizeof(*uar), GFP_KERNEL, node); if (!uar) { - err = -ENOMEM; - goto out; + uar = kmalloc(sizeof(*uar), GFP_KERNEL); + if (!uar) { + err = -ENOMEM; + goto out; + } } err = mlx4_uar_alloc(dev, uar); if (err) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 4cf0b0153639..7d3a523160ba 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -835,7 +835,7 @@ void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar); void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar); -int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf); +int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node); void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf); int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift, -- cgit v1.2.3-71-gd317 From 0c7ddf36c29c3ce12f2d2931a357ccaa0861035a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 7 Nov 2013 14:18:24 +0100 Subject: net: move pskb_put() to core code This function has usage beside IPsec so move it to the core skbuff code. While doing so, give it some documentation and change its return type to 'unsigned char *' to be in line with skb_put(). Signed-off-by: Mathias Krause Cc: Steffen Klassert Cc: "David S. Miller" Cc: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + include/net/esp.h | 2 -- net/core/skbuff.c | 23 +++++++++++++++++++++++ net/xfrm/xfrm_algo.c | 13 ------------- 4 files changed, 24 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2e153b69d318..491dd6c2c6cc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1417,6 +1417,7 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) /* * Add data to an sk_buff */ +unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len); unsigned char *skb_put(struct sk_buff *skb, unsigned int len); static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) { diff --git a/include/net/esp.h b/include/net/esp.h index c92213c38312..a43be85aedc4 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -3,8 +3,6 @@ #include -void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len); - struct ip_esp_hdr; static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3735fad5616e..2fbea08c4f50 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1263,6 +1263,29 @@ free_skb: } EXPORT_SYMBOL(skb_pad); +/** + * pskb_put - add data to the tail of a potentially fragmented buffer + * @skb: start of the buffer to use + * @tail: tail fragment of the buffer to use + * @len: amount of data to add + * + * This function extends the used data area of the potentially + * fragmented buffer. @tail must be the last fragment of @skb -- or + * @skb itself. If this would exceed the total buffer size the kernel + * will panic. A pointer to the first byte of the extra data is + * returned. + */ + +unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) +{ + if (tail != skb) { + skb->data_len += len; + skb->len += len; + } + return skb_put(tail, len); +} +EXPORT_SYMBOL_GPL(pskb_put); + /** * skb_put - add data to a buffer * @skb: buffer to use diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index ab4ef72f0b1d..debe733386f8 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -802,17 +802,4 @@ int xfrm_count_pfkey_enc_supported(void) } EXPORT_SYMBOL_GPL(xfrm_count_pfkey_enc_supported); -#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) - -void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) -{ - if (tail != skb) { - skb->data_len += len; - skb->len += len; - } - return skb_put(tail, len); -} -EXPORT_SYMBOL_GPL(pskb_put); -#endif - MODULE_LICENSE("GPL"); -- cgit v1.2.3-71-gd317 From bc32383cd6496d595e6a25cdc7cff1da6b694462 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 7 Nov 2013 14:18:26 +0100 Subject: net: skbuff - kernel-doc fixes Use "@" to refer to parameters in the kernel-doc description. According to Documentation/kernel-doc-nano-HOWTO.txt "&" shall be used to refer to structures only. Signed-off-by: Mathias Krause Cc: "David S. Miller" Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- net/core/skbuff.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 491dd6c2c6cc..036ec7d8a83a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1357,7 +1357,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, * @size: the length of the data * * As per __skb_fill_page_desc() -- initialises the @i'th fragment of - * @skb to point to &size bytes at offset @off within @page. In + * @skb to point to @size bytes at offset @off within @page. In * addition updates @skb such that @i is the last fragment. * * Does not take any additional reference on the fragment. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2fbea08c4f50..8c5197fe55a4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1051,8 +1051,8 @@ EXPORT_SYMBOL(__pskb_copy); * @ntail: room to add at tail * @gfp_mask: allocation priority * - * Expands (or creates identical copy, if &nhead and &ntail are zero) - * header of skb. &sk_buff itself is not changed. &sk_buff MUST have + * Expands (or creates identical copy, if @nhead and @ntail are zero) + * header of @skb. &sk_buff itself is not changed. &sk_buff MUST have * reference count of 1. Returns zero in the case of success or error, * if expansion failed. In the last case, &sk_buff is not changed. * @@ -2563,14 +2563,14 @@ EXPORT_SYMBOL(skb_prepare_seq_read); * @data: destination pointer for data to be returned * @st: state variable * - * Reads a block of skb data at &consumed relative to the + * Reads a block of skb data at @consumed relative to the * lower offset specified to skb_prepare_seq_read(). Assigns - * the head of the data block to &data and returns the length + * the head of the data block to @data and returns the length * of the block or 0 if the end of the skb data or the upper * offset has been reached. * * The caller is not required to consume all of the data - * returned, i.e. &consumed is typically set to the number + * returned, i.e. @consumed is typically set to the number * of bytes already consumed and the next call to * skb_seq_read() will return the remaining part of the block. * -- cgit v1.2.3-71-gd317 From 6aafeef03b9d9ecf255f3a80ed85ee070260e1ae Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 6 Nov 2013 17:52:20 +0100 Subject: netfilter: push reasm skb through instead of original frag skbs Pushing original fragments through causes several problems. For example for matching, frags may not be matched correctly. Take following example: On HOSTA do: ip6tables -I INPUT -p icmpv6 -j DROP ip6tables -I INPUT -p icmpv6 -m icmp6 --icmpv6-type 128 -j ACCEPT and on HOSTB you do: ping6 HOSTA -s2000 (MTU is 1500) Incoming echo requests will be filtered out on HOSTA. This issue does not occur with smaller packets than MTU (where fragmentation does not happen) As was discussed previously, the only correct solution seems to be to use reassembled skb instead of separete frags. Doing this has positive side effects in reducing sk_buff by one pointer (nfct_reasm) and also the reams dances in ipvs and conntrack can be removed. Future plan is to remove net/ipv6/netfilter/nf_conntrack_reasm.c entirely and use code in net/ipv6/reassembly.c instead. Signed-off-by: Jiri Pirko Acked-by: Julian Anastasov Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/linux/skbuff.h | 32 --------------- include/net/ip_vs.h | 32 +-------------- include/net/netfilter/ipv6/nf_defrag_ipv6.h | 4 +- net/core/skbuff.c | 3 -- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 56 +------------------------- net/ipv6/netfilter/nf_conntrack_reasm.c | 19 +-------- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 7 +++- net/netfilter/ipvs/ip_vs_core.c | 55 +------------------------ net/netfilter/ipvs/ip_vs_pe_sip.c | 8 +--- 9 files changed, 13 insertions(+), 203 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 036ec7d8a83a..215b5ea1cb30 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -337,11 +337,6 @@ typedef unsigned int sk_buff_data_t; typedef unsigned char *sk_buff_data_t; #endif -#if defined(CONFIG_NF_DEFRAG_IPV4) || defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \ - defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE) -#define NET_SKBUFF_NF_DEFRAG_NEEDED 1 -#endif - /** * struct sk_buff - socket buffer * @next: Next buffer in list @@ -374,7 +369,6 @@ typedef unsigned char *sk_buff_data_t; * @protocol: Packet protocol from driver * @destructor: Destruct function * @nfct: Associated connection, if any - * @nfct_reasm: netfilter conntrack re-assembly pointer * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index @@ -463,9 +457,6 @@ struct sk_buff { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct nf_conntrack *nfct; #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED - struct sk_buff *nfct_reasm; -#endif #ifdef CONFIG_BRIDGE_NETFILTER struct nf_bridge_info *nf_bridge; #endif @@ -2595,18 +2586,6 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct) atomic_inc(&nfct->use); } #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED -static inline void nf_conntrack_get_reasm(struct sk_buff *skb) -{ - if (skb) - atomic_inc(&skb->users); -} -static inline void nf_conntrack_put_reasm(struct sk_buff *skb) -{ - if (skb) - kfree_skb(skb); -} -#endif #ifdef CONFIG_BRIDGE_NETFILTER static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) { @@ -2625,10 +2604,6 @@ static inline void nf_reset(struct sk_buff *skb) nf_conntrack_put(skb->nfct); skb->nfct = NULL; #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED - nf_conntrack_put_reasm(skb->nfct_reasm); - skb->nfct_reasm = NULL; -#endif #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(skb->nf_bridge); skb->nf_bridge = NULL; @@ -2650,10 +2625,6 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) nf_conntrack_get(src->nfct); dst->nfctinfo = src->nfctinfo; #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED - dst->nfct_reasm = src->nfct_reasm; - nf_conntrack_get_reasm(src->nfct_reasm); -#endif #ifdef CONFIG_BRIDGE_NETFILTER dst->nf_bridge = src->nf_bridge; nf_bridge_get(src->nf_bridge); @@ -2665,9 +2636,6 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(dst->nfct); #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED - nf_conntrack_put_reasm(dst->nfct_reasm); -#endif #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(dst->nf_bridge); #endif diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index cd7275f9c463..5679d927562b 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -109,7 +109,6 @@ extern int ip_vs_conn_tab_size; struct ip_vs_iphdr { __u32 len; /* IPv4 simply where L4 starts IPv6 where L4 Transport Header starts */ - __u32 thoff_reasm; /* Transport Header Offset in nfct_reasm skb */ __u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/ __s16 protocol; __s32 flags; @@ -117,34 +116,12 @@ struct ip_vs_iphdr { union nf_inet_addr daddr; }; -/* Dependency to module: nf_defrag_ipv6 */ -#if defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE) -static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb) -{ - return skb->nfct_reasm; -} -static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset, - int len, void *buffer, - const struct ip_vs_iphdr *ipvsh) -{ - if (unlikely(ipvsh->fragoffs && skb_nfct_reasm(skb))) - return skb_header_pointer(skb_nfct_reasm(skb), - ipvsh->thoff_reasm, len, buffer); - - return skb_header_pointer(skb, offset, len, buffer); -} -#else -static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb) -{ - return NULL; -} static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset, int len, void *buffer, const struct ip_vs_iphdr *ipvsh) { return skb_header_pointer(skb, offset, len, buffer); } -#endif static inline void ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr) @@ -171,19 +148,12 @@ ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr) (struct ipv6hdr *)skb_network_header(skb); iphdr->saddr.in6 = iph->saddr; iphdr->daddr.in6 = iph->daddr; - /* ipv6_find_hdr() updates len, flags, thoff_reasm */ - iphdr->thoff_reasm = 0; + /* ipv6_find_hdr() updates len, flags */ iphdr->len = 0; iphdr->flags = 0; iphdr->protocol = ipv6_find_hdr(skb, &iphdr->len, -1, &iphdr->fragoffs, &iphdr->flags); - /* get proto from re-assembled packet and it's offset */ - if (skb_nfct_reasm(skb)) - iphdr->protocol = ipv6_find_hdr(skb_nfct_reasm(skb), - &iphdr->thoff_reasm, - -1, NULL, NULL); - } else #endif { diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index 5613412e7dc2..27666d8a0bd0 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -6,9 +6,7 @@ void nf_defrag_ipv6_enable(void); int nf_ct_frag6_init(void); void nf_ct_frag6_cleanup(void); struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user); -void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, - struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *)); +void nf_ct_frag6_consume_orig(struct sk_buff *skb); struct inet_frags_ctl; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8c5197fe55a4..8cec1e6b844d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -592,9 +592,6 @@ static void skb_release_head_state(struct sk_buff *skb) #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_conntrack_put(skb->nfct); #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED - nf_conntrack_put_reasm(skb->nfct_reasm); -#endif #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(skb->nf_bridge); #endif diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 486545eb42ce..4cbc6b290dd5 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -169,64 +169,13 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int __ipv6_conntrack_in(struct net *net, - unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct sk_buff *reasm = skb->nfct_reasm; - const struct nf_conn_help *help; - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - - /* This packet is fragmented and has reassembled packet. */ - if (reasm) { - /* Reassembled packet isn't parsed yet ? */ - if (!reasm->nfct) { - unsigned int ret; - - ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm); - if (ret != NF_ACCEPT) - return ret; - } - - /* Conntrack helpers need the entire reassembled packet in the - * POST_ROUTING hook. In case of unconfirmed connections NAT - * might reassign a helper, so the entire packet is also - * required. - */ - ct = nf_ct_get(reasm, &ctinfo); - if (ct != NULL && !nf_ct_is_untracked(ct)) { - help = nfct_help(ct); - if ((help && help->helper) || !nf_ct_is_confirmed(ct)) { - nf_conntrack_get_reasm(reasm); - NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, - (struct net_device *)in, - (struct net_device *)out, - okfn, NF_IP6_PRI_CONNTRACK + 1); - return NF_DROP_ERR(-ECANCELED); - } - } - - nf_conntrack_get(reasm->nfct); - skb->nfct = reasm->nfct; - skb->nfctinfo = reasm->nfctinfo; - return NF_ACCEPT; - } - - return nf_conntrack_in(net, PF_INET6, hooknum, skb); -} - static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return __ipv6_conntrack_in(dev_net(in), ops->hooknum, skb, in, out, - okfn); + return nf_conntrack_in(dev_net(in), PF_INET6, ops->hooknum, skb); } static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, @@ -240,8 +189,7 @@ static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return __ipv6_conntrack_in(dev_net(out), ops->hooknum, skb, in, out, - okfn); + return nf_conntrack_in(dev_net(out), PF_INET6, ops->hooknum, skb); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 4a258263d8ec..767ab8da8218 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -633,31 +633,16 @@ ret_orig: return skb; } -void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, - struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *)) +void nf_ct_frag6_consume_orig(struct sk_buff *skb) { struct sk_buff *s, *s2; - unsigned int ret = 0; for (s = NFCT_FRAG6_CB(skb)->orig; s;) { - nf_conntrack_put_reasm(s->nfct_reasm); - nf_conntrack_get_reasm(skb); - s->nfct_reasm = skb; - s2 = s->next; s->next = NULL; - - if (ret != -ECANCELED) - ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, - in, out, okfn, - NF_IP6_PRI_CONNTRACK_DEFRAG + 1); - else - kfree_skb(s); - + consume_skb(s); s = s2; } - nf_conntrack_put_reasm(skb); } static int nf_ct_net_init(struct net *net) diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index ec483aa3f60f..7b9a748c6bac 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -75,8 +75,11 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, if (reasm == skb) return NF_ACCEPT; - nf_ct_frag6_output(ops->hooknum, reasm, (struct net_device *)in, - (struct net_device *)out, okfn); + nf_ct_frag6_consume_orig(reasm); + + NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm, + (struct net_device *) in, (struct net_device *) out, + okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); return NF_STOLEN; } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 34fda62f40f6..4f26ee46b51f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1139,12 +1139,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iph_skb(af, skb, &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (!iph.fragoffs && skb_nfct_reasm(skb)) { - struct sk_buff *reasm = skb_nfct_reasm(skb); - /* Save fw mark for coming frags */ - reasm->ipvs_property = 1; - reasm->mark = skb->mark; - } if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; int verdict = ip_vs_out_icmp_v6(skb, &related, @@ -1614,12 +1608,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (!iph.fragoffs && skb_nfct_reasm(skb)) { - struct sk_buff *reasm = skb_nfct_reasm(skb); - /* Save fw mark for coming frags. */ - reasm->ipvs_property = 1; - reasm->mark = skb->mark; - } if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum, @@ -1671,9 +1659,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) /* sorry, all this trouble for a no-hit :) */ IP_VS_DBG_PKT(12, af, pp, skb, 0, "ip_vs_in: packet continues traversal as normal"); - if (iph.fragoffs && !skb_nfct_reasm(skb)) { + if (iph.fragoffs) { /* Fragment that couldn't be mapped to a conn entry - * and don't have any pointer to a reasm skb * is missing module nf_defrag_ipv6 */ IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); @@ -1755,38 +1742,6 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, #ifdef CONFIG_IP_VS_IPV6 -/* - * AF_INET6 fragment handling - * Copy info from first fragment, to the rest of them. - */ -static unsigned int -ip_vs_preroute_frag6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct sk_buff *reasm = skb_nfct_reasm(skb); - struct net *net; - - /* Skip if not a "replay" from nf_ct_frag6_output or first fragment. - * ipvs_property is set when checking first fragment - * in ip_vs_in() and ip_vs_out(). - */ - if (reasm) - IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property); - if (!reasm || !reasm->ipvs_property) - return NF_ACCEPT; - - net = skb_net(skb); - if (!net_ipvs(net)->enable) - return NF_ACCEPT; - - /* Copy stored fw mark, saved in ip_vs_{in,out} */ - skb->mark = reasm->mark; - - return NF_ACCEPT; -} - /* * AF_INET6 handler in NF_INET_LOCAL_IN chain * Schedule and forward packets from remote clients @@ -1924,14 +1879,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .priority = 100, }, #ifdef CONFIG_IP_VS_IPV6 - /* After mangle & nat fetch 2:nd fragment and following */ - { - .hook = ip_vs_preroute_frag6, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP6_PRI_NAT_DST + 1, - }, /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply6, diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 9ef22bdce9f1..bed5f7042529 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -65,7 +65,6 @@ static int get_callid(const char *dptr, unsigned int dataoff, static int ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) { - struct sk_buff *reasm = skb_nfct_reasm(skb); struct ip_vs_iphdr iph; unsigned int dataoff, datalen, matchoff, matchlen; const char *dptr; @@ -79,15 +78,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) /* todo: IPv6 fragments: * I think this only should be done for the first fragment. /HS */ - if (reasm) { - skb = reasm; - dataoff = iph.thoff_reasm + sizeof(struct udphdr); - } else - dataoff = iph.len + sizeof(struct udphdr); + dataoff = iph.len + sizeof(struct udphdr); if (dataoff >= skb->len) return -EINVAL; - /* todo: Check if this will mess-up the reasm skb !!! /HS */ retc = skb_linearize(skb); if (retc < 0) return retc; -- cgit v1.2.3-71-gd317 From e267cb960ab790c94a5019272c0e4dac95dc4dba Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 11 Nov 2013 00:42:07 -0500 Subject: vlan: Implement vlan_dev_get_egress_qos_mask as an inline. This is to avoid very silly Kconfig dependencies for modules using this routine. Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 98 ++++++++++++++++++++++++++++++++++++++++++++++++- net/8021q/vlan.h | 77 -------------------------------------- net/8021q/vlan_dev.c | 31 ++-------------- 3 files changed, 99 insertions(+), 107 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index f3088a0112cf..f252deb99454 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -88,8 +88,102 @@ extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, __be16 vlan_proto, u16 vlan_id); extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); extern u16 vlan_dev_vlan_id(const struct net_device *dev); -extern u16 vlan_dev_get_egress_qos_mask(struct net_device *dev, - u32 skprio); + +/** + * struct vlan_priority_tci_mapping - vlan egress priority mappings + * @priority: skb priority + * @vlan_qos: vlan priority: (skb->priority << 13) & 0xE000 + * @next: pointer to next struct + */ +struct vlan_priority_tci_mapping { + u32 priority; + u16 vlan_qos; + struct vlan_priority_tci_mapping *next; +}; + +/** + * struct vlan_pcpu_stats - VLAN percpu rx/tx stats + * @rx_packets: number of received packets + * @rx_bytes: number of received bytes + * @rx_multicast: number of received multicast packets + * @tx_packets: number of transmitted packets + * @tx_bytes: number of transmitted bytes + * @syncp: synchronization point for 64bit counters + * @rx_errors: number of rx errors + * @tx_dropped: number of tx drops + */ +struct vlan_pcpu_stats { + u64 rx_packets; + u64 rx_bytes; + u64 rx_multicast; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; + u32 rx_errors; + u32 tx_dropped; +}; + +struct proc_dir_entry; +struct netpoll; + +/** + * struct vlan_dev_priv - VLAN private device data + * @nr_ingress_mappings: number of ingress priority mappings + * @ingress_priority_map: ingress priority mappings + * @nr_egress_mappings: number of egress priority mappings + * @egress_priority_map: hash of egress priority mappings + * @vlan_proto: VLAN encapsulation protocol + * @vlan_id: VLAN identifier + * @flags: device flags + * @real_dev: underlying netdevice + * @real_dev_addr: address of underlying netdevice + * @dent: proc dir entry + * @vlan_pcpu_stats: ptr to percpu rx stats + */ +struct vlan_dev_priv { + unsigned int nr_ingress_mappings; + u32 ingress_priority_map[8]; + unsigned int nr_egress_mappings; + struct vlan_priority_tci_mapping *egress_priority_map[16]; + + __be16 vlan_proto; + u16 vlan_id; + u16 flags; + + struct net_device *real_dev; + unsigned char real_dev_addr[ETH_ALEN]; + + struct proc_dir_entry *dent; + struct vlan_pcpu_stats __percpu *vlan_pcpu_stats; +#ifdef CONFIG_NET_POLL_CONTROLLER + struct netpoll *netpoll; +#endif +}; + +static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev) +{ + return netdev_priv(dev); +} + +static inline u16 +vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio) +{ + struct vlan_priority_tci_mapping *mp; + + smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */ + + mp = vlan_dev_priv(dev)->egress_priority_map[(skprio & 0xF)]; + while (mp) { + if (mp->priority == skprio) { + return mp->vlan_qos; /* This should already be shifted + * to mask correctly with the + * VLAN's TCI */ + } + mp = mp->next; + } + return 0; +} + extern bool vlan_do_receive(struct sk_buff **skb); extern struct sk_buff *vlan_untag(struct sk_buff *skb); diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index a2caf00b82cc..5704ed9c3a23 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -5,83 +5,6 @@ #include #include - -/** - * struct vlan_priority_tci_mapping - vlan egress priority mappings - * @priority: skb priority - * @vlan_qos: vlan priority: (skb->priority << 13) & 0xE000 - * @next: pointer to next struct - */ -struct vlan_priority_tci_mapping { - u32 priority; - u16 vlan_qos; - struct vlan_priority_tci_mapping *next; -}; - - -/** - * struct vlan_pcpu_stats - VLAN percpu rx/tx stats - * @rx_packets: number of received packets - * @rx_bytes: number of received bytes - * @rx_multicast: number of received multicast packets - * @tx_packets: number of transmitted packets - * @tx_bytes: number of transmitted bytes - * @syncp: synchronization point for 64bit counters - * @rx_errors: number of rx errors - * @tx_dropped: number of tx drops - */ -struct vlan_pcpu_stats { - u64 rx_packets; - u64 rx_bytes; - u64 rx_multicast; - u64 tx_packets; - u64 tx_bytes; - struct u64_stats_sync syncp; - u32 rx_errors; - u32 tx_dropped; -}; - -struct netpoll; - -/** - * struct vlan_dev_priv - VLAN private device data - * @nr_ingress_mappings: number of ingress priority mappings - * @ingress_priority_map: ingress priority mappings - * @nr_egress_mappings: number of egress priority mappings - * @egress_priority_map: hash of egress priority mappings - * @vlan_proto: VLAN encapsulation protocol - * @vlan_id: VLAN identifier - * @flags: device flags - * @real_dev: underlying netdevice - * @real_dev_addr: address of underlying netdevice - * @dent: proc dir entry - * @vlan_pcpu_stats: ptr to percpu rx stats - */ -struct vlan_dev_priv { - unsigned int nr_ingress_mappings; - u32 ingress_priority_map[8]; - unsigned int nr_egress_mappings; - struct vlan_priority_tci_mapping *egress_priority_map[16]; - - __be16 vlan_proto; - u16 vlan_id; - u16 flags; - - struct net_device *real_dev; - unsigned char real_dev_addr[ETH_ALEN]; - - struct proc_dir_entry *dent; - struct vlan_pcpu_stats __percpu *vlan_pcpu_stats; -#ifdef CONFIG_NET_POLL_CONTROLLER - struct netpoll *netpoll; -#endif -}; - -static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev) -{ - return netdev_priv(dev); -} - /* if this changes, algorithm will have to be reworked because this * depends on completely exhausting the VLAN identifier space. Thus * it gives constant time look-up, but in many cases it wastes memory. diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 13904a414929..8db1b985dbf1 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -68,31 +68,6 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb) return 0; } -static inline u16 -__vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio) -{ - struct vlan_priority_tci_mapping *mp; - - smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */ - - mp = vlan_dev_priv(dev)->egress_priority_map[(skprio & 0xF)]; - while (mp) { - if (mp->priority == skprio) { - return mp->vlan_qos; /* This should already be shifted - * to mask correctly with the - * VLAN's TCI */ - } - mp = mp->next; - } - return 0; -} - -u16 vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio) -{ - return __vlan_dev_get_egress_qos_mask(dev, skprio); -} -EXPORT_SYMBOL(vlan_dev_get_egress_qos_mask); - /* * Create the VLAN header for an arbitrary protocol layer * @@ -117,7 +92,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN); vlan_tci = vlan->vlan_id; - vlan_tci |= __vlan_dev_get_egress_qos_mask(dev, skb->priority); + vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb->priority); vhdr->h_vlan_TCI = htons(vlan_tci); /* @@ -174,7 +149,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, vlan->flags & VLAN_FLAG_REORDER_HDR) { u16 vlan_tci; vlan_tci = vlan->vlan_id; - vlan_tci |= __vlan_dev_get_egress_qos_mask(dev, skb->priority); + vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb->priority); skb = __vlan_hwaccel_put_tag(skb, vlan->vlan_proto, vlan_tci); } @@ -259,7 +234,7 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, np->vlan_qos = vlan_qos; /* Before inserting this element in hash table, make sure all its fields * are committed to memory. - * coupled with smp_rmb() in __vlan_dev_get_egress_qos_mask() + * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask() */ smp_wmb(); vlan->egress_priority_map[skb_prio & 0xF] = np; -- cgit v1.2.3-71-gd317 From 51c37a70aaa3f95773af560e6db3073520513912 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 Nov 2013 12:20:32 +0100 Subject: random32: fix off-by-one in seeding requirement For properly initialising the Tausworthe generator [1], we have a strict seeding requirement, that is, s1 > 1, s2 > 7, s3 > 15. Commit 697f8d0348 ("random32: seeding improvement") introduced a __seed() function that imposes boundary checks proposed by the errata paper [2] to properly ensure above conditions. However, we're off by one, as the function is implemented as: "return (x < m) ? x + m : x;", and called with __seed(X, 1), __seed(X, 7), __seed(X, 15). Thus, an unwanted seed of 1, 7, 15 would be possible, whereas the lower boundary should actually be of at least 2, 8, 16, just as GSL does. Fix this, as otherwise an initialization with an unwanted seed could have the effect that Tausworthe's PRNG properties cannot not be ensured. Note that this PRNG is *not* used for cryptography in the kernel. [1] http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps [2] http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps Joint work with Hannes Frederic Sowa. Fixes: 697f8d0348a6 ("random32: seeding improvement") Cc: Stephen Hemminger Cc: Florian Weimer Cc: Theodore Ts'o Signed-off-by: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/random.h | 6 +++--- lib/random32.c | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 6312dd9ba449..bf9085e89fb5 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -50,9 +50,9 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed) { u32 i = (seed >> 32) ^ (seed << 10) ^ seed; - state->s1 = __seed(i, 1); - state->s2 = __seed(i, 7); - state->s3 = __seed(i, 15); + state->s1 = __seed(i, 2); + state->s2 = __seed(i, 8); + state->s3 = __seed(i, 16); } #ifdef CONFIG_ARCH_RANDOM diff --git a/lib/random32.c b/lib/random32.c index 52280d5526be..01e8890d1089 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -141,7 +141,7 @@ void prandom_seed(u32 entropy) */ for_each_possible_cpu (i) { struct rnd_state *state = &per_cpu(net_rand_state, i); - state->s1 = __seed(state->s1 ^ entropy, 1); + state->s1 = __seed(state->s1 ^ entropy, 2); } } EXPORT_SYMBOL(prandom_seed); @@ -158,9 +158,9 @@ static int __init prandom_init(void) struct rnd_state *state = &per_cpu(net_rand_state,i); #define LCG(x) ((x) * 69069) /* super-duper LCG */ - state->s1 = __seed(LCG(i + jiffies), 1); - state->s2 = __seed(LCG(state->s1), 7); - state->s3 = __seed(LCG(state->s2), 15); + state->s1 = __seed(LCG(i + jiffies), 2); + state->s2 = __seed(LCG(state->s1), 8); + state->s3 = __seed(LCG(state->s2), 16); /* "warm it up" */ prandom_u32_state(state); @@ -187,9 +187,9 @@ static int __init prandom_reseed(void) u32 seeds[3]; get_random_bytes(&seeds, sizeof(seeds)); - state->s1 = __seed(seeds[0], 1); - state->s2 = __seed(seeds[1], 7); - state->s3 = __seed(seeds[2], 15); + state->s1 = __seed(seeds[0], 2); + state->s2 = __seed(seeds[1], 8); + state->s3 = __seed(seeds[2], 16); /* mix it in */ prandom_u32_state(state); -- cgit v1.2.3-71-gd317 From 4af712e8df998475736f3e2727701bd31e3751a9 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 11 Nov 2013 12:20:34 +0100 Subject: random32: add prandom_reseed_late() and call when nonblocking pool becomes initialized The Tausworthe PRNG is initialized at late_initcall time. At that time the entropy pool serving get_random_bytes is not filled sufficiently. This patch adds an additional reseeding step as soon as the nonblocking pool gets marked as initialized. On some machines it might be possible that late_initcall gets called after the pool has been initialized. In this situation we won't reseed again. (A call to prandom_seed_late blocks later invocations of early reseed attempts.) Joint work with Daniel Borkmann. Cc: Eric Dumazet Cc: Theodore Ts'o Signed-off-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Acked-by: "Theodore Ts'o" Signed-off-by: David S. Miller --- drivers/char/random.c | 5 ++++- include/linux/random.h | 1 + lib/random32.c | 23 ++++++++++++++++++++++- 3 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/random.c b/drivers/char/random.c index 7a744d391756..4fe5609eeb72 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -603,8 +603,11 @@ retry: if (!r->initialized && nbits > 0) { r->entropy_total += nbits; - if (r->entropy_total > 128) + if (r->entropy_total > 128) { r->initialized = 1; + if (r == &nonblocking_pool) + prandom_reseed_late(); + } } trace_credit_entropy_bits(r->name, nbits, entropy_count, diff --git a/include/linux/random.h b/include/linux/random.h index bf9085e89fb5..5117ae348fe8 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -29,6 +29,7 @@ unsigned long randomize_range(unsigned long start, unsigned long end, unsigned l u32 prandom_u32(void); void prandom_bytes(void *buf, int nbytes); void prandom_seed(u32 seed); +void prandom_reseed_late(void); u32 prandom_u32_state(struct rnd_state *); void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes); diff --git a/lib/random32.c b/lib/random32.c index 12215df701e8..9f2f2fb03dfe 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -200,9 +200,18 @@ static void prandom_start_seed_timer(void) * Generate better values after random number generator * is fully initialized. */ -static int __init prandom_reseed(void) +static void __prandom_reseed(bool late) { int i; + unsigned long flags; + static bool latch = false; + static DEFINE_SPINLOCK(lock); + + /* only allow initial seeding (late == false) once */ + spin_lock_irqsave(&lock, flags); + if (latch && !late) + goto out; + latch = true; for_each_possible_cpu(i) { struct rnd_state *state = &per_cpu(net_rand_state,i); @@ -216,6 +225,18 @@ static int __init prandom_reseed(void) /* mix it in */ prandom_u32_state(state); } +out: + spin_unlock_irqrestore(&lock, flags); +} + +void prandom_reseed_late(void) +{ + __prandom_reseed(true); +} + +static int __init prandom_reseed(void) +{ + __prandom_reseed(false); prandom_start_seed_timer(); return 0; } -- cgit v1.2.3-71-gd317 From 38e9efcdb33270b4da72143d8e7ca4dcf7f0989b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 Nov 2013 12:20:35 +0100 Subject: random32: move rnd_state to linux/random.h struct rnd_state got mistakenly pulled into uapi header. It is not used anywhere and does also not belong there! Commit 5960164fde ("lib/random32: export pseudo-random number generator for modules"), the last commit on rnd_state before it got moved to uapi, says: This patch moves the definition of struct rnd_state and the inline __seed() function to linux/random.h. It renames the static __random32() function to prandom32() and exports it for use in modules. Hence, the structure was moved from lib/random32.c to linux/random.h so that it can be used within modules (FCoE-related code in this case), but not from user space. However, it seems to have been mistakenly moved to uapi header through the uapi script. Since no-one should make use of it from the linux headers, move the structure back to the kernel for internal use, so that it can be modified on demand. Joint work with Hannes Frederic Sowa. Cc: Joe Eykholt Signed-off-by: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/random.h | 4 ++++ include/uapi/linux/random.h | 7 ------- 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 5117ae348fe8..8ef0b70bd1f9 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -31,6 +31,10 @@ void prandom_bytes(void *buf, int nbytes); void prandom_seed(u32 seed); void prandom_reseed_late(void); +struct rnd_state { + __u32 s1, s2, s3; +}; + u32 prandom_u32_state(struct rnd_state *); void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes); diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h index 7471b5b3b8ba..fff3528a078f 100644 --- a/include/uapi/linux/random.h +++ b/include/uapi/linux/random.h @@ -40,11 +40,4 @@ struct rand_pool_info { __u32 buf[0]; }; -struct rnd_state { - __u32 s1, s2, s3; -}; - -/* Exported functions */ - - #endif /* _UAPI_LINUX_RANDOM_H */ -- cgit v1.2.3-71-gd317 From a98814cef87946d2708812ad9f8b1e03b8366b6f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 Nov 2013 12:20:36 +0100 Subject: random32: upgrade taus88 generator to taus113 from errata paper Since we use prandom*() functions quite often in networking code i.e. in UDP port selection, netfilter code, etc, upgrade the PRNG from Pierre L'Ecuyer's original paper "Maximally Equidistributed Combined Tausworthe Generators", Mathematics of Computation, 65, 213 (1996), 203--213 to the version published in his errata paper [1]. The Tausworthe generator is a maximally-equidistributed generator, that is fast and has good statistical properties [1]. The version presented there upgrades the 3 state LFSR to a 4 state LFSR with increased periodicity from about 2^88 to 2^113. The algorithm is presented in [1] by the very same author who also designed the original algorithm in [2]. Also, by increasing the state, we make it a bit harder for attackers to "guess" the PRNGs internal state. See also discussion in [3]. Now, as we use this sort of weak initialization discussed in [3] only between core_initcall() until late_initcall() time [*] for prandom32*() users, namely in prandom_init(), it is less relevant from late_initcall() onwards as we overwrite seeds through prandom_reseed() anyways with a seed source of higher entropy, that is, get_random_bytes(). In other words, a exhaustive keysearch of 96 bit would be needed. Now, with the help of this patch, this state-search increases further to 128 bit. Initialization needs to make sure that s1 > 1, s2 > 7, s3 > 15, s4 > 127. taus88 and taus113 algorithm is also part of GSL. I added a test case in the next patch to verify internal behaviour of this patch with GSL and ran tests with the dieharder 3.31.1 RNG test suite: $ dieharder -g 052 -a -m 10 -s 1 -S 4137730333 #taus88 $ dieharder -g 054 -a -m 10 -s 1 -S 4137730333 #taus113 With this seed configuration, in order to compare both, we get the following differences: algorithm taus88 taus113 rands/second [**] 1.61e+08 1.37e+08 sts_serial(4, 1st run) WEAK PASSED sts_serial(9, 2nd run) WEAK PASSED rgb_lagged_sum(31) WEAK PASSED We took out diehard_sums test as according to the authors it is considered broken and unusable [4]. Despite that and the slight decrease in performance (which is acceptable), taus113 here passes all 113 tests (only rgb_minimum_distance_5 in WEAK, the rest PASSED). In general, taus/taus113 is considered "very good" by the authors of dieharder [5]. The papers [1][2] states a single warm-up step is sufficient by running quicktaus once on each state to ensure proper initialization of ~s_{0}: Our selection of (s) according to Table 1 of [1] row 1 holds the condition L - k <= r - s, that is, (32 32 32 32) - (31 29 28 25) <= (25 27 15 22) - (18 2 7 13) with r = k - q and q = (6 2 13 3) as also stated by the paper. So according to [2] we are safe with one round of quicktaus for initialization. However we decided to include the warm-up phase of the PRNG as done in GSL in every case as a safety net. We also use the warm up phase to make the output of the RNG easier to verify by the GSL output. In prandom_init(), we also mix random_get_entropy() into it, just like drivers/char/random.c does it, jiffies ^ random_get_entropy(). random-get_entropy() is get_cycles(). xor is entropy preserving so it is fine if it is not implemented by some architectures. Note, this PRNG is *not* used for cryptography in the kernel, but rather as a fast PRNG for various randomizations i.e. in the networking code, or elsewhere for debugging purposes, for example. [*]: In order to generate some "sort of pseduo-randomness", since get_random_bytes() is not yet available for us, we use jiffies and initialize states s1 - s3 with a simple linear congruential generator (LCG), that is x <- x * 69069; and derive s2, s3, from the 32bit initialization from s1. So the above quote from [3] accounts only for the time from core to late initcall, not afterwards. [**] Single threaded run on MacBook Air w/ Intel Core i5-3317U [1] http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps [2] http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps [3] http://thread.gmane.org/gmane.comp.encryption.general/12103/ [4] http://code.google.com/p/dieharder/source/browse/trunk/libdieharder/diehard_sums.c?spec=svn490&r=490#20 [5] http://www.phy.duke.edu/~rgb/General/dieharder.php Joint work with Hannes Frederic Sowa. Cc: Florian Weimer Cc: Theodore Ts'o Signed-off-by: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/random.h | 11 +++---- lib/random32.c | 80 +++++++++++++++++++++++++++++--------------------- 2 files changed, 52 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 8ef0b70bd1f9..4002b3df4c85 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -32,10 +32,10 @@ void prandom_seed(u32 seed); void prandom_reseed_late(void); struct rnd_state { - __u32 s1, s2, s3; + __u32 s1, s2, s3, s4; }; -u32 prandom_u32_state(struct rnd_state *); +u32 prandom_u32_state(struct rnd_state *state); void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes); /* @@ -55,9 +55,10 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed) { u32 i = (seed >> 32) ^ (seed << 10) ^ seed; - state->s1 = __seed(i, 2); - state->s2 = __seed(i, 8); - state->s3 = __seed(i, 16); + state->s1 = __seed(i, 2U); + state->s2 = __seed(i, 8U); + state->s3 = __seed(i, 16U); + state->s4 = __seed(i, 128U); } #ifdef CONFIG_ARCH_RANDOM diff --git a/lib/random32.c b/lib/random32.c index 9f2f2fb03dfe..27adb753180f 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -2,19 +2,19 @@ This is a maximally equidistributed combined Tausworthe generator based on code from GNU Scientific Library 1.5 (30 Jun 2004) - x_n = (s1_n ^ s2_n ^ s3_n) + lfsr113 version: - s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19)) - s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25)) - s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11)) + x_n = (s1_n ^ s2_n ^ s3_n ^ s4_n) - The period of this generator is about 2^88. + s1_{n+1} = (((s1_n & 4294967294) << 18) ^ (((s1_n << 6) ^ s1_n) >> 13)) + s2_{n+1} = (((s2_n & 4294967288) << 2) ^ (((s2_n << 2) ^ s2_n) >> 27)) + s3_{n+1} = (((s3_n & 4294967280) << 7) ^ (((s3_n << 13) ^ s3_n) >> 21)) + s4_{n+1} = (((s4_n & 4294967168) << 13) ^ (((s4_n << 3) ^ s4_n) >> 12)) - From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe - Generators", Mathematics of Computation, 65, 213 (1996), 203--213. - - This is available on the net from L'Ecuyer's home page, + The period of this generator is about 2^113 (see erratum paper). + From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe + Generators", Mathematics of Computation, 65, 213 (1996), 203--213: http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps @@ -29,7 +29,7 @@ that paper.) This affects the seeding procedure by imposing the requirement - s1 > 1, s2 > 7, s3 > 15. + s1 > 1, s2 > 7, s3 > 15, s4 > 127. */ @@ -52,11 +52,12 @@ u32 prandom_u32_state(struct rnd_state *state) { #define TAUSWORTHE(s,a,b,c,d) ((s&c)<>b) - state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12); - state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4); - state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17); + state->s1 = TAUSWORTHE(state->s1, 6U, 13U, 4294967294U, 18U); + state->s2 = TAUSWORTHE(state->s2, 2U, 27U, 4294967288U, 2U); + state->s3 = TAUSWORTHE(state->s3, 13U, 21U, 4294967280U, 7U); + state->s4 = TAUSWORTHE(state->s4, 3U, 12U, 4294967168U, 13U); - return (state->s1 ^ state->s2 ^ state->s3); + return (state->s1 ^ state->s2 ^ state->s3 ^ state->s4); } EXPORT_SYMBOL(prandom_u32_state); @@ -126,6 +127,21 @@ void prandom_bytes(void *buf, int bytes) } EXPORT_SYMBOL(prandom_bytes); +static void prandom_warmup(struct rnd_state *state) +{ + /* Calling RNG ten times to satify recurrence condition */ + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); +} + /** * prandom_seed - add entropy to pseudo random number generator * @seed: seed value @@ -141,8 +157,9 @@ void prandom_seed(u32 entropy) */ for_each_possible_cpu (i) { struct rnd_state *state = &per_cpu(net_rand_state, i); - state->s1 = __seed(state->s1 ^ entropy, 2); - prandom_u32_state(state); + + state->s1 = __seed(state->s1 ^ entropy, 2U); + prandom_warmup(state); } } EXPORT_SYMBOL(prandom_seed); @@ -158,18 +175,13 @@ static int __init prandom_init(void) for_each_possible_cpu(i) { struct rnd_state *state = &per_cpu(net_rand_state,i); -#define LCG(x) ((x) * 69069) /* super-duper LCG */ - state->s1 = __seed(LCG(i + jiffies), 2); - state->s2 = __seed(LCG(state->s1), 8); - state->s3 = __seed(LCG(state->s2), 16); - - /* "warm it up" */ - prandom_u32_state(state); - prandom_u32_state(state); - prandom_u32_state(state); - prandom_u32_state(state); - prandom_u32_state(state); - prandom_u32_state(state); +#define LCG(x) ((x) * 69069U) /* super-duper LCG */ + state->s1 = __seed(LCG((i + jiffies) ^ random_get_entropy()), 2U); + state->s2 = __seed(LCG(state->s1), 8U); + state->s3 = __seed(LCG(state->s2), 16U); + state->s4 = __seed(LCG(state->s3), 128U); + + prandom_warmup(state); } return 0; } @@ -215,15 +227,15 @@ static void __prandom_reseed(bool late) for_each_possible_cpu(i) { struct rnd_state *state = &per_cpu(net_rand_state,i); - u32 seeds[3]; + u32 seeds[4]; get_random_bytes(&seeds, sizeof(seeds)); - state->s1 = __seed(seeds[0], 2); - state->s2 = __seed(seeds[1], 8); - state->s3 = __seed(seeds[2], 16); + state->s1 = __seed(seeds[0], 2U); + state->s2 = __seed(seeds[1], 8U); + state->s3 = __seed(seeds[2], 16U); + state->s4 = __seed(seeds[3], 128U); - /* mix it in */ - prandom_u32_state(state); + prandom_warmup(state); } out: spin_unlock_irqrestore(&lock, flags); -- cgit v1.2.3-71-gd317