From 2f268f129c2d1a05d297fe3ee34d393f862d2b22 Mon Sep 17 00:00:00 2001
From: Veaceslav Falico <vfalico@redhat.com>
Date: Wed, 25 Sep 2013 09:20:07 +0200
Subject: net: add adj_list to save only neighbours

Currently, we distinguish neighbours (first-level linked devices) from
non-neighbours by the neighbour bool in the netdev_adjacent. This could be
quite time-consuming in case we would like to traverse *only* through
neighbours - cause we'd have to traverse through all devices and check for
this flag, and in a (quite common) scenario where we have lots of vlans on
top of bridge, which is on top of a bond - the bonding would have to go
through all those vlans to get its upper neighbour linked devices.

This situation is really unpleasant, cause there are already a lot of cases
when a device with slaves needs to go through them in hot path.

To fix this, introduce a new upper/lower device lists structure -
adj_list, which contains only the neighbours. It works always in
pair with the all_adj_list structure (renamed from upper/lower_dev_list),
i.e. both of them contain the same links, only that all_adj_list contains
also non-neighbour device links. It's really a small change visible,
currently, only for __netdev_adjacent_dev_insert/remove(), and doesn't
change the main linked logic at all.

Also, add some comments a fix a name collision in
netdev_for_each_upper_dev_rcu() and rework the naming by the following
rules:

netdev_(all_)(upper|lower)_*

If "all_" is present, then we work with the whole list of upper/lower
devices, otherwise - only with direct neighbours. Uninline functions - to
get better stack traces.

CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
CC: Cong Wang <amwang@redhat.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3de49aca4519..514045c704a8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1143,8 +1143,18 @@ struct net_device {
 	struct list_head	dev_list;
 	struct list_head	napi_list;
 	struct list_head	unreg_list;
-	struct list_head	upper_dev_list; /* List of upper devices */
-	struct list_head	lower_dev_list;
+
+	/* directly linked devices, like slaves for bonding */
+	struct {
+		struct list_head upper;
+		struct list_head lower;
+	} adj_list;
+
+	/* all linked devices, *including* neighbours */
+	struct {
+		struct list_head upper;
+		struct list_head lower;
+	} all_adj_list;
 
 
 	/* currently active device features */
@@ -2813,15 +2823,15 @@ extern int		bpf_jit_enable;
 extern bool netdev_has_upper_dev(struct net_device *dev,
 				 struct net_device *upper_dev);
 extern bool netdev_has_any_upper_dev(struct net_device *dev);
-extern struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
-							struct list_head **iter);
+extern struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
+							    struct list_head **iter);
 
 /* iterate through upper list, must be called under RCU read lock */
-#define netdev_for_each_upper_dev_rcu(dev, upper, iter) \
-	for (iter = &(dev)->upper_dev_list, \
-	     upper = netdev_upper_get_next_dev_rcu(dev, &(iter)); \
-	     upper; \
-	     upper = netdev_upper_get_next_dev_rcu(dev, &(iter)))
+#define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \
+	for (iter = &(dev)->all_adj_list.upper, \
+	     updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)); \
+	     updev; \
+	     updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)))
 
 extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
 extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
-- 
cgit v1.2.3-71-gd317


From 402dae9614557296e84543008a8e582c28fb1db3 Mon Sep 17 00:00:00 2001
From: Veaceslav Falico <vfalico@redhat.com>
Date: Wed, 25 Sep 2013 09:20:09 +0200
Subject: net: add netdev_adjacent->private and allow to use it

Currently, even though we can access any linked device, we can't attach
anything to it, which is vital to properly manage them.

To fix this, add a new void *private to netdev_adjacent and functions
setting/getting it (per link), so that we can save, per example, bonding's
slave structures there, per slave device.

netdev_master_upper_dev_link_private(dev, upper_dev, private) links dev to
upper dev and populates the neighbour link only with private.

netdev_lower_dev_get_private{,_rcu}() returns the private, if found.

CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 +++++
 net/core/dev.c            | 68 +++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 64 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 514045c704a8..75d5beac463b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2839,8 +2839,15 @@ extern int netdev_upper_dev_link(struct net_device *dev,
 				 struct net_device *upper_dev);
 extern int netdev_master_upper_dev_link(struct net_device *dev,
 					struct net_device *upper_dev);
+extern int netdev_master_upper_dev_link_private(struct net_device *dev,
+						struct net_device *upper_dev,
+						void *private);
 extern void netdev_upper_dev_unlink(struct net_device *dev,
 				    struct net_device *upper_dev);
+extern void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
+					      struct net_device *lower_dev);
+extern void *netdev_lower_dev_get_private(struct net_device *dev,
+					  struct net_device *lower_dev);
 extern int skb_checksum_help(struct sk_buff *skb);
 extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 	netdev_features_t features, bool tx_path);
diff --git a/net/core/dev.c b/net/core/dev.c
index 9290f09cdf26..c69ab74fb201 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4376,6 +4376,9 @@ struct netdev_adjacent {
 	/* counter for the number of times this device was added to us */
 	u16 ref_nr;
 
+	/* private field for the users */
+	void *private;
+
 	struct list_head list;
 	struct rcu_head rcu;
 };
@@ -4510,7 +4513,7 @@ EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
 static int __netdev_adjacent_dev_insert(struct net_device *dev,
 					struct net_device *adj_dev,
 					struct list_head *dev_list,
-					bool master)
+					void *private, bool master)
 {
 	struct netdev_adjacent *adj;
 
@@ -4528,6 +4531,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 	adj->dev = adj_dev;
 	adj->master = master;
 	adj->ref_nr = 1;
+	adj->private = private;
 	dev_hold(adj_dev);
 
 	pr_debug("dev_hold for %s, because of link added from %s to %s\n",
@@ -4574,15 +4578,17 @@ int __netdev_adjacent_dev_link_lists(struct net_device *dev,
 				     struct net_device *upper_dev,
 				     struct list_head *up_list,
 				     struct list_head *down_list,
-				     bool master)
+				     void *private, bool master)
 {
 	int ret;
 
-	ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, master);
+	ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
+					   master);
 	if (ret)
 		return ret;
 
-	ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, false);
+	ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
+					   false);
 	if (ret) {
 		__netdev_adjacent_dev_remove(dev, upper_dev, up_list);
 		return ret;
@@ -4597,7 +4603,7 @@ int __netdev_adjacent_dev_link(struct net_device *dev,
 	return __netdev_adjacent_dev_link_lists(dev, upper_dev,
 						&dev->all_adj_list.upper,
 						&upper_dev->all_adj_list.lower,
-						false);
+						NULL, false);
 }
 
 void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
@@ -4619,7 +4625,7 @@ void __netdev_adjacent_dev_unlink(struct net_device *dev,
 
 int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
 					 struct net_device *upper_dev,
-					 bool master)
+					 void *private, bool master)
 {
 	int ret = __netdev_adjacent_dev_link(dev, upper_dev);
 
@@ -4629,7 +4635,7 @@ int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
 	ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
 					       &dev->adj_list.upper,
 					       &upper_dev->adj_list.lower,
-					       master);
+					       private, master);
 	if (ret) {
 		__netdev_adjacent_dev_unlink(dev, upper_dev);
 		return ret;
@@ -4648,7 +4654,8 @@ void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
 }
 
 static int __netdev_upper_dev_link(struct net_device *dev,
-				   struct net_device *upper_dev, bool master)
+				   struct net_device *upper_dev, bool master,
+				   void *private)
 {
 	struct netdev_adjacent *i, *j, *to_i, *to_j;
 	int ret = 0;
@@ -4668,7 +4675,8 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	if (master && netdev_master_upper_dev_get(dev))
 		return -EBUSY;
 
-	ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master);
+	ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
+						   master);
 	if (ret)
 		return ret;
 
@@ -4759,7 +4767,7 @@ rollback_mesh:
 int netdev_upper_dev_link(struct net_device *dev,
 			  struct net_device *upper_dev)
 {
-	return __netdev_upper_dev_link(dev, upper_dev, false);
+	return __netdev_upper_dev_link(dev, upper_dev, false, NULL);
 }
 EXPORT_SYMBOL(netdev_upper_dev_link);
 
@@ -4777,10 +4785,18 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
 int netdev_master_upper_dev_link(struct net_device *dev,
 				 struct net_device *upper_dev)
 {
-	return __netdev_upper_dev_link(dev, upper_dev, true);
+	return __netdev_upper_dev_link(dev, upper_dev, true, NULL);
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_link);
 
+int netdev_master_upper_dev_link_private(struct net_device *dev,
+					 struct net_device *upper_dev,
+					 void *private)
+{
+	return __netdev_upper_dev_link(dev, upper_dev, true, private);
+}
+EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
+
 /**
  * netdev_upper_dev_unlink - Removes a link to upper device
  * @dev: device
@@ -4818,6 +4834,36 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
+void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
+				       struct net_device *lower_dev)
+{
+	struct netdev_adjacent *lower;
+
+	if (!lower_dev)
+		return NULL;
+	lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower);
+	if (!lower)
+		return NULL;
+
+	return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu);
+
+void *netdev_lower_dev_get_private(struct net_device *dev,
+				   struct net_device *lower_dev)
+{
+	struct netdev_adjacent *lower;
+
+	if (!lower_dev)
+		return NULL;
+	lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower);
+	if (!lower)
+		return NULL;
+
+	return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_dev_get_private);
+
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
-- 
cgit v1.2.3-71-gd317


From 31088a113c2a948856ed2047d8c21c217b13e85d Mon Sep 17 00:00:00 2001
From: Veaceslav Falico <vfalico@redhat.com>
Date: Wed, 25 Sep 2013 09:20:12 +0200
Subject: net: add for_each iterators through neighbour lower link's private

Add a possibility to iterate through netdev_adjacent's private, currently
only for lower neighbours.

Add both RCU and RTNL/other locking variants of iterators, and make the
non-rcu variant to be safe from removal.

CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 17 ++++++++++++++
 net/core/dev.c            | 60 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 76 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 75d5beac463b..168974e40cf5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2833,6 +2833,23 @@ extern struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *d
 	     updev; \
 	     updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)))
 
+extern void *netdev_lower_get_next_private(struct net_device *dev,
+					   struct list_head **iter);
+extern void *netdev_lower_get_next_private_rcu(struct net_device *dev,
+					       struct list_head **iter);
+
+#define netdev_for_each_lower_private(dev, priv, iter) \
+	for (iter = (dev)->adj_list.lower.next, \
+	     priv = netdev_lower_get_next_private(dev, &(iter)); \
+	     priv; \
+	     priv = netdev_lower_get_next_private(dev, &(iter)))
+
+#define netdev_for_each_lower_private_rcu(dev, priv, iter) \
+	for (iter = &(dev)->adj_list.lower, \
+	     priv = netdev_lower_get_next_private_rcu(dev, &(iter)); \
+	     priv; \
+	     priv = netdev_lower_get_next_private_rcu(dev, &(iter)))
+
 extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
 extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
 extern int netdev_upper_dev_link(struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index c69ab74fb201..0aa844aae40b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4466,7 +4466,8 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_get);
 
-/* netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
+/**
+ * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
  * @dev: device
  * @iter: list_head ** of the current position
  *
@@ -4491,6 +4492,63 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
 
+/**
+ * netdev_lower_get_next_private - Get the next ->private from the
+ *				   lower neighbour list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent->private from the dev's lower neighbour
+ * list, starting from iter position. The caller must hold either hold the
+ * RTNL lock or its own locking that guarantees that the neighbour lower
+ * list will remain unchainged.
+ */
+void *netdev_lower_get_next_private(struct net_device *dev,
+				    struct list_head **iter)
+{
+	struct netdev_adjacent *lower;
+
+	lower = list_entry(*iter, struct netdev_adjacent, list);
+
+	if (&lower->list == &dev->adj_list.lower)
+		return NULL;
+
+	if (iter)
+		*iter = lower->list.next;
+
+	return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_get_next_private);
+
+/**
+ * netdev_lower_get_next_private_rcu - Get the next ->private from the
+ *				       lower neighbour list, RCU
+ *				       variant
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent->private from the dev's lower neighbour
+ * list, starting from iter position. The caller must hold RCU read lock.
+ */
+void *netdev_lower_get_next_private_rcu(struct net_device *dev,
+					struct list_head **iter)
+{
+	struct netdev_adjacent *lower;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+
+	if (&lower->list == &dev->adj_list.lower)
+		return NULL;
+
+	if (iter)
+		*iter = &lower->list;
+
+	return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
+
 /**
  * netdev_master_upper_dev_get_rcu - Get master upper device
  * @dev: device
-- 
cgit v1.2.3-71-gd317


From b6ccba4c681fdaf0070e580bf951badf7edc860b Mon Sep 17 00:00:00 2001
From: Veaceslav Falico <vfalico@redhat.com>
Date: Wed, 25 Sep 2013 09:20:23 +0200
Subject: net: add a possibility to get private from netdev_adjacent->list

It will be useful to get first/last element.

CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 10 ++++++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 168974e40cf5..b4cfb63f264e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2850,6 +2850,7 @@ extern void *netdev_lower_get_next_private_rcu(struct net_device *dev,
 	     priv; \
 	     priv = netdev_lower_get_next_private_rcu(dev, &(iter)))
 
+extern void *netdev_adjacent_get_private(struct list_head *adj_list);
 extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
 extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
 extern int netdev_upper_dev_link(struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index 0aa844aae40b..acc11810805f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4466,6 +4466,16 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_get);
 
+void *netdev_adjacent_get_private(struct list_head *adj_list)
+{
+	struct netdev_adjacent *adj;
+
+	adj = list_entry(adj_list, struct netdev_adjacent, list);
+
+	return adj->private;
+}
+EXPORT_SYMBOL(netdev_adjacent_get_private);
+
 /**
  * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
  * @dev: device
-- 
cgit v1.2.3-71-gd317


From a0f4ecf3494c9869d20f606e7e2b2f50f0e67a7f Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 26 Sep 2013 14:48:15 -0700
Subject: netfilter: Remove extern from function prototypes

There are a mix of function prototypes with and without extern
in the kernel sources.  Standardize on not using extern for
function prototypes.

Function prototypes don't need to be written with extern.
extern is assumed by the compiler.  Its use is as unnecessary as
using auto to declare automatic/local variables in a block.

Signed-off-by: Joe Perches <joe@perches.com>
---
 include/linux/netfilter.h                        |  10 +-
 include/linux/netfilter/nf_conntrack_common.h    |   2 +-
 include/linux/netfilter/nf_conntrack_h323.h      |  14 +--
 include/linux/netfilter/nf_conntrack_proto_gre.h |   4 +-
 include/linux/netfilter/nf_conntrack_sip.h       |  57 +++++-----
 include/linux/netfilter/nfnetlink.h              |  28 ++---
 include/linux/netfilter/nfnetlink_acct.h         |   6 +-
 include/linux/netfilter/x_tables.h               | 128 +++++++++++------------
 include/linux/netfilter_bridge.h                 |   4 +-
 include/linux/netfilter_ipv4.h                   |   6 +-
 include/linux/netfilter_ipv6.h                   |  10 +-
 11 files changed, 133 insertions(+), 136 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 708fe72ab913..61223c52414f 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -35,7 +35,7 @@ static inline void nf_inet_addr_mask(const union nf_inet_addr *a1,
 	result->all[3] = a1->all[3] & mask->all[3];
 }
 
-extern int netfilter_init(void);
+int netfilter_init(void);
 
 /* Largest hook number + 1 */
 #define NF_MAX_HOOKS 8
@@ -208,7 +208,7 @@ int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval,
 /* Call this before modifying an existing packet: ensures it is
    modifiable and linear to the point you care about (writable_len).
    Returns true or false. */
-extern int skb_make_writable(struct sk_buff *skb, unsigned int writable_len);
+int skb_make_writable(struct sk_buff *skb, unsigned int writable_len);
 
 struct flowi;
 struct nf_queue_entry;
@@ -269,8 +269,8 @@ nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
 	return csum;
 }
 
-extern int nf_register_afinfo(const struct nf_afinfo *afinfo);
-extern void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
+int nf_register_afinfo(const struct nf_afinfo *afinfo);
+void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
 
 #include <net/flow.h>
 extern void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
@@ -315,7 +315,7 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
-extern void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
+void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
 extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu;
 
 struct nf_conn;
diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 127d0b90604f..275505792664 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -23,6 +23,6 @@ struct ip_conntrack_stat {
 };
 
 /* call to create an explicit dependency on nf_conntrack. */
-extern void need_conntrack(void);
+void need_conntrack(void);
 
 #endif /* _NF_CONNTRACK_COMMON_H */
diff --git a/include/linux/netfilter/nf_conntrack_h323.h b/include/linux/netfilter/nf_conntrack_h323.h
index f381020eee92..858d9b214053 100644
--- a/include/linux/netfilter/nf_conntrack_h323.h
+++ b/include/linux/netfilter/nf_conntrack_h323.h
@@ -29,13 +29,13 @@ struct nf_ct_h323_master {
 
 struct nf_conn;
 
-extern int get_h225_addr(struct nf_conn *ct, unsigned char *data,
-			 TransportAddress *taddr,
-			 union nf_inet_addr *addr, __be16 *port);
-extern void nf_conntrack_h245_expect(struct nf_conn *new,
-				     struct nf_conntrack_expect *this);
-extern void nf_conntrack_q931_expect(struct nf_conn *new,
-				     struct nf_conntrack_expect *this);
+int get_h225_addr(struct nf_conn *ct, unsigned char *data,
+		  TransportAddress *taddr, union nf_inet_addr *addr,
+		  __be16 *port);
+void nf_conntrack_h245_expect(struct nf_conn *new,
+			      struct nf_conntrack_expect *this);
+void nf_conntrack_q931_expect(struct nf_conn *new,
+			      struct nf_conntrack_expect *this);
 extern int (*set_h245_addr_hook) (struct sk_buff *skb, unsigned int protoff,
 				  unsigned char **data, int dataoff,
 				  H245_TransportAddress *taddr,
diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index 6a0664c0c451..ec2ffaf418c8 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -87,8 +87,8 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 /* delete keymap entries */
 void nf_ct_gre_keymap_destroy(struct nf_conn *ct);
 
-extern void nf_ct_gre_keymap_flush(struct net *net);
-extern void nf_nat_need_gre(void);
+void nf_ct_gre_keymap_flush(struct net *net);
+void nf_nat_need_gre(void);
 
 #endif /* __KERNEL__ */
 #endif /* _CONNTRACK_PROTO_GRE_H */
diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index ba7f571a2b1c..5cac0207b95d 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -157,35 +157,34 @@ extern unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb,
 					     unsigned int medialen,
 					     union nf_inet_addr *rtp_addr);
 
-extern int ct_sip_parse_request(const struct nf_conn *ct,
-				const char *dptr, unsigned int datalen,
-				unsigned int *matchoff, unsigned int *matchlen,
-				union nf_inet_addr *addr, __be16 *port);
-extern int ct_sip_get_header(const struct nf_conn *ct, const char *dptr,
-			     unsigned int dataoff, unsigned int datalen,
-			     enum sip_header_types type,
-			     unsigned int *matchoff, unsigned int *matchlen);
-extern int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr,
-				   unsigned int *dataoff, unsigned int datalen,
-				   enum sip_header_types type, int *in_header,
-				   unsigned int *matchoff, unsigned int *matchlen,
-				   union nf_inet_addr *addr, __be16 *port);
-extern int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
-				      unsigned int dataoff, unsigned int datalen,
-				      const char *name,
-				      unsigned int *matchoff, unsigned int *matchlen,
-				      union nf_inet_addr *addr, bool delim);
-extern int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr,
-					unsigned int off, unsigned int datalen,
-					const char *name,
-					unsigned int *matchoff, unsigned int *matchen,
-					unsigned int *val);
-
-extern int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr,
-				 unsigned int dataoff, unsigned int datalen,
-				 enum sdp_header_types type,
-				 enum sdp_header_types term,
-				 unsigned int *matchoff, unsigned int *matchlen);
+int ct_sip_parse_request(const struct nf_conn *ct, const char *dptr,
+			 unsigned int datalen, unsigned int *matchoff,
+			 unsigned int *matchlen, union nf_inet_addr *addr,
+			 __be16 *port);
+int ct_sip_get_header(const struct nf_conn *ct, const char *dptr,
+		      unsigned int dataoff, unsigned int datalen,
+		      enum sip_header_types type, unsigned int *matchoff,
+		      unsigned int *matchlen);
+int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr,
+			    unsigned int *dataoff, unsigned int datalen,
+			    enum sip_header_types type, int *in_header,
+			    unsigned int *matchoff, unsigned int *matchlen,
+			    union nf_inet_addr *addr, __be16 *port);
+int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
+			       unsigned int dataoff, unsigned int datalen,
+			       const char *name, unsigned int *matchoff,
+			       unsigned int *matchlen, union nf_inet_addr *addr,
+			       bool delim);
+int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr,
+				 unsigned int off, unsigned int datalen,
+				 const char *name, unsigned int *matchoff,
+				 unsigned int *matchen, unsigned int *val);
+
+int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr,
+			  unsigned int dataoff, unsigned int datalen,
+			  enum sdp_header_types type,
+			  enum sdp_header_types term,
+			  unsigned int *matchoff, unsigned int *matchlen);
 
 #endif /* __KERNEL__ */
 #endif /* __NF_CONNTRACK_SIP_H__ */
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index cadb7402d7a7..4f68cd7141d2 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -25,20 +25,20 @@ struct nfnetlink_subsystem {
 	const struct nfnl_callback *cb;	/* callback for individual types */
 };
 
-extern int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
-extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n);
-
-extern int nfnetlink_has_listeners(struct net *net, unsigned int group);
-extern struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
-					   u32 dst_portid, gfp_t gfp_mask);
-extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
-			  unsigned int group, int echo, gfp_t flags);
-extern int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error);
-extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net,
-			     u32 portid, int flags);
-
-extern void nfnl_lock(__u8 subsys_id);
-extern void nfnl_unlock(__u8 subsys_id);
+int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
+int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n);
+
+int nfnetlink_has_listeners(struct net *net, unsigned int group);
+struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
+				    u32 dst_portid, gfp_t gfp_mask);
+int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
+		   unsigned int group, int echo, gfp_t flags);
+int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error);
+int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid,
+		      int flags);
+
+void nfnl_lock(__u8 subsys_id);
+void nfnl_unlock(__u8 subsys_id);
 
 #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \
 	MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys))
diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h
index bb4bbc9b7a18..b2e85e59f760 100644
--- a/include/linux/netfilter/nfnetlink_acct.h
+++ b/include/linux/netfilter/nfnetlink_acct.h
@@ -6,8 +6,8 @@
 
 struct nf_acct;
 
-extern struct nf_acct *nfnl_acct_find_get(const char *filter_name);
-extern void nfnl_acct_put(struct nf_acct *acct);
-extern void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
+struct nf_acct *nfnl_acct_find_get(const char *filter_name);
+void nfnl_acct_put(struct nf_acct *acct);
+void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
 
 #endif /* _NFNL_ACCT_H */
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index dd49566315c6..a3e215bb0241 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -229,50 +229,48 @@ struct xt_table_info {
 
 #define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \
 			  + nr_cpu_ids * sizeof(char *))
-extern int xt_register_target(struct xt_target *target);
-extern void xt_unregister_target(struct xt_target *target);
-extern int xt_register_targets(struct xt_target *target, unsigned int n);
-extern void xt_unregister_targets(struct xt_target *target, unsigned int n);
-
-extern int xt_register_match(struct xt_match *target);
-extern void xt_unregister_match(struct xt_match *target);
-extern int xt_register_matches(struct xt_match *match, unsigned int n);
-extern void xt_unregister_matches(struct xt_match *match, unsigned int n);
-
-extern int xt_check_match(struct xt_mtchk_param *,
-			  unsigned int size, u_int8_t proto, bool inv_proto);
-extern int xt_check_target(struct xt_tgchk_param *,
-			   unsigned int size, u_int8_t proto, bool inv_proto);
-
-extern struct xt_table *xt_register_table(struct net *net,
-					  const struct xt_table *table,
-					  struct xt_table_info *bootstrap,
-					  struct xt_table_info *newinfo);
-extern void *xt_unregister_table(struct xt_table *table);
-
-extern struct xt_table_info *xt_replace_table(struct xt_table *table,
-					      unsigned int num_counters,
-					      struct xt_table_info *newinfo,
-					      int *error);
-
-extern struct xt_match *xt_find_match(u8 af, const char *name, u8 revision);
-extern struct xt_target *xt_find_target(u8 af, const char *name, u8 revision);
-extern struct xt_match *xt_request_find_match(u8 af, const char *name,
-					      u8 revision);
-extern struct xt_target *xt_request_find_target(u8 af, const char *name,
-						u8 revision);
-extern int xt_find_revision(u8 af, const char *name, u8 revision,
-			    int target, int *err);
-
-extern struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
-					   const char *name);
-extern void xt_table_unlock(struct xt_table *t);
-
-extern int xt_proto_init(struct net *net, u_int8_t af);
-extern void xt_proto_fini(struct net *net, u_int8_t af);
-
-extern struct xt_table_info *xt_alloc_table_info(unsigned int size);
-extern void xt_free_table_info(struct xt_table_info *info);
+int xt_register_target(struct xt_target *target);
+void xt_unregister_target(struct xt_target *target);
+int xt_register_targets(struct xt_target *target, unsigned int n);
+void xt_unregister_targets(struct xt_target *target, unsigned int n);
+
+int xt_register_match(struct xt_match *target);
+void xt_unregister_match(struct xt_match *target);
+int xt_register_matches(struct xt_match *match, unsigned int n);
+void xt_unregister_matches(struct xt_match *match, unsigned int n);
+
+int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto,
+		   bool inv_proto);
+int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto,
+		    bool inv_proto);
+
+struct xt_table *xt_register_table(struct net *net,
+				   const struct xt_table *table,
+				   struct xt_table_info *bootstrap,
+				   struct xt_table_info *newinfo);
+void *xt_unregister_table(struct xt_table *table);
+
+struct xt_table_info *xt_replace_table(struct xt_table *table,
+				       unsigned int num_counters,
+				       struct xt_table_info *newinfo,
+				       int *error);
+
+struct xt_match *xt_find_match(u8 af, const char *name, u8 revision);
+struct xt_target *xt_find_target(u8 af, const char *name, u8 revision);
+struct xt_match *xt_request_find_match(u8 af, const char *name, u8 revision);
+struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision);
+int xt_find_revision(u8 af, const char *name, u8 revision, int target,
+		     int *err);
+
+struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
+				    const char *name);
+void xt_table_unlock(struct xt_table *t);
+
+int xt_proto_init(struct net *net, u_int8_t af);
+void xt_proto_fini(struct net *net, u_int8_t af);
+
+struct xt_table_info *xt_alloc_table_info(unsigned int size);
+void xt_free_table_info(struct xt_table_info *info);
 
 /**
  * xt_recseq - recursive seqcount for netfilter use
@@ -353,8 +351,8 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
 	return ret;
 }
 
-extern struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
-extern void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);
+struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
+void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);
 
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
@@ -414,25 +412,25 @@ struct _compat_xt_align {
 
 #define COMPAT_XT_ALIGN(s) __ALIGN_KERNEL((s), __alignof__(struct _compat_xt_align))
 
-extern void xt_compat_lock(u_int8_t af);
-extern void xt_compat_unlock(u_int8_t af);
-
-extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta);
-extern void xt_compat_flush_offsets(u_int8_t af);
-extern void xt_compat_init_offsets(u_int8_t af, unsigned int number);
-extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset);
-
-extern int xt_compat_match_offset(const struct xt_match *match);
-extern int xt_compat_match_from_user(struct xt_entry_match *m,
-				     void **dstptr, unsigned int *size);
-extern int xt_compat_match_to_user(const struct xt_entry_match *m,
-				   void __user **dstptr, unsigned int *size);
-
-extern int xt_compat_target_offset(const struct xt_target *target);
-extern void xt_compat_target_from_user(struct xt_entry_target *t,
-				       void **dstptr, unsigned int *size);
-extern int xt_compat_target_to_user(const struct xt_entry_target *t,
-				    void __user **dstptr, unsigned int *size);
+void xt_compat_lock(u_int8_t af);
+void xt_compat_unlock(u_int8_t af);
+
+int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta);
+void xt_compat_flush_offsets(u_int8_t af);
+void xt_compat_init_offsets(u_int8_t af, unsigned int number);
+int xt_compat_calc_jump(u_int8_t af, unsigned int offset);
+
+int xt_compat_match_offset(const struct xt_match *match);
+int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
+			      unsigned int *size);
+int xt_compat_match_to_user(const struct xt_entry_match *m,
+			    void __user **dstptr, unsigned int *size);
+
+int xt_compat_target_offset(const struct xt_target *target);
+void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
+				unsigned int *size);
+int xt_compat_target_to_user(const struct xt_entry_target *t,
+			     void __user **dstptr, unsigned int *size);
 
 #endif /* CONFIG_COMPAT */
 #endif /* _X_TABLES_H */
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index dfb4d9e52bcb..8ab1c278b66d 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -25,7 +25,7 @@ enum nf_br_hook_priorities {
 #define BRNF_PPPoE			0x20
 
 /* Only used in br_forward.c */
-extern int nf_bridge_copy_header(struct sk_buff *skb);
+int nf_bridge_copy_header(struct sk_buff *skb);
 static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
 {
 	if (skb->nf_bridge &&
@@ -53,7 +53,7 @@ static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
 	return 0;
 }
 
-extern int br_handle_frame_finish(struct sk_buff *skb);
+int br_handle_frame_finish(struct sk_buff *skb);
 /* Only used in br_device.c */
 static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
 {
diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index dfaf116b3e81..6e4591bb54d4 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -6,7 +6,7 @@
 
 #include <uapi/linux/netfilter_ipv4.h>
 
-extern int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type);
-extern __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
-				   unsigned int dataoff, u_int8_t protocol);
+int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type);
+__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+		       unsigned int dataoff, u_int8_t protocol);
 #endif /*__LINUX_IP_NETFILTER_H*/
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 2d4df6ce043e..64dad1cc1a4b 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -11,12 +11,12 @@
 
 
 #ifdef CONFIG_NETFILTER
-extern int ip6_route_me_harder(struct sk_buff *skb);
-extern __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
-				    unsigned int dataoff, u_int8_t protocol);
+int ip6_route_me_harder(struct sk_buff *skb);
+__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
+			unsigned int dataoff, u_int8_t protocol);
 
-extern int ipv6_netfilter_init(void);
-extern void ipv6_netfilter_fini(void);
+int ipv6_netfilter_init(void);
+void ipv6_netfilter_fini(void);
 
 /*
  * Hook functions for ipv6 to allow xt_* modules to be built-in even
-- 
cgit v1.2.3-71-gd317


From 7965bd4d71ef7cf1db00afb9e406ddfc13443c13 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 26 Sep 2013 14:48:15 -0700
Subject: net.h/skbuff.h: Remove extern from function prototypes

There are a mix of function prototypes with and without extern
in the kernel sources.  Standardize on not using extern for
function prototypes.

Function prototypes don't need to be written with extern.
extern is assumed by the compiler.  Its use is as unnecessary as
using auto to declare automatic/local variables in a block.

Signed-off-by: Joe Perches <joe@perches.com>
---
 include/linux/net.h    |  82 ++++++++--------
 include/linux/skbuff.h | 250 ++++++++++++++++++++++---------------------------
 2 files changed, 148 insertions(+), 184 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 4f27575ce1d6..ca9ec8540905 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -195,27 +195,23 @@ enum {
 	SOCK_WAKE_URG,
 };
 
-extern int	     sock_wake_async(struct socket *sk, int how, int band);
-extern int	     sock_register(const struct net_proto_family *fam);
-extern void	     sock_unregister(int family);
-extern int	     __sock_create(struct net *net, int family, int type, int proto,
-				 struct socket **res, int kern);
-extern int	     sock_create(int family, int type, int proto,
-				 struct socket **res);
-extern int	     sock_create_kern(int family, int type, int proto,
-				      struct socket **res);
-extern int	     sock_create_lite(int family, int type, int proto,
-				      struct socket **res); 
-extern void	     sock_release(struct socket *sock);
-extern int   	     sock_sendmsg(struct socket *sock, struct msghdr *msg,
-				  size_t len);
-extern int	     sock_recvmsg(struct socket *sock, struct msghdr *msg,
-				  size_t size, int flags);
-extern struct file  *sock_alloc_file(struct socket *sock, int flags, const char *dname);
-extern struct socket *sockfd_lookup(int fd, int *err);
-extern struct socket *sock_from_file(struct file *file, int *err);
+int sock_wake_async(struct socket *sk, int how, int band);
+int sock_register(const struct net_proto_family *fam);
+void sock_unregister(int family);
+int __sock_create(struct net *net, int family, int type, int proto,
+		  struct socket **res, int kern);
+int sock_create(int family, int type, int proto, struct socket **res);
+int sock_create_kern(int family, int type, int proto, struct socket **res);
+int sock_create_lite(int family, int type, int proto, struct socket **res);
+void sock_release(struct socket *sock);
+int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len);
+int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		 int flags);
+struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname);
+struct socket *sockfd_lookup(int fd, int *err);
+struct socket *sock_from_file(struct file *file, int *err);
 #define		     sockfd_put(sock) fput(sock->file)
-extern int	     net_ratelimit(void);
+int net_ratelimit(void);
 
 #define net_ratelimited_function(function, ...)			\
 do {								\
@@ -243,32 +239,28 @@ do {								\
 #define net_random()		prandom_u32()
 #define net_srandom(seed)	prandom_seed((__force u32)(seed))
 
-extern int   	     kernel_sendmsg(struct socket *sock, struct msghdr *msg,
-				    struct kvec *vec, size_t num, size_t len);
-extern int   	     kernel_recvmsg(struct socket *sock, struct msghdr *msg,
-				    struct kvec *vec, size_t num,
-				    size_t len, int flags);
+int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
+		   size_t num, size_t len);
+int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
+		   size_t num, size_t len, int flags);
 
-extern int kernel_bind(struct socket *sock, struct sockaddr *addr,
-		       int addrlen);
-extern int kernel_listen(struct socket *sock, int backlog);
-extern int kernel_accept(struct socket *sock, struct socket **newsock,
-			 int flags);
-extern int kernel_connect(struct socket *sock, struct sockaddr *addr,
-			  int addrlen, int flags);
-extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
-			      int *addrlen);
-extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
-			      int *addrlen);
-extern int kernel_getsockopt(struct socket *sock, int level, int optname,
-			     char *optval, int *optlen);
-extern int kernel_setsockopt(struct socket *sock, int level, int optname,
-			     char *optval, unsigned int optlen);
-extern int kernel_sendpage(struct socket *sock, struct page *page, int offset,
-			   size_t size, int flags);
-extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
-extern int kernel_sock_shutdown(struct socket *sock,
-				enum sock_shutdown_cmd how);
+int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen);
+int kernel_listen(struct socket *sock, int backlog);
+int kernel_accept(struct socket *sock, struct socket **newsock, int flags);
+int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
+		   int flags);
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
+		       int *addrlen);
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
+		       int *addrlen);
+int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval,
+		      int *optlen);
+int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval,
+		      unsigned int optlen);
+int kernel_sendpage(struct socket *sock, struct page *page, int offset,
+		    size_t size, int flags);
+int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
+int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
 
 #define MODULE_ALIAS_NETPROTO(proto) \
 	MODULE_ALIAS("net-pf-" __stringify(proto))
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2ddb48d9312c..6d56840e561e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -585,8 +585,8 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
 	skb->_skb_refdst = (unsigned long)dst;
 }
 
-extern void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
-				bool force);
+void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
+			 bool force);
 
 /**
  * skb_dst_set_noref - sets skb dst, hopefully, without taking reference
@@ -634,20 +634,20 @@ static inline struct rtable *skb_rtable(const struct sk_buff *skb)
 	return (struct rtable *)skb_dst(skb);
 }
 
-extern void kfree_skb(struct sk_buff *skb);
-extern void kfree_skb_list(struct sk_buff *segs);
-extern void skb_tx_error(struct sk_buff *skb);
-extern void consume_skb(struct sk_buff *skb);
-extern void	       __kfree_skb(struct sk_buff *skb);
+void kfree_skb(struct sk_buff *skb);
+void kfree_skb_list(struct sk_buff *segs);
+void skb_tx_error(struct sk_buff *skb);
+void consume_skb(struct sk_buff *skb);
+void  __kfree_skb(struct sk_buff *skb);
 extern struct kmem_cache *skbuff_head_cache;
 
-extern void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
-extern bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
-			     bool *fragstolen, int *delta_truesize);
+void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
+bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
+		      bool *fragstolen, int *delta_truesize);
 
-extern struct sk_buff *__alloc_skb(unsigned int size,
-				   gfp_t priority, int flags, int node);
-extern struct sk_buff *build_skb(void *data, unsigned int frag_size);
+struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags,
+			    int node);
+struct sk_buff *build_skb(void *data, unsigned int frag_size);
 static inline struct sk_buff *alloc_skb(unsigned int size,
 					gfp_t priority)
 {
@@ -660,41 +660,33 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 	return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE);
 }
 
-extern struct sk_buff *__alloc_skb_head(gfp_t priority, int node);
+struct sk_buff *__alloc_skb_head(gfp_t priority, int node);
 static inline struct sk_buff *alloc_skb_head(gfp_t priority)
 {
 	return __alloc_skb_head(priority, -1);
 }
 
-extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
-extern int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
-extern struct sk_buff *skb_clone(struct sk_buff *skb,
-				 gfp_t priority);
-extern struct sk_buff *skb_copy(const struct sk_buff *skb,
-				gfp_t priority);
-extern struct sk_buff *__pskb_copy(struct sk_buff *skb,
-				 int headroom, gfp_t gfp_mask);
-
-extern int	       pskb_expand_head(struct sk_buff *skb,
-					int nhead, int ntail,
-					gfp_t gfp_mask);
-extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
-					    unsigned int headroom);
-extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
-				       int newheadroom, int newtailroom,
-				       gfp_t priority);
-extern int	       skb_to_sgvec(struct sk_buff *skb,
-				    struct scatterlist *sg, int offset,
-				    int len);
-extern int	       skb_cow_data(struct sk_buff *skb, int tailbits,
-				    struct sk_buff **trailer);
-extern int	       skb_pad(struct sk_buff *skb, int pad);
+struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
+int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
+struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
+struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority);
+struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask);
+
+int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask);
+struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
+				     unsigned int headroom);
+struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
+				int newtailroom, gfp_t priority);
+int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset,
+		 int len);
+int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
+int skb_pad(struct sk_buff *skb, int pad);
 #define dev_kfree_skb(a)	consume_skb(a)
 
-extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
-			int getfrag(void *from, char *to, int offset,
-			int len,int odd, struct sk_buff *skb),
-			void *from, int length);
+int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
+			    int getfrag(void *from, char *to, int offset,
+					int len, int odd, struct sk_buff *skb),
+			    void *from, int length);
 
 struct skb_seq_state {
 	__u32		lower_offset;
@@ -706,18 +698,17 @@ struct skb_seq_state {
 	__u8		*frag_data;
 };
 
-extern void	      skb_prepare_seq_read(struct sk_buff *skb,
-					   unsigned int from, unsigned int to,
-					   struct skb_seq_state *st);
-extern unsigned int   skb_seq_read(unsigned int consumed, const u8 **data,
-				   struct skb_seq_state *st);
-extern void	      skb_abort_seq_read(struct skb_seq_state *st);
+void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
+			  unsigned int to, struct skb_seq_state *st);
+unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
+			  struct skb_seq_state *st);
+void skb_abort_seq_read(struct skb_seq_state *st);
 
-extern unsigned int   skb_find_text(struct sk_buff *skb, unsigned int from,
-				    unsigned int to, struct ts_config *config,
-				    struct ts_state *state);
+unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
+			   unsigned int to, struct ts_config *config,
+			   struct ts_state *state);
 
-extern void __skb_get_rxhash(struct sk_buff *skb);
+void __skb_get_rxhash(struct sk_buff *skb);
 static inline __u32 skb_get_rxhash(struct sk_buff *skb)
 {
 	if (!skb->l4_rxhash)
@@ -1095,7 +1086,8 @@ static inline void skb_queue_head_init_class(struct sk_buff_head *list,
  *	The "__skb_xxxx()" functions are the non-atomic ones that
  *	can only be called with interrupts disabled.
  */
-extern void        skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list);
+void skb_insert(struct sk_buff *old, struct sk_buff *newsk,
+		struct sk_buff_head *list);
 static inline void __skb_insert(struct sk_buff *newsk,
 				struct sk_buff *prev, struct sk_buff *next,
 				struct sk_buff_head *list)
@@ -1201,8 +1193,8 @@ static inline void __skb_queue_after(struct sk_buff_head *list,
 	__skb_insert(newsk, prev, prev->next, list);
 }
 
-extern void skb_append(struct sk_buff *old, struct sk_buff *newsk,
-		       struct sk_buff_head *list);
+void skb_append(struct sk_buff *old, struct sk_buff *newsk,
+		struct sk_buff_head *list);
 
 static inline void __skb_queue_before(struct sk_buff_head *list,
 				      struct sk_buff *next,
@@ -1221,7 +1213,7 @@ static inline void __skb_queue_before(struct sk_buff_head *list,
  *
  *	A buffer cannot be placed on two lists at the same time.
  */
-extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
+void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
 static inline void __skb_queue_head(struct sk_buff_head *list,
 				    struct sk_buff *newsk)
 {
@@ -1238,7 +1230,7 @@ static inline void __skb_queue_head(struct sk_buff_head *list,
  *
  *	A buffer cannot be placed on two lists at the same time.
  */
-extern void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);
+void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);
 static inline void __skb_queue_tail(struct sk_buff_head *list,
 				   struct sk_buff *newsk)
 {
@@ -1249,7 +1241,7 @@ static inline void __skb_queue_tail(struct sk_buff_head *list,
  * remove sk_buff from list. _Must_ be called atomically, and with
  * the list known..
  */
-extern void	   skb_unlink(struct sk_buff *skb, struct sk_buff_head *list);
+void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list);
 static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
 {
 	struct sk_buff *next, *prev;
@@ -1270,7 +1262,7 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
  *	so must be used with appropriate locks held only. The head item is
  *	returned or %NULL if the list is empty.
  */
-extern struct sk_buff *skb_dequeue(struct sk_buff_head *list);
+struct sk_buff *skb_dequeue(struct sk_buff_head *list);
 static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
 {
 	struct sk_buff *skb = skb_peek(list);
@@ -1287,7 +1279,7 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
  *	so must be used with appropriate locks held only. The tail item is
  *	returned or %NULL if the list is empty.
  */
-extern struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
+struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
 static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
 {
 	struct sk_buff *skb = skb_peek_tail(list);
@@ -1373,8 +1365,8 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
 	skb_shinfo(skb)->nr_frags = i + 1;
 }
 
-extern void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page,
-			    int off, int size, unsigned int truesize);
+void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
+		     int size, unsigned int truesize);
 
 #define SKB_PAGE_ASSERT(skb) 	BUG_ON(skb_shinfo(skb)->nr_frags)
 #define SKB_FRAG_ASSERT(skb) 	BUG_ON(skb_has_frag_list(skb))
@@ -1418,7 +1410,7 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
 /*
  *	Add data to an sk_buff
  */
-extern unsigned char *skb_put(struct sk_buff *skb, unsigned int len);
+unsigned char *skb_put(struct sk_buff *skb, unsigned int len);
 static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
 {
 	unsigned char *tmp = skb_tail_pointer(skb);
@@ -1428,7 +1420,7 @@ static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
 	return tmp;
 }
 
-extern unsigned char *skb_push(struct sk_buff *skb, unsigned int len);
+unsigned char *skb_push(struct sk_buff *skb, unsigned int len);
 static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
 {
 	skb->data -= len;
@@ -1436,7 +1428,7 @@ static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
 	return skb->data;
 }
 
-extern unsigned char *skb_pull(struct sk_buff *skb, unsigned int len);
+unsigned char *skb_pull(struct sk_buff *skb, unsigned int len);
 static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len)
 {
 	skb->len -= len;
@@ -1449,7 +1441,7 @@ static inline unsigned char *skb_pull_inline(struct sk_buff *skb, unsigned int l
 	return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
 }
 
-extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta);
+unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta);
 
 static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len)
 {
@@ -1753,7 +1745,7 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
 #define NET_SKB_PAD	max(32, L1_CACHE_BYTES)
 #endif
 
-extern int ___pskb_trim(struct sk_buff *skb, unsigned int len);
+int ___pskb_trim(struct sk_buff *skb, unsigned int len);
 
 static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
 {
@@ -1765,7 +1757,7 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
 	skb_set_tail_pointer(skb, len);
 }
 
-extern void skb_trim(struct sk_buff *skb, unsigned int len);
+void skb_trim(struct sk_buff *skb, unsigned int len);
 
 static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
 {
@@ -1838,7 +1830,7 @@ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask)
  *	the list and one reference dropped. This function does not take the
  *	list lock and the caller must hold the relevant locks to use it.
  */
-extern void skb_queue_purge(struct sk_buff_head *list);
+void skb_queue_purge(struct sk_buff_head *list);
 static inline void __skb_queue_purge(struct sk_buff_head *list)
 {
 	struct sk_buff *skb;
@@ -1850,11 +1842,10 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
 #define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
 #define NETDEV_PAGECNT_MAX_BIAS	   NETDEV_FRAG_PAGE_MAX_SIZE
 
-extern void *netdev_alloc_frag(unsigned int fragsz);
+void *netdev_alloc_frag(unsigned int fragsz);
 
-extern struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
-					  unsigned int length,
-					  gfp_t gfp_mask);
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length,
+				   gfp_t gfp_mask);
 
 /**
  *	netdev_alloc_skb - allocate an skbuff for rx on a specific device
@@ -2342,60 +2333,42 @@ static inline void skb_frag_add_head(struct sk_buff *skb, struct sk_buff *frag)
 #define skb_walk_frags(skb, iter)	\
 	for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)
 
-extern struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
-					   int *peeked, int *off, int *err);
-extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
-					 int noblock, int *err);
-extern unsigned int    datagram_poll(struct file *file, struct socket *sock,
-				     struct poll_table_struct *wait);
-extern int	       skb_copy_datagram_iovec(const struct sk_buff *from,
-					       int offset, struct iovec *to,
-					       int size);
-extern int	       skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
-							int hlen,
-							struct iovec *iov);
-extern int	       skb_copy_datagram_from_iovec(struct sk_buff *skb,
-						    int offset,
-						    const struct iovec *from,
-						    int from_offset,
-						    int len);
-extern int	       zerocopy_sg_from_iovec(struct sk_buff *skb,
-					      const struct iovec *frm,
-					      int offset,
-					      size_t count);
-extern int	       skb_copy_datagram_const_iovec(const struct sk_buff *from,
-						     int offset,
-						     const struct iovec *to,
-						     int to_offset,
-						     int size);
-extern void	       skb_free_datagram(struct sock *sk, struct sk_buff *skb);
-extern void	       skb_free_datagram_locked(struct sock *sk,
-						struct sk_buff *skb);
-extern int	       skb_kill_datagram(struct sock *sk, struct sk_buff *skb,
-					 unsigned int flags);
-extern __wsum	       skb_checksum(const struct sk_buff *skb, int offset,
-				    int len, __wsum csum);
-extern int	       skb_copy_bits(const struct sk_buff *skb, int offset,
-				     void *to, int len);
-extern int	       skb_store_bits(struct sk_buff *skb, int offset,
-				      const void *from, int len);
-extern __wsum	       skb_copy_and_csum_bits(const struct sk_buff *skb,
-					      int offset, u8 *to, int len,
-					      __wsum csum);
-extern int             skb_splice_bits(struct sk_buff *skb,
-						unsigned int offset,
-						struct pipe_inode_info *pipe,
-						unsigned int len,
-						unsigned int flags);
-extern void	       skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
-extern void	       skb_split(struct sk_buff *skb,
-				 struct sk_buff *skb1, const u32 len);
-extern int	       skb_shift(struct sk_buff *tgt, struct sk_buff *skb,
-				 int shiftlen);
-extern void	       skb_scrub_packet(struct sk_buff *skb, bool xnet);
-
-extern struct sk_buff *skb_segment(struct sk_buff *skb,
-				   netdev_features_t features);
+struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
+				    int *peeked, int *off, int *err);
+struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
+				  int *err);
+unsigned int datagram_poll(struct file *file, struct socket *sock,
+			   struct poll_table_struct *wait);
+int skb_copy_datagram_iovec(const struct sk_buff *from, int offset,
+			    struct iovec *to, int size);
+int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen,
+				     struct iovec *iov);
+int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
+				 const struct iovec *from, int from_offset,
+				 int len);
+int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *frm,
+			   int offset, size_t count);
+int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset,
+				  const struct iovec *to, int to_offset,
+				  int size);
+void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
+void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb);
+int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags);
+__wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
+		    __wsum csum);
+int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
+int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
+__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
+			      int len, __wsum csum);
+int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
+		    struct pipe_inode_info *pipe, unsigned int len,
+		    unsigned int flags);
+void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
+void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
+int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
+void skb_scrub_packet(struct sk_buff *skb, bool xnet);
+
+struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
 
 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
 				       int len, void *buffer)
@@ -2440,7 +2413,7 @@ static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,
 	memcpy(skb->data + offset, from, len);
 }
 
-extern void skb_init(void);
+void skb_init(void);
 
 static inline ktime_t skb_get_ktime(const struct sk_buff *skb)
 {
@@ -2483,12 +2456,12 @@ static inline ktime_t net_invalid_timestamp(void)
 	return ktime_set(0, 0);
 }
 
-extern void skb_timestamping_init(void);
+void skb_timestamping_init(void);
 
 #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
 
-extern void skb_clone_tx_timestamp(struct sk_buff *skb);
-extern bool skb_defer_rx_timestamp(struct sk_buff *skb);
+void skb_clone_tx_timestamp(struct sk_buff *skb);
+bool skb_defer_rx_timestamp(struct sk_buff *skb);
 
 #else /* CONFIG_NETWORK_PHY_TIMESTAMPING */
 
@@ -2529,8 +2502,8 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
  * generates a software time stamp (otherwise), then queues the clone
  * to the error queue of the socket.  Errors are silently ignored.
  */
-extern void skb_tstamp_tx(struct sk_buff *orig_skb,
-			struct skb_shared_hwtstamps *hwtstamps);
+void skb_tstamp_tx(struct sk_buff *orig_skb,
+		   struct skb_shared_hwtstamps *hwtstamps);
 
 static inline void sw_tx_timestamp(struct sk_buff *skb)
 {
@@ -2562,8 +2535,8 @@ static inline void skb_tx_timestamp(struct sk_buff *skb)
  */
 void skb_complete_wifi_ack(struct sk_buff *skb, bool acked);
 
-extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
-extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
+__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
+__sum16 __skb_checksum_complete(struct sk_buff *skb);
 
 static inline int skb_csum_unnecessary(const struct sk_buff *skb)
 {
@@ -2593,7 +2566,7 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
 }
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-extern void nf_conntrack_destroy(struct nf_conntrack *nfct);
+void nf_conntrack_destroy(struct nf_conntrack *nfct);
 static inline void nf_conntrack_put(struct nf_conntrack *nfct)
 {
 	if (nfct && atomic_dec_and_test(&nfct->use))
@@ -2732,9 +2705,8 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
 	return skb->queue_mapping != 0;
 }
 
-extern u16 __skb_tx_hash(const struct net_device *dev,
-			 const struct sk_buff *skb,
-			 unsigned int num_tx_queues);
+u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+		  unsigned int num_tx_queues);
 
 #ifdef CONFIG_XFRM
 static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
@@ -2788,7 +2760,7 @@ static inline bool skb_is_gso_v6(const struct sk_buff *skb)
 	return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
 }
 
-extern void __skb_warn_lro_forwarding(const struct sk_buff *skb);
+void __skb_warn_lro_forwarding(const struct sk_buff *skb);
 
 static inline bool skb_warn_if_lro(const struct sk_buff *skb)
 {
-- 
cgit v1.2.3-71-gd317


From f629d208d27a22f495b7734eede585b5d207e912 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 26 Sep 2013 14:48:15 -0700
Subject: [networking]device.h: Remove extern from function prototypes

There are a mix of function prototypes with and without extern
in the kernel sources.  Standardize on not using extern for
function prototypes.

Function prototypes don't need to be written with extern.
extern is assumed by the compiler.  Its use is as unnecessary as
using auto to declare automatic/local variables in a block.

Signed-off-by: Joe Perches <joe@perches.com>
---
 include/linux/etherdevice.h |  35 ++--
 include/linux/fcdevice.h    |   2 +-
 include/linux/fddidevice.h  |   7 +-
 include/linux/hippidevice.h |  10 +-
 include/linux/inetdevice.h  |  28 +--
 include/linux/netdevice.h   | 432 +++++++++++++++++++++-----------------------
 6 files changed, 248 insertions(+), 266 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index d8b512496e50..fc4a9aa7dd82 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -28,27 +28,24 @@
 #include <asm/unaligned.h>
 
 #ifdef __KERNEL__
-extern __be16		eth_type_trans(struct sk_buff *skb, struct net_device *dev);
+__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
 extern const struct header_ops eth_header_ops;
 
-extern int eth_header(struct sk_buff *skb, struct net_device *dev,
-		      unsigned short type,
-		      const void *daddr, const void *saddr, unsigned len);
-extern int eth_rebuild_header(struct sk_buff *skb);
-extern int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr);
-extern int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type);
-extern void eth_header_cache_update(struct hh_cache *hh,
-				    const struct net_device *dev,
-				    const unsigned char *haddr);
-extern int eth_prepare_mac_addr_change(struct net_device *dev, void *p);
-extern void eth_commit_mac_addr_change(struct net_device *dev, void *p);
-extern int eth_mac_addr(struct net_device *dev, void *p);
-extern int eth_change_mtu(struct net_device *dev, int new_mtu);
-extern int eth_validate_addr(struct net_device *dev);
-
-
-
-extern struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
+int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
+	       const void *daddr, const void *saddr, unsigned len);
+int eth_rebuild_header(struct sk_buff *skb);
+int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr);
+int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh,
+		     __be16 type);
+void eth_header_cache_update(struct hh_cache *hh, const struct net_device *dev,
+			     const unsigned char *haddr);
+int eth_prepare_mac_addr_change(struct net_device *dev, void *p);
+void eth_commit_mac_addr_change(struct net_device *dev, void *p);
+int eth_mac_addr(struct net_device *dev, void *p);
+int eth_change_mtu(struct net_device *dev, int new_mtu);
+int eth_validate_addr(struct net_device *dev);
+
+struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
 					    unsigned int rxqs);
 #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
 #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count)
diff --git a/include/linux/fcdevice.h b/include/linux/fcdevice.h
index e460ef831984..5009fa16b5d8 100644
--- a/include/linux/fcdevice.h
+++ b/include/linux/fcdevice.h
@@ -27,7 +27,7 @@
 #include <linux/if_fc.h>
 
 #ifdef __KERNEL__
-extern struct net_device *alloc_fcdev(int sizeof_priv);
+struct net_device *alloc_fcdev(int sizeof_priv);
 #endif
 
 #endif	/* _LINUX_FCDEVICE_H */
diff --git a/include/linux/fddidevice.h b/include/linux/fddidevice.h
index 155bafd9e886..9a79f0106da1 100644
--- a/include/linux/fddidevice.h
+++ b/include/linux/fddidevice.h
@@ -25,10 +25,9 @@
 #include <linux/if_fddi.h>
 
 #ifdef __KERNEL__
-extern __be16	fddi_type_trans(struct sk_buff *skb,
-				struct net_device *dev);
-extern int fddi_change_mtu(struct net_device *dev, int new_mtu);
-extern struct net_device *alloc_fddidev(int sizeof_priv);
+__be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev);
+int fddi_change_mtu(struct net_device *dev, int new_mtu);
+struct net_device *alloc_fddidev(int sizeof_priv);
 #endif
 
 #endif	/* _LINUX_FDDIDEVICE_H */
diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h
index f148e4908410..8ec23fb0b412 100644
--- a/include/linux/hippidevice.h
+++ b/include/linux/hippidevice.h
@@ -31,11 +31,11 @@ struct hippi_cb {
 	__u32	ifield;
 };
 
-extern __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev);
-extern int hippi_change_mtu(struct net_device *dev, int new_mtu);
-extern int hippi_mac_addr(struct net_device *dev, void *p);
-extern int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p);
-extern struct net_device *alloc_hippi_dev(int sizeof_priv);
+__be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev);
+int hippi_change_mtu(struct net_device *dev, int new_mtu);
+int hippi_mac_addr(struct net_device *dev, void *p);
+int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p);
+struct net_device *alloc_hippi_dev(int sizeof_priv);
 #endif
 
 #endif	/* _LINUX_HIPPIDEVICE_H */
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 79640e015a86..0d678aefe69d 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -147,25 +147,27 @@ struct in_ifaddr {
 	unsigned long		ifa_tstamp; /* updated timestamp */
 };
 
-extern int register_inetaddr_notifier(struct notifier_block *nb);
-extern int unregister_inetaddr_notifier(struct notifier_block *nb);
+int register_inetaddr_notifier(struct notifier_block *nb);
+int unregister_inetaddr_notifier(struct notifier_block *nb);
 
-extern void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
-					struct ipv4_devconf *devconf);
+void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
+				 struct ipv4_devconf *devconf);
 
-extern struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref);
+struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref);
 static inline struct net_device *ip_dev_find(struct net *net, __be32 addr)
 {
 	return __ip_dev_find(net, addr, true);
 }
 
-extern int		inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b);
-extern int		devinet_ioctl(struct net *net, unsigned int cmd, void __user *);
-extern void		devinet_init(void);
-extern struct in_device	*inetdev_by_index(struct net *, int);
-extern __be32		inet_select_addr(const struct net_device *dev, __be32 dst, int scope);
-extern __be32		inet_confirm_addr(struct in_device *in_dev, __be32 dst, __be32 local, int scope);
-extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask);
+int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b);
+int devinet_ioctl(struct net *net, unsigned int cmd, void __user *);
+void devinet_init(void);
+struct in_device *inetdev_by_index(struct net *, int);
+__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope);
+__be32 inet_confirm_addr(struct in_device *in_dev, __be32 dst, __be32 local,
+			 int scope);
+struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
+				    __be32 mask);
 
 static __inline__ int inet_ifa_match(__be32 addr, struct in_ifaddr *ifa)
 {
@@ -218,7 +220,7 @@ static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev)
 	return rtnl_dereference(dev->ip_ptr);
 }
 
-extern void in_dev_finish_destroy(struct in_device *idev);
+void in_dev_finish_destroy(struct in_device *idev);
 
 static inline void in_dev_put(struct in_device *idev)
 {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b4cfb63f264e..5f01af3927ca 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -60,8 +60,8 @@ struct wireless_dev;
 #define SET_ETHTOOL_OPS(netdev,ops) \
 	( (netdev)->ethtool_ops = (ops) )
 
-extern void netdev_set_default_ethtool_ops(struct net_device *dev,
-					   const struct ethtool_ops *ops);
+void netdev_set_default_ethtool_ops(struct net_device *dev,
+				    const struct ethtool_ops *ops);
 
 /* hardware address assignment types */
 #define NET_ADDR_PERM		0	/* address is permanent (default) */
@@ -298,7 +298,7 @@ struct netdev_boot_setup {
 };
 #define NETDEV_BOOT_SETUP_MAX 8
 
-extern int __init netdev_boot_setup(char *str);
+int __init netdev_boot_setup(char *str);
 
 /*
  * Structure for NAPI scheduling similar to tasklet but with weighting
@@ -394,7 +394,7 @@ enum rx_handler_result {
 typedef enum rx_handler_result rx_handler_result_t;
 typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb);
 
-extern void __napi_schedule(struct napi_struct *n);
+void __napi_schedule(struct napi_struct *n);
 
 static inline bool napi_disable_pending(struct napi_struct *n)
 {
@@ -445,8 +445,8 @@ static inline bool napi_reschedule(struct napi_struct *napi)
  *
  * Mark NAPI processing as complete.
  */
-extern void __napi_complete(struct napi_struct *n);
-extern void napi_complete(struct napi_struct *n);
+void __napi_complete(struct napi_struct *n);
+void napi_complete(struct napi_struct *n);
 
 /**
  *	napi_by_id - lookup a NAPI by napi_id
@@ -455,7 +455,7 @@ extern void napi_complete(struct napi_struct *n);
  * lookup @napi_id in napi_hash table
  * must be called under rcu_read_lock()
  */
-extern struct napi_struct *napi_by_id(unsigned int napi_id);
+struct napi_struct *napi_by_id(unsigned int napi_id);
 
 /**
  *	napi_hash_add - add a NAPI to global hashtable
@@ -463,7 +463,7 @@ extern struct napi_struct *napi_by_id(unsigned int napi_id);
  *
  * generate a new napi_id and store a @napi under it in napi_hash
  */
-extern void napi_hash_add(struct napi_struct *napi);
+void napi_hash_add(struct napi_struct *napi);
 
 /**
  *	napi_hash_del - remove a NAPI from global table
@@ -472,7 +472,7 @@ extern void napi_hash_add(struct napi_struct *napi);
  * Warning: caller must observe rcu grace period
  * before freeing memory containing @napi
  */
-extern void napi_hash_del(struct napi_struct *napi);
+void napi_hash_del(struct napi_struct *napi);
 
 /**
  *	napi_disable - prevent NAPI from scheduling
@@ -664,8 +664,8 @@ static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table,
 extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
 
 #ifdef CONFIG_RFS_ACCEL
-extern bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
-				u32 flow_id, u16 filter_id);
+bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
+			 u16 filter_id);
 #endif
 
 /* This structure contains an instance of an RX queue. */
@@ -1497,9 +1497,9 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
 		f(dev, &dev->_tx[i], arg);
 }
 
-extern struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-					   struct sk_buff *skb);
-extern u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb);
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+				    struct sk_buff *skb);
+u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb);
 
 /*
  * Net namespace inlines
@@ -1683,8 +1683,8 @@ struct packet_offload {
 #define NETDEV_CHANGEUPPER	0x0015
 #define NETDEV_RESEND_IGMP	0x0016
 
-extern int register_netdevice_notifier(struct notifier_block *nb);
-extern int unregister_netdevice_notifier(struct notifier_block *nb);
+int register_netdevice_notifier(struct notifier_block *nb);
+int unregister_netdevice_notifier(struct notifier_block *nb);
 
 struct netdev_notifier_info {
 	struct net_device *dev;
@@ -1707,9 +1707,9 @@ netdev_notifier_info_to_dev(const struct netdev_notifier_info *info)
 	return info->dev;
 }
 
-extern int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
-					 struct netdev_notifier_info *info);
-extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
+int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
+				  struct netdev_notifier_info *info);
+int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
 
 
 extern rwlock_t				dev_base_lock;		/* Device list lock */
@@ -1764,54 +1764,52 @@ static inline struct net_device *first_net_device_rcu(struct net *net)
 	return lh == &net->dev_base_head ? NULL : net_device_entry(lh);
 }
 
-extern int 			netdev_boot_setup_check(struct net_device *dev);
-extern unsigned long		netdev_boot_base(const char *prefix, int unit);
-extern struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
-					      const char *hwaddr);
-extern struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
-extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type);
-extern void		dev_add_pack(struct packet_type *pt);
-extern void		dev_remove_pack(struct packet_type *pt);
-extern void		__dev_remove_pack(struct packet_type *pt);
-extern void		dev_add_offload(struct packet_offload *po);
-extern void		dev_remove_offload(struct packet_offload *po);
-extern void		__dev_remove_offload(struct packet_offload *po);
-
-extern struct net_device	*dev_get_by_flags_rcu(struct net *net, unsigned short flags,
-						      unsigned short mask);
-extern struct net_device	*dev_get_by_name(struct net *net, const char *name);
-extern struct net_device	*dev_get_by_name_rcu(struct net *net, const char *name);
-extern struct net_device	*__dev_get_by_name(struct net *net, const char *name);
-extern int		dev_alloc_name(struct net_device *dev, const char *name);
-extern int		dev_open(struct net_device *dev);
-extern int		dev_close(struct net_device *dev);
-extern void		dev_disable_lro(struct net_device *dev);
-extern int		dev_loopback_xmit(struct sk_buff *newskb);
-extern int		dev_queue_xmit(struct sk_buff *skb);
-extern int		register_netdevice(struct net_device *dev);
-extern void		unregister_netdevice_queue(struct net_device *dev,
-						   struct list_head *head);
-extern void		unregister_netdevice_many(struct list_head *head);
+int netdev_boot_setup_check(struct net_device *dev);
+unsigned long netdev_boot_base(const char *prefix, int unit);
+struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
+				       const char *hwaddr);
+struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
+struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type);
+void dev_add_pack(struct packet_type *pt);
+void dev_remove_pack(struct packet_type *pt);
+void __dev_remove_pack(struct packet_type *pt);
+void dev_add_offload(struct packet_offload *po);
+void dev_remove_offload(struct packet_offload *po);
+void __dev_remove_offload(struct packet_offload *po);
+
+struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags,
+					unsigned short mask);
+struct net_device *dev_get_by_name(struct net *net, const char *name);
+struct net_device *dev_get_by_name_rcu(struct net *net, const char *name);
+struct net_device *__dev_get_by_name(struct net *net, const char *name);
+int dev_alloc_name(struct net_device *dev, const char *name);
+int dev_open(struct net_device *dev);
+int dev_close(struct net_device *dev);
+void dev_disable_lro(struct net_device *dev);
+int dev_loopback_xmit(struct sk_buff *newskb);
+int dev_queue_xmit(struct sk_buff *skb);
+int register_netdevice(struct net_device *dev);
+void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
+void unregister_netdevice_many(struct list_head *head);
 static inline void unregister_netdevice(struct net_device *dev)
 {
 	unregister_netdevice_queue(dev, NULL);
 }
 
-extern int 		netdev_refcnt_read(const struct net_device *dev);
-extern void		free_netdev(struct net_device *dev);
-extern void		synchronize_net(void);
-extern int		init_dummy_netdev(struct net_device *dev);
+int netdev_refcnt_read(const struct net_device *dev);
+void free_netdev(struct net_device *dev);
+void synchronize_net(void);
+int init_dummy_netdev(struct net_device *dev);
 
-extern struct net_device	*dev_get_by_index(struct net *net, int ifindex);
-extern struct net_device	*__dev_get_by_index(struct net *net, int ifindex);
-extern struct net_device	*dev_get_by_index_rcu(struct net *net, int ifindex);
-extern int		netdev_get_name(struct net *net, char *name, int ifindex);
-extern int		dev_restart(struct net_device *dev);
+struct net_device *dev_get_by_index(struct net *net, int ifindex);
+struct net_device *__dev_get_by_index(struct net *net, int ifindex);
+struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
+int netdev_get_name(struct net *net, char *name, int ifindex);
+int dev_restart(struct net_device *dev);
 #ifdef CONFIG_NETPOLL_TRAP
-extern int		netpoll_trap(void);
+int netpoll_trap(void);
 #endif
-extern int	       skb_gro_receive(struct sk_buff **head,
-				       struct sk_buff *skb);
+int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb);
 
 static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
 {
@@ -1883,7 +1881,7 @@ static inline int dev_parse_header(const struct sk_buff *skb,
 }
 
 typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len);
-extern int		register_gifconf(unsigned int family, gifconf_func_t * gifconf);
+int register_gifconf(unsigned int family, gifconf_func_t *gifconf);
 static inline int unregister_gifconf(unsigned int family)
 {
 	return register_gifconf(family, NULL);
@@ -1954,7 +1952,7 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd,
 
 DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 
-extern void __netif_schedule(struct Qdisc *q);
+void __netif_schedule(struct Qdisc *q);
 
 static inline void netif_schedule_queue(struct netdev_queue *txq)
 {
@@ -2274,8 +2272,8 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 }
 
 #ifdef CONFIG_XPS
-extern int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask,
-			       u16 index);
+int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask,
+			u16 index);
 #else
 static inline int netif_set_xps_queue(struct net_device *dev,
 				      struct cpumask *mask,
@@ -2306,12 +2304,10 @@ static inline bool netif_is_multiqueue(const struct net_device *dev)
 	return dev->num_tx_queues > 1;
 }
 
-extern int netif_set_real_num_tx_queues(struct net_device *dev,
-					unsigned int txq);
+int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq);
 
 #ifdef CONFIG_RPS
-extern int netif_set_real_num_rx_queues(struct net_device *dev,
-					unsigned int rxq);
+int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq);
 #else
 static inline int netif_set_real_num_rx_queues(struct net_device *dev,
 						unsigned int rxq)
@@ -2338,28 +2334,27 @@ static inline int netif_copy_real_num_queues(struct net_device *to_dev,
 }
 
 #define DEFAULT_MAX_NUM_RSS_QUEUES	(8)
-extern int netif_get_num_default_rss_queues(void);
+int netif_get_num_default_rss_queues(void);
 
 /* Use this variant when it is known for sure that it
  * is executing from hardware interrupt context or with hardware interrupts
  * disabled.
  */
-extern void dev_kfree_skb_irq(struct sk_buff *skb);
+void dev_kfree_skb_irq(struct sk_buff *skb);
 
 /* Use this variant in places where it could be invoked
  * from either hardware interrupt or other context, with hardware interrupts
  * either disabled or enabled.
  */
-extern void dev_kfree_skb_any(struct sk_buff *skb);
+void dev_kfree_skb_any(struct sk_buff *skb);
 
-extern int		netif_rx(struct sk_buff *skb);
-extern int		netif_rx_ni(struct sk_buff *skb);
-extern int		netif_receive_skb(struct sk_buff *skb);
-extern gro_result_t	napi_gro_receive(struct napi_struct *napi,
-					 struct sk_buff *skb);
-extern void		napi_gro_flush(struct napi_struct *napi, bool flush_old);
-extern struct sk_buff *	napi_get_frags(struct napi_struct *napi);
-extern gro_result_t	napi_gro_frags(struct napi_struct *napi);
+int netif_rx(struct sk_buff *skb);
+int netif_rx_ni(struct sk_buff *skb);
+int netif_receive_skb(struct sk_buff *skb);
+gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
+void napi_gro_flush(struct napi_struct *napi, bool flush_old);
+struct sk_buff *napi_get_frags(struct napi_struct *napi);
+gro_result_t napi_gro_frags(struct napi_struct *napi);
 
 static inline void napi_free_frags(struct napi_struct *napi)
 {
@@ -2367,40 +2362,35 @@ static inline void napi_free_frags(struct napi_struct *napi)
 	napi->skb = NULL;
 }
 
-extern int netdev_rx_handler_register(struct net_device *dev,
-				      rx_handler_func_t *rx_handler,
-				      void *rx_handler_data);
-extern void netdev_rx_handler_unregister(struct net_device *dev);
-
-extern bool		dev_valid_name(const char *name);
-extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
-extern int		dev_ethtool(struct net *net, struct ifreq *);
-extern unsigned int	dev_get_flags(const struct net_device *);
-extern int		__dev_change_flags(struct net_device *, unsigned int flags);
-extern int		dev_change_flags(struct net_device *, unsigned int);
-extern void		__dev_notify_flags(struct net_device *, unsigned int old_flags);
-extern int		dev_change_name(struct net_device *, const char *);
-extern int		dev_set_alias(struct net_device *, const char *, size_t);
-extern int		dev_change_net_namespace(struct net_device *,
-						 struct net *, const char *);
-extern int		dev_set_mtu(struct net_device *, int);
-extern void		dev_set_group(struct net_device *, int);
-extern int		dev_set_mac_address(struct net_device *,
-					    struct sockaddr *);
-extern int		dev_change_carrier(struct net_device *,
-					   bool new_carrier);
-extern int		dev_get_phys_port_id(struct net_device *dev,
-					     struct netdev_phys_port_id *ppid);
-extern int		dev_hard_start_xmit(struct sk_buff *skb,
-					    struct net_device *dev,
-					    struct netdev_queue *txq);
-extern int		dev_forward_skb(struct net_device *dev,
-					struct sk_buff *skb);
+int netdev_rx_handler_register(struct net_device *dev,
+			       rx_handler_func_t *rx_handler,
+			       void *rx_handler_data);
+void netdev_rx_handler_unregister(struct net_device *dev);
+
+bool dev_valid_name(const char *name);
+int dev_ioctl(struct net *net, unsigned int cmd, void __user *);
+int dev_ethtool(struct net *net, struct ifreq *);
+unsigned int dev_get_flags(const struct net_device *);
+int __dev_change_flags(struct net_device *, unsigned int flags);
+int dev_change_flags(struct net_device *, unsigned int);
+void __dev_notify_flags(struct net_device *, unsigned int old_flags);
+int dev_change_name(struct net_device *, const char *);
+int dev_set_alias(struct net_device *, const char *, size_t);
+int dev_change_net_namespace(struct net_device *, struct net *, const char *);
+int dev_set_mtu(struct net_device *, int);
+void dev_set_group(struct net_device *, int);
+int dev_set_mac_address(struct net_device *, struct sockaddr *);
+int dev_change_carrier(struct net_device *, bool new_carrier);
+int dev_get_phys_port_id(struct net_device *dev,
+			 struct netdev_phys_port_id *ppid);
+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
+			struct netdev_queue *txq);
+int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 
 extern int		netdev_budget;
 
 /* Called by rtnetlink.c:rtnl_unlock() */
-extern void netdev_run_todo(void);
+void netdev_run_todo(void);
 
 /**
  *	dev_put - release reference to device
@@ -2433,9 +2423,9 @@ static inline void dev_hold(struct net_device *dev)
  * kind of lower layer not just hardware media.
  */
 
-extern void linkwatch_init_dev(struct net_device *dev);
-extern void linkwatch_fire_event(struct net_device *dev);
-extern void linkwatch_forget_dev(struct net_device *dev);
+void linkwatch_init_dev(struct net_device *dev);
+void linkwatch_fire_event(struct net_device *dev);
+void linkwatch_forget_dev(struct net_device *dev);
 
 /**
  *	netif_carrier_ok - test if carrier present
@@ -2448,13 +2438,13 @@ static inline bool netif_carrier_ok(const struct net_device *dev)
 	return !test_bit(__LINK_STATE_NOCARRIER, &dev->state);
 }
 
-extern unsigned long dev_trans_start(struct net_device *dev);
+unsigned long dev_trans_start(struct net_device *dev);
 
-extern void __netdev_watchdog_up(struct net_device *dev);
+void __netdev_watchdog_up(struct net_device *dev);
 
-extern void netif_carrier_on(struct net_device *dev);
+void netif_carrier_on(struct net_device *dev);
 
-extern void netif_carrier_off(struct net_device *dev);
+void netif_carrier_off(struct net_device *dev);
 
 /**
  *	netif_dormant_on - mark device as dormant.
@@ -2522,9 +2512,9 @@ static inline bool netif_device_present(struct net_device *dev)
 	return test_bit(__LINK_STATE_PRESENT, &dev->state);
 }
 
-extern void netif_device_detach(struct net_device *dev);
+void netif_device_detach(struct net_device *dev);
 
-extern void netif_device_attach(struct net_device *dev);
+void netif_device_attach(struct net_device *dev);
 
 /*
  * Network interface message level settings
@@ -2733,98 +2723,93 @@ static inline void netif_addr_unlock_bh(struct net_device *dev)
 
 /* These functions live elsewhere (drivers/net/net_init.c, but related) */
 
-extern void		ether_setup(struct net_device *dev);
+void ether_setup(struct net_device *dev);
 
 /* Support for loadable net-drivers */
-extern struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
-				       void (*setup)(struct net_device *),
-				       unsigned int txqs, unsigned int rxqs);
+struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+				    void (*setup)(struct net_device *),
+				    unsigned int txqs, unsigned int rxqs);
 #define alloc_netdev(sizeof_priv, name, setup) \
 	alloc_netdev_mqs(sizeof_priv, name, setup, 1, 1)
 
 #define alloc_netdev_mq(sizeof_priv, name, setup, count) \
 	alloc_netdev_mqs(sizeof_priv, name, setup, count, count)
 
-extern int		register_netdev(struct net_device *dev);
-extern void		unregister_netdev(struct net_device *dev);
+int register_netdev(struct net_device *dev);
+void unregister_netdev(struct net_device *dev);
 
 /* General hardware address lists handling functions */
-extern int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
-				  struct netdev_hw_addr_list *from_list,
-				  int addr_len, unsigned char addr_type);
-extern void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
-				   struct netdev_hw_addr_list *from_list,
-				   int addr_len, unsigned char addr_type);
-extern int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
-			  struct netdev_hw_addr_list *from_list,
-			  int addr_len);
-extern void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
-			     struct netdev_hw_addr_list *from_list,
-			     int addr_len);
-extern void __hw_addr_flush(struct netdev_hw_addr_list *list);
-extern void __hw_addr_init(struct netdev_hw_addr_list *list);
+int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
+			   struct netdev_hw_addr_list *from_list,
+			   int addr_len, unsigned char addr_type);
+void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
+			    struct netdev_hw_addr_list *from_list,
+			    int addr_len, unsigned char addr_type);
+int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
+		   struct netdev_hw_addr_list *from_list, int addr_len);
+void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
+		      struct netdev_hw_addr_list *from_list, int addr_len);
+void __hw_addr_flush(struct netdev_hw_addr_list *list);
+void __hw_addr_init(struct netdev_hw_addr_list *list);
 
 /* Functions used for device addresses handling */
-extern int dev_addr_add(struct net_device *dev, const unsigned char *addr,
-			unsigned char addr_type);
-extern int dev_addr_del(struct net_device *dev, const unsigned char *addr,
-			unsigned char addr_type);
-extern int dev_addr_add_multiple(struct net_device *to_dev,
-				 struct net_device *from_dev,
-				 unsigned char addr_type);
-extern int dev_addr_del_multiple(struct net_device *to_dev,
-				 struct net_device *from_dev,
-				 unsigned char addr_type);
-extern void dev_addr_flush(struct net_device *dev);
-extern int dev_addr_init(struct net_device *dev);
+int dev_addr_add(struct net_device *dev, const unsigned char *addr,
+		 unsigned char addr_type);
+int dev_addr_del(struct net_device *dev, const unsigned char *addr,
+		 unsigned char addr_type);
+int dev_addr_add_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev, unsigned char addr_type);
+int dev_addr_del_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev, unsigned char addr_type);
+void dev_addr_flush(struct net_device *dev);
+int dev_addr_init(struct net_device *dev);
 
 /* Functions used for unicast addresses handling */
-extern int dev_uc_add(struct net_device *dev, const unsigned char *addr);
-extern int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr);
-extern int dev_uc_del(struct net_device *dev, const unsigned char *addr);
-extern int dev_uc_sync(struct net_device *to, struct net_device *from);
-extern int dev_uc_sync_multiple(struct net_device *to, struct net_device *from);
-extern void dev_uc_unsync(struct net_device *to, struct net_device *from);
-extern void dev_uc_flush(struct net_device *dev);
-extern void dev_uc_init(struct net_device *dev);
+int dev_uc_add(struct net_device *dev, const unsigned char *addr);
+int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr);
+int dev_uc_del(struct net_device *dev, const unsigned char *addr);
+int dev_uc_sync(struct net_device *to, struct net_device *from);
+int dev_uc_sync_multiple(struct net_device *to, struct net_device *from);
+void dev_uc_unsync(struct net_device *to, struct net_device *from);
+void dev_uc_flush(struct net_device *dev);
+void dev_uc_init(struct net_device *dev);
 
 /* Functions used for multicast addresses handling */
-extern int dev_mc_add(struct net_device *dev, const unsigned char *addr);
-extern int dev_mc_add_global(struct net_device *dev, const unsigned char *addr);
-extern int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr);
-extern int dev_mc_del(struct net_device *dev, const unsigned char *addr);
-extern int dev_mc_del_global(struct net_device *dev, const unsigned char *addr);
-extern int dev_mc_sync(struct net_device *to, struct net_device *from);
-extern int dev_mc_sync_multiple(struct net_device *to, struct net_device *from);
-extern void dev_mc_unsync(struct net_device *to, struct net_device *from);
-extern void dev_mc_flush(struct net_device *dev);
-extern void dev_mc_init(struct net_device *dev);
+int dev_mc_add(struct net_device *dev, const unsigned char *addr);
+int dev_mc_add_global(struct net_device *dev, const unsigned char *addr);
+int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr);
+int dev_mc_del(struct net_device *dev, const unsigned char *addr);
+int dev_mc_del_global(struct net_device *dev, const unsigned char *addr);
+int dev_mc_sync(struct net_device *to, struct net_device *from);
+int dev_mc_sync_multiple(struct net_device *to, struct net_device *from);
+void dev_mc_unsync(struct net_device *to, struct net_device *from);
+void dev_mc_flush(struct net_device *dev);
+void dev_mc_init(struct net_device *dev);
 
 /* Functions used for secondary unicast and multicast support */
-extern void		dev_set_rx_mode(struct net_device *dev);
-extern void		__dev_set_rx_mode(struct net_device *dev);
-extern int		dev_set_promiscuity(struct net_device *dev, int inc);
-extern int		dev_set_allmulti(struct net_device *dev, int inc);
-extern void		netdev_state_change(struct net_device *dev);
-extern void		netdev_notify_peers(struct net_device *dev);
-extern void		netdev_features_change(struct net_device *dev);
+void dev_set_rx_mode(struct net_device *dev);
+void __dev_set_rx_mode(struct net_device *dev);
+int dev_set_promiscuity(struct net_device *dev, int inc);
+int dev_set_allmulti(struct net_device *dev, int inc);
+void netdev_state_change(struct net_device *dev);
+void netdev_notify_peers(struct net_device *dev);
+void netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
-extern void		dev_load(struct net *net, const char *name);
-extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
-					       struct rtnl_link_stats64 *storage);
-extern void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
-				    const struct net_device_stats *netdev_stats);
+void dev_load(struct net *net, const char *name);
+struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
+					struct rtnl_link_stats64 *storage);
+void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
+			     const struct net_device_stats *netdev_stats);
 
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
 extern int		weight_p;
 extern int		bpf_jit_enable;
 
-extern bool netdev_has_upper_dev(struct net_device *dev,
-				 struct net_device *upper_dev);
-extern bool netdev_has_any_upper_dev(struct net_device *dev);
-extern struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
-							    struct list_head **iter);
+bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
+bool netdev_has_any_upper_dev(struct net_device *dev);
+struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
+						     struct list_head **iter);
 
 /* iterate through upper list, must be called under RCU read lock */
 #define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \
@@ -2833,10 +2818,10 @@ extern struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *d
 	     updev; \
 	     updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)))
 
-extern void *netdev_lower_get_next_private(struct net_device *dev,
-					   struct list_head **iter);
-extern void *netdev_lower_get_next_private_rcu(struct net_device *dev,
-					       struct list_head **iter);
+void *netdev_lower_get_next_private(struct net_device *dev,
+				    struct list_head **iter);
+void *netdev_lower_get_next_private_rcu(struct net_device *dev,
+					struct list_head **iter);
 
 #define netdev_for_each_lower_private(dev, priv, iter) \
 	for (iter = (dev)->adj_list.lower.next, \
@@ -2850,27 +2835,26 @@ extern void *netdev_lower_get_next_private_rcu(struct net_device *dev,
 	     priv; \
 	     priv = netdev_lower_get_next_private_rcu(dev, &(iter)))
 
-extern void *netdev_adjacent_get_private(struct list_head *adj_list);
-extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
-extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
-extern int netdev_upper_dev_link(struct net_device *dev,
+void *netdev_adjacent_get_private(struct list_head *adj_list);
+struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
+struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
+int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev);
+int netdev_master_upper_dev_link(struct net_device *dev,
 				 struct net_device *upper_dev);
-extern int netdev_master_upper_dev_link(struct net_device *dev,
-					struct net_device *upper_dev);
-extern int netdev_master_upper_dev_link_private(struct net_device *dev,
-						struct net_device *upper_dev,
-						void *private);
-extern void netdev_upper_dev_unlink(struct net_device *dev,
-				    struct net_device *upper_dev);
-extern void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
-					      struct net_device *lower_dev);
-extern void *netdev_lower_dev_get_private(struct net_device *dev,
-					  struct net_device *lower_dev);
-extern int skb_checksum_help(struct sk_buff *skb);
-extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
-	netdev_features_t features, bool tx_path);
-extern struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
-					  netdev_features_t features);
+int netdev_master_upper_dev_link_private(struct net_device *dev,
+					 struct net_device *upper_dev,
+					 void *private);
+void netdev_upper_dev_unlink(struct net_device *dev,
+			     struct net_device *upper_dev);
+void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
+				       struct net_device *lower_dev);
+void *netdev_lower_dev_get_private(struct net_device *dev,
+				   struct net_device *lower_dev);
+int skb_checksum_help(struct sk_buff *skb);
+struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+				  netdev_features_t features, bool tx_path);
+struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
+				    netdev_features_t features);
 
 static inline
 struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
@@ -2892,30 +2876,30 @@ static inline bool can_checksum_protocol(netdev_features_t features,
 }
 
 #ifdef CONFIG_BUG
-extern void netdev_rx_csum_fault(struct net_device *dev);
+void netdev_rx_csum_fault(struct net_device *dev);
 #else
 static inline void netdev_rx_csum_fault(struct net_device *dev)
 {
 }
 #endif
 /* rx skb timestamps */
-extern void		net_enable_timestamp(void);
-extern void		net_disable_timestamp(void);
+void net_enable_timestamp(void);
+void net_disable_timestamp(void);
 
 #ifdef CONFIG_PROC_FS
-extern int __init dev_proc_init(void);
+int __init dev_proc_init(void);
 #else
 #define dev_proc_init() 0
 #endif
 
-extern int netdev_class_create_file(struct class_attribute *class_attr);
-extern void netdev_class_remove_file(struct class_attribute *class_attr);
+int netdev_class_create_file(struct class_attribute *class_attr);
+void netdev_class_remove_file(struct class_attribute *class_attr);
 
 extern struct kobj_ns_type_operations net_ns_type_operations;
 
-extern const char *netdev_drivername(const struct net_device *dev);
+const char *netdev_drivername(const struct net_device *dev);
 
-extern void linkwatch_run_queue(void);
+void linkwatch_run_queue(void);
 
 static inline netdev_features_t netdev_get_wanted_features(
 	struct net_device *dev)
@@ -3007,22 +2991,22 @@ static inline const char *netdev_name(const struct net_device *dev)
 	return dev->name;
 }
 
-extern __printf(3, 4)
+__printf(3, 4)
 int netdev_printk(const char *level, const struct net_device *dev,
 		  const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_emerg(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_alert(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_crit(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_err(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_warn(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_notice(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_info(const struct net_device *dev, const char *format, ...);
 
 #define MODULE_ALIAS_NETDEV(device) \
-- 
cgit v1.2.3-71-gd317


From a528c219df2e865e178c538c7178961dfed5a13c Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Wed, 25 Sep 2013 12:02:44 +0200
Subject: dev: update __dev_notify_flags() to send rtnl msg

This patch only prepares the next one, there is no functional change.
Now, __dev_notify_flags() can also be used to notify flags changes via
rtnetlink.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 +++-
 net/core/dev.c            | 11 ++++++-----
 net/core/rtnetlink.c      |  3 +--
 3 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b4cfb63f264e..f44f99a69977 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2378,7 +2378,9 @@ extern int		dev_ethtool(struct net *net, struct ifreq *);
 extern unsigned int	dev_get_flags(const struct net_device *);
 extern int		__dev_change_flags(struct net_device *, unsigned int flags);
 extern int		dev_change_flags(struct net_device *, unsigned int);
-extern void		__dev_notify_flags(struct net_device *, unsigned int old_flags);
+void			__dev_notify_flags(struct net_device *,
+					   unsigned int old_flags,
+					   unsigned int gchanges);
 extern int		dev_change_name(struct net_device *, const char *);
 extern int		dev_set_alias(struct net_device *, const char *, size_t);
 extern int		dev_change_net_namespace(struct net_device *,
diff --git a/net/core/dev.c b/net/core/dev.c
index 25ab6fe80da2..594a6b0ab3da 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5235,10 +5235,14 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
 	return ret;
 }
 
-void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
+void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
+			unsigned int gchanges)
 {
 	unsigned int changes = dev->flags ^ old_flags;
 
+	if (gchanges)
+		rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges);
+
 	if (changes & IFF_UP) {
 		if (dev->flags & IFF_UP)
 			call_netdevice_notifiers(NETDEV_UP, dev);
@@ -5274,10 +5278,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags)
 		return ret;
 
 	changes = old_flags ^ dev->flags;
-	if (changes)
-		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
-
-	__dev_notify_flags(dev, old_flags);
+	__dev_notify_flags(dev, old_flags, changes);
 	return ret;
 }
 EXPORT_SYMBOL(dev_change_flags);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2a0e21de3060..4aedf03da052 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1647,9 +1647,8 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
 	}
 
 	dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
-	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
 
-	__dev_notify_flags(dev, old_flags);
+	__dev_notify_flags(dev, old_flags, ~0U);
 	return 0;
 }
 EXPORT_SYMBOL(rtnl_configure_link);
-- 
cgit v1.2.3-71-gd317


From 35b8dcf8c3a0be1feb1c8b29b22e1685ba0c2e14 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Tue, 30 Apr 2013 23:02:43 +0200
Subject: netfilter: ipset: Rename simple macro names to avoid namespace
 issues.

Reported-by: David Laight <David.Laight@ACULAB.COM>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h      |   3 +
 net/netfilter/ipset/ip_set_bitmap_gen.h     |  47 +++++----
 net/netfilter/ipset/ip_set_bitmap_ip.c      |  10 +-
 net/netfilter/ipset/ip_set_bitmap_ipmac.c   |  10 +-
 net/netfilter/ipset/ip_set_bitmap_port.c    |  10 +-
 net/netfilter/ipset/ip_set_hash_gen.h       | 147 +++++++++++++++-------------
 net/netfilter/ipset/ip_set_hash_ip.c        |  10 +-
 net/netfilter/ipset/ip_set_hash_ipport.c    |  12 +--
 net/netfilter/ipset/ip_set_hash_ipportip.c  |  12 +--
 net/netfilter/ipset/ip_set_hash_ipportnet.c |  16 +--
 net/netfilter/ipset/ip_set_hash_net.c       |  14 +--
 net/netfilter/ipset/ip_set_hash_netiface.c  |  14 +--
 net/netfilter/ipset/ip_set_hash_netport.c   |  16 +--
 net/netfilter/ipset/ip_set_list_set.c       |  10 +-
 14 files changed, 169 insertions(+), 162 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 9ac9fbde7b61..f900f33a5f3d 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -398,4 +398,7 @@ bitmap_bytes(u32 a, u32 b)
 	{ .bytes = ULLONG_MAX, .packets = ULLONG_MAX,	\
 	  .timeout = (map)->timeout }
 
+#define IPSET_CONCAT(a, b)		a##b
+#define IPSET_TOKEN(a, b)		IPSET_CONCAT(a, b)
+
 #endif /*_IP_SET_H */
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index f6af97cf8d3e..d39905e7e881 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -8,31 +8,28 @@
 #ifndef __IP_SET_BITMAP_IP_GEN_H
 #define __IP_SET_BITMAP_IP_GEN_H
 
-#define CONCAT(a, b)		a##b
-#define TOKEN(a,b)		CONCAT(a, b)
-
-#define mtype_do_test		TOKEN(MTYPE, _do_test)
-#define mtype_gc_test		TOKEN(MTYPE, _gc_test)
-#define mtype_is_filled		TOKEN(MTYPE, _is_filled)
-#define mtype_do_add		TOKEN(MTYPE, _do_add)
-#define mtype_do_del		TOKEN(MTYPE, _do_del)
-#define mtype_do_list		TOKEN(MTYPE, _do_list)
-#define mtype_do_head		TOKEN(MTYPE, _do_head)
-#define mtype_adt_elem		TOKEN(MTYPE, _adt_elem)
-#define mtype_add_timeout	TOKEN(MTYPE, _add_timeout)
-#define mtype_gc_init		TOKEN(MTYPE, _gc_init)
-#define mtype_kadt		TOKEN(MTYPE, _kadt)
-#define mtype_uadt		TOKEN(MTYPE, _uadt)
-#define mtype_destroy		TOKEN(MTYPE, _destroy)
-#define mtype_flush		TOKEN(MTYPE, _flush)
-#define mtype_head		TOKEN(MTYPE, _head)
-#define mtype_same_set		TOKEN(MTYPE, _same_set)
-#define mtype_elem		TOKEN(MTYPE, _elem)
-#define mtype_test		TOKEN(MTYPE, _test)
-#define mtype_add		TOKEN(MTYPE, _add)
-#define mtype_del		TOKEN(MTYPE, _del)
-#define mtype_list		TOKEN(MTYPE, _list)
-#define mtype_gc		TOKEN(MTYPE, _gc)
+#define mtype_do_test		IPSET_TOKEN(MTYPE, _do_test)
+#define mtype_gc_test		IPSET_TOKEN(MTYPE, _gc_test)
+#define mtype_is_filled		IPSET_TOKEN(MTYPE, _is_filled)
+#define mtype_do_add		IPSET_TOKEN(MTYPE, _do_add)
+#define mtype_do_del		IPSET_TOKEN(MTYPE, _do_del)
+#define mtype_do_list		IPSET_TOKEN(MTYPE, _do_list)
+#define mtype_do_head		IPSET_TOKEN(MTYPE, _do_head)
+#define mtype_adt_elem		IPSET_TOKEN(MTYPE, _adt_elem)
+#define mtype_add_timeout	IPSET_TOKEN(MTYPE, _add_timeout)
+#define mtype_gc_init		IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_kadt		IPSET_TOKEN(MTYPE, _kadt)
+#define mtype_uadt		IPSET_TOKEN(MTYPE, _uadt)
+#define mtype_destroy		IPSET_TOKEN(MTYPE, _destroy)
+#define mtype_flush		IPSET_TOKEN(MTYPE, _flush)
+#define mtype_head		IPSET_TOKEN(MTYPE, _head)
+#define mtype_same_set		IPSET_TOKEN(MTYPE, _same_set)
+#define mtype_elem		IPSET_TOKEN(MTYPE, _elem)
+#define mtype_test		IPSET_TOKEN(MTYPE, _test)
+#define mtype_add		IPSET_TOKEN(MTYPE, _add)
+#define mtype_del		IPSET_TOKEN(MTYPE, _del)
+#define mtype_list		IPSET_TOKEN(MTYPE, _list)
+#define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
 #define mtype			MTYPE
 
 #define ext_timeout(e, m)	\
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index f1a8128bef01..c2f89b1c1d92 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -25,12 +25,12 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_bitmap.h>
 
-#define REVISION_MIN	0
-#define REVISION_MAX	1	/* Counter support added */
+#define IPSET_TYPE_REV_MIN	0
+#define IPSET_TYPE_REV_MAX	1	/* Counter support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("bitmap:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_bitmap:ip");
 
 #define MTYPE		bitmap_ip
@@ -401,8 +401,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
 	.features	= IPSET_TYPE_IP,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_IPV4,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= bitmap_ip_create,
 	.create_policy	= {
 		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 3b30e0bef890..1d6551cc590e 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -25,12 +25,12 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_bitmap.h>
 
-#define REVISION_MIN	0
-#define REVISION_MAX	1	/* Counter support added */
+#define IPSET_TYPE_REV_MIN	0
+#define IPSET_TYPE_REV_MAX	1	/* Counter support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_bitmap:ip,mac");
 
 #define MTYPE		bitmap_ipmac
@@ -460,8 +460,8 @@ static struct ip_set_type bitmap_ipmac_type = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_MAC,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_IPV4,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= bitmap_ipmac_create,
 	.create_policy	= {
 		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 8207d1fda528..b22048965d2a 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -20,12 +20,12 @@
 #include <linux/netfilter/ipset/ip_set_bitmap.h>
 #include <linux/netfilter/ipset/ip_set_getport.h>
 
-#define REVISION_MIN	0
-#define REVISION_MAX	1	/* Counter support added */
+#define IPSET_TYPE_REV_MIN	0
+#define IPSET_TYPE_REV_MAX	1	/* Counter support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("bitmap:port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_bitmap:port");
 
 #define MTYPE		bitmap_port
@@ -333,8 +333,8 @@ static struct ip_set_type bitmap_port_type = {
 	.features	= IPSET_TYPE_PORT,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= bitmap_port_create,
 	.create_policy	= {
 		[IPSET_ATTR_PORT]	= { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 09a21dd5f120..68b9ccebabaa 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -17,9 +17,6 @@
 
 #define rcu_dereference_bh_nfnl(p)	rcu_dereference_bh_check(p, 1)
 
-#define CONCAT(a, b)		a##b
-#define TOKEN(a, b)		CONCAT(a, b)
-
 /* Hashing which uses arrays to resolve clashing. The hash table is resized
  * (doubled) when searching becomes too long.
  * Internally jhash is used with the assumption that the size of the
@@ -222,41 +219,41 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 
 #undef HKEY
 
-#define mtype_data_equal	TOKEN(MTYPE, _data_equal)
+#define mtype_data_equal	IPSET_TOKEN(MTYPE, _data_equal)
 #ifdef IP_SET_HASH_WITH_NETS
-#define mtype_do_data_match	TOKEN(MTYPE, _do_data_match)
+#define mtype_do_data_match	IPSET_TOKEN(MTYPE, _do_data_match)
 #else
 #define mtype_do_data_match(d)	1
 #endif
-#define mtype_data_set_flags	TOKEN(MTYPE, _data_set_flags)
-#define mtype_data_reset_flags	TOKEN(MTYPE, _data_reset_flags)
-#define mtype_data_netmask	TOKEN(MTYPE, _data_netmask)
-#define mtype_data_list		TOKEN(MTYPE, _data_list)
-#define mtype_data_next		TOKEN(MTYPE, _data_next)
-#define mtype_elem		TOKEN(MTYPE, _elem)
-#define mtype_add_cidr		TOKEN(MTYPE, _add_cidr)
-#define mtype_del_cidr		TOKEN(MTYPE, _del_cidr)
-#define mtype_ahash_memsize	TOKEN(MTYPE, _ahash_memsize)
-#define mtype_flush		TOKEN(MTYPE, _flush)
-#define mtype_destroy		TOKEN(MTYPE, _destroy)
-#define mtype_gc_init		TOKEN(MTYPE, _gc_init)
-#define mtype_same_set		TOKEN(MTYPE, _same_set)
-#define mtype_kadt		TOKEN(MTYPE, _kadt)
-#define mtype_uadt		TOKEN(MTYPE, _uadt)
+#define mtype_data_set_flags	IPSET_TOKEN(MTYPE, _data_set_flags)
+#define mtype_data_reset_flags	IPSET_TOKEN(MTYPE, _data_reset_flags)
+#define mtype_data_netmask	IPSET_TOKEN(MTYPE, _data_netmask)
+#define mtype_data_list		IPSET_TOKEN(MTYPE, _data_list)
+#define mtype_data_next		IPSET_TOKEN(MTYPE, _data_next)
+#define mtype_elem		IPSET_TOKEN(MTYPE, _elem)
+#define mtype_add_cidr		IPSET_TOKEN(MTYPE, _add_cidr)
+#define mtype_del_cidr		IPSET_TOKEN(MTYPE, _del_cidr)
+#define mtype_ahash_memsize	IPSET_TOKEN(MTYPE, _ahash_memsize)
+#define mtype_flush		IPSET_TOKEN(MTYPE, _flush)
+#define mtype_destroy		IPSET_TOKEN(MTYPE, _destroy)
+#define mtype_gc_init		IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_same_set		IPSET_TOKEN(MTYPE, _same_set)
+#define mtype_kadt		IPSET_TOKEN(MTYPE, _kadt)
+#define mtype_uadt		IPSET_TOKEN(MTYPE, _uadt)
 #define mtype			MTYPE
 
-#define mtype_elem		TOKEN(MTYPE, _elem)
-#define mtype_add		TOKEN(MTYPE, _add)
-#define mtype_del		TOKEN(MTYPE, _del)
-#define mtype_test_cidrs	TOKEN(MTYPE, _test_cidrs)
-#define mtype_test		TOKEN(MTYPE, _test)
-#define mtype_expire		TOKEN(MTYPE, _expire)
-#define mtype_resize		TOKEN(MTYPE, _resize)
-#define mtype_head		TOKEN(MTYPE, _head)
-#define mtype_list		TOKEN(MTYPE, _list)
-#define mtype_gc		TOKEN(MTYPE, _gc)
-#define mtype_variant		TOKEN(MTYPE, _variant)
-#define mtype_data_match	TOKEN(MTYPE, _data_match)
+#define mtype_elem		IPSET_TOKEN(MTYPE, _elem)
+#define mtype_add		IPSET_TOKEN(MTYPE, _add)
+#define mtype_del		IPSET_TOKEN(MTYPE, _del)
+#define mtype_test_cidrs	IPSET_TOKEN(MTYPE, _test_cidrs)
+#define mtype_test		IPSET_TOKEN(MTYPE, _test)
+#define mtype_expire		IPSET_TOKEN(MTYPE, _expire)
+#define mtype_resize		IPSET_TOKEN(MTYPE, _resize)
+#define mtype_head		IPSET_TOKEN(MTYPE, _head)
+#define mtype_list		IPSET_TOKEN(MTYPE, _list)
+#define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
+#define mtype_variant		IPSET_TOKEN(MTYPE, _variant)
+#define mtype_data_match	IPSET_TOKEN(MTYPE, _data_match)
 
 #ifndef HKEY_DATALEN
 #define HKEY_DATALEN		sizeof(struct mtype_elem)
@@ -941,13 +938,13 @@ nla_put_failure:
 }
 
 static int
-TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
-	      const struct xt_action_param *par,
-	      enum ipset_adt adt, struct ip_set_adt_opt *opt);
+IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
+	    const struct xt_action_param *par,
+	    enum ipset_adt adt, struct ip_set_adt_opt *opt);
 
 static int
-TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
-	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);
+IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
+	    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);
 
 static const struct ip_set_type_variant mtype_variant = {
 	.kadt	= mtype_kadt,
@@ -967,7 +964,7 @@ static const struct ip_set_type_variant mtype_variant = {
 
 #ifdef IP_SET_EMIT_CREATE
 static int
-TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
+IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 {
 	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
 	u32 cadt_flags = 0;
@@ -1045,9 +1042,9 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 
 	set->data = h;
 	if (set->family ==  NFPROTO_IPV4)
-		set->variant = &TOKEN(HTYPE, 4_variant);
+		set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
 	else
-		set->variant = &TOKEN(HTYPE, 6_variant);
+		set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
 
 	if (tb[IPSET_ATTR_CADT_FLAGS])
 		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
@@ -1058,64 +1055,74 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 				ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 			set->extensions |= IPSET_EXT_TIMEOUT;
 			if (set->family == NFPROTO_IPV4) {
-				h->dsize =
-					sizeof(struct TOKEN(HTYPE, 4ct_elem));
+				h->dsize = sizeof(struct
+					IPSET_TOKEN(HTYPE, 4ct_elem));
 				h->offset[IPSET_OFFSET_TIMEOUT] =
-					offsetof(struct TOKEN(HTYPE, 4ct_elem),
-						 timeout);
+					offsetof(struct
+						IPSET_TOKEN(HTYPE, 4ct_elem),
+						timeout);
 				h->offset[IPSET_OFFSET_COUNTER] =
-					offsetof(struct TOKEN(HTYPE, 4ct_elem),
-						 counter);
-				TOKEN(HTYPE, 4_gc_init)(set,
-					TOKEN(HTYPE, 4_gc));
+					offsetof(struct
+						IPSET_TOKEN(HTYPE, 4ct_elem),
+						counter);
+				IPSET_TOKEN(HTYPE, 4_gc_init)(set,
+					IPSET_TOKEN(HTYPE, 4_gc));
 			} else {
-				h->dsize =
-					sizeof(struct TOKEN(HTYPE, 6ct_elem));
+				h->dsize = sizeof(struct
+					IPSET_TOKEN(HTYPE, 6ct_elem));
 				h->offset[IPSET_OFFSET_TIMEOUT] =
-					offsetof(struct TOKEN(HTYPE, 6ct_elem),
-						 timeout);
+					offsetof(struct
+						IPSET_TOKEN(HTYPE, 6ct_elem),
+						timeout);
 				h->offset[IPSET_OFFSET_COUNTER] =
-					offsetof(struct TOKEN(HTYPE, 6ct_elem),
-						 counter);
-				TOKEN(HTYPE, 6_gc_init)(set,
-					TOKEN(HTYPE, 6_gc));
+					offsetof(struct
+						IPSET_TOKEN(HTYPE, 6ct_elem),
+						counter);
+				IPSET_TOKEN(HTYPE, 6_gc_init)(set,
+					IPSET_TOKEN(HTYPE, 6_gc));
 			}
 		} else {
 			if (set->family == NFPROTO_IPV4) {
 				h->dsize =
-					sizeof(struct TOKEN(HTYPE, 4c_elem));
+					sizeof(struct
+						IPSET_TOKEN(HTYPE, 4c_elem));
 				h->offset[IPSET_OFFSET_COUNTER] =
-					offsetof(struct TOKEN(HTYPE, 4c_elem),
-						 counter);
+					offsetof(struct
+						IPSET_TOKEN(HTYPE, 4c_elem),
+						counter);
 			} else {
 				h->dsize =
-					sizeof(struct TOKEN(HTYPE, 6c_elem));
+					sizeof(struct
+						IPSET_TOKEN(HTYPE, 6c_elem));
 				h->offset[IPSET_OFFSET_COUNTER] =
-					offsetof(struct TOKEN(HTYPE, 6c_elem),
-						 counter);
+					offsetof(struct
+						IPSET_TOKEN(HTYPE, 6c_elem),
+						counter);
 			}
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
 		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 		set->extensions |= IPSET_EXT_TIMEOUT;
 		if (set->family == NFPROTO_IPV4) {
-			h->dsize = sizeof(struct TOKEN(HTYPE, 4t_elem));
+			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem));
 			h->offset[IPSET_OFFSET_TIMEOUT] =
-				offsetof(struct TOKEN(HTYPE, 4t_elem),
+				offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem),
 					 timeout);
-			TOKEN(HTYPE, 4_gc_init)(set, TOKEN(HTYPE, 4_gc));
+			IPSET_TOKEN(HTYPE, 4_gc_init)(set,
+				IPSET_TOKEN(HTYPE, 4_gc));
 		} else {
-			h->dsize = sizeof(struct TOKEN(HTYPE, 6t_elem));
+			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem));
 			h->offset[IPSET_OFFSET_TIMEOUT] =
-				offsetof(struct TOKEN(HTYPE, 6t_elem),
+				offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem),
 					 timeout);
-			TOKEN(HTYPE, 6_gc_init)(set, TOKEN(HTYPE, 6_gc));
+			IPSET_TOKEN(HTYPE, 6_gc_init)(set,
+				IPSET_TOKEN(HTYPE, 6_gc));
 		}
 	} else {
 		if (set->family == NFPROTO_IPV4)
-			h->dsize = sizeof(struct TOKEN(HTYPE, 4_elem));
+			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem));
 		else
-			h->dsize = sizeof(struct TOKEN(HTYPE, 6_elem));
+			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem));
 	}
 
 	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index c74e6e14cd93..de44fca687c3 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -23,12 +23,12 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-#define REVISION_MAX	1	/* Counters support */
+#define IPSET_TYPE_REV_MIN	0
+#define IPSET_TYPE_REV_MAX	1	/* Counters support */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:ip", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:ip");
 
 /* Type specific function prefix */
@@ -304,8 +304,8 @@ static struct ip_set_type hash_ip_type __read_mostly = {
 	.features	= IPSET_TYPE_IP,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_ip_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 7a2d2bd98d04..b514ff4e83ae 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -24,13 +24,13 @@
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    SCTP and UDPLITE support added */
-#define REVISION_MAX	2 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    SCTP and UDPLITE support added */
+#define IPSET_TYPE_REV_MAX	2 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:ip,port", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:ip,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:ip,port");
 
 /* Type specific function prefix */
@@ -396,8 +396,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_ipport_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 34e8a1acce42..d05070d74604 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -24,13 +24,13 @@
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    SCTP and UDPLITE support added */
-#define REVISION_MAX	2 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    SCTP and UDPLITE support added */
+#define IPSET_TYPE_REV_MAX	2 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:ip,port,ip", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:ip,port,ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:ip,port,ip");
 
 /* Type specific function prefix */
@@ -414,8 +414,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
 	.dimension	= IPSET_DIM_THREE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_ipportip_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 9a80d8bc9f18..7d1dede4ab6d 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -24,15 +24,15 @@
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    SCTP and UDPLITE support added */
-/*			2    Range as input support for IPv4 added */
-/*			3    nomatch flag support added */
-#define REVISION_MAX	4 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    SCTP and UDPLITE support added */
+/*				2    Range as input support for IPv4 added */
+/*				3    nomatch flag support added */
+#define IPSET_TYPE_REV_MAX	4 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:ip,port,net", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:ip,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:ip,port,net");
 
 /* Type specific function prefix */
@@ -574,8 +574,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
 			  IPSET_TYPE_NOMATCH,
 	.dimension	= IPSET_DIM_THREE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_ipportnet_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 223e9f546d0f..9cb9ef43d941 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -22,14 +22,14 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    Range as input support for IPv4 added */
-/*			2    nomatch flag support added */
-#define REVISION_MAX	3 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    Range as input support for IPv4 added */
+/*				2    nomatch flag support added */
+#define IPSET_TYPE_REV_MAX	3 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:net", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:net");
 
 /* Type specific function prefix */
@@ -406,8 +406,8 @@ static struct ip_set_type hash_net_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_NOMATCH,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_net_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 7d798d5d5cd3..2310fc29e89e 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -23,14 +23,14 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    nomatch flag support added */
-/*			2    /0 support added */
-#define REVISION_MAX	3 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    nomatch flag support added */
+/*				2    /0 support added */
+#define IPSET_TYPE_REV_MAX	3 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:net,iface", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:net,iface");
 
 /* Interface name rbtree */
@@ -645,8 +645,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
 			  IPSET_TYPE_NOMATCH,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_netiface_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 09d6690bee6f..1601d489fdbc 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -23,15 +23,15 @@
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
-#define REVISION_MIN	0
-/*			1    SCTP and UDPLITE support added */
-/*			2    Range as input support for IPv4 added */
-/*			3    nomatch flag support added */
-#define REVISION_MAX	4 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+/*				1    SCTP and UDPLITE support added */
+/*				2    Range as input support for IPv4 added */
+/*				3    nomatch flag support added */
+#define IPSET_TYPE_REV_MAX	4 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:net,port", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:net,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_hash:net,port");
 
 /* Type specific function prefix */
@@ -518,8 +518,8 @@ static struct ip_set_type hash_netport_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_NOMATCH,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_netport_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 68299ee15847..a9e301f6e093 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -15,12 +15,12 @@
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_list.h>
 
-#define REVISION_MIN	0
-#define REVISION_MAX	1 /* Counters support added */
+#define IPSET_TYPE_REV_MIN	0
+#define IPSET_TYPE_REV_MAX	1 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("list:set", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("list:set", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
 MODULE_ALIAS("ip_set_list:set");
 
 /* Member elements  */
@@ -703,8 +703,8 @@ static struct ip_set_type list_set_type __read_mostly = {
 	.features	= IPSET_TYPE_NAME | IPSET_DUMP_LAST,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= REVISION_MIN,
-	.revision_max	= REVISION_MAX,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= list_set_create,
 	.create_policy	= {
 		[IPSET_ATTR_SIZE]	= { .type = NLA_U32 },
-- 
cgit v1.2.3-71-gd317


From b8cd97865c903e032db85e5a4f2783928c56f2bd Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Thu, 2 May 2013 10:52:27 +0200
Subject: netfilter: ipset: Use fix sized type for timeout in the extension
 part

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index f900f33a5f3d..69aa60487f05 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -67,7 +67,7 @@ enum ip_set_offset {
 #define SET_WITH_COUNTER(s)	((s)->extensions & IPSET_EXT_COUNTER)
 
 struct ip_set_ext {
-	unsigned long timeout;
+	u32 timeout;
 	u64 packets;
 	u64 bytes;
 };
-- 
cgit v1.2.3-71-gd317


From a04d8b6bd9113f3e7f0c216dcaa3c1ad498f2a96 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Mon, 30 Sep 2013 09:05:54 +0200
Subject: netfilter: ipset: Prepare ipset to support multiple networks for hash
 types

In order to support hash:net,net, hash:net,port,net etc. types,
arrays are introduced for the book-keeping of existing cidr sizes
and network numbers in a set.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h      |  2 +
 net/netfilter/ipset/ip_set_hash_gen.h       | 78 +++++++++++++++--------------
 net/netfilter/ipset/ip_set_hash_ipportnet.c |  4 +-
 net/netfilter/ipset/ip_set_hash_net.c       |  4 +-
 net/netfilter/ipset/ip_set_hash_netiface.c  |  4 +-
 net/netfilter/ipset/ip_set_hash_netport.c   |  4 +-
 6 files changed, 50 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 69aa60487f05..56012a3431b2 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -398,6 +398,8 @@ bitmap_bytes(u32 a, u32 b)
 	{ .bytes = ULLONG_MAX, .packets = ULLONG_MAX,	\
 	  .timeout = (map)->timeout }
 
+#define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b))
+
 #define IPSET_CONCAT(a, b)		a##b
 #define IPSET_TOKEN(a, b)		IPSET_CONCAT(a, b)
 
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 68b9ccebabaa..a8332404d9b4 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -77,10 +77,14 @@ struct htable {
 
 #define hbucket(h, i)		(&((h)->bucket[i]))
 
+#ifndef IPSET_NET_COUNT
+#define IPSET_NET_COUNT		1
+#endif
+
 /* Book-keeping of the prefixes added to the set */
 struct net_prefixes {
-	u8 cidr;		/* the different cidr values in the set */
-	u32 nets;		/* number of elements per cidr */
+	u32 nets[IPSET_NET_COUNT]; /* number of elements per cidr */
+	u8 cidr[IPSET_NET_COUNT];  /* the different cidr values in the set */
 };
 
 /* Compute the hash table size */
@@ -165,13 +169,13 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 #define SET_HOST_MASK(family)	(family == AF_INET ? 32 : 128)
 
 #ifdef IP_SET_HASH_WITH_MULTI
-#define NETS_LENGTH(family)	(SET_HOST_MASK(family) + 1)
+#define NLEN(family)		(SET_HOST_MASK(family) + 1)
 #else
-#define NETS_LENGTH(family)	SET_HOST_MASK(family)
+#define NLEN(family)		SET_HOST_MASK(family)
 #endif
 
 #else
-#define NETS_LENGTH(family)	0
+#define NLEN(family)		0
 #endif /* IP_SET_HASH_WITH_NETS */
 
 #define ext_timeout(e, h)	\
@@ -296,49 +300,49 @@ struct htype {
 /* Network cidr size book keeping when the hash stores different
  * sized networks */
 static void
-mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length)
+mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 {
 	int i, j;
 
 	/* Add in increasing prefix order, so larger cidr first */
-	for (i = 0, j = -1; i < nets_length && h->nets[i].nets; i++) {
+	for (i = 0, j = -1; i < nets_length && h->nets[i].nets[n]; i++) {
 		if (j != -1)
 			continue;
-		else if (h->nets[i].cidr < cidr)
+		else if (h->nets[i].cidr[n] < cidr)
 			j = i;
-		else if (h->nets[i].cidr == cidr) {
-			h->nets[i].nets++;
+		else if (h->nets[i].cidr[n] == cidr) {
+			h->nets[i].nets[n]++;
 			return;
 		}
 	}
 	if (j != -1) {
 		for (; i > j; i--) {
-			h->nets[i].cidr = h->nets[i - 1].cidr;
-			h->nets[i].nets = h->nets[i - 1].nets;
+			h->nets[i].cidr[n] = h->nets[i - 1].cidr[n];
+			h->nets[i].nets[n] = h->nets[i - 1].nets[n];
 		}
 	}
-	h->nets[i].cidr = cidr;
-	h->nets[i].nets = 1;
+	h->nets[i].cidr[n] = cidr;
+	h->nets[i].nets[n] = 1;
 }
 
 static void
-mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length)
+mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 {
 	u8 i, j, net_end = nets_length - 1;
 
 	for (i = 0; i < nets_length; i++) {
-	        if (h->nets[i].cidr != cidr)
+	        if (h->nets[i].cidr[n] != cidr)
 	                continue;
-                if (h->nets[i].nets > 1 || i == net_end ||
-                    h->nets[i + 1].nets == 0) {
-                        h->nets[i].nets--;
+                if (h->nets[i].nets[n] > 1 || i == net_end ||
+                    h->nets[i + 1].nets[n] == 0) {
+                        h->nets[i].nets[n]--;
                         return;
                 }
-                for (j = i; j < net_end && h->nets[j].nets; j++) {
-		        h->nets[j].cidr = h->nets[j + 1].cidr;
-		        h->nets[j].nets = h->nets[j + 1].nets;
+                for (j = i; j < net_end && h->nets[j].nets[n]; j++) {
+		        h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
+		        h->nets[j].nets[n] = h->nets[j + 1].nets[n];
                 }
-                h->nets[j].nets = 0;
+                h->nets[j].nets[n] = 0;
                 return;
 	}
 }
@@ -382,8 +386,7 @@ mtype_flush(struct ip_set *set)
 		}
 	}
 #ifdef IP_SET_HASH_WITH_NETS
-	memset(h->nets, 0, sizeof(struct net_prefixes)
-			   * NETS_LENGTH(set->family));
+	memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family));
 #endif
 	h->elements = 0;
 }
@@ -459,7 +462,7 @@ mtype_expire(struct htype *h, u8 nets_length, size_t dsize)
 				pr_debug("expired %u/%u\n", i, j);
 #ifdef IP_SET_HASH_WITH_NETS
 				mtype_del_cidr(h, CIDR(data->cidr),
-					       nets_length);
+					       nets_length, 0);
 #endif
 				if (j != n->pos - 1)
 					/* Not last one */
@@ -494,7 +497,7 @@ mtype_gc(unsigned long ul_set)
 
 	pr_debug("called\n");
 	write_lock_bh(&set->lock);
-	mtype_expire(h, NETS_LENGTH(set->family), h->dsize);
+	mtype_expire(h, NLEN(set->family), h->dsize);
 	write_unlock_bh(&set->lock);
 
 	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
@@ -523,8 +526,7 @@ mtype_resize(struct ip_set *set, bool retried)
 	if (SET_WITH_TIMEOUT(set) && !retried) {
 		i = h->elements;
 		write_lock_bh(&set->lock);
-		mtype_expire(set->data, NETS_LENGTH(set->family),
-			     h->dsize);
+		mtype_expire(set->data, NLEN(set->family), h->dsize);
 		write_unlock_bh(&set->lock);
 		if (h->elements < i)
 			return 0;
@@ -607,7 +609,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 
 	if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem)
 		/* FIXME: when set is full, we slow down here */
-		mtype_expire(h, NETS_LENGTH(set->family), h->dsize);
+		mtype_expire(h, NLEN(set->family), h->dsize);
 
 	if (h->elements >= h->maxelem) {
 		if (net_ratelimit())
@@ -645,8 +647,8 @@ reuse_slot:
 		/* Fill out reused slot */
 		data = ahash_data(n, j, h->dsize);
 #ifdef IP_SET_HASH_WITH_NETS
-		mtype_del_cidr(h, CIDR(data->cidr), NETS_LENGTH(set->family));
-		mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
+		mtype_del_cidr(h, CIDR(data->cidr), NLEN(set->family), 0);
+		mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0);
 #endif
 	} else {
 		/* Use/create a new slot */
@@ -659,7 +661,7 @@ reuse_slot:
 		}
 		data = ahash_data(n, n->pos++, h->dsize);
 #ifdef IP_SET_HASH_WITH_NETS
-		mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
+		mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0);
 #endif
 		h->elements++;
 	}
@@ -711,7 +713,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 		n->pos--;
 		h->elements--;
 #ifdef IP_SET_HASH_WITH_NETS
-		mtype_del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
+		mtype_del_cidr(h, CIDR(d->cidr), NLEN(set->family), 0);
 #endif
 		if (n->pos + AHASH_INIT_SIZE < n->size) {
 			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
@@ -760,11 +762,11 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
 	struct mtype_elem *data;
 	int i, j = 0;
 	u32 key, multi = 0;
-	u8 nets_length = NETS_LENGTH(set->family);
+	u8 nets_length = NLEN(set->family);
 
 	pr_debug("test by nets\n");
-	for (; j < nets_length && h->nets[j].nets && !multi; j++) {
-		mtype_data_netmask(d, h->nets[j].cidr);
+	for (; j < nets_length && h->nets[j].nets[0] && !multi; j++) {
+		mtype_data_netmask(d, h->nets[j].cidr[0]);
 		key = HKEY(d, h->initval, t->htable_bits);
 		n = hbucket(t, key);
 		for (i = 0; i < n->pos; i++) {
@@ -839,7 +841,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 	size_t memsize;
 
 	t = rcu_dereference_bh_nfnl(h->table);
-	memsize = mtype_ahash_memsize(h, t, NETS_LENGTH(set->family));
+	memsize = mtype_ahash_memsize(h, t, NLEN(set->family));
 
 	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 	if (!nested)
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index f111558c4597..6ce5a8e2c44e 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -170,7 +170,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_ipportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportnet4_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
@@ -454,7 +454,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_ipportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportnet6_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 0a64dad156d9..ec1c7dc10489 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -143,7 +143,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_net *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_net4_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
@@ -338,7 +338,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_net *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_net6_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 846371b08630..814b4e31a7f8 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -265,7 +265,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_netiface *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface4_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK,
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 		.elem = 1,
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
@@ -534,7 +534,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_netiface *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface6_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK,
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 		.elem = 1,
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index d98a685cd916..3bd923d3f179 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -162,7 +162,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netport4_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
@@ -407,7 +407,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netport6_elem e = {
-		.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1,
+		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
-- 
cgit v1.2.3-71-gd317


From f925f7056920213889c5e61445f9529f1a86ae41 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Fri, 6 Sep 2013 22:31:40 +0200
Subject: netfilter: ipset: Rename extension offset ids to extension ids

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h    | 16 ++++++++--------
 net/netfilter/ipset/ip_set_bitmap_gen.h   |  4 ++--
 net/netfilter/ipset/ip_set_bitmap_ip.c    | 10 +++++-----
 net/netfilter/ipset/ip_set_bitmap_ipmac.c | 10 +++++-----
 net/netfilter/ipset/ip_set_bitmap_port.c  | 10 +++++-----
 net/netfilter/ipset/ip_set_hash_gen.h     | 22 +++++++++++-----------
 net/netfilter/ipset/ip_set_list_set.c     | 14 +++++++-------
 7 files changed, 43 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 56012a3431b2..b4db7912bf0d 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -56,20 +56,20 @@ enum ip_set_extension {
 	IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER),
 };
 
-/* Extension offsets */
-enum ip_set_offset {
-	IPSET_OFFSET_TIMEOUT = 0,
-	IPSET_OFFSET_COUNTER,
-	IPSET_OFFSET_MAX,
-};
-
 #define SET_WITH_TIMEOUT(s)	((s)->extensions & IPSET_EXT_TIMEOUT)
 #define SET_WITH_COUNTER(s)	((s)->extensions & IPSET_EXT_COUNTER)
 
+/* Extension id, in size order */
+enum ip_set_ext_id {
+	IPSET_EXT_ID_COUNTER = 0,
+	IPSET_EXT_ID_TIMEOUT,
+	IPSET_EXT_ID_MAX,
+};
+
 struct ip_set_ext {
-	u32 timeout;
 	u64 packets;
 	u64 bytes;
+	u32 timeout;
 };
 
 struct ip_set;
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index d39905e7e881..889a929b908f 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -33,9 +33,9 @@
 #define mtype			MTYPE
 
 #define ext_timeout(e, m)	\
-	(unsigned long *)((e) + (m)->offset[IPSET_OFFSET_TIMEOUT])
+	(unsigned long *)((e) + (m)->offset[IPSET_EXT_ID_TIMEOUT])
 #define ext_counter(e, m)	\
-	(struct ip_set_counter *)((e) + (m)->offset[IPSET_OFFSET_COUNTER])
+	(struct ip_set_counter *)((e) + (m)->offset[IPSET_EXT_ID_COUNTER])
 #define get_ext(map, id)	((map)->extensions + (map)->dsize * (id))
 
 static void
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index ce99d2652d47..2ee210ef49a2 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -45,7 +45,7 @@ struct bitmap_ip {
 	u32 hosts;		/* number of hosts in a subnet */
 	size_t memsize;		/* members size */
 	size_t dsize;		/* extensions struct size */
-	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
+	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
 	u8 netmask;		/* subnet netmask */
 	u32 timeout;		/* timeout parameter */
 	struct timer_list gc;	/* garbage collection */
@@ -342,9 +342,9 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		set->extensions |= IPSET_EXT_COUNTER;
 		if (tb[IPSET_ATTR_TIMEOUT]) {
 			map->dsize = sizeof(struct bitmap_ipct_elem);
-			map->offset[IPSET_OFFSET_TIMEOUT] =
+			map->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct bitmap_ipct_elem, timeout);
-			map->offset[IPSET_OFFSET_COUNTER] =
+			map->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_ipct_elem, counter);
 
 			if (!init_map_ip(set, map, first_ip, last_ip,
@@ -360,7 +360,7 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			bitmap_ip_gc_init(set, bitmap_ip_gc);
 		} else {
 			map->dsize = sizeof(struct bitmap_ipc_elem);
-			map->offset[IPSET_OFFSET_COUNTER] =
+			map->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_ipc_elem, counter);
 
 			if (!init_map_ip(set, map, first_ip, last_ip,
@@ -371,7 +371,7 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
 		map->dsize = sizeof(struct bitmap_ipt_elem);
-		map->offset[IPSET_OFFSET_TIMEOUT] =
+		map->offset[IPSET_EXT_ID_TIMEOUT] =
 			offsetof(struct bitmap_ipt_elem, timeout);
 
 		if (!init_map_ip(set, map, first_ip, last_ip,
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 6d5bad93026b..e711875ea452 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -52,7 +52,7 @@ struct bitmap_ipmac {
 	struct timer_list gc;	/* garbage collector */
 	size_t memsize;		/* members size */
 	size_t dsize;		/* size of element */
-	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
+	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
 };
 
 /* ADT structure for generic function args */
@@ -405,9 +405,9 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 		set->extensions |= IPSET_EXT_COUNTER;
 		if (tb[IPSET_ATTR_TIMEOUT]) {
 			map->dsize = sizeof(struct bitmap_ipmacct_elem);
-			map->offset[IPSET_OFFSET_TIMEOUT] =
+			map->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct bitmap_ipmacct_elem, timeout);
-			map->offset[IPSET_OFFSET_COUNTER] =
+			map->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_ipmacct_elem, counter);
 
 			if (!init_map_ipmac(set, map, first_ip, last_ip,
@@ -421,7 +421,7 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 			bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
 		} else {
 			map->dsize = sizeof(struct bitmap_ipmacc_elem);
-			map->offset[IPSET_OFFSET_COUNTER] =
+			map->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_ipmacc_elem, counter);
 
 			if (!init_map_ipmac(set, map, first_ip, last_ip,
@@ -432,7 +432,7 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
 		map->dsize = sizeof(struct bitmap_ipmact_elem);
-		map->offset[IPSET_OFFSET_TIMEOUT] =
+		map->offset[IPSET_EXT_ID_TIMEOUT] =
 			offsetof(struct bitmap_ipmact_elem, timeout);
 
 		if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index b22048965d2a..bebc137a5737 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -39,7 +39,7 @@ struct bitmap_port {
 	u32 elements;		/* number of max elements in the set */
 	size_t memsize;		/* members size */
 	size_t dsize;		/* extensions struct size */
-	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
+	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
 	u32 timeout;		/* timeout parameter */
 	struct timer_list gc;	/* garbage collection */
 };
@@ -282,9 +282,9 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		set->extensions |= IPSET_EXT_COUNTER;
 		if (tb[IPSET_ATTR_TIMEOUT]) {
 			map->dsize = sizeof(struct bitmap_portct_elem);
-			map->offset[IPSET_OFFSET_TIMEOUT] =
+			map->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct bitmap_portct_elem, timeout);
-			map->offset[IPSET_OFFSET_COUNTER] =
+			map->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_portct_elem, counter);
 			if (!init_map_port(set, map, first_port, last_port)) {
 				kfree(map);
@@ -297,7 +297,7 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			bitmap_port_gc_init(set, bitmap_port_gc);
 		} else {
 			map->dsize = sizeof(struct bitmap_portc_elem);
-			map->offset[IPSET_OFFSET_COUNTER] =
+			map->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_portc_elem, counter);
 			if (!init_map_port(set, map, first_port, last_port)) {
 				kfree(map);
@@ -306,7 +306,7 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
 		map->dsize = sizeof(struct bitmap_portt_elem);
-		map->offset[IPSET_OFFSET_TIMEOUT] =
+		map->offset[IPSET_EXT_ID_TIMEOUT] =
 			offsetof(struct bitmap_portt_elem, timeout);
 		if (!init_map_port(set, map, first_port, last_port)) {
 			kfree(map);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index a8332404d9b4..e4db9250f337 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -179,9 +179,9 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 #endif /* IP_SET_HASH_WITH_NETS */
 
 #define ext_timeout(e, h)	\
-(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_TIMEOUT])
+(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_TIMEOUT])
 #define ext_counter(e, h)	\
-(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_COUNTER])
+(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_COUNTER])
 
 #endif /* _IP_SET_HASH_GEN_H */
 
@@ -278,7 +278,7 @@ struct htype {
 	u32 initval;		/* random jhash init value */
 	u32 timeout;		/* timeout value, if enabled */
 	size_t dsize;		/* data struct size */
-	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
+	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
 	struct timer_list gc;	/* garbage collection when timeout enabled */
 	struct mtype_elem next; /* temporary storage for uadd */
 #ifdef IP_SET_HASH_WITH_MULTI
@@ -1059,11 +1059,11 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			if (set->family == NFPROTO_IPV4) {
 				h->dsize = sizeof(struct
 					IPSET_TOKEN(HTYPE, 4ct_elem));
-				h->offset[IPSET_OFFSET_TIMEOUT] =
+				h->offset[IPSET_EXT_ID_TIMEOUT] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 4ct_elem),
 						timeout);
-				h->offset[IPSET_OFFSET_COUNTER] =
+				h->offset[IPSET_EXT_ID_COUNTER] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 4ct_elem),
 						counter);
@@ -1072,11 +1072,11 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			} else {
 				h->dsize = sizeof(struct
 					IPSET_TOKEN(HTYPE, 6ct_elem));
-				h->offset[IPSET_OFFSET_TIMEOUT] =
+				h->offset[IPSET_EXT_ID_TIMEOUT] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 6ct_elem),
 						timeout);
-				h->offset[IPSET_OFFSET_COUNTER] =
+				h->offset[IPSET_EXT_ID_COUNTER] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 6ct_elem),
 						counter);
@@ -1088,7 +1088,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 				h->dsize =
 					sizeof(struct
 						IPSET_TOKEN(HTYPE, 4c_elem));
-				h->offset[IPSET_OFFSET_COUNTER] =
+				h->offset[IPSET_EXT_ID_COUNTER] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 4c_elem),
 						counter);
@@ -1096,7 +1096,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 				h->dsize =
 					sizeof(struct
 						IPSET_TOKEN(HTYPE, 6c_elem));
-				h->offset[IPSET_OFFSET_COUNTER] =
+				h->offset[IPSET_EXT_ID_COUNTER] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 6c_elem),
 						counter);
@@ -1107,14 +1107,14 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		set->extensions |= IPSET_EXT_TIMEOUT;
 		if (set->family == NFPROTO_IPV4) {
 			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem));
-			h->offset[IPSET_OFFSET_TIMEOUT] =
+			h->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem),
 					 timeout);
 			IPSET_TOKEN(HTYPE, 4_gc_init)(set,
 				IPSET_TOKEN(HTYPE, 4_gc));
 		} else {
 			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem));
-			h->offset[IPSET_OFFSET_TIMEOUT] =
+			h->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem),
 					 timeout);
 			IPSET_TOKEN(HTYPE, 6_gc_init)(set,
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index a9e301f6e093..0ed19b5e675e 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -59,7 +59,7 @@ struct set_adt_elem {
 /* Type structure */
 struct list_set {
 	size_t dsize;		/* element size */
-	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
+	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
 	u32 size;		/* size of set list array */
 	u32 timeout;		/* timeout value */
 	struct timer_list gc;	/* garbage collection */
@@ -73,9 +73,9 @@ list_set_elem(const struct list_set *map, u32 id)
 }
 
 #define ext_timeout(e, m)	\
-(unsigned long *)((void *)(e) + (m)->offset[IPSET_OFFSET_TIMEOUT])
+(unsigned long *)((void *)(e) + (m)->offset[IPSET_EXT_ID_TIMEOUT])
 #define ext_counter(e, m)	\
-(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_OFFSET_COUNTER])
+(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_EXT_ID_COUNTER])
 
 static int
 list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
@@ -667,9 +667,9 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			if (!map)
 				return -ENOMEM;
 			set->extensions |= IPSET_EXT_TIMEOUT;
-			map->offset[IPSET_OFFSET_TIMEOUT] =
+			map->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct setct_elem, timeout);
-			map->offset[IPSET_OFFSET_COUNTER] =
+			map->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct setct_elem, counter);
 			list_set_gc_init(set, list_set_gc);
 		} else {
@@ -677,7 +677,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 					    sizeof(struct setc_elem), 0);
 			if (!map)
 				return -ENOMEM;
-			map->offset[IPSET_OFFSET_COUNTER] =
+			map->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct setc_elem, counter);
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
@@ -686,7 +686,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		if (!map)
 			return -ENOMEM;
 		set->extensions |= IPSET_EXT_TIMEOUT;
-		map->offset[IPSET_OFFSET_TIMEOUT] =
+		map->offset[IPSET_EXT_ID_TIMEOUT] =
 			offsetof(struct sett_elem, timeout);
 		list_set_gc_init(set, list_set_gc);
 	} else {
-- 
cgit v1.2.3-71-gd317


From ca134ce86451f3f5ac45ffbf1494a1f42110bf93 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 7 Sep 2013 00:10:07 +0200
Subject: netfilter: ipset: Move extension data to set structure

Default timeout and extension offsets are moved to struct set, because
all set types supports all extensions and it makes possible to generalize
extension support.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h         |  29 ++++--
 include/linux/netfilter/ipset/ip_set_timeout.h |   4 +-
 net/netfilter/ipset/ip_set_bitmap_gen.h        |  59 ++++++------
 net/netfilter/ipset/ip_set_bitmap_ip.c         |  45 +++++----
 net/netfilter/ipset/ip_set_bitmap_ipmac.c      |  64 ++++++-------
 net/netfilter/ipset/ip_set_bitmap_port.c       |  44 ++++-----
 net/netfilter/ipset/ip_set_hash_gen.h          | 127 ++++++++++++-------------
 net/netfilter/ipset/ip_set_hash_ip.c           |   8 +-
 net/netfilter/ipset/ip_set_hash_ipport.c       |  10 +-
 net/netfilter/ipset/ip_set_hash_ipportip.c     |  10 +-
 net/netfilter/ipset/ip_set_hash_ipportnet.c    |  14 +--
 net/netfilter/ipset/ip_set_hash_net.c          |   9 +-
 net/netfilter/ipset/ip_set_hash_netiface.c     |   8 +-
 net/netfilter/ipset/ip_set_hash_netport.c      |   8 +-
 net/netfilter/ipset/ip_set_list_set.c          | 116 ++++++++++------------
 15 files changed, 266 insertions(+), 289 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index b4db7912bf0d..992a2f58dbd3 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -72,6 +72,16 @@ struct ip_set_ext {
 	u32 timeout;
 };
 
+struct ip_set_counter {
+	atomic64_t bytes;
+	atomic64_t packets;
+};
+
+#define ext_timeout(e, s)	\
+(unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT])
+#define ext_counter(e, s)	\
+(struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])
+
 struct ip_set;
 
 typedef int (*ipset_adtfn)(struct ip_set *set, void *value,
@@ -179,15 +189,16 @@ struct ip_set {
 	u8 revision;
 	/* Extensions */
 	u8 extensions;
+	/* Default timeout value, if enabled */
+	u32 timeout;
+	/* Element data size */
+	size_t dsize;
+	/* Offsets to extensions in elements */
+	size_t offset[IPSET_EXT_ID_MAX];
 	/* The type specific data */
 	void *data;
 };
 
-struct ip_set_counter {
-	atomic64_t bytes;
-	atomic64_t packets;
-};
-
 static inline void
 ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
 {
@@ -390,13 +401,13 @@ bitmap_bytes(u32 a, u32 b)
 
 #include <linux/netfilter/ipset/ip_set_timeout.h>
 
-#define IP_SET_INIT_KEXT(skb, opt, map)			\
+#define IP_SET_INIT_KEXT(skb, opt, set)			\
 	{ .bytes = (skb)->len, .packets = 1,		\
-	  .timeout = ip_set_adt_opt_timeout(opt, map) }
+	  .timeout = ip_set_adt_opt_timeout(opt, set) }
 
-#define IP_SET_INIT_UEXT(map)				\
+#define IP_SET_INIT_UEXT(set)				\
 	{ .bytes = ULLONG_MAX, .packets = ULLONG_MAX,	\
-	  .timeout = (map)->timeout }
+	  .timeout = (set)->timeout }
 
 #define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b))
 
diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h
index 3aac04167ca7..83c2f9e0886c 100644
--- a/include/linux/netfilter/ipset/ip_set_timeout.h
+++ b/include/linux/netfilter/ipset/ip_set_timeout.h
@@ -23,8 +23,8 @@
 /* Set is defined with timeout support: timeout value may be 0 */
 #define IPSET_NO_TIMEOUT	UINT_MAX
 
-#define ip_set_adt_opt_timeout(opt, map)	\
-((opt)->ext.timeout != IPSET_NO_TIMEOUT ? (opt)->ext.timeout : (map)->timeout)
+#define ip_set_adt_opt_timeout(opt, set)	\
+((opt)->ext.timeout != IPSET_NO_TIMEOUT ? (opt)->ext.timeout : (set)->timeout)
 
 static inline unsigned int
 ip_set_timeout_uget(struct nlattr *tb)
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 889a929b908f..f32ddbca3923 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -32,11 +32,7 @@
 #define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
 #define mtype			MTYPE
 
-#define ext_timeout(e, m)	\
-	(unsigned long *)((e) + (m)->offset[IPSET_EXT_ID_TIMEOUT])
-#define ext_counter(e, m)	\
-	(struct ip_set_counter *)((e) + (m)->offset[IPSET_EXT_ID_COUNTER])
-#define get_ext(map, id)	((map)->extensions + (map)->dsize * (id))
+#define get_ext(set, map, id)	((map)->extensions + (set)->dsize * (id))
 
 static void
 mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
@@ -46,7 +42,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 	init_timer(&map->gc);
 	map->gc.data = (unsigned long) set;
 	map->gc.function = gc;
-	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
 }
 
@@ -59,7 +55,7 @@ mtype_destroy(struct ip_set *set)
 		del_timer_sync(&map->gc);
 
 	ip_set_free(map->members);
-	if (map->dsize)
+	if (set->dsize)
 		ip_set_free(map->extensions);
 	kfree(map);
 
@@ -88,9 +84,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
 			  htonl(sizeof(*map) +
 				map->memsize +
-				map->dsize * map->elements)) ||
+				set->dsize * map->elements)) ||
 	    (SET_WITH_TIMEOUT(set) &&
-	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) ||
+	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) ||
 	    (SET_WITH_COUNTER(set) &&
 	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
 			   htonl(IPSET_FLAG_WITH_COUNTERS))))
@@ -108,16 +104,16 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 {
 	struct mtype *map = set->data;
 	const struct mtype_adt_elem *e = value;
-	void *x = get_ext(map, e->id);
-	int ret = mtype_do_test(e, map);
+	void *x = get_ext(set, map, e->id);
+	int ret = mtype_do_test(e, map, set->dsize);
 
 	if (ret <= 0)
 		return ret;
 	if (SET_WITH_TIMEOUT(set) &&
-	    ip_set_timeout_expired(ext_timeout(x, map)))
+	    ip_set_timeout_expired(ext_timeout(x, set)))
 		return 0;
 	if (SET_WITH_COUNTER(set))
-		ip_set_update_counter(ext_counter(x, map), ext, mext, flags);
+		ip_set_update_counter(ext_counter(x, set), ext, mext, flags);
 	return 1;
 }
 
@@ -127,12 +123,12 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 {
 	struct mtype *map = set->data;
 	const struct mtype_adt_elem *e = value;
-	void *x = get_ext(map, e->id);
-	int ret = mtype_do_add(e, map, flags);
+	void *x = get_ext(set, map, e->id);
+	int ret = mtype_do_add(e, map, flags, set->dsize);
 
 	if (ret == IPSET_ADD_FAILED) {
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(x, map)))
+		    ip_set_timeout_expired(ext_timeout(x, set)))
 			ret = 0;
 		else if (!(flags & IPSET_FLAG_EXIST))
 			return -IPSET_ERR_EXIST;
@@ -140,13 +136,13 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 
 	if (SET_WITH_TIMEOUT(set))
 #ifdef IP_SET_BITMAP_STORED_TIMEOUT
-		mtype_add_timeout(ext_timeout(x, map), e, ext, map, ret);
+		mtype_add_timeout(ext_timeout(x, set), e, ext, set, map, ret);
 #else
-		ip_set_timeout_set(ext_timeout(x, map), ext->timeout);
+		ip_set_timeout_set(ext_timeout(x, set), ext->timeout);
 #endif
 
 	if (SET_WITH_COUNTER(set))
-		ip_set_init_counter(ext_counter(x, map), ext);
+		ip_set_init_counter(ext_counter(x, set), ext);
 	return 0;
 }
 
@@ -156,11 +152,11 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 {
 	struct mtype *map = set->data;
 	const struct mtype_adt_elem *e = value;
-	const void *x = get_ext(map, e->id);
+	const void *x = get_ext(set, map, e->id);
 
 	if (mtype_do_del(e, map) ||
 	    (SET_WITH_TIMEOUT(set) &&
-	     ip_set_timeout_expired(ext_timeout(x, map))))
+	     ip_set_timeout_expired(ext_timeout(x, set))))
 		return -IPSET_ERR_EXIST;
 
 	return 0;
@@ -180,13 +176,13 @@ mtype_list(const struct ip_set *set,
 		return -EMSGSIZE;
 	for (; cb->args[2] < map->elements; cb->args[2]++) {
 		id = cb->args[2];
-		x = get_ext(map, id);
+		x = get_ext(set, map, id);
 		if (!test_bit(id, map->members) ||
 		    (SET_WITH_TIMEOUT(set) &&
 #ifdef IP_SET_BITMAP_STORED_TIMEOUT
 		     mtype_is_filled((const struct mtype_elem *) x) &&
 #endif
-		     ip_set_timeout_expired(ext_timeout(x, map))))
+		     ip_set_timeout_expired(ext_timeout(x, set))))
 			continue;
 		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 		if (!nested) {
@@ -196,23 +192,24 @@ mtype_list(const struct ip_set *set,
 			} else
 				goto nla_put_failure;
 		}
-		if (mtype_do_list(skb, map, id))
+		if (mtype_do_list(skb, map, id, set->dsize))
 			goto nla_put_failure;
 		if (SET_WITH_TIMEOUT(set)) {
 #ifdef IP_SET_BITMAP_STORED_TIMEOUT
 			if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
 					  htonl(ip_set_timeout_stored(map, id,
-							ext_timeout(x, map)))))
+							ext_timeout(x, set),
+							set->dsize))))
 				goto nla_put_failure;
 #else
 			if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
 					  htonl(ip_set_timeout_get(
-							ext_timeout(x, map)))))
+							ext_timeout(x, set)))))
 				goto nla_put_failure;
 #endif
 		}
 		if (SET_WITH_COUNTER(set) &&
-		    ip_set_put_counter(skb, ext_counter(x, map)))
+		    ip_set_put_counter(skb, ext_counter(x, set)))
 			goto nla_put_failure;
 		ipset_nest_end(skb, nested);
 	}
@@ -245,14 +242,14 @@ mtype_gc(unsigned long ul_set)
 	 * but adding/deleting new entries is locked out */
 	read_lock_bh(&set->lock);
 	for (id = 0; id < map->elements; id++)
-		if (mtype_gc_test(id, map)) {
-			x = get_ext(map, id);
-			if (ip_set_timeout_expired(ext_timeout(x, map)))
+		if (mtype_gc_test(id, map, set->dsize)) {
+			x = get_ext(set, map, id);
+			if (ip_set_timeout_expired(ext_timeout(x, set)))
 				clear_bit(id, map->members);
 		}
 	read_unlock_bh(&set->lock);
 
-	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
 }
 
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 2ee210ef49a2..363022edb8fb 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -44,10 +44,7 @@ struct bitmap_ip {
 	u32 elements;		/* number of max elements in the set */
 	u32 hosts;		/* number of hosts in a subnet */
 	size_t memsize;		/* members size */
-	size_t dsize;		/* extensions struct size */
-	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
 	u8 netmask;		/* subnet netmask */
-	u32 timeout;		/* timeout parameter */
 	struct timer_list gc;	/* garbage collection */
 };
 
@@ -65,20 +62,21 @@ ip_to_id(const struct bitmap_ip *m, u32 ip)
 /* Common functions */
 
 static inline int
-bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
+bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e,
+		  struct bitmap_ip *map, size_t dsize)
 {
 	return !!test_bit(e->id, map->members);
 }
 
 static inline int
-bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map)
+bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map, size_t dsize)
 {
 	return !!test_bit(id, map->members);
 }
 
 static inline int
 bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map,
-		 u32 flags)
+		 u32 flags, size_t dsize)
 {
 	return !!test_and_set_bit(e->id, map->members);
 }
@@ -90,7 +88,8 @@ bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
 }
 
 static inline int
-bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id)
+bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id,
+		  size_t dsize)
 {
 	return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
 			htonl(map->first_ip + id * map->hosts));
@@ -113,7 +112,7 @@ bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct bitmap_ip *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct bitmap_ip_adt_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	u32 ip;
 
 	ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));
@@ -133,7 +132,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	u32 ip = 0, ip_to = 0;
 	struct bitmap_ip_adt_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	int ret = 0;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -200,7 +199,7 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)
 	return x->first_ip == y->first_ip &&
 	       x->last_ip == y->last_ip &&
 	       x->netmask == y->netmask &&
-	       x->timeout == y->timeout &&
+	       a->timeout == b->timeout &&
 	       a->extensions == b->extensions;
 }
 
@@ -240,8 +239,8 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (map->dsize) {
-		map->extensions = ip_set_alloc(map->dsize * elements);
+	if (set->dsize) {
+		map->extensions = ip_set_alloc(set->dsize * elements);
 		if (!map->extensions) {
 			kfree(map->members);
 			return false;
@@ -252,7 +251,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
 	map->elements = elements;
 	map->hosts = hosts;
 	map->netmask = netmask;
-	map->timeout = IPSET_NO_TIMEOUT;
+	set->timeout = IPSET_NO_TIMEOUT;
 
 	set->data = map;
 	set->family = NFPROTO_IPV4;
@@ -341,10 +340,10 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
 		set->extensions |= IPSET_EXT_COUNTER;
 		if (tb[IPSET_ATTR_TIMEOUT]) {
-			map->dsize = sizeof(struct bitmap_ipct_elem);
-			map->offset[IPSET_EXT_ID_TIMEOUT] =
+			set->dsize = sizeof(struct bitmap_ipct_elem);
+			set->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct bitmap_ipct_elem, timeout);
-			map->offset[IPSET_EXT_ID_COUNTER] =
+			set->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_ipct_elem, counter);
 
 			if (!init_map_ip(set, map, first_ip, last_ip,
@@ -353,14 +352,14 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 				return -ENOMEM;
 			}
 
-			map->timeout = ip_set_timeout_uget(
+			set->timeout = ip_set_timeout_uget(
 				tb[IPSET_ATTR_TIMEOUT]);
 			set->extensions |= IPSET_EXT_TIMEOUT;
 
 			bitmap_ip_gc_init(set, bitmap_ip_gc);
 		} else {
-			map->dsize = sizeof(struct bitmap_ipc_elem);
-			map->offset[IPSET_EXT_ID_COUNTER] =
+			set->dsize = sizeof(struct bitmap_ipc_elem);
+			set->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_ipc_elem, counter);
 
 			if (!init_map_ip(set, map, first_ip, last_ip,
@@ -370,8 +369,8 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			}
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		map->dsize = sizeof(struct bitmap_ipt_elem);
-		map->offset[IPSET_EXT_ID_TIMEOUT] =
+		set->dsize = sizeof(struct bitmap_ipt_elem);
+		set->offset[IPSET_EXT_ID_TIMEOUT] =
 			offsetof(struct bitmap_ipt_elem, timeout);
 
 		if (!init_map_ip(set, map, first_ip, last_ip,
@@ -380,12 +379,12 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			return -ENOMEM;
 		}
 
-		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 		set->extensions |= IPSET_EXT_TIMEOUT;
 
 		bitmap_ip_gc_init(set, bitmap_ip_gc);
 	} else {
-		map->dsize = 0;
+		set->dsize = 0;
 		if (!init_map_ip(set, map, first_ip, last_ip,
 				 elements, hosts, netmask)) {
 			kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index e711875ea452..74576cb19264 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -48,11 +48,8 @@ struct bitmap_ipmac {
 	u32 first_ip;		/* host byte order, included in range */
 	u32 last_ip;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
-	u32 timeout;		/* timeout value */
-	struct timer_list gc;	/* garbage collector */
 	size_t memsize;		/* members size */
-	size_t dsize;		/* size of element */
-	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
+	struct timer_list gc;	/* garbage collector */
 };
 
 /* ADT structure for generic function args */
@@ -82,13 +79,13 @@ get_elem(void *extensions, u16 id, size_t dsize)
 
 static inline int
 bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
-		     const struct bitmap_ipmac *map)
+		     const struct bitmap_ipmac *map, size_t dsize)
 {
 	const struct bitmap_ipmac_elem *elem;
 
 	if (!test_bit(e->id, map->members))
 		return 0;
-	elem = get_elem(map->extensions, e->id, map->dsize);
+	elem = get_elem(map->extensions, e->id, dsize);
 	if (elem->filled == MAC_FILLED)
 		return e->ether == NULL ||
 		       ether_addr_equal(e->ether, elem->ether);
@@ -97,13 +94,13 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
 }
 
 static inline int
-bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map)
+bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
 {
 	const struct bitmap_ipmac_elem *elem;
 
 	if (!test_bit(id, map->members))
 		return 0;
-	elem = get_elem(map->extensions, id, map->dsize);
+	elem = get_elem(map->extensions, id, dsize);
 	/* Timer not started for the incomplete elements */
 	return elem->filled == MAC_FILLED;
 }
@@ -117,13 +114,13 @@ bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem)
 static inline int
 bitmap_ipmac_add_timeout(unsigned long *timeout,
 			 const struct bitmap_ipmac_adt_elem *e,
-			 const struct ip_set_ext *ext,
+			 const struct ip_set_ext *ext, struct ip_set *set,
 			 struct bitmap_ipmac *map, int mode)
 {
 	u32 t = ext->timeout;
 
 	if (mode == IPSET_ADD_START_STORED_TIMEOUT) {
-		if (t == map->timeout)
+		if (t == set->timeout)
 			/* Timeout was not specified, get stored one */
 			t = *timeout;
 		ip_set_timeout_set(timeout, t);
@@ -142,11 +139,11 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
 
 static inline int
 bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
-		    struct bitmap_ipmac *map, u32 flags)
+		    struct bitmap_ipmac *map, u32 flags, size_t dsize)
 {
 	struct bitmap_ipmac_elem *elem;
 
-	elem = get_elem(map->extensions, e->id, map->dsize);
+	elem = get_elem(map->extensions, e->id, dsize);
 	if (test_and_set_bit(e->id, map->members)) {
 		if (elem->filled == MAC_FILLED) {
 			if (e->ether && (flags & IPSET_FLAG_EXIST))
@@ -179,10 +176,11 @@ bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e,
 }
 
 static inline unsigned long
-ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout)
+ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout,
+		      size_t dsize)
 {
 	const struct bitmap_ipmac_elem *elem =
-		get_elem(map->extensions, id, map->dsize);
+		get_elem(map->extensions, id, dsize);
 
 	return elem->filled == MAC_FILLED ? ip_set_timeout_get(timeout) :
 					    *timeout;
@@ -190,10 +188,10 @@ ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout)
 
 static inline int
 bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
-		     u32 id)
+		     u32 id, size_t dsize)
 {
 	const struct bitmap_ipmac_elem *elem =
-		get_elem(map->extensions, id, map->dsize);
+		get_elem(map->extensions, id, dsize);
 
 	return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
 			       htonl(map->first_ip + id)) ||
@@ -216,7 +214,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct bitmap_ipmac *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct bitmap_ipmac_adt_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	u32 ip;
 
 	/* MAC can be src only */
@@ -245,7 +243,7 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct bitmap_ipmac *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct bitmap_ipmac_adt_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0;
 	int ret = 0;
 
@@ -285,7 +283,7 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b)
 
 	return x->first_ip == y->first_ip &&
 	       x->last_ip == y->last_ip &&
-	       x->timeout == y->timeout &&
+	       a->timeout == b->timeout &&
 	       a->extensions == b->extensions;
 }
 
@@ -330,11 +328,11 @@ static bool
 init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
 	       u32 first_ip, u32 last_ip, u32 elements)
 {
-	map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize);
+	map->members = ip_set_alloc((last_ip - first_ip + 1) * set->dsize);
 	if (!map->members)
 		return false;
-	if (map->dsize) {
-		map->extensions = ip_set_alloc(map->dsize * elements);
+	if (set->dsize) {
+		map->extensions = ip_set_alloc(set->dsize * elements);
 		if (!map->extensions) {
 			kfree(map->members);
 			return false;
@@ -343,7 +341,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
 	map->first_ip = first_ip;
 	map->last_ip = last_ip;
 	map->elements = elements;
-	map->timeout = IPSET_NO_TIMEOUT;
+	set->timeout = IPSET_NO_TIMEOUT;
 
 	set->data = map;
 	set->family = NFPROTO_IPV4;
@@ -404,10 +402,10 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
 		set->extensions |= IPSET_EXT_COUNTER;
 		if (tb[IPSET_ATTR_TIMEOUT]) {
-			map->dsize = sizeof(struct bitmap_ipmacct_elem);
-			map->offset[IPSET_EXT_ID_TIMEOUT] =
+			set->dsize = sizeof(struct bitmap_ipmacct_elem);
+			set->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct bitmap_ipmacct_elem, timeout);
-			map->offset[IPSET_EXT_ID_COUNTER] =
+			set->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_ipmacct_elem, counter);
 
 			if (!init_map_ipmac(set, map, first_ip, last_ip,
@@ -415,13 +413,13 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 				kfree(map);
 				return -ENOMEM;
 			}
-			map->timeout = ip_set_timeout_uget(
+			set->timeout = ip_set_timeout_uget(
 				tb[IPSET_ATTR_TIMEOUT]);
 			set->extensions |= IPSET_EXT_TIMEOUT;
 			bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
 		} else {
-			map->dsize = sizeof(struct bitmap_ipmacc_elem);
-			map->offset[IPSET_EXT_ID_COUNTER] =
+			set->dsize = sizeof(struct bitmap_ipmacc_elem);
+			set->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_ipmacc_elem, counter);
 
 			if (!init_map_ipmac(set, map, first_ip, last_ip,
@@ -431,19 +429,19 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 			}
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		map->dsize = sizeof(struct bitmap_ipmact_elem);
-		map->offset[IPSET_EXT_ID_TIMEOUT] =
+		set->dsize = sizeof(struct bitmap_ipmact_elem);
+		set->offset[IPSET_EXT_ID_TIMEOUT] =
 			offsetof(struct bitmap_ipmact_elem, timeout);
 
 		if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
 			kfree(map);
 			return -ENOMEM;
 		}
-		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 		set->extensions |= IPSET_EXT_TIMEOUT;
 		bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
 	} else {
-		map->dsize = sizeof(struct bitmap_ipmac_elem);
+		set->dsize = sizeof(struct bitmap_ipmac_elem);
 
 		if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
 			kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index bebc137a5737..71da31935499 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -38,9 +38,6 @@ struct bitmap_port {
 	u16 last_port;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
 	size_t memsize;		/* members size */
-	size_t dsize;		/* extensions struct size */
-	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
-	u32 timeout;		/* timeout parameter */
 	struct timer_list gc;	/* garbage collection */
 };
 
@@ -59,20 +56,20 @@ port_to_id(const struct bitmap_port *m, u16 port)
 
 static inline int
 bitmap_port_do_test(const struct bitmap_port_adt_elem *e,
-		    const struct bitmap_port *map)
+		    const struct bitmap_port *map, size_t dsize)
 {
 	return !!test_bit(e->id, map->members);
 }
 
 static inline int
-bitmap_port_gc_test(u16 id, const struct bitmap_port *map)
+bitmap_port_gc_test(u16 id, const struct bitmap_port *map, size_t dsize)
 {
 	return !!test_bit(id, map->members);
 }
 
 static inline int
 bitmap_port_do_add(const struct bitmap_port_adt_elem *e,
-		   struct bitmap_port *map, u32 flags)
+		   struct bitmap_port *map, u32 flags, size_t dsize)
 {
 	return !!test_and_set_bit(e->id, map->members);
 }
@@ -85,7 +82,8 @@ bitmap_port_do_del(const struct bitmap_port_adt_elem *e,
 }
 
 static inline int
-bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id)
+bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id,
+		    size_t dsize)
 {
 	return nla_put_net16(skb, IPSET_ATTR_PORT,
 			     htons(map->first_port + id));
@@ -106,7 +104,7 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct bitmap_port *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct bitmap_port_adt_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	__be16 __port;
 	u16 port = 0;
 
@@ -131,7 +129,7 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct bitmap_port *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct bitmap_port_adt_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port;	/* wraparound */
 	u16 port_to;
 	int ret = 0;
@@ -191,7 +189,7 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)
 
 	return x->first_port == y->first_port &&
 	       x->last_port == y->last_port &&
-	       x->timeout == y->timeout &&
+	       a->timeout == b->timeout &&
 	       a->extensions == b->extensions;
 }
 
@@ -230,8 +228,8 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (map->dsize) {
-		map->extensions = ip_set_alloc(map->dsize * map->elements);
+	if (set->dsize) {
+		map->extensions = ip_set_alloc(set->dsize * map->elements);
 		if (!map->extensions) {
 			kfree(map->members);
 			return false;
@@ -239,7 +237,7 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
 	}
 	map->first_port = first_port;
 	map->last_port = last_port;
-	map->timeout = IPSET_NO_TIMEOUT;
+	set->timeout = IPSET_NO_TIMEOUT;
 
 	set->data = map;
 	set->family = NFPROTO_UNSPEC;
@@ -281,23 +279,23 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
 		set->extensions |= IPSET_EXT_COUNTER;
 		if (tb[IPSET_ATTR_TIMEOUT]) {
-			map->dsize = sizeof(struct bitmap_portct_elem);
-			map->offset[IPSET_EXT_ID_TIMEOUT] =
+			set->dsize = sizeof(struct bitmap_portct_elem);
+			set->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct bitmap_portct_elem, timeout);
-			map->offset[IPSET_EXT_ID_COUNTER] =
+			set->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_portct_elem, counter);
 			if (!init_map_port(set, map, first_port, last_port)) {
 				kfree(map);
 				return -ENOMEM;
 			}
 
-			map->timeout =
+			set->timeout =
 				ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 			set->extensions |= IPSET_EXT_TIMEOUT;
 			bitmap_port_gc_init(set, bitmap_port_gc);
 		} else {
-			map->dsize = sizeof(struct bitmap_portc_elem);
-			map->offset[IPSET_EXT_ID_COUNTER] =
+			set->dsize = sizeof(struct bitmap_portc_elem);
+			set->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct bitmap_portc_elem, counter);
 			if (!init_map_port(set, map, first_port, last_port)) {
 				kfree(map);
@@ -305,19 +303,19 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			}
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		map->dsize = sizeof(struct bitmap_portt_elem);
-		map->offset[IPSET_EXT_ID_TIMEOUT] =
+		set->dsize = sizeof(struct bitmap_portt_elem);
+		set->offset[IPSET_EXT_ID_TIMEOUT] =
 			offsetof(struct bitmap_portt_elem, timeout);
 		if (!init_map_port(set, map, first_port, last_port)) {
 			kfree(map);
 			return -ENOMEM;
 		}
 
-		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 		set->extensions |= IPSET_EXT_TIMEOUT;
 		bitmap_port_gc_init(set, bitmap_port_gc);
 	} else {
-		map->dsize = 0;
+		set->dsize = 0;
 		if (!init_map_port(set, map, first_port, last_port)) {
 			kfree(map);
 			return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index e4db9250f337..0cb840e1f8ae 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -178,11 +178,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 #define NLEN(family)		0
 #endif /* IP_SET_HASH_WITH_NETS */
 
-#define ext_timeout(e, h)	\
-(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_TIMEOUT])
-#define ext_counter(e, h)	\
-(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_COUNTER])
-
 #endif /* _IP_SET_HASH_GEN_H */
 
 /* Family dependent templates */
@@ -276,9 +271,6 @@ struct htype {
 	u32 maxelem;		/* max elements in the hash */
 	u32 elements;		/* current element (vs timeout) */
 	u32 initval;		/* random jhash init value */
-	u32 timeout;		/* timeout value, if enabled */
-	size_t dsize;		/* data struct size */
-	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
 	struct timer_list gc;	/* garbage collection when timeout enabled */
 	struct mtype_elem next; /* temporary storage for uadd */
 #ifdef IP_SET_HASH_WITH_MULTI
@@ -351,7 +343,7 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 /* Calculate the actual memory size of the set data */
 static size_t
 mtype_ahash_memsize(const struct htype *h, const struct htable *t,
-		    u8 nets_length)
+		    u8 nets_length, size_t dsize)
 {
 	u32 i;
 	size_t memsize = sizeof(*h)
@@ -362,7 +354,7 @@ mtype_ahash_memsize(const struct htype *h, const struct htable *t,
 			 + jhash_size(t->htable_bits) * sizeof(struct hbucket);
 
 	for (i = 0; i < jhash_size(t->htable_bits); i++)
-		memsize += t->bucket[i].size * h->dsize;
+		memsize += t->bucket[i].size * dsize;
 
 	return memsize;
 }
@@ -417,10 +409,10 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 	init_timer(&h->gc);
 	h->gc.data = (unsigned long) set;
 	h->gc.function = gc;
-	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
+	h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&h->gc);
 	pr_debug("gc initialized, run in every %u\n",
-		 IPSET_GC_PERIOD(h->timeout));
+		 IPSET_GC_PERIOD(set->timeout));
 }
 
 static bool
@@ -431,7 +423,7 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
 
 	/* Resizing changes htable_bits, so we ignore it */
 	return x->maxelem == y->maxelem &&
-	       x->timeout == y->timeout &&
+	       a->timeout == b->timeout &&
 #ifdef IP_SET_HASH_WITH_NETMASK
 	       x->netmask == y->netmask &&
 #endif
@@ -444,7 +436,7 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
 
 /* Delete expired elements from the hashtable */
 static void
-mtype_expire(struct htype *h, u8 nets_length, size_t dsize)
+mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
 {
 	struct htable *t;
 	struct hbucket *n;
@@ -458,7 +450,7 @@ mtype_expire(struct htype *h, u8 nets_length, size_t dsize)
 		n = hbucket(t, i);
 		for (j = 0; j < n->pos; j++) {
 			data = ahash_data(n, j, dsize);
-			if (ip_set_timeout_expired(ext_timeout(data, h))) {
+			if (ip_set_timeout_expired(ext_timeout(data, set))) {
 				pr_debug("expired %u/%u\n", i, j);
 #ifdef IP_SET_HASH_WITH_NETS
 				mtype_del_cidr(h, CIDR(data->cidr),
@@ -497,10 +489,10 @@ mtype_gc(unsigned long ul_set)
 
 	pr_debug("called\n");
 	write_lock_bh(&set->lock);
-	mtype_expire(h, NLEN(set->family), h->dsize);
+	mtype_expire(set, h, NLEN(set->family), set->dsize);
 	write_unlock_bh(&set->lock);
 
-	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
+	h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&h->gc);
 }
 
@@ -526,7 +518,7 @@ mtype_resize(struct ip_set *set, bool retried)
 	if (SET_WITH_TIMEOUT(set) && !retried) {
 		i = h->elements;
 		write_lock_bh(&set->lock);
-		mtype_expire(set->data, NLEN(set->family), h->dsize);
+		mtype_expire(set, set->data, NLEN(set->family), set->dsize);
 		write_unlock_bh(&set->lock);
 		if (h->elements < i)
 			return 0;
@@ -553,13 +545,13 @@ retry:
 	for (i = 0; i < jhash_size(orig->htable_bits); i++) {
 		n = hbucket(orig, i);
 		for (j = 0; j < n->pos; j++) {
-			data = ahash_data(n, j, h->dsize);
+			data = ahash_data(n, j, set->dsize);
 #ifdef IP_SET_HASH_WITH_NETS
 			flags = 0;
 			mtype_data_reset_flags(data, &flags);
 #endif
 			m = hbucket(t, HKEY(data, h->initval, htable_bits));
-			ret = hbucket_elem_add(m, AHASH_MAX(h), h->dsize);
+			ret = hbucket_elem_add(m, AHASH_MAX(h), set->dsize);
 			if (ret < 0) {
 #ifdef IP_SET_HASH_WITH_NETS
 				mtype_data_reset_flags(data, &flags);
@@ -570,8 +562,8 @@ retry:
 					goto retry;
 				return ret;
 			}
-			d = ahash_data(m, m->pos++, h->dsize);
-			memcpy(d, data, h->dsize);
+			d = ahash_data(m, m->pos++, set->dsize);
+			memcpy(d, data, set->dsize);
 #ifdef IP_SET_HASH_WITH_NETS
 			mtype_data_reset_flags(d, &flags);
 #endif
@@ -609,7 +601,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 
 	if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem)
 		/* FIXME: when set is full, we slow down here */
-		mtype_expire(h, NLEN(set->family), h->dsize);
+		mtype_expire(set, h, NLEN(set->family), set->dsize);
 
 	if (h->elements >= h->maxelem) {
 		if (net_ratelimit())
@@ -623,11 +615,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	key = HKEY(value, h->initval, t->htable_bits);
 	n = hbucket(t, key);
 	for (i = 0; i < n->pos; i++) {
-		data = ahash_data(n, i, h->dsize);
+		data = ahash_data(n, i, set->dsize);
 		if (mtype_data_equal(data, d, &multi)) {
 			if (flag_exist ||
 			    (SET_WITH_TIMEOUT(set) &&
-			     ip_set_timeout_expired(ext_timeout(data, h)))) {
+			     ip_set_timeout_expired(ext_timeout(data, set)))) {
 				/* Just the extensions could be overwritten */
 				j = i;
 				goto reuse_slot;
@@ -638,14 +630,14 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 		}
 		/* Reuse first timed out entry */
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(data, h)) &&
+		    ip_set_timeout_expired(ext_timeout(data, set)) &&
 		    j != AHASH_MAX(h) + 1)
 			j = i;
 	}
 reuse_slot:
 	if (j != AHASH_MAX(h) + 1) {
 		/* Fill out reused slot */
-		data = ahash_data(n, j, h->dsize);
+		data = ahash_data(n, j, set->dsize);
 #ifdef IP_SET_HASH_WITH_NETS
 		mtype_del_cidr(h, CIDR(data->cidr), NLEN(set->family), 0);
 		mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0);
@@ -653,13 +645,13 @@ reuse_slot:
 	} else {
 		/* Use/create a new slot */
 		TUNE_AHASH_MAX(h, multi);
-		ret = hbucket_elem_add(n, AHASH_MAX(h), h->dsize);
+		ret = hbucket_elem_add(n, AHASH_MAX(h), set->dsize);
 		if (ret != 0) {
 			if (ret == -EAGAIN)
 				mtype_data_next(&h->next, d);
 			goto out;
 		}
-		data = ahash_data(n, n->pos++, h->dsize);
+		data = ahash_data(n, n->pos++, set->dsize);
 #ifdef IP_SET_HASH_WITH_NETS
 		mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0);
 #endif
@@ -670,9 +662,9 @@ reuse_slot:
 	mtype_data_set_flags(data, flags);
 #endif
 	if (SET_WITH_TIMEOUT(set))
-		ip_set_timeout_set(ext_timeout(data, h), ext->timeout);
+		ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
 	if (SET_WITH_COUNTER(set))
-		ip_set_init_counter(ext_counter(data, h), ext);
+		ip_set_init_counter(ext_counter(data, set), ext);
 
 out:
 	rcu_read_unlock_bh();
@@ -699,16 +691,16 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	key = HKEY(value, h->initval, t->htable_bits);
 	n = hbucket(t, key);
 	for (i = 0; i < n->pos; i++) {
-		data = ahash_data(n, i, h->dsize);
+		data = ahash_data(n, i, set->dsize);
 		if (!mtype_data_equal(data, d, &multi))
 			continue;
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(data, h)))
+		    ip_set_timeout_expired(ext_timeout(data, set)))
 			goto out;
 		if (i != n->pos - 1)
 			/* Not last one */
-			memcpy(data, ahash_data(n, n->pos - 1, h->dsize),
-			       h->dsize);
+			memcpy(data, ahash_data(n, n->pos - 1, set->dsize),
+			       set->dsize);
 
 		n->pos--;
 		h->elements--;
@@ -717,14 +709,14 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 #endif
 		if (n->pos + AHASH_INIT_SIZE < n->size) {
 			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
-					    * h->dsize,
+					    * set->dsize,
 					    GFP_ATOMIC);
 			if (!tmp) {
 				ret = 0;
 				goto out;
 			}
 			n->size -= AHASH_INIT_SIZE;
-			memcpy(tmp, n->value, n->size * h->dsize);
+			memcpy(tmp, n->value, n->size * set->dsize);
 			kfree(n->value);
 			n->value = tmp;
 		}
@@ -742,8 +734,7 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
 		 struct ip_set_ext *mext, struct ip_set *set, u32 flags)
 {
 	if (SET_WITH_COUNTER(set))
-		ip_set_update_counter(ext_counter(data,
-						  (struct htype *)(set->data)),
+		ip_set_update_counter(ext_counter(data, set),
 				      ext, mext, flags);
 	return mtype_do_data_match(data);
 }
@@ -770,12 +761,12 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
 		key = HKEY(d, h->initval, t->htable_bits);
 		n = hbucket(t, key);
 		for (i = 0; i < n->pos; i++) {
-			data = ahash_data(n, i, h->dsize);
+			data = ahash_data(n, i, set->dsize);
 			if (!mtype_data_equal(data, d, &multi))
 				continue;
 			if (SET_WITH_TIMEOUT(set)) {
 				if (!ip_set_timeout_expired(
-							ext_timeout(data, h)))
+						ext_timeout(data, set)))
 					return mtype_data_match(data, ext,
 								mext, set,
 								flags);
@@ -818,10 +809,10 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	key = HKEY(d, h->initval, t->htable_bits);
 	n = hbucket(t, key);
 	for (i = 0; i < n->pos; i++) {
-		data = ahash_data(n, i, h->dsize);
+		data = ahash_data(n, i, set->dsize);
 		if (mtype_data_equal(data, d, &multi) &&
 		    !(SET_WITH_TIMEOUT(set) &&
-		      ip_set_timeout_expired(ext_timeout(data, h)))) {
+		      ip_set_timeout_expired(ext_timeout(data, set)))) {
 			ret = mtype_data_match(data, ext, mext, set, flags);
 			goto out;
 		}
@@ -841,7 +832,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 	size_t memsize;
 
 	t = rcu_dereference_bh_nfnl(h->table);
-	memsize = mtype_ahash_memsize(h, t, NLEN(set->family));
+	memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize);
 
 	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 	if (!nested)
@@ -858,7 +849,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 	if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
 	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
 	    ((set->extensions & IPSET_EXT_TIMEOUT) &&
-	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout))) ||
+	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) ||
 	    ((set->extensions & IPSET_EXT_COUNTER) &&
 	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
 			   htonl(IPSET_FLAG_WITH_COUNTERS))))
@@ -894,9 +885,9 @@ mtype_list(const struct ip_set *set,
 		n = hbucket(t, cb->args[2]);
 		pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
 		for (i = 0; i < n->pos; i++) {
-			e = ahash_data(n, i, h->dsize);
+			e = ahash_data(n, i, set->dsize);
 			if (SET_WITH_TIMEOUT(set) &&
-			    ip_set_timeout_expired(ext_timeout(e, h)))
+			    ip_set_timeout_expired(ext_timeout(e, set)))
 				continue;
 			pr_debug("list hash %lu hbucket %p i %u, data %p\n",
 				 cb->args[2], n, i, e);
@@ -913,10 +904,10 @@ mtype_list(const struct ip_set *set,
 			if (SET_WITH_TIMEOUT(set) &&
 			    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
 					  htonl(ip_set_timeout_get(
-						ext_timeout(e, h)))))
+						ext_timeout(e, set)))))
 				goto nla_put_failure;
 			if (SET_WITH_COUNTER(set) &&
-			    ip_set_put_counter(skb, ext_counter(e, h)))
+			    ip_set_put_counter(skb, ext_counter(e, set)))
 				goto nla_put_failure;
 			ipset_nest_end(skb, nested);
 		}
@@ -1026,7 +1017,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	h->netmask = netmask;
 #endif
 	get_random_bytes(&h->initval, sizeof(h->initval));
-	h->timeout = IPSET_NO_TIMEOUT;
+	set->timeout = IPSET_NO_TIMEOUT;
 
 	hbits = htable_bits(hashsize);
 	hsize = htable_size(hbits);
@@ -1053,30 +1044,30 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
 		set->extensions |= IPSET_EXT_COUNTER;
 		if (tb[IPSET_ATTR_TIMEOUT]) {
-			h->timeout =
+			set->timeout =
 				ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 			set->extensions |= IPSET_EXT_TIMEOUT;
 			if (set->family == NFPROTO_IPV4) {
-				h->dsize = sizeof(struct
+				set->dsize = sizeof(struct
 					IPSET_TOKEN(HTYPE, 4ct_elem));
-				h->offset[IPSET_EXT_ID_TIMEOUT] =
+				set->offset[IPSET_EXT_ID_TIMEOUT] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 4ct_elem),
 						timeout);
-				h->offset[IPSET_EXT_ID_COUNTER] =
+				set->offset[IPSET_EXT_ID_COUNTER] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 4ct_elem),
 						counter);
 				IPSET_TOKEN(HTYPE, 4_gc_init)(set,
 					IPSET_TOKEN(HTYPE, 4_gc));
 			} else {
-				h->dsize = sizeof(struct
+				set->dsize = sizeof(struct
 					IPSET_TOKEN(HTYPE, 6ct_elem));
-				h->offset[IPSET_EXT_ID_TIMEOUT] =
+				set->offset[IPSET_EXT_ID_TIMEOUT] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 6ct_elem),
 						timeout);
-				h->offset[IPSET_EXT_ID_COUNTER] =
+				set->offset[IPSET_EXT_ID_COUNTER] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 6ct_elem),
 						counter);
@@ -1085,36 +1076,36 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			}
 		} else {
 			if (set->family == NFPROTO_IPV4) {
-				h->dsize =
+				set->dsize =
 					sizeof(struct
 						IPSET_TOKEN(HTYPE, 4c_elem));
-				h->offset[IPSET_EXT_ID_COUNTER] =
+				set->offset[IPSET_EXT_ID_COUNTER] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 4c_elem),
 						counter);
 			} else {
-				h->dsize =
+				set->dsize =
 					sizeof(struct
 						IPSET_TOKEN(HTYPE, 6c_elem));
-				h->offset[IPSET_EXT_ID_COUNTER] =
+				set->offset[IPSET_EXT_ID_COUNTER] =
 					offsetof(struct
 						IPSET_TOKEN(HTYPE, 6c_elem),
 						counter);
 			}
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 		set->extensions |= IPSET_EXT_TIMEOUT;
 		if (set->family == NFPROTO_IPV4) {
-			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem));
-			h->offset[IPSET_EXT_ID_TIMEOUT] =
+			set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem));
+			set->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem),
 					 timeout);
 			IPSET_TOKEN(HTYPE, 4_gc_init)(set,
 				IPSET_TOKEN(HTYPE, 4_gc));
 		} else {
-			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem));
-			h->offset[IPSET_EXT_ID_TIMEOUT] =
+			set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem));
+			set->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem),
 					 timeout);
 			IPSET_TOKEN(HTYPE, 6_gc_init)(set,
@@ -1122,9 +1113,9 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		}
 	} else {
 		if (set->family == NFPROTO_IPV4)
-			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem));
+			set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem));
 		else
-			h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem));
+			set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem));
 	}
 
 	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 260c9a80d8a5..bbde7c304622 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -99,7 +99,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_ip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ip4_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	__be32 ip;
 
 	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip);
@@ -118,7 +118,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ip4_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0, ip_to = 0, hosts;
 	int ret = 0;
 
@@ -253,7 +253,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	const struct hash_ip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ip6_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
 	hash_ip6_netmask(&e.ip, h->netmask);
@@ -270,7 +270,7 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ip6_elem e = {};
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 64caad35a391..dd175d6f3965 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -116,10 +116,9 @@ hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		  const struct xt_action_param *par,
 		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct hash_ipport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport4_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
 				 &e.port, &e.proto))
@@ -136,7 +135,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport4_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip, ip_to = 0, p = 0, port, port_to;
 	bool with_ports = false;
 	int ret;
@@ -306,10 +305,9 @@ hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb,
 		  const struct xt_action_param *par,
 		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct hash_ipport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport6_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
 				 &e.port, &e.proto))
@@ -326,7 +324,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport6_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port, port_to;
 	bool with_ports = false;
 	int ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 2873bbc20d7a..87a2cfab2568 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -120,10 +120,9 @@ hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		    const struct xt_action_param *par,
 		    enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct hash_ipportip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip4_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
 				 &e.port, &e.proto))
@@ -141,7 +140,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipportip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip4_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip, ip_to = 0, p = 0, port, port_to;
 	bool with_ports = false;
 	int ret;
@@ -319,10 +318,9 @@ hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb,
 		    const struct xt_action_param *par,
 		    enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct hash_ipportip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip6_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
 				 &e.port, &e.proto))
@@ -340,7 +338,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipportip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip6_elem e = { };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port, port_to;
 	bool with_ports = false;
 	int ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 6ce5a8e2c44e..0b9a28d7c740 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -172,7 +172,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_ipportnet4_elem e = {
 		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (adt == IPSET_TEST)
 		e.cidr = HOST_MASK - 1;
@@ -195,7 +195,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0, ip_to = 0, p = 0, port, port_to;
 	u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
 	bool with_ports = false;
@@ -306,9 +306,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 						       : port;
 		for (; p <= port_to; p++) {
 			e.port = htons(p);
-			ip2 = retried
-			      && ip == ntohl(h->next.ip)
-			      && p == ntohs(h->next.port)
+			ip2 = retried &&
+			      ip == ntohl(h->next.ip) &&
+			      p == ntohs(h->next.port)
 				? ntohl(h->next.ip2) : ip2_from;
 			while (!after(ip2, ip2_to)) {
 				e.ip2 = htonl(ip2);
@@ -456,7 +456,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_ipportnet6_elem e = {
 		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (adt == IPSET_TEST)
 		e.cidr = HOST_MASK - 1;
@@ -479,7 +479,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_ipportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port, port_to;
 	bool with_ports = false;
 	u8 cidr;
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index ec1c7dc10489..1d4caa50dacb 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -145,7 +145,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_net4_elem e = {
 		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (e.cidr == 0)
 		return -EINVAL;
@@ -165,7 +165,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_net *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_net4_elem e = { .cidr = HOST_MASK };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0, ip_to = 0, last;
 	int ret;
 
@@ -340,7 +340,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_net6_elem e = {
 		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (e.cidr == 0)
 		return -EINVAL;
@@ -357,10 +357,9 @@ static int
 hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
 	       enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_net *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_net6_elem e = { .cidr = HOST_MASK };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 814b4e31a7f8..2f0ffe35c408 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -268,7 +268,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 		.elem = 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	int ret;
 
 	if (e.cidr == 0)
@@ -319,7 +319,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_netiface *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0, ip_to = 0, last;
 	char iface[IFNAMSIZ];
 	int ret;
@@ -537,7 +537,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
 		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
 		.elem = 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	int ret;
 
 	if (e.cidr == 0)
@@ -584,7 +584,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_netiface *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	char iface[IFNAMSIZ];
 	int ret;
 
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 3bd923d3f179..cab236625f97 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -164,7 +164,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_netport4_elem e = {
 		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (adt == IPSET_TEST)
 		e.cidr = HOST_MASK - 1;
@@ -186,7 +186,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port, port_to, p = 0, ip = 0, ip_to = 0, last;
 	bool with_ports = false;
 	u8 cidr;
@@ -409,7 +409,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	struct hash_netport6_elem e = {
 		.cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
 	};
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	if (adt == IPSET_TEST)
 		e.cidr = HOST_MASK - 1;
@@ -431,7 +431,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netport6_elem e = { .cidr = HOST_MASK  - 1 };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 port, port_to;
 	bool with_ports = false;
 	u8 cidr;
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 0ed19b5e675e..f22d05d6e366 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -58,24 +58,13 @@ struct set_adt_elem {
 
 /* Type structure */
 struct list_set {
-	size_t dsize;		/* element size */
-	size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */
 	u32 size;		/* size of set list array */
-	u32 timeout;		/* timeout value */
 	struct timer_list gc;	/* garbage collection */
 	struct set_elem members[0]; /* the set members */
 };
 
-static inline struct set_elem *
-list_set_elem(const struct list_set *map, u32 id)
-{
-	return (struct set_elem *)((void *)map->members + id * map->dsize);
-}
-
-#define ext_timeout(e, m)	\
-(unsigned long *)((void *)(e) + (m)->offset[IPSET_EXT_ID_TIMEOUT])
-#define ext_counter(e, m)	\
-(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_EXT_ID_COUNTER])
+#define list_set_elem(set, map, id)	\
+	(struct set_elem *)((void *)(map)->members + (id) * (set)->dsize)
 
 static int
 list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
@@ -92,16 +81,16 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
 	if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE)
 		opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE;
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			return 0;
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(e, map)))
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		ret = ip_set_test(e->id, skb, par, opt);
 		if (ret > 0) {
 			if (SET_WITH_COUNTER(set))
-				ip_set_update_counter(ext_counter(e, map),
+				ip_set_update_counter(ext_counter(e, set),
 						      ext, &opt->ext,
 						      cmdflags);
 			return ret;
@@ -121,11 +110,11 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb,
 	int ret;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			return 0;
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(e, map)))
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		ret = ip_set_add(e->id, skb, par, opt);
 		if (ret == 0)
@@ -145,11 +134,11 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb,
 	int ret;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			return 0;
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(e, map)))
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		ret = ip_set_del(e->id, skb, par, opt);
 		if (ret == 0)
@@ -163,8 +152,7 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
 	      const struct xt_action_param *par,
 	      enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	struct list_set *map = set->data;
-	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	switch (adt) {
 	case IPSET_TEST:
@@ -188,10 +176,10 @@ id_eq(const struct ip_set *set, u32 i, ip_set_id_t id)
 	if (i >= map->size)
 		return 0;
 
-	e = list_set_elem(map, i);
+	e = list_set_elem(set, map, i);
 	return !!(e->id == id &&
 		 !(SET_WITH_TIMEOUT(set) &&
-		   ip_set_timeout_expired(ext_timeout(e, map))));
+		   ip_set_timeout_expired(ext_timeout(e, set))));
 }
 
 static int
@@ -199,28 +187,29 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
 	     const struct ip_set_ext *ext)
 {
 	struct list_set *map = set->data;
-	struct set_elem *e = list_set_elem(map, i);
+	struct set_elem *e = list_set_elem(set, map, i);
 
 	if (e->id != IPSET_INVALID_ID) {
 		if (i == map->size - 1)
 			/* Last element replaced: e.g. add new,before,last */
 			ip_set_put_byindex(e->id);
 		else {
-			struct set_elem *x = list_set_elem(map, map->size - 1);
+			struct set_elem *x = list_set_elem(set, map,
+							   map->size - 1);
 
 			/* Last element pushed off */
 			if (x->id != IPSET_INVALID_ID)
 				ip_set_put_byindex(x->id);
-			memmove(list_set_elem(map, i + 1), e,
-				map->dsize * (map->size - (i + 1)));
+			memmove(list_set_elem(set, map, i + 1), e,
+				set->dsize * (map->size - (i + 1)));
 		}
 	}
 
 	e->id = d->id;
 	if (SET_WITH_TIMEOUT(set))
-		ip_set_timeout_set(ext_timeout(e, map), ext->timeout);
+		ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
 	if (SET_WITH_COUNTER(set))
-		ip_set_init_counter(ext_counter(e, map), ext);
+		ip_set_init_counter(ext_counter(e, set), ext);
 	return 0;
 }
 
@@ -228,16 +217,16 @@ static int
 list_set_del(struct ip_set *set, u32 i)
 {
 	struct list_set *map = set->data;
-	struct set_elem *e = list_set_elem(map, i);
+	struct set_elem *e = list_set_elem(set, map, i);
 
 	ip_set_put_byindex(e->id);
 
 	if (i < map->size - 1)
-		memmove(e, list_set_elem(map, i + 1),
-			map->dsize * (map->size - (i + 1)));
+		memmove(e, list_set_elem(set, map, i + 1),
+			set->dsize * (map->size - (i + 1)));
 
 	/* Last element */
-	e = list_set_elem(map, map->size - 1);
+	e = list_set_elem(set, map, map->size - 1);
 	e->id = IPSET_INVALID_ID;
 	return 0;
 }
@@ -250,9 +239,9 @@ set_cleanup_entries(struct ip_set *set)
 	u32 i;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id != IPSET_INVALID_ID &&
-		    ip_set_timeout_expired(ext_timeout(e, map)))
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			list_set_del(set, i);
 	}
 }
@@ -268,11 +257,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	int ret;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			return 0;
 		else if (SET_WITH_TIMEOUT(set) &&
-			 ip_set_timeout_expired(ext_timeout(e, map)))
+			 ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		else if (e->id != d->id)
 			continue;
@@ -301,11 +290,11 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 
 	/* Check already added element */
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			goto insert;
 		else if (SET_WITH_TIMEOUT(set) &&
-			 ip_set_timeout_expired(ext_timeout(e, map)))
+			 ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		else if (e->id != d->id)
 			continue;
@@ -320,9 +309,9 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 			return -IPSET_ERR_EXIST;
 		/* Update extensions */
 		if (SET_WITH_TIMEOUT(set))
-			ip_set_timeout_set(ext_timeout(e, map), ext->timeout);
+			ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
 		if (SET_WITH_COUNTER(set))
-			ip_set_init_counter(ext_counter(e, map), ext);
+			ip_set_init_counter(ext_counter(e, set), ext);
 		/* Set is already added to the list */
 		ip_set_put_byindex(d->id);
 		return 0;
@@ -330,7 +319,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 insert:
 	ret = -IPSET_ERR_LIST_FULL;
 	for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			ret = d->before != 0 ? -IPSET_ERR_REF_EXIST
 				: list_set_add(set, i, d, ext);
@@ -355,12 +344,12 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	u32 i;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			return d->before != 0 ? -IPSET_ERR_REF_EXIST
 					      : -IPSET_ERR_EXIST;
 		else if (SET_WITH_TIMEOUT(set) &&
-			 ip_set_timeout_expired(ext_timeout(e, map)))
+			 ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		else if (e->id != d->id)
 			continue;
@@ -383,10 +372,9 @@ static int
 list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	struct list_set *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct set_adt_elem e = { .refid = IPSET_INVALID_ID };
-	struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	struct ip_set *s;
 	int ret = 0;
 
@@ -454,7 +442,7 @@ list_set_flush(struct ip_set *set)
 	u32 i;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id != IPSET_INVALID_ID) {
 			ip_set_put_byindex(e->id);
 			e->id = IPSET_INVALID_ID;
@@ -486,13 +474,13 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
 		goto nla_put_failure;
 	if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
 	    (SET_WITH_TIMEOUT(set) &&
-	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) ||
+	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) ||
 	    (SET_WITH_COUNTER(set) &&
 	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
 			   htonl(IPSET_FLAG_WITH_COUNTERS))) ||
 	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
 	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
-			  htonl(sizeof(*map) + map->size * map->dsize)))
+			  htonl(sizeof(*map) + map->size * set->dsize)))
 		goto nla_put_failure;
 	ipset_nest_end(skb, nested);
 
@@ -515,11 +503,11 @@ list_set_list(const struct ip_set *set,
 		return -EMSGSIZE;
 	for (; cb->args[2] < map->size; cb->args[2]++) {
 		i = cb->args[2];
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			goto finish;
 		if (SET_WITH_TIMEOUT(set) &&
-		    ip_set_timeout_expired(ext_timeout(e, map)))
+		    ip_set_timeout_expired(ext_timeout(e, set)))
 			continue;
 		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 		if (!nested) {
@@ -535,10 +523,10 @@ list_set_list(const struct ip_set *set,
 		if (SET_WITH_TIMEOUT(set) &&
 		    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
 				  htonl(ip_set_timeout_get(
-						ext_timeout(e, map)))))
+						ext_timeout(e, set)))))
 			goto nla_put_failure;
 		if (SET_WITH_COUNTER(set) &&
-		    ip_set_put_counter(skb, ext_counter(e, map)))
+		    ip_set_put_counter(skb, ext_counter(e, set)))
 			goto nla_put_failure;
 		ipset_nest_end(skb, nested);
 	}
@@ -565,7 +553,7 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)
 	const struct list_set *y = b->data;
 
 	return x->size == y->size &&
-	       x->timeout == y->timeout &&
+	       a->timeout == b->timeout &&
 	       a->extensions == b->extensions;
 }
 
@@ -594,7 +582,7 @@ list_set_gc(unsigned long ul_set)
 	set_cleanup_entries(set);
 	write_unlock_bh(&set->lock);
 
-	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
 }
 
@@ -606,7 +594,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 	init_timer(&map->gc);
 	map->gc.data = (unsigned long) set;
 	map->gc.function = gc;
-	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
 }
 
@@ -625,12 +613,12 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize,
 		return NULL;
 
 	map->size = size;
-	map->dsize = dsize;
-	map->timeout = timeout;
+	set->dsize = dsize;
+	set->timeout = timeout;
 	set->data = map;
 
 	for (i = 0; i < size; i++) {
-		e = list_set_elem(map, i);
+		e = list_set_elem(set, map, i);
 		e->id = IPSET_INVALID_ID;
 	}
 
@@ -667,9 +655,9 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 			if (!map)
 				return -ENOMEM;
 			set->extensions |= IPSET_EXT_TIMEOUT;
-			map->offset[IPSET_EXT_ID_TIMEOUT] =
+			set->offset[IPSET_EXT_ID_TIMEOUT] =
 				offsetof(struct setct_elem, timeout);
-			map->offset[IPSET_EXT_ID_COUNTER] =
+			set->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct setct_elem, counter);
 			list_set_gc_init(set, list_set_gc);
 		} else {
@@ -677,7 +665,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 					    sizeof(struct setc_elem), 0);
 			if (!map)
 				return -ENOMEM;
-			map->offset[IPSET_EXT_ID_COUNTER] =
+			set->offset[IPSET_EXT_ID_COUNTER] =
 				offsetof(struct setc_elem, counter);
 		}
 	} else if (tb[IPSET_ATTR_TIMEOUT]) {
@@ -686,7 +674,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		if (!map)
 			return -ENOMEM;
 		set->extensions |= IPSET_EXT_TIMEOUT;
-		map->offset[IPSET_EXT_ID_TIMEOUT] =
+		set->offset[IPSET_EXT_ID_TIMEOUT] =
 			offsetof(struct sett_elem, timeout);
 		list_set_gc_init(set, list_set_gc);
 	} else {
-- 
cgit v1.2.3-71-gd317


From 03c8b234e61a9a3aab8d970b3bf681934ecfe443 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 7 Sep 2013 00:43:52 +0200
Subject: netfilter: ipset: Generalize extensions support

Get rid of the structure based extensions and introduce a blob for
the extensions. Thus we can support more extension types easily.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h      | 13 +++++
 net/netfilter/ipset/ip_set_bitmap_ip.c      | 81 +++----------------------
 net/netfilter/ipset/ip_set_bitmap_ipmac.c   | 91 +++--------------------------
 net/netfilter/ipset/ip_set_bitmap_port.c    | 71 ++--------------------
 net/netfilter/ipset/ip_set_core.c           | 46 +++++++++++++++
 net/netfilter/ipset/ip_set_hash_gen.h       | 86 ++++-----------------------
 net/netfilter/ipset/ip_set_hash_ip.c        | 36 +-----------
 net/netfilter/ipset/ip_set_hash_ipport.c    | 54 +----------------
 net/netfilter/ipset/ip_set_hash_ipportip.c  | 60 +------------------
 net/netfilter/ipset/ip_set_hash_ipportnet.c | 66 +--------------------
 net/netfilter/ipset/ip_set_hash_net.c       | 54 +----------------
 net/netfilter/ipset/ip_set_hash_netiface.c  | 68 +--------------------
 net/netfilter/ipset/ip_set_hash_netport.c   | 60 +------------------
 net/netfilter/ipset/ip_set_list_set.c       | 81 ++++---------------------
 14 files changed, 118 insertions(+), 749 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 992a2f58dbd3..66d6bd404d64 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -66,6 +66,17 @@ enum ip_set_ext_id {
 	IPSET_EXT_ID_MAX,
 };
 
+/* Extension type */
+struct ip_set_ext_type {
+	enum ip_set_extension type;
+	enum ipset_cadt_flags flag;
+	/* Size and minimal alignment */
+	u8 len;
+	u8 align;
+};
+
+extern const struct ip_set_ext_type ip_set_extensions[];
+
 struct ip_set_ext {
 	u64 packets;
 	u64 bytes;
@@ -283,6 +294,8 @@ extern void *ip_set_alloc(size_t size);
 extern void ip_set_free(void *members);
 extern int ip_set_get_ipaddr4(struct nlattr *nla,  __be32 *ipaddr);
 extern int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr);
+extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[],
+			      size_t len);
 extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 				 struct ip_set_ext *ext);
 
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 363022edb8fb..94d985457c51 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -208,25 +208,6 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)
 struct bitmap_ip_elem {
 };
 
-/* Timeout variant */
-
-struct bitmap_ipt_elem {
-	unsigned long timeout;
-};
-
-/* Plain variant with counter */
-
-struct bitmap_ipc_elem {
-	struct ip_set_counter counter;
-};
-
-/* Timeout variant with counter */
-
-struct bitmap_ipct_elem {
-	unsigned long timeout;
-	struct ip_set_counter counter;
-};
-
 #include "ip_set_bitmap_gen.h"
 
 /* Create bitmap:ip type of sets */
@@ -263,7 +244,7 @@ static int
 bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 {
 	struct bitmap_ip *map;
-	u32 first_ip = 0, last_ip = 0, hosts, cadt_flags = 0;
+	u32 first_ip = 0, last_ip = 0, hosts;
 	u64 elements;
 	u8 netmask = 32;
 	int ret;
@@ -335,61 +316,15 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 
 	map->memsize = bitmap_bytes(0, elements - 1);
 	set->variant = &bitmap_ip;
-	if (tb[IPSET_ATTR_CADT_FLAGS])
-		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
-	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
-		set->extensions |= IPSET_EXT_COUNTER;
-		if (tb[IPSET_ATTR_TIMEOUT]) {
-			set->dsize = sizeof(struct bitmap_ipct_elem);
-			set->offset[IPSET_EXT_ID_TIMEOUT] =
-				offsetof(struct bitmap_ipct_elem, timeout);
-			set->offset[IPSET_EXT_ID_COUNTER] =
-				offsetof(struct bitmap_ipct_elem, counter);
-
-			if (!init_map_ip(set, map, first_ip, last_ip,
-					 elements, hosts, netmask)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-
-			set->timeout = ip_set_timeout_uget(
-				tb[IPSET_ATTR_TIMEOUT]);
-			set->extensions |= IPSET_EXT_TIMEOUT;
-
-			bitmap_ip_gc_init(set, bitmap_ip_gc);
-		} else {
-			set->dsize = sizeof(struct bitmap_ipc_elem);
-			set->offset[IPSET_EXT_ID_COUNTER] =
-				offsetof(struct bitmap_ipc_elem, counter);
-
-			if (!init_map_ip(set, map, first_ip, last_ip,
-					 elements, hosts, netmask)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-		}
-	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		set->dsize = sizeof(struct bitmap_ipt_elem);
-		set->offset[IPSET_EXT_ID_TIMEOUT] =
-			offsetof(struct bitmap_ipt_elem, timeout);
-
-		if (!init_map_ip(set, map, first_ip, last_ip,
-				 elements, hosts, netmask)) {
-			kfree(map);
-			return -ENOMEM;
-		}
-
+	set->dsize = ip_set_elem_len(set, tb, 0);
+	if (!init_map_ip(set, map, first_ip, last_ip,
+			 elements, hosts, netmask)) {
+		kfree(map);
+		return -ENOMEM;
+	}
+	if (tb[IPSET_ATTR_TIMEOUT]) {
 		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-		set->extensions |= IPSET_EXT_TIMEOUT;
-
 		bitmap_ip_gc_init(set, bitmap_ip_gc);
-	} else {
-		set->dsize = 0;
-		if (!init_map_ip(set, map, first_ip, last_ip,
-				 elements, hosts, netmask)) {
-			kfree(map);
-			return -ENOMEM;
-		}
 	}
 	return 0;
 }
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 74576cb19264..654a97bedfe9 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -289,37 +289,6 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b)
 
 /* Plain variant */
 
-/* Timeout variant */
-
-struct bitmap_ipmact_elem {
-	struct {
-		unsigned char ether[ETH_ALEN];
-		unsigned char filled;
-	} __attribute__ ((aligned));
-	unsigned long timeout;
-};
-
-/* Plain variant with counter */
-
-struct bitmap_ipmacc_elem {
-	struct {
-		unsigned char ether[ETH_ALEN];
-		unsigned char filled;
-	} __attribute__ ((aligned));
-	struct ip_set_counter counter;
-};
-
-/* Timeout variant with counter */
-
-struct bitmap_ipmacct_elem {
-	struct {
-		unsigned char ether[ETH_ALEN];
-		unsigned char filled;
-	} __attribute__ ((aligned));
-	unsigned long timeout;
-	struct ip_set_counter counter;
-};
-
 #include "ip_set_bitmap_gen.h"
 
 /* Create bitmap:ip,mac type of sets */
@@ -328,7 +297,7 @@ static bool
 init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
 	       u32 first_ip, u32 last_ip, u32 elements)
 {
-	map->members = ip_set_alloc((last_ip - first_ip + 1) * set->dsize);
+	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
 	if (set->dsize) {
@@ -353,7 +322,7 @@ static int
 bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 		    u32 flags)
 {
-	u32 first_ip = 0, last_ip = 0, cadt_flags = 0;
+	u32 first_ip = 0, last_ip = 0;
 	u64 elements;
 	struct bitmap_ipmac *map;
 	int ret;
@@ -397,57 +366,15 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 
 	map->memsize = bitmap_bytes(0, elements - 1);
 	set->variant = &bitmap_ipmac;
-	if (tb[IPSET_ATTR_CADT_FLAGS])
-		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
-	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
-		set->extensions |= IPSET_EXT_COUNTER;
-		if (tb[IPSET_ATTR_TIMEOUT]) {
-			set->dsize = sizeof(struct bitmap_ipmacct_elem);
-			set->offset[IPSET_EXT_ID_TIMEOUT] =
-				offsetof(struct bitmap_ipmacct_elem, timeout);
-			set->offset[IPSET_EXT_ID_COUNTER] =
-				offsetof(struct bitmap_ipmacct_elem, counter);
-
-			if (!init_map_ipmac(set, map, first_ip, last_ip,
-					    elements)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-			set->timeout = ip_set_timeout_uget(
-				tb[IPSET_ATTR_TIMEOUT]);
-			set->extensions |= IPSET_EXT_TIMEOUT;
-			bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
-		} else {
-			set->dsize = sizeof(struct bitmap_ipmacc_elem);
-			set->offset[IPSET_EXT_ID_COUNTER] =
-				offsetof(struct bitmap_ipmacc_elem, counter);
-
-			if (!init_map_ipmac(set, map, first_ip, last_ip,
-					    elements)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-		}
-	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		set->dsize = sizeof(struct bitmap_ipmact_elem);
-		set->offset[IPSET_EXT_ID_TIMEOUT] =
-			offsetof(struct bitmap_ipmact_elem, timeout);
-
-		if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
-			kfree(map);
-			return -ENOMEM;
-		}
+	set->dsize = ip_set_elem_len(set, tb,
+				     sizeof(struct bitmap_ipmac_elem));
+	if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
+		kfree(map);
+		return -ENOMEM;
+	}
+	if (tb[IPSET_ATTR_TIMEOUT]) {
 		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-		set->extensions |= IPSET_EXT_TIMEOUT;
 		bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
-	} else {
-		set->dsize = sizeof(struct bitmap_ipmac_elem);
-
-		if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
-			kfree(map);
-			return -ENOMEM;
-		}
-		set->variant = &bitmap_ipmac;
 	}
 	return 0;
 }
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 71da31935499..1ef2f3186b80 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -198,25 +198,6 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)
 struct bitmap_port_elem {
 };
 
-/* Timeout variant */
-
-struct bitmap_portt_elem {
-	unsigned long timeout;
-};
-
-/* Plain variant with counter */
-
-struct bitmap_portc_elem {
-	struct ip_set_counter counter;
-};
-
-/* Timeout variant with counter */
-
-struct bitmap_portct_elem {
-	unsigned long timeout;
-	struct ip_set_counter counter;
-};
-
 #include "ip_set_bitmap_gen.h"
 
 /* Create bitmap:ip type of sets */
@@ -250,7 +231,6 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 {
 	struct bitmap_port *map;
 	u16 first_port, last_port;
-	u32 cadt_flags = 0;
 
 	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -274,53 +254,14 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	map->elements = last_port - first_port + 1;
 	map->memsize = map->elements * sizeof(unsigned long);
 	set->variant = &bitmap_port;
-	if (tb[IPSET_ATTR_CADT_FLAGS])
-		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
-	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
-		set->extensions |= IPSET_EXT_COUNTER;
-		if (tb[IPSET_ATTR_TIMEOUT]) {
-			set->dsize = sizeof(struct bitmap_portct_elem);
-			set->offset[IPSET_EXT_ID_TIMEOUT] =
-				offsetof(struct bitmap_portct_elem, timeout);
-			set->offset[IPSET_EXT_ID_COUNTER] =
-				offsetof(struct bitmap_portct_elem, counter);
-			if (!init_map_port(set, map, first_port, last_port)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-
-			set->timeout =
-				ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-			set->extensions |= IPSET_EXT_TIMEOUT;
-			bitmap_port_gc_init(set, bitmap_port_gc);
-		} else {
-			set->dsize = sizeof(struct bitmap_portc_elem);
-			set->offset[IPSET_EXT_ID_COUNTER] =
-				offsetof(struct bitmap_portc_elem, counter);
-			if (!init_map_port(set, map, first_port, last_port)) {
-				kfree(map);
-				return -ENOMEM;
-			}
-		}
-	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		set->dsize = sizeof(struct bitmap_portt_elem);
-		set->offset[IPSET_EXT_ID_TIMEOUT] =
-			offsetof(struct bitmap_portt_elem, timeout);
-		if (!init_map_port(set, map, first_port, last_port)) {
-			kfree(map);
-			return -ENOMEM;
-		}
-
+	set->dsize = ip_set_elem_len(set, tb, 0);
+	if (!init_map_port(set, map, first_port, last_port)) {
+		kfree(map);
+		return -ENOMEM;
+	}
+	if (tb[IPSET_ATTR_TIMEOUT]) {
 		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-		set->extensions |= IPSET_EXT_TIMEOUT;
 		bitmap_port_gc_init(set, bitmap_port_gc);
-	} else {
-		set->dsize = 0;
-		if (!init_map_port(set, map, first_port, last_port)) {
-			kfree(map);
-			return -ENOMEM;
-		}
-
 	}
 	return 0;
 }
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 428c30a8586f..f35afed3814f 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -315,6 +315,52 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
 }
 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
 
+/* ipset data extension types, in size order */
+
+const struct ip_set_ext_type ip_set_extensions[] = {
+	[IPSET_EXT_ID_COUNTER] = {
+		.type	= IPSET_EXT_COUNTER,
+		.flag	= IPSET_FLAG_WITH_COUNTERS,
+		.len	= sizeof(struct ip_set_counter),
+		.align	= __alignof__(struct ip_set_counter),
+	},
+	[IPSET_EXT_ID_TIMEOUT] = {
+		.type	= IPSET_EXT_TIMEOUT,
+		.len	= sizeof(unsigned long),
+		.align	= __alignof__(unsigned long),
+	},
+};
+EXPORT_SYMBOL_GPL(ip_set_extensions);
+
+static inline bool
+add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
+{
+	return ip_set_extensions[id].flag ?
+		(flags & ip_set_extensions[id].flag) :
+		!!tb[IPSET_ATTR_TIMEOUT];
+}
+
+size_t
+ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
+{
+	enum ip_set_ext_id id;
+	size_t offset = 0;
+	u32 cadt_flags = 0;
+
+	if (tb[IPSET_ATTR_CADT_FLAGS])
+		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+	for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
+		if (!add_extension(id, cadt_flags, tb))
+			continue;
+		offset += ALIGN(len + offset, ip_set_extensions[id].align);
+		set->offset[id] = offset;
+		set->extensions |= ip_set_extensions[id].type;
+		offset += ip_set_extensions[id].len;
+	}
+	return len + offset;
+}
+EXPORT_SYMBOL_GPL(ip_set_elem_len);
+
 int
 ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 		      struct ip_set_ext *ext)
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 0cb840e1f8ae..3999f1719f69 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -960,7 +960,6 @@ static int
 IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 {
 	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
-	u32 cadt_flags = 0;
 	u8 hbits;
 #ifdef IP_SET_HASH_WITH_NETMASK
 	u8 netmask;
@@ -1034,88 +1033,23 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	rcu_assign_pointer(h->table, t);
 
 	set->data = h;
-	if (set->family ==  NFPROTO_IPV4)
+	if (set->family ==  NFPROTO_IPV4) {
 		set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
-	else
+		set->dsize = ip_set_elem_len(set, tb,
+				sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
+	} else {
 		set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
-
-	if (tb[IPSET_ATTR_CADT_FLAGS])
-		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
-	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
-		set->extensions |= IPSET_EXT_COUNTER;
-		if (tb[IPSET_ATTR_TIMEOUT]) {
-			set->timeout =
-				ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-			set->extensions |= IPSET_EXT_TIMEOUT;
-			if (set->family == NFPROTO_IPV4) {
-				set->dsize = sizeof(struct
-					IPSET_TOKEN(HTYPE, 4ct_elem));
-				set->offset[IPSET_EXT_ID_TIMEOUT] =
-					offsetof(struct
-						IPSET_TOKEN(HTYPE, 4ct_elem),
-						timeout);
-				set->offset[IPSET_EXT_ID_COUNTER] =
-					offsetof(struct
-						IPSET_TOKEN(HTYPE, 4ct_elem),
-						counter);
-				IPSET_TOKEN(HTYPE, 4_gc_init)(set,
-					IPSET_TOKEN(HTYPE, 4_gc));
-			} else {
-				set->dsize = sizeof(struct
-					IPSET_TOKEN(HTYPE, 6ct_elem));
-				set->offset[IPSET_EXT_ID_TIMEOUT] =
-					offsetof(struct
-						IPSET_TOKEN(HTYPE, 6ct_elem),
-						timeout);
-				set->offset[IPSET_EXT_ID_COUNTER] =
-					offsetof(struct
-						IPSET_TOKEN(HTYPE, 6ct_elem),
-						counter);
-				IPSET_TOKEN(HTYPE, 6_gc_init)(set,
-					IPSET_TOKEN(HTYPE, 6_gc));
-			}
-		} else {
-			if (set->family == NFPROTO_IPV4) {
-				set->dsize =
-					sizeof(struct
-						IPSET_TOKEN(HTYPE, 4c_elem));
-				set->offset[IPSET_EXT_ID_COUNTER] =
-					offsetof(struct
-						IPSET_TOKEN(HTYPE, 4c_elem),
-						counter);
-			} else {
-				set->dsize =
-					sizeof(struct
-						IPSET_TOKEN(HTYPE, 6c_elem));
-				set->offset[IPSET_EXT_ID_COUNTER] =
-					offsetof(struct
-						IPSET_TOKEN(HTYPE, 6c_elem),
-						counter);
-			}
-		}
-	} else if (tb[IPSET_ATTR_TIMEOUT]) {
+		set->dsize = ip_set_elem_len(set, tb,
+				sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
+	}
+	if (tb[IPSET_ATTR_TIMEOUT]) {
 		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-		set->extensions |= IPSET_EXT_TIMEOUT;
-		if (set->family == NFPROTO_IPV4) {
-			set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem));
-			set->offset[IPSET_EXT_ID_TIMEOUT] =
-				offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem),
-					 timeout);
+		if (set->family == NFPROTO_IPV4)
 			IPSET_TOKEN(HTYPE, 4_gc_init)(set,
 				IPSET_TOKEN(HTYPE, 4_gc));
-		} else {
-			set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem));
-			set->offset[IPSET_EXT_ID_TIMEOUT] =
-				offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem),
-					 timeout);
+		else
 			IPSET_TOKEN(HTYPE, 6_gc_init)(set,
 				IPSET_TOKEN(HTYPE, 6_gc));
-		}
-	} else {
-		if (set->family == NFPROTO_IPV4)
-			set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem));
-		else
-			set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem));
 	}
 
 	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index bbde7c304622..a111ffe40b46 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -35,7 +35,7 @@ MODULE_ALIAS("ip_set_hash:ip");
 #define HTYPE		hash_ip
 #define IP_SET_HASH_WITH_NETMASK
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements */
 struct hash_ip4_elem {
@@ -43,22 +43,6 @@ struct hash_ip4_elem {
 	__be32 ip;
 };
 
-struct hash_ip4t_elem {
-	__be32 ip;
-	unsigned long timeout;
-};
-
-struct hash_ip4c_elem {
-	__be32 ip;
-	struct ip_set_counter counter;
-};
-
-struct hash_ip4ct_elem {
-	__be32 ip;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -178,29 +162,13 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 /* Member elements */
 struct hash_ip6_elem {
 	union nf_inet_addr ip;
 };
 
-struct hash_ip6t_elem {
-	union nf_inet_addr ip;
-	unsigned long timeout;
-};
-
-struct hash_ip6c_elem {
-	union nf_inet_addr ip;
-	struct ip_set_counter counter;
-};
-
-struct hash_ip6ct_elem {
-	union nf_inet_addr ip;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index dd175d6f3965..5dc735c4dac2 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -36,7 +36,7 @@ MODULE_ALIAS("ip_set_hash:ip,port");
 /* Type specific function prefix */
 #define HTYPE		hash_ipport
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements */
 struct hash_ipport4_elem {
@@ -46,31 +46,6 @@ struct hash_ipport4_elem {
 	u8 padding;
 };
 
-struct hash_ipport4t_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	unsigned long timeout;
-};
-
-struct hash_ipport4c_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipport4ct_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -221,7 +196,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_ipport6_elem {
 	union nf_inet_addr ip;
@@ -230,31 +205,6 @@ struct hash_ipport6_elem {
 	u8 padding;
 };
 
-struct hash_ipport6t_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	unsigned long timeout;
-};
-
-struct hash_ipport6c_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipport6ct_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 87a2cfab2568..8c43dc7811cb 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -36,7 +36,7 @@ MODULE_ALIAS("ip_set_hash:ip,port,ip");
 /* Type specific function prefix */
 #define HTYPE		hash_ipportip
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements  */
 struct hash_ipportip4_elem {
@@ -47,34 +47,6 @@ struct hash_ipportip4_elem {
 	u8 padding;
 };
 
-struct hash_ipportip4t_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	unsigned long timeout;
-};
-
-struct hash_ipportip4c_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipportip4ct_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 static inline bool
 hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
 			  const struct hash_ipportip4_elem *ip2,
@@ -230,7 +202,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_ipportip6_elem {
 	union nf_inet_addr ip;
@@ -240,34 +212,6 @@ struct hash_ipportip6_elem {
 	u8 padding;
 };
 
-struct hash_ipportip6t_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	unsigned long timeout;
-};
-
-struct hash_ipportip6c_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipportip6ct_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 proto;
-	u8 padding;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 0b9a28d7c740..34890452366c 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -46,7 +46,7 @@ MODULE_ALIAS("ip_set_hash:ip,port,net");
 #define IP_SET_HASH_WITH_PROTO
 #define IP_SET_HASH_WITH_NETS
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements */
 struct hash_ipportnet4_elem {
@@ -58,37 +58,6 @@ struct hash_ipportnet4_elem {
 	u8 proto;
 };
 
-struct hash_ipportnet4t_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	unsigned long timeout;
-};
-
-struct hash_ipportnet4c_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipportnet4ct_elem {
-	__be32 ip;
-	__be32 ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -328,7 +297,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_ipportnet6_elem {
 	union nf_inet_addr ip;
@@ -339,37 +308,6 @@ struct hash_ipportnet6_elem {
 	u8 proto;
 };
 
-struct hash_ipportnet6t_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	unsigned long timeout;
-};
-
-struct hash_ipportnet6c_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	struct ip_set_counter counter;
-};
-
-struct hash_ipportnet6ct_elem {
-	union nf_inet_addr ip;
-	union nf_inet_addr ip2;
-	__be16 port;
-	u8 cidr:7;
-	u8 nomatch:1;
-	u8 proto;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 1d4caa50dacb..d5598557f4a9 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -36,7 +36,7 @@ MODULE_ALIAS("ip_set_hash:net");
 #define HTYPE		hash_net
 #define IP_SET_HASH_WITH_NETS
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements  */
 struct hash_net4_elem {
@@ -46,31 +46,6 @@ struct hash_net4_elem {
 	u8 cidr;
 };
 
-struct hash_net4t_elem {
-	__be32 ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	unsigned long timeout;
-};
-
-struct hash_net4c_elem {
-	__be32 ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	struct ip_set_counter counter;
-};
-
-struct hash_net4ct_elem {
-	__be32 ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -228,7 +203,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_net6_elem {
 	union nf_inet_addr ip;
@@ -237,31 +212,6 @@ struct hash_net6_elem {
 	u8 cidr;
 };
 
-struct hash_net6t_elem {
-	union nf_inet_addr ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	unsigned long timeout;
-};
-
-struct hash_net6c_elem {
-	union nf_inet_addr ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	struct ip_set_counter counter;
-};
-
-struct hash_net6ct_elem {
-	union nf_inet_addr ip;
-	u16 padding0;
-	u8 nomatch;
-	u8 cidr;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 2f0ffe35c408..26703e9e5082 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -134,7 +134,7 @@ iface_add(struct rb_root *root, const char **iface)
 
 #define STREQ(a, b)	(strcmp(a, b) == 0)
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 struct hash_netiface4_elem_hashed {
 	__be32 ip;
@@ -144,7 +144,7 @@ struct hash_netiface4_elem_hashed {
 	u8 elem;
 };
 
-/* Member elements without timeout */
+/* Member elements */
 struct hash_netiface4_elem {
 	__be32 ip;
 	u8 physdev;
@@ -154,37 +154,6 @@ struct hash_netiface4_elem {
 	const char *iface;
 };
 
-struct hash_netiface4t_elem {
-	__be32 ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	unsigned long timeout;
-};
-
-struct hash_netiface4c_elem {
-	__be32 ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	struct ip_set_counter counter;
-};
-
-struct hash_netiface4ct_elem {
-	__be32 ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -399,7 +368,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_netiface6_elem_hashed {
 	union nf_inet_addr ip;
@@ -418,37 +387,6 @@ struct hash_netiface6_elem {
 	const char *iface;
 };
 
-struct hash_netiface6t_elem {
-	union nf_inet_addr ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	unsigned long timeout;
-};
-
-struct hash_netiface6c_elem {
-	union nf_inet_addr ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	struct ip_set_counter counter;
-};
-
-struct hash_netiface6ct_elem {
-	union nf_inet_addr ip;
-	u8 physdev;
-	u8 cidr;
-	u8 nomatch;
-	u8 elem;
-	const char *iface;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index cab236625f97..45b6e91b0636 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -45,7 +45,7 @@ MODULE_ALIAS("ip_set_hash:net,port");
  */
 #define IP_SET_HASH_WITH_NETS_PACKED
 
-/* IPv4 variants */
+/* IPv4 variant */
 
 /* Member elements */
 struct hash_netport4_elem {
@@ -56,34 +56,6 @@ struct hash_netport4_elem {
 	u8 nomatch:1;
 };
 
-struct hash_netport4t_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	unsigned long timeout;
-};
-
-struct hash_netport4c_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	struct ip_set_counter counter;
-};
-
-struct hash_netport4ct_elem {
-	__be32 ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
@@ -287,7 +259,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* IPv6 variants */
+/* IPv6 variant */
 
 struct hash_netport6_elem {
 	union nf_inet_addr ip;
@@ -297,34 +269,6 @@ struct hash_netport6_elem {
 	u8 nomatch:1;
 };
 
-struct hash_netport6t_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	unsigned long timeout;
-};
-
-struct hash_netport6c_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	struct ip_set_counter counter;
-};
-
-struct hash_netport6ct_elem {
-	union nf_inet_addr ip;
-	__be16 port;
-	u8 proto;
-	u8 cidr:7;
-	u8 nomatch:1;
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 /* Common functions */
 
 static inline bool
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index f22d05d6e366..7fd11c79aff4 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -28,28 +28,6 @@ struct set_elem {
 	ip_set_id_t id;
 };
 
-struct sett_elem {
-	struct {
-		ip_set_id_t id;
-	} __attribute__ ((aligned));
-	unsigned long timeout;
-};
-
-struct setc_elem {
-	struct {
-		ip_set_id_t id;
-	} __attribute__ ((aligned));
-	struct ip_set_counter counter;
-};
-
-struct setct_elem {
-	struct {
-		ip_set_id_t id;
-	} __attribute__ ((aligned));
-	struct ip_set_counter counter;
-	unsigned long timeout;
-};
-
 struct set_adt_elem {
 	ip_set_id_t id;
 	ip_set_id_t refid;
@@ -600,21 +578,18 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 
 /* Create list:set type of sets */
 
-static struct list_set *
-init_list_set(struct ip_set *set, u32 size, size_t dsize,
-	      unsigned long timeout)
+static bool
+init_list_set(struct ip_set *set, u32 size)
 {
 	struct list_set *map;
 	struct set_elem *e;
 	u32 i;
 
-	map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL);
+	map = kzalloc(sizeof(*map) + size * set->dsize, GFP_KERNEL);
 	if (!map)
-		return NULL;
+		return false;
 
 	map->size = size;
-	set->dsize = dsize;
-	set->timeout = timeout;
 	set->data = map;
 
 	for (i = 0; i < size; i++) {
@@ -622,15 +597,13 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize,
 		e->id = IPSET_INVALID_ID;
 	}
 
-	return map;
+	return true;
 }
 
 static int
 list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 {
-	struct list_set *map;
-	u32 size = IP_SET_LIST_DEFAULT_SIZE, cadt_flags = 0;
-	unsigned long timeout = 0;
+	u32 size = IP_SET_LIST_DEFAULT_SIZE;
 
 	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
@@ -642,45 +615,13 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	if (size < IP_SET_LIST_MIN_SIZE)
 		size = IP_SET_LIST_MIN_SIZE;
 
-	if (tb[IPSET_ATTR_CADT_FLAGS])
-		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
-	if (tb[IPSET_ATTR_TIMEOUT])
-		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 	set->variant = &set_variant;
-	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
-		set->extensions |= IPSET_EXT_COUNTER;
-		if (tb[IPSET_ATTR_TIMEOUT]) {
-			map = init_list_set(set, size,
-					sizeof(struct setct_elem), timeout);
-			if (!map)
-				return -ENOMEM;
-			set->extensions |= IPSET_EXT_TIMEOUT;
-			set->offset[IPSET_EXT_ID_TIMEOUT] =
-				offsetof(struct setct_elem, timeout);
-			set->offset[IPSET_EXT_ID_COUNTER] =
-				offsetof(struct setct_elem, counter);
-			list_set_gc_init(set, list_set_gc);
-		} else {
-			map = init_list_set(set, size,
-					    sizeof(struct setc_elem), 0);
-			if (!map)
-				return -ENOMEM;
-			set->offset[IPSET_EXT_ID_COUNTER] =
-				offsetof(struct setc_elem, counter);
-		}
-	} else if (tb[IPSET_ATTR_TIMEOUT]) {
-		map = init_list_set(set, size,
-				    sizeof(struct sett_elem), timeout);
-		if (!map)
-			return -ENOMEM;
-		set->extensions |= IPSET_EXT_TIMEOUT;
-		set->offset[IPSET_EXT_ID_TIMEOUT] =
-			offsetof(struct sett_elem, timeout);
+	set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem));
+	if (!init_list_set(set, size))
+		return -ENOMEM;
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 		list_set_gc_init(set, list_set_gc);
-	} else {
-		map = init_list_set(set, size, sizeof(struct set_elem), 0);
-		if (!map)
-			return -ENOMEM;
 	}
 	return 0;
 }
-- 
cgit v1.2.3-71-gd317


From 40cd63bf33b21ef4d43776b1d49c605f876fe32c Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Mon, 9 Sep 2013 14:44:29 +0200
Subject: netfilter: ipset: Support extensions which need a per data destroy
 function

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h  | 22 +++++++---
 net/netfilter/ipset/ip_set_bitmap_gen.h | 38 ++++++++++++++----
 net/netfilter/ipset/ip_set_hash_gen.h   | 71 +++++++++++++++++++++------------
 net/netfilter/ipset/ip_set_list_set.c   | 19 ++++++---
 4 files changed, 107 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 66d6bd404d64..6372ee224fe8 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -49,11 +49,13 @@ enum ip_set_feature {
 
 /* Set extensions */
 enum ip_set_extension {
-	IPSET_EXT_NONE = 0,
-	IPSET_EXT_BIT_TIMEOUT = 1,
+	IPSET_EXT_BIT_TIMEOUT = 0,
 	IPSET_EXT_TIMEOUT = (1 << IPSET_EXT_BIT_TIMEOUT),
-	IPSET_EXT_BIT_COUNTER = 2,
+	IPSET_EXT_BIT_COUNTER = 1,
 	IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER),
+	/* Mark set with an extension which needs to call destroy */
+	IPSET_EXT_BIT_DESTROY = 7,
+	IPSET_EXT_DESTROY = (1 << IPSET_EXT_BIT_DESTROY),
 };
 
 #define SET_WITH_TIMEOUT(s)	((s)->extensions & IPSET_EXT_TIMEOUT)
@@ -68,6 +70,8 @@ enum ip_set_ext_id {
 
 /* Extension type */
 struct ip_set_ext_type {
+	/* Destroy extension private data (can be NULL) */
+	void (*destroy)(void *ext);
 	enum ip_set_extension type;
 	enum ipset_cadt_flags flag;
 	/* Size and minimal alignment */
@@ -88,13 +92,21 @@ struct ip_set_counter {
 	atomic64_t packets;
 };
 
+struct ip_set;
+
+static inline void
+ip_set_ext_destroy(struct ip_set *set, void *data)
+{
+	/* Check that the extension is enabled for the set and
+	 * call it's destroy function for its extension part in data.
+	 */
+}
+
 #define ext_timeout(e, s)	\
 (unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT])
 #define ext_counter(e, s)	\
 (struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])
 
-struct ip_set;
-
 typedef int (*ipset_adtfn)(struct ip_set *set, void *value,
 			   const struct ip_set_ext *ext,
 			   struct ip_set_ext *mext, u32 cmdflags);
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index f32ddbca3923..4515fe8b83dd 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -12,6 +12,7 @@
 #define mtype_gc_test		IPSET_TOKEN(MTYPE, _gc_test)
 #define mtype_is_filled		IPSET_TOKEN(MTYPE, _is_filled)
 #define mtype_do_add		IPSET_TOKEN(MTYPE, _do_add)
+#define mtype_ext_cleanup	IPSET_TOKEN(MTYPE, _ext_cleanup)
 #define mtype_do_del		IPSET_TOKEN(MTYPE, _do_del)
 #define mtype_do_list		IPSET_TOKEN(MTYPE, _do_list)
 #define mtype_do_head		IPSET_TOKEN(MTYPE, _do_head)
@@ -46,6 +47,17 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 	add_timer(&map->gc);
 }
 
+static void
+mtype_ext_cleanup(struct ip_set *set)
+{
+	struct mtype *map = set->data;
+	u32 id;
+
+	for (id = 0; id < map->elements; id++)
+		if (test_bit(id, map->members))
+			ip_set_ext_destroy(set, get_ext(set, map, id));
+}
+
 static void
 mtype_destroy(struct ip_set *set)
 {
@@ -55,8 +67,11 @@ mtype_destroy(struct ip_set *set)
 		del_timer_sync(&map->gc);
 
 	ip_set_free(map->members);
-	if (set->dsize)
+	if (set->dsize) {
+		if (set->extensions & IPSET_EXT_DESTROY)
+			mtype_ext_cleanup(set);
 		ip_set_free(map->extensions);
+	}
 	kfree(map);
 
 	set->data = NULL;
@@ -67,6 +82,8 @@ mtype_flush(struct ip_set *set)
 {
 	struct mtype *map = set->data;
 
+	if (set->extensions & IPSET_EXT_DESTROY)
+		mtype_ext_cleanup(set);
 	memset(map->members, 0, map->memsize);
 }
 
@@ -132,6 +149,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 			ret = 0;
 		else if (!(flags & IPSET_FLAG_EXIST))
 			return -IPSET_ERR_EXIST;
+		/* Element is re-added, cleanup extensions */
+		ip_set_ext_destroy(set, x);
 	}
 
 	if (SET_WITH_TIMEOUT(set))
@@ -152,11 +171,14 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 {
 	struct mtype *map = set->data;
 	const struct mtype_adt_elem *e = value;
-	const void *x = get_ext(set, map, e->id);
+	void *x = get_ext(set, map, e->id);
 
-	if (mtype_do_del(e, map) ||
-	    (SET_WITH_TIMEOUT(set) &&
-	     ip_set_timeout_expired(ext_timeout(x, set))))
+	if (mtype_do_del(e, map))
+		return -IPSET_ERR_EXIST;
+
+	ip_set_ext_destroy(set, x);
+	if (SET_WITH_TIMEOUT(set) &&
+	    ip_set_timeout_expired(ext_timeout(x, set)))
 		return -IPSET_ERR_EXIST;
 
 	return 0;
@@ -235,7 +257,7 @@ mtype_gc(unsigned long ul_set)
 {
 	struct ip_set *set = (struct ip_set *) ul_set;
 	struct mtype *map = set->data;
-	const void *x;
+	void *x;
 	u32 id;
 
 	/* We run parallel with other readers (test element)
@@ -244,8 +266,10 @@ mtype_gc(unsigned long ul_set)
 	for (id = 0; id < map->elements; id++)
 		if (mtype_gc_test(id, map, set->dsize)) {
 			x = get_ext(set, map, id);
-			if (ip_set_timeout_expired(ext_timeout(x, set)))
+			if (ip_set_timeout_expired(ext_timeout(x, set))) {
 				clear_bit(id, map->members);
+				ip_set_ext_destroy(set, x);
+			}
 		}
 	read_unlock_bh(&set->lock);
 
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 3999f1719f69..3c26e5b946f5 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -117,23 +117,6 @@ htable_bits(u32 hashsize)
 	return bits;
 }
 
-/* Destroy the hashtable part of the set */
-static void
-ahash_destroy(struct htable *t)
-{
-	struct hbucket *n;
-	u32 i;
-
-	for (i = 0; i < jhash_size(t->htable_bits); i++) {
-		n = hbucket(t, i);
-		if (n->size)
-			/* FIXME: use slab cache */
-			kfree(n->value);
-	}
-
-	ip_set_free(t);
-}
-
 static int
 hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 {
@@ -192,6 +175,8 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 #undef mtype_data_next
 #undef mtype_elem
 
+#undef mtype_ahash_destroy
+#undef mtype_ext_cleanup
 #undef mtype_add_cidr
 #undef mtype_del_cidr
 #undef mtype_ahash_memsize
@@ -230,6 +215,8 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
 #define mtype_data_list		IPSET_TOKEN(MTYPE, _data_list)
 #define mtype_data_next		IPSET_TOKEN(MTYPE, _data_next)
 #define mtype_elem		IPSET_TOKEN(MTYPE, _elem)
+#define mtype_ahash_destroy	IPSET_TOKEN(MTYPE, _ahash_destroy)
+#define mtype_ext_cleanup	IPSET_TOKEN(MTYPE, _ext_cleanup)
 #define mtype_add_cidr		IPSET_TOKEN(MTYPE, _add_cidr)
 #define mtype_del_cidr		IPSET_TOKEN(MTYPE, _del_cidr)
 #define mtype_ahash_memsize	IPSET_TOKEN(MTYPE, _ahash_memsize)
@@ -359,6 +346,19 @@ mtype_ahash_memsize(const struct htype *h, const struct htable *t,
 	return memsize;
 }
 
+/* Get the ith element from the array block n */
+#define ahash_data(n, i, dsize)	\
+	((struct mtype_elem *)((n)->value + ((i) * (dsize))))
+
+static void
+mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
+{
+	int i;
+
+	for (i = 0; i < n->pos; i++)
+		ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
+}
+
 /* Flush a hash type of set: destroy all elements */
 static void
 mtype_flush(struct ip_set *set)
@@ -372,6 +372,8 @@ mtype_flush(struct ip_set *set)
 	for (i = 0; i < jhash_size(t->htable_bits); i++) {
 		n = hbucket(t, i);
 		if (n->size) {
+			if (set->extensions & IPSET_EXT_DESTROY)
+				mtype_ext_cleanup(set, n);
 			n->size = n->pos = 0;
 			/* FIXME: use slab cache */
 			kfree(n->value);
@@ -383,6 +385,26 @@ mtype_flush(struct ip_set *set)
 	h->elements = 0;
 }
 
+/* Destroy the hashtable part of the set */
+static void
+mtype_ahash_destroy(struct ip_set *set, struct htable *t)
+{
+	struct hbucket *n;
+	u32 i;
+
+	for (i = 0; i < jhash_size(t->htable_bits); i++) {
+		n = hbucket(t, i);
+		if (n->size) {
+			if (set->extensions & IPSET_EXT_DESTROY)
+				mtype_ext_cleanup(set, n);
+			/* FIXME: use slab cache */
+			kfree(n->value);
+		}
+	}
+
+	ip_set_free(t);
+}
+
 /* Destroy a hash type of set */
 static void
 mtype_destroy(struct ip_set *set)
@@ -392,7 +414,7 @@ mtype_destroy(struct ip_set *set)
 	if (set->extensions & IPSET_EXT_TIMEOUT)
 		del_timer_sync(&h->gc);
 
-	ahash_destroy(rcu_dereference_bh_nfnl(h->table));
+	mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table));
 #ifdef IP_SET_HASH_WITH_RBTREE
 	rbtree_destroy(&h->rbtree);
 #endif
@@ -430,10 +452,6 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
 	       a->extensions == b->extensions;
 }
 
-/* Get the ith element from the array block n */
-#define ahash_data(n, i, dsize)	\
-	((struct mtype_elem *)((n)->value + ((i) * (dsize))))
-
 /* Delete expired elements from the hashtable */
 static void
 mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
@@ -456,6 +474,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
 				mtype_del_cidr(h, CIDR(data->cidr),
 					       nets_length, 0);
 #endif
+				ip_set_ext_destroy(set, data);
 				if (j != n->pos - 1)
 					/* Not last one */
 					memcpy(data,
@@ -557,7 +576,7 @@ retry:
 				mtype_data_reset_flags(data, &flags);
 #endif
 				read_unlock_bh(&set->lock);
-				ahash_destroy(t);
+				mtype_ahash_destroy(set, t);
 				if (ret == -EAGAIN)
 					goto retry;
 				return ret;
@@ -578,7 +597,7 @@ retry:
 
 	pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
 		 orig->htable_bits, orig, t->htable_bits, t);
-	ahash_destroy(orig);
+	mtype_ahash_destroy(set, orig);
 
 	return 0;
 }
@@ -642,6 +661,7 @@ reuse_slot:
 		mtype_del_cidr(h, CIDR(data->cidr), NLEN(set->family), 0);
 		mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0);
 #endif
+		ip_set_ext_destroy(set, data);
 	} else {
 		/* Use/create a new slot */
 		TUNE_AHASH_MAX(h, multi);
@@ -707,6 +727,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 #ifdef IP_SET_HASH_WITH_NETS
 		mtype_del_cidr(h, CIDR(d->cidr), NLEN(set->family), 0);
 #endif
+		ip_set_ext_destroy(set, data);
 		if (n->pos + AHASH_INIT_SIZE < n->size) {
 			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
 					    * set->dsize,
@@ -1033,7 +1054,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	rcu_assign_pointer(h->table, t);
 
 	set->data = h;
-	if (set->family ==  NFPROTO_IPV4) {
+	if (set->family == NFPROTO_IPV4) {
 		set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
 		set->dsize = ip_set_elem_len(set, tb,
 				sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 7fd11c79aff4..e44986af1fc2 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -168,16 +168,19 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
 	struct set_elem *e = list_set_elem(set, map, i);
 
 	if (e->id != IPSET_INVALID_ID) {
-		if (i == map->size - 1)
+		if (i == map->size - 1) {
 			/* Last element replaced: e.g. add new,before,last */
 			ip_set_put_byindex(e->id);
-		else {
+			ip_set_ext_destroy(set, e);
+		} else {
 			struct set_elem *x = list_set_elem(set, map,
 							   map->size - 1);
 
 			/* Last element pushed off */
-			if (x->id != IPSET_INVALID_ID)
+			if (x->id != IPSET_INVALID_ID) {
 				ip_set_put_byindex(x->id);
+				ip_set_ext_destroy(set, x);
+			}
 			memmove(list_set_elem(set, map, i + 1), e,
 				set->dsize * (map->size - (i + 1)));
 		}
@@ -198,6 +201,7 @@ list_set_del(struct ip_set *set, u32 i)
 	struct set_elem *e = list_set_elem(set, map, i);
 
 	ip_set_put_byindex(e->id);
+	ip_set_ext_destroy(set, e);
 
 	if (i < map->size - 1)
 		memmove(e, list_set_elem(set, map, i + 1),
@@ -266,14 +270,14 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	bool flag_exist = flags & IPSET_FLAG_EXIST;
 	u32 i, ret = 0;
 
+	if (SET_WITH_TIMEOUT(set))
+		set_cleanup_entries(set);
+
 	/* Check already added element */
 	for (i = 0; i < map->size; i++) {
 		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			goto insert;
-		else if (SET_WITH_TIMEOUT(set) &&
-			 ip_set_timeout_expired(ext_timeout(e, set)))
-			continue;
 		else if (e->id != d->id)
 			continue;
 
@@ -286,6 +290,8 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 			/* Can't re-add */
 			return -IPSET_ERR_EXIST;
 		/* Update extensions */
+		ip_set_ext_destroy(set, e);
+
 		if (SET_WITH_TIMEOUT(set))
 			ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
 		if (SET_WITH_COUNTER(set))
@@ -423,6 +429,7 @@ list_set_flush(struct ip_set *set)
 		e = list_set_elem(set, map, i);
 		if (e->id != IPSET_INVALID_ID) {
 			ip_set_put_byindex(e->id);
+			ip_set_ext_destroy(set, e);
 			e->id = IPSET_INVALID_ID;
 		}
 	}
-- 
cgit v1.2.3-71-gd317


From 68b63f08d22f23161c43cd2417104aa213ff877f Mon Sep 17 00:00:00 2001
From: Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>
Date: Sun, 22 Sep 2013 20:56:30 +0200
Subject: netfilter: ipset: Support comments for ipset entries in the core.

This adds the core support for having comments on ipset entries.

The comments are stored as standard null-terminated strings in
dynamically allocated memory after being passed to the kernel. As a
result of this, code has been added to the generic destroy function to
iterate all extensions and call that extension's destroy task if the set
has that extension activated, and if such a task is defined.

Signed-off-by: Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h         | 51 +++++++++++++++++++----
 include/linux/netfilter/ipset/ip_set_comment.h | 57 ++++++++++++++++++++++++++
 include/uapi/linux/netfilter/ipset/ip_set.h    |  8 +++-
 net/netfilter/ipset/ip_set_core.c              | 14 +++++++
 4 files changed, 121 insertions(+), 9 deletions(-)
 create mode 100644 include/linux/netfilter/ipset/ip_set_comment.h

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 6372ee224fe8..407f84df6a47 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -53,6 +53,8 @@ enum ip_set_extension {
 	IPSET_EXT_TIMEOUT = (1 << IPSET_EXT_BIT_TIMEOUT),
 	IPSET_EXT_BIT_COUNTER = 1,
 	IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER),
+	IPSET_EXT_BIT_COMMENT = 2,
+	IPSET_EXT_COMMENT = (1 << IPSET_EXT_BIT_COMMENT),
 	/* Mark set with an extension which needs to call destroy */
 	IPSET_EXT_BIT_DESTROY = 7,
 	IPSET_EXT_DESTROY = (1 << IPSET_EXT_BIT_DESTROY),
@@ -60,11 +62,13 @@ enum ip_set_extension {
 
 #define SET_WITH_TIMEOUT(s)	((s)->extensions & IPSET_EXT_TIMEOUT)
 #define SET_WITH_COUNTER(s)	((s)->extensions & IPSET_EXT_COUNTER)
+#define SET_WITH_COMMENT(s)	((s)->extensions & IPSET_EXT_COMMENT)
 
 /* Extension id, in size order */
 enum ip_set_ext_id {
 	IPSET_EXT_ID_COUNTER = 0,
 	IPSET_EXT_ID_TIMEOUT,
+	IPSET_EXT_ID_COMMENT,
 	IPSET_EXT_ID_MAX,
 };
 
@@ -85,6 +89,7 @@ struct ip_set_ext {
 	u64 packets;
 	u64 bytes;
 	u32 timeout;
+	char *comment;
 };
 
 struct ip_set_counter {
@@ -92,20 +97,19 @@ struct ip_set_counter {
 	atomic64_t packets;
 };
 
-struct ip_set;
+struct ip_set_comment {
+	char *str;
+};
 
-static inline void
-ip_set_ext_destroy(struct ip_set *set, void *data)
-{
-	/* Check that the extension is enabled for the set and
-	 * call it's destroy function for its extension part in data.
-	 */
-}
+struct ip_set;
 
 #define ext_timeout(e, s)	\
 (unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT])
 #define ext_counter(e, s)	\
 (struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])
+#define ext_comment(e, s)	\
+(struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT])
+
 
 typedef int (*ipset_adtfn)(struct ip_set *set, void *value,
 			   const struct ip_set_ext *ext,
@@ -222,6 +226,36 @@ struct ip_set {
 	void *data;
 };
 
+static inline void
+ip_set_ext_destroy(struct ip_set *set, void *data)
+{
+	/* Check that the extension is enabled for the set and
+	 * call it's destroy function for its extension part in data.
+	 */
+	if (SET_WITH_COMMENT(set))
+		ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy(
+			ext_comment(data, set));
+}
+
+static inline int
+ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
+{
+	u32 cadt_flags = 0;
+
+	if (SET_WITH_TIMEOUT(set))
+		if (unlikely(nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+					   htonl(set->timeout))))
+			return -EMSGSIZE;
+	if (SET_WITH_COUNTER(set))
+		cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
+	if (SET_WITH_COMMENT(set))
+		cadt_flags |= IPSET_FLAG_WITH_COMMENT;
+
+	if (!cadt_flags)
+		return 0;
+	return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags));
+}
+
 static inline void
 ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
 {
@@ -425,6 +459,7 @@ bitmap_bytes(u32 a, u32 b)
 }
 
 #include <linux/netfilter/ipset/ip_set_timeout.h>
+#include <linux/netfilter/ipset/ip_set_comment.h>
 
 #define IP_SET_INIT_KEXT(skb, opt, set)			\
 	{ .bytes = (skb)->len, .packets = 1,		\
diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h
new file mode 100644
index 000000000000..21217ea008d7
--- /dev/null
+++ b/include/linux/netfilter/ipset/ip_set_comment.h
@@ -0,0 +1,57 @@
+#ifndef _IP_SET_COMMENT_H
+#define _IP_SET_COMMENT_H
+
+/* Copyright (C) 2013 Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifdef __KERNEL__
+
+static inline char*
+ip_set_comment_uget(struct nlattr *tb)
+{
+	return nla_data(tb);
+}
+
+static inline void
+ip_set_init_comment(struct ip_set_comment *comment,
+		    const struct ip_set_ext *ext)
+{
+	size_t len = ext->comment ? strlen(ext->comment) : 0;
+
+	if (unlikely(comment->str)) {
+		kfree(comment->str);
+		comment->str = NULL;
+	}
+	if (!len)
+		return;
+	if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
+		len = IPSET_MAX_COMMENT_SIZE;
+	comment->str = kzalloc(len + 1, GFP_ATOMIC);
+	if (unlikely(!comment->str))
+		return;
+	strlcpy(comment->str, ext->comment, len + 1);
+}
+
+static inline int
+ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
+{
+	if (!comment->str)
+		return 0;
+	return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str);
+}
+
+static inline void
+ip_set_comment_free(struct ip_set_comment *comment)
+{
+	if (unlikely(!comment->str))
+		return;
+	kfree(comment->str);
+	comment->str = NULL;
+}
+
+#endif
+#endif
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 2b61ac44dcc1..25d3b2f79c02 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -10,12 +10,14 @@
 #ifndef _UAPI_IP_SET_H
 #define _UAPI_IP_SET_H
 
-
 #include <linux/types.h>
 
 /* The protocol version */
 #define IPSET_PROTOCOL		6
 
+/* The maximum permissible comment length we will accept over netlink */
+#define IPSET_MAX_COMMENT_SIZE	255
+
 /* The max length of strings including NUL: set and type identifiers */
 #define IPSET_MAXNAMELEN	32
 
@@ -110,6 +112,7 @@ enum {
 	IPSET_ATTR_IFACE,
 	IPSET_ATTR_BYTES,
 	IPSET_ATTR_PACKETS,
+	IPSET_ATTR_COMMENT,
 	__IPSET_ATTR_ADT_MAX,
 };
 #define IPSET_ATTR_ADT_MAX	(__IPSET_ATTR_ADT_MAX - 1)
@@ -140,6 +143,7 @@ enum ipset_errno {
 	IPSET_ERR_IPADDR_IPV4,
 	IPSET_ERR_IPADDR_IPV6,
 	IPSET_ERR_COUNTER,
+	IPSET_ERR_COMMENT,
 
 	/* Type specific error codes */
 	IPSET_ERR_TYPE_SPECIFIC = 4352,
@@ -176,6 +180,8 @@ enum ipset_cadt_flags {
 	IPSET_FLAG_NOMATCH	= (1 << IPSET_FLAG_BIT_NOMATCH),
 	IPSET_FLAG_BIT_WITH_COUNTERS = 3,
 	IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS),
+	IPSET_FLAG_BIT_WITH_COMMENT = 4,
+	IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT),
 	IPSET_FLAG_CADT_MAX	= 15,
 };
 
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index f35afed3814f..3bf9a3d29dff 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -315,6 +315,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
 }
 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
 
+typedef void (*destroyer)(void *);
 /* ipset data extension types, in size order */
 
 const struct ip_set_ext_type ip_set_extensions[] = {
@@ -329,6 +330,13 @@ const struct ip_set_ext_type ip_set_extensions[] = {
 		.len	= sizeof(unsigned long),
 		.align	= __alignof__(unsigned long),
 	},
+	[IPSET_EXT_ID_COMMENT] = {
+		.type	 = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY,
+		.flag	 = IPSET_FLAG_WITH_COMMENT,
+		.len	 = sizeof(struct ip_set_comment),
+		.align	 = __alignof__(struct ip_set_comment),
+		.destroy = (destroyer) ip_set_comment_free,
+	},
 };
 EXPORT_SYMBOL_GPL(ip_set_extensions);
 
@@ -380,6 +388,12 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 			ext->packets = be64_to_cpu(nla_get_be64(
 						   tb[IPSET_ATTR_PACKETS]));
 	}
+	if (tb[IPSET_ATTR_COMMENT]) {
+		if (!(set->extensions & IPSET_EXT_COMMENT))
+			return -IPSET_ERR_COMMENT;
+		ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(ip_set_get_extensions);
-- 
cgit v1.2.3-71-gd317


From 3fd986b3d99e3847f1cce6fc36043d0f16508e1d Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Wed, 25 Sep 2013 17:44:35 +0200
Subject: netfilter: ipset: Use a common function at listing the extensions

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h    | 21 +++++++++++++++++++++
 net/netfilter/ipset/ip_set_bitmap_gen.h   | 29 ++++++++++-------------------
 net/netfilter/ipset/ip_set_bitmap_ipmac.c | 11 -----------
 net/netfilter/ipset/ip_set_hash_gen.h     | 11 +----------
 net/netfilter/ipset/ip_set_list_set.c     | 11 +----------
 5 files changed, 33 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 407f84df6a47..da2a45acf74c 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -461,6 +461,27 @@ bitmap_bytes(u32 a, u32 b)
 #include <linux/netfilter/ipset/ip_set_timeout.h>
 #include <linux/netfilter/ipset/ip_set_comment.h>
 
+static inline int
+ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
+		      const void *e, bool active)
+{
+	if (SET_WITH_TIMEOUT(set)) {
+		unsigned long *timeout = ext_timeout(e, set);
+
+		if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+			htonl(active ? ip_set_timeout_get(timeout)
+				: *timeout)))
+			return -EMSGSIZE;
+	}
+	if (SET_WITH_COUNTER(set) &&
+	    ip_set_put_counter(skb, ext_counter(e, set)))
+		return -EMSGSIZE;
+	if (SET_WITH_COMMENT(set) &&
+	    ip_set_put_comment(skb, ext_comment(e, set)))
+		return -EMSGSIZE;
+	return 0;
+}
+
 #define IP_SET_INIT_KEXT(skb, opt, set)			\
 	{ .bytes = (skb)->len, .packets = 1,		\
 	  .timeout = ip_set_adt_opt_timeout(opt, set) }
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 6167fc9d0efe..a13e15be7911 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -183,6 +183,14 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	return 0;
 }
 
+#ifndef IP_SET_BITMAP_STORED_TIMEOUT
+static inline bool
+mtype_is_filled(const struct mtype_elem *x)
+{
+	return true;
+}
+#endif
+
 static int
 mtype_list(const struct ip_set *set,
 	   struct sk_buff *skb, struct netlink_callback *cb)
@@ -215,25 +223,8 @@ mtype_list(const struct ip_set *set,
 		}
 		if (mtype_do_list(skb, map, id, set->dsize))
 			goto nla_put_failure;
-		if (SET_WITH_TIMEOUT(set)) {
-#ifdef IP_SET_BITMAP_STORED_TIMEOUT
-			if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-					  htonl(ip_set_timeout_stored(map, id,
-							ext_timeout(x, set),
-							set->dsize))))
-				goto nla_put_failure;
-#else
-			if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-					  htonl(ip_set_timeout_get(
-							ext_timeout(x, set)))))
-				goto nla_put_failure;
-#endif
-		}
-		if (SET_WITH_COUNTER(set) &&
-		    ip_set_put_counter(skb, ext_counter(x, set)))
-			goto nla_put_failure;
-		if (SET_WITH_COMMENT(set) &&
-		    ip_set_put_comment(skb, ext_comment(x, set)))
+		if (ip_set_put_extensions(skb, set, x,
+		    mtype_is_filled((const struct mtype_elem *) x)))
 			goto nla_put_failure;
 		ipset_nest_end(skb, nested);
 	}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index fb4d163dea82..87a218f8ab5f 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -176,17 +176,6 @@ bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e,
 	return !test_and_clear_bit(e->id, map->members);
 }
 
-static inline unsigned long
-ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout,
-		      size_t dsize)
-{
-	const struct bitmap_ipmac_elem *elem =
-		get_elem(map->extensions, id, dsize);
-
-	return elem->filled == MAC_FILLED ? ip_set_timeout_get(timeout) :
-					    *timeout;
-}
-
 static inline int
 bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
 		     u32 id, size_t dsize)
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 1cffeb977605..b4add206c603 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -961,16 +961,7 @@ mtype_list(const struct ip_set *set,
 			}
 			if (mtype_data_list(skb, e))
 				goto nla_put_failure;
-			if (SET_WITH_TIMEOUT(set) &&
-			    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-					  htonl(ip_set_timeout_get(
-						ext_timeout(e, set)))))
-				goto nla_put_failure;
-			if (SET_WITH_COUNTER(set) &&
-			    ip_set_put_counter(skb, ext_counter(e, set)))
-				goto nla_put_failure;
-			if (SET_WITH_COMMENT(set) &&
-			    ip_set_put_comment(skb, ext_comment(e, set)))
+			if (ip_set_put_extensions(skb, set, e, true))
 				goto nla_put_failure;
 			ipset_nest_end(skb, nested);
 		}
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index e23f33c14435..ba4232e451f4 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -512,16 +512,7 @@ list_set_list(const struct ip_set *set,
 		if (nla_put_string(skb, IPSET_ATTR_NAME,
 				   ip_set_name_byindex(e->id)))
 			goto nla_put_failure;
-		if (SET_WITH_TIMEOUT(set) &&
-		    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-				  htonl(ip_set_timeout_get(
-						ext_timeout(e, set)))))
-			goto nla_put_failure;
-		if (SET_WITH_COUNTER(set) &&
-		    ip_set_put_counter(skb, ext_counter(e, set)))
-			goto nla_put_failure;
-		if (SET_WITH_COMMENT(set) &&
-		    ip_set_put_comment(skb, ext_comment(e, set)))
+		if (ip_set_put_extensions(skb, set, e, true))
 			goto nla_put_failure;
 		ipset_nest_end(skb, nested);
 	}
-- 
cgit v1.2.3-71-gd317


From 1785e8f473082aa60d62c7165856cf6484077b99 Mon Sep 17 00:00:00 2001
From: Vitaly Lavrov <lve@guap.ru>
Date: Mon, 30 Sep 2013 17:07:02 +0200
Subject: netfiler: ipset: Add net namespace for ipset

This patch adds netns support for ipset.

Major changes were made in ip_set_core.c and ip_set.h.
Global variables are moved to per net namespace.
Added initialization code and the destruction of the network namespace ipset subsystem.
In the prototypes of public functions ip_set_* added parameter "struct net*".

The remaining corrections related to the change prototypes of public functions ip_set_*.

The patch for git://git.netfilter.org/ipset.git commit 6a4ec96c0b8caac5c35474e40e319704d92ca347

Signed-off-by: Vitaly Lavrov <lve@guap.ru>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h    |  16 +-
 net/netfilter/ipset/ip_set_bitmap_ip.c    |   3 +-
 net/netfilter/ipset/ip_set_bitmap_ipmac.c |   2 +-
 net/netfilter/ipset/ip_set_bitmap_port.c  |   3 +-
 net/netfilter/ipset/ip_set_core.c         | 288 +++++++++++++++++++-----------
 net/netfilter/ipset/ip_set_hash_gen.h     |   3 +-
 net/netfilter/ipset/ip_set_list_set.c     |  31 ++--
 net/netfilter/xt_set.c                    |  40 ++---
 net/sched/em_ipset.c                      |   7 +-
 9 files changed, 245 insertions(+), 148 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index da2a45acf74c..7967516adc0d 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -184,7 +184,8 @@ struct ip_set_type {
 	u8 revision_min, revision_max;
 
 	/* Create set */
-	int (*create)(struct ip_set *set, struct nlattr *tb[], u32 flags);
+	int (*create)(struct net *net, struct ip_set *set,
+		      struct nlattr *tb[], u32 flags);
 
 	/* Attribute policies */
 	const struct nla_policy create_policy[IPSET_ATTR_CREATE_MAX + 1];
@@ -316,12 +317,13 @@ ip_set_init_counter(struct ip_set_counter *counter,
 }
 
 /* register and unregister set references */
-extern ip_set_id_t ip_set_get_byname(const char *name, struct ip_set **set);
-extern void ip_set_put_byindex(ip_set_id_t index);
-extern const char *ip_set_name_byindex(ip_set_id_t index);
-extern ip_set_id_t ip_set_nfnl_get(const char *name);
-extern ip_set_id_t ip_set_nfnl_get_byindex(ip_set_id_t index);
-extern void ip_set_nfnl_put(ip_set_id_t index);
+extern ip_set_id_t ip_set_get_byname(struct net *net,
+				     const char *name, struct ip_set **set);
+extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
+extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index);
+extern ip_set_id_t ip_set_nfnl_get(struct net *net, const char *name);
+extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
+extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index);
 
 /* API for iptables set match, and SET target */
 
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index faac124e2645..6f1f9f494808 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -242,7 +242,8 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
 }
 
 static int
-bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
+		 u32 flags)
 {
 	struct bitmap_ip *map;
 	u32 first_ip = 0, last_ip = 0, hosts;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 87a218f8ab5f..740eabededd9 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -309,7 +309,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
 }
 
 static int
-bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
+bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 		    u32 flags)
 {
 	u32 first_ip = 0, last_ip = 0;
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 407a63caee6b..e7603c5b53d7 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -228,7 +228,8 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
 }
 
 static int
-bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
+		   u32 flags)
 {
 	struct bitmap_port *map;
 	u16 first_port, last_port;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 3bf9a3d29dff..dc9284bdd2dd 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -17,6 +17,8 @@
 #include <linux/spinlock.h>
 #include <linux/rculist.h>
 #include <net/netlink.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter/x_tables.h>
@@ -27,8 +29,17 @@ static LIST_HEAD(ip_set_type_list);		/* all registered set types */
 static DEFINE_MUTEX(ip_set_type_mutex);		/* protects ip_set_type_list */
 static DEFINE_RWLOCK(ip_set_ref_lock);		/* protects the set refs */
 
-static struct ip_set * __rcu *ip_set_list;	/* all individual sets */
-static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
+struct ip_set_net {
+	struct ip_set * __rcu *ip_set_list;	/* all individual sets */
+	ip_set_id_t	ip_set_max;	/* max number of sets */
+	int		is_deleted;	/* deleted by ip_set_net_exit */
+};
+static int ip_set_net_id __read_mostly;
+
+static inline struct ip_set_net *ip_set_pernet(struct net *net)
+{
+	return net_generic(net, ip_set_net_id);
+}
 
 #define IP_SET_INC	64
 #define STREQ(a, b)	(strncmp(a, b, IPSET_MAXNAMELEN) == 0)
@@ -45,8 +56,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
 /* When the nfnl mutex is held: */
 #define nfnl_dereference(p)		\
 	rcu_dereference_protected(p, 1)
-#define nfnl_set(id)			\
-	nfnl_dereference(ip_set_list)[id]
+#define nfnl_set(inst, id)			\
+	nfnl_dereference((inst)->ip_set_list)[id]
 
 /*
  * The set types are implemented in modules and registered set types
@@ -434,13 +445,14 @@ __ip_set_put(struct ip_set *set)
  */
 
 static inline struct ip_set *
-ip_set_rcu_get(ip_set_id_t index)
+ip_set_rcu_get(struct net *net, ip_set_id_t index)
 {
 	struct ip_set *set;
+	struct ip_set_net *inst = ip_set_pernet(net);
 
 	rcu_read_lock();
 	/* ip_set_list itself needs to be protected */
-	set = rcu_dereference(ip_set_list)[index];
+	set = rcu_dereference(inst->ip_set_list)[index];
 	rcu_read_unlock();
 
 	return set;
@@ -450,7 +462,8 @@ int
 ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
 	    const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 {
-	struct ip_set *set = ip_set_rcu_get(index);
+	struct ip_set *set = ip_set_rcu_get(
+			dev_net(par->in ? par->in : par->out), index);
 	int ret = 0;
 
 	BUG_ON(set == NULL);
@@ -488,7 +501,8 @@ int
 ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
 	   const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 {
-	struct ip_set *set = ip_set_rcu_get(index);
+	struct ip_set *set = ip_set_rcu_get(
+			dev_net(par->in ? par->in : par->out), index);
 	int ret;
 
 	BUG_ON(set == NULL);
@@ -510,7 +524,8 @@ int
 ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
 	   const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 {
-	struct ip_set *set = ip_set_rcu_get(index);
+	struct ip_set *set = ip_set_rcu_get(
+			dev_net(par->in ? par->in : par->out), index);
 	int ret = 0;
 
 	BUG_ON(set == NULL);
@@ -534,14 +549,15 @@ EXPORT_SYMBOL_GPL(ip_set_del);
  *
  */
 ip_set_id_t
-ip_set_get_byname(const char *name, struct ip_set **set)
+ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
 {
 	ip_set_id_t i, index = IPSET_INVALID_ID;
 	struct ip_set *s;
+	struct ip_set_net *inst = ip_set_pernet(net);
 
 	rcu_read_lock();
-	for (i = 0; i < ip_set_max; i++) {
-		s = rcu_dereference(ip_set_list)[i];
+	for (i = 0; i < inst->ip_set_max; i++) {
+		s = rcu_dereference(inst->ip_set_list)[i];
 		if (s != NULL && STREQ(s->name, name)) {
 			__ip_set_get(s);
 			index = i;
@@ -561,17 +577,26 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname);
  * to be valid, after calling this function.
  *
  */
-void
-ip_set_put_byindex(ip_set_id_t index)
+
+static inline void
+__ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
 {
 	struct ip_set *set;
 
 	rcu_read_lock();
-	set = rcu_dereference(ip_set_list)[index];
+	set = rcu_dereference(inst->ip_set_list)[index];
 	if (set != NULL)
 		__ip_set_put(set);
 	rcu_read_unlock();
 }
+
+void
+ip_set_put_byindex(struct net *net, ip_set_id_t index)
+{
+	struct ip_set_net *inst = ip_set_pernet(net);
+
+	__ip_set_put_byindex(inst, index);
+}
 EXPORT_SYMBOL_GPL(ip_set_put_byindex);
 
 /*
@@ -582,9 +607,9 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex);
  *
  */
 const char *
-ip_set_name_byindex(ip_set_id_t index)
+ip_set_name_byindex(struct net *net, ip_set_id_t index)
 {
-	const struct ip_set *set = ip_set_rcu_get(index);
+	const struct ip_set *set = ip_set_rcu_get(net, index);
 
 	BUG_ON(set == NULL);
 	BUG_ON(set->ref == 0);
@@ -606,14 +631,15 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex);
  * The nfnl mutex is used in the function.
  */
 ip_set_id_t
-ip_set_nfnl_get(const char *name)
+ip_set_nfnl_get(struct net *net, const char *name)
 {
 	ip_set_id_t i, index = IPSET_INVALID_ID;
 	struct ip_set *s;
+	struct ip_set_net *inst = ip_set_pernet(net);
 
 	nfnl_lock(NFNL_SUBSYS_IPSET);
-	for (i = 0; i < ip_set_max; i++) {
-		s = nfnl_set(i);
+	for (i = 0; i < inst->ip_set_max; i++) {
+		s = nfnl_set(inst, i);
 		if (s != NULL && STREQ(s->name, name)) {
 			__ip_set_get(s);
 			index = i;
@@ -633,15 +659,16 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
  * The nfnl mutex is used in the function.
  */
 ip_set_id_t
-ip_set_nfnl_get_byindex(ip_set_id_t index)
+ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
 {
 	struct ip_set *set;
+	struct ip_set_net *inst = ip_set_pernet(net);
 
-	if (index > ip_set_max)
+	if (index > inst->ip_set_max)
 		return IPSET_INVALID_ID;
 
 	nfnl_lock(NFNL_SUBSYS_IPSET);
-	set = nfnl_set(index);
+	set = nfnl_set(inst, index);
 	if (set)
 		__ip_set_get(set);
 	else
@@ -660,13 +687,17 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
  * The nfnl mutex is used in the function.
  */
 void
-ip_set_nfnl_put(ip_set_id_t index)
+ip_set_nfnl_put(struct net *net, ip_set_id_t index)
 {
 	struct ip_set *set;
+	struct ip_set_net *inst = ip_set_pernet(net);
+
 	nfnl_lock(NFNL_SUBSYS_IPSET);
-	set = nfnl_set(index);
-	if (set != NULL)
-		__ip_set_put(set);
+	if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
+		set = nfnl_set(inst, index);
+		if (set != NULL)
+			__ip_set_put(set);
+	}
 	nfnl_unlock(NFNL_SUBSYS_IPSET);
 }
 EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
@@ -724,14 +755,14 @@ static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
 };
 
 static struct ip_set *
-find_set_and_id(const char *name, ip_set_id_t *id)
+find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id)
 {
 	struct ip_set *set = NULL;
 	ip_set_id_t i;
 
 	*id = IPSET_INVALID_ID;
-	for (i = 0; i < ip_set_max; i++) {
-		set = nfnl_set(i);
+	for (i = 0; i < inst->ip_set_max; i++) {
+		set = nfnl_set(inst, i);
 		if (set != NULL && STREQ(set->name, name)) {
 			*id = i;
 			break;
@@ -741,22 +772,23 @@ find_set_and_id(const char *name, ip_set_id_t *id)
 }
 
 static inline struct ip_set *
-find_set(const char *name)
+find_set(struct ip_set_net *inst, const char *name)
 {
 	ip_set_id_t id;
 
-	return find_set_and_id(name, &id);
+	return find_set_and_id(inst, name, &id);
 }
 
 static int
-find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
+find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
+	     struct ip_set **set)
 {
 	struct ip_set *s;
 	ip_set_id_t i;
 
 	*index = IPSET_INVALID_ID;
-	for (i = 0;  i < ip_set_max; i++) {
-		s = nfnl_set(i);
+	for (i = 0;  i < inst->ip_set_max; i++) {
+		s = nfnl_set(inst, i);
 		if (s == NULL) {
 			if (*index == IPSET_INVALID_ID)
 				*index = i;
@@ -785,6 +817,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	      const struct nlmsghdr *nlh,
 	      const struct nlattr * const attr[])
 {
+	struct net *net = sock_net(ctnl);
+	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *set, *clash = NULL;
 	ip_set_id_t index = IPSET_INVALID_ID;
 	struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
@@ -843,7 +877,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 		goto put_out;
 	}
 
-	ret = set->type->create(set, tb, flags);
+	ret = set->type->create(net, set, tb, flags);
 	if (ret != 0)
 		goto put_out;
 
@@ -854,7 +888,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	 * by the nfnl mutex. Find the first free index in ip_set_list
 	 * and check clashing.
 	 */
-	ret = find_free_id(set->name, &index, &clash);
+	ret = find_free_id(inst, set->name, &index, &clash);
 	if (ret == -EEXIST) {
 		/* If this is the same set and requested, ignore error */
 		if ((flags & IPSET_FLAG_EXIST) &&
@@ -867,9 +901,9 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 		goto cleanup;
 	} else if (ret == -IPSET_ERR_MAX_SETS) {
 		struct ip_set **list, **tmp;
-		ip_set_id_t i = ip_set_max + IP_SET_INC;
+		ip_set_id_t i = inst->ip_set_max + IP_SET_INC;
 
-		if (i < ip_set_max || i == IPSET_INVALID_ID)
+		if (i < inst->ip_set_max || i == IPSET_INVALID_ID)
 			/* Wraparound */
 			goto cleanup;
 
@@ -877,14 +911,14 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 		if (!list)
 			goto cleanup;
 		/* nfnl mutex is held, both lists are valid */
-		tmp = nfnl_dereference(ip_set_list);
-		memcpy(list, tmp, sizeof(struct ip_set *) * ip_set_max);
-		rcu_assign_pointer(ip_set_list, list);
+		tmp = nfnl_dereference(inst->ip_set_list);
+		memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max);
+		rcu_assign_pointer(inst->ip_set_list, list);
 		/* Make sure all current packets have passed through */
 		synchronize_net();
 		/* Use new list */
-		index = ip_set_max;
-		ip_set_max = i;
+		index = inst->ip_set_max;
+		inst->ip_set_max = i;
 		kfree(tmp);
 		ret = 0;
 	} else if (ret)
@@ -894,7 +928,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	 * Finally! Add our shiny new set to the list, and be done.
 	 */
 	pr_debug("create: '%s' created with index %u!\n", set->name, index);
-	nfnl_set(index) = set;
+	nfnl_set(inst, index) = set;
 
 	return ret;
 
@@ -917,12 +951,12 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
 };
 
 static void
-ip_set_destroy_set(ip_set_id_t index)
+ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index)
 {
-	struct ip_set *set = nfnl_set(index);
+	struct ip_set *set = nfnl_set(inst, index);
 
 	pr_debug("set: %s\n",  set->name);
-	nfnl_set(index) = NULL;
+	nfnl_set(inst, index) = NULL;
 
 	/* Must call it without holding any lock */
 	set->variant->destroy(set);
@@ -935,6 +969,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 	       const struct nlmsghdr *nlh,
 	       const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *s;
 	ip_set_id_t i;
 	int ret = 0;
@@ -954,21 +989,22 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 	 */
 	read_lock_bh(&ip_set_ref_lock);
 	if (!attr[IPSET_ATTR_SETNAME]) {
-		for (i = 0; i < ip_set_max; i++) {
-			s = nfnl_set(i);
+		for (i = 0; i < inst->ip_set_max; i++) {
+			s = nfnl_set(inst, i);
 			if (s != NULL && s->ref) {
 				ret = -IPSET_ERR_BUSY;
 				goto out;
 			}
 		}
 		read_unlock_bh(&ip_set_ref_lock);
-		for (i = 0; i < ip_set_max; i++) {
-			s = nfnl_set(i);
+		for (i = 0; i < inst->ip_set_max; i++) {
+			s = nfnl_set(inst, i);
 			if (s != NULL)
-				ip_set_destroy_set(i);
+				ip_set_destroy_set(inst, i);
 		}
 	} else {
-		s = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &i);
+		s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
+				    &i);
 		if (s == NULL) {
 			ret = -ENOENT;
 			goto out;
@@ -978,7 +1014,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 		}
 		read_unlock_bh(&ip_set_ref_lock);
 
-		ip_set_destroy_set(i);
+		ip_set_destroy_set(inst, i);
 	}
 	return 0;
 out:
@@ -1003,6 +1039,7 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
 	     const struct nlmsghdr *nlh,
 	     const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *s;
 	ip_set_id_t i;
 
@@ -1010,13 +1047,13 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
 		return -IPSET_ERR_PROTOCOL;
 
 	if (!attr[IPSET_ATTR_SETNAME]) {
-		for (i = 0; i < ip_set_max; i++) {
-			s = nfnl_set(i);
+		for (i = 0; i < inst->ip_set_max; i++) {
+			s = nfnl_set(inst, i);
 			if (s != NULL)
 				ip_set_flush_set(s);
 		}
 	} else {
-		s = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+		s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 		if (s == NULL)
 			return -ENOENT;
 
@@ -1042,6 +1079,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
 	      const struct nlmsghdr *nlh,
 	      const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *set, *s;
 	const char *name2;
 	ip_set_id_t i;
@@ -1052,7 +1090,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
 		     attr[IPSET_ATTR_SETNAME2] == NULL))
 		return -IPSET_ERR_PROTOCOL;
 
-	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 	if (set == NULL)
 		return -ENOENT;
 
@@ -1063,8 +1101,8 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
 	}
 
 	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
-	for (i = 0; i < ip_set_max; i++) {
-		s = nfnl_set(i);
+	for (i = 0; i < inst->ip_set_max; i++) {
+		s = nfnl_set(inst, i);
 		if (s != NULL && STREQ(s->name, name2)) {
 			ret = -IPSET_ERR_EXIST_SETNAME2;
 			goto out;
@@ -1091,6 +1129,7 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 	    const struct nlmsghdr *nlh,
 	    const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *from, *to;
 	ip_set_id_t from_id, to_id;
 	char from_name[IPSET_MAXNAMELEN];
@@ -1100,11 +1139,13 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 		     attr[IPSET_ATTR_SETNAME2] == NULL))
 		return -IPSET_ERR_PROTOCOL;
 
-	from = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &from_id);
+	from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
+			       &from_id);
 	if (from == NULL)
 		return -ENOENT;
 
-	to = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id);
+	to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]),
+			     &to_id);
 	if (to == NULL)
 		return -IPSET_ERR_EXIST_SETNAME2;
 
@@ -1121,8 +1162,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 
 	write_lock_bh(&ip_set_ref_lock);
 	swap(from->ref, to->ref);
-	nfnl_set(from_id) = to;
-	nfnl_set(to_id) = from;
+	nfnl_set(inst, from_id) = to;
+	nfnl_set(inst, to_id) = from;
 	write_unlock_bh(&ip_set_ref_lock);
 
 	return 0;
@@ -1141,9 +1182,10 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 static int
 ip_set_dump_done(struct netlink_callback *cb)
 {
+	struct ip_set_net *inst = (struct ip_set_net *)cb->data;
 	if (cb->args[2]) {
-		pr_debug("release set %s\n", nfnl_set(cb->args[1])->name);
-		ip_set_put_byindex((ip_set_id_t) cb->args[1]);
+		pr_debug("release set %s\n", nfnl_set(inst, cb->args[1])->name);
+		__ip_set_put_byindex(inst, (ip_set_id_t) cb->args[1]);
 	}
 	return 0;
 }
@@ -1169,6 +1211,7 @@ dump_init(struct netlink_callback *cb)
 	struct nlattr *attr = (void *)nlh + min_len;
 	u32 dump_type;
 	ip_set_id_t index;
+	struct ip_set_net *inst = (struct ip_set_net *)cb->data;
 
 	/* Second pass, so parser can't fail */
 	nla_parse(cda, IPSET_ATTR_CMD_MAX,
@@ -1182,7 +1225,7 @@ dump_init(struct netlink_callback *cb)
 	if (cda[IPSET_ATTR_SETNAME]) {
 		struct ip_set *set;
 
-		set = find_set_and_id(nla_data(cda[IPSET_ATTR_SETNAME]),
+		set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]),
 				      &index);
 		if (set == NULL)
 			return -ENOENT;
@@ -1210,6 +1253,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 	unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
 	u32 dump_type, dump_flags;
 	int ret = 0;
+	struct ip_set_net *inst = (struct ip_set_net *)cb->data;
 
 	if (!cb->args[0]) {
 		ret = dump_init(cb);
@@ -1223,18 +1267,18 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 		}
 	}
 
-	if (cb->args[1] >= ip_set_max)
+	if (cb->args[1] >= inst->ip_set_max)
 		goto out;
 
 	dump_type = DUMP_TYPE(cb->args[0]);
 	dump_flags = DUMP_FLAGS(cb->args[0]);
-	max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
+	max = dump_type == DUMP_ONE ? cb->args[1] + 1 : inst->ip_set_max;
 dump_last:
 	pr_debug("args[0]: %u %u args[1]: %ld\n",
 		 dump_type, dump_flags, cb->args[1]);
 	for (; cb->args[1] < max; cb->args[1]++) {
 		index = (ip_set_id_t) cb->args[1];
-		set = nfnl_set(index);
+		set = nfnl_set(inst, index);
 		if (set == NULL) {
 			if (dump_type == DUMP_ONE) {
 				ret = -ENOENT;
@@ -1312,8 +1356,8 @@ next_set:
 release_refcount:
 	/* If there was an error or set is done, release set */
 	if (ret || !cb->args[2]) {
-		pr_debug("release set %s\n", nfnl_set(index)->name);
-		ip_set_put_byindex(index);
+		pr_debug("release set %s\n", nfnl_set(inst, index)->name);
+		__ip_set_put_byindex(inst, index);
 		cb->args[2] = 0;
 	}
 out:
@@ -1331,6 +1375,8 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
 	    const struct nlmsghdr *nlh,
 	    const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+
 	if (unlikely(protocol_failed(attr)))
 		return -IPSET_ERR_PROTOCOL;
 
@@ -1338,6 +1384,7 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
 		struct netlink_dump_control c = {
 			.dump = ip_set_dump_start,
 			.done = ip_set_dump_done,
+			.data = (void *)inst
 		};
 		return netlink_dump_start(ctnl, skb, nlh, &c);
 	}
@@ -1416,6 +1463,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
 	    const struct nlmsghdr *nlh,
 	    const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *set;
 	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
 	const struct nlattr *nla;
@@ -1434,7 +1482,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
 		       attr[IPSET_ATTR_LINENO] == NULL))))
 		return -IPSET_ERR_PROTOCOL;
 
-	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 	if (set == NULL)
 		return -ENOENT;
 
@@ -1470,6 +1518,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
 	    const struct nlmsghdr *nlh,
 	    const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *set;
 	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
 	const struct nlattr *nla;
@@ -1488,7 +1537,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
 		       attr[IPSET_ATTR_LINENO] == NULL))))
 		return -IPSET_ERR_PROTOCOL;
 
-	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 	if (set == NULL)
 		return -ENOENT;
 
@@ -1524,6 +1573,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
 	     const struct nlmsghdr *nlh,
 	     const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *set;
 	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
 	int ret = 0;
@@ -1534,7 +1584,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
 		     !flag_nested(attr[IPSET_ATTR_DATA])))
 		return -IPSET_ERR_PROTOCOL;
 
-	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 	if (set == NULL)
 		return -ENOENT;
 
@@ -1559,6 +1609,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
 	      const struct nlmsghdr *nlh,
 	      const struct nlattr * const attr[])
 {
+	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	const struct ip_set *set;
 	struct sk_buff *skb2;
 	struct nlmsghdr *nlh2;
@@ -1568,7 +1619,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
 		     attr[IPSET_ATTR_SETNAME] == NULL))
 		return -IPSET_ERR_PROTOCOL;
 
-	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
 	if (set == NULL)
 		return -ENOENT;
 
@@ -1793,8 +1844,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 	unsigned int *op;
 	void *data;
 	int copylen = *len, ret = 0;
+	struct net *net = sock_net(sk);
+	struct ip_set_net *inst = ip_set_pernet(net);
 
-	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 	if (optval != SO_IP_SET)
 		return -EBADF;
@@ -1843,7 +1896,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 		}
 		req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
 		nfnl_lock(NFNL_SUBSYS_IPSET);
-		find_set_and_id(req_get->set.name, &id);
+		find_set_and_id(inst, req_get->set.name, &id);
 		req_get->set.index = id;
 		nfnl_unlock(NFNL_SUBSYS_IPSET);
 		goto copy;
@@ -1858,10 +1911,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 		}
 		req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
 		nfnl_lock(NFNL_SUBSYS_IPSET);
-		find_set_and_id(req_get->set.name, &id);
+		find_set_and_id(inst, req_get->set.name, &id);
 		req_get->set.index = id;
 		if (id != IPSET_INVALID_ID)
-			req_get->family = nfnl_set(id)->family;
+			req_get->family = nfnl_set(inst, id)->family;
 		nfnl_unlock(NFNL_SUBSYS_IPSET);
 		goto copy;
 	}
@@ -1870,12 +1923,12 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 		struct ip_set *set;
 
 		if (*len != sizeof(struct ip_set_req_get_set) ||
-		    req_get->set.index >= ip_set_max) {
+		    req_get->set.index >= inst->ip_set_max) {
 			ret = -EINVAL;
 			goto done;
 		}
 		nfnl_lock(NFNL_SUBSYS_IPSET);
-		set = nfnl_set(req_get->set.index);
+		set = nfnl_set(inst, req_get->set.index);
 		strncpy(req_get->set.name, set ? set->name : "",
 			IPSET_MAXNAMELEN);
 		nfnl_unlock(NFNL_SUBSYS_IPSET);
@@ -1904,49 +1957,82 @@ static struct nf_sockopt_ops so_set __read_mostly = {
 	.owner		= THIS_MODULE,
 };
 
-static int __init
-ip_set_init(void)
+static int __net_init
+ip_set_net_init(struct net *net)
 {
+	struct ip_set_net *inst = ip_set_pernet(net);
+
 	struct ip_set **list;
-	int ret;
 
-	if (max_sets)
-		ip_set_max = max_sets;
-	if (ip_set_max >= IPSET_INVALID_ID)
-		ip_set_max = IPSET_INVALID_ID - 1;
+	inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX;
+	if (inst->ip_set_max >= IPSET_INVALID_ID)
+		inst->ip_set_max = IPSET_INVALID_ID - 1;
 
-	list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL);
+	list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL);
 	if (!list)
 		return -ENOMEM;
+	inst->is_deleted = 0;
+	rcu_assign_pointer(inst->ip_set_list, list);
+	pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
+	return 0;
+}
+
+static void __net_exit
+ip_set_net_exit(struct net *net)
+{
+	struct ip_set_net *inst = ip_set_pernet(net);
+
+	struct ip_set *set = NULL;
+	ip_set_id_t i;
+
+	inst->is_deleted = 1; /* flag for ip_set_nfnl_put */
+
+	for (i = 0; i < inst->ip_set_max; i++) {
+		set = nfnl_set(inst, i);
+		if (set != NULL)
+			ip_set_destroy_set(inst, i);
+	}
+	kfree(rcu_dereference_protected(inst->ip_set_list, 1));
+}
+
+static struct pernet_operations ip_set_net_ops = {
+	.init	= ip_set_net_init,
+	.exit   = ip_set_net_exit,
+	.id	= &ip_set_net_id,
+	.size	= sizeof(struct ip_set_net)
+};
+
 
-	rcu_assign_pointer(ip_set_list, list);
-	ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
+static int __init
+ip_set_init(void)
+{
+	int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
 	if (ret != 0) {
 		pr_err("ip_set: cannot register with nfnetlink.\n");
-		kfree(list);
 		return ret;
 	}
 	ret = nf_register_sockopt(&so_set);
 	if (ret != 0) {
 		pr_err("SO_SET registry failed: %d\n", ret);
 		nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-		kfree(list);
 		return ret;
 	}
-
-	pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
+	ret = register_pernet_subsys(&ip_set_net_ops);
+	if (ret) {
+		pr_err("ip_set: cannot register pernet_subsys.\n");
+		nf_unregister_sockopt(&so_set);
+		nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
+		return ret;
+	}
 	return 0;
 }
 
 static void __exit
 ip_set_fini(void)
 {
-	struct ip_set **list = rcu_dereference_protected(ip_set_list, 1);
-
-	/* There can't be any existing set */
+	unregister_pernet_subsys(&ip_set_net_ops);
 	nf_unregister_sockopt(&so_set);
 	nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-	kfree(list);
 	pr_debug("these are the famous last words\n");
 }
 
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index b4add206c603..6a80dbd30df7 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -1011,7 +1011,8 @@ static const struct ip_set_type_variant mtype_variant = {
 
 #ifdef IP_SET_EMIT_CREATE
 static int
-IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
+IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
+			    struct nlattr *tb[], u32 flags)
 {
 	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
 	u8 hbits;
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index ba4232e451f4..ec6f6d15dded 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -39,6 +39,7 @@ struct set_adt_elem {
 struct list_set {
 	u32 size;		/* size of set list array */
 	struct timer_list gc;	/* garbage collection */
+	struct net *net;	/* namespace */
 	struct set_elem members[0]; /* the set members */
 };
 
@@ -171,7 +172,7 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
 	if (e->id != IPSET_INVALID_ID) {
 		if (i == map->size - 1) {
 			/* Last element replaced: e.g. add new,before,last */
-			ip_set_put_byindex(e->id);
+			ip_set_put_byindex(map->net, e->id);
 			ip_set_ext_destroy(set, e);
 		} else {
 			struct set_elem *x = list_set_elem(set, map,
@@ -179,7 +180,7 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
 
 			/* Last element pushed off */
 			if (x->id != IPSET_INVALID_ID) {
-				ip_set_put_byindex(x->id);
+				ip_set_put_byindex(map->net, x->id);
 				ip_set_ext_destroy(set, x);
 			}
 			memmove(list_set_elem(set, map, i + 1), e,
@@ -205,7 +206,7 @@ list_set_del(struct ip_set *set, u32 i)
 	struct list_set *map = set->data;
 	struct set_elem *e = list_set_elem(set, map, i);
 
-	ip_set_put_byindex(e->id);
+	ip_set_put_byindex(map->net, e->id);
 	ip_set_ext_destroy(set, e);
 
 	if (i < map->size - 1)
@@ -307,7 +308,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 		if (SET_WITH_COMMENT(set))
 			ip_set_init_comment(ext_comment(e, set), ext);
 		/* Set is already added to the list */
-		ip_set_put_byindex(d->id);
+		ip_set_put_byindex(map->net, d->id);
 		return 0;
 	}
 insert:
@@ -366,6 +367,7 @@ static int
 list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
+	struct list_set *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct set_adt_elem e = { .refid = IPSET_INVALID_ID };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -385,7 +387,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 	ret = ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
-	e.id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s);
+	e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s);
 	if (e.id == IPSET_INVALID_ID)
 		return -IPSET_ERR_NAME;
 	/* "Loop detection" */
@@ -405,7 +407,8 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 	}
 
 	if (tb[IPSET_ATTR_NAMEREF]) {
-		e.refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]),
+		e.refid = ip_set_get_byname(map->net,
+					    nla_data(tb[IPSET_ATTR_NAMEREF]),
 					    &s);
 		if (e.refid == IPSET_INVALID_ID) {
 			ret = -IPSET_ERR_NAMEREF;
@@ -421,9 +424,9 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 
 finish:
 	if (e.refid != IPSET_INVALID_ID)
-		ip_set_put_byindex(e.refid);
+		ip_set_put_byindex(map->net, e.refid);
 	if (adt != IPSET_ADD || ret)
-		ip_set_put_byindex(e.id);
+		ip_set_put_byindex(map->net, e.id);
 
 	return ip_set_eexist(ret, flags) ? 0 : ret;
 }
@@ -438,7 +441,7 @@ list_set_flush(struct ip_set *set)
 	for (i = 0; i < map->size; i++) {
 		e = list_set_elem(set, map, i);
 		if (e->id != IPSET_INVALID_ID) {
-			ip_set_put_byindex(e->id);
+			ip_set_put_byindex(map->net, e->id);
 			ip_set_ext_destroy(set, e);
 			e->id = IPSET_INVALID_ID;
 		}
@@ -510,7 +513,7 @@ list_set_list(const struct ip_set *set,
 				goto nla_put_failure;
 		}
 		if (nla_put_string(skb, IPSET_ATTR_NAME,
-				   ip_set_name_byindex(e->id)))
+				   ip_set_name_byindex(map->net, e->id)))
 			goto nla_put_failure;
 		if (ip_set_put_extensions(skb, set, e, true))
 			goto nla_put_failure;
@@ -587,7 +590,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 /* Create list:set type of sets */
 
 static bool
-init_list_set(struct ip_set *set, u32 size)
+init_list_set(struct net *net, struct ip_set *set, u32 size)
 {
 	struct list_set *map;
 	struct set_elem *e;
@@ -598,6 +601,7 @@ init_list_set(struct ip_set *set, u32 size)
 		return false;
 
 	map->size = size;
+	map->net = net;
 	set->data = map;
 
 	for (i = 0; i < size; i++) {
@@ -609,7 +613,8 @@ init_list_set(struct ip_set *set, u32 size)
 }
 
 static int
-list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
+		u32 flags)
 {
 	u32 size = IP_SET_LIST_DEFAULT_SIZE;
 
@@ -625,7 +630,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 
 	set->variant = &set_variant;
 	set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem));
-	if (!init_list_set(set, size))
+	if (!init_list_set(net, set, size))
 		return -ENOMEM;
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 2095488684ff..e7c4e0e01ff5 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -81,7 +81,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
 	struct xt_set_info_match_v0 *info = par->matchinfo;
 	ip_set_id_t index;
 
-	index = ip_set_nfnl_get_byindex(info->match_set.index);
+	index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
 
 	if (index == IPSET_INVALID_ID) {
 		pr_warning("Cannot find set indentified by id %u to match\n",
@@ -91,7 +91,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
 	if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) {
 		pr_warning("Protocol error: set match dimension "
 			   "is over the limit!\n");
-		ip_set_nfnl_put(info->match_set.index);
+		ip_set_nfnl_put(par->net, info->match_set.index);
 		return -ERANGE;
 	}
 
@@ -106,7 +106,7 @@ set_match_v0_destroy(const struct xt_mtdtor_param *par)
 {
 	struct xt_set_info_match_v0 *info = par->matchinfo;
 
-	ip_set_nfnl_put(info->match_set.index);
+	ip_set_nfnl_put(par->net, info->match_set.index);
 }
 
 /* Revision 1 match */
@@ -131,7 +131,7 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par)
 	struct xt_set_info_match_v1 *info = par->matchinfo;
 	ip_set_id_t index;
 
-	index = ip_set_nfnl_get_byindex(info->match_set.index);
+	index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
 
 	if (index == IPSET_INVALID_ID) {
 		pr_warning("Cannot find set indentified by id %u to match\n",
@@ -141,7 +141,7 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par)
 	if (info->match_set.dim > IPSET_DIM_MAX) {
 		pr_warning("Protocol error: set match dimension "
 			   "is over the limit!\n");
-		ip_set_nfnl_put(info->match_set.index);
+		ip_set_nfnl_put(par->net, info->match_set.index);
 		return -ERANGE;
 	}
 
@@ -153,7 +153,7 @@ set_match_v1_destroy(const struct xt_mtdtor_param *par)
 {
 	struct xt_set_info_match_v1 *info = par->matchinfo;
 
-	ip_set_nfnl_put(info->match_set.index);
+	ip_set_nfnl_put(par->net, info->match_set.index);
 }
 
 /* Revision 3 match */
@@ -228,7 +228,7 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
 	ip_set_id_t index;
 
 	if (info->add_set.index != IPSET_INVALID_ID) {
-		index = ip_set_nfnl_get_byindex(info->add_set.index);
+		index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
 		if (index == IPSET_INVALID_ID) {
 			pr_warning("Cannot find add_set index %u as target\n",
 				   info->add_set.index);
@@ -237,12 +237,12 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
 	}
 
 	if (info->del_set.index != IPSET_INVALID_ID) {
-		index = ip_set_nfnl_get_byindex(info->del_set.index);
+		index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
 		if (index == IPSET_INVALID_ID) {
 			pr_warning("Cannot find del_set index %u as target\n",
 				   info->del_set.index);
 			if (info->add_set.index != IPSET_INVALID_ID)
-				ip_set_nfnl_put(info->add_set.index);
+				ip_set_nfnl_put(par->net, info->add_set.index);
 			return -ENOENT;
 		}
 	}
@@ -251,9 +251,9 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
 		pr_warning("Protocol error: SET target dimension "
 			   "is over the limit!\n");
 		if (info->add_set.index != IPSET_INVALID_ID)
-			ip_set_nfnl_put(info->add_set.index);
+			ip_set_nfnl_put(par->net, info->add_set.index);
 		if (info->del_set.index != IPSET_INVALID_ID)
-			ip_set_nfnl_put(info->del_set.index);
+			ip_set_nfnl_put(par->net, info->del_set.index);
 		return -ERANGE;
 	}
 
@@ -270,9 +270,9 @@ set_target_v0_destroy(const struct xt_tgdtor_param *par)
 	const struct xt_set_info_target_v0 *info = par->targinfo;
 
 	if (info->add_set.index != IPSET_INVALID_ID)
-		ip_set_nfnl_put(info->add_set.index);
+		ip_set_nfnl_put(par->net, info->add_set.index);
 	if (info->del_set.index != IPSET_INVALID_ID)
-		ip_set_nfnl_put(info->del_set.index);
+		ip_set_nfnl_put(par->net, info->del_set.index);
 }
 
 /* Revision 1 target */
@@ -301,7 +301,7 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
 	ip_set_id_t index;
 
 	if (info->add_set.index != IPSET_INVALID_ID) {
-		index = ip_set_nfnl_get_byindex(info->add_set.index);
+		index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
 		if (index == IPSET_INVALID_ID) {
 			pr_warning("Cannot find add_set index %u as target\n",
 				   info->add_set.index);
@@ -310,12 +310,12 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
 	}
 
 	if (info->del_set.index != IPSET_INVALID_ID) {
-		index = ip_set_nfnl_get_byindex(info->del_set.index);
+		index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
 		if (index == IPSET_INVALID_ID) {
 			pr_warning("Cannot find del_set index %u as target\n",
 				   info->del_set.index);
 			if (info->add_set.index != IPSET_INVALID_ID)
-				ip_set_nfnl_put(info->add_set.index);
+				ip_set_nfnl_put(par->net, info->add_set.index);
 			return -ENOENT;
 		}
 	}
@@ -324,9 +324,9 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
 		pr_warning("Protocol error: SET target dimension "
 			   "is over the limit!\n");
 		if (info->add_set.index != IPSET_INVALID_ID)
-			ip_set_nfnl_put(info->add_set.index);
+			ip_set_nfnl_put(par->net, info->add_set.index);
 		if (info->del_set.index != IPSET_INVALID_ID)
-			ip_set_nfnl_put(info->del_set.index);
+			ip_set_nfnl_put(par->net, info->del_set.index);
 		return -ERANGE;
 	}
 
@@ -339,9 +339,9 @@ set_target_v1_destroy(const struct xt_tgdtor_param *par)
 	const struct xt_set_info_target_v1 *info = par->targinfo;
 
 	if (info->add_set.index != IPSET_INVALID_ID)
-		ip_set_nfnl_put(info->add_set.index);
+		ip_set_nfnl_put(par->net, info->add_set.index);
 	if (info->del_set.index != IPSET_INVALID_ID)
-		ip_set_nfnl_put(info->del_set.index);
+		ip_set_nfnl_put(par->net, info->del_set.index);
 }
 
 /* Revision 2 target */
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index 938b7cbf5627..1ac41d3de5c3 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -24,11 +24,12 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len,
 {
 	struct xt_set_info *set = data;
 	ip_set_id_t index;
+	struct net *net = qdisc_dev(tp->q)->nd_net;
 
 	if (data_len != sizeof(*set))
 		return -EINVAL;
 
-	index = ip_set_nfnl_get_byindex(set->index);
+	index = ip_set_nfnl_get_byindex(net, set->index);
 	if (index == IPSET_INVALID_ID)
 		return -ENOENT;
 
@@ -37,7 +38,7 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len,
 	if (em->data)
 		return 0;
 
-	ip_set_nfnl_put(index);
+	ip_set_nfnl_put(net, index);
 	return -ENOMEM;
 }
 
@@ -45,7 +46,7 @@ static void em_ipset_destroy(struct tcf_proto *p, struct tcf_ematch *em)
 {
 	const struct xt_set_info *set = (const void *) em->data;
 	if (set) {
-		ip_set_nfnl_put(set->index);
+		ip_set_nfnl_put(qdisc_dev(p->q)->nd_net, set->index);
 		kfree((void *) em->data);
 	}
 }
-- 
cgit v1.2.3-71-gd317


From 36a8f39e05ccc308a5619a7edb5ad6e15ee82ff6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 29 Sep 2013 01:21:32 -0700
Subject: net: skb_is_gso_v6() requires skb_is_gso()

bnx2x makes a dangerous use of skb_is_gso_v6().

It should first make sure skb is a gso packet

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Eilon Greenstein <eilong@broadcom.com>
Acked-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 18 ++++++++++--------
 include/linux/skbuff.h                          |  1 +
 2 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 61726af1de6e..0c7fb1ed1261 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3256,14 +3256,16 @@ static u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb)
 	if (prot == IPPROTO_TCP)
 		rc |= XMIT_CSUM_TCP;
 
-	if (skb_is_gso_v6(skb)) {
-		rc |= (XMIT_GSO_V6 | XMIT_CSUM_TCP);
-		if (rc & XMIT_CSUM_ENC)
-			rc |= XMIT_GSO_ENC_V6;
-	} else if (skb_is_gso(skb)) {
-		rc |= (XMIT_GSO_V4 | XMIT_CSUM_TCP);
-		if (rc & XMIT_CSUM_ENC)
-			rc |= XMIT_GSO_ENC_V4;
+	if (skb_is_gso(skb)) {
+		if (skb_is_gso_v6(skb)) {
+			rc |= (XMIT_GSO_V6 | XMIT_CSUM_TCP);
+			if (rc & XMIT_CSUM_ENC)
+				rc |= XMIT_GSO_ENC_V6;
+		} else {
+			rc |= (XMIT_GSO_V4 | XMIT_CSUM_TCP);
+			if (rc & XMIT_CSUM_ENC)
+				rc |= XMIT_GSO_ENC_V4;
+		}
 	}
 
 	return rc;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6d56840e561e..d72d71efa7a3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2755,6 +2755,7 @@ static inline bool skb_is_gso(const struct sk_buff *skb)
 	return skb_shinfo(skb)->gso_size;
 }
 
+/* Note: Should be called only if skb_is_gso(skb) is true */
 static inline bool skb_is_gso_v6(const struct sk_buff *skb)
 {
 	return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
-- 
cgit v1.2.3-71-gd317


From 5eb7906b47dcd906b3ffd811e689e0de4a6b1b6a Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Thu, 29 Aug 2013 15:03:14 +0300
Subject: ieee80211: fix vht cap definitions

VHT_CAP_BEAMFORMER_ANTENNAS cap is actually defined in the draft as
VHT_CAP_BEAMFORMEE_STS_MAX, and its size is 3 bits long.

VHT_CAP_SOUNDING_DIMENSIONS is also 3 bits long.

Fix the definitions and change the cap masking accordingly.

Signed-off-by: Eliad Peller <eliad@wizery.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 4 ++--
 net/mac80211/vht.c        | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index a5b598a79bec..7c1e1ebc0e23 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1391,8 +1391,8 @@ struct ieee80211_vht_operation {
 #define IEEE80211_VHT_CAP_RXSTBC_MASK				0x00000700
 #define IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE			0x00000800
 #define IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE			0x00001000
-#define IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX		0x00006000
-#define IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX		0x00030000
+#define IEEE80211_VHT_CAP_BEAMFORMEE_STS_MAX			0x0000e000
+#define IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX		0x00070000
 #define IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE			0x00080000
 #define IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE			0x00100000
 #define IEEE80211_VHT_CAP_VHT_TXOP_PS				0x00200000
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 97c289414e32..de0112785aae 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -185,13 +185,13 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
 	if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) {
 		vht_cap->cap |= cap_info &
 				(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE |
-				 IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX |
 				 IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX);
 	}
 
 	if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE)
 		vht_cap->cap |= cap_info &
-				IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE;
+				(IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
+				 IEEE80211_VHT_CAP_BEAMFORMEE_STS_MAX);
 
 	if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE)
 		vht_cap->cap |= cap_info &
-- 
cgit v1.2.3-71-gd317


From 180cf72f56fab2810e00497c087c7126bfe53c85 Mon Sep 17 00:00:00 2001
From: "holger@eitzenberger.org" <holger@eitzenberger.org>
Date: Mon, 30 Sep 2013 17:07:28 +0200
Subject: netfilter: nf_ct_sip: consolidate NAT hook functions

There are currently seven different NAT hooks used in both
nf_conntrack_sip and nf_nat_sip, each of the hooks is exported in
nf_conntrack_sip, then set from the nf_nat_sip NAT helper.

And because each of them is exported there is quite some overhead
introduced due of this.

By introducing nf_nat_sip_hooks I am able to reduce both text/data
somewhat.  For nf_conntrack_sip e. g. I get

        text             data              bss              dec
old    15243             5256               32            20531
new    15010             5192               32            20234

Signed-off-by: Holger Eitzenberger <holger@eitzenberger.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_sip.h | 107 +++++++++++++-----------
 net/netfilter/nf_conntrack_sip.c           | 127 ++++++++---------------------
 net/netfilter/nf_nat_sip.c                 |  35 ++++----
 3 files changed, 107 insertions(+), 162 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index ba7f571a2b1c..4cb71551f611 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -107,55 +107,64 @@ enum sdp_header_types {
 	SDP_HDR_MEDIA,
 };
 
-extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb,
-				       unsigned int protoff,
-				       unsigned int dataoff,
-				       const char **dptr,
-				       unsigned int *datalen);
-extern void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb,
-					  unsigned int protoff, s16 off);
-extern unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
-					      unsigned int protoff,
-					      unsigned int dataoff,
-					      const char **dptr,
-					      unsigned int *datalen,
-					      struct nf_conntrack_expect *exp,
-					      unsigned int matchoff,
-					      unsigned int matchlen);
-extern unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb,
-					    unsigned int protoff,
-					    unsigned int dataoff,
-					    const char **dptr,
-					    unsigned int *datalen,
-					    unsigned int sdpoff,
-					    enum sdp_header_types type,
-					    enum sdp_header_types term,
-					    const union nf_inet_addr *addr);
-extern unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb,
-					    unsigned int protoff,
-					    unsigned int dataoff,
-					    const char **dptr,
-					    unsigned int *datalen,
-					    unsigned int matchoff,
-					    unsigned int matchlen,
-					    u_int16_t port);
-extern unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb,
-					       unsigned int protoff,
-					       unsigned int dataoff,
-					       const char **dptr,
-					       unsigned int *datalen,
-					       unsigned int sdpoff,
-					       const union nf_inet_addr *addr);
-extern unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb,
-					     unsigned int protoff,
-					     unsigned int dataoff,
-					     const char **dptr,
-					     unsigned int *datalen,
-					     struct nf_conntrack_expect *rtp_exp,
-					     struct nf_conntrack_expect *rtcp_exp,
-					     unsigned int mediaoff,
-					     unsigned int medialen,
-					     union nf_inet_addr *rtp_addr);
+struct nf_nat_sip_hooks {
+	unsigned int (*msg)(struct sk_buff *skb,
+			    unsigned int protoff,
+			    unsigned int dataoff,
+			    const char **dptr,
+			    unsigned int *datalen);
+
+	void (*seq_adjust)(struct sk_buff *skb,
+			   unsigned int protoff, s16 off);
+
+	unsigned int (*expect)(struct sk_buff *skb,
+			       unsigned int protoff,
+			       unsigned int dataoff,
+			       const char **dptr,
+			       unsigned int *datalen,
+			       struct nf_conntrack_expect *exp,
+			       unsigned int matchoff,
+			       unsigned int matchlen);
+
+	unsigned int (*sdp_addr)(struct sk_buff *skb,
+				 unsigned int protoff,
+				 unsigned int dataoff,
+				 const char **dptr,
+				 unsigned int *datalen,
+				 unsigned int sdpoff,
+				 enum sdp_header_types type,
+				 enum sdp_header_types term,
+				 const union nf_inet_addr *addr);
+
+	unsigned int (*sdp_port)(struct sk_buff *skb,
+				 unsigned int protoff,
+				 unsigned int dataoff,
+				 const char **dptr,
+				 unsigned int *datalen,
+				 unsigned int matchoff,
+				 unsigned int matchlen,
+				 u_int16_t port);
+
+	unsigned int (*sdp_session)(struct sk_buff *skb,
+				    unsigned int protoff,
+				    unsigned int dataoff,
+				    const char **dptr,
+				    unsigned int *datalen,
+				    unsigned int sdpoff,
+				    const union nf_inet_addr *addr);
+
+	unsigned int (*sdp_media)(struct sk_buff *skb,
+				  unsigned int protoff,
+				  unsigned int dataoff,
+				  const char **dptr,
+				  unsigned int *datalen,
+				  struct nf_conntrack_expect *rtp_exp,
+				  struct nf_conntrack_expect *rtcp_exp,
+				  unsigned int mediaoff,
+				  unsigned int medialen,
+				  union nf_inet_addr *rtp_addr);
+};
+extern const struct nf_nat_sip_hooks *nf_nat_sip_hooks;
 
 extern int ct_sip_parse_request(const struct nf_conn *ct,
 				const char *dptr, unsigned int datalen,
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 5ed8c441dffd..466410eaa482 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -52,66 +52,8 @@ module_param(sip_direct_media, int, 0600);
 MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling "
 				   "endpoints only (default 1)");
 
-unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int protoff,
-				unsigned int dataoff, const char **dptr,
-				unsigned int *datalen) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
-
-void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, unsigned int protoff,
-				   s16 off) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook);
-
-unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
-				       unsigned int protoff,
-				       unsigned int dataoff,
-				       const char **dptr,
-				       unsigned int *datalen,
-				       struct nf_conntrack_expect *exp,
-				       unsigned int matchoff,
-				       unsigned int matchlen) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook);
-
-unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int protoff,
-				     unsigned int dataoff,
-				     const char **dptr,
-				     unsigned int *datalen,
-				     unsigned int sdpoff,
-				     enum sdp_header_types type,
-				     enum sdp_header_types term,
-				     const union nf_inet_addr *addr)
-				     __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook);
-
-unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int protoff,
-				     unsigned int dataoff,
-				     const char **dptr,
-				     unsigned int *datalen,
-				     unsigned int matchoff,
-				     unsigned int matchlen,
-				     u_int16_t port) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook);
-
-unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb,
-					unsigned int protoff,
-					unsigned int dataoff,
-					const char **dptr,
-					unsigned int *datalen,
-					unsigned int sdpoff,
-					const union nf_inet_addr *addr)
-					__read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook);
-
-unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int protoff,
-				      unsigned int dataoff,
-				      const char **dptr,
-				      unsigned int *datalen,
-				      struct nf_conntrack_expect *rtp_exp,
-				      struct nf_conntrack_expect *rtcp_exp,
-				      unsigned int mediaoff,
-				      unsigned int medialen,
-				      union nf_inet_addr *rtp_addr)
-				      __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sdp_media_hook);
+const struct nf_nat_sip_hooks *nf_nat_sip_hooks;
+EXPORT_SYMBOL_GPL(nf_nat_sip_hooks);
 
 static int string_len(const struct nf_conn *ct, const char *dptr,
 		      const char *limit, int *shift)
@@ -914,8 +856,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 	int direct_rtp = 0, skip_expect = 0, ret = NF_DROP;
 	u_int16_t base_port;
 	__be16 rtp_port, rtcp_port;
-	typeof(nf_nat_sdp_port_hook) nf_nat_sdp_port;
-	typeof(nf_nat_sdp_media_hook) nf_nat_sdp_media;
+	const struct nf_nat_sip_hooks *hooks;
 
 	saddr = NULL;
 	if (sip_direct_media) {
@@ -972,9 +913,9 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 	rtcp_port = htons(base_port + 1);
 
 	if (direct_rtp) {
-		nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook);
-		if (nf_nat_sdp_port &&
-		    !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen,
+		hooks = rcu_dereference(nf_nat_sip_hooks);
+		if (hooks &&
+		    !hooks->sdp_port(skb, protoff, dataoff, dptr, datalen,
 				     mediaoff, medialen, ntohs(rtp_port)))
 			goto err1;
 	}
@@ -996,10 +937,10 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 	nf_ct_expect_init(rtcp_exp, class, nf_ct_l3num(ct), saddr, daddr,
 			  IPPROTO_UDP, NULL, &rtcp_port);
 
-	nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook);
-	if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp)
-		ret = nf_nat_sdp_media(skb, protoff, dataoff, dptr, datalen,
-				       rtp_exp, rtcp_exp,
+	hooks = rcu_dereference(nf_nat_sip_hooks);
+	if (hooks && ct->status & IPS_NAT_MASK && !direct_rtp)
+		ret = hooks->sdp_media(skb, protoff, dataoff, dptr,
+				       datalen, rtp_exp, rtcp_exp,
 				       mediaoff, medialen, daddr);
 	else {
 		if (nf_ct_expect_related(rtp_exp) == 0) {
@@ -1053,13 +994,12 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
 	unsigned int caddr_len, maddr_len;
 	unsigned int i;
 	union nf_inet_addr caddr, maddr, rtp_addr;
+	const struct nf_nat_sip_hooks *hooks;
 	unsigned int port;
 	const struct sdp_media_type *t;
 	int ret = NF_ACCEPT;
-	typeof(nf_nat_sdp_addr_hook) nf_nat_sdp_addr;
-	typeof(nf_nat_sdp_session_hook) nf_nat_sdp_session;
 
-	nf_nat_sdp_addr = rcu_dereference(nf_nat_sdp_addr_hook);
+	hooks = rcu_dereference(nf_nat_sip_hooks);
 
 	/* Find beginning of session description */
 	if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
@@ -1127,10 +1067,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
 		}
 
 		/* Update media connection address if present */
-		if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) {
-			ret = nf_nat_sdp_addr(skb, protoff, dataoff,
+		if (maddr_len && hooks && ct->status & IPS_NAT_MASK) {
+			ret = hooks->sdp_addr(skb, protoff, dataoff,
 					      dptr, datalen, mediaoff,
-					      SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
+					      SDP_HDR_CONNECTION,
+					      SDP_HDR_MEDIA,
 					      &rtp_addr);
 			if (ret != NF_ACCEPT) {
 				nf_ct_helper_log(skb, ct, "cannot mangle SDP");
@@ -1141,10 +1082,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
 	}
 
 	/* Update session connection and owner addresses */
-	nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook);
-	if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK)
-		ret = nf_nat_sdp_session(skb, protoff, dataoff,
-					 dptr, datalen, sdpoff, &rtp_addr);
+	hooks = rcu_dereference(nf_nat_sip_hooks);
+	if (hooks && ct->status & IPS_NAT_MASK)
+		ret = hooks->sdp_session(skb, protoff, dataoff,
+					 dptr, datalen, sdpoff,
+					 &rtp_addr);
 
 	return ret;
 }
@@ -1244,11 +1186,11 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
 	unsigned int matchoff, matchlen;
 	struct nf_conntrack_expect *exp;
 	union nf_inet_addr *saddr, daddr;
+	const struct nf_nat_sip_hooks *hooks;
 	__be16 port;
 	u8 proto;
 	unsigned int expires = 0;
 	int ret;
-	typeof(nf_nat_sip_expect_hook) nf_nat_sip_expect;
 
 	/* Expected connections can not register again. */
 	if (ct->status & IPS_EXPECTED)
@@ -1311,10 +1253,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
 	exp->helper = nfct_help(ct)->helper;
 	exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
 
-	nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook);
-	if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK)
-		ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen,
-					exp, matchoff, matchlen);
+	hooks = rcu_dereference(nf_nat_sip_hooks);
+	if (hooks && ct->status & IPS_NAT_MASK)
+		ret = hooks->expect(skb, protoff, dataoff, dptr, datalen,
+				    exp, matchoff, matchlen);
 	else {
 		if (nf_ct_expect_related(exp) != 0) {
 			nf_ct_helper_log(skb, ct, "cannot add expectation");
@@ -1517,7 +1459,7 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct,
 			   unsigned int protoff, unsigned int dataoff,
 			   const char **dptr, unsigned int *datalen)
 {
-	typeof(nf_nat_sip_hook) nf_nat_sip;
+	const struct nf_nat_sip_hooks *hooks;
 	int ret;
 
 	if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0)
@@ -1526,9 +1468,9 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct,
 		ret = process_sip_response(skb, protoff, dataoff, dptr, datalen);
 
 	if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
-		nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
-		if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff,
-					      dptr, datalen)) {
+		hooks = rcu_dereference(nf_nat_sip_hooks);
+		if (hooks && !hooks->msg(skb, protoff, dataoff,
+					 dptr, datalen)) {
 			nf_ct_helper_log(skb, ct, "cannot NAT SIP message");
 			ret = NF_DROP;
 		}
@@ -1548,7 +1490,6 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
 	s16 diff, tdiff = 0;
 	int ret = NF_ACCEPT;
 	bool term;
-	typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust;
 
 	if (ctinfo != IP_CT_ESTABLISHED &&
 	    ctinfo != IP_CT_ESTABLISHED_REPLY)
@@ -1612,9 +1553,11 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
 	}
 
 	if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
-		nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook);
-		if (nf_nat_sip_seq_adjust)
-			nf_nat_sip_seq_adjust(skb, protoff, tdiff);
+		const struct nf_nat_sip_hooks *hooks;
+
+		hooks = rcu_dereference(nf_nat_sip_hooks);
+		if (hooks)
+			hooks->seq_adjust(skb, protoff, tdiff);
 	}
 
 	return ret;
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index f9790405b7ff..b4d691db955e 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -625,33 +625,26 @@ static struct nf_ct_helper_expectfn sip_nat = {
 
 static void __exit nf_nat_sip_fini(void)
 {
-	RCU_INIT_POINTER(nf_nat_sip_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL);
+	RCU_INIT_POINTER(nf_nat_sip_hooks, NULL);
+
 	nf_ct_helper_expectfn_unregister(&sip_nat);
 	synchronize_rcu();
 }
 
+static const struct nf_nat_sip_hooks sip_hooks = {
+	.msg		= nf_nat_sip,
+	.seq_adjust	= nf_nat_sip_seq_adjust,
+	.expect		= nf_nat_sip_expect,
+	.sdp_addr	= nf_nat_sdp_addr,
+	.sdp_port	= nf_nat_sdp_port,
+	.sdp_session	= nf_nat_sdp_session,
+	.sdp_media	= nf_nat_sdp_media,
+};
+
 static int __init nf_nat_sip_init(void)
 {
-	BUG_ON(nf_nat_sip_hook != NULL);
-	BUG_ON(nf_nat_sip_seq_adjust_hook != NULL);
-	BUG_ON(nf_nat_sip_expect_hook != NULL);
-	BUG_ON(nf_nat_sdp_addr_hook != NULL);
-	BUG_ON(nf_nat_sdp_port_hook != NULL);
-	BUG_ON(nf_nat_sdp_session_hook != NULL);
-	BUG_ON(nf_nat_sdp_media_hook != NULL);
-	RCU_INIT_POINTER(nf_nat_sip_hook, nf_nat_sip);
-	RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, nf_nat_sip_seq_adjust);
-	RCU_INIT_POINTER(nf_nat_sip_expect_hook, nf_nat_sip_expect);
-	RCU_INIT_POINTER(nf_nat_sdp_addr_hook, nf_nat_sdp_addr);
-	RCU_INIT_POINTER(nf_nat_sdp_port_hook, nf_nat_sdp_port);
-	RCU_INIT_POINTER(nf_nat_sdp_session_hook, nf_nat_sdp_session);
-	RCU_INIT_POINTER(nf_nat_sdp_media_hook, nf_nat_sdp_media);
+	BUG_ON(nf_nat_sip_hooks != NULL);
+	RCU_INIT_POINTER(nf_nat_sip_hooks, &sip_hooks);
 	nf_ct_helper_expectfn_register(&sip_nat);
 	return 0;
 }
-- 
cgit v1.2.3-71-gd317


From 4bcef89f0c6ca1eb4f1a789c2a226f4c02656a4b Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Sat, 28 Sep 2013 23:15:27 +0200
Subject: ssb: provide phy address for Gigabit Ethernet driver

Add a function to provide the phy address which should be used to the
Gigabit Ethernet driver connected to ssb.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Reviewed-by: Nithin Nayak Sujir <nsujir@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ssb/ssb_driver_gige.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ssb/ssb_driver_gige.h b/include/linux/ssb/ssb_driver_gige.h
index 86a12b0cb239..0688472500bb 100644
--- a/include/linux/ssb/ssb_driver_gige.h
+++ b/include/linux/ssb/ssb_driver_gige.h
@@ -108,6 +108,16 @@ static inline int ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr)
 	return 0;
 }
 
+/* Get the device phy address */
+static inline int ssb_gige_get_phyaddr(struct pci_dev *pdev)
+{
+	struct ssb_gige *dev = pdev_to_ssb_gige(pdev);
+	if (!dev)
+		return -ENODEV;
+
+	return dev->dev->bus->sprom.et0phyaddr;
+}
+
 extern int ssb_gige_pcibios_plat_dev_init(struct ssb_device *sdev,
 					  struct pci_dev *pdev);
 extern int ssb_gige_map_irq(struct ssb_device *sdev,
@@ -174,6 +184,10 @@ static inline int ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr)
 {
 	return -ENODEV;
 }
+static inline int ssb_gige_get_phyaddr(struct pci_dev *pdev)
+{
+	return -ENODEV;
+}
 
 #endif /* CONFIG_SSB_DRIVER_GIGE */
 #endif /* LINUX_SSB_DRIVER_GIGE_H_ */
-- 
cgit v1.2.3-71-gd317


From 0b3d8e087bbee2a4e3f479d538a7edd3f1d2950c Mon Sep 17 00:00:00 2001
From: Denis Kirjanov <kda@linux-powerpc.org>
Date: Wed, 2 Oct 2013 05:58:32 +0400
Subject: include/linux/skbuff.h: move CONFIG_XFRM check inside the
 skb_sec_path()

And thus we have only one function definition

Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 71b1d9402fd3..1cd32f96055e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2708,17 +2708,14 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
 u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
 		  unsigned int num_tx_queues);
 
-#ifdef CONFIG_XFRM
 static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 {
+#ifdef CONFIG_XFRM
 	return skb->sp;
-}
 #else
-static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
-{
 	return NULL;
-}
 #endif
+}
 
 /* Keeps track of mac header offset relative to skb->head.
  * It is useful for TSO of Tunneling protocol. e.g. GRE.
-- 
cgit v1.2.3-71-gd317


From 5080546682bae3d32734b18e281091684f0ebbe4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 2 Oct 2013 04:29:50 -0700
Subject: inet: consolidate INET_TW_MATCH

TCP listener refactoring, part 2 :

We can use a generic lookup, sockets being in whatever state, if
we are sure all relevant fields are at the same place in all socket
types (ESTABLISH, TIME_WAIT, SYN_RECV)

This patch removes these macros :

 inet_addrpair, inet_addrpair, tw_addrpair, tw_portpair

And adds :

 sk_portpair, sk_addrpair, sk_daddr, sk_rcv_saddr

Then, INET_TW_MATCH() is really the same than INET_MATCH()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h             |  4 ++--
 include/net/inet_hashtables.h    | 26 ++++++++------------------
 include/net/inet_sock.h          |  2 --
 include/net/inet_timewait_sock.h |  8 --------
 include/net/sock.h               |  4 ++++
 net/ipv4/inet_connection_sock.c  | 11 +++++------
 net/ipv6/udp.c                   |  6 ++----
 7 files changed, 21 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 28ea38439313..b7f1f3bb346d 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -370,7 +370,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 
 #define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif)	\
-	((inet_sk(__sk)->inet_portpair == (__ports))		&&	\
+	(((__sk)->sk_portpair == (__ports))			&&	\
 	 ((__sk)->sk_family == AF_INET6)			&&	\
 	 ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr))	&&	\
 	 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr))	&&	\
@@ -379,7 +379,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 	 net_eq(sock_net(__sk), (__net)))
 
 #define INET6_TW_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif)	   \
-	((inet_twsk(__sk)->tw_portpair == (__ports))			&& \
+	(((__sk)->sk_portpair == (__ports))				&& \
 	 ((__sk)->sk_family == AF_INET6)				&& \
 	 ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr))	&& \
 	 ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_rcv_saddr, (__daddr)) && \
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 594dfeead70f..10d6838378c3 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -302,35 +302,25 @@ static inline struct sock *inet_lookup_listener(struct net *net,
 				   ((__force __u64)(__be32)(__saddr)));
 #endif /* __BIG_ENDIAN */
 #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)	\
-	((inet_sk(__sk)->inet_portpair == (__ports))		&&	\
-	 (inet_sk(__sk)->inet_addrpair == (__cookie))		&&	\
+	(((__sk)->sk_portpair == (__ports))			&&	\
+	 ((__sk)->sk_addrpair == (__cookie))			&&	\
 	 (!(__sk)->sk_bound_dev_if	||				\
 	   ((__sk)->sk_bound_dev_if == (__dif))) 		&& 	\
 	 net_eq(sock_net(__sk), (__net)))
-#define INET_TW_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)\
-	((inet_twsk(__sk)->tw_portpair == (__ports))	&&		\
-	 (inet_twsk(__sk)->tw_addrpair == (__cookie))	&&		\
-	 (!(__sk)->sk_bound_dev_if	||				\
-	   ((__sk)->sk_bound_dev_if == (__dif)))	&&		\
-	 net_eq(sock_net(__sk), (__net)))
 #else /* 32-bit arch */
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr)
 #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
-	((inet_sk(__sk)->inet_portpair == (__ports))	&&		\
-	 (inet_sk(__sk)->inet_daddr	== (__saddr))	&&		\
-	 (inet_sk(__sk)->inet_rcv_saddr	== (__daddr))	&&		\
-	 (!(__sk)->sk_bound_dev_if	||				\
-	   ((__sk)->sk_bound_dev_if == (__dif))) 	&&		\
-	 net_eq(sock_net(__sk), (__net)))
-#define INET_TW_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
-	((inet_twsk(__sk)->tw_portpair == (__ports))	&&		\
-	 (inet_twsk(__sk)->tw_daddr	== (__saddr))	&&		\
-	 (inet_twsk(__sk)->tw_rcv_saddr	== (__daddr))	&&		\
+	(((__sk)->sk_portpair == (__ports))		&&		\
+	 ((__sk)->sk_daddr	== (__saddr))		&&		\
+	 ((__sk)->sk_rcv_saddr	== (__daddr))		&&		\
 	 (!(__sk)->sk_bound_dev_if	||				\
 	   ((__sk)->sk_bound_dev_if == (__dif))) 	&&		\
 	 net_eq(sock_net(__sk), (__net)))
 #endif /* 64-bit arch */
 
+#define INET_TW_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)\
+	INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)
+
 /*
  * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
  * not check it for lookups anymore, thanks Alexey. -DaveM
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index f3141773c14d..6d9a7e6eb5a4 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -146,10 +146,8 @@ struct inet_sock {
 	/* Socket demultiplex comparisons on incoming packets. */
 #define inet_daddr		sk.__sk_common.skc_daddr
 #define inet_rcv_saddr		sk.__sk_common.skc_rcv_saddr
-#define inet_addrpair		sk.__sk_common.skc_addrpair
 #define inet_dport		sk.__sk_common.skc_dport
 #define inet_num		sk.__sk_common.skc_num
-#define inet_portpair		sk.__sk_common.skc_portpair
 
 	__be32			inet_saddr;
 	__s16			uc_ttl;
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 828200ab1125..48fd3561722c 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -112,10 +112,8 @@ struct inet_timewait_sock {
 #define tw_net			__tw_common.skc_net
 #define tw_daddr        	__tw_common.skc_daddr
 #define tw_rcv_saddr    	__tw_common.skc_rcv_saddr
-#define tw_addrpair		__tw_common.skc_addrpair
 #define tw_dport		__tw_common.skc_dport
 #define tw_num			__tw_common.skc_num
-#define tw_portpair		__tw_common.skc_portpair
 
 	int			tw_timeout;
 	volatile unsigned char	tw_substate;
@@ -189,12 +187,6 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
 	return (struct inet_timewait_sock *)sk;
 }
 
-static inline __be32 sk_rcv_saddr(const struct sock *sk)
-{
-/* both inet_sk() and inet_twsk() store rcv_saddr in skc_rcv_saddr */
-	return sk->__sk_common.skc_rcv_saddr;
-}
-
 void inet_twsk_put(struct inet_timewait_sock *tw);
 
 int inet_twsk_unhash(struct inet_timewait_sock *tw);
diff --git a/include/net/sock.h b/include/net/sock.h
index f0a44cc1ff9d..e3bf213be625 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -300,6 +300,10 @@ struct sock {
 #define sk_dontcopy_begin	__sk_common.skc_dontcopy_begin
 #define sk_dontcopy_end		__sk_common.skc_dontcopy_end
 #define sk_hash			__sk_common.skc_hash
+#define sk_portpair		__sk_common.skc_portpair
+#define sk_addrpair		__sk_common.skc_addrpair
+#define sk_daddr		__sk_common.skc_daddr
+#define sk_rcv_saddr		__sk_common.skc_rcv_saddr
 #define sk_family		__sk_common.skc_family
 #define sk_state		__sk_common.skc_state
 #define sk_reuse		__sk_common.skc_reuse
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7ac7aa11130e..56e82a4027b4 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -71,17 +71,16 @@ int inet_csk_bind_conflict(const struct sock *sk,
 			    (!reuseport || !sk2->sk_reuseport ||
 			    (sk2->sk_state != TCP_TIME_WAIT &&
 			     !uid_eq(uid, sock_i_uid(sk2))))) {
-				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
-				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
-				    sk2_rcv_saddr == sk_rcv_saddr(sk))
+
+				if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
+				    sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
 					break;
 			}
 			if (!relax && reuse && sk2->sk_reuse &&
 			    sk2->sk_state != TCP_LISTEN) {
-				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
 
-				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
-				    sk2_rcv_saddr == sk_rcv_saddr(sk))
+				if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
+				    sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
 					break;
 			}
 		}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 72b7eaaf3ca0..8119791e8a95 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -57,8 +57,6 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 {
 	const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
 	const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
-	__be32 sk1_rcv_saddr = sk_rcv_saddr(sk);
-	__be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
 	int sk_ipv6only = ipv6_only_sock(sk);
 	int sk2_ipv6only = inet_v6_ipv6only(sk2);
 	int addr_type = ipv6_addr_type(sk_rcv_saddr6);
@@ -67,8 +65,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 	/* if both are mapped, treat as IPv4 */
 	if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
 		return (!sk2_ipv6only &&
-			(!sk1_rcv_saddr || !sk2_rcv_saddr ||
-			  sk1_rcv_saddr == sk2_rcv_saddr));
+			(!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr ||
+			  sk->sk_rcv_saddr == sk2->sk_rcv_saddr));
 
 	if (addr_type2 == IPV6_ADDR_ANY &&
 	    !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
-- 
cgit v1.2.3-71-gd317


From 5cde282938915f36a2e6769b51c24c4159654859 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 5 Oct 2013 19:26:05 -0700
Subject: net: Separate the close_list and the unreg_list v2

Separate the unreg_list and the close_list in dev_close_many preventing
dev_close_many from permuting the unreg_list.  The permutations of the
unreg_list have resulted in cases where the loopback device is accessed
it has been freed in code such as dst_ifdown.  Resulting in subtle memory
corruption.

This is the second bug from sharing the storage between the close_list
and the unreg_list.  The issues that crop up with sharing are
apparently too subtle to show up in normal testing or usage, so let's
forget about being clever and use two separate lists.

v2: Make all callers pass in a close_list to dev_close_many

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 25 ++++++++++++++-----------
 net/sched/sch_generic.c   |  6 +++---
 3 files changed, 18 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f5cd464271bf..6d77e0f3cc10 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1143,6 +1143,7 @@ struct net_device {
 	struct list_head	dev_list;
 	struct list_head	napi_list;
 	struct list_head	unreg_list;
+	struct list_head	close_list;
 
 	/* directly linked devices, like slaves for bonding */
 	struct {
diff --git a/net/core/dev.c b/net/core/dev.c
index c25db20a4246..fa0b2b06c1a6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1307,7 +1307,7 @@ static int __dev_close_many(struct list_head *head)
 	ASSERT_RTNL();
 	might_sleep();
 
-	list_for_each_entry(dev, head, unreg_list) {
+	list_for_each_entry(dev, head, close_list) {
 		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
 
 		clear_bit(__LINK_STATE_START, &dev->state);
@@ -1323,7 +1323,7 @@ static int __dev_close_many(struct list_head *head)
 
 	dev_deactivate_many(head);
 
-	list_for_each_entry(dev, head, unreg_list) {
+	list_for_each_entry(dev, head, close_list) {
 		const struct net_device_ops *ops = dev->netdev_ops;
 
 		/*
@@ -1351,7 +1351,7 @@ static int __dev_close(struct net_device *dev)
 	/* Temporarily disable netpoll until the interface is down */
 	netpoll_rx_disable(dev);
 
-	list_add(&dev->unreg_list, &single);
+	list_add(&dev->close_list, &single);
 	retval = __dev_close_many(&single);
 	list_del(&single);
 
@@ -1362,21 +1362,20 @@ static int __dev_close(struct net_device *dev)
 static int dev_close_many(struct list_head *head)
 {
 	struct net_device *dev, *tmp;
-	LIST_HEAD(tmp_list);
 
-	list_for_each_entry_safe(dev, tmp, head, unreg_list)
+	/* Remove the devices that don't need to be closed */
+	list_for_each_entry_safe(dev, tmp, head, close_list)
 		if (!(dev->flags & IFF_UP))
-			list_move(&dev->unreg_list, &tmp_list);
+			list_del_init(&dev->close_list);
 
 	__dev_close_many(head);
 
-	list_for_each_entry(dev, head, unreg_list) {
+	list_for_each_entry_safe(dev, tmp, head, close_list) {
 		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
 		call_netdevice_notifiers(NETDEV_DOWN, dev);
+		list_del_init(&dev->close_list);
 	}
 
-	/* rollback_registered_many needs the complete original list */
-	list_splice(&tmp_list, head);
 	return 0;
 }
 
@@ -1397,7 +1396,7 @@ int dev_close(struct net_device *dev)
 		/* Block netpoll rx while the interface is going down */
 		netpoll_rx_disable(dev);
 
-		list_add(&dev->unreg_list, &single);
+		list_add(&dev->close_list, &single);
 		dev_close_many(&single);
 		list_del(&single);
 
@@ -5439,6 +5438,7 @@ static void net_set_todo(struct net_device *dev)
 static void rollback_registered_many(struct list_head *head)
 {
 	struct net_device *dev, *tmp;
+	LIST_HEAD(close_head);
 
 	BUG_ON(dev_boot_phase);
 	ASSERT_RTNL();
@@ -5461,7 +5461,9 @@ static void rollback_registered_many(struct list_head *head)
 	}
 
 	/* If device is running, close it first. */
-	dev_close_many(head);
+	list_for_each_entry(dev, head, unreg_list)
+		list_add_tail(&dev->close_list, &close_head);
+	dev_close_many(&close_head);
 
 	list_for_each_entry(dev, head, unreg_list) {
 		/* And unlink it from device chain. */
@@ -6257,6 +6259,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
+	INIT_LIST_HEAD(&dev->close_list);
 	INIT_LIST_HEAD(&dev->link_watch_list);
 	INIT_LIST_HEAD(&dev->adj_list.upper);
 	INIT_LIST_HEAD(&dev->adj_list.lower);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e7121d29c4bd..7fc899a943a8 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -829,7 +829,7 @@ void dev_deactivate_many(struct list_head *head)
 	struct net_device *dev;
 	bool sync_needed = false;
 
-	list_for_each_entry(dev, head, unreg_list) {
+	list_for_each_entry(dev, head, close_list) {
 		netdev_for_each_tx_queue(dev, dev_deactivate_queue,
 					 &noop_qdisc);
 		if (dev_ingress_queue(dev))
@@ -848,7 +848,7 @@ void dev_deactivate_many(struct list_head *head)
 		synchronize_net();
 
 	/* Wait for outstanding qdisc_run calls. */
-	list_for_each_entry(dev, head, unreg_list)
+	list_for_each_entry(dev, head, close_list)
 		while (some_qdisc_is_busy(dev))
 			yield();
 }
@@ -857,7 +857,7 @@ void dev_deactivate(struct net_device *dev)
 {
 	LIST_HEAD(single);
 
-	list_add(&dev->unreg_list, &single);
+	list_add(&dev->close_list, &single);
 	dev_deactivate_many(&single);
 	list_del(&single);
 }
-- 
cgit v1.2.3-71-gd317


From efe4208f47f907b86f528788da711e8ab9dea44d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 3 Oct 2013 15:42:29 -0700
Subject: ipv6: make lookups simpler and faster

TCP listener refactoring, part 4 :

To speed up inet lookups, we moved IPv4 addresses from inet to struct
sock_common

Now is time to do the same for IPv6, because it permits us to have fast
lookups for all kind of sockets, including upcoming SYN_RECV.

Getting IPv6 addresses in TCP lookups currently requires two extra cache
lines, plus a dereference (and memory stall).

inet6_sk(sk) does the dereference of inet_sk(__sk)->pinet6

This patch is way bigger than its IPv4 counter part, because for IPv4,
we could add aliases (inet_daddr, inet_rcv_saddr), while on IPv6,
it's not doable easily.

inet6_sk(sk)->daddr becomes sk->sk_v6_daddr
inet6_sk(sk)->rcv_saddr becomes sk->sk_v6_rcv_saddr

And timewait socket also have tw->tw_v6_daddr & tw->tw_v6_rcv_saddr
at the same offset.

We get rid of INET6_TW_MATCH() as INET6_MATCH() is now the generic
macro.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h                           | 46 +++-----------------
 include/net/inet6_hashtables.h                 |  5 +--
 include/net/inet_timewait_sock.h               |  4 +-
 include/net/ip.h                               |  2 +-
 include/net/ip6_checksum.h                     |  2 +-
 include/net/sock.h                             |  9 ++++
 net/dccp/ipv6.c                                | 24 +++++------
 net/dccp/ipv6.h                                |  1 -
 net/dccp/minisocks.c                           |  7 +---
 net/ipv4/inet_diag.c                           | 35 +++++++---------
 net/ipv4/ping.c                                | 15 ++++---
 net/ipv4/tcp_metrics.c                         | 10 ++---
 net/ipv4/tcp_minisocks.c                       |  7 +---
 net/ipv4/tcp_probe.c                           | 29 +++++--------
 net/ipv4/tcp_timer.c                           |  3 +-
 net/ipv6/af_inet6.c                            | 10 ++---
 net/ipv6/datagram.c                            | 25 ++++++-----
 net/ipv6/inet6_connection_sock.c               |  7 ++--
 net/ipv6/inet6_hashtables.c                    | 58 +++++++++-----------------
 net/ipv6/ipv6_sockglue.c                       |  7 ++--
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  4 +-
 net/ipv6/ping.c                                |  2 +-
 net/ipv6/raw.c                                 | 17 ++++----
 net/ipv6/tcp_ipv6.c                            | 44 ++++++++++---------
 net/ipv6/udp.c                                 | 48 ++++++++++-----------
 net/l2tp/l2tp_core.c                           | 10 ++---
 net/l2tp/l2tp_debugfs.c                        |  5 ++-
 net/l2tp/l2tp_ip6.c                            | 16 +++----
 net/l2tp/l2tp_netlink.c                        |  4 +-
 net/l2tp/l2tp_ppp.c                            | 12 +++---
 net/netfilter/xt_TPROXY.c                      |  2 +-
 net/netfilter/xt_socket.c                      |  2 +-
 net/sctp/ipv6.c                                | 22 +++++-----
 net/sunrpc/svcsock.c                           |  2 +-
 security/lsm_audit.c                           |  5 +--
 35 files changed, 213 insertions(+), 288 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index b7f1f3bb346d..35f6c1b562c4 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -141,8 +141,6 @@ struct ipv6_fl_socklist;
  */
 struct ipv6_pinfo {
 	struct in6_addr 	saddr;
-	struct in6_addr 	rcv_saddr;
-	struct in6_addr		daddr;
 	struct in6_pktinfo	sticky_pktinfo;
 	const struct in6_addr		*daddr_cache;
 #ifdef CONFIG_IPV6_SUBTREES
@@ -256,22 +254,10 @@ struct tcp6_sock {
 
 extern int inet6_sk_rebuild_header(struct sock *sk);
 
-struct inet6_timewait_sock {
-	struct in6_addr tw_v6_daddr;
-	struct in6_addr	tw_v6_rcv_saddr;
-};
-
 struct tcp6_timewait_sock {
 	struct tcp_timewait_sock   tcp6tw_tcp;
-	struct inet6_timewait_sock tcp6tw_inet6;
 };
 
-static inline struct inet6_timewait_sock *inet6_twsk(const struct sock *sk)
-{
-	return (struct inet6_timewait_sock *)(((u8 *)sk) +
-					      inet_twsk(sk)->tw_ipv6_offset);
-}
-
 #if IS_ENABLED(CONFIG_IPV6)
 static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
 {
@@ -321,21 +307,11 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
 #define __ipv6_only_sock(sk)	(inet6_sk(sk)->ipv6only)
 #define ipv6_only_sock(sk)	((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk))
 
-static inline u16 inet6_tw_offset(const struct proto *prot)
-{
-	return prot->twsk_prot->twsk_obj_size -
-			sizeof(struct inet6_timewait_sock);
-}
-
-static inline struct in6_addr *__inet6_rcv_saddr(const struct sock *sk)
+static inline const struct in6_addr *inet6_rcv_saddr(const struct sock *sk)
 {
-	return likely(sk->sk_state != TCP_TIME_WAIT) ?
-		&inet6_sk(sk)->rcv_saddr : &inet6_twsk(sk)->tw_v6_rcv_saddr;
-}
-
-static inline struct in6_addr *inet6_rcv_saddr(const struct sock *sk)
-{
-	return sk->sk_family == AF_INET6 ? __inet6_rcv_saddr(sk) : NULL;
+	if (sk->sk_family == AF_INET6)
+		return &sk->sk_v6_rcv_saddr;
+	return NULL;
 }
 
 static inline int inet_v6_ipv6only(const struct sock *sk)
@@ -363,7 +339,6 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 	return NULL;
 }
 
-#define __inet6_rcv_saddr(__sk)	NULL
 #define inet6_rcv_saddr(__sk)	NULL
 #define tcp_twsk_ipv6only(__sk)		0
 #define inet_v6_ipv6only(__sk)		0
@@ -372,19 +347,10 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 #define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif)	\
 	(((__sk)->sk_portpair == (__ports))			&&	\
 	 ((__sk)->sk_family == AF_INET6)			&&	\
-	 ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr))	&&	\
-	 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr))	&&	\
+	 ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr))		&&	\
+	 ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr))	&&	\
 	 (!(__sk)->sk_bound_dev_if	||				\
 	   ((__sk)->sk_bound_dev_if == (__dif))) 		&&	\
 	 net_eq(sock_net(__sk), (__net)))
 
-#define INET6_TW_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif)	   \
-	(((__sk)->sk_portpair == (__ports))				&& \
-	 ((__sk)->sk_family == AF_INET6)				&& \
-	 ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr))	&& \
-	 ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_rcv_saddr, (__daddr)) && \
-	 (!(__sk)->sk_bound_dev_if	||				   \
-	  ((__sk)->sk_bound_dev_if == (__dif)))				&& \
-	 net_eq(sock_net(__sk), (__net)))
-
 #endif /* _IPV6_H */
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index f52fa88feb64..a105d1a2fc00 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -43,9 +43,8 @@ static inline unsigned int inet6_ehashfn(struct net *net,
 static inline int inet6_sk_ehashfn(const struct sock *sk)
 {
 	const struct inet_sock *inet = inet_sk(sk);
-	const struct ipv6_pinfo *np = inet6_sk(sk);
-	const struct in6_addr *laddr = &np->rcv_saddr;
-	const struct in6_addr *faddr = &np->daddr;
+	const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr;
+	const struct in6_addr *faddr = &sk->sk_v6_daddr;
 	const __u16 lport = inet->inet_num;
 	const __be16 fport = inet->inet_dport;
 	struct net *net = sock_net(sk);
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index de9e3ab7d43d..b647c6270eb7 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -116,7 +116,9 @@ struct inet_timewait_sock {
 #define tw_prot			__tw_common.skc_prot
 #define tw_net			__tw_common.skc_net
 #define tw_daddr        	__tw_common.skc_daddr
+#define tw_v6_daddr		__tw_common.skc_v6_daddr
 #define tw_rcv_saddr    	__tw_common.skc_rcv_saddr
+#define tw_v6_rcv_saddr    	__tw_common.skc_v6_rcv_saddr
 #define tw_dport		__tw_common.skc_dport
 #define tw_num			__tw_common.skc_num
 
@@ -133,7 +135,7 @@ struct inet_timewait_sock {
 				tw_transparent  : 1,
 				tw_pad		: 6,	/* 6 bits hole */
 				tw_tos		: 8,
-				tw_ipv6_offset  : 16;
+				tw_pad2		: 16 /* 16 bits hole */
 	kmemcheck_bitfield_end(flags);
 	u32			tw_ttd;
 	struct inet_bind_bucket	*tw_tb;
diff --git a/include/net/ip.h b/include/net/ip.h
index b39ebe5339ac..217bc5bfc6c6 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -374,7 +374,7 @@ static __inline__ void inet_reset_saddr(struct sock *sk)
 		struct ipv6_pinfo *np = inet6_sk(sk);
 
 		memset(&np->saddr, 0, sizeof(np->saddr));
-		memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr));
+		memset(&sk->sk_v6_rcv_saddr, 0, sizeof(sk->sk_v6_rcv_saddr));
 	}
 #endif
 }
diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h
index 7686e3f5033d..1944406949ba 100644
--- a/include/net/ip6_checksum.h
+++ b/include/net/ip6_checksum.h
@@ -70,7 +70,7 @@ static inline void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 
-	__tcp_v6_send_check(skb, &np->saddr, &np->daddr);
+	__tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
 }
 
 int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto);
diff --git a/include/net/sock.h b/include/net/sock.h
index 3f3e48c4704d..7e50df5c71d4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -191,6 +191,12 @@ struct sock_common {
 #ifdef CONFIG_NET_NS
 	struct net	 	*skc_net;
 #endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+	struct in6_addr		skc_v6_daddr;
+	struct in6_addr		skc_v6_rcv_saddr;
+#endif
+
 	/*
 	 * fields between dontcopy_begin/dontcopy_end
 	 * are not copied in sock_copy()
@@ -314,6 +320,9 @@ struct sock {
 #define sk_bind_node		__sk_common.skc_bind_node
 #define sk_prot			__sk_common.skc_prot
 #define sk_net			__sk_common.skc_net
+#define sk_v6_daddr		__sk_common.skc_v6_daddr
+#define sk_v6_rcv_saddr	__sk_common.skc_v6_rcv_saddr
+
 	socket_lock_t		sk_lock;
 	struct sk_buff_head	sk_receive_queue;
 	/*
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6cf9f7782ad4..7f075b83128a 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -67,7 +67,7 @@ static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb)
 	struct dccp_hdr *dh = dccp_hdr(skb);
 
 	dccp_csum_outgoing(skb);
-	dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr);
+	dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr);
 }
 
 static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
@@ -467,11 +467,11 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
+		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);
 
 		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
 
-		newnp->rcv_saddr = newnp->saddr;
+		newsk->sk_v6_rcv_saddr = newnp->saddr;
 
 		inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
 		newsk->sk_backlog_rcv = dccp_v4_do_rcv;
@@ -538,9 +538,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-	newnp->daddr = ireq6->rmt_addr;
+	newsk->sk_v6_daddr = ireq6->rmt_addr;
 	newnp->saddr = ireq6->loc_addr;
-	newnp->rcv_saddr = ireq6->loc_addr;
+	newsk->sk_v6_rcv_saddr = ireq6->loc_addr;
 	newsk->sk_bound_dev_if = ireq6->iif;
 
 	/* Now IPv6 options...
@@ -885,7 +885,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			return -EINVAL;
 	}
 
-	np->daddr = usin->sin6_addr;
+	sk->sk_v6_daddr = usin->sin6_addr;
 	np->flow_label = fl6.flowlabel;
 
 	/*
@@ -915,16 +915,16 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			goto failure;
 		}
 		ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
-		ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr);
+		ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &sk->sk_v6_rcv_saddr);
 
 		return err;
 	}
 
-	if (!ipv6_addr_any(&np->rcv_saddr))
-		saddr = &np->rcv_saddr;
+	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+		saddr = &sk->sk_v6_rcv_saddr;
 
 	fl6.flowi6_proto = IPPROTO_DCCP;
-	fl6.daddr = np->daddr;
+	fl6.daddr = sk->sk_v6_daddr;
 	fl6.saddr = saddr ? *saddr : np->saddr;
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
 	fl6.fl6_dport = usin->sin6_port;
@@ -941,7 +941,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	if (saddr == NULL) {
 		saddr = &fl6.saddr;
-		np->rcv_saddr = *saddr;
+		sk->sk_v6_rcv_saddr = *saddr;
 	}
 
 	/* set the source address */
@@ -963,7 +963,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		goto late_failure;
 
 	dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32,
-						      np->daddr.s6_addr32,
+						      sk->sk_v6_daddr.s6_addr32,
 						      inet->inet_sport,
 						      inet->inet_dport);
 	err = dccp_connect(sk);
diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h
index 6eef81fdbe56..6604fc3fe953 100644
--- a/net/dccp/ipv6.h
+++ b/net/dccp/ipv6.h
@@ -30,7 +30,6 @@ struct dccp6_request_sock {
 
 struct dccp6_timewait_sock {
 	struct inet_timewait_sock   inet;
-	struct inet6_timewait_sock  tw6;
 };
 
 #endif /* _DCCP_IPV6_H */
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 662071b249cc..32e80d96d4c0 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -56,12 +56,9 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
 #if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == PF_INET6) {
 			const struct ipv6_pinfo *np = inet6_sk(sk);
-			struct inet6_timewait_sock *tw6;
 
-			tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
-			tw6 = inet6_twsk((struct sock *)tw);
-			tw6->tw_v6_daddr = np->daddr;
-			tw6->tw_v6_rcv_saddr = np->rcv_saddr;
+			tw->tw_v6_daddr = sk->sk_v6_daddr;
+			tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
 			tw->tw_ipv6only = np->ipv6only;
 		}
 #endif
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 8e1e40653357..ecc179d676e4 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -121,13 +121,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 
 #if IS_ENABLED(CONFIG_IPV6)
 	if (r->idiag_family == AF_INET6) {
-		const struct ipv6_pinfo *np = inet6_sk(sk);
 
-		*(struct in6_addr *)r->id.idiag_src = np->rcv_saddr;
-		*(struct in6_addr *)r->id.idiag_dst = np->daddr;
+		*(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr;
+		*(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr;
 
 		if (ext & (1 << (INET_DIAG_TCLASS - 1)))
-			if (nla_put_u8(skb, INET_DIAG_TCLASS, np->tclass) < 0)
+			if (nla_put_u8(skb, INET_DIAG_TCLASS,
+				       inet6_sk(sk)->tclass) < 0)
 				goto errout;
 	}
 #endif
@@ -255,11 +255,8 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 	r->idiag_inode	      = 0;
 #if IS_ENABLED(CONFIG_IPV6)
 	if (tw->tw_family == AF_INET6) {
-		const struct inet6_timewait_sock *tw6 =
-						inet6_twsk((struct sock *)tw);
-
-		*(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr;
-		*(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr;
+		*(struct in6_addr *)r->id.idiag_src = tw->tw_v6_rcv_saddr;
+		*(struct in6_addr *)r->id.idiag_dst = tw->tw_v6_daddr;
 	}
 #endif
 
@@ -273,10 +270,11 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 			const struct nlmsghdr *unlh)
 {
 	if (sk->sk_state == TCP_TIME_WAIT)
-		return inet_twsk_diag_fill((struct inet_timewait_sock *)sk,
-					   skb, r, portid, seq, nlmsg_flags,
-					   unlh);
-	return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh);
+		return inet_twsk_diag_fill(inet_twsk(sk), skb, r, portid, seq,
+					   nlmsg_flags, unlh);
+
+	return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
+				  nlmsg_flags, unlh);
 }
 
 int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
@@ -489,10 +487,9 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
 	entry.family = sk->sk_family;
 #if IS_ENABLED(CONFIG_IPV6)
 	if (entry.family == AF_INET6) {
-		struct ipv6_pinfo *np = inet6_sk(sk);
 
-		entry.saddr = np->rcv_saddr.s6_addr32;
-		entry.daddr = np->daddr.s6_addr32;
+		entry.saddr = sk->sk_v6_rcv_saddr.s6_addr32;
+		entry.daddr = sk->sk_v6_daddr.s6_addr32;
 	} else
 #endif
 	{
@@ -649,10 +646,8 @@ static int inet_twsk_diag_dump(struct sock *sk,
 		entry.family = tw->tw_family;
 #if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == AF_INET6) {
-			struct inet6_timewait_sock *tw6 =
-						inet6_twsk((struct sock *)tw);
-			entry.saddr = tw6->tw_v6_rcv_saddr.s6_addr32;
-			entry.daddr = tw6->tw_v6_daddr.s6_addr32;
+			entry.saddr = tw->tw_v6_rcv_saddr.s6_addr32;
+			entry.daddr = tw->tw_v6_daddr.s6_addr32;
 		} else
 #endif
 		{
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index a62610443152..ccefc07beacd 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -202,15 +202,14 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
 #if IS_ENABLED(CONFIG_IPV6)
 		} else if (skb->protocol == htons(ETH_P_IPV6) &&
 			   sk->sk_family == AF_INET6) {
-			struct ipv6_pinfo *np = inet6_sk(sk);
 
 			pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk,
 				 (int) isk->inet_num,
-				 &inet6_sk(sk)->rcv_saddr,
+				 &sk->sk_v6_rcv_saddr,
 				 sk->sk_bound_dev_if);
 
-			if (!ipv6_addr_any(&np->rcv_saddr) &&
-			    !ipv6_addr_equal(&np->rcv_saddr,
+			if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
+			    !ipv6_addr_equal(&sk->sk_v6_rcv_saddr,
 					     &ipv6_hdr(skb)->daddr))
 				continue;
 #endif
@@ -362,7 +361,7 @@ static void ping_set_saddr(struct sock *sk, struct sockaddr *saddr)
 	} else if (saddr->sa_family == AF_INET6) {
 		struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr;
 		struct ipv6_pinfo *np = inet6_sk(sk);
-		np->rcv_saddr = np->saddr = addr->sin6_addr;
+		sk->sk_v6_rcv_saddr = np->saddr = addr->sin6_addr;
 #endif
 	}
 }
@@ -376,7 +375,7 @@ static void ping_clear_saddr(struct sock *sk, int dif)
 #if IS_ENABLED(CONFIG_IPV6)
 	} else if (sk->sk_family == AF_INET6) {
 		struct ipv6_pinfo *np = inet6_sk(sk);
-		memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr));
+		memset(&sk->sk_v6_rcv_saddr, 0, sizeof(sk->sk_v6_rcv_saddr));
 		memset(&np->saddr, 0, sizeof(np->saddr));
 #endif
 	}
@@ -418,7 +417,7 @@ int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	err = 0;
 	if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) ||
 	    (sk->sk_family == AF_INET6 &&
-	     !ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)))
+	     !ipv6_addr_any(&sk->sk_v6_rcv_saddr)))
 		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
 
 	if (snum)
@@ -429,7 +428,7 @@ int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 #if IS_ENABLED(CONFIG_IPV6)
 	if (sk->sk_family == AF_INET6)
-		memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr));
+		memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
 #endif
 
 	sk_dst_reset(sk);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 52f3c6b971d2..27535fd5ea10 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -240,7 +240,6 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
 
 static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
 {
-	struct inet6_timewait_sock *tw6;
 	struct tcp_metrics_block *tm;
 	struct inetpeer_addr addr;
 	unsigned int hash;
@@ -253,9 +252,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock
 		hash = (__force unsigned int) addr.addr.a4;
 		break;
 	case AF_INET6:
-		tw6 = inet6_twsk((struct sock *)tw);
-		*(struct in6_addr *)addr.addr.a6 = tw6->tw_v6_daddr;
-		hash = ipv6_addr_hash(&tw6->tw_v6_daddr);
+		*(struct in6_addr *)addr.addr.a6 = tw->tw_v6_daddr;
+		hash = ipv6_addr_hash(&tw->tw_v6_daddr);
 		break;
 	default:
 		return NULL;
@@ -289,8 +287,8 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
 		hash = (__force unsigned int) addr.addr.a4;
 		break;
 	case AF_INET6:
-		*(struct in6_addr *)addr.addr.a6 = inet6_sk(sk)->daddr;
-		hash = ipv6_addr_hash(&inet6_sk(sk)->daddr);
+		*(struct in6_addr *)addr.addr.a6 = sk->sk_v6_daddr;
+		hash = ipv6_addr_hash(&sk->sk_v6_daddr);
 		break;
 	default:
 		return NULL;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 58a3e69aef64..97b684159861 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -293,12 +293,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 #if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == PF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
-			struct inet6_timewait_sock *tw6;
 
-			tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
-			tw6 = inet6_twsk((struct sock *)tw);
-			tw6->tw_v6_daddr = np->daddr;
-			tw6->tw_v6_rcv_saddr = np->rcv_saddr;
+			tw->tw_v6_daddr = sk->sk_v6_daddr;
+			tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
 			tw->tw_tclass = np->tclass;
 			tw->tw_ipv6only = np->ipv6only;
 		}
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 611beab38a00..8b97d71e193b 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -101,22 +101,6 @@ static inline int tcp_probe_avail(void)
 		si4.sin_addr.s_addr = inet->inet_##mem##addr;	\
 	} while (0)						\
 
-#if IS_ENABLED(CONFIG_IPV6)
-#define tcp_probe_copy_fl_to_si6(inet, si6, mem)		\
-	do {							\
-		struct ipv6_pinfo *pi6 = inet->pinet6;		\
-		si6.sin6_family = AF_INET6;			\
-		si6.sin6_port = inet->inet_##mem##port;		\
-		si6.sin6_addr = pi6->mem##addr;			\
-		si6.sin6_flowinfo = 0; /* No need here. */	\
-		si6.sin6_scope_id = 0;	/* No need here. */	\
-	} while (0)
-#else
-#define tcp_probe_copy_fl_to_si6(fl, si6, mem)			\
-	do {							\
-		memset(&si6, 0, sizeof(si6));			\
-	} while (0)
-#endif
 
 /*
  * Hook inserted to be called before each receive packet.
@@ -147,8 +131,17 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d);
 				break;
 			case AF_INET6:
-				tcp_probe_copy_fl_to_si6(inet, p->src.v6, s);
-				tcp_probe_copy_fl_to_si6(inet, p->dst.v6, d);
+				memset(&p->src.v6, 0, sizeof(p->src.v6));
+				memset(&p->dst.v6, 0, sizeof(p->dst.v6));
+#if IS_ENABLED(CONFIG_IPV6)
+				p->src.v6.sin6_family = AF_INET6;
+				p->src.v6.sin6_port = inet->inet_sport;
+				p->src.v6.sin6_addr = inet6_sk(sk)->saddr;
+
+				p->dst.v6.sin6_family = AF_INET6;
+				p->dst.v6.sin6_port = inet->inet_dport;
+				p->dst.v6.sin6_addr = sk->sk_v6_daddr;
+#endif
 				break;
 			default:
 				BUG();
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4b85e6f636c9..af07b5b23ebf 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -374,9 +374,8 @@ void tcp_retransmit_timer(struct sock *sk)
 		}
 #if IS_ENABLED(CONFIG_IPV6)
 		else if (sk->sk_family == AF_INET6) {
-			struct ipv6_pinfo *np = inet6_sk(sk);
 			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"),
-				       &np->daddr,
+				       &sk->sk_v6_daddr,
 				       ntohs(inet->inet_dport), inet->inet_num,
 				       tp->snd_una, tp->snd_nxt);
 		}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 4966b124dc2e..a2cb07cd3850 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -364,7 +364,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	inet->inet_rcv_saddr = v4addr;
 	inet->inet_saddr = v4addr;
 
-	np->rcv_saddr = addr->sin6_addr;
+	sk->sk_v6_rcv_saddr = addr->sin6_addr;
 
 	if (!(addr_type & IPV6_ADDR_MULTICAST))
 		np->saddr = addr->sin6_addr;
@@ -461,14 +461,14 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 		    peer == 1)
 			return -ENOTCONN;
 		sin->sin6_port = inet->inet_dport;
-		sin->sin6_addr = np->daddr;
+		sin->sin6_addr = sk->sk_v6_daddr;
 		if (np->sndflow)
 			sin->sin6_flowinfo = np->flow_label;
 	} else {
-		if (ipv6_addr_any(&np->rcv_saddr))
+		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
 			sin->sin6_addr = np->saddr;
 		else
-			sin->sin6_addr = np->rcv_saddr;
+			sin->sin6_addr = sk->sk_v6_rcv_saddr;
 
 		sin->sin6_port = inet->inet_sport;
 	}
@@ -655,7 +655,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
 
 		memset(&fl6, 0, sizeof(fl6));
 		fl6.flowi6_proto = sk->sk_protocol;
-		fl6.daddr = np->daddr;
+		fl6.daddr = sk->sk_v6_daddr;
 		fl6.saddr = np->saddr;
 		fl6.flowlabel = np->flow_label;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 48b6bd2a9a14..a454b0ff57c7 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -107,16 +107,16 @@ ipv4_connected:
 		if (err)
 			goto out;
 
-		ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr);
+		ipv6_addr_set_v4mapped(inet->inet_daddr, &sk->sk_v6_daddr);
 
 		if (ipv6_addr_any(&np->saddr) ||
 		    ipv6_mapped_addr_any(&np->saddr))
 			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
 
-		if (ipv6_addr_any(&np->rcv_saddr) ||
-		    ipv6_mapped_addr_any(&np->rcv_saddr)) {
+		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) ||
+		    ipv6_mapped_addr_any(&sk->sk_v6_rcv_saddr)) {
 			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
-					       &np->rcv_saddr);
+					       &sk->sk_v6_rcv_saddr);
 			if (sk->sk_prot->rehash)
 				sk->sk_prot->rehash(sk);
 		}
@@ -145,7 +145,7 @@ ipv4_connected:
 		}
 	}
 
-	np->daddr = *daddr;
+	sk->sk_v6_daddr = *daddr;
 	np->flow_label = fl6.flowlabel;
 
 	inet->inet_dport = usin->sin6_port;
@@ -156,7 +156,7 @@ ipv4_connected:
 	 */
 
 	fl6.flowi6_proto = sk->sk_protocol;
-	fl6.daddr = np->daddr;
+	fl6.daddr = sk->sk_v6_daddr;
 	fl6.saddr = np->saddr;
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
 	fl6.flowi6_mark = sk->sk_mark;
@@ -183,16 +183,16 @@ ipv4_connected:
 	if (ipv6_addr_any(&np->saddr))
 		np->saddr = fl6.saddr;
 
-	if (ipv6_addr_any(&np->rcv_saddr)) {
-		np->rcv_saddr = fl6.saddr;
+	if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+		sk->sk_v6_rcv_saddr = fl6.saddr;
 		inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 		if (sk->sk_prot->rehash)
 			sk->sk_prot->rehash(sk);
 	}
 
 	ip6_dst_store(sk, dst,
-		      ipv6_addr_equal(&fl6.daddr, &np->daddr) ?
-		      &np->daddr : NULL,
+		      ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
+		      &sk->sk_v6_daddr : NULL,
 #ifdef CONFIG_IPV6_SUBTREES
 		      ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
 		      &np->saddr :
@@ -883,11 +883,10 @@ EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl);
 void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
 			     __u16 srcp, __u16 destp, int bucket)
 {
-	struct ipv6_pinfo *np = inet6_sk(sp);
 	const struct in6_addr *dest, *src;
 
-	dest  = &np->daddr;
-	src   = &np->rcv_saddr;
+	dest  = &sp->sk_v6_daddr;
+	src   = &sp->sk_v6_rcv_saddr;
 	seq_printf(seq,
 		   "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n",
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index e4311cbc8b4e..b7400b480e74 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -165,11 +165,10 @@ EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
 
 void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
 {
-	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
 
 	sin6->sin6_family = AF_INET6;
-	sin6->sin6_addr = np->daddr;
+	sin6->sin6_addr = sk->sk_v6_daddr;
 	sin6->sin6_port	= inet_sk(sk)->inet_dport;
 	/* We do not store received flowlabel for TCP */
 	sin6->sin6_flowinfo = 0;
@@ -203,7 +202,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
 
 	memset(fl6, 0, sizeof(*fl6));
 	fl6->flowi6_proto = sk->sk_protocol;
-	fl6->daddr = np->daddr;
+	fl6->daddr = sk->sk_v6_daddr;
 	fl6->saddr = np->saddr;
 	fl6->flowlabel = np->flow_label;
 	IP6_ECN_flow_xmit(sk, fl6->flowlabel);
@@ -245,7 +244,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
 	skb_dst_set_noref(skb, dst);
 
 	/* Restore final destination back after routing done */
-	fl6.daddr = np->daddr;
+	fl6.daddr = sk->sk_v6_daddr;
 
 	res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
 	rcu_read_unlock();
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 46440777e1c5..842d833dfc18 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -89,30 +89,16 @@ begin:
 	sk_nulls_for_each_rcu(sk, node, &head->chain) {
 		if (sk->sk_hash != hash)
 			continue;
-		if (sk->sk_state == TCP_TIME_WAIT) {
-			if (!INET6_TW_MATCH(sk, net, saddr, daddr, ports, dif))
-				continue;
-		} else {
-			if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
-				continue;
-		}
+		if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
+			continue;
 		if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
 			goto out;
 
-		if (sk->sk_state == TCP_TIME_WAIT) {
-			if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr,
-						     ports, dif))) {
-				sock_gen_put(sk);
-				goto begin;
-			}
-		} else {
-			if (unlikely(!INET6_MATCH(sk, net, saddr, daddr,
-						  ports, dif))) {
-				sock_put(sk);
-				goto begin;
-			}
-		goto found;
+		if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
+			sock_gen_put(sk);
+			goto begin;
 		}
+		goto found;
 	}
 	if (get_nulls_value(node) != slot)
 		goto begin;
@@ -133,11 +119,10 @@ static inline int compute_score(struct sock *sk, struct net *net,
 
 	if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
 	    sk->sk_family == PF_INET6) {
-		const struct ipv6_pinfo *np = inet6_sk(sk);
 
 		score = 1;
-		if (!ipv6_addr_any(&np->rcv_saddr)) {
-			if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+		if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+			if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
 				return -1;
 			score++;
 		}
@@ -229,9 +214,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	struct inet_sock *inet = inet_sk(sk);
-	const struct ipv6_pinfo *np = inet6_sk(sk);
-	const struct in6_addr *daddr = &np->rcv_saddr;
-	const struct in6_addr *saddr = &np->daddr;
+	const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
+	const struct in6_addr *saddr = &sk->sk_v6_daddr;
 	const int dif = sk->sk_bound_dev_if;
 	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
 	struct net *net = sock_net(sk);
@@ -250,23 +234,19 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 		if (sk2->sk_hash != hash)
 			continue;
 
-		if (sk2->sk_state == TCP_TIME_WAIT) {
-			if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr,
-						  ports, dif))) {
+		if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) {
+			if (sk2->sk_state == TCP_TIME_WAIT) {
 				tw = inet_twsk(sk2);
 				if (twsk_unique(sk, sk2, twp))
-					goto unique;
-				else
-					goto not_unique;
+					break;
 			}
-		}
-		if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif)))
 			goto not_unique;
+		}
 	}
 
-unique:
 	/* Must record num and sport now. Otherwise we will see
-	 * in hash table socket with a funny identity. */
+	 * in hash table socket with a funny identity.
+	 */
 	inet->inet_num = lport;
 	inet->inet_sport = htons(lport);
 	sk->sk_hash = hash;
@@ -299,9 +279,9 @@ not_unique:
 static inline u32 inet6_sk_port_offset(const struct sock *sk)
 {
 	const struct inet_sock *inet = inet_sk(sk);
-	const struct ipv6_pinfo *np = inet6_sk(sk);
-	return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32,
-					  np->daddr.s6_addr32,
+
+	return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32,
+					  sk->sk_v6_daddr.s6_addr32,
 					  inet->inet_dport);
 }
 
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index d1e2e8ef29c5..4919a8e6063e 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -174,7 +174,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 			}
 
 			if (ipv6_only_sock(sk) ||
-			    !ipv6_addr_v4mapped(&np->daddr)) {
+			    !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
 				retv = -EADDRNOTAVAIL;
 				break;
 			}
@@ -1011,7 +1011,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 				struct in6_pktinfo src_info;
 				src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
 					np->sticky_pktinfo.ipi6_ifindex;
-				src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr;
+				src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr;
 				put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
 			}
 			if (np->rxopt.bits.rxhlim) {
@@ -1026,7 +1026,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 				struct in6_pktinfo src_info;
 				src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
 					np->sticky_pktinfo.ipi6_ifindex;
-				src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr;
+				src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr :
+								     np->sticky_pktinfo.ipi6_addr;
 				put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
 			}
 			if (np->rxopt.bits.rxohlim) {
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index d6e4dd8b58df..54b75ead5a69 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -297,9 +297,9 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 	struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
 	struct nf_conn *ct;
 
-	tuple.src.u3.in6 = inet6->rcv_saddr;
+	tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
 	tuple.src.u.tcp.port = inet->inet_sport;
-	tuple.dst.u3.in6 = inet6->daddr;
+	tuple.dst.u3.in6 = sk->sk_v6_daddr;
 	tuple.dst.u.tcp.port = inet->inet_dport;
 	tuple.dst.protonum = sk->sk_protocol;
 
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 18f19df4189f..8815e31a87fe 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -116,7 +116,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	} else {
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
-		daddr = &np->daddr;
+		daddr = &sk->sk_v6_daddr;
 	}
 
 	if (!iif)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a4ed2416399e..3c00842b0079 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -77,20 +77,19 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
 
 	sk_for_each_from(sk)
 		if (inet_sk(sk)->inet_num == num) {
-			struct ipv6_pinfo *np = inet6_sk(sk);
 
 			if (!net_eq(sock_net(sk), net))
 				continue;
 
-			if (!ipv6_addr_any(&np->daddr) &&
-			    !ipv6_addr_equal(&np->daddr, rmt_addr))
+			if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+			    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
 				continue;
 
 			if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
 				continue;
 
-			if (!ipv6_addr_any(&np->rcv_saddr)) {
-				if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
+			if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+				if (ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
 					goto found;
 				if (is_multicast &&
 				    inet6_mc_check(sk, loc_addr, rmt_addr))
@@ -302,7 +301,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	}
 
 	inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
-	np->rcv_saddr = addr->sin6_addr;
+	sk->sk_v6_rcv_saddr = addr->sin6_addr;
 	if (!(addr_type & IPV6_ADDR_MULTICAST))
 		np->saddr = addr->sin6_addr;
 	err = 0;
@@ -804,8 +803,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		 * sk->sk_dst_cache.
 		 */
 		if (sk->sk_state == TCP_ESTABLISHED &&
-		    ipv6_addr_equal(daddr, &np->daddr))
-			daddr = &np->daddr;
+		    ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
+			daddr = &sk->sk_v6_daddr;
 
 		if (addr_len >= sizeof(struct sockaddr_in6) &&
 		    sin6->sin6_scope_id &&
@@ -816,7 +815,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 			return -EDESTADDRREQ;
 
 		proto = inet->inet_num;
-		daddr = &np->daddr;
+		daddr = &sk->sk_v6_daddr;
 		fl6.flowlabel = np->flow_label;
 	}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 528e61afaf5e..541dfc40c7b3 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -192,13 +192,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	}
 
 	if (tp->rx_opt.ts_recent_stamp &&
-	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
+	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
 		tp->rx_opt.ts_recent = 0;
 		tp->rx_opt.ts_recent_stamp = 0;
 		tp->write_seq = 0;
 	}
 
-	np->daddr = usin->sin6_addr;
+	sk->sk_v6_daddr = usin->sin6_addr;
 	np->flow_label = fl6.flowlabel;
 
 	/*
@@ -237,17 +237,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		} else {
 			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
 			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
-					       &np->rcv_saddr);
+					       &sk->sk_v6_rcv_saddr);
 		}
 
 		return err;
 	}
 
-	if (!ipv6_addr_any(&np->rcv_saddr))
-		saddr = &np->rcv_saddr;
+	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+		saddr = &sk->sk_v6_rcv_saddr;
 
 	fl6.flowi6_proto = IPPROTO_TCP;
-	fl6.daddr = np->daddr;
+	fl6.daddr = sk->sk_v6_daddr;
 	fl6.saddr = saddr ? *saddr : np->saddr;
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
 	fl6.flowi6_mark = sk->sk_mark;
@@ -266,7 +266,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	if (saddr == NULL) {
 		saddr = &fl6.saddr;
-		np->rcv_saddr = *saddr;
+		sk->sk_v6_rcv_saddr = *saddr;
 	}
 
 	/* set the source address */
@@ -279,7 +279,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	rt = (struct rt6_info *) dst;
 	if (tcp_death_row.sysctl_tw_recycle &&
 	    !tp->rx_opt.ts_recent_stamp &&
-	    ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr))
+	    ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr))
 		tcp_fetch_timewait_stamp(sk, dst);
 
 	icsk->icsk_ext_hdr_len = 0;
@@ -298,7 +298,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	if (!tp->write_seq && likely(!tp->repair))
 		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
-							     np->daddr.s6_addr32,
+							     sk->sk_v6_daddr.s6_addr32,
 							     inet->inet_sport,
 							     inet->inet_dport);
 
@@ -515,7 +515,7 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
 						struct sock *addr_sk)
 {
-	return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
+	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
 }
 
 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
@@ -621,7 +621,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
 
 	if (sk) {
 		saddr = &inet6_sk(sk)->saddr;
-		daddr = &inet6_sk(sk)->daddr;
+		daddr = &sk->sk_v6_daddr;
 	} else if (req) {
 		saddr = &inet6_rsk(req)->loc_addr;
 		daddr = &inet6_rsk(req)->rmt_addr;
@@ -1116,11 +1116,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
+		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);
 
 		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
 
-		newnp->rcv_saddr = newnp->saddr;
+		newsk->sk_v6_rcv_saddr = newnp->saddr;
 
 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
@@ -1185,9 +1185,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-	newnp->daddr = treq->rmt_addr;
+	newsk->sk_v6_daddr = treq->rmt_addr;
 	newnp->saddr = treq->loc_addr;
-	newnp->rcv_saddr = treq->loc_addr;
+	newsk->sk_v6_rcv_saddr = treq->loc_addr;
 	newsk->sk_bound_dev_if = treq->iif;
 
 	/* Now IPv6 options...
@@ -1244,13 +1244,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Copy over the MD5 key from the original socket */
-	if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
+	if ((key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr)) != NULL) {
 		/* We're using one, so create a matching key
 		 * on the newsk structure. If we fail to get
 		 * memory, then we end up not copying the key
 		 * across. Shucks.
 		 */
-		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
+		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
 			       AF_INET6, key->key, key->keylen,
 			       sk_gfp_atomic(sk, GFP_ATOMIC));
 	}
@@ -1758,10 +1758,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 	const struct inet_sock *inet = inet_sk(sp);
 	const struct tcp_sock *tp = tcp_sk(sp);
 	const struct inet_connection_sock *icsk = inet_csk(sp);
-	const struct ipv6_pinfo *np = inet6_sk(sp);
 
-	dest  = &np->daddr;
-	src   = &np->rcv_saddr;
+	dest  = &sp->sk_v6_daddr;
+	src   = &sp->sk_v6_rcv_saddr;
 	destp = ntohs(inet->inet_dport);
 	srcp  = ntohs(inet->inet_sport);
 
@@ -1810,11 +1809,10 @@ static void get_timewait6_sock(struct seq_file *seq,
 {
 	const struct in6_addr *dest, *src;
 	__u16 destp, srcp;
-	const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
 	s32 delta = tw->tw_ttd - inet_tw_time_stamp();
 
-	dest = &tw6->tw_v6_daddr;
-	src  = &tw6->tw_v6_rcv_saddr;
+	dest = &tw->tw_v6_daddr;
+	src  = &tw->tw_v6_rcv_saddr;
 	destp = ntohs(tw->tw_dport);
 	srcp  = ntohs(tw->tw_sport);
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 37532478e3ba..b496de19a341 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -55,11 +55,10 @@
 
 int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 {
-	const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
 	const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
 	int sk_ipv6only = ipv6_only_sock(sk);
 	int sk2_ipv6only = inet_v6_ipv6only(sk2);
-	int addr_type = ipv6_addr_type(sk_rcv_saddr6);
+	int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
 	int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
 
 	/* if both are mapped, treat as IPv4 */
@@ -77,7 +76,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 		return 1;
 
 	if (sk2_rcv_saddr6 &&
-	    ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
+	    ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
 		return 1;
 
 	return 0;
@@ -105,7 +104,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
 	unsigned int hash2_nulladdr =
 		udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
 	unsigned int hash2_partial =
-		udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0);
+		udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
 
 	/* precompute partial secondary hash */
 	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
@@ -115,7 +114,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
 static void udp_v6_rehash(struct sock *sk)
 {
 	u16 new_hash = udp6_portaddr_hash(sock_net(sk),
-					  &inet6_sk(sk)->rcv_saddr,
+					  &sk->sk_v6_rcv_saddr,
 					  inet_sk(sk)->inet_num);
 
 	udp_lib_rehash(sk, new_hash);
@@ -131,7 +130,6 @@ static inline int compute_score(struct sock *sk, struct net *net,
 
 	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
 			sk->sk_family == PF_INET6) {
-		struct ipv6_pinfo *np = inet6_sk(sk);
 		struct inet_sock *inet = inet_sk(sk);
 
 		score = 0;
@@ -140,13 +138,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
 				return -1;
 			score++;
 		}
-		if (!ipv6_addr_any(&np->rcv_saddr)) {
-			if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+		if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+			if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
 				return -1;
 			score++;
 		}
-		if (!ipv6_addr_any(&np->daddr)) {
-			if (!ipv6_addr_equal(&np->daddr, saddr))
+		if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
+			if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
 				return -1;
 			score++;
 		}
@@ -169,10 +167,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,
 
 	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
 			sk->sk_family == PF_INET6) {
-		struct ipv6_pinfo *np = inet6_sk(sk);
 		struct inet_sock *inet = inet_sk(sk);
 
-		if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+		if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
 			return -1;
 		score = 0;
 		if (inet->inet_dport) {
@@ -180,8 +177,8 @@ static inline int compute_score2(struct sock *sk, struct net *net,
 				return -1;
 			score++;
 		}
-		if (!ipv6_addr_any(&np->daddr)) {
-			if (!ipv6_addr_equal(&np->daddr, saddr))
+		if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
+			if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
 				return -1;
 			score++;
 		}
@@ -549,7 +546,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	int rc;
 
-	if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) {
+	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
 		sock_rps_save_rxhash(sk, skb);
 		sk_mark_napi_id(sk, skb);
 	}
@@ -690,20 +687,19 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
 
 		if (udp_sk(s)->udp_port_hash == num &&
 		    s->sk_family == PF_INET6) {
-			struct ipv6_pinfo *np = inet6_sk(s);
 			if (inet->inet_dport) {
 				if (inet->inet_dport != rmt_port)
 					continue;
 			}
-			if (!ipv6_addr_any(&np->daddr) &&
-			    !ipv6_addr_equal(&np->daddr, rmt_addr))
+			if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+			    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
 				continue;
 
 			if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
 				continue;
 
-			if (!ipv6_addr_any(&np->rcv_saddr)) {
-				if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
+			if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+				if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
 					continue;
 			}
 			if (!inet6_mc_check(s, loc_addr, rmt_addr))
@@ -1063,7 +1059,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	} else if (!up->pending) {
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
-		daddr = &np->daddr;
+		daddr = &sk->sk_v6_daddr;
 	} else
 		daddr = NULL;
 
@@ -1133,8 +1129,8 @@ do_udp_sendmsg:
 		 * sk->sk_dst_cache.
 		 */
 		if (sk->sk_state == TCP_ESTABLISHED &&
-		    ipv6_addr_equal(daddr, &np->daddr))
-			daddr = &np->daddr;
+		    ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
+			daddr = &sk->sk_v6_daddr;
 
 		if (addr_len >= sizeof(struct sockaddr_in6) &&
 		    sin6->sin6_scope_id &&
@@ -1145,7 +1141,7 @@ do_udp_sendmsg:
 			return -EDESTADDRREQ;
 
 		fl6.fl6_dport = inet->inet_dport;
-		daddr = &np->daddr;
+		daddr = &sk->sk_v6_daddr;
 		fl6.flowlabel = np->flow_label;
 		connected = 1;
 	}
@@ -1261,8 +1257,8 @@ do_append_data:
 	if (dst) {
 		if (connected) {
 			ip6_dst_store(sk, dst,
-				      ipv6_addr_equal(&fl6.daddr, &np->daddr) ?
-				      &np->daddr : NULL,
+				      ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
+				      &sk->sk_v6_daddr : NULL,
 #ifdef CONFIG_IPV6_SUBTREES
 				      ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
 				      &np->saddr :
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index b076e8309bc2..9af77d9c0ec9 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1181,7 +1181,7 @@ static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb,
 	    !(skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
 		__wsum csum = skb_checksum(skb, 0, udp_len, 0);
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		uh->check = csum_ipv6_magic(&np->saddr, &np->daddr, udp_len,
+		uh->check = csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, udp_len,
 					    IPPROTO_UDP, csum);
 		if (uh->check == 0)
 			uh->check = CSUM_MANGLED_0;
@@ -1189,7 +1189,7 @@ static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb,
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum_start = skb_transport_header(skb) - skb->head;
 		skb->csum_offset = offsetof(struct udphdr, check);
-		uh->check = ~csum_ipv6_magic(&np->saddr, &np->daddr,
+		uh->check = ~csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr,
 					     udp_len, IPPROTO_UDP, 0);
 	}
 }
@@ -1713,13 +1713,13 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 		struct ipv6_pinfo *np = inet6_sk(sk);
 
 		if (ipv6_addr_v4mapped(&np->saddr) &&
-		    ipv6_addr_v4mapped(&np->daddr)) {
+		    ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
 			struct inet_sock *inet = inet_sk(sk);
 
 			tunnel->v4mapped = true;
 			inet->inet_saddr = np->saddr.s6_addr32[3];
-			inet->inet_rcv_saddr = np->rcv_saddr.s6_addr32[3];
-			inet->inet_daddr = np->daddr.s6_addr32[3];
+			inet->inet_rcv_saddr = sk->sk_v6_rcv_saddr.s6_addr32[3];
+			inet->inet_daddr = sk->sk_v6_daddr.s6_addr32[3];
 		} else {
 			tunnel->v4mapped = false;
 		}
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 072d7202e182..2d6760a2ae34 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -127,9 +127,10 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
 
 #if IS_ENABLED(CONFIG_IPV6)
 		if (tunnel->sock->sk_family == AF_INET6) {
-			struct ipv6_pinfo *np = inet6_sk(tunnel->sock);
+			const struct ipv6_pinfo *np = inet6_sk(tunnel->sock);
+
 			seq_printf(m, " from %pI6c to %pI6c\n",
-				&np->saddr, &np->daddr);
+				&np->saddr, &tunnel->sock->sk_v6_daddr);
 		} else
 #endif
 		seq_printf(m, " from %pI4 to %pI4\n",
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index b8a6039314e8..cfd65304be60 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -63,7 +63,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net,
 	struct sock *sk;
 
 	sk_for_each_bound(sk, &l2tp_ip6_bind_table) {
-		struct in6_addr *addr = inet6_rcv_saddr(sk);
+		const struct in6_addr *addr = inet6_rcv_saddr(sk);
 		struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk);
 
 		if (l2tp == NULL)
@@ -331,7 +331,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	rcu_read_unlock();
 
 	inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
-	np->rcv_saddr = addr->l2tp_addr;
+	sk->sk_v6_rcv_saddr = addr->l2tp_addr;
 	np->saddr = addr->l2tp_addr;
 
 	l2tp_ip6_sk(sk)->conn_id = addr->l2tp_conn_id;
@@ -421,14 +421,14 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
 		if (!lsk->peer_conn_id)
 			return -ENOTCONN;
 		lsa->l2tp_conn_id = lsk->peer_conn_id;
-		lsa->l2tp_addr = np->daddr;
+		lsa->l2tp_addr = sk->sk_v6_daddr;
 		if (np->sndflow)
 			lsa->l2tp_flowinfo = np->flow_label;
 	} else {
-		if (ipv6_addr_any(&np->rcv_saddr))
+		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
 			lsa->l2tp_addr = np->saddr;
 		else
-			lsa->l2tp_addr = np->rcv_saddr;
+			lsa->l2tp_addr = sk->sk_v6_rcv_saddr;
 
 		lsa->l2tp_conn_id = lsk->conn_id;
 	}
@@ -537,8 +537,8 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		 * sk->sk_dst_cache.
 		 */
 		if (sk->sk_state == TCP_ESTABLISHED &&
-		    ipv6_addr_equal(daddr, &np->daddr))
-			daddr = &np->daddr;
+		    ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
+			daddr = &sk->sk_v6_daddr;
 
 		if (addr_len >= sizeof(struct sockaddr_in6) &&
 		    lsa->l2tp_scope_id &&
@@ -548,7 +548,7 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
 
-		daddr = &np->daddr;
+		daddr = &sk->sk_v6_daddr;
 		fl6.flowlabel = np->flow_label;
 	}
 
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 0825ff26e113..be446d517bc9 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -306,8 +306,8 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
 		if (np) {
 			if (nla_put(skb, L2TP_ATTR_IP6_SADDR, sizeof(np->saddr),
 				    &np->saddr) ||
-			    nla_put(skb, L2TP_ATTR_IP6_DADDR, sizeof(np->daddr),
-				    &np->daddr))
+			    nla_put(skb, L2TP_ATTR_IP6_DADDR, sizeof(sk->sk_v6_daddr),
+				    &sk->sk_v6_daddr))
 				goto nla_put_failure;
 		} else
 #endif
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 5ebee2ded9e9..f0a7adaef2ea 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -906,8 +906,8 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
 #if IS_ENABLED(CONFIG_IPV6)
 	} else if ((tunnel->version == 2) &&
 		   (tunnel->sock->sk_family == AF_INET6)) {
-		struct ipv6_pinfo *np = inet6_sk(tunnel->sock);
 		struct sockaddr_pppol2tpin6 sp;
+
 		len = sizeof(sp);
 		memset(&sp, 0, len);
 		sp.sa_family	= AF_PPPOX;
@@ -920,13 +920,13 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
 		sp.pppol2tp.d_session = session->peer_session_id;
 		sp.pppol2tp.addr.sin6_family = AF_INET6;
 		sp.pppol2tp.addr.sin6_port = inet->inet_dport;
-		memcpy(&sp.pppol2tp.addr.sin6_addr, &np->daddr,
-		       sizeof(np->daddr));
+		memcpy(&sp.pppol2tp.addr.sin6_addr, &tunnel->sock->sk_v6_daddr,
+		       sizeof(tunnel->sock->sk_v6_daddr));
 		memcpy(uaddr, &sp, len);
 	} else if ((tunnel->version == 3) &&
 		   (tunnel->sock->sk_family == AF_INET6)) {
-		struct ipv6_pinfo *np = inet6_sk(tunnel->sock);
 		struct sockaddr_pppol2tpv3in6 sp;
+
 		len = sizeof(sp);
 		memset(&sp, 0, len);
 		sp.sa_family	= AF_PPPOX;
@@ -939,8 +939,8 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
 		sp.pppol2tp.d_session = session->peer_session_id;
 		sp.pppol2tp.addr.sin6_family = AF_INET6;
 		sp.pppol2tp.addr.sin6_port = inet->inet_dport;
-		memcpy(&sp.pppol2tp.addr.sin6_addr, &np->daddr,
-		       sizeof(np->daddr));
+		memcpy(&sp.pppol2tp.addr.sin6_addr, &tunnel->sock->sk_v6_daddr,
+		       sizeof(tunnel->sock->sk_v6_daddr));
 		memcpy(uaddr, &sp, len);
 #endif
 	} else if (tunnel->version == 3) {
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 5d8a3a3cd5a7..ef8a926752a9 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -200,7 +200,7 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
 				     in->ifindex);
 		if (sk) {
 			int connected = (sk->sk_state == TCP_ESTABLISHED);
-			int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr);
+			int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr);
 
 			/* NOTE: we return listeners even if bound to
 			 * 0.0.0.0, those are filtered out in
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 06df2b9110f5..3dd0e374bc2b 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -370,7 +370,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
 		 */
 		wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
 			    sk->sk_state != TCP_TIME_WAIT &&
-			    ipv6_addr_any(&inet6_sk(sk)->rcv_saddr));
+			    ipv6_addr_any(&sk->sk_v6_rcv_saddr));
 
 		/* Ignore non-transparent sockets,
 		   if XT_SOCKET_TRANSPARENT is used */
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e7b2d4fe2b6a..f6334aa19151 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -426,20 +426,20 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk)
 {
 	addr->v6.sin6_family = AF_INET6;
 	addr->v6.sin6_port = 0;
-	addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr;
+	addr->v6.sin6_addr = sk->sk_v6_rcv_saddr;
 }
 
 /* Initialize sk->sk_rcv_saddr from sctp_addr. */
 static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
 {
 	if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) {
-		inet6_sk(sk)->rcv_saddr.s6_addr32[0] = 0;
-		inet6_sk(sk)->rcv_saddr.s6_addr32[1] = 0;
-		inet6_sk(sk)->rcv_saddr.s6_addr32[2] = htonl(0x0000ffff);
-		inet6_sk(sk)->rcv_saddr.s6_addr32[3] =
+		sk->sk_v6_rcv_saddr.s6_addr32[0] = 0;
+		sk->sk_v6_rcv_saddr.s6_addr32[1] = 0;
+		sk->sk_v6_rcv_saddr.s6_addr32[2] = htonl(0x0000ffff);
+		sk->sk_v6_rcv_saddr.s6_addr32[3] =
 			addr->v4.sin_addr.s_addr;
 	} else {
-		inet6_sk(sk)->rcv_saddr = addr->v6.sin6_addr;
+		sk->sk_v6_rcv_saddr = addr->v6.sin6_addr;
 	}
 }
 
@@ -447,12 +447,12 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
 static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
 {
 	if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) {
-		inet6_sk(sk)->daddr.s6_addr32[0] = 0;
-		inet6_sk(sk)->daddr.s6_addr32[1] = 0;
-		inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff);
-		inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
+		sk->sk_v6_daddr.s6_addr32[0] = 0;
+		sk->sk_v6_daddr.s6_addr32[1] = 0;
+		sk->sk_v6_daddr.s6_addr32[2] = htonl(0x0000ffff);
+		sk->sk_v6_daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
 	} else {
-		inet6_sk(sk)->daddr = addr->v6.sin6_addr;
+		sk->sk_v6_daddr = addr->v6.sin6_addr;
 	}
 }
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 9c9caaa5e0d3..0045c7cf1e9e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -294,7 +294,7 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
 	case PF_INET6:
 		len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n",
 				proto_name,
-				&inet6_sk(sk)->rcv_saddr,
+				&sk->sk_v6_rcv_saddr,
 				inet_sk(sk)->inet_num);
 		break;
 	default:
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 8d8d97dbb389..80554fcf9fcc 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -304,12 +304,11 @@ static void dump_common_audit_data(struct audit_buffer *ab,
 			}
 			case AF_INET6: {
 				struct inet_sock *inet = inet_sk(sk);
-				struct ipv6_pinfo *inet6 = inet6_sk(sk);
 
-				print_ipv6_addr(ab, &inet6->rcv_saddr,
+				print_ipv6_addr(ab, &sk->sk_v6_rcv_saddr,
 						inet->inet_sport,
 						"laddr", "lport");
-				print_ipv6_addr(ab, &inet6->daddr,
+				print_ipv6_addr(ab, &sk->sk_v6_daddr,
 						inet->inet_dport,
 						"faddr", "fport");
 				break;
-- 
cgit v1.2.3-71-gd317


From 634fb979e8f3a70f04c1f2f519d0cd1142eb5c1a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 9 Oct 2013 15:21:29 -0700
Subject: inet: includes a sock_common in request_sock

TCP listener refactoring, part 5 :

We want to be able to insert request sockets (SYN_RECV) into main
ehash table instead of the per listener hash table to allow RCU
lookups and remove listener lock contention.

This patch includes the needed struct sock_common in front
of struct request_sock

This means there is no more inet6_request_sock IPv6 specific
structure.

Following inet_request_sock fields were renamed as they became
macros to reference fields from struct sock_common.
Prefix ir_ was chosen to avoid name collisions.

loc_port   -> ir_loc_port
loc_addr   -> ir_loc_addr
rmt_addr   -> ir_rmt_addr
rmt_port   -> ir_rmt_port
iif        -> ir_iif

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h             | 26 ++---------------
 include/net/inet_sock.h          | 16 +++++-----
 include/net/request_sock.h       |  1 +
 include/net/tcp.h                |  4 +--
 net/dccp/ipv4.c                  | 18 ++++++------
 net/dccp/ipv6.c                  | 63 ++++++++++++++++++++--------------------
 net/dccp/ipv6.h                  |  1 -
 net/dccp/minisocks.c             |  4 +--
 net/dccp/output.c                |  4 +--
 net/ipv4/inet_connection_sock.c  | 23 ++++++++-------
 net/ipv4/inet_diag.c             | 22 +++++++-------
 net/ipv4/syncookies.c            | 12 ++++----
 net/ipv4/tcp_ipv4.c              | 38 ++++++++++++------------
 net/ipv4/tcp_metrics.c           |  8 +++--
 net/ipv4/tcp_output.c            |  4 +--
 net/ipv6/inet6_connection_sock.c | 26 ++++++++---------
 net/ipv6/syncookies.c            | 24 +++++++--------
 net/ipv6/tcp_ipv6.c              | 61 +++++++++++++++++++-------------------
 net/netlabel/netlabel_kapi.c     |  2 +-
 19 files changed, 169 insertions(+), 188 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 35f6c1b562c4..a80a63cfb70c 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -115,16 +115,8 @@ static inline int inet6_iif(const struct sk_buff *skb)
 	return IP6CB(skb)->iif;
 }
 
-struct inet6_request_sock {
-	struct in6_addr		loc_addr;
-	struct in6_addr		rmt_addr;
-	struct sk_buff		*pktopts;
-	int			iif;
-};
-
 struct tcp6_request_sock {
 	struct tcp_request_sock	  tcp6rsk_tcp;
-	struct inet6_request_sock tcp6rsk_inet6;
 };
 
 struct ipv6_mc_socklist;
@@ -264,26 +256,12 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
 	return inet_sk(__sk)->pinet6;
 }
 
-static inline struct inet6_request_sock *
-			inet6_rsk(const struct request_sock *rsk)
-{
-	return (struct inet6_request_sock *)(((u8 *)rsk) +
-					     inet_rsk(rsk)->inet6_rsk_offset);
-}
-
-static inline u32 inet6_rsk_offset(struct request_sock *rsk)
-{
-	return rsk->rsk_ops->obj_size - sizeof(struct inet6_request_sock);
-}
-
 static inline struct request_sock *inet6_reqsk_alloc(struct request_sock_ops *ops)
 {
 	struct request_sock *req = reqsk_alloc(ops);
 
-	if (req != NULL) {
-		inet_rsk(req)->inet6_rsk_offset = inet6_rsk_offset(req);
-		inet6_rsk(req)->pktopts = NULL;
-	}
+	if (req)
+		inet_rsk(req)->pktopts = NULL;
 
 	return req;
 }
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 6d9a7e6eb5a4..f91204442efa 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -70,13 +70,14 @@ struct ip_options_data {
 
 struct inet_request_sock {
 	struct request_sock	req;
-#if IS_ENABLED(CONFIG_IPV6)
-	u16			inet6_rsk_offset;
-#endif
-	__be16			loc_port;
-	__be32			loc_addr;
-	__be32			rmt_addr;
-	__be16			rmt_port;
+#define ir_loc_addr		req.__req_common.skc_rcv_saddr
+#define ir_rmt_addr		req.__req_common.skc_daddr
+#define ir_loc_port		req.__req_common.skc_num
+#define ir_rmt_port		req.__req_common.skc_dport
+#define ir_v6_rmt_addr		req.__req_common.skc_v6_daddr
+#define ir_v6_loc_addr		req.__req_common.skc_v6_rcv_saddr
+#define ir_iif			req.__req_common.skc_bound_dev_if
+
 	kmemcheck_bitfield_begin(flags);
 	u16			snd_wscale : 4,
 				rcv_wscale : 4,
@@ -88,6 +89,7 @@ struct inet_request_sock {
 				no_srccheck: 1;
 	kmemcheck_bitfield_end(flags);
 	struct ip_options_rcu	*opt;
+	struct sk_buff		*pktopts;
 };
 
 static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 65c3e5164a5c..7f830ff67f08 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -48,6 +48,7 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req);
 /* struct request_sock - mini sock to represent a connection request
  */
 struct request_sock {
+	struct sock_common		__req_common;
 	struct request_sock		*dl_next;
 	u16				mss;
 	u8				num_retrans; /* number of retransmits */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 39bbfa1602b2..24a06161d174 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1109,8 +1109,8 @@ static inline void tcp_openreq_init(struct request_sock *req,
 	ireq->wscale_ok = rx_opt->wscale_ok;
 	ireq->acked = 0;
 	ireq->ecn_ok = 0;
-	ireq->rmt_port = tcp_hdr(skb)->source;
-	ireq->loc_port = tcp_hdr(skb)->dest;
+	ireq->ir_rmt_port = tcp_hdr(skb)->source;
+	ireq->ir_loc_port = tcp_hdr(skb)->dest;
 }
 
 void tcp_enter_memory_pressure(struct sock *sk);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ebc54fef85a5..720c36225ed9 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -409,9 +409,9 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	newinet		   = inet_sk(newsk);
 	ireq		   = inet_rsk(req);
-	newinet->inet_daddr	= ireq->rmt_addr;
-	newinet->inet_rcv_saddr = ireq->loc_addr;
-	newinet->inet_saddr	= ireq->loc_addr;
+	newinet->inet_daddr	= ireq->ir_rmt_addr;
+	newinet->inet_rcv_saddr = ireq->ir_loc_addr;
+	newinet->inet_saddr	= ireq->ir_loc_addr;
 	newinet->inet_opt	= ireq->opt;
 	ireq->opt	   = NULL;
 	newinet->mc_index  = inet_iif(skb);
@@ -516,10 +516,10 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req)
 		const struct inet_request_sock *ireq = inet_rsk(req);
 		struct dccp_hdr *dh = dccp_hdr(skb);
 
-		dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr,
-							      ireq->rmt_addr);
-		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
-					    ireq->rmt_addr,
+		dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr,
+							      ireq->ir_rmt_addr);
+		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+					    ireq->ir_rmt_addr,
 					    ireq->opt);
 		err = net_xmit_eval(err);
 	}
@@ -641,8 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		goto drop_and_free;
 
 	ireq = inet_rsk(req);
-	ireq->loc_addr = ip_hdr(skb)->daddr;
-	ireq->rmt_addr = ip_hdr(skb)->saddr;
+	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
+	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
 
 	/*
 	 * Step 3: Process LISTEN state
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 7f075b83128a..5cc5b24a956e 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -216,7 +216,7 @@ out:
 
 static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
 {
-	struct inet6_request_sock *ireq6 = inet6_rsk(req);
+	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff *skb;
 	struct in6_addr *final_p, final;
@@ -226,12 +226,12 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_proto = IPPROTO_DCCP;
-	fl6.daddr = ireq6->rmt_addr;
-	fl6.saddr = ireq6->loc_addr;
+	fl6.daddr = ireq->ir_v6_rmt_addr;
+	fl6.saddr = ireq->ir_v6_loc_addr;
 	fl6.flowlabel = 0;
-	fl6.flowi6_oif = ireq6->iif;
-	fl6.fl6_dport = inet_rsk(req)->rmt_port;
-	fl6.fl6_sport = inet_rsk(req)->loc_port;
+	fl6.flowi6_oif = ireq->ir_iif;
+	fl6.fl6_dport = ireq->ir_rmt_port;
+	fl6.fl6_sport = ireq->ir_loc_port;
 	security_req_classify_flow(req, flowi6_to_flowi(&fl6));
 
 
@@ -249,9 +249,9 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
 		struct dccp_hdr *dh = dccp_hdr(skb);
 
 		dh->dccph_checksum = dccp_v6_csum_finish(skb,
-							 &ireq6->loc_addr,
-							 &ireq6->rmt_addr);
-		fl6.daddr = ireq6->rmt_addr;
+							 &ireq->ir_v6_loc_addr,
+							 &ireq->ir_v6_rmt_addr);
+		fl6.daddr = ireq->ir_v6_rmt_addr;
 		err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
 		err = net_xmit_eval(err);
 	}
@@ -264,8 +264,7 @@ done:
 static void dccp_v6_reqsk_destructor(struct request_sock *req)
 {
 	dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
-	if (inet6_rsk(req)->pktopts != NULL)
-		kfree_skb(inet6_rsk(req)->pktopts);
+	kfree_skb(inet_rsk(req)->pktopts);
 }
 
 static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
@@ -359,7 +358,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct request_sock *req;
 	struct dccp_request_sock *dreq;
-	struct inet6_request_sock *ireq6;
+	struct inet_request_sock *ireq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
 	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
@@ -398,22 +397,22 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_free;
 
-	ireq6 = inet6_rsk(req);
-	ireq6->rmt_addr = ipv6_hdr(skb)->saddr;
-	ireq6->loc_addr = ipv6_hdr(skb)->daddr;
+	ireq = inet_rsk(req);
+	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
 
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
 		atomic_inc(&skb->users);
-		ireq6->pktopts = skb;
+		ireq->pktopts = skb;
 	}
-	ireq6->iif = sk->sk_bound_dev_if;
+	ireq->ir_iif = sk->sk_bound_dev_if;
 
 	/* So that link locals have meaning */
 	if (!sk->sk_bound_dev_if &&
-	    ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
-		ireq6->iif = inet6_iif(skb);
+	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+		ireq->ir_iif = inet6_iif(skb);
 
 	/*
 	 * Step 3: Process LISTEN state
@@ -446,7 +445,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 					      struct request_sock *req,
 					      struct dst_entry *dst)
 {
-	struct inet6_request_sock *ireq6 = inet6_rsk(req);
+	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct inet_sock *newinet;
 	struct dccp6_sock *newdp6;
@@ -505,12 +504,12 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 		memset(&fl6, 0, sizeof(fl6));
 		fl6.flowi6_proto = IPPROTO_DCCP;
-		fl6.daddr = ireq6->rmt_addr;
+		fl6.daddr = ireq->ir_v6_rmt_addr;
 		final_p = fl6_update_dst(&fl6, np->opt, &final);
-		fl6.saddr = ireq6->loc_addr;
+		fl6.saddr = ireq->ir_v6_loc_addr;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
-		fl6.fl6_dport = inet_rsk(req)->rmt_port;
-		fl6.fl6_sport = inet_rsk(req)->loc_port;
+		fl6.fl6_dport = ireq->ir_rmt_port;
+		fl6.fl6_sport = ireq->ir_loc_port;
 		security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
 		dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
@@ -538,10 +537,10 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-	newsk->sk_v6_daddr = ireq6->rmt_addr;
-	newnp->saddr = ireq6->loc_addr;
-	newsk->sk_v6_rcv_saddr = ireq6->loc_addr;
-	newsk->sk_bound_dev_if = ireq6->iif;
+	newsk->sk_v6_daddr	= ireq->ir_v6_rmt_addr;
+	newnp->saddr		= ireq->ir_v6_loc_addr;
+	newsk->sk_v6_rcv_saddr	= ireq->ir_v6_loc_addr;
+	newsk->sk_bound_dev_if	= ireq->ir_iif;
 
 	/* Now IPv6 options...
 
@@ -554,10 +553,10 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 	/* Clone pktoptions received with SYN */
 	newnp->pktoptions = NULL;
-	if (ireq6->pktopts != NULL) {
-		newnp->pktoptions = skb_clone(ireq6->pktopts, GFP_ATOMIC);
-		consume_skb(ireq6->pktopts);
-		ireq6->pktopts = NULL;
+	if (ireq->pktopts != NULL) {
+		newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
+		consume_skb(ireq->pktopts);
+		ireq->pktopts = NULL;
 		if (newnp->pktoptions)
 			skb_set_owner_r(newnp->pktoptions, newsk);
 	}
diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h
index 6604fc3fe953..af259e15e7f0 100644
--- a/net/dccp/ipv6.h
+++ b/net/dccp/ipv6.h
@@ -25,7 +25,6 @@ struct dccp6_sock {
 
 struct dccp6_request_sock {
 	struct dccp_request_sock  dccp;
-	struct inet6_request_sock inet6;
 };
 
 struct dccp6_timewait_sock {
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 32e80d96d4c0..66afbcec2941 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -266,8 +266,8 @@ int dccp_reqsk_init(struct request_sock *req,
 {
 	struct dccp_request_sock *dreq = dccp_rsk(req);
 
-	inet_rsk(req)->rmt_port	  = dccp_hdr(skb)->dccph_sport;
-	inet_rsk(req)->loc_port	  = dccp_hdr(skb)->dccph_dport;
+	inet_rsk(req)->ir_rmt_port	  = dccp_hdr(skb)->dccph_sport;
+	inet_rsk(req)->ir_loc_port	  = dccp_hdr(skb)->dccph_dport;
 	inet_rsk(req)->acked	  = 0;
 	dreq->dreq_timestamp_echo = 0;
 
diff --git a/net/dccp/output.c b/net/dccp/output.c
index d17fc90a74b6..9bf195d1b87a 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -424,8 +424,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	/* Build and checksum header */
 	dh = dccp_zeroed_hdr(skb, dccp_header_size);
 
-	dh->dccph_sport	= inet_rsk(req)->loc_port;
-	dh->dccph_dport	= inet_rsk(req)->rmt_port;
+	dh->dccph_sport	= inet_rsk(req)->ir_loc_port;
+	dh->dccph_dport	= inet_rsk(req)->ir_rmt_port;
 	dh->dccph_doff	= (dccp_header_size +
 			   DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
 	dh->dccph_type	= DCCP_PKT_RESPONSE;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 56e82a4027b4..2ffd931d652f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -412,8 +412,8 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol,
 			   flags,
-			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
-			   ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
+			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
+			   ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport);
 	security_req_classify_flow(req, flowi4_to_flowi(fl4));
 	rt = ip_route_output_flow(net, fl4, sk);
 	if (IS_ERR(rt))
@@ -448,8 +448,8 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
 	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
-			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
-			   ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
+			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
+			   ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport);
 	security_req_classify_flow(req, flowi4_to_flowi(fl4));
 	rt = ip_route_output_flow(net, fl4, sk);
 	if (IS_ERR(rt))
@@ -495,9 +495,9 @@ struct request_sock *inet_csk_search_req(const struct sock *sk,
 	     prev = &req->dl_next) {
 		const struct inet_request_sock *ireq = inet_rsk(req);
 
-		if (ireq->rmt_port == rport &&
-		    ireq->rmt_addr == raddr &&
-		    ireq->loc_addr == laddr &&
+		if (ireq->ir_rmt_port == rport &&
+		    ireq->ir_rmt_addr == raddr &&
+		    ireq->ir_loc_addr == laddr &&
 		    AF_INET_FAMILY(req->rsk_ops->family)) {
 			WARN_ON(req->sk);
 			*prevp = prev;
@@ -514,7 +514,8 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
-	const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port,
+	const u32 h = inet_synq_hash(inet_rsk(req)->ir_rmt_addr,
+				     inet_rsk(req)->ir_rmt_port,
 				     lopt->hash_rnd, lopt->nr_table_entries);
 
 	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
@@ -674,9 +675,9 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
 		newsk->sk_state = TCP_SYN_RECV;
 		newicsk->icsk_bind_hash = NULL;
 
-		inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port;
-		inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port);
-		inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port;
+		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
+		inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->ir_loc_port);
+		inet_sk(newsk)->inet_sport = inet_rsk(req)->ir_loc_port;
 		newsk->sk_write_space = sk_stream_write_space;
 
 		newicsk->icsk_retransmits = 0;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ecc179d676e4..41e1c3ea8b51 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -679,12 +679,12 @@ static inline void inet_diag_req_addrs(const struct sock *sk,
 #if IS_ENABLED(CONFIG_IPV6)
 	if (sk->sk_family == AF_INET6) {
 		if (req->rsk_ops->family == AF_INET6) {
-			entry->saddr = inet6_rsk(req)->loc_addr.s6_addr32;
-			entry->daddr = inet6_rsk(req)->rmt_addr.s6_addr32;
+			entry->saddr = ireq->ir_v6_loc_addr.s6_addr32;
+			entry->daddr = ireq->ir_v6_rmt_addr.s6_addr32;
 		} else if (req->rsk_ops->family == AF_INET) {
-			ipv6_addr_set_v4mapped(ireq->loc_addr,
+			ipv6_addr_set_v4mapped(ireq->ir_loc_addr,
 					       &entry->saddr_storage);
-			ipv6_addr_set_v4mapped(ireq->rmt_addr,
+			ipv6_addr_set_v4mapped(ireq->ir_rmt_addr,
 					       &entry->daddr_storage);
 			entry->saddr = entry->saddr_storage.s6_addr32;
 			entry->daddr = entry->daddr_storage.s6_addr32;
@@ -692,8 +692,8 @@ static inline void inet_diag_req_addrs(const struct sock *sk,
 	} else
 #endif
 	{
-		entry->saddr = &ireq->loc_addr;
-		entry->daddr = &ireq->rmt_addr;
+		entry->saddr = &ireq->ir_loc_addr;
+		entry->daddr = &ireq->ir_rmt_addr;
 	}
 }
 
@@ -728,9 +728,9 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 		tmo = 0;
 
 	r->id.idiag_sport = inet->inet_sport;
-	r->id.idiag_dport = ireq->rmt_port;
-	r->id.idiag_src[0] = ireq->loc_addr;
-	r->id.idiag_dst[0] = ireq->rmt_addr;
+	r->id.idiag_dport = ireq->ir_rmt_port;
+	r->id.idiag_src[0] = ireq->ir_loc_addr;
+	r->id.idiag_dst[0] = ireq->ir_rmt_addr;
 	r->idiag_expires = jiffies_to_msecs(tmo);
 	r->idiag_rqueue = 0;
 	r->idiag_wqueue = 0;
@@ -789,13 +789,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 
 			if (reqnum < s_reqnum)
 				continue;
-			if (r->id.idiag_dport != ireq->rmt_port &&
+			if (r->id.idiag_dport != ireq->ir_rmt_port &&
 			    r->id.idiag_dport)
 				continue;
 
 			if (bc) {
 				inet_diag_req_addrs(sk, req, &entry);
-				entry.dport = ntohs(ireq->rmt_port);
+				entry.dport = ntohs(ireq->ir_rmt_port);
 
 				if (!inet_diag_bc_run(bc, &entry))
 					continue;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 15e024105f91..984e21cf3c50 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -304,10 +304,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	treq->rcv_isn		= ntohl(th->seq) - 1;
 	treq->snt_isn		= cookie;
 	req->mss		= mss;
-	ireq->loc_port		= th->dest;
-	ireq->rmt_port		= th->source;
-	ireq->loc_addr		= ip_hdr(skb)->daddr;
-	ireq->rmt_addr		= ip_hdr(skb)->saddr;
+	ireq->ir_loc_port		= th->dest;
+	ireq->ir_rmt_port		= th->source;
+	ireq->ir_loc_addr		= ip_hdr(skb)->daddr;
+	ireq->ir_rmt_addr		= ip_hdr(skb)->saddr;
 	ireq->ecn_ok		= ecn_ok;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
@@ -347,8 +347,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
 			   inet_sk_flowi_flags(sk),
-			   (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
-			   ireq->loc_addr, th->source, th->dest);
+			   (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr,
+			   ireq->ir_loc_addr, th->source, th->dest);
 	security_req_classify_flow(req, flowi4_to_flowi(&fl4));
 	rt = ip_route_output_key(sock_net(sk), &fl4);
 	if (IS_ERR(rt)) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e4695dde1af6..114d1b748cbb 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -835,11 +835,11 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 	skb = tcp_make_synack(sk, dst, req, NULL);
 
 	if (skb) {
-		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
+		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
 
 		skb_set_queue_mapping(skb, queue_mapping);
-		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
-					    ireq->rmt_addr,
+		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+					    ireq->ir_rmt_addr,
 					    ireq->opt);
 		err = net_xmit_eval(err);
 		if (!tcp_rsk(req)->snt_synack && !err)
@@ -972,7 +972,7 @@ static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
 {
 	union tcp_md5_addr *addr;
 
-	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
+	addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
 	return tcp_md5_do_lookup(sk, addr, AF_INET);
 }
 
@@ -1149,8 +1149,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
 		saddr = inet_sk(sk)->inet_saddr;
 		daddr = inet_sk(sk)->inet_daddr;
 	} else if (req) {
-		saddr = inet_rsk(req)->loc_addr;
-		daddr = inet_rsk(req)->rmt_addr;
+		saddr = inet_rsk(req)->ir_loc_addr;
+		daddr = inet_rsk(req)->ir_rmt_addr;
 	} else {
 		const struct iphdr *iph = ip_hdr(skb);
 		saddr = iph->saddr;
@@ -1366,8 +1366,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
 		kfree_skb(skb_synack);
 		return -1;
 	}
-	err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
-				    ireq->rmt_addr, ireq->opt);
+	err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
+				    ireq->ir_rmt_addr, ireq->opt);
 	err = net_xmit_eval(err);
 	if (!err)
 		tcp_rsk(req)->snt_synack = tcp_time_stamp;
@@ -1502,8 +1502,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_openreq_init(req, &tmp_opt, skb);
 
 	ireq = inet_rsk(req);
-	ireq->loc_addr = daddr;
-	ireq->rmt_addr = saddr;
+	ireq->ir_loc_addr = daddr;
+	ireq->ir_rmt_addr = saddr;
 	ireq->no_srccheck = inet_sk(sk)->transparent;
 	ireq->opt = tcp_v4_save_options(skb);
 
@@ -1578,15 +1578,15 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	    fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
 
 	if (skb_synack) {
-		__tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
+		__tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr);
 		skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
 	} else
 		goto drop_and_free;
 
 	if (likely(!do_fastopen)) {
 		int err;
-		err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
-		     ireq->rmt_addr, ireq->opt);
+		err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
+		     ireq->ir_rmt_addr, ireq->opt);
 		err = net_xmit_eval(err);
 		if (err || want_cookie)
 			goto drop_and_free;
@@ -1644,9 +1644,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newtp		      = tcp_sk(newsk);
 	newinet		      = inet_sk(newsk);
 	ireq		      = inet_rsk(req);
-	newinet->inet_daddr   = ireq->rmt_addr;
-	newinet->inet_rcv_saddr = ireq->loc_addr;
-	newinet->inet_saddr	      = ireq->loc_addr;
+	newinet->inet_daddr   = ireq->ir_rmt_addr;
+	newinet->inet_rcv_saddr = ireq->ir_loc_addr;
+	newinet->inet_saddr	      = ireq->ir_loc_addr;
 	inet_opt	      = ireq->opt;
 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
 	ireq->opt	      = NULL;
@@ -2548,10 +2548,10 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
 		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK%n",
 		i,
-		ireq->loc_addr,
+		ireq->ir_loc_addr,
 		ntohs(inet_sk(sk)->inet_sport),
-		ireq->rmt_addr,
-		ntohs(ireq->rmt_port),
+		ireq->ir_rmt_addr,
+		ntohs(ireq->ir_rmt_port),
 		TCP_SYN_RECV,
 		0, 0, /* could print option size, but that is af dependent. */
 		1,    /* timers active (only the expire timer) */
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 8fcc2cb9dba4..4a2a84110dfb 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -215,13 +215,15 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
 	addr.family = req->rsk_ops->family;
 	switch (addr.family) {
 	case AF_INET:
-		addr.addr.a4 = inet_rsk(req)->rmt_addr;
+		addr.addr.a4 = inet_rsk(req)->ir_rmt_addr;
 		hash = (__force unsigned int) addr.addr.a4;
 		break;
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
-		*(struct in6_addr *)addr.addr.a6 = inet6_rsk(req)->rmt_addr;
-		hash = ipv6_addr_hash(&inet6_rsk(req)->rmt_addr);
+		*(struct in6_addr *)addr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr;
+		hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr);
 		break;
+#endif
 	default:
 		return NULL;
 	}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c6f01f2cdb32..faec81353522 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2734,8 +2734,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	th->syn = 1;
 	th->ack = 1;
 	TCP_ECN_make_synack(req, th);
-	th->source = ireq->loc_port;
-	th->dest = ireq->rmt_port;
+	th->source = ireq->ir_loc_port;
+	th->dest = ireq->ir_rmt_port;
 	/* Setting of flags are superfluous here for callers (and ECE is
 	 * not even correctly set)
 	 */
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index b7400b480e74..1317c569b58f 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -70,20 +70,20 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
 				      struct flowi6 *fl6,
 				      const struct request_sock *req)
 {
-	struct inet6_request_sock *treq = inet6_rsk(req);
+	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct in6_addr *final_p, final;
 	struct dst_entry *dst;
 
 	memset(fl6, 0, sizeof(*fl6));
 	fl6->flowi6_proto = IPPROTO_TCP;
-	fl6->daddr = treq->rmt_addr;
+	fl6->daddr = ireq->ir_v6_rmt_addr;
 	final_p = fl6_update_dst(fl6, np->opt, &final);
-	fl6->saddr = treq->loc_addr;
-	fl6->flowi6_oif = treq->iif;
+	fl6->saddr = ireq->ir_v6_loc_addr;
+	fl6->flowi6_oif = ireq->ir_iif;
 	fl6->flowi6_mark = sk->sk_mark;
-	fl6->fl6_dport = inet_rsk(req)->rmt_port;
-	fl6->fl6_sport = inet_rsk(req)->loc_port;
+	fl6->fl6_dport = ireq->ir_rmt_port;
+	fl6->fl6_sport = ireq->ir_loc_port;
 	security_req_classify_flow(req, flowi6_to_flowi(fl6));
 
 	dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
@@ -129,13 +129,13 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk,
 						     lopt->nr_table_entries)];
 	     (req = *prev) != NULL;
 	     prev = &req->dl_next) {
-		const struct inet6_request_sock *treq = inet6_rsk(req);
+		const struct inet_request_sock *ireq = inet_rsk(req);
 
-		if (inet_rsk(req)->rmt_port == rport &&
+		if (ireq->ir_rmt_port == rport &&
 		    req->rsk_ops->family == AF_INET6 &&
-		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
-		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
-		    (!treq->iif || treq->iif == iif)) {
+		    ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) &&
+		    ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) &&
+		    (!ireq->ir_iif || ireq->ir_iif == iif)) {
 			WARN_ON(req->sk != NULL);
 			*prevp = prev;
 			return req;
@@ -153,8 +153,8 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
-	const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr,
-				      inet_rsk(req)->rmt_port,
+	const u32 h = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr,
+				      inet_rsk(req)->ir_rmt_port,
 				      lopt->hash_rnd, lopt->nr_table_entries);
 
 	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index d703218a653b..bc5698f9e4cd 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -150,7 +150,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_options_received tcp_opt;
 	struct inet_request_sock *ireq;
-	struct inet6_request_sock *ireq6;
 	struct tcp_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -187,7 +186,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		goto out;
 
 	ireq = inet_rsk(req);
-	ireq6 = inet6_rsk(req);
 	treq = tcp_rsk(req);
 	treq->listener = NULL;
 
@@ -195,22 +193,22 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		goto out_free;
 
 	req->mss = mss;
-	ireq->rmt_port = th->source;
-	ireq->loc_port = th->dest;
-	ireq6->rmt_addr = ipv6_hdr(skb)->saddr;
-	ireq6->loc_addr = ipv6_hdr(skb)->daddr;
+	ireq->ir_rmt_port = th->source;
+	ireq->ir_loc_port = th->dest;
+	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
 		atomic_inc(&skb->users);
-		ireq6->pktopts = skb;
+		ireq->pktopts = skb;
 	}
 
-	ireq6->iif = sk->sk_bound_dev_if;
+	ireq->ir_iif = sk->sk_bound_dev_if;
 	/* So that link locals have meaning */
 	if (!sk->sk_bound_dev_if &&
-	    ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
-		ireq6->iif = inet6_iif(skb);
+	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+		ireq->ir_iif = inet6_iif(skb);
 
 	req->expires = 0UL;
 	req->num_retrans = 0;
@@ -234,12 +232,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		struct flowi6 fl6;
 		memset(&fl6, 0, sizeof(fl6));
 		fl6.flowi6_proto = IPPROTO_TCP;
-		fl6.daddr = ireq6->rmt_addr;
+		fl6.daddr = ireq->ir_v6_rmt_addr;
 		final_p = fl6_update_dst(&fl6, np->opt, &final);
-		fl6.saddr = ireq6->loc_addr;
+		fl6.saddr = ireq->ir_v6_loc_addr;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
 		fl6.flowi6_mark = sk->sk_mark;
-		fl6.fl6_dport = inet_rsk(req)->rmt_port;
+		fl6.fl6_dport = ireq->ir_rmt_port;
 		fl6.fl6_sport = inet_sk(sk)->inet_sport;
 		security_req_classify_flow(req, flowi6_to_flowi(&fl6));
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 541dfc40c7b3..db234d609b33 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -465,7 +465,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct request_sock *req,
 			      u16 queue_mapping)
 {
-	struct inet6_request_sock *treq = inet6_rsk(req);
+	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff * skb;
 	int err = -ENOMEM;
@@ -477,9 +477,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 	skb = tcp_make_synack(sk, dst, req, NULL);
 
 	if (skb) {
-		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
+		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
+				    &ireq->ir_v6_rmt_addr);
 
-		fl6->daddr = treq->rmt_addr;
+		fl6->daddr = ireq->ir_v6_rmt_addr;
 		skb_set_queue_mapping(skb, queue_mapping);
 		err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
 		err = net_xmit_eval(err);
@@ -502,7 +503,7 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
 
 static void tcp_v6_reqsk_destructor(struct request_sock *req)
 {
-	kfree_skb(inet6_rsk(req)->pktopts);
+	kfree_skb(inet_rsk(req)->pktopts);
 }
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -521,7 +522,7 @@ static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
 						      struct request_sock *req)
 {
-	return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
+	return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr);
 }
 
 static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
@@ -623,8 +624,8 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
 		saddr = &inet6_sk(sk)->saddr;
 		daddr = &sk->sk_v6_daddr;
 	} else if (req) {
-		saddr = &inet6_rsk(req)->loc_addr;
-		daddr = &inet6_rsk(req)->rmt_addr;
+		saddr = &inet_rsk(req)->ir_v6_loc_addr;
+		daddr = &inet_rsk(req)->ir_v6_rmt_addr;
 	} else {
 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 		saddr = &ip6h->saddr;
@@ -949,7 +950,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_options_received tmp_opt;
 	struct request_sock *req;
-	struct inet6_request_sock *treq;
+	struct inet_request_sock *ireq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 isn = TCP_SKB_CB(skb)->when;
@@ -994,25 +995,25 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb);
 
-	treq = inet6_rsk(req);
-	treq->rmt_addr = ipv6_hdr(skb)->saddr;
-	treq->loc_addr = ipv6_hdr(skb)->daddr;
+	ireq = inet_rsk(req);
+	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
 	if (!want_cookie || tmp_opt.tstamp_ok)
 		TCP_ECN_create_request(req, skb, sock_net(sk));
 
-	treq->iif = sk->sk_bound_dev_if;
+	ireq->ir_iif = sk->sk_bound_dev_if;
 
 	/* So that link locals have meaning */
 	if (!sk->sk_bound_dev_if &&
-	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
-		treq->iif = inet6_iif(skb);
+	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+		ireq->ir_iif = inet6_iif(skb);
 
 	if (!isn) {
 		if (ipv6_opt_accepted(sk, skb) ||
 		    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 		    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
 			atomic_inc(&skb->users);
-			treq->pktopts = skb;
+			ireq->pktopts = skb;
 		}
 
 		if (want_cookie) {
@@ -1051,7 +1052,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 			 * to the moment of synflood.
 			 */
 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
-				       &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
+				       &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source));
 			goto drop_and_release;
 		}
 
@@ -1086,7 +1087,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 					  struct request_sock *req,
 					  struct dst_entry *dst)
 {
-	struct inet6_request_sock *treq;
+	struct inet_request_sock *ireq;
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct tcp6_sock *newtcp6sk;
 	struct inet_sock *newinet;
@@ -1151,7 +1152,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		return newsk;
 	}
 
-	treq = inet6_rsk(req);
+	ireq = inet_rsk(req);
 
 	if (sk_acceptq_is_full(sk))
 		goto out_overflow;
@@ -1185,10 +1186,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-	newsk->sk_v6_daddr = treq->rmt_addr;
-	newnp->saddr = treq->loc_addr;
-	newsk->sk_v6_rcv_saddr = treq->loc_addr;
-	newsk->sk_bound_dev_if = treq->iif;
+	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
+	newnp->saddr = ireq->ir_v6_loc_addr;
+	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
+	newsk->sk_bound_dev_if = ireq->ir_iif;
 
 	/* Now IPv6 options...
 
@@ -1203,11 +1204,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	/* Clone pktoptions received with SYN */
 	newnp->pktoptions = NULL;
-	if (treq->pktopts != NULL) {
-		newnp->pktoptions = skb_clone(treq->pktopts,
+	if (ireq->pktopts != NULL) {
+		newnp->pktoptions = skb_clone(ireq->pktopts,
 					      sk_gfp_atomic(sk, GFP_ATOMIC));
-		consume_skb(treq->pktopts);
-		treq->pktopts = NULL;
+		consume_skb(ireq->pktopts);
+		ireq->pktopts = NULL;
 		if (newnp->pktoptions)
 			skb_set_owner_r(newnp->pktoptions, newsk);
 	}
@@ -1722,8 +1723,8 @@ static void get_openreq6(struct seq_file *seq,
 			 const struct sock *sk, struct request_sock *req, int i, kuid_t uid)
 {
 	int ttd = req->expires - jiffies;
-	const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
-	const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
+	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
+	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
 
 	if (ttd < 0)
 		ttd = 0;
@@ -1734,10 +1735,10 @@ static void get_openreq6(struct seq_file *seq,
 		   i,
 		   src->s6_addr32[0], src->s6_addr32[1],
 		   src->s6_addr32[2], src->s6_addr32[3],
-		   ntohs(inet_rsk(req)->loc_port),
+		   ntohs(inet_rsk(req)->ir_loc_port),
 		   dest->s6_addr32[0], dest->s6_addr32[1],
 		   dest->s6_addr32[2], dest->s6_addr32[3],
-		   ntohs(inet_rsk(req)->rmt_port),
+		   ntohs(inet_rsk(req)->ir_rmt_port),
 		   TCP_SYN_RECV,
 		   0,0, /* could print option size, but that is af dependent. */
 		   1,   /* timers active (only the expire timer) */
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 96a458e12f60..dce1bebf7aec 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -817,7 +817,7 @@ int netlbl_req_setattr(struct request_sock *req,
 	switch (req->rsk_ops->family) {
 	case AF_INET:
 		entry = netlbl_domhsh_getentry_af4(secattr->domain,
-						   inet_rsk(req)->rmt_addr);
+						   inet_rsk(req)->ir_rmt_addr);
 		if (entry == NULL) {
 			ret_val = -ENOENT;
 			goto req_setattr_return;
-- 
cgit v1.2.3-71-gd317


From 795aa6ef6a1aba99050735eadd0c2341b789b53b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 10 Oct 2013 09:21:55 +0200
Subject: netfilter: pass hook ops to hookfn

Pass the hook ops to the hookfn to allow for generic hook
functions. This change is required by nf_tables.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h                      |  3 +-
 net/bridge/br_netfilter.c                      | 22 +++++++++-----
 net/bridge/netfilter/ebtable_filter.c          | 16 ++++++----
 net/bridge/netfilter/ebtable_nat.c             | 16 ++++++----
 net/decnet/netfilter/dn_rtmsg.c                |  2 +-
 net/ipv4/netfilter/arptable_filter.c           |  5 +--
 net/ipv4/netfilter/ipt_CLUSTERIP.c             |  2 +-
 net/ipv4/netfilter/ipt_SYNPROXY.c              |  2 +-
 net/ipv4/netfilter/iptable_filter.c            |  7 +++--
 net/ipv4/netfilter/iptable_mangle.c            | 10 +++---
 net/ipv4/netfilter/iptable_nat.c               | 26 ++++++++--------
 net/ipv4/netfilter/iptable_raw.c               |  6 ++--
 net/ipv4/netfilter/iptable_security.c          |  7 +++--
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 12 ++++----
 net/ipv4/netfilter/nf_defrag_ipv4.c            |  6 ++--
 net/ipv6/netfilter/ip6t_SYNPROXY.c             |  2 +-
 net/ipv6/netfilter/ip6table_filter.c           |  5 +--
 net/ipv6/netfilter/ip6table_mangle.c           | 10 +++---
 net/ipv6/netfilter/ip6table_nat.c              | 27 +++++++++--------
 net/ipv6/netfilter/ip6table_raw.c              |  5 +--
 net/ipv6/netfilter/ip6table_security.c         |  5 +--
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 14 +++++----
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c      |  6 ++--
 net/netfilter/core.c                           |  2 +-
 net/netfilter/ipvs/ip_vs_core.c                | 42 +++++++++++++-------------
 security/selinux/hooks.c                       | 10 +++---
 26 files changed, 148 insertions(+), 122 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 61223c52414f..fef7e67f7101 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -42,7 +42,8 @@ int netfilter_init(void);
 
 struct sk_buff;
 
-typedef unsigned int nf_hookfn(unsigned int hooknum,
+struct nf_hook_ops;
+typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops,
 			       struct sk_buff *skb,
 			       const struct net_device *in,
 			       const struct net_device *out,
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index f87736270eaa..878f008afefa 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -619,7 +619,7 @@ bad:
 
 /* Replicate the checks that IPv6 does on packet reception and pass the packet
  * to ip6tables, which doesn't support NAT, so things are fairly simple. */
-static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
+static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
 					   struct sk_buff *skb,
 					   const struct net_device *in,
 					   const struct net_device *out,
@@ -669,7 +669,8 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
  * receiving device) to make netfilter happy, the REDIRECT
  * target in particular.  Save the original destination IP
  * address to be able to detect DNAT afterwards. */
-static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
+				      struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
@@ -691,7 +692,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
 			return NF_ACCEPT;
 
 		nf_bridge_pull_encap_header_rcsum(skb);
-		return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
+		return br_nf_pre_routing_ipv6(ops, skb, in, out, okfn);
 	}
 
 	if (!brnf_call_iptables && !br->nf_call_iptables)
@@ -727,7 +728,8 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
  * took place when the packet entered the bridge), but we
  * register an IPv4 PRE_ROUTING 'sabotage' hook that will
  * prevent this from happening. */
-static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_local_in(const struct nf_hook_ops *ops,
+				   struct sk_buff *skb,
 				   const struct net_device *in,
 				   const struct net_device *out,
 				   int (*okfn)(struct sk_buff *))
@@ -765,7 +767,8 @@ static int br_nf_forward_finish(struct sk_buff *skb)
  * but we are still able to filter on the 'real' indev/outdev
  * because of the physdev module. For ARP, indev and outdev are the
  * bridge ports. */
-static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
+				     struct sk_buff *skb,
 				     const struct net_device *in,
 				     const struct net_device *out,
 				     int (*okfn)(struct sk_buff *))
@@ -818,7 +821,8 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
-static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
+				      struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
@@ -878,7 +882,8 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 #endif
 
 /* PF_BRIDGE/POST_ROUTING ********************************************/
-static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
+				       struct sk_buff *skb,
 				       const struct net_device *in,
 				       const struct net_device *out,
 				       int (*okfn)(struct sk_buff *))
@@ -923,7 +928,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 /* IP/SABOTAGE *****************************************************/
 /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
  * for the second time. */
-static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb,
+static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
+				   struct sk_buff *skb,
 				   const struct net_device *in,
 				   const struct net_device *out,
 				   int (*okfn)(struct sk_buff *))
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 94b2b700cff8..bb2da7b706e7 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -60,17 +60,21 @@ static const struct ebt_table frame_filter =
 };
 
 static unsigned int
-ebt_in_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
-   const struct net_device *out, int (*okfn)(struct sk_buff *))
+ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+	    const struct net_device *in, const struct net_device *out,
+	    int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_filter);
+	return ebt_do_table(ops->hooknum, skb, in, out,
+			    dev_net(in)->xt.frame_filter);
 }
 
 static unsigned int
-ebt_out_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
-   const struct net_device *out, int (*okfn)(struct sk_buff *))
+ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+	     const struct net_device *in, const struct net_device *out,
+	     int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_filter);
+	return ebt_do_table(ops->hooknum, skb, in, out,
+			    dev_net(out)->xt.frame_filter);
 }
 
 static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 322555acdd40..bd238f1f105b 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -60,17 +60,21 @@ static struct ebt_table frame_nat =
 };
 
 static unsigned int
-ebt_nat_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in
-   , const struct net_device *out, int (*okfn)(struct sk_buff *))
+ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
+	   const struct net_device *in, const struct net_device *out,
+	   int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_nat);
+	return ebt_do_table(ops->hooknum, skb, in, out,
+			    dev_net(in)->xt.frame_nat);
 }
 
 static unsigned int
-ebt_nat_out(unsigned int hook, struct sk_buff *skb, const struct net_device *in
-   , const struct net_device *out, int (*okfn)(struct sk_buff *))
+ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
+	    const struct net_device *in, const struct net_device *out,
+	    int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_nat);
+	return ebt_do_table(ops->hooknum, skb, in, out,
+			    dev_net(out)->xt.frame_nat);
 }
 
 static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 2a7efe388344..e83015cecfa7 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -87,7 +87,7 @@ static void dnrmg_send_peer(struct sk_buff *skb)
 }
 
 
-static unsigned int dnrmg_hook(unsigned int hook,
+static unsigned int dnrmg_hook(const struct nf_hook_ops *ops,
 			struct sk_buff *skb,
 			const struct net_device *in,
 			const struct net_device *out,
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index a865f6f94013..802ddecb30b8 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -27,13 +27,14 @@ static const struct xt_table packet_filter = {
 
 /* The work comes in here from netfilter.c */
 static unsigned int
-arptable_filter_hook(unsigned int hook, struct sk_buff *skb,
+arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		     const struct net_device *in, const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
 	const struct net *net = dev_net((in != NULL) ? in : out);
 
-	return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter);
+	return arpt_do_table(skb, ops->hooknum, in, out,
+			     net->ipv4.arptable_filter);
 }
 
 static struct nf_hook_ops *arpfilter_ops __read_mostly;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 0b732efd32e2..a2e2b61cd7da 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -483,7 +483,7 @@ static void arp_print(struct arp_payload *payload)
 #endif
 
 static unsigned int
-arp_mangle(unsigned int hook,
+arp_mangle(const struct nf_hook_ops *ops,
 	   struct sk_buff *skb,
 	   const struct net_device *in,
 	   const struct net_device *out,
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index b6346bf2fde3..01cffeaa0085 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -297,7 +297,7 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 	return XT_CONTINUE;
 }
 
-static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
+static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops,
 				       struct sk_buff *skb,
 				       const struct net_device *in,
 				       const struct net_device *out,
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 50af5b45c050..e08a74a243a8 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -33,20 +33,21 @@ static const struct xt_table packet_filter = {
 };
 
 static unsigned int
-iptable_filter_hook(unsigned int hook, struct sk_buff *skb,
+iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		    const struct net_device *in, const struct net_device *out,
 		    int (*okfn)(struct sk_buff *))
 {
 	const struct net *net;
 
-	if (hook == NF_INET_LOCAL_OUT &&
+	if (ops->hooknum == NF_INET_LOCAL_OUT &&
 	    (skb->len < sizeof(struct iphdr) ||
 	     ip_hdrlen(skb) < sizeof(struct iphdr)))
 		/* root is playing with raw sockets. */
 		return NF_ACCEPT;
 
 	net = dev_net((in != NULL) ? in : out);
-	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter);
+	return ipt_do_table(skb, ops->hooknum, in, out,
+			    net->ipv4.iptable_filter);
 }
 
 static struct nf_hook_ops *filter_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 0d8cd82e0fad..6a5079c34bb3 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -79,19 +79,19 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
 
 /* The work comes in here from netfilter.c. */
 static unsigned int
-iptable_mangle_hook(unsigned int hook,
+iptable_mangle_hook(const struct nf_hook_ops *ops,
 		     struct sk_buff *skb,
 		     const struct net_device *in,
 		     const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
-	if (hook == NF_INET_LOCAL_OUT)
+	if (ops->hooknum == NF_INET_LOCAL_OUT)
 		return ipt_mangle_out(skb, out);
-	if (hook == NF_INET_POST_ROUTING)
-		return ipt_do_table(skb, hook, in, out,
+	if (ops->hooknum == NF_INET_POST_ROUTING)
+		return ipt_do_table(skb, ops->hooknum, in, out,
 				    dev_net(out)->ipv4.iptable_mangle);
 	/* PREROUTING/INPUT/FORWARD: */
-	return ipt_do_table(skb, hook, in, out,
+	return ipt_do_table(skb, ops->hooknum, in, out,
 			    dev_net(in)->ipv4.iptable_mangle);
 }
 
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 683bfaffed65..ee2886126e3d 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -61,7 +61,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
 }
 
 static unsigned int
-nf_nat_ipv4_fn(unsigned int hooknum,
+nf_nat_ipv4_fn(const struct nf_hook_ops *ops,
 	       struct sk_buff *skb,
 	       const struct net_device *in,
 	       const struct net_device *out,
@@ -71,7 +71,7 @@ nf_nat_ipv4_fn(unsigned int hooknum,
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn_nat *nat;
 	/* maniptype == SRC for postrouting. */
-	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+	enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
 
 	/* We never see fragments: conntrack defrags on pre-routing
 	 * and local-out, and nf_nat_out protects post-routing.
@@ -108,7 +108,7 @@ nf_nat_ipv4_fn(unsigned int hooknum,
 	case IP_CT_RELATED_REPLY:
 		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
 			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
-							   hooknum))
+							   ops->hooknum))
 				return NF_DROP;
 			else
 				return NF_ACCEPT;
@@ -121,14 +121,14 @@ nf_nat_ipv4_fn(unsigned int hooknum,
 		if (!nf_nat_initialized(ct, maniptype)) {
 			unsigned int ret;
 
-			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
+			ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
 			if (ret != NF_ACCEPT)
 				return ret;
 		} else {
 			pr_debug("Already setup manip %s for ct %p\n",
 				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
 				 ct);
-			if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+			if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
 				goto oif_changed;
 		}
 		break;
@@ -137,11 +137,11 @@ nf_nat_ipv4_fn(unsigned int hooknum,
 		/* ESTABLISHED */
 		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
 			     ctinfo == IP_CT_ESTABLISHED_REPLY);
-		if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+		if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
 			goto oif_changed;
 	}
 
-	return nf_nat_packet(ct, ctinfo, hooknum, skb);
+	return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
 
 oif_changed:
 	nf_ct_kill_acct(ct, ctinfo, skb);
@@ -149,7 +149,7 @@ oif_changed:
 }
 
 static unsigned int
-nf_nat_ipv4_in(unsigned int hooknum,
+nf_nat_ipv4_in(const struct nf_hook_ops *ops,
 	       struct sk_buff *skb,
 	       const struct net_device *in,
 	       const struct net_device *out,
@@ -158,7 +158,7 @@ nf_nat_ipv4_in(unsigned int hooknum,
 	unsigned int ret;
 	__be32 daddr = ip_hdr(skb)->daddr;
 
-	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+	ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    daddr != ip_hdr(skb)->daddr)
 		skb_dst_drop(skb);
@@ -167,7 +167,7 @@ nf_nat_ipv4_in(unsigned int hooknum,
 }
 
 static unsigned int
-nf_nat_ipv4_out(unsigned int hooknum,
+nf_nat_ipv4_out(const struct nf_hook_ops *ops,
 		struct sk_buff *skb,
 		const struct net_device *in,
 		const struct net_device *out,
@@ -185,7 +185,7 @@ nf_nat_ipv4_out(unsigned int hooknum,
 	    ip_hdrlen(skb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
-	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+	ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
 #ifdef CONFIG_XFRM
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
@@ -207,7 +207,7 @@ nf_nat_ipv4_out(unsigned int hooknum,
 }
 
 static unsigned int
-nf_nat_ipv4_local_fn(unsigned int hooknum,
+nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
 		     struct sk_buff *skb,
 		     const struct net_device *in,
 		     const struct net_device *out,
@@ -223,7 +223,7 @@ nf_nat_ipv4_local_fn(unsigned int hooknum,
 	    ip_hdrlen(skb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
-	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+	ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 1f82aea11df6..b2f7e8f98316 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -20,20 +20,20 @@ static const struct xt_table packet_raw = {
 
 /* The work comes in here from netfilter.c. */
 static unsigned int
-iptable_raw_hook(unsigned int hook, struct sk_buff *skb,
+iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		 const struct net_device *in, const struct net_device *out,
 		 int (*okfn)(struct sk_buff *))
 {
 	const struct net *net;
 
-	if (hook == NF_INET_LOCAL_OUT && 
+	if (ops->hooknum == NF_INET_LOCAL_OUT &&
 	    (skb->len < sizeof(struct iphdr) ||
 	     ip_hdrlen(skb) < sizeof(struct iphdr)))
 		/* root is playing with raw sockets. */
 		return NF_ACCEPT;
 
 	net = dev_net((in != NULL) ? in : out);
-	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw);
+	return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.iptable_raw);
 }
 
 static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index f867a8d38bf7..c86647ed2078 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -37,21 +37,22 @@ static const struct xt_table security_table = {
 };
 
 static unsigned int
-iptable_security_hook(unsigned int hook, struct sk_buff *skb,
+iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		      const struct net_device *in,
 		      const struct net_device *out,
 		      int (*okfn)(struct sk_buff *))
 {
 	const struct net *net;
 
-	if (hook == NF_INET_LOCAL_OUT &&
+	if (ops->hooknum == NF_INET_LOCAL_OUT &&
 	    (skb->len < sizeof(struct iphdr) ||
 	     ip_hdrlen(skb) < sizeof(struct iphdr)))
 		/* Somebody is playing with raw sockets. */
 		return NF_ACCEPT;
 
 	net = dev_net((in != NULL) ? in : out);
-	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security);
+	return ipt_do_table(skb, ops->hooknum, in, out,
+			    net->ipv4.iptable_security);
 }
 
 static struct nf_hook_ops *sectbl_ops __read_mostly;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 86f5b34a4ed1..ecd8bec411c9 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -92,7 +92,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 	return NF_ACCEPT;
 }
 
-static unsigned int ipv4_helper(unsigned int hooknum,
+static unsigned int ipv4_helper(const struct nf_hook_ops *ops,
 				struct sk_buff *skb,
 				const struct net_device *in,
 				const struct net_device *out,
@@ -121,7 +121,7 @@ static unsigned int ipv4_helper(unsigned int hooknum,
 			    ct, ctinfo);
 }
 
-static unsigned int ipv4_confirm(unsigned int hooknum,
+static unsigned int ipv4_confirm(const struct nf_hook_ops *ops,
 				 struct sk_buff *skb,
 				 const struct net_device *in,
 				 const struct net_device *out,
@@ -147,16 +147,16 @@ out:
 	return nf_conntrack_confirm(skb);
 }
 
-static unsigned int ipv4_conntrack_in(unsigned int hooknum,
+static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops,
 				      struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
-	return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb);
+	return nf_conntrack_in(dev_net(in), PF_INET, ops->hooknum, skb);
 }
 
-static unsigned int ipv4_conntrack_local(unsigned int hooknum,
+static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
 					 const struct net_device *in,
 					 const struct net_device *out,
@@ -166,7 +166,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
 	if (skb->len < sizeof(struct iphdr) ||
 	    ip_hdrlen(skb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
-	return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb);
+	return nf_conntrack_in(dev_net(out), PF_INET, ops->hooknum, skb);
 }
 
 /* Connection tracking may drop packets, but never alters them, so
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 742815518b0f..12e13bd82b5b 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -60,7 +60,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
 		return IP_DEFRAG_CONNTRACK_OUT + zone;
 }
 
-static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
+static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
 					  struct sk_buff *skb,
 					  const struct net_device *in,
 					  const struct net_device *out,
@@ -83,7 +83,9 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 #endif
 	/* Gather fragments. */
 	if (ip_is_fragment(ip_hdr(skb))) {
-		enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb);
+		enum ip_defrag_users user =
+			nf_ct_defrag_user(ops->hooknum, skb);
+
 		if (nf_ct_ipv4_gather_frags(skb, user))
 			return NF_STOLEN;
 	}
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 2748b042da72..bf9f612c1bc2 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -312,7 +312,7 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 	return XT_CONTINUE;
 }
 
-static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
+static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops,
 				       struct sk_buff *skb,
 				       const struct net_device *in,
 				       const struct net_device *out,
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 29b44b14c5ea..ca7f6c128086 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -32,13 +32,14 @@ static const struct xt_table packet_filter = {
 
 /* The work comes in here from netfilter.c. */
 static unsigned int
-ip6table_filter_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		     const struct net_device *in, const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
 	const struct net *net = dev_net((in != NULL) ? in : out);
 
-	return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter);
+	return ip6t_do_table(skb, ops->hooknum, in, out,
+			     net->ipv6.ip6table_filter);
 }
 
 static struct nf_hook_ops *filter_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index c705907ae6ab..307bbb782d14 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -76,17 +76,17 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
 
 /* The work comes in here from netfilter.c. */
 static unsigned int
-ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		     const struct net_device *in, const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
-	if (hook == NF_INET_LOCAL_OUT)
+	if (ops->hooknum == NF_INET_LOCAL_OUT)
 		return ip6t_mangle_out(skb, out);
-	if (hook == NF_INET_POST_ROUTING)
-		return ip6t_do_table(skb, hook, in, out,
+	if (ops->hooknum == NF_INET_POST_ROUTING)
+		return ip6t_do_table(skb, ops->hooknum, in, out,
 				     dev_net(out)->ipv6.ip6table_mangle);
 	/* INPUT/FORWARD */
-	return ip6t_do_table(skb, hook, in, out,
+	return ip6t_do_table(skb, ops->hooknum, in, out,
 			     dev_net(in)->ipv6.ip6table_mangle);
 }
 
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 9b076d2d3a7b..84c7f33d0cf8 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -63,7 +63,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
 }
 
 static unsigned int
-nf_nat_ipv6_fn(unsigned int hooknum,
+nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
 	       struct sk_buff *skb,
 	       const struct net_device *in,
 	       const struct net_device *out,
@@ -72,7 +72,7 @@ nf_nat_ipv6_fn(unsigned int hooknum,
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn_nat *nat;
-	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+	enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
 	__be16 frag_off;
 	int hdrlen;
 	u8 nexthdr;
@@ -111,7 +111,8 @@ nf_nat_ipv6_fn(unsigned int hooknum,
 
 		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
 			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
-							     hooknum, hdrlen))
+							     ops->hooknum,
+							     hdrlen))
 				return NF_DROP;
 			else
 				return NF_ACCEPT;
@@ -124,14 +125,14 @@ nf_nat_ipv6_fn(unsigned int hooknum,
 		if (!nf_nat_initialized(ct, maniptype)) {
 			unsigned int ret;
 
-			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
+			ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
 			if (ret != NF_ACCEPT)
 				return ret;
 		} else {
 			pr_debug("Already setup manip %s for ct %p\n",
 				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
 				 ct);
-			if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+			if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
 				goto oif_changed;
 		}
 		break;
@@ -140,11 +141,11 @@ nf_nat_ipv6_fn(unsigned int hooknum,
 		/* ESTABLISHED */
 		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
 			     ctinfo == IP_CT_ESTABLISHED_REPLY);
-		if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+		if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
 			goto oif_changed;
 	}
 
-	return nf_nat_packet(ct, ctinfo, hooknum, skb);
+	return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
 
 oif_changed:
 	nf_ct_kill_acct(ct, ctinfo, skb);
@@ -152,7 +153,7 @@ oif_changed:
 }
 
 static unsigned int
-nf_nat_ipv6_in(unsigned int hooknum,
+nf_nat_ipv6_in(const struct nf_hook_ops *ops,
 	       struct sk_buff *skb,
 	       const struct net_device *in,
 	       const struct net_device *out,
@@ -161,7 +162,7 @@ nf_nat_ipv6_in(unsigned int hooknum,
 	unsigned int ret;
 	struct in6_addr daddr = ipv6_hdr(skb)->daddr;
 
-	ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
 		skb_dst_drop(skb);
@@ -170,7 +171,7 @@ nf_nat_ipv6_in(unsigned int hooknum,
 }
 
 static unsigned int
-nf_nat_ipv6_out(unsigned int hooknum,
+nf_nat_ipv6_out(const struct nf_hook_ops *ops,
 		struct sk_buff *skb,
 		const struct net_device *in,
 		const struct net_device *out,
@@ -187,7 +188,7 @@ nf_nat_ipv6_out(unsigned int hooknum,
 	if (skb->len < sizeof(struct ipv6hdr))
 		return NF_ACCEPT;
 
-	ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
 #ifdef CONFIG_XFRM
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
@@ -209,7 +210,7 @@ nf_nat_ipv6_out(unsigned int hooknum,
 }
 
 static unsigned int
-nf_nat_ipv6_local_fn(unsigned int hooknum,
+nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
 		     struct sk_buff *skb,
 		     const struct net_device *in,
 		     const struct net_device *out,
@@ -224,7 +225,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum,
 	if (skb->len < sizeof(struct ipv6hdr))
 		return NF_ACCEPT;
 
-	ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 9a626d86720f..5274740acecc 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -19,13 +19,14 @@ static const struct xt_table packet_raw = {
 
 /* The work comes in here from netfilter.c. */
 static unsigned int
-ip6table_raw_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		  const struct net_device *in, const struct net_device *out,
 		  int (*okfn)(struct sk_buff *))
 {
 	const struct net *net = dev_net((in != NULL) ? in : out);
 
-	return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw);
+	return ip6t_do_table(skb, ops->hooknum, in, out,
+			     net->ipv6.ip6table_raw);
 }
 
 static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index ce88d1d7e525..ab3b0219ecfa 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -36,14 +36,15 @@ static const struct xt_table security_table = {
 };
 
 static unsigned int
-ip6table_security_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		       const struct net_device *in,
 		       const struct net_device *out,
 		       int (*okfn)(struct sk_buff *))
 {
 	const struct net *net = dev_net((in != NULL) ? in : out);
 
-	return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security);
+	return ip6t_do_table(skb, ops->hooknum, in, out,
+			     net->ipv6.ip6table_security);
 }
 
 static struct nf_hook_ops *sectbl_ops __read_mostly;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 54b75ead5a69..486545eb42ce 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 	return NF_ACCEPT;
 }
 
-static unsigned int ipv6_helper(unsigned int hooknum,
+static unsigned int ipv6_helper(const struct nf_hook_ops *ops,
 				struct sk_buff *skb,
 				const struct net_device *in,
 				const struct net_device *out,
@@ -133,7 +133,7 @@ static unsigned int ipv6_helper(unsigned int hooknum,
 	return helper->help(skb, protoff, ct, ctinfo);
 }
 
-static unsigned int ipv6_confirm(unsigned int hooknum,
+static unsigned int ipv6_confirm(const struct nf_hook_ops *ops,
 				 struct sk_buff *skb,
 				 const struct net_device *in,
 				 const struct net_device *out,
@@ -219,16 +219,17 @@ static unsigned int __ipv6_conntrack_in(struct net *net,
 	return nf_conntrack_in(net, PF_INET6, hooknum, skb);
 }
 
-static unsigned int ipv6_conntrack_in(unsigned int hooknum,
+static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,
 				      struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
-	return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn);
+	return __ipv6_conntrack_in(dev_net(in), ops->hooknum, skb, in, out,
+				   okfn);
 }
 
-static unsigned int ipv6_conntrack_local(unsigned int hooknum,
+static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
 					 const struct net_device *in,
 					 const struct net_device *out,
@@ -239,7 +240,8 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
 		net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
 		return NF_ACCEPT;
 	}
-	return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn);
+	return __ipv6_conntrack_in(dev_net(out), ops->hooknum, skb, in, out,
+				   okfn);
 }
 
 static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index aacd121fe8c5..ec483aa3f60f 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -52,7 +52,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
 
 }
 
-static unsigned int ipv6_defrag(unsigned int hooknum,
+static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
 				struct sk_buff *skb,
 				const struct net_device *in,
 				const struct net_device *out,
@@ -66,7 +66,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
 		return NF_ACCEPT;
 #endif
 
-	reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
+	reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb));
 	/* queued */
 	if (reasm == NULL)
 		return NF_STOLEN;
@@ -75,7 +75,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
 	if (reasm == skb)
 		return NF_ACCEPT;
 
-	nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
+	nf_ct_frag6_output(ops->hooknum, reasm, (struct net_device *)in,
 			   (struct net_device *)out, okfn);
 
 	return NF_STOLEN;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 593b16ea45e0..1fbab0cdd302 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -146,7 +146,7 @@ unsigned int nf_iterate(struct list_head *head,
 		/* Optimization: we don't need to hold module
 		   reference here, since function can't sleep. --RR */
 repeat:
-		verdict = (*elemp)->hook(hook, skb, indev, outdev, okfn);
+		verdict = (*elemp)->hook(*elemp, skb, indev, outdev, okfn);
 		if (verdict != NF_ACCEPT) {
 #ifdef CONFIG_NETFILTER_DEBUG
 			if (unlikely((verdict & NF_VERDICT_MASK)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 74fd00c27210..34fda62f40f6 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1239,11 +1239,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
  *	Check if packet is reply for established ip_vs_conn.
  */
 static unsigned int
-ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
 	     const struct net_device *in, const struct net_device *out,
 	     int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_out(hooknum, skb, AF_INET);
+	return ip_vs_out(ops->hooknum, skb, AF_INET);
 }
 
 /*
@@ -1251,11 +1251,11 @@ ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
  *	Check if packet is reply for established ip_vs_conn.
  */
 static unsigned int
-ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		   const struct net_device *in, const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_out(hooknum, skb, AF_INET);
+	return ip_vs_out(ops->hooknum, skb, AF_INET);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -1266,11 +1266,11 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
  *	Check if packet is reply for established ip_vs_conn.
  */
 static unsigned int
-ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 	     const struct net_device *in, const struct net_device *out,
 	     int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_out(hooknum, skb, AF_INET6);
+	return ip_vs_out(ops->hooknum, skb, AF_INET6);
 }
 
 /*
@@ -1278,11 +1278,11 @@ ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
  *	Check if packet is reply for established ip_vs_conn.
  */
 static unsigned int
-ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		   const struct net_device *in, const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_out(hooknum, skb, AF_INET6);
+	return ip_vs_out(ops->hooknum, skb, AF_INET6);
 }
 
 #endif
@@ -1733,12 +1733,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
  *	Schedule and forward packets from remote clients
  */
 static unsigned int
-ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		      const struct net_device *in,
 		      const struct net_device *out,
 		      int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_in(hooknum, skb, AF_INET);
+	return ip_vs_in(ops->hooknum, skb, AF_INET);
 }
 
 /*
@@ -1746,11 +1746,11 @@ ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb,
  *	Schedule and forward packets from local clients
  */
 static unsigned int
-ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		     const struct net_device *in, const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_in(hooknum, skb, AF_INET);
+	return ip_vs_in(ops->hooknum, skb, AF_INET);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -1760,7 +1760,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
  * Copy info from first fragment, to the rest of them.
  */
 static unsigned int
-ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_preroute_frag6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		     const struct net_device *in,
 		     const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
@@ -1792,12 +1792,12 @@ ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
  *	Schedule and forward packets from remote clients
  */
 static unsigned int
-ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		      const struct net_device *in,
 		      const struct net_device *out,
 		      int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_in(hooknum, skb, AF_INET6);
+	return ip_vs_in(ops->hooknum, skb, AF_INET6);
 }
 
 /*
@@ -1805,11 +1805,11 @@ ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb,
  *	Schedule and forward packets from local clients
  */
 static unsigned int
-ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		     const struct net_device *in, const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
-	return ip_vs_in(hooknum, skb, AF_INET6);
+	return ip_vs_in(ops->hooknum, skb, AF_INET6);
 }
 
 #endif
@@ -1825,7 +1825,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
  *      and send them to ip_vs_in_icmp.
  */
 static unsigned int
-ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		   const struct net_device *in, const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
@@ -1842,12 +1842,12 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
 	if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
 		return NF_ACCEPT;
 
-	return ip_vs_in_icmp(skb, &r, hooknum);
+	return ip_vs_in_icmp(skb, &r, ops->hooknum);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
 static unsigned int
-ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		      const struct net_device *in, const struct net_device *out,
 		      int (*okfn)(struct sk_buff *))
 {
@@ -1866,7 +1866,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
 	if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
 		return NF_ACCEPT;
 
-	return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr);
+	return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr);
 }
 #endif
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 568c7699abf1..3f224d7795f5 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4668,7 +4668,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
 	return NF_ACCEPT;
 }
 
-static unsigned int selinux_ipv4_forward(unsigned int hooknum,
+static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
 					 const struct net_device *in,
 					 const struct net_device *out,
@@ -4678,7 +4678,7 @@ static unsigned int selinux_ipv4_forward(unsigned int hooknum,
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static unsigned int selinux_ipv6_forward(unsigned int hooknum,
+static unsigned int selinux_ipv6_forward(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
 					 const struct net_device *in,
 					 const struct net_device *out,
@@ -4710,7 +4710,7 @@ static unsigned int selinux_ip_output(struct sk_buff *skb,
 	return NF_ACCEPT;
 }
 
-static unsigned int selinux_ipv4_output(unsigned int hooknum,
+static unsigned int selinux_ipv4_output(const struct nf_hook_ops *ops,
 					struct sk_buff *skb,
 					const struct net_device *in,
 					const struct net_device *out,
@@ -4837,7 +4837,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
 	return NF_ACCEPT;
 }
 
-static unsigned int selinux_ipv4_postroute(unsigned int hooknum,
+static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops,
 					   struct sk_buff *skb,
 					   const struct net_device *in,
 					   const struct net_device *out,
@@ -4847,7 +4847,7 @@ static unsigned int selinux_ipv4_postroute(unsigned int hooknum,
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static unsigned int selinux_ipv6_postroute(unsigned int hooknum,
+static unsigned int selinux_ipv6_postroute(const struct nf_hook_ops *ops,
 					   struct sk_buff *skb,
 					   const struct net_device *in,
 					   const struct net_device *out,
-- 
cgit v1.2.3-71-gd317


From 96518518cc417bb0a8c80b9fb736202e28acdf96 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 14 Oct 2013 11:00:02 +0200
Subject: netfilter: add nftables

This patch adds nftables which is the intended successor of iptables.
This packet filtering framework reuses the existing netfilter hooks,
the connection tracking system, the NAT subsystem, the transparent
proxying engine, the logging infrastructure and the userspace packet
queueing facilities.

In a nutshell, nftables provides a pseudo-state machine with 4 general
purpose registers of 128 bits and 1 specific purpose register to store
verdicts. This pseudo-machine comes with an extensible instruction set,
a.k.a. "expressions" in the nftables jargon. The expressions included
in this patch provide the basic functionality, they are:

* bitwise: to perform bitwise operations.
* byteorder: to change from host/network endianess.
* cmp: to compare data with the content of the registers.
* counter: to enable counters on rules.
* ct: to store conntrack keys into register.
* exthdr: to match IPv6 extension headers.
* immediate: to load data into registers.
* limit: to limit matching based on packet rate.
* log: to log packets.
* meta: to match metainformation that usually comes with the skbuff.
* nat: to perform Network Address Translation.
* payload: to fetch data from the packet payload and store it into
  registers.
* reject (IPv4 only): to explicitly close connection, eg. TCP RST.

Using this instruction-set, the userspace utility 'nft' can transform
the rules expressed in human-readable text representation (using a
new syntax, inspired by tcpdump) to nftables bytecode.

nftables also inherits the table, chain and rule objects from
iptables, but in a more configurable way, and it also includes the
original datatype-agnostic set infrastructure with mapping support.
This set infrastructure is enhanced in the follow up patch (netfilter:
nf_tables: add netlink set API).

This patch includes the following components:

* the netlink API: net/netfilter/nf_tables_api.c and
  include/uapi/netfilter/nf_tables.h
* the packet filter core: net/netfilter/nf_tables_core.c
* the expressions (described above): net/netfilter/nft_*.c
* the filter tables: arp, IPv4, IPv6 and bridge:
  net/ipv4/netfilter/nf_tables_ipv4.c
  net/ipv6/netfilter/nf_tables_ipv6.c
  net/ipv4/netfilter/nf_tables_arp.c
  net/bridge/netfilter/nf_tables_bridge.c
* the NAT table (IPv4 only):
  net/ipv4/netfilter/nf_table_nat_ipv4.c
* the route table (similar to mangle):
  net/ipv4/netfilter/nf_table_route_ipv4.c
  net/ipv6/netfilter/nf_table_route_ipv6.c
* internal definitions under:
  include/net/netfilter/nf_tables.h
  include/net/netfilter/nf_tables_core.h
* It also includes an skeleton expression:
  net/netfilter/nft_expr_template.c
  and the preliminary implementation of the meta target
  net/netfilter/nft_meta_target.c

It also includes a change in struct nf_hook_ops to add a new
pointer to store private data to the hook, that is used to store
the rule list per chain.

This patch is based on the patch from Patrick McHardy, plus merged
accumulated cleanups, fixes and small enhancements to the nftables
code that has been done since 2009, which are:

From Patrick McHardy:
* nf_tables: adjust netlink handler function signatures
* nf_tables: only retry table lookup after successful table module load
* nf_tables: fix event notification echo and avoid unnecessary messages
* nft_ct: add l3proto support
* nf_tables: pass expression context to nft_validate_data_load()
* nf_tables: remove redundant definition
* nft_ct: fix maxattr initialization
* nf_tables: fix invalid event type in nf_tables_getrule()
* nf_tables: simplify nft_data_init() usage
* nf_tables: build in more core modules
* nf_tables: fix double lookup expression unregistation
* nf_tables: move expression initialization to nf_tables_core.c
* nf_tables: build in payload module
* nf_tables: use NFPROTO constants
* nf_tables: rename pid variables to portid
* nf_tables: save 48 bits per rule
* nf_tables: introduce chain rename
* nf_tables: check for duplicate names on chain rename
* nf_tables: remove ability to specify handles for new rules
* nf_tables: return error for rule change request
* nf_tables: return error for NLM_F_REPLACE without rule handle
* nf_tables: include NLM_F_APPEND/NLM_F_REPLACE flags in rule notification
* nf_tables: fix NLM_F_MULTI usage in netlink notifications
* nf_tables: include NLM_F_APPEND in rule dumps

From Pablo Neira Ayuso:
* nf_tables: fix stack overflow in nf_tables_newrule
* nf_tables: nft_ct: fix compilation warning
* nf_tables: nft_ct: fix crash with invalid packets
* nft_log: group and qthreshold are 2^16
* nf_tables: nft_meta: fix socket uid,gid handling
* nft_counter: allow to restore counters
* nf_tables: fix module autoload
* nf_tables: allow to remove all rules placed in one chain
* nf_tables: use 64-bits rule handle instead of 16-bits
* nf_tables: fix chain after rule deletion
* nf_tables: improve deletion performance
* nf_tables: add missing code in route chain type
* nf_tables: rise maximum number of expressions from 12 to 128
* nf_tables: don't delete table if in use
* nf_tables: fix basechain release

From Tomasz Bursztyka:
* nf_tables: Add support for changing users chain's name
* nf_tables: Change chain's name to be fixed sized
* nf_tables: Add support for replacing a rule by another one
* nf_tables: Update uapi nftables netlink header documentation

From Florian Westphal:
* nft_log: group is u16, snaplen u32

From Phil Oester:
* nf_tables: operational limit match

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h                          |   11 +-
 include/net/netfilter/nf_tables.h                  |  301 ++++
 include/net/netfilter/nf_tables_core.h             |   25 +
 include/uapi/linux/netfilter/Kbuild                |    1 +
 include/uapi/linux/netfilter/nf_conntrack_common.h |    4 +
 include/uapi/linux/netfilter/nf_tables.h           |  582 +++++++
 include/uapi/linux/netfilter/nfnetlink.h           |    5 +-
 net/bridge/netfilter/Kconfig                       |    3 +
 net/bridge/netfilter/Makefile                      |    2 +
 net/bridge/netfilter/nf_tables_bridge.c            |   37 +
 net/ipv4/netfilter/Kconfig                         |   16 +
 net/ipv4/netfilter/Makefile                        |    5 +
 net/ipv4/netfilter/nf_table_nat_ipv4.c             |  409 +++++
 net/ipv4/netfilter/nf_table_route_ipv4.c           |   97 ++
 net/ipv4/netfilter/nf_tables_ipv4.c                |   59 +
 net/ipv4/netfilter/nft_reject_ipv4.c               |  117 ++
 net/ipv6/netfilter/Kconfig                         |    8 +
 net/ipv6/netfilter/Makefile                        |    4 +
 net/ipv6/netfilter/nf_table_route_ipv6.c           |   93 ++
 net/ipv6/netfilter/nf_tables_ipv6.c                |   57 +
 net/netfilter/Kconfig                              |   37 +
 net/netfilter/Makefile                             |   16 +
 net/netfilter/nf_tables_api.c                      | 1760 ++++++++++++++++++++
 net/netfilter/nf_tables_core.c                     |  152 ++
 net/netfilter/nft_bitwise.c                        |  140 ++
 net/netfilter/nft_byteorder.c                      |  167 ++
 net/netfilter/nft_cmp.c                            |  146 ++
 net/netfilter/nft_counter.c                        |  107 ++
 net/netfilter/nft_ct.c                             |  252 +++
 net/netfilter/nft_expr_template.c                  |   88 +
 net/netfilter/nft_exthdr.c                         |  127 ++
 net/netfilter/nft_hash.c                           |  348 ++++
 net/netfilter/nft_immediate.c                      |  113 ++
 net/netfilter/nft_limit.c                          |  113 ++
 net/netfilter/nft_log.c                            |  140 ++
 net/netfilter/nft_meta.c                           |  222 +++
 net/netfilter/nft_meta_target.c                    |  117 ++
 net/netfilter/nft_payload.c                        |  137 ++
 net/netfilter/nft_set.c                            |  381 +++++
 39 files changed, 6393 insertions(+), 6 deletions(-)
 create mode 100644 include/net/netfilter/nf_tables.h
 create mode 100644 include/net/netfilter/nf_tables_core.h
 create mode 100644 include/uapi/linux/netfilter/nf_tables.h
 create mode 100644 net/bridge/netfilter/nf_tables_bridge.c
 create mode 100644 net/ipv4/netfilter/nf_table_nat_ipv4.c
 create mode 100644 net/ipv4/netfilter/nf_table_route_ipv4.c
 create mode 100644 net/ipv4/netfilter/nf_tables_ipv4.c
 create mode 100644 net/ipv4/netfilter/nft_reject_ipv4.c
 create mode 100644 net/ipv6/netfilter/nf_table_route_ipv6.c
 create mode 100644 net/ipv6/netfilter/nf_tables_ipv6.c
 create mode 100644 net/netfilter/nf_tables_api.c
 create mode 100644 net/netfilter/nf_tables_core.c
 create mode 100644 net/netfilter/nft_bitwise.c
 create mode 100644 net/netfilter/nft_byteorder.c
 create mode 100644 net/netfilter/nft_cmp.c
 create mode 100644 net/netfilter/nft_counter.c
 create mode 100644 net/netfilter/nft_ct.c
 create mode 100644 net/netfilter/nft_expr_template.c
 create mode 100644 net/netfilter/nft_exthdr.c
 create mode 100644 net/netfilter/nft_hash.c
 create mode 100644 net/netfilter/nft_immediate.c
 create mode 100644 net/netfilter/nft_limit.c
 create mode 100644 net/netfilter/nft_log.c
 create mode 100644 net/netfilter/nft_meta.c
 create mode 100644 net/netfilter/nft_meta_target.c
 create mode 100644 net/netfilter/nft_payload.c
 create mode 100644 net/netfilter/nft_set.c

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index fef7e67f7101..2077489f9887 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -53,12 +53,13 @@ struct nf_hook_ops {
 	struct list_head list;
 
 	/* User fills in from here down. */
-	nf_hookfn *hook;
-	struct module *owner;
-	u_int8_t pf;
-	unsigned int hooknum;
+	nf_hookfn	*hook;
+	struct module	*owner;
+	void		*priv;
+	u_int8_t	pf;
+	unsigned int	hooknum;
 	/* Hooks are ordered in ascending priority. */
-	int priority;
+	int		priority;
 };
 
 struct nf_sockopt_ops {
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
new file mode 100644
index 000000000000..d26dfa345f49
--- /dev/null
+++ b/include/net/netfilter/nf_tables.h
@@ -0,0 +1,301 @@
+#ifndef _NET_NF_TABLES_H
+#define _NET_NF_TABLES_H
+
+#include <linux/list.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netlink.h>
+
+struct nft_pktinfo {
+	struct sk_buff			*skb;
+	const struct net_device		*in;
+	const struct net_device		*out;
+	u8				hooknum;
+	u8				nhoff;
+	u8				thoff;
+};
+
+struct nft_data {
+	union {
+		u32				data[4];
+		struct {
+			u32			verdict;
+			struct nft_chain	*chain;
+		};
+	};
+} __attribute__((aligned(__alignof__(u64))));
+
+static inline int nft_data_cmp(const struct nft_data *d1,
+			       const struct nft_data *d2,
+			       unsigned int len)
+{
+	return memcmp(d1->data, d2->data, len);
+}
+
+static inline void nft_data_copy(struct nft_data *dst,
+				 const struct nft_data *src)
+{
+	BUILD_BUG_ON(__alignof__(*dst) != __alignof__(u64));
+	*(u64 *)&dst->data[0] = *(u64 *)&src->data[0];
+	*(u64 *)&dst->data[2] = *(u64 *)&src->data[2];
+}
+
+static inline void nft_data_debug(const struct nft_data *data)
+{
+	pr_debug("data[0]=%x data[1]=%x data[2]=%x data[3]=%x\n",
+		 data->data[0], data->data[1],
+		 data->data[2], data->data[3]);
+}
+
+/**
+ *	struct nft_ctx - nf_tables rule context
+ *
+ * 	@afi: address family info
+ * 	@table: the table the chain is contained in
+ * 	@chain: the chain the rule is contained in
+ */
+struct nft_ctx {
+	const struct nft_af_info	*afi;
+	const struct nft_table		*table;
+	const struct nft_chain		*chain;
+};
+
+enum nft_data_types {
+	NFT_DATA_VALUE,
+	NFT_DATA_VERDICT,
+};
+
+struct nft_data_desc {
+	enum nft_data_types		type;
+	unsigned int			len;
+};
+
+extern int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+			 struct nft_data_desc *desc, const struct nlattr *nla);
+extern void nft_data_uninit(const struct nft_data *data,
+			    enum nft_data_types type);
+extern int nft_data_dump(struct sk_buff *skb, int attr,
+			 const struct nft_data *data,
+			 enum nft_data_types type, unsigned int len);
+
+static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg)
+{
+	return reg == NFT_REG_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE;
+}
+
+extern int nft_validate_input_register(enum nft_registers reg);
+extern int nft_validate_output_register(enum nft_registers reg);
+extern int nft_validate_data_load(const struct nft_ctx *ctx,
+				  enum nft_registers reg,
+				  const struct nft_data *data,
+				  enum nft_data_types type);
+
+/**
+ *	struct nft_expr_ops - nf_tables expression operations
+ *
+ *	@eval: Expression evaluation function
+ *	@init: initialization function
+ *	@destroy: destruction function
+ *	@dump: function to dump parameters
+ *	@list: used internally
+ *	@name: Identifier
+ *	@owner: module reference
+ *	@policy: netlink attribute policy
+ *	@maxattr: highest netlink attribute number
+ *	@size: full expression size, including private data size
+ */
+struct nft_expr;
+struct nft_expr_ops {
+	void				(*eval)(const struct nft_expr *expr,
+						struct nft_data data[NFT_REG_MAX + 1],
+						const struct nft_pktinfo *pkt);
+	int				(*init)(const struct nft_ctx *ctx,
+						const struct nft_expr *expr,
+						const struct nlattr * const tb[]);
+	void				(*destroy)(const struct nft_expr *expr);
+	int				(*dump)(struct sk_buff *skb,
+						const struct nft_expr *expr);
+
+	struct list_head		list;
+	const char			*name;
+	struct module			*owner;
+	const struct nla_policy		*policy;
+	unsigned int			maxattr;
+	unsigned int			size;
+};
+
+#define NFT_EXPR_SIZE(size)		(sizeof(struct nft_expr) + \
+					 ALIGN(size, __alignof__(struct nft_expr)))
+
+/**
+ *	struct nft_expr - nf_tables expression
+ *
+ *	@ops: expression ops
+ *	@data: expression private data
+ */
+struct nft_expr {
+	const struct nft_expr_ops	*ops;
+	unsigned char			data[];
+};
+
+static inline void *nft_expr_priv(const struct nft_expr *expr)
+{
+	return (void *)expr->data;
+}
+
+/**
+ *	struct nft_rule - nf_tables rule
+ *
+ *	@list: used internally
+ *	@rcu_head: used internally for rcu
+ *	@handle: rule handle
+ *	@dlen: length of expression data
+ *	@data: expression data
+ */
+struct nft_rule {
+	struct list_head		list;
+	struct rcu_head			rcu_head;
+	u64				handle:48,
+					dlen:16;
+	unsigned char			data[]
+		__attribute__((aligned(__alignof__(struct nft_expr))));
+};
+
+static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
+{
+	return (struct nft_expr *)&rule->data[0];
+}
+
+static inline struct nft_expr *nft_expr_next(const struct nft_expr *expr)
+{
+	return ((void *)expr) + expr->ops->size;
+}
+
+static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule)
+{
+	return (struct nft_expr *)&rule->data[rule->dlen];
+}
+
+/*
+ * The last pointer isn't really necessary, but the compiler isn't able to
+ * determine that the result of nft_expr_last() is always the same since it
+ * can't assume that the dlen value wasn't changed within calls in the loop.
+ */
+#define nft_rule_for_each_expr(expr, last, rule) \
+	for ((expr) = nft_expr_first(rule), (last) = nft_expr_last(rule); \
+	     (expr) != (last); \
+	     (expr) = nft_expr_next(expr))
+
+enum nft_chain_flags {
+	NFT_BASE_CHAIN			= 0x1,
+	NFT_CHAIN_BUILTIN		= 0x2,
+};
+
+/**
+ *	struct nft_chain - nf_tables chain
+ *
+ *	@rules: list of rules in the chain
+ *	@list: used internally
+ *	@rcu_head: used internally
+ *	@handle: chain handle
+ *	@flags: bitmask of enum nft_chain_flags
+ *	@use: number of jump references to this chain
+ *	@level: length of longest path to this chain
+ *	@name: name of the chain
+ */
+struct nft_chain {
+	struct list_head		rules;
+	struct list_head		list;
+	struct rcu_head			rcu_head;
+	u64				handle;
+	u8				flags;
+	u16				use;
+	u16				level;
+	char				name[NFT_CHAIN_MAXNAMELEN];
+};
+
+/**
+ *	struct nft_base_chain - nf_tables base chain
+ *
+ *	@ops: netfilter hook ops
+ *	@chain: the chain
+ */
+struct nft_base_chain {
+	struct nf_hook_ops		ops;
+	struct nft_chain		chain;
+};
+
+static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain)
+{
+	return container_of(chain, struct nft_base_chain, chain);
+}
+
+extern unsigned int nft_do_chain(const struct nf_hook_ops *ops,
+				 struct sk_buff *skb,
+				 const struct net_device *in,
+				 const struct net_device *out,
+				 int (*okfn)(struct sk_buff *));
+
+enum nft_table_flags {
+	NFT_TABLE_BUILTIN		= 0x1,
+};
+
+/**
+ *	struct nft_table - nf_tables table
+ *
+ *	@list: used internally
+ *	@chains: chains in the table
+ *	@sets: sets in the table
+ *	@hgenerator: handle generator state
+ *	@use: number of chain references to this table
+ *	@flags: table flag (see enum nft_table_flags)
+ *	@name: name of the table
+ */
+struct nft_table {
+	struct list_head		list;
+	struct list_head		chains;
+	struct list_head		sets;
+	u64				hgenerator;
+	u32				use;
+	u16				flags;
+	char				name[];
+};
+
+/**
+ *	struct nft_af_info - nf_tables address family info
+ *
+ *	@list: used internally
+ *	@family: address family
+ *	@nhooks: number of hooks in this family
+ *	@owner: module owner
+ *	@tables: used internally
+ *	@hooks: hookfn overrides for packet validation
+ */
+struct nft_af_info {
+	struct list_head		list;
+	int				family;
+	unsigned int			nhooks;
+	struct module			*owner;
+	struct list_head		tables;
+	nf_hookfn			*hooks[NF_MAX_HOOKS];
+};
+
+extern int nft_register_afinfo(struct nft_af_info *);
+extern void nft_unregister_afinfo(struct nft_af_info *);
+
+extern int nft_register_table(struct nft_table *, int family);
+extern void nft_unregister_table(struct nft_table *, int family);
+
+extern int nft_register_expr(struct nft_expr_ops *);
+extern void nft_unregister_expr(struct nft_expr_ops *);
+
+#define MODULE_ALIAS_NFT_FAMILY(family)	\
+	MODULE_ALIAS("nft-afinfo-" __stringify(family))
+
+#define MODULE_ALIAS_NFT_TABLE(family, name) \
+	MODULE_ALIAS("nft-table-" __stringify(family) "-" name)
+
+#define MODULE_ALIAS_NFT_EXPR(name) \
+	MODULE_ALIAS("nft-expr-" name)
+
+#endif /* _NET_NF_TABLES_H */
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
new file mode 100644
index 000000000000..283396c916e0
--- /dev/null
+++ b/include/net/netfilter/nf_tables_core.h
@@ -0,0 +1,25 @@
+#ifndef _NET_NF_TABLES_CORE_H
+#define _NET_NF_TABLES_CORE_H
+
+extern int nf_tables_core_module_init(void);
+extern void nf_tables_core_module_exit(void);
+
+extern int nft_immediate_module_init(void);
+extern void nft_immediate_module_exit(void);
+
+extern int nft_cmp_module_init(void);
+extern void nft_cmp_module_exit(void);
+
+extern int nft_lookup_module_init(void);
+extern void nft_lookup_module_exit(void);
+
+extern int nft_bitwise_module_init(void);
+extern void nft_bitwise_module_exit(void);
+
+extern int nft_byteorder_module_init(void);
+extern void nft_byteorder_module_exit(void);
+
+extern int nft_payload_module_init(void);
+extern void nft_payload_module_exit(void);
+
+#endif /* _NET_NF_TABLES_CORE_H */
diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild
index 174915420d3f..6ce0b7f566a7 100644
--- a/include/uapi/linux/netfilter/Kbuild
+++ b/include/uapi/linux/netfilter/Kbuild
@@ -5,6 +5,7 @@ header-y += nf_conntrack_ftp.h
 header-y += nf_conntrack_sctp.h
 header-y += nf_conntrack_tcp.h
 header-y += nf_conntrack_tuple_common.h
+header-y += nf_tables.h
 header-y += nf_nat.h
 header-y += nfnetlink.h
 header-y += nfnetlink_acct.h
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 8dd803818ebe..319f47128db8 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -25,6 +25,10 @@ enum ip_conntrack_info {
 	IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1
 };
 
+#define NF_CT_STATE_INVALID_BIT			(1 << 0)
+#define NF_CT_STATE_BIT(ctinfo)			(1 << ((ctinfo) % IP_CT_IS_REPLY + 1))
+#define NF_CT_STATE_UNTRACKED_BIT		(1 << (IP_CT_NUMBER + 1))
+
 /* Bitset representing status of connection. */
 enum ip_conntrack_status {
 	/* It's an expected connection: bit 0 set.  This bit never changed */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
new file mode 100644
index 000000000000..ec6d84a8ed1e
--- /dev/null
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -0,0 +1,582 @@
+#ifndef _LINUX_NF_TABLES_H
+#define _LINUX_NF_TABLES_H
+
+#define NFT_CHAIN_MAXNAMELEN 32
+
+enum nft_registers {
+	NFT_REG_VERDICT,
+	NFT_REG_1,
+	NFT_REG_2,
+	NFT_REG_3,
+	NFT_REG_4,
+	__NFT_REG_MAX
+};
+#define NFT_REG_MAX	(__NFT_REG_MAX - 1)
+
+/**
+ * enum nft_verdicts - nf_tables internal verdicts
+ *
+ * @NFT_CONTINUE: continue evaluation of the current rule
+ * @NFT_BREAK: terminate evaluation of the current rule
+ * @NFT_JUMP: push the current chain on the jump stack and jump to a chain
+ * @NFT_GOTO: jump to a chain without pushing the current chain on the jump stack
+ * @NFT_RETURN: return to the topmost chain on the jump stack
+ *
+ * The nf_tables verdicts share their numeric space with the netfilter verdicts.
+ */
+enum nft_verdicts {
+	NFT_CONTINUE	= -1,
+	NFT_BREAK	= -2,
+	NFT_JUMP	= -3,
+	NFT_GOTO	= -4,
+	NFT_RETURN	= -5,
+};
+
+/**
+ * enum nf_tables_msg_types - nf_tables netlink message types
+ *
+ * @NFT_MSG_NEWTABLE: create a new table (enum nft_table_attributes)
+ * @NFT_MSG_GETTABLE: get a table (enum nft_table_attributes)
+ * @NFT_MSG_DELTABLE: delete a table (enum nft_table_attributes)
+ * @NFT_MSG_NEWCHAIN: create a new chain (enum nft_chain_attributes)
+ * @NFT_MSG_GETCHAIN: get a chain (enum nft_chain_attributes)
+ * @NFT_MSG_DELCHAIN: delete a chain (enum nft_chain_attributes)
+ * @NFT_MSG_NEWRULE: create a new rule (enum nft_rule_attributes)
+ * @NFT_MSG_GETRULE: get a rule (enum nft_rule_attributes)
+ * @NFT_MSG_DELRULE: delete a rule (enum nft_rule_attributes)
+ */
+enum nf_tables_msg_types {
+	NFT_MSG_NEWTABLE,
+	NFT_MSG_GETTABLE,
+	NFT_MSG_DELTABLE,
+	NFT_MSG_NEWCHAIN,
+	NFT_MSG_GETCHAIN,
+	NFT_MSG_DELCHAIN,
+	NFT_MSG_NEWRULE,
+	NFT_MSG_GETRULE,
+	NFT_MSG_DELRULE,
+	NFT_MSG_MAX,
+};
+
+enum nft_list_attributes {
+	NFTA_LIST_UNPEC,
+	NFTA_LIST_ELEM,
+	__NFTA_LIST_MAX
+};
+#define NFTA_LIST_MAX		(__NFTA_LIST_MAX - 1)
+
+/**
+ * enum nft_hook_attributes - nf_tables netfilter hook netlink attributes
+ *
+ * @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32)
+ * @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32)
+ */
+enum nft_hook_attributes {
+	NFTA_HOOK_UNSPEC,
+	NFTA_HOOK_HOOKNUM,
+	NFTA_HOOK_PRIORITY,
+	__NFTA_HOOK_MAX
+};
+#define NFTA_HOOK_MAX		(__NFTA_HOOK_MAX - 1)
+
+/**
+ * enum nft_table_attributes - nf_tables table netlink attributes
+ *
+ * @NFTA_TABLE_NAME: name of the table (NLA_STRING)
+ */
+enum nft_table_attributes {
+	NFTA_TABLE_UNSPEC,
+	NFTA_TABLE_NAME,
+	__NFTA_TABLE_MAX
+};
+#define NFTA_TABLE_MAX		(__NFTA_TABLE_MAX - 1)
+
+/**
+ * enum nft_chain_attributes - nf_tables chain netlink attributes
+ *
+ * @NFTA_CHAIN_TABLE: name of the table containing the chain (NLA_STRING)
+ * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64)
+ * @NFTA_CHAIN_NAME: name of the chain (NLA_STRING)
+ * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes)
+ */
+enum nft_chain_attributes {
+	NFTA_CHAIN_UNSPEC,
+	NFTA_CHAIN_TABLE,
+	NFTA_CHAIN_HANDLE,
+	NFTA_CHAIN_NAME,
+	NFTA_CHAIN_HOOK,
+	__NFTA_CHAIN_MAX
+};
+#define NFTA_CHAIN_MAX		(__NFTA_CHAIN_MAX - 1)
+
+/**
+ * enum nft_rule_attributes - nf_tables rule netlink attributes
+ *
+ * @NFTA_RULE_TABLE: name of the table containing the rule (NLA_STRING)
+ * @NFTA_RULE_CHAIN: name of the chain containing the rule (NLA_STRING)
+ * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64)
+ * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes)
+ */
+enum nft_rule_attributes {
+	NFTA_RULE_UNSPEC,
+	NFTA_RULE_TABLE,
+	NFTA_RULE_CHAIN,
+	NFTA_RULE_HANDLE,
+	NFTA_RULE_EXPRESSIONS,
+	__NFTA_RULE_MAX
+};
+#define NFTA_RULE_MAX		(__NFTA_RULE_MAX - 1)
+
+enum nft_data_attributes {
+	NFTA_DATA_UNSPEC,
+	NFTA_DATA_VALUE,
+	NFTA_DATA_VERDICT,
+	__NFTA_DATA_MAX
+};
+#define NFTA_DATA_MAX		(__NFTA_DATA_MAX - 1)
+
+/**
+ * enum nft_verdict_attributes - nf_tables verdict netlink attributes
+ *
+ * @NFTA_VERDICT_CODE: nf_tables verdict (NLA_U32: enum nft_verdicts)
+ * @NFTA_VERDICT_CHAIN: jump target chain name (NLA_STRING)
+ */
+enum nft_verdict_attributes {
+	NFTA_VERDICT_UNSPEC,
+	NFTA_VERDICT_CODE,
+	NFTA_VERDICT_CHAIN,
+	__NFTA_VERDICT_MAX
+};
+#define NFTA_VERDICT_MAX	(__NFTA_VERDICT_MAX - 1)
+
+/**
+ * enum nft_expr_attributes - nf_tables expression netlink attributes
+ *
+ * @NFTA_EXPR_NAME: name of the expression type (NLA_STRING)
+ * @NFTA_EXPR_DATA: type specific data (NLA_NESTED)
+ */
+enum nft_expr_attributes {
+	NFTA_EXPR_UNSPEC,
+	NFTA_EXPR_NAME,
+	NFTA_EXPR_DATA,
+	__NFTA_EXPR_MAX
+};
+#define NFTA_EXPR_MAX		(__NFTA_EXPR_MAX - 1)
+
+/**
+ * enum nft_immediate_attributes - nf_tables immediate expression netlink attributes
+ *
+ * @NFTA_IMMEDIATE_DREG: destination register to load data into (NLA_U32)
+ * @NFTA_IMMEDIATE_DATA: data to load (NLA_NESTED: nft_data_attributes)
+ */
+enum nft_immediate_attributes {
+	NFTA_IMMEDIATE_UNSPEC,
+	NFTA_IMMEDIATE_DREG,
+	NFTA_IMMEDIATE_DATA,
+	__NFTA_IMMEDIATE_MAX
+};
+#define NFTA_IMMEDIATE_MAX	(__NFTA_IMMEDIATE_MAX - 1)
+
+/**
+ * enum nft_bitwise_attributes - nf_tables bitwise expression netlink attributes
+ *
+ * @NFTA_BITWISE_SREG: source register (NLA_U32: nft_registers)
+ * @NFTA_BITWISE_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_BITWISE_LEN: length of operands (NLA_U32)
+ * @NFTA_BITWISE_MASK: mask value (NLA_NESTED: nft_data_attributes)
+ * @NFTA_BITWISE_XOR: xor value (NLA_NESTED: nft_data_attributes)
+ *
+ * The bitwise expression performs the following operation:
+ *
+ * dreg = (sreg & mask) ^ xor
+ *
+ * which allow to express all bitwise operations:
+ *
+ * 		mask	xor
+ * NOT:		1	1
+ * OR:		0	x
+ * XOR:		1	x
+ * AND:		x	0
+ */
+enum nft_bitwise_attributes {
+	NFTA_BITWISE_UNSPEC,
+	NFTA_BITWISE_SREG,
+	NFTA_BITWISE_DREG,
+	NFTA_BITWISE_LEN,
+	NFTA_BITWISE_MASK,
+	NFTA_BITWISE_XOR,
+	__NFTA_BITWISE_MAX
+};
+#define NFTA_BITWISE_MAX	(__NFTA_BITWISE_MAX - 1)
+
+/**
+ * enum nft_byteorder_ops - nf_tables byteorder operators
+ *
+ * @NFT_BYTEORDER_NTOH: network to host operator
+ * @NFT_BYTEORDER_HTON: host to network opertaor
+ */
+enum nft_byteorder_ops {
+	NFT_BYTEORDER_NTOH,
+	NFT_BYTEORDER_HTON,
+};
+
+/**
+ * enum nft_byteorder_attributes - nf_tables byteorder expression netlink attributes
+ *
+ * @NFTA_BYTEORDER_SREG: source register (NLA_U32: nft_registers)
+ * @NFTA_BYTEORDER_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_BYTEORDER_OP: operator (NLA_U32: enum nft_byteorder_ops)
+ * @NFTA_BYTEORDER_LEN: length of the data (NLA_U32)
+ * @NFTA_BYTEORDER_SIZE: data size in bytes (NLA_U32: 2 or 4)
+ */
+enum nft_byteorder_attributes {
+	NFTA_BYTEORDER_UNSPEC,
+	NFTA_BYTEORDER_SREG,
+	NFTA_BYTEORDER_DREG,
+	NFTA_BYTEORDER_OP,
+	NFTA_BYTEORDER_LEN,
+	NFTA_BYTEORDER_SIZE,
+	__NFTA_BYTEORDER_MAX
+};
+#define NFTA_BYTEORDER_MAX	(__NFTA_BYTEORDER_MAX - 1)
+
+/**
+ * enum nft_cmp_ops - nf_tables relational operator
+ *
+ * @NFT_CMP_EQ: equal
+ * @NFT_CMP_NEQ: not equal
+ * @NFT_CMP_LT: less than
+ * @NFT_CMP_LTE: less than or equal to
+ * @NFT_CMP_GT: greater than
+ * @NFT_CMP_GTE: greater than or equal to
+ */
+enum nft_cmp_ops {
+	NFT_CMP_EQ,
+	NFT_CMP_NEQ,
+	NFT_CMP_LT,
+	NFT_CMP_LTE,
+	NFT_CMP_GT,
+	NFT_CMP_GTE,
+};
+
+/**
+ * enum nft_cmp_attributes - nf_tables cmp expression netlink attributes
+ *
+ * @NFTA_CMP_SREG: source register of data to compare (NLA_U32: nft_registers)
+ * @NFTA_CMP_OP: cmp operation (NLA_U32: nft_cmp_ops)
+ * @NFTA_CMP_DATA: data to compare against (NLA_NESTED: nft_data_attributes)
+ */
+enum nft_cmp_attributes {
+	NFTA_CMP_UNSPEC,
+	NFTA_CMP_SREG,
+	NFTA_CMP_OP,
+	NFTA_CMP_DATA,
+	__NFTA_CMP_MAX
+};
+#define NFTA_CMP_MAX		(__NFTA_CMP_MAX - 1)
+
+enum nft_set_elem_flags {
+	NFT_SE_INTERVAL_END	= 0x1,
+};
+
+enum nft_set_elem_attributes {
+	NFTA_SE_UNSPEC,
+	NFTA_SE_KEY,
+	NFTA_SE_DATA,
+	NFTA_SE_FLAGS,
+	__NFTA_SE_MAX
+};
+#define NFTA_SE_MAX		(__NFTA_SE_MAX - 1)
+
+enum nft_set_flags {
+	NFT_SET_INTERVAL	= 0x1,
+	NFT_SET_MAP		= 0x2,
+};
+
+enum nft_set_attributes {
+	NFTA_SET_UNSPEC,
+	NFTA_SET_FLAGS,
+	NFTA_SET_SREG,
+	NFTA_SET_DREG,
+	NFTA_SET_KLEN,
+	NFTA_SET_DLEN,
+	NFTA_SET_ELEMENTS,
+	__NFTA_SET_MAX
+};
+#define NFTA_SET_MAX		(__NFTA_SET_MAX - 1)
+
+enum nft_hash_flags {
+	NFT_HASH_MAP		= 0x1,
+};
+
+enum nft_hash_elem_attributes {
+	NFTA_HE_UNSPEC,
+	NFTA_HE_KEY,
+	NFTA_HE_DATA,
+	__NFTA_HE_MAX
+};
+#define NFTA_HE_MAX		(__NFTA_HE_MAX - 1)
+
+enum nft_hash_attributes {
+	NFTA_HASH_UNSPEC,
+	NFTA_HASH_FLAGS,
+	NFTA_HASH_SREG,
+	NFTA_HASH_DREG,
+	NFTA_HASH_KLEN,
+	NFTA_HASH_ELEMENTS,
+	__NFTA_HASH_MAX
+};
+#define NFTA_HASH_MAX		(__NFTA_HASH_MAX - 1)
+
+/**
+ * enum nft_payload_bases - nf_tables payload expression offset bases
+ *
+ * @NFT_PAYLOAD_LL_HEADER: link layer header
+ * @NFT_PAYLOAD_NETWORK_HEADER: network header
+ * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header
+ */
+enum nft_payload_bases {
+	NFT_PAYLOAD_LL_HEADER,
+	NFT_PAYLOAD_NETWORK_HEADER,
+	NFT_PAYLOAD_TRANSPORT_HEADER,
+};
+
+/**
+ * enum nft_payload_attributes - nf_tables payload expression netlink attributes
+ *
+ * @NFTA_PAYLOAD_DREG: destination register to load data into (NLA_U32: nft_registers)
+ * @NFTA_PAYLOAD_BASE: payload base (NLA_U32: nft_payload_bases)
+ * @NFTA_PAYLOAD_OFFSET: payload offset relative to base (NLA_U32)
+ * @NFTA_PAYLOAD_LEN: payload length (NLA_U32)
+ */
+enum nft_payload_attributes {
+	NFTA_PAYLOAD_UNSPEC,
+	NFTA_PAYLOAD_DREG,
+	NFTA_PAYLOAD_BASE,
+	NFTA_PAYLOAD_OFFSET,
+	NFTA_PAYLOAD_LEN,
+	__NFTA_PAYLOAD_MAX
+};
+#define NFTA_PAYLOAD_MAX	(__NFTA_PAYLOAD_MAX - 1)
+
+/**
+ * enum nft_exthdr_attributes - nf_tables IPv6 extension header expression netlink attributes
+ *
+ * @NFTA_EXTHDR_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_EXTHDR_TYPE: extension header type (NLA_U8)
+ * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32)
+ * @NFTA_EXTHDR_LEN: extension header length (NLA_U32)
+ */
+enum nft_exthdr_attributes {
+	NFTA_EXTHDR_UNSPEC,
+	NFTA_EXTHDR_DREG,
+	NFTA_EXTHDR_TYPE,
+	NFTA_EXTHDR_OFFSET,
+	NFTA_EXTHDR_LEN,
+	__NFTA_EXTHDR_MAX
+};
+#define NFTA_EXTHDR_MAX		(__NFTA_EXTHDR_MAX - 1)
+
+/**
+ * enum nft_meta_keys - nf_tables meta expression keys
+ *
+ * @NFT_META_LEN: packet length (skb->len)
+ * @NFT_META_PROTOCOL: packet ethertype protocol (skb->protocol), invalid in OUTPUT
+ * @NFT_META_PRIORITY: packet priority (skb->priority)
+ * @NFT_META_MARK: packet mark (skb->mark)
+ * @NFT_META_IIF: packet input interface index (dev->ifindex)
+ * @NFT_META_OIF: packet output interface index (dev->ifindex)
+ * @NFT_META_IIFNAME: packet input interface name (dev->name)
+ * @NFT_META_OIFNAME: packet output interface name (dev->name)
+ * @NFT_META_IIFTYPE: packet input interface type (dev->type)
+ * @NFT_META_OIFTYPE: packet output interface type (dev->type)
+ * @NFT_META_SKUID: originating socket UID (fsuid)
+ * @NFT_META_SKGID: originating socket GID (fsgid)
+ * @NFT_META_NFTRACE: packet nftrace bit
+ * @NFT_META_RTCLASSID: realm value of packet's route (skb->dst->tclassid)
+ * @NFT_META_SECMARK: packet secmark (skb->secmark)
+ */
+enum nft_meta_keys {
+	NFT_META_LEN,
+	NFT_META_PROTOCOL,
+	NFT_META_PRIORITY,
+	NFT_META_MARK,
+	NFT_META_IIF,
+	NFT_META_OIF,
+	NFT_META_IIFNAME,
+	NFT_META_OIFNAME,
+	NFT_META_IIFTYPE,
+	NFT_META_OIFTYPE,
+	NFT_META_SKUID,
+	NFT_META_SKGID,
+	NFT_META_NFTRACE,
+	NFT_META_RTCLASSID,
+	NFT_META_SECMARK,
+};
+
+/**
+ * enum nft_meta_attributes - nf_tables meta expression netlink attributes
+ *
+ * @NFTA_META_DREG: destination register (NLA_U32)
+ * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys)
+ */
+enum nft_meta_attributes {
+	NFTA_META_UNSPEC,
+	NFTA_META_DREG,
+	NFTA_META_KEY,
+	__NFTA_META_MAX
+};
+#define NFTA_META_MAX		(__NFTA_META_MAX - 1)
+
+/**
+ * enum nft_ct_keys - nf_tables ct expression keys
+ *
+ * @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info)
+ * @NFT_CT_DIRECTION: conntrack direction (enum ip_conntrack_dir)
+ * @NFT_CT_STATUS: conntrack status (bitmask of enum ip_conntrack_status)
+ * @NFT_CT_MARK: conntrack mark value
+ * @NFT_CT_SECMARK: conntrack secmark value
+ * @NFT_CT_EXPIRATION: relative conntrack expiration time in ms
+ * @NFT_CT_HELPER: connection tracking helper assigned to conntrack
+ * @NFT_CT_L3PROTOCOL: conntrack layer 3 protocol
+ * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address)
+ * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address)
+ * @NFT_CT_PROTOCOL: conntrack layer 4 protocol
+ * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source
+ * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination
+ */
+enum nft_ct_keys {
+	NFT_CT_STATE,
+	NFT_CT_DIRECTION,
+	NFT_CT_STATUS,
+	NFT_CT_MARK,
+	NFT_CT_SECMARK,
+	NFT_CT_EXPIRATION,
+	NFT_CT_HELPER,
+	NFT_CT_L3PROTOCOL,
+	NFT_CT_SRC,
+	NFT_CT_DST,
+	NFT_CT_PROTOCOL,
+	NFT_CT_PROTO_SRC,
+	NFT_CT_PROTO_DST,
+};
+
+/**
+ * enum nft_ct_attributes - nf_tables ct expression netlink attributes
+ *
+ * @NFTA_CT_DREG: destination register (NLA_U32)
+ * @NFTA_CT_KEY: conntrack data item to load (NLA_U32: nft_ct_keys)
+ * @NFTA_CT_DIRECTION: direction in case of directional keys (NLA_U8)
+ */
+enum nft_ct_attributes {
+	NFTA_CT_UNSPEC,
+	NFTA_CT_DREG,
+	NFTA_CT_KEY,
+	NFTA_CT_DIRECTION,
+	__NFTA_CT_MAX
+};
+#define NFTA_CT_MAX		(__NFTA_CT_MAX - 1)
+
+/**
+ * enum nft_limit_attributes - nf_tables limit expression netlink attributes
+ *
+ * @NFTA_LIMIT_RATE: refill rate (NLA_U64)
+ * @NFTA_LIMIT_UNIT: refill unit (NLA_U64)
+ */
+enum nft_limit_attributes {
+	NFTA_LIMIT_UNSPEC,
+	NFTA_LIMIT_RATE,
+	NFTA_LIMIT_UNIT,
+	__NFTA_LIMIT_MAX
+};
+#define NFTA_LIMIT_MAX		(__NFTA_LIMIT_MAX - 1)
+
+/**
+ * enum nft_counter_attributes - nf_tables counter expression netlink attributes
+ *
+ * @NFTA_COUNTER_BYTES: number of bytes (NLA_U64)
+ * @NFTA_COUNTER_PACKETS: number of packets (NLA_U64)
+ */
+enum nft_counter_attributes {
+	NFTA_COUNTER_UNSPEC,
+	NFTA_COUNTER_BYTES,
+	NFTA_COUNTER_PACKETS,
+	__NFTA_COUNTER_MAX
+};
+#define NFTA_COUNTER_MAX	(__NFTA_COUNTER_MAX - 1)
+
+/**
+ * enum nft_log_attributes - nf_tables log expression netlink attributes
+ *
+ * @NFTA_LOG_GROUP: netlink group to send messages to (NLA_U32)
+ * @NFTA_LOG_PREFIX: prefix to prepend to log messages (NLA_STRING)
+ * @NFTA_LOG_SNAPLEN: length of payload to include in netlink message (NLA_U32)
+ * @NFTA_LOG_QTHRESHOLD: queue threshold (NLA_U32)
+ */
+enum nft_log_attributes {
+	NFTA_LOG_UNSPEC,
+	NFTA_LOG_GROUP,
+	NFTA_LOG_PREFIX,
+	NFTA_LOG_SNAPLEN,
+	NFTA_LOG_QTHRESHOLD,
+	__NFTA_LOG_MAX
+};
+#define NFTA_LOG_MAX		(__NFTA_LOG_MAX - 1)
+
+/**
+ * enum nft_reject_types - nf_tables reject expression reject types
+ *
+ * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable
+ * @NFT_REJECT_TCP_RST: reject using TCP RST
+ */
+enum nft_reject_types {
+	NFT_REJECT_ICMP_UNREACH,
+	NFT_REJECT_TCP_RST,
+};
+
+/**
+ * enum nft_reject_attributes - nf_tables reject expression netlink attributes
+ *
+ * @NFTA_REJECT_TYPE: packet type to use (NLA_U32: nft_reject_types)
+ * @NFTA_REJECT_ICMP_CODE: ICMP code to use (NLA_U8)
+ */
+enum nft_reject_attributes {
+	NFTA_REJECT_UNSPEC,
+	NFTA_REJECT_TYPE,
+	NFTA_REJECT_ICMP_CODE,
+	__NFTA_REJECT_MAX
+};
+#define NFTA_REJECT_MAX		(__NFTA_REJECT_MAX - 1)
+
+/**
+ * enum nft_nat_types - nf_tables nat expression NAT types
+ *
+ * @NFT_NAT_SNAT: source NAT
+ * @NFT_NAT_DNAT: destination NAT
+ */
+enum nft_nat_types {
+	NFT_NAT_SNAT,
+	NFT_NAT_DNAT,
+};
+
+/**
+ * enum nft_nat_attributes - nf_tables nat expression netlink attributes
+ *
+ * @NFTA_NAT_TYPE: NAT type (NLA_U32: nft_nat_types)
+ * @NFTA_NAT_ADDR_MIN: source register of address range start (NLA_U32: nft_registers)
+ * @NFTA_NAT_ADDR_MAX: source register of address range end (NLA_U32: nft_registers)
+ * @NFTA_NAT_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers)
+ * @NFTA_NAT_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers)
+ */
+enum nft_nat_attributes {
+	NFTA_NAT_UNSPEC,
+	NFTA_NAT_TYPE,
+	NFTA_NAT_ADDR_MIN,
+	NFTA_NAT_ADDR_MAX,
+	NFTA_NAT_PROTO_MIN,
+	NFTA_NAT_PROTO_MAX,
+	__NFTA_NAT_MAX
+};
+#define NFTA_NAT_MAX		(__NFTA_NAT_MAX - 1)
+
+#endif /* _LINUX_NF_TABLES_H */
diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
index 4a4efafad5f4..d276c3bd55b8 100644
--- a/include/uapi/linux/netfilter/nfnetlink.h
+++ b/include/uapi/linux/netfilter/nfnetlink.h
@@ -18,6 +18,8 @@ enum nfnetlink_groups {
 #define NFNLGRP_CONNTRACK_EXP_UPDATE	NFNLGRP_CONNTRACK_EXP_UPDATE
 	NFNLGRP_CONNTRACK_EXP_DESTROY,
 #define NFNLGRP_CONNTRACK_EXP_DESTROY	NFNLGRP_CONNTRACK_EXP_DESTROY
+	NFNLGRP_NFTABLES,
+#define NFNLGRP_NFTABLES                NFNLGRP_NFTABLES
 	__NFNLGRP_MAX,
 };
 #define NFNLGRP_MAX	(__NFNLGRP_MAX - 1)
@@ -51,6 +53,7 @@ struct nfgenmsg {
 #define NFNL_SUBSYS_ACCT		7
 #define NFNL_SUBSYS_CTNETLINK_TIMEOUT	8
 #define NFNL_SUBSYS_CTHELPER		9
-#define NFNL_SUBSYS_COUNT		10
+#define NFNL_SUBSYS_NFTABLES		10
+#define NFNL_SUBSYS_COUNT		11
 
 #endif /* _UAPI_NFNETLINK_H */
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index a9aff9c7d027..68f8128147be 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -1,6 +1,9 @@
 #
 # Bridge netfilter configuration
 #
+#
+config NF_TABLES_BRIDGE
+	tristate "Ethernet Bridge nf_tables support"
 
 menuconfig BRIDGE_NF_EBTABLES
 	tristate "Ethernet Bridge tables (ebtables) support"
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 0718699540b0..ea7629f58b3d 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -2,6 +2,8 @@
 # Makefile for the netfilter modules for Link Layer filtering on a bridge.
 #
 
+obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
+
 obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
 
 # tables
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
new file mode 100644
index 000000000000..bc5c21c911c0
--- /dev/null
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter_bridge.h>
+#include <net/netfilter/nf_tables.h>
+
+static struct nft_af_info nft_af_bridge __read_mostly = {
+	.family		= NFPROTO_BRIDGE,
+	.nhooks		= NF_BR_NUMHOOKS,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nf_tables_bridge_init(void)
+{
+	return nft_register_afinfo(&nft_af_bridge);
+}
+
+static void __exit nf_tables_bridge_exit(void)
+{
+	nft_unregister_afinfo(&nft_af_bridge);
+}
+
+module_init(nf_tables_bridge_init);
+module_exit(nf_tables_bridge_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 1657e39b291f..eb1d56ece361 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -36,6 +36,22 @@ config NF_CONNTRACK_PROC_COMPAT
 
 	  If unsure, say Y.
 
+config NF_TABLES_IPV4
+	depends on NF_TABLES
+	tristate "IPv4 nf_tables support"
+
+config NFT_REJECT_IPV4
+	depends on NF_TABLES_IPV4
+	tristate "nf_tables IPv4 reject support"
+
+config NF_TABLE_ROUTE_IPV4
+	depends on NF_TABLES_IPV4
+	tristate "IPv4 nf_tables route table support"
+
+config NF_TABLE_NAT_IPV4
+	depends on NF_TABLES_IPV4
+	tristate "IPv4 nf_tables nat table support"
+
 config IP_NF_IPTABLES
 	tristate "IP tables support (required for filtering/masq/NAT)"
 	default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3622b248b6dd..b2f01cd2cd65 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -27,6 +27,11 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
 # NAT protocols (nf_nat)
 obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
 
+obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
+obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
+obj-$(CONFIG_NF_TABLE_ROUTE_IPV4) += nf_table_route_ipv4.o
+obj-$(CONFIG_NF_TABLE_NAT_IPV4) += nf_table_nat_ipv4.o
+
 # generic IP tables 
 obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
 
diff --git a/net/ipv4/netfilter/nf_table_nat_ipv4.c b/net/ipv4/netfilter/nf_table_nat_ipv4.c
new file mode 100644
index 000000000000..2a6f184c10bd
--- /dev/null
+++ b/net/ipv4/netfilter/nf_table_nat_ipv4.c
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/ip.h>
+
+struct nft_nat {
+	enum nft_registers	sreg_addr_min:8;
+	enum nft_registers	sreg_addr_max:8;
+	enum nft_registers	sreg_proto_min:8;
+	enum nft_registers	sreg_proto_max:8;
+	enum nf_nat_manip_type	type;
+};
+
+static void nft_nat_eval(const struct nft_expr *expr,
+			 struct nft_data data[NFT_REG_MAX + 1],
+			 const struct nft_pktinfo *pkt)
+{
+	const struct nft_nat *priv = nft_expr_priv(expr);
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo);
+	struct nf_nat_range range;
+
+	memset(&range, 0, sizeof(range));
+	if (priv->sreg_addr_min) {
+		range.min_addr.ip = data[priv->sreg_addr_min].data[0];
+		range.max_addr.ip = data[priv->sreg_addr_max].data[0];
+		range.flags |= NF_NAT_RANGE_MAP_IPS;
+	}
+
+	if (priv->sreg_proto_min) {
+		range.min_proto.all = data[priv->sreg_proto_min].data[0];
+		range.max_proto.all = data[priv->sreg_proto_max].data[0];
+		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+	}
+
+	data[NFT_REG_VERDICT].verdict =
+		nf_nat_setup_info(ct, &range, priv->type);
+}
+
+static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
+	[NFTA_NAT_ADDR_MIN]	= { .type = NLA_U32 },
+	[NFTA_NAT_ADDR_MAX]	= { .type = NLA_U32 },
+	[NFTA_NAT_PROTO_MIN]	= { .type = NLA_U32 },
+	[NFTA_NAT_PROTO_MAX]	= { .type = NLA_U32 },
+	[NFTA_NAT_TYPE]		= { .type = NLA_U32 },
+};
+
+static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			const struct nlattr * const tb[])
+{
+	struct nft_nat *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_NAT_TYPE] == NULL)
+		return -EINVAL;
+
+	switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) {
+	case NFT_NAT_SNAT:
+		priv->type = NF_NAT_MANIP_SRC;
+		break;
+	case NFT_NAT_DNAT:
+		priv->type = NF_NAT_MANIP_DST;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (tb[NFTA_NAT_ADDR_MIN]) {
+		priv->sreg_addr_min = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MIN]));
+		err = nft_validate_input_register(priv->sreg_addr_min);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[NFTA_NAT_ADDR_MAX]) {
+		priv->sreg_addr_max = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MAX]));
+		err = nft_validate_input_register(priv->sreg_addr_max);
+		if (err < 0)
+			return err;
+	} else
+		priv->sreg_addr_max = priv->sreg_addr_min;
+
+	if (tb[NFTA_NAT_PROTO_MIN]) {
+		priv->sreg_proto_min = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MIN]));
+		err = nft_validate_input_register(priv->sreg_proto_min);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[NFTA_NAT_PROTO_MAX]) {
+		priv->sreg_proto_max = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MAX]));
+		err = nft_validate_input_register(priv->sreg_proto_max);
+		if (err < 0)
+			return err;
+	} else
+		priv->sreg_proto_max = priv->sreg_proto_min;
+
+	return 0;
+}
+
+static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_nat *priv = nft_expr_priv(expr);
+
+	switch (priv->type) {
+	case NF_NAT_MANIP_SRC:
+		if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT)))
+			goto nla_put_failure;
+		break;
+	case NF_NAT_MANIP_DST:
+		if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT)))
+			goto nla_put_failure;
+		break;
+	}
+
+	if (nla_put_be32(skb, NFTA_NAT_ADDR_MIN, htonl(priv->sreg_addr_min)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_NAT_ADDR_MAX, htonl(priv->sreg_addr_max)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_NAT_PROTO_MIN, htonl(priv->sreg_proto_min)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_NAT_PROTO_MAX, htonl(priv->sreg_proto_max)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_nat_ops __read_mostly = {
+	.name		= "nat",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_nat)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_nat_eval,
+	.init		= nft_nat_init,
+	.dump		= nft_nat_dump,
+	.policy		= nft_nat_policy,
+	.maxattr	= NFTA_NAT_MAX,
+};
+
+/*
+ * NAT table
+ */
+
+static unsigned int nf_nat_fn(const struct nf_hook_ops *ops,
+			      struct sk_buff *skb,
+			      const struct net_device *in,
+			      const struct net_device *out,
+			      int (*okfn)(struct sk_buff *))
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct nf_conn_nat *nat;
+	enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+	unsigned int ret;
+
+	if (ct == NULL || nf_ct_is_untracked(ct))
+		return NF_ACCEPT;
+
+	NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
+
+	nat = nfct_nat(ct);
+	if (nat == NULL) {
+		/* Conntrack module was loaded late, can't add extension. */
+		if (nf_ct_is_confirmed(ct))
+			return NF_ACCEPT;
+		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+		if (nat == NULL)
+			return NF_ACCEPT;
+	}
+
+	switch (ctinfo) {
+	case IP_CT_RELATED:
+	case IP_CT_RELATED + IP_CT_IS_REPLY:
+		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+							   ops->hooknum))
+				return NF_DROP;
+			else
+				return NF_ACCEPT;
+		}
+		/* Fall through */
+	case IP_CT_NEW:
+		if (nf_nat_initialized(ct, maniptype))
+			break;
+
+		ret = nft_do_chain(ops, skb, in, out, okfn);
+		if (ret != NF_ACCEPT)
+			return ret;
+		if (!nf_nat_initialized(ct, maniptype)) {
+			ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+			if (ret != NF_ACCEPT)
+				return ret;
+		}
+	default:
+		break;
+	}
+
+	return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+}
+
+static unsigned int nf_nat_prerouting(const struct nf_hook_ops *ops,
+				      struct sk_buff *skb,
+				      const struct net_device *in,
+				      const struct net_device *out,
+				      int (*okfn)(struct sk_buff *))
+{
+	__be32 daddr = ip_hdr(skb)->daddr;
+	unsigned int ret;
+
+	ret = nf_nat_fn(ops, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    ip_hdr(skb)->daddr != daddr) {
+		skb_dst_drop(skb);
+	}
+	return ret;
+}
+
+static unsigned int nf_nat_postrouting(const struct nf_hook_ops *ops,
+				       struct sk_buff *skb,
+				       const struct net_device *in,
+				       const struct net_device *out,
+				       int (*okfn)(struct sk_buff *))
+{
+	enum ip_conntrack_info ctinfo __maybe_unused;
+	const struct nf_conn *ct __maybe_unused;
+	unsigned int ret;
+
+	ret = nf_nat_fn(ops, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (ct->tuplehash[dir].tuple.src.u3.ip !=
+		    ct->tuplehash[!dir].tuple.dst.u3.ip ||
+		    ct->tuplehash[dir].tuple.src.u.all !=
+		    ct->tuplehash[!dir].tuple.dst.u.all)
+			return nf_xfrm_me_harder(skb, AF_INET) == 0 ?
+								ret : NF_DROP;
+	}
+#endif
+	return ret;
+}
+
+static unsigned int nf_nat_output(const struct nf_hook_ops *ops,
+				  struct sk_buff *skb,
+				  const struct net_device *in,
+				  const struct net_device *out,
+				  int (*okfn)(struct sk_buff *))
+{
+	enum ip_conntrack_info ctinfo;
+	const struct nf_conn *ct;
+	unsigned int ret;
+
+	ret = nf_nat_fn(ops, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+		    ct->tuplehash[!dir].tuple.src.u3.ip) {
+			if (ip_route_me_harder(skb, RTN_UNSPEC))
+				ret = NF_DROP;
+		}
+#ifdef CONFIG_XFRM
+		else if (ct->tuplehash[dir].tuple.dst.u.all !=
+			 ct->tuplehash[!dir].tuple.src.u.all)
+			if (nf_xfrm_me_harder(skb, AF_INET))
+				ret = NF_DROP;
+#endif
+	}
+	return ret;
+}
+
+static struct nft_base_chain nf_chain_nat_prerouting __read_mostly = {
+	.chain	= {
+		.name		= "PREROUTING",
+		.rules		= LIST_HEAD_INIT(nf_chain_nat_prerouting.chain.rules),
+		.flags		= NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+	},
+	.ops	= {
+		.hook		= nf_nat_prerouting,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_PRE_ROUTING,
+		.priority	= NF_IP_PRI_NAT_DST,
+		.priv		= &nf_chain_nat_prerouting.chain,
+	},
+};
+
+static struct nft_base_chain nf_chain_nat_postrouting __read_mostly = {
+	.chain	= {
+		.name		= "POSTROUTING",
+		.rules		= LIST_HEAD_INIT(nf_chain_nat_postrouting.chain.rules),
+		.flags		= NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+	},
+	.ops	= {
+		.hook		= nf_nat_postrouting,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP_PRI_NAT_SRC,
+		.priv		= &nf_chain_nat_postrouting.chain,
+	},
+};
+
+static struct nft_base_chain nf_chain_nat_output __read_mostly = {
+	.chain	= {
+		.name		= "OUTPUT",
+		.rules		= LIST_HEAD_INIT(nf_chain_nat_output.chain.rules),
+		.flags		= NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+	},
+	.ops	= {
+		.hook		= nf_nat_output,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP_PRI_NAT_DST,
+		.priv		= &nf_chain_nat_output.chain,
+	},
+};
+
+static struct nft_base_chain nf_chain_nat_input __read_mostly = {
+	.chain	= {
+		.name		= "INPUT",
+		.rules		= LIST_HEAD_INIT(nf_chain_nat_input.chain.rules),
+		.flags		= NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+	},
+	.ops	= {
+		.hook		= nf_nat_fn,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP_PRI_NAT_SRC,
+		.priv		= &nf_chain_nat_input.chain,
+	},
+};
+
+
+static struct nft_table nf_table_nat_ipv4 __read_mostly = {
+	.name	= "nat",
+	.chains	= LIST_HEAD_INIT(nf_table_nat_ipv4.chains),
+};
+
+static int __init nf_table_nat_init(void)
+{
+	int err;
+
+	list_add_tail(&nf_chain_nat_prerouting.chain.list,
+		      &nf_table_nat_ipv4.chains);
+	list_add_tail(&nf_chain_nat_postrouting.chain.list,
+		      &nf_table_nat_ipv4.chains);
+	list_add_tail(&nf_chain_nat_output.chain.list,
+		      &nf_table_nat_ipv4.chains);
+	list_add_tail(&nf_chain_nat_input.chain.list,
+		      &nf_table_nat_ipv4.chains);
+
+	err = nft_register_table(&nf_table_nat_ipv4, NFPROTO_IPV4);
+	if (err < 0)
+		goto err1;
+
+	err = nft_register_expr(&nft_nat_ops);
+	if (err < 0)
+		goto err2;
+
+	return 0;
+
+err2:
+	nft_unregister_table(&nf_table_nat_ipv4, NFPROTO_IPV4);
+err1:
+	return err;
+}
+
+static void __exit nf_table_nat_exit(void)
+{
+	nft_unregister_expr(&nft_nat_ops);
+	nft_unregister_table(&nf_table_nat_ipv4, AF_INET);
+}
+
+module_init(nf_table_nat_init);
+module_exit(nf_table_nat_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_TABLE(AF_INET, "nat");
+MODULE_ALIAS_NFT_EXPR("nat");
diff --git a/net/ipv4/netfilter/nf_table_route_ipv4.c b/net/ipv4/netfilter/nf_table_route_ipv4.c
new file mode 100644
index 000000000000..4f257a1ed661
--- /dev/null
+++ b/net/ipv4/netfilter/nf_table_route_ipv4.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+					struct sk_buff *skb,
+					const struct net_device *in,
+					const struct net_device *out,
+					int (*okfn)(struct sk_buff *))
+{
+	unsigned int ret;
+	u32 mark;
+	__be32 saddr, daddr;
+	u_int8_t tos;
+	const struct iphdr *iph;
+
+	/* root is playing with raw sockets. */
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr))
+		return NF_ACCEPT;
+
+	mark = skb->mark;
+	iph = ip_hdr(skb);
+	saddr = iph->saddr;
+	daddr = iph->daddr;
+	tos = iph->tos;
+
+	ret = nft_do_chain(ops, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_QUEUE) {
+		iph = ip_hdr(skb);
+
+		if (iph->saddr != saddr ||
+		    iph->daddr != daddr ||
+		    skb->mark != mark ||
+		    iph->tos != tos)
+			if (ip_route_me_harder(skb, RTN_UNSPEC))
+				ret = NF_DROP;
+	}
+	return ret;
+}
+
+static struct nft_base_chain nf_chain_route_output __read_mostly = {
+	.chain	= {
+		.name		= "OUTPUT",
+		.rules		= LIST_HEAD_INIT(nf_chain_route_output.chain.rules),
+		.flags		= NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+	},
+	.ops	= {
+		.hook		= nf_route_table_hook,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP_PRI_MANGLE,
+		.priv		= &nf_chain_route_output.chain,
+	},
+};
+
+static struct nft_table nf_table_route_ipv4 __read_mostly = {
+	.name	= "route",
+	.chains	= LIST_HEAD_INIT(nf_table_route_ipv4.chains),
+};
+
+static int __init nf_table_route_init(void)
+{
+	list_add_tail(&nf_chain_route_output.chain.list,
+		      &nf_table_route_ipv4.chains);
+	return nft_register_table(&nf_table_route_ipv4, NFPROTO_IPV4);
+}
+
+static void __exit nf_table_route_exit(void)
+{
+	nft_unregister_table(&nf_table_route_ipv4, NFPROTO_IPV4);
+}
+
+module_init(nf_table_route_init);
+module_exit(nf_table_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_TABLE(AF_INET, "route");
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
new file mode 100644
index 000000000000..63d0a3bf53d3
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/ip.h>
+
+static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops,
+				    struct sk_buff *skb,
+				    const struct net_device *in,
+				    const struct net_device *out,
+				    int (*okfn)(struct sk_buff *))
+{
+	if (unlikely(skb->len < sizeof(struct iphdr) ||
+		     ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
+		if (net_ratelimit())
+			pr_info("nf_tables_ipv4: ignoring short SOCK_RAW "
+				"packet\n");
+		return NF_ACCEPT;
+	}
+
+	return nft_do_chain(ops, skb, in, out, okfn);
+}
+
+static struct nft_af_info nft_af_ipv4 __read_mostly = {
+	.family		= NFPROTO_IPV4,
+	.nhooks		= NF_INET_NUMHOOKS,
+	.owner		= THIS_MODULE,
+	.hooks		= {
+		[NF_INET_LOCAL_OUT]	= nft_ipv4_output,
+	},
+};
+
+static int __init nf_tables_ipv4_init(void)
+{
+	return nft_register_afinfo(&nft_af_ipv4);
+}
+
+static void __exit nf_tables_ipv4_exit(void)
+{
+	nft_unregister_afinfo(&nft_af_ipv4);
+}
+
+module_init(nf_tables_ipv4_init);
+module_exit(nf_tables_ipv4_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_INET);
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
new file mode 100644
index 000000000000..b4ee8d3bb1e4
--- /dev/null
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/icmp.h>
+
+struct nft_reject {
+	enum nft_reject_types	type:8;
+	u8			icmp_code;
+};
+
+static void nft_reject_eval(const struct nft_expr *expr,
+			      struct nft_data data[NFT_REG_MAX + 1],
+			      const struct nft_pktinfo *pkt)
+{
+	struct nft_reject *priv = nft_expr_priv(expr);
+
+	switch (priv->type) {
+	case NFT_REJECT_ICMP_UNREACH:
+		icmp_send(pkt->skb, ICMP_DEST_UNREACH, priv->icmp_code, 0);
+		break;
+	case NFT_REJECT_TCP_RST:
+		break;
+	}
+
+	data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+
+static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
+	[NFTA_REJECT_TYPE]		= { .type = NLA_U32 },
+	[NFTA_REJECT_ICMP_CODE]		= { .type = NLA_U8 },
+};
+
+static int nft_reject_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
+{
+	struct nft_reject *priv = nft_expr_priv(expr);
+
+	if (tb[NFTA_REJECT_TYPE] == NULL)
+		return -EINVAL;
+
+	priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
+	switch (priv->type) {
+	case NFT_REJECT_ICMP_UNREACH:
+		if (tb[NFTA_REJECT_ICMP_CODE] == NULL)
+			return -EINVAL;
+		priv->icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]);
+	case NFT_REJECT_TCP_RST:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_reject *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_REJECT_TYPE, priv->type))
+		goto nla_put_failure;
+
+	switch (priv->type) {
+	case NFT_REJECT_ICMP_UNREACH:
+		if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
+			goto nla_put_failure;
+		break;
+	}
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops reject_ops __read_mostly = {
+	.name		= "reject",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_reject_eval,
+	.init		= nft_reject_init,
+	.dump		= nft_reject_dump,
+	.policy		= nft_reject_policy,
+	.maxattr	= NFTA_REJECT_MAX,
+};
+
+static int __init nft_reject_module_init(void)
+{
+	return nft_register_expr(&reject_ops);
+}
+
+static void __exit nft_reject_module_exit(void)
+{
+	nft_unregister_expr(&reject_ops);
+}
+
+module_init(nft_reject_module_init);
+module_exit(nft_reject_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("reject");
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index a7f842b29b67..5677e38eeca3 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -25,6 +25,14 @@ config NF_CONNTRACK_IPV6
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_TABLES_IPV6
+	depends on NF_TABLES
+	tristate "IPv6 nf_tables support"
+
+config NF_TABLE_ROUTE_IPV6
+	depends on NF_TABLES_IPV6
+	tristate "IPv6 nf_tables route table support"
+
 config IP6_NF_IPTABLES
 	tristate "IP6 tables support (required for filtering)"
 	depends on INET && IPV6
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 2b53738f798c..956af4492d10 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -23,6 +23,10 @@ obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
 nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
 obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
 
+# nf_tables
+obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
+obj-$(CONFIG_NF_TABLE_ROUTE_IPV6) += nf_table_route_ipv6.o
+
 # matches
 obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
 obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/nf_table_route_ipv6.c b/net/ipv6/netfilter/nf_table_route_ipv6.c
new file mode 100644
index 000000000000..48ac65c7b398
--- /dev/null
+++ b/net/ipv6/netfilter/nf_table_route_ipv6.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/route.h>
+
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+					struct sk_buff *skb,
+					const struct net_device *in,
+					const struct net_device *out,
+					int (*okfn)(struct sk_buff *))
+{
+	unsigned int ret;
+	struct in6_addr saddr, daddr;
+	u_int8_t hop_limit;
+	u32 mark, flowlabel;
+
+	/* save source/dest address, mark, hoplimit, flowlabel, priority */
+	memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+	memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
+	mark = skb->mark;
+	hop_limit = ipv6_hdr(skb)->hop_limit;
+
+	/* flowlabel and prio (includes version, which shouldn't change either */
+	flowlabel = *((u32 *)ipv6_hdr(skb));
+
+	ret = nft_do_chain(ops, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_QUEUE &&
+	    (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
+	     memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
+	     skb->mark != mark ||
+	     ipv6_hdr(skb)->hop_limit != hop_limit ||
+	     flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
+		return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
+
+	return ret;
+}
+
+static struct nft_base_chain nf_chain_route_output __read_mostly = {
+	.chain	= {
+		.name		= "OUTPUT",
+		.rules		= LIST_HEAD_INIT(nf_chain_route_output.chain.rules),
+		.flags		= NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+	},
+	.ops	= {
+		.hook		= nf_route_table_hook,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP6_PRI_MANGLE,
+		.priv		= &nf_chain_route_output.chain,
+	},
+};
+
+static struct nft_table nf_table_route_ipv6 __read_mostly = {
+	.name	= "route",
+	.chains	= LIST_HEAD_INIT(nf_table_route_ipv6.chains),
+};
+
+static int __init nf_table_route_init(void)
+{
+	list_add_tail(&nf_chain_route_output.chain.list,
+		      &nf_table_route_ipv6.chains);
+	return nft_register_table(&nf_table_route_ipv6, NFPROTO_IPV6);
+}
+
+static void __exit nf_table_route_exit(void)
+{
+	nft_unregister_table(&nf_table_route_ipv6, NFPROTO_IPV6);
+}
+
+module_init(nf_table_route_init);
+module_exit(nf_table_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_TABLE(AF_INET6, "route");
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
new file mode 100644
index 000000000000..e0717cea4913
--- /dev/null
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_tables.h>
+
+static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops,
+				    struct sk_buff *skb,
+				    const struct net_device *in,
+				    const struct net_device *out,
+				    int (*okfn)(struct sk_buff *))
+{
+	if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
+		if (net_ratelimit())
+			pr_info("nf_tables_ipv6: ignoring short SOCK_RAW "
+				"packet\n");
+		return NF_ACCEPT;
+	}
+
+	return nft_do_chain(ops, skb, in, out, okfn);
+}
+
+static struct nft_af_info nft_af_ipv6 __read_mostly = {
+	.family		= NFPROTO_IPV6,
+	.nhooks		= NF_INET_NUMHOOKS,
+	.owner		= THIS_MODULE,
+	.hooks		= {
+		[NF_INET_LOCAL_OUT]	= nft_ipv6_output,
+	},
+};
+
+static int __init nf_tables_ipv6_init(void)
+{
+	return nft_register_afinfo(&nft_af_ipv6);
+}
+
+static void __exit nf_tables_ipv6_exit(void)
+{
+	nft_unregister_afinfo(&nft_af_ipv6);
+}
+
+module_init(nf_tables_ipv6_init);
+module_exit(nf_tables_ipv6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_INET6);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 6e839b6dff2b..c271e1af93b5 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -413,6 +413,43 @@ config NETFILTER_SYNPROXY
 
 endif # NF_CONNTRACK
 
+config NF_TABLES
+	depends on NETFILTER_NETLINK
+	tristate "Netfilter nf_tables support"
+
+config NFT_EXTHDR
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables IPv6 exthdr module"
+
+config NFT_META
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables meta module"
+
+config NFT_CT
+	depends on NF_TABLES
+	depends on NF_CONNTRACK
+	tristate "Netfilter nf_tables conntrack module"
+
+config NFT_SET
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables set module"
+
+config NFT_HASH
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables hash module"
+
+config NFT_COUNTER
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables counter module"
+
+config NFT_LOG
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables log module"
+
+config NFT_LIMIT
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables limit module"
+
 config NETFILTER_XTABLES
 	tristate "Netfilter Xtables support (required for ip_tables)"
 	default m if NETFILTER_ADVANCED=n
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index c3a0a12907f6..1ca3f3932826 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -64,6 +64,22 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
 # SYNPROXY
 obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
 
+# nf_tables
+nf_tables-objs += nf_tables_core.o nf_tables_api.o
+nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o
+nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
+
+obj-$(CONFIG_NF_TABLES)		+= nf_tables.o
+obj-$(CONFIG_NFT_EXTHDR)	+= nft_exthdr.o
+obj-$(CONFIG_NFT_META)		+= nft_meta.o
+obj-$(CONFIG_NFT_CT)		+= nft_ct.o
+obj-$(CONFIG_NFT_LIMIT)		+= nft_limit.o
+#nf_tables-objs			+= nft_meta_target.o
+obj-$(CONFIG_NFT_SET)		+= nft_set.o
+obj-$(CONFIG_NFT_HASH)		+= nft_hash.o
+obj-$(CONFIG_NFT_COUNTER)	+= nft_counter.o
+obj-$(CONFIG_NFT_LOG)		+= nft_log.o
+
 # generic X tables 
 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
new file mode 100644
index 000000000000..7d59c89c6c75
--- /dev/null
+++ b/net/netfilter/nf_tables_api.c
@@ -0,0 +1,1760 @@
+/*
+ * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/sock.h>
+
+static LIST_HEAD(nf_tables_afinfo);
+static LIST_HEAD(nf_tables_expressions);
+
+/**
+ *	nft_register_afinfo - register nf_tables address family info
+ *
+ *	@afi: address family info to register
+ *
+ *	Register the address family for use with nf_tables. Returns zero on
+ *	success or a negative errno code otherwise.
+ */
+int nft_register_afinfo(struct nft_af_info *afi)
+{
+	INIT_LIST_HEAD(&afi->tables);
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_add_tail(&afi->list, &nf_tables_afinfo);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_afinfo);
+
+/**
+ *	nft_unregister_afinfo - unregister nf_tables address family info
+ *
+ *	@afi: address family info to unregister
+ *
+ *	Unregister the address family for use with nf_tables.
+ */
+void nft_unregister_afinfo(struct nft_af_info *afi)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_del(&afi->list);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_afinfo);
+
+static struct nft_af_info *nft_afinfo_lookup(int family)
+{
+	struct nft_af_info *afi;
+
+	list_for_each_entry(afi, &nf_tables_afinfo, list) {
+		if (afi->family == family)
+			return afi;
+	}
+	return NULL;
+}
+
+static struct nft_af_info *nf_tables_afinfo_lookup(int family, bool autoload)
+{
+	struct nft_af_info *afi;
+
+	afi = nft_afinfo_lookup(family);
+	if (afi != NULL)
+		return afi;
+#ifdef CONFIG_MODULES
+	if (autoload) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+		request_module("nft-afinfo-%u", family);
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		afi = nft_afinfo_lookup(family);
+		if (afi != NULL)
+			return ERR_PTR(-EAGAIN);
+	}
+#endif
+	return ERR_PTR(-EAFNOSUPPORT);
+}
+
+/*
+ * Tables
+ */
+
+static struct nft_table *nft_table_lookup(const struct nft_af_info *afi,
+					  const struct nlattr *nla)
+{
+	struct nft_table *table;
+
+	list_for_each_entry(table, &afi->tables, list) {
+		if (!nla_strcmp(nla, table->name))
+			return table;
+	}
+	return NULL;
+}
+
+static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi,
+						const struct nlattr *nla,
+						bool autoload)
+{
+	struct nft_table *table;
+
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	table = nft_table_lookup(afi, nla);
+	if (table != NULL)
+		return table;
+
+#ifdef CONFIG_MODULES
+	if (autoload) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+		request_module("nft-table-%u-%*.s", afi->family,
+			       nla_len(nla)-1, (const char *)nla_data(nla));
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		if (nft_table_lookup(afi, nla))
+			return ERR_PTR(-EAGAIN);
+	}
+#endif
+	return ERR_PTR(-ENOENT);
+}
+
+static inline u64 nf_tables_alloc_handle(struct nft_table *table)
+{
+	return ++table->hgenerator;
+}
+
+static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
+	[NFTA_TABLE_NAME]	= { .type = NLA_STRING },
+};
+
+static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
+				     int event, u32 flags, int family,
+				     const struct nft_table *table)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+
+	event |= NFNL_SUBSYS_NFTABLES << 8;
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
+	if (nlh == NULL)
+		goto nla_put_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family	= family;
+	nfmsg->version		= NFNETLINK_V0;
+	nfmsg->res_id		= 0;
+
+	if (nla_put_string(skb, NFTA_TABLE_NAME, table->name))
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_trim(skb, nlh);
+	return -1;
+}
+
+static int nf_tables_table_notify(const struct sk_buff *oskb,
+				  const struct nlmsghdr *nlh,
+				  const struct nft_table *table,
+				  int event, int family)
+{
+	struct sk_buff *skb;
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+	bool report;
+	int err;
+
+	report = nlh ? nlmsg_report(nlh) : false;
+	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+		return 0;
+
+	err = -ENOBUFS;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto err;
+
+	err = nf_tables_fill_table_info(skb, portid, seq, event, 0,
+					family, table);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto err;
+	}
+
+	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+			     GFP_KERNEL);
+err:
+	if (err < 0)
+		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	return err;
+}
+
+static int nf_tables_dump_tables(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	unsigned int idx = 0, s_idx = cb->args[0];
+	int family = nfmsg->nfgen_family;
+
+	list_for_each_entry(afi, &nf_tables_afinfo, list) {
+		if (family != NFPROTO_UNSPEC && family != afi->family)
+			continue;
+
+		list_for_each_entry(table, &afi->tables, list) {
+			if (idx < s_idx)
+				goto cont;
+			if (idx > s_idx)
+				memset(&cb->args[1], 0,
+				       sizeof(cb->args) - sizeof(cb->args[0]));
+			if (nf_tables_fill_table_info(skb,
+						      NETLINK_CB(cb->skb).portid,
+						      cb->nlh->nlmsg_seq,
+						      NFT_MSG_NEWTABLE,
+						      NLM_F_MULTI,
+						      afi->family, table) < 0)
+				goto done;
+cont:
+			idx++;
+		}
+	}
+done:
+	cb->args[0] = idx;
+	return skb->len;
+}
+
+static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	struct sk_buff *skb2;
+	int family = nfmsg->nfgen_family;
+	int err;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = nf_tables_dump_tables,
+		};
+		return netlink_dump_start(nlsk, skb, nlh, &c);
+	}
+
+	afi = nf_tables_afinfo_lookup(family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb2)
+		return -ENOMEM;
+
+	err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid,
+					nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
+					family, table);
+	if (err < 0)
+		goto err;
+
+	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+	kfree_skb(skb2);
+	return err;
+}
+
+static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nlattr *name;
+	struct nft_af_info *afi;
+	struct nft_table *table;
+	int family = nfmsg->nfgen_family;
+
+	afi = nf_tables_afinfo_lookup(family, true);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	name = nla[NFTA_TABLE_NAME];
+	table = nf_tables_table_lookup(afi, name, false);
+	if (IS_ERR(table)) {
+		if (PTR_ERR(table) != -ENOENT)
+			return PTR_ERR(table);
+		table = NULL;
+	}
+
+	if (table != NULL) {
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EOPNOTSUPP;
+		return 0;
+	}
+
+	table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL);
+	if (table == NULL)
+		return -ENOMEM;
+
+	nla_strlcpy(table->name, name, nla_len(name));
+	INIT_LIST_HEAD(&table->chains);
+
+	list_add_tail(&table->list, &afi->tables);
+	nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+	return 0;
+}
+
+static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	struct nft_af_info *afi;
+	struct nft_table *table;
+	int family = nfmsg->nfgen_family;
+
+	afi = nf_tables_afinfo_lookup(family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	if (table->flags & NFT_TABLE_BUILTIN)
+		return -EOPNOTSUPP;
+
+	if (table->use)
+		return -EBUSY;
+
+	list_del(&table->list);
+	nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family);
+	kfree(table);
+	return 0;
+}
+
+static struct nft_table *__nf_tables_table_lookup(const struct nft_af_info *afi,
+						  const char *name)
+{
+	struct nft_table *table;
+
+	list_for_each_entry(table, &afi->tables, list) {
+		if (!strcmp(name, table->name))
+			return table;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+static int nf_tables_chain_notify(const struct sk_buff *oskb,
+				  const struct nlmsghdr *nlh,
+				  const struct nft_table *table,
+				  const struct nft_chain *chain,
+				  int event, int family);
+
+/**
+ *	nft_register_table - register a built-in table
+ *
+ *	@table: the table to register
+ *	@family: protocol family to register table with
+ *
+ *	Register a built-in table for use with nf_tables. Returns zero on
+ *	success or a negative errno code otherwise.
+ */
+int nft_register_table(struct nft_table *table, int family)
+{
+	struct nft_af_info *afi;
+	struct nft_table *t;
+	struct nft_chain *chain;
+	int err;
+
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+again:
+	afi = nf_tables_afinfo_lookup(family, true);
+	if (IS_ERR(afi)) {
+		err = PTR_ERR(afi);
+		if (err == -EAGAIN)
+			goto again;
+		goto err;
+	}
+
+	t = __nf_tables_table_lookup(afi, table->name);
+	if (IS_ERR(t)) {
+		err = PTR_ERR(t);
+		if (err != -ENOENT)
+			goto err;
+		t = NULL;
+	}
+
+	if (t != NULL) {
+		err = -EEXIST;
+		goto err;
+	}
+
+	table->flags |= NFT_TABLE_BUILTIN;
+	list_add_tail(&table->list, &afi->tables);
+	nf_tables_table_notify(NULL, NULL, table, NFT_MSG_NEWTABLE, family);
+	list_for_each_entry(chain, &table->chains, list)
+		nf_tables_chain_notify(NULL, NULL, table, chain,
+				       NFT_MSG_NEWCHAIN, family);
+	err = 0;
+err:
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	return err;
+}
+EXPORT_SYMBOL_GPL(nft_register_table);
+
+/**
+ *	nft_unregister_table - unregister a built-in table
+ *
+ *	@table: the table to unregister
+ *	@family: protocol family to unregister table with
+ *
+ *	Unregister a built-in table for use with nf_tables.
+ */
+void nft_unregister_table(struct nft_table *table, int family)
+{
+	struct nft_chain *chain;
+
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_del(&table->list);
+	list_for_each_entry(chain, &table->chains, list)
+		nf_tables_chain_notify(NULL, NULL, table, chain,
+				       NFT_MSG_DELCHAIN, family);
+	nf_tables_table_notify(NULL, NULL, table, NFT_MSG_DELTABLE, family);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_table);
+
+/*
+ * Chains
+ */
+
+static struct nft_chain *
+nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle)
+{
+	struct nft_chain *chain;
+
+	list_for_each_entry(chain, &table->chains, list) {
+		if (chain->handle == handle)
+			return chain;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
+						const struct nlattr *nla)
+{
+	struct nft_chain *chain;
+
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	list_for_each_entry(chain, &table->chains, list) {
+		if (!nla_strcmp(nla, chain->name))
+			return chain;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
+	[NFTA_CHAIN_TABLE]	= { .type = NLA_STRING },
+	[NFTA_CHAIN_HANDLE]	= { .type = NLA_U64 },
+	[NFTA_CHAIN_NAME]	= { .type = NLA_STRING,
+				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
+	[NFTA_CHAIN_HOOK]	= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
+	[NFTA_HOOK_HOOKNUM]	= { .type = NLA_U32 },
+	[NFTA_HOOK_PRIORITY]	= { .type = NLA_U32 },
+};
+
+static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
+				     int event, u32 flags, int family,
+				     const struct nft_table *table,
+				     const struct nft_chain *chain)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+
+	event |= NFNL_SUBSYS_NFTABLES << 8;
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
+	if (nlh == NULL)
+		goto nla_put_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family	= family;
+	nfmsg->version		= NFNETLINK_V0;
+	nfmsg->res_id		= 0;
+
+	if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
+		goto nla_put_failure;
+	if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle)))
+		goto nla_put_failure;
+	if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name))
+		goto nla_put_failure;
+
+	if (chain->flags & NFT_BASE_CHAIN) {
+		const struct nf_hook_ops *ops = &nft_base_chain(chain)->ops;
+		struct nlattr *nest = nla_nest_start(skb, NFTA_CHAIN_HOOK);
+		if (nest == NULL)
+			goto nla_put_failure;
+		if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
+			goto nla_put_failure;
+		if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
+			goto nla_put_failure;
+		nla_nest_end(skb, nest);
+	}
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_trim(skb, nlh);
+	return -1;
+}
+
+static int nf_tables_chain_notify(const struct sk_buff *oskb,
+				  const struct nlmsghdr *nlh,
+				  const struct nft_table *table,
+				  const struct nft_chain *chain,
+				  int event, int family)
+{
+	struct sk_buff *skb;
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	bool report;
+	int err;
+
+	report = nlh ? nlmsg_report(nlh) : false;
+	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+		return 0;
+
+	err = -ENOBUFS;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto err;
+
+	err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family,
+					table, chain);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto err;
+	}
+
+	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+			     GFP_KERNEL);
+err:
+	if (err < 0)
+		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	return err;
+}
+
+static int nf_tables_dump_chains(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+	unsigned int idx = 0, s_idx = cb->args[0];
+	int family = nfmsg->nfgen_family;
+
+	list_for_each_entry(afi, &nf_tables_afinfo, list) {
+		if (family != NFPROTO_UNSPEC && family != afi->family)
+			continue;
+
+		list_for_each_entry(table, &afi->tables, list) {
+			list_for_each_entry(chain, &table->chains, list) {
+				if (idx < s_idx)
+					goto cont;
+				if (idx > s_idx)
+					memset(&cb->args[1], 0,
+					       sizeof(cb->args) - sizeof(cb->args[0]));
+				if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid,
+							      cb->nlh->nlmsg_seq,
+							      NFT_MSG_NEWCHAIN,
+							      NLM_F_MULTI,
+							      afi->family, table, chain) < 0)
+					goto done;
+cont:
+				idx++;
+			}
+		}
+	}
+done:
+	cb->args[0] = idx;
+	return skb->len;
+}
+
+
+static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+	struct sk_buff *skb2;
+	int family = nfmsg->nfgen_family;
+	int err;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = nf_tables_dump_chains,
+		};
+		return netlink_dump_start(nlsk, skb, nlh, &c);
+	}
+
+	afi = nf_tables_afinfo_lookup(family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb2)
+		return -ENOMEM;
+
+	err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid,
+					nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
+					family, table, chain);
+	if (err < 0)
+		goto err;
+
+	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+	kfree_skb(skb2);
+	return err;
+}
+
+static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nlattr * uninitialized_var(name);
+	const struct nft_af_info *afi;
+	struct nft_table *table;
+	struct nft_chain *chain;
+	struct nft_base_chain *basechain;
+	struct nlattr *ha[NFTA_HOOK_MAX + 1];
+	int family = nfmsg->nfgen_family;
+	u64 handle = 0;
+	int err;
+	bool create;
+
+	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+	afi = nf_tables_afinfo_lookup(family, true);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], create);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	if (table->use == UINT_MAX)
+		return -EOVERFLOW;
+
+	chain = NULL;
+	name = nla[NFTA_CHAIN_NAME];
+
+	if (nla[NFTA_CHAIN_HANDLE]) {
+		handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+		chain = nf_tables_chain_lookup_byhandle(table, handle);
+		if (IS_ERR(chain))
+			return PTR_ERR(chain);
+	} else {
+		chain = nf_tables_chain_lookup(table, name);
+		if (IS_ERR(chain)) {
+			if (PTR_ERR(chain) != -ENOENT)
+				return PTR_ERR(chain);
+			chain = NULL;
+		}
+	}
+
+	if (chain != NULL) {
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EOPNOTSUPP;
+
+		if (nla[NFTA_CHAIN_HANDLE] && name &&
+		    !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME])))
+			return -EEXIST;
+
+		if (nla[NFTA_CHAIN_HANDLE] && name)
+			nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+
+		goto notify;
+	}
+
+	if (nla[NFTA_CHAIN_HOOK]) {
+		struct nf_hook_ops *ops;
+
+		err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
+				       nft_hook_policy);
+		if (err < 0)
+			return err;
+		if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
+		    ha[NFTA_HOOK_PRIORITY] == NULL)
+			return -EINVAL;
+		if (ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])) >= afi->nhooks)
+			return -EINVAL;
+
+		basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
+		if (basechain == NULL)
+			return -ENOMEM;
+		chain = &basechain->chain;
+
+		ops = &basechain->ops;
+		ops->pf		= family;
+		ops->owner	= afi->owner;
+		ops->hooknum	= ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
+		ops->priority	= ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
+		ops->priv	= chain;
+		ops->hook	= nft_do_chain;
+		if (afi->hooks[ops->hooknum])
+			ops->hook = afi->hooks[ops->hooknum];
+
+		chain->flags |= NFT_BASE_CHAIN;
+	} else {
+		chain = kzalloc(sizeof(*chain), GFP_KERNEL);
+		if (chain == NULL)
+			return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&chain->rules);
+	chain->handle = nf_tables_alloc_handle(table);
+	nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+
+	list_add_tail(&chain->list, &table->chains);
+	table->use++;
+notify:
+	nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN,
+			       family);
+	return 0;
+}
+
+static void nf_tables_rcu_chain_destroy(struct rcu_head *head)
+{
+	struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head);
+
+	BUG_ON(chain->use > 0);
+
+	if (chain->flags & NFT_BASE_CHAIN)
+		kfree(nft_base_chain(chain));
+	else
+		kfree(chain);
+}
+
+static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	struct nft_table *table;
+	struct nft_chain *chain;
+	int family = nfmsg->nfgen_family;
+
+	afi = nf_tables_afinfo_lookup(family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	if (chain->flags & NFT_CHAIN_BUILTIN)
+		return -EOPNOTSUPP;
+
+	if (!list_empty(&chain->rules))
+		return -EBUSY;
+
+	list_del(&chain->list);
+	table->use--;
+
+	if (chain->flags & NFT_BASE_CHAIN)
+		nf_unregister_hook(&nft_base_chain(chain)->ops);
+
+	nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN,
+			       family);
+
+	/* Make sure all rule references are gone before this is released */
+	call_rcu(&chain->rcu_head, nf_tables_rcu_chain_destroy);
+	return 0;
+}
+
+static void nft_ctx_init(struct nft_ctx *ctx,
+			 const struct nft_af_info *afi,
+			 const struct nft_table *table,
+			 const struct nft_chain *chain)
+{
+	ctx->afi   = afi;
+	ctx->table = table;
+	ctx->chain = chain;
+}
+
+/*
+ * Expressions
+ */
+
+/**
+ *	nft_register_expr - register nf_tables expr operations
+ *	@ops: expr operations
+ *
+ *	Registers the expr operations for use with nf_tables. Returns zero on
+ *	success or a negative errno code otherwise.
+ */
+int nft_register_expr(struct nft_expr_ops *ops)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_add_tail(&ops->list, &nf_tables_expressions);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_expr);
+
+/**
+ *	nft_unregister_expr - unregister nf_tables expr operations
+ *	@ops: expr operations
+ *
+ * 	Unregisters the expr operations for use with nf_tables.
+ */
+void nft_unregister_expr(struct nft_expr_ops *ops)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_del(&ops->list);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_expr);
+
+static const struct nft_expr_ops *__nft_expr_ops_get(struct nlattr *nla)
+{
+	const struct nft_expr_ops *ops;
+
+	list_for_each_entry(ops, &nf_tables_expressions, list) {
+		if (!nla_strcmp(nla, ops->name))
+			return ops;
+	}
+	return NULL;
+}
+
+static const struct nft_expr_ops *nft_expr_ops_get(struct nlattr *nla)
+{
+	const struct nft_expr_ops *ops;
+
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	ops = __nft_expr_ops_get(nla);
+	if (ops != NULL && try_module_get(ops->owner))
+		return ops;
+
+#ifdef CONFIG_MODULES
+	if (ops == NULL) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+		request_module("nft-expr-%.*s",
+			       nla_len(nla), (char *)nla_data(nla));
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		if (__nft_expr_ops_get(nla))
+			return ERR_PTR(-EAGAIN);
+	}
+#endif
+	return ERR_PTR(-ENOENT);
+}
+
+static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = {
+	[NFTA_EXPR_NAME]	= { .type = NLA_STRING },
+	[NFTA_EXPR_DATA]	= { .type = NLA_NESTED },
+};
+
+static int nf_tables_fill_expr_info(struct sk_buff *skb,
+				    const struct nft_expr *expr)
+{
+	if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->name))
+		goto nla_put_failure;
+
+	if (expr->ops->dump) {
+		struct nlattr *data = nla_nest_start(skb, NFTA_EXPR_DATA);
+		if (data == NULL)
+			goto nla_put_failure;
+		if (expr->ops->dump(skb, expr) < 0)
+			goto nla_put_failure;
+		nla_nest_end(skb, data);
+	}
+
+	return skb->len;
+
+nla_put_failure:
+	return -1;
+};
+
+struct nft_expr_info {
+	const struct nft_expr_ops	*ops;
+	struct nlattr			*tb[NFTA_EXPR_MAX + 1];
+};
+
+static int nf_tables_expr_parse(const struct nlattr *nla,
+				struct nft_expr_info *info)
+{
+	const struct nft_expr_ops *ops;
+	int err;
+
+	err = nla_parse_nested(info->tb, NFTA_EXPR_MAX, nla, nft_expr_policy);
+	if (err < 0)
+		return err;
+
+	ops = nft_expr_ops_get(info->tb[NFTA_EXPR_NAME]);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+	info->ops = ops;
+	return 0;
+}
+
+static int nf_tables_newexpr(const struct nft_ctx *ctx,
+			     struct nft_expr_info *info,
+			     struct nft_expr *expr)
+{
+	const struct nft_expr_ops *ops = info->ops;
+	int err;
+
+	expr->ops = ops;
+	if (ops->init) {
+		struct nlattr *ma[ops->maxattr + 1];
+
+		if (info->tb[NFTA_EXPR_DATA]) {
+			err = nla_parse_nested(ma, ops->maxattr,
+					       info->tb[NFTA_EXPR_DATA],
+					       ops->policy);
+			if (err < 0)
+				goto err1;
+		} else
+			memset(ma, 0, sizeof(ma[0]) * (ops->maxattr + 1));
+
+		err = ops->init(ctx, expr, (const struct nlattr **)ma);
+		if (err < 0)
+			goto err1;
+	}
+
+	info->ops = NULL;
+	return 0;
+
+err1:
+	expr->ops = NULL;
+	return err;
+}
+
+static void nf_tables_expr_destroy(struct nft_expr *expr)
+{
+	if (expr->ops->destroy)
+		expr->ops->destroy(expr);
+	module_put(expr->ops->owner);
+}
+
+/*
+ * Rules
+ */
+
+static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain,
+						u64 handle)
+{
+	struct nft_rule *rule;
+
+	// FIXME: this sucks
+	list_for_each_entry(rule, &chain->rules, list) {
+		if (handle == rule->handle)
+			return rule;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain,
+					      const struct nlattr *nla)
+{
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	return __nf_tables_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
+}
+
+static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
+	[NFTA_RULE_TABLE]	= { .type = NLA_STRING },
+	[NFTA_RULE_CHAIN]	= { .type = NLA_STRING,
+				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
+	[NFTA_RULE_HANDLE]	= { .type = NLA_U64 },
+	[NFTA_RULE_EXPRESSIONS]	= { .type = NLA_NESTED },
+};
+
+static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
+				    int event, u32 flags, int family,
+				    const struct nft_table *table,
+				    const struct nft_chain *chain,
+				    const struct nft_rule *rule)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+	const struct nft_expr *expr, *next;
+	struct nlattr *list;
+
+	event |= NFNL_SUBSYS_NFTABLES << 8;
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
+			flags);
+	if (nlh == NULL)
+		goto nla_put_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family	= family;
+	nfmsg->version		= NFNETLINK_V0;
+	nfmsg->res_id		= 0;
+
+	if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
+		goto nla_put_failure;
+	if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name))
+		goto nla_put_failure;
+	if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle)))
+		goto nla_put_failure;
+
+	list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS);
+	if (list == NULL)
+		goto nla_put_failure;
+	nft_rule_for_each_expr(expr, next, rule) {
+		struct nlattr *elem = nla_nest_start(skb, NFTA_LIST_ELEM);
+		if (elem == NULL)
+			goto nla_put_failure;
+		if (nf_tables_fill_expr_info(skb, expr) < 0)
+			goto nla_put_failure;
+		nla_nest_end(skb, elem);
+	}
+	nla_nest_end(skb, list);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_trim(skb, nlh);
+	return -1;
+}
+
+static int nf_tables_rule_notify(const struct sk_buff *oskb,
+				 const struct nlmsghdr *nlh,
+				 const struct nft_table *table,
+				 const struct nft_chain *chain,
+				 const struct nft_rule *rule,
+				 int event, u32 flags, int family)
+{
+	struct sk_buff *skb;
+	u32 portid = NETLINK_CB(oskb).portid;
+	struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+	u32 seq = nlh->nlmsg_seq;
+	bool report;
+	int err;
+
+	report = nlmsg_report(nlh);
+	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+		return 0;
+
+	err = -ENOBUFS;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto err;
+
+	err = nf_tables_fill_rule_info(skb, portid, seq, event, flags,
+				       family, table, chain, rule);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto err;
+	}
+
+	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+			     GFP_KERNEL);
+err:
+	if (err < 0)
+		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	return err;
+}
+
+static int nf_tables_dump_rules(struct sk_buff *skb,
+				struct netlink_callback *cb)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+	const struct nft_rule *rule;
+	unsigned int idx = 0, s_idx = cb->args[0];
+	int family = nfmsg->nfgen_family;
+
+	list_for_each_entry(afi, &nf_tables_afinfo, list) {
+		if (family != NFPROTO_UNSPEC && family != afi->family)
+			continue;
+
+		list_for_each_entry(table, &afi->tables, list) {
+			list_for_each_entry(chain, &table->chains, list) {
+				list_for_each_entry(rule, &chain->rules, list) {
+					if (idx < s_idx)
+						goto cont;
+					if (idx > s_idx)
+						memset(&cb->args[1], 0,
+						       sizeof(cb->args) - sizeof(cb->args[0]));
+					if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid,
+								      cb->nlh->nlmsg_seq,
+								      NFT_MSG_NEWRULE,
+								      NLM_F_MULTI | NLM_F_APPEND,
+								      afi->family, table, chain, rule) < 0)
+						goto done;
+cont:
+					idx++;
+				}
+			}
+		}
+	}
+done:
+	cb->args[0] = idx;
+	return skb->len;
+}
+
+static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
+			     const struct nlmsghdr *nlh,
+			     const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+	const struct nft_rule *rule;
+	struct sk_buff *skb2;
+	int family = nfmsg->nfgen_family;
+	int err;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = nf_tables_dump_rules,
+		};
+		return netlink_dump_start(nlsk, skb, nlh, &c);
+	}
+
+	afi = nf_tables_afinfo_lookup(family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+	if (IS_ERR(rule))
+		return PTR_ERR(rule);
+
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb2)
+		return -ENOMEM;
+
+	err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid,
+				       nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
+				       family, table, chain, rule);
+	if (err < 0)
+		goto err;
+
+	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+	kfree_skb(skb2);
+	return err;
+}
+
+static void nf_tables_rcu_rule_destroy(struct rcu_head *head)
+{
+	struct nft_rule *rule = container_of(head, struct nft_rule, rcu_head);
+	struct nft_expr *expr;
+
+	/*
+	 * Careful: some expressions might not be initialized in case this
+	 * is called on error from nf_tables_newrule().
+	 */
+	expr = nft_expr_first(rule);
+	while (expr->ops && expr != nft_expr_last(rule)) {
+		nf_tables_expr_destroy(expr);
+		expr = nft_expr_next(expr);
+	}
+	kfree(rule);
+}
+
+static void nf_tables_rule_destroy(struct nft_rule *rule)
+{
+	call_rcu(&rule->rcu_head, nf_tables_rcu_rule_destroy);
+}
+
+#define NFT_RULE_MAXEXPRS	128
+
+static struct nft_expr_info *info;
+
+static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
+			     const struct nlmsghdr *nlh,
+			     const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	struct nft_table *table;
+	struct nft_chain *chain;
+	struct nft_rule *rule, *old_rule = NULL;
+	struct nft_expr *expr;
+	struct nft_ctx ctx;
+	struct nlattr *tmp;
+	unsigned int size, i, n;
+	int err, rem;
+	bool create;
+	u64 handle;
+
+	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+	afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, create);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], create);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	if (nla[NFTA_RULE_HANDLE]) {
+		handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
+		rule = __nf_tables_rule_lookup(chain, handle);
+		if (IS_ERR(rule))
+			return PTR_ERR(rule);
+
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			old_rule = rule;
+		else
+			return -EOPNOTSUPP;
+	} else {
+		if (!create || nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EINVAL;
+		handle = nf_tables_alloc_handle(table);
+	}
+
+	n = 0;
+	size = 0;
+	if (nla[NFTA_RULE_EXPRESSIONS]) {
+		nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
+			err = -EINVAL;
+			if (nla_type(tmp) != NFTA_LIST_ELEM)
+				goto err1;
+			if (n == NFT_RULE_MAXEXPRS)
+				goto err1;
+			err = nf_tables_expr_parse(tmp, &info[n]);
+			if (err < 0)
+				goto err1;
+			size += info[n].ops->size;
+			n++;
+		}
+	}
+
+	err = -ENOMEM;
+	rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL);
+	if (rule == NULL)
+		goto err1;
+
+	rule->handle = handle;
+	rule->dlen   = size;
+
+	nft_ctx_init(&ctx, afi, table, chain);
+	expr = nft_expr_first(rule);
+	for (i = 0; i < n; i++) {
+		err = nf_tables_newexpr(&ctx, &info[i], expr);
+		if (err < 0)
+			goto err2;
+		expr = nft_expr_next(expr);
+	}
+
+	/* Register hook when first rule is inserted into a base chain */
+	if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN) {
+		err = nf_register_hook(&nft_base_chain(chain)->ops);
+		if (err < 0)
+			goto err2;
+	}
+
+	if (nlh->nlmsg_flags & NLM_F_REPLACE) {
+		list_replace_rcu(&old_rule->list, &rule->list);
+		nf_tables_rule_destroy(old_rule);
+	} else if (nlh->nlmsg_flags & NLM_F_APPEND)
+		list_add_tail_rcu(&rule->list, &chain->rules);
+	else
+		list_add_rcu(&rule->list, &chain->rules);
+
+	nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE,
+			      nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE),
+			      nfmsg->nfgen_family);
+	return 0;
+
+err2:
+	nf_tables_rule_destroy(rule);
+err1:
+	for (i = 0; i < n; i++) {
+		if (info[i].ops != NULL)
+			module_put(info[i].ops->owner);
+	}
+	return err;
+}
+
+static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
+			     const struct nlmsghdr *nlh,
+			     const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	struct nft_chain *chain;
+	struct nft_rule *rule, *tmp;
+	int family = nfmsg->nfgen_family;
+
+	afi = nf_tables_afinfo_lookup(family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	if (nla[NFTA_RULE_HANDLE]) {
+		rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+		if (IS_ERR(rule))
+			return PTR_ERR(rule);
+
+		/* List removal must be visible before destroying expressions */
+		list_del_rcu(&rule->list);
+
+		nf_tables_rule_notify(skb, nlh, table, chain, rule,
+				      NFT_MSG_DELRULE, 0, family);
+		nf_tables_rule_destroy(rule);
+	} else {
+		/* Remove all rules in this chain */
+		list_for_each_entry_safe(rule, tmp, &chain->rules, list) {
+			list_del_rcu(&rule->list);
+
+			nf_tables_rule_notify(skb, nlh, table, chain, rule,
+					      NFT_MSG_DELRULE, 0, family);
+			nf_tables_rule_destroy(rule);
+		}
+	}
+
+	/* Unregister hook when last rule from base chain is deleted */
+	if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN)
+		nf_unregister_hook(&nft_base_chain(chain)->ops);
+
+	return 0;
+}
+
+static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
+	[NFT_MSG_NEWTABLE] = {
+		.call		= nf_tables_newtable,
+		.attr_count	= NFTA_TABLE_MAX,
+		.policy		= nft_table_policy,
+	},
+	[NFT_MSG_GETTABLE] = {
+		.call		= nf_tables_gettable,
+		.attr_count	= NFTA_TABLE_MAX,
+		.policy		= nft_table_policy,
+	},
+	[NFT_MSG_DELTABLE] = {
+		.call		= nf_tables_deltable,
+		.attr_count	= NFTA_TABLE_MAX,
+		.policy		= nft_table_policy,
+	},
+	[NFT_MSG_NEWCHAIN] = {
+		.call		= nf_tables_newchain,
+		.attr_count	= NFTA_CHAIN_MAX,
+		.policy		= nft_chain_policy,
+	},
+	[NFT_MSG_GETCHAIN] = {
+		.call		= nf_tables_getchain,
+		.attr_count	= NFTA_CHAIN_MAX,
+		.policy		= nft_chain_policy,
+	},
+	[NFT_MSG_DELCHAIN] = {
+		.call		= nf_tables_delchain,
+		.attr_count	= NFTA_CHAIN_MAX,
+		.policy		= nft_chain_policy,
+	},
+	[NFT_MSG_NEWRULE] = {
+		.call		= nf_tables_newrule,
+		.attr_count	= NFTA_RULE_MAX,
+		.policy		= nft_rule_policy,
+	},
+	[NFT_MSG_GETRULE] = {
+		.call		= nf_tables_getrule,
+		.attr_count	= NFTA_RULE_MAX,
+		.policy		= nft_rule_policy,
+	},
+	[NFT_MSG_DELRULE] = {
+		.call		= nf_tables_delrule,
+		.attr_count	= NFTA_RULE_MAX,
+		.policy		= nft_rule_policy,
+	},
+};
+
+static const struct nfnetlink_subsystem nf_tables_subsys = {
+	.name		= "nf_tables",
+	.subsys_id	= NFNL_SUBSYS_NFTABLES,
+	.cb_count	= NFT_MSG_MAX,
+	.cb		= nf_tables_cb,
+};
+
+/**
+ *	nft_validate_input_register - validate an expressions' input register
+ *
+ *	@reg: the register number
+ *
+ * 	Validate that the input register is one of the general purpose
+ * 	registers.
+ */
+int nft_validate_input_register(enum nft_registers reg)
+{
+	if (reg <= NFT_REG_VERDICT)
+		return -EINVAL;
+	if (reg > NFT_REG_MAX)
+		return -ERANGE;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_validate_input_register);
+
+/**
+ *	nft_validate_output_register - validate an expressions' output register
+ *
+ *	@reg: the register number
+ *
+ * 	Validate that the output register is one of the general purpose
+ * 	registers or the verdict register.
+ */
+int nft_validate_output_register(enum nft_registers reg)
+{
+	if (reg < NFT_REG_VERDICT)
+		return -EINVAL;
+	if (reg > NFT_REG_MAX)
+		return -ERANGE;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_validate_output_register);
+
+/**
+ *	nft_validate_data_load - validate an expressions' data load
+ *
+ *	@ctx: context of the expression performing the load
+ * 	@reg: the destination register number
+ * 	@data: the data to load
+ * 	@type: the data type
+ *
+ * 	Validate that a data load uses the appropriate data type for
+ * 	the destination register. A value of NULL for the data means
+ * 	that its runtime gathered data, which is always of type
+ * 	NFT_DATA_VALUE.
+ */
+int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg,
+			   const struct nft_data *data,
+			   enum nft_data_types type)
+{
+	switch (reg) {
+	case NFT_REG_VERDICT:
+		if (data == NULL || type != NFT_DATA_VERDICT)
+			return -EINVAL;
+		// FIXME: do loop detection
+		return 0;
+	default:
+		if (data != NULL && type != NFT_DATA_VALUE)
+			return -EINVAL;
+		return 0;
+	}
+}
+EXPORT_SYMBOL_GPL(nft_validate_data_load);
+
+static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
+	[NFTA_VERDICT_CODE]	= { .type = NLA_U32 },
+	[NFTA_VERDICT_CHAIN]	= { .type = NLA_STRING,
+				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
+};
+
+static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
+			    struct nft_data_desc *desc, const struct nlattr *nla)
+{
+	struct nlattr *tb[NFTA_VERDICT_MAX + 1];
+	struct nft_chain *chain;
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy);
+	if (err < 0)
+		return err;
+
+	if (!tb[NFTA_VERDICT_CODE])
+		return -EINVAL;
+	data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
+
+	switch (data->verdict) {
+	case NF_ACCEPT:
+	case NF_DROP:
+	case NF_QUEUE:
+	case NFT_CONTINUE:
+	case NFT_BREAK:
+	case NFT_RETURN:
+		desc->len = sizeof(data->verdict);
+		break;
+	case NFT_JUMP:
+	case NFT_GOTO:
+		if (!tb[NFTA_VERDICT_CHAIN])
+			return -EINVAL;
+		chain = nf_tables_chain_lookup(ctx->table,
+					       tb[NFTA_VERDICT_CHAIN]);
+		if (IS_ERR(chain))
+			return PTR_ERR(chain);
+		if (chain->flags & NFT_BASE_CHAIN)
+			return -EOPNOTSUPP;
+
+		if (ctx->chain->level + 1 > chain->level) {
+			if (ctx->chain->level + 1 == 16)
+				return -EMLINK;
+			chain->level = ctx->chain->level + 1;
+		}
+		chain->use++;
+		data->chain = chain;
+		desc->len = sizeof(data);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	desc->type = NFT_DATA_VERDICT;
+	return 0;
+}
+
+static void nft_verdict_uninit(const struct nft_data *data)
+{
+	switch (data->verdict) {
+	case NFT_JUMP:
+	case NFT_GOTO:
+		data->chain->use--;
+		break;
+	}
+}
+
+static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data)
+{
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, NFTA_DATA_VERDICT);
+	if (!nest)
+		goto nla_put_failure;
+
+	if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict)))
+		goto nla_put_failure;
+
+	switch (data->verdict) {
+	case NFT_JUMP:
+	case NFT_GOTO:
+		if (nla_put_string(skb, NFTA_VERDICT_CHAIN, data->chain->name))
+			goto nla_put_failure;
+	}
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data,
+			  struct nft_data_desc *desc, const struct nlattr *nla)
+{
+	unsigned int len;
+
+	len = nla_len(nla);
+	if (len == 0)
+		return -EINVAL;
+	if (len > sizeof(data->data))
+		return -EOVERFLOW;
+
+	nla_memcpy(data->data, nla, sizeof(data->data));
+	desc->type = NFT_DATA_VALUE;
+	desc->len  = len;
+	return 0;
+}
+
+static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data,
+			  unsigned int len)
+{
+	return nla_put(skb, NFTA_DATA_VALUE, len, data->data);
+}
+
+static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
+	[NFTA_DATA_VALUE]	= { .type = NLA_BINARY,
+				    .len  = FIELD_SIZEOF(struct nft_data, data) },
+	[NFTA_DATA_VERDICT]	= { .type = NLA_NESTED },
+};
+
+/**
+ *	nft_data_init - parse nf_tables data netlink attributes
+ *
+ *	@ctx: context of the expression using the data
+ *	@data: destination struct nft_data
+ *	@desc: data description
+ *	@nla: netlink attribute containing data
+ *
+ *	Parse the netlink data attributes and initialize a struct nft_data.
+ *	The type and length of data are returned in the data description.
+ *
+ *	The caller can indicate that it only wants to accept data of type
+ *	NFT_DATA_VALUE by passing NULL for the ctx argument.
+ */
+int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+		  struct nft_data_desc *desc, const struct nlattr *nla)
+{
+	struct nlattr *tb[NFTA_DATA_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_DATA_VALUE])
+		return nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]);
+	if (tb[NFTA_DATA_VERDICT] && ctx != NULL)
+		return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(nft_data_init);
+
+/**
+ *	nft_data_uninit - release a nft_data item
+ *
+ *	@data: struct nft_data to release
+ *	@type: type of data
+ *
+ *	Release a nft_data item. NFT_DATA_VALUE types can be silently discarded,
+ *	all others need to be released by calling this function.
+ */
+void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
+{
+	switch (type) {
+	case NFT_DATA_VALUE:
+		return;
+	case NFT_DATA_VERDICT:
+		return nft_verdict_uninit(data);
+	default:
+		WARN_ON(1);
+	}
+}
+EXPORT_SYMBOL_GPL(nft_data_uninit);
+
+int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
+		  enum nft_data_types type, unsigned int len)
+{
+	struct nlattr *nest;
+	int err;
+
+	nest = nla_nest_start(skb, attr);
+	if (nest == NULL)
+		return -1;
+
+	switch (type) {
+	case NFT_DATA_VALUE:
+		err = nft_value_dump(skb, data, len);
+		break;
+	case NFT_DATA_VERDICT:
+		err = nft_verdict_dump(skb, data);
+		break;
+	default:
+		err = -EINVAL;
+		WARN_ON(1);
+	}
+
+	nla_nest_end(skb, nest);
+	return err;
+}
+EXPORT_SYMBOL_GPL(nft_data_dump);
+
+static int __init nf_tables_module_init(void)
+{
+	int err;
+
+	info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS,
+		       GFP_KERNEL);
+	if (info == NULL) {
+		err = -ENOMEM;
+		goto err1;
+	}
+
+	err = nf_tables_core_module_init();
+	if (err < 0)
+		goto err2;
+
+	err = nfnetlink_subsys_register(&nf_tables_subsys);
+	if (err < 0)
+		goto err3;
+
+	pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n");
+	return 0;
+err3:
+	nf_tables_core_module_exit();
+err2:
+	kfree(info);
+err1:
+	return err;
+}
+
+static void __exit nf_tables_module_exit(void)
+{
+	nfnetlink_subsys_unregister(&nf_tables_subsys);
+	nf_tables_core_module_exit();
+	kfree(info);
+}
+
+module_init(nf_tables_module_init);
+module_exit(nf_tables_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
new file mode 100644
index 000000000000..bc7fb85d4002
--- /dev/null
+++ b/net/netfilter/nf_tables_core.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#define NFT_JUMP_STACK_SIZE	16
+
+unsigned int nft_do_chain(const struct nf_hook_ops *ops,
+			  struct sk_buff *skb,
+			  const struct net_device *in,
+			  const struct net_device *out,
+			  int (*okfn)(struct sk_buff *))
+{
+	const struct nft_chain *chain = ops->priv;
+	const struct nft_rule *rule;
+	const struct nft_expr *expr, *last;
+	struct nft_data data[NFT_REG_MAX + 1];
+	const struct nft_pktinfo pkt = {
+		.skb		= skb,
+		.in		= in,
+		.out		= out,
+		.hooknum	= ops->hooknum,
+	};
+	unsigned int stackptr = 0;
+	struct {
+		const struct nft_chain	*chain;
+		const struct nft_rule	*rule;
+	} jumpstack[NFT_JUMP_STACK_SIZE];
+
+do_chain:
+	rule = list_entry(&chain->rules, struct nft_rule, list);
+next_rule:
+	data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+	list_for_each_entry_continue_rcu(rule, &chain->rules, list) {
+		nft_rule_for_each_expr(expr, last, rule) {
+			expr->ops->eval(expr, data, &pkt);
+			if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE)
+				break;
+		}
+
+		switch (data[NFT_REG_VERDICT].verdict) {
+		case NFT_BREAK:
+			data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+			/* fall through */
+		case NFT_CONTINUE:
+			continue;
+		}
+		break;
+	}
+
+	switch (data[NFT_REG_VERDICT].verdict) {
+	case NF_ACCEPT:
+	case NF_DROP:
+	case NF_QUEUE:
+		return data[NFT_REG_VERDICT].verdict;
+	case NFT_JUMP:
+		BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
+		jumpstack[stackptr].chain = chain;
+		jumpstack[stackptr].rule  = rule;
+		stackptr++;
+		/* fall through */
+	case NFT_GOTO:
+		chain = data[NFT_REG_VERDICT].chain;
+		goto do_chain;
+	case NFT_RETURN:
+	case NFT_CONTINUE:
+		break;
+	default:
+		WARN_ON(1);
+	}
+
+	if (stackptr > 0) {
+		stackptr--;
+		chain = jumpstack[stackptr].chain;
+		rule  = jumpstack[stackptr].rule;
+		goto next_rule;
+	}
+
+	return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(nft_do_chain);
+
+int __init nf_tables_core_module_init(void)
+{
+	int err;
+
+	err = nft_immediate_module_init();
+	if (err < 0)
+		goto err1;
+
+	err = nft_cmp_module_init();
+	if (err < 0)
+		goto err2;
+
+	err = nft_lookup_module_init();
+	if (err < 0)
+		goto err3;
+
+	err = nft_bitwise_module_init();
+	if (err < 0)
+		goto err4;
+
+	err = nft_byteorder_module_init();
+	if (err < 0)
+		goto err5;
+
+	err = nft_payload_module_init();
+	if (err < 0)
+		goto err6;
+
+	return 0;
+
+err6:
+	nft_byteorder_module_exit();
+err5:
+	nft_bitwise_module_exit();
+err4:
+	nft_lookup_module_exit();
+err3:
+	nft_cmp_module_exit();
+err2:
+	nft_immediate_module_exit();
+err1:
+	return err;
+}
+
+void nf_tables_core_module_exit(void)
+{
+	nft_payload_module_exit();
+	nft_byteorder_module_exit();
+	nft_bitwise_module_exit();
+	nft_lookup_module_exit();
+	nft_cmp_module_exit();
+	nft_immediate_module_exit();
+}
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
new file mode 100644
index 000000000000..0f7501506367
--- /dev/null
+++ b/net/netfilter/nft_bitwise.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_bitwise {
+	enum nft_registers	sreg:8;
+	enum nft_registers	dreg:8;
+	u8			len;
+	struct nft_data		mask;
+	struct nft_data		xor;
+};
+
+static void nft_bitwise_eval(const struct nft_expr *expr,
+			     struct nft_data data[NFT_REG_MAX + 1],
+			     const struct nft_pktinfo *pkt)
+{
+	const struct nft_bitwise *priv = nft_expr_priv(expr);
+	const struct nft_data *src = &data[priv->sreg];
+	struct nft_data *dst = &data[priv->dreg];
+	unsigned int i;
+
+	for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) {
+		dst->data[i] = (src->data[i] & priv->mask.data[i]) ^
+			       priv->xor.data[i];
+	}
+}
+
+static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
+	[NFTA_BITWISE_SREG]	= { .type = NLA_U32 },
+	[NFTA_BITWISE_DREG]	= { .type = NLA_U32 },
+	[NFTA_BITWISE_LEN]	= { .type = NLA_U32 },
+	[NFTA_BITWISE_MASK]	= { .type = NLA_NESTED },
+	[NFTA_BITWISE_XOR]	= { .type = NLA_NESTED },
+};
+
+static int nft_bitwise_init(const struct nft_ctx *ctx,
+			    const struct nft_expr *expr,
+			    const struct nlattr * const tb[])
+{
+	struct nft_bitwise *priv = nft_expr_priv(expr);
+	struct nft_data_desc d1, d2;
+	int err;
+
+	if (tb[NFTA_BITWISE_SREG] == NULL ||
+	    tb[NFTA_BITWISE_DREG] == NULL ||
+	    tb[NFTA_BITWISE_LEN] == NULL ||
+	    tb[NFTA_BITWISE_MASK] == NULL ||
+	    tb[NFTA_BITWISE_XOR] == NULL)
+		return -EINVAL;
+
+	priv->sreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_SREG]));
+	err = nft_validate_input_register(priv->sreg);
+	if (err < 0)
+		return err;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+	err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+	if (err < 0)
+		return err;
+
+	priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN]));
+
+	err = nft_data_init(NULL, &priv->mask, &d1, tb[NFTA_BITWISE_MASK]);
+	if (err < 0)
+		return err;
+	if (d1.len != priv->len)
+		return -EINVAL;
+
+	err = nft_data_init(NULL, &priv->xor, &d2, tb[NFTA_BITWISE_XOR]);
+	if (err < 0)
+		return err;
+	if (d2.len != priv->len)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_bitwise *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_BITWISE_SREG, htonl(priv->sreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BITWISE_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(priv->len)))
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_BITWISE_MASK, &priv->mask,
+			  NFT_DATA_VALUE, priv->len) < 0)
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_BITWISE_XOR, &priv->xor,
+			  NFT_DATA_VALUE, priv->len) < 0)
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_bitwise_ops __read_mostly = {
+	.name		= "bitwise",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_bitwise)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_bitwise_eval,
+	.init		= nft_bitwise_init,
+	.dump		= nft_bitwise_dump,
+	.policy		= nft_bitwise_policy,
+	.maxattr	= NFTA_BITWISE_MAX,
+};
+
+int __init nft_bitwise_module_init(void)
+{
+	return nft_register_expr(&nft_bitwise_ops);
+}
+
+void nft_bitwise_module_exit(void)
+{
+	nft_unregister_expr(&nft_bitwise_ops);
+}
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
new file mode 100644
index 000000000000..8b0657a4d17b
--- /dev/null
+++ b/net/netfilter/nft_byteorder.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_byteorder {
+	enum nft_registers	sreg:8;
+	enum nft_registers	dreg:8;
+	enum nft_byteorder_ops	op:8;
+	u8			len;
+	u8			size;
+};
+
+static void nft_byteorder_eval(const struct nft_expr *expr,
+			       struct nft_data data[NFT_REG_MAX + 1],
+			       const struct nft_pktinfo *pkt)
+{
+	const struct nft_byteorder *priv = nft_expr_priv(expr);
+	struct nft_data *src = &data[priv->sreg], *dst = &data[priv->dreg];
+	union { u32 u32; u16 u16; } *s, *d;
+	unsigned int i;
+
+	s = (void *)src->data;
+	d = (void *)dst->data;
+
+	switch (priv->size) {
+	case 4:
+		switch (priv->op) {
+		case NFT_BYTEORDER_NTOH:
+			for (i = 0; i < priv->len / 4; i++)
+				d[i].u32 = ntohl((__force __be32)s[i].u32);
+			break;
+		case NFT_BYTEORDER_HTON:
+			for (i = 0; i < priv->len / 4; i++)
+				d[i].u32 = (__force __u32)htonl(s[i].u32);
+			break;
+		}
+		break;
+	case 2:
+		switch (priv->op) {
+		case NFT_BYTEORDER_NTOH:
+			for (i = 0; i < priv->len / 2; i++)
+				d[i].u16 = ntohs((__force __be16)s[i].u16);
+			break;
+		case NFT_BYTEORDER_HTON:
+			for (i = 0; i < priv->len / 2; i++)
+				d[i].u16 = (__force __u16)htons(s[i].u16);
+			break;
+		}
+		break;
+	}
+}
+
+static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = {
+	[NFTA_BYTEORDER_SREG]	= { .type = NLA_U32 },
+	[NFTA_BYTEORDER_DREG]	= { .type = NLA_U32 },
+	[NFTA_BYTEORDER_OP]	= { .type = NLA_U32 },
+	[NFTA_BYTEORDER_LEN]	= { .type = NLA_U32 },
+	[NFTA_BYTEORDER_SIZE]	= { .type = NLA_U32 },
+};
+
+static int nft_byteorder_init(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nlattr * const tb[])
+{
+	struct nft_byteorder *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_BYTEORDER_SREG] == NULL ||
+	    tb[NFTA_BYTEORDER_DREG] == NULL ||
+	    tb[NFTA_BYTEORDER_LEN] == NULL ||
+	    tb[NFTA_BYTEORDER_SIZE] == NULL ||
+	    tb[NFTA_BYTEORDER_OP] == NULL)
+		return -EINVAL;
+
+	priv->sreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SREG]));
+	err = nft_validate_input_register(priv->sreg);
+	if (err < 0)
+		return err;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+	err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+	if (err < 0)
+		return err;
+
+	priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP]));
+	switch (priv->op) {
+	case NFT_BYTEORDER_NTOH:
+	case NFT_BYTEORDER_HTON:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN]));
+	if (priv->len == 0 || priv->len > FIELD_SIZEOF(struct nft_data, data))
+		return -EINVAL;
+
+	priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE]));
+	switch (priv->size) {
+	case 2:
+	case 4:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_byteorder *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_BYTEORDER_SREG, htonl(priv->sreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BYTEORDER_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->len)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(priv->size)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_byteorder_ops __read_mostly = {
+	.name		= "byteorder",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_byteorder)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_byteorder_eval,
+	.init		= nft_byteorder_init,
+	.dump		= nft_byteorder_dump,
+	.policy		= nft_byteorder_policy,
+	.maxattr	= NFTA_BYTEORDER_MAX,
+};
+
+int __init nft_byteorder_module_init(void)
+{
+	return nft_register_expr(&nft_byteorder_ops);
+}
+
+void nft_byteorder_module_exit(void)
+{
+	nft_unregister_expr(&nft_byteorder_ops);
+}
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
new file mode 100644
index 000000000000..e734d670120a
--- /dev/null
+++ b/net/netfilter/nft_cmp.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_cmp_expr {
+	struct nft_data		data;
+	enum nft_registers	sreg:8;
+	u8			len;
+	enum nft_cmp_ops	op:8;
+};
+
+static void nft_cmp_eval(const struct nft_expr *expr,
+			 struct nft_data data[NFT_REG_MAX + 1],
+			 const struct nft_pktinfo *pkt)
+{
+	const struct nft_cmp_expr *priv = nft_expr_priv(expr);
+	int d;
+
+	d = nft_data_cmp(&data[priv->sreg], &priv->data, priv->len);
+	switch (priv->op) {
+	case NFT_CMP_EQ:
+		if (d != 0)
+			goto mismatch;
+		break;
+	case NFT_CMP_NEQ:
+		if (d == 0)
+			goto mismatch;
+		break;
+	case NFT_CMP_LT:
+		if (d == 0)
+			goto mismatch;
+	case NFT_CMP_LTE:
+		if (d > 0)
+			goto mismatch;
+		break;
+	case NFT_CMP_GT:
+		if (d == 0)
+			goto mismatch;
+	case NFT_CMP_GTE:
+		if (d < 0)
+			goto mismatch;
+		break;
+	}
+	return;
+
+mismatch:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = {
+	[NFTA_CMP_SREG]		= { .type = NLA_U32 },
+	[NFTA_CMP_OP]		= { .type = NLA_U32 },
+	[NFTA_CMP_DATA]		= { .type = NLA_NESTED },
+};
+
+static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			const struct nlattr * const tb[])
+{
+	struct nft_cmp_expr *priv = nft_expr_priv(expr);
+	struct nft_data_desc desc;
+	int err;
+
+	if (tb[NFTA_CMP_SREG] == NULL ||
+	    tb[NFTA_CMP_OP] == NULL ||
+	    tb[NFTA_CMP_DATA] == NULL)
+		return -EINVAL;
+
+	priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+	err = nft_validate_input_register(priv->sreg);
+	if (err < 0)
+		return err;
+
+	priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
+	switch (priv->op) {
+	case NFT_CMP_EQ:
+	case NFT_CMP_NEQ:
+	case NFT_CMP_LT:
+	case NFT_CMP_LTE:
+	case NFT_CMP_GT:
+	case NFT_CMP_GTE:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
+	if (err < 0)
+		return err;
+
+	priv->len = desc.len;
+	return 0;
+}
+
+static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_cmp_expr *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op)))
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data,
+			  NFT_DATA_VALUE, priv->len) < 0)
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_cmp_ops __read_mostly = {
+	.name		= "cmp",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_cmp_eval,
+	.init		= nft_cmp_init,
+	.dump		= nft_cmp_dump,
+	.policy		= nft_cmp_policy,
+	.maxattr	= NFTA_CMP_MAX,
+};
+
+int __init nft_cmp_module_init(void)
+{
+	return nft_register_expr(&nft_cmp_ops);
+}
+
+void nft_cmp_module_exit(void)
+{
+	nft_unregister_expr(&nft_cmp_ops);
+}
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
new file mode 100644
index 000000000000..33c5d36819bb
--- /dev/null
+++ b/net/netfilter/nft_counter.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/seqlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_counter {
+	seqlock_t	lock;
+	u64		bytes;
+	u64		packets;
+};
+
+static void nft_counter_eval(const struct nft_expr *expr,
+			     struct nft_data data[NFT_REG_MAX + 1],
+			     const struct nft_pktinfo *pkt)
+{
+	struct nft_counter *priv = nft_expr_priv(expr);
+
+	write_seqlock_bh(&priv->lock);
+	priv->bytes += pkt->skb->len;
+	priv->packets++;
+	write_sequnlock_bh(&priv->lock);
+}
+
+static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_counter *priv = nft_expr_priv(expr);
+	unsigned int seq;
+	u64 bytes;
+	u64 packets;
+
+	do {
+		seq = read_seqbegin(&priv->lock);
+		bytes	= priv->bytes;
+		packets	= priv->packets;
+	} while (read_seqretry(&priv->lock, seq));
+
+	if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(bytes)))
+		goto nla_put_failure;
+	if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(packets)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
+	[NFTA_COUNTER_PACKETS]	= { .type = NLA_U64 },
+	[NFTA_COUNTER_BYTES]	= { .type = NLA_U64 },
+};
+
+static int nft_counter_init(const struct nft_ctx *ctx,
+			    const struct nft_expr *expr,
+			    const struct nlattr * const tb[])
+{
+	struct nft_counter *priv = nft_expr_priv(expr);
+
+	if (tb[NFTA_COUNTER_PACKETS])
+	        priv->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+	if (tb[NFTA_COUNTER_BYTES])
+		priv->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+
+	seqlock_init(&priv->lock);
+	return 0;
+}
+
+static struct nft_expr_ops nft_counter_ops __read_mostly = {
+	.name		= "counter",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_counter)),
+	.policy		= nft_counter_policy,
+	.maxattr	= NFTA_COUNTER_MAX,
+	.owner		= THIS_MODULE,
+	.eval		= nft_counter_eval,
+	.init		= nft_counter_init,
+	.dump		= nft_counter_dump,
+};
+
+static int __init nft_counter_module_init(void)
+{
+	return nft_register_expr(&nft_counter_ops);
+}
+
+static void __exit nft_counter_module_exit(void)
+{
+	nft_unregister_expr(&nft_counter_ops);
+}
+
+module_init(nft_counter_module_init);
+module_exit(nft_counter_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("counter");
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
new file mode 100644
index 000000000000..a1756d678226
--- /dev/null
+++ b/net/netfilter/nft_ct.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+
+struct nft_ct {
+	enum nft_ct_keys	key:8;
+	enum ip_conntrack_dir	dir:8;
+	enum nft_registers	dreg:8;
+	uint8_t			family;
+};
+
+static void nft_ct_eval(const struct nft_expr *expr,
+			struct nft_data data[NFT_REG_MAX + 1],
+			const struct nft_pktinfo *pkt)
+{
+	const struct nft_ct *priv = nft_expr_priv(expr);
+	struct nft_data *dest = &data[priv->dreg];
+	enum ip_conntrack_info ctinfo;
+	const struct nf_conn *ct;
+	const struct nf_conn_help *help;
+	const struct nf_conntrack_tuple *tuple;
+	const struct nf_conntrack_helper *helper;
+	long diff;
+	unsigned int state;
+
+	ct = nf_ct_get(pkt->skb, &ctinfo);
+
+	switch (priv->key) {
+	case NFT_CT_STATE:
+		if (ct == NULL)
+			state = NF_CT_STATE_INVALID_BIT;
+		else if (nf_ct_is_untracked(ct))
+			state = NF_CT_STATE_UNTRACKED_BIT;
+		else
+			state = NF_CT_STATE_BIT(ctinfo);
+		dest->data[0] = state;
+		return;
+	}
+
+	if (ct == NULL)
+		goto err;
+
+	switch (priv->key) {
+	case NFT_CT_DIRECTION:
+		dest->data[0] = CTINFO2DIR(ctinfo);
+		return;
+	case NFT_CT_STATUS:
+		dest->data[0] = ct->status;
+		return;
+#ifdef CONFIG_NF_CONNTRACK_MARK
+	case NFT_CT_MARK:
+		dest->data[0] = ct->mark;
+		return;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+	case NFT_CT_SECMARK:
+		dest->data[0] = ct->secmark;
+		return;
+#endif
+	case NFT_CT_EXPIRATION:
+		diff = (long)jiffies - (long)ct->timeout.expires;
+		if (diff < 0)
+			diff = 0;
+		dest->data[0] = jiffies_to_msecs(diff);
+		return;
+	case NFT_CT_HELPER:
+		if (ct->master == NULL)
+			goto err;
+		help = nfct_help(ct->master);
+		if (help == NULL)
+			goto err;
+		helper = rcu_dereference(help->helper);
+		if (helper == NULL)
+			goto err;
+		if (strlen(helper->name) >= sizeof(dest->data))
+			goto err;
+		strncpy((char *)dest->data, helper->name, sizeof(dest->data));
+		return;
+	}
+
+	tuple = &ct->tuplehash[priv->dir].tuple;
+	switch (priv->key) {
+	case NFT_CT_L3PROTOCOL:
+		dest->data[0] = nf_ct_l3num(ct);
+		return;
+	case NFT_CT_SRC:
+		memcpy(dest->data, tuple->src.u3.all,
+		       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+		return;
+	case NFT_CT_DST:
+		memcpy(dest->data, tuple->dst.u3.all,
+		       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+		return;
+	case NFT_CT_PROTOCOL:
+		dest->data[0] = nf_ct_protonum(ct);
+		return;
+	case NFT_CT_PROTO_SRC:
+		dest->data[0] = (__force __u16)tuple->src.u.all;
+		return;
+	case NFT_CT_PROTO_DST:
+		dest->data[0] = (__force __u16)tuple->dst.u.all;
+		return;
+	}
+	return;
+err:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
+	[NFTA_CT_DREG]		= { .type = NLA_U32 },
+	[NFTA_CT_KEY]		= { .type = NLA_U32 },
+	[NFTA_CT_DIRECTION]	= { .type = NLA_U8 },
+};
+
+static int nft_ct_init(const struct nft_ctx *ctx,
+		       const struct nft_expr *expr,
+		       const struct nlattr * const tb[])
+{
+	struct nft_ct *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_CT_DREG] == NULL ||
+	    tb[NFTA_CT_KEY] == NULL)
+		return -EINVAL;
+
+	priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
+	if (tb[NFTA_CT_DIRECTION] != NULL) {
+		priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
+		switch (priv->dir) {
+		case IP_CT_DIR_ORIGINAL:
+		case IP_CT_DIR_REPLY:
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	switch (priv->key) {
+	case NFT_CT_STATE:
+	case NFT_CT_DIRECTION:
+	case NFT_CT_STATUS:
+#ifdef CONFIG_NF_CONNTRACK_MARK
+	case NFT_CT_MARK:
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+	case NFT_CT_SECMARK:
+#endif
+	case NFT_CT_EXPIRATION:
+	case NFT_CT_HELPER:
+		if (tb[NFTA_CT_DIRECTION] != NULL)
+			return -EINVAL;
+		break;
+	case NFT_CT_PROTOCOL:
+	case NFT_CT_SRC:
+	case NFT_CT_DST:
+	case NFT_CT_PROTO_SRC:
+	case NFT_CT_PROTO_DST:
+		if (tb[NFTA_CT_DIRECTION] == NULL)
+			return -EINVAL;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	err = nf_ct_l3proto_try_module_get(ctx->afi->family);
+	if (err < 0)
+		return err;
+	priv->family = ctx->afi->family;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		goto err1;
+
+	err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+	if (err < 0)
+		goto err1;
+	return 0;
+
+err1:
+	nf_ct_l3proto_module_put(ctx->afi->family);
+	return err;
+}
+
+static void nft_ct_destroy(const struct nft_expr *expr)
+{
+	struct nft_ct *priv = nft_expr_priv(expr);
+
+	nf_ct_l3proto_module_put(priv->family);
+}
+
+static int nft_ct_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_ct *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_CT_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_ct_ops __read_mostly = {
+	.name		= "ct",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ct)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_ct_eval,
+	.init		= nft_ct_init,
+	.destroy	= nft_ct_destroy,
+	.dump		= nft_ct_dump,
+	.policy		= nft_ct_policy,
+	.maxattr	= NFTA_CT_MAX,
+};
+
+static int __init nft_ct_module_init(void)
+{
+	return nft_register_expr(&nft_ct_ops);
+}
+
+static void __exit nft_ct_module_exit(void)
+{
+	nft_unregister_expr(&nft_ct_ops);
+}
+
+module_init(nft_ct_module_init);
+module_exit(nft_ct_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("ct");
diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c
new file mode 100644
index 000000000000..9fc8eb308193
--- /dev/null
+++ b/net/netfilter/nft_expr_template.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_template {
+
+};
+
+static void nft_template_eval(const struct nft_expr *expr,
+			      struct nft_data data[NFT_REG_MAX + 1],
+			      const struct nft_pktinfo *pkt)
+{
+	struct nft_template *priv = nft_expr_priv(expr);
+
+}
+
+static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = {
+	[NFTA_TEMPLATE_ATTR]		= { .type = NLA_U32 },
+};
+
+static int nft_template_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr *tb[])
+{
+	struct nft_template *priv = nft_expr_priv(expr);
+
+	return 0;
+}
+
+static void nft_template_destroy(const struct nft_ctx *ctx,
+			       const struct nft_expr *expr)
+{
+	struct nft_template *priv = nft_expr_priv(expr);
+
+}
+
+static int nft_template_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_template *priv = nft_expr_priv(expr);
+
+	NLA_PUT_BE32(skb, NFTA_TEMPLATE_ATTR, priv->field);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops template_ops __read_mostly = {
+	.name		= "template",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_template)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_template_eval,
+	.init		= nft_template_init,
+	.destroy	= nft_template_destroy,
+	.dump		= nft_template_dump,
+	.policy		= nft_template_policy,
+	.maxattr	= NFTA_TEMPLATE_MAX,
+};
+
+static int __init nft_template_module_init(void)
+{
+	return nft_register_expr(&template_ops);
+}
+
+static void __exit nft_template_module_exit(void)
+{
+	nft_unregister_expr(&template_ops);
+}
+
+module_init(nft_template_module_init);
+module_exit(nft_template_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("template");
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
new file mode 100644
index 000000000000..21c6a6b7b662
--- /dev/null
+++ b/net/netfilter/nft_exthdr.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+// FIXME:
+#include <net/ipv6.h>
+
+struct nft_exthdr {
+	u8			type;
+	u8			offset;
+	u8			len;
+	enum nft_registers	dreg:8;
+};
+
+static void nft_exthdr_eval(const struct nft_expr *expr,
+			    struct nft_data data[NFT_REG_MAX + 1],
+			    const struct nft_pktinfo *pkt)
+{
+	struct nft_exthdr *priv = nft_expr_priv(expr);
+	struct nft_data *dest = &data[priv->dreg];
+	unsigned int offset;
+	int err;
+
+	err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL);
+	if (err < 0)
+		goto err;
+	offset += priv->offset;
+
+	if (skb_copy_bits(pkt->skb, offset, dest->data, priv->len) < 0)
+		goto err;
+	return;
+err:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
+	[NFTA_EXTHDR_DREG]		= { .type = NLA_U32 },
+	[NFTA_EXTHDR_TYPE]		= { .type = NLA_U8 },
+	[NFTA_EXTHDR_OFFSET]		= { .type = NLA_U32 },
+	[NFTA_EXTHDR_LEN]		= { .type = NLA_U32 },
+};
+
+static int nft_exthdr_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
+{
+	struct nft_exthdr *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_EXTHDR_DREG] == NULL ||
+	    tb[NFTA_EXTHDR_TYPE] == NULL ||
+	    tb[NFTA_EXTHDR_OFFSET] == NULL ||
+	    tb[NFTA_EXTHDR_LEN] == NULL)
+		return -EINVAL;
+
+	priv->type   = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
+	priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
+	priv->len    = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
+	if (priv->len == 0 ||
+	    priv->len > FIELD_SIZEOF(struct nft_data, data))
+		return -EINVAL;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_EXTHDR_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+	return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+}
+
+static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_EXTHDR_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops exthdr_ops __read_mostly = {
+	.name		= "exthdr",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_exthdr_eval,
+	.init		= nft_exthdr_init,
+	.dump		= nft_exthdr_dump,
+	.policy		= nft_exthdr_policy,
+	.maxattr	= NFTA_EXTHDR_MAX,
+};
+
+static int __init nft_exthdr_module_init(void)
+{
+	return nft_register_expr(&exthdr_ops);
+}
+
+static void __exit nft_exthdr_module_exit(void)
+{
+	nft_unregister_expr(&exthdr_ops);
+}
+
+module_init(nft_exthdr_module_init);
+module_exit(nft_exthdr_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("exthdr");
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
new file mode 100644
index 000000000000..67cc502881f1
--- /dev/null
+++ b/net/netfilter/nft_hash.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_hash {
+	struct hlist_head	*hash;
+	unsigned int		hsize;
+	enum nft_registers	sreg:8;
+	enum nft_registers	dreg:8;
+	u8			klen;
+	u8			dlen;
+	u16			flags;
+};
+
+struct nft_hash_elem {
+	struct hlist_node	hnode;
+	struct nft_data		key;
+	struct nft_data		data[];
+};
+
+static u32 nft_hash_rnd __read_mostly;
+static bool nft_hash_rnd_initted __read_mostly;
+
+static unsigned int nft_hash_data(const struct nft_data *data,
+				  unsigned int hsize, unsigned int len)
+{
+	unsigned int h;
+
+	// FIXME: can we reasonably guarantee the upper bits are fixed?
+	h = jhash2(data->data, len >> 2, nft_hash_rnd);
+	return ((u64)h * hsize) >> 32;
+}
+
+static void nft_hash_eval(const struct nft_expr *expr,
+			  struct nft_data data[NFT_REG_MAX + 1],
+			  const struct nft_pktinfo *pkt)
+{
+	const struct nft_hash *priv = nft_expr_priv(expr);
+	const struct nft_hash_elem *elem;
+	const struct nft_data *key = &data[priv->sreg];
+	unsigned int h;
+
+	h = nft_hash_data(key, priv->hsize, priv->klen);
+	hlist_for_each_entry(elem, &priv->hash[h], hnode) {
+		if (nft_data_cmp(&elem->key, key, priv->klen))
+			continue;
+		if (priv->flags & NFT_HASH_MAP)
+			nft_data_copy(&data[priv->dreg], elem->data);
+		return;
+	}
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static void nft_hash_elem_destroy(const struct nft_expr *expr,
+				  struct nft_hash_elem *elem)
+{
+	const struct nft_hash *priv = nft_expr_priv(expr);
+
+	nft_data_uninit(&elem->key, NFT_DATA_VALUE);
+	if (priv->flags & NFT_HASH_MAP)
+		nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg));
+	kfree(elem);
+}
+
+static const struct nla_policy nft_he_policy[NFTA_HE_MAX + 1] = {
+	[NFTA_HE_KEY]		= { .type = NLA_NESTED },
+	[NFTA_HE_DATA]		= { .type = NLA_NESTED },
+};
+
+static int nft_hash_elem_init(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nlattr *nla,
+			      struct nft_hash_elem **new)
+{
+	struct nft_hash *priv = nft_expr_priv(expr);
+	struct nlattr *tb[NFTA_HE_MAX + 1];
+	struct nft_hash_elem *elem;
+	struct nft_data_desc d1, d2;
+	unsigned int size;
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_HE_MAX, nla, nft_he_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_HE_KEY] == NULL)
+		return -EINVAL;
+	size = sizeof(*elem);
+
+	if (priv->flags & NFT_HASH_MAP) {
+		if (tb[NFTA_HE_DATA] == NULL)
+			return -EINVAL;
+		size += sizeof(elem->data[0]);
+	} else {
+		if (tb[NFTA_HE_DATA] != NULL)
+			return -EINVAL;
+	}
+
+	elem = kzalloc(size, GFP_KERNEL);
+	if (elem == NULL)
+		return -ENOMEM;
+
+	err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_HE_KEY]);
+	if (err < 0)
+		goto err1;
+	err = -EINVAL;
+	if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen)
+		goto err2;
+
+	if (tb[NFTA_HE_DATA] != NULL) {
+		err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_HE_DATA]);
+		if (err < 0)
+			goto err2;
+		err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type);
+		if (err < 0)
+			goto err3;
+	}
+
+	*new = elem;
+	return 0;
+
+err3:
+	nft_data_uninit(elem->data, d2.type);
+err2:
+	nft_data_uninit(&elem->key, d1.type);
+err1:
+	kfree(elem);
+	return err;
+}
+
+static int nft_hash_elem_dump(struct sk_buff *skb, const struct nft_expr *expr,
+			      const struct nft_hash_elem *elem)
+
+{
+	const struct nft_hash *priv = nft_expr_priv(expr);
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, NFTA_LIST_ELEM);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_HE_KEY, &elem->key,
+			  NFT_DATA_VALUE, priv->klen) < 0)
+		goto nla_put_failure;
+
+	if (priv->flags & NFT_HASH_MAP) {
+		if (nft_data_dump(skb, NFTA_HE_DATA, elem->data,
+				  NFT_DATA_VALUE, priv->dlen) < 0)
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static void nft_hash_destroy(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr)
+{
+	const struct nft_hash *priv = nft_expr_priv(expr);
+	const struct hlist_node *next;
+	struct nft_hash_elem *elem;
+	unsigned int i;
+
+	for (i = 0; i < priv->hsize; i++) {
+		hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) {
+			hlist_del(&elem->hnode);
+			nft_hash_elem_destroy(expr, elem);
+		}
+	}
+	kfree(priv->hash);
+}
+
+static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
+	[NFTA_HASH_FLAGS]	= { .type = NLA_U32 },
+	[NFTA_HASH_SREG]	= { .type = NLA_U32 },
+	[NFTA_HASH_DREG]	= { .type = NLA_U32 },
+	[NFTA_HASH_KLEN]	= { .type = NLA_U32 },
+	[NFTA_HASH_ELEMENTS]	= { .type = NLA_NESTED },
+};
+
+static int nft_hash_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			 const struct nlattr * const tb[])
+{
+	struct nft_hash *priv = nft_expr_priv(expr);
+	struct nft_hash_elem *elem, *uninitialized_var(new);
+	const struct nlattr *nla;
+	unsigned int cnt, i;
+	unsigned int h;
+	int err, rem;
+
+	if (unlikely(!nft_hash_rnd_initted)) {
+		get_random_bytes(&nft_hash_rnd, 4);
+		nft_hash_rnd_initted = true;
+	}
+
+	if (tb[NFTA_HASH_SREG] == NULL ||
+	    tb[NFTA_HASH_KLEN] == NULL ||
+	    tb[NFTA_HASH_ELEMENTS] == NULL)
+		return -EINVAL;
+
+	if (tb[NFTA_HASH_FLAGS] != NULL) {
+		priv->flags = ntohl(nla_get_be32(tb[NFTA_HASH_FLAGS]));
+		if (priv->flags & ~NFT_HASH_MAP)
+			return -EINVAL;
+	}
+
+	priv->sreg = ntohl(nla_get_be32(tb[NFTA_HASH_SREG]));
+	err = nft_validate_input_register(priv->sreg);
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_HASH_DREG] != NULL) {
+		if (!(priv->flags & NFT_HASH_MAP))
+			return -EINVAL;
+		priv->dreg = ntohl(nla_get_be32(tb[NFTA_HASH_DREG]));
+		err = nft_validate_output_register(priv->dreg);
+		if (err < 0)
+			return err;
+	}
+
+	priv->klen = ntohl(nla_get_be32(tb[NFTA_HASH_KLEN]));
+	if (priv->klen == 0)
+		return -EINVAL;
+
+	cnt = 0;
+	nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) {
+		if (nla_type(nla) != NFTA_LIST_ELEM)
+			return -EINVAL;
+		cnt++;
+	}
+
+	/* Aim for a load factor of 0.75 */
+	cnt = cnt * 4 / 3;
+
+	priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL);
+	if (priv->hash == NULL)
+		return -ENOMEM;
+	priv->hsize = cnt;
+
+	for (i = 0; i < cnt; i++)
+		INIT_HLIST_HEAD(&priv->hash[i]);
+
+	err = -ENOMEM;
+	nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) {
+		err = nft_hash_elem_init(ctx, expr, nla, &new);
+		if (err < 0)
+			goto err1;
+
+		h = nft_hash_data(&new->key, priv->hsize, priv->klen);
+		hlist_for_each_entry(elem, &priv->hash[h], hnode) {
+			if (nft_data_cmp(&elem->key, &new->key, priv->klen))
+				continue;
+			nft_hash_elem_destroy(expr, new);
+			err = -EEXIST;
+			goto err1;
+		}
+		hlist_add_head(&new->hnode, &priv->hash[h]);
+	}
+	return 0;
+
+err1:
+	nft_hash_destroy(ctx, expr);
+	return err;
+}
+
+static int nft_hash_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_hash *priv = nft_expr_priv(expr);
+	const struct nft_hash_elem *elem;
+	struct nlattr *list;
+	unsigned int i;
+
+	if (priv->flags)
+		if (nla_put_be32(skb, NFTA_HASH_FLAGS, htonl(priv->flags)))
+			goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_HASH_SREG, htonl(priv->sreg)))
+		goto nla_put_failure;
+	if (priv->flags & NFT_HASH_MAP)
+		if (nla_put_be32(skb, NFTA_HASH_DREG, htonl(priv->dreg)))
+			goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_HASH_KLEN, htonl(priv->klen)))
+		goto nla_put_failure;
+
+	list = nla_nest_start(skb, NFTA_HASH_ELEMENTS);
+	if (list == NULL)
+		goto nla_put_failure;
+
+	for (i = 0; i < priv->hsize; i++) {
+		hlist_for_each_entry(elem, &priv->hash[i], hnode) {
+			if (nft_hash_elem_dump(skb, expr, elem) < 0)
+				goto nla_put_failure;
+		}
+	}
+
+	nla_nest_end(skb, list);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_hash_ops __read_mostly = {
+	.name		= "hash",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_hash)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_hash_eval,
+	.init		= nft_hash_init,
+	.destroy	= nft_hash_destroy,
+	.dump		= nft_hash_dump,
+	.policy		= nft_hash_policy,
+	.maxattr	= NFTA_HASH_MAX,
+};
+
+static int __init nft_hash_module_init(void)
+{
+	return nft_register_expr(&nft_hash_ops);
+}
+
+static void __exit nft_hash_module_exit(void)
+{
+	nft_unregister_expr(&nft_hash_ops);
+}
+
+module_init(nft_hash_module_init);
+module_exit(nft_hash_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("hash");
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
new file mode 100644
index 000000000000..3bf42c3cc49a
--- /dev/null
+++ b/net/netfilter/nft_immediate.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_immediate_expr {
+	struct nft_data		data;
+	enum nft_registers	dreg:8;
+	u8			dlen;
+};
+
+static void nft_immediate_eval(const struct nft_expr *expr,
+			       struct nft_data data[NFT_REG_MAX + 1],
+			       const struct nft_pktinfo *pkt)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+	nft_data_copy(&data[priv->dreg], &priv->data);
+}
+
+static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = {
+	[NFTA_IMMEDIATE_DREG]	= { .type = NLA_U32 },
+	[NFTA_IMMEDIATE_DATA]	= { .type = NLA_NESTED },
+};
+
+static int nft_immediate_init(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nlattr * const tb[])
+{
+	struct nft_immediate_expr *priv = nft_expr_priv(expr);
+	struct nft_data_desc desc;
+	int err;
+
+	if (tb[NFTA_IMMEDIATE_DREG] == NULL ||
+	    tb[NFTA_IMMEDIATE_DATA] == NULL)
+		return -EINVAL;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+
+	err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]);
+	if (err < 0)
+		return err;
+	priv->dlen = desc.len;
+
+	err = nft_validate_data_load(ctx, priv->dreg, &priv->data, desc.type);
+	if (err < 0)
+		goto err1;
+
+	return 0;
+
+err1:
+	nft_data_uninit(&priv->data, desc.type);
+	return err;
+}
+
+static void nft_immediate_destroy(const struct nft_expr *expr)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+	return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg));
+}
+
+static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_IMMEDIATE_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+
+	return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &priv->data,
+			     nft_dreg_to_type(priv->dreg), priv->dlen);
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_imm_ops __read_mostly = {
+	.name		= "immediate",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_immediate_eval,
+	.init		= nft_immediate_init,
+	.destroy	= nft_immediate_destroy,
+	.dump		= nft_immediate_dump,
+	.policy		= nft_immediate_policy,
+	.maxattr	= NFTA_IMMEDIATE_MAX,
+};
+
+int __init nft_immediate_module_init(void)
+{
+	return nft_register_expr(&nft_imm_ops);
+}
+
+void nft_immediate_module_exit(void)
+{
+	nft_unregister_expr(&nft_imm_ops);
+}
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
new file mode 100644
index 000000000000..e0e3fc8aebc3
--- /dev/null
+++ b/net/netfilter/nft_limit.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+static DEFINE_SPINLOCK(limit_lock);
+
+struct nft_limit {
+	u64		tokens;
+	u64		rate;
+	u64		unit;
+	unsigned long	stamp;
+};
+
+static void nft_limit_eval(const struct nft_expr *expr,
+			   struct nft_data data[NFT_REG_MAX + 1],
+			   const struct nft_pktinfo *pkt)
+{
+	struct nft_limit *priv = nft_expr_priv(expr);
+
+	spin_lock_bh(&limit_lock);
+	if (time_after_eq(jiffies, priv->stamp)) {
+		priv->tokens = priv->rate;
+		priv->stamp = jiffies + priv->unit * HZ;
+	}
+
+	if (priv->tokens >= 1) {
+		priv->tokens--;
+		spin_unlock_bh(&limit_lock);
+		return;
+	}
+	spin_unlock_bh(&limit_lock);
+
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
+	[NFTA_LIMIT_RATE]	= { .type = NLA_U64 },
+	[NFTA_LIMIT_UNIT]	= { .type = NLA_U64 },
+};
+
+static int nft_limit_init(const struct nft_ctx *ctx,
+			  const struct nft_expr *expr,
+			  const struct nlattr * const tb[])
+{
+	struct nft_limit *priv = nft_expr_priv(expr);
+
+	if (tb[NFTA_LIMIT_RATE] == NULL ||
+	    tb[NFTA_LIMIT_UNIT] == NULL)
+		return -EINVAL;
+
+	priv->rate   = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
+	priv->unit   = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
+	priv->stamp  = jiffies + priv->unit * HZ;
+	priv->tokens = priv->rate;
+	return 0;
+}
+
+static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_limit *priv = nft_expr_priv(expr);
+
+	if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate)))
+		goto nla_put_failure;
+	if (nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_limit_ops __read_mostly = {
+	.name		= "limit",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_limit_eval,
+	.init		= nft_limit_init,
+	.dump		= nft_limit_dump,
+	.policy		= nft_limit_policy,
+	.maxattr	= NFTA_LIMIT_MAX,
+};
+
+static int __init nft_limit_module_init(void)
+{
+	return nft_register_expr(&nft_limit_ops);
+}
+
+static void __exit nft_limit_module_exit(void)
+{
+	nft_unregister_expr(&nft_limit_ops);
+}
+
+module_init(nft_limit_module_init);
+module_exit(nft_limit_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("limit");
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
new file mode 100644
index 000000000000..da495c3b1e7e
--- /dev/null
+++ b/net/netfilter/nft_log.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_log.h>
+#include <linux/netdevice.h>
+
+static const char *nft_log_null_prefix = "";
+
+struct nft_log {
+	struct nf_loginfo	loginfo;
+	char			*prefix;
+	int			family;
+};
+
+static void nft_log_eval(const struct nft_expr *expr,
+			 struct nft_data data[NFT_REG_MAX + 1],
+			 const struct nft_pktinfo *pkt)
+{
+	const struct nft_log *priv = nft_expr_priv(expr);
+	struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
+
+	nf_log_packet(net, priv->family, pkt->hooknum, pkt->skb, pkt->in,
+		      pkt->out, &priv->loginfo, "%s", priv->prefix);
+}
+
+static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
+	[NFTA_LOG_GROUP]	= { .type = NLA_U16 },
+	[NFTA_LOG_PREFIX]	= { .type = NLA_STRING },
+	[NFTA_LOG_SNAPLEN]	= { .type = NLA_U32 },
+	[NFTA_LOG_QTHRESHOLD]	= { .type = NLA_U16 },
+};
+
+static int nft_log_init(const struct nft_ctx *ctx,
+			const struct nft_expr *expr,
+			const struct nlattr * const tb[])
+{
+	struct nft_log *priv = nft_expr_priv(expr);
+	struct nf_loginfo *li = &priv->loginfo;
+	const struct nlattr *nla;
+
+	priv->family = ctx->afi->family;
+
+	nla = tb[NFTA_LOG_PREFIX];
+	if (nla != NULL) {
+		priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL);
+		if (priv->prefix == NULL)
+			return -ENOMEM;
+		nla_strlcpy(priv->prefix, nla, nla_len(nla) + 1);
+	} else
+		priv->prefix = (char *)nft_log_null_prefix;
+
+	li->type = NF_LOG_TYPE_ULOG;
+	if (tb[NFTA_LOG_GROUP] != NULL)
+		li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP]));
+
+	if (tb[NFTA_LOG_SNAPLEN] != NULL)
+		li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN]));
+	if (tb[NFTA_LOG_QTHRESHOLD] != NULL) {
+		li->u.ulog.qthreshold =
+			ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD]));
+	}
+
+	return 0;
+}
+
+static void nft_log_destroy(const struct nft_expr *expr)
+{
+	struct nft_log *priv = nft_expr_priv(expr);
+
+	if (priv->prefix != nft_log_null_prefix)
+		kfree(priv->prefix);
+}
+
+static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_log *priv = nft_expr_priv(expr);
+	const struct nf_loginfo *li = &priv->loginfo;
+
+	if (priv->prefix != nft_log_null_prefix)
+		if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix))
+			goto nla_put_failure;
+	if (li->u.ulog.group)
+		if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group)))
+			goto nla_put_failure;
+	if (li->u.ulog.copy_len)
+		if (nla_put_be32(skb, NFTA_LOG_SNAPLEN,
+				 htonl(li->u.ulog.copy_len)))
+			goto nla_put_failure;
+	if (li->u.ulog.qthreshold)
+		if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD,
+				 htons(li->u.ulog.qthreshold)))
+			goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_log_ops __read_mostly = {
+	.name		= "log",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_log)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_log_eval,
+	.init		= nft_log_init,
+	.destroy	= nft_log_destroy,
+	.dump		= nft_log_dump,
+	.policy		= nft_log_policy,
+	.maxattr	= NFTA_LOG_MAX,
+};
+
+static int __init nft_log_module_init(void)
+{
+	return nft_register_expr(&nft_log_ops);
+}
+
+static void __exit nft_log_module_exit(void)
+{
+	nft_unregister_expr(&nft_log_ops);
+}
+
+module_init(nft_log_module_init);
+module_exit(nft_log_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("log");
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
new file mode 100644
index 000000000000..96735aa2f039
--- /dev/null
+++ b/net/netfilter/nft_meta.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
+#include <net/netfilter/nf_tables.h>
+
+struct nft_meta {
+	enum nft_meta_keys	key:8;
+	enum nft_registers	dreg:8;
+};
+
+static void nft_meta_eval(const struct nft_expr *expr,
+			  struct nft_data data[NFT_REG_MAX + 1],
+			  const struct nft_pktinfo *pkt)
+{
+	const struct nft_meta *priv = nft_expr_priv(expr);
+	const struct sk_buff *skb = pkt->skb;
+	const struct net_device *in = pkt->in, *out = pkt->out;
+	struct nft_data *dest = &data[priv->dreg];
+
+	switch (priv->key) {
+	case NFT_META_LEN:
+		dest->data[0] = skb->len;
+		break;
+	case NFT_META_PROTOCOL:
+		*(__be16 *)dest->data = skb->protocol;
+		break;
+	case NFT_META_PRIORITY:
+		dest->data[0] = skb->priority;
+		break;
+	case NFT_META_MARK:
+		dest->data[0] = skb->mark;
+		break;
+	case NFT_META_IIF:
+		if (in == NULL)
+			goto err;
+		dest->data[0] = in->ifindex;
+		break;
+	case NFT_META_OIF:
+		if (out == NULL)
+			goto err;
+		dest->data[0] = out->ifindex;
+		break;
+	case NFT_META_IIFNAME:
+		if (in == NULL)
+			goto err;
+		strncpy((char *)dest->data, in->name, sizeof(dest->data));
+		break;
+	case NFT_META_OIFNAME:
+		if (out == NULL)
+			goto err;
+		strncpy((char *)dest->data, out->name, sizeof(dest->data));
+		break;
+	case NFT_META_IIFTYPE:
+		if (in == NULL)
+			goto err;
+		*(u16 *)dest->data = in->type;
+		break;
+	case NFT_META_OIFTYPE:
+		if (out == NULL)
+			goto err;
+		*(u16 *)dest->data = out->type;
+		break;
+	case NFT_META_SKUID:
+		if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
+			goto err;
+
+		read_lock_bh(&skb->sk->sk_callback_lock);
+		if (skb->sk->sk_socket == NULL ||
+		    skb->sk->sk_socket->file == NULL) {
+			read_unlock_bh(&skb->sk->sk_callback_lock);
+			goto err;
+		}
+
+		dest->data[0] =
+			from_kuid_munged(&init_user_ns,
+				skb->sk->sk_socket->file->f_cred->fsuid);
+		read_unlock_bh(&skb->sk->sk_callback_lock);
+		break;
+	case NFT_META_SKGID:
+		if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
+			goto err;
+
+		read_lock_bh(&skb->sk->sk_callback_lock);
+		if (skb->sk->sk_socket == NULL ||
+		    skb->sk->sk_socket->file == NULL) {
+			read_unlock_bh(&skb->sk->sk_callback_lock);
+			goto err;
+		}
+		dest->data[0] =
+			from_kgid_munged(&init_user_ns,
+				 skb->sk->sk_socket->file->f_cred->fsgid);
+		read_unlock_bh(&skb->sk->sk_callback_lock);
+		break;
+#ifdef CONFIG_NET_CLS_ROUTE
+	case NFT_META_RTCLASSID: {
+		const struct dst_entry *dst = skb_dst(skb);
+
+		if (dst == NULL)
+			goto err;
+		dest->data[0] = dst->tclassid;
+		break;
+	}
+#endif
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+		dest->data[0] = skb->secmark;
+		break;
+#endif
+	default:
+		WARN_ON(1);
+		goto err;
+	}
+	return;
+
+err:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+	[NFTA_META_DREG]	= { .type = NLA_U32 },
+	[NFTA_META_KEY]		= { .type = NLA_U32 },
+};
+
+static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			 const struct nlattr * const tb[])
+{
+	struct nft_meta *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_META_DREG] == NULL ||
+	    tb[NFTA_META_KEY] == NULL)
+		return -EINVAL;
+
+	priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+	switch (priv->key) {
+	case NFT_META_LEN:
+	case NFT_META_PROTOCOL:
+	case NFT_META_PRIORITY:
+	case NFT_META_MARK:
+	case NFT_META_IIF:
+	case NFT_META_OIF:
+	case NFT_META_IIFNAME:
+	case NFT_META_OIFNAME:
+	case NFT_META_IIFTYPE:
+	case NFT_META_OIFTYPE:
+	case NFT_META_SKUID:
+	case NFT_META_SKGID:
+#ifdef CONFIG_NET_CLS_ROUTE
+	case NFT_META_RTCLASSID:
+#endif
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+#endif
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+	return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+}
+
+static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_meta *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_meta_ops __read_mostly = {
+	.name		= "meta",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_meta_eval,
+	.init		= nft_meta_init,
+	.dump		= nft_meta_dump,
+	.policy		= nft_meta_policy,
+	.maxattr	= NFTA_META_MAX,
+};
+
+static int __init nft_meta_module_init(void)
+{
+	return nft_register_expr(&nft_meta_ops);
+}
+
+static void __exit nft_meta_module_exit(void)
+{
+	nft_unregister_expr(&nft_meta_ops);
+}
+
+module_init(nft_meta_module_init);
+module_exit(nft_meta_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("meta");
diff --git a/net/netfilter/nft_meta_target.c b/net/netfilter/nft_meta_target.c
new file mode 100644
index 000000000000..71177df75ffb
--- /dev/null
+++ b/net/netfilter/nft_meta_target.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_meta {
+	enum nft_meta_keys	key;
+};
+
+static void nft_meta_eval(const struct nft_expr *expr,
+			  struct nft_data *nfres,
+			  struct nft_data *data,
+			  const struct nft_pktinfo *pkt)
+{
+	const struct nft_meta *meta = nft_expr_priv(expr);
+	struct sk_buff *skb = pkt->skb;
+	u32 val = data->data[0];
+
+	switch (meta->key) {
+	case NFT_META_MARK:
+		skb->mark = val;
+		break;
+	case NFT_META_PRIORITY:
+		skb->priority = val;
+		break;
+	case NFT_META_NFTRACE:
+		skb->nf_trace = val;
+		break;
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+		skb->secmark = val;
+		break;
+#endif
+	default:
+		WARN_ON(1);
+	}
+}
+
+static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+	[NFTA_META_KEY]		= { .type = NLA_U32 },
+};
+
+static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[])
+{
+	struct nft_meta *meta = nft_expr_priv(expr);
+
+	if (tb[NFTA_META_KEY] == NULL)
+		return -EINVAL;
+
+	meta->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+	switch (meta->key) {
+	case NFT_META_MARK:
+	case NFT_META_PRIORITY:
+	case NFT_META_NFTRACE:
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+#endif
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_meta *meta = nft_expr_priv(expr);
+
+	NLA_PUT_BE32(skb, NFTA_META_KEY, htonl(meta->key));
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops meta_target __read_mostly = {
+	.name		= "meta",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_meta_eval,
+	.init		= nft_meta_init,
+	.dump		= nft_meta_dump,
+	.policy		= nft_meta_policy,
+	.maxattr	= NFTA_META_MAX,
+};
+
+static int __init nft_meta_target_init(void)
+{
+	return nft_register_expr(&meta_target);
+}
+
+static void __exit nft_meta_target_exit(void)
+{
+	nft_unregister_expr(&meta_target);
+}
+
+module_init(nft_meta_target_init);
+module_exit(nft_meta_target_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("meta");
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
new file mode 100644
index 000000000000..329f134b3f89
--- /dev/null
+++ b/net/netfilter/nft_payload.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_payload {
+	enum nft_payload_bases	base:8;
+	u8			offset;
+	u8			len;
+	enum nft_registers	dreg:8;
+};
+
+static void nft_payload_eval(const struct nft_expr *expr,
+			     struct nft_data data[NFT_REG_MAX + 1],
+			     const struct nft_pktinfo *pkt)
+{
+	const struct nft_payload *priv = nft_expr_priv(expr);
+	const struct sk_buff *skb = pkt->skb;
+	struct nft_data *dest = &data[priv->dreg];
+	int offset;
+
+	switch (priv->base) {
+	case NFT_PAYLOAD_LL_HEADER:
+		if (!skb_mac_header_was_set(skb))
+			goto err;
+		offset = skb_mac_header(skb) - skb->data;
+		break;
+	case NFT_PAYLOAD_NETWORK_HEADER:
+		offset = skb_network_offset(skb);
+		break;
+	case NFT_PAYLOAD_TRANSPORT_HEADER:
+		offset = skb_transport_offset(skb);
+		break;
+	default:
+		BUG();
+	}
+	offset += priv->offset;
+
+	if (skb_copy_bits(skb, offset, dest->data, priv->len) < 0)
+		goto err;
+	return;
+err:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
+	[NFTA_PAYLOAD_DREG]	= { .type = NLA_U32 },
+	[NFTA_PAYLOAD_BASE]	= { .type = NLA_U32 },
+	[NFTA_PAYLOAD_OFFSET]	= { .type = NLA_U32 },
+	[NFTA_PAYLOAD_LEN]	= { .type = NLA_U32 },
+};
+
+static int nft_payload_init(const struct nft_ctx *ctx,
+			    const struct nft_expr *expr,
+			    const struct nlattr * const tb[])
+{
+	struct nft_payload *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_PAYLOAD_DREG] == NULL ||
+	    tb[NFTA_PAYLOAD_BASE] == NULL ||
+	    tb[NFTA_PAYLOAD_OFFSET] == NULL ||
+	    tb[NFTA_PAYLOAD_LEN] == NULL)
+		return -EINVAL;
+
+	priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+	switch (priv->base) {
+	case NFT_PAYLOAD_LL_HEADER:
+	case NFT_PAYLOAD_NETWORK_HEADER:
+	case NFT_PAYLOAD_TRANSPORT_HEADER:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+	priv->len    = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+	if (priv->len == 0 ||
+	    priv->len > FIELD_SIZEOF(struct nft_data, data))
+		return -EINVAL;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+	return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+}
+
+static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_payload *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_PAYLOAD_DREG, htonl(priv->dreg)) ||
+	    nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) ||
+	    nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) ||
+	    nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_payload_ops __read_mostly = {
+	.name		= "payload",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_payload)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_payload_eval,
+	.init		= nft_payload_init,
+	.dump		= nft_payload_dump,
+	.policy		= nft_payload_policy,
+	.maxattr	= NFTA_PAYLOAD_MAX,
+};
+
+int __init nft_payload_module_init(void)
+{
+	return nft_register_expr(&nft_payload_ops);
+}
+
+void nft_payload_module_exit(void)
+{
+	nft_unregister_expr(&nft_payload_ops);
+}
diff --git a/net/netfilter/nft_set.c b/net/netfilter/nft_set.c
new file mode 100644
index 000000000000..7b7c8354c327
--- /dev/null
+++ b/net/netfilter/nft_set.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_set {
+	struct rb_root		root;
+	enum nft_registers	sreg:8;
+	enum nft_registers	dreg:8;
+	u8			klen;
+	u8			dlen;
+	u16			flags;
+};
+
+struct nft_set_elem {
+	struct rb_node		node;
+	enum nft_set_elem_flags	flags;
+	struct nft_data		key;
+	struct nft_data		data[];
+};
+
+static void nft_set_eval(const struct nft_expr *expr,
+			 struct nft_data data[NFT_REG_MAX + 1],
+			 const struct nft_pktinfo *pkt)
+{
+	const struct nft_set *priv = nft_expr_priv(expr);
+	const struct rb_node *parent = priv->root.rb_node;
+	const struct nft_set_elem *elem, *interval = NULL;
+	const struct nft_data *key = &data[priv->sreg];
+	int d;
+
+	while (parent != NULL) {
+		elem = rb_entry(parent, struct nft_set_elem, node);
+
+		d = nft_data_cmp(&elem->key, key, priv->klen);
+		if (d < 0) {
+			parent = parent->rb_left;
+			interval = elem;
+		} else if (d > 0)
+			parent = parent->rb_right;
+		else {
+found:
+			if (elem->flags & NFT_SE_INTERVAL_END)
+				goto out;
+			if (priv->flags & NFT_SET_MAP)
+				nft_data_copy(&data[priv->dreg], elem->data);
+			return;
+		}
+	}
+
+	if (priv->flags & NFT_SET_INTERVAL && interval != NULL) {
+		elem = interval;
+		goto found;
+	}
+out:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static void nft_set_elem_destroy(const struct nft_expr *expr,
+				 struct nft_set_elem *elem)
+{
+	const struct nft_set *priv = nft_expr_priv(expr);
+
+	nft_data_uninit(&elem->key, NFT_DATA_VALUE);
+	if (priv->flags & NFT_SET_MAP)
+		nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg));
+	kfree(elem);
+}
+
+static const struct nla_policy nft_se_policy[NFTA_SE_MAX + 1] = {
+	[NFTA_SE_KEY]		= { .type = NLA_NESTED },
+	[NFTA_SE_DATA]		= { .type = NLA_NESTED },
+	[NFTA_SE_FLAGS]		= { .type = NLA_U32 },
+};
+
+static int nft_set_elem_init(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr,
+			     const struct nlattr *nla,
+			     struct nft_set_elem **new)
+{
+	struct nft_set *priv = nft_expr_priv(expr);
+	struct nlattr *tb[NFTA_SE_MAX + 1];
+	struct nft_set_elem *elem;
+	struct nft_data_desc d1, d2;
+	enum nft_set_elem_flags flags = 0;
+	unsigned int size;
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_SE_MAX, nla, nft_se_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_SE_KEY] == NULL)
+		return -EINVAL;
+
+	if (tb[NFTA_SE_FLAGS] != NULL) {
+		flags = ntohl(nla_get_be32(tb[NFTA_SE_FLAGS]));
+		if (flags & ~NFT_SE_INTERVAL_END)
+			return -EINVAL;
+	}
+
+	size = sizeof(*elem);
+	if (priv->flags & NFT_SET_MAP) {
+		if (tb[NFTA_SE_DATA] == NULL && !(flags & NFT_SE_INTERVAL_END))
+			return -EINVAL;
+		size += sizeof(elem->data[0]);
+	} else {
+		if (tb[NFTA_SE_DATA] != NULL)
+			return -EINVAL;
+	}
+
+	elem = kzalloc(size, GFP_KERNEL);
+	if (elem == NULL)
+		return -ENOMEM;
+	elem->flags = flags;
+
+	err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_SE_KEY]);
+	if (err < 0)
+		goto err1;
+	err = -EINVAL;
+	if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen)
+		goto err2;
+
+	if (tb[NFTA_SE_DATA] != NULL) {
+		err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_SE_DATA]);
+		if (err < 0)
+			goto err2;
+		err = -EINVAL;
+		if (priv->dreg != NFT_REG_VERDICT && d2.len != priv->dlen)
+			goto err2;
+		err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type);
+		if (err < 0)
+			goto err3;
+	}
+
+	*new = elem;
+	return 0;
+
+err3:
+	nft_data_uninit(elem->data, d2.type);
+err2:
+	nft_data_uninit(&elem->key, d1.type);
+err1:
+	kfree(elem);
+	return err;
+}
+
+static int nft_set_elem_dump(struct sk_buff *skb, const struct nft_expr *expr,
+			     const struct nft_set_elem *elem)
+
+{
+	const struct nft_set *priv = nft_expr_priv(expr);
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, NFTA_LIST_ELEM);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_SE_KEY, &elem->key,
+			  NFT_DATA_VALUE, priv->klen) < 0)
+		goto nla_put_failure;
+
+	if (priv->flags & NFT_SET_MAP && !(elem->flags & NFT_SE_INTERVAL_END)) {
+		if (nft_data_dump(skb, NFTA_SE_DATA, elem->data,
+				  nft_dreg_to_type(priv->dreg), priv->dlen) < 0)
+			goto nla_put_failure;
+	}
+
+	if (elem->flags){
+		if (nla_put_be32(skb, NFTA_SE_FLAGS, htonl(elem->flags)))
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static void nft_set_destroy(const struct nft_expr *expr)
+{
+	struct nft_set *priv = nft_expr_priv(expr);
+	struct nft_set_elem *elem;
+	struct rb_node *node;
+
+	while ((node = priv->root.rb_node) != NULL) {
+		rb_erase(node, &priv->root);
+		elem = rb_entry(node, struct nft_set_elem, node);
+		nft_set_elem_destroy(expr, elem);
+	}
+}
+
+static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
+	[NFTA_SET_FLAGS]	= { .type = NLA_U32 },
+	[NFTA_SET_SREG]		= { .type = NLA_U32 },
+	[NFTA_SET_DREG]		= { .type = NLA_U32 },
+	[NFTA_SET_KLEN]		= { .type = NLA_U32 },
+	[NFTA_SET_DLEN]		= { .type = NLA_U32 },
+	[NFTA_SET_ELEMENTS]	= { .type = NLA_NESTED },
+};
+
+static int nft_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			const struct nlattr * const tb[])
+{
+	struct nft_set *priv = nft_expr_priv(expr);
+	struct nft_set_elem *elem, *uninitialized_var(new);
+	struct rb_node *parent, **p;
+	const struct nlattr *nla;
+	int err, rem, d;
+
+	if (tb[NFTA_SET_SREG] == NULL ||
+	    tb[NFTA_SET_KLEN] == NULL ||
+	    tb[NFTA_SET_ELEMENTS] == NULL)
+		return -EINVAL;
+
+	priv->root = RB_ROOT;
+
+	if (tb[NFTA_SET_FLAGS] != NULL) {
+		priv->flags = ntohl(nla_get_be32(tb[NFTA_SET_FLAGS]));
+		if (priv->flags & ~(NFT_SET_INTERVAL | NFT_SET_MAP))
+			return -EINVAL;
+	}
+
+	priv->sreg = ntohl(nla_get_be32(tb[NFTA_SET_SREG]));
+	err = nft_validate_input_register(priv->sreg);
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_SET_DREG] != NULL) {
+		if (!(priv->flags & NFT_SET_MAP))
+			return -EINVAL;
+		if (tb[NFTA_SET_DLEN] == NULL)
+			return -EINVAL;
+
+		priv->dreg = ntohl(nla_get_be32(tb[NFTA_SET_DREG]));
+		err = nft_validate_output_register(priv->dreg);
+		if (err < 0)
+			return err;
+
+		if (priv->dreg == NFT_REG_VERDICT)
+			priv->dlen = FIELD_SIZEOF(struct nft_data, data);
+		else {
+			priv->dlen = ntohl(nla_get_be32(tb[NFTA_SET_DLEN]));
+			if (priv->dlen == 0 ||
+			    priv->dlen > FIELD_SIZEOF(struct nft_data, data))
+				return -EINVAL;
+		}
+	} else {
+		if (priv->flags & NFT_SET_MAP)
+			return -EINVAL;
+		if (tb[NFTA_SET_DLEN] != NULL)
+			return -EINVAL;
+	}
+
+	priv->klen = ntohl(nla_get_be32(tb[NFTA_SET_KLEN]));
+	if (priv->klen == 0 ||
+	    priv->klen > FIELD_SIZEOF(struct nft_data, data))
+		return -EINVAL;
+
+	nla_for_each_nested(nla, tb[NFTA_SET_ELEMENTS], rem) {
+		err = -EINVAL;
+		if (nla_type(nla) != NFTA_LIST_ELEM)
+			goto err1;
+
+		err = nft_set_elem_init(ctx, expr, nla, &new);
+		if (err < 0)
+			goto err1;
+
+		parent = NULL;
+		p = &priv->root.rb_node;
+		while (*p != NULL) {
+			parent = *p;
+			elem = rb_entry(parent, struct nft_set_elem, node);
+			d = nft_data_cmp(&elem->key, &new->key, priv->klen);
+			if (d < 0)
+				p = &parent->rb_left;
+			else if (d > 0)
+				p = &parent->rb_right;
+			else {
+				err = -EEXIST;
+				goto err2;
+			}
+		}
+		rb_link_node(&new->node, parent, p);
+		rb_insert_color(&new->node, &priv->root);
+	}
+
+	return 0;
+
+err2:
+	nft_set_elem_destroy(expr, new);
+err1:
+	nft_set_destroy(expr);
+	return err;
+}
+
+static int nft_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_set *priv = nft_expr_priv(expr);
+	const struct nft_set_elem *elem;
+	struct rb_node *node;
+	struct nlattr *list;
+
+	if (priv->flags) {
+		if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(priv->flags)))
+			goto nla_put_failure;
+	}
+
+	if (nla_put_be32(skb, NFTA_SET_SREG, htonl(priv->sreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_SET_KLEN, htonl(priv->klen)))
+		goto nla_put_failure;
+
+	if (priv->flags & NFT_SET_MAP) {
+		if (nla_put_be32(skb, NFTA_SET_DREG, htonl(priv->dreg)))
+			goto nla_put_failure;
+		if (nla_put_be32(skb, NFTA_SET_DLEN, htonl(priv->dlen)))
+			goto nla_put_failure;
+	}
+
+	list = nla_nest_start(skb, NFTA_SET_ELEMENTS);
+	if (list == NULL)
+		goto nla_put_failure;
+
+	for (node = rb_first(&priv->root); node; node = rb_next(node)) {
+		elem = rb_entry(node, struct nft_set_elem, node);
+		if (nft_set_elem_dump(skb, expr, elem) < 0)
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(skb, list);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_ops nft_set_ops __read_mostly = {
+	.name		= "set",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_set)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_set_eval,
+	.init		= nft_set_init,
+	.destroy	= nft_set_destroy,
+	.dump		= nft_set_dump,
+	.policy		= nft_set_policy,
+	.maxattr	= NFTA_SET_MAX,
+};
+
+static int __init nft_set_module_init(void)
+{
+	return nft_register_expr(&nft_set_ops);
+}
+
+static void __exit nft_set_module_exit(void)
+{
+	nft_unregister_expr(&nft_set_ops);
+}
+
+module_init(nft_set_module_init);
+module_exit(nft_set_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("set");
-- 
cgit v1.2.3-71-gd317


From 0628b123c96d126e617beb3b4fd63b874d0e4f17 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 14 Oct 2013 11:05:33 +0200
Subject: netfilter: nfnetlink: add batch support and use it from nf_tables

This patch adds a batch support to nfnetlink. Basically, it adds
two new control messages:

* NFNL_MSG_BATCH_BEGIN, that indicates the beginning of a batch,
  the nfgenmsg->res_id indicates the nfnetlink subsystem ID.

* NFNL_MSG_BATCH_END, that results in the invocation of the
  ss->commit callback function. If not specified or an error
  ocurred in the batch, the ss->abort function is invoked
  instead.

The end message represents the commit operation in nftables, the
lack of end message results in an abort. This patch also adds the
.call_batch function that is only called from the batch receival
path.

This patch adds atomic rule updates and dumps based on
bitmask generations. This allows to atomically commit a set of
rule-set updates incrementally without altering the internal
state of existing nf_tables expressions/matches/targets.

The idea consists of using a generation cursor of 1 bit and
a bitmask of 2 bits per rule. Assuming the gencursor is 0,
then the genmask (expressed as a bitmask) can be interpreted
as:

00 active in the present, will be active in the next generation.
01 inactive in the present, will be active in the next generation.
10 active in the present, will be deleted in the next generation.
 ^
 gencursor

Once you invoke the transition to the next generation, the global
gencursor is updated:

00 active in the present, will be active in the next generation.
01 active in the present, needs to zero its future, it becomes 00.
10 inactive in the present, delete now.
^
gencursor

If a dump is in progress and nf_tables enters a new generation,
the dump will stop and return -EBUSY to let userspace know that
it has to retry again. In order to invalidate dumps, a global
genctr counter is increased everytime nf_tables enters a new
generation.

This new operation can be used from the user-space utility
that controls the firewall, eg.

nft -f restore

The rule updates contained in `file' will be applied atomically.

cat file
-----
add filter INPUT ip saddr 1.1.1.1 counter accept #1
del filter INPUT ip daddr 2.2.2.2 counter drop   #2
-EOF-

Note that the rule 1 will be inactive until the transition to the
next generation, the rule 2 will be evicted in the next generation.

There is a penalty during the rule update due to the branch
misprediction in the packet matching framework. But that should be
quickly resolved once the iteration over the commit list that
contain rules that require updates is finished.

Event notification happens once the rule-set update has been
committed. So we skip notifications is case the rule-set update
is aborted, which can happen in case that the rule-set is tested
to apply correctly.

This patch squashed the following patches from Pablo:

* nf_tables: atomic rule updates and dumps
* nf_tables: get rid of per rule list_head for commits
* nf_tables: use per netns commit list
* nfnetlink: add batch support and use it from nf_tables
* nf_tables: all rule updates are transactional
* nf_tables: attach replacement rule after stale one
* nf_tables: do not allow deletion/replacement of stale rules
* nf_tables: remove unused NFTA_RULE_FLAGS

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h      |   5 +
 include/net/netfilter/nf_tables.h        |  25 +++-
 include/net/netns/nftables.h             |   3 +
 include/uapi/linux/netfilter/nfnetlink.h |   4 +
 net/netfilter/nf_tables_api.c            | 202 ++++++++++++++++++++++++++++---
 net/netfilter/nf_tables_core.c           |  10 ++
 net/netfilter/nfnetlink.c                | 175 +++++++++++++++++++++++++-
 7 files changed, 401 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 4f68cd7141d2..28c74367e900 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -14,6 +14,9 @@ struct nfnl_callback {
 	int (*call_rcu)(struct sock *nl, struct sk_buff *skb, 
 		    const struct nlmsghdr *nlh,
 		    const struct nlattr * const cda[]);
+	int (*call_batch)(struct sock *nl, struct sk_buff *skb,
+			  const struct nlmsghdr *nlh,
+			  const struct nlattr * const cda[]);
 	const struct nla_policy *policy;	/* netlink attribute policy */
 	const u_int16_t attr_count;		/* number of nlattr's */
 };
@@ -23,6 +26,8 @@ struct nfnetlink_subsystem {
 	__u8 subsys_id;			/* nfnetlink subsystem ID */
 	__u8 cb_count;			/* number of callbacks */
 	const struct nfnl_callback *cb;	/* callback for individual types */
+	int (*commit)(struct sk_buff *skb);
+	int (*abort)(struct sk_buff *skb);
 };
 
 int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index d3272e943aac..975ad3c573c7 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -323,18 +323,39 @@ static inline void *nft_expr_priv(const struct nft_expr *expr)
  *	@list: used internally
  *	@rcu_head: used internally for rcu
  *	@handle: rule handle
+ *	@genmask: generation mask
  *	@dlen: length of expression data
  *	@data: expression data
  */
 struct nft_rule {
 	struct list_head		list;
 	struct rcu_head			rcu_head;
-	u64				handle:48,
+	u64				handle:46,
+					genmask:2,
 					dlen:16;
 	unsigned char			data[]
 		__attribute__((aligned(__alignof__(struct nft_expr))));
 };
 
+/**
+ *	struct nft_rule_trans - nf_tables rule update in transaction
+ *
+ *	@list: used internally
+ *	@rule: rule that needs to be updated
+ *	@chain: chain that this rule belongs to
+ *	@table: table for which this chain applies
+ *	@nlh: netlink header of the message that contain this update
+ *	@family: family expressesed as AF_*
+ */
+struct nft_rule_trans {
+	struct list_head		list;
+	struct nft_rule			*rule;
+	const struct nft_chain		*chain;
+	const struct nft_table		*table;
+	const struct nlmsghdr		*nlh;
+	u8				family;
+};
+
 static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
 {
 	return (struct nft_expr *)&rule->data[0];
@@ -370,6 +391,7 @@ enum nft_chain_flags {
  *	@rules: list of rules in the chain
  *	@list: used internally
  *	@rcu_head: used internally
+ *	@net: net namespace that this chain belongs to
  *	@handle: chain handle
  *	@flags: bitmask of enum nft_chain_flags
  *	@use: number of jump references to this chain
@@ -380,6 +402,7 @@ struct nft_chain {
 	struct list_head		rules;
 	struct list_head		list;
 	struct rcu_head			rcu_head;
+	struct net			*net;
 	u64				handle;
 	u8				flags;
 	u16				use;
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index a98b1c5d9913..08a4248a12b5 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -7,9 +7,12 @@ struct nft_af_info;
 
 struct netns_nftables {
 	struct list_head	af_info;
+	struct list_head	commit_list;
 	struct nft_af_info	*ipv4;
 	struct nft_af_info	*ipv6;
 	struct nft_af_info	*bridge;
+	u8			gencursor;
+	u8			genctr;
 };
 
 #endif
diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
index 288959404d54..596ddd45253c 100644
--- a/include/uapi/linux/netfilter/nfnetlink.h
+++ b/include/uapi/linux/netfilter/nfnetlink.h
@@ -57,4 +57,8 @@ struct nfgenmsg {
 #define NFNL_SUBSYS_NFT_COMPAT		11
 #define NFNL_SUBSYS_COUNT		12
 
+/* Reserved control nfnetlink messages */
+#define NFNL_MSG_BATCH_BEGIN		NLMSG_MIN_TYPE
+#define NFNL_MSG_BATCH_END		NLMSG_MIN_TYPE+1
+
 #endif /* _UAPI_NFNETLINK_H */
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 0f140663ec71..79e1418a6043 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -978,6 +978,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 
 	INIT_LIST_HEAD(&chain->rules);
 	chain->handle = nf_tables_alloc_handle(table);
+	chain->net = net;
 	nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
 
 	if (!(table->flags & NFT_TABLE_F_DORMANT) &&
@@ -1371,6 +1372,41 @@ err:
 	return err;
 }
 
+static inline bool
+nft_rule_is_active(struct net *net, const struct nft_rule *rule)
+{
+	return (rule->genmask & (1 << net->nft.gencursor)) == 0;
+}
+
+static inline int gencursor_next(struct net *net)
+{
+	return net->nft.gencursor+1 == 1 ? 1 : 0;
+}
+
+static inline int
+nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
+{
+	return (rule->genmask & (1 << gencursor_next(net))) == 0;
+}
+
+static inline void
+nft_rule_activate_next(struct net *net, struct nft_rule *rule)
+{
+	/* Now inactive, will be active in the future */
+	rule->genmask = (1 << net->nft.gencursor);
+}
+
+static inline void
+nft_rule_disactivate_next(struct net *net, struct nft_rule *rule)
+{
+	rule->genmask = (1 << gencursor_next(net));
+}
+
+static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
+{
+	rule->genmask = 0;
+}
+
 static int nf_tables_dump_rules(struct sk_buff *skb,
 				struct netlink_callback *cb)
 {
@@ -1382,6 +1418,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 	unsigned int idx = 0, s_idx = cb->args[0];
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
+	u8 genctr = ACCESS_ONCE(net->nft.genctr);
+	u8 gencursor = ACCESS_ONCE(net->nft.gencursor);
 
 	list_for_each_entry(afi, &net->nft.af_info, list) {
 		if (family != NFPROTO_UNSPEC && family != afi->family)
@@ -1390,6 +1428,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 		list_for_each_entry(table, &afi->tables, list) {
 			list_for_each_entry(chain, &table->chains, list) {
 				list_for_each_entry(rule, &chain->rules, list) {
+					if (!nft_rule_is_active(net, rule))
+						goto cont;
 					if (idx < s_idx)
 						goto cont;
 					if (idx > s_idx)
@@ -1408,6 +1448,10 @@ cont:
 		}
 	}
 done:
+	/* Invalidate this dump, a transition to the new generation happened */
+	if (gencursor != net->nft.gencursor || genctr != net->nft.genctr)
+		return -EBUSY;
+
 	cb->args[0] = idx;
 	return skb->len;
 }
@@ -1492,6 +1536,25 @@ static void nf_tables_rule_destroy(struct nft_rule *rule)
 
 static struct nft_expr_info *info;
 
+static struct nft_rule_trans *
+nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx)
+{
+	struct nft_rule_trans *rupd;
+
+	rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL);
+	if (rupd == NULL)
+	       return NULL;
+
+	rupd->chain = ctx->chain;
+	rupd->table = ctx->table;
+	rupd->rule = rule;
+	rupd->family = ctx->afi->family;
+	rupd->nlh = ctx->nlh;
+	list_add_tail(&rupd->list, &ctx->net->nft.commit_list);
+
+	return rupd;
+}
+
 static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 			     const struct nlmsghdr *nlh,
 			     const struct nlattr * const nla[])
@@ -1502,6 +1565,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 	struct nft_table *table;
 	struct nft_chain *chain;
 	struct nft_rule *rule, *old_rule = NULL;
+	struct nft_rule_trans *repl = NULL;
 	struct nft_expr *expr;
 	struct nft_ctx ctx;
 	struct nlattr *tmp;
@@ -1576,6 +1640,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 	if (rule == NULL)
 		goto err1;
 
+	nft_rule_activate_next(net, rule);
+
 	rule->handle = handle;
 	rule->dlen   = size;
 
@@ -1589,8 +1655,18 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 	}
 
 	if (nlh->nlmsg_flags & NLM_F_REPLACE) {
-		list_replace_rcu(&old_rule->list, &rule->list);
-		nf_tables_rule_destroy(old_rule);
+		if (nft_rule_is_active_next(net, old_rule)) {
+			repl = nf_tables_trans_add(old_rule, &ctx);
+			if (repl == NULL) {
+				err = -ENOMEM;
+				goto err2;
+			}
+			nft_rule_disactivate_next(net, old_rule);
+			list_add_tail(&rule->list, &old_rule->list);
+		} else {
+			err = -ENOENT;
+			goto err2;
+		}
 	} else if (nlh->nlmsg_flags & NLM_F_APPEND)
 		if (old_rule)
 			list_add_rcu(&rule->list, &old_rule->list);
@@ -1603,11 +1679,20 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 			list_add_rcu(&rule->list, &chain->rules);
 	}
 
-	nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE,
-			      nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE),
-			      nfmsg->nfgen_family);
+	if (nf_tables_trans_add(rule, &ctx) == NULL) {
+		err = -ENOMEM;
+		goto err3;
+	}
 	return 0;
 
+err3:
+	list_del_rcu(&rule->list);
+	if (repl) {
+		list_del_rcu(&repl->rule->list);
+		list_del(&repl->list);
+		nft_rule_clear(net, repl->rule);
+		kfree(repl);
+	}
 err2:
 	nf_tables_rule_destroy(rule);
 err1:
@@ -1618,6 +1703,19 @@ err1:
 	return err;
 }
 
+static int
+nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
+{
+	/* You cannot delete the same rule twice */
+	if (nft_rule_is_active_next(ctx->net, rule)) {
+		if (nf_tables_trans_add(rule, ctx) == NULL)
+			return -ENOMEM;
+		nft_rule_disactivate_next(ctx->net, rule);
+		return 0;
+	}
+	return -ENOENT;
+}
+
 static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
 			     const struct nlmsghdr *nlh,
 			     const struct nlattr * const nla[])
@@ -1628,7 +1726,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
 	const struct nft_table *table;
 	struct nft_chain *chain;
 	struct nft_rule *rule, *tmp;
-	int family = nfmsg->nfgen_family;
+	int family = nfmsg->nfgen_family, err = 0;
+	struct nft_ctx ctx;
 
 	afi = nf_tables_afinfo_lookup(net, family, false);
 	if (IS_ERR(afi))
@@ -1642,31 +1741,95 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
 
+	nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+
 	if (nla[NFTA_RULE_HANDLE]) {
 		rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
 		if (IS_ERR(rule))
 			return PTR_ERR(rule);
 
-		/* List removal must be visible before destroying expressions */
-		list_del_rcu(&rule->list);
-
-		nf_tables_rule_notify(skb, nlh, table, chain, rule,
-				      NFT_MSG_DELRULE, 0, family);
-		nf_tables_rule_destroy(rule);
+		err = nf_tables_delrule_one(&ctx, rule);
 	} else {
 		/* Remove all rules in this chain */
 		list_for_each_entry_safe(rule, tmp, &chain->rules, list) {
-			list_del_rcu(&rule->list);
+			err = nf_tables_delrule_one(&ctx, rule);
+			if (err < 0)
+				break;
+		}
+	}
+
+	return err;
+}
+
+static int nf_tables_commit(struct sk_buff *skb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nft_rule_trans *rupd, *tmp;
 
-			nf_tables_rule_notify(skb, nlh, table, chain, rule,
-					      NFT_MSG_DELRULE, 0, family);
-			nf_tables_rule_destroy(rule);
+	/* Bump generation counter, invalidate any dump in progress */
+	net->nft.genctr++;
+
+	/* A new generation has just started */
+	net->nft.gencursor = gencursor_next(net);
+
+	/* Make sure all packets have left the previous generation before
+	 * purging old rules.
+	 */
+	synchronize_rcu();
+
+	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
+		/* Delete this rule from the dirty list */
+		list_del(&rupd->list);
+
+		/* This rule was inactive in the past and just became active.
+		 * Clear the next bit of the genmask since its meaning has
+		 * changed, now it is the future.
+		 */
+		if (nft_rule_is_active(net, rupd->rule)) {
+			nft_rule_clear(net, rupd->rule);
+			nf_tables_rule_notify(skb, rupd->nlh, rupd->table,
+					      rupd->chain, rupd->rule,
+					      NFT_MSG_NEWRULE, 0,
+					      rupd->family);
+			kfree(rupd);
+			continue;
 		}
+
+		/* This rule is in the past, get rid of it */
+		list_del_rcu(&rupd->rule->list);
+		nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain,
+				      rupd->rule, NFT_MSG_DELRULE, 0,
+				      rupd->family);
+		nf_tables_rule_destroy(rupd->rule);
+		kfree(rupd);
 	}
 
 	return 0;
 }
 
+static int nf_tables_abort(struct sk_buff *skb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nft_rule_trans *rupd, *tmp;
+
+	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
+		/* Delete all rules from the dirty list */
+		list_del(&rupd->list);
+
+		if (!nft_rule_is_active_next(net, rupd->rule)) {
+			nft_rule_clear(net, rupd->rule);
+			kfree(rupd);
+			continue;
+		}
+
+		/* This rule is inactive, get rid of it */
+		list_del_rcu(&rupd->rule->list);
+		nf_tables_rule_destroy(rupd->rule);
+		kfree(rupd);
+	}
+	return 0;
+}
+
 /*
  * Sets
  */
@@ -2634,7 +2797,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.policy		= nft_chain_policy,
 	},
 	[NFT_MSG_NEWRULE] = {
-		.call		= nf_tables_newrule,
+		.call_batch	= nf_tables_newrule,
 		.attr_count	= NFTA_RULE_MAX,
 		.policy		= nft_rule_policy,
 	},
@@ -2644,7 +2807,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.policy		= nft_rule_policy,
 	},
 	[NFT_MSG_DELRULE] = {
-		.call		= nf_tables_delrule,
+		.call_batch	= nf_tables_delrule,
 		.attr_count	= NFTA_RULE_MAX,
 		.policy		= nft_rule_policy,
 	},
@@ -2685,6 +2848,8 @@ static const struct nfnetlink_subsystem nf_tables_subsys = {
 	.subsys_id	= NFNL_SUBSYS_NFTABLES,
 	.cb_count	= NFT_MSG_MAX,
 	.cb		= nf_tables_cb,
+	.commit		= nf_tables_commit,
+	.abort		= nf_tables_abort,
 };
 
 /*
@@ -3056,6 +3221,7 @@ EXPORT_SYMBOL_GPL(nft_data_dump);
 static int nf_tables_init_net(struct net *net)
 {
 	INIT_LIST_HEAD(&net->nft.af_info);
+	INIT_LIST_HEAD(&net->nft.commit_list);
 	return 0;
 }
 
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 3c13007d80df..d581ef660248 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -88,12 +88,22 @@ nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
 	struct nft_data data[NFT_REG_MAX + 1];
 	unsigned int stackptr = 0;
 	struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
+	/*
+	 * Cache cursor to avoid problems in case that the cursor is updated
+	 * while traversing the ruleset.
+	 */
+	unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor);
 
 do_chain:
 	rule = list_entry(&chain->rules, struct nft_rule, list);
 next_rule:
 	data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
 	list_for_each_entry_continue_rcu(rule, &chain->rules, list) {
+
+		/* This rule is not active, skip. */
+		if (unlikely(rule->genmask & (1 << gencursor)))
+			continue;
+
 		nft_rule_for_each_expr(expr, last, rule) {
 			if (expr->ops == &nft_cmp_fast_ops)
 				nft_cmp_fast_eval(expr, data);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 572d87dc116f..027f16af51a0 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -147,9 +147,6 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	const struct nfnetlink_subsystem *ss;
 	int type, err;
 
-	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-		return -EPERM;
-
 	/* All the messages must at least contain nfgenmsg */
 	if (nlmsg_len(nlh) < sizeof(struct nfgenmsg))
 		return 0;
@@ -217,9 +214,179 @@ replay:
 	}
 }
 
+static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
+				u_int16_t subsys_id)
+{
+	struct sk_buff *nskb, *oskb = skb;
+	struct net *net = sock_net(skb->sk);
+	const struct nfnetlink_subsystem *ss;
+	const struct nfnl_callback *nc;
+	bool success = true, done = false;
+	int err;
+
+	if (subsys_id >= NFNL_SUBSYS_COUNT)
+		return netlink_ack(skb, nlh, -EINVAL);
+replay:
+	nskb = netlink_skb_clone(oskb, GFP_KERNEL);
+	if (!nskb)
+		return netlink_ack(oskb, nlh, -ENOMEM);
+
+	nskb->sk = oskb->sk;
+	skb = nskb;
+
+	nfnl_lock(subsys_id);
+	ss = rcu_dereference_protected(table[subsys_id].subsys,
+				       lockdep_is_held(&table[subsys_id].mutex));
+	if (!ss) {
+#ifdef CONFIG_MODULES
+		nfnl_unlock(subsys_id);
+		request_module("nfnetlink-subsys-%d", subsys_id);
+		nfnl_lock(subsys_id);
+		ss = rcu_dereference_protected(table[subsys_id].subsys,
+					       lockdep_is_held(&table[subsys_id].mutex));
+		if (!ss)
+#endif
+		{
+			nfnl_unlock(subsys_id);
+			kfree_skb(nskb);
+			return netlink_ack(skb, nlh, -EOPNOTSUPP);
+		}
+	}
+
+	if (!ss->commit || !ss->abort) {
+		nfnl_unlock(subsys_id);
+		kfree_skb(nskb);
+		return netlink_ack(skb, nlh, -EOPNOTSUPP);
+	}
+
+	while (skb->len >= nlmsg_total_size(0)) {
+		int msglen, type;
+
+		nlh = nlmsg_hdr(skb);
+		err = 0;
+
+		if (nlh->nlmsg_len < NLMSG_HDRLEN) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		/* Only requests are handled by the kernel */
+		if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		type = nlh->nlmsg_type;
+		if (type == NFNL_MSG_BATCH_BEGIN) {
+			/* Malformed: Batch begin twice */
+			success = false;
+			goto done;
+		} else if (type == NFNL_MSG_BATCH_END) {
+			done = true;
+			goto done;
+		} else if (type < NLMSG_MIN_TYPE) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		/* We only accept a batch with messages for the same
+		 * subsystem.
+		 */
+		if (NFNL_SUBSYS_ID(type) != subsys_id) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		nc = nfnetlink_find_client(type, ss);
+		if (!nc) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		{
+			int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
+			u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
+			struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
+			struct nlattr *attr = (void *)nlh + min_len;
+			int attrlen = nlh->nlmsg_len - min_len;
+
+			err = nla_parse(cda, ss->cb[cb_id].attr_count,
+					attr, attrlen, ss->cb[cb_id].policy);
+			if (err < 0)
+				goto ack;
+
+			if (nc->call_batch) {
+				err = nc->call_batch(net->nfnl, skb, nlh,
+						     (const struct nlattr **)cda);
+			}
+
+			/* The lock was released to autoload some module, we
+			 * have to abort and start from scratch using the
+			 * original skb.
+			 */
+			if (err == -EAGAIN) {
+				ss->abort(skb);
+				nfnl_unlock(subsys_id);
+				kfree_skb(nskb);
+				goto replay;
+			}
+		}
+ack:
+		if (nlh->nlmsg_flags & NLM_F_ACK || err) {
+			/* We don't stop processing the batch on errors, thus,
+			 * userspace gets all the errors that the batch
+			 * triggers.
+			 */
+			netlink_ack(skb, nlh, err);
+			if (err)
+				success = false;
+		}
+
+		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (msglen > skb->len)
+			msglen = skb->len;
+		skb_pull(skb, msglen);
+	}
+done:
+	if (success && done)
+		ss->commit(skb);
+	else
+		ss->abort(skb);
+
+	nfnl_unlock(subsys_id);
+	kfree_skb(nskb);
+}
+
 static void nfnetlink_rcv(struct sk_buff *skb)
 {
-	netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
+	struct nlmsghdr *nlh = nlmsg_hdr(skb);
+	struct net *net = sock_net(skb->sk);
+	int msglen;
+
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+		return netlink_ack(skb, nlh, -EPERM);
+
+	if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+	    skb->len < nlh->nlmsg_len)
+		return;
+
+	if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) {
+		struct nfgenmsg *nfgenmsg;
+
+		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (msglen > skb->len)
+			msglen = skb->len;
+
+		if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+		    skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg))
+			return;
+
+		nfgenmsg = nlmsg_data(nlh);
+		skb_pull(skb, msglen);
+		nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id);
+	} else {
+		netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
+	}
 }
 
 #ifdef CONFIG_MODULES
-- 
cgit v1.2.3-71-gd317


From 5930e8d0ab3689f1e239566443ca8f53e45e01cc Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 15 Oct 2013 16:55:22 +0200
Subject: net/mlx4: Fix typo, move similar defs to same location

Small code cleanup:

1. change MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN to MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN

2. put MLX4_SET_PORT_PRIO2TC and MLX4_SET_PORT_SCHEDULER in the same union with the
   other MLX4_SET_PORT_yyy

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +-
 drivers/net/ethernet/mellanox/mlx4/fw.c        | 2 +-
 include/linux/mlx4/cmd.h                       | 6 ++----
 include/linux/mlx4/device.h                    | 2 +-
 4 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index fa37b7a61213..85d91665d400 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1733,7 +1733,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
 
 	/* Unregister Mac address for the port */
 	mlx4_en_put_qp(priv);
-	if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN))
+	if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN))
 		mdev->mac_removed[priv->port] = 1;
 
 	/* Free RX Rings */
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 0d63daa2f422..a377484cf544 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -652,7 +652,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		 QUERY_DEV_CAP_RSVD_LKEY_OFFSET);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC);
 	if (field & 1<<6)
-		dev_cap->flags2 |= MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN;
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN;
 	MLX4_GET(dev_cap->max_icm_sz, outbox,
 		 QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET);
 	if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS)
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index cd1fdf75103b..8df61bc5da00 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -154,10 +154,6 @@ enum {
 	MLX4_CMD_QUERY_IF_STAT	 = 0X54,
 	MLX4_CMD_SET_IF_STAT	 = 0X55,
 
-	/* set port opcode modifiers */
-	MLX4_SET_PORT_PRIO2TC = 0x8,
-	MLX4_SET_PORT_SCHEDULER  = 0x9,
-
 	/* register/delete flow steering network rules */
 	MLX4_QP_FLOW_STEERING_ATTACH = 0x65,
 	MLX4_QP_FLOW_STEERING_DETACH = 0x66,
@@ -182,6 +178,8 @@ enum {
 	MLX4_SET_PORT_VLAN_TABLE = 0x3,
 	MLX4_SET_PORT_PRIO_MAP  = 0x4,
 	MLX4_SET_PORT_GID_TABLE = 0x5,
+	MLX4_SET_PORT_PRIO2TC	= 0x8,
+	MLX4_SET_PORT_SCHEDULER = 0x9,
 };
 
 enum {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 24ce6bdd540e..9ad0c18495ad 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -155,7 +155,7 @@ enum {
 	MLX4_DEV_CAP_FLAG2_RSS_TOP		= 1LL <<  1,
 	MLX4_DEV_CAP_FLAG2_RSS_XOR		= 1LL <<  2,
 	MLX4_DEV_CAP_FLAG2_FS_EN		= 1LL <<  3,
-	MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN	= 1LL <<  4,
+	MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN	= 1LL <<  4,
 	MLX4_DEV_CAP_FLAG2_TS			= 1LL <<  5,
 	MLX4_DEV_CAP_FLAG2_VLAN_CONTROL		= 1LL <<  6,
 	MLX4_DEV_CAP_FLAG2_FSM			= 1LL <<  7,
-- 
cgit v1.2.3-71-gd317


From 400dfd3ae899849b27d398ca7894e1b44430887f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 17 Oct 2013 16:27:07 -0700
Subject: net: refactor sk_page_frag_refill()

While working on virtio_net new allocation strategy to increase
payload/truesize ratio, we found that refactoring sk_page_frag_refill()
was needed.

This patch splits sk_page_frag_refill() into two parts, adding
skb_page_frag_refill() which can be used without a socket.

While we are at it, add a minimum frag size of 32 for
sk_page_frag_refill()

Michael will either use netdev_alloc_frag() from softirq context,
or skb_page_frag_refill() from process context in refill_work()
 (GFP_KERNEL allocations)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Michael Dalton <mwdalton@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 ++
 net/core/sock.c        | 27 +++++++++++++++++++++++----
 2 files changed, 25 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1cd32f96055e..ba74474836c0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2062,6 +2062,8 @@ static inline void skb_frag_set_page(struct sk_buff *skb, int f,
 	__skb_frag_set_page(&skb_shinfo(skb)->frags[f], page);
 }
 
+bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio);
+
 /**
  * skb_frag_dma_map - maps a paged fragment via the DMA API
  * @dev: the device to map the fragment to
diff --git a/net/core/sock.c b/net/core/sock.c
index fd6afa267475..440afdca1e8f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1847,7 +1847,17 @@ EXPORT_SYMBOL(sock_alloc_send_skb);
 /* On 32bit arches, an skb frag is limited to 2^15 */
 #define SKB_FRAG_PAGE_ORDER	get_order(32768)
 
-bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
+/**
+ * skb_page_frag_refill - check that a page_frag contains enough room
+ * @sz: minimum size of the fragment we want to get
+ * @pfrag: pointer to page_frag
+ * @prio: priority for memory allocation
+ *
+ * Note: While this allocator tries to use high order pages, there is
+ * no guarantee that allocations succeed. Therefore, @sz MUST be
+ * less or equal than PAGE_SIZE.
+ */
+bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio)
 {
 	int order;
 
@@ -1856,16 +1866,16 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
 			pfrag->offset = 0;
 			return true;
 		}
-		if (pfrag->offset < pfrag->size)
+		if (pfrag->offset + sz <= pfrag->size)
 			return true;
 		put_page(pfrag->page);
 	}
 
 	/* We restrict high order allocations to users that can afford to wait */
-	order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
+	order = (prio & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
 
 	do {
-		gfp_t gfp = sk->sk_allocation;
+		gfp_t gfp = prio;
 
 		if (order)
 			gfp |= __GFP_COMP | __GFP_NOWARN;
@@ -1877,6 +1887,15 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
 		}
 	} while (--order >= 0);
 
+	return false;
+}
+EXPORT_SYMBOL(skb_page_frag_refill);
+
+bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
+{
+	if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
+		return true;
+
 	sk_enter_memory_pressure(sk);
 	sk_stream_moderate_sndbuf(sk);
 	return false;
-- 
cgit v1.2.3-71-gd317


From 7cc7c5e54b7128195a1403747a63971c3c3f8e25 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 14 Oct 2013 21:49:21 +0100
Subject: net: Delete trailing semi-colon from definition of netdev_WARN()

Macro definitions should not normally end with a semi-colon, as this
makes it dangerous to use them an if...else statement.  Happily this
has not happened yet.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2e53b44454ad..27f62f746621 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3049,7 +3049,7 @@ do {								\
  * file/line information and a backtrace.
  */
 #define netdev_WARN(dev, format, args...)			\
-	WARN(1, "netdevice: %s\n" format, netdev_name(dev), ##args);
+	WARN(1, "netdevice: %s\n" format, netdev_name(dev), ##args)
 
 /* netif printk helpers, similar to netdev_printk */
 
-- 
cgit v1.2.3-71-gd317


From 3347c960295583eee3fd58e5c539fb1972fbc005 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 19 Oct 2013 11:42:56 -0700
Subject: ipv4: gso: make inet_gso_segment() stackable

In order to support GSO on IPIP, we need to make
inet_gso_segment() stackable.

It should not assume network header starts right after mac
header.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  7 +++++--
 net/core/dev.c         |  2 ++
 net/ipv4/af_inet.c     | 25 ++++++++++++++++++-------
 3 files changed, 25 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ba74474836c0..cad1e0c5cc04 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2722,9 +2722,12 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 /* Keeps track of mac header offset relative to skb->head.
  * It is useful for TSO of Tunneling protocol. e.g. GRE.
  * For non-tunnel skb it points to skb_mac_header() and for
- * tunnel skb it points to outer mac header. */
+ * tunnel skb it points to outer mac header.
+ * Keeps track of level of encapsulation of network headers.
+ */
 struct skb_gso_cb {
-	int mac_offset;
+	int	mac_offset;
+	int	encap_level;
 };
 #define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb)
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 1b6eadf69289..0918aadc20fd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2377,6 +2377,8 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 	}
 
 	SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
+	SKB_GSO_CB(skb)->encap_level = 0;
+
 	skb_reset_mac_header(skb);
 	skb_reset_mac_len(skb);
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 4f8cd4fc451d..5783ab5b5ef8 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1273,16 +1273,17 @@ out:
 }
 
 static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
-	netdev_features_t features)
+					netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	const struct net_offload *ops;
+	unsigned int offset = 0;
 	struct iphdr *iph;
+	bool tunnel;
 	int proto;
+	int nhoff;
 	int ihl;
 	int id;
-	unsigned int offset = 0;
-	bool tunnel;
 
 	if (unlikely(skb_shinfo(skb)->gso_type &
 		     ~(SKB_GSO_TCPV4 |
@@ -1296,6 +1297,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       0)))
 		goto out;
 
+	skb_reset_network_header(skb);
+	nhoff = skb_network_header(skb) - skb_mac_header(skb);
 	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
 		goto out;
 
@@ -1312,7 +1315,10 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		goto out;
 	__skb_pull(skb, ihl);
 
-	tunnel = !!skb->encapsulation;
+	tunnel = SKB_GSO_CB(skb)->encap_level > 0;
+	if (tunnel)
+		features = skb->dev->hw_enc_features & netif_skb_features(skb);
+	SKB_GSO_CB(skb)->encap_level += ihl;
 
 	skb_reset_transport_header(skb);
 
@@ -1327,18 +1333,23 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 
 	skb = segs;
 	do {
-		iph = ip_hdr(skb);
+		iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);
 		if (!tunnel && proto == IPPROTO_UDP) {
 			iph->id = htons(id);
 			iph->frag_off = htons(offset >> 3);
 			if (skb->next != NULL)
 				iph->frag_off |= htons(IP_MF);
-			offset += (skb->len - skb->mac_len - iph->ihl * 4);
+			offset += skb->len - nhoff - ihl;
 		} else  {
 			iph->id = htons(id++);
 		}
-		iph->tot_len = htons(skb->len - skb->mac_len);
+		iph->tot_len = htons(skb->len - nhoff);
 		ip_send_check(iph);
+		if (tunnel) {
+			skb_reset_inner_headers(skb);
+			skb->encapsulation = 1;
+		}
+		skb->network_header = (u8 *)iph - skb->head;
 	} while ((skb = skb->next));
 
 out:
-- 
cgit v1.2.3-71-gd317


From cb32f511a70be8967ac9025cf49c44324ced9a39 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 19 Oct 2013 11:42:57 -0700
Subject: ipip: add GSO/TSO support

Now inet_gso_segment() is stackable, its relatively easy to
implement GSO/TSO support for IPIP

Performance results, when segmentation is done after tunnel
device (as no NIC is yet enabled for TSO IPIP support) :

Before patch :

lpq83:~# ./netperf -H 7.7.9.84 -Cc
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 7.7.9.84 () port 0 AF_INET
Recv   Send    Send                          Utilization       Service Demand
Socket Socket  Message  Elapsed              Send     Recv     Send    Recv
Size   Size    Size     Time     Throughput  local    remote   local   remote
bytes  bytes   bytes    secs.    10^6bits/s  % S      % S      us/KB   us/KB

 87380  16384  16384    10.00      3357.88   5.09     3.70     2.983   2.167

After patch :

lpq83:~# ./netperf -H 7.7.9.84 -Cc
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 7.7.9.84 () port 0 AF_INET
Recv   Send    Send                          Utilization       Service Demand
Socket Socket  Message  Elapsed              Send     Recv     Send    Recv
Size   Size    Size     Time     Throughput  local    remote   local   remote
bytes  bytes   bytes    secs.    10^6bits/s  % S      % S      us/KB   us/KB

 87380  16384  16384    10.00      7710.19   4.52     6.62     1.152   1.687

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h |  2 ++
 include/linux/skbuff.h          |  6 ++++--
 net/core/ethtool.c              |  1 +
 net/ipv4/af_inet.c              |  9 +++++++++
 net/ipv4/gre_offload.c          |  3 ++-
 net/ipv4/ipip.c                 | 11 ++++++-----
 net/ipv4/tcp_offload.c          |  1 +
 net/ipv4/udp_offload.c          |  1 +
 net/ipv6/ip6_offload.c          |  1 +
 net/ipv6/udp_offload.c          |  1 +
 net/mpls/mpls_gso.c             |  1 +
 11 files changed, 29 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index a2a89a5c7be5..8dad68cede1c 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -42,6 +42,7 @@ enum {
 	NETIF_F_TSO6_BIT,		/* ... TCPv6 segmentation */
 	NETIF_F_FSO_BIT,		/* ... FCoE segmentation */
 	NETIF_F_GSO_GRE_BIT,		/* ... GRE with TSO */
+	NETIF_F_GSO_IPIP_BIT,		/* ... IPIP tunnel with TSO */
 	NETIF_F_GSO_UDP_TUNNEL_BIT,	/* ... UDP TUNNEL with TSO */
 	NETIF_F_GSO_MPLS_BIT,		/* ... MPLS segmentation */
 	/**/NETIF_F_GSO_LAST =		/* last bit, see GSO_MASK */
@@ -107,6 +108,7 @@ enum {
 #define NETIF_F_RXFCS		__NETIF_F(RXFCS)
 #define NETIF_F_RXALL		__NETIF_F(RXALL)
 #define NETIF_F_GSO_GRE		__NETIF_F(GSO_GRE)
+#define NETIF_F_GSO_IPIP	__NETIF_F(GSO_IPIP)
 #define NETIF_F_GSO_UDP_TUNNEL	__NETIF_F(GSO_UDP_TUNNEL)
 #define NETIF_F_GSO_MPLS	__NETIF_F(GSO_MPLS)
 #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cad1e0c5cc04..60729134d253 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -318,9 +318,11 @@ enum {
 
 	SKB_GSO_GRE = 1 << 6,
 
-	SKB_GSO_UDP_TUNNEL = 1 << 7,
+	SKB_GSO_IPIP = 1 << 7,
 
-	SKB_GSO_MPLS = 1 << 8,
+	SKB_GSO_UDP_TUNNEL = 1 << 8,
+
+	SKB_GSO_MPLS = 1 << 9,
 };
 
 #if BITS_PER_LONG > 32
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 78e9d9223e40..8cab7744790e 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -81,6 +81,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_TSO6_BIT] =             "tx-tcp6-segmentation",
 	[NETIF_F_FSO_BIT] =              "tx-fcoe-segmentation",
 	[NETIF_F_GSO_GRE_BIT] =		 "tx-gre-segmentation",
+	[NETIF_F_GSO_IPIP_BIT] =	 "tx-ipip-segmentation",
 	[NETIF_F_GSO_UDP_TUNNEL_BIT] =	 "tx-udp_tnl-segmentation",
 	[NETIF_F_GSO_MPLS_BIT] =	 "tx-mpls-segmentation",
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5783ab5b5ef8..4049906010f7 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1291,6 +1291,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_GRE |
+		       SKB_GSO_IPIP |
 		       SKB_GSO_TCPV6 |
 		       SKB_GSO_UDP_TUNNEL |
 		       SKB_GSO_MPLS |
@@ -1656,6 +1657,13 @@ static struct packet_offload ip_packet_offload __read_mostly = {
 	},
 };
 
+static const struct net_offload ipip_offload = {
+	.callbacks = {
+		.gso_send_check = inet_gso_send_check,
+		.gso_segment	= inet_gso_segment,
+	},
+};
+
 static int __init ipv4_offload_init(void)
 {
 	/*
@@ -1667,6 +1675,7 @@ static int __init ipv4_offload_init(void)
 		pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
 
 	dev_add_offload(&ip_packet_offload);
+	inet_add_offload(&ipip_offload, IPPROTO_IPIP);
 	return 0;
 }
 
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 55e6bfb3a289..e5d436188464 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -39,7 +39,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 				  SKB_GSO_UDP |
 				  SKB_GSO_DODGY |
 				  SKB_GSO_TCP_ECN |
-				  SKB_GSO_GRE)))
+				  SKB_GSO_GRE |
+				  SKB_GSO_IPIP)))
 		goto out;
 
 	if (unlikely(!pskb_may_pull(skb, sizeof(*greh))))
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 7f80fb4b82d3..fe3e9f7f1f0b 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -220,17 +220,17 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (unlikely(skb->protocol != htons(ETH_P_IP)))
 		goto tx_error;
 
-	if (likely(!skb->encapsulation)) {
-		skb_reset_inner_headers(skb);
-		skb->encapsulation = 1;
-	}
+	skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+	if (IS_ERR(skb))
+		goto out;
 
 	ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
 	return NETDEV_TX_OK;
 
 tx_error:
-	dev->stats.tx_errors++;
 	dev_kfree_skb(skb);
+out:
+	dev->stats.tx_errors++;
 	return NETDEV_TX_OK;
 }
 
@@ -275,6 +275,7 @@ static const struct net_device_ops ipip_netdev_ops = {
 #define IPIP_FEATURES (NETIF_F_SG |		\
 		       NETIF_F_FRAGLIST |	\
 		       NETIF_F_HIGHDMA |	\
+		       NETIF_F_GSO_SOFTWARE |	\
 		       NETIF_F_HW_CSUM)
 
 static void ipip_tunnel_setup(struct net_device *dev)
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 8e3113f46ec1..dfc96b00673e 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -56,6 +56,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 			       SKB_GSO_TCP_ECN |
 			       SKB_GSO_TCPV6 |
 			       SKB_GSO_GRE |
+			       SKB_GSO_IPIP |
 			       SKB_GSO_MPLS |
 			       SKB_GSO_UDP_TUNNEL |
 			       0) ||
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index f35eccaa855e..83206de2bc76 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -52,6 +52,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 
 		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
 				      SKB_GSO_UDP_TUNNEL |
+				      SKB_GSO_IPIP |
 				      SKB_GSO_GRE | SKB_GSO_MPLS) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index b405fba91c72..5c2fc1d04196 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -96,6 +96,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_GRE |
+		       SKB_GSO_IPIP |
 		       SKB_GSO_UDP_TUNNEL |
 		       SKB_GSO_MPLS |
 		       SKB_GSO_TCPV6 |
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 60559511bd9c..f63780ff3732 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -64,6 +64,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 				      SKB_GSO_DODGY |
 				      SKB_GSO_UDP_TUNNEL |
 				      SKB_GSO_GRE |
+				      SKB_GSO_IPIP |
 				      SKB_GSO_MPLS) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index 1bec1219ab81..851cd880b0c0 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -33,6 +33,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
 				  SKB_GSO_DODGY |
 				  SKB_GSO_TCP_ECN |
 				  SKB_GSO_GRE |
+				  SKB_GSO_IPIP |
 				  SKB_GSO_MPLS)))
 		goto out;
 
-- 
cgit v1.2.3-71-gd317


From c4b2c0c5f647aa1093e8f9097a30c17ce0f94d4d Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sat, 19 Oct 2013 21:48:53 +0200
Subject: static_key: WARN on usage before jump_label_init was called

Usage of the static key primitives to toggle a branch must not be used
before jump_label_init() is called from init/main.c. jump_label_init
reorganizes and wires up the jump_entries so usage before that could
have unforeseen consequences.

Following primitives are now checked for correct use:
* static_key_slow_inc
* static_key_slow_dec
* static_key_slow_dec_deferred
* jump_label_rate_limit

The x86 architecture already checks this by testing if the default_nop
was already replaced with an optimal nop or with a branch instruction. It
will panic then. Other architectures don't check for this.

Because we need to relax this check for the x86 arch to allow code to
transition from default_nop to the enabled state and other architectures
did not check for this at all this patch introduces checking on the
static_key primitives in a non-arch dependent manner.

All checked functions are considered slow-path so the additional check
does no harm to performance.

The warnings are best observed with earlyprintk.

Based on a patch from Andi Kleen.

Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/jump_label.h           | 10 ++++++++++
 include/linux/jump_label_ratelimit.h |  2 ++
 init/main.c                          |  7 +++++++
 kernel/jump_label.c                  |  5 +++++
 4 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index a5079072da66..e96be7245717 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -48,6 +48,13 @@
 
 #include <linux/types.h>
 #include <linux/compiler.h>
+#include <linux/bug.h>
+
+extern bool static_key_initialized;
+
+#define STATIC_KEY_CHECK_USE() WARN(!static_key_initialized,		      \
+				    "%s used before call to jump_label_init", \
+				    __func__)
 
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
 
@@ -128,6 +135,7 @@ struct static_key {
 
 static __always_inline void jump_label_init(void)
 {
+	static_key_initialized = true;
 }
 
 static __always_inline bool static_key_false(struct static_key *key)
@@ -146,11 +154,13 @@ static __always_inline bool static_key_true(struct static_key *key)
 
 static inline void static_key_slow_inc(struct static_key *key)
 {
+	STATIC_KEY_CHECK_USE();
 	atomic_inc(&key->enabled);
 }
 
 static inline void static_key_slow_dec(struct static_key *key)
 {
+	STATIC_KEY_CHECK_USE();
 	atomic_dec(&key->enabled);
 }
 
diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h
index 113788389b3d..089f70f83e97 100644
--- a/include/linux/jump_label_ratelimit.h
+++ b/include/linux/jump_label_ratelimit.h
@@ -23,12 +23,14 @@ struct static_key_deferred {
 };
 static inline void static_key_slow_dec_deferred(struct static_key_deferred *key)
 {
+	STATIC_KEY_CHECK_USE();
 	static_key_slow_dec(&key->key);
 }
 static inline void
 jump_label_rate_limit(struct static_key_deferred *key,
 		unsigned long rl)
 {
+	STATIC_KEY_CHECK_USE();
 }
 #endif	/* HAVE_JUMP_LABEL */
 #endif	/* _LINUX_JUMP_LABEL_RATELIMIT_H */
diff --git a/init/main.c b/init/main.c
index af310afbef28..27bbec1a5b35 100644
--- a/init/main.c
+++ b/init/main.c
@@ -135,6 +135,13 @@ static char *static_command_line;
 static char *execute_command;
 static char *ramdisk_execute_command;
 
+/*
+ * Used to generate warnings if static_key manipulation functions are used
+ * before jump_label_init is called.
+ */
+bool static_key_initialized __read_mostly = false;
+EXPORT_SYMBOL_GPL(static_key_initialized);
+
 /*
  * If set, this is an indication to the drivers that reset the underlying
  * device before going ahead with the initialization otherwise driver might
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 297a9247a3b3..9019f15deab2 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -58,6 +58,7 @@ static void jump_label_update(struct static_key *key, int enable);
 
 void static_key_slow_inc(struct static_key *key)
 {
+	STATIC_KEY_CHECK_USE();
 	if (atomic_inc_not_zero(&key->enabled))
 		return;
 
@@ -103,12 +104,14 @@ static void jump_label_update_timeout(struct work_struct *work)
 
 void static_key_slow_dec(struct static_key *key)
 {
+	STATIC_KEY_CHECK_USE();
 	__static_key_slow_dec(key, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(static_key_slow_dec);
 
 void static_key_slow_dec_deferred(struct static_key_deferred *key)
 {
+	STATIC_KEY_CHECK_USE();
 	__static_key_slow_dec(&key->key, key->timeout, &key->work);
 }
 EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred);
@@ -116,6 +119,7 @@ EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred);
 void jump_label_rate_limit(struct static_key_deferred *key,
 		unsigned long rl)
 {
+	STATIC_KEY_CHECK_USE();
 	key->timeout = rl;
 	INIT_DELAYED_WORK(&key->work, jump_label_update_timeout);
 }
@@ -212,6 +216,7 @@ void __init jump_label_init(void)
 		key->next = NULL;
 #endif
 	}
+	static_key_initialized = true;
 	jump_label_unlock();
 }
 
-- 
cgit v1.2.3-71-gd317


From a48e42920ff38bc90bbf75143fff4555723d4540 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sat, 19 Oct 2013 21:48:55 +0200
Subject: net: introduce new macro net_get_random_once

net_get_random_once is a new macro which handles the initialization
of secret keys. It is possible to call it in the fast path. Only the
initialization depends on the spinlock and is rather slow. Otherwise
it should get used just before the key is used to delay the entropy
extration as late as possible to get better randomness. It returns true
if the key got initialized.

The usage of static_keys for net_get_random_once is a bit uncommon so
it needs some further explanation why this actually works:

=== In the simple non-HAVE_JUMP_LABEL case we actually have ===
no constrains to use static_key_(true|false) on keys initialized with
STATIC_KEY_INIT_(FALSE|TRUE). So this path just expands in favor of
the likely case that the initialization is already done. The key is
initialized like this:

___done_key = { .enabled = ATOMIC_INIT(0) }

The check

                if (!static_key_true(&___done_key))                     \

expands into (pseudo code)

                if (!likely(___done_key > 0))

, so we take the fast path as soon as ___done_key is increased from the
helper function.

=== If HAVE_JUMP_LABELs are available this depends ===
on patching of jumps into the prepared NOPs, which is done in
jump_label_init at boot-up time (from start_kernel). It is forbidden
and dangerous to use net_get_random_once in functions which are called
before that!

At compilation time NOPs are generated at the call sites of
net_get_random_once. E.g. net/ipv6/inet6_hashtable.c:inet6_ehashfn (we
need to call net_get_random_once two times in inet6_ehashfn, so two NOPs):

      71:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
      76:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)

Both will be patched to the actual jumps to the end of the function to
call __net_get_random_once at boot time as explained above.

arch_static_branch is optimized and inlined for false as return value and
actually also returns false in case the NOP is placed in the instruction
stream. So in the fast case we get a "return false". But because we
initialize ___done_key with (enabled != (entries & 1)) this call-site
will get patched up at boot thus returning true. The final check looks
like this:

                if (!static_key_true(&___done_key))                     \
                        ___ret = __net_get_random_once(buf,             \

expands to

                if (!!static_key_false(&___done_key))                     \
                        ___ret = __net_get_random_once(buf,             \

So we get true at boot time and as soon as static_key_slow_inc is called
on the key it will invert the logic and return false for the fast path.
static_key_slow_inc will change the branch because it got initialized
with .enabled == 0. After static_key_slow_inc is called on the key the
branch is replaced with a nop again.

=== Misc: ===
The helper defers the increment into a workqueue so we don't
have problems calling this code from atomic sections. A seperate boolean
(___done) guards the case where we enter net_get_random_once again before
the increment happend.

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Eric Dumazet <edumazet@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 25 +++++++++++++++++++++++++
 net/core/utils.c    | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index ca9ec8540905..a489705f6fa3 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -239,6 +239,31 @@ do {								\
 #define net_random()		prandom_u32()
 #define net_srandom(seed)	prandom_seed((__force u32)(seed))
 
+bool __net_get_random_once(void *buf, int nbytes, bool *done,
+			   struct static_key *done_key);
+
+#ifdef HAVE_JUMP_LABEL
+#define ___NET_RANDOM_STATIC_KEY_INIT ((struct static_key) \
+		{ .enabled = ATOMIC_INIT(0), .entries = (void *)1 })
+#else /* !HAVE_JUMP_LABEL */
+#define ___NET_RANDOM_STATIC_KEY_INIT STATIC_KEY_INIT_FALSE
+#endif /* HAVE_JUMP_LABEL */
+
+/* BE CAREFUL: this function is not interrupt safe */
+#define net_get_random_once(buf, nbytes)				\
+	({								\
+		bool ___ret = false;					\
+		static bool ___done = false;				\
+		static struct static_key ___done_key =			\
+			___NET_RANDOM_STATIC_KEY_INIT;			\
+		if (!static_key_true(&___done_key))			\
+			___ret = __net_get_random_once(buf,		\
+						       nbytes,		\
+						       &___done,	\
+						       &___done_key);	\
+		___ret;							\
+	})
+
 int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
 		   size_t num, size_t len);
 int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
diff --git a/net/core/utils.c b/net/core/utils.c
index aa88e23fc87a..bf09371e19b1 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -338,3 +338,51 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
 				  csum_unfold(*sum)));
 }
 EXPORT_SYMBOL(inet_proto_csum_replace16);
+
+struct __net_random_once_work {
+	struct work_struct work;
+	struct static_key *key;
+};
+
+static void __net_random_once_deferred(struct work_struct *w)
+{
+	struct __net_random_once_work *work =
+		container_of(w, struct __net_random_once_work, work);
+	if (!static_key_enabled(work->key))
+		static_key_slow_inc(work->key);
+	kfree(work);
+}
+
+static void __net_random_once_disable_jump(struct static_key *key)
+{
+	struct __net_random_once_work *w;
+
+	w = kmalloc(sizeof(*w), GFP_ATOMIC);
+	if (!w)
+		return;
+
+	INIT_WORK(&w->work, __net_random_once_deferred);
+	w->key = key;
+	schedule_work(&w->work);
+}
+
+bool __net_get_random_once(void *buf, int nbytes, bool *done,
+			   struct static_key *done_key)
+{
+	static DEFINE_SPINLOCK(lock);
+
+	spin_lock_bh(&lock);
+	if (*done) {
+		spin_unlock_bh(&lock);
+		return false;
+	}
+
+	get_random_bytes(buf, nbytes);
+	*done = true;
+	spin_unlock_bh(&lock);
+
+	__net_random_once_disable_jump(done_key);
+
+	return true;
+}
+EXPORT_SYMBOL(__net_get_random_once);
-- 
cgit v1.2.3-71-gd317


From c68c7f5a88328fbcd992c68e99ebd6bf7d49e9d2 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sun, 20 Oct 2013 06:26:02 +0200
Subject: net: fix build warnings because of net_get_random_once merge

This patch fixes the following warning:

   In file included from include/linux/skbuff.h:27:0,
                    from include/linux/netfilter.h:5,
                    from include/net/netns/netfilter.h:5,
                    from include/net/net_namespace.h:20,
                    from include/linux/init_task.h:14,
                    from init/init_task.c:1:
include/linux/net.h:243:14: warning: 'struct static_key' declared inside parameter list [enabled by default]
          struct static_key *done_key);

on x86_64 allnoconfig, um defconfig and ia64 allmodconfig and maybe others as well.

Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index a489705f6fa3..aca446b46754 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -24,6 +24,7 @@
 #include <linux/fcntl.h>	/* For O_CLOEXEC and O_NONBLOCK */
 #include <linux/kmemcheck.h>
 #include <linux/rcupdate.h>
+#include <linux/jump_label.h>
 #include <uapi/linux/net.h>
 
 struct poll_table_struct;
-- 
cgit v1.2.3-71-gd317


From 61c1db7fae21ed33c614356a43bf6580c5e53118 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 20 Oct 2013 20:47:30 -0700
Subject: ipv6: sit: add GSO/TSO support

Now ipv6_gso_segment() is stackable, its relatively easy to
implement GSO/TSO support for SIT tunnels

Performance results, when segmentation is done after tunnel
device (as no NIC is yet enabled for TSO SIT support) :

Before patch :

lpq84:~# ./netperf -H 2002:af6:1153:: -Cc
MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to 2002:af6:1153:: () port 0 AF_INET6
Recv   Send    Send                          Utilization       Service Demand
Socket Socket  Message  Elapsed              Send     Recv     Send    Recv
Size   Size    Size     Time     Throughput  local    remote   local   remote
bytes  bytes   bytes    secs.    10^6bits/s  % S      % S      us/KB   us/KB

 87380  16384  16384    10.00      3168.31   4.81     4.64     2.988   2.877

After patch :

lpq84:~# ./netperf -H 2002:af6:1153:: -Cc
MIGRATED TCP STREAM TEST from ::0 (::) port 0 AF_INET6 to 2002:af6:1153:: () port 0 AF_INET6
Recv   Send    Send                          Utilization       Service Demand
Socket Socket  Message  Elapsed              Send     Recv     Send    Recv
Size   Size    Size     Time     Throughput  local    remote   local   remote
bytes  bytes   bytes    secs.    10^6bits/s  % S      % S      us/KB   us/KB

 87380  16384  16384    10.00      5525.00   7.76     5.17     2.763   1.840

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h |  2 ++
 include/linux/skbuff.h          |  6 ++++--
 net/core/ethtool.c              |  1 +
 net/ipv4/af_inet.c              |  1 +
 net/ipv4/tcp_offload.c          |  1 +
 net/ipv6/ip6_offload.c          | 11 +++++++++++
 net/ipv6/sit.c                  | 28 +++++++++++++++++++---------
 net/ipv6/udp_offload.c          |  1 +
 8 files changed, 40 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 8dad68cede1c..b05a4b501ab5 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -43,6 +43,7 @@ enum {
 	NETIF_F_FSO_BIT,		/* ... FCoE segmentation */
 	NETIF_F_GSO_GRE_BIT,		/* ... GRE with TSO */
 	NETIF_F_GSO_IPIP_BIT,		/* ... IPIP tunnel with TSO */
+	NETIF_F_GSO_SIT_BIT,		/* ... SIT tunnel with TSO */
 	NETIF_F_GSO_UDP_TUNNEL_BIT,	/* ... UDP TUNNEL with TSO */
 	NETIF_F_GSO_MPLS_BIT,		/* ... MPLS segmentation */
 	/**/NETIF_F_GSO_LAST =		/* last bit, see GSO_MASK */
@@ -109,6 +110,7 @@ enum {
 #define NETIF_F_RXALL		__NETIF_F(RXALL)
 #define NETIF_F_GSO_GRE		__NETIF_F(GSO_GRE)
 #define NETIF_F_GSO_IPIP	__NETIF_F(GSO_IPIP)
+#define NETIF_F_GSO_SIT		__NETIF_F(GSO_SIT)
 #define NETIF_F_GSO_UDP_TUNNEL	__NETIF_F(GSO_UDP_TUNNEL)
 #define NETIF_F_GSO_MPLS	__NETIF_F(GSO_MPLS)
 #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 60729134d253..2c154976394b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -320,9 +320,11 @@ enum {
 
 	SKB_GSO_IPIP = 1 << 7,
 
-	SKB_GSO_UDP_TUNNEL = 1 << 8,
+	SKB_GSO_SIT = 1 << 8,
 
-	SKB_GSO_MPLS = 1 << 9,
+	SKB_GSO_UDP_TUNNEL = 1 << 9,
+
+	SKB_GSO_MPLS = 1 << 10,
 };
 
 #if BITS_PER_LONG > 32
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 8cab7744790e..862989898f61 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -82,6 +82,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_FSO_BIT] =              "tx-fcoe-segmentation",
 	[NETIF_F_GSO_GRE_BIT] =		 "tx-gre-segmentation",
 	[NETIF_F_GSO_IPIP_BIT] =	 "tx-ipip-segmentation",
+	[NETIF_F_GSO_SIT_BIT] =		 "tx-sit-segmentation",
 	[NETIF_F_GSO_UDP_TUNNEL_BIT] =	 "tx-udp_tnl-segmentation",
 	[NETIF_F_GSO_MPLS_BIT] =	 "tx-mpls-segmentation",
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 24a53fc275b0..f4a159e705c0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1265,6 +1265,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_GRE |
 		       SKB_GSO_IPIP |
+		       SKB_GSO_SIT |
 		       SKB_GSO_TCPV6 |
 		       SKB_GSO_UDP_TUNNEL |
 		       SKB_GSO_MPLS |
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index dfc96b00673e..a7a5583eab04 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -57,6 +57,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 			       SKB_GSO_TCPV6 |
 			       SKB_GSO_GRE |
 			       SKB_GSO_IPIP |
+			       SKB_GSO_SIT |
 			       SKB_GSO_MPLS |
 			       SKB_GSO_UDP_TUNNEL |
 			       0) ||
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index f9b33d82bb9d..4b851692b1f6 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -98,6 +98,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_GRE |
 		       SKB_GSO_IPIP |
+		       SKB_GSO_SIT |
 		       SKB_GSO_UDP_TUNNEL |
 		       SKB_GSO_MPLS |
 		       SKB_GSO_TCPV6 |
@@ -276,6 +277,13 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {
 	},
 };
 
+static const struct net_offload sit_offload = {
+	.callbacks = {
+		.gso_send_check = ipv6_gso_send_check,
+		.gso_segment	= ipv6_gso_segment,
+	},
+};
+
 static int __init ipv6_offload_init(void)
 {
 
@@ -287,6 +295,9 @@ static int __init ipv6_offload_init(void)
 		pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);
 
 	dev_add_offload(&ipv6_packet_offload);
+
+	inet_add_offload(&sit_offload, IPPROTO_IPV6);
+
 	return 0;
 }
 
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 19269453a8ea..3a9038dd818d 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -933,10 +933,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 		ttl = iph6->hop_limit;
 	tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
 
-	if (likely(!skb->encapsulation)) {
-		skb_reset_inner_headers(skb);
-		skb->encapsulation = 1;
-	}
+	skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT);
+	if (IS_ERR(skb))
+		goto out;
 
 	err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos,
 			    ttl, df, !net_eq(tunnel->net, dev_net(dev)));
@@ -946,8 +945,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 tx_error_icmp:
 	dst_link_failure(skb);
 tx_error:
-	dev->stats.tx_errors++;
 	dev_kfree_skb(skb);
+out:
+	dev->stats.tx_errors++;
 	return NETDEV_TX_OK;
 }
 
@@ -956,13 +956,15 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	const struct iphdr  *tiph = &tunnel->parms.iph;
 
-	if (likely(!skb->encapsulation)) {
-		skb_reset_inner_headers(skb);
-		skb->encapsulation = 1;
-	}
+	skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+	if (IS_ERR(skb))
+		goto out;
 
 	ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
 	return NETDEV_TX_OK;
+out:
+	dev->stats.tx_errors++;
+	return NETDEV_TX_OK;
 }
 
 static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
@@ -1292,6 +1294,12 @@ static void ipip6_dev_free(struct net_device *dev)
 	free_netdev(dev);
 }
 
+#define SIT_FEATURES (NETIF_F_SG	   | \
+		      NETIF_F_FRAGLIST	   | \
+		      NETIF_F_HIGHDMA	   | \
+		      NETIF_F_GSO_SOFTWARE | \
+		      NETIF_F_HW_CSUM)
+
 static void ipip6_tunnel_setup(struct net_device *dev)
 {
 	dev->netdev_ops		= &ipip6_netdev_ops;
@@ -1305,6 +1313,8 @@ static void ipip6_tunnel_setup(struct net_device *dev)
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
 	dev->features		|= NETIF_F_LLTX;
+	dev->features		|= SIT_FEATURES;
+	dev->hw_features	|= SIT_FEATURES;
 }
 
 static int ipip6_tunnel_init(struct net_device *dev)
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index f63780ff3732..08e23b0bf302 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -65,6 +65,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 				      SKB_GSO_UDP_TUNNEL |
 				      SKB_GSO_GRE |
 				      SKB_GSO_IPIP |
+				      SKB_GSO_SIT |
 				      SKB_GSO_MPLS) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
-- 
cgit v1.2.3-71-gd317


From 93302880d8a3e5dc6b7da3f9825beb839152c940 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Fri, 18 Oct 2013 11:41:55 +0200
Subject: netfilter: ipset: Use netlink callback dump args only

Instead of cb->data, use callback dump args only and introduce symbolic
names instead of plain numbers at accessing the argument members.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set.h  | 10 +++++
 net/netfilter/ipset/ip_set_bitmap_gen.h | 11 +++---
 net/netfilter/ipset/ip_set_core.c       | 70 ++++++++++++++++-----------------
 net/netfilter/ipset/ip_set_hash_gen.h   | 20 +++++-----
 net/netfilter/ipset/ip_set_list_set.c   | 11 +++---
 5 files changed, 68 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 7967516adc0d..c7174b816674 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -316,6 +316,16 @@ ip_set_init_counter(struct ip_set_counter *counter,
 		atomic64_set(&(counter)->packets, (long long)(ext->packets));
 }
 
+/* Netlink CB args */
+enum {
+	IPSET_CB_NET = 0,
+	IPSET_CB_DUMP,
+	IPSET_CB_INDEX,
+	IPSET_CB_ARG0,
+	IPSET_CB_ARG1,
+	IPSET_CB_ARG2,
+};
+
 /* register and unregister set references */
 extern ip_set_id_t ip_set_get_byname(struct net *net,
 				     const char *name, struct ip_set **set);
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index a13e15be7911..f2c7d83dc23f 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -198,13 +198,14 @@ mtype_list(const struct ip_set *set,
 	struct mtype *map = set->data;
 	struct nlattr *adt, *nested;
 	void *x;
-	u32 id, first = cb->args[2];
+	u32 id, first = cb->args[IPSET_CB_ARG0];
 
 	adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
 	if (!adt)
 		return -EMSGSIZE;
-	for (; cb->args[2] < map->elements; cb->args[2]++) {
-		id = cb->args[2];
+	for (; cb->args[IPSET_CB_ARG0] < map->elements;
+	     cb->args[IPSET_CB_ARG0]++) {
+		id = cb->args[IPSET_CB_ARG0];
 		x = get_ext(set, map, id);
 		if (!test_bit(id, map->members) ||
 		    (SET_WITH_TIMEOUT(set) &&
@@ -231,14 +232,14 @@ mtype_list(const struct ip_set *set,
 	ipset_nest_end(skb, adt);
 
 	/* Set listing finished */
-	cb->args[2] = 0;
+	cb->args[IPSET_CB_ARG0] = 0;
 
 	return 0;
 
 nla_put_failure:
 	nla_nest_cancel(skb, nested);
 	if (unlikely(id == first)) {
-		cb->args[2] = 0;
+		cb->args[IPSET_CB_ARG0] = 0;
 		return -EMSGSIZE;
 	}
 	ipset_nest_end(skb, adt);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index dc9284bdd2dd..bac7e01df67f 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1182,10 +1182,12 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 static int
 ip_set_dump_done(struct netlink_callback *cb)
 {
-	struct ip_set_net *inst = (struct ip_set_net *)cb->data;
-	if (cb->args[2]) {
-		pr_debug("release set %s\n", nfnl_set(inst, cb->args[1])->name);
-		__ip_set_put_byindex(inst, (ip_set_id_t) cb->args[1]);
+	struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET];
+	if (cb->args[IPSET_CB_ARG0]) {
+		pr_debug("release set %s\n",
+			 nfnl_set(inst, cb->args[IPSET_CB_INDEX])->name);
+		__ip_set_put_byindex(inst,
+			(ip_set_id_t) cb->args[IPSET_CB_INDEX]);
 	}
 	return 0;
 }
@@ -1203,7 +1205,7 @@ dump_attrs(struct nlmsghdr *nlh)
 }
 
 static int
-dump_init(struct netlink_callback *cb)
+dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
 {
 	struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
 	int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
@@ -1211,15 +1213,15 @@ dump_init(struct netlink_callback *cb)
 	struct nlattr *attr = (void *)nlh + min_len;
 	u32 dump_type;
 	ip_set_id_t index;
-	struct ip_set_net *inst = (struct ip_set_net *)cb->data;
 
 	/* Second pass, so parser can't fail */
 	nla_parse(cda, IPSET_ATTR_CMD_MAX,
 		  attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
 
-	/* cb->args[0] : dump single set/all sets
-	 *         [1] : set index
-	 *         [..]: type specific
+	/* cb->args[IPSET_CB_NET]:	net namespace
+	 *         [IPSET_CB_DUMP]:	dump single set/all sets
+	 *         [IPSET_CB_INDEX]: 	set index
+	 *         [IPSET_CB_ARG0]:	type specific
 	 */
 
 	if (cda[IPSET_ATTR_SETNAME]) {
@@ -1231,7 +1233,7 @@ dump_init(struct netlink_callback *cb)
 			return -ENOENT;
 
 		dump_type = DUMP_ONE;
-		cb->args[1] = index;
+		cb->args[IPSET_CB_INDEX] = index;
 	} else
 		dump_type = DUMP_ALL;
 
@@ -1239,7 +1241,8 @@ dump_init(struct netlink_callback *cb)
 		u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
 		dump_type |= (f << 16);
 	}
-	cb->args[0] = dump_type;
+	cb->args[IPSET_CB_NET] = (unsigned long)inst;
+	cb->args[IPSET_CB_DUMP] = dump_type;
 
 	return 0;
 }
@@ -1251,12 +1254,12 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 	struct ip_set *set = NULL;
 	struct nlmsghdr *nlh = NULL;
 	unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
+	struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
 	u32 dump_type, dump_flags;
 	int ret = 0;
-	struct ip_set_net *inst = (struct ip_set_net *)cb->data;
 
-	if (!cb->args[0]) {
-		ret = dump_init(cb);
+	if (!cb->args[IPSET_CB_DUMP]) {
+		ret = dump_init(cb, inst);
 		if (ret < 0) {
 			nlh = nlmsg_hdr(cb->skb);
 			/* We have to create and send the error message
@@ -1267,17 +1270,18 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 		}
 	}
 
-	if (cb->args[1] >= inst->ip_set_max)
+	if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max)
 		goto out;
 
-	dump_type = DUMP_TYPE(cb->args[0]);
-	dump_flags = DUMP_FLAGS(cb->args[0]);
-	max = dump_type == DUMP_ONE ? cb->args[1] + 1 : inst->ip_set_max;
+	dump_type = DUMP_TYPE(cb->args[IPSET_CB_DUMP]);
+	dump_flags = DUMP_FLAGS(cb->args[IPSET_CB_DUMP]);
+	max = dump_type == DUMP_ONE ? cb->args[IPSET_CB_INDEX] + 1
+				    : inst->ip_set_max;
 dump_last:
-	pr_debug("args[0]: %u %u args[1]: %ld\n",
-		 dump_type, dump_flags, cb->args[1]);
-	for (; cb->args[1] < max; cb->args[1]++) {
-		index = (ip_set_id_t) cb->args[1];
+	pr_debug("dump type, flag: %u %u index: %ld\n",
+		 dump_type, dump_flags, cb->args[IPSET_CB_INDEX]);
+	for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) {
+		index = (ip_set_id_t) cb->args[IPSET_CB_INDEX];
 		set = nfnl_set(inst, index);
 		if (set == NULL) {
 			if (dump_type == DUMP_ONE) {
@@ -1294,7 +1298,7 @@ dump_last:
 		     !!(set->type->features & IPSET_DUMP_LAST)))
 			continue;
 		pr_debug("List set: %s\n", set->name);
-		if (!cb->args[2]) {
+		if (!cb->args[IPSET_CB_ARG0]) {
 			/* Start listing: make sure set won't be destroyed */
 			pr_debug("reference set\n");
 			__ip_set_get(set);
@@ -1311,7 +1315,7 @@ dump_last:
 			goto nla_put_failure;
 		if (dump_flags & IPSET_FLAG_LIST_SETNAME)
 			goto next_set;
-		switch (cb->args[2]) {
+		switch (cb->args[IPSET_CB_ARG0]) {
 		case 0:
 			/* Core header data */
 			if (nla_put_string(skb, IPSET_ATTR_TYPENAME,
@@ -1331,7 +1335,7 @@ dump_last:
 			read_lock_bh(&set->lock);
 			ret = set->variant->list(set, skb, cb);
 			read_unlock_bh(&set->lock);
-			if (!cb->args[2])
+			if (!cb->args[IPSET_CB_ARG0])
 				/* Set is done, proceed with next one */
 				goto next_set;
 			goto release_refcount;
@@ -1340,8 +1344,8 @@ dump_last:
 	/* If we dump all sets, continue with dumping last ones */
 	if (dump_type == DUMP_ALL) {
 		dump_type = DUMP_LAST;
-		cb->args[0] = dump_type | (dump_flags << 16);
-		cb->args[1] = 0;
+		cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16);
+		cb->args[IPSET_CB_INDEX] = 0;
 		goto dump_last;
 	}
 	goto out;
@@ -1350,15 +1354,15 @@ nla_put_failure:
 	ret = -EFAULT;
 next_set:
 	if (dump_type == DUMP_ONE)
-		cb->args[1] = IPSET_INVALID_ID;
+		cb->args[IPSET_CB_INDEX] = IPSET_INVALID_ID;
 	else
-		cb->args[1]++;
+		cb->args[IPSET_CB_INDEX]++;
 release_refcount:
 	/* If there was an error or set is done, release set */
-	if (ret || !cb->args[2]) {
+	if (ret || !cb->args[IPSET_CB_ARG0]) {
 		pr_debug("release set %s\n", nfnl_set(inst, index)->name);
 		__ip_set_put_byindex(inst, index);
-		cb->args[2] = 0;
+		cb->args[IPSET_CB_ARG0] = 0;
 	}
 out:
 	if (nlh) {
@@ -1375,8 +1379,6 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
 	    const struct nlmsghdr *nlh,
 	    const struct nlattr * const attr[])
 {
-	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
-
 	if (unlikely(protocol_failed(attr)))
 		return -IPSET_ERR_PROTOCOL;
 
@@ -1384,7 +1386,6 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
 		struct netlink_dump_control c = {
 			.dump = ip_set_dump_start,
 			.done = ip_set_dump_done,
-			.data = (void *)inst
 		};
 		return netlink_dump_start(ctnl, skb, nlh, &c);
 	}
@@ -1961,7 +1962,6 @@ static int __net_init
 ip_set_net_init(struct net *net)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
-
 	struct ip_set **list;
 
 	inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 6a80dbd30df7..2f80c74c871f 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -931,7 +931,7 @@ mtype_list(const struct ip_set *set,
 	struct nlattr *atd, *nested;
 	const struct hbucket *n;
 	const struct mtype_elem *e;
-	u32 first = cb->args[2];
+	u32 first = cb->args[IPSET_CB_ARG0];
 	/* We assume that one hash bucket fills into one page */
 	void *incomplete;
 	int i;
@@ -940,20 +940,22 @@ mtype_list(const struct ip_set *set,
 	if (!atd)
 		return -EMSGSIZE;
 	pr_debug("list hash set %s\n", set->name);
-	for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
+	for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
+	     cb->args[IPSET_CB_ARG0]++) {
 		incomplete = skb_tail_pointer(skb);
-		n = hbucket(t, cb->args[2]);
-		pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
+		n = hbucket(t, cb->args[IPSET_CB_ARG0]);
+		pr_debug("cb->arg bucket: %lu, t %p n %p\n",
+			 cb->args[IPSET_CB_ARG0], t, n);
 		for (i = 0; i < n->pos; i++) {
 			e = ahash_data(n, i, set->dsize);
 			if (SET_WITH_TIMEOUT(set) &&
 			    ip_set_timeout_expired(ext_timeout(e, set)))
 				continue;
 			pr_debug("list hash %lu hbucket %p i %u, data %p\n",
-				 cb->args[2], n, i, e);
+				 cb->args[IPSET_CB_ARG0], n, i, e);
 			nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 			if (!nested) {
-				if (cb->args[2] == first) {
+				if (cb->args[IPSET_CB_ARG0] == first) {
 					nla_nest_cancel(skb, atd);
 					return -EMSGSIZE;
 				} else
@@ -968,16 +970,16 @@ mtype_list(const struct ip_set *set,
 	}
 	ipset_nest_end(skb, atd);
 	/* Set listing finished */
-	cb->args[2] = 0;
+	cb->args[IPSET_CB_ARG0] = 0;
 
 	return 0;
 
 nla_put_failure:
 	nlmsg_trim(skb, incomplete);
-	if (unlikely(first == cb->args[2])) {
+	if (unlikely(first == cb->args[IPSET_CB_ARG0])) {
 		pr_warning("Can't list set %s: one bucket does not fit into "
 			   "a message. Please report it!\n", set->name);
-		cb->args[2] = 0;
+		cb->args[IPSET_CB_ARG0] = 0;
 		return -EMSGSIZE;
 	}
 	ipset_nest_end(skb, atd);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index ec6f6d15dded..3e2317f3cf68 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -490,14 +490,15 @@ list_set_list(const struct ip_set *set,
 {
 	const struct list_set *map = set->data;
 	struct nlattr *atd, *nested;
-	u32 i, first = cb->args[2];
+	u32 i, first = cb->args[IPSET_CB_ARG0];
 	const struct set_elem *e;
 
 	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
 	if (!atd)
 		return -EMSGSIZE;
-	for (; cb->args[2] < map->size; cb->args[2]++) {
-		i = cb->args[2];
+	for (; cb->args[IPSET_CB_ARG0] < map->size;
+	     cb->args[IPSET_CB_ARG0]++) {
+		i = cb->args[IPSET_CB_ARG0];
 		e = list_set_elem(set, map, i);
 		if (e->id == IPSET_INVALID_ID)
 			goto finish;
@@ -522,13 +523,13 @@ list_set_list(const struct ip_set *set,
 finish:
 	ipset_nest_end(skb, atd);
 	/* Set listing finished */
-	cb->args[2] = 0;
+	cb->args[IPSET_CB_ARG0] = 0;
 	return 0;
 
 nla_put_failure:
 	nla_nest_cancel(skb, nested);
 	if (unlikely(i == first)) {
-		cb->args[2] = 0;
+		cb->args[IPSET_CB_ARG0] = 0;
 		return -EMSGSIZE;
 	}
 	ipset_nest_end(skb, atd);
-- 
cgit v1.2.3-71-gd317


From f84be2bd96a108b09c8440263fa3adb3fb225fa3 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Wed, 23 Oct 2013 20:05:27 +0200
Subject: net: make net_get_random_once irq safe

I initial build non irq safe version of net_get_random_once because I
would liked to have the freedom to defer even the extraction process of
get_random_bytes until the nonblocking pool is fully seeded.

I don't think this is a good idea anymore and thus this patch makes
net_get_random_once irq safe. Now someone using net_get_random_once does
not need to care from where it is called.

Cc: David S. Miller <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 1 -
 net/core/utils.c    | 7 ++++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index aca446b46754..b292a0435571 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -250,7 +250,6 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done,
 #define ___NET_RANDOM_STATIC_KEY_INIT STATIC_KEY_INIT_FALSE
 #endif /* HAVE_JUMP_LABEL */
 
-/* BE CAREFUL: this function is not interrupt safe */
 #define net_get_random_once(buf, nbytes)				\
 	({								\
 		bool ___ret = false;					\
diff --git a/net/core/utils.c b/net/core/utils.c
index bf09371e19b1..2f737bf90b3f 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -370,16 +370,17 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done,
 			   struct static_key *done_key)
 {
 	static DEFINE_SPINLOCK(lock);
+	unsigned long flags;
 
-	spin_lock_bh(&lock);
+	spin_lock_irqsave(&lock, flags);
 	if (*done) {
-		spin_unlock_bh(&lock);
+		spin_unlock_irqrestore(&lock, flags);
 		return false;
 	}
 
 	get_random_bytes(buf, nbytes);
 	*done = true;
-	spin_unlock_bh(&lock);
+	spin_unlock_irqrestore(&lock, flags);
 
 	__net_random_once_disable_jump(done_key);
 
-- 
cgit v1.2.3-71-gd317


From 7f29405403d7c17f539c099987972b862e7e5255 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 23 Oct 2013 16:02:42 -0700
Subject: net: fix rtnl notification in atomic context

commit 991fb3f74c "dev: always advertise rx_flags changes via netlink"
introduced rtnl notification from __dev_set_promiscuity(),
which can be called in atomic context.

Steps to reproduce:
ip tuntap add dev tap1 mode tap
ifconfig tap1 up
tcpdump -nei tap1 &
ip tuntap del dev tap1 mode tap

[  271.627994] device tap1 left promiscuous mode
[  271.639897] BUG: sleeping function called from invalid context at mm/slub.c:940
[  271.664491] in_atomic(): 1, irqs_disabled(): 0, pid: 3394, name: ip
[  271.677525] INFO: lockdep is turned off.
[  271.690503] CPU: 0 PID: 3394 Comm: ip Tainted: G        W    3.12.0-rc3+ #73
[  271.703996] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012
[  271.731254]  ffffffff81a58506 ffff8807f0d57a58 ffffffff817544e5 ffff88082fa0f428
[  271.760261]  ffff8808071f5f40 ffff8807f0d57a88 ffffffff8108bad1 ffffffff81110ff8
[  271.790683]  0000000000000010 00000000000000d0 00000000000000d0 ffff8807f0d57af8
[  271.822332] Call Trace:
[  271.838234]  [<ffffffff817544e5>] dump_stack+0x55/0x76
[  271.854446]  [<ffffffff8108bad1>] __might_sleep+0x181/0x240
[  271.870836]  [<ffffffff81110ff8>] ? rcu_irq_exit+0x68/0xb0
[  271.887076]  [<ffffffff811a80be>] kmem_cache_alloc_node+0x4e/0x2a0
[  271.903368]  [<ffffffff810b4ddc>] ? vprintk_emit+0x1dc/0x5a0
[  271.919716]  [<ffffffff81614d67>] ? __alloc_skb+0x57/0x2a0
[  271.936088]  [<ffffffff810b4de0>] ? vprintk_emit+0x1e0/0x5a0
[  271.952504]  [<ffffffff81614d67>] __alloc_skb+0x57/0x2a0
[  271.968902]  [<ffffffff8163a0b2>] rtmsg_ifinfo+0x52/0x100
[  271.985302]  [<ffffffff8162ac6d>] __dev_notify_flags+0xad/0xc0
[  272.001642]  [<ffffffff8162ad0c>] __dev_set_promiscuity+0x8c/0x1c0
[  272.017917]  [<ffffffff81731ea5>] ? packet_notifier+0x5/0x380
[  272.033961]  [<ffffffff8162b109>] dev_set_promiscuity+0x29/0x50
[  272.049855]  [<ffffffff8172e937>] packet_dev_mc+0x87/0xc0
[  272.065494]  [<ffffffff81732052>] packet_notifier+0x1b2/0x380
[  272.080915]  [<ffffffff81731ea5>] ? packet_notifier+0x5/0x380
[  272.096009]  [<ffffffff81761c66>] notifier_call_chain+0x66/0x150
[  272.110803]  [<ffffffff8108503e>] __raw_notifier_call_chain+0xe/0x10
[  272.125468]  [<ffffffff81085056>] raw_notifier_call_chain+0x16/0x20
[  272.139984]  [<ffffffff81620190>] call_netdevice_notifiers_info+0x40/0x70
[  272.154523]  [<ffffffff816201d6>] call_netdevice_notifiers+0x16/0x20
[  272.168552]  [<ffffffff816224c5>] rollback_registered_many+0x145/0x240
[  272.182263]  [<ffffffff81622641>] rollback_registered+0x31/0x40
[  272.195369]  [<ffffffff816229c8>] unregister_netdevice_queue+0x58/0x90
[  272.208230]  [<ffffffff81547ca0>] __tun_detach+0x140/0x340
[  272.220686]  [<ffffffff81547ed6>] tun_chr_close+0x36/0x60

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c |  4 ++--
 include/linux/rtnetlink.h       |  2 +-
 net/core/dev.c                  | 16 ++++++++--------
 net/core/rtnetlink.c            |  9 +++++----
 4 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2daa066c6cdd..a141f406cb98 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1213,7 +1213,7 @@ static int bond_master_upper_dev_link(struct net_device *bond_dev,
 	if (err)
 		return err;
 	slave_dev->flags |= IFF_SLAVE;
-	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE);
+	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);
 	return 0;
 }
 
@@ -1222,7 +1222,7 @@ static void bond_upper_dev_unlink(struct net_device *bond_dev,
 {
 	netdev_upper_dev_unlink(slave_dev, bond_dev);
 	slave_dev->flags &= ~IFF_SLAVE;
-	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE);
+	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);
 }
 
 /* enslave device <slave> to bond device <master> */
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index f28544b2f9af..939428ad25ac 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -15,7 +15,7 @@ extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
 extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst,
 			      u32 id, long expires, u32 error);
 
-extern void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change);
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags);
 
 /* RTNL is used as a global lock for all changes to network configuration  */
 extern void rtnl_lock(void);
diff --git a/net/core/dev.c b/net/core/dev.c
index bdffd654edc4..0054c8c75f50 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1203,7 +1203,7 @@ void netdev_state_change(struct net_device *dev)
 {
 	if (dev->flags & IFF_UP) {
 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
-		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
 	}
 }
 EXPORT_SYMBOL(netdev_state_change);
@@ -1293,7 +1293,7 @@ int dev_open(struct net_device *dev)
 	if (ret < 0)
 		return ret;
 
-	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
 	call_netdevice_notifiers(NETDEV_UP, dev);
 
 	return ret;
@@ -1371,7 +1371,7 @@ static int dev_close_many(struct list_head *head)
 	__dev_close_many(head);
 
 	list_for_each_entry_safe(dev, tmp, head, close_list) {
-		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
 		call_netdevice_notifiers(NETDEV_DOWN, dev);
 		list_del_init(&dev->close_list);
 	}
@@ -5258,7 +5258,7 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
 	unsigned int changes = dev->flags ^ old_flags;
 
 	if (gchanges)
-		rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
 
 	if (changes & IFF_UP) {
 		if (dev->flags & IFF_UP)
@@ -5490,7 +5490,7 @@ static void rollback_registered_many(struct list_head *head)
 
 		if (!dev->rtnl_link_ops ||
 		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
-			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
 
 		/*
 		 *	Flush the unicast and multicast chains
@@ -5889,7 +5889,7 @@ int register_netdevice(struct net_device *dev)
 	 */
 	if (!dev->rtnl_link_ops ||
 	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
-		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
 
 out:
 	return ret;
@@ -6501,7 +6501,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 	rcu_barrier();
 	call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
-	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
 
 	/*
 	 *	Flush the unicast and multicast chains
@@ -6540,7 +6540,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	 *	Prevent userspace races by waiting until the network
 	 *	device is fully setup before sending notifications.
 	 */
-	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
 
 	synchronize_net();
 	err = 0;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4aedf03da052..cf67144d3e3c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1984,14 +1984,15 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change)
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
+		  gfp_t flags)
 {
 	struct net *net = dev_net(dev);
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 	size_t if_info_size;
 
-	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL);
+	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), flags);
 	if (skb == NULL)
 		goto errout;
 
@@ -2002,7 +2003,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change)
 		kfree_skb(skb);
 		goto errout;
 	}
-	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags);
 	return;
 errout:
 	if (err < 0)
@@ -2716,7 +2717,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 	case NETDEV_JOIN:
 		break;
 	default:
-		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
 		break;
 	}
 	return NOTIFY_DONE;
-- 
cgit v1.2.3-71-gd317


From 8f2535b92d685c68db4bc699dd78462a646f6ef9 Mon Sep 17 00:00:00 2001
From: Chun-Yeow Yeoh <yeohchunyeow@cozybit.com>
Date: Mon, 14 Oct 2013 19:08:27 -0700
Subject: mac80211: process the CSA frame for mesh accordingly

Process the CSA frame according to the procedures define in IEEE Std
802.11-2012 section 10.9.8.4.3 as follow:
* The mesh channel switch parameters element (MCSP) must be availabe.
* If the MCSP's TTL is 1, drop the frame but still process the CSA.
* If the MCSP's precedence value is less than or equal to the current
  precedence value, drop the frame and do not process the CSA.
* The CSA frame is forwarded after TTL is decremented by 1 and the
  initiator field is set to 0. Transmit restrict field and others
  are maintained as is.
* No beacon or probe response frame are handled here.

Also, introduce the debug message used for mesh CSA purpose.

Signed-off-by: Chun-Yeow Yeoh <yeohchunyeow@cozybit.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h  | 20 +++++++++++
 net/mac80211/Kconfig       | 11 ++++++
 net/mac80211/debug.h       | 10 ++++++
 net/mac80211/ieee80211_i.h |  4 +++
 net/mac80211/mesh.c        | 83 ++++++++++++++++++++++++++++++++++++++++++++--
 net/mac80211/util.c        |  9 +++++
 6 files changed, 134 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 7c1e1ebc0e23..8c3b26a21574 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -696,6 +696,18 @@ struct ieee80211_sec_chan_offs_ie {
 	u8 sec_chan_offs;
 } __packed;
 
+/**
+ * struct ieee80211_mesh_chansw_params_ie - mesh channel switch parameters IE
+ *
+ * This structure represents the "Mesh Channel Switch Paramters element"
+ */
+struct ieee80211_mesh_chansw_params_ie {
+	u8 mesh_ttl;
+	u8 mesh_flags;
+	__le16 mesh_reason;
+	__le16 mesh_pre_value;
+} __packed;
+
 /**
  * struct ieee80211_wide_bw_chansw_ie - wide bandwidth channel switch IE
  */
@@ -750,6 +762,14 @@ enum mesh_config_capab_flags {
 	IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL	= 0x40,
 };
 
+/**
+ * mesh channel switch parameters element's flag indicator
+ *
+ */
+#define WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT BIT(0)
+#define WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR BIT(1)
+#define WLAN_EID_CHAN_SWITCH_PARAM_REASON BIT(2)
+
 /**
  * struct ieee80211_rann_ie
  *
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index dc31ec3db404..97b5dcad5025 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -259,6 +259,17 @@ config MAC80211_MESH_SYNC_DEBUG
 
 	  Do not select this option.
 
+config MAC80211_MESH_CSA_DEBUG
+	bool "Verbose mesh channel switch debugging"
+	depends on MAC80211_DEBUG_MENU
+	depends on MAC80211_MESH
+	---help---
+	  Selecting this option causes mac80211 to print out very verbose mesh
+	  channel switch debugging messages (when mac80211 is taking part in a
+	  mesh network).
+
+	  Do not select this option.
+
 config MAC80211_MESH_PS_DEBUG
 	bool "Verbose mesh powersave debugging"
 	depends on MAC80211_DEBUG_MENU
diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h
index 4ccc5ed6237d..493d68061f0c 100644
--- a/net/mac80211/debug.h
+++ b/net/mac80211/debug.h
@@ -44,6 +44,12 @@
 #define MAC80211_MESH_SYNC_DEBUG 0
 #endif
 
+#ifdef CONFIG_MAC80211_MESH_CSA_DEBUG
+#define MAC80211_MESH_CSA_DEBUG 1
+#else
+#define MAC80211_MESH_CSA_DEBUG 0
+#endif
+
 #ifdef CONFIG_MAC80211_MESH_PS_DEBUG
 #define MAC80211_MESH_PS_DEBUG 1
 #else
@@ -157,6 +163,10 @@ do {									\
 	_sdata_dbg(MAC80211_MESH_SYNC_DEBUG,				\
 		   sdata, fmt, ##__VA_ARGS__)
 
+#define mcsa_dbg(sdata, fmt, ...)					\
+	_sdata_dbg(MAC80211_MESH_CSA_DEBUG,				\
+		   sdata, fmt, ##__VA_ARGS__)
+
 #define mps_dbg(sdata, fmt, ...)					\
 	_sdata_dbg(MAC80211_MESH_PS_DEBUG,				\
 		   sdata, fmt, ##__VA_ARGS__)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index cbaea32bccf1..4ebbcc6f67e0 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -603,6 +603,9 @@ struct ieee80211_if_mesh {
 	int ps_peers_light_sleep;
 	int ps_peers_deep_sleep;
 	struct ps_data ps;
+	/* Channel Switching Support */
+	bool chsw_init;
+	u16 pre_value;
 };
 
 #ifdef CONFIG_MAC80211_MESH
@@ -1252,6 +1255,7 @@ struct ieee802_11_elems {
 	const struct ieee80211_timeout_interval_ie *timeout_int;
 	const u8 *opmode_notif;
 	const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
+	const struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie;
 
 	/* length of them, respectively */
 	u8 ssid_len;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 707ac61d63e5..0a3ccaa275f9 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -920,6 +920,82 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
 			stype, mgmt, &elems, rx_status);
 }
 
+static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata,
+			       struct ieee80211_mgmt *mgmt, size_t len)
+{
+	struct ieee80211_mgmt *mgmt_fwd;
+	struct sk_buff *skb;
+	struct ieee80211_local *local = sdata->local;
+	u8 *pos = mgmt->u.action.u.chan_switch.variable;
+	size_t offset_ttl;
+
+	skb = dev_alloc_skb(local->tx_headroom + len);
+	if (!skb)
+		return -ENOMEM;
+	skb_reserve(skb, local->tx_headroom);
+	mgmt_fwd = (struct ieee80211_mgmt *) skb_put(skb, len);
+
+	/* offset_ttl is based on whether the secondary channel
+	 * offset is available or not. Substract 1 from the mesh TTL
+	 * and disable the initiator flag before forwarding.
+	 */
+	offset_ttl = (len < 42) ? 7 : 10;
+	*(pos + offset_ttl) -= 1;
+	*(pos + offset_ttl + 1) &= ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR;
+
+	memcpy(mgmt_fwd, mgmt, len);
+	eth_broadcast_addr(mgmt_fwd->da);
+	memcpy(mgmt_fwd->sa, sdata->vif.addr, ETH_ALEN);
+	memcpy(mgmt_fwd->bssid, sdata->vif.addr, ETH_ALEN);
+
+	ieee80211_tx_skb(sdata, skb);
+	return 0;
+}
+
+static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata,
+			      struct ieee80211_mgmt *mgmt, size_t len)
+{
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+	struct ieee802_11_elems elems;
+	u16 pre_value;
+	bool block_tx, fwd_csa = true;
+	size_t baselen;
+	u8 *pos, ttl;
+
+	if (mgmt->u.action.u.measurement.action_code !=
+	    WLAN_ACTION_SPCT_CHL_SWITCH)
+		return;
+
+	pos = mgmt->u.action.u.chan_switch.variable;
+	baselen = offsetof(struct ieee80211_mgmt,
+			   u.action.u.chan_switch.variable);
+	ieee802_11_parse_elems(pos, len - baselen, false, &elems);
+
+	ttl = elems.mesh_chansw_params_ie->mesh_ttl;
+	if (!--ttl)
+		fwd_csa = false;
+
+	pre_value = le16_to_cpu(elems.mesh_chansw_params_ie->mesh_pre_value);
+	if (ifmsh->pre_value >= pre_value)
+		return;
+
+	ifmsh->pre_value = pre_value;
+
+	/* forward or re-broadcast the CSA frame */
+	if (fwd_csa) {
+		if (mesh_fwd_csa_frame(sdata, mgmt, len) < 0)
+			mcsa_dbg(sdata, "Failed to forward the CSA frame");
+	}
+
+	/* block the Tx only after forwarding the CSA frame if required */
+	block_tx = elems.mesh_chansw_params_ie->mesh_flags &
+		   WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT;
+	if (block_tx)
+		ieee80211_stop_queues_by_reason(&sdata->local->hw,
+				IEEE80211_MAX_QUEUE_MAP,
+				IEEE80211_QUEUE_STOP_REASON_CSA);
+}
+
 static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
 					  struct ieee80211_mgmt *mgmt,
 					  size_t len,
@@ -939,6 +1015,9 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
 		if (mesh_action_is_path_sel(mgmt))
 			mesh_rx_path_sel_frame(sdata, mgmt, len);
 		break;
+	case WLAN_CATEGORY_SPECTRUM_MGMT:
+		mesh_rx_csa_frame(sdata, mgmt, len);
+		break;
 	}
 }
 
@@ -1056,13 +1135,11 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 		    (unsigned long) sdata);
 
 	ifmsh->accepting_plinks = true;
-	ifmsh->preq_id = 0;
-	ifmsh->sn = 0;
-	ifmsh->num_gates = 0;
 	atomic_set(&ifmsh->mpaths, 0);
 	mesh_rmc_init(sdata);
 	ifmsh->last_preq = jiffies;
 	ifmsh->next_perr = jiffies;
+	ifmsh->chsw_init = false;
 	/* Allocate all mesh structures when creating the first mesh interface. */
 	if (!mesh_allocated)
 		ieee80211s_init();
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 65ebe0c5e835..523783cedf6e 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -740,6 +740,7 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 		case WLAN_EID_TIMEOUT_INTERVAL:
 		case WLAN_EID_SECONDARY_CHANNEL_OFFSET:
 		case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
+		case WLAN_EID_CHAN_SWITCH_PARAM:
 		/*
 		 * not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible
 		 * that if the content gets bigger it might be needed more than once
@@ -905,6 +906,14 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 			}
 			elems->sec_chan_offs = (void *)pos;
 			break;
+		case WLAN_EID_CHAN_SWITCH_PARAM:
+			if (elen !=
+			    sizeof(*elems->mesh_chansw_params_ie)) {
+				elem_parse_failed = true;
+				break;
+			}
+			elems->mesh_chansw_params_ie = (void *)pos;
+			break;
 		case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
 			if (!action ||
 			    elen != sizeof(*elems->wide_bw_chansw_ie)) {
-- 
cgit v1.2.3-71-gd317


From 5d9efa7ee99eed58388f186c13cf2e2a87e9ceb4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 28 Oct 2013 20:07:50 -0400
Subject: ipv6: Remove privacy config option.

The code for privacy extentions is very mature, and making it
configurable only gives marginal memory/code savings in exchange
for obfuscation and hard to read code via CPP ifdef'ery.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h   |  2 --
 include/net/if_inet6.h |  5 +----
 net/ipv6/Kconfig       | 18 ------------------
 net/ipv6/addrconf.c    | 41 +++--------------------------------------
 4 files changed, 4 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index a80a63cfb70c..5d89d1b808a6 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -21,13 +21,11 @@ struct ipv6_devconf {
 	__s32		force_mld_version;
 	__s32		mldv1_unsolicited_report_interval;
 	__s32		mldv2_unsolicited_report_interval;
-#ifdef CONFIG_IPV6_PRIVACY
 	__s32		use_tempaddr;
 	__s32		temp_valid_lft;
 	__s32		temp_prefered_lft;
 	__s32		regen_max_retry;
 	__s32		max_desync_factor;
-#endif
 	__s32		max_addresses;
 	__s32		accept_ra_defrtr;
 	__s32		accept_ra_pinfo;
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 02ef7727bb55..76d54270f2e2 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -66,11 +66,10 @@ struct inet6_ifaddr {
 	struct hlist_node	addr_lst;
 	struct list_head	if_list;
 
-#ifdef CONFIG_IPV6_PRIVACY
 	struct list_head	tmp_list;
 	struct inet6_ifaddr	*ifpub;
 	int			regen_count;
-#endif
+
 	bool			tokenized;
 
 	struct rcu_head		rcu;
@@ -192,11 +191,9 @@ struct inet6_dev {
 	__u32			if_flags;
 	int			dead;
 
-#ifdef CONFIG_IPV6_PRIVACY
 	u8			rndid[8];
 	struct timer_list	regen_timer;
 	struct list_head	tempaddr_list;
-#endif
 
 	struct in6_addr		token;
 
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index e1a8d903e366..d92e5586783e 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -21,24 +21,6 @@ menuconfig IPV6
 
 if IPV6
 
-config IPV6_PRIVACY
-	bool "IPv6: Privacy Extensions (RFC 3041) support"
-	---help---
-	  Privacy Extensions for Stateless Address Autoconfiguration in IPv6
-	  support.  With this option, additional periodically-altered
-	  pseudo-random global-scope unicast address(es) will be assigned to
-	  your interface(s).
-	
-	  We use our standard pseudo-random algorithm to generate the
-          randomized interface identifier, instead of one described in RFC 3041.
-
-	  By default the kernel does not generate temporary addresses.
-	  To use temporary addresses, do
-	
-	        echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr 
-
-	  See <file:Documentation/networking/ip-sysctl.txt> for details.
-
 config IPV6_ROUTER_PREF
 	bool "IPv6: Router Preference (RFC 4191) support"
 	---help---
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index cd3fb301da38..542d09561ed6 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -83,11 +83,7 @@
 #include <linux/if_tunnel.h>
 #include <linux/rtnetlink.h>
 #include <linux/netconf.h>
-
-#ifdef CONFIG_IPV6_PRIVACY
 #include <linux/random.h>
-#endif
-
 #include <linux/uaccess.h>
 #include <asm/unaligned.h>
 
@@ -124,11 +120,9 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
 }
 #endif
 
-#ifdef CONFIG_IPV6_PRIVACY
 static void __ipv6_regen_rndid(struct inet6_dev *idev);
 static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
 static void ipv6_regen_rndid(unsigned long data);
-#endif
 
 static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
 static int ipv6_count_addresses(struct inet6_dev *idev);
@@ -183,13 +177,11 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.rtr_solicits		= MAX_RTR_SOLICITATIONS,
 	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,
 	.rtr_solicit_delay	= MAX_RTR_SOLICITATION_DELAY,
-#ifdef CONFIG_IPV6_PRIVACY
 	.use_tempaddr 		= 0,
 	.temp_valid_lft		= TEMP_VALID_LIFETIME,
 	.temp_prefered_lft	= TEMP_PREFERRED_LIFETIME,
 	.regen_max_retry	= REGEN_MAX_RETRY,
 	.max_desync_factor	= MAX_DESYNC_FACTOR,
-#endif
 	.max_addresses		= IPV6_MAX_ADDRESSES,
 	.accept_ra_defrtr	= 1,
 	.accept_ra_pinfo	= 1,
@@ -221,13 +213,11 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.rtr_solicits		= MAX_RTR_SOLICITATIONS,
 	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,
 	.rtr_solicit_delay	= MAX_RTR_SOLICITATION_DELAY,
-#ifdef CONFIG_IPV6_PRIVACY
 	.use_tempaddr		= 0,
 	.temp_valid_lft		= TEMP_VALID_LIFETIME,
 	.temp_prefered_lft	= TEMP_PREFERRED_LIFETIME,
 	.regen_max_retry	= REGEN_MAX_RETRY,
 	.max_desync_factor	= MAX_DESYNC_FACTOR,
-#endif
 	.max_addresses		= IPV6_MAX_ADDRESSES,
 	.accept_ra_defrtr	= 1,
 	.accept_ra_pinfo	= 1,
@@ -371,7 +361,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 	}
 #endif
 
-#ifdef CONFIG_IPV6_PRIVACY
 	INIT_LIST_HEAD(&ndev->tempaddr_list);
 	setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev);
 	if ((dev->flags&IFF_LOOPBACK) ||
@@ -384,7 +373,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 		in6_dev_hold(ndev);
 		ipv6_regen_rndid((unsigned long) ndev);
 	}
-#endif
+
 	ndev->token = in6addr_any;
 
 	if (netif_running(dev) && addrconf_qdisc_ok(dev))
@@ -865,12 +854,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	/* Add to inet6_dev unicast addr list. */
 	ipv6_link_dev_addr(idev, ifa);
 
-#ifdef CONFIG_IPV6_PRIVACY
 	if (ifa->flags&IFA_F_TEMPORARY) {
 		list_add(&ifa->tmp_list, &idev->tempaddr_list);
 		in6_ifa_hold(ifa);
 	}
-#endif
 
 	in6_ifa_hold(ifa);
 	write_unlock(&idev->lock);
@@ -913,7 +900,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 	spin_unlock_bh(&addrconf_hash_lock);
 
 	write_lock_bh(&idev->lock);
-#ifdef CONFIG_IPV6_PRIVACY
+
 	if (ifp->flags&IFA_F_TEMPORARY) {
 		list_del(&ifp->tmp_list);
 		if (ifp->ifpub) {
@@ -922,7 +909,6 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 		}
 		__in6_ifa_put(ifp);
 	}
-#endif
 
 	list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) {
 		if (ifa == ifp) {
@@ -1013,7 +999,6 @@ out:
 	in6_ifa_put(ifp);
 }
 
-#ifdef CONFIG_IPV6_PRIVACY
 static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
 {
 	struct inet6_dev *idev = ifp->idev;
@@ -1116,7 +1101,6 @@ retry:
 out:
 	return ret;
 }
-#endif
 
 /*
  *	Choose an appropriate source address (RFC3484)
@@ -1131,9 +1115,7 @@ enum {
 #endif
 	IPV6_SADDR_RULE_OIF,
 	IPV6_SADDR_RULE_LABEL,
-#ifdef CONFIG_IPV6_PRIVACY
 	IPV6_SADDR_RULE_PRIVACY,
-#endif
 	IPV6_SADDR_RULE_ORCHID,
 	IPV6_SADDR_RULE_PREFIX,
 	IPV6_SADDR_RULE_MAX
@@ -1247,7 +1229,6 @@ static int ipv6_get_saddr_eval(struct net *net,
 				      &score->ifa->addr, score->addr_type,
 				      score->ifa->idev->dev->ifindex) == dst->label;
 		break;
-#ifdef CONFIG_IPV6_PRIVACY
 	case IPV6_SADDR_RULE_PRIVACY:
 	    {
 		/* Rule 7: Prefer public address
@@ -1259,7 +1240,6 @@ static int ipv6_get_saddr_eval(struct net *net,
 		ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp;
 		break;
 	    }
-#endif
 	case IPV6_SADDR_RULE_ORCHID:
 		/* Rule 8-: Prefer ORCHID vs ORCHID or
 		 *	    non-ORCHID vs non-ORCHID
@@ -1588,7 +1568,6 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 		if (dad_failed)
 			ipv6_ifa_notify(0, ifp);
 		in6_ifa_put(ifp);
-#ifdef CONFIG_IPV6_PRIVACY
 	} else if (ifp->flags&IFA_F_TEMPORARY) {
 		struct inet6_ifaddr *ifpub;
 		spin_lock_bh(&ifp->lock);
@@ -1602,7 +1581,6 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 			spin_unlock_bh(&ifp->lock);
 		}
 		ipv6_del_addr(ifp);
-#endif
 	} else
 		ipv6_del_addr(ifp);
 }
@@ -1851,7 +1829,6 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
 	return err;
 }
 
-#ifdef CONFIG_IPV6_PRIVACY
 /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
 static void __ipv6_regen_rndid(struct inet6_dev *idev)
 {
@@ -1919,7 +1896,6 @@ static void  __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmp
 	if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
 		__ipv6_regen_rndid(idev);
 }
-#endif
 
 /*
  *	Add prefix route.
@@ -2207,9 +2183,7 @@ ok:
 		if (ifp) {
 			int flags;
 			unsigned long now;
-#ifdef CONFIG_IPV6_PRIVACY
 			struct inet6_ifaddr *ift;
-#endif
 			u32 stored_lft;
 
 			/* update lifetime (RFC2462 5.5.3 e) */
@@ -2250,7 +2224,6 @@ ok:
 			} else
 				spin_unlock(&ifp->lock);
 
-#ifdef CONFIG_IPV6_PRIVACY
 			read_lock_bh(&in6_dev->lock);
 			/* update all temporary addresses in the list */
 			list_for_each_entry(ift, &in6_dev->tempaddr_list,
@@ -2315,7 +2288,7 @@ ok:
 			} else {
 				read_unlock_bh(&in6_dev->lock);
 			}
-#endif
+
 			in6_ifa_put(ifp);
 			addrconf_verify(0);
 		}
@@ -2995,7 +2968,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	if (!how)
 		idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
 
-#ifdef CONFIG_IPV6_PRIVACY
 	if (how && del_timer(&idev->regen_timer))
 		in6_dev_put(idev);
 
@@ -3015,7 +2987,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		in6_ifa_put(ifa);
 		write_lock_bh(&idev->lock);
 	}
-#endif
 
 	while (!list_empty(&idev->addr_list)) {
 		ifa = list_first_entry(&idev->addr_list,
@@ -3528,7 +3499,6 @@ restart:
 					in6_ifa_put(ifp);
 					goto restart;
 				}
-#ifdef CONFIG_IPV6_PRIVACY
 			} else if ((ifp->flags&IFA_F_TEMPORARY) &&
 				   !(ifp->flags&IFA_F_TENTATIVE)) {
 				unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
@@ -3556,7 +3526,6 @@ restart:
 				} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
 					next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
 				spin_unlock(&ifp->lock);
-#endif
 			} else {
 				/* ifp->prefered_lft <= ifp->valid_lft */
 				if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
@@ -4128,13 +4097,11 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 		jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval);
 	array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] =
 		jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval);
-#ifdef CONFIG_IPV6_PRIVACY
 	array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
 	array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
 	array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
 	array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
 	array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
-#endif
 	array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
 	array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
 	array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
@@ -4828,7 +4795,6 @@ static struct addrconf_sysctl_table
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_ms_jiffies,
 		},
-#ifdef CONFIG_IPV6_PRIVACY
 		{
 			.procname	= "use_tempaddr",
 			.data		= &ipv6_devconf.use_tempaddr,
@@ -4864,7 +4830,6 @@ static struct addrconf_sysctl_table
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
-#endif
 		{
 			.procname	= "max_addresses",
 			.data		= &ipv6_devconf.max_addresses,
-- 
cgit v1.2.3-71-gd317


From 80c33ddd31d0e801953e02b7b003f395c1920e4e Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Sat, 21 Sep 2013 05:05:39 +0000
Subject: net: add might_sleep() call to napi_disable

napi_disable uses an msleep() call to wait for outstanding napi work to be
finished after setting the disable bit. It does not always sleep incase there
was no outstanding work. This resulted in a rare bug in ixgbe_down operation
where a napi_disable call took place inside of a local_bh_disable()d context.
In order to enable easier detection of future sleep while atomic BUGs, this
patch adds a might_sleep() call, so that every use of napi_disable during
atomic context will be visible.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Cc: Eliezer Tamir <eliezer.tamir@linux.intel.com>
Cc: Alexander Duyck <alexander.duyck@intel.com>
Cc: Hyong-Youb Kim <hykim@myri.com>
Cc: Amir Vadai <amirv@mellanox.com>
Cc: Dmitry Kravkov <dmitry@broadcom.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 27f62f746621..cb1d918ecdf1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -483,6 +483,7 @@ void napi_hash_del(struct napi_struct *napi);
  */
 static inline void napi_disable(struct napi_struct *n)
 {
+	might_sleep();
 	set_bit(NAPI_STATE_DISABLE, &n->state);
 	while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
 		msleep(1);
-- 
cgit v1.2.3-71-gd317


From f6701d5f73c5c2f4ca37634514631fb056291f89 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:39 +0100
Subject: net: cdc_ncm: add include protection to cdc_ncm.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This makes it a lot easier to test modified versions

Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/usb/cdc_ncm.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index cc25b70af33c..89f0bbc2cf83 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -36,6 +36,9 @@
  * SUCH DAMAGE.
  */
 
+#ifndef __LINUX_USB_CDC_NCM_H
+#define __LINUX_USB_CDC_NCM_H
+
 #define CDC_NCM_COMM_ALTSETTING_NCM		0
 #define CDC_NCM_COMM_ALTSETTING_MBIM		1
 
@@ -133,3 +136,5 @@ extern void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf);
 extern struct sk_buff *cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign);
 extern int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in);
 extern int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset);
+
+#endif /* __LINUX_USB_CDC_NCM_H */
-- 
cgit v1.2.3-71-gd317


From 3e515665a76ad8f60a1c05968cc6a5b2f2701171 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:40 +0100
Subject: net: cdc_ncm: remove redundant "intf" field
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is always a duplicate of the "control" field. It causes
confusion wrt intf_data updates and cleanups.

Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 4 +---
 include/linux/usb/cdc_ncm.h | 1 -
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 38566bffca98..bfab8796730f 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -382,7 +382,6 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 	len = intf->cur_altsetting->extralen;
 
 	ctx->udev = dev->udev;
-	ctx->intf = intf;
 
 	/* parse through descriptors associated with control interface */
 	while ((len > 0) && (buf[0] > 2) && (buf[0] <= len)) {
@@ -489,7 +488,6 @@ advance:
 
 	usb_set_intfdata(ctx->data, dev);
 	usb_set_intfdata(ctx->control, dev);
-	usb_set_intfdata(ctx->intf, dev);
 
 	if (ctx->ether_desc) {
 		temp = usbnet_get_ethernet_addr(dev, ctx->ether_desc->iMACAddress);
@@ -562,7 +560,7 @@ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf)
 		ctx->control = NULL;
 	}
 
-	usb_set_intfdata(ctx->intf, NULL);
+	usb_set_intfdata(intf, NULL);
 	cdc_ncm_free(ctx);
 }
 EXPORT_SYMBOL_GPL(cdc_ncm_unbind);
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 89f0bbc2cf83..c14e00fb1667 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -103,7 +103,6 @@ struct cdc_ncm_ctx {
 	struct usb_host_endpoint *in_ep;
 	struct usb_host_endpoint *out_ep;
 	struct usb_host_endpoint *status_ep;
-	struct usb_interface *intf;
 	struct usb_interface *control;
 	struct usb_interface *data;
 
-- 
cgit v1.2.3-71-gd317


From ff1632aa8581b7103ac2af1ea3cb4a415eb9d6ad Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:41 +0100
Subject: net: cdc_ncm: remove redundant endpoint pointers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

No need to duplicate stuff already in the common usbnet
struct.  We still need to keep our special find_endpoints
function because we need explicit control over the selected
altsetting.

Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 38 +++++++++++++++++++-------------------
 include/linux/usb/cdc_ncm.h |  3 ---
 2 files changed, 19 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index bfab8796730f..a989bd5290a6 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -292,9 +292,9 @@ max_dgram_err:
 }
 
 static void
-cdc_ncm_find_endpoints(struct cdc_ncm_ctx *ctx, struct usb_interface *intf)
+cdc_ncm_find_endpoints(struct usbnet *dev, struct usb_interface *intf)
 {
-	struct usb_host_endpoint *e;
+	struct usb_host_endpoint *e, *in = NULL, *out = NULL;
 	u8 ep;
 
 	for (ep = 0; ep < intf->cur_altsetting->desc.bNumEndpoints; ep++) {
@@ -303,18 +303,18 @@ cdc_ncm_find_endpoints(struct cdc_ncm_ctx *ctx, struct usb_interface *intf)
 		switch (e->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) {
 		case USB_ENDPOINT_XFER_INT:
 			if (usb_endpoint_dir_in(&e->desc)) {
-				if (ctx->status_ep == NULL)
-					ctx->status_ep = e;
+				if (!dev->status)
+					dev->status = e;
 			}
 			break;
 
 		case USB_ENDPOINT_XFER_BULK:
 			if (usb_endpoint_dir_in(&e->desc)) {
-				if (ctx->in_ep == NULL)
-					ctx->in_ep = e;
+				if (!in)
+					in = e;
 			} else {
-				if (ctx->out_ep == NULL)
-					ctx->out_ep = e;
+				if (!out)
+					out = e;
 			}
 			break;
 
@@ -322,6 +322,14 @@ cdc_ncm_find_endpoints(struct cdc_ncm_ctx *ctx, struct usb_interface *intf)
 			break;
 		}
 	}
+	if (in && !dev->in)
+		dev->in = usb_rcvbulkpipe(dev->udev,
+					  in->desc.bEndpointAddress &
+					  USB_ENDPOINT_NUMBER_MASK);
+	if (out && !dev->out)
+		dev->out = usb_sndbulkpipe(dev->udev,
+					   out->desc.bEndpointAddress &
+					   USB_ENDPOINT_NUMBER_MASK);
 }
 
 static void cdc_ncm_free(struct cdc_ncm_ctx *ctx)
@@ -477,11 +485,9 @@ advance:
 	if (temp)
 		goto error2;
 
-	cdc_ncm_find_endpoints(ctx, ctx->data);
-	cdc_ncm_find_endpoints(ctx, ctx->control);
-
-	if ((ctx->in_ep == NULL) || (ctx->out_ep == NULL) ||
-	    (ctx->status_ep == NULL))
+	cdc_ncm_find_endpoints(dev, ctx->data);
+	cdc_ncm_find_endpoints(dev, ctx->control);
+	if (!dev->in || !dev->out || !dev->status)
 		goto error2;
 
 	dev->net->ethtool_ops = &cdc_ncm_ethtool_ops;
@@ -496,12 +502,6 @@ advance:
 		dev_info(&dev->udev->dev, "MAC-Address: %pM\n", dev->net->dev_addr);
 	}
 
-
-	dev->in = usb_rcvbulkpipe(dev->udev,
-		ctx->in_ep->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
-	dev->out = usb_sndbulkpipe(dev->udev,
-		ctx->out_ep->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
-	dev->status = ctx->status_ep;
 	dev->rx_urb_size = ctx->rx_max;
 
 	/* cdc_ncm_setup will override dwNtbOutMaxSize if it is
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index c14e00fb1667..36e1e153ca2d 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -100,9 +100,6 @@ struct cdc_ncm_ctx {
 
 	struct net_device *netdev;
 	struct usb_device *udev;
-	struct usb_host_endpoint *in_ep;
-	struct usb_host_endpoint *out_ep;
-	struct usb_host_endpoint *status_ep;
 	struct usb_interface *control;
 	struct usb_interface *data;
 
-- 
cgit v1.2.3-71-gd317


From bed6f762123fc53c63efef386531dd877cba2468 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:42 +0100
Subject: net: cdc_ncm: remove redundant netdev field
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Too many pointers back and forth are likely to confuse developers,
creating subtle bugs whenever we forget to syncronize them all.

As a usbnet driver, we should stick with the standard struct
usbnet fields as much as possible.  The netdevice is one such
field.

Cc: Greg Suarez <gsuarez@smithmicro.com>
Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_mbim.c  |  2 +-
 drivers/net/usb/cdc_ncm.c   | 73 +++++++++++++++++++++++----------------------
 include/linux/usb/cdc_ncm.h |  3 +-
 3 files changed, 39 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index 253472a8a983..af76aaf08b6b 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -175,7 +175,7 @@ static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb
 	}
 
 	spin_lock_bh(&ctx->mtx);
-	skb_out = cdc_ncm_fill_tx_frame(ctx, skb, sign);
+	skb_out = cdc_ncm_fill_tx_frame(dev, skb, sign);
 	spin_unlock_bh(&ctx->mtx);
 	return skb_out;
 
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index a989bd5290a6..e39e7678a798 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -80,8 +80,9 @@ cdc_ncm_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info)
 	usb_make_path(dev->udev, info->bus_info, sizeof(info->bus_info));
 }
 
-static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
+static u8 cdc_ncm_setup(struct usbnet *dev)
 {
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
 	u32 val;
 	u8 flags;
 	u8 iface_no;
@@ -90,7 +91,6 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
 	u16 ntb_fmt_supported;
 	u32 min_dgram_size;
 	u32 min_hdr_size;
-	struct usbnet *dev = netdev_priv(ctx->netdev);
 
 	iface_no = ctx->control->cur_altsetting->desc.bInterfaceNumber;
 
@@ -285,8 +285,8 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
 	}
 
 max_dgram_err:
-	if (ctx->netdev->mtu != (ctx->max_datagram_size - eth_hlen))
-		ctx->netdev->mtu = ctx->max_datagram_size - eth_hlen;
+	if (dev->net->mtu != (ctx->max_datagram_size - eth_hlen))
+		dev->net->mtu = ctx->max_datagram_size - eth_hlen;
 
 	return 0;
 }
@@ -375,11 +375,10 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 
 	hrtimer_init(&ctx->tx_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	ctx->tx_timer.function = &cdc_ncm_tx_timer_cb;
-	ctx->bh.data = (unsigned long)ctx;
+	ctx->bh.data = (unsigned long)dev;
 	ctx->bh.func = cdc_ncm_txpath_bh;
 	atomic_set(&ctx->stop, 0);
 	spin_lock_init(&ctx->mtx);
-	ctx->netdev = dev->net;
 
 	/* store ctx pointer in device data field */
 	dev->data[0] = (unsigned long)ctx;
@@ -477,7 +476,7 @@ advance:
 		goto error2;
 
 	/* initialize data interface */
-	if (cdc_ncm_setup(ctx))
+	if (cdc_ncm_setup(dev))
 		goto error2;
 
 	/* configure data interface */
@@ -669,9 +668,9 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_
 }
 
 struct sk_buff *
-cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign)
+cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 {
-	struct usbnet *dev = netdev_priv(ctx->netdev);
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
 	struct usb_cdc_ncm_nth16 *nth16;
 	struct usb_cdc_ncm_ndp16 *ndp16;
 	struct sk_buff *skb_out;
@@ -695,7 +694,7 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign)
 		if (skb_out == NULL) {
 			if (skb != NULL) {
 				dev_kfree_skb_any(skb);
-				ctx->netdev->stats.tx_dropped++;
+				dev->net->stats.tx_dropped++;
 			}
 			goto exit_no_skb;
 		}
@@ -733,12 +732,12 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign)
 				/* won't fit, MTU problem? */
 				dev_kfree_skb_any(skb);
 				skb = NULL;
-				ctx->netdev->stats.tx_dropped++;
+				dev->net->stats.tx_dropped++;
 			} else {
 				/* no room for skb - store for later */
 				if (ctx->tx_rem_skb != NULL) {
 					dev_kfree_skb_any(ctx->tx_rem_skb);
-					ctx->netdev->stats.tx_dropped++;
+					dev->net->stats.tx_dropped++;
 				}
 				ctx->tx_rem_skb = skb;
 				ctx->tx_rem_sign = sign;
@@ -771,7 +770,7 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign)
 	if (skb != NULL) {
 		dev_kfree_skb_any(skb);
 		skb = NULL;
-		ctx->netdev->stats.tx_dropped++;
+		dev->net->stats.tx_dropped++;
 	}
 
 	ctx->tx_curr_frame_num = n;
@@ -814,7 +813,7 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign)
 
 	/* return skb */
 	ctx->tx_curr_skb = NULL;
-	ctx->netdev->stats.tx_packets += ctx->tx_curr_frame_num;
+	dev->net->stats.tx_packets += ctx->tx_curr_frame_num;
 	return skb_out;
 
 exit_no_skb:
@@ -846,18 +845,19 @@ static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *timer)
 
 static void cdc_ncm_txpath_bh(unsigned long param)
 {
-	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)param;
+	struct usbnet *dev = (struct usbnet *)param;
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
 
 	spin_lock_bh(&ctx->mtx);
 	if (ctx->tx_timer_pending != 0) {
 		ctx->tx_timer_pending--;
 		cdc_ncm_tx_timeout_start(ctx);
 		spin_unlock_bh(&ctx->mtx);
-	} else if (ctx->netdev != NULL) {
+	} else if (dev->net != NULL) {
 		spin_unlock_bh(&ctx->mtx);
-		netif_tx_lock_bh(ctx->netdev);
-		usbnet_start_xmit(NULL, ctx->netdev);
-		netif_tx_unlock_bh(ctx->netdev);
+		netif_tx_lock_bh(dev->net);
+		usbnet_start_xmit(NULL, dev->net);
+		netif_tx_unlock_bh(dev->net);
 	} else {
 		spin_unlock_bh(&ctx->mtx);
 	}
@@ -880,7 +880,7 @@ cdc_ncm_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 		goto error;
 
 	spin_lock_bh(&ctx->mtx);
-	skb_out = cdc_ncm_fill_tx_frame(ctx, skb, cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN));
+	skb_out = cdc_ncm_fill_tx_frame(dev, skb, cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN));
 	spin_unlock_bh(&ctx->mtx);
 	return skb_out;
 
@@ -1047,9 +1047,10 @@ error:
 }
 
 static void
-cdc_ncm_speed_change(struct cdc_ncm_ctx *ctx,
+cdc_ncm_speed_change(struct usbnet *dev,
 		     struct usb_cdc_speed_change *data)
 {
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
 	uint32_t rx_speed = le32_to_cpu(data->DLBitRRate);
 	uint32_t tx_speed = le32_to_cpu(data->ULBitRate);
 
@@ -1063,18 +1064,18 @@ cdc_ncm_speed_change(struct cdc_ncm_ctx *ctx,
 
 		if ((tx_speed > 1000000) && (rx_speed > 1000000)) {
 			printk(KERN_INFO KBUILD_MODNAME
-				": %s: %u mbit/s downlink "
-				"%u mbit/s uplink\n",
-				ctx->netdev->name,
-				(unsigned int)(rx_speed / 1000000U),
-				(unsigned int)(tx_speed / 1000000U));
+			       ": %s: %u mbit/s downlink "
+			       "%u mbit/s uplink\n",
+			       dev->net->name,
+			       (unsigned int)(rx_speed / 1000000U),
+			       (unsigned int)(tx_speed / 1000000U));
 		} else {
 			printk(KERN_INFO KBUILD_MODNAME
-				": %s: %u kbit/s downlink "
-				"%u kbit/s uplink\n",
-				ctx->netdev->name,
-				(unsigned int)(rx_speed / 1000U),
-				(unsigned int)(tx_speed / 1000U));
+			       ": %s: %u kbit/s downlink "
+			       "%u kbit/s uplink\n",
+			       dev->net->name,
+			       (unsigned int)(rx_speed / 1000U),
+			       (unsigned int)(tx_speed / 1000U));
 		}
 	}
 }
@@ -1091,7 +1092,7 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 
 	/* test for split data in 8-byte chunks */
 	if (test_and_clear_bit(EVENT_STS_SPLIT, &dev->flags)) {
-		cdc_ncm_speed_change(ctx,
+		cdc_ncm_speed_change(dev,
 		      (struct usb_cdc_speed_change *)urb->transfer_buffer);
 		return;
 	}
@@ -1108,8 +1109,8 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 		ctx->connected = le16_to_cpu(event->wValue);
 
 		printk(KERN_INFO KBUILD_MODNAME ": %s: network connection:"
-			" %sconnected\n",
-			ctx->netdev->name, ctx->connected ? "" : "dis");
+		       " %sconnected\n",
+		       dev->net->name, ctx->connected ? "" : "dis");
 
 		usbnet_link_change(dev, ctx->connected, 0);
 		if (!ctx->connected)
@@ -1121,8 +1122,8 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 					sizeof(struct usb_cdc_speed_change)))
 			set_bit(EVENT_STS_SPLIT, &dev->flags);
 		else
-			cdc_ncm_speed_change(ctx,
-				(struct usb_cdc_speed_change *) &event[1]);
+			cdc_ncm_speed_change(dev,
+					     (struct usb_cdc_speed_change *)&event[1]);
 		break;
 
 	default:
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 36e1e153ca2d..5c47bd9620d5 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -98,7 +98,6 @@ struct cdc_ncm_ctx {
 	const struct usb_cdc_union_desc *union_desc;
 	const struct usb_cdc_ether_desc *ether_desc;
 
-	struct net_device *netdev;
 	struct usb_device *udev;
 	struct usb_interface *control;
 	struct usb_interface *data;
@@ -129,7 +128,7 @@ struct cdc_ncm_ctx {
 extern u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf);
 extern int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting);
 extern void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf);
-extern struct sk_buff *cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign);
+extern struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign);
 extern int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in);
 extern int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset);
 
-- 
cgit v1.2.3-71-gd317


From de5bee2720776989060b9686e6a89e938a346345 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:43 +0100
Subject: net: cdc_ncm: remove unused udev field
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We already use the usbnet udev field everywhere this could have
been used.

Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 2 --
 include/linux/usb/cdc_ncm.h | 1 -
 2 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index e39e7678a798..9cdd762916dc 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -388,8 +388,6 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 	buf = intf->cur_altsetting->extra;
 	len = intf->cur_altsetting->extralen;
 
-	ctx->udev = dev->udev;
-
 	/* parse through descriptors associated with control interface */
 	while ((len > 0) && (buf[0] > 2) && (buf[0] <= len)) {
 
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 5c47bd9620d5..059dcc93c4d8 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -98,7 +98,6 @@ struct cdc_ncm_ctx {
 	const struct usb_cdc_union_desc *union_desc;
 	const struct usb_cdc_ether_desc *ether_desc;
 
-	struct usb_device *udev;
 	struct usb_interface *control;
 	struct usb_interface *data;
 
-- 
cgit v1.2.3-71-gd317


From f3028c524a7cd4d97b034fc1f35dcaecb5d6f9d6 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:44 +0100
Subject: net: cdc_ncm: remove tx_speed and rx_speed fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These fields are only used to prevent printing the same speeds
multiple times if we receive multiple identical speed notifications.

The value of these printk's is questionable, and even more so when
we filter out some of the notifications sent us by the firmware. If
we are going to print any of these, then we should print them all.

Removing little used fields is a bonus.

Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 37 ++++++++++++++-----------------------
 include/linux/usb/cdc_ncm.h |  2 --
 2 files changed, 14 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 9cdd762916dc..fc36a99f8183 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -510,7 +510,6 @@ advance:
 	    ctx->tx_max % usb_maxpacket(dev->udev, dev->out, 1) == 0)
 		ctx->tx_max++;
 
-	ctx->tx_speed = ctx->rx_speed = 0;
 	return 0;
 
 error2:
@@ -1048,7 +1047,6 @@ static void
 cdc_ncm_speed_change(struct usbnet *dev,
 		     struct usb_cdc_speed_change *data)
 {
-	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
 	uint32_t rx_speed = le32_to_cpu(data->DLBitRRate);
 	uint32_t tx_speed = le32_to_cpu(data->ULBitRate);
 
@@ -1056,25 +1054,20 @@ cdc_ncm_speed_change(struct usbnet *dev,
 	 * Currently the USB-NET API does not support reporting the actual
 	 * device speed. Do print it instead.
 	 */
-	if ((tx_speed != ctx->tx_speed) || (rx_speed != ctx->rx_speed)) {
-		ctx->tx_speed = tx_speed;
-		ctx->rx_speed = rx_speed;
-
-		if ((tx_speed > 1000000) && (rx_speed > 1000000)) {
-			printk(KERN_INFO KBUILD_MODNAME
-			       ": %s: %u mbit/s downlink "
-			       "%u mbit/s uplink\n",
-			       dev->net->name,
-			       (unsigned int)(rx_speed / 1000000U),
-			       (unsigned int)(tx_speed / 1000000U));
-		} else {
-			printk(KERN_INFO KBUILD_MODNAME
-			       ": %s: %u kbit/s downlink "
-			       "%u kbit/s uplink\n",
-			       dev->net->name,
-			       (unsigned int)(rx_speed / 1000U),
-			       (unsigned int)(tx_speed / 1000U));
-		}
+	if ((tx_speed > 1000000) && (rx_speed > 1000000)) {
+		printk(KERN_INFO KBUILD_MODNAME
+		       ": %s: %u mbit/s downlink "
+		       "%u mbit/s uplink\n",
+		       dev->net->name,
+		       (unsigned int)(rx_speed / 1000000U),
+		       (unsigned int)(tx_speed / 1000000U));
+	} else {
+		printk(KERN_INFO KBUILD_MODNAME
+		       ": %s: %u kbit/s downlink "
+		       "%u kbit/s uplink\n",
+		       dev->net->name,
+		       (unsigned int)(rx_speed / 1000U),
+		       (unsigned int)(tx_speed / 1000U));
 	}
 }
 
@@ -1111,8 +1104,6 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 		       dev->net->name, ctx->connected ? "" : "dis");
 
 		usbnet_link_change(dev, ctx->connected, 0);
-		if (!ctx->connected)
-			ctx->tx_speed = ctx->rx_speed = 0;
 		break;
 
 	case USB_CDC_NOTIFY_SPEED_CHANGE:
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 059dcc93c4d8..f14af3dd0cce 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -110,8 +110,6 @@ struct cdc_ncm_ctx {
 
 	u32 tx_timer_pending;
 	u32 tx_curr_frame_num;
-	u32 rx_speed;
-	u32 tx_speed;
 	u32 rx_max;
 	u32 tx_max;
 	u32 max_datagram_size;
-- 
cgit v1.2.3-71-gd317


From 6a9612e2cb22b3fd6a7304dcbf2b4ee1cf2104b2 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:45 +0100
Subject: net: cdc_ncm: remove ncm_parm field
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Moving the call to cdc_ncm_setup() after the endpoint
setup removes the last remaining reference to ncm_parm
outside cdc_ncm_setup.

Collecting all the ncm_parm based calculations in
cdc_ncm_setup improves readability.

Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 46 ++++++++++++++++++++++-----------------------
 include/linux/usb/cdc_ncm.h |  1 -
 2 files changed, 23 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index fc36a99f8183..4de3a5423e87 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -83,6 +83,7 @@ cdc_ncm_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info)
 static u8 cdc_ncm_setup(struct usbnet *dev)
 {
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	struct usb_cdc_ncm_ntb_parameters ncm_parm;
 	u32 val;
 	u8 flags;
 	u8 iface_no;
@@ -97,22 +98,22 @@ static u8 cdc_ncm_setup(struct usbnet *dev)
 	err = usbnet_read_cmd(dev, USB_CDC_GET_NTB_PARAMETERS,
 			      USB_TYPE_CLASS | USB_DIR_IN
 			      |USB_RECIP_INTERFACE,
-			      0, iface_no, &ctx->ncm_parm,
-			      sizeof(ctx->ncm_parm));
+			      0, iface_no, &ncm_parm,
+			      sizeof(ncm_parm));
 	if (err < 0) {
 		pr_debug("failed GET_NTB_PARAMETERS\n");
 		return 1;
 	}
 
 	/* read correct set of parameters according to device mode */
-	ctx->rx_max = le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize);
-	ctx->tx_max = le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize);
-	ctx->tx_remainder = le16_to_cpu(ctx->ncm_parm.wNdpOutPayloadRemainder);
-	ctx->tx_modulus = le16_to_cpu(ctx->ncm_parm.wNdpOutDivisor);
-	ctx->tx_ndp_modulus = le16_to_cpu(ctx->ncm_parm.wNdpOutAlignment);
+	ctx->rx_max = le32_to_cpu(ncm_parm.dwNtbInMaxSize);
+	ctx->tx_max = le32_to_cpu(ncm_parm.dwNtbOutMaxSize);
+	ctx->tx_remainder = le16_to_cpu(ncm_parm.wNdpOutPayloadRemainder);
+	ctx->tx_modulus = le16_to_cpu(ncm_parm.wNdpOutDivisor);
+	ctx->tx_ndp_modulus = le16_to_cpu(ncm_parm.wNdpOutAlignment);
 	/* devices prior to NCM Errata shall set this field to zero */
-	ctx->tx_max_datagrams = le16_to_cpu(ctx->ncm_parm.wNtbOutMaxDatagrams);
-	ntb_fmt_supported = le16_to_cpu(ctx->ncm_parm.bmNtbFormatsSupported);
+	ctx->tx_max_datagrams = le16_to_cpu(ncm_parm.wNtbOutMaxDatagrams);
+	ntb_fmt_supported = le16_to_cpu(ncm_parm.bmNtbFormatsSupported);
 
 	eth_hlen = ETH_HLEN;
 	min_dgram_size = CDC_NCM_MIN_DATAGRAM_SIZE;
@@ -153,7 +154,7 @@ static u8 cdc_ncm_setup(struct usbnet *dev)
 	}
 
 	/* inform device about NTB input size changes */
-	if (ctx->rx_max != le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)) {
+	if (ctx->rx_max != le32_to_cpu(ncm_parm.dwNtbInMaxSize)) {
 		__le32 dwNtbInMaxSize = cpu_to_le32(ctx->rx_max);
 
 		err = usbnet_write_cmd(dev, USB_CDC_SET_NTB_INPUT_SIZE,
@@ -171,6 +172,14 @@ static u8 cdc_ncm_setup(struct usbnet *dev)
 		pr_debug("Using default maximum transmit length=%d\n",
 						CDC_NCM_NTB_MAX_SIZE_TX);
 		ctx->tx_max = CDC_NCM_NTB_MAX_SIZE_TX;
+
+		/* Adding a pad byte here simplifies the handling in
+		 * cdc_ncm_fill_tx_frame, by making tx_max always
+		 * represent the real skb max size.
+		 */
+		if (ctx->tx_max % usb_maxpacket(dev->udev, dev->out, 1) == 0)
+			ctx->tx_max++;
+
 	}
 
 	/*
@@ -473,10 +482,6 @@ advance:
 	if (temp)
 		goto error2;
 
-	/* initialize data interface */
-	if (cdc_ncm_setup(dev))
-		goto error2;
-
 	/* configure data interface */
 	temp = usb_set_interface(dev->udev, iface_no, data_altsetting);
 	if (temp)
@@ -487,6 +492,10 @@ advance:
 	if (!dev->in || !dev->out || !dev->status)
 		goto error2;
 
+	/* initialize data interface */
+	if (cdc_ncm_setup(dev))
+		goto error2;
+
 	dev->net->ethtool_ops = &cdc_ncm_ethtool_ops;
 
 	usb_set_intfdata(ctx->data, dev);
@@ -501,15 +510,6 @@ advance:
 
 	dev->rx_urb_size = ctx->rx_max;
 
-	/* cdc_ncm_setup will override dwNtbOutMaxSize if it is
-	 * outside the sane range. Adding a pad byte here if necessary
-	 * simplifies the handling in cdc_ncm_fill_tx_frame, making
-	 * tx_max always represent the real skb max size.
-	 */
-	if (ctx->tx_max != le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize) &&
-	    ctx->tx_max % usb_maxpacket(dev->udev, dev->out, 1) == 0)
-		ctx->tx_max++;
-
 	return 0;
 
 error2:
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index f14af3dd0cce..89b52a0fe4b9 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -88,7 +88,6 @@
 #define cdc_ncm_data_intf_is_mbim(x)  ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB)
 
 struct cdc_ncm_ctx {
-	struct usb_cdc_ncm_ntb_parameters ncm_parm;
 	struct hrtimer tx_timer;
 	struct tasklet_struct bh;
 
-- 
cgit v1.2.3-71-gd317


From 832922362e1308aaef95a43383577d56f51fbc3c Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:47 +0100
Subject: net: cdc_ncm: remove descriptor pointers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

header_desc was completely unused and union_desc was never used
outside cdc_ncm_bind_common.

Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 12 ++++++------
 include/linux/usb/cdc_ncm.h |  4 +---
 2 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index b8de8ddd6d98..435fcc75d706 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -372,6 +372,7 @@ static const struct ethtool_ops cdc_ncm_ethtool_ops = {
 
 int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting)
 {
+	const struct usb_cdc_union_desc *union_desc = NULL;
 	struct cdc_ncm_ctx *ctx;
 	struct usb_driver *driver;
 	u8 *buf;
@@ -406,16 +407,15 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 
 		switch (buf[2]) {
 		case USB_CDC_UNION_TYPE:
-			if (buf[0] < sizeof(*(ctx->union_desc)))
+			if (buf[0] < sizeof(*union_desc))
 				break;
 
-			ctx->union_desc =
-					(const struct usb_cdc_union_desc *)buf;
+			union_desc = (const struct usb_cdc_union_desc *)buf;
 
 			ctx->control = usb_ifnum_to_if(dev->udev,
-					ctx->union_desc->bMasterInterface0);
+						       union_desc->bMasterInterface0);
 			ctx->data = usb_ifnum_to_if(dev->udev,
-					ctx->union_desc->bSlaveInterface0);
+						    union_desc->bSlaveInterface0);
 			break;
 
 		case USB_CDC_ETHERNET_TYPE:
@@ -458,7 +458,7 @@ advance:
 	}
 
 	/* some buggy devices have an IAD but no CDC Union */
-	if (!ctx->union_desc && intf->intf_assoc && intf->intf_assoc->bInterfaceCount == 2) {
+	if (!union_desc && intf->intf_assoc && intf->intf_assoc->bInterfaceCount == 2) {
 		ctx->control = intf;
 		ctx->data = usb_ifnum_to_if(dev->udev, intf->cur_altsetting->desc.bInterfaceNumber + 1);
 		dev_dbg(&intf->dev, "CDC Union missing - got slave from IAD\n");
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 89b52a0fe4b9..cad54ad4ad12 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -92,9 +92,7 @@ struct cdc_ncm_ctx {
 	struct tasklet_struct bh;
 
 	const struct usb_cdc_ncm_desc *func_desc;
-	const struct usb_cdc_mbim_desc   *mbim_desc;
-	const struct usb_cdc_header_desc *header_desc;
-	const struct usb_cdc_union_desc *union_desc;
+	const struct usb_cdc_mbim_desc *mbim_desc;
 	const struct usb_cdc_ether_desc *ether_desc;
 
 	struct usb_interface *control;
-- 
cgit v1.2.3-71-gd317


From 6dd13e83ce37f716e36085cb8b58779da1e98f6d Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 1 Nov 2013 11:16:57 +0100
Subject: net: cdc_ncm: drop "extern" from header declarations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: Alexey Orishko <alexey.orishko@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/usb/cdc_ncm.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index cad54ad4ad12..2300f7492927 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -119,11 +119,11 @@ struct cdc_ncm_ctx {
 	u16 connected;
 };
 
-extern u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf);
-extern int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting);
-extern void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf);
-extern struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign);
-extern int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in);
-extern int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset);
+u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf);
+int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting);
+void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf);
+struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign);
+int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in);
+int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset);
 
 #endif /* __LINUX_USB_CDC_NCM_H */
-- 
cgit v1.2.3-71-gd317


From 6e95fcaa42e5078ac265964deebed597f9eae07a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Wed, 30 Oct 2013 11:50:49 +0100
Subject: lib: crc32: add functionality to combine two crc32{, c}s in GF(2)

This patch adds a combinator to merge two or more crc32{,c}s
into a new one. This is useful for checksum computations of
fragmented skbs that use crc32/crc32c as checksums.

The arithmetics for combining both in the GF(2) was taken and
slightly modified from zlib. Only passing two crcs is insufficient
as two crcs and the length of the second piece is needed for
merging. The code is made generic, so that only polynomials
need to be passed for crc32_le resp. crc32c_le.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/crc32.h | 40 +++++++++++++++++++++++++
 lib/crc32.c           | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 121 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/crc32.h b/include/linux/crc32.h
index 68267b64bb98..7d275c4fc011 100644
--- a/include/linux/crc32.h
+++ b/include/linux/crc32.h
@@ -11,8 +11,48 @@
 extern u32  crc32_le(u32 crc, unsigned char const *p, size_t len);
 extern u32  crc32_be(u32 crc, unsigned char const *p, size_t len);
 
+/**
+ * crc32_le_combine - Combine two crc32 check values into one. For two
+ * 		      sequences of bytes, seq1 and seq2 with lengths len1
+ * 		      and len2, crc32_le() check values were calculated
+ * 		      for each, crc1 and crc2.
+ *
+ * @crc1: crc32 of the first block
+ * @crc2: crc32 of the second block
+ * @len2: length of the second block
+ *
+ * Return: The crc32_le() check value of seq1 and seq2 concatenated,
+ * 	   requiring only crc1, crc2, and len2. Note: If seq_full denotes
+ * 	   the concatenated memory area of seq1 with seq2, and crc_full
+ * 	   the crc32_le() value of seq_full, then crc_full ==
+ * 	   crc32_le_combine(crc1, crc2, len2) when crc_full was seeded
+ * 	   with the same initializer as crc1, and crc2 seed was 0. See
+ * 	   also crc32_combine_test().
+ */
+extern u32  crc32_le_combine(u32 crc1, u32 crc2, size_t len2);
+
 extern u32  __crc32c_le(u32 crc, unsigned char const *p, size_t len);
 
+/**
+ * __crc32c_le_combine - Combine two crc32c check values into one. For two
+ * 			 sequences of bytes, seq1 and seq2 with lengths len1
+ * 			 and len2, __crc32c_le() check values were calculated
+ * 			 for each, crc1 and crc2.
+ *
+ * @crc1: crc32c of the first block
+ * @crc2: crc32c of the second block
+ * @len2: length of the second block
+ *
+ * Return: The __crc32c_le() check value of seq1 and seq2 concatenated,
+ * 	   requiring only crc1, crc2, and len2. Note: If seq_full denotes
+ * 	   the concatenated memory area of seq1 with seq2, and crc_full
+ * 	   the __crc32c_le() value of seq_full, then crc_full ==
+ * 	   __crc32c_le_combine(crc1, crc2, len2) when crc_full was
+ * 	   seeded with the same initializer as crc1, and crc2 seed
+ * 	   was 0. See also crc32c_combine_test().
+ */
+extern u32  __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2);
+
 #define crc32(seed, data, length)  crc32_le(seed, (unsigned char const *)(data), length)
 
 /*
diff --git a/lib/crc32.c b/lib/crc32.c
index 429d61ce6aa0..595205cddc30 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -49,6 +49,30 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
 MODULE_DESCRIPTION("Various CRC32 calculations");
 MODULE_LICENSE("GPL");
 
+#define GF2_DIM		32
+
+static u32 gf2_matrix_times(u32 *mat, u32 vec)
+{
+	u32 sum = 0;
+
+	while (vec) {
+		if (vec & 1)
+			sum ^= *mat;
+		vec >>= 1;
+		mat++;
+	}
+
+	return sum;
+}
+
+static void gf2_matrix_square(u32 *square, u32 *mat)
+{
+	int i;
+
+	for (i = 0; i < GF2_DIM; i++)
+		square[i] = gf2_matrix_times(mat, mat[i]);
+}
+
 #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8
 
 /* implements slicing-by-4 or slicing-by-8 algorithm */
@@ -130,6 +154,52 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 }
 #endif
 
+/* For conditions of distribution and use, see copyright notice in zlib.h */
+static u32 crc32_generic_combine(u32 crc1, u32 crc2, size_t len2,
+				 u32 polynomial)
+{
+	u32 even[GF2_DIM]; /* Even-power-of-two zeros operator */
+	u32 odd[GF2_DIM];  /* Odd-power-of-two zeros operator  */
+	u32 row;
+	int i;
+
+	if (len2 <= 0)
+		return crc1;
+
+	/* Put operator for one zero bit in odd */
+	odd[0] = polynomial;
+	row = 1;
+	for (i = 1; i < GF2_DIM; i++) {
+		odd[i] = row;
+		row <<= 1;
+	}
+
+	gf2_matrix_square(even, odd); /* Put operator for two zero bits in even */
+	gf2_matrix_square(odd, even); /* Put operator for four zero bits in odd */
+
+	/* Apply len2 zeros to crc1 (first square will put the operator for one
+	 * zero byte, eight zero bits, in even).
+	 */
+	do {
+		/* Apply zeros operator for this bit of len2 */
+		gf2_matrix_square(even, odd);
+		if (len2 & 1)
+			crc1 = gf2_matrix_times(even, crc1);
+		len2 >>= 1;
+		/* If no more bits set, then done */
+		if (len2 == 0)
+			break;
+		/* Another iteration of the loop with odd and even swapped */
+		gf2_matrix_square(odd, even);
+		if (len2 & 1)
+			crc1 = gf2_matrix_times(odd, crc1);
+		len2 >>= 1;
+	} while (len2 != 0);
+
+	crc1 ^= crc2;
+	return crc1;
+}
+
 /**
  * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II
  *			CRC32/CRC32C
@@ -200,8 +270,19 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
 			(const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);
 }
 #endif
+u32 __pure crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
+{
+	return crc32_generic_combine(crc1, crc2, len2, CRCPOLY_LE);
+}
+
+u32 __pure __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)
+{
+	return crc32_generic_combine(crc1, crc2, len2, CRC32C_POLY_LE);
+}
 EXPORT_SYMBOL(crc32_le);
+EXPORT_SYMBOL(crc32_le_combine);
 EXPORT_SYMBOL(__crc32c_le);
+EXPORT_SYMBOL(__crc32c_le_combine);
 
 /**
  * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
-- 
cgit v1.2.3-71-gd317


From 2817a336d4d533fb8b68719723cd60ea7dd7c09e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Wed, 30 Oct 2013 11:50:51 +0100
Subject: net: skb_checksum: allow custom update/combine for walking skb

Currently, skb_checksum walks over 1) linearized, 2) frags[], and
3) frag_list data and calculats the one's complement, a 32 bit
result suitable for feeding into itself or csum_tcpudp_magic(),
but unsuitable for SCTP as we're calculating CRC32c there.

Hence, in order to not re-implement the very same function in
SCTP (and maybe other protocols) over and over again, use an
update() + combine() callback internally to allow for walking
over the skb with different algorithms.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 13 ++++++++++---
 include/net/checksum.h |  6 ++++++
 net/core/skbuff.c      | 31 +++++++++++++++++++++----------
 3 files changed, 37 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2c154976394b..44727b5d4981 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2360,8 +2360,6 @@ int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset,
 void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb);
 int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags);
-__wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
-		    __wsum csum);
 int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
 int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
 __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
@@ -2373,9 +2371,18 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
 int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
 void skb_scrub_packet(struct sk_buff *skb, bool xnet);
-
 struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
 
+struct skb_checksum_ops {
+	__wsum (*update)(const void *mem, int len, __wsum wsum);
+	__wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len);
+};
+
+__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
+		      __wsum csum, const struct skb_checksum_ops *ops);
+__wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
+		    __wsum csum);
+
 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
 				       int len, void *buffer)
 {
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 8f59ca50477c..15f33fde826e 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -78,6 +78,12 @@ csum_block_add(__wsum csum, __wsum csum2, int offset)
 	return csum_add(csum, (__force __wsum)sum);
 }
 
+static inline __wsum
+csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len)
+{
+	return csum_block_add(csum, csum2, offset);
+}
+
 static inline __wsum
 csum_block_sub(__wsum csum, __wsum csum2, int offset)
 {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0ab32faa520f..31aab5376cb4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1928,9 +1928,8 @@ fault:
 EXPORT_SYMBOL(skb_store_bits);
 
 /* Checksum skb data. */
-
-__wsum skb_checksum(const struct sk_buff *skb, int offset,
-			  int len, __wsum csum)
+__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
+		      __wsum csum, const struct skb_checksum_ops *ops)
 {
 	int start = skb_headlen(skb);
 	int i, copy = start - offset;
@@ -1941,7 +1940,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 	if (copy > 0) {
 		if (copy > len)
 			copy = len;
-		csum = csum_partial(skb->data + offset, copy, csum);
+		csum = ops->update(skb->data + offset, copy, csum);
 		if ((len -= copy) == 0)
 			return csum;
 		offset += copy;
@@ -1962,10 +1961,10 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 			if (copy > len)
 				copy = len;
 			vaddr = kmap_atomic(skb_frag_page(frag));
-			csum2 = csum_partial(vaddr + frag->page_offset +
-					     offset - start, copy, 0);
+			csum2 = ops->update(vaddr + frag->page_offset +
+					    offset - start, copy, 0);
 			kunmap_atomic(vaddr);
-			csum = csum_block_add(csum, csum2, pos);
+			csum = ops->combine(csum, csum2, pos, copy);
 			if (!(len -= copy))
 				return csum;
 			offset += copy;
@@ -1984,9 +1983,9 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 			__wsum csum2;
 			if (copy > len)
 				copy = len;
-			csum2 = skb_checksum(frag_iter, offset - start,
-					     copy, 0);
-			csum = csum_block_add(csum, csum2, pos);
+			csum2 = __skb_checksum(frag_iter, offset - start,
+					       copy, 0, ops);
+			csum = ops->combine(csum, csum2, pos, copy);
 			if ((len -= copy) == 0)
 				return csum;
 			offset += copy;
@@ -1998,6 +1997,18 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 
 	return csum;
 }
+EXPORT_SYMBOL(__skb_checksum);
+
+__wsum skb_checksum(const struct sk_buff *skb, int offset,
+		    int len, __wsum csum)
+{
+	const struct skb_checksum_ops ops = {
+		.update  = csum_partial,
+		.combine = csum_block_add_ext,
+	};
+
+	return __skb_checksum(skb, offset, len, csum, &ops);
+}
 EXPORT_SYMBOL(skb_checksum);
 
 /* Both of above in one bottle. */
-- 
cgit v1.2.3-71-gd317


From 74d332c13b2148ae934ea94dac1745ae92efe8e5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 30 Oct 2013 13:10:44 -0700
Subject: net: extend net_device allocation to vmalloc()

Joby Poriyath provided a xen-netback patch to reduce the size of
xenvif structure as some netdev allocation could fail under
memory pressure/fragmentation.

This patch is handling the problem at the core level, allowing
any netdev structures to use vmalloc() if kmalloc() failed.

As vmalloc() adds overhead on a critical network path, add __GFP_REPEAT
to kzalloc() flags to do this fallback only when really needed.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Joby Poriyath <joby.poriyath@citrix.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/netdevices.txt | 10 +++++-----
 include/linux/netdevice.h               |  1 +
 net/core/dev.c                          | 22 +++++++++++++++++-----
 net/core/net-sysfs.c                    |  2 +-
 4 files changed, 24 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt
index c7ecc7080494..0b1cf6b2a592 100644
--- a/Documentation/networking/netdevices.txt
+++ b/Documentation/networking/netdevices.txt
@@ -10,12 +10,12 @@ network devices.
 struct net_device allocation rules
 ==================================
 Network device structures need to persist even after module is unloaded and
-must be allocated with kmalloc.  If device has registered successfully,
-it will be freed on last use by free_netdev.  This is required to handle the
-pathologic case cleanly (example: rmmod mydriver </sys/class/net/myeth/mtu )
+must be allocated with alloc_netdev_mqs() and friends.
+If device has registered successfully, it will be freed on last use
+by free_netdev(). This is required to handle the pathologic case cleanly
+(example: rmmod mydriver </sys/class/net/myeth/mtu )
 
-There are routines in net_init.c to handle the common cases of
-alloc_etherdev, alloc_netdev.  These reserve extra space for driver
+alloc_netdev_mqs()/alloc_netdev() reserve extra space for driver
 private data which gets freed when the network device is freed. If
 separately allocated data is attached to the network device
 (netdev_priv(dev)) then it is up to the module exit handler to free that.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cb1d918ecdf1..e6353cafbf05 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1800,6 +1800,7 @@ static inline void unregister_netdevice(struct net_device *dev)
 
 int netdev_refcnt_read(const struct net_device *dev);
 void free_netdev(struct net_device *dev);
+void netdev_freemem(struct net_device *dev);
 void synchronize_net(void);
 int init_dummy_netdev(struct net_device *dev);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 0054c8c75f50..0e6136546a8c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6196,6 +6196,16 @@ void netdev_set_default_ethtool_ops(struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
 
+void netdev_freemem(struct net_device *dev)
+{
+	char *addr = (char *)dev - dev->padded;
+
+	if (is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		kfree(addr);
+}
+
 /**
  *	alloc_netdev_mqs - allocate network device
  *	@sizeof_priv:	size of private data to allocate space for
@@ -6239,7 +6249,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	/* ensure 32-byte alignment of whole construct */
 	alloc_size += NETDEV_ALIGN - 1;
 
-	p = kzalloc(alloc_size, GFP_KERNEL);
+	p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+	if (!p)
+		p = vzalloc(alloc_size);
 	if (!p)
 		return NULL;
 
@@ -6248,7 +6260,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
 	dev->pcpu_refcnt = alloc_percpu(int);
 	if (!dev->pcpu_refcnt)
-		goto free_p;
+		goto free_dev;
 
 	if (dev_addr_init(dev))
 		goto free_pcpu;
@@ -6301,8 +6313,8 @@ free_pcpu:
 	kfree(dev->_rx);
 #endif
 
-free_p:
-	kfree(p);
+free_dev:
+	netdev_freemem(dev);
 	return NULL;
 }
 EXPORT_SYMBOL(alloc_netdev_mqs);
@@ -6339,7 +6351,7 @@ void free_netdev(struct net_device *dev)
 
 	/*  Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED) {
-		kfree((char *)dev - dev->padded);
+		netdev_freemem(dev);
 		return;
 	}
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index d954b56b4e47..d03f2c9750fa 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1263,7 +1263,7 @@ static void netdev_release(struct device *d)
 	BUG_ON(dev->reg_state != NETREG_RELEASED);
 
 	kfree(dev->ifalias);
-	kfree((char *)dev - dev->padded);
+	netdev_freemem(dev);
 }
 
 static const void *net_namespace(struct device *d)
-- 
cgit v1.2.3-71-gd317


From acddd5dd44d4fd9b45dd5ee69cd8b183052b1cdc Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Sun, 3 Nov 2013 10:03:18 +0200
Subject: net/mlx4_core: Fix reg/unreg vlan/mac to conform to the firmware spec

The functions mlx4_register_vlan, mlx4_unregister_vlan, mlx4_register_mac,
mlx4_unregister_mac all made illegal use of the out_param in multifunc mode
to pass the port number. The firmware spec specifies that the port number
should be passed in bits 8..15 of the input-modifier field for ALLOC_RES and
FREE_RES (sections 20.15.1 and 20.15.2).

For MAC register/unregister, this patch contains workarounds so that guests
running previous kernels continue to work on a new Hypervisor, and guests
running the new kernel will continue to work on old hypervisors.

Vlan registeration capability is still not operational in multifunction mode,
since the vlan wrapper functions are not implemented in this patch.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/port.c          | 45 +++++++++++++++-------
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  | 28 ++++++++------
 include/linux/mlx4/device.h                        |  1 +
 3 files changed, 49 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index d3d3106f588f..9433c1f6b0d4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -178,13 +178,24 @@ EXPORT_SYMBOL_GPL(__mlx4_register_mac);
 int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 {
 	u64 out_param = 0;
-	int err;
+	int err = -EINVAL;
 
 	if (mlx4_is_mfunc(dev)) {
-		set_param_l(&out_param, port);
-		err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC,
-				   RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
-				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+		if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) {
+			err = mlx4_cmd_imm(dev, mac, &out_param,
+					   ((u32) port) << 8 | (u32) RES_MAC,
+					   RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+					   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+		}
+		if (err && err == -EINVAL && mlx4_is_slave(dev)) {
+			/* retry using old REG_MAC format */
+			set_param_l(&out_param, port);
+			err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC,
+					   RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+					   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+			if (!err)
+				dev->flags |= MLX4_FLAG_OLD_REG_MAC;
+		}
 		if (err)
 			return err;
 
@@ -231,10 +242,18 @@ void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 	u64 out_param = 0;
 
 	if (mlx4_is_mfunc(dev)) {
-		set_param_l(&out_param, port);
-		(void) mlx4_cmd_imm(dev, mac, &out_param, RES_MAC,
-				    RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES,
-				    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+		if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) {
+			(void) mlx4_cmd_imm(dev, mac, &out_param,
+					    ((u32) port) << 8 | (u32) RES_MAC,
+					    RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES,
+					    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+		} else {
+			/* use old unregister mac format */
+			set_param_l(&out_param, port);
+			(void) mlx4_cmd_imm(dev, mac, &out_param, RES_MAC,
+					    RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES,
+					    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+		}
 		return;
 	}
 	__mlx4_unregister_mac(dev, port, mac);
@@ -374,8 +393,8 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
 		return -EINVAL;
 
 	if (mlx4_is_mfunc(dev)) {
-		set_param_l(&out_param, port);
-		err = mlx4_cmd_imm(dev, vlan, &out_param, RES_VLAN,
+		err = mlx4_cmd_imm(dev, vlan, &out_param,
+				   ((u32) port) << 8 | (u32) RES_VLAN,
 				   RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
 				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 		if (!err)
@@ -418,8 +437,8 @@ void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
 	u64 out_param = 0;
 
 	if (mlx4_is_mfunc(dev)) {
-		set_param_l(&out_param, port);
-		(void) mlx4_cmd_imm(dev, index, &out_param, RES_VLAN,
+		(void) mlx4_cmd_imm(dev, index, &out_param,
+				    ((u32) port) << 8 | (u32) RES_VLAN,
 				    RES_OP_RESERVE_AND_MAP,
 				    MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
 				    MLX4_CMD_WRAPPED);
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index dd6876321116..a5aa3be554fe 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -1443,7 +1443,7 @@ static void rem_slave_macs(struct mlx4_dev *dev, int slave)
 }
 
 static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
-			 u64 in_param, u64 *out_param)
+			 u64 in_param, u64 *out_param, int in_port)
 {
 	int err = -EINVAL;
 	int port;
@@ -1452,7 +1452,7 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 	if (op != RES_OP_RESERVE_AND_MAP)
 		return err;
 
-	port = get_param_l(out_param);
+	port = !in_port ? get_param_l(out_param) : in_port;
 	mac = in_param;
 
 	err = __mlx4_register_mac(dev, port, mac);
@@ -1470,7 +1470,7 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 }
 
 static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
-			 u64 in_param, u64 *out_param)
+			 u64 in_param, u64 *out_param, int port)
 {
 	return 0;
 }
@@ -1528,7 +1528,7 @@ int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave,
 	int err;
 	int alop = vhcr->op_modifier;
 
-	switch (vhcr->in_modifier) {
+	switch (vhcr->in_modifier & 0xFF) {
 	case RES_QP:
 		err = qp_alloc_res(dev, slave, vhcr->op_modifier, alop,
 				   vhcr->in_param, &vhcr->out_param);
@@ -1556,12 +1556,14 @@ int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave,
 
 	case RES_MAC:
 		err = mac_alloc_res(dev, slave, vhcr->op_modifier, alop,
-				    vhcr->in_param, &vhcr->out_param);
+				    vhcr->in_param, &vhcr->out_param,
+				    (vhcr->in_modifier >> 8) & 0xFF);
 		break;
 
 	case RES_VLAN:
 		err = vlan_alloc_res(dev, slave, vhcr->op_modifier, alop,
-				    vhcr->in_param, &vhcr->out_param);
+				     vhcr->in_param, &vhcr->out_param,
+				     (vhcr->in_modifier >> 8) & 0xFF);
 		break;
 
 	case RES_COUNTER:
@@ -1730,14 +1732,14 @@ static int srq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 }
 
 static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
-			    u64 in_param, u64 *out_param)
+			    u64 in_param, u64 *out_param, int in_port)
 {
 	int port;
 	int err = 0;
 
 	switch (op) {
 	case RES_OP_RESERVE_AND_MAP:
-		port = get_param_l(out_param);
+		port = !in_port ? get_param_l(out_param) : in_port;
 		mac_del_from_slave(dev, slave, in_param, port);
 		__mlx4_unregister_mac(dev, port, in_param);
 		break;
@@ -1751,7 +1753,7 @@ static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 }
 
 static int vlan_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
-			    u64 in_param, u64 *out_param)
+			    u64 in_param, u64 *out_param, int port)
 {
 	return 0;
 }
@@ -1803,7 +1805,7 @@ int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave,
 	int err = -EINVAL;
 	int alop = vhcr->op_modifier;
 
-	switch (vhcr->in_modifier) {
+	switch (vhcr->in_modifier & 0xFF) {
 	case RES_QP:
 		err = qp_free_res(dev, slave, vhcr->op_modifier, alop,
 				  vhcr->in_param);
@@ -1831,12 +1833,14 @@ int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave,
 
 	case RES_MAC:
 		err = mac_free_res(dev, slave, vhcr->op_modifier, alop,
-				   vhcr->in_param, &vhcr->out_param);
+				   vhcr->in_param, &vhcr->out_param,
+				   (vhcr->in_modifier >> 8) & 0xFF);
 		break;
 
 	case RES_VLAN:
 		err = vlan_free_res(dev, slave, vhcr->op_modifier, alop,
-				   vhcr->in_param, &vhcr->out_param);
+				    vhcr->in_param, &vhcr->out_param,
+				    (vhcr->in_modifier >> 8) & 0xFF);
 		break;
 
 	case RES_COUNTER:
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 9ad0c18495ad..297a16309f00 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -54,6 +54,7 @@ enum {
 	MLX4_FLAG_MASTER	= 1 << 2,
 	MLX4_FLAG_SLAVE		= 1 << 3,
 	MLX4_FLAG_SRIOV		= 1 << 4,
+	MLX4_FLAG_OLD_REG_MAC	= 1 << 6,
 };
 
 enum {
-- 
cgit v1.2.3-71-gd317


From 2009d0059c084288f060b1ffe3d14229588acb67 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Sun, 3 Nov 2013 10:03:19 +0200
Subject: net/mlx4_en: Use vlan id instead of vlan index for unregistration

Use of vlan_index created problems unregistering vlans on guests.

In addition, tools delete vlan by tag, not by index, lets follow that.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c           |  2 +-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c     |  6 +----
 drivers/net/ethernet/mellanox/mlx4/mlx4.h          |  2 +-
 drivers/net/ethernet/mellanox/mlx4/port.c          | 27 ++++++++++++----------
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  |  2 +-
 include/linux/mlx4/device.h                        |  2 +-
 6 files changed, 20 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index ae8eb4c4fb6c..887d62576f54 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1687,7 +1687,7 @@ static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave
 		vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
 		if (NO_INDX != vp_oper->vlan_idx) {
 			__mlx4_unregister_vlan(&priv->dev,
-					       port, vp_oper->vlan_idx);
+					       port, vp_oper->state.default_vlan);
 			vp_oper->vlan_idx = NO_INDX;
 		}
 		if (NO_INDX != vp_oper->mac_idx) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 85d91665d400..b5554121aca4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -417,7 +417,6 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev,
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
 	int err;
-	int idx;
 
 	en_dbg(HW, priv, "Killing VID:%d\n", vid);
 
@@ -425,10 +424,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev,
 
 	/* Remove VID from port VLAN filter */
 	mutex_lock(&mdev->state_lock);
-	if (!mlx4_find_cached_vlan(mdev->dev, priv->port, vid, &idx))
-		mlx4_unregister_vlan(mdev->dev, priv->port, idx);
-	else
-		en_dbg(HW, priv, "could not find vid %d in cache\n", vid);
+	mlx4_unregister_vlan(mdev->dev, priv->port, vid);
 
 	if (mdev->device_up && priv->port_up) {
 		err = mlx4_SET_VLAN_FLTR(mdev->dev, priv);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 348bb8c7d9a7..f2ad4f61f58c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1111,7 +1111,7 @@ int mlx4_change_port_types(struct mlx4_dev *dev,
 
 void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table);
 void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table);
-void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index);
+void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
 int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
 
 int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz);
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 9433c1f6b0d4..caaa15470395 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -406,23 +406,26 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
 }
 EXPORT_SYMBOL_GPL(mlx4_register_vlan);
 
-void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
+void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan)
 {
 	struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
+	int index;
 
-	if (index < MLX4_VLAN_REGULAR) {
-		mlx4_warn(dev, "Trying to free special vlan index %d\n", index);
-		return;
+	mutex_lock(&table->mutex);
+	if (mlx4_find_cached_vlan(dev, port, vlan, &index)) {
+		mlx4_warn(dev, "vlan 0x%x is not in the vlan table\n", vlan);
+		goto out;
 	}
 
-	mutex_lock(&table->mutex);
-	if (!table->refs[index]) {
-		mlx4_warn(dev, "No vlan entry for index %d\n", index);
+	if (index < MLX4_VLAN_REGULAR) {
+		mlx4_warn(dev, "Trying to free special vlan index %d\n", index);
 		goto out;
 	}
+
 	if (--table->refs[index]) {
-		mlx4_dbg(dev, "Have more references for index %d,"
-			 "no need to modify vlan table\n", index);
+		mlx4_dbg(dev, "Have %d more references for index %d,"
+			 "no need to modify vlan table\n", table->refs[index],
+			 index);
 		goto out;
 	}
 	table->entries[index] = 0;
@@ -432,19 +435,19 @@ out:
 	mutex_unlock(&table->mutex);
 }
 
-void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
+void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan)
 {
 	u64 out_param = 0;
 
 	if (mlx4_is_mfunc(dev)) {
-		(void) mlx4_cmd_imm(dev, index, &out_param,
+		(void) mlx4_cmd_imm(dev, vlan, &out_param,
 				    ((u32) port) << 8 | (u32) RES_VLAN,
 				    RES_OP_RESERVE_AND_MAP,
 				    MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
 				    MLX4_CMD_WRAPPED);
 		return;
 	}
-	__mlx4_unregister_vlan(dev, port, index);
+	__mlx4_unregister_vlan(dev, port, vlan);
 }
 EXPORT_SYMBOL_GPL(mlx4_unregister_vlan);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index a5aa3be554fe..993a2ef13866 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -4085,7 +4085,7 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work)
 	if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_VLAN && !errors &&
 	    NO_INDX != work->orig_vlan_ix)
 		__mlx4_unregister_vlan(&work->priv->dev, work->port,
-				       work->orig_vlan_ix);
+				       work->orig_vlan_id);
 out:
 	kfree(work);
 	return;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 297a16309f00..e2e92885bdc1 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1079,7 +1079,7 @@ int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
 		u8 *pg, u16 *ratelimit);
 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
-void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index);
+void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
 
 int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
 		      int npages, u64 iova, u32 *lkey, u32 *rkey);
-- 
cgit v1.2.3-71-gd317


From 5a0d0a6161aecbbc76e4c1d2b82e4c7cef88bb29 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Sun, 3 Nov 2013 10:03:23 +0200
Subject: mlx4: Structures and init/teardown for VF resource quotas

This is step #1 for implementing SRIOV resource quotas for VFs.

Quotas are implemented per resource type for VFs and the PF, to prevent
any entity from simply grabbing all the resources for itself and leaving
the other entities unable to obtain such resources.

Resources which are allocated using quotas:  QPs, CQs, SRQs, MPTs, MTTs, MAC,
                                             VLAN, and Counters.

The quota system works as follows:
Each entity (VF or PF) is given a max number of a given resource (its quota),
and a guaranteed minimum number for each resource (starvation prevention).

For QPs, CQs, SRQs, MPTs and MTTs:
50% of the available quantity for the resource is divided equally among
the PF and all the active VFs (i.e., the number of VFs in the mlx4_core module
parameter "num_vfs"). This 50% represents the "guaranteed minimum" pool.
The other 50% is the "free pool", allocated on a first-come-first-serve basis.
For each VF/PF, resources are first allocated from its "guaranteed-minimum"
pool. When that pool is exhausted, the driver attempts to allocate from
the resource "free-pool".

The quota (i.e., max) for the VFs and the PF is:
  The free-pool amount (50% of the real max) + the guaranteed minimum

For MACs:
  Guarantee 2 MACs per VF/PF per port. As a result, since we have only
  128 MACs per port, reduce the allowable number of VFs from 64 to 63.
  Any remaining MACs are put into a free pool.

For VLANs:
  For the PF, the per-port quota is 128 and guarantee is 64
     (to allow the PF to register at least a VLAN per VF in VST mode).
  For the VFs, the per-port quota is 64 and the guarantee is 0.
      We assume that VGT VFs are trusted not to abuse the VLAN resource.

For Counters:
  For all functions (PF and VFs), the quota is 128 and the guarantee is 0.

In this patch, we define the needed structures, which are added to the
resource-tracker struct.  In addition, we do initialization
for the resource quota, and adjust the query_device response to use quotas
rather than resource maxima.

As part of the implementation, we introduce a new field in
mlx4_dev: quotas.  This field holds the resource quotas used
to report maxima to the upper layers (ib_core, via query_device).

The HCA maxima of these values are passed to the VFs (via
QUERY_HCA) so that they may continue to use these in handling
QPs, CQs, SRQs and MPTs.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx4/main.c                  |   8 +-
 drivers/net/ethernet/mellanox/mlx4/fw.c            |  11 +-
 drivers/net/ethernet/mellanox/mlx4/main.c          |  32 +++--
 drivers/net/ethernet/mellanox/mlx4/mlx4.h          |  17 +++
 drivers/net/ethernet/mellanox/mlx4/qp.c            |   3 +-
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  | 157 ++++++++++++++++++++-
 include/linux/mlx4/device.h                        |  17 +++
 7 files changed, 222 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index f0612645de99..7567437dbd34 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -177,18 +177,18 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 
 	props->max_mr_size	   = ~0ull;
 	props->page_size_cap	   = dev->dev->caps.page_size_cap;
-	props->max_qp		   = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps;
+	props->max_qp		   = dev->dev->quotas.qp;
 	props->max_qp_wr	   = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
 	props->max_sge		   = min(dev->dev->caps.max_sq_sg,
 					 dev->dev->caps.max_rq_sg);
-	props->max_cq		   = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs;
+	props->max_cq		   = dev->dev->quotas.cq;
 	props->max_cqe		   = dev->dev->caps.max_cqes;
-	props->max_mr		   = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws;
+	props->max_mr		   = dev->dev->quotas.mpt;
 	props->max_pd		   = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
 	props->max_qp_rd_atom	   = dev->dev->caps.max_qp_dest_rdma;
 	props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
 	props->max_res_rd_atom	   = props->max_qp_rd_atom * props->max_qp;
-	props->max_srq		   = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
+	props->max_srq		   = dev->dev->quotas.srq;
 	props->max_srq_wr	   = dev->dev->caps.max_srq_wqes - 1;
 	props->max_srq_sge	   = dev->dev->caps.max_srq_sge;
 	props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index c151e7a6710a..f8c88c3ad9fc 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -177,6 +177,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 				struct mlx4_cmd_mailbox *outbox,
 				struct mlx4_cmd_info *cmd)
 {
+	struct mlx4_priv *priv = mlx4_priv(dev);
 	u8	field;
 	u32	size;
 	int	err = 0;
@@ -250,13 +251,13 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 		field = 0; /* protected FMR support not available as yet */
 		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FMR_OFFSET);
 
-		size = dev->caps.num_qps;
+		size = priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[slave];
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET);
 
-		size = dev->caps.num_srqs;
+		size = priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[slave];
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET);
 
-		size = dev->caps.num_cqs;
+		size = priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[slave];
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET);
 
 		size = dev->caps.num_eqs;
@@ -265,10 +266,10 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 		size = dev->caps.reserved_eqs;
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
 
-		size = dev->caps.num_mpts;
+		size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave];
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET);
 
-		size = dev->caps.num_mtts;
+		size = priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[slave];
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET);
 
 		size = dev->caps.num_mgms + dev->caps.num_amgms;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 179d26709c94..7d2628dfdc29 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -562,13 +562,17 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 	}
 
 	dev->caps.num_ports		= func_cap.num_ports;
-	dev->caps.num_qps		= func_cap.qp_quota;
-	dev->caps.num_srqs		= func_cap.srq_quota;
-	dev->caps.num_cqs		= func_cap.cq_quota;
-	dev->caps.num_eqs               = func_cap.max_eq;
-	dev->caps.reserved_eqs          = func_cap.reserved_eq;
-	dev->caps.num_mpts		= func_cap.mpt_quota;
-	dev->caps.num_mtts		= func_cap.mtt_quota;
+	dev->quotas.qp			= func_cap.qp_quota;
+	dev->quotas.srq			= func_cap.srq_quota;
+	dev->quotas.cq			= func_cap.cq_quota;
+	dev->quotas.mpt			= func_cap.mpt_quota;
+	dev->quotas.mtt			= func_cap.mtt_quota;
+	dev->caps.num_qps		= 1 << hca_param.log_num_qps;
+	dev->caps.num_srqs		= 1 << hca_param.log_num_srqs;
+	dev->caps.num_cqs		= 1 << hca_param.log_num_cqs;
+	dev->caps.num_mpts		= 1 << hca_param.log_mpt_sz;
+	dev->caps.num_eqs		= func_cap.max_eq;
+	dev->caps.reserved_eqs		= func_cap.reserved_eq;
 	dev->caps.num_pds               = MLX4_NUM_PDS;
 	dev->caps.num_mgms              = 0;
 	dev->caps.num_amgms             = 0;
@@ -2102,9 +2106,15 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
 			"aborting.\n");
 		return err;
 	}
-	if (num_vfs > MLX4_MAX_NUM_VF) {
-		printk(KERN_ERR "There are more VF's (%d) than allowed(%d)\n",
-		       num_vfs, MLX4_MAX_NUM_VF);
+
+	/* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS
+	 * per port, we must limit the number of VFs to 63 (since their are
+	 * 128 MACs)
+	 */
+	if (num_vfs >= MLX4_MAX_NUM_VF) {
+		dev_err(&pdev->dev,
+			"Requested more VF's (%d) than allowed (%d)\n",
+			num_vfs, MLX4_MAX_NUM_VF - 1);
 		return -EINVAL;
 	}
 
@@ -2322,6 +2332,8 @@ slave_start:
 	if (err)
 		goto err_steer;
 
+	mlx4_init_quotas(dev);
+
 	for (port = 1; port <= dev->caps.num_ports; port++) {
 		err = mlx4_init_port_info(dev, port);
 		if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 97941269bc14..e7eb86ecc6ea 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -504,12 +504,27 @@ struct slave_list {
 	struct list_head res_list[MLX4_NUM_OF_RESOURCE_TYPE];
 };
 
+struct resource_allocator {
+	union {
+		int res_reserved;
+		int res_port_rsvd[MLX4_MAX_PORTS];
+	};
+	union {
+		int res_free;
+		int res_port_free[MLX4_MAX_PORTS];
+	};
+	int *quota;
+	int *allocated;
+	int *guaranteed;
+};
+
 struct mlx4_resource_tracker {
 	spinlock_t lock;
 	/* tree for each resources */
 	struct rb_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE];
 	/* num_of_slave's lists, one per slave */
 	struct slave_list *slave_list;
+	struct resource_allocator res_alloc[MLX4_NUM_OF_RESOURCE_TYPE];
 };
 
 #define SLAVE_EVENT_EQ_SIZE	128
@@ -1253,4 +1268,6 @@ static inline spinlock_t *mlx4_tlock(struct mlx4_dev *dev)
 
 void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work);
 
+void mlx4_init_quotas(struct mlx4_dev *dev);
+
 #endif /* MLX4_H */
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index e891b058c1be..2715e61dbb74 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -480,8 +480,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
 	*/
 
 	err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps,
-			       (1 << 23) - 1, dev->phys_caps.base_sqpn + 8 +
-			       16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev),
+			       (1 << 23) - 1, mlx4_num_reserved_sqps(dev),
 			       reserved_from_top);
 	if (err)
 		return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 35863889bec0..cc5d6d0aad16 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -284,10 +284,59 @@ static const char *ResourceType(enum mlx4_resource rt)
 }
 
 static void rem_slave_vlans(struct mlx4_dev *dev, int slave);
+static inline void initialize_res_quotas(struct mlx4_dev *dev,
+					 struct resource_allocator *res_alloc,
+					 enum mlx4_resource res_type,
+					 int vf, int num_instances)
+{
+	res_alloc->guaranteed[vf] = num_instances / (2 * (dev->num_vfs + 1));
+	res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf];
+	if (vf == mlx4_master_func_num(dev)) {
+		res_alloc->res_free = num_instances;
+		if (res_type == RES_MTT) {
+			/* reserved mtts will be taken out of the PF allocation */
+			res_alloc->res_free += dev->caps.reserved_mtts;
+			res_alloc->guaranteed[vf] += dev->caps.reserved_mtts;
+			res_alloc->quota[vf] += dev->caps.reserved_mtts;
+		}
+	}
+}
+
+void mlx4_init_quotas(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int pf;
+
+	/* quotas for VFs are initialized in mlx4_slave_cap */
+	if (mlx4_is_slave(dev))
+		return;
+
+	if (!mlx4_is_mfunc(dev)) {
+		dev->quotas.qp = dev->caps.num_qps - dev->caps.reserved_qps -
+			mlx4_num_reserved_sqps(dev);
+		dev->quotas.cq = dev->caps.num_cqs - dev->caps.reserved_cqs;
+		dev->quotas.srq = dev->caps.num_srqs - dev->caps.reserved_srqs;
+		dev->quotas.mtt = dev->caps.num_mtts - dev->caps.reserved_mtts;
+		dev->quotas.mpt = dev->caps.num_mpts - dev->caps.reserved_mrws;
+		return;
+	}
+
+	pf = mlx4_master_func_num(dev);
+	dev->quotas.qp =
+		priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[pf];
+	dev->quotas.cq =
+		priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[pf];
+	dev->quotas.srq =
+		priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[pf];
+	dev->quotas.mtt =
+		priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[pf];
+	dev->quotas.mpt =
+		priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf];
+}
 int mlx4_init_resource_tracker(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
-	int i;
+	int i, j;
 	int t;
 
 	priv->mfunc.master.res_tracker.slave_list =
@@ -308,8 +357,104 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev)
 	for (i = 0 ; i < MLX4_NUM_OF_RESOURCE_TYPE; i++)
 		priv->mfunc.master.res_tracker.res_tree[i] = RB_ROOT;
 
+	for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) {
+		struct resource_allocator *res_alloc =
+			&priv->mfunc.master.res_tracker.res_alloc[i];
+		res_alloc->quota = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
+		res_alloc->guaranteed = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
+		if (i == RES_MAC || i == RES_VLAN)
+			res_alloc->allocated = kzalloc(MLX4_MAX_PORTS *
+						       (dev->num_vfs + 1) * sizeof(int),
+							GFP_KERNEL);
+		else
+			res_alloc->allocated = kzalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
+
+		if (!res_alloc->quota || !res_alloc->guaranteed ||
+		    !res_alloc->allocated)
+			goto no_mem_err;
+
+		for (t = 0; t < dev->num_vfs + 1; t++) {
+			switch (i) {
+			case RES_QP:
+				initialize_res_quotas(dev, res_alloc, RES_QP,
+						      t, dev->caps.num_qps -
+						      dev->caps.reserved_qps -
+						      mlx4_num_reserved_sqps(dev));
+				break;
+			case RES_CQ:
+				initialize_res_quotas(dev, res_alloc, RES_CQ,
+						      t, dev->caps.num_cqs -
+						      dev->caps.reserved_cqs);
+				break;
+			case RES_SRQ:
+				initialize_res_quotas(dev, res_alloc, RES_SRQ,
+						      t, dev->caps.num_srqs -
+						      dev->caps.reserved_srqs);
+				break;
+			case RES_MPT:
+				initialize_res_quotas(dev, res_alloc, RES_MPT,
+						      t, dev->caps.num_mpts -
+						      dev->caps.reserved_mrws);
+				break;
+			case RES_MTT:
+				initialize_res_quotas(dev, res_alloc, RES_MTT,
+						      t, dev->caps.num_mtts -
+						      dev->caps.reserved_mtts);
+				break;
+			case RES_MAC:
+				if (t == mlx4_master_func_num(dev)) {
+					res_alloc->quota[t] = MLX4_MAX_MAC_NUM;
+					res_alloc->guaranteed[t] = 2;
+					for (j = 0; j < MLX4_MAX_PORTS; j++)
+						res_alloc->res_port_free[j] = MLX4_MAX_MAC_NUM;
+				} else {
+					res_alloc->quota[t] = MLX4_MAX_MAC_NUM;
+					res_alloc->guaranteed[t] = 2;
+				}
+				break;
+			case RES_VLAN:
+				if (t == mlx4_master_func_num(dev)) {
+					res_alloc->quota[t] = MLX4_MAX_VLAN_NUM;
+					res_alloc->guaranteed[t] = MLX4_MAX_VLAN_NUM / 2;
+					for (j = 0; j < MLX4_MAX_PORTS; j++)
+						res_alloc->res_port_free[j] =
+							res_alloc->quota[t];
+				} else {
+					res_alloc->quota[t] = MLX4_MAX_VLAN_NUM / 2;
+					res_alloc->guaranteed[t] = 0;
+				}
+				break;
+			case RES_COUNTER:
+				res_alloc->quota[t] = dev->caps.max_counters;
+				res_alloc->guaranteed[t] = 0;
+				if (t == mlx4_master_func_num(dev))
+					res_alloc->res_free = res_alloc->quota[t];
+				break;
+			default:
+				break;
+			}
+			if (i == RES_MAC || i == RES_VLAN) {
+				for (j = 0; j < MLX4_MAX_PORTS; j++)
+					res_alloc->res_port_rsvd[j] +=
+						res_alloc->guaranteed[t];
+			} else {
+				res_alloc->res_reserved += res_alloc->guaranteed[t];
+			}
+		}
+	}
 	spin_lock_init(&priv->mfunc.master.res_tracker.lock);
-	return 0 ;
+	return 0;
+
+no_mem_err:
+	for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) {
+		kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated);
+		priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL;
+		kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed);
+		priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL;
+		kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota);
+		priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL;
+	}
+	return -ENOMEM;
 }
 
 void mlx4_free_resource_tracker(struct mlx4_dev *dev,
@@ -333,6 +478,14 @@ void mlx4_free_resource_tracker(struct mlx4_dev *dev,
 		}
 
 		if (type != RES_TR_FREE_SLAVES_ONLY) {
+			for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) {
+				kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated);
+				priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL;
+				kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed);
+				priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL;
+				kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota);
+				priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL;
+			}
 			kfree(priv->mfunc.master.res_tracker.slave_list);
 			priv->mfunc.master.res_tracker.slave_list = NULL;
 		}
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index e2e92885bdc1..f6f59271f857 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -641,12 +641,23 @@ struct mlx4_counter {
 	__be64	tx_bytes;
 };
 
+struct mlx4_quotas {
+	int qp;
+	int cq;
+	int srq;
+	int mpt;
+	int mtt;
+	int counter;
+	int xrcd;
+};
+
 struct mlx4_dev {
 	struct pci_dev	       *pdev;
 	unsigned long		flags;
 	unsigned long		num_slaves;
 	struct mlx4_caps	caps;
 	struct mlx4_phys_caps	phys_caps;
+	struct mlx4_quotas	quotas;
 	struct radix_tree_root	qp_table_tree;
 	u8			rev_id;
 	char			board_id[MLX4_BOARD_ID_LEN];
@@ -772,6 +783,12 @@ static inline int mlx4_is_master(struct mlx4_dev *dev)
 	return dev->flags & MLX4_FLAG_MASTER;
 }
 
+static inline int mlx4_num_reserved_sqps(struct mlx4_dev *dev)
+{
+	return dev->phys_caps.base_sqpn + 8 +
+		16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev);
+}
+
 static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn)
 {
 	return (qpn < dev->phys_caps.base_sqpn + 8 +
-- 
cgit v1.2.3-71-gd317


From f8e617e100d7369a0108f96abf4414e9fb82ced7 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 1 Nov 2013 14:07:47 +0800
Subject: net: introduce skb_coalesce_rx_frag()

Sometimes we need to coalesce the rx frags to avoid frag list. One example is
virtio-net driver which tries to use small frags for both MTU sized packet and
GSO packet. So this patch introduce skb_coalesce_rx_frag() to do this.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Michael Dalton <mwdalton@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  3 +++
 net/core/skbuff.c      | 12 ++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 44727b5d4981..2e153b69d318 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1372,6 +1372,9 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
 		     int size, unsigned int truesize);
 
+void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
+			  unsigned int truesize);
+
 #define SKB_PAGE_ASSERT(skb) 	BUG_ON(skb_shinfo(skb)->nr_frags)
 #define SKB_FRAG_ASSERT(skb) 	BUG_ON(skb_has_frag_list(skb))
 #define SKB_LINEAR_ASSERT(skb)  BUG_ON(skb_is_nonlinear(skb))
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e4115597b38b..3735fad5616e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -476,6 +476,18 @@ void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
 }
 EXPORT_SYMBOL(skb_add_rx_frag);
 
+void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
+			  unsigned int truesize)
+{
+	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+	skb_frag_size_add(frag, size);
+	skb->len += size;
+	skb->data_len += size;
+	skb->truesize += truesize;
+}
+EXPORT_SYMBOL(skb_coalesce_rx_frag);
+
 static void skb_drop_list(struct sk_buff **listp)
 {
 	kfree_skb_list(*listp);
-- 
cgit v1.2.3-71-gd317


From 2f69702c4db5f1c3149fd17fe30bdeb87cba9698 Mon Sep 17 00:00:00 2001
From: Enrico Mioso <mrkiko.rs@gmail.com>
Date: Mon, 4 Nov 2013 09:50:47 +0100
Subject: net: cdc_ncm: Export cdc_ncm_{tx, rx}_fixup functions for re-use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some drivers implementing NCM-like protocols, may re-use those functions, as is
the case in the huawei_cdc_ncm driver.
Export them via EXPORT_SYMBOL_GPL, in accordance with how other functions have
been exported.

Signed-off-by: Enrico Mioso <mrkiko.rs@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 6 ++++--
 include/linux/usb/cdc_ncm.h | 3 +++
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 11c703337577..1763195bd599 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -830,7 +830,7 @@ static void cdc_ncm_txpath_bh(unsigned long param)
 	}
 }
 
-static struct sk_buff *
+struct sk_buff *
 cdc_ncm_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 {
 	struct sk_buff *skb_out;
@@ -857,6 +857,7 @@ error:
 
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(cdc_ncm_tx_fixup);
 
 /* verify NTB header and return offset of first NDP, or negative error */
 int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in)
@@ -943,7 +944,7 @@ error:
 }
 EXPORT_SYMBOL_GPL(cdc_ncm_rx_verify_ndp16);
 
-static int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in)
+int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in)
 {
 	struct sk_buff *skb;
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
@@ -1019,6 +1020,7 @@ err_ndp:
 error:
 	return 0;
 }
+EXPORT_SYMBOL_GPL(cdc_ncm_rx_fixup);
 
 static void
 cdc_ncm_speed_change(struct usbnet *dev,
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 2300f7492927..c3fa80745996 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -125,5 +125,8 @@ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf);
 struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign);
 int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in);
 int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset);
+struct sk_buff *
+cdc_ncm_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags);
+int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in);
 
 #endif /* __LINUX_USB_CDC_NCM_H */
-- 
cgit v1.2.3-71-gd317


From d32435391974e39c35ade4d115f17c538a96a708 Mon Sep 17 00:00:00 2001
From: Eyal Perry <eyalpe@mellanox.com>
Date: Wed, 6 Nov 2013 15:37:23 +0200
Subject: net/vlan: Provide read access to the vlan egress map

Provide a method for read-only access to the vlan device egress mapping.

Do this by refactoring vlan_dev_get_egress_qos_mask() such that now it
receives as an argument the skb priority instead of pointer to the skb.

Such an access is needed for the IBoE stack where the control plane
goes through the network stack. This is an add-on step on top of commit
d4a968658c "net/route: export symbol ip_tos2prio" which allowed the RDMA-CM
to use ip_tos2prio.

Signed-off-by: Eyal Perry <eyalpe@mellanox.com>
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h |  9 ++++++++-
 net/8021q/vlan_dev.c    | 18 ++++++++++++------
 2 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 715c343f7c00..f3088a0112cf 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -88,7 +88,8 @@ extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev,
 					       __be16 vlan_proto, u16 vlan_id);
 extern struct net_device *vlan_dev_real_dev(const struct net_device *dev);
 extern u16 vlan_dev_vlan_id(const struct net_device *dev);
-
+extern u16 vlan_dev_get_egress_qos_mask(struct net_device *dev,
+					u32 skprio);
 extern bool vlan_do_receive(struct sk_buff **skb);
 extern struct sk_buff *vlan_untag(struct sk_buff *skb);
 
@@ -121,6 +122,12 @@ static inline u16 vlan_dev_vlan_id(const struct net_device *dev)
 	return 0;
 }
 
+static inline u16 vlan_dev_get_egress_qos_mask(struct net_device *dev,
+					       u32 skprio)
+{
+	return 0;
+}
+
 static inline bool vlan_do_receive(struct sk_buff **skb)
 {
 	return false;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 09bf1c38805b..13904a414929 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -69,15 +69,15 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb)
 }
 
 static inline u16
-vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
+__vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio)
 {
 	struct vlan_priority_tci_mapping *mp;
 
 	smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */
 
-	mp = vlan_dev_priv(dev)->egress_priority_map[(skb->priority & 0xF)];
+	mp = vlan_dev_priv(dev)->egress_priority_map[(skprio & 0xF)];
 	while (mp) {
-		if (mp->priority == skb->priority) {
+		if (mp->priority == skprio) {
 			return mp->vlan_qos; /* This should already be shifted
 					      * to mask correctly with the
 					      * VLAN's TCI */
@@ -87,6 +87,12 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
 	return 0;
 }
 
+u16 vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio)
+{
+	return __vlan_dev_get_egress_qos_mask(dev, skprio);
+}
+EXPORT_SYMBOL(vlan_dev_get_egress_qos_mask);
+
 /*
  *	Create the VLAN header for an arbitrary protocol layer
  *
@@ -111,7 +117,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
 
 		vlan_tci = vlan->vlan_id;
-		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
+		vlan_tci |= __vlan_dev_get_egress_qos_mask(dev, skb->priority);
 		vhdr->h_vlan_TCI = htons(vlan_tci);
 
 		/*
@@ -168,7 +174,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	    vlan->flags & VLAN_FLAG_REORDER_HDR) {
 		u16 vlan_tci;
 		vlan_tci = vlan->vlan_id;
-		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
+		vlan_tci |= __vlan_dev_get_egress_qos_mask(dev, skb->priority);
 		skb = __vlan_hwaccel_put_tag(skb, vlan->vlan_proto, vlan_tci);
 	}
 
@@ -253,7 +259,7 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 	np->vlan_qos = vlan_qos;
 	/* Before inserting this element in hash table, make sure all its fields
 	 * are committed to memory.
-	 * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask()
+	 * coupled with smp_rmb() in __vlan_dev_get_egress_qos_mask()
 	 */
 	smp_wmb();
 	vlan->egress_priority_map[skb_prio & 0xF] = np;
-- 
cgit v1.2.3-71-gd317


From df42153c59a38a65c6f7440d5c70d87d1c24438d Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 7 Nov 2013 10:48:49 +0300
Subject: net: make ndev->irq signed for error handling

There is a bug in cpsw_probe() where we do:

	ndev->irq = platform_get_irq(pdev, 0);
	if (ndev->irq < 0) {

The problem is that "ndev->irq" is unsigned so the error handling
doesn't work.  I have changed it to a regular int.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e6353cafbf05..b6f6efbcfc74 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1132,7 +1132,7 @@ struct net_device {
 	unsigned long		mem_end;	/* shared mem end	*/
 	unsigned long		mem_start;	/* shared mem start	*/
 	unsigned long		base_addr;	/* device I/O address	*/
-	unsigned int		irq;		/* device IRQ number	*/
+	int			irq;		/* device IRQ number	*/
 
 	/*
 	 *	Some hardware also needs these fields, but they are not
-- 
cgit v1.2.3-71-gd317


From a6cc0cfa72e0b6d9f2c8fd858aacc32313c4f272 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Wed, 6 Nov 2013 09:54:46 -0800
Subject: net: Add layer 2 hardware acceleration operations for macvlan devices

Add a operations structure that allows a network interface to export
the fact that it supports package forwarding in hardware between
physical interfaces and other mac layer devices assigned to it (such
as macvlans). This operaions structure can be used by virtual mac
devices to bypass software switching so that forwarding can be done
in hardware more efficiently.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Andy Gospodarek <andy@greyhouse.net>
CC: "David S. Miller" <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c           | 36 +++++++++++++++++++++++++++++++++++-
 include/linux/if_macvlan.h      |  1 +
 include/linux/netdev_features.h |  2 ++
 include/linux/netdevice.h       | 36 +++++++++++++++++++++++++++++++++++-
 include/uapi/linux/if.h         |  1 +
 net/core/dev.c                  | 18 +++++++++++++-----
 net/core/ethtool.c              |  1 +
 net/sched/sch_generic.c         |  2 +-
 8 files changed, 89 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index cc9845ec91c1..af4aaa5893ff 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -297,7 +297,13 @@ netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 	int ret;
 	const struct macvlan_dev *vlan = netdev_priv(dev);
 
-	ret = macvlan_queue_xmit(skb, dev);
+	if (vlan->fwd_priv) {
+		skb->dev = vlan->lowerdev;
+		ret = dev_hard_start_xmit(skb, skb->dev, NULL, vlan->fwd_priv);
+	} else {
+		ret = macvlan_queue_xmit(skb, dev);
+	}
+
 	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
 		struct macvlan_pcpu_stats *pcpu_stats;
 
@@ -347,6 +353,21 @@ static int macvlan_open(struct net_device *dev)
 		goto hash_add;
 	}
 
+	if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD) {
+		vlan->fwd_priv =
+		      lowerdev->netdev_ops->ndo_dfwd_add_station(lowerdev, dev);
+
+		/* If we get a NULL pointer back, or if we get an error
+		 * then we should just fall through to the non accelerated path
+		 */
+		if (IS_ERR_OR_NULL(vlan->fwd_priv)) {
+			vlan->fwd_priv = NULL;
+		} else {
+			dev->features &= ~NETIF_F_LLTX;
+			return 0;
+		}
+	}
+
 	err = -EBUSY;
 	if (macvlan_addr_busy(vlan->port, dev->dev_addr))
 		goto out;
@@ -367,6 +388,11 @@ hash_add:
 del_unicast:
 	dev_uc_del(lowerdev, dev->dev_addr);
 out:
+	if (vlan->fwd_priv) {
+		lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev,
+							   vlan->fwd_priv);
+		vlan->fwd_priv = NULL;
+	}
 	return err;
 }
 
@@ -375,6 +401,13 @@ static int macvlan_stop(struct net_device *dev)
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct net_device *lowerdev = vlan->lowerdev;
 
+	if (vlan->fwd_priv) {
+		lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev,
+							   vlan->fwd_priv);
+		vlan->fwd_priv = NULL;
+		return 0;
+	}
+
 	dev_uc_unsync(lowerdev, dev);
 	dev_mc_unsync(lowerdev, dev);
 
@@ -833,6 +866,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 	if (err < 0)
 		goto destroy_port;
 
+	dev->priv_flags |= IFF_MACVLAN;
 	err = netdev_upper_dev_link(lowerdev, dev);
 	if (err)
 		goto destroy_port;
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index ddd33fd5904d..c2702856295e 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -61,6 +61,7 @@ struct macvlan_dev {
 	struct hlist_node	hlist;
 	struct macvlan_port	*port;
 	struct net_device	*lowerdev;
+	void			*fwd_priv;
 	struct macvlan_pcpu_stats __percpu *pcpu_stats;
 
 	DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index b05a4b501ab5..1005ebf17575 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -62,6 +62,7 @@ enum {
 	NETIF_F_HW_VLAN_STAG_TX_BIT,	/* Transmit VLAN STAG HW acceleration */
 	NETIF_F_HW_VLAN_STAG_RX_BIT,	/* Receive VLAN STAG HW acceleration */
 	NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */
+	NETIF_F_HW_L2FW_DOFFLOAD_BIT,	/* Allow L2 Forwarding in Hardware */
 
 	/*
 	 * Add your fresh new feature above and remember to update
@@ -116,6 +117,7 @@ enum {
 #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
 #define NETIF_F_HW_VLAN_STAG_RX	__NETIF_F(HW_VLAN_STAG_RX)
 #define NETIF_F_HW_VLAN_STAG_TX	__NETIF_F(HW_VLAN_STAG_TX)
+#define NETIF_F_HW_L2FW_DOFFLOAD	__NETIF_F(HW_L2FW_DOFFLOAD)
 
 /* Features valid for ethtool to change */
 /* = all defined minus driver/device-class-related */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b6f6efbcfc74..15fa01c9a3bf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -962,6 +962,25 @@ struct netdev_phys_port_id {
  *	Called by vxlan to notify the driver about a UDP port and socket
  *	address family that vxlan is not listening to anymore. The operation
  *	is protected by the vxlan_net->sock_lock.
+ *
+ * void* (*ndo_dfwd_add_station)(struct net_device *pdev,
+ *				 struct net_device *dev)
+ *	Called by upper layer devices to accelerate switching or other
+ *	station functionality into hardware. 'pdev is the lowerdev
+ *	to use for the offload and 'dev' is the net device that will
+ *	back the offload. Returns a pointer to the private structure
+ *	the upper layer will maintain.
+ * void (*ndo_dfwd_del_station)(struct net_device *pdev, void *priv)
+ *	Called by upper layer device to delete the station created
+ *	by 'ndo_dfwd_add_station'. 'pdev' is the net device backing
+ *	the station and priv is the structure returned by the add
+ *	operation.
+ * netdev_tx_t (*ndo_dfwd_start_xmit)(struct sk_buff *skb,
+ *				      struct net_device *dev,
+ *				      void *priv);
+ *	Callback to use for xmit over the accelerated station. This
+ *	is used in place of ndo_start_xmit on accelerated net
+ *	devices.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1098,6 +1117,15 @@ struct net_device_ops {
 	void			(*ndo_del_vxlan_port)(struct  net_device *dev,
 						      sa_family_t sa_family,
 						      __be16 port);
+
+	void*			(*ndo_dfwd_add_station)(struct net_device *pdev,
+							struct net_device *dev);
+	void			(*ndo_dfwd_del_station)(struct net_device *pdev,
+							void *priv);
+
+	netdev_tx_t		(*ndo_dfwd_start_xmit) (struct sk_buff *skb,
+							struct net_device *dev,
+							void *priv);
 };
 
 /*
@@ -1195,6 +1223,7 @@ struct net_device {
 	/* Management operations */
 	const struct net_device_ops *netdev_ops;
 	const struct ethtool_ops *ethtool_ops;
+	const struct forwarding_accel_ops *fwd_ops;
 
 	/* Hardware header description */
 	const struct header_ops *header_ops;
@@ -2388,7 +2417,7 @@ int dev_change_carrier(struct net_device *, bool new_carrier);
 int dev_get_phys_port_id(struct net_device *dev,
 			 struct netdev_phys_port_id *ppid);
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
-			struct netdev_queue *txq);
+			struct netdev_queue *txq, void *accel_priv);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 
 extern int		netdev_budget;
@@ -2967,6 +2996,11 @@ static inline void netif_set_gso_max_size(struct net_device *dev,
 	dev->gso_max_size = size;
 }
 
+static inline bool netif_is_macvlan(struct net_device *dev)
+{
+	return dev->priv_flags & IFF_MACVLAN;
+}
+
 static inline bool netif_is_bond_master(struct net_device *dev)
 {
 	return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING;
diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h
index 1ec407b01e46..d758163b0e43 100644
--- a/include/uapi/linux/if.h
+++ b/include/uapi/linux/if.h
@@ -83,6 +83,7 @@
 #define IFF_SUPP_NOFCS	0x80000		/* device supports sending custom FCS */
 #define IFF_LIVE_ADDR_CHANGE 0x100000	/* device supports hardware address
 					 * change when it's running */
+#define IFF_MACVLAN 0x200000		/* Macvlan device */
 
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
diff --git a/net/core/dev.c b/net/core/dev.c
index 0e6136546a8c..8ffc52e01ece 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2538,7 +2538,7 @@ static inline int skb_needs_linearize(struct sk_buff *skb,
 }
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
-			struct netdev_queue *txq)
+			struct netdev_queue *txq, void *accel_priv)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 	int rc = NETDEV_TX_OK;
@@ -2604,9 +2604,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			dev_queue_xmit_nit(skb, dev);
 
 		skb_len = skb->len;
-		rc = ops->ndo_start_xmit(skb, dev);
+		if (accel_priv)
+			rc = ops->ndo_dfwd_start_xmit(skb, dev, accel_priv);
+		else
+			rc = ops->ndo_start_xmit(skb, dev);
+
 		trace_net_dev_xmit(skb, rc, dev, skb_len);
-		if (rc == NETDEV_TX_OK)
+		if (rc == NETDEV_TX_OK && txq)
 			txq_trans_update(txq);
 		return rc;
 	}
@@ -2622,7 +2626,10 @@ gso:
 			dev_queue_xmit_nit(nskb, dev);
 
 		skb_len = nskb->len;
-		rc = ops->ndo_start_xmit(nskb, dev);
+		if (accel_priv)
+			rc = ops->ndo_dfwd_start_xmit(nskb, dev, accel_priv);
+		else
+			rc = ops->ndo_start_xmit(nskb, dev);
 		trace_net_dev_xmit(nskb, rc, dev, skb_len);
 		if (unlikely(rc != NETDEV_TX_OK)) {
 			if (rc & ~NETDEV_TX_MASK)
@@ -2647,6 +2654,7 @@ out_kfree_skb:
 out:
 	return rc;
 }
+EXPORT_SYMBOL_GPL(dev_hard_start_xmit);
 
 static void qdisc_pkt_len_init(struct sk_buff *skb)
 {
@@ -2854,7 +2862,7 @@ int dev_queue_xmit(struct sk_buff *skb)
 
 			if (!netif_xmit_stopped(txq)) {
 				__this_cpu_inc(xmit_recursion);
-				rc = dev_hard_start_xmit(skb, dev, txq);
+				rc = dev_hard_start_xmit(skb, dev, txq, NULL);
 				__this_cpu_dec(xmit_recursion);
 				if (dev_xmit_complete(rc)) {
 					HARD_TX_UNLOCK(dev, txq);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 862989898f61..30071dec287a 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -96,6 +96,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_LOOPBACK_BIT] =         "loopback",
 	[NETIF_F_RXFCS_BIT] =            "rx-fcs",
 	[NETIF_F_RXALL_BIT] =            "rx-all",
+	[NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
 };
 
 static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7fc899a943a8..922a09406ba7 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -126,7 +126,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
 	if (!netif_xmit_frozen_or_stopped(txq))
-		ret = dev_hard_start_xmit(skb, dev, txq);
+		ret = dev_hard_start_xmit(skb, dev, txq, NULL);
 
 	HARD_TX_UNLOCK(dev, txq);
 
-- 
cgit v1.2.3-71-gd317


From 6e7136ed7793fa4948b0192dcd6862d12a50d67c Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Thu, 7 Nov 2013 12:19:53 +0200
Subject: net/mlx4_core: ICM pages are allocated on device NUMA node

This is done to optimize FW/HW access to host memory.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.com>
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Reviewed-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/icm.c  | 42 ++++++++++++++++++++++---------
 drivers/net/ethernet/mellanox/mlx4/main.c |  1 +
 include/linux/mlx4/device.h               |  1 +
 3 files changed, 32 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index 31d02649be41..5fbf4924c272 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -93,13 +93,17 @@ void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent)
 	kfree(icm);
 }
 
-static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
+static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order,
+				gfp_t gfp_mask, int node)
 {
 	struct page *page;
 
-	page = alloc_pages(gfp_mask, order);
-	if (!page)
-		return -ENOMEM;
+	page = alloc_pages_node(node, gfp_mask, order);
+	if (!page) {
+		page = alloc_pages(gfp_mask, order);
+		if (!page)
+			return -ENOMEM;
+	}
 
 	sg_set_page(mem, page, PAGE_SIZE << order, 0);
 	return 0;
@@ -130,9 +134,15 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
 	/* We use sg_set_buf for coherent allocs, which assumes low memory */
 	BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));
 
-	icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
-	if (!icm)
-		return NULL;
+	icm = kmalloc_node(sizeof(*icm),
+			   gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN),
+			   dev->numa_node);
+	if (!icm) {
+		icm = kmalloc(sizeof(*icm),
+			      gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
+		if (!icm)
+			return NULL;
+	}
 
 	icm->refcount = 0;
 	INIT_LIST_HEAD(&icm->chunk_list);
@@ -141,10 +151,17 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
 
 	while (npages > 0) {
 		if (!chunk) {
-			chunk = kmalloc(sizeof *chunk,
-					gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
-			if (!chunk)
-				goto fail;
+			chunk = kmalloc_node(sizeof(*chunk),
+					     gfp_mask & ~(__GFP_HIGHMEM |
+							  __GFP_NOWARN),
+					     dev->numa_node);
+			if (!chunk) {
+				chunk = kmalloc(sizeof(*chunk),
+						gfp_mask & ~(__GFP_HIGHMEM |
+							     __GFP_NOWARN));
+				if (!chunk)
+					goto fail;
+			}
 
 			sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN);
 			chunk->npages = 0;
@@ -161,7 +178,8 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
 						      cur_order, gfp_mask);
 		else
 			ret = mlx4_alloc_icm_pages(&chunk->mem[chunk->npages],
-						   cur_order, gfp_mask);
+						   cur_order, gfp_mask,
+						   dev->numa_node);
 
 		if (ret) {
 			if (--cur_order < 0)
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 7d2628dfdc29..5789ea2c934d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2191,6 +2191,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
 	mutex_init(&priv->bf_mutex);
 
 	dev->rev_id = pdev->revision;
+	dev->numa_node = dev_to_node(&pdev->dev);
 	/* Detect if this device is a virtual function */
 	if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
 		/* When acting as pf, we normally skip vfs unless explicitly
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index f6f59271f857..4cf0b0153639 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -662,6 +662,7 @@ struct mlx4_dev {
 	u8			rev_id;
 	char			board_id[MLX4_BOARD_ID_LEN];
 	int			num_vfs;
+	int			numa_node;
 	int			oper_log_mgm_entry_size;
 	u64			regid_promisc_array[MLX4_MAX_PORTS + 1];
 	u64			regid_allmulti_array[MLX4_MAX_PORTS + 1];
-- 
cgit v1.2.3-71-gd317


From 163561a4e2f8af44e96453bc10c7a4f9bcc736e1 Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Thu, 7 Nov 2013 12:19:54 +0200
Subject: net/mlx4_en: Datapath structures are allocated per NUMA node

For each RX/TX ring and its CQ, allocation is done on a NUMA node that
corresponds to the core that the data structure should operate on.
The assumption is that the core number is reflected by the ring index.
The affected allocations are the ring/CQ data structures,
the TX/RX info and the shared HW/SW buffer.
For TX rings, each core has rings of all UPs.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.com>
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Reviewed-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_cq.c     | 17 ++++++++++---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 12 ++++++---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c     | 23 +++++++++++------
 drivers/net/ethernet/mellanox/mlx4/en_tx.c     | 34 +++++++++++++++++---------
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  6 ++---
 drivers/net/ethernet/mellanox/mlx4/pd.c        | 11 ++++++---
 include/linux/mlx4/device.h                    |  2 +-
 7 files changed, 71 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index d203f11b9edf..3a098cc4d349 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -45,16 +45,20 @@ static void mlx4_en_cq_event(struct mlx4_cq *cq, enum mlx4_event event)
 
 int mlx4_en_create_cq(struct mlx4_en_priv *priv,
 		      struct mlx4_en_cq **pcq,
-		      int entries, int ring, enum cq_type mode)
+		      int entries, int ring, enum cq_type mode,
+		      int node)
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_cq *cq;
 	int err;
 
-	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+	cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, node);
 	if (!cq) {
-		en_err(priv, "Failed to allocate CQ structure\n");
-		return -ENOMEM;
+		cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+		if (!cq) {
+			en_err(priv, "Failed to allocate CQ structure\n");
+			return -ENOMEM;
+		}
 	}
 
 	cq->size = entries;
@@ -64,8 +68,13 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
 	cq->is_tx = mode;
 	spin_lock_init(&cq->lock);
 
+	/* Allocate HW buffers on provided NUMA node.
+	 * dev->numa_node is used in mtt range allocation flow.
+	 */
+	set_dev_node(&mdev->dev->pdev->dev, node);
 	err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres,
 				cq->buf_size, 2 * PAGE_SIZE);
+	set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node);
 	if (err)
 		goto err_cq;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index f430788cc4fe..e72d8a112a6b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1895,6 +1895,7 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
 	struct mlx4_en_port_profile *prof = priv->prof;
 	int i;
 	int err;
+	int node;
 
 	err = mlx4_qp_reserve_range(priv->mdev->dev, priv->tx_ring_num, 256, &priv->base_tx_qpn);
 	if (err) {
@@ -1904,23 +1905,26 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
 
 	/* Create tx Rings */
 	for (i = 0; i < priv->tx_ring_num; i++) {
+		node = cpu_to_node(i % num_online_cpus());
 		if (mlx4_en_create_cq(priv, &priv->tx_cq[i],
-				      prof->tx_ring_size, i, TX))
+				      prof->tx_ring_size, i, TX, node))
 			goto err;
 
 		if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], priv->base_tx_qpn + i,
-					   prof->tx_ring_size, TXBB_SIZE))
+					   prof->tx_ring_size, TXBB_SIZE, node))
 			goto err;
 	}
 
 	/* Create rx Rings */
 	for (i = 0; i < priv->rx_ring_num; i++) {
+		node = cpu_to_node(i % num_online_cpus());
 		if (mlx4_en_create_cq(priv, &priv->rx_cq[i],
-				      prof->rx_ring_size, i, RX))
+				      prof->rx_ring_size, i, RX, node))
 			goto err;
 
 		if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i],
-					   prof->rx_ring_size, priv->stride))
+					   prof->rx_ring_size, priv->stride,
+					   node))
 			goto err;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 1c45f88776c5..07a1d0fbae47 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -320,17 +320,20 @@ static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv,
 
 int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 			   struct mlx4_en_rx_ring **pring,
-			   u32 size, u16 stride)
+			   u32 size, u16 stride, int node)
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_rx_ring *ring;
 	int err = -ENOMEM;
 	int tmp;
 
-	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+	ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
 	if (!ring) {
-		en_err(priv, "Failed to allocate RX ring structure\n");
-		return -ENOMEM;
+		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+		if (!ring) {
+			en_err(priv, "Failed to allocate RX ring structure\n");
+			return -ENOMEM;
+		}
 	}
 
 	ring->prod = 0;
@@ -343,17 +346,23 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 
 	tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
 					sizeof(struct mlx4_en_rx_alloc));
-	ring->rx_info = vmalloc(tmp);
+	ring->rx_info = vmalloc_node(tmp, node);
 	if (!ring->rx_info) {
-		err = -ENOMEM;
-		goto err_ring;
+		ring->rx_info = vmalloc(tmp);
+		if (!ring->rx_info) {
+			err = -ENOMEM;
+			goto err_ring;
+		}
 	}
 
 	en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
 		 ring->rx_info, tmp);
 
+	/* Allocate HW buffers on provided NUMA node */
+	set_dev_node(&mdev->dev->pdev->dev, node);
 	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
 				 ring->buf_size, 2 * PAGE_SIZE);
+	set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node);
 	if (err)
 		goto err_info;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index d4e4cf30a720..f54ebd5a1702 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -55,17 +55,20 @@ MODULE_PARM_DESC(inline_thold, "threshold for using inline data");
 
 int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 			   struct mlx4_en_tx_ring **pring, int qpn, u32 size,
-			   u16 stride)
+			   u16 stride, int node)
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_tx_ring *ring;
 	int tmp;
 	int err;
 
-	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+	ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
 	if (!ring) {
-		en_err(priv, "Failed allocating TX ring\n");
-		return -ENOMEM;
+		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+		if (!ring) {
+			en_err(priv, "Failed allocating TX ring\n");
+			return -ENOMEM;
+		}
 	}
 
 	ring->size = size;
@@ -75,24 +78,33 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 	inline_thold = min(inline_thold, MAX_INLINE);
 
 	tmp = size * sizeof(struct mlx4_en_tx_info);
-	ring->tx_info = vmalloc(tmp);
+	ring->tx_info = vmalloc_node(tmp, node);
 	if (!ring->tx_info) {
-		err = -ENOMEM;
-		goto err_ring;
+		ring->tx_info = vmalloc(tmp);
+		if (!ring->tx_info) {
+			err = -ENOMEM;
+			goto err_ring;
+		}
 	}
 
 	en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
 		 ring->tx_info, tmp);
 
-	ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
+	ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node);
 	if (!ring->bounce_buf) {
-		err = -ENOMEM;
-		goto err_info;
+		ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
+		if (!ring->bounce_buf) {
+			err = -ENOMEM;
+			goto err_info;
+		}
 	}
 	ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE);
 
+	/* Allocate HW buffers on provided NUMA node */
+	set_dev_node(&mdev->dev->pdev->dev, node);
 	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
 				 2 * PAGE_SIZE);
+	set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node);
 	if (err) {
 		en_err(priv, "Failed allocating hwq resources\n");
 		goto err_bounce;
@@ -118,7 +130,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 	}
 	ring->qp.event = mlx4_en_sqp_event;
 
-	err = mlx4_bf_alloc(mdev->dev, &ring->bf);
+	err = mlx4_bf_alloc(mdev->dev, &ring->bf, node);
 	if (err) {
 		en_dbg(DRV, priv, "working without blueflame (%d)", err);
 		ring->bf.uar = &mdev->priv_uar;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index b2547ae07dfa..f3758de59c05 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -705,7 +705,7 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv);
 int mlx4_en_alloc_resources(struct mlx4_en_priv *priv);
 
 int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq,
-		      int entries, int ring, enum cq_type mode);
+		      int entries, int ring, enum cq_type mode, int node);
 void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq);
 int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
 			int cq_idx);
@@ -719,7 +719,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
 
 int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 			   struct mlx4_en_tx_ring **pring,
-			   int qpn, u32 size, u16 stride);
+			   int qpn, u32 size, u16 stride, int node);
 void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
 			     struct mlx4_en_tx_ring **pring);
 int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
@@ -730,7 +730,7 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
 
 int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 			   struct mlx4_en_rx_ring **pring,
-			   u32 size, u16 stride);
+			   u32 size, u16 stride, int node);
 void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
 			     struct mlx4_en_rx_ring **pring,
 			     u32 size, u16 stride);
diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c
index 00f223acada7..84cfb40bf451 100644
--- a/drivers/net/ethernet/mellanox/mlx4/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/pd.c
@@ -168,7 +168,7 @@ void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar)
 }
 EXPORT_SYMBOL_GPL(mlx4_uar_free);
 
-int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf)
+int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_uar *uar;
@@ -186,10 +186,13 @@ int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf)
 			err = -ENOMEM;
 			goto out;
 		}
-		uar = kmalloc(sizeof *uar, GFP_KERNEL);
+		uar = kmalloc_node(sizeof(*uar), GFP_KERNEL, node);
 		if (!uar) {
-			err = -ENOMEM;
-			goto out;
+			uar = kmalloc(sizeof(*uar), GFP_KERNEL);
+			if (!uar) {
+				err = -ENOMEM;
+				goto out;
+			}
 		}
 		err = mlx4_uar_alloc(dev, uar);
 		if (err)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 4cf0b0153639..7d3a523160ba 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -835,7 +835,7 @@ void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn);
 
 int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar);
 void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar);
-int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf);
+int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node);
 void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf);
 
 int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
-- 
cgit v1.2.3-71-gd317


From 0c7ddf36c29c3ce12f2d2931a357ccaa0861035a Mon Sep 17 00:00:00 2001
From: Mathias Krause <mathias.krause@secunet.com>
Date: Thu, 7 Nov 2013 14:18:24 +0100
Subject: net: move pskb_put() to core code

This function has usage beside IPsec so move it to the core skbuff code.
While doing so, give it some documentation and change its return type to
'unsigned char *' to be in line with skb_put().

Signed-off-by: Mathias Krause <mathias.krause@secunet.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  1 +
 include/net/esp.h      |  2 --
 net/core/skbuff.c      | 23 +++++++++++++++++++++++
 net/xfrm/xfrm_algo.c   | 13 -------------
 4 files changed, 24 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2e153b69d318..491dd6c2c6cc 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1417,6 +1417,7 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
 /*
  *	Add data to an sk_buff
  */
+unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len);
 unsigned char *skb_put(struct sk_buff *skb, unsigned int len);
 static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
 {
diff --git a/include/net/esp.h b/include/net/esp.h
index c92213c38312..a43be85aedc4 100644
--- a/include/net/esp.h
+++ b/include/net/esp.h
@@ -3,8 +3,6 @@
 
 #include <linux/skbuff.h>
 
-void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len);
-
 struct ip_esp_hdr;
 
 static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3735fad5616e..2fbea08c4f50 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1263,6 +1263,29 @@ free_skb:
 }
 EXPORT_SYMBOL(skb_pad);
 
+/**
+ *	pskb_put - add data to the tail of a potentially fragmented buffer
+ *	@skb: start of the buffer to use
+ *	@tail: tail fragment of the buffer to use
+ *	@len: amount of data to add
+ *
+ *	This function extends the used data area of the potentially
+ *	fragmented buffer. @tail must be the last fragment of @skb -- or
+ *	@skb itself. If this would exceed the total buffer size the kernel
+ *	will panic. A pointer to the first byte of the extra data is
+ *	returned.
+ */
+
+unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
+{
+	if (tail != skb) {
+		skb->data_len += len;
+		skb->len += len;
+	}
+	return skb_put(tail, len);
+}
+EXPORT_SYMBOL_GPL(pskb_put);
+
 /**
  *	skb_put - add data to a buffer
  *	@skb: buffer to use
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index ab4ef72f0b1d..debe733386f8 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -802,17 +802,4 @@ int xfrm_count_pfkey_enc_supported(void)
 }
 EXPORT_SYMBOL_GPL(xfrm_count_pfkey_enc_supported);
 
-#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
-
-void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
-{
-	if (tail != skb) {
-		skb->data_len += len;
-		skb->len += len;
-	}
-	return skb_put(tail, len);
-}
-EXPORT_SYMBOL_GPL(pskb_put);
-#endif
-
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3-71-gd317


From bc32383cd6496d595e6a25cdc7cff1da6b694462 Mon Sep 17 00:00:00 2001
From: Mathias Krause <mathias.krause@secunet.com>
Date: Thu, 7 Nov 2013 14:18:26 +0100
Subject: net: skbuff - kernel-doc fixes

Use "@" to refer to parameters in the kernel-doc description. According
to Documentation/kernel-doc-nano-HOWTO.txt "&" shall be used to refer to
structures only.

Signed-off-by: Mathias Krause <mathias.krause@secunet.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 +-
 net/core/skbuff.c      | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 491dd6c2c6cc..036ec7d8a83a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1357,7 +1357,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
  * @size: the length of the data
  *
  * As per __skb_fill_page_desc() -- initialises the @i'th fragment of
- * @skb to point to &size bytes at offset @off within @page. In
+ * @skb to point to @size bytes at offset @off within @page. In
  * addition updates @skb such that @i is the last fragment.
  *
  * Does not take any additional reference on the fragment.
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2fbea08c4f50..8c5197fe55a4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1051,8 +1051,8 @@ EXPORT_SYMBOL(__pskb_copy);
  *	@ntail: room to add at tail
  *	@gfp_mask: allocation priority
  *
- *	Expands (or creates identical copy, if &nhead and &ntail are zero)
- *	header of skb. &sk_buff itself is not changed. &sk_buff MUST have
+ *	Expands (or creates identical copy, if @nhead and @ntail are zero)
+ *	header of @skb. &sk_buff itself is not changed. &sk_buff MUST have
  *	reference count of 1. Returns zero in the case of success or error,
  *	if expansion failed. In the last case, &sk_buff is not changed.
  *
@@ -2563,14 +2563,14 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
  * @data: destination pointer for data to be returned
  * @st: state variable
  *
- * Reads a block of skb data at &consumed relative to the
+ * Reads a block of skb data at @consumed relative to the
  * lower offset specified to skb_prepare_seq_read(). Assigns
- * the head of the data block to &data and returns the length
+ * the head of the data block to @data and returns the length
  * of the block or 0 if the end of the skb data or the upper
  * offset has been reached.
  *
  * The caller is not required to consume all of the data
- * returned, i.e. &consumed is typically set to the number
+ * returned, i.e. @consumed is typically set to the number
  * of bytes already consumed and the next call to
  * skb_seq_read() will return the remaining part of the block.
  *
-- 
cgit v1.2.3-71-gd317


From 6aafeef03b9d9ecf255f3a80ed85ee070260e1ae Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Wed, 6 Nov 2013 17:52:20 +0100
Subject: netfilter: push reasm skb through instead of original frag skbs

Pushing original fragments through causes several problems. For example
for matching, frags may not be matched correctly. Take following
example:

<example>
On HOSTA do:
ip6tables -I INPUT -p icmpv6 -j DROP
ip6tables -I INPUT -p icmpv6 -m icmp6 --icmpv6-type 128 -j ACCEPT

and on HOSTB you do:
ping6 HOSTA -s2000    (MTU is 1500)

Incoming echo requests will be filtered out on HOSTA. This issue does
not occur with smaller packets than MTU (where fragmentation does not happen)
</example>

As was discussed previously, the only correct solution seems to be to use
reassembled skb instead of separete frags. Doing this has positive side
effects in reducing sk_buff by one pointer (nfct_reasm) and also the reams
dances in ipvs and conntrack can be removed.

Future plan is to remove net/ipv6/netfilter/nf_conntrack_reasm.c
entirely and use code in net/ipv6/reassembly.c instead.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h                         | 32 ---------------
 include/net/ip_vs.h                            | 32 +--------------
 include/net/netfilter/ipv6/nf_defrag_ipv6.h    |  4 +-
 net/core/skbuff.c                              |  3 --
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 56 +-------------------------
 net/ipv6/netfilter/nf_conntrack_reasm.c        | 19 +--------
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c      |  7 +++-
 net/netfilter/ipvs/ip_vs_core.c                | 55 +------------------------
 net/netfilter/ipvs/ip_vs_pe_sip.c              |  8 +---
 9 files changed, 13 insertions(+), 203 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 036ec7d8a83a..215b5ea1cb30 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -337,11 +337,6 @@ typedef unsigned int sk_buff_data_t;
 typedef unsigned char *sk_buff_data_t;
 #endif
 
-#if defined(CONFIG_NF_DEFRAG_IPV4) || defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \
-    defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
-#define NET_SKBUFF_NF_DEFRAG_NEEDED 1
-#endif
-
 /** 
  *	struct sk_buff - socket buffer
  *	@next: Next buffer in list
@@ -374,7 +369,6 @@ typedef unsigned char *sk_buff_data_t;
  *	@protocol: Packet protocol from driver
  *	@destructor: Destruct function
  *	@nfct: Associated connection, if any
- *	@nfct_reasm: netfilter conntrack re-assembly pointer
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
  *	@skb_iif: ifindex of device we arrived on
  *	@tc_index: Traffic control index
@@ -463,9 +457,6 @@ struct sk_buff {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	struct nf_conntrack	*nfct;
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-	struct sk_buff		*nfct_reasm;
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	struct nf_bridge_info	*nf_bridge;
 #endif
@@ -2595,18 +2586,6 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
 		atomic_inc(&nfct->use);
 }
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
-{
-	if (skb)
-		atomic_inc(&skb->users);
-}
-static inline void nf_conntrack_put_reasm(struct sk_buff *skb)
-{
-	if (skb)
-		kfree_skb(skb);
-}
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
 {
@@ -2625,10 +2604,6 @@ static inline void nf_reset(struct sk_buff *skb)
 	nf_conntrack_put(skb->nfct);
 	skb->nfct = NULL;
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-	nf_conntrack_put_reasm(skb->nfct_reasm);
-	skb->nfct_reasm = NULL;
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	nf_bridge_put(skb->nf_bridge);
 	skb->nf_bridge = NULL;
@@ -2650,10 +2625,6 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 	nf_conntrack_get(src->nfct);
 	dst->nfctinfo = src->nfctinfo;
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-	dst->nfct_reasm = src->nfct_reasm;
-	nf_conntrack_get_reasm(src->nfct_reasm);
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	dst->nf_bridge  = src->nf_bridge;
 	nf_bridge_get(src->nf_bridge);
@@ -2665,9 +2636,6 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put(dst->nfct);
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-	nf_conntrack_put_reasm(dst->nfct_reasm);
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	nf_bridge_put(dst->nf_bridge);
 #endif
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index cd7275f9c463..5679d927562b 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -109,7 +109,6 @@ extern int ip_vs_conn_tab_size;
 struct ip_vs_iphdr {
 	__u32 len;	/* IPv4 simply where L4 starts
 			   IPv6 where L4 Transport Header starts */
-	__u32 thoff_reasm; /* Transport Header Offset in nfct_reasm skb */
 	__u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/
 	__s16 protocol;
 	__s32 flags;
@@ -117,34 +116,12 @@ struct ip_vs_iphdr {
 	union nf_inet_addr daddr;
 };
 
-/* Dependency to module: nf_defrag_ipv6 */
-#if defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
-static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
-{
-	return skb->nfct_reasm;
-}
-static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
-				      int len, void *buffer,
-				      const struct ip_vs_iphdr *ipvsh)
-{
-	if (unlikely(ipvsh->fragoffs && skb_nfct_reasm(skb)))
-		return skb_header_pointer(skb_nfct_reasm(skb),
-					  ipvsh->thoff_reasm, len, buffer);
-
-	return skb_header_pointer(skb, offset, len, buffer);
-}
-#else
-static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
-{
-	return NULL;
-}
 static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
 				      int len, void *buffer,
 				      const struct ip_vs_iphdr *ipvsh)
 {
 	return skb_header_pointer(skb, offset, len, buffer);
 }
-#endif
 
 static inline void
 ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr)
@@ -171,19 +148,12 @@ ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr)
 			(struct ipv6hdr *)skb_network_header(skb);
 		iphdr->saddr.in6 = iph->saddr;
 		iphdr->daddr.in6 = iph->daddr;
-		/* ipv6_find_hdr() updates len, flags, thoff_reasm */
-		iphdr->thoff_reasm = 0;
+		/* ipv6_find_hdr() updates len, flags */
 		iphdr->len	 = 0;
 		iphdr->flags	 = 0;
 		iphdr->protocol  = ipv6_find_hdr(skb, &iphdr->len, -1,
 						 &iphdr->fragoffs,
 						 &iphdr->flags);
-		/* get proto from re-assembled packet and it's offset */
-		if (skb_nfct_reasm(skb))
-			iphdr->protocol = ipv6_find_hdr(skb_nfct_reasm(skb),
-							&iphdr->thoff_reasm,
-							-1, NULL, NULL);
-
 	} else
 #endif
 	{
diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index 5613412e7dc2..27666d8a0bd0 100644
--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -6,9 +6,7 @@ void nf_defrag_ipv6_enable(void);
 int nf_ct_frag6_init(void);
 void nf_ct_frag6_cleanup(void);
 struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user);
-void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
-			struct net_device *in, struct net_device *out,
-			int (*okfn)(struct sk_buff *));
+void nf_ct_frag6_consume_orig(struct sk_buff *skb);
 
 struct inet_frags_ctl;
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8c5197fe55a4..8cec1e6b844d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -592,9 +592,6 @@ static void skb_release_head_state(struct sk_buff *skb)
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	nf_conntrack_put(skb->nfct);
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-	nf_conntrack_put_reasm(skb->nfct_reasm);
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	nf_bridge_put(skb->nf_bridge);
 #endif
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 486545eb42ce..4cbc6b290dd5 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -169,64 +169,13 @@ out:
 	return nf_conntrack_confirm(skb);
 }
 
-static unsigned int __ipv6_conntrack_in(struct net *net,
-					unsigned int hooknum,
-					struct sk_buff *skb,
-					const struct net_device *in,
-					const struct net_device *out,
-					int (*okfn)(struct sk_buff *))
-{
-	struct sk_buff *reasm = skb->nfct_reasm;
-	const struct nf_conn_help *help;
-	struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-
-	/* This packet is fragmented and has reassembled packet. */
-	if (reasm) {
-		/* Reassembled packet isn't parsed yet ? */
-		if (!reasm->nfct) {
-			unsigned int ret;
-
-			ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm);
-			if (ret != NF_ACCEPT)
-				return ret;
-		}
-
-		/* Conntrack helpers need the entire reassembled packet in the
-		 * POST_ROUTING hook. In case of unconfirmed connections NAT
-		 * might reassign a helper, so the entire packet is also
-		 * required.
-		 */
-		ct = nf_ct_get(reasm, &ctinfo);
-		if (ct != NULL && !nf_ct_is_untracked(ct)) {
-			help = nfct_help(ct);
-			if ((help && help->helper) || !nf_ct_is_confirmed(ct)) {
-				nf_conntrack_get_reasm(reasm);
-				NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
-					       (struct net_device *)in,
-					       (struct net_device *)out,
-					       okfn, NF_IP6_PRI_CONNTRACK + 1);
-				return NF_DROP_ERR(-ECANCELED);
-			}
-		}
-
-		nf_conntrack_get(reasm->nfct);
-		skb->nfct = reasm->nfct;
-		skb->nfctinfo = reasm->nfctinfo;
-		return NF_ACCEPT;
-	}
-
-	return nf_conntrack_in(net, PF_INET6, hooknum, skb);
-}
-
 static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,
 				      struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
-	return __ipv6_conntrack_in(dev_net(in), ops->hooknum, skb, in, out,
-				   okfn);
+	return nf_conntrack_in(dev_net(in), PF_INET6, ops->hooknum, skb);
 }
 
 static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
@@ -240,8 +189,7 @@ static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
 		net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
 		return NF_ACCEPT;
 	}
-	return __ipv6_conntrack_in(dev_net(out), ops->hooknum, skb, in, out,
-				   okfn);
+	return nf_conntrack_in(dev_net(out), PF_INET6, ops->hooknum, skb);
 }
 
 static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 4a258263d8ec..767ab8da8218 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -633,31 +633,16 @@ ret_orig:
 	return skb;
 }
 
-void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
-			struct net_device *in, struct net_device *out,
-			int (*okfn)(struct sk_buff *))
+void nf_ct_frag6_consume_orig(struct sk_buff *skb)
 {
 	struct sk_buff *s, *s2;
-	unsigned int ret = 0;
 
 	for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
-		nf_conntrack_put_reasm(s->nfct_reasm);
-		nf_conntrack_get_reasm(skb);
-		s->nfct_reasm = skb;
-
 		s2 = s->next;
 		s->next = NULL;
-
-		if (ret != -ECANCELED)
-			ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s,
-					     in, out, okfn,
-					     NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
-		else
-			kfree_skb(s);
-
+		consume_skb(s);
 		s = s2;
 	}
-	nf_conntrack_put_reasm(skb);
 }
 
 static int nf_ct_net_init(struct net *net)
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index ec483aa3f60f..7b9a748c6bac 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -75,8 +75,11 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
 	if (reasm == skb)
 		return NF_ACCEPT;
 
-	nf_ct_frag6_output(ops->hooknum, reasm, (struct net_device *)in,
-			   (struct net_device *)out, okfn);
+	nf_ct_frag6_consume_orig(reasm);
+
+	NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm,
+		       (struct net_device *) in, (struct net_device *) out,
+		       okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
 
 	return NF_STOLEN;
 }
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 34fda62f40f6..4f26ee46b51f 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1139,12 +1139,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 	ip_vs_fill_iph_skb(af, skb, &iph);
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6) {
-		if (!iph.fragoffs && skb_nfct_reasm(skb)) {
-			struct sk_buff *reasm = skb_nfct_reasm(skb);
-			/* Save fw mark for coming frags */
-			reasm->ipvs_property = 1;
-			reasm->mark = skb->mark;
-		}
 		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
 			int related;
 			int verdict = ip_vs_out_icmp_v6(skb, &related,
@@ -1614,12 +1608,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6) {
-		if (!iph.fragoffs && skb_nfct_reasm(skb)) {
-			struct sk_buff *reasm = skb_nfct_reasm(skb);
-			/* Save fw mark for coming frags. */
-			reasm->ipvs_property = 1;
-			reasm->mark = skb->mark;
-		}
 		if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
 			int related;
 			int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum,
@@ -1671,9 +1659,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 		/* sorry, all this trouble for a no-hit :) */
 		IP_VS_DBG_PKT(12, af, pp, skb, 0,
 			      "ip_vs_in: packet continues traversal as normal");
-		if (iph.fragoffs && !skb_nfct_reasm(skb)) {
+		if (iph.fragoffs) {
 			/* Fragment that couldn't be mapped to a conn entry
-			 * and don't have any pointer to a reasm skb
 			 * is missing module nf_defrag_ipv6
 			 */
 			IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
@@ -1755,38 +1742,6 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
 
 #ifdef CONFIG_IP_VS_IPV6
 
-/*
- * AF_INET6 fragment handling
- * Copy info from first fragment, to the rest of them.
- */
-static unsigned int
-ip_vs_preroute_frag6(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		     const struct net_device *in,
-		     const struct net_device *out,
-		     int (*okfn)(struct sk_buff *))
-{
-	struct sk_buff *reasm = skb_nfct_reasm(skb);
-	struct net *net;
-
-	/* Skip if not a "replay" from nf_ct_frag6_output or first fragment.
-	 * ipvs_property is set when checking first fragment
-	 * in ip_vs_in() and ip_vs_out().
-	 */
-	if (reasm)
-		IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property);
-	if (!reasm || !reasm->ipvs_property)
-		return NF_ACCEPT;
-
-	net = skb_net(skb);
-	if (!net_ipvs(net)->enable)
-		return NF_ACCEPT;
-
-	/* Copy stored fw mark, saved in ip_vs_{in,out} */
-	skb->mark = reasm->mark;
-
-	return NF_ACCEPT;
-}
-
 /*
  *	AF_INET6 handler in NF_INET_LOCAL_IN chain
  *	Schedule and forward packets from remote clients
@@ -1924,14 +1879,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 		.priority	= 100,
 	},
 #ifdef CONFIG_IP_VS_IPV6
-	/* After mangle & nat fetch 2:nd fragment and following */
-	{
-		.hook		= ip_vs_preroute_frag6,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_PRE_ROUTING,
-		.priority	= NF_IP6_PRI_NAT_DST + 1,
-	},
 	/* After packet filtering, change source only for VS/NAT */
 	{
 		.hook		= ip_vs_reply6,
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 9ef22bdce9f1..bed5f7042529 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -65,7 +65,6 @@ static int get_callid(const char *dptr, unsigned int dataoff,
 static int
 ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 {
-	struct sk_buff *reasm = skb_nfct_reasm(skb);
 	struct ip_vs_iphdr iph;
 	unsigned int dataoff, datalen, matchoff, matchlen;
 	const char *dptr;
@@ -79,15 +78,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 	/* todo: IPv6 fragments:
 	 *       I think this only should be done for the first fragment. /HS
 	 */
-	if (reasm) {
-		skb = reasm;
-		dataoff = iph.thoff_reasm + sizeof(struct udphdr);
-	} else
-		dataoff = iph.len + sizeof(struct udphdr);
+	dataoff = iph.len + sizeof(struct udphdr);
 
 	if (dataoff >= skb->len)
 		return -EINVAL;
-	/* todo: Check if this will mess-up the reasm skb !!! /HS */
 	retc = skb_linearize(skb);
 	if (retc < 0)
 		return retc;
-- 
cgit v1.2.3-71-gd317


From e267cb960ab790c94a5019272c0e4dac95dc4dba Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 11 Nov 2013 00:42:07 -0500
Subject: vlan: Implement vlan_dev_get_egress_qos_mask as an inline.

This is to avoid very silly Kconfig dependencies for modules
using this routine.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 98 ++++++++++++++++++++++++++++++++++++++++++++++++-
 net/8021q/vlan.h        | 77 --------------------------------------
 net/8021q/vlan_dev.c    | 31 ++--------------
 3 files changed, 99 insertions(+), 107 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index f3088a0112cf..f252deb99454 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -88,8 +88,102 @@ extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev,
 					       __be16 vlan_proto, u16 vlan_id);
 extern struct net_device *vlan_dev_real_dev(const struct net_device *dev);
 extern u16 vlan_dev_vlan_id(const struct net_device *dev);
-extern u16 vlan_dev_get_egress_qos_mask(struct net_device *dev,
-					u32 skprio);
+
+/**
+ *	struct vlan_priority_tci_mapping - vlan egress priority mappings
+ *	@priority: skb priority
+ *	@vlan_qos: vlan priority: (skb->priority << 13) & 0xE000
+ *	@next: pointer to next struct
+ */
+struct vlan_priority_tci_mapping {
+	u32					priority;
+	u16					vlan_qos;
+	struct vlan_priority_tci_mapping	*next;
+};
+
+/**
+ *	struct vlan_pcpu_stats - VLAN percpu rx/tx stats
+ *	@rx_packets: number of received packets
+ *	@rx_bytes: number of received bytes
+ *	@rx_multicast: number of received multicast packets
+ *	@tx_packets: number of transmitted packets
+ *	@tx_bytes: number of transmitted bytes
+ *	@syncp: synchronization point for 64bit counters
+ *	@rx_errors: number of rx errors
+ *	@tx_dropped: number of tx drops
+ */
+struct vlan_pcpu_stats {
+	u64			rx_packets;
+	u64			rx_bytes;
+	u64			rx_multicast;
+	u64			tx_packets;
+	u64			tx_bytes;
+	struct u64_stats_sync	syncp;
+	u32			rx_errors;
+	u32			tx_dropped;
+};
+
+struct proc_dir_entry;
+struct netpoll;
+
+/**
+ *	struct vlan_dev_priv - VLAN private device data
+ *	@nr_ingress_mappings: number of ingress priority mappings
+ *	@ingress_priority_map: ingress priority mappings
+ *	@nr_egress_mappings: number of egress priority mappings
+ *	@egress_priority_map: hash of egress priority mappings
+ *	@vlan_proto: VLAN encapsulation protocol
+ *	@vlan_id: VLAN identifier
+ *	@flags: device flags
+ *	@real_dev: underlying netdevice
+ *	@real_dev_addr: address of underlying netdevice
+ *	@dent: proc dir entry
+ *	@vlan_pcpu_stats: ptr to percpu rx stats
+ */
+struct vlan_dev_priv {
+	unsigned int				nr_ingress_mappings;
+	u32					ingress_priority_map[8];
+	unsigned int				nr_egress_mappings;
+	struct vlan_priority_tci_mapping	*egress_priority_map[16];
+
+	__be16					vlan_proto;
+	u16					vlan_id;
+	u16					flags;
+
+	struct net_device			*real_dev;
+	unsigned char				real_dev_addr[ETH_ALEN];
+
+	struct proc_dir_entry			*dent;
+	struct vlan_pcpu_stats __percpu		*vlan_pcpu_stats;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	struct netpoll				*netpoll;
+#endif
+};
+
+static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
+{
+	return netdev_priv(dev);
+}
+
+static inline u16
+vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio)
+{
+	struct vlan_priority_tci_mapping *mp;
+
+	smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */
+
+	mp = vlan_dev_priv(dev)->egress_priority_map[(skprio & 0xF)];
+	while (mp) {
+		if (mp->priority == skprio) {
+			return mp->vlan_qos; /* This should already be shifted
+					      * to mask correctly with the
+					      * VLAN's TCI */
+		}
+		mp = mp->next;
+	}
+	return 0;
+}
+
 extern bool vlan_do_receive(struct sk_buff **skb);
 extern struct sk_buff *vlan_untag(struct sk_buff *skb);
 
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index a2caf00b82cc..5704ed9c3a23 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -5,83 +5,6 @@
 #include <linux/u64_stats_sync.h>
 #include <linux/list.h>
 
-
-/**
- *	struct vlan_priority_tci_mapping - vlan egress priority mappings
- *	@priority: skb priority
- *	@vlan_qos: vlan priority: (skb->priority << 13) & 0xE000
- *	@next: pointer to next struct
- */
-struct vlan_priority_tci_mapping {
-	u32					priority;
-	u16					vlan_qos;
-	struct vlan_priority_tci_mapping	*next;
-};
-
-
-/**
- *	struct vlan_pcpu_stats - VLAN percpu rx/tx stats
- *	@rx_packets: number of received packets
- *	@rx_bytes: number of received bytes
- *	@rx_multicast: number of received multicast packets
- *	@tx_packets: number of transmitted packets
- *	@tx_bytes: number of transmitted bytes
- *	@syncp: synchronization point for 64bit counters
- *	@rx_errors: number of rx errors
- *	@tx_dropped: number of tx drops
- */
-struct vlan_pcpu_stats {
-	u64			rx_packets;
-	u64			rx_bytes;
-	u64			rx_multicast;
-	u64			tx_packets;
-	u64			tx_bytes;
-	struct u64_stats_sync	syncp;
-	u32			rx_errors;
-	u32			tx_dropped;
-};
-
-struct netpoll;
-
-/**
- *	struct vlan_dev_priv - VLAN private device data
- *	@nr_ingress_mappings: number of ingress priority mappings
- *	@ingress_priority_map: ingress priority mappings
- *	@nr_egress_mappings: number of egress priority mappings
- *	@egress_priority_map: hash of egress priority mappings
- *	@vlan_proto: VLAN encapsulation protocol
- *	@vlan_id: VLAN identifier
- *	@flags: device flags
- *	@real_dev: underlying netdevice
- *	@real_dev_addr: address of underlying netdevice
- *	@dent: proc dir entry
- *	@vlan_pcpu_stats: ptr to percpu rx stats
- */
-struct vlan_dev_priv {
-	unsigned int				nr_ingress_mappings;
-	u32					ingress_priority_map[8];
-	unsigned int				nr_egress_mappings;
-	struct vlan_priority_tci_mapping	*egress_priority_map[16];
-
-	__be16					vlan_proto;
-	u16					vlan_id;
-	u16					flags;
-
-	struct net_device			*real_dev;
-	unsigned char				real_dev_addr[ETH_ALEN];
-
-	struct proc_dir_entry			*dent;
-	struct vlan_pcpu_stats __percpu		*vlan_pcpu_stats;
-#ifdef CONFIG_NET_POLL_CONTROLLER
-	struct netpoll				*netpoll;
-#endif
-};
-
-static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
-{
-	return netdev_priv(dev);
-}
-
 /* if this changes, algorithm will have to be reworked because this
  * depends on completely exhausting the VLAN identifier space.  Thus
  * it gives constant time look-up, but in many cases it wastes memory.
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 13904a414929..8db1b985dbf1 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -68,31 +68,6 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb)
 	return 0;
 }
 
-static inline u16
-__vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio)
-{
-	struct vlan_priority_tci_mapping *mp;
-
-	smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */
-
-	mp = vlan_dev_priv(dev)->egress_priority_map[(skprio & 0xF)];
-	while (mp) {
-		if (mp->priority == skprio) {
-			return mp->vlan_qos; /* This should already be shifted
-					      * to mask correctly with the
-					      * VLAN's TCI */
-		}
-		mp = mp->next;
-	}
-	return 0;
-}
-
-u16 vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio)
-{
-	return __vlan_dev_get_egress_qos_mask(dev, skprio);
-}
-EXPORT_SYMBOL(vlan_dev_get_egress_qos_mask);
-
 /*
  *	Create the VLAN header for an arbitrary protocol layer
  *
@@ -117,7 +92,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
 
 		vlan_tci = vlan->vlan_id;
-		vlan_tci |= __vlan_dev_get_egress_qos_mask(dev, skb->priority);
+		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb->priority);
 		vhdr->h_vlan_TCI = htons(vlan_tci);
 
 		/*
@@ -174,7 +149,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	    vlan->flags & VLAN_FLAG_REORDER_HDR) {
 		u16 vlan_tci;
 		vlan_tci = vlan->vlan_id;
-		vlan_tci |= __vlan_dev_get_egress_qos_mask(dev, skb->priority);
+		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb->priority);
 		skb = __vlan_hwaccel_put_tag(skb, vlan->vlan_proto, vlan_tci);
 	}
 
@@ -259,7 +234,7 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 	np->vlan_qos = vlan_qos;
 	/* Before inserting this element in hash table, make sure all its fields
 	 * are committed to memory.
-	 * coupled with smp_rmb() in __vlan_dev_get_egress_qos_mask()
+	 * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask()
 	 */
 	smp_wmb();
 	vlan->egress_priority_map[skb_prio & 0xF] = np;
-- 
cgit v1.2.3-71-gd317


From 51c37a70aaa3f95773af560e6db3073520513912 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 11 Nov 2013 12:20:32 +0100
Subject: random32: fix off-by-one in seeding requirement

For properly initialising the Tausworthe generator [1], we have
a strict seeding requirement, that is, s1 > 1, s2 > 7, s3 > 15.

Commit 697f8d0348 ("random32: seeding improvement") introduced
a __seed() function that imposes boundary checks proposed by the
errata paper [2] to properly ensure above conditions.

However, we're off by one, as the function is implemented as:
"return (x < m) ? x + m : x;", and called with __seed(X, 1),
__seed(X, 7), __seed(X, 15). Thus, an unwanted seed of 1, 7, 15
would be possible, whereas the lower boundary should actually
be of at least 2, 8, 16, just as GSL does. Fix this, as otherwise
an initialization with an unwanted seed could have the effect
that Tausworthe's PRNG properties cannot not be ensured.

Note that this PRNG is *not* used for cryptography in the kernel.

 [1] http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
 [2] http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps

Joint work with Hannes Frederic Sowa.

Fixes: 697f8d0348a6 ("random32: seeding improvement")
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/random.h |  6 +++---
 lib/random32.c         | 14 +++++++-------
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index 6312dd9ba449..bf9085e89fb5 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -50,9 +50,9 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
 {
 	u32 i = (seed >> 32) ^ (seed << 10) ^ seed;
 
-	state->s1 = __seed(i, 1);
-	state->s2 = __seed(i, 7);
-	state->s3 = __seed(i, 15);
+	state->s1 = __seed(i, 2);
+	state->s2 = __seed(i, 8);
+	state->s3 = __seed(i, 16);
 }
 
 #ifdef CONFIG_ARCH_RANDOM
diff --git a/lib/random32.c b/lib/random32.c
index 52280d5526be..01e8890d1089 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -141,7 +141,7 @@ void prandom_seed(u32 entropy)
 	 */
 	for_each_possible_cpu (i) {
 		struct rnd_state *state = &per_cpu(net_rand_state, i);
-		state->s1 = __seed(state->s1 ^ entropy, 1);
+		state->s1 = __seed(state->s1 ^ entropy, 2);
 	}
 }
 EXPORT_SYMBOL(prandom_seed);
@@ -158,9 +158,9 @@ static int __init prandom_init(void)
 		struct rnd_state *state = &per_cpu(net_rand_state,i);
 
 #define LCG(x)	((x) * 69069)	/* super-duper LCG */
-		state->s1 = __seed(LCG(i + jiffies), 1);
-		state->s2 = __seed(LCG(state->s1), 7);
-		state->s3 = __seed(LCG(state->s2), 15);
+		state->s1 = __seed(LCG(i + jiffies), 2);
+		state->s2 = __seed(LCG(state->s1), 8);
+		state->s3 = __seed(LCG(state->s2), 16);
 
 		/* "warm it up" */
 		prandom_u32_state(state);
@@ -187,9 +187,9 @@ static int __init prandom_reseed(void)
 		u32 seeds[3];
 
 		get_random_bytes(&seeds, sizeof(seeds));
-		state->s1 = __seed(seeds[0], 1);
-		state->s2 = __seed(seeds[1], 7);
-		state->s3 = __seed(seeds[2], 15);
+		state->s1 = __seed(seeds[0], 2);
+		state->s2 = __seed(seeds[1], 8);
+		state->s3 = __seed(seeds[2], 16);
 
 		/* mix it in */
 		prandom_u32_state(state);
-- 
cgit v1.2.3-71-gd317


From 4af712e8df998475736f3e2727701bd31e3751a9 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 11 Nov 2013 12:20:34 +0100
Subject: random32: add prandom_reseed_late() and call when nonblocking pool
 becomes initialized

The Tausworthe PRNG is initialized at late_initcall time. At that time the
entropy pool serving get_random_bytes is not filled sufficiently. This
patch adds an additional reseeding step as soon as the nonblocking pool
gets marked as initialized.

On some machines it might be possible that late_initcall gets called after
the pool has been initialized. In this situation we won't reseed again.

(A call to prandom_seed_late blocks later invocations of early reseed
attempts.)

Joint work with Daniel Borkmann.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/char/random.c  |  5 ++++-
 include/linux/random.h |  1 +
 lib/random32.c         | 23 ++++++++++++++++++++++-
 3 files changed, 27 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 7a744d391756..4fe5609eeb72 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -603,8 +603,11 @@ retry:
 
 	if (!r->initialized && nbits > 0) {
 		r->entropy_total += nbits;
-		if (r->entropy_total > 128)
+		if (r->entropy_total > 128) {
 			r->initialized = 1;
+			if (r == &nonblocking_pool)
+				prandom_reseed_late();
+		}
 	}
 
 	trace_credit_entropy_bits(r->name, nbits, entropy_count,
diff --git a/include/linux/random.h b/include/linux/random.h
index bf9085e89fb5..5117ae348fe8 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -29,6 +29,7 @@ unsigned long randomize_range(unsigned long start, unsigned long end, unsigned l
 u32 prandom_u32(void);
 void prandom_bytes(void *buf, int nbytes);
 void prandom_seed(u32 seed);
+void prandom_reseed_late(void);
 
 u32 prandom_u32_state(struct rnd_state *);
 void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes);
diff --git a/lib/random32.c b/lib/random32.c
index 12215df701e8..9f2f2fb03dfe 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -200,9 +200,18 @@ static void prandom_start_seed_timer(void)
  *	Generate better values after random number generator
  *	is fully initialized.
  */
-static int __init prandom_reseed(void)
+static void __prandom_reseed(bool late)
 {
 	int i;
+	unsigned long flags;
+	static bool latch = false;
+	static DEFINE_SPINLOCK(lock);
+
+	/* only allow initial seeding (late == false) once */
+	spin_lock_irqsave(&lock, flags);
+	if (latch && !late)
+		goto out;
+	latch = true;
 
 	for_each_possible_cpu(i) {
 		struct rnd_state *state = &per_cpu(net_rand_state,i);
@@ -216,6 +225,18 @@ static int __init prandom_reseed(void)
 		/* mix it in */
 		prandom_u32_state(state);
 	}
+out:
+	spin_unlock_irqrestore(&lock, flags);
+}
+
+void prandom_reseed_late(void)
+{
+	__prandom_reseed(true);
+}
+
+static int __init prandom_reseed(void)
+{
+	__prandom_reseed(false);
 	prandom_start_seed_timer();
 	return 0;
 }
-- 
cgit v1.2.3-71-gd317


From 38e9efcdb33270b4da72143d8e7ca4dcf7f0989b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 11 Nov 2013 12:20:35 +0100
Subject: random32: move rnd_state to linux/random.h

struct rnd_state got mistakenly pulled into uapi header. It is not
used anywhere and does also not belong there!

Commit 5960164fde ("lib/random32: export pseudo-random number
generator for modules"), the last commit on rnd_state before it
got moved to uapi, says:

  This patch moves the definition of struct rnd_state and the inline
  __seed() function to linux/random.h.  It renames the static __random32()
  function to prandom32() and exports it for use in modules.

Hence, the structure was moved from lib/random32.c to linux/random.h
so that it can be used within modules (FCoE-related code in this
case), but not from user space. However, it seems to have been
mistakenly moved to uapi header through the uapi script. Since no-one
should make use of it from the linux headers, move the structure back
to the kernel for internal use, so that it can be modified on demand.

Joint work with Hannes Frederic Sowa.

Cc: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/random.h      | 4 ++++
 include/uapi/linux/random.h | 7 -------
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index 5117ae348fe8..8ef0b70bd1f9 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -31,6 +31,10 @@ void prandom_bytes(void *buf, int nbytes);
 void prandom_seed(u32 seed);
 void prandom_reseed_late(void);
 
+struct rnd_state {
+	__u32 s1, s2, s3;
+};
+
 u32 prandom_u32_state(struct rnd_state *);
 void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes);
 
diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h
index 7471b5b3b8ba..fff3528a078f 100644
--- a/include/uapi/linux/random.h
+++ b/include/uapi/linux/random.h
@@ -40,11 +40,4 @@ struct rand_pool_info {
 	__u32	buf[0];
 };
 
-struct rnd_state {
-	__u32 s1, s2, s3;
-};
-
-/* Exported functions */
-
-
 #endif /* _UAPI_LINUX_RANDOM_H */
-- 
cgit v1.2.3-71-gd317


From a98814cef87946d2708812ad9f8b1e03b8366b6f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 11 Nov 2013 12:20:36 +0100
Subject: random32: upgrade taus88 generator to taus113 from errata paper

Since we use prandom*() functions quite often in networking code
i.e. in UDP port selection, netfilter code, etc, upgrade the PRNG
from Pierre L'Ecuyer's original paper "Maximally Equidistributed
Combined Tausworthe Generators", Mathematics of Computation, 65,
213 (1996), 203--213 to the version published in his errata paper [1].

The Tausworthe generator is a maximally-equidistributed generator,
that is fast and has good statistical properties [1].

The version presented there upgrades the 3 state LFSR to a 4 state
LFSR with increased periodicity from about 2^88 to 2^113. The
algorithm is presented in [1] by the very same author who also
designed the original algorithm in [2].

Also, by increasing the state, we make it a bit harder for attackers
to "guess" the PRNGs internal state. See also discussion in [3].

Now, as we use this sort of weak initialization discussed in [3]
only between core_initcall() until late_initcall() time [*] for
prandom32*() users, namely in prandom_init(), it is less relevant
from late_initcall() onwards as we overwrite seeds through
prandom_reseed() anyways with a seed source of higher entropy, that
is, get_random_bytes(). In other words, a exhaustive keysearch of
96 bit would be needed. Now, with the help of this patch, this
state-search increases further to 128 bit. Initialization needs
to make sure that s1 > 1, s2 > 7, s3 > 15, s4 > 127.

taus88 and taus113 algorithm is also part of GSL. I added a test
case in the next patch to verify internal behaviour of this patch
with GSL and ran tests with the dieharder 3.31.1 RNG test suite:

$ dieharder -g 052 -a -m 10 -s 1 -S 4137730333 #taus88
$ dieharder -g 054 -a -m 10 -s 1 -S 4137730333 #taus113

With this seed configuration, in order to compare both, we get
the following differences:

algorithm                 taus88           taus113
rands/second [**]         1.61e+08         1.37e+08
sts_serial(4, 1st run)    WEAK             PASSED
sts_serial(9, 2nd run)    WEAK             PASSED
rgb_lagged_sum(31)        WEAK             PASSED

We took out diehard_sums test as according to the authors it is
considered broken and unusable [4]. Despite that and the slight
decrease in performance (which is acceptable), taus113 here passes
all 113 tests (only rgb_minimum_distance_5 in WEAK, the rest PASSED).
In general, taus/taus113 is considered "very good" by the authors
of dieharder [5].

The papers [1][2] states a single warm-up step is sufficient by
running quicktaus once on each state to ensure proper initialization
of ~s_{0}:

Our selection of (s) according to Table 1 of [1] row 1 holds the
condition L - k <= r - s, that is,

  (32 32 32 32) - (31 29 28 25) <= (25 27 15 22) - (18 2 7 13)

with r = k - q and q = (6 2 13 3) as also stated by the paper.
So according to [2] we are safe with one round of quicktaus for
initialization. However we decided to include the warm-up phase
of the PRNG as done in GSL in every case as a safety net. We also
use the warm up phase to make the output of the RNG easier to
verify by the GSL output.

In prandom_init(), we also mix random_get_entropy() into it, just
like drivers/char/random.c does it, jiffies ^ random_get_entropy().
random-get_entropy() is get_cycles(). xor is entropy preserving so
it is fine if it is not implemented by some architectures.

Note, this PRNG is *not* used for cryptography in the kernel, but
rather as a fast PRNG for various randomizations i.e. in the
networking code, or elsewhere for debugging purposes, for example.

[*]: In order to generate some "sort of pseduo-randomness", since
get_random_bytes() is not yet available for us, we use jiffies and
initialize states s1 - s3 with a simple linear congruential generator
(LCG), that is x <- x * 69069; and derive s2, s3, from the 32bit
initialization from s1. So the above quote from [3] accounts only
for the time from core to late initcall, not afterwards.
[**] Single threaded run on MacBook Air w/ Intel Core i5-3317U

 [1] http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps
 [2] http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
 [3] http://thread.gmane.org/gmane.comp.encryption.general/12103/
 [4] http://code.google.com/p/dieharder/source/browse/trunk/libdieharder/diehard_sums.c?spec=svn490&r=490#20
 [5] http://www.phy.duke.edu/~rgb/General/dieharder.php

Joint work with Hannes Frederic Sowa.

Cc: Florian Weimer <fweimer@redhat.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/random.h | 11 +++----
 lib/random32.c         | 80 +++++++++++++++++++++++++++++---------------------
 2 files changed, 52 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index 8ef0b70bd1f9..4002b3df4c85 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -32,10 +32,10 @@ void prandom_seed(u32 seed);
 void prandom_reseed_late(void);
 
 struct rnd_state {
-	__u32 s1, s2, s3;
+	__u32 s1, s2, s3, s4;
 };
 
-u32 prandom_u32_state(struct rnd_state *);
+u32 prandom_u32_state(struct rnd_state *state);
 void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes);
 
 /*
@@ -55,9 +55,10 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
 {
 	u32 i = (seed >> 32) ^ (seed << 10) ^ seed;
 
-	state->s1 = __seed(i, 2);
-	state->s2 = __seed(i, 8);
-	state->s3 = __seed(i, 16);
+	state->s1 = __seed(i,   2U);
+	state->s2 = __seed(i,   8U);
+	state->s3 = __seed(i,  16U);
+	state->s4 = __seed(i, 128U);
 }
 
 #ifdef CONFIG_ARCH_RANDOM
diff --git a/lib/random32.c b/lib/random32.c
index 9f2f2fb03dfe..27adb753180f 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -2,19 +2,19 @@
   This is a maximally equidistributed combined Tausworthe generator
   based on code from GNU Scientific Library 1.5 (30 Jun 2004)
 
-   x_n = (s1_n ^ s2_n ^ s3_n)
+  lfsr113 version:
 
-   s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19))
-   s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25))
-   s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11))
+   x_n = (s1_n ^ s2_n ^ s3_n ^ s4_n)
 
-   The period of this generator is about 2^88.
+   s1_{n+1} = (((s1_n & 4294967294) << 18) ^ (((s1_n <<  6) ^ s1_n) >> 13))
+   s2_{n+1} = (((s2_n & 4294967288) <<  2) ^ (((s2_n <<  2) ^ s2_n) >> 27))
+   s3_{n+1} = (((s3_n & 4294967280) <<  7) ^ (((s3_n << 13) ^ s3_n) >> 21))
+   s4_{n+1} = (((s4_n & 4294967168) << 13) ^ (((s4_n <<  3) ^ s4_n) >> 12))
 
-   From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe
-   Generators", Mathematics of Computation, 65, 213 (1996), 203--213.
-
-   This is available on the net from L'Ecuyer's home page,
+   The period of this generator is about 2^113 (see erratum paper).
 
+   From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe
+   Generators", Mathematics of Computation, 65, 213 (1996), 203--213:
    http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
    ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps
 
@@ -29,7 +29,7 @@
         that paper.)
 
    This affects the seeding procedure by imposing the requirement
-   s1 > 1, s2 > 7, s3 > 15.
+   s1 > 1, s2 > 7, s3 > 15, s4 > 127.
 
 */
 
@@ -52,11 +52,12 @@ u32 prandom_u32_state(struct rnd_state *state)
 {
 #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
 
-	state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12);
-	state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4);
-	state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17);
+	state->s1 = TAUSWORTHE(state->s1,  6U, 13U, 4294967294U, 18U);
+	state->s2 = TAUSWORTHE(state->s2,  2U, 27U, 4294967288U,  2U);
+	state->s3 = TAUSWORTHE(state->s3, 13U, 21U, 4294967280U,  7U);
+	state->s4 = TAUSWORTHE(state->s4,  3U, 12U, 4294967168U, 13U);
 
-	return (state->s1 ^ state->s2 ^ state->s3);
+	return (state->s1 ^ state->s2 ^ state->s3 ^ state->s4);
 }
 EXPORT_SYMBOL(prandom_u32_state);
 
@@ -126,6 +127,21 @@ void prandom_bytes(void *buf, int bytes)
 }
 EXPORT_SYMBOL(prandom_bytes);
 
+static void prandom_warmup(struct rnd_state *state)
+{
+	/* Calling RNG ten times to satify recurrence condition */
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+	prandom_u32_state(state);
+}
+
 /**
  *	prandom_seed - add entropy to pseudo random number generator
  *	@seed: seed value
@@ -141,8 +157,9 @@ void prandom_seed(u32 entropy)
 	 */
 	for_each_possible_cpu (i) {
 		struct rnd_state *state = &per_cpu(net_rand_state, i);
-		state->s1 = __seed(state->s1 ^ entropy, 2);
-		prandom_u32_state(state);
+
+		state->s1 = __seed(state->s1 ^ entropy, 2U);
+		prandom_warmup(state);
 	}
 }
 EXPORT_SYMBOL(prandom_seed);
@@ -158,18 +175,13 @@ static int __init prandom_init(void)
 	for_each_possible_cpu(i) {
 		struct rnd_state *state = &per_cpu(net_rand_state,i);
 
-#define LCG(x)	((x) * 69069)	/* super-duper LCG */
-		state->s1 = __seed(LCG(i + jiffies), 2);
-		state->s2 = __seed(LCG(state->s1), 8);
-		state->s3 = __seed(LCG(state->s2), 16);
-
-		/* "warm it up" */
-		prandom_u32_state(state);
-		prandom_u32_state(state);
-		prandom_u32_state(state);
-		prandom_u32_state(state);
-		prandom_u32_state(state);
-		prandom_u32_state(state);
+#define LCG(x)	((x) * 69069U)	/* super-duper LCG */
+		state->s1 = __seed(LCG((i + jiffies) ^ random_get_entropy()), 2U);
+		state->s2 = __seed(LCG(state->s1),   8U);
+		state->s3 = __seed(LCG(state->s2),  16U);
+		state->s4 = __seed(LCG(state->s3), 128U);
+
+		prandom_warmup(state);
 	}
 	return 0;
 }
@@ -215,15 +227,15 @@ static void __prandom_reseed(bool late)
 
 	for_each_possible_cpu(i) {
 		struct rnd_state *state = &per_cpu(net_rand_state,i);
-		u32 seeds[3];
+		u32 seeds[4];
 
 		get_random_bytes(&seeds, sizeof(seeds));
-		state->s1 = __seed(seeds[0], 2);
-		state->s2 = __seed(seeds[1], 8);
-		state->s3 = __seed(seeds[2], 16);
+		state->s1 = __seed(seeds[0],   2U);
+		state->s2 = __seed(seeds[1],   8U);
+		state->s3 = __seed(seeds[2],  16U);
+		state->s4 = __seed(seeds[3], 128U);
 
-		/* mix it in */
-		prandom_u32_state(state);
+		prandom_warmup(state);
 	}
 out:
 	spin_unlock_irqrestore(&lock, flags);
-- 
cgit v1.2.3-71-gd317