cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

ipt_CLUSTERIP.c (23134B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/* Cluster IP hashmark target
      3 * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
      4 * based on ideas of Fabio Olive Leite <olive@unixforge.org>
      5 *
      6 * Development of this code funded by SuSE Linux AG, https://www.suse.com/
      7 */
      8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
      9#include <linux/module.h>
     10#include <linux/proc_fs.h>
     11#include <linux/jhash.h>
     12#include <linux/bitops.h>
     13#include <linux/skbuff.h>
     14#include <linux/slab.h>
     15#include <linux/ip.h>
     16#include <linux/tcp.h>
     17#include <linux/udp.h>
     18#include <linux/icmp.h>
     19#include <linux/if_arp.h>
     20#include <linux/seq_file.h>
     21#include <linux/refcount.h>
     22#include <linux/netfilter_arp.h>
     23#include <linux/netfilter/x_tables.h>
     24#include <linux/netfilter_ipv4/ip_tables.h>
     25#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
     26#include <net/netfilter/nf_conntrack.h>
     27#include <net/net_namespace.h>
     28#include <net/netns/generic.h>
     29#include <net/checksum.h>
     30#include <net/ip.h>
     31
     32#define CLUSTERIP_VERSION "0.8"
     33
     34MODULE_LICENSE("GPL");
     35MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
     36MODULE_DESCRIPTION("Xtables: CLUSTERIP target");
     37
     38struct clusterip_config {
     39	struct list_head list;			/* list of all configs */
     40	refcount_t refcount;			/* reference count */
     41	refcount_t entries;			/* number of entries/rules
     42						 * referencing us */
     43
     44	__be32 clusterip;			/* the IP address */
     45	u_int8_t clustermac[ETH_ALEN];		/* the MAC address */
     46	int ifindex;				/* device ifindex */
     47	u_int16_t num_total_nodes;		/* total number of nodes */
     48	unsigned long local_nodes;		/* node number array */
     49
     50#ifdef CONFIG_PROC_FS
     51	struct proc_dir_entry *pde;		/* proc dir entry */
     52#endif
     53	enum clusterip_hashmode hash_mode;	/* which hashing mode */
     54	u_int32_t hash_initval;			/* hash initialization */
     55	struct rcu_head rcu;			/* for call_rcu */
     56	struct net *net;			/* netns for pernet list */
     57	char ifname[IFNAMSIZ];			/* device ifname */
     58};
     59
     60#ifdef CONFIG_PROC_FS
     61static const struct proc_ops clusterip_proc_ops;
     62#endif
     63
     64struct clusterip_net {
     65	struct list_head configs;
     66	/* lock protects the configs list */
     67	spinlock_t lock;
     68
     69	bool clusterip_deprecated_warning;
     70#ifdef CONFIG_PROC_FS
     71	struct proc_dir_entry *procdir;
     72	/* mutex protects the config->pde*/
     73	struct mutex mutex;
     74#endif
     75	unsigned int hook_users;
     76};
     77
     78static unsigned int clusterip_arp_mangle(void *priv, struct sk_buff *skb, const struct nf_hook_state *state);
     79
     80static const struct nf_hook_ops cip_arp_ops = {
     81	.hook = clusterip_arp_mangle,
     82	.pf = NFPROTO_ARP,
     83	.hooknum = NF_ARP_OUT,
     84	.priority = -1
     85};
     86
     87static unsigned int clusterip_net_id __read_mostly;
     88static inline struct clusterip_net *clusterip_pernet(struct net *net)
     89{
     90	return net_generic(net, clusterip_net_id);
     91}
     92
     93static inline void
     94clusterip_config_get(struct clusterip_config *c)
     95{
     96	refcount_inc(&c->refcount);
     97}
     98
     99static void clusterip_config_rcu_free(struct rcu_head *head)
    100{
    101	struct clusterip_config *config;
    102	struct net_device *dev;
    103
    104	config = container_of(head, struct clusterip_config, rcu);
    105	dev = dev_get_by_name(config->net, config->ifname);
    106	if (dev) {
    107		dev_mc_del(dev, config->clustermac);
    108		dev_put(dev);
    109	}
    110	kfree(config);
    111}
    112
    113static inline void
    114clusterip_config_put(struct clusterip_config *c)
    115{
    116	if (refcount_dec_and_test(&c->refcount))
    117		call_rcu(&c->rcu, clusterip_config_rcu_free);
    118}
    119
    120/* decrease the count of entries using/referencing this config.  If last
    121 * entry(rule) is removed, remove the config from lists, but don't free it
    122 * yet, since proc-files could still be holding references */
    123static inline void
    124clusterip_config_entry_put(struct clusterip_config *c)
    125{
    126	struct clusterip_net *cn = clusterip_pernet(c->net);
    127
    128	local_bh_disable();
    129	if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
    130		list_del_rcu(&c->list);
    131		spin_unlock(&cn->lock);
    132		local_bh_enable();
    133		/* In case anyone still accesses the file, the open/close
    134		 * functions are also incrementing the refcount on their own,
    135		 * so it's safe to remove the entry even if it's in use. */
    136#ifdef CONFIG_PROC_FS
    137		mutex_lock(&cn->mutex);
    138		if (cn->procdir)
    139			proc_remove(c->pde);
    140		mutex_unlock(&cn->mutex);
    141#endif
    142		return;
    143	}
    144	local_bh_enable();
    145}
    146
    147static struct clusterip_config *
    148__clusterip_config_find(struct net *net, __be32 clusterip)
    149{
    150	struct clusterip_config *c;
    151	struct clusterip_net *cn = clusterip_pernet(net);
    152
    153	list_for_each_entry_rcu(c, &cn->configs, list) {
    154		if (c->clusterip == clusterip)
    155			return c;
    156	}
    157
    158	return NULL;
    159}
    160
    161static inline struct clusterip_config *
    162clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
    163{
    164	struct clusterip_config *c;
    165
    166	rcu_read_lock_bh();
    167	c = __clusterip_config_find(net, clusterip);
    168	if (c) {
    169#ifdef CONFIG_PROC_FS
    170		if (!c->pde)
    171			c = NULL;
    172		else
    173#endif
    174		if (unlikely(!refcount_inc_not_zero(&c->refcount)))
    175			c = NULL;
    176		else if (entry) {
    177			if (unlikely(!refcount_inc_not_zero(&c->entries))) {
    178				clusterip_config_put(c);
    179				c = NULL;
    180			}
    181		}
    182	}
    183	rcu_read_unlock_bh();
    184
    185	return c;
    186}
    187
    188static void
    189clusterip_config_init_nodelist(struct clusterip_config *c,
    190			       const struct ipt_clusterip_tgt_info *i)
    191{
    192	int n;
    193
    194	for (n = 0; n < i->num_local_nodes; n++)
    195		set_bit(i->local_nodes[n] - 1, &c->local_nodes);
    196}
    197
    198static int
    199clusterip_netdev_event(struct notifier_block *this, unsigned long event,
    200		       void *ptr)
    201{
    202	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
    203	struct net *net = dev_net(dev);
    204	struct clusterip_net *cn = clusterip_pernet(net);
    205	struct clusterip_config *c;
    206
    207	spin_lock_bh(&cn->lock);
    208	list_for_each_entry_rcu(c, &cn->configs, list) {
    209		switch (event) {
    210		case NETDEV_REGISTER:
    211			if (!strcmp(dev->name, c->ifname)) {
    212				c->ifindex = dev->ifindex;
    213				dev_mc_add(dev, c->clustermac);
    214			}
    215			break;
    216		case NETDEV_UNREGISTER:
    217			if (dev->ifindex == c->ifindex) {
    218				dev_mc_del(dev, c->clustermac);
    219				c->ifindex = -1;
    220			}
    221			break;
    222		case NETDEV_CHANGENAME:
    223			if (!strcmp(dev->name, c->ifname)) {
    224				c->ifindex = dev->ifindex;
    225				dev_mc_add(dev, c->clustermac);
    226			} else if (dev->ifindex == c->ifindex) {
    227				dev_mc_del(dev, c->clustermac);
    228				c->ifindex = -1;
    229			}
    230			break;
    231		}
    232	}
    233	spin_unlock_bh(&cn->lock);
    234
    235	return NOTIFY_DONE;
    236}
    237
    238static struct clusterip_config *
    239clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
    240		      __be32 ip, const char *iniface)
    241{
    242	struct clusterip_net *cn = clusterip_pernet(net);
    243	struct clusterip_config *c;
    244	struct net_device *dev;
    245	int err;
    246
    247	if (iniface[0] == '\0') {
    248		pr_info("Please specify an interface name\n");
    249		return ERR_PTR(-EINVAL);
    250	}
    251
    252	c = kzalloc(sizeof(*c), GFP_ATOMIC);
    253	if (!c)
    254		return ERR_PTR(-ENOMEM);
    255
    256	dev = dev_get_by_name(net, iniface);
    257	if (!dev) {
    258		pr_info("no such interface %s\n", iniface);
    259		kfree(c);
    260		return ERR_PTR(-ENOENT);
    261	}
    262	c->ifindex = dev->ifindex;
    263	strcpy(c->ifname, dev->name);
    264	memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
    265	dev_mc_add(dev, c->clustermac);
    266	dev_put(dev);
    267
    268	c->clusterip = ip;
    269	c->num_total_nodes = i->num_total_nodes;
    270	clusterip_config_init_nodelist(c, i);
    271	c->hash_mode = i->hash_mode;
    272	c->hash_initval = i->hash_initval;
    273	c->net = net;
    274	refcount_set(&c->refcount, 1);
    275
    276	spin_lock_bh(&cn->lock);
    277	if (__clusterip_config_find(net, ip)) {
    278		err = -EBUSY;
    279		goto out_config_put;
    280	}
    281
    282	list_add_rcu(&c->list, &cn->configs);
    283	spin_unlock_bh(&cn->lock);
    284
    285#ifdef CONFIG_PROC_FS
    286	{
    287		char buffer[16];
    288
    289		/* create proc dir entry */
    290		sprintf(buffer, "%pI4", &ip);
    291		mutex_lock(&cn->mutex);
    292		c->pde = proc_create_data(buffer, 0600,
    293					  cn->procdir,
    294					  &clusterip_proc_ops, c);
    295		mutex_unlock(&cn->mutex);
    296		if (!c->pde) {
    297			err = -ENOMEM;
    298			goto err;
    299		}
    300	}
    301#endif
    302
    303	refcount_set(&c->entries, 1);
    304	return c;
    305
    306#ifdef CONFIG_PROC_FS
    307err:
    308#endif
    309	spin_lock_bh(&cn->lock);
    310	list_del_rcu(&c->list);
    311out_config_put:
    312	spin_unlock_bh(&cn->lock);
    313	clusterip_config_put(c);
    314	return ERR_PTR(err);
    315}
    316
    317#ifdef CONFIG_PROC_FS
    318static int
    319clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
    320{
    321
    322	if (nodenum == 0 ||
    323	    nodenum > c->num_total_nodes)
    324		return 1;
    325
    326	/* check if we already have this number in our bitfield */
    327	if (test_and_set_bit(nodenum - 1, &c->local_nodes))
    328		return 1;
    329
    330	return 0;
    331}
    332
    333static bool
    334clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
    335{
    336	if (nodenum == 0 ||
    337	    nodenum > c->num_total_nodes)
    338		return true;
    339
    340	if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
    341		return false;
    342
    343	return true;
    344}
    345#endif
    346
    347static inline u_int32_t
    348clusterip_hashfn(const struct sk_buff *skb,
    349		 const struct clusterip_config *config)
    350{
    351	const struct iphdr *iph = ip_hdr(skb);
    352	unsigned long hashval;
    353	u_int16_t sport = 0, dport = 0;
    354	int poff;
    355
    356	poff = proto_ports_offset(iph->protocol);
    357	if (poff >= 0) {
    358		const u_int16_t *ports;
    359		u16 _ports[2];
    360
    361		ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports);
    362		if (ports) {
    363			sport = ports[0];
    364			dport = ports[1];
    365		}
    366	} else {
    367		net_info_ratelimited("unknown protocol %u\n", iph->protocol);
    368	}
    369
    370	switch (config->hash_mode) {
    371	case CLUSTERIP_HASHMODE_SIP:
    372		hashval = jhash_1word(ntohl(iph->saddr),
    373				      config->hash_initval);
    374		break;
    375	case CLUSTERIP_HASHMODE_SIP_SPT:
    376		hashval = jhash_2words(ntohl(iph->saddr), sport,
    377				       config->hash_initval);
    378		break;
    379	case CLUSTERIP_HASHMODE_SIP_SPT_DPT:
    380		hashval = jhash_3words(ntohl(iph->saddr), sport, dport,
    381				       config->hash_initval);
    382		break;
    383	default:
    384		/* to make gcc happy */
    385		hashval = 0;
    386		/* This cannot happen, unless the check function wasn't called
    387		 * at rule load time */
    388		pr_info("unknown mode %u\n", config->hash_mode);
    389		BUG();
    390		break;
    391	}
    392
    393	/* node numbers are 1..n, not 0..n */
    394	return reciprocal_scale(hashval, config->num_total_nodes) + 1;
    395}
    396
    397static inline int
    398clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
    399{
    400	return test_bit(hash - 1, &config->local_nodes);
    401}
    402
    403/***********************************************************************
    404 * IPTABLES TARGET
    405 ***********************************************************************/
    406
    407static unsigned int
    408clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
    409{
    410	const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
    411	struct nf_conn *ct;
    412	enum ip_conntrack_info ctinfo;
    413	u_int32_t hash;
    414
    415	/* don't need to clusterip_config_get() here, since refcount
    416	 * is only decremented by destroy() - and ip_tables guarantees
    417	 * that the ->target() function isn't called after ->destroy() */
    418
    419	ct = nf_ct_get(skb, &ctinfo);
    420	if (ct == NULL)
    421		return NF_DROP;
    422
    423	/* special case: ICMP error handling. conntrack distinguishes between
    424	 * error messages (RELATED) and information requests (see below) */
    425	if (ip_hdr(skb)->protocol == IPPROTO_ICMP &&
    426	    (ctinfo == IP_CT_RELATED ||
    427	     ctinfo == IP_CT_RELATED_REPLY))
    428		return XT_CONTINUE;
    429
    430	/* nf_conntrack_proto_icmp guarantees us that we only have ICMP_ECHO,
    431	 * TIMESTAMP, INFO_REQUEST or ICMP_ADDRESS type icmp packets from here
    432	 * on, which all have an ID field [relevant for hashing]. */
    433
    434	hash = clusterip_hashfn(skb, cipinfo->config);
    435
    436	switch (ctinfo) {
    437	case IP_CT_NEW:
    438		ct->mark = hash;
    439		break;
    440	case IP_CT_RELATED:
    441	case IP_CT_RELATED_REPLY:
    442		/* FIXME: we don't handle expectations at the moment.
    443		 * They can arrive on a different node than
    444		 * the master connection (e.g. FTP passive mode) */
    445	case IP_CT_ESTABLISHED:
    446	case IP_CT_ESTABLISHED_REPLY:
    447		break;
    448	default:			/* Prevent gcc warnings */
    449		break;
    450	}
    451
    452#ifdef DEBUG
    453	nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
    454#endif
    455	pr_debug("hash=%u ct_hash=%u ", hash, ct->mark);
    456	if (!clusterip_responsible(cipinfo->config, hash)) {
    457		pr_debug("not responsible\n");
    458		return NF_DROP;
    459	}
    460	pr_debug("responsible\n");
    461
    462	/* despite being received via linklayer multicast, this is
    463	 * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */
    464	skb->pkt_type = PACKET_HOST;
    465
    466	return XT_CONTINUE;
    467}
    468
    469static int clusterip_tg_check(const struct xt_tgchk_param *par)
    470{
    471	struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
    472	struct clusterip_net *cn = clusterip_pernet(par->net);
    473	const struct ipt_entry *e = par->entryinfo;
    474	struct clusterip_config *config;
    475	int ret, i;
    476
    477	if (par->nft_compat) {
    478		pr_err("cannot use CLUSTERIP target from nftables compat\n");
    479		return -EOPNOTSUPP;
    480	}
    481
    482	if (cn->hook_users == UINT_MAX)
    483		return -EOVERFLOW;
    484
    485	if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
    486	    cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
    487	    cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
    488		pr_info("unknown mode %u\n", cipinfo->hash_mode);
    489		return -EINVAL;
    490
    491	}
    492	if (e->ip.dmsk.s_addr != htonl(0xffffffff) ||
    493	    e->ip.dst.s_addr == 0) {
    494		pr_info("Please specify destination IP\n");
    495		return -EINVAL;
    496	}
    497	if (cipinfo->num_local_nodes > ARRAY_SIZE(cipinfo->local_nodes)) {
    498		pr_info("bad num_local_nodes %u\n", cipinfo->num_local_nodes);
    499		return -EINVAL;
    500	}
    501	for (i = 0; i < cipinfo->num_local_nodes; i++) {
    502		if (cipinfo->local_nodes[i] - 1 >=
    503		    sizeof(config->local_nodes) * 8) {
    504			pr_info("bad local_nodes[%d] %u\n",
    505				i, cipinfo->local_nodes[i]);
    506			return -EINVAL;
    507		}
    508	}
    509
    510	config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1);
    511	if (!config) {
    512		if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
    513			pr_info("no config found for %pI4, need 'new'\n",
    514				&e->ip.dst.s_addr);
    515			return -EINVAL;
    516		} else {
    517			config = clusterip_config_init(par->net, cipinfo,
    518						       e->ip.dst.s_addr,
    519						       e->ip.iniface);
    520			if (IS_ERR(config))
    521				return PTR_ERR(config);
    522		}
    523	} else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) {
    524		clusterip_config_entry_put(config);
    525		clusterip_config_put(config);
    526		return -EINVAL;
    527	}
    528
    529	ret = nf_ct_netns_get(par->net, par->family);
    530	if (ret < 0) {
    531		pr_info("cannot load conntrack support for proto=%u\n",
    532			par->family);
    533		clusterip_config_entry_put(config);
    534		clusterip_config_put(config);
    535		return ret;
    536	}
    537
    538	if (cn->hook_users == 0) {
    539		ret = nf_register_net_hook(par->net, &cip_arp_ops);
    540
    541		if (ret < 0) {
    542			clusterip_config_entry_put(config);
    543			clusterip_config_put(config);
    544			nf_ct_netns_put(par->net, par->family);
    545			return ret;
    546		}
    547	}
    548
    549	cn->hook_users++;
    550
    551	if (!cn->clusterip_deprecated_warning) {
    552		pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
    553			"use xt_cluster instead\n");
    554		cn->clusterip_deprecated_warning = true;
    555	}
    556
    557	cipinfo->config = config;
    558	return ret;
    559}
    560
    561/* drop reference count of cluster config when rule is deleted */
    562static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
    563{
    564	const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
    565	struct clusterip_net *cn = clusterip_pernet(par->net);
    566
    567	/* if no more entries are referencing the config, remove it
    568	 * from the list and destroy the proc entry */
    569	clusterip_config_entry_put(cipinfo->config);
    570
    571	clusterip_config_put(cipinfo->config);
    572
    573	nf_ct_netns_put(par->net, par->family);
    574	cn->hook_users--;
    575
    576	if (cn->hook_users == 0)
    577		nf_unregister_net_hook(par->net, &cip_arp_ops);
    578}
    579
    580#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
    581struct compat_ipt_clusterip_tgt_info
    582{
    583	u_int32_t	flags;
    584	u_int8_t	clustermac[6];
    585	u_int16_t	num_total_nodes;
    586	u_int16_t	num_local_nodes;
    587	u_int16_t	local_nodes[CLUSTERIP_MAX_NODES];
    588	u_int32_t	hash_mode;
    589	u_int32_t	hash_initval;
    590	compat_uptr_t	config;
    591};
    592#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
    593
    594static struct xt_target clusterip_tg_reg __read_mostly = {
    595	.name		= "CLUSTERIP",
    596	.family		= NFPROTO_IPV4,
    597	.target		= clusterip_tg,
    598	.checkentry	= clusterip_tg_check,
    599	.destroy	= clusterip_tg_destroy,
    600	.targetsize	= sizeof(struct ipt_clusterip_tgt_info),
    601	.usersize	= offsetof(struct ipt_clusterip_tgt_info, config),
    602#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
    603	.compatsize	= sizeof(struct compat_ipt_clusterip_tgt_info),
    604#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
    605	.me		= THIS_MODULE
    606};
    607
    608
    609/***********************************************************************
    610 * ARP MANGLING CODE
    611 ***********************************************************************/
    612
    613/* hardcoded for 48bit ethernet and 32bit ipv4 addresses */
    614struct arp_payload {
    615	u_int8_t src_hw[ETH_ALEN];
    616	__be32 src_ip;
    617	u_int8_t dst_hw[ETH_ALEN];
    618	__be32 dst_ip;
    619} __packed;
    620
    621#ifdef DEBUG
    622static void arp_print(struct arp_payload *payload)
    623{
    624#define HBUFFERLEN 30
    625	char hbuffer[HBUFFERLEN];
    626	int j, k;
    627
    628	for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < ETH_ALEN; j++) {
    629		hbuffer[k++] = hex_asc_hi(payload->src_hw[j]);
    630		hbuffer[k++] = hex_asc_lo(payload->src_hw[j]);
    631		hbuffer[k++] = ':';
    632	}
    633	hbuffer[--k] = '\0';
    634
    635	pr_debug("src %pI4@%s, dst %pI4\n",
    636		 &payload->src_ip, hbuffer, &payload->dst_ip);
    637}
    638#endif
    639
    640static unsigned int
    641clusterip_arp_mangle(void *priv, struct sk_buff *skb,
    642		     const struct nf_hook_state *state)
    643{
    644	struct arphdr *arp = arp_hdr(skb);
    645	struct arp_payload *payload;
    646	struct clusterip_config *c;
    647	struct net *net = state->net;
    648
    649	/* we don't care about non-ethernet and non-ipv4 ARP */
    650	if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
    651	    arp->ar_pro != htons(ETH_P_IP) ||
    652	    arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN)
    653		return NF_ACCEPT;
    654
    655	/* we only want to mangle arp requests and replies */
    656	if (arp->ar_op != htons(ARPOP_REPLY) &&
    657	    arp->ar_op != htons(ARPOP_REQUEST))
    658		return NF_ACCEPT;
    659
    660	payload = (void *)(arp+1);
    661
    662	/* if there is no clusterip configuration for the arp reply's
    663	 * source ip, we don't want to mangle it */
    664	c = clusterip_config_find_get(net, payload->src_ip, 0);
    665	if (!c)
    666		return NF_ACCEPT;
    667
    668	/* normally the linux kernel always replies to arp queries of
    669	 * addresses on different interfacs.  However, in the CLUSTERIP case
    670	 * this wouldn't work, since we didn't subscribe the mcast group on
    671	 * other interfaces */
    672	if (c->ifindex != state->out->ifindex) {
    673		pr_debug("not mangling arp reply on different interface: cip'%d'-skb'%d'\n",
    674			 c->ifindex, state->out->ifindex);
    675		clusterip_config_put(c);
    676		return NF_ACCEPT;
    677	}
    678
    679	/* mangle reply hardware address */
    680	memcpy(payload->src_hw, c->clustermac, arp->ar_hln);
    681
    682#ifdef DEBUG
    683	pr_debug("mangled arp reply: ");
    684	arp_print(payload);
    685#endif
    686
    687	clusterip_config_put(c);
    688
    689	return NF_ACCEPT;
    690}
    691
    692/***********************************************************************
    693 * PROC DIR HANDLING
    694 ***********************************************************************/
    695
    696#ifdef CONFIG_PROC_FS
    697
    698struct clusterip_seq_position {
    699	unsigned int pos;	/* position */
    700	unsigned int weight;	/* number of bits set == size */
    701	unsigned int bit;	/* current bit */
    702	unsigned long val;	/* current value */
    703};
    704
    705static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
    706{
    707	struct clusterip_config *c = s->private;
    708	unsigned int weight;
    709	u_int32_t local_nodes;
    710	struct clusterip_seq_position *idx;
    711
    712	/* FIXME: possible race */
    713	local_nodes = c->local_nodes;
    714	weight = hweight32(local_nodes);
    715	if (*pos >= weight)
    716		return NULL;
    717
    718	idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL);
    719	if (!idx)
    720		return ERR_PTR(-ENOMEM);
    721
    722	idx->pos = *pos;
    723	idx->weight = weight;
    724	idx->bit = ffs(local_nodes);
    725	idx->val = local_nodes;
    726	clear_bit(idx->bit - 1, &idx->val);
    727
    728	return idx;
    729}
    730
    731static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
    732{
    733	struct clusterip_seq_position *idx = v;
    734
    735	*pos = ++idx->pos;
    736	if (*pos >= idx->weight) {
    737		kfree(v);
    738		return NULL;
    739	}
    740	idx->bit = ffs(idx->val);
    741	clear_bit(idx->bit - 1, &idx->val);
    742	return idx;
    743}
    744
    745static void clusterip_seq_stop(struct seq_file *s, void *v)
    746{
    747	if (!IS_ERR(v))
    748		kfree(v);
    749}
    750
    751static int clusterip_seq_show(struct seq_file *s, void *v)
    752{
    753	struct clusterip_seq_position *idx = v;
    754
    755	if (idx->pos != 0)
    756		seq_putc(s, ',');
    757
    758	seq_printf(s, "%u", idx->bit);
    759
    760	if (idx->pos == idx->weight - 1)
    761		seq_putc(s, '\n');
    762
    763	return 0;
    764}
    765
    766static const struct seq_operations clusterip_seq_ops = {
    767	.start	= clusterip_seq_start,
    768	.next	= clusterip_seq_next,
    769	.stop	= clusterip_seq_stop,
    770	.show	= clusterip_seq_show,
    771};
    772
    773static int clusterip_proc_open(struct inode *inode, struct file *file)
    774{
    775	int ret = seq_open(file, &clusterip_seq_ops);
    776
    777	if (!ret) {
    778		struct seq_file *sf = file->private_data;
    779		struct clusterip_config *c = pde_data(inode);
    780
    781		sf->private = c;
    782
    783		clusterip_config_get(c);
    784	}
    785
    786	return ret;
    787}
    788
    789static int clusterip_proc_release(struct inode *inode, struct file *file)
    790{
    791	struct clusterip_config *c = pde_data(inode);
    792	int ret;
    793
    794	ret = seq_release(inode, file);
    795
    796	if (!ret)
    797		clusterip_config_put(c);
    798
    799	return ret;
    800}
    801
    802static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
    803				size_t size, loff_t *ofs)
    804{
    805	struct clusterip_config *c = pde_data(file_inode(file));
    806#define PROC_WRITELEN	10
    807	char buffer[PROC_WRITELEN+1];
    808	unsigned long nodenum;
    809	int rc;
    810
    811	if (size > PROC_WRITELEN)
    812		return -EIO;
    813	if (copy_from_user(buffer, input, size))
    814		return -EFAULT;
    815	buffer[size] = 0;
    816
    817	if (*buffer == '+') {
    818		rc = kstrtoul(buffer+1, 10, &nodenum);
    819		if (rc)
    820			return rc;
    821		if (clusterip_add_node(c, nodenum))
    822			return -ENOMEM;
    823	} else if (*buffer == '-') {
    824		rc = kstrtoul(buffer+1, 10, &nodenum);
    825		if (rc)
    826			return rc;
    827		if (clusterip_del_node(c, nodenum))
    828			return -ENOENT;
    829	} else
    830		return -EIO;
    831
    832	return size;
    833}
    834
    835static const struct proc_ops clusterip_proc_ops = {
    836	.proc_open	= clusterip_proc_open,
    837	.proc_read	= seq_read,
    838	.proc_write	= clusterip_proc_write,
    839	.proc_lseek	= seq_lseek,
    840	.proc_release	= clusterip_proc_release,
    841};
    842
    843#endif /* CONFIG_PROC_FS */
    844
    845static int clusterip_net_init(struct net *net)
    846{
    847	struct clusterip_net *cn = clusterip_pernet(net);
    848
    849	INIT_LIST_HEAD(&cn->configs);
    850
    851	spin_lock_init(&cn->lock);
    852
    853#ifdef CONFIG_PROC_FS
    854	cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
    855	if (!cn->procdir) {
    856		pr_err("Unable to proc dir entry\n");
    857		return -ENOMEM;
    858	}
    859	mutex_init(&cn->mutex);
    860#endif /* CONFIG_PROC_FS */
    861
    862	return 0;
    863}
    864
    865static void clusterip_net_exit(struct net *net)
    866{
    867#ifdef CONFIG_PROC_FS
    868	struct clusterip_net *cn = clusterip_pernet(net);
    869
    870	mutex_lock(&cn->mutex);
    871	proc_remove(cn->procdir);
    872	cn->procdir = NULL;
    873	mutex_unlock(&cn->mutex);
    874#endif
    875}
    876
    877static struct pernet_operations clusterip_net_ops = {
    878	.init = clusterip_net_init,
    879	.exit = clusterip_net_exit,
    880	.id   = &clusterip_net_id,
    881	.size = sizeof(struct clusterip_net),
    882};
    883
    884static struct notifier_block cip_netdev_notifier = {
    885	.notifier_call = clusterip_netdev_event
    886};
    887
    888static int __init clusterip_tg_init(void)
    889{
    890	int ret;
    891
    892	ret = register_pernet_subsys(&clusterip_net_ops);
    893	if (ret < 0)
    894		return ret;
    895
    896	ret = xt_register_target(&clusterip_tg_reg);
    897	if (ret < 0)
    898		goto cleanup_subsys;
    899
    900	ret = register_netdevice_notifier(&cip_netdev_notifier);
    901	if (ret < 0)
    902		goto unregister_target;
    903
    904	pr_info("ClusterIP Version %s loaded successfully\n",
    905		CLUSTERIP_VERSION);
    906
    907	return 0;
    908
    909unregister_target:
    910	xt_unregister_target(&clusterip_tg_reg);
    911cleanup_subsys:
    912	unregister_pernet_subsys(&clusterip_net_ops);
    913	return ret;
    914}
    915
    916static void __exit clusterip_tg_exit(void)
    917{
    918	pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
    919
    920	unregister_netdevice_notifier(&cip_netdev_notifier);
    921	xt_unregister_target(&clusterip_tg_reg);
    922	unregister_pernet_subsys(&clusterip_net_ops);
    923
    924	/* Wait for completion of call_rcu()'s (clusterip_config_rcu_free) */
    925	rcu_barrier();
    926}
    927
    928module_init(clusterip_tg_init);
    929module_exit(clusterip_tg_exit);