cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

nfnetlink_queue.c (39691B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * This is a module which is used for queueing packets and communicating with
      4 * userspace via nfnetlink.
      5 *
      6 * (C) 2005 by Harald Welte <laforge@netfilter.org>
      7 * (C) 2007 by Patrick McHardy <kaber@trash.net>
      8 *
      9 * Based on the old ipv4-only ip_queue.c:
     10 * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
     11 * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
     12 */
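/* Illustrative userspace counterpart, not part of this file: a minimal
 * sketch of how a program typically drives this module through
 * libnetfilter_queue (nfq_open/nfq_create_queue/nfq_set_verdict), assuming
 * a rule such as "iptables -A OUTPUT -j NFQUEUE --queue-num 0" is in place.
 */
#if 0	/* example only, never built as part of the kernel */
#include <stdint.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/netfilter.h>		/* NF_ACCEPT */
#include <libnetfilter_queue/libnetfilter_queue.h>

static int cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
	      struct nfq_data *nfa, void *data)
{
	struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr(nfa);
	uint32_t id = ph ? ntohl(ph->packet_id) : 0;

	/* accept every packet the kernel queued to us */
	return nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL);
}

int main(void)
{
	struct nfq_handle *h = nfq_open();
	struct nfq_q_handle *qh;
	char buf[4096];
	int fd, rv;

	if (!h)
		exit(1);
	qh = nfq_create_queue(h, 0, &cb, NULL);	/* bind to queue number 0 */
	if (!qh)
		exit(1);
	nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff);

	fd = nfq_fd(h);
	while ((rv = recv(fd, buf, sizeof(buf), 0)) >= 0)
		nfq_handle_packet(h, buf, rv);

	nfq_destroy_queue(qh);
	nfq_close(h);
	return 0;
}
#endif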
     13
     14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
     15
     16#include <linux/module.h>
     17#include <linux/skbuff.h>
     18#include <linux/init.h>
     19#include <linux/spinlock.h>
     20#include <linux/slab.h>
     21#include <linux/notifier.h>
     22#include <linux/netdevice.h>
     23#include <linux/netfilter.h>
     24#include <linux/proc_fs.h>
     25#include <linux/netfilter_ipv4.h>
     26#include <linux/netfilter_ipv6.h>
     27#include <linux/netfilter_bridge.h>
     28#include <linux/netfilter/nfnetlink.h>
     29#include <linux/netfilter/nfnetlink_queue.h>
     30#include <linux/netfilter/nf_conntrack_common.h>
     31#include <linux/list.h>
     32#include <net/sock.h>
     33#include <net/tcp_states.h>
     34#include <net/netfilter/nf_queue.h>
     35#include <net/netns/generic.h>
     36
     37#include <linux/atomic.h>
     38
     39#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
     40#include "../bridge/br_private.h"
     41#endif
     42
     43#if IS_ENABLED(CONFIG_NF_CONNTRACK)
     44#include <net/netfilter/nf_conntrack.h>
     45#endif
     46
     47#define NFQNL_QMAX_DEFAULT 1024
     48
     49/* We're using struct nlattr which has 16bit nla_len. Note that nla_len
     50 * includes the header length. Thus, the maximum packet length that we
     51 * support is 65531 bytes. We send truncated packets if the specified length
     52 * is larger than that.  Userspace can check for presence of NFQA_CAP_LEN
     53 * attribute to detect truncation.
     54 */
     55#define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN)
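/* For reference, NLA_HDRLEN is NLA_ALIGN(sizeof(struct nlattr)) == 4, so
 * NFQNL_MAX_COPY_RANGE works out to 0xffff - 4 == 65531 bytes, matching the
 * limit quoted in the comment above.
 */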
     56
     57struct nfqnl_instance {
     58	struct hlist_node hlist;		/* global list of queues */
     59	struct rcu_head rcu;
     60
     61	u32 peer_portid;
     62	unsigned int queue_maxlen;
     63	unsigned int copy_range;
     64	unsigned int queue_dropped;
     65	unsigned int queue_user_dropped;
     66
     67
     68	u_int16_t queue_num;			/* number of this queue */
     69	u_int8_t copy_mode;
     70	u_int32_t flags;			/* Set using NFQA_CFG_FLAGS */
     71/*
     72 * Following fields are dirtied for each queued packet,
     73 * keep them in same cache line if possible.
     74 */
     75	spinlock_t	lock	____cacheline_aligned_in_smp;
     76	unsigned int	queue_total;
     77	unsigned int	id_sequence;		/* 'sequence' of pkt ids */
     78	struct list_head queue_list;		/* packets in queue */
     79};
     80
     81typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
     82
     83static unsigned int nfnl_queue_net_id __read_mostly;
     84
     85#define INSTANCE_BUCKETS	16
     86struct nfnl_queue_net {
     87	spinlock_t instances_lock;
     88	struct hlist_head instance_table[INSTANCE_BUCKETS];
     89};
     90
     91static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net)
     92{
     93	return net_generic(net, nfnl_queue_net_id);
     94}
     95
     96static inline u_int8_t instance_hashfn(u_int16_t queue_num)
     97{
     98	return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
     99}
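/* Example: queue_num 0x1234 hashes to ((0x1234 >> 8) ^ 0x1234) % 16 ==
 * 0x1226 % 16 == 6; folding the high byte in keeps queue numbers that differ
 * only in their upper byte from piling into the same bucket.
 */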
    100
    101static struct nfqnl_instance *
    102instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
    103{
    104	struct hlist_head *head;
    105	struct nfqnl_instance *inst;
    106
    107	head = &q->instance_table[instance_hashfn(queue_num)];
    108	hlist_for_each_entry_rcu(inst, head, hlist) {
    109		if (inst->queue_num == queue_num)
    110			return inst;
    111	}
    112	return NULL;
    113}
    114
    115static struct nfqnl_instance *
    116instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
    117{
    118	struct nfqnl_instance *inst;
    119	unsigned int h;
    120	int err;
    121
    122	spin_lock(&q->instances_lock);
    123	if (instance_lookup(q, queue_num)) {
    124		err = -EEXIST;
    125		goto out_unlock;
    126	}
    127
    128	inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
    129	if (!inst) {
    130		err = -ENOMEM;
    131		goto out_unlock;
    132	}
    133
    134	inst->queue_num = queue_num;
    135	inst->peer_portid = portid;
    136	inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
    137	inst->copy_range = NFQNL_MAX_COPY_RANGE;
    138	inst->copy_mode = NFQNL_COPY_NONE;
    139	spin_lock_init(&inst->lock);
    140	INIT_LIST_HEAD(&inst->queue_list);
    141
    142	if (!try_module_get(THIS_MODULE)) {
    143		err = -EAGAIN;
    144		goto out_free;
    145	}
    146
    147	h = instance_hashfn(queue_num);
    148	hlist_add_head_rcu(&inst->hlist, &q->instance_table[h]);
    149
    150	spin_unlock(&q->instances_lock);
    151
    152	return inst;
    153
    154out_free:
    155	kfree(inst);
    156out_unlock:
    157	spin_unlock(&q->instances_lock);
    158	return ERR_PTR(err);
    159}
    160
    161static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
    162			unsigned long data);
    163
    164static void
    165instance_destroy_rcu(struct rcu_head *head)
    166{
    167	struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
    168						   rcu);
    169
    170	nfqnl_flush(inst, NULL, 0);
    171	kfree(inst);
    172	module_put(THIS_MODULE);
    173}
    174
    175static void
    176__instance_destroy(struct nfqnl_instance *inst)
    177{
    178	hlist_del_rcu(&inst->hlist);
    179	call_rcu(&inst->rcu, instance_destroy_rcu);
    180}
    181
    182static void
    183instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst)
    184{
    185	spin_lock(&q->instances_lock);
    186	__instance_destroy(inst);
    187	spin_unlock(&q->instances_lock);
    188}
    189
    190static inline void
    191__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
    192{
     193	list_add_tail(&entry->list, &queue->queue_list);
     194	queue->queue_total++;
    195}
    196
    197static void
    198__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
    199{
    200	list_del(&entry->list);
    201	queue->queue_total--;
    202}
    203
    204static struct nf_queue_entry *
    205find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
    206{
    207	struct nf_queue_entry *entry = NULL, *i;
    208
    209	spin_lock_bh(&queue->lock);
    210
    211	list_for_each_entry(i, &queue->queue_list, list) {
    212		if (i->id == id) {
    213			entry = i;
    214			break;
    215		}
    216	}
    217
    218	if (entry)
    219		__dequeue_entry(queue, entry);
    220
    221	spin_unlock_bh(&queue->lock);
    222
    223	return entry;
    224}
    225
    226static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
    227{
    228	const struct nf_ct_hook *ct_hook;
    229	int err;
    230
    231	if (verdict == NF_ACCEPT ||
    232	    verdict == NF_REPEAT ||
    233	    verdict == NF_STOP) {
    234		rcu_read_lock();
    235		ct_hook = rcu_dereference(nf_ct_hook);
    236		if (ct_hook) {
    237			err = ct_hook->update(entry->state.net, entry->skb);
    238			if (err < 0)
    239				verdict = NF_DROP;
    240		}
    241		rcu_read_unlock();
    242	}
    243	nf_reinject(entry, verdict);
    244}
    245
    246static void
    247nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
    248{
    249	struct nf_queue_entry *entry, *next;
    250
    251	spin_lock_bh(&queue->lock);
    252	list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
    253		if (!cmpfn || cmpfn(entry, data)) {
    254			list_del(&entry->list);
    255			queue->queue_total--;
    256			nfqnl_reinject(entry, NF_DROP);
    257		}
    258	}
    259	spin_unlock_bh(&queue->lock);
    260}
    261
    262static int
    263nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet,
    264		      bool csum_verify)
    265{
    266	__u32 flags = 0;
    267
    268	if (packet->ip_summed == CHECKSUM_PARTIAL)
    269		flags = NFQA_SKB_CSUMNOTREADY;
    270	else if (csum_verify)
    271		flags = NFQA_SKB_CSUM_NOTVERIFIED;
    272
    273	if (skb_is_gso(packet))
    274		flags |= NFQA_SKB_GSO;
    275
    276	return flags ? nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0;
    277}
    278
    279static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk)
    280{
    281	const struct cred *cred;
    282
    283	if (!sk_fullsock(sk))
    284		return 0;
    285
    286	read_lock_bh(&sk->sk_callback_lock);
    287	if (sk->sk_socket && sk->sk_socket->file) {
    288		cred = sk->sk_socket->file->f_cred;
    289		if (nla_put_be32(skb, NFQA_UID,
    290		    htonl(from_kuid_munged(&init_user_ns, cred->fsuid))))
    291			goto nla_put_failure;
    292		if (nla_put_be32(skb, NFQA_GID,
    293		    htonl(from_kgid_munged(&init_user_ns, cred->fsgid))))
    294			goto nla_put_failure;
    295	}
    296	read_unlock_bh(&sk->sk_callback_lock);
    297	return 0;
    298
    299nla_put_failure:
    300	read_unlock_bh(&sk->sk_callback_lock);
    301	return -1;
    302}
    303
    304static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, char **secdata)
    305{
    306	u32 seclen = 0;
    307#if IS_ENABLED(CONFIG_NETWORK_SECMARK)
    308	if (!skb || !sk_fullsock(skb->sk))
    309		return 0;
    310
    311	read_lock_bh(&skb->sk->sk_callback_lock);
    312
    313	if (skb->secmark)
    314		security_secid_to_secctx(skb->secmark, secdata, &seclen);
    315
    316	read_unlock_bh(&skb->sk->sk_callback_lock);
    317#endif
    318	return seclen;
    319}
    320
    321static u32 nfqnl_get_bridge_size(struct nf_queue_entry *entry)
    322{
    323	struct sk_buff *entskb = entry->skb;
    324	u32 nlalen = 0;
    325
    326	if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb))
    327		return 0;
    328
    329	if (skb_vlan_tag_present(entskb))
    330		nlalen += nla_total_size(nla_total_size(sizeof(__be16)) +
    331					 nla_total_size(sizeof(__be16)));
    332
    333	if (entskb->network_header > entskb->mac_header)
    334		nlalen += nla_total_size((entskb->network_header -
    335					  entskb->mac_header));
    336
    337	return nlalen;
    338}
    339
    340static int nfqnl_put_bridge(struct nf_queue_entry *entry, struct sk_buff *skb)
    341{
    342	struct sk_buff *entskb = entry->skb;
    343
    344	if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb))
    345		return 0;
    346
    347	if (skb_vlan_tag_present(entskb)) {
    348		struct nlattr *nest;
    349
    350		nest = nla_nest_start(skb, NFQA_VLAN);
    351		if (!nest)
    352			goto nla_put_failure;
    353
    354		if (nla_put_be16(skb, NFQA_VLAN_TCI, htons(entskb->vlan_tci)) ||
    355		    nla_put_be16(skb, NFQA_VLAN_PROTO, entskb->vlan_proto))
    356			goto nla_put_failure;
    357
    358		nla_nest_end(skb, nest);
    359	}
    360
    361	if (entskb->mac_header < entskb->network_header) {
    362		int len = (int)(entskb->network_header - entskb->mac_header);
    363
    364		if (nla_put(skb, NFQA_L2HDR, len, skb_mac_header(entskb)))
    365			goto nla_put_failure;
    366	}
    367
    368	return 0;
    369
    370nla_put_failure:
    371	return -1;
    372}
    373
    374static struct sk_buff *
    375nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
    376			   struct nf_queue_entry *entry,
    377			   __be32 **packet_id_ptr)
    378{
    379	size_t size;
    380	size_t data_len = 0, cap_len = 0;
    381	unsigned int hlen = 0;
    382	struct sk_buff *skb;
    383	struct nlattr *nla;
    384	struct nfqnl_msg_packet_hdr *pmsg;
    385	struct nlmsghdr *nlh;
    386	struct sk_buff *entskb = entry->skb;
    387	struct net_device *indev;
    388	struct net_device *outdev;
    389	struct nf_conn *ct = NULL;
    390	enum ip_conntrack_info ctinfo = 0;
    391	const struct nfnl_ct_hook *nfnl_ct;
    392	bool csum_verify;
    393	char *secdata = NULL;
    394	u32 seclen = 0;
    395	ktime_t tstamp;
    396
    397	size = nlmsg_total_size(sizeof(struct nfgenmsg))
    398		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
    399		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
    400		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
    401#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
    402		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
    403		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
    404#endif
    405		+ nla_total_size(sizeof(u_int32_t))	/* mark */
    406		+ nla_total_size(sizeof(u_int32_t))	/* priority */
    407		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
    408		+ nla_total_size(sizeof(u_int32_t))	/* skbinfo */
    409		+ nla_total_size(sizeof(u_int32_t));	/* cap_len */
    410
    411	tstamp = skb_tstamp_cond(entskb, false);
    412	if (tstamp)
    413		size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
    414
    415	size += nfqnl_get_bridge_size(entry);
    416
    417	if (entry->state.hook <= NF_INET_FORWARD ||
    418	   (entry->state.hook == NF_INET_POST_ROUTING && entskb->sk == NULL))
    419		csum_verify = !skb_csum_unnecessary(entskb);
    420	else
    421		csum_verify = false;
    422
    423	outdev = entry->state.out;
    424
    425	switch ((enum nfqnl_config_mode)READ_ONCE(queue->copy_mode)) {
    426	case NFQNL_COPY_META:
    427	case NFQNL_COPY_NONE:
    428		break;
    429
    430	case NFQNL_COPY_PACKET:
    431		if (!(queue->flags & NFQA_CFG_F_GSO) &&
    432		    entskb->ip_summed == CHECKSUM_PARTIAL &&
    433		    skb_checksum_help(entskb))
    434			return NULL;
    435
    436		data_len = READ_ONCE(queue->copy_range);
    437		if (data_len > entskb->len)
    438			data_len = entskb->len;
    439
    440		hlen = skb_zerocopy_headlen(entskb);
    441		hlen = min_t(unsigned int, hlen, data_len);
    442		size += sizeof(struct nlattr) + hlen;
    443		cap_len = entskb->len;
    444		break;
    445	}
    446
    447	nfnl_ct = rcu_dereference(nfnl_ct_hook);
    448
    449#if IS_ENABLED(CONFIG_NF_CONNTRACK)
    450	if (queue->flags & NFQA_CFG_F_CONNTRACK) {
    451		if (nfnl_ct != NULL) {
    452			ct = nf_ct_get(entskb, &ctinfo);
    453			if (ct != NULL)
    454				size += nfnl_ct->build_size(ct);
    455		}
    456	}
    457#endif
    458
    459	if (queue->flags & NFQA_CFG_F_UID_GID) {
    460		size += (nla_total_size(sizeof(u_int32_t))	/* uid */
    461			+ nla_total_size(sizeof(u_int32_t)));	/* gid */
    462	}
    463
    464	if ((queue->flags & NFQA_CFG_F_SECCTX) && entskb->sk) {
    465		seclen = nfqnl_get_sk_secctx(entskb, &secdata);
    466		if (seclen)
    467			size += nla_total_size(seclen);
    468	}
    469
    470	skb = alloc_skb(size, GFP_ATOMIC);
    471	if (!skb) {
    472		skb_tx_error(entskb);
    473		goto nlmsg_failure;
    474	}
    475
    476	nlh = nfnl_msg_put(skb, 0, 0,
    477			   nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET),
    478			   0, entry->state.pf, NFNETLINK_V0,
    479			   htons(queue->queue_num));
    480	if (!nlh) {
    481		skb_tx_error(entskb);
    482		kfree_skb(skb);
    483		goto nlmsg_failure;
    484	}
    485
    486	nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg));
    487	pmsg = nla_data(nla);
    488	pmsg->hw_protocol	= entskb->protocol;
    489	pmsg->hook		= entry->state.hook;
    490	*packet_id_ptr		= &pmsg->packet_id;
    491
    492	indev = entry->state.in;
    493	if (indev) {
    494#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
    495		if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex)))
    496			goto nla_put_failure;
    497#else
    498		if (entry->state.pf == PF_BRIDGE) {
    499			/* Case 1: indev is physical input device, we need to
    500			 * look for bridge group (when called from
    501			 * netfilter_bridge) */
    502			if (nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV,
    503					 htonl(indev->ifindex)) ||
    504			/* this is the bridge group "brX" */
    505			/* rcu_read_lock()ed by __nf_queue */
    506			    nla_put_be32(skb, NFQA_IFINDEX_INDEV,
    507					 htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
    508				goto nla_put_failure;
    509		} else {
    510			int physinif;
    511
    512			/* Case 2: indev is bridge group, we need to look for
    513			 * physical device (when called from ipv4) */
    514			if (nla_put_be32(skb, NFQA_IFINDEX_INDEV,
    515					 htonl(indev->ifindex)))
    516				goto nla_put_failure;
    517
    518			physinif = nf_bridge_get_physinif(entskb);
    519			if (physinif &&
    520			    nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV,
    521					 htonl(physinif)))
    522				goto nla_put_failure;
    523		}
    524#endif
    525	}
    526
    527	if (outdev) {
    528#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
    529		if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex)))
    530			goto nla_put_failure;
    531#else
    532		if (entry->state.pf == PF_BRIDGE) {
    533			/* Case 1: outdev is physical output device, we need to
    534			 * look for bridge group (when called from
    535			 * netfilter_bridge) */
    536			if (nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV,
    537					 htonl(outdev->ifindex)) ||
    538			/* this is the bridge group "brX" */
    539			/* rcu_read_lock()ed by __nf_queue */
    540			    nla_put_be32(skb, NFQA_IFINDEX_OUTDEV,
    541					 htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
    542				goto nla_put_failure;
    543		} else {
    544			int physoutif;
    545
    546			/* Case 2: outdev is bridge group, we need to look for
    547			 * physical output device (when called from ipv4) */
    548			if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV,
    549					 htonl(outdev->ifindex)))
    550				goto nla_put_failure;
    551
    552			physoutif = nf_bridge_get_physoutif(entskb);
    553			if (physoutif &&
    554			    nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV,
    555					 htonl(physoutif)))
    556				goto nla_put_failure;
    557		}
    558#endif
    559	}
    560
    561	if (entskb->mark &&
    562	    nla_put_be32(skb, NFQA_MARK, htonl(entskb->mark)))
    563		goto nla_put_failure;
    564
    565	if (entskb->priority &&
    566	    nla_put_be32(skb, NFQA_PRIORITY, htonl(entskb->priority)))
    567		goto nla_put_failure;
    568
    569	if (indev && entskb->dev &&
    570	    skb_mac_header_was_set(entskb) &&
    571	    skb_mac_header_len(entskb) != 0) {
    572		struct nfqnl_msg_packet_hw phw;
    573		int len;
    574
    575		memset(&phw, 0, sizeof(phw));
    576		len = dev_parse_header(entskb, phw.hw_addr);
    577		if (len) {
    578			phw.hw_addrlen = htons(len);
    579			if (nla_put(skb, NFQA_HWADDR, sizeof(phw), &phw))
    580				goto nla_put_failure;
    581		}
    582	}
    583
    584	if (nfqnl_put_bridge(entry, skb) < 0)
    585		goto nla_put_failure;
    586
    587	if (entry->state.hook <= NF_INET_FORWARD && tstamp) {
    588		struct nfqnl_msg_packet_timestamp ts;
    589		struct timespec64 kts = ktime_to_timespec64(tstamp);
    590
    591		ts.sec = cpu_to_be64(kts.tv_sec);
    592		ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
    593
    594		if (nla_put(skb, NFQA_TIMESTAMP, sizeof(ts), &ts))
    595			goto nla_put_failure;
    596	}
    597
    598	if ((queue->flags & NFQA_CFG_F_UID_GID) && entskb->sk &&
    599	    nfqnl_put_sk_uidgid(skb, entskb->sk) < 0)
    600		goto nla_put_failure;
    601
    602	if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata))
    603		goto nla_put_failure;
    604
    605	if (ct && nfnl_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0)
    606		goto nla_put_failure;
    607
    608	if (cap_len > data_len &&
    609	    nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
    610		goto nla_put_failure;
    611
    612	if (nfqnl_put_packet_info(skb, entskb, csum_verify))
    613		goto nla_put_failure;
    614
    615	if (data_len) {
    616		struct nlattr *nla;
    617
    618		if (skb_tailroom(skb) < sizeof(*nla) + hlen)
    619			goto nla_put_failure;
    620
    621		nla = skb_put(skb, sizeof(*nla));
    622		nla->nla_type = NFQA_PAYLOAD;
    623		nla->nla_len = nla_attr_size(data_len);
    624
    625		if (skb_zerocopy(skb, entskb, data_len, hlen))
    626			goto nla_put_failure;
    627	}
    628
    629	nlh->nlmsg_len = skb->len;
    630	if (seclen)
    631		security_release_secctx(secdata, seclen);
    632	return skb;
    633
    634nla_put_failure:
    635	skb_tx_error(entskb);
    636	kfree_skb(skb);
    637	net_err_ratelimited("nf_queue: error creating packet message\n");
    638nlmsg_failure:
    639	if (seclen)
    640		security_release_secctx(secdata, seclen);
    641	return NULL;
    642}
    643
    644static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
    645{
    646#if IS_ENABLED(CONFIG_NF_CONNTRACK)
    647	static const unsigned long flags = IPS_CONFIRMED | IPS_DYING;
    648	const struct nf_conn *ct = (void *)skb_nfct(entry->skb);
    649
    650	if (ct && ((ct->status & flags) == IPS_DYING))
    651		return true;
    652#endif
    653	return false;
    654}
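/* Note: the check in nf_ct_drop_unconfirmed() above masks the conntrack
 * status with IPS_CONFIRMED | IPS_DYING and compares against IPS_DYING, so
 * it matches only entries that are already dying while still unconfirmed;
 * such packets are dropped instead of being queued to userspace.
 */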
    655
    656static int
    657__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
    658			struct nf_queue_entry *entry)
    659{
    660	struct sk_buff *nskb;
    661	int err = -ENOBUFS;
    662	__be32 *packet_id_ptr;
    663	int failopen = 0;
    664
    665	nskb = nfqnl_build_packet_message(net, queue, entry, &packet_id_ptr);
    666	if (nskb == NULL) {
    667		err = -ENOMEM;
    668		goto err_out;
    669	}
    670	spin_lock_bh(&queue->lock);
    671
    672	if (nf_ct_drop_unconfirmed(entry))
    673		goto err_out_free_nskb;
    674
    675	if (queue->queue_total >= queue->queue_maxlen) {
    676		if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
    677			failopen = 1;
    678			err = 0;
    679		} else {
    680			queue->queue_dropped++;
     681			net_warn_ratelimited("nf_queue: full at %d entries, dropping packet(s)\n",
    682					     queue->queue_total);
    683		}
    684		goto err_out_free_nskb;
    685	}
    686	entry->id = ++queue->id_sequence;
    687	*packet_id_ptr = htonl(entry->id);
    688
    689	/* nfnetlink_unicast will either free the nskb or add it to a socket */
    690	err = nfnetlink_unicast(nskb, net, queue->peer_portid);
    691	if (err < 0) {
    692		if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
    693			failopen = 1;
    694			err = 0;
    695		} else {
    696			queue->queue_user_dropped++;
    697		}
    698		goto err_out_unlock;
    699	}
    700
    701	__enqueue_entry(queue, entry);
    702
    703	spin_unlock_bh(&queue->lock);
    704	return 0;
    705
    706err_out_free_nskb:
    707	kfree_skb(nskb);
    708err_out_unlock:
    709	spin_unlock_bh(&queue->lock);
    710	if (failopen)
    711		nfqnl_reinject(entry, NF_ACCEPT);
    712err_out:
    713	return err;
    714}
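/* Note on fail-open: when NFQA_CFG_F_FAIL_OPEN is set, a full queue or a
 * failed unicast to the peer does not drop the packet; failopen is recorded
 * and the entry is reinjected with NF_ACCEPT once the queue lock is
 * released, so the packet continues through the hooks as if accepted.
 */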
    715
    716static struct nf_queue_entry *
    717nf_queue_entry_dup(struct nf_queue_entry *e)
    718{
    719	struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
    720
    721	if (!entry)
    722		return NULL;
    723
    724	if (nf_queue_entry_get_refs(entry))
    725		return entry;
    726
    727	kfree(entry);
    728	return NULL;
    729}
    730
    731#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
     732/* When called from bridge netfilter, skb->data must point to the MAC header
     733 * before calling skb_gso_segment(); otherwise the original MAC header is
     734 * lost and the segmented skbs are sent to the wrong destination.
     735 */
    736static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
    737{
    738	if (nf_bridge_info_get(skb))
    739		__skb_push(skb, skb->network_header - skb->mac_header);
    740}
    741
    742static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
    743{
    744	if (nf_bridge_info_get(skb))
    745		__skb_pull(skb, skb->network_header - skb->mac_header);
    746}
    747#else
    748#define nf_bridge_adjust_skb_data(s) do {} while (0)
    749#define nf_bridge_adjust_segmented_data(s) do {} while (0)
    750#endif
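/* Note: the adjustment above pushes/pulls by (network_header - mac_header),
 * i.e. the MAC header length (ETH_HLEN == 14 for plain Ethernet), so
 * skb->data covers the MAC header around skb_gso_segment(), and each
 * resulting segment is pulled back before being enqueued.
 */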
    751
    752static int
    753__nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue,
    754			   struct sk_buff *skb, struct nf_queue_entry *entry)
    755{
    756	int ret = -ENOMEM;
    757	struct nf_queue_entry *entry_seg;
    758
    759	nf_bridge_adjust_segmented_data(skb);
    760
    761	if (skb->next == NULL) { /* last packet, no need to copy entry */
    762		struct sk_buff *gso_skb = entry->skb;
    763		entry->skb = skb;
    764		ret = __nfqnl_enqueue_packet(net, queue, entry);
    765		if (ret)
    766			entry->skb = gso_skb;
    767		return ret;
    768	}
    769
    770	skb_mark_not_on_list(skb);
    771
    772	entry_seg = nf_queue_entry_dup(entry);
    773	if (entry_seg) {
    774		entry_seg->skb = skb;
    775		ret = __nfqnl_enqueue_packet(net, queue, entry_seg);
    776		if (ret)
    777			nf_queue_entry_free(entry_seg);
    778	}
    779	return ret;
    780}
    781
    782static int
    783nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
    784{
    785	unsigned int queued;
    786	struct nfqnl_instance *queue;
    787	struct sk_buff *skb, *segs, *nskb;
    788	int err = -ENOBUFS;
    789	struct net *net = entry->state.net;
    790	struct nfnl_queue_net *q = nfnl_queue_pernet(net);
    791
    792	/* rcu_read_lock()ed by nf_hook_thresh */
    793	queue = instance_lookup(q, queuenum);
    794	if (!queue)
    795		return -ESRCH;
    796
    797	if (queue->copy_mode == NFQNL_COPY_NONE)
    798		return -EINVAL;
    799
    800	skb = entry->skb;
    801
    802	switch (entry->state.pf) {
    803	case NFPROTO_IPV4:
    804		skb->protocol = htons(ETH_P_IP);
    805		break;
    806	case NFPROTO_IPV6:
    807		skb->protocol = htons(ETH_P_IPV6);
    808		break;
    809	}
    810
    811	if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(skb))
    812		return __nfqnl_enqueue_packet(net, queue, entry);
    813
    814	nf_bridge_adjust_skb_data(skb);
    815	segs = skb_gso_segment(skb, 0);
    816	/* Does not use PTR_ERR to limit the number of error codes that can be
    817	 * returned by nf_queue.  For instance, callers rely on -ESRCH to
    818	 * mean 'ignore this hook'.
    819	 */
    820	if (IS_ERR_OR_NULL(segs))
    821		goto out_err;
    822	queued = 0;
    823	err = 0;
    824	skb_list_walk_safe(segs, segs, nskb) {
    825		if (err == 0)
    826			err = __nfqnl_enqueue_packet_gso(net, queue,
    827							segs, entry);
    828		if (err == 0)
    829			queued++;
    830		else
    831			kfree_skb(segs);
    832	}
    833
    834	if (queued) {
    835		if (err) /* some segments are already queued */
    836			nf_queue_entry_free(entry);
    837		kfree_skb(skb);
    838		return 0;
    839	}
    840 out_err:
    841	nf_bridge_adjust_segmented_data(skb);
    842	return err;
    843}
    844
    845static int
    846nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
    847{
    848	struct sk_buff *nskb;
    849
    850	if (diff < 0) {
    851		if (pskb_trim(e->skb, data_len))
    852			return -ENOMEM;
    853	} else if (diff > 0) {
    854		if (data_len > 0xFFFF)
    855			return -EINVAL;
    856		if (diff > skb_tailroom(e->skb)) {
    857			nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
    858					       diff, GFP_ATOMIC);
    859			if (!nskb)
    860				return -ENOMEM;
    861			kfree_skb(e->skb);
    862			e->skb = nskb;
    863		}
    864		skb_put(e->skb, diff);
    865	}
    866	if (skb_ensure_writable(e->skb, data_len))
    867		return -ENOMEM;
    868	skb_copy_to_linear_data(e->skb, data, data_len);
    869	e->skb->ip_summed = CHECKSUM_NONE;
    870	return 0;
    871}
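/* Note: in nfqnl_mangle() above, data_len is the replacement payload length
 * supplied by userspace and diff = data_len - skb->len; a negative diff
 * trims the skb, a positive one grows it (copying to a larger skb when
 * tailroom is short), and the checksum state is invalidated (CHECKSUM_NONE)
 * because the payload changed.
 */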
    872
    873static int
    874nfqnl_set_mode(struct nfqnl_instance *queue,
    875	       unsigned char mode, unsigned int range)
    876{
    877	int status = 0;
    878
    879	spin_lock_bh(&queue->lock);
    880	switch (mode) {
    881	case NFQNL_COPY_NONE:
    882	case NFQNL_COPY_META:
    883		queue->copy_mode = mode;
    884		queue->copy_range = 0;
    885		break;
    886
    887	case NFQNL_COPY_PACKET:
    888		queue->copy_mode = mode;
    889		if (range == 0 || range > NFQNL_MAX_COPY_RANGE)
    890			queue->copy_range = NFQNL_MAX_COPY_RANGE;
    891		else
    892			queue->copy_range = range;
    893		break;
    894
    895	default:
    896		status = -EINVAL;
    897
    898	}
    899	spin_unlock_bh(&queue->lock);
    900
    901	return status;
    902}
    903
    904static int
    905dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
    906{
    907#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
    908	int physinif, physoutif;
    909
    910	physinif = nf_bridge_get_physinif(entry->skb);
    911	physoutif = nf_bridge_get_physoutif(entry->skb);
    912
    913	if (physinif == ifindex || physoutif == ifindex)
    914		return 1;
    915#endif
    916	if (entry->state.in)
    917		if (entry->state.in->ifindex == ifindex)
    918			return 1;
    919	if (entry->state.out)
    920		if (entry->state.out->ifindex == ifindex)
    921			return 1;
    922
    923	return 0;
    924}
    925
    926/* drop all packets with either indev or outdev == ifindex from all queue
    927 * instances */
    928static void
    929nfqnl_dev_drop(struct net *net, int ifindex)
    930{
    931	int i;
    932	struct nfnl_queue_net *q = nfnl_queue_pernet(net);
    933
    934	rcu_read_lock();
    935
    936	for (i = 0; i < INSTANCE_BUCKETS; i++) {
    937		struct nfqnl_instance *inst;
    938		struct hlist_head *head = &q->instance_table[i];
    939
    940		hlist_for_each_entry_rcu(inst, head, hlist)
    941			nfqnl_flush(inst, dev_cmp, ifindex);
    942	}
    943
    944	rcu_read_unlock();
    945}
    946
    947static int
    948nfqnl_rcv_dev_event(struct notifier_block *this,
    949		    unsigned long event, void *ptr)
    950{
    951	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
    952
    953	/* Drop any packets associated with the downed device */
    954	if (event == NETDEV_DOWN)
    955		nfqnl_dev_drop(dev_net(dev), dev->ifindex);
    956	return NOTIFY_DONE;
    957}
    958
    959static struct notifier_block nfqnl_dev_notifier = {
    960	.notifier_call	= nfqnl_rcv_dev_event,
    961};
    962
    963static void nfqnl_nf_hook_drop(struct net *net)
    964{
    965	struct nfnl_queue_net *q = nfnl_queue_pernet(net);
    966	int i;
    967
    968	/* This function is also called on net namespace error unwind,
    969	 * when pernet_ops->init() failed and ->exit() functions of the
    970	 * previous pernet_ops gets called.
    971	 *
    972	 * This may result in a call to nfqnl_nf_hook_drop() before
    973	 * struct nfnl_queue_net was allocated.
    974	 */
    975	if (!q)
    976		return;
    977
    978	for (i = 0; i < INSTANCE_BUCKETS; i++) {
    979		struct nfqnl_instance *inst;
    980		struct hlist_head *head = &q->instance_table[i];
    981
    982		hlist_for_each_entry_rcu(inst, head, hlist)
    983			nfqnl_flush(inst, NULL, 0);
    984	}
    985}
    986
    987static int
    988nfqnl_rcv_nl_event(struct notifier_block *this,
    989		   unsigned long event, void *ptr)
    990{
    991	struct netlink_notify *n = ptr;
    992	struct nfnl_queue_net *q = nfnl_queue_pernet(n->net);
    993
    994	if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
    995		int i;
    996
    997		/* destroy all instances for this portid */
    998		spin_lock(&q->instances_lock);
    999		for (i = 0; i < INSTANCE_BUCKETS; i++) {
   1000			struct hlist_node *t2;
   1001			struct nfqnl_instance *inst;
   1002			struct hlist_head *head = &q->instance_table[i];
   1003
   1004			hlist_for_each_entry_safe(inst, t2, head, hlist) {
   1005				if (n->portid == inst->peer_portid)
   1006					__instance_destroy(inst);
   1007			}
   1008		}
   1009		spin_unlock(&q->instances_lock);
   1010	}
   1011	return NOTIFY_DONE;
   1012}
   1013
   1014static struct notifier_block nfqnl_rtnl_notifier = {
   1015	.notifier_call	= nfqnl_rcv_nl_event,
   1016};
   1017
   1018static const struct nla_policy nfqa_vlan_policy[NFQA_VLAN_MAX + 1] = {
   1019	[NFQA_VLAN_TCI]		= { .type = NLA_U16},
   1020	[NFQA_VLAN_PROTO]	= { .type = NLA_U16},
   1021};
   1022
   1023static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
   1024	[NFQA_VERDICT_HDR]	= { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
   1025	[NFQA_MARK]		= { .type = NLA_U32 },
   1026	[NFQA_PAYLOAD]		= { .type = NLA_UNSPEC },
   1027	[NFQA_CT]		= { .type = NLA_UNSPEC },
   1028	[NFQA_EXP]		= { .type = NLA_UNSPEC },
   1029	[NFQA_VLAN]		= { .type = NLA_NESTED },
   1030	[NFQA_PRIORITY]		= { .type = NLA_U32 },
   1031};
   1032
   1033static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
   1034	[NFQA_VERDICT_HDR]	= { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
   1035	[NFQA_MARK]		= { .type = NLA_U32 },
   1036	[NFQA_PRIORITY]		= { .type = NLA_U32 },
   1037};
   1038
   1039static struct nfqnl_instance *
   1040verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, u32 nlportid)
   1041{
   1042	struct nfqnl_instance *queue;
   1043
   1044	queue = instance_lookup(q, queue_num);
   1045	if (!queue)
   1046		return ERR_PTR(-ENODEV);
   1047
   1048	if (queue->peer_portid != nlportid)
   1049		return ERR_PTR(-EPERM);
   1050
   1051	return queue;
   1052}
   1053
   1054static struct nfqnl_msg_verdict_hdr*
   1055verdicthdr_get(const struct nlattr * const nfqa[])
   1056{
   1057	struct nfqnl_msg_verdict_hdr *vhdr;
   1058	unsigned int verdict;
   1059
   1060	if (!nfqa[NFQA_VERDICT_HDR])
   1061		return NULL;
   1062
   1063	vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
   1064	verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK;
   1065	if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN)
   1066		return NULL;
   1067	return vhdr;
   1068}
   1069
   1070static int nfq_id_after(unsigned int id, unsigned int max)
   1071{
   1072	return (int)(id - max) > 0;
   1073}
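/* Note: the signed subtraction in nfq_id_after() above gives a wrap-safe
 * ordering test: nfq_id_after(0x00000002, 0xfffffffe) is true because
 * (int)(0x00000002 - 0xfffffffe) == 4 > 0, even though 2 < 0xfffffffe
 * numerically; packet ids come from the wrapping id_sequence counter.
 */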
   1074
   1075static int nfqnl_recv_verdict_batch(struct sk_buff *skb,
   1076				    const struct nfnl_info *info,
   1077				    const struct nlattr * const nfqa[])
   1078{
   1079	struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
   1080	u16 queue_num = ntohs(info->nfmsg->res_id);
   1081	struct nf_queue_entry *entry, *tmp;
   1082	struct nfqnl_msg_verdict_hdr *vhdr;
   1083	struct nfqnl_instance *queue;
   1084	unsigned int verdict, maxid;
   1085	LIST_HEAD(batch_list);
   1086
   1087	queue = verdict_instance_lookup(q, queue_num,
   1088					NETLINK_CB(skb).portid);
   1089	if (IS_ERR(queue))
   1090		return PTR_ERR(queue);
   1091
   1092	vhdr = verdicthdr_get(nfqa);
   1093	if (!vhdr)
   1094		return -EINVAL;
   1095
   1096	verdict = ntohl(vhdr->verdict);
   1097	maxid = ntohl(vhdr->id);
   1098
   1099	spin_lock_bh(&queue->lock);
   1100
   1101	list_for_each_entry_safe(entry, tmp, &queue->queue_list, list) {
   1102		if (nfq_id_after(entry->id, maxid))
   1103			break;
   1104		__dequeue_entry(queue, entry);
   1105		list_add_tail(&entry->list, &batch_list);
   1106	}
   1107
   1108	spin_unlock_bh(&queue->lock);
   1109
   1110	if (list_empty(&batch_list))
   1111		return -ENOENT;
   1112
   1113	list_for_each_entry_safe(entry, tmp, &batch_list, list) {
   1114		if (nfqa[NFQA_MARK])
   1115			entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
   1116
   1117		if (nfqa[NFQA_PRIORITY])
   1118			entry->skb->priority = ntohl(nla_get_be32(nfqa[NFQA_PRIORITY]));
   1119
   1120		nfqnl_reinject(entry, verdict);
   1121	}
   1122	return 0;
   1123}
   1124
   1125static struct nf_conn *nfqnl_ct_parse(const struct nfnl_ct_hook *nfnl_ct,
   1126				      const struct nlmsghdr *nlh,
   1127				      const struct nlattr * const nfqa[],
   1128				      struct nf_queue_entry *entry,
   1129				      enum ip_conntrack_info *ctinfo)
   1130{
   1131#if IS_ENABLED(CONFIG_NF_CONNTRACK)
   1132	struct nf_conn *ct;
   1133
   1134	ct = nf_ct_get(entry->skb, ctinfo);
   1135	if (ct == NULL)
   1136		return NULL;
   1137
   1138	if (nfnl_ct->parse(nfqa[NFQA_CT], ct) < 0)
   1139		return NULL;
   1140
   1141	if (nfqa[NFQA_EXP])
   1142		nfnl_ct->attach_expect(nfqa[NFQA_EXP], ct,
   1143				      NETLINK_CB(entry->skb).portid,
   1144				      nlmsg_report(nlh));
   1145	return ct;
   1146#else
   1147	return NULL;
   1148#endif
   1149}
   1150
   1151static int nfqa_parse_bridge(struct nf_queue_entry *entry,
   1152			     const struct nlattr * const nfqa[])
   1153{
   1154	if (nfqa[NFQA_VLAN]) {
   1155		struct nlattr *tb[NFQA_VLAN_MAX + 1];
   1156		int err;
   1157
   1158		err = nla_parse_nested_deprecated(tb, NFQA_VLAN_MAX,
   1159						  nfqa[NFQA_VLAN],
   1160						  nfqa_vlan_policy, NULL);
   1161		if (err < 0)
   1162			return err;
   1163
   1164		if (!tb[NFQA_VLAN_TCI] || !tb[NFQA_VLAN_PROTO])
   1165			return -EINVAL;
   1166
   1167		__vlan_hwaccel_put_tag(entry->skb,
   1168			nla_get_be16(tb[NFQA_VLAN_PROTO]),
   1169			ntohs(nla_get_be16(tb[NFQA_VLAN_TCI])));
   1170	}
   1171
   1172	if (nfqa[NFQA_L2HDR]) {
   1173		int mac_header_len = entry->skb->network_header -
   1174			entry->skb->mac_header;
   1175
   1176		if (mac_header_len != nla_len(nfqa[NFQA_L2HDR]))
   1177			return -EINVAL;
   1178		else if (mac_header_len > 0)
   1179			memcpy(skb_mac_header(entry->skb),
   1180			       nla_data(nfqa[NFQA_L2HDR]),
   1181			       mac_header_len);
   1182	}
   1183
   1184	return 0;
   1185}
   1186
   1187static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
   1188			      const struct nlattr * const nfqa[])
   1189{
   1190	struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
   1191	u_int16_t queue_num = ntohs(info->nfmsg->res_id);
   1192	const struct nfnl_ct_hook *nfnl_ct;
   1193	struct nfqnl_msg_verdict_hdr *vhdr;
   1194	enum ip_conntrack_info ctinfo;
   1195	struct nfqnl_instance *queue;
   1196	struct nf_queue_entry *entry;
   1197	struct nf_conn *ct = NULL;
   1198	unsigned int verdict;
   1199	int err;
   1200
   1201	queue = verdict_instance_lookup(q, queue_num,
   1202					NETLINK_CB(skb).portid);
   1203	if (IS_ERR(queue))
   1204		return PTR_ERR(queue);
   1205
   1206	vhdr = verdicthdr_get(nfqa);
   1207	if (!vhdr)
   1208		return -EINVAL;
   1209
   1210	verdict = ntohl(vhdr->verdict);
   1211
   1212	entry = find_dequeue_entry(queue, ntohl(vhdr->id));
   1213	if (entry == NULL)
   1214		return -ENOENT;
   1215
   1216	/* rcu lock already held from nfnl->call_rcu. */
   1217	nfnl_ct = rcu_dereference(nfnl_ct_hook);
   1218
   1219	if (nfqa[NFQA_CT]) {
   1220		if (nfnl_ct != NULL)
   1221			ct = nfqnl_ct_parse(nfnl_ct, info->nlh, nfqa, entry,
   1222					    &ctinfo);
   1223	}
   1224
   1225	if (entry->state.pf == PF_BRIDGE) {
   1226		err = nfqa_parse_bridge(entry, nfqa);
   1227		if (err < 0)
   1228			return err;
   1229	}
   1230
   1231	if (nfqa[NFQA_PAYLOAD]) {
   1232		u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]);
   1233		int diff = payload_len - entry->skb->len;
   1234
   1235		if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]),
   1236				 payload_len, entry, diff) < 0)
   1237			verdict = NF_DROP;
   1238
   1239		if (ct && diff)
   1240			nfnl_ct->seq_adjust(entry->skb, ct, ctinfo, diff);
   1241	}
   1242
   1243	if (nfqa[NFQA_MARK])
   1244		entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
   1245
   1246	if (nfqa[NFQA_PRIORITY])
   1247		entry->skb->priority = ntohl(nla_get_be32(nfqa[NFQA_PRIORITY]));
   1248
   1249	nfqnl_reinject(entry, verdict);
   1250	return 0;
   1251}
   1252
   1253static int nfqnl_recv_unsupp(struct sk_buff *skb, const struct nfnl_info *info,
   1254			     const struct nlattr * const cda[])
   1255{
   1256	return -ENOTSUPP;
   1257}
   1258
   1259static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
   1260	[NFQA_CFG_CMD]		= { .len = sizeof(struct nfqnl_msg_config_cmd) },
   1261	[NFQA_CFG_PARAMS]	= { .len = sizeof(struct nfqnl_msg_config_params) },
   1262	[NFQA_CFG_QUEUE_MAXLEN]	= { .type = NLA_U32 },
   1263	[NFQA_CFG_MASK]		= { .type = NLA_U32 },
   1264	[NFQA_CFG_FLAGS]	= { .type = NLA_U32 },
   1265};
   1266
   1267static const struct nf_queue_handler nfqh = {
   1268	.outfn		= nfqnl_enqueue_packet,
   1269	.nf_hook_drop	= nfqnl_nf_hook_drop,
   1270};
   1271
   1272static int nfqnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info,
   1273			     const struct nlattr * const nfqa[])
   1274{
   1275	struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
   1276	u_int16_t queue_num = ntohs(info->nfmsg->res_id);
   1277	struct nfqnl_msg_config_cmd *cmd = NULL;
   1278	struct nfqnl_instance *queue;
   1279	__u32 flags = 0, mask = 0;
   1280	int ret = 0;
   1281
   1282	if (nfqa[NFQA_CFG_CMD]) {
   1283		cmd = nla_data(nfqa[NFQA_CFG_CMD]);
   1284
   1285		/* Obsolete commands without queue context */
   1286		switch (cmd->command) {
   1287		case NFQNL_CFG_CMD_PF_BIND: return 0;
   1288		case NFQNL_CFG_CMD_PF_UNBIND: return 0;
   1289		}
   1290	}
   1291
    1292	/* First check whether we support these flags at all, and make sure the
    1293	 * dependencies are present too, so the update does not break atomicity.
    1294	 */
   1295	if (nfqa[NFQA_CFG_FLAGS]) {
   1296		if (!nfqa[NFQA_CFG_MASK]) {
   1297			/* A mask is needed to specify which flags are being
   1298			 * changed.
   1299			 */
   1300			return -EINVAL;
   1301		}
   1302
   1303		flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS]));
   1304		mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK]));
   1305
   1306		if (flags >= NFQA_CFG_F_MAX)
   1307			return -EOPNOTSUPP;
   1308
   1309#if !IS_ENABLED(CONFIG_NETWORK_SECMARK)
   1310		if (flags & mask & NFQA_CFG_F_SECCTX)
   1311			return -EOPNOTSUPP;
   1312#endif
   1313		if ((flags & mask & NFQA_CFG_F_CONNTRACK) &&
   1314		    !rcu_access_pointer(nfnl_ct_hook)) {
   1315#ifdef CONFIG_MODULES
   1316			nfnl_unlock(NFNL_SUBSYS_QUEUE);
   1317			request_module("ip_conntrack_netlink");
   1318			nfnl_lock(NFNL_SUBSYS_QUEUE);
   1319			if (rcu_access_pointer(nfnl_ct_hook))
   1320				return -EAGAIN;
   1321#endif
   1322			return -EOPNOTSUPP;
   1323		}
   1324	}
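	/* Note: when NFQA_CFG_F_CONNTRACK is requested but no nfnl_ct_hook is
	 * registered yet, the path above drops the subsystem mutex, requests
	 * ip_conntrack_netlink and returns -EAGAIN once the hook shows up,
	 * signalling the caller to retry the configuration; otherwise the
	 * flag is simply not supported.
	 */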
   1325
   1326	rcu_read_lock();
   1327	queue = instance_lookup(q, queue_num);
   1328	if (queue && queue->peer_portid != NETLINK_CB(skb).portid) {
   1329		ret = -EPERM;
   1330		goto err_out_unlock;
   1331	}
   1332
   1333	if (cmd != NULL) {
   1334		switch (cmd->command) {
   1335		case NFQNL_CFG_CMD_BIND:
   1336			if (queue) {
   1337				ret = -EBUSY;
   1338				goto err_out_unlock;
   1339			}
   1340			queue = instance_create(q, queue_num,
   1341						NETLINK_CB(skb).portid);
   1342			if (IS_ERR(queue)) {
   1343				ret = PTR_ERR(queue);
   1344				goto err_out_unlock;
   1345			}
   1346			break;
   1347		case NFQNL_CFG_CMD_UNBIND:
   1348			if (!queue) {
   1349				ret = -ENODEV;
   1350				goto err_out_unlock;
   1351			}
   1352			instance_destroy(q, queue);
   1353			goto err_out_unlock;
   1354		case NFQNL_CFG_CMD_PF_BIND:
   1355		case NFQNL_CFG_CMD_PF_UNBIND:
   1356			break;
   1357		default:
   1358			ret = -ENOTSUPP;
   1359			goto err_out_unlock;
   1360		}
   1361	}
   1362
   1363	if (!queue) {
   1364		ret = -ENODEV;
   1365		goto err_out_unlock;
   1366	}
   1367
   1368	if (nfqa[NFQA_CFG_PARAMS]) {
   1369		struct nfqnl_msg_config_params *params =
   1370			nla_data(nfqa[NFQA_CFG_PARAMS]);
   1371
   1372		nfqnl_set_mode(queue, params->copy_mode,
   1373				ntohl(params->copy_range));
   1374	}
   1375
   1376	if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) {
   1377		__be32 *queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
   1378
   1379		spin_lock_bh(&queue->lock);
   1380		queue->queue_maxlen = ntohl(*queue_maxlen);
   1381		spin_unlock_bh(&queue->lock);
   1382	}
   1383
   1384	if (nfqa[NFQA_CFG_FLAGS]) {
   1385		spin_lock_bh(&queue->lock);
   1386		queue->flags &= ~mask;
   1387		queue->flags |= flags & mask;
   1388		spin_unlock_bh(&queue->lock);
   1389	}
   1390
   1391err_out_unlock:
   1392	rcu_read_unlock();
   1393	return ret;
   1394}
   1395
   1396static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
   1397	[NFQNL_MSG_PACKET]	= {
   1398		.call		= nfqnl_recv_unsupp,
   1399		.type		= NFNL_CB_RCU,
   1400		.attr_count	= NFQA_MAX,
   1401	},
   1402	[NFQNL_MSG_VERDICT]	= {
   1403		.call		= nfqnl_recv_verdict,
   1404		.type		= NFNL_CB_RCU,
   1405		.attr_count	= NFQA_MAX,
   1406		.policy		= nfqa_verdict_policy
   1407	},
   1408	[NFQNL_MSG_CONFIG]	= {
   1409		.call		= nfqnl_recv_config,
   1410		.type		= NFNL_CB_MUTEX,
   1411		.attr_count	= NFQA_CFG_MAX,
   1412		.policy		= nfqa_cfg_policy
   1413	},
   1414	[NFQNL_MSG_VERDICT_BATCH] = {
   1415		.call		= nfqnl_recv_verdict_batch,
   1416		.type		= NFNL_CB_RCU,
   1417		.attr_count	= NFQA_MAX,
   1418		.policy		= nfqa_verdict_batch_policy
   1419	},
   1420};
   1421
   1422static const struct nfnetlink_subsystem nfqnl_subsys = {
   1423	.name		= "nf_queue",
   1424	.subsys_id	= NFNL_SUBSYS_QUEUE,
   1425	.cb_count	= NFQNL_MSG_MAX,
   1426	.cb		= nfqnl_cb,
   1427};
   1428
   1429#ifdef CONFIG_PROC_FS
   1430struct iter_state {
   1431	struct seq_net_private p;
   1432	unsigned int bucket;
   1433};
   1434
   1435static struct hlist_node *get_first(struct seq_file *seq)
   1436{
   1437	struct iter_state *st = seq->private;
   1438	struct net *net;
   1439	struct nfnl_queue_net *q;
   1440
   1441	if (!st)
   1442		return NULL;
   1443
   1444	net = seq_file_net(seq);
   1445	q = nfnl_queue_pernet(net);
   1446	for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
   1447		if (!hlist_empty(&q->instance_table[st->bucket]))
   1448			return q->instance_table[st->bucket].first;
   1449	}
   1450	return NULL;
   1451}
   1452
   1453static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
   1454{
   1455	struct iter_state *st = seq->private;
   1456	struct net *net = seq_file_net(seq);
   1457
   1458	h = h->next;
   1459	while (!h) {
   1460		struct nfnl_queue_net *q;
   1461
   1462		if (++st->bucket >= INSTANCE_BUCKETS)
   1463			return NULL;
   1464
   1465		q = nfnl_queue_pernet(net);
   1466		h = q->instance_table[st->bucket].first;
   1467	}
   1468	return h;
   1469}
   1470
   1471static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
   1472{
   1473	struct hlist_node *head;
   1474	head = get_first(seq);
   1475
   1476	if (head)
   1477		while (pos && (head = get_next(seq, head)))
   1478			pos--;
   1479	return pos ? NULL : head;
   1480}
   1481
   1482static void *seq_start(struct seq_file *s, loff_t *pos)
   1483	__acquires(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
   1484{
   1485	spin_lock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
   1486	return get_idx(s, *pos);
   1487}
   1488
   1489static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
   1490{
   1491	(*pos)++;
   1492	return get_next(s, v);
   1493}
   1494
   1495static void seq_stop(struct seq_file *s, void *v)
   1496	__releases(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
   1497{
   1498	spin_unlock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
   1499}
   1500
   1501static int seq_show(struct seq_file *s, void *v)
   1502{
   1503	const struct nfqnl_instance *inst = v;
   1504
   1505	seq_printf(s, "%5u %6u %5u %1u %5u %5u %5u %8u %2d\n",
   1506		   inst->queue_num,
   1507		   inst->peer_portid, inst->queue_total,
   1508		   inst->copy_mode, inst->copy_range,
   1509		   inst->queue_dropped, inst->queue_user_dropped,
   1510		   inst->id_sequence, 1);
   1511	return 0;
   1512}
   1513
   1514static const struct seq_operations nfqnl_seq_ops = {
   1515	.start	= seq_start,
   1516	.next	= seq_next,
   1517	.stop	= seq_stop,
   1518	.show	= seq_show,
   1519};
   1520#endif /* PROC_FS */
   1521
   1522static int __net_init nfnl_queue_net_init(struct net *net)
   1523{
   1524	unsigned int i;
   1525	struct nfnl_queue_net *q = nfnl_queue_pernet(net);
   1526
   1527	for (i = 0; i < INSTANCE_BUCKETS; i++)
   1528		INIT_HLIST_HEAD(&q->instance_table[i]);
   1529
   1530	spin_lock_init(&q->instances_lock);
   1531
   1532#ifdef CONFIG_PROC_FS
   1533	if (!proc_create_net("nfnetlink_queue", 0440, net->nf.proc_netfilter,
   1534			&nfqnl_seq_ops, sizeof(struct iter_state)))
   1535		return -ENOMEM;
   1536#endif
   1537	return 0;
   1538}
   1539
   1540static void __net_exit nfnl_queue_net_exit(struct net *net)
   1541{
   1542	struct nfnl_queue_net *q = nfnl_queue_pernet(net);
   1543	unsigned int i;
   1544
   1545#ifdef CONFIG_PROC_FS
   1546	remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
   1547#endif
   1548	for (i = 0; i < INSTANCE_BUCKETS; i++)
   1549		WARN_ON_ONCE(!hlist_empty(&q->instance_table[i]));
   1550}
   1551
   1552static struct pernet_operations nfnl_queue_net_ops = {
   1553	.init		= nfnl_queue_net_init,
   1554	.exit		= nfnl_queue_net_exit,
   1555	.id		= &nfnl_queue_net_id,
   1556	.size		= sizeof(struct nfnl_queue_net),
   1557};
   1558
   1559static int __init nfnetlink_queue_init(void)
   1560{
   1561	int status;
   1562
   1563	status = register_pernet_subsys(&nfnl_queue_net_ops);
   1564	if (status < 0) {
   1565		pr_err("failed to register pernet ops\n");
   1566		goto out;
   1567	}
   1568
   1569	netlink_register_notifier(&nfqnl_rtnl_notifier);
   1570	status = nfnetlink_subsys_register(&nfqnl_subsys);
   1571	if (status < 0) {
   1572		pr_err("failed to create netlink socket\n");
   1573		goto cleanup_netlink_notifier;
   1574	}
   1575
   1576	status = register_netdevice_notifier(&nfqnl_dev_notifier);
   1577	if (status < 0) {
   1578		pr_err("failed to register netdevice notifier\n");
   1579		goto cleanup_netlink_subsys;
   1580	}
   1581
   1582	nf_register_queue_handler(&nfqh);
   1583
   1584	return status;
   1585
   1586cleanup_netlink_subsys:
   1587	nfnetlink_subsys_unregister(&nfqnl_subsys);
   1588cleanup_netlink_notifier:
   1589	netlink_unregister_notifier(&nfqnl_rtnl_notifier);
   1590	unregister_pernet_subsys(&nfnl_queue_net_ops);
   1591out:
   1592	return status;
   1593}
   1594
   1595static void __exit nfnetlink_queue_fini(void)
   1596{
   1597	nf_unregister_queue_handler();
   1598	unregister_netdevice_notifier(&nfqnl_dev_notifier);
   1599	nfnetlink_subsys_unregister(&nfqnl_subsys);
   1600	netlink_unregister_notifier(&nfqnl_rtnl_notifier);
   1601	unregister_pernet_subsys(&nfnl_queue_net_ops);
   1602
   1603	rcu_barrier(); /* Wait for completion of call_rcu()'s */
   1604}
   1605
   1606MODULE_DESCRIPTION("netfilter packet queue handler");
   1607MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
   1608MODULE_LICENSE("GPL");
   1609MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE);
   1610
   1611module_init(nfnetlink_queue_init);
   1612module_exit(nfnetlink_queue_fini);