cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

ip_input.c (19429B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * INET		An implementation of the TCP/IP protocol suite for the LINUX
      4 *		operating system.  INET is implemented using the  BSD Socket
      5 *		interface as the means of communication with the user level.
      6 *
      7 *		The Internet Protocol (IP) module.
      8 *
      9 * Authors:	Ross Biro
     10 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
     11 *		Donald Becker, <becker@super.org>
     12 *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
     13 *		Richard Underwood
     14 *		Stefan Becker, <stefanb@yello.ping.de>
     15 *		Jorge Cwik, <jorge@laser.satlink.net>
     16 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
     17 *
     18 * Fixes:
     19 *		Alan Cox	:	Commented a couple of minor bits of surplus code
     20 *		Alan Cox	:	Undefining IP_FORWARD doesn't include the code
     21 *					(just stops a compiler warning).
     22 *		Alan Cox	:	Frames with >=MAX_ROUTE record routes, strict routes or loose routes
     23 *					are junked rather than corrupting things.
     24 *		Alan Cox	:	Frames to bad broadcast subnets are dumped
     25 *					We used to process them non broadcast and
     26 *					boy could that cause havoc.
     27 *		Alan Cox	:	ip_forward sets the free flag on the
     28 *					new frame it queues. Still crap because
     29 *					it copies the frame but at least it
     30 *					doesn't eat memory too.
     31 *		Alan Cox	:	Generic queue code and memory fixes.
     32 *		Fred Van Kempen :	IP fragment support (borrowed from NET2E)
     33 *		Gerhard Koerting:	Forward fragmented frames correctly.
     34 *		Gerhard Koerting: 	Fixes to my fix of the above 8-).
     35 *		Gerhard Koerting:	IP interface addressing fix.
     36 *		Linus Torvalds	:	More robustness checks
     37 *		Alan Cox	:	Even more checks: Still not as robust as it ought to be
     38 *		Alan Cox	:	Save IP header pointer for later
     39 *		Alan Cox	:	ip option setting
     40 *		Alan Cox	:	Use ip_tos/ip_ttl settings
     41 *		Alan Cox	:	Fragmentation bogosity removed
     42 *					(Thanks to Mark.Bush@prg.ox.ac.uk)
     43 *		Dmitry Gorodchanin :	Send of a raw packet crash fix.
     44 *		Alan Cox	:	Silly ip bug when an overlength
     45 *					fragment turns up. Now frees the
     46 *					queue.
     47 *		Linus Torvalds/ :	Memory leakage on fragmentation
     48 *		Alan Cox	:	handling.
     49 *		Gerhard Koerting:	Forwarding uses IP priority hints
     50 *		Teemu Rantanen	:	Fragment problems.
     51 *		Alan Cox	:	General cleanup, comments and reformat
     52 *		Alan Cox	:	SNMP statistics
     53 *		Alan Cox	:	BSD address rule semantics. Also see
     54 *					UDP as there is a nasty checksum issue
     55 *					if you do things the wrong way.
     56 *		Alan Cox	:	Always defrag, moved IP_FORWARD to the config.in file
     57 *		Alan Cox	: 	IP options adjust sk->priority.
     58 *		Pedro Roque	:	Fix mtu/length error in ip_forward.
     59 *		Alan Cox	:	Avoid ip_chk_addr when possible.
     60 *	Richard Underwood	:	IP multicasting.
     61 *		Alan Cox	:	Cleaned up multicast handlers.
     62 *		Alan Cox	:	RAW sockets demultiplex in the BSD style.
     63 *		Gunther Mayer	:	Fix the SNMP reporting typo
     64 *		Alan Cox	:	Always in group 224.0.0.1
     65 *	Pauline Middelink	:	Fast ip_checksum update when forwarding
     66 *					Masquerading support.
     67 *		Alan Cox	:	Multicast loopback error for 224.0.0.1
     68 *		Alan Cox	:	IP_MULTICAST_LOOP option.
     69 *		Alan Cox	:	Use notifiers.
     70 *		Bjorn Ekwall	:	Removed ip_csum (from slhc.c too)
     71 *		Bjorn Ekwall	:	Moved ip_fast_csum to ip.h (inline!)
     72 *		Stefan Becker   :       Send out ICMP HOST REDIRECT
     73 *	Arnt Gulbrandsen	:	ip_build_xmit
     74 *		Alan Cox	:	Per socket routing cache
     75 *		Alan Cox	:	Fixed routing cache, added header cache.
     76 *		Alan Cox	:	Loopback didn't work right in original ip_build_xmit - fixed it.
     77 *		Alan Cox	:	Only send ICMP_REDIRECT if src/dest are the same net.
     78 *		Alan Cox	:	Incoming IP option handling.
     79 *		Alan Cox	:	Set saddr on raw output frames as per BSD.
     80 *		Alan Cox	:	Stopped broadcast source route explosions.
     81 *		Alan Cox	:	Can disable source routing
     82 *		Takeshi Sone    :	Masquerading didn't work.
     83 *	Dave Bonn,Alan Cox	:	Faster IP forwarding whenever possible.
     84 *		Alan Cox	:	Memory leaks, tramples, misc debugging.
     85 *		Alan Cox	:	Fixed multicast (by popular demand 8))
     86 *		Alan Cox	:	Fixed forwarding (by even more popular demand 8))
     87 *		Alan Cox	:	Fixed SNMP statistics [I think]
     88 *	Gerhard Koerting	:	IP fragmentation forwarding fix
     89 *		Alan Cox	:	Device lock against page fault.
     90 *		Alan Cox	:	IP_HDRINCL facility.
     91 *	Werner Almesberger	:	Zero fragment bug
     92 *		Alan Cox	:	RAW IP frame length bug
     93 *		Alan Cox	:	Outgoing firewall on build_xmit
     94 *		A.N.Kuznetsov	:	IP_OPTIONS support throughout the kernel
     95 *		Alan Cox	:	Multicast routing hooks
     96 *		Jos Vos		:	Do accounting *before* call_in_firewall
     97 *	Willy Konynenberg	:	Transparent proxying support
     98 *
     99 * To Fix:
    100 *		IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
    101 *		and could be made very efficient with the addition of some virtual memory hacks to permit
    102 *		the allocation of a buffer that can then be 'grown' by twiddling page tables.
    103 *		Output fragmentation wants updating along with the buffer management to use a single
    104 *		interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
    105 *		output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
    106 *		fragmentation anyway.
    107 */
    108
    109#define pr_fmt(fmt) "IPv4: " fmt
    110
    111#include <linux/module.h>
    112#include <linux/types.h>
    113#include <linux/kernel.h>
    114#include <linux/string.h>
    115#include <linux/errno.h>
    116#include <linux/slab.h>
    117
    118#include <linux/net.h>
    119#include <linux/socket.h>
    120#include <linux/sockios.h>
    121#include <linux/in.h>
    122#include <linux/inet.h>
    123#include <linux/inetdevice.h>
    124#include <linux/netdevice.h>
    125#include <linux/etherdevice.h>
    126#include <linux/indirect_call_wrapper.h>
    127
    128#include <net/snmp.h>
    129#include <net/ip.h>
    130#include <net/protocol.h>
    131#include <net/route.h>
    132#include <linux/skbuff.h>
    133#include <net/sock.h>
    134#include <net/arp.h>
    135#include <net/icmp.h>
    136#include <net/raw.h>
    137#include <net/checksum.h>
    138#include <net/inet_ecn.h>
    139#include <linux/netfilter_ipv4.h>
    140#include <net/xfrm.h>
    141#include <linux/mroute.h>
    142#include <linux/netlink.h>
    143#include <net/dst_metadata.h>
    144
    145/*
    146 *	Process Router Attention IP option (RFC 2113)
    147 */
    148bool ip_call_ra_chain(struct sk_buff *skb)
    149{
    150	struct ip_ra_chain *ra;
    151	u8 protocol = ip_hdr(skb)->protocol;
    152	struct sock *last = NULL;
    153	struct net_device *dev = skb->dev;
    154	struct net *net = dev_net(dev);
    155
    156	for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) {
    157		struct sock *sk = ra->sk;
    158
    159		/* If socket is bound to an interface, only report
    160		 * the packet if it came  from that interface.
    161		 */
    162		if (sk && inet_sk(sk)->inet_num == protocol &&
    163		    (!sk->sk_bound_dev_if ||
    164		     sk->sk_bound_dev_if == dev->ifindex)) {
    165			if (ip_is_fragment(ip_hdr(skb))) {
    166				if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
    167					return true;
    168			}
    169			if (last) {
    170				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
    171				if (skb2)
    172					raw_rcv(last, skb2);
    173			}
    174			last = sk;
    175		}
    176	}
    177
    178	if (last) {
    179		raw_rcv(last, skb);
    180		return true;
    181	}
    182	return false;
    183}
    184
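ip_call_ra_chain() hands Router Alert datagrams to every raw socket that has enabled IP_ROUTER_ALERT and is bound to the packet's protocol. A minimal user-space sketch of such a listener follows; it assumes IPPROTO_RSVP (the classic Router Alert consumer) purely for illustration and needs CAP_NET_RAW:

/* Hedged sketch: a raw-socket listener fed by ip_call_ra_chain().
 * IPPROTO_RSVP is only an example; any SOCK_RAW protocol other than
 * IPPROTO_RAW qualifies once IP_ROUTER_ALERT is set. Needs CAP_NET_RAW.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>

int main(void)
{
	int on = 1;
	char buf[2048];
	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RSVP);

	if (fd < 0)
		return 1;
	/* Ask for datagrams carrying the Router Alert option (RFC 2113). */
	if (setsockopt(fd, IPPROTO_IP, IP_ROUTER_ALERT, &on, sizeof(on)) < 0)
		return 1;
	for (;;) {
		ssize_t n = recv(fd, buf, sizeof(buf), 0);

		if (n < 0)
			break;
		printf("router-alert datagram, %zd bytes incl. IP header\n", n);
	}
	close(fd);
	return 0;
}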
    185INDIRECT_CALLABLE_DECLARE(int udp_rcv(struct sk_buff *));
    186INDIRECT_CALLABLE_DECLARE(int tcp_v4_rcv(struct sk_buff *));
    187void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol)
    188{
    189	const struct net_protocol *ipprot;
    190	int raw, ret;
    191
    192resubmit:
    193	raw = raw_local_deliver(skb, protocol);
    194
    195	ipprot = rcu_dereference(inet_protos[protocol]);
    196	if (ipprot) {
    197		if (!ipprot->no_policy) {
    198			if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
    199				kfree_skb_reason(skb,
    200						 SKB_DROP_REASON_XFRM_POLICY);
    201				return;
    202			}
    203			nf_reset_ct(skb);
    204		}
    205		ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv,
    206				      skb);
    207		if (ret < 0) {
    208			protocol = -ret;
    209			goto resubmit;
    210		}
    211		__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
    212	} else {
    213		if (!raw) {
    214			if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
    215				__IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
    216				icmp_send(skb, ICMP_DEST_UNREACH,
    217					  ICMP_PROT_UNREACH, 0);
    218			}
    219			kfree_skb_reason(skb, SKB_DROP_REASON_IP_NOPROTO);
    220		} else {
    221			__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
    222			consume_skb(skb);
    223		}
    224	}
    225}
    226
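ip_protocol_deliver_rcu() looks the upper-layer handler up in inet_protos[], and a handler returning a negative value -N causes the skb to be resubmitted as protocol N (the resubmit label above). Handlers are installed with inet_add_protocol(); a minimal kernel-module sketch follows, where protocol number 253 (reserved for experimentation by RFC 3692) is an assumption made only for this example:

/* Hedged sketch of installing a handler into inet_protos[].
 * Protocol 253 is an illustrative choice, not something this file uses.
 */
#include <linux/module.h>
#include <linux/skbuff.h>
#include <net/protocol.h>

#define EXAMPLE_PROTO 253

static int example_rcv(struct sk_buff *skb)
{
	/* Consume the packet. Returning a negative value -N here would
	 * make ip_protocol_deliver_rcu() resubmit the skb as protocol N. */
	consume_skb(skb);
	return 0;
}

static const struct net_protocol example_protocol = {
	.handler   = example_rcv,
	.no_policy = 1,		/* skip the xfrm policy check above */
};

static int __init example_init(void)
{
	/* Fails if the slot for this protocol is already taken. */
	return inet_add_protocol(&example_protocol, EXAMPLE_PROTO);
}

static void __exit example_exit(void)
{
	inet_del_protocol(&example_protocol, EXAMPLE_PROTO);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");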
    227static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
    228{
    229	skb_clear_delivery_time(skb);
    230	__skb_pull(skb, skb_network_header_len(skb));
    231
    232	rcu_read_lock();
    233	ip_protocol_deliver_rcu(net, skb, ip_hdr(skb)->protocol);
    234	rcu_read_unlock();
    235
    236	return 0;
    237}
    238
    239/*
    240 * 	Deliver IP Packets to the higher protocol layers.
    241 */
    242int ip_local_deliver(struct sk_buff *skb)
    243{
    244	/*
    245	 *	Reassemble IP fragments.
    246	 */
    247	struct net *net = dev_net(skb->dev);
    248
    249	if (ip_is_fragment(ip_hdr(skb))) {
    250		if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER))
    251			return 0;
    252	}
    253
    254	return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN,
    255		       net, NULL, skb, skb->dev, NULL,
    256		       ip_local_deliver_finish);
    257}
    258EXPORT_SYMBOL(ip_local_deliver);
    259
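ip_local_deliver() runs the (re)assembled skb through the NF_INET_LOCAL_IN netfilter hook before ip_local_deliver_finish() dispatches it. A minimal sketch of a module observing that hook point; the names and the packet counter are illustrative only:

/* Hedged sketch of a callback at the NF_INET_LOCAL_IN hook point
 * traversed by ip_local_deliver(); illustrative only.
 */
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/skbuff.h>
#include <linux/atomic.h>
#include <net/net_namespace.h>

static atomic_t local_in_pkts = ATOMIC_INIT(0);

static unsigned int example_local_in(void *priv, struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	atomic_inc(&local_in_pkts);
	return NF_ACCEPT;	/* let ip_local_deliver_finish() run */
}

static const struct nf_hook_ops example_ops = {
	.hook     = example_local_in,
	.pf       = NFPROTO_IPV4,
	.hooknum  = NF_INET_LOCAL_IN,
	.priority = NF_IP_PRI_FIRST,
};

static int __init example_init(void)
{
	return nf_register_net_hook(&init_net, &example_ops);
}

static void __exit example_exit(void)
{
	nf_unregister_net_hook(&init_net, &example_ops);
	pr_info("saw %d locally delivered packets\n", atomic_read(&local_in_pkts));
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");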
    260static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
    261{
    262	struct ip_options *opt;
    263	const struct iphdr *iph;
    264
    265	/* It looks as overkill, because not all
    266	   IP options require packet mangling.
    267	   But it is the easiest for now, especially taking
    268	   into account that combination of IP options
    269	   and running sniffer is extremely rare condition.
    270					      --ANK (980813)
    271	*/
    272	if (skb_cow(skb, skb_headroom(skb))) {
    273		__IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INDISCARDS);
    274		goto drop;
    275	}
    276
    277	iph = ip_hdr(skb);
    278	opt = &(IPCB(skb)->opt);
    279	opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
    280
    281	if (ip_options_compile(dev_net(dev), opt, skb)) {
    282		__IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
    283		goto drop;
    284	}
    285
    286	if (unlikely(opt->srr)) {
    287		struct in_device *in_dev = __in_dev_get_rcu(dev);
    288
    289		if (in_dev) {
    290			if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
    291				if (IN_DEV_LOG_MARTIANS(in_dev))
    292					net_info_ratelimited("source route option %pI4 -> %pI4\n",
    293							     &iph->saddr,
    294							     &iph->daddr);
    295				goto drop;
    296			}
    297		}
    298
    299		if (ip_options_rcv_srr(skb, dev))
    300			goto drop;
    301	}
    302
    303	return false;
    304drop:
    305	return true;
    306}
    307
    308static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
    309			    const struct sk_buff *hint)
    310{
    311	return hint && !skb_dst(skb) && ip_hdr(hint)->daddr == iph->daddr &&
    312	       ip_hdr(hint)->tos == iph->tos;
    313}
    314
    315INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
    316INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
    317static int ip_rcv_finish_core(struct net *net, struct sock *sk,
    318			      struct sk_buff *skb, struct net_device *dev,
    319			      const struct sk_buff *hint)
    320{
    321	const struct iphdr *iph = ip_hdr(skb);
    322	int (*edemux)(struct sk_buff *skb);
    323	int err, drop_reason;
    324	struct rtable *rt;
    325
    326	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
    327
    328	if (ip_can_use_hint(skb, iph, hint)) {
    329		err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
    330					dev, hint);
    331		if (unlikely(err))
    332			goto drop_error;
    333	}
    334
    335	if (net->ipv4.sysctl_ip_early_demux &&
    336	    !skb_dst(skb) &&
    337	    !skb->sk &&
    338	    !ip_is_fragment(iph)) {
    339		const struct net_protocol *ipprot;
    340		int protocol = iph->protocol;
    341
    342		ipprot = rcu_dereference(inet_protos[protocol]);
    343		if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
    344			err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
    345					      udp_v4_early_demux, skb);
    346			if (unlikely(err))
    347				goto drop_error;
    348			/* must reload iph, skb->head might have changed */
    349			iph = ip_hdr(skb);
    350		}
    351	}
    352
    353	/*
    354	 *	Initialise the virtual path cache for the packet. It describes
    355	 *	how the packet travels inside Linux networking.
    356	 */
    357	if (!skb_valid_dst(skb)) {
    358		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
    359					   iph->tos, dev);
    360		if (unlikely(err))
    361			goto drop_error;
    362	}
    363
    364#ifdef CONFIG_IP_ROUTE_CLASSID
    365	if (unlikely(skb_dst(skb)->tclassid)) {
    366		struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
    367		u32 idx = skb_dst(skb)->tclassid;
    368		st[idx&0xFF].o_packets++;
    369		st[idx&0xFF].o_bytes += skb->len;
    370		st[(idx>>16)&0xFF].i_packets++;
    371		st[(idx>>16)&0xFF].i_bytes += skb->len;
    372	}
    373#endif
    374
    375	if (iph->ihl > 5 && ip_rcv_options(skb, dev))
    376		goto drop;
    377
    378	rt = skb_rtable(skb);
    379	if (rt->rt_type == RTN_MULTICAST) {
    380		__IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len);
    381	} else if (rt->rt_type == RTN_BROADCAST) {
    382		__IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len);
    383	} else if (skb->pkt_type == PACKET_BROADCAST ||
    384		   skb->pkt_type == PACKET_MULTICAST) {
    385		struct in_device *in_dev = __in_dev_get_rcu(dev);
    386
    387		/* RFC 1122 3.3.6:
    388		 *
    389		 *   When a host sends a datagram to a link-layer broadcast
    390		 *   address, the IP destination address MUST be a legal IP
    391		 *   broadcast or IP multicast address.
    392		 *
    393		 *   A host SHOULD silently discard a datagram that is received
    394		 *   via a link-layer broadcast (see Section 2.4) but does not
    395		 *   specify an IP multicast or broadcast destination address.
    396		 *
    397		 * This doesn't explicitly say L2 *broadcast*, but broadcast is
    398		 * in a way a form of multicast and the most common use case for
    399		 * this is 802.11 protecting against cross-station spoofing (the
    400		 * so-called "hole-196" attack) so do it for both.
    401		 */
    402		if (in_dev &&
    403		    IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) {
    404			drop_reason = SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST;
    405			goto drop;
    406		}
    407	}
    408
    409	return NET_RX_SUCCESS;
    410
    411drop:
    412	kfree_skb_reason(skb, drop_reason);
    413	return NET_RX_DROP;
    414
    415drop_error:
    416	if (err == -EXDEV) {
    417		drop_reason = SKB_DROP_REASON_IP_RPFILTER;
    418		__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
    419	}
    420	goto drop;
    421}
    422
    423static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
    424{
    425	struct net_device *dev = skb->dev;
    426	int ret;
    427
    428	/* if ingress device is enslaved to an L3 master device pass the
    429	 * skb to its handler for processing
    430	 */
    431	skb = l3mdev_ip_rcv(skb);
    432	if (!skb)
    433		return NET_RX_SUCCESS;
    434
    435	ret = ip_rcv_finish_core(net, sk, skb, dev, NULL);
    436	if (ret != NET_RX_DROP)
    437		ret = dst_input(skb);
    438	return ret;
    439}
    440
    441/*
    442 * 	Main IP Receive routine.
    443 */
    444static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
    445{
    446	const struct iphdr *iph;
    447	int drop_reason;
    448	u32 len;
    449
    450	/* When the interface is in promisc. mode, drop all the crap
    451	 * that it receives, do not try to analyse it.
    452	 */
    453	if (skb->pkt_type == PACKET_OTHERHOST) {
    454		dev_core_stats_rx_otherhost_dropped_inc(skb->dev);
    455		drop_reason = SKB_DROP_REASON_OTHERHOST;
    456		goto drop;
    457	}
    458
    459	__IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
    460
    461	skb = skb_share_check(skb, GFP_ATOMIC);
    462	if (!skb) {
    463		__IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
    464		goto out;
    465	}
    466
    467	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
    468	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
    469		goto inhdr_error;
    470
    471	iph = ip_hdr(skb);
    472
    473	/*
    474	 *	RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum.
    475	 *
    476	 *	Is the datagram acceptable?
    477	 *
    478	 *	1.	Length at least the size of an ip header
    479	 *	2.	Version of 4
    480	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
    481	 *	4.	Doesn't have a bogus length
    482	 */
    483
    484	if (iph->ihl < 5 || iph->version != 4)
    485		goto inhdr_error;
    486
    487	BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1);
    488	BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0);
    489	BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
    490	__IP_ADD_STATS(net,
    491		       IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
    492		       max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
    493
    494	if (!pskb_may_pull(skb, iph->ihl*4))
    495		goto inhdr_error;
    496
    497	iph = ip_hdr(skb);
    498
    499	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
    500		goto csum_error;
    501
    502	len = ntohs(iph->tot_len);
    503	if (skb->len < len) {
    504		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
    505		__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
    506		goto drop;
    507	} else if (len < (iph->ihl*4))
    508		goto inhdr_error;
    509
    510	/* Our transport medium may have padded the buffer out. Now we know it
    511	 * is IP we can trim to the true length of the frame.
    512	 * Note this now means skb->len holds ntohs(iph->tot_len).
    513	 */
    514	if (pskb_trim_rcsum(skb, len)) {
    515		__IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
    516		goto drop;
    517	}
    518
    519	iph = ip_hdr(skb);
    520	skb->transport_header = skb->network_header + iph->ihl*4;
    521
    522	/* Remove any debris in the socket control block */
    523	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
    524	IPCB(skb)->iif = skb->skb_iif;
    525
    526	/* Must drop socket now because of tproxy. */
    527	if (!skb_sk_is_prefetched(skb))
    528		skb_orphan(skb);
    529
    530	return skb;
    531
    532csum_error:
    533	drop_reason = SKB_DROP_REASON_IP_CSUM;
    534	__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
    535inhdr_error:
    536	if (drop_reason == SKB_DROP_REASON_NOT_SPECIFIED)
    537		drop_reason = SKB_DROP_REASON_IP_INHDR;
    538	__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
    539drop:
    540	kfree_skb_reason(skb, drop_reason);
    541out:
    542	return NULL;
    543}
    544
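The numbered checklist inside ip_rcv_core() is what every incoming datagram must pass before anything else touches it. A user-space sketch of the same four checks, with a plain RFC 1071 ones'-complement sum standing in for the arch-optimized ip_fast_csum():

/* Hedged sketch of ip_rcv_core()'s header sanity checks for user space.
 * The simple checksum below replaces ip_fast_csum(); it returns 0 only
 * when the header checksums correctly.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/ip.h>

static uint16_t rfc1071_sum(const void *data, size_t len)
{
	const uint8_t *p = data;
	uint32_t sum = 0;

	while (len > 1) {
		uint16_t word;

		memcpy(&word, p, 2);
		sum += word;
		p += 2;
		len -= 2;
	}
	if (len)
		sum += *p;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return ~sum;
}

static bool ipv4_header_ok(const uint8_t *pkt, size_t caplen)
{
	struct iphdr iph;
	size_t hlen, tot;

	if (caplen < sizeof(iph))		/* 1. at least a base header */
		return false;
	memcpy(&iph, pkt, sizeof(iph));
	if (iph.ihl < 5 || iph.version != 4)	/* 1. and 2. */
		return false;
	hlen = iph.ihl * 4;
	if (caplen < hlen)
		return false;
	if (rfc1071_sum(pkt, hlen) != 0)	/* 3. checksum */
		return false;
	tot = ntohs(iph.tot_len);
	if (tot < hlen || tot > caplen)		/* 4. bogus length */
		return false;
	return true;
}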
    545/*
    546 * IP receive entry point
    547 */
    548int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
    549	   struct net_device *orig_dev)
    550{
    551	struct net *net = dev_net(dev);
    552
    553	skb = ip_rcv_core(skb, net);
    554	if (skb == NULL)
    555		return NET_RX_DROP;
    556
    557	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
    558		       net, NULL, skb, dev, NULL,
    559		       ip_rcv_finish);
    560}
    561
    562static void ip_sublist_rcv_finish(struct list_head *head)
    563{
    564	struct sk_buff *skb, *next;
    565
    566	list_for_each_entry_safe(skb, next, head, list) {
    567		skb_list_del_init(skb);
    568		dst_input(skb);
    569	}
    570}
    571
    572static struct sk_buff *ip_extract_route_hint(const struct net *net,
    573					     struct sk_buff *skb, int rt_type)
    574{
    575	if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
    576		return NULL;
    577
    578	return skb;
    579}
    580
    581static void ip_list_rcv_finish(struct net *net, struct sock *sk,
    582			       struct list_head *head)
    583{
    584	struct sk_buff *skb, *next, *hint = NULL;
    585	struct dst_entry *curr_dst = NULL;
    586	struct list_head sublist;
    587
    588	INIT_LIST_HEAD(&sublist);
    589	list_for_each_entry_safe(skb, next, head, list) {
    590		struct net_device *dev = skb->dev;
    591		struct dst_entry *dst;
    592
    593		skb_list_del_init(skb);
    594		/* if ingress device is enslaved to an L3 master device pass the
    595		 * skb to its handler for processing
    596		 */
    597		skb = l3mdev_ip_rcv(skb);
    598		if (!skb)
    599			continue;
    600		if (ip_rcv_finish_core(net, sk, skb, dev, hint) == NET_RX_DROP)
    601			continue;
    602
    603		dst = skb_dst(skb);
    604		if (curr_dst != dst) {
    605			hint = ip_extract_route_hint(net, skb,
    606					       ((struct rtable *)dst)->rt_type);
    607
    608			/* dispatch old sublist */
    609			if (!list_empty(&sublist))
    610				ip_sublist_rcv_finish(&sublist);
    611			/* start new sublist */
    612			INIT_LIST_HEAD(&sublist);
    613			curr_dst = dst;
    614		}
    615		list_add_tail(&skb->list, &sublist);
    616	}
    617	/* dispatch final sublist */
    618	ip_sublist_rcv_finish(&sublist);
    619}
    620
    621static void ip_sublist_rcv(struct list_head *head, struct net_device *dev,
    622			   struct net *net)
    623{
    624	NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
    625		     head, dev, NULL, ip_rcv_finish);
    626	ip_list_rcv_finish(net, NULL, head);
    627}
    628
    629/* Receive a list of IP packets */
    630void ip_list_rcv(struct list_head *head, struct packet_type *pt,
    631		 struct net_device *orig_dev)
    632{
    633	struct net_device *curr_dev = NULL;
    634	struct net *curr_net = NULL;
    635	struct sk_buff *skb, *next;
    636	struct list_head sublist;
    637
    638	INIT_LIST_HEAD(&sublist);
    639	list_for_each_entry_safe(skb, next, head, list) {
    640		struct net_device *dev = skb->dev;
    641		struct net *net = dev_net(dev);
    642
    643		skb_list_del_init(skb);
    644		skb = ip_rcv_core(skb, net);
    645		if (skb == NULL)
    646			continue;
    647
    648		if (curr_dev != dev || curr_net != net) {
    649			/* dispatch old sublist */
    650			if (!list_empty(&sublist))
    651				ip_sublist_rcv(&sublist, curr_dev, curr_net);
    652			/* start new sublist */
    653			INIT_LIST_HEAD(&sublist);
    654			curr_dev = dev;
    655			curr_net = net;
    656		}
    657		list_add_tail(&skb->list, &sublist);
    658	}
    659	/* dispatch final sublist */
    660	if (!list_empty(&sublist))
    661		ip_sublist_rcv(&sublist, curr_dev, curr_net);
    662}
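
ip_rcv() and ip_list_rcv() are the two entry points the core receive path invokes for ETH_P_IP frames; they are attached through a struct packet_type. A hedged paraphrase of that registration, essentially what net/ipv4/af_inet.c does during inet_init() (shown for orientation, not code to load separately):

/* Hedged paraphrase of the ETH_P_IP packet_type registration that
 * wires up the entry points above; the real version lives in
 * net/ipv4/af_inet.c.
 */
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <net/ip.h>

static struct packet_type ip_packet_type __read_mostly = {
	.type      = cpu_to_be16(ETH_P_IP),
	.func      = ip_rcv,		/* single-skb path */
	.list_func = ip_list_rcv,	/* batched GRO/NAPI path */
};

/* inet_init() then calls:
 *	dev_add_pack(&ip_packet_type);
 */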