cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

nf_nat_proto.c (28554B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/* (C) 1999-2001 Paul `Rusty' Russell
      3 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
      4 */
      5
      6#include <linux/types.h>
      7#include <linux/export.h>
      8#include <linux/init.h>
      9#include <linux/udp.h>
     10#include <linux/tcp.h>
     11#include <linux/icmp.h>
     12#include <linux/icmpv6.h>
     13
     14#include <linux/dccp.h>
     15#include <linux/sctp.h>
     16#include <net/sctp/checksum.h>
     17
     18#include <linux/netfilter.h>
     19#include <net/netfilter/nf_nat.h>
     20
     21#include <linux/ipv6.h>
     22#include <linux/netfilter_ipv6.h>
     23#include <net/checksum.h>
     24#include <net/ip6_checksum.h>
     25#include <net/ip6_route.h>
     26#include <net/xfrm.h>
     27#include <net/ipv6.h>
     28
     29#include <net/netfilter/nf_conntrack_core.h>
     30#include <net/netfilter/nf_conntrack.h>
     31#include <linux/netfilter/nfnetlink_conntrack.h>
     32
     33static void nf_csum_update(struct sk_buff *skb,
     34			   unsigned int iphdroff, __sum16 *check,
     35			   const struct nf_conntrack_tuple *t,
     36			   enum nf_nat_manip_type maniptype);
     37
     38static void
     39__udp_manip_pkt(struct sk_buff *skb,
     40	        unsigned int iphdroff, struct udphdr *hdr,
     41	        const struct nf_conntrack_tuple *tuple,
     42	        enum nf_nat_manip_type maniptype, bool do_csum)
     43{
     44	__be16 *portptr, newport;
     45
     46	if (maniptype == NF_NAT_MANIP_SRC) {
     47		/* Get rid of src port */
     48		newport = tuple->src.u.udp.port;
     49		portptr = &hdr->source;
     50	} else {
     51		/* Get rid of dst port */
     52		newport = tuple->dst.u.udp.port;
     53		portptr = &hdr->dest;
     54	}
     55	if (do_csum) {
     56		nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
     57		inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
     58					 false);
     59		if (!hdr->check)
     60			hdr->check = CSUM_MANGLED_0;
     61	}
     62	*portptr = newport;
     63}
     64
     65static bool udp_manip_pkt(struct sk_buff *skb,
     66			  unsigned int iphdroff, unsigned int hdroff,
     67			  const struct nf_conntrack_tuple *tuple,
     68			  enum nf_nat_manip_type maniptype)
     69{
     70	struct udphdr *hdr;
     71
     72	if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
     73		return false;
     74
     75	hdr = (struct udphdr *)(skb->data + hdroff);
     76	__udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, !!hdr->check);
     77
     78	return true;
     79}
     80
     81static bool udplite_manip_pkt(struct sk_buff *skb,
     82			      unsigned int iphdroff, unsigned int hdroff,
     83			      const struct nf_conntrack_tuple *tuple,
     84			      enum nf_nat_manip_type maniptype)
     85{
     86#ifdef CONFIG_NF_CT_PROTO_UDPLITE
     87	struct udphdr *hdr;
     88
     89	if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
     90		return false;
     91
     92	hdr = (struct udphdr *)(skb->data + hdroff);
     93	__udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true);
     94#endif
     95	return true;
     96}
     97
     98static bool
     99sctp_manip_pkt(struct sk_buff *skb,
    100	       unsigned int iphdroff, unsigned int hdroff,
    101	       const struct nf_conntrack_tuple *tuple,
    102	       enum nf_nat_manip_type maniptype)
    103{
    104#ifdef CONFIG_NF_CT_PROTO_SCTP
    105	struct sctphdr *hdr;
    106	int hdrsize = 8;
    107
    108	/* This could be an inner header returned in imcp packet; in such
    109	 * cases we cannot update the checksum field since it is outside
    110	 * of the 8 bytes of transport layer headers we are guaranteed.
    111	 */
    112	if (skb->len >= hdroff + sizeof(*hdr))
    113		hdrsize = sizeof(*hdr);
    114
    115	if (skb_ensure_writable(skb, hdroff + hdrsize))
    116		return false;
    117
    118	hdr = (struct sctphdr *)(skb->data + hdroff);
    119
    120	if (maniptype == NF_NAT_MANIP_SRC) {
    121		/* Get rid of src port */
    122		hdr->source = tuple->src.u.sctp.port;
    123	} else {
    124		/* Get rid of dst port */
    125		hdr->dest = tuple->dst.u.sctp.port;
    126	}
    127
    128	if (hdrsize < sizeof(*hdr))
    129		return true;
    130
    131	if (skb->ip_summed != CHECKSUM_PARTIAL) {
    132		hdr->checksum = sctp_compute_cksum(skb, hdroff);
    133		skb->ip_summed = CHECKSUM_NONE;
    134	}
    135
    136#endif
    137	return true;
    138}
    139
    140static bool
    141tcp_manip_pkt(struct sk_buff *skb,
    142	      unsigned int iphdroff, unsigned int hdroff,
    143	      const struct nf_conntrack_tuple *tuple,
    144	      enum nf_nat_manip_type maniptype)
    145{
    146	struct tcphdr *hdr;
    147	__be16 *portptr, newport, oldport;
    148	int hdrsize = 8; /* TCP connection tracking guarantees this much */
    149
    150	/* this could be a inner header returned in icmp packet; in such
    151	   cases we cannot update the checksum field since it is outside of
    152	   the 8 bytes of transport layer headers we are guaranteed */
    153	if (skb->len >= hdroff + sizeof(struct tcphdr))
    154		hdrsize = sizeof(struct tcphdr);
    155
    156	if (skb_ensure_writable(skb, hdroff + hdrsize))
    157		return false;
    158
    159	hdr = (struct tcphdr *)(skb->data + hdroff);
    160
    161	if (maniptype == NF_NAT_MANIP_SRC) {
    162		/* Get rid of src port */
    163		newport = tuple->src.u.tcp.port;
    164		portptr = &hdr->source;
    165	} else {
    166		/* Get rid of dst port */
    167		newport = tuple->dst.u.tcp.port;
    168		portptr = &hdr->dest;
    169	}
    170
    171	oldport = *portptr;
    172	*portptr = newport;
    173
    174	if (hdrsize < sizeof(*hdr))
    175		return true;
    176
    177	nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
    178	inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
    179	return true;
    180}
    181
    182static bool
    183dccp_manip_pkt(struct sk_buff *skb,
    184	       unsigned int iphdroff, unsigned int hdroff,
    185	       const struct nf_conntrack_tuple *tuple,
    186	       enum nf_nat_manip_type maniptype)
    187{
    188#ifdef CONFIG_NF_CT_PROTO_DCCP
    189	struct dccp_hdr *hdr;
    190	__be16 *portptr, oldport, newport;
    191	int hdrsize = 8; /* DCCP connection tracking guarantees this much */
    192
    193	if (skb->len >= hdroff + sizeof(struct dccp_hdr))
    194		hdrsize = sizeof(struct dccp_hdr);
    195
    196	if (skb_ensure_writable(skb, hdroff + hdrsize))
    197		return false;
    198
    199	hdr = (struct dccp_hdr *)(skb->data + hdroff);
    200
    201	if (maniptype == NF_NAT_MANIP_SRC) {
    202		newport = tuple->src.u.dccp.port;
    203		portptr = &hdr->dccph_sport;
    204	} else {
    205		newport = tuple->dst.u.dccp.port;
    206		portptr = &hdr->dccph_dport;
    207	}
    208
    209	oldport = *portptr;
    210	*portptr = newport;
    211
    212	if (hdrsize < sizeof(*hdr))
    213		return true;
    214
    215	nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype);
    216	inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
    217				 false);
    218#endif
    219	return true;
    220}
    221
    222static bool
    223icmp_manip_pkt(struct sk_buff *skb,
    224	       unsigned int iphdroff, unsigned int hdroff,
    225	       const struct nf_conntrack_tuple *tuple,
    226	       enum nf_nat_manip_type maniptype)
    227{
    228	struct icmphdr *hdr;
    229
    230	if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
    231		return false;
    232
    233	hdr = (struct icmphdr *)(skb->data + hdroff);
    234	switch (hdr->type) {
    235	case ICMP_ECHO:
    236	case ICMP_ECHOREPLY:
    237	case ICMP_TIMESTAMP:
    238	case ICMP_TIMESTAMPREPLY:
    239	case ICMP_INFO_REQUEST:
    240	case ICMP_INFO_REPLY:
    241	case ICMP_ADDRESS:
    242	case ICMP_ADDRESSREPLY:
    243		break;
    244	default:
    245		return true;
    246	}
    247	inet_proto_csum_replace2(&hdr->checksum, skb,
    248				 hdr->un.echo.id, tuple->src.u.icmp.id, false);
    249	hdr->un.echo.id = tuple->src.u.icmp.id;
    250	return true;
    251}
    252
    253static bool
    254icmpv6_manip_pkt(struct sk_buff *skb,
    255		 unsigned int iphdroff, unsigned int hdroff,
    256		 const struct nf_conntrack_tuple *tuple,
    257		 enum nf_nat_manip_type maniptype)
    258{
    259	struct icmp6hdr *hdr;
    260
    261	if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
    262		return false;
    263
    264	hdr = (struct icmp6hdr *)(skb->data + hdroff);
    265	nf_csum_update(skb, iphdroff, &hdr->icmp6_cksum, tuple, maniptype);
    266	if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
    267	    hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
    268		inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
    269					 hdr->icmp6_identifier,
    270					 tuple->src.u.icmp.id, false);
    271		hdr->icmp6_identifier = tuple->src.u.icmp.id;
    272	}
    273	return true;
    274}
    275
    276/* manipulate a GRE packet according to maniptype */
    277static bool
    278gre_manip_pkt(struct sk_buff *skb,
    279	      unsigned int iphdroff, unsigned int hdroff,
    280	      const struct nf_conntrack_tuple *tuple,
    281	      enum nf_nat_manip_type maniptype)
    282{
    283#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
    284	const struct gre_base_hdr *greh;
    285	struct pptp_gre_header *pgreh;
    286
    287	/* pgreh includes two optional 32bit fields which are not required
    288	 * to be there.  That's where the magic '8' comes from */
    289	if (skb_ensure_writable(skb, hdroff + sizeof(*pgreh) - 8))
    290		return false;
    291
    292	greh = (void *)skb->data + hdroff;
    293	pgreh = (struct pptp_gre_header *)greh;
    294
    295	/* we only have destination manip of a packet, since 'source key'
    296	 * is not present in the packet itself */
    297	if (maniptype != NF_NAT_MANIP_DST)
    298		return true;
    299
    300	switch (greh->flags & GRE_VERSION) {
    301	case GRE_VERSION_0:
    302		/* We do not currently NAT any GREv0 packets.
    303		 * Try to behave like "nf_nat_proto_unknown" */
    304		break;
    305	case GRE_VERSION_1:
    306		pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
    307		pgreh->call_id = tuple->dst.u.gre.key;
    308		break;
    309	default:
    310		pr_debug("can't nat unknown GRE version\n");
    311		return false;
    312	}
    313#endif
    314	return true;
    315}
    316
    317static bool l4proto_manip_pkt(struct sk_buff *skb,
    318			      unsigned int iphdroff, unsigned int hdroff,
    319			      const struct nf_conntrack_tuple *tuple,
    320			      enum nf_nat_manip_type maniptype)
    321{
    322	switch (tuple->dst.protonum) {
    323	case IPPROTO_TCP:
    324		return tcp_manip_pkt(skb, iphdroff, hdroff,
    325				     tuple, maniptype);
    326	case IPPROTO_UDP:
    327		return udp_manip_pkt(skb, iphdroff, hdroff,
    328				     tuple, maniptype);
    329	case IPPROTO_UDPLITE:
    330		return udplite_manip_pkt(skb, iphdroff, hdroff,
    331					 tuple, maniptype);
    332	case IPPROTO_SCTP:
    333		return sctp_manip_pkt(skb, iphdroff, hdroff,
    334				      tuple, maniptype);
    335	case IPPROTO_ICMP:
    336		return icmp_manip_pkt(skb, iphdroff, hdroff,
    337				      tuple, maniptype);
    338	case IPPROTO_ICMPV6:
    339		return icmpv6_manip_pkt(skb, iphdroff, hdroff,
    340					tuple, maniptype);
    341	case IPPROTO_DCCP:
    342		return dccp_manip_pkt(skb, iphdroff, hdroff,
    343				      tuple, maniptype);
    344	case IPPROTO_GRE:
    345		return gre_manip_pkt(skb, iphdroff, hdroff,
    346				     tuple, maniptype);
    347	}
    348
    349	/* If we don't know protocol -- no error, pass it unmodified. */
    350	return true;
    351}
    352
    353static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
    354				  unsigned int iphdroff,
    355				  const struct nf_conntrack_tuple *target,
    356				  enum nf_nat_manip_type maniptype)
    357{
    358	struct iphdr *iph;
    359	unsigned int hdroff;
    360
    361	if (skb_ensure_writable(skb, iphdroff + sizeof(*iph)))
    362		return false;
    363
    364	iph = (void *)skb->data + iphdroff;
    365	hdroff = iphdroff + iph->ihl * 4;
    366
    367	if (!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
    368		return false;
    369	iph = (void *)skb->data + iphdroff;
    370
    371	if (maniptype == NF_NAT_MANIP_SRC) {
    372		csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
    373		iph->saddr = target->src.u3.ip;
    374	} else {
    375		csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
    376		iph->daddr = target->dst.u3.ip;
    377	}
    378	return true;
    379}
    380
    381static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
    382				  unsigned int iphdroff,
    383				  const struct nf_conntrack_tuple *target,
    384				  enum nf_nat_manip_type maniptype)
    385{
    386#if IS_ENABLED(CONFIG_IPV6)
    387	struct ipv6hdr *ipv6h;
    388	__be16 frag_off;
    389	int hdroff;
    390	u8 nexthdr;
    391
    392	if (skb_ensure_writable(skb, iphdroff + sizeof(*ipv6h)))
    393		return false;
    394
    395	ipv6h = (void *)skb->data + iphdroff;
    396	nexthdr = ipv6h->nexthdr;
    397	hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
    398				  &nexthdr, &frag_off);
    399	if (hdroff < 0)
    400		goto manip_addr;
    401
    402	if ((frag_off & htons(~0x7)) == 0 &&
    403	    !l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
    404		return false;
    405
    406	/* must reload, offset might have changed */
    407	ipv6h = (void *)skb->data + iphdroff;
    408
    409manip_addr:
    410	if (maniptype == NF_NAT_MANIP_SRC)
    411		ipv6h->saddr = target->src.u3.in6;
    412	else
    413		ipv6h->daddr = target->dst.u3.in6;
    414
    415#endif
    416	return true;
    417}
    418
    419unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
    420			      enum nf_nat_manip_type mtype,
    421			      enum ip_conntrack_dir dir)
    422{
    423	struct nf_conntrack_tuple target;
    424
    425	/* We are aiming to look like inverse of other direction. */
    426	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
    427
    428	switch (target.src.l3num) {
    429	case NFPROTO_IPV6:
    430		if (nf_nat_ipv6_manip_pkt(skb, 0, &target, mtype))
    431			return NF_ACCEPT;
    432		break;
    433	case NFPROTO_IPV4:
    434		if (nf_nat_ipv4_manip_pkt(skb, 0, &target, mtype))
    435			return NF_ACCEPT;
    436		break;
    437	default:
    438		WARN_ON_ONCE(1);
    439		break;
    440	}
    441
    442	return NF_DROP;
    443}
    444
    445static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
    446				    unsigned int iphdroff, __sum16 *check,
    447				    const struct nf_conntrack_tuple *t,
    448				    enum nf_nat_manip_type maniptype)
    449{
    450	struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
    451	__be32 oldip, newip;
    452
    453	if (maniptype == NF_NAT_MANIP_SRC) {
    454		oldip = iph->saddr;
    455		newip = t->src.u3.ip;
    456	} else {
    457		oldip = iph->daddr;
    458		newip = t->dst.u3.ip;
    459	}
    460	inet_proto_csum_replace4(check, skb, oldip, newip, true);
    461}
    462
    463static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
    464				    unsigned int iphdroff, __sum16 *check,
    465				    const struct nf_conntrack_tuple *t,
    466				    enum nf_nat_manip_type maniptype)
    467{
    468#if IS_ENABLED(CONFIG_IPV6)
    469	const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
    470	const struct in6_addr *oldip, *newip;
    471
    472	if (maniptype == NF_NAT_MANIP_SRC) {
    473		oldip = &ipv6h->saddr;
    474		newip = &t->src.u3.in6;
    475	} else {
    476		oldip = &ipv6h->daddr;
    477		newip = &t->dst.u3.in6;
    478	}
    479	inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
    480				  newip->s6_addr32, true);
    481#endif
    482}
    483
    484static void nf_csum_update(struct sk_buff *skb,
    485			   unsigned int iphdroff, __sum16 *check,
    486			   const struct nf_conntrack_tuple *t,
    487			   enum nf_nat_manip_type maniptype)
    488{
    489	switch (t->src.l3num) {
    490	case NFPROTO_IPV4:
    491		nf_nat_ipv4_csum_update(skb, iphdroff, check, t, maniptype);
    492		return;
    493	case NFPROTO_IPV6:
    494		nf_nat_ipv6_csum_update(skb, iphdroff, check, t, maniptype);
    495		return;
    496	}
    497}
    498
    499static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
    500				    u8 proto, void *data, __sum16 *check,
    501				    int datalen, int oldlen)
    502{
    503	if (skb->ip_summed != CHECKSUM_PARTIAL) {
    504		const struct iphdr *iph = ip_hdr(skb);
    505
    506		skb->ip_summed = CHECKSUM_PARTIAL;
    507		skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
    508			ip_hdrlen(skb);
    509		skb->csum_offset = (void *)check - data;
    510		*check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
    511					    proto, 0);
    512	} else {
    513		inet_proto_csum_replace2(check, skb,
    514					 htons(oldlen), htons(datalen), true);
    515	}
    516}
    517
    518#if IS_ENABLED(CONFIG_IPV6)
    519static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
    520				    u8 proto, void *data, __sum16 *check,
    521				    int datalen, int oldlen)
    522{
    523	if (skb->ip_summed != CHECKSUM_PARTIAL) {
    524		const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
    525
    526		skb->ip_summed = CHECKSUM_PARTIAL;
    527		skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
    528			(data - (void *)skb->data);
    529		skb->csum_offset = (void *)check - data;
    530		*check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
    531					  datalen, proto, 0);
    532	} else {
    533		inet_proto_csum_replace2(check, skb,
    534					 htons(oldlen), htons(datalen), true);
    535	}
    536}
    537#endif
    538
    539void nf_nat_csum_recalc(struct sk_buff *skb,
    540			u8 nfproto, u8 proto, void *data, __sum16 *check,
    541			int datalen, int oldlen)
    542{
    543	switch (nfproto) {
    544	case NFPROTO_IPV4:
    545		nf_nat_ipv4_csum_recalc(skb, proto, data, check,
    546					datalen, oldlen);
    547		return;
    548#if IS_ENABLED(CONFIG_IPV6)
    549	case NFPROTO_IPV6:
    550		nf_nat_ipv6_csum_recalc(skb, proto, data, check,
    551					datalen, oldlen);
    552		return;
    553#endif
    554	}
    555
    556	WARN_ON_ONCE(1);
    557}
    558
    559int nf_nat_icmp_reply_translation(struct sk_buff *skb,
    560				  struct nf_conn *ct,
    561				  enum ip_conntrack_info ctinfo,
    562				  unsigned int hooknum)
    563{
    564	struct {
    565		struct icmphdr	icmp;
    566		struct iphdr	ip;
    567	} *inside;
    568	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
    569	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
    570	unsigned int hdrlen = ip_hdrlen(skb);
    571	struct nf_conntrack_tuple target;
    572	unsigned long statusbit;
    573
    574	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
    575
    576	if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
    577		return 0;
    578	if (nf_ip_checksum(skb, hooknum, hdrlen, IPPROTO_ICMP))
    579		return 0;
    580
    581	inside = (void *)skb->data + hdrlen;
    582	if (inside->icmp.type == ICMP_REDIRECT) {
    583		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
    584			return 0;
    585		if (ct->status & IPS_NAT_MASK)
    586			return 0;
    587	}
    588
    589	if (manip == NF_NAT_MANIP_SRC)
    590		statusbit = IPS_SRC_NAT;
    591	else
    592		statusbit = IPS_DST_NAT;
    593
    594	/* Invert if this is reply direction */
    595	if (dir == IP_CT_DIR_REPLY)
    596		statusbit ^= IPS_NAT_MASK;
    597
    598	if (!(ct->status & statusbit))
    599		return 1;
    600
    601	if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
    602				   &ct->tuplehash[!dir].tuple, !manip))
    603		return 0;
    604
    605	if (skb->ip_summed != CHECKSUM_PARTIAL) {
    606		/* Reloading "inside" here since manip_pkt may reallocate */
    607		inside = (void *)skb->data + hdrlen;
    608		inside->icmp.checksum = 0;
    609		inside->icmp.checksum =
    610			csum_fold(skb_checksum(skb, hdrlen,
    611					       skb->len - hdrlen, 0));
    612	}
    613
    614	/* Change outer to look like the reply to an incoming packet */
    615	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
    616	target.dst.protonum = IPPROTO_ICMP;
    617	if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
    618		return 0;
    619
    620	return 1;
    621}
    622EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
    623
    624static unsigned int
    625nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
    626	       const struct nf_hook_state *state)
    627{
    628	struct nf_conn *ct;
    629	enum ip_conntrack_info ctinfo;
    630
    631	ct = nf_ct_get(skb, &ctinfo);
    632	if (!ct)
    633		return NF_ACCEPT;
    634
    635	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
    636		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
    637			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
    638							   state->hook))
    639				return NF_DROP;
    640			else
    641				return NF_ACCEPT;
    642		}
    643	}
    644
    645	return nf_nat_inet_fn(priv, skb, state);
    646}
    647
    648static unsigned int
    649nf_nat_ipv4_pre_routing(void *priv, struct sk_buff *skb,
    650			const struct nf_hook_state *state)
    651{
    652	unsigned int ret;
    653	__be32 daddr = ip_hdr(skb)->daddr;
    654
    655	ret = nf_nat_ipv4_fn(priv, skb, state);
    656	if (ret == NF_ACCEPT && daddr != ip_hdr(skb)->daddr)
    657		skb_dst_drop(skb);
    658
    659	return ret;
    660}
    661
    662#ifdef CONFIG_XFRM
    663static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
    664{
    665	struct sock *sk = skb->sk;
    666	struct dst_entry *dst;
    667	unsigned int hh_len;
    668	struct flowi fl;
    669	int err;
    670
    671	err = xfrm_decode_session(skb, &fl, family);
    672	if (err < 0)
    673		return err;
    674
    675	dst = skb_dst(skb);
    676	if (dst->xfrm)
    677		dst = ((struct xfrm_dst *)dst)->route;
    678	if (!dst_hold_safe(dst))
    679		return -EHOSTUNREACH;
    680
    681	if (sk && !net_eq(net, sock_net(sk)))
    682		sk = NULL;
    683
    684	dst = xfrm_lookup(net, dst, &fl, sk, 0);
    685	if (IS_ERR(dst))
    686		return PTR_ERR(dst);
    687
    688	skb_dst_drop(skb);
    689	skb_dst_set(skb, dst);
    690
    691	/* Change in oif may mean change in hh_len. */
    692	hh_len = skb_dst(skb)->dev->hard_header_len;
    693	if (skb_headroom(skb) < hh_len &&
    694	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
    695		return -ENOMEM;
    696	return 0;
    697}
    698#endif
    699
    700static unsigned int
    701nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
    702		     const struct nf_hook_state *state)
    703{
    704	__be32 saddr = ip_hdr(skb)->saddr;
    705	struct sock *sk = skb->sk;
    706	unsigned int ret;
    707
    708	ret = nf_nat_ipv4_fn(priv, skb, state);
    709
    710	if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr &&
    711	    !inet_sk_transparent(sk))
    712		skb_orphan(skb); /* TCP edemux obtained wrong socket */
    713
    714	return ret;
    715}
    716
    717static unsigned int
    718nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
    719		const struct nf_hook_state *state)
    720{
    721#ifdef CONFIG_XFRM
    722	const struct nf_conn *ct;
    723	enum ip_conntrack_info ctinfo;
    724	int err;
    725#endif
    726	unsigned int ret;
    727
    728	ret = nf_nat_ipv4_fn(priv, skb, state);
    729#ifdef CONFIG_XFRM
    730	if (ret != NF_ACCEPT)
    731		return ret;
    732
    733	if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
    734		return ret;
    735
    736	ct = nf_ct_get(skb, &ctinfo);
    737	if (ct) {
    738		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
    739
    740		if (ct->tuplehash[dir].tuple.src.u3.ip !=
    741		     ct->tuplehash[!dir].tuple.dst.u3.ip ||
    742		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
    743		     ct->tuplehash[dir].tuple.src.u.all !=
    744		     ct->tuplehash[!dir].tuple.dst.u.all)) {
    745			err = nf_xfrm_me_harder(state->net, skb, AF_INET);
    746			if (err < 0)
    747				ret = NF_DROP_ERR(err);
    748		}
    749	}
    750#endif
    751	return ret;
    752}
    753
    754static unsigned int
    755nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
    756		     const struct nf_hook_state *state)
    757{
    758	const struct nf_conn *ct;
    759	enum ip_conntrack_info ctinfo;
    760	unsigned int ret;
    761	int err;
    762
    763	ret = nf_nat_ipv4_fn(priv, skb, state);
    764	if (ret != NF_ACCEPT)
    765		return ret;
    766
    767	ct = nf_ct_get(skb, &ctinfo);
    768	if (ct) {
    769		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
    770
    771		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
    772		    ct->tuplehash[!dir].tuple.src.u3.ip) {
    773			err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
    774			if (err < 0)
    775				ret = NF_DROP_ERR(err);
    776		}
    777#ifdef CONFIG_XFRM
    778		else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
    779			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
    780			 ct->tuplehash[dir].tuple.dst.u.all !=
    781			 ct->tuplehash[!dir].tuple.src.u.all) {
    782			err = nf_xfrm_me_harder(state->net, skb, AF_INET);
    783			if (err < 0)
    784				ret = NF_DROP_ERR(err);
    785		}
    786#endif
    787	}
    788	return ret;
    789}
    790
    791static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
    792	/* Before packet filtering, change destination */
    793	{
    794		.hook		= nf_nat_ipv4_pre_routing,
    795		.pf		= NFPROTO_IPV4,
    796		.hooknum	= NF_INET_PRE_ROUTING,
    797		.priority	= NF_IP_PRI_NAT_DST,
    798	},
    799	/* After packet filtering, change source */
    800	{
    801		.hook		= nf_nat_ipv4_out,
    802		.pf		= NFPROTO_IPV4,
    803		.hooknum	= NF_INET_POST_ROUTING,
    804		.priority	= NF_IP_PRI_NAT_SRC,
    805	},
    806	/* Before packet filtering, change destination */
    807	{
    808		.hook		= nf_nat_ipv4_local_fn,
    809		.pf		= NFPROTO_IPV4,
    810		.hooknum	= NF_INET_LOCAL_OUT,
    811		.priority	= NF_IP_PRI_NAT_DST,
    812	},
    813	/* After packet filtering, change source */
    814	{
    815		.hook		= nf_nat_ipv4_local_in,
    816		.pf		= NFPROTO_IPV4,
    817		.hooknum	= NF_INET_LOCAL_IN,
    818		.priority	= NF_IP_PRI_NAT_SRC,
    819	},
    820};
    821
    822int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
    823{
    824	return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv4_ops,
    825				  ARRAY_SIZE(nf_nat_ipv4_ops));
    826}
    827EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn);
    828
    829void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
    830{
    831	nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
    832}
    833EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn);
    834
    835#if IS_ENABLED(CONFIG_IPV6)
    836int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
    837				    struct nf_conn *ct,
    838				    enum ip_conntrack_info ctinfo,
    839				    unsigned int hooknum,
    840				    unsigned int hdrlen)
    841{
    842	struct {
    843		struct icmp6hdr	icmp6;
    844		struct ipv6hdr	ip6;
    845	} *inside;
    846	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
    847	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
    848	struct nf_conntrack_tuple target;
    849	unsigned long statusbit;
    850
    851	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
    852
    853	if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
    854		return 0;
    855	if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
    856		return 0;
    857
    858	inside = (void *)skb->data + hdrlen;
    859	if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
    860		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
    861			return 0;
    862		if (ct->status & IPS_NAT_MASK)
    863			return 0;
    864	}
    865
    866	if (manip == NF_NAT_MANIP_SRC)
    867		statusbit = IPS_SRC_NAT;
    868	else
    869		statusbit = IPS_DST_NAT;
    870
    871	/* Invert if this is reply direction */
    872	if (dir == IP_CT_DIR_REPLY)
    873		statusbit ^= IPS_NAT_MASK;
    874
    875	if (!(ct->status & statusbit))
    876		return 1;
    877
    878	if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
    879				   &ct->tuplehash[!dir].tuple, !manip))
    880		return 0;
    881
    882	if (skb->ip_summed != CHECKSUM_PARTIAL) {
    883		struct ipv6hdr *ipv6h = ipv6_hdr(skb);
    884
    885		inside = (void *)skb->data + hdrlen;
    886		inside->icmp6.icmp6_cksum = 0;
    887		inside->icmp6.icmp6_cksum =
    888			csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
    889					skb->len - hdrlen, IPPROTO_ICMPV6,
    890					skb_checksum(skb, hdrlen,
    891						     skb->len - hdrlen, 0));
    892	}
    893
    894	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
    895	target.dst.protonum = IPPROTO_ICMPV6;
    896	if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
    897		return 0;
    898
    899	return 1;
    900}
    901EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
    902
    903static unsigned int
    904nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
    905	       const struct nf_hook_state *state)
    906{
    907	struct nf_conn *ct;
    908	enum ip_conntrack_info ctinfo;
    909	__be16 frag_off;
    910	int hdrlen;
    911	u8 nexthdr;
    912
    913	ct = nf_ct_get(skb, &ctinfo);
    914	/* Can't track?  It's not due to stress, or conntrack would
    915	 * have dropped it.  Hence it's the user's responsibilty to
    916	 * packet filter it out, or implement conntrack/NAT for that
    917	 * protocol. 8) --RR
    918	 */
    919	if (!ct)
    920		return NF_ACCEPT;
    921
    922	if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
    923		nexthdr = ipv6_hdr(skb)->nexthdr;
    924		hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
    925					  &nexthdr, &frag_off);
    926
    927		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
    928			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
    929							     state->hook,
    930							     hdrlen))
    931				return NF_DROP;
    932			else
    933				return NF_ACCEPT;
    934		}
    935	}
    936
    937	return nf_nat_inet_fn(priv, skb, state);
    938}
    939
    940static unsigned int
    941nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
    942	       const struct nf_hook_state *state)
    943{
    944	unsigned int ret;
    945	struct in6_addr daddr = ipv6_hdr(skb)->daddr;
    946
    947	ret = nf_nat_ipv6_fn(priv, skb, state);
    948	if (ret != NF_DROP && ret != NF_STOLEN &&
    949	    ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
    950		skb_dst_drop(skb);
    951
    952	return ret;
    953}
    954
    955static unsigned int
    956nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
    957		const struct nf_hook_state *state)
    958{
    959#ifdef CONFIG_XFRM
    960	const struct nf_conn *ct;
    961	enum ip_conntrack_info ctinfo;
    962	int err;
    963#endif
    964	unsigned int ret;
    965
    966	ret = nf_nat_ipv6_fn(priv, skb, state);
    967#ifdef CONFIG_XFRM
    968	if (ret != NF_ACCEPT)
    969		return ret;
    970
    971	if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
    972		return ret;
    973	ct = nf_ct_get(skb, &ctinfo);
    974	if (ct) {
    975		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
    976
    977		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
    978				      &ct->tuplehash[!dir].tuple.dst.u3) ||
    979		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
    980		     ct->tuplehash[dir].tuple.src.u.all !=
    981		     ct->tuplehash[!dir].tuple.dst.u.all)) {
    982			err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
    983			if (err < 0)
    984				ret = NF_DROP_ERR(err);
    985		}
    986	}
    987#endif
    988
    989	return ret;
    990}
    991
    992static unsigned int
    993nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
    994		     const struct nf_hook_state *state)
    995{
    996	const struct nf_conn *ct;
    997	enum ip_conntrack_info ctinfo;
    998	unsigned int ret;
    999	int err;
   1000
   1001	ret = nf_nat_ipv6_fn(priv, skb, state);
   1002	if (ret != NF_ACCEPT)
   1003		return ret;
   1004
   1005	ct = nf_ct_get(skb, &ctinfo);
   1006	if (ct) {
   1007		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
   1008
   1009		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
   1010				      &ct->tuplehash[!dir].tuple.src.u3)) {
   1011			err = nf_ip6_route_me_harder(state->net, state->sk, skb);
   1012			if (err < 0)
   1013				ret = NF_DROP_ERR(err);
   1014		}
   1015#ifdef CONFIG_XFRM
   1016		else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
   1017			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
   1018			 ct->tuplehash[dir].tuple.dst.u.all !=
   1019			 ct->tuplehash[!dir].tuple.src.u.all) {
   1020			err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
   1021			if (err < 0)
   1022				ret = NF_DROP_ERR(err);
   1023		}
   1024#endif
   1025	}
   1026
   1027	return ret;
   1028}
   1029
   1030static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
   1031	/* Before packet filtering, change destination */
   1032	{
   1033		.hook		= nf_nat_ipv6_in,
   1034		.pf		= NFPROTO_IPV6,
   1035		.hooknum	= NF_INET_PRE_ROUTING,
   1036		.priority	= NF_IP6_PRI_NAT_DST,
   1037	},
   1038	/* After packet filtering, change source */
   1039	{
   1040		.hook		= nf_nat_ipv6_out,
   1041		.pf		= NFPROTO_IPV6,
   1042		.hooknum	= NF_INET_POST_ROUTING,
   1043		.priority	= NF_IP6_PRI_NAT_SRC,
   1044	},
   1045	/* Before packet filtering, change destination */
   1046	{
   1047		.hook		= nf_nat_ipv6_local_fn,
   1048		.pf		= NFPROTO_IPV6,
   1049		.hooknum	= NF_INET_LOCAL_OUT,
   1050		.priority	= NF_IP6_PRI_NAT_DST,
   1051	},
   1052	/* After packet filtering, change source */
   1053	{
   1054		.hook		= nf_nat_ipv6_fn,
   1055		.pf		= NFPROTO_IPV6,
   1056		.hooknum	= NF_INET_LOCAL_IN,
   1057		.priority	= NF_IP6_PRI_NAT_SRC,
   1058	},
   1059};
   1060
   1061int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
   1062{
   1063	return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv6_ops,
   1064				  ARRAY_SIZE(nf_nat_ipv6_ops));
   1065}
   1066EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn);
   1067
   1068void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
   1069{
   1070	nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
   1071}
   1072EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn);
   1073#endif /* CONFIG_IPV6 */
   1074
   1075#if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT)
   1076int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops)
   1077{
   1078	int ret;
   1079
   1080	if (WARN_ON_ONCE(ops->pf != NFPROTO_INET))
   1081		return -EINVAL;
   1082
   1083	ret = nf_nat_register_fn(net, NFPROTO_IPV6, ops, nf_nat_ipv6_ops,
   1084				 ARRAY_SIZE(nf_nat_ipv6_ops));
   1085	if (ret)
   1086		return ret;
   1087
   1088	ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops,
   1089				 ARRAY_SIZE(nf_nat_ipv4_ops));
   1090	if (ret)
   1091		nf_nat_unregister_fn(net, NFPROTO_IPV6, ops,
   1092					ARRAY_SIZE(nf_nat_ipv6_ops));
   1093	return ret;
   1094}
   1095EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn);
   1096
   1097void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
   1098{
   1099	nf_nat_unregister_fn(net, NFPROTO_IPV4, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
   1100	nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
   1101}
   1102EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn);
   1103#endif /* NFT INET NAT */