cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

ip_tunnels.h (13993B)


      1/* SPDX-License-Identifier: GPL-2.0 */
      2#ifndef __NET_IP_TUNNELS_H
      3#define __NET_IP_TUNNELS_H 1
      4
      5#include <linux/if_tunnel.h>
      6#include <linux/netdevice.h>
      7#include <linux/skbuff.h>
      8#include <linux/socket.h>
      9#include <linux/types.h>
     10#include <linux/u64_stats_sync.h>
     11#include <linux/bitops.h>
     12
     13#include <net/dsfield.h>
     14#include <net/gro_cells.h>
     15#include <net/inet_ecn.h>
     16#include <net/netns/generic.h>
     17#include <net/rtnetlink.h>
     18#include <net/lwtunnel.h>
     19#include <net/dst_cache.h>
     20
     21#if IS_ENABLED(CONFIG_IPV6)
     22#include <net/ipv6.h>
     23#include <net/ip6_fib.h>
     24#include <net/ip6_route.h>
     25#endif
     26
     27/* Keep error state on tunnel for 30 sec */
     28#define IPTUNNEL_ERR_TIMEO	(30*HZ)
     29
     30/* Used to memset ip_tunnel padding. */
     31#define IP_TUNNEL_KEY_SIZE	offsetofend(struct ip_tunnel_key, tp_dst)
     32
     33/* Used to memset ipv4 address padding. */
     34#define IP_TUNNEL_KEY_IPV4_PAD	offsetofend(struct ip_tunnel_key, u.ipv4.dst)
     35#define IP_TUNNEL_KEY_IPV4_PAD_LEN				\
     36	(sizeof_field(struct ip_tunnel_key, u) -		\
     37	 sizeof_field(struct ip_tunnel_key, u.ipv4))
     38
/* Per-packet tunnel metadata key.  Fields up to and including tp_dst
 * are covered by IP_TUNNEL_KEY_SIZE; the memsets in
 * ip_tunnel_key_init() clear the unused union space and trailing
 * padding so uninitialized bytes never leak into the key.
 */
struct ip_tunnel_key {
	__be64			tun_id;		/* tunnel identifier */
	union {
		struct {
			__be32	src;
			__be32	dst;
		} ipv4;
		struct {
			struct in6_addr src;
			struct in6_addr dst;
		} ipv6;
	} u;					/* outer source/destination addresses */
	__be16			tun_flags;	/* TUNNEL_* flag bits (e.g. TUNNEL_NOCACHE) */
	u8			tos;		/* TOS for IPv4, TC for IPv6 */
	u8			ttl;		/* TTL for IPv4, HL for IPv6 */
	__be32			label;		/* Flow Label for IPv6 */
	__be16			tp_src;		/* outer transport source port */
	__be16			tp_dst;		/* outer transport destination port */
};
     58
     59/* Flags for ip_tunnel_info mode. */
     60#define IP_TUNNEL_INFO_TX	0x01	/* represents tx tunnel parameters */
     61#define IP_TUNNEL_INFO_IPV6	0x02	/* key contains IPv6 addresses */
     62#define IP_TUNNEL_INFO_BRIDGE	0x04	/* represents a bridged tunnel id */
     63
     64/* Maximum tunnel options length. */
     65#define IP_TUNNEL_OPTS_MAX					\
     66	GENMASK((sizeof_field(struct ip_tunnel_info,		\
     67			      options_len) * BITS_PER_BYTE) - 1, 0)
     68
/* Tunnel metadata carried with an skb.  Variable-length tunnel options
 * may follow the structure directly in memory (see
 * ip_tunnel_info_opts()).
 */
struct ip_tunnel_info {
	struct ip_tunnel_key	key;
#ifdef CONFIG_DST_CACHE
	struct dst_cache	dst_cache;	/* cached route for this tunnel */
#endif
	u8			options_len;	/* bytes of options stored after the struct */
	u8			mode;		/* IP_TUNNEL_INFO_* flag bits */
};
     77
/* 6rd (IPv6 rapid deployment) prefix/relay information for SIT. */
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd_parm {
	struct in6_addr		prefix;		/* 6rd IPv6 prefix */
	__be32			relay_prefix;	/* IPv4 relay prefix */
	u16			prefixlen;	/* length of @prefix in bits */
	u16			relay_prefixlen; /* length of @relay_prefix in bits */
};
#endif
     87
/* Secondary encapsulation (e.g. FOU/GUE) applied on top of the tunnel
 * header; consumed by ip_encap_hlen()/ip_tunnel_encap().
 */
struct ip_tunnel_encap {
	u16			type;	/* TUNNEL_ENCAP_* type, indexes iptun_encaps[] */
	u16			flags;
	__be16			sport;	/* encap source port */
	__be16			dport;	/* encap destination port */
};
     94
/* Node of the RCU-protected potential router list (see ip_tunnel.prl). */
struct ip_tunnel_prl_entry {
	struct ip_tunnel_prl_entry __rcu *next;	/* singly linked RCU list */
	__be32				addr;	/* router IPv4 address */
	u16				flags;
	struct rcu_head			rcu_head; /* for deferred free */
};
    101
    102struct metadata_dst;
    103
/* State of one configured IP tunnel device (GRE, SIT and similar
 * drivers).  Instances are chained into the per-netns hash table of
 * struct ip_tunnel_net.
 */
struct ip_tunnel {
	struct ip_tunnel __rcu	*next;		/* hash-chain linkage */
	struct hlist_node hash_node;

	struct net_device	*dev;		/* backing net device */
	netdevice_tracker	dev_tracker;

	struct net		*net;	/* netns for packet i/o */

	unsigned long	err_time;	/* Time when the last ICMP error
					 * arrived */
	int		err_count;	/* Number of arrived ICMP errors */

	/* These three fields used only by GRE */
	u32		i_seqno;	/* The last seen seqno	*/
	atomic_t	o_seqno;	/* The last output seqno */
	int		tun_hlen;	/* Precalculated header length */

	/* These four fields used only by ERSPAN */
	u32		index;		/* ERSPAN type II index */
	u8		erspan_ver;	/* ERSPAN version */
	u8		dir;		/* ERSPAN direction */
	u16		hwid;		/* ERSPAN hardware ID */

	struct dst_cache dst_cache;	/* cached output route */

	struct ip_tunnel_parm parms;	/* configured tunnel parameters */

	int		mlink;
	int		encap_hlen;	/* Encap header length (FOU,GUE) */
	int		hlen;		/* tun_hlen + encap_hlen */
	struct ip_tunnel_encap encap;	/* secondary encapsulation config */

	/* for SIT */
#ifdef CONFIG_IPV6_SIT_6RD
	struct ip_tunnel_6rd_parm ip6rd;
#endif
	struct ip_tunnel_prl_entry __rcu *prl;	/* potential router list */
	unsigned int		prl_count;	/* # of entries in PRL */
	unsigned int		ip_tnl_net_id;	/* pernet id of the owning ip_tunnel_net */
	struct gro_cells	gro_cells;
	__u32			fwmark;
	bool			collect_md;	/* metadata-collection mode (see collect_md_tun) */
	bool			ignore_df;
};
    149
/* Parsed tunnel packet info, handed to the receive path
 * (see ip_tunnel_rcv()).
 */
struct tnl_ptk_info {
	__be16 flags;	/* TUNNEL_* flags found in the header */
	__be16 proto;	/* protocol of the encapsulated packet */
	__be32 key;	/* tunnel key from the header, if any */
	__be32 seq;	/* sequence number from the header, if any */
	int hdr_len;	/* length of the tunnel header in bytes */
};
    157
/* Dispositions returned by tunnel packet receive handlers.
 * NOTE(review): names suggest accepted / rejected / pass-to-next;
 * confirm against the callers of ip_tunnel_rcv().
 */
#define PACKET_RCVD	0
#define PACKET_REJECT	1
#define PACKET_NEXT	2

/* Size of the per-netns tunnel hash table (see ip_tunnel_net.tunnels). */
#define IP_TNL_HASH_BITS   7
#define IP_TNL_HASH_SIZE   (1 << IP_TNL_HASH_BITS)
    164
/* Per-netns state for one tunnel type: the fallback device plus a hash
 * table of all tunnels of that type in the namespace.
 */
struct ip_tunnel_net {
	struct net_device *fb_tunnel_dev;	/* fallback tunnel device */
	struct rtnl_link_ops *rtnl_link_ops;
	struct hlist_head tunnels[IP_TNL_HASH_SIZE];	/* hash buckets, keyed per ip_tunnel_lookup() */
	struct ip_tunnel __rcu *collect_md_tun;	/* metadata-mode tunnel, if any */
	int type;
};
    172
/* Initialize @key for an IPv4 tunnel endpoint.  The unused IPv6 part
 * of the address union and any trailing struct padding are zeroed, so
 * no uninitialized bytes remain in the key.
 */
static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
				      __be32 saddr, __be32 daddr,
				      u8 tos, u8 ttl, __be32 label,
				      __be16 tp_src, __be16 tp_dst,
				      __be64 tun_id, __be16 tun_flags)
{
	key->tun_id = tun_id;
	key->u.ipv4.src = saddr;
	key->u.ipv4.dst = daddr;
	/* Zero the part of the address union beyond u.ipv4. */
	memset((unsigned char *)key + IP_TUNNEL_KEY_IPV4_PAD,
	       0, IP_TUNNEL_KEY_IPV4_PAD_LEN);
	key->tos = tos;
	key->ttl = ttl;
	key->label = label;
	key->tun_flags = tun_flags;

	/* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
	 * the upper tunnel are used.
	 * E.g: GRE over IPSEC, the tp_src and tp_dst are zero.
	 */
	key->tp_src = tp_src;
	key->tp_dst = tp_dst;

	/* Clear struct padding (compile-time no-op when there is none). */
	if (sizeof(*key) != IP_TUNNEL_KEY_SIZE)
		memset((unsigned char *)key + IP_TUNNEL_KEY_SIZE,
		       0, sizeof(*key) - IP_TUNNEL_KEY_SIZE);
}
    201
    202static inline bool
    203ip_tunnel_dst_cache_usable(const struct sk_buff *skb,
    204			   const struct ip_tunnel_info *info)
    205{
    206	if (skb->mark)
    207		return false;
    208	if (!info)
    209		return true;
    210	if (info->key.tun_flags & TUNNEL_NOCACHE)
    211		return false;
    212
    213	return true;
    214}
    215
    216static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info
    217					       *tun_info)
    218{
    219	return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET;
    220}
    221
    222static inline __be64 key32_to_tunnel_id(__be32 key)
    223{
    224#ifdef __BIG_ENDIAN
    225	return (__force __be64)key;
    226#else
    227	return (__force __be64)((__force u64)key << 32);
    228#endif
    229}
    230
    231/* Returns the least-significant 32 bits of a __be64. */
    232static inline __be32 tunnel_id_to_key32(__be64 tun_id)
    233{
    234#ifdef __BIG_ENDIAN
    235	return (__force __be32)tun_id;
    236#else
    237	return (__force __be32)((__force u64)tun_id >> 32);
    238#endif
    239}
    240
    241#ifdef CONFIG_INET
    242
    243static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
    244				       int proto,
    245				       __be32 daddr, __be32 saddr,
    246				       __be32 key, __u8 tos,
    247				       struct net *net, int oif,
    248				       __u32 mark, __u32 tun_inner_hash)
    249{
    250	memset(fl4, 0, sizeof(*fl4));
    251
    252	if (oif) {
    253		fl4->flowi4_l3mdev = l3mdev_master_upper_ifindex_by_index_rcu(net, oif);
    254		/* Legacy VRF/l3mdev use case */
    255		fl4->flowi4_oif = fl4->flowi4_l3mdev ? 0 : oif;
    256	}
    257
    258	fl4->daddr = daddr;
    259	fl4->saddr = saddr;
    260	fl4->flowi4_tos = tos;
    261	fl4->flowi4_proto = proto;
    262	fl4->fl4_gre_key = key;
    263	fl4->flowi4_mark = mark;
    264	fl4->flowi4_multipath_hash = tun_inner_hash;
    265}
    266
    267int ip_tunnel_init(struct net_device *dev);
    268void ip_tunnel_uninit(struct net_device *dev);
    269void  ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
    270struct net *ip_tunnel_get_link_net(const struct net_device *dev);
    271int ip_tunnel_get_iflink(const struct net_device *dev);
    272int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
    273		       struct rtnl_link_ops *ops, char *devname);
    274
    275void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id,
    276			   struct rtnl_link_ops *ops);
    277
    278void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
    279		    const struct iphdr *tnl_params, const u8 protocol);
    280void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
    281		       const u8 proto, int tunnel_hlen);
    282int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
    283int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
    284			     void __user *data, int cmd);
    285int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
    286int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
    287
    288struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
    289				   int link, __be16 flags,
    290				   __be32 remote, __be32 local,
    291				   __be32 key);
    292
    293int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
    294		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
    295		  bool log_ecn_error);
    296int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
    297			 struct ip_tunnel_parm *p, __u32 fwmark);
    298int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
    299		      struct ip_tunnel_parm *p, __u32 fwmark);
    300void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);
    301
    302extern const struct header_ops ip_tunnel_header_ops;
    303__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb);
    304
/* Per-encap-type operations, registered in iptun_encaps[] via
 * ip_tunnel_encap_add_ops()/ip_tunnel_encap_del_ops().
 */
struct ip_tunnel_encap_ops {
	/* Return the extra header length this encap adds. */
	size_t (*encap_hlen)(struct ip_tunnel_encap *e);
	/* Build the encap header on @skb; may update *protocol and *fl4. */
	int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
			    u8 *protocol, struct flowi4 *fl4);
	/* Handle an ICMP error for this encap type. */
	int (*err_handler)(struct sk_buff *skb, u32 info);
};
    311
    312#define MAX_IPTUN_ENCAP_OPS 8
    313
    314extern const struct ip_tunnel_encap_ops __rcu *
    315		iptun_encaps[MAX_IPTUN_ENCAP_OPS];
    316
    317int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op,
    318			    unsigned int num);
    319int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op,
    320			    unsigned int num);
    321
    322int ip_tunnel_encap_setup(struct ip_tunnel *t,
    323			  struct ip_tunnel_encap *ipencap);
    324
    325static inline bool pskb_inet_may_pull(struct sk_buff *skb)
    326{
    327	int nhlen;
    328
    329	switch (skb->protocol) {
    330#if IS_ENABLED(CONFIG_IPV6)
    331	case htons(ETH_P_IPV6):
    332		nhlen = sizeof(struct ipv6hdr);
    333		break;
    334#endif
    335	case htons(ETH_P_IP):
    336		nhlen = sizeof(struct iphdr);
    337		break;
    338	default:
    339		nhlen = 0;
    340	}
    341
    342	return pskb_network_may_pull(skb, nhlen);
    343}
    344
/* Return the encapsulation header length for @e: 0 when no encap is
 * configured, -EINVAL for an unknown or unregistered encap type,
 * otherwise whatever ops->encap_hlen() reports.
 */
static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
{
	const struct ip_tunnel_encap_ops *ops;
	int hlen = -EINVAL;

	if (e->type == TUNNEL_ENCAP_NONE)
		return 0;

	/* e->type indexes iptun_encaps[]; reject out-of-range values. */
	if (e->type >= MAX_IPTUN_ENCAP_OPS)
		return -EINVAL;

	/* iptun_encaps[] entries are published/retired under RCU
	 * (see ip_tunnel_encap_add_ops/del_ops). */
	rcu_read_lock();
	ops = rcu_dereference(iptun_encaps[e->type]);
	if (likely(ops && ops->encap_hlen))
		hlen = ops->encap_hlen(e);
	rcu_read_unlock();

	return hlen;
}
    364
/* Build the configured secondary encapsulation header for @skb.
 * Returns 0 when no encap is configured, -EINVAL for an unknown or
 * unregistered encap type, otherwise the ops->build_header() result.
 */
static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
				  u8 *protocol, struct flowi4 *fl4)
{
	const struct ip_tunnel_encap_ops *ops;
	int ret = -EINVAL;

	if (t->encap.type == TUNNEL_ENCAP_NONE)
		return 0;

	/* encap.type indexes iptun_encaps[]; reject out-of-range values. */
	if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
		return -EINVAL;

	/* iptun_encaps[] entries are published/retired under RCU. */
	rcu_read_lock();
	ops = rcu_dereference(iptun_encaps[t->encap.type]);
	if (likely(ops && ops->build_header))
		ret = ops->build_header(skb, &t->encap, protocol, fl4);
	rcu_read_unlock();

	return ret;
}
    385
    386/* Extract dsfield from inner protocol */
    387static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
    388				       const struct sk_buff *skb)
    389{
    390	if (skb->protocol == htons(ETH_P_IP))
    391		return iph->tos;
    392	else if (skb->protocol == htons(ETH_P_IPV6))
    393		return ipv6_get_dsfield((const struct ipv6hdr *)iph);
    394	else
    395		return 0;
    396}
    397
    398static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph,
    399				       const struct sk_buff *skb)
    400{
    401	if (skb->protocol == htons(ETH_P_IP))
    402		return iph->ttl;
    403	else if (skb->protocol == htons(ETH_P_IPV6))
    404		return ((const struct ipv6hdr *)iph)->hop_limit;
    405	else
    406		return 0;
    407}
    408
/* Propagate ECN bits out: combine the requested outer @tos with the
 * inner header's dsfield via INET_ECN_encapsulate().
 */
static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
				     const struct sk_buff *skb)
{
	u8 inner = ip_tunnel_get_dsfield(iph, skb);

	return INET_ECN_encapsulate(tos, inner);
}
    417
    418int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
    419			   __be16 inner_proto, bool raw_proto, bool xnet);
    420
/* Convenience wrapper around __iptunnel_pull_header() with raw_proto
 * fixed to false.
 */
static inline int iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
				       __be16 inner_proto, bool xnet)
{
	return __iptunnel_pull_header(skb, hdr_len, inner_proto, false, xnet);
}
    426
    427void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
    428		   __be32 src, __be32 dst, u8 proto,
    429		   u8 tos, u8 ttl, __be16 df, bool xnet);
    430struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
    431					     gfp_t flags);
    432int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst,
    433			  int headroom, bool reply);
    434
    435int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask);
    436
/* Drop tunnel-encapsulation offload state from @skb.  Returns 0 on
 * success or a negative errno if the skb could not be uncloned.
 */
static inline int iptunnel_pull_offloads(struct sk_buff *skb)
{
	if (skb_is_gso(skb)) {
		int err;

		/* gso_type lives in the shared info; unclone before
		 * modifying it. */
		err = skb_unclone(skb, GFP_ATOMIC);
		if (unlikely(err))
			return err;
		/* Clear all encapsulation GSO feature bits. */
		skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >>
					       NETIF_F_GSO_SHIFT);
	}

	skb->encapsulation = 0;
	return 0;
}
    452
    453static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
    454{
    455	if (pkt_len > 0) {
    456		struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats);
    457
    458		u64_stats_update_begin(&tstats->syncp);
    459		tstats->tx_bytes += pkt_len;
    460		tstats->tx_packets++;
    461		u64_stats_update_end(&tstats->syncp);
    462		put_cpu_ptr(tstats);
    463	} else {
    464		struct net_device_stats *err_stats = &dev->stats;
    465
    466		if (pkt_len < 0) {
    467			err_stats->tx_errors++;
    468			err_stats->tx_aborted_errors++;
    469		} else {
    470			err_stats->tx_dropped++;
    471		}
    472	}
    473}
    474
/* Tunnel options are stored immediately after the ip_tunnel_info
 * structure; there are info->options_len bytes of them.
 */
static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info)
{
	return info + 1;
}
    479
/* Copy the options stored behind @info into @to; the destination must
 * have room for info->options_len bytes.
 */
static inline void ip_tunnel_info_opts_get(void *to,
					   const struct ip_tunnel_info *info)
{
	memcpy(to, info + 1, info->options_len);
}
    485
    486static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
    487					   const void *from, int len,
    488					   __be16 flags)
    489{
    490	info->options_len = len;
    491	if (len > 0) {
    492		memcpy(ip_tunnel_info_opts(info), from, len);
    493		info->key.tun_flags |= flags;
    494	}
    495}
    496
/* Tunnel metadata lives in the lwtunnel state's trailing data area. */
static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
{
	return (struct ip_tunnel_info *)lwtstate->data;
}
    501
    502DECLARE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt);
    503
/* Returns > 0 if metadata should be collected; backed by a static
 * branch, presumably toggled by ip_tunnel_need_metadata()/
 * ip_tunnel_unneed_metadata() — confirm in their definitions.
 */
static inline int ip_tunnel_collect_metadata(void)
{
	return static_branch_unlikely(&ip_tunnel_metadata_cnt);
}
    509
    510void __init ip_tunnel_core_init(void);
    511
    512void ip_tunnel_need_metadata(void);
    513void ip_tunnel_unneed_metadata(void);
    514
    515#else /* CONFIG_INET */
    516
/* CONFIG_INET disabled: no-op stubs so callers need no #ifdefs. */
static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
{
	return NULL;
}

static inline void ip_tunnel_need_metadata(void)
{
}

static inline void ip_tunnel_unneed_metadata(void)
{
}

static inline void ip_tunnel_info_opts_get(void *to,
					   const struct ip_tunnel_info *info)
{
}

static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
					   const void *from, int len,
					   __be16 flags)
{
	/* No option storage without CONFIG_INET. */
	info->options_len = 0;
}
    541
    542#endif /* CONFIG_INET */
    543
    544#endif /* __NET_IP_TUNNELS_H */