lwtunnel.c (9285B)
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * lwtunnel Infrastructure for light weight tunnels like mpls 4 * 5 * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com> 6 */ 7 8#include <linux/capability.h> 9#include <linux/module.h> 10#include <linux/types.h> 11#include <linux/kernel.h> 12#include <linux/slab.h> 13#include <linux/uaccess.h> 14#include <linux/skbuff.h> 15#include <linux/netdevice.h> 16#include <linux/lwtunnel.h> 17#include <linux/in.h> 18#include <linux/init.h> 19#include <linux/err.h> 20 21#include <net/lwtunnel.h> 22#include <net/rtnetlink.h> 23#include <net/ip6_fib.h> 24#include <net/rtnh.h> 25 26DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); 27EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled); 28 29#ifdef CONFIG_MODULES 30 31static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) 32{ 33 /* Only lwt encaps implemented without using an interface for 34 * the encap need to return a string here. 35 */ 36 switch (encap_type) { 37 case LWTUNNEL_ENCAP_MPLS: 38 return "MPLS"; 39 case LWTUNNEL_ENCAP_ILA: 40 return "ILA"; 41 case LWTUNNEL_ENCAP_SEG6: 42 return "SEG6"; 43 case LWTUNNEL_ENCAP_BPF: 44 return "BPF"; 45 case LWTUNNEL_ENCAP_SEG6_LOCAL: 46 return "SEG6LOCAL"; 47 case LWTUNNEL_ENCAP_RPL: 48 return "RPL"; 49 case LWTUNNEL_ENCAP_IOAM6: 50 return "IOAM6"; 51 case LWTUNNEL_ENCAP_IP6: 52 case LWTUNNEL_ENCAP_IP: 53 case LWTUNNEL_ENCAP_NONE: 54 case __LWTUNNEL_ENCAP_MAX: 55 /* should not have got here */ 56 WARN_ON(1); 57 break; 58 } 59 return NULL; 60} 61 62#endif /* CONFIG_MODULES */ 63 64struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) 65{ 66 struct lwtunnel_state *lws; 67 68 lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC); 69 70 return lws; 71} 72EXPORT_SYMBOL_GPL(lwtunnel_state_alloc); 73 74static const struct lwtunnel_encap_ops __rcu * 75 lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; 76 77int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, 78 unsigned int num) 79{ 80 if (num > LWTUNNEL_ENCAP_MAX) 81 return -ERANGE; 82 83 return !cmpxchg((const struct lwtunnel_encap_ops **) 84 &lwtun_encaps[num], 85 NULL, ops) ? 0 : -1; 86} 87EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops); 88 89int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, 90 unsigned int encap_type) 91{ 92 int ret; 93 94 if (encap_type == LWTUNNEL_ENCAP_NONE || 95 encap_type > LWTUNNEL_ENCAP_MAX) 96 return -ERANGE; 97 98 ret = (cmpxchg((const struct lwtunnel_encap_ops **) 99 &lwtun_encaps[encap_type], 100 ops, NULL) == ops) ? 0 : -1; 101 102 synchronize_net(); 103 104 return ret; 105} 106EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops); 107 108int lwtunnel_build_state(struct net *net, u16 encap_type, 109 struct nlattr *encap, unsigned int family, 110 const void *cfg, struct lwtunnel_state **lws, 111 struct netlink_ext_ack *extack) 112{ 113 const struct lwtunnel_encap_ops *ops; 114 bool found = false; 115 int ret = -EINVAL; 116 117 if (encap_type == LWTUNNEL_ENCAP_NONE || 118 encap_type > LWTUNNEL_ENCAP_MAX) { 119 NL_SET_ERR_MSG_ATTR(extack, encap, 120 "Unknown LWT encapsulation type"); 121 return ret; 122 } 123 124 ret = -EOPNOTSUPP; 125 rcu_read_lock(); 126 ops = rcu_dereference(lwtun_encaps[encap_type]); 127 if (likely(ops && ops->build_state && try_module_get(ops->owner))) 128 found = true; 129 rcu_read_unlock(); 130 131 if (found) { 132 ret = ops->build_state(net, encap, family, cfg, lws, extack); 133 if (ret) 134 module_put(ops->owner); 135 } else { 136 /* don't rely on -EOPNOTSUPP to detect match as build_state 137 * handlers could return it 138 */ 139 NL_SET_ERR_MSG_ATTR(extack, encap, 140 "LWT encapsulation type not supported"); 141 } 142 143 return ret; 144} 145EXPORT_SYMBOL_GPL(lwtunnel_build_state); 146 147int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) 148{ 149 const struct lwtunnel_encap_ops *ops; 150 int ret = -EINVAL; 151 152 if (encap_type == LWTUNNEL_ENCAP_NONE || 153 encap_type > LWTUNNEL_ENCAP_MAX) { 154 NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type"); 155 return ret; 156 } 157 158 rcu_read_lock(); 159 ops = rcu_dereference(lwtun_encaps[encap_type]); 160 rcu_read_unlock(); 161#ifdef CONFIG_MODULES 162 if (!ops) { 163 const char *encap_type_str = lwtunnel_encap_str(encap_type); 164 165 if (encap_type_str) { 166 __rtnl_unlock(); 167 request_module("rtnl-lwt-%s", encap_type_str); 168 rtnl_lock(); 169 170 rcu_read_lock(); 171 ops = rcu_dereference(lwtun_encaps[encap_type]); 172 rcu_read_unlock(); 173 } 174 } 175#endif 176 ret = ops ? 0 : -EOPNOTSUPP; 177 if (ret < 0) 178 NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported"); 179 180 return ret; 181} 182EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type); 183 184int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, 185 struct netlink_ext_ack *extack) 186{ 187 struct rtnexthop *rtnh = (struct rtnexthop *)attr; 188 struct nlattr *nla_entype; 189 struct nlattr *attrs; 190 u16 encap_type; 191 int attrlen; 192 193 while (rtnh_ok(rtnh, remaining)) { 194 attrlen = rtnh_attrlen(rtnh); 195 if (attrlen > 0) { 196 attrs = rtnh_attrs(rtnh); 197 nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 198 199 if (nla_entype) { 200 if (nla_len(nla_entype) < sizeof(u16)) { 201 NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE"); 202 return -EINVAL; 203 } 204 encap_type = nla_get_u16(nla_entype); 205 206 if (lwtunnel_valid_encap_type(encap_type, 207 extack) != 0) 208 return -EOPNOTSUPP; 209 } 210 } 211 rtnh = rtnh_next(rtnh, &remaining); 212 } 213 214 return 0; 215} 216EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr); 217 218void lwtstate_free(struct lwtunnel_state *lws) 219{ 220 const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; 221 222 if (ops->destroy_state) { 223 ops->destroy_state(lws); 224 kfree_rcu(lws, rcu); 225 } else { 226 kfree(lws); 227 } 228 module_put(ops->owner); 229} 230EXPORT_SYMBOL_GPL(lwtstate_free); 231 232int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, 233 int encap_attr, int encap_type_attr) 234{ 235 const struct lwtunnel_encap_ops *ops; 236 struct nlattr *nest; 237 int ret; 238 239 if (!lwtstate) 240 return 0; 241 242 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 243 lwtstate->type > LWTUNNEL_ENCAP_MAX) 244 return 0; 245 246 nest = nla_nest_start_noflag(skb, encap_attr); 247 if (!nest) 248 return -EMSGSIZE; 249 250 ret = -EOPNOTSUPP; 251 rcu_read_lock(); 252 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 253 if (likely(ops && ops->fill_encap)) 254 ret = ops->fill_encap(skb, lwtstate); 255 rcu_read_unlock(); 256 257 if (ret) 258 goto nla_put_failure; 259 nla_nest_end(skb, nest); 260 ret = nla_put_u16(skb, encap_type_attr, lwtstate->type); 261 if (ret) 262 goto nla_put_failure; 263 264 return 0; 265 266nla_put_failure: 267 nla_nest_cancel(skb, nest); 268 269 return (ret == -EOPNOTSUPP ? 0 : ret); 270} 271EXPORT_SYMBOL_GPL(lwtunnel_fill_encap); 272 273int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) 274{ 275 const struct lwtunnel_encap_ops *ops; 276 int ret = 0; 277 278 if (!lwtstate) 279 return 0; 280 281 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 282 lwtstate->type > LWTUNNEL_ENCAP_MAX) 283 return 0; 284 285 rcu_read_lock(); 286 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 287 if (likely(ops && ops->get_encap_size)) 288 ret = nla_total_size(ops->get_encap_size(lwtstate)); 289 rcu_read_unlock(); 290 291 return ret; 292} 293EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size); 294 295int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) 296{ 297 const struct lwtunnel_encap_ops *ops; 298 int ret = 0; 299 300 if (!a && !b) 301 return 0; 302 303 if (!a || !b) 304 return 1; 305 306 if (a->type != b->type) 307 return 1; 308 309 if (a->type == LWTUNNEL_ENCAP_NONE || 310 a->type > LWTUNNEL_ENCAP_MAX) 311 return 0; 312 313 rcu_read_lock(); 314 ops = rcu_dereference(lwtun_encaps[a->type]); 315 if (likely(ops && ops->cmp_encap)) 316 ret = ops->cmp_encap(a, b); 317 rcu_read_unlock(); 318 319 return ret; 320} 321EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap); 322 323int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) 324{ 325 struct dst_entry *dst = skb_dst(skb); 326 const struct lwtunnel_encap_ops *ops; 327 struct lwtunnel_state *lwtstate; 328 int ret = -EINVAL; 329 330 if (!dst) 331 goto drop; 332 lwtstate = dst->lwtstate; 333 334 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 335 lwtstate->type > LWTUNNEL_ENCAP_MAX) 336 return 0; 337 338 ret = -EOPNOTSUPP; 339 rcu_read_lock(); 340 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 341 if (likely(ops && ops->output)) 342 ret = ops->output(net, sk, skb); 343 rcu_read_unlock(); 344 345 if (ret == -EOPNOTSUPP) 346 goto drop; 347 348 return ret; 349 350drop: 351 kfree_skb(skb); 352 353 return ret; 354} 355EXPORT_SYMBOL_GPL(lwtunnel_output); 356 357int lwtunnel_xmit(struct sk_buff *skb) 358{ 359 struct dst_entry *dst = skb_dst(skb); 360 const struct lwtunnel_encap_ops *ops; 361 struct lwtunnel_state *lwtstate; 362 int ret = -EINVAL; 363 364 if (!dst) 365 goto drop; 366 367 lwtstate = dst->lwtstate; 368 369 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 370 lwtstate->type > LWTUNNEL_ENCAP_MAX) 371 return 0; 372 373 ret = -EOPNOTSUPP; 374 rcu_read_lock(); 375 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 376 if (likely(ops && ops->xmit)) 377 ret = ops->xmit(skb); 378 rcu_read_unlock(); 379 380 if (ret == -EOPNOTSUPP) 381 goto drop; 382 383 return ret; 384 385drop: 386 kfree_skb(skb); 387 388 return ret; 389} 390EXPORT_SYMBOL_GPL(lwtunnel_xmit); 391 392int lwtunnel_input(struct sk_buff *skb) 393{ 394 struct dst_entry *dst = skb_dst(skb); 395 const struct lwtunnel_encap_ops *ops; 396 struct lwtunnel_state *lwtstate; 397 int ret = -EINVAL; 398 399 if (!dst) 400 goto drop; 401 lwtstate = dst->lwtstate; 402 403 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 404 lwtstate->type > LWTUNNEL_ENCAP_MAX) 405 return 0; 406 407 ret = -EOPNOTSUPP; 408 rcu_read_lock(); 409 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 410 if (likely(ops && ops->input)) 411 ret = ops->input(skb); 412 rcu_read_unlock(); 413 414 if (ret == -EOPNOTSUPP) 415 goto drop; 416 417 return ret; 418 419drop: 420 kfree_skb(skb); 421 422 return ret; 423} 424EXPORT_SYMBOL_GPL(lwtunnel_input);