nf_conntrack_ecache.c (8774B)
// SPDX-License-Identifier: GPL-2.0-only
/* Event cache for netfilter. */

/*
 * (C) 2005 Harald Welte <laforge@gnumonks.org>
 * (C) 2005 Patrick McHardy <kaber@trash.net>
 * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/export.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_extend.h>

static DEFINE_MUTEX(nf_ct_ecache_mutex);

#define DYING_NULLS_VAL      ((1 << 30) + 1)
#define ECACHE_MAX_JIFFIES   msecs_to_jiffies(10)
#define ECACHE_RETRY_JIFFIES msecs_to_jiffies(10)

enum retry_state {
        STATE_CONGESTED,
        STATE_RESTART,
        STATE_DONE,
};

struct nf_conntrack_net_ecache *nf_conn_pernet_ecache(const struct net *net)
{
        struct nf_conntrack_net *cnet = nf_ct_pernet(net);

        return &cnet->ecache;
}
#if IS_MODULE(CONFIG_NF_CT_NETLINK)
EXPORT_SYMBOL_GPL(nf_conn_pernet_ecache);
#endif

static enum retry_state ecache_work_evict_list(struct nf_conntrack_net *cnet)
{
        unsigned long stop = jiffies + ECACHE_MAX_JIFFIES;
        struct hlist_nulls_head evicted_list;
        enum retry_state ret = STATE_DONE;
        struct nf_conntrack_tuple_hash *h;
        struct hlist_nulls_node *n;
        unsigned int sent;

        INIT_HLIST_NULLS_HEAD(&evicted_list, DYING_NULLS_VAL);

next:
        sent = 0;
        spin_lock_bh(&cnet->ecache.dying_lock);

        hlist_nulls_for_each_entry_safe(h, n, &cnet->ecache.dying_list, hnnode) {
                struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);

                /* The worker owns all entries, ct remains valid until nf_ct_put
                 * in the loop below.
                 */
                if (nf_conntrack_event(IPCT_DESTROY, ct)) {
                        ret = STATE_CONGESTED;
                        break;
                }

                hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
                hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, &evicted_list);

                if (time_after(stop, jiffies)) {
                        ret = STATE_RESTART;
                        break;
                }

                if (sent++ > 16) {
                        spin_unlock_bh(&cnet->ecache.dying_lock);
                        cond_resched();
                        goto next;
                }
        }

        spin_unlock_bh(&cnet->ecache.dying_lock);

        hlist_nulls_for_each_entry_safe(h, n, &evicted_list, hnnode) {
                struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);

                hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);
                nf_ct_put(ct);

                cond_resched();
        }

        return ret;
}

static void ecache_work(struct work_struct *work)
{
        struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache.dwork.work);
        int ret, delay = -1;

        ret = ecache_work_evict_list(cnet);
        switch (ret) {
        case STATE_CONGESTED:
                delay = ECACHE_RETRY_JIFFIES;
                break;
        case STATE_RESTART:
                delay = 0;
                break;
        case STATE_DONE:
                break;
        }

        if (delay >= 0)
                schedule_delayed_work(&cnet->ecache.dwork, delay);
}

static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
                                           const u32 events,
                                           const u32 missed,
                                           const struct nf_ct_event *item)
{
        struct net *net = nf_ct_net(item->ct);
        struct nf_ct_event_notifier *notify;
        u32 old, want;
        int ret;

        if (!((events | missed) & e->ctmask))
                return 0;

        rcu_read_lock();

        notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
        if (!notify) {
                rcu_read_unlock();
                return 0;
        }

        ret = notify->ct_event(events | missed, item);
        rcu_read_unlock();

        if (likely(ret >= 0 && missed == 0))
                return 0;

        do {
                old = READ_ONCE(e->missed);
                if (ret < 0)
                        want = old | events;
                else
                        want = old & ~missed;
        } while (cmpxchg(&e->missed, old, want) != old);

        return ret;
}

int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
                                  u32 portid, int report)
{
        struct nf_conntrack_ecache *e;
        struct nf_ct_event item;
        unsigned int missed;
        int ret;

        if (!nf_ct_is_confirmed(ct))
                return 0;

        e = nf_ct_ecache_find(ct);
        if (!e)
                return 0;

        memset(&item, 0, sizeof(item));

        item.ct = ct;
        item.portid = e->portid ? e->portid : portid;
        item.report = report;

        /* This is a resent of a destroy event? If so, skip missed */
        missed = e->portid ? 0 : e->missed;

        ret = __nf_conntrack_eventmask_report(e, events, missed, &item);
        if (unlikely(ret < 0 && (events & (1 << IPCT_DESTROY)))) {
                /* This is a destroy event that has been triggered by a process,
                 * we store the PORTID to include it in the retransmission.
                 */
                if (e->portid == 0 && portid != 0)
                        e->portid = portid;
        }

        return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);

/* deliver cached events and clear cache entry - must be called with locally
 * disabled softirqs */
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
        struct nf_conntrack_ecache *e;
        struct nf_ct_event item;
        unsigned int events;

        if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
                return;

        e = nf_ct_ecache_find(ct);
        if (e == NULL)
                return;

        events = xchg(&e->cache, 0);

        item.ct = ct;
        item.portid = 0;
        item.report = 0;

        /* We make a copy of the missed event cache without taking
         * the lock, thus we may send missed events twice. However,
         * this does not harm and it happens very rarely.
         */
        __nf_conntrack_eventmask_report(e, events, e->missed, &item);
}
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);

void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
                               struct nf_conntrack_expect *exp,
                               u32 portid, int report)

{
        struct net *net = nf_ct_exp_net(exp);
        struct nf_ct_event_notifier *notify;
        struct nf_conntrack_ecache *e;

        rcu_read_lock();
        notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
        if (!notify)
                goto out_unlock;

        e = nf_ct_ecache_find(exp->master);
        if (!e)
                goto out_unlock;

        if (e->expmask & (1 << event)) {
                struct nf_exp_event item = {
                        .exp    = exp,
                        .portid = portid,
                        .report = report
                };
                notify->exp_event(1 << event, &item);
        }
out_unlock:
        rcu_read_unlock();
}

void nf_conntrack_register_notifier(struct net *net,
                                    const struct nf_ct_event_notifier *new)
{
        struct nf_ct_event_notifier *notify;

        mutex_lock(&nf_ct_ecache_mutex);
        notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
                                           lockdep_is_held(&nf_ct_ecache_mutex));
        WARN_ON_ONCE(notify);
        rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
        mutex_unlock(&nf_ct_ecache_mutex);
}
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);

void nf_conntrack_unregister_notifier(struct net *net)
{
        mutex_lock(&nf_ct_ecache_mutex);
        RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
        mutex_unlock(&nf_ct_ecache_mutex);
        /* synchronize_rcu() is called after netns pre_exit */
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);

void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
{
        struct nf_conntrack_net *cnet = nf_ct_pernet(net);

        if (state == NFCT_ECACHE_DESTROY_FAIL &&
            !delayed_work_pending(&cnet->ecache.dwork)) {
                schedule_delayed_work(&cnet->ecache.dwork, HZ);
                net->ct.ecache_dwork_pending = true;
        } else if (state == NFCT_ECACHE_DESTROY_SENT) {
                if (!hlist_nulls_empty(&cnet->ecache.dying_list))
                        mod_delayed_work(system_wq, &cnet->ecache.dwork, 0);
                else
                        net->ct.ecache_dwork_pending = false;
        }
}

bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
{
        struct net *net = nf_ct_net(ct);
        struct nf_conntrack_ecache *e;

        switch (net->ct.sysctl_events) {
        case 0:
                /* assignment via template / ruleset? ignore sysctl. */
                if (ctmask || expmask)
                        break;
                return true;
        case 2: /* autodetect: no event listener, don't allocate extension. */
                if (!READ_ONCE(net->ct.ctnetlink_has_listener))
                        return true;
                fallthrough;
        case 1:
                /* always allocate an extension. */
                if (!ctmask && !expmask) {
                        ctmask = ~0;
                        expmask = ~0;
                }
                break;
        default:
                WARN_ON_ONCE(1);
                return true;
        }

        e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp);
        if (e) {
                e->ctmask = ctmask;
                e->expmask = expmask;
        }

        return e != NULL;
}
EXPORT_SYMBOL_GPL(nf_ct_ecache_ext_add);

#define NF_CT_EVENTS_DEFAULT 2
static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;

void nf_conntrack_ecache_pernet_init(struct net *net)
{
        struct nf_conntrack_net *cnet = nf_ct_pernet(net);

        net->ct.sysctl_events = nf_ct_events;

        INIT_DELAYED_WORK(&cnet->ecache.dwork, ecache_work);
        INIT_HLIST_NULLS_HEAD(&cnet->ecache.dying_list, DYING_NULLS_VAL);
        spin_lock_init(&cnet->ecache.dying_lock);

        BUILD_BUG_ON(__IPCT_MAX >= 16); /* e->ctmask is u16 */
}

void nf_conntrack_ecache_pernet_fini(struct net *net)
{
        struct nf_conntrack_net *cnet = nf_ct_pernet(net);

        cancel_delayed_work_sync(&cnet->ecache.dwork);
}
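The file above only implements the delivery machinery; the callbacks themselves are installed by a consumer through nf_conntrack_register_notifier(). The sketch below is illustrative and not part of this file: it shows how a hypothetical per-netns listener could be wired up. The struct name nf_ct_event_notifier, the ct_event/exp_event members and the register/unregister helpers are the ones visible above; the callback signatures are inferred from the call sites (ct_event() returns an int that the core treats as a congestion signal), and everything prefixed example_ is invented for the example.

/* Illustrative sketch only, not part of nf_conntrack_ecache.c.
 * Assumes the callback prototypes inferred from the call sites above;
 * the real definitions live in <net/netfilter/nf_conntrack_ecache.h>.
 */
#include <linux/atomic.h>
#include <net/net_namespace.h>
#include <net/netfilter/nf_conntrack_ecache.h>

static atomic_t example_destroy_count = ATOMIC_INIT(0);

static int example_ct_event(unsigned int events, const struct nf_ct_event *item)
{
        if (events & (1 << IPCT_DESTROY))
                atomic_inc(&example_destroy_count);

        /* A negative return value means "congested": the core ORs the
         * undelivered events into e->missed and redelivers them later,
         * see __nf_conntrack_eventmask_report() above.
         */
        return 0;
}

static int example_exp_event(unsigned int events, const struct nf_exp_event *item)
{
        return 0;
}

static const struct nf_ct_event_notifier example_notifier = {
        .ct_event  = example_ct_event,
        .exp_event = example_exp_event,
};

static int __net_init example_net_init(struct net *net)
{
        /* Only one notifier per netns: registration WARNs if a callback
         * is already installed (see nf_conntrack_register_notifier()).
         */
        nf_conntrack_register_notifier(net, &example_notifier);
        return 0;
}

static void __net_exit example_net_exit(struct net *net)
{
        nf_conntrack_unregister_notifier(net);
}

In practice the in-tree consumer of this interface is ctnetlink (nf_conntrack_netlink.c). A failed IPCT_DESTROY delivery is the case the eviction worker exists for: the conntrack core keeps such entries on the per-netns dying_list and schedules ecache_work(), which retries delivery and only drops the final reference once the destroy event went out.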