nft_set_pipapo.h (9090B)
1// SPDX-License-Identifier: GPL-2.0-only 2 3#ifndef _NFT_SET_PIPAPO_H 4 5#include <linux/log2.h> 6#include <net/ipv6.h> /* For the maximum length of a field */ 7 8/* Count of concatenated fields depends on count of 32-bit nftables registers */ 9#define NFT_PIPAPO_MAX_FIELDS NFT_REG32_COUNT 10 11/* Restrict usage to multiple fields, make sure rbtree is used otherwise */ 12#define NFT_PIPAPO_MIN_FIELDS 2 13 14/* Largest supported field size */ 15#define NFT_PIPAPO_MAX_BYTES (sizeof(struct in6_addr)) 16#define NFT_PIPAPO_MAX_BITS (NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE) 17 18/* Bits to be grouped together in table buckets depending on set size */ 19#define NFT_PIPAPO_GROUP_BITS_INIT NFT_PIPAPO_GROUP_BITS_SMALL_SET 20#define NFT_PIPAPO_GROUP_BITS_SMALL_SET 8 21#define NFT_PIPAPO_GROUP_BITS_LARGE_SET 4 22#define NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4 \ 23 BUILD_BUG_ON((NFT_PIPAPO_GROUP_BITS_SMALL_SET != 8) || \ 24 (NFT_PIPAPO_GROUP_BITS_LARGE_SET != 4)) 25#define NFT_PIPAPO_GROUPS_PER_BYTE(f) (BITS_PER_BYTE / (f)->bb) 26 27/* If a lookup table gets bigger than NFT_PIPAPO_LT_SIZE_HIGH, switch to the 28 * small group width, and switch to the big group width if the table gets 29 * smaller than NFT_PIPAPO_LT_SIZE_LOW. 30 * 31 * Picking 2MiB as threshold (for a single table) avoids as much as possible 32 * crossing page boundaries on most architectures (x86-64 and MIPS huge pages, 33 * ARMv7 supersections, POWER "large" pages, SPARC Level 1 regions, etc.), which 34 * keeps performance nice in case kvmalloc() gives us non-contiguous areas. 35 */ 36#define NFT_PIPAPO_LT_SIZE_THRESHOLD (1 << 21) 37#define NFT_PIPAPO_LT_SIZE_HYSTERESIS (1 << 16) 38#define NFT_PIPAPO_LT_SIZE_HIGH NFT_PIPAPO_LT_SIZE_THRESHOLD 39#define NFT_PIPAPO_LT_SIZE_LOW NFT_PIPAPO_LT_SIZE_THRESHOLD - \ 40 NFT_PIPAPO_LT_SIZE_HYSTERESIS 41 42/* Fields are padded to 32 bits in input registers */ 43#define NFT_PIPAPO_GROUPS_PADDED_SIZE(f) \ 44 (round_up((f)->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f), sizeof(u32))) 45#define NFT_PIPAPO_GROUPS_PADDING(f) \ 46 (NFT_PIPAPO_GROUPS_PADDED_SIZE(f) - (f)->groups / \ 47 NFT_PIPAPO_GROUPS_PER_BYTE(f)) 48 49/* Number of buckets given by 2 ^ n, with n bucket bits */ 50#define NFT_PIPAPO_BUCKETS(bb) (1 << (bb)) 51 52/* Each n-bit range maps to up to n * 2 rules */ 53#define NFT_PIPAPO_MAP_NBITS (const_ilog2(NFT_PIPAPO_MAX_BITS * 2)) 54 55/* Use the rest of mapping table buckets for rule indices, but it makes no sense 56 * to exceed 32 bits 57 */ 58#if BITS_PER_LONG == 64 59#define NFT_PIPAPO_MAP_TOBITS 32 60#else 61#define NFT_PIPAPO_MAP_TOBITS (BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS) 62#endif 63 64/* ...which gives us the highest allowed index for a rule */ 65#define NFT_PIPAPO_RULE0_MAX ((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \ 66 - (1UL << NFT_PIPAPO_MAP_NBITS)) 67 68/* Definitions for vectorised implementations */ 69#ifdef NFT_PIPAPO_ALIGN 70#define NFT_PIPAPO_ALIGN_HEADROOM \ 71 (NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN) 72#define NFT_PIPAPO_LT_ALIGN(lt) (PTR_ALIGN((lt), NFT_PIPAPO_ALIGN)) 73#define NFT_PIPAPO_LT_ASSIGN(field, x) \ 74 do { \ 75 (field)->lt_aligned = NFT_PIPAPO_LT_ALIGN(x); \ 76 (field)->lt = (x); \ 77 } while (0) 78#else 79#define NFT_PIPAPO_ALIGN_HEADROOM 0 80#define NFT_PIPAPO_LT_ALIGN(lt) (lt) 81#define NFT_PIPAPO_LT_ASSIGN(field, x) ((field)->lt = (x)) 82#endif /* NFT_PIPAPO_ALIGN */ 83 84#define nft_pipapo_for_each_field(field, index, match) \ 85 for ((field) = (match)->f, (index) = 0; \ 86 (index) < (match)->field_count; \ 87 (index)++, (field)++) 88 89/** 90 * union nft_pipapo_map_bucket - Bucket of mapping table 91 * @to: First rule number (in next field) this rule maps to 92 * @n: Number of rules (in next field) this rule maps to 93 * @e: If there's no next field, pointer to element this rule maps to 94 */ 95union nft_pipapo_map_bucket { 96 struct { 97#if BITS_PER_LONG == 64 98 static_assert(NFT_PIPAPO_MAP_TOBITS <= 32); 99 u32 to; 100 101 static_assert(NFT_PIPAPO_MAP_NBITS <= 32); 102 u32 n; 103#else 104 unsigned long to:NFT_PIPAPO_MAP_TOBITS; 105 unsigned long n:NFT_PIPAPO_MAP_NBITS; 106#endif 107 }; 108 struct nft_pipapo_elem *e; 109}; 110 111/** 112 * struct nft_pipapo_field - Lookup, mapping tables and related data for a field 113 * @groups: Amount of bit groups 114 * @rules: Number of inserted rules 115 * @bsize: Size of each bucket in lookup table, in longs 116 * @bb: Number of bits grouped together in lookup table buckets 117 * @lt: Lookup table: 'groups' rows of buckets 118 * @lt_aligned: Version of @lt aligned to NFT_PIPAPO_ALIGN bytes 119 * @mt: Mapping table: one bucket per rule 120 */ 121struct nft_pipapo_field { 122 int groups; 123 unsigned long rules; 124 size_t bsize; 125 int bb; 126#ifdef NFT_PIPAPO_ALIGN 127 unsigned long *lt_aligned; 128#endif 129 unsigned long *lt; 130 union nft_pipapo_map_bucket *mt; 131}; 132 133/** 134 * struct nft_pipapo_match - Data used for lookup and matching 135 * @field_count Amount of fields in set 136 * @scratch: Preallocated per-CPU maps for partial matching results 137 * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes 138 * @bsize_max: Maximum lookup table bucket size of all fields, in longs 139 * @rcu Matching data is swapped on commits 140 * @f: Fields, with lookup and mapping tables 141 */ 142struct nft_pipapo_match { 143 int field_count; 144#ifdef NFT_PIPAPO_ALIGN 145 unsigned long * __percpu *scratch_aligned; 146#endif 147 unsigned long * __percpu *scratch; 148 size_t bsize_max; 149 struct rcu_head rcu; 150 struct nft_pipapo_field f[]; 151}; 152 153/** 154 * struct nft_pipapo - Representation of a set 155 * @match: Currently in-use matching data 156 * @clone: Copy where pending insertions and deletions are kept 157 * @width: Total bytes to be matched for one packet, including padding 158 * @dirty: Working copy has pending insertions or deletions 159 * @last_gc: Timestamp of last garbage collection run, jiffies 160 */ 161struct nft_pipapo { 162 struct nft_pipapo_match __rcu *match; 163 struct nft_pipapo_match *clone; 164 int width; 165 bool dirty; 166 unsigned long last_gc; 167}; 168 169struct nft_pipapo_elem; 170 171/** 172 * struct nft_pipapo_elem - API-facing representation of single set element 173 * @ext: nftables API extensions 174 */ 175struct nft_pipapo_elem { 176 struct nft_set_ext ext; 177}; 178 179int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, 180 union nft_pipapo_map_bucket *mt, bool match_only); 181 182/** 183 * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets 184 * @f: Field including lookup table 185 * @dst: Area to store result 186 * @data: Input data selecting table buckets 187 */ 188static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f, 189 unsigned long *dst, 190 const u8 *data) 191{ 192 unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt); 193 int group; 194 195 for (group = 0; group < f->groups; group += BITS_PER_BYTE / 4, data++) { 196 u8 v; 197 198 v = *data >> 4; 199 __bitmap_and(dst, dst, lt + v * f->bsize, 200 f->bsize * BITS_PER_LONG); 201 lt += f->bsize * NFT_PIPAPO_BUCKETS(4); 202 203 v = *data & 0x0f; 204 __bitmap_and(dst, dst, lt + v * f->bsize, 205 f->bsize * BITS_PER_LONG); 206 lt += f->bsize * NFT_PIPAPO_BUCKETS(4); 207 } 208} 209 210/** 211 * pipapo_and_field_buckets_8bit() - Intersect 8-bit buckets 212 * @f: Field including lookup table 213 * @dst: Area to store result 214 * @data: Input data selecting table buckets 215 */ 216static inline void pipapo_and_field_buckets_8bit(struct nft_pipapo_field *f, 217 unsigned long *dst, 218 const u8 *data) 219{ 220 unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt); 221 int group; 222 223 for (group = 0; group < f->groups; group++, data++) { 224 __bitmap_and(dst, dst, lt + *data * f->bsize, 225 f->bsize * BITS_PER_LONG); 226 lt += f->bsize * NFT_PIPAPO_BUCKETS(8); 227 } 228} 229 230/** 231 * pipapo_estimate_size() - Estimate worst-case for set size 232 * @desc: Set description, element count and field description used here 233 * 234 * The size for this set type can vary dramatically, as it depends on the number 235 * of rules (composing netmasks) the entries expand to. We compute the worst 236 * case here. 237 * 238 * In general, for a non-ranged entry or a single composing netmask, we need 239 * one bit in each of the sixteen NFT_PIPAPO_BUCKETS, for each 4-bit group (that 240 * is, each input bit needs four bits of matching data), plus a bucket in the 241 * mapping table for each field. 242 * 243 * Return: worst-case set size in bytes, 0 on any overflow 244 */ 245static u64 pipapo_estimate_size(const struct nft_set_desc *desc) 246{ 247 unsigned long entry_size; 248 u64 size; 249 int i; 250 251 for (i = 0, entry_size = 0; i < desc->field_count; i++) { 252 unsigned long rules; 253 254 if (desc->field_len[i] > NFT_PIPAPO_MAX_BYTES) 255 return 0; 256 257 /* Worst-case ranges for each concatenated field: each n-bit 258 * field can expand to up to n * 2 rules in each bucket, and 259 * each rule also needs a mapping bucket. 260 */ 261 rules = ilog2(desc->field_len[i] * BITS_PER_BYTE) * 2; 262 entry_size += rules * 263 NFT_PIPAPO_BUCKETS(NFT_PIPAPO_GROUP_BITS_INIT) / 264 BITS_PER_BYTE; 265 entry_size += rules * sizeof(union nft_pipapo_map_bucket); 266 } 267 268 /* Rules in lookup and mapping tables are needed for each entry */ 269 size = desc->size * entry_size; 270 if (size && div_u64(size, desc->size) != entry_size) 271 return 0; 272 273 size += sizeof(struct nft_pipapo) + sizeof(struct nft_pipapo_match) * 2; 274 275 size += sizeof(struct nft_pipapo_field) * desc->field_count; 276 277 return size; 278} 279 280#endif /* _NFT_SET_PIPAPO_H */