/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015-2019 Yandex LLC
 * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
 * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */
31
32 #ifndef _IP_FW_NAT64LSN_H_
33 #define _IP_FW_NAT64LSN_H_
34
35 #include "ip_fw_nat64.h"
36 #include "nat64_translate.h"
37
/*
 * Alias ports are handed out in fixed-size chunks ("port groups").
 * NAT64_CHUNK_SIZE_BITS is log2 of the chunk size, so a port number is
 * converted to its chunk number with a right shift by that many bits.
 */
#define	NAT64_CHUNK_SIZE_BITS	6	/* 64 ports */
#define	NAT64_CHUNK_SIZE	(1 << NAT64_CHUNK_SIZE_BITS)

/* Lowest port number considered and the chunk number that contains it. */
#define	NAT64_MIN_PORT		1024
#define	NAT64_MIN_CHUNK		(NAT64_MIN_PORT >> NAT64_CHUNK_SIZE_BITS)

/*
 * Compact two-byte reference used to chain states: a portgroup index
 * plus an offset.
 */
struct st_ptr {
	uint8_t	idx;	/* index in nh->pg_ptr array.
			 * NOTE: it starts from 1.
			 */
	uint8_t	off;
};

/* Largest value representable in st_ptr.idx (a uint8_t), i.e. 255. */
#define	NAT64LSN_MAXPGPTR	((1 << (sizeof(uint8_t) * NBBY)) - 1)
/* Number of bits in one uint64_t mask word. */
#define	NAT64LSN_PGPTRMASKBITS	(sizeof(uint64_t) * NBBY)
/* Number of uint64_t mask words needed to cover NAT64LSN_MAXPGPTR bits. */
#define	NAT64LSN_PGPTRNMASK	(roundup(NAT64LSN_MAXPGPTR, \
    NAT64LSN_PGPTRMASKBITS) / NAT64LSN_PGPTRMASKBITS)

55 struct nat64lsn_portgroup;
56 /* sizeof(struct nat64lsn_host) = 64 + 64x2 + 8x8 = 256 bytes */
57 struct nat64lsn_host {
58 struct rwlock h_lock; /* Host states lock */
59
60 struct in6_addr addr;
61 struct nat64lsn_host *next;
62 uint16_t timestamp; /* Last altered */
63 uint16_t hsize; /* ports hash size */
64 uint16_t pg_used; /* Number of portgroups used */
65 #define NAT64LSN_REMAININGPG 8 /* Number of remaining PG before
66 * requesting of new chunk of indexes.
67 */
68 uint16_t pg_allocated; /* Number of portgroups indexes
69 * allocated.
70 */
71 #define NAT64LSN_HSIZE 64
72 struct st_ptr phash[NAT64LSN_HSIZE]; /* XXX: hardcoded size */
73 /*
74 * PG indexes are stored in chunks with 32 elements.
75 * The maximum count is limited to 255 due to st_ptr->idx is uint8_t.
76 */
77 #define NAT64LSN_PGIDX_CHUNK 32
78 #define NAT64LSN_PGNIDX (roundup(NAT64LSN_MAXPGPTR, \
79 NAT64LSN_PGIDX_CHUNK) / NAT64LSN_PGIDX_CHUNK)
80 struct nat64lsn_portgroup **pg_ptr[NAT64LSN_PGNIDX]; /* PG indexes */
81 };
82
/*
 * Wrappers around the rwlock embedded in a host record (h->h_lock).
 * 'h' is a pointer to an object that has an h_lock member.
 */
#define	NAT64_RLOCK_ASSERT(h)	rw_assert(&(h)->h_lock, RA_RLOCKED)
#define	NAT64_WLOCK_ASSERT(h)	rw_assert(&(h)->h_lock, RA_WLOCKED)

#define	NAT64_RLOCK(h)		rw_rlock(&(h)->h_lock)
#define	NAT64_RUNLOCK(h)	rw_runlock(&(h)->h_lock)
#define	NAT64_WLOCK(h)		rw_wlock(&(h)->h_lock)
#define	NAT64_WUNLOCK(h)	rw_wunlock(&(h)->h_lock)
/* The unqualified LOCK/UNLOCK forms take the lock in write mode. */
#define	NAT64_LOCK(h)		NAT64_WLOCK(h)
#define	NAT64_UNLOCK(h)		NAT64_WUNLOCK(h)
#define	NAT64_LOCK_INIT(h) do {				\
	rw_init(&(h)->h_lock, "NAT64 host lock");	\
} while (0)

#define	NAT64_LOCK_DESTROY(h) do {			\
	rw_destroy(&(h)->h_lock);			\
} while (0)

/*
 * Internal proto index.
 * These small integers (not IP protocol numbers) are what the
 * nat_proto fields and the portgroup index macros operate on.
 */
#define	NAT_PROTO_TCP	1
#define	NAT_PROTO_UDP	2
#define	NAT_PROTO_ICMP	3

/* Array bound for tables indexed by the internal proto index. */
#define	NAT_MAX_PROTO	4
extern uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];

108 VNET_DECLARE(uint16_t, nat64lsn_eid);
109 #define V_nat64lsn_eid VNET(nat64lsn_eid)
110 #define IPFW_TLV_NAT64LSN_NAME IPFW_TLV_EACTION_NAME(V_nat64lsn_eid)
111
/*
 * Timestamp macros.
 *
 * Timestamps are kept in 16-bit fields, so the current time is reduced
 * modulo 65536.
 *   _CT        - time_uptime truncated to int, modulo 65536.
 *   SET_AGE(x) - store the current reduced time in lvalue x; the
 *                expansion is parenthesized so it is a single expression.
 *   GET_AGE(x) - time elapsed since x was stamped, assuming the counter
 *                wrapped at most once in between.
 */
#define	_CT		((int)time_uptime % 65536)
#define	SET_AGE(x)	((x) = _CT)
#define	GET_AGE(x)	((_CT >= (x)) ? _CT - (x) : \
    (int)65536 + _CT - (x))

/*
 * _FFS64 selects the single-word (uint64_t) representation of the
 * portgroup free mask; without it the mask is split into two 32-bit
 * words.
 */
#ifdef __LP64__
/* ffsl() is capable of checking 64-bit ints */
#define	_FFS64
#endif

123 /* 16 bytes */
124 struct nat64lsn_state {
125 union {
126 struct {
127 in_addr_t faddr; /* Remote IPv4 address */
128 uint16_t fport; /* Remote IPv4 port */
129 uint16_t lport; /* Local IPv6 port */
130 }s;
131 uint64_t hkey;
132 } u;
133 uint8_t nat_proto;
134 uint8_t flags;
135 uint16_t timestamp;
136 struct st_ptr cur; /* Index of portgroup in nat64lsn_host */
137 struct st_ptr next; /* Next entry index */
138 };
139
140 /*
141 * 1024+32 bytes per 64 states, used to store state
142 * AND for outside-in state lookup
143 */
144 struct nat64lsn_portgroup {
145 struct nat64lsn_host *host; /* IPv6 source host info */
146 in_addr_t aaddr; /* Alias addr, network format */
147 uint16_t aport; /* Base port */
148 uint16_t timestamp;
149 uint8_t nat_proto;
150 uint8_t spare[3];
151 uint32_t idx;
152 #ifdef _FFS64
153 uint64_t freemask; /* Mask of free entries */
154 #else
155 uint32_t freemask[2]; /* Mask of free entries */
156 #endif
157 struct nat64lsn_state states[NAT64_CHUNK_SIZE]; /* State storage */
158 };
159 #ifdef _FFS64
160 #define PG_MARK_BUSY_IDX(_pg, _idx) (_pg)->freemask &= ~((uint64_t)1<<(_idx))
161 #define PG_MARK_FREE_IDX(_pg, _idx) (_pg)->freemask |= ((uint64_t)1<<(_idx))
162 #define PG_IS_FREE_IDX(_pg, _idx) ((_pg)->freemask & ((uint64_t)1<<(_idx)))
163 #define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0)
164 #define PG_GET_FREE_IDX(_pg) (ffsll((_pg)->freemask))
165 #define PG_IS_EMPTY(_pg) (((_pg)->freemask + 1) == 0)
166 #else
167 #define PG_MARK_BUSY_IDX(_pg, _idx) \
168 (_pg)->freemask[(_idx) / 32] &= ~((u_long)1<<((_idx) % 32))
169 #define PG_MARK_FREE_IDX(_pg, _idx) \
170 (_pg)->freemask[(_idx) / 32] |= ((u_long)1<<((_idx) % 32))
171 #define PG_IS_FREE_IDX(_pg, _idx) \
172 ((_pg)->freemask[(_idx) / 32] & ((u_long)1<<((_idx) % 32)))
173 #define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0)
174 #define PG_GET_FREE_IDX(_pg) _pg_get_free_idx(_pg)
175 #define PG_IS_EMPTY(_pg) \
176 ((((_pg)->freemask[0] + 1) == 0 && ((_pg)->freemask[1] + 1) == 0))
177
178 static inline int
_pg_get_free_idx(const struct nat64lsn_portgroup * pg)179 _pg_get_free_idx(const struct nat64lsn_portgroup *pg)
180 {
181 int i;
182
183 if ((i = ffsl(pg->freemask[0])) != 0)
184 return (i);
185 if ((i = ffsl(pg->freemask[1])) != 0)
186 return (i + 32);
187 return (0);
188 }
189
190 #endif
191
192 TAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item);
193
194 struct nat64lsn_cfg {
195 struct named_object no;
196 struct nat64lsn_portgroup **pg; /* XXX: array of pointers */
197 struct nat64lsn_host **ih; /* Host hash */
198 uint32_t prefix4; /* IPv4 prefix */
199 uint32_t pmask4; /* IPv4 prefix mask */
200 uint32_t ihsize; /* IPv6 host hash size */
201 uint8_t plen4;
202 uint8_t nomatch_verdict;/* What to return to ipfw on no-match */
203
204 uint32_t ihcount; /* Number of items in host hash */
205 int max_chunks; /* Max chunks per client */
206 int agg_prefix_len; /* Prefix length to count */
207 int agg_prefix_max; /* Max hosts per agg prefix */
208 uint32_t jmaxlen; /* Max jobqueue length */
209 uint16_t min_chunk; /* Min port group # to use */
210 uint16_t max_chunk; /* Max port group # to use */
211 uint16_t nh_delete_delay; /* Stale host delete delay */
212 uint16_t pg_delete_delay; /* Stale portgroup del delay */
213 uint16_t st_syn_ttl; /* TCP syn expire */
214 uint16_t st_close_ttl; /* TCP fin expire */
215 uint16_t st_estab_ttl; /* TCP established expire */
216 uint16_t st_udp_ttl; /* UDP expire */
217 uint16_t st_icmp_ttl; /* ICMP expire */
218 uint32_t protochunks[NAT_MAX_PROTO];/* Number of chunks used */
219 struct nat64_config base;
220 #define NAT64LSN_FLAGSMASK (NAT64_LOG | NAT64_ALLOW_PRIVATE)
221
222 struct callout periodic;
223 struct callout jcallout;
224 struct ip_fw_chain *ch;
225 struct vnet *vp;
226 struct nat64lsn_job_head jhead;
227 int jlen;
228 char name[64]; /* Nat instance name */
229 };
230
231 struct nat64lsn_cfg *nat64lsn_init_instance(struct ip_fw_chain *ch,
232 size_t numaddr);
233 void nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg);
234 void nat64lsn_start_instance(struct nat64lsn_cfg *cfg);
235 void nat64lsn_init_internal(void);
236 void nat64lsn_uninit_internal(void);
237 int ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
238 ipfw_insn *cmd, int *done);
239
240 void
241 nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
242 const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
243 const char *px, int off);
/*
 * Portgroup layout
 * addr x nat_proto x port_off
 *
 * cfg->pg is a flat array indexed first by alias address (relative to
 * cfg->prefix4), then by internal proto index (1-based, hence the
 * "- 1"), then by port chunk number.
 *
 * Every macro argument and every expansion is parenthesized so the
 * macros can be used with arbitrary expressions and inside larger
 * expressions.
 */

/* Port chunks per (address, proto) and per address. */
#define	_ADDR_PG_PROTO_COUNT	(65536 >> NAT64_CHUNK_SIZE_BITS)
#define	_ADDR_PG_COUNT		(_ADDR_PG_PROTO_COUNT * NAT_MAX_PROTO)

/* Offset of _addr from the start of the instance's IPv4 prefix. */
#define	GET_ADDR_IDX(_cfg, _addr)	((_addr) - ((_cfg)->prefix4))
/* Index within one address: proto slab plus port chunk number. */
#define	__GET_PORTGROUP_IDX(_proto, _port)		\
    ((((_proto) - 1) * _ADDR_PG_PROTO_COUNT) +		\
    ((_port) >> NAT64_CHUNK_SIZE_BITS))

/* Index into cfg->pg for (addr, proto, port). */
#define	_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)	\
    ((GET_ADDR_IDX(_cfg, _addr) * _ADDR_PG_COUNT) +	\
    __GET_PORTGROUP_IDX(_proto, _port))
/* The cfg->pg slot itself (usable as an lvalue). */
#define	GET_PORTGROUP(_cfg, _addr, _proto, _port)	\
    ((_cfg)->pg[_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)])

/* The _idx'th chunk of portgroup pointers of host _nh (0-based). */
#define	PORTGROUP_CHUNK(_nh, _idx)		\
    ((_nh)->pg_ptr[(_idx)])
/*
 * Portgroup slot of host _nh for a 1-based index _idx (the st_ptr.idx
 * convention): chunk ((_idx) - 1) / NAT64LSN_PGIDX_CHUNK, element
 * ((_idx) - 1) % NAT64LSN_PGIDX_CHUNK. _cfg is accepted but unused.
 * _idx is parenthesized in both places so the chunk and the element
 * are always derived from the same value.
 */
#define	PORTGROUP_BYSIDX(_cfg, _nh, _idx)	\
    (PORTGROUP_CHUNK(_nh, ((_idx) - 1) / NAT64LSN_PGIDX_CHUNK)	\
    [((_idx) - 1) % NAT64LSN_PGIDX_CHUNK])


/*
 * Chained hash table
 *
 * The CHT_* macros are generic over a name prefix _PX: they expand to
 * calls of _PX##hash, _PX##lock, _PX##unlock, _PX##first, _PX##next,
 * _PX##val and _PX##cmp, which the user must provide. The bucket is
 * selected as hash & (_hsize - 1).
 */

/*
 * Look up _key. On return _x is the matching entry or NULL.
 * NOTE: when an entry is found the bucket is left LOCKED and the caller
 * has to unlock it; when nothing is found the bucket is unlocked here.
 */
#define	CHT_FIND(_ph, _hsize, _PX, _x, _key) do {			\
	unsigned int _buck = _PX##hash(_key) & ((_hsize) - 1);		\
	_PX##lock(_ph, _buck);						\
	_x = _PX##first(_ph, _buck);					\
	for ( ; _x != NULL; _x = _PX##next(_x)) {			\
		if (_PX##cmp(_key, _PX##val(_x)))			\
			break;						\
	}								\
	if (_x == NULL)							\
		_PX##unlock(_ph, _buck);				\
} while (0)

/*
 * Unlock a bucket by number.
 * NOTE: the expansion already ends with ';' (kept for compatibility
 * with existing callers), so do not use this macro as the unbraced
 * body of an if that has an else.
 */
#define	CHT_UNLOCK_BUCK(_ph, _PX, _buck)				\
	_PX##unlock(_ph, _buck);

/* Unlock the bucket that _key hashes to. */
#define	CHT_UNLOCK_KEY(_ph, _hsize, _PX, _key) do {			\
	unsigned int _buck = _PX##hash(_key) & ((_hsize) - 1);		\
	_PX##unlock(_ph, _buck);					\
} while (0)

/*
 * Insert entry _i at the head of the bucket selected by the hash of its
 * own key (_PX##val(_i)). The bucket is locked for the duration of the
 * insert and unlocked afterwards. No duplicate check is performed.
 */
#define	CHT_INSERT_HEAD(_ph, _hsize, _PX, _i) do {			\
	unsigned int _buck = _PX##hash(_PX##val(_i)) & ((_hsize) - 1);	\
	_PX##lock(_ph, _buck);						\
	_PX##next(_i) = _PX##first(_ph, _buck);				\
	_PX##first(_ph, _buck) = _i;					\
	_PX##unlock(_ph, _buck);					\
} while (0)

/*
 * Unlink the first entry matching _key from its bucket.
 * On return _x is the unlinked entry, or NULL when no entry matched;
 * _tmp is scratch (the predecessor of _x during the walk). The bucket
 * is locked for the duration and always unlocked before returning.
 * The entry's own next pointer is not reset and the entry is not freed.
 */
#define	CHT_REMOVE(_ph, _hsize, _PX, _x, _tmp, _key) do {		\
	unsigned int _buck = _PX##hash(_key) & ((_hsize) - 1);		\
	_PX##lock(_ph, _buck);						\
	_x = _PX##first(_ph, _buck);					\
	_tmp = NULL;							\
	for ( ; _x != NULL; _tmp = _x, _x = _PX##next(_x)) {		\
		if (_PX##cmp(_key, _PX##val(_x)))			\
			break;						\
	}								\
	if (_x != NULL) {						\
		if (_tmp == NULL)					\
			_PX##first(_ph, _buck) = _PX##next(_x);		\
		else							\
			_PX##next(_tmp) = _PX##next(_x);		\
	}								\
	_PX##unlock(_ph, _buck);					\
} while (0)

/*
 * Walk every bucket (each under its own lock) and call _cb(_x, _arg)
 * for every entry. A zero return keeps the entry; a non-zero return
 * unlinks it from its chain.
 *
 * _x is the cursor and _tmp tracks the last entry that was KEPT in the
 * current bucket (NULL while still at the head), so consecutive
 * removals keep relinking from the correct predecessor.
 *
 * The macro only unlinks entries and reads _PX##next(_x) after the
 * callback has returned, so the callback must leave the entry's link
 * readable.
 */
#define	CHT_FOREACH_SAFE(_ph, _hsize, _PX, _x, _tmp, _cb, _arg) do {	\
	for (unsigned int _i = 0; _i < (_hsize); _i++) {		\
		_PX##lock(_ph, _i);					\
		_tmp = NULL;						\
		_x = _PX##first(_ph, _i);				\
		while (_x != NULL) {					\
			if (_cb(_x, _arg) == 0) {			\
				/* Keep: becomes the predecessor. */	\
				_tmp = _x;				\
				_x = _PX##next(_x);			\
			} else if (_tmp == NULL) {			\
				/* Unlink the bucket head. */		\
				_PX##first(_ph, _i) = _PX##next(_x);	\
				_x = _PX##first(_ph, _i);		\
			} else {					\
				/* Unlink after the predecessor. */	\
				_PX##next(_tmp) = _PX##next(_x);	\
				_x = _PX##next(_tmp);			\
			}						\
		}							\
		_PX##unlock(_ph, _i);					\
	}								\
} while (0)

/*
 * Move every entry of the old table (_ph, _hsize buckets) into the new
 * table (_nph, _nhsize buckets), rehashing each entry's key against the
 * new size and inserting at the head of its new bucket.
 *
 * _x is the entry being moved and _y holds its successor in the OLD
 * chain, saved before _x is relinked, so the walk visits every entry of
 * the chain exactly once.
 *
 * No locks are taken, and the old table's bucket heads are left
 * untouched (they become stale once entries have been relinked).
 */
#define	CHT_RESIZE(_ph, _hsize, _nph, _nhsize, _PX, _x, _y) do {	\
	unsigned int _buck;						\
	for (unsigned int _i = 0; _i < (_hsize); _i++) {		\
		for (_x = _PX##first(_ph, _i); _x != NULL; _x = _y) {	\
			/* Save successor before relinking _x. */	\
			_y = _PX##next(_x);				\
			_buck = _PX##hash(_PX##val(_x)) &		\
			    ((_nhsize) - 1);				\
			_PX##next(_x) = _PX##first(_nph, _buck);	\
			_PX##first(_nph, _buck) = _x;			\
		}							\
	}								\
} while (0)

349 #endif /* _IP_FW_NAT64LSN_H_ */
350
351