1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2001 Daniel Hartmeier
5 * Copyright (c) 2002 - 2008 Henning Brauer
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * - Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * - Redistributions in binary form must reproduce the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer in the documentation and/or other materials provided
17 * with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * Effort sponsored in part by the Defense Advanced Research Projects
33 * Agency (DARPA) and Air Force Research Laboratory, Air Force
34 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35 *
36 * $OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $
37 */
38
39 #include <sys/cdefs.h>
40 #include "opt_pf.h"
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43
44 #include <sys/param.h>
45 #include <sys/lock.h>
46 #include <sys/mbuf.h>
47 #include <sys/socket.h>
48 #include <sys/sysctl.h>
49
50 #include <crypto/siphash/siphash.h>
51
52 #include <net/if.h>
53 #include <net/if_var.h>
54 #include <net/vnet.h>
55 #include <net/pfvar.h>
56 #include <net/if_pflog.h>
57
58 #ifdef INET
59 #include <netinet/in_var.h>
60 #endif /* INET */
61
62 #ifdef INET6
63 #include <netinet6/in6_var.h>
64 #endif /* INET6 */
65
66
67 /*
68 * Limit the amount of work we do to find a free source port for redirects that
69 * introduce a state conflict.
70 */
71 #define V_pf_rdr_srcport_rewrite_tries VNET(pf_rdr_srcport_rewrite_tries)
72 VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16;
73
74 #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x
75
76 static uint64_t pf_hash(struct pf_addr *, struct pf_addr *,
77 struct pf_poolhashkey *, sa_family_t);
78 struct pf_krule *pf_match_translation(struct pf_pdesc *,
79 int, struct pf_kanchor_stackframe *);
80 static int pf_get_sport(struct pf_pdesc *, struct pf_krule *,
81 struct pf_addr *, uint16_t *, uint16_t, uint16_t,
82 struct pf_ksrc_node **, struct pf_srchash **,
83 struct pf_kpool *, struct pf_udp_mapping **,
84 pf_sn_types_t);
85 static bool pf_islinklocal(const sa_family_t, const struct pf_addr *);
86
87 static uint64_t
pf_hash(struct pf_addr * inaddr,struct pf_addr * hash,struct pf_poolhashkey * key,sa_family_t af)88 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
89 struct pf_poolhashkey *key, sa_family_t af)
90 {
91 SIPHASH_CTX ctx;
92 #ifdef INET6
93 union {
94 uint64_t hash64;
95 uint32_t hash32[2];
96 } h;
97 #endif /* INET6 */
98 uint64_t res = 0;
99
100 _Static_assert(sizeof(*key) >= SIPHASH_KEY_LENGTH, "");
101
102 switch (af) {
103 #ifdef INET
104 case AF_INET:
105 res = SipHash24(&ctx, (const uint8_t *)key,
106 &inaddr->addr32[0], sizeof(inaddr->addr32[0]));
107 hash->addr32[0] = res;
108 break;
109 #endif /* INET */
110 #ifdef INET6
111 case AF_INET6:
112 res = SipHash24(&ctx, (const uint8_t *)key,
113 &inaddr->addr32[0], 4 * sizeof(inaddr->addr32[0]));
114 h.hash64 = res;
115 hash->addr32[0] = h.hash32[0];
116 hash->addr32[1] = h.hash32[1];
117 /*
118 * siphash isn't big enough, but flipping it around is
119 * good enough here.
120 */
121 hash->addr32[2] = ~h.hash32[1];
122 hash->addr32[3] = ~h.hash32[0];
123 break;
124 #endif /* INET6 */
125 default:
126 unhandled_af(af);
127 }
128 return (res);
129 }
130
131 struct pf_krule *
pf_match_translation(struct pf_pdesc * pd,int rs_num,struct pf_kanchor_stackframe * anchor_stack)132 pf_match_translation(struct pf_pdesc *pd,
133 int rs_num, struct pf_kanchor_stackframe *anchor_stack)
134 {
135 struct pf_krule *r, *rm = NULL;
136 struct pf_kruleset *ruleset = NULL;
137 int tag = -1;
138 int rtableid = -1;
139 int asd = 0;
140
141 r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
142 while (r != NULL) {
143 struct pf_rule_addr *src = NULL, *dst = NULL;
144 struct pf_addr_wrap *xdst = NULL;
145
146 if (r->action == PF_BINAT && pd->dir == PF_IN) {
147 src = &r->dst;
148 if (r->rdr.cur != NULL)
149 xdst = &r->rdr.cur->addr;
150 } else {
151 src = &r->src;
152 dst = &r->dst;
153 }
154
155 pf_counter_u64_add(&r->evaluations, 1);
156 if (pfi_kkif_match(r->kif, pd->kif) == r->ifnot)
157 r = r->skip[PF_SKIP_IFP];
158 else if (r->direction && r->direction != pd->dir)
159 r = r->skip[PF_SKIP_DIR];
160 else if (r->af && r->af != pd->af)
161 r = r->skip[PF_SKIP_AF];
162 else if (r->proto && r->proto != pd->proto)
163 r = r->skip[PF_SKIP_PROTO];
164 else if (PF_MISMATCHAW(&src->addr, &pd->nsaddr, pd->af,
165 src->neg, pd->kif, M_GETFIB(pd->m)))
166 r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
167 PF_SKIP_DST_ADDR];
168 else if (src->port_op && !pf_match_port(src->port_op,
169 src->port[0], src->port[1], pd->nsport))
170 r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
171 PF_SKIP_DST_PORT];
172 else if (dst != NULL &&
173 PF_MISMATCHAW(&dst->addr, &pd->ndaddr, pd->af, dst->neg, NULL,
174 M_GETFIB(pd->m)))
175 r = r->skip[PF_SKIP_DST_ADDR];
176 else if (xdst != NULL && PF_MISMATCHAW(xdst, &pd->ndaddr, pd->af,
177 0, NULL, M_GETFIB(pd->m)))
178 r = TAILQ_NEXT(r, entries);
179 else if (dst != NULL && dst->port_op &&
180 !pf_match_port(dst->port_op, dst->port[0],
181 dst->port[1], pd->ndport))
182 r = r->skip[PF_SKIP_DST_PORT];
183 else if (r->match_tag && !pf_match_tag(pd->m, r, &tag,
184 pd->pf_mtag ? pd->pf_mtag->tag : 0))
185 r = TAILQ_NEXT(r, entries);
186 else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
187 IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd,
188 &pd->hdr.tcp), r->os_fingerprint)))
189 r = TAILQ_NEXT(r, entries);
190 else {
191 if (r->tag)
192 tag = r->tag;
193 if (r->rtableid >= 0)
194 rtableid = r->rtableid;
195 if (r->anchor == NULL) {
196 rm = r;
197 if (rm->action == PF_NONAT ||
198 rm->action == PF_NORDR ||
199 rm->action == PF_NOBINAT) {
200 rm = NULL;
201 }
202 break;
203 } else
204 pf_step_into_anchor(anchor_stack, &asd,
205 &ruleset, rs_num, &r, NULL);
206 }
207 if (r == NULL)
208 pf_step_out_of_anchor(anchor_stack, &asd, &ruleset,
209 rs_num, &r, NULL, NULL);
210 }
211
212 if (tag > 0 && pf_tag_packet(pd, tag))
213 return (NULL);
214 if (rtableid >= 0)
215 M_SETFIB(pd->m, rtableid);
216
217 return (rm);
218 }
219
220 static int
pf_get_sport(struct pf_pdesc * pd,struct pf_krule * r,struct pf_addr * naddr,uint16_t * nport,uint16_t low,uint16_t high,struct pf_ksrc_node ** sn,struct pf_srchash ** sh,struct pf_kpool * rpool,struct pf_udp_mapping ** udp_mapping,pf_sn_types_t sn_type)221 pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r,
222 struct pf_addr *naddr, uint16_t *nport, uint16_t low,
223 uint16_t high, struct pf_ksrc_node **sn,
224 struct pf_srchash **sh, struct pf_kpool *rpool,
225 struct pf_udp_mapping **udp_mapping, pf_sn_types_t sn_type)
226 {
227 struct pf_state_key_cmp key;
228 struct pf_addr init_addr;
229 int dir = (pd->dir == PF_IN) ? PF_OUT : PF_IN;
230 int sidx = pd->sidx;
231 int didx = pd->didx;
232
233 bzero(&init_addr, sizeof(init_addr));
234
235 if (udp_mapping) {
236 MPASS(*udp_mapping == NULL);
237 }
238
239 /*
240 * If we are UDP and have an existing mapping we can get source port
241 * from the mapping. In this case we have to look up the src_node as
242 * pf_map_addr would.
243 */
244 if (pd->proto == IPPROTO_UDP && (rpool->opts & PF_POOL_ENDPI)) {
245 struct pf_udp_endpoint_cmp udp_source;
246
247 bzero(&udp_source, sizeof(udp_source));
248 udp_source.af = pd->af;
249 PF_ACPY(&udp_source.addr, &pd->nsaddr, pd->af);
250 udp_source.port = pd->nsport;
251 if (udp_mapping) {
252 *udp_mapping = pf_udp_mapping_find(&udp_source);
253 if (*udp_mapping) {
254 PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, pd->af);
255 *nport = (*udp_mapping)->endpoints[1].port;
256 /* Try to find a src_node as per pf_map_addr(). */
257 if (*sn == NULL && rpool->opts & PF_POOL_STICKYADDR &&
258 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
259 *sn = pf_find_src_node(&pd->nsaddr, r,
260 pd->af, sh, sn_type, false);
261 if (*sn != NULL)
262 PF_SRC_NODE_UNLOCK(*sn);
263 return (0);
264 } else {
265 *udp_mapping = pf_udp_mapping_create(pd->af, &pd->nsaddr,
266 pd->nsport, &init_addr, 0);
267 if (*udp_mapping == NULL)
268 return (1);
269 }
270 }
271 }
272
273 if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, NULL, &init_addr,
274 sn, sh, rpool, sn_type))
275 goto failed;
276
277 if (pd->proto == IPPROTO_ICMP) {
278 if (*nport == htons(ICMP_ECHO)) {
279 low = 1;
280 high = 65535;
281 } else
282 return (0); /* Don't try to modify non-echo ICMP */
283 }
284 #ifdef INET6
285 if (pd->proto == IPPROTO_ICMPV6) {
286 if (*nport == htons(ICMP6_ECHO_REQUEST)) {
287 low = 1;
288 high = 65535;
289 } else
290 return (0); /* Don't try to modify non-echo ICMP */
291 }
292 #endif /* INET6 */
293
294 bzero(&key, sizeof(key));
295 key.af = pd->naf;
296 key.proto = pd->proto;
297
298 do {
299 PF_ACPY(&key.addr[didx], &pd->ndaddr, key.af);
300 PF_ACPY(&key.addr[sidx], naddr, key.af);
301 key.port[didx] = pd->ndport;
302
303 if (udp_mapping && *udp_mapping)
304 PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, pd->af);
305
306 /*
307 * port search; start random, step;
308 * similar 2 portloop in in_pcbbind
309 */
310 if (pd->proto == IPPROTO_SCTP) {
311 key.port[sidx] = pd->nsport;
312 if (!pf_find_state_all_exists(&key, dir)) {
313 *nport = pd->nsport;
314 return (0);
315 } else {
316 return (1); /* Fail mapping. */
317 }
318 } else if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
319 pd->proto == IPPROTO_ICMP) || (low == 0 && high == 0)) {
320 /*
321 * XXX bug: icmp states don't use the id on both sides.
322 * (traceroute -I through nat)
323 */
324 key.port[sidx] = pd->nsport;
325 if (!pf_find_state_all_exists(&key, dir)) {
326 *nport = pd->nsport;
327 return (0);
328 }
329 } else if (low == high) {
330 key.port[sidx] = htons(low);
331 if (!pf_find_state_all_exists(&key, dir)) {
332 if (udp_mapping && *udp_mapping != NULL) {
333 (*udp_mapping)->endpoints[1].port = htons(low);
334 if (pf_udp_mapping_insert(*udp_mapping) == 0) {
335 *nport = htons(low);
336 return (0);
337 }
338 } else {
339 *nport = htons(low);
340 return (0);
341 }
342 }
343 } else {
344 uint32_t tmp;
345 uint16_t cut;
346
347 if (low > high) {
348 tmp = low;
349 low = high;
350 high = tmp;
351 }
352 /* low < high */
353 cut = arc4random() % (1 + high - low) + low;
354 /* low <= cut <= high */
355 for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
356 if (udp_mapping && *udp_mapping != NULL) {
357 (*udp_mapping)->endpoints[sidx].port = htons(tmp);
358 if (pf_udp_mapping_insert(*udp_mapping) == 0) {
359 *nport = htons(tmp);
360 return (0);
361 }
362 } else {
363 key.port[sidx] = htons(tmp);
364 if (!pf_find_state_all_exists(&key, dir)) {
365 *nport = htons(tmp);
366 return (0);
367 }
368 }
369 }
370 tmp = cut;
371 for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
372 if (pd->proto == IPPROTO_UDP &&
373 (rpool->opts & PF_POOL_ENDPI &&
374 udp_mapping != NULL)) {
375 (*udp_mapping)->endpoints[1].port = htons(tmp);
376 if (pf_udp_mapping_insert(*udp_mapping) == 0) {
377 *nport = htons(tmp);
378 return (0);
379 }
380 } else {
381 key.port[sidx] = htons(tmp);
382 if (!pf_find_state_all_exists(&key, dir)) {
383 *nport = htons(tmp);
384 return (0);
385 }
386 }
387 }
388 }
389
390 switch (rpool->opts & PF_POOL_TYPEMASK) {
391 case PF_POOL_RANDOM:
392 case PF_POOL_ROUNDROBIN:
393 /*
394 * pick a different source address since we're out
395 * of free port choices for the current one.
396 */
397 (*sn) = NULL;
398 if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, NULL,
399 &init_addr, sn, sh, rpool, sn_type))
400 return (1);
401 break;
402 case PF_POOL_NONE:
403 case PF_POOL_SRCHASH:
404 case PF_POOL_BITMASK:
405 default:
406 return (1);
407 }
408 } while (! PF_AEQ(&init_addr, naddr, pd->naf) );
409
410 failed:
411 if (udp_mapping) {
412 uma_zfree(V_pf_udp_mapping_z, *udp_mapping);
413 *udp_mapping = NULL;
414 }
415
416 return (1); /* none available */
417 }
418
419 static bool
pf_islinklocal(const sa_family_t af,const struct pf_addr * addr)420 pf_islinklocal(const sa_family_t af, const struct pf_addr *addr)
421 {
422 if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6))
423 return (true);
424 return (false);
425 }
426
427 static int
pf_get_mape_sport(struct pf_pdesc * pd,struct pf_krule * r,struct pf_addr * naddr,uint16_t * nport,struct pf_ksrc_node ** sn,struct pf_srchash ** sh,struct pf_udp_mapping ** udp_mapping,struct pf_kpool * rpool)428 pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r,
429 struct pf_addr *naddr, uint16_t *nport,
430 struct pf_ksrc_node **sn, struct pf_srchash **sh,
431 struct pf_udp_mapping **udp_mapping, struct pf_kpool *rpool)
432 {
433 uint16_t psmask, low, highmask;
434 uint16_t i, ahigh, cut;
435 int ashift, psidshift;
436
437 ashift = 16 - rpool->mape.offset;
438 psidshift = ashift - rpool->mape.psidlen;
439 psmask = rpool->mape.psid & ((1U << rpool->mape.psidlen) - 1);
440 psmask = psmask << psidshift;
441 highmask = (1U << psidshift) - 1;
442
443 ahigh = (1U << rpool->mape.offset) - 1;
444 cut = arc4random() & ahigh;
445 if (cut == 0)
446 cut = 1;
447
448 for (i = cut; i <= ahigh; i++) {
449 low = (i << ashift) | psmask;
450 if (!pf_get_sport(pd, r,
451 naddr, nport, low, low | highmask, sn, sh, rpool,
452 udp_mapping, PF_SN_NAT))
453 return (0);
454 }
455 for (i = cut - 1; i > 0; i--) {
456 low = (i << ashift) | psmask;
457 if (!pf_get_sport(pd, r,
458 naddr, nport, low, low | highmask, sn, sh, rpool,
459 udp_mapping, PF_SN_NAT))
460 return (0);
461 }
462 return (1);
463 }
464
465 u_short
pf_map_addr(sa_family_t af,struct pf_krule * r,struct pf_addr * saddr,struct pf_addr * naddr,struct pfi_kkif ** nkif,struct pf_addr * init_addr,struct pf_kpool * rpool)466 pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
467 struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr,
468 struct pf_kpool *rpool)
469 {
470 u_short reason = PFRES_MATCH;
471 struct pf_addr *raddr = NULL, *rmask = NULL;
472 uint64_t hashidx;
473 int cnt;
474
475 mtx_lock(&rpool->mtx);
476 /* Find the route using chosen algorithm. Store the found route
477 in src_node if it was given or found. */
478 if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
479 reason = PFRES_MAPFAILED;
480 goto done_pool_mtx;
481 }
482 if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
483 switch (af) {
484 #ifdef INET
485 case AF_INET:
486 if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
487 !PF_POOL_DYNTYPE(rpool->opts)) {
488 reason = PFRES_MAPFAILED;
489 goto done_pool_mtx;
490 }
491 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
492 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
493 break;
494 #endif /* INET */
495 #ifdef INET6
496 case AF_INET6:
497 if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
498 !PF_POOL_DYNTYPE(rpool->opts)) {
499 reason = PFRES_MAPFAILED;
500 goto done_pool_mtx;
501 }
502 raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
503 rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
504 break;
505 #endif /* INET6 */
506 default:
507 unhandled_af(af);
508 }
509 } else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
510 if (!PF_POOL_DYNTYPE(rpool->opts)) {
511 reason = PFRES_MAPFAILED;
512 goto done_pool_mtx; /* unsupported */
513 }
514 } else {
515 raddr = &rpool->cur->addr.v.a.addr;
516 rmask = &rpool->cur->addr.v.a.mask;
517 }
518
519 switch (rpool->opts & PF_POOL_TYPEMASK) {
520 case PF_POOL_NONE:
521 PF_ACPY(naddr, raddr, af);
522 break;
523 case PF_POOL_BITMASK:
524 PF_POOLMASK(naddr, raddr, rmask, saddr, af);
525 break;
526 case PF_POOL_RANDOM:
527 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
528 cnt = rpool->cur->addr.p.tbl->pfrkt_cnt;
529 if (cnt == 0)
530 rpool->tblidx = 0;
531 else
532 rpool->tblidx = (int)arc4random_uniform(cnt);
533 memset(&rpool->counter, 0, sizeof(rpool->counter));
534 if (pfr_pool_get(rpool->cur->addr.p.tbl,
535 &rpool->tblidx, &rpool->counter, af, NULL)) {
536 reason = PFRES_MAPFAILED;
537 goto done_pool_mtx; /* unsupported */
538 }
539 PF_ACPY(naddr, &rpool->counter, af);
540 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
541 cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt;
542 if (cnt == 0)
543 rpool->tblidx = 0;
544 else
545 rpool->tblidx = (int)arc4random_uniform(cnt);
546 memset(&rpool->counter, 0, sizeof(rpool->counter));
547 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
548 &rpool->tblidx, &rpool->counter, af,
549 pf_islinklocal)) {
550 reason = PFRES_MAPFAILED;
551 goto done_pool_mtx; /* unsupported */
552 }
553 PF_ACPY(naddr, &rpool->counter, af);
554 } else if (init_addr != NULL && PF_AZERO(init_addr, af)) {
555 switch (af) {
556 #ifdef INET
557 case AF_INET:
558 rpool->counter.addr32[0] = arc4random();
559 break;
560 #endif /* INET */
561 #ifdef INET6
562 case AF_INET6:
563 if (rmask->addr32[3] != 0xffffffff)
564 rpool->counter.addr32[3] =
565 arc4random();
566 else
567 break;
568 if (rmask->addr32[2] != 0xffffffff)
569 rpool->counter.addr32[2] =
570 arc4random();
571 else
572 break;
573 if (rmask->addr32[1] != 0xffffffff)
574 rpool->counter.addr32[1] =
575 arc4random();
576 else
577 break;
578 if (rmask->addr32[0] != 0xffffffff)
579 rpool->counter.addr32[0] =
580 arc4random();
581 break;
582 #endif /* INET6 */
583 }
584 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
585 PF_ACPY(init_addr, naddr, af);
586
587 } else {
588 PF_AINC(&rpool->counter, af);
589 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
590 }
591 break;
592 case PF_POOL_SRCHASH:
593 {
594 unsigned char hash[16];
595
596 hashidx =
597 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
598 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
599 cnt = rpool->cur->addr.p.tbl->pfrkt_cnt;
600 if (cnt == 0)
601 rpool->tblidx = 0;
602 else
603 rpool->tblidx = (int)(hashidx % cnt);
604 memset(&rpool->counter, 0, sizeof(rpool->counter));
605 if (pfr_pool_get(rpool->cur->addr.p.tbl,
606 &rpool->tblidx, &rpool->counter, af, NULL)) {
607 reason = PFRES_MAPFAILED;
608 goto done_pool_mtx; /* unsupported */
609 }
610 PF_ACPY(naddr, &rpool->counter, af);
611 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
612 cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt;
613 if (cnt == 0)
614 rpool->tblidx = 0;
615 else
616 rpool->tblidx = (int)(hashidx % cnt);
617 memset(&rpool->counter, 0, sizeof(rpool->counter));
618 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
619 &rpool->tblidx, &rpool->counter, af,
620 pf_islinklocal)) {
621 reason = PFRES_MAPFAILED;
622 goto done_pool_mtx; /* unsupported */
623 }
624 PF_ACPY(naddr, &rpool->counter, af);
625 } else {
626 PF_POOLMASK(naddr, raddr, rmask,
627 (struct pf_addr *)&hash, af);
628 }
629 break;
630 }
631 case PF_POOL_ROUNDROBIN:
632 {
633 struct pf_kpooladdr *acur = rpool->cur;
634
635 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
636 if (!pfr_pool_get(rpool->cur->addr.p.tbl,
637 &rpool->tblidx, &rpool->counter, af, NULL))
638 goto get_addr;
639 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
640 if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
641 &rpool->tblidx, &rpool->counter, af, pf_islinklocal))
642 goto get_addr;
643 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
644 goto get_addr;
645
646 try_next:
647 if (TAILQ_NEXT(rpool->cur, entries) == NULL)
648 rpool->cur = TAILQ_FIRST(&rpool->list);
649 else
650 rpool->cur = TAILQ_NEXT(rpool->cur, entries);
651 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
652 if (pfr_pool_get(rpool->cur->addr.p.tbl,
653 &rpool->tblidx, &rpool->counter, af, NULL)) {
654 /* table contains no address of type 'af' */
655 if (rpool->cur != acur)
656 goto try_next;
657 reason = PFRES_MAPFAILED;
658 goto done_pool_mtx;
659 }
660 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
661 rpool->tblidx = -1;
662 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
663 &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) {
664 /* table contains no address of type 'af' */
665 if (rpool->cur != acur)
666 goto try_next;
667 reason = PFRES_MAPFAILED;
668 goto done_pool_mtx;
669 }
670 } else {
671 raddr = &rpool->cur->addr.v.a.addr;
672 rmask = &rpool->cur->addr.v.a.mask;
673 PF_ACPY(&rpool->counter, raddr, af);
674 }
675
676 get_addr:
677 PF_ACPY(naddr, &rpool->counter, af);
678 if (init_addr != NULL && PF_AZERO(init_addr, af))
679 PF_ACPY(init_addr, naddr, af);
680 PF_AINC(&rpool->counter, af);
681 break;
682 }
683 }
684
685 if (nkif)
686 *nkif = rpool->cur->kif;
687
688 done_pool_mtx:
689 mtx_unlock(&rpool->mtx);
690
691 if (reason) {
692 counter_u64_add(V_pf_status.counters[reason], 1);
693 }
694
695 return (reason);
696 }
697
698 u_short
pf_map_addr_sn(sa_family_t af,struct pf_krule * r,struct pf_addr * saddr,struct pf_addr * naddr,struct pfi_kkif ** nkif,struct pf_addr * init_addr,struct pf_ksrc_node ** sn,struct pf_srchash ** sh,struct pf_kpool * rpool,pf_sn_types_t sn_type)699 pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
700 struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr,
701 struct pf_ksrc_node **sn, struct pf_srchash **sh, struct pf_kpool *rpool,
702 pf_sn_types_t sn_type)
703 {
704 u_short reason = 0;
705
706 KASSERT(*sn == NULL, ("*sn not NULL"));
707
708 /*
709 * If this is a sticky-address rule, try to find an existing src_node.
710 * Request the sh to be unlocked if sn was not found, as we never
711 * insert a new sn when parsing the ruleset.
712 */
713 if (rpool->opts & PF_POOL_STICKYADDR &&
714 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
715 *sn = pf_find_src_node(saddr, r, af, sh, sn_type, false);
716
717 if (*sn != NULL) {
718 PF_SRC_NODE_LOCK_ASSERT(*sn);
719
720 /* If the supplied address is the same as the current one we've
721 * been asked before, so tell the caller that there's no other
722 * address to be had. */
723 if (PF_AEQ(naddr, &(*sn)->raddr, af)) {
724 reason = PFRES_MAPFAILED;
725 goto done;
726 }
727
728 PF_ACPY(naddr, &(*sn)->raddr, af);
729 if (nkif)
730 *nkif = (*sn)->rkif;
731 if (V_pf_status.debug >= PF_DEBUG_NOISY) {
732 printf("pf_map_addr: src tracking maps ");
733 pf_print_host(saddr, 0, af);
734 printf(" to ");
735 pf_print_host(naddr, 0, af);
736 if (nkif)
737 printf("@%s", (*nkif)->pfik_name);
738 printf("\n");
739 }
740 goto done;
741 }
742
743 /*
744 * Source node has not been found. Find a new address and store it
745 * in variables given by the caller.
746 */
747 if (pf_map_addr(af, r, saddr, naddr, nkif, init_addr, rpool) != 0) {
748 /* pf_map_addr() sets reason counters on its own */
749 goto done;
750 }
751
752 if (V_pf_status.debug >= PF_DEBUG_NOISY &&
753 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
754 printf("pf_map_addr: selected address ");
755 pf_print_host(naddr, 0, af);
756 if (nkif)
757 printf("@%s", (*nkif)->pfik_name);
758 printf("\n");
759 }
760
761 done:
762 if ((*sn) != NULL)
763 PF_SRC_NODE_UNLOCK(*sn);
764
765 if (reason) {
766 counter_u64_add(V_pf_status.counters[reason], 1);
767 }
768
769 return (reason);
770 }
771
772 u_short
pf_get_translation(struct pf_pdesc * pd,int off,struct pf_state_key ** skp,struct pf_state_key ** nkp,struct pf_kanchor_stackframe * anchor_stack,struct pf_krule ** rp,struct pf_udp_mapping ** udp_mapping)773 pf_get_translation(struct pf_pdesc *pd, int off,
774 struct pf_state_key **skp, struct pf_state_key **nkp,
775 struct pf_kanchor_stackframe *anchor_stack, struct pf_krule **rp,
776 struct pf_udp_mapping **udp_mapping)
777 {
778 struct pf_krule *r = NULL;
779 u_short transerror;
780
781 PF_RULES_RASSERT();
782 KASSERT(*skp == NULL, ("*skp not NULL"));
783 KASSERT(*nkp == NULL, ("*nkp not NULL"));
784
785 *rp = NULL;
786
787 if (pd->dir == PF_OUT) {
788 r = pf_match_translation(pd, PF_RULESET_BINAT, anchor_stack);
789 if (r == NULL)
790 r = pf_match_translation(pd, PF_RULESET_NAT, anchor_stack);
791 } else {
792 r = pf_match_translation(pd, PF_RULESET_RDR, anchor_stack);
793 if (r == NULL)
794 r = pf_match_translation(pd, PF_RULESET_BINAT, anchor_stack);
795 }
796
797 if (r == NULL)
798 return (PFRES_MAX);
799
800 switch (r->action) {
801 case PF_NONAT:
802 case PF_NOBINAT:
803 case PF_NORDR:
804 return (PFRES_MAX);
805 }
806
807 transerror = pf_get_transaddr(pd, skp, nkp, r, udp_mapping, r->action, &(r->rdr));
808 if (transerror == PFRES_MATCH)
809 *rp = r;
810
811 return (transerror);
812 }
813
814 u_short
pf_get_transaddr(struct pf_pdesc * pd,struct pf_state_key ** skp,struct pf_state_key ** nkp,struct pf_krule * r,struct pf_udp_mapping ** udp_mapping,uint8_t nat_action,struct pf_kpool * rpool)815 pf_get_transaddr(struct pf_pdesc *pd, struct pf_state_key **skp,
816 struct pf_state_key **nkp, struct pf_krule *r,
817 struct pf_udp_mapping **udp_mapping, uint8_t nat_action,
818 struct pf_kpool *rpool)
819 {
820 struct pf_addr *naddr;
821 struct pf_ksrc_node *sn = NULL;
822 struct pf_srchash *sh = NULL;
823 uint16_t *nportp;
824 uint16_t low, high;
825 u_short reason;
826
827 PF_RULES_RASSERT();
828 KASSERT(r != NULL, ("r is NULL"));
829 KASSERT(!(r->rule_flag & PFRULE_AFTO), ("AFTO rule"));
830
831 if (*skp == NULL && *nkp == NULL) {
832 if (pf_state_key_setup(pd, pd->nsport, pd->ndport, skp, nkp))
833 return (PFRES_MEMORY);
834 }
835
836 naddr = &(*nkp)->addr[1];
837 nportp = &(*nkp)->port[1];
838
839 switch (nat_action) {
840 case PF_NAT:
841 if (pd->proto == IPPROTO_ICMP) {
842 low = 1;
843 high = 65535;
844 } else {
845 low = rpool->proxy_port[0];
846 high = rpool->proxy_port[1];
847 }
848 if (rpool->mape.offset > 0) {
849 if (pf_get_mape_sport(pd, r, naddr, nportp, &sn,
850 &sh, udp_mapping, rpool)) {
851 DPFPRINTF(PF_DEBUG_MISC,
852 ("pf: MAP-E port allocation (%u/%u/%u)"
853 " failed\n",
854 rpool->mape.offset,
855 rpool->mape.psidlen,
856 rpool->mape.psid));
857 reason = PFRES_MAPFAILED;
858 goto notrans;
859 }
860 } else if (pf_get_sport(pd, r, naddr, nportp, low, high, &sn,
861 &sh, rpool, udp_mapping, PF_SN_NAT)) {
862 DPFPRINTF(PF_DEBUG_MISC,
863 ("pf: NAT proxy port allocation (%u-%u) failed\n",
864 rpool->proxy_port[0], rpool->proxy_port[1]));
865 reason = PFRES_MAPFAILED;
866 goto notrans;
867 }
868 break;
869 case PF_BINAT:
870 switch (pd->dir) {
871 case PF_OUT:
872 if (rpool->cur->addr.type == PF_ADDR_DYNIFTL){
873 switch (pd->af) {
874 #ifdef INET
875 case AF_INET:
876 if (rpool->cur->addr.p.dyn->
877 pfid_acnt4 < 1) {
878 reason = PFRES_MAPFAILED;
879 goto notrans;
880 }
881 PF_POOLMASK(naddr,
882 &rpool->cur->addr.p.dyn->pfid_addr4,
883 &rpool->cur->addr.p.dyn->pfid_mask4,
884 &pd->nsaddr, AF_INET);
885 break;
886 #endif /* INET */
887 #ifdef INET6
888 case AF_INET6:
889 if (rpool->cur->addr.p.dyn->
890 pfid_acnt6 < 1) {
891 reason = PFRES_MAPFAILED;
892 goto notrans;
893 }
894 PF_POOLMASK(naddr,
895 &rpool->cur->addr.p.dyn->pfid_addr6,
896 &rpool->cur->addr.p.dyn->pfid_mask6,
897 &pd->nsaddr, AF_INET6);
898 break;
899 #endif /* INET6 */
900 }
901 } else
902 PF_POOLMASK(naddr,
903 &rpool->cur->addr.v.a.addr,
904 &rpool->cur->addr.v.a.mask, &pd->nsaddr,
905 pd->af);
906 break;
907 case PF_IN:
908 if (r->src.addr.type == PF_ADDR_DYNIFTL) {
909 switch (pd->af) {
910 #ifdef INET
911 case AF_INET:
912 if (r->src.addr.p.dyn->pfid_acnt4 < 1) {
913 reason = PFRES_MAPFAILED;
914 goto notrans;
915 }
916 PF_POOLMASK(naddr,
917 &r->src.addr.p.dyn->pfid_addr4,
918 &r->src.addr.p.dyn->pfid_mask4,
919 &pd->ndaddr, AF_INET);
920 break;
921 #endif /* INET */
922 #ifdef INET6
923 case AF_INET6:
924 if (r->src.addr.p.dyn->pfid_acnt6 < 1) {
925 reason = PFRES_MAPFAILED;
926 goto notrans;
927 }
928 PF_POOLMASK(naddr,
929 &r->src.addr.p.dyn->pfid_addr6,
930 &r->src.addr.p.dyn->pfid_mask6,
931 &pd->ndaddr, AF_INET6);
932 break;
933 #endif /* INET6 */
934 }
935 } else
936 PF_POOLMASK(naddr, &r->src.addr.v.a.addr,
937 &r->src.addr.v.a.mask, &pd->ndaddr, pd->af);
938 break;
939 }
940 break;
941 case PF_RDR: {
942 struct pf_state_key_cmp key;
943 int tries;
944 uint16_t cut, low, high, nport;
945
946 reason = pf_map_addr_sn(pd->af, r, &pd->nsaddr, naddr, NULL,
947 NULL, &sn, &sh, rpool, PF_SN_NAT);
948 if (reason != 0)
949 goto notrans;
950 if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
951 PF_POOLMASK(naddr, naddr, &rpool->cur->addr.v.a.mask,
952 &pd->ndaddr, pd->af);
953
954 /* Do not change SCTP ports. */
955 if (pd->proto == IPPROTO_SCTP)
956 break;
957
958 if (rpool->proxy_port[1]) {
959 uint32_t tmp_nport;
960
961 tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) %
962 (rpool->proxy_port[1] - rpool->proxy_port[0] +
963 1)) + rpool->proxy_port[0];
964
965 /* Wrap around if necessary. */
966 if (tmp_nport > 65535)
967 tmp_nport -= 65535;
968 nport = htons((uint16_t)tmp_nport);
969 } else if (rpool->proxy_port[0])
970 nport = htons(rpool->proxy_port[0]);
971 else
972 nport = pd->ndport;
973
974 /*
975 * Update the destination port.
976 */
977 *nportp = nport;
978
979 /*
980 * Do we have a source port conflict in the stack state? Try to
981 * modulate the source port if so. Note that this is racy since
982 * the state lookup may not find any matches here but will once
983 * pf_create_state() actually instantiates the state.
984 */
985 bzero(&key, sizeof(key));
986 key.af = pd->af;
987 key.proto = pd->proto;
988 key.port[0] = pd->nsport;
989 PF_ACPY(&key.addr[0], &pd->nsaddr, key.af);
990 key.port[1] = nport;
991 PF_ACPY(&key.addr[1], naddr, key.af);
992
993 if (!pf_find_state_all_exists(&key, PF_OUT))
994 break;
995
996 tries = 0;
997
998 low = 50001; /* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */
999 high = 65535;
1000 cut = arc4random() % (1 + high - low) + low;
1001 for (uint32_t tmp = cut;
1002 tmp <= high && tmp <= UINT16_MAX &&
1003 tries < V_pf_rdr_srcport_rewrite_tries;
1004 tmp++, tries++) {
1005 key.port[0] = htons(tmp);
1006 if (!pf_find_state_all_exists(&key, PF_OUT)) {
1007 /* Update the source port. */
1008 (*nkp)->port[0] = htons(tmp);
1009 goto out;
1010 }
1011 }
1012 for (uint32_t tmp = cut - 1;
1013 tmp >= low && tries < V_pf_rdr_srcport_rewrite_tries;
1014 tmp--, tries++) {
1015 key.port[0] = htons(tmp);
1016 if (!pf_find_state_all_exists(&key, PF_OUT)) {
1017 /* Update the source port. */
1018 (*nkp)->port[0] = htons(tmp);
1019 goto out;
1020 }
1021 }
1022
1023 /*
1024 * We failed to find a match. Push on ahead anyway, let
1025 * pf_state_insert() be the arbiter of whether the state
1026 * conflict is tolerable. In particular, with TCP connections
1027 * the state may be reused if the TCP state is terminal.
1028 */
1029 DPFPRINTF(PF_DEBUG_MISC,
1030 ("pf: RDR source port allocation failed\n"));
1031 break;
1032
1033 out:
1034 DPFPRINTF(PF_DEBUG_MISC,
1035 ("pf: RDR source port allocation %u->%u\n",
1036 ntohs(pd->nsport), ntohs((*nkp)->port[0])));
1037 break;
1038 }
1039 default:
1040 panic("%s: unknown action %u", __func__, r->action);
1041 }
1042
1043 /* Return success only if translation really happened. */
1044 if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) {
1045 return (PFRES_MATCH);
1046 }
1047
1048 reason = PFRES_MAX;
1049 notrans:
1050 uma_zfree(V_pf_state_key_z, *nkp);
1051 uma_zfree(V_pf_state_key_z, *skp);
1052 *skp = *nkp = NULL;
1053
1054 return (reason);
1055 }
1056
/*
 * Compute the translated source and destination of an af-to rule 'r' for
 * the packet in 'pd' and store them, in the new address family pd->naf,
 * into pd->nsaddr / pd->ndaddr (pd->ndport may change as well).
 *
 * The source address and port come from the rule's nat pool, which must not
 * be empty.  The destination comes from the rdr pool when one is configured
 * and is otherwise derived from the original destination with
 * inet_nat64().  When the change of family crosses ICMP <-> ICMPv6, the
 * echo request/reply type carried in pd->ndport is translated too.
 *
 * Returns 0 on success and -1 on failure, or when the kernel is not built
 * with both INET and INET6.
 */
int
pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd)
{
#if defined(INET) && defined(INET6)
	struct pf_addr		 ndaddr, nsaddr, naddr;
	u_int16_t		 nport = 0;
	int			 prefixlen = 96;
	struct pf_srchash	*sh = NULL;
	struct pf_ksrc_node	*sns = NULL;

	bzero(&nsaddr, sizeof(nsaddr));
	bzero(&ndaddr, sizeof(ndaddr));

	if (V_pf_status.debug >= PF_DEBUG_MISC) {
		printf("pf: af-to %s %s, ",
		    pd->naf == AF_INET ? "inet" : "inet6",
		    TAILQ_EMPTY(&r->rdr.list) ? "nat" : "rdr");
		pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
		printf(" -> ");
		pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
		printf("\n");
	}

	if (TAILQ_EMPTY(&r->nat.list))
		panic("pf_get_transaddr_af: no nat pool for source address");

	/* get source address and port */
	if (pf_get_sport(pd, r, &nsaddr, &nport,
	    r->nat.proxy_port[0], r->nat.proxy_port[1], &sns, &sh, &r->nat,
	    NULL, PF_SN_NAT)) {
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pf: af-to NAT proxy port allocation (%u-%u) failed\n",
		    r->nat.proxy_port[0], r->nat.proxy_port[1]));
		return (-1);
	}

	/*
	 * pd->ndport carries the ICMP type in network byte order: convert
	 * it to host order, translate the echo types between the two ICMP
	 * flavours, and convert back.
	 */
	if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) {
		uint16_t icmp_type = ntohs(pd->ndport);

		if (icmp_type == ICMP6_ECHO_REQUEST)
			icmp_type = ICMP_ECHO;
		else if (icmp_type == ICMP6_ECHO_REPLY)
			icmp_type = ICMP_ECHOREPLY;
		pd->ndport = htons(icmp_type);
	} else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) {
		uint16_t icmp_type = ntohs(pd->ndport);

		if (icmp_type == ICMP_ECHO)
			icmp_type = ICMP6_ECHO_REQUEST;
		else if (icmp_type == ICMP_ECHOREPLY)
			icmp_type = ICMP6_ECHO_REPLY;
		pd->ndport = htons(icmp_type);
	}

	/* get the destination address and port */
	if (! TAILQ_EMPTY(&r->rdr.list)) {
		/*
		 * pf_get_sport() may have left a source node from the nat
		 * pool in sns; pf_map_addr_sn() requires it to be NULL.
		 */
		sns = NULL;
		if (pf_map_addr_sn(pd->naf, r, &nsaddr, &naddr, NULL, NULL,
		    &sns, NULL, &r->rdr, PF_SN_NAT))
			return (-1);
		if (r->rdr.proxy_port[0])
			pd->ndport = htons(r->rdr.proxy_port[0]);

		if (pd->naf == AF_INET) {
			/* The prefix is the IPv4 rdr address */
			prefixlen = in_mask2len(
			    (struct in_addr *)&r->rdr.cur->addr.v.a.mask);
			inet_nat46(pd->naf, &pd->ndaddr, &ndaddr, &naddr,
			    prefixlen);
		} else {
			/* The prefix is the IPv6 rdr address */
			prefixlen = in6_mask2len(
			    (struct in6_addr *)&r->rdr.cur->addr.v.a.mask,
			    NULL);
			inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &naddr,
			    prefixlen);
		}
	} else {
		if (pd->naf == AF_INET) {
			/* The prefix is the IPv6 dst address */
			prefixlen = in6_mask2len(
			    (struct in6_addr *)&r->dst.addr.v.a.mask, NULL);
			if (prefixlen < 32)
				prefixlen = 96;
			inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &pd->ndaddr,
			    prefixlen);
		} else {
			/*
			 * The prefix is the IPv6 nat address
			 * (that was stored in pd->nsaddr)
			 */
			prefixlen = in6_mask2len(
			    (struct in6_addr *)&r->nat.cur->addr.v.a.mask,
			    NULL);
			if (prefixlen > 96)
				prefixlen = 96;
			inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &nsaddr,
			    prefixlen);
		}
	}

	PF_ACPY(&pd->nsaddr, &nsaddr, pd->naf);
	PF_ACPY(&pd->ndaddr, &ndaddr, pd->naf);

	if (V_pf_status.debug >= PF_DEBUG_MISC) {
		printf("pf: af-to %s done, prefixlen %d, ",
		    pd->naf == AF_INET ? "inet" : "inet6",
		    prefixlen);
		pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
		printf(" -> ");
		pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
		printf("\n");
	}

	return (0);
#else
	return (-1);
#endif
}
1171