1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2020 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 #include "opt_route.h"
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/mbuf.h>
37 #include <sys/socket.h>
38 #include <sys/sysctl.h>
39 #include <sys/syslog.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/rmlock.h>
43
44 #include <net/if.h>
45 #include <net/if_var.h>
46 #include <net/if_dl.h>
47 #include <net/vnet.h>
48 #include <net/route.h>
49 #include <net/route/route_ctl.h>
50 #include <net/route/route_var.h>
51 #include <net/route/nhop_utils.h>
52 #include <net/route/nhop.h>
53 #include <net/route/nhop_var.h>
54 #include <netinet/in.h>
55 #include <netinet6/scope6_var.h>
56 #include <netinet6/in6_var.h>
57
58 #define DEBUG_MOD_NAME route_ctl
59 #define DEBUG_MAX_LEVEL LOG_DEBUG
60 #include <net/route/route_debug.h>
61 _DECLARE_DEBUG(LOG_INFO);
62
/*
 * This file contains the control plane functions for the routing tables.
 *
 * All functions assume that they are called within the net epoch.
 */
68
/*
 * Scratch sockaddr storage able to hold a generic, IPv4 or IPv6 sockaddr.
 * Used in this file as on-stack space for netmasks built from a prefix
 * length (see rib_add_route_px() and rib_del_route_px()).
 */
union sockaddr_union {
	struct sockaddr		sa;	/* generic view */
	struct sockaddr_in	sin;	/* IPv4 view */
	struct sockaddr_in6	sin6;	/* IPv6 view */
	char			_buf[32];	/* keeps the union at least 32 bytes large */
};
75
/* Route addition / change driven by a struct rt_addrinfo request. */
static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
    struct rib_cmd_info *rc);

/* Route addition honoring the RTM_F_* operation flags. */
static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
#ifdef ROUTE_MPATH
/* Multipath (append) variant; only built with ROUTE_MPATH. */
static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
    int op_flags, struct rib_cmd_info *rc);
#endif

/* Table link / unlink helpers. */
static int add_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc);
static int delete_route(struct rib_head *rnh, struct rtentry *rt,
    struct rib_cmd_info *rc);
static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);

/* Helpers mapping request / nexthop flags to the file-local NH_PRIORITY_*. */
static int get_prio_from_info(const struct rt_addrinfo *info);
static int nhop_get_prio(const struct nhop_object *nh);

#ifdef ROUTE_MPATH
static bool rib_can_multipath(struct rib_head *rh);
#endif
103
/*
 * Per-vnet multipath routing configuration.
 *
 * net.route.multipath is writable and defaults to 1 when the kernel is
 * built with ROUTE_MPATH; otherwise it is read-only and fixed at 0.
 */
SYSCTL_DECL(_net_route);
#define	V_rib_route_multipath	VNET(rib_route_multipath)
#ifdef ROUTE_MPATH
#define _MP_FLAGS	CTLFLAG_RW
VNET_DEFINE(u_int, rib_route_multipath) = 1;
#else
#define _MP_FLAGS	CTLFLAG_RD
VNET_DEFINE(u_int, rib_route_multipath) = 0;
#endif
SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
    &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
/* _MP_FLAGS is only needed for the declaration above. */
#undef _MP_FLAGS
117
#ifdef ROUTE_MPATH
/*
 * Read-only per-vnet knob (net.route.hash_outbound), initially 0.
 * add_route_flags_mpath() sets it to 1 once a nexthop group gets installed.
 */
VNET_DEFINE(u_int, fib_hash_outbound) = 0;
SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
    &VNET_NAME(fib_hash_outbound), 0,
    "Compute flowid for locally-originated packets");

/* Default entropy to add to the hash calculation for outbound connections. */
uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
};
#endif
133
#if defined(INET) && defined(INET6)
/*
 * Per-vnet, writable switch (net.route.ipv6_nexthop, default 1) reported
 * by rib_can_4o6_nhop(): IPv4 routes via IPv6 nexthops.
 * Only available when both INET and INET6 are compiled in.
 */
FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
#define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
#endif
141
/* Debug bits: declares the read-only net.route.debug sysctl node. */
SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
144
145 static struct rib_head *
get_rnh(uint32_t fibnum,const struct rt_addrinfo * info)146 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
147 {
148 struct rib_head *rnh;
149 struct sockaddr *dst;
150
151 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));
152
153 dst = info->rti_info[RTAX_DST];
154 rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
155
156 return (rnh);
157 }
158
#if defined(INET) && defined(INET6)
/*
 * Returns true if the per-vnet knob behind V_rib_route_ipv6_nexthop
 * is non-zero in the current vnet.
 */
bool
rib_can_4o6_nhop(void)
{
	return (V_rib_route_ipv6_nexthop != 0);
}
#endif
166
167 #ifdef ROUTE_MPATH
168 static bool
rib_can_multipath(struct rib_head * rh)169 rib_can_multipath(struct rib_head *rh)
170 {
171 int result;
172
173 CURVNET_SET(rh->rib_vnet);
174 result = !!V_rib_route_multipath;
175 CURVNET_RESTORE();
176
177 return (result);
178 }
179
180 /*
181 * Check is nhop is multipath-eligible.
182 * Avoid nhops without gateways and redirects.
183 *
184 * Returns 1 for multipath-eligible nexthop,
185 * 0 otherwise.
186 */
187 bool
nhop_can_multipath(const struct nhop_object * nh)188 nhop_can_multipath(const struct nhop_object *nh)
189 {
190
191 if ((nh->nh_flags & NHF_MULTIPATH) != 0)
192 return (1);
193 if ((nh->nh_flags & NHF_GATEWAY) == 0)
194 return (0);
195 if ((nh->nh_flags & NHF_REDIRECT) != 0)
196 return (0);
197
198 return (1);
199 }
200 #endif
201
202 static int
get_info_weight(const struct rt_addrinfo * info,uint32_t default_weight)203 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
204 {
205 uint32_t weight;
206
207 if (info->rti_mflags & RTV_WEIGHT)
208 weight = info->rti_rmx->rmx_weight;
209 else
210 weight = default_weight;
211 /* Keep upper 1 byte for adm distance purposes */
212 if (weight > RT_MAX_WEIGHT)
213 weight = RT_MAX_WEIGHT;
214 else if (weight == 0)
215 weight = default_weight;
216
217 return (weight);
218 }
219
220 /*
221 * File-local concept for distingushing between the normal and
222 * RTF_PINNED routes tha can override the "normal" one.
223 */
224 #define NH_PRIORITY_HIGH 2
225 #define NH_PRIORITY_NORMAL 1
226 static int
get_prio_from_info(const struct rt_addrinfo * info)227 get_prio_from_info(const struct rt_addrinfo *info)
228 {
229 if (info->rti_flags & RTF_PINNED)
230 return (NH_PRIORITY_HIGH);
231 return (NH_PRIORITY_NORMAL);
232 }
233
234 static int
nhop_get_prio(const struct nhop_object * nh)235 nhop_get_prio(const struct nhop_object *nh)
236 {
237 if (NH_IS_PINNED(nh))
238 return (NH_PRIORITY_HIGH);
239 return (NH_PRIORITY_NORMAL);
240 }
241
242 /*
243 * Check if specified @gw matches gw data in the nexthop @nh.
244 *
245 * Returns true if matches, false otherwise.
246 */
247 bool
match_nhop_gw(const struct nhop_object * nh,const struct sockaddr * gw)248 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
249 {
250
251 if (nh->gw_sa.sa_family != gw->sa_family)
252 return (false);
253
254 switch (gw->sa_family) {
255 case AF_INET:
256 return (nh->gw4_sa.sin_addr.s_addr ==
257 ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
258 case AF_INET6:
259 {
260 const struct sockaddr_in6 *gw6;
261 gw6 = (const struct sockaddr_in6 *)gw;
262
263 /*
264 * Currently (2020-09) IPv6 gws in kernel have their
265 * scope embedded. Once this becomes false, this code
266 * has to be revisited.
267 */
268 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
269 &gw6->sin6_addr))
270 return (true);
271 return (false);
272 }
273 case AF_LINK:
274 {
275 const struct sockaddr_dl *sdl;
276 sdl = (const struct sockaddr_dl *)gw;
277 return (nh->gwl_sa.sdl_index == sdl->sdl_index);
278 }
279 default:
280 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
281 }
282
283 /* NOTREACHED */
284 return (false);
285 }
286
/*
 * rib_filter_f-compatible wrapper around match_nhop_gw(): @gw_sa is
 * treated as a sockaddr and compared against the gateway of @nh.
 * @rt is not used.
 */
int
rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa)
{
	return (match_nhop_gw(nh, (const struct sockaddr *)gw_sa));
}
298
/* Argument block for match_gw_one(). */
struct gw_filter_data {
	const struct sockaddr	*gw;	/* gateway to match nexthops against */
	int			count;	/* number of matches seen so far */
};
303
304 /*
305 * Matches first occurence of the gateway provided in @gwd
306 */
307 static int
match_gw_one(const struct rtentry * rt,const struct nhop_object * nh,void * _data)308 match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
309 {
310 struct gw_filter_data *gwd = (struct gw_filter_data *)_data;
311
312 /* Return only first match to make rtsock happy */
313 if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0)
314 return (1);
315 return (0);
316 }
317
318 /*
319 * Checks if data in @info matches nexhop @nh.
320 *
321 * Returns 0 on success,
322 * ESRCH if not matched,
323 * ENOENT if filter function returned false
324 */
325 int
check_info_match_nhop(const struct rt_addrinfo * info,const struct rtentry * rt,const struct nhop_object * nh)326 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
327 const struct nhop_object *nh)
328 {
329 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
330
331 if (info->rti_filter != NULL) {
332 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
333 return (ENOENT);
334 else
335 return (0);
336 }
337 if ((gw != NULL) && !match_nhop_gw(nh, gw))
338 return (ESRCH);
339
340 return (0);
341 }
342
343 /*
344 * Runs exact prefix match based on @dst and @netmask.
345 * Returns matched @rtentry if found or NULL.
346 * If rtentry was found, saves nexthop / weight value into @rnd.
347 */
348 static struct rtentry *
lookup_prefix_bysa(struct rib_head * rnh,const struct sockaddr * dst,const struct sockaddr * netmask,struct route_nhop_data * rnd)349 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
350 const struct sockaddr *netmask, struct route_nhop_data *rnd)
351 {
352 struct rtentry *rt;
353
354 RIB_LOCK_ASSERT(rnh);
355
356 rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
357 if (rt != NULL) {
358 rnd->rnd_nhop = rt->rt_nhop;
359 rnd->rnd_weight = rt->rt_weight;
360 } else {
361 rnd->rnd_nhop = NULL;
362 rnd->rnd_weight = 0;
363 }
364
365 return (rt);
366 }
367
/*
 * Exact prefix lookup in @rnh using the key and mask of @rt.
 * Returns the entry found (or NULL) and fills @rnd the same way
 * lookup_prefix_bysa() does.
 */
struct rtentry *
lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
    struct route_nhop_data *rnd)
{
	const struct sockaddr *key = rt_key_const(rt);
	const struct sockaddr *mask = rt_mask_const(rt);

	return (lookup_prefix_bysa(rnh, key, mask, rnd));
}
374
375 /*
376 * Runs exact prefix match based on dst/netmask from @info.
377 * Assumes RIB lock is held.
378 * Returns matched @rtentry if found or NULL.
379 * If rtentry was found, saves nexthop / weight value into @rnd.
380 */
381 struct rtentry *
lookup_prefix(struct rib_head * rnh,const struct rt_addrinfo * info,struct route_nhop_data * rnd)382 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
383 struct route_nhop_data *rnd)
384 {
385 struct rtentry *rt;
386
387 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
388 info->rti_info[RTAX_NETMASK], rnd);
389
390 return (rt);
391 }
392
/*
 * Builds the netmask for @family/@plen in the storage *@pmask points to
 * and clears the host bits of @_dst in place.
 *
 * @family: address family of @_dst; AF_INET and AF_INET6 are handled when
 *          the respective protocol is compiled in
 * @plen:   prefix length, or -1 to request "no netmask"
 * @_dst:   destination sockaddr; its address is ANDed with the mask
 * @pmask:  in: pointer to caller-provided mask storage;
 *          out: still points to that storage (now holding the mask), or
 *          is set to NULL when @plen is -1 or equals the full address
 *          width (32 / 128)
 *
 * @plen is validated before the mask storage is written.
 *
 * Returns true on success, false if @family is not handled or @plen is
 * out of range for the family.
 */
static bool
fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
    struct sockaddr **pmask)
{
	if (plen == -1) {
		*pmask = NULL;
		return (true);
	}

	switch (family) {
#ifdef INET
	case AF_INET:
	    {
		struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask);
		struct sockaddr_in *dst = (struct sockaddr_in *)_dst;
		uint32_t maddr;

		if (plen < 0 || plen > 32)
			return (false);

		memset(mask, 0, sizeof(*mask));
		mask->sin_family = family;
		mask->sin_len = sizeof(*mask);
		if (plen == 32) {
			/* Host route: no netmask. */
			*pmask = NULL;
			return (true);
		}

		/*
		 * Shift an unsigned value: "1 << 31" (plen == 1) overflows
		 * a signed int, which is undefined behaviour.
		 */
		maddr = htonl(plen != 0 ?
		    ~(((uint32_t)1 << (32 - plen)) - 1) : 0);
		mask->sin_addr.s_addr = maddr;
		/* Both values are in network byte order; AND is order-agnostic. */
		dst->sin_addr.s_addr &= maddr;
		return (true);
	    }
#endif
#ifdef INET6
	case AF_INET6:
	    {
		struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask);
		struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst;

		if (plen < 0 || plen > 128)
			return (false);

		memset(mask, 0, sizeof(*mask));
		mask->sin6_family = family;
		mask->sin6_len = sizeof(*mask);
		if (plen == 128) {
			/* Host route: no netmask. */
			*pmask = NULL;
			return (true);
		}

		ip6_writemask(&mask->sin6_addr, plen);
		IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr);
		return (true);
	    }
#endif
	default:
		break;
	}

	/* Unsupported address family. */
	return (false);
}
452
453 /*
454 * Attempts to add @dst/plen prefix with nexthop/nexhopgroup data @rnd
455 * to the routing table.
456 *
457 * @fibnum: verified kernel rtable id to insert route to
458 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
459 * @plen: prefix length (or -1 if host route or not applicable for AF)
460 * @op_flags: combination of RTM_F_ flags
461 * @rc: storage to report operation result
462 *
463 * Returns 0 on success.
464 */
465 int
rib_add_route_px(uint32_t fibnum,struct sockaddr * dst,int plen,struct route_nhop_data * rnd,int op_flags,struct rib_cmd_info * rc)466 rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
467 struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc)
468 {
469 union sockaddr_union mask_storage;
470 struct sockaddr *netmask = &mask_storage.sa;
471 struct rtentry *rt = NULL;
472
473 NET_EPOCH_ASSERT();
474
475 bzero(rc, sizeof(struct rib_cmd_info));
476 rc->rc_cmd = RTM_ADD;
477
478 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
479 if (rnh == NULL)
480 return (EAFNOSUPPORT);
481
482 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
483 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
484 return (EINVAL);
485 }
486
487 if (op_flags & RTM_F_CREATE) {
488 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) {
489 FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed");
490 return (ENOMEM);
491 }
492 } else {
493 struct route_nhop_data rnd_tmp;
494 RIB_RLOCK_TRACKER;
495
496 RIB_RLOCK(rnh);
497 rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd_tmp);
498 RIB_RUNLOCK(rnh);
499
500 if (rt == NULL)
501 return (ESRCH);
502 }
503
504 return (add_route_flags(rnh, rt, rnd, op_flags, rc));
505 }
506
507 /*
508 * Attempts to delete @dst/plen prefix matching gateway @gw from the
509 * routing rable.
510 *
511 * @fibnum: rtable id to remove route from
512 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
513 * @plen: prefix length (or -1 if host route or not applicable for AF)
514 * @gw: gateway to match
515 * @op_flags: combination of RTM_F_ flags
516 * @rc: storage to report operation result
517 *
518 * Returns 0 on success.
519 */
520 int
rib_del_route_px_gw(uint32_t fibnum,struct sockaddr * dst,int plen,const struct sockaddr * gw,int op_flags,struct rib_cmd_info * rc)521 rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
522 const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
523 {
524 struct gw_filter_data gwd = { .gw = gw };
525
526 return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
527 }
528
529 /*
530 * Attempts to delete @dst/plen prefix matching @filter_func from the
531 * routing rable.
532 *
533 * @fibnum: rtable id to remove route from
534 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
535 * @plen: prefix length (or -1 if host route or not applicable for AF)
536 * @filter_func: func to be called for each nexthop of the prefix for matching
537 * @filter_arg: argument to pass to @filter_func
538 * @op_flags: combination of RTM_F_ flags
539 * @rc: storage to report operation result
540 *
541 * Returns 0 on success.
542 */
543 int
rib_del_route_px(uint32_t fibnum,struct sockaddr * dst,int plen,rib_filter_f_t * filter_func,void * filter_arg,int op_flags,struct rib_cmd_info * rc)544 rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
545 rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
546 struct rib_cmd_info *rc)
547 {
548 union sockaddr_union mask_storage;
549 struct sockaddr *netmask = &mask_storage.sa;
550 int error;
551
552 NET_EPOCH_ASSERT();
553
554 bzero(rc, sizeof(struct rib_cmd_info));
555 rc->rc_cmd = RTM_DELETE;
556
557 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
558 if (rnh == NULL)
559 return (EAFNOSUPPORT);
560
561 if (dst->sa_len > sizeof(mask_storage)) {
562 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
563 return (EINVAL);
564 }
565
566 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
567 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
568 return (EINVAL);
569 }
570
571 int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;
572
573 RIB_WLOCK(rnh);
574 struct route_nhop_data rnd;
575 struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
576 if (rt != NULL) {
577 error = rt_delete_conditional(rnh, rt, prio, filter_func,
578 filter_arg, rc);
579 } else
580 error = ESRCH;
581 RIB_WUNLOCK(rnh);
582
583 if (error != 0)
584 return (error);
585
586 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
587
588 if (rc->rc_cmd == RTM_DELETE)
589 rt_free(rc->rc_rt);
590 #ifdef ROUTE_MPATH
591 else {
592 /*
593 * Deleting 1 path may result in RTM_CHANGE to
594 * a different mpath group/nhop.
595 * Free old mpath group.
596 */
597 nhop_free_any(rc->rc_nh_old);
598 }
599 #endif
600
601 return (0);
602 }
603
604 /*
605 * Tries to copy route @rt from one rtable to the rtable specified by @dst_rh.
606 * @rt: route to copy.
607 * @rnd_src: nhop and weight. Multipath routes are not supported
608 * @rh_dst: target rtable.
609 * @rc: operation result storage
610 *
611 * Return 0 on success.
612 */
613 int
rib_copy_route(struct rtentry * rt,const struct route_nhop_data * rnd_src,struct rib_head * rh_dst,struct rib_cmd_info * rc)614 rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src,
615 struct rib_head *rh_dst, struct rib_cmd_info *rc)
616 {
617 struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop;
618 int error;
619
620 MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0);
621
622 IF_DEBUG_LEVEL(LOG_DEBUG2) {
623 char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE];
624 nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf));
625 rt_print_buf(rt, rtbuf, sizeof(rtbuf));
626 FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u",
627 rtbuf, nhbuf, nhop_get_fibnum(nh_src));
628 }
629 struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family);
630 if (nh == NULL) {
631 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop");
632 return (ENOMEM);
633 }
634 nhop_copy(nh, rnd_src->rnd_nhop);
635 nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop));
636 nhop_set_fibnum(nh, rh_dst->rib_fibnum);
637 nh = nhop_get_nhop_internal(rh_dst, nh, &error);
638 if (error != 0) {
639 FIB_RH_LOG(LOG_INFO, rh_dst,
640 "unable to finalize new nexthop: error %d", error);
641 return (ENOMEM);
642 }
643
644 struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt));
645 if (rt_new == NULL) {
646 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry");
647 nhop_free(nh);
648 return (ENOMEM);
649 }
650
651 struct route_nhop_data rnd = {
652 .rnd_nhop = nh,
653 .rnd_weight = rnd_src->rnd_weight
654 };
655 int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0);
656 error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc);
657
658 if (error != 0) {
659 IF_DEBUG_LEVEL(LOG_DEBUG2) {
660 char buf[NHOP_PRINT_BUFSIZE];
661 rt_print_buf(rt_new, buf, sizeof(buf));
662 FIB_RH_LOG(LOG_DEBUG, rh_dst,
663 "Unable to add route %s: error %d", buf, error);
664 }
665 nhop_free(nh);
666 rt_free_immediate(rt_new);
667 }
668 return (error);
669 }
670
671 /*
672 * Adds route defined by @info into the kernel table specified by @fibnum and
673 * sa_family in @info->rti_info[RTAX_DST].
674 *
675 * Returns 0 on success and fills in operation metadata into @rc.
676 */
677 int
rib_add_route(uint32_t fibnum,struct rt_addrinfo * info,struct rib_cmd_info * rc)678 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
679 struct rib_cmd_info *rc)
680 {
681 struct rib_head *rnh;
682 int error;
683
684 NET_EPOCH_ASSERT();
685
686 rnh = get_rnh(fibnum, info);
687 if (rnh == NULL)
688 return (EAFNOSUPPORT);
689
690 /*
691 * Check consistency between RTF_HOST flag and netmask
692 * existence.
693 */
694 if (info->rti_flags & RTF_HOST)
695 info->rti_info[RTAX_NETMASK] = NULL;
696 else if (info->rti_info[RTAX_NETMASK] == NULL) {
697 FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
698 return (EINVAL);
699 }
700
701 bzero(rc, sizeof(struct rib_cmd_info));
702 rc->rc_cmd = RTM_ADD;
703
704 error = add_route_byinfo(rnh, info, rc);
705 if (error == 0)
706 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
707
708 return (error);
709 }
710
711 static int
add_route_byinfo(struct rib_head * rnh,struct rt_addrinfo * info,struct rib_cmd_info * rc)712 add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
713 struct rib_cmd_info *rc)
714 {
715 struct route_nhop_data rnd_add;
716 struct nhop_object *nh;
717 struct rtentry *rt;
718 struct sockaddr *dst, *gateway, *netmask;
719 int error;
720
721 dst = info->rti_info[RTAX_DST];
722 gateway = info->rti_info[RTAX_GATEWAY];
723 netmask = info->rti_info[RTAX_NETMASK];
724
725 if ((info->rti_flags & RTF_GATEWAY) && !gateway) {
726 FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
727 return (EINVAL);
728 }
729 if (dst && gateway && !nhop_check_gateway(dst->sa_family, gateway->sa_family)) {
730 FIB_RH_LOG(LOG_DEBUG, rnh,
731 "error: invalid dst/gateway family combination (%d, %d)",
732 dst->sa_family, gateway->sa_family);
733 return (EINVAL);
734 }
735
736 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
737 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
738 dst->sa_len);
739 return (EINVAL);
740 }
741
742 if (info->rti_ifa == NULL) {
743 error = rt_getifa_fib(info, rnh->rib_fibnum);
744 if (error)
745 return (error);
746 }
747
748 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL)
749 return (ENOBUFS);
750
751 error = nhop_create_from_info(rnh, info, &nh);
752 if (error != 0) {
753 rt_free_immediate(rt);
754 return (error);
755 }
756
757 rnd_add.rnd_nhop = nh;
758 rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);
759
760 int op_flags = RTM_F_CREATE;
761
762 /*
763 * Set the desired action when the route already exists:
764 * If RTF_PINNED is present, assume the direct kernel routes that cannot be multipath.
765 * Otherwise, append the path.
766 */
767 op_flags |= (info->rti_flags & RTF_PINNED) ? RTM_F_REPLACE : RTM_F_APPEND;
768
769 return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc));
770 }
771
/*
 * Installs the prefix of @rt with nexthop data @rnd_add into @rnh,
 * following the RTM_F_* bits in @op_flags.
 *
 * If the prefix is not in the table yet, it is added when RTM_F_CREATE
 * is set (ESRCH otherwise). If it exists: RTM_F_EXCL yields EEXIST;
 * RTM_F_REPLACE switches the existing entry to @rnd_add unless the
 * current nexthop has NH_PRIORITY_HIGH (EEXIST); RTM_F_APPEND tries the
 * multipath addition when ROUTE_MPATH is compiled in and both nexthops
 * are eligible. Anything else results in EEXIST.
 *
 * Ownership, as implemented by the "out" path and the multipath branch:
 * on every error return the reference on the nexthop in @rnd_add is
 * released and, if RTM_F_CREATE is set, @rt is freed. Callers must not
 * free either of them after a failure. On a successful replace the old
 * nexthop is released and an RTM_F_CREATE @rt is freed as unused.
 *
 * Returns 0 on success or an errno value.
 */
static int
add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add,
    int op_flags, struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_orig;
	struct nhop_object *nh;
	struct rtentry *rt_orig;
	int error = 0;

	MPASS(rt != NULL);

	/* Nexthop to release at "out"; redirected to the old one on replace. */
	nh = rnd_add->rnd_nhop;

	RIB_WLOCK(rnh);

	rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig);

	if (rt_orig == NULL) {
		if (op_flags & RTM_F_CREATE)
			error = add_route(rnh, rt, rnd_add, rc);
		else
			error = ESRCH; /* no entry but creation was not required */
		RIB_WUNLOCK(rnh);
		if (error != 0)
			goto out;
		/* Success: @rt and the nexthop are now used by the table. */
		return (0);
	}

	if (op_flags & RTM_F_EXCL) {
		/* We have existing route in the RIB but not allowed to replace. */
		RIB_WUNLOCK(rnh);
		error = EEXIST;
		goto out;
	}

	/* Now either append or replace */
	if (op_flags & RTM_F_REPLACE) {
		if (nhop_get_prio(rnd_orig.rnd_nhop) == NH_PRIORITY_HIGH) {
			/* Old path is "better" (e.g. has PINNED flag set) */
			RIB_WUNLOCK(rnh);
			error = EEXIST;
			goto out;
		}
		/* NOTE(review): the return value of change_route() is ignored here. */
		change_route(rnh, rt_orig, rnd_add, rc);
		RIB_WUNLOCK(rnh);
		/* Release the replaced nexthop instead of the new one. */
		nh = rc->rc_nh_old;
		goto out;
	}

	RIB_WUNLOCK(rnh);

#ifdef ROUTE_MPATH
	if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) &&
	    nhop_can_multipath(rnd_add->rnd_nhop) &&
	    nhop_can_multipath(rnd_orig.rnd_nhop)) {

		/* Retry while the helper reports EAGAIN, up to RIB_MAX_RETRIES times. */
		for (int i = 0; i < RIB_MAX_RETRIES; i++) {
			error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig,
			    op_flags, rc);
			if (error != EAGAIN)
				break;
			RTSTAT_INC(rts_add_retry);
		}

		/*
		 * Original nhop reference is unused in any case.
		 */
		nhop_free_any(rnd_add->rnd_nhop);
		if (op_flags & RTM_F_CREATE) {
			/* Free @rt unless the operation ended up as an RTM_ADD. */
			if (error != 0 || rc->rc_cmd != RTM_ADD)
				rt_free_immediate(rt);
		}
		return (error);
	}
#endif
	/* Out of options - free state and return error */
	error = EEXIST;
out:
	/* @rt was allocated for this request only when RTM_F_CREATE is set. */
	if (op_flags & RTM_F_CREATE)
		rt_free_immediate(rt);
	nhop_free_any(nh);

	return (error);
}
856
#ifdef ROUTE_MPATH
/*
 * Single attempt to append the path in @rnd_add to the existing prefix @rt.
 *
 * @rnh: routing table the prefix belongs to
 * @rt: existing rtentry of the prefix
 * @rnd_add: nexthop / weight to add
 * @rnd_orig: nexthop data the prefix is expected to have currently;
 *   refreshed from the table when the group creation reports EAGAIN
 * @op_flags: combination of RTM_F_ flags
 * @rc: storage to report operation result
 *
 * Returns 0 on success, EAGAIN if the caller should retry with the
 * refreshed @rnd_orig, ENOENT if the prefix disappeared and RTM_F_CREATE
 * was not requested, or another errno from the helpers.
 */
static int
add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
    int op_flags, struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_new;
	int error = 0;

	error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new);
	if (error != 0) {
		if (error == EAGAIN) {
			struct rtentry *rt_cur;

			/*
			 * Group creation failed, most probably because
			 * @rnd_orig data got scheduled for deletion.
			 * Refresh @rnd_orig data and retry.
			 */
			RIB_RLOCK(rnh);
			rt_cur = lookup_prefix_rt(rnh, rt, rnd_orig);
			RIB_RUNLOCK(rnh);
			/*
			 * Test the lookup result: @rnd_orig itself is a
			 * caller-provided pointer and is never NULL.
			 */
			if (rt_cur == NULL && !(op_flags & RTM_F_CREATE)) {
				/* In this iteration route doesn't exist */
				error = ENOENT;
			}
		}
		return (error);
	}
	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
	if (error != 0)
		return (error);

	if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) {
		/*
		 * First multipath route got installed. Enable local
		 * outbound connections hashing.
		 */
		if (bootverbose)
			printf("FIB: enabled flowid calculation for locally-originated packets\n");
		V_fib_hash_outbound = 1;
	}

	return (0);
}
#endif
902
903 /*
904 * Removes route defined by @info from the kernel table specified by @fibnum and
905 * sa_family in @info->rti_info[RTAX_DST].
906 *
907 * Returns 0 on success and fills in operation metadata into @rc.
908 */
909 int
rib_del_route(uint32_t fibnum,struct rt_addrinfo * info,struct rib_cmd_info * rc)910 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
911 {
912 struct rib_head *rnh;
913 struct sockaddr *dst, *netmask;
914 struct sockaddr_storage mdst;
915 int error;
916
917 NET_EPOCH_ASSERT();
918
919 rnh = get_rnh(fibnum, info);
920 if (rnh == NULL)
921 return (EAFNOSUPPORT);
922
923 bzero(rc, sizeof(struct rib_cmd_info));
924 rc->rc_cmd = RTM_DELETE;
925
926 dst = info->rti_info[RTAX_DST];
927 netmask = info->rti_info[RTAX_NETMASK];
928
929 if (netmask != NULL) {
930 /* Ensure @dst is always properly masked */
931 if (dst->sa_len > sizeof(mdst)) {
932 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
933 return (EINVAL);
934 }
935 rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
936 dst = (struct sockaddr *)&mdst;
937 }
938
939 rib_filter_f_t *filter_func = NULL;
940 void *filter_arg = NULL;
941 struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] };
942
943 if (info->rti_filter != NULL) {
944 filter_func = info->rti_filter;
945 filter_arg = info->rti_filterdata;
946 } else if (gwd.gw != NULL) {
947 filter_func = match_gw_one;
948 filter_arg = &gwd;
949 }
950
951 int prio = get_prio_from_info(info);
952
953 RIB_WLOCK(rnh);
954 struct route_nhop_data rnd;
955 struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
956 if (rt != NULL) {
957 error = rt_delete_conditional(rnh, rt, prio, filter_func,
958 filter_arg, rc);
959 } else
960 error = ESRCH;
961 RIB_WUNLOCK(rnh);
962
963 if (error != 0)
964 return (error);
965
966 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
967
968 if (rc->rc_cmd == RTM_DELETE)
969 rt_free(rc->rc_rt);
970 #ifdef ROUTE_MPATH
971 else {
972 /*
973 * Deleting 1 path may result in RTM_CHANGE to
974 * a different mpath group/nhop.
975 * Free old mpath group.
976 */
977 nhop_free_any(rc->rc_nh_old);
978 }
979 #endif
980
981 return (0);
982 }
983
984 /*
985 * Conditionally unlinks rtentry paths from @rnh matching @cb.
986 * Returns 0 on success with operation result stored in @rc.
987 * On error, returns:
988 * ESRCH - if prefix was not found or filter function failed to match
989 * EADDRINUSE - if trying to delete higher priority route.
990 */
991 static int
rt_delete_conditional(struct rib_head * rnh,struct rtentry * rt,int prio,rib_filter_f_t * cb,void * cbdata,struct rib_cmd_info * rc)992 rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
993 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc)
994 {
995 struct nhop_object *nh = rt->rt_nhop;
996
997 #ifdef ROUTE_MPATH
998 if (NH_IS_NHGRP(nh)) {
999 struct nhgrp_object *nhg = (struct nhgrp_object *)nh;
1000 struct route_nhop_data rnd;
1001 int error;
1002
1003 if (cb == NULL)
1004 return (ESRCH);
1005 error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd);
1006 if (error == 0) {
1007 if (rnd.rnd_nhgrp == nhg) {
1008 /* No match, unreference new group and return. */
1009 nhop_free_any(rnd.rnd_nhop);
1010 return (ESRCH);
1011 }
1012 error = change_route(rnh, rt, &rnd, rc);
1013 }
1014 return (error);
1015 }
1016 #endif
1017 if (cb != NULL && !cb(rt, nh, cbdata))
1018 return (ESRCH);
1019
1020 if (prio < nhop_get_prio(nh))
1021 return (EADDRINUSE);
1022
1023 return (delete_route(rnh, rt, rc));
1024 }
1025
1026 int
rib_change_route(uint32_t fibnum,struct rt_addrinfo * info,struct rib_cmd_info * rc)1027 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
1028 struct rib_cmd_info *rc)
1029 {
1030 RIB_RLOCK_TRACKER;
1031 struct route_nhop_data rnd_orig;
1032 struct rib_head *rnh;
1033 struct rtentry *rt;
1034 int error;
1035
1036 NET_EPOCH_ASSERT();
1037
1038 rnh = get_rnh(fibnum, info);
1039 if (rnh == NULL)
1040 return (EAFNOSUPPORT);
1041
1042 bzero(rc, sizeof(struct rib_cmd_info));
1043 rc->rc_cmd = RTM_CHANGE;
1044
1045 /* Check if updated gateway exists */
1046 if ((info->rti_flags & RTF_GATEWAY) &&
1047 (info->rti_info[RTAX_GATEWAY] == NULL)) {
1048
1049 /*
1050 * route(8) adds RTF_GATEWAY flag if -interface is not set.
1051 * Remove RTF_GATEWAY to enforce consistency and maintain
1052 * compatibility..
1053 */
1054 info->rti_flags &= ~RTF_GATEWAY;
1055 }
1056
1057 /*
1058 * route change is done in multiple steps, with dropping and
1059 * reacquiring lock. In the situations with multiple processes
1060 * changes the same route in can lead to the case when route
1061 * is changed between the steps. Address it by retrying the operation
1062 * multiple times before failing.
1063 */
1064
1065 RIB_RLOCK(rnh);
1066 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
1067 info->rti_info[RTAX_NETMASK], &rnh->head);
1068
1069 if (rt == NULL) {
1070 RIB_RUNLOCK(rnh);
1071 return (ESRCH);
1072 }
1073
1074 rnd_orig.rnd_nhop = rt->rt_nhop;
1075 rnd_orig.rnd_weight = rt->rt_weight;
1076
1077 RIB_RUNLOCK(rnh);
1078
1079 for (int i = 0; i < RIB_MAX_RETRIES; i++) {
1080 error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
1081 if (error != EAGAIN)
1082 break;
1083 }
1084
1085 return (error);
1086 }
1087
1088 static int
change_nhop(struct rib_head * rnh,struct rt_addrinfo * info,struct nhop_object * nh_orig,struct nhop_object ** nh_new)1089 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
1090 struct nhop_object *nh_orig, struct nhop_object **nh_new)
1091 {
1092 int error;
1093
1094 /*
1095 * New gateway could require new ifaddr, ifp;
1096 * flags may also be different; ifp may be specified
1097 * by ll sockaddr when protocol address is ambiguous
1098 */
1099 if (((nh_orig->nh_flags & NHF_GATEWAY) &&
1100 info->rti_info[RTAX_GATEWAY] != NULL) ||
1101 info->rti_info[RTAX_IFP] != NULL ||
1102 (info->rti_info[RTAX_IFA] != NULL &&
1103 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
1104 error = rt_getifa_fib(info, rnh->rib_fibnum);
1105
1106 if (error != 0) {
1107 info->rti_ifa = NULL;
1108 return (error);
1109 }
1110 }
1111
1112 error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
1113 info->rti_ifa = NULL;
1114
1115 return (error);
1116 }
1117
#ifdef ROUTE_MPATH
/*
 * Changes a single path of the multipath route @rt.
 *
 * The first nexthop in the group @rnd_orig->rnd_nhgrp for which
 * check_info_match_nhop() returns 0 is replaced by a nexthop rebuilt from
 * @info; a group is then obtained for the updated path list and installed
 * via change_route_conditional().
 *
 * Returns 0 on success, ESRCH if no path matches @info, EAGAIN if the
 * temporary path array cannot be allocated, or the error of the failing
 * helper otherwise.
 */
static int
change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_new = {};
	struct nhop_object *nh_orig, *nh_new;
	const struct weightened_nhop *wn;
	struct weightened_nhop *wn_new;
	uint32_t num_nhops;
	int error, found_idx;

	/* Locate the path to be modified. */
	wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
	nh_orig = NULL;
	found_idx = 0;
	for (int i = 0; i < num_nhops; i++) {
		if (check_info_match_nhop(info, NULL, wn[i].nh) != 0)
			continue;
		nh_orig = wn[i].nh;
		found_idx = i;
		break;
	}
	if (nh_orig == NULL)
		return (ESRCH);

	error = change_nhop(rnh, info, nh_orig, &nh_new);
	if (error != 0)
		return (error);

	/* Build a scratch copy of the path list with the path swapped. */
	wn_new = mallocarray(num_nhops, sizeof(*wn_new), M_TEMP,
	    M_NOWAIT | M_ZERO);
	if (wn_new == NULL) {
		nhop_free(nh_new);
		return (EAGAIN);
	}
	memcpy(wn_new, wn, num_nhops * sizeof(*wn_new));
	wn_new[found_idx].nh = nh_new;
	wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);

	error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp);

	/* Neither the local nexthop reference nor the scratch array is kept. */
	nhop_free(nh_new);
	free(wn_new, M_TEMP);

	if (error != 0)
		return (error);

	return (change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc));
}
#endif
1170
1171 static int
change_route_byinfo(struct rib_head * rnh,struct rtentry * rt,struct rt_addrinfo * info,struct route_nhop_data * rnd_orig,struct rib_cmd_info * rc)1172 change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
1173 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1174 struct rib_cmd_info *rc)
1175 {
1176 int error = 0;
1177 struct nhop_object *nh_orig;
1178 struct route_nhop_data rnd_new;
1179
1180 nh_orig = rnd_orig->rnd_nhop;
1181 if (nh_orig == NULL)
1182 return (ESRCH);
1183
1184 #ifdef ROUTE_MPATH
1185 if (NH_IS_NHGRP(nh_orig))
1186 return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
1187 #endif
1188
1189 rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
1190 error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
1191 if (error != 0)
1192 return (error);
1193 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1194
1195 return (error);
1196 }
1197
1198 /*
1199 * Insert @rt with nhop data from @rnd_new to @rnh.
1200 * Returns 0 on success and stores operation results in @rc.
1201 */
1202 static int
add_route(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd,struct rib_cmd_info * rc)1203 add_route(struct rib_head *rnh, struct rtentry *rt,
1204 struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1205 {
1206 struct radix_node *rn;
1207
1208 RIB_WLOCK_ASSERT(rnh);
1209
1210 rt->rt_nhop = rnd->rnd_nhop;
1211 rt->rt_weight = rnd->rnd_weight;
1212 rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);
1213
1214 if (rn != NULL) {
1215 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1216 tmproutes_update(rnh, rt, rnd->rnd_nhop);
1217
1218 /* Finalize notification */
1219 rib_bump_gen(rnh);
1220 rnh->rnh_prefixes++;
1221
1222 rc->rc_cmd = RTM_ADD;
1223 rc->rc_rt = rt;
1224 rc->rc_nh_old = NULL;
1225 rc->rc_nh_new = rnd->rnd_nhop;
1226 rc->rc_nh_weight = rnd->rnd_weight;
1227
1228 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1229 return (0);
1230 }
1231
1232 /* Existing route or memory allocation failure. */
1233 return (EEXIST);
1234 }
1235
1236 /*
1237 * Unconditionally deletes @rt from @rnh.
1238 */
1239 static int
delete_route(struct rib_head * rnh,struct rtentry * rt,struct rib_cmd_info * rc)1240 delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
1241 {
1242 RIB_WLOCK_ASSERT(rnh);
1243
1244 /* Route deletion requested. */
1245 struct radix_node *rn;
1246
1247 rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
1248 if (rn == NULL)
1249 return (ESRCH);
1250 rt = RNTORT(rn);
1251 rt->rte_flags &= ~RTF_UP;
1252
1253 rib_bump_gen(rnh);
1254 rnh->rnh_prefixes--;
1255
1256 rc->rc_cmd = RTM_DELETE;
1257 rc->rc_rt = rt;
1258 rc->rc_nh_old = rt->rt_nhop;
1259 rc->rc_nh_new = NULL;
1260 rc->rc_nh_weight = rt->rt_weight;
1261
1262 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1263
1264 return (0);
1265 }
1266
1267 /*
1268 * Switch @rt nhop/weigh to the ones specified in @rnd.
1269 * Returns 0 on success.
1270 */
1271 int
change_route(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd,struct rib_cmd_info * rc)1272 change_route(struct rib_head *rnh, struct rtentry *rt,
1273 struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1274 {
1275 struct nhop_object *nh_orig;
1276
1277 RIB_WLOCK_ASSERT(rnh);
1278
1279 nh_orig = rt->rt_nhop;
1280
1281 if (rnd->rnd_nhop == NULL)
1282 return (delete_route(rnh, rt, rc));
1283
1284 /* Changing nexthop & weight to a new one */
1285 rt->rt_nhop = rnd->rnd_nhop;
1286 rt->rt_weight = rnd->rnd_weight;
1287 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1288 tmproutes_update(rnh, rt, rnd->rnd_nhop);
1289
1290 /* Finalize notification */
1291 rib_bump_gen(rnh);
1292 rc->rc_cmd = RTM_CHANGE;
1293 rc->rc_rt = rt;
1294 rc->rc_nh_old = nh_orig;
1295 rc->rc_nh_new = rnd->rnd_nhop;
1296 rc->rc_nh_weight = rnd->rnd_weight;
1297
1298 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1299
1300 return (0);
1301 }
1302
1303 /*
1304 * Conditionally update route nhop/weight IFF data in @nhd_orig is
1305 * consistent with the current route data.
1306 * Nexthop in @nhd_new is consumed.
1307 */
1308 int
change_route_conditional(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd_orig,struct route_nhop_data * rnd_new,struct rib_cmd_info * rc)1309 change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
1310 struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
1311 struct rib_cmd_info *rc)
1312 {
1313 struct rtentry *rt_new;
1314 int error = 0;
1315
1316 IF_DEBUG_LEVEL(LOG_DEBUG2) {
1317 char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
1318 nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
1319 nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
1320 FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
1321 "trying change %s -> %s", buf_old, buf_new);
1322 }
1323 RIB_WLOCK(rnh);
1324
1325 struct route_nhop_data rnd;
1326 rt_new = lookup_prefix_rt(rnh, rt, &rnd);
1327
1328 if (rt_new == NULL) {
1329 if (rnd_orig->rnd_nhop == NULL)
1330 error = add_route(rnh, rt, rnd_new, rc);
1331 else {
1332 /*
1333 * Prefix does not exist, which was not our assumption.
1334 * Update @rnd_orig with the new data and return
1335 */
1336 rnd_orig->rnd_nhop = NULL;
1337 rnd_orig->rnd_weight = 0;
1338 error = EAGAIN;
1339 }
1340 } else {
1341 /* Prefix exists, try to update */
1342 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
1343 /*
1344 * Nhop/mpath group hasn't changed. Flip
1345 * to the new precalculated one and return
1346 */
1347 error = change_route(rnh, rt_new, rnd_new, rc);
1348 } else {
1349 /* Update and retry */
1350 rnd_orig->rnd_nhop = rt_new->rt_nhop;
1351 rnd_orig->rnd_weight = rt_new->rt_weight;
1352 error = EAGAIN;
1353 }
1354 }
1355
1356 RIB_WUNLOCK(rnh);
1357
1358 if (error == 0) {
1359 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
1360
1361 if (rnd_orig->rnd_nhop != NULL)
1362 nhop_free_any(rnd_orig->rnd_nhop);
1363
1364 } else {
1365 if (rnd_new->rnd_nhop != NULL)
1366 nhop_free_any(rnd_new->rnd_nhop);
1367 }
1368
1369 return (error);
1370 }
1371
1372 /*
1373 * Performs modification of routing table specificed by @action.
1374 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
1375 * Needs to be run in network epoch.
1376 *
1377 * Returns 0 on success and fills in @rc with action result.
1378 */
1379 int
rib_action(uint32_t fibnum,int action,struct rt_addrinfo * info,struct rib_cmd_info * rc)1380 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
1381 struct rib_cmd_info *rc)
1382 {
1383 int error;
1384
1385 switch (action) {
1386 case RTM_ADD:
1387 error = rib_add_route(fibnum, info, rc);
1388 break;
1389 case RTM_DELETE:
1390 error = rib_del_route(fibnum, info, rc);
1391 break;
1392 case RTM_CHANGE:
1393 error = rib_change_route(fibnum, info, rc);
1394 break;
1395 default:
1396 error = ENOTSUP;
1397 }
1398
1399 return (error);
1400 }
1401
/*
 * State shared between rib_walk_del() and its per-node callback
 * rt_checkdelroute().
 */
struct rt_delinfo
{
	struct rib_head *rnh;		/* table being walked */
	struct rtentry *head;		/* unlinked rtentries, chained via rt_chain */
	rib_filter_f_t *filter_f;	/* filter handed to rt_delete_conditional() */
	void *filter_arg;		/* opaque argument for filter_f */
	int prio;			/* priority handed to rt_delete_conditional() */
	struct rib_cmd_info rc;		/* result of the latest delete attempt */
};
1411
1412 /*
1413 * Conditionally unlinks rtenties or paths from radix tree based
1414 * on the callback data passed in @arg.
1415 */
1416 static int
rt_checkdelroute(struct radix_node * rn,void * arg)1417 rt_checkdelroute(struct radix_node *rn, void *arg)
1418 {
1419 struct rt_delinfo *di = (struct rt_delinfo *)arg;
1420 struct rtentry *rt = (struct rtentry *)rn;
1421
1422 if (rt_delete_conditional(di->rnh, rt, di->prio,
1423 di->filter_f, di->filter_arg, &di->rc) != 0)
1424 return (0);
1425
1426 /*
1427 * Add deleted rtentries to the list to GC them
1428 * after dropping the lock.
1429 *
1430 * XXX: Delayed notifications not implemented
1431 * for nexthop updates.
1432 */
1433 if (di->rc.rc_cmd == RTM_DELETE) {
1434 /* Add to the list and return */
1435 rt->rt_chain = di->head;
1436 di->head = rt;
1437 #ifdef ROUTE_MPATH
1438 } else {
1439 /*
1440 * RTM_CHANGE to a different nexthop or nexthop group.
1441 * Free old multipath group.
1442 */
1443 nhop_free_any(di->rc.rc_nh_old);
1444 #endif
1445 }
1446
1447 return (0);
1448 }
1449
1450 /*
1451 * Iterates over a routing table specified by @fibnum and @family and
1452 * deletes elements marked by @filter_f.
1453 * @fibnum: rtable id
1454 * @family: AF_ address family
1455 * @filter_f: function returning non-zero value for items to delete
1456 * @arg: data to pass to the @filter_f function
1457 * @report: true if rtsock notification is needed.
1458 */
1459 void
rib_walk_del(u_int fibnum,int family,rib_filter_f_t * filter_f,void * filter_arg,bool report)1460 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
1461 bool report)
1462 {
1463 struct rib_head *rnh;
1464 struct rtentry *rt;
1465 struct nhop_object *nh;
1466 struct epoch_tracker et;
1467
1468 rnh = rt_tables_get_rnh(fibnum, family);
1469 if (rnh == NULL)
1470 return;
1471
1472 struct rt_delinfo di = {
1473 .rnh = rnh,
1474 .filter_f = filter_f,
1475 .filter_arg = filter_arg,
1476 .prio = NH_PRIORITY_NORMAL,
1477 };
1478
1479 NET_EPOCH_ENTER(et);
1480
1481 RIB_WLOCK(rnh);
1482 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
1483 RIB_WUNLOCK(rnh);
1484
1485 /* We might have something to reclaim. */
1486 bzero(&di.rc, sizeof(di.rc));
1487 di.rc.rc_cmd = RTM_DELETE;
1488 while (di.head != NULL) {
1489 rt = di.head;
1490 di.head = rt->rt_chain;
1491 rt->rt_chain = NULL;
1492 nh = rt->rt_nhop;
1493
1494 di.rc.rc_rt = rt;
1495 di.rc.rc_nh_old = nh;
1496 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);
1497
1498 if (report) {
1499 #ifdef ROUTE_MPATH
1500 struct nhgrp_object *nhg;
1501 const struct weightened_nhop *wn;
1502 uint32_t num_nhops;
1503 if (NH_IS_NHGRP(nh)) {
1504 nhg = (struct nhgrp_object *)nh;
1505 wn = nhgrp_get_nhops(nhg, &num_nhops);
1506 for (int i = 0; i < num_nhops; i++)
1507 rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
1508 } else
1509 #endif
1510 rt_routemsg(RTM_DELETE, rt, nh, fibnum);
1511 }
1512 rt_free(rt);
1513 }
1514
1515 NET_EPOCH_EXIT(et);
1516 }
1517
1518 static int
rt_delete_unconditional(struct radix_node * rn,void * arg)1519 rt_delete_unconditional(struct radix_node *rn, void *arg)
1520 {
1521 struct rtentry *rt = RNTORT(rn);
1522 struct rib_head *rnh = (struct rib_head *)arg;
1523
1524 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
1525 if (RNTORT(rn) == rt)
1526 rt_free(rt);
1527
1528 return (0);
1529 }
1530
/*
 * Removes all routes from the routing table without executing notifications.
 * rtentries will be removed after the end of a current epoch.
 *
 * The whole tree walk runs with the table write lock held; every node is
 * handled by rt_delete_unconditional().
 */
static void
rib_flush_routes(struct rib_head *rnh)
{
	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
	RIB_WUNLOCK(rnh);
}
1542
1543 void
rib_flush_routes_family(int family)1544 rib_flush_routes_family(int family)
1545 {
1546 struct rib_head *rnh;
1547
1548 for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
1549 if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
1550 rib_flush_routes(rnh);
1551 }
1552 }
1553
/*
 * Returns a constant, human-readable name for address family @family:
 * "inet", "inet6" or "link"; any other value yields "unknown".
 */
const char *
rib_print_family(int family)
{

	if (family == AF_INET)
		return ("inet");
	if (family == AF_INET6)
		return ("inet6");
	if (family == AF_LINK)
		return ("link");

	return ("unknown");
}
1567
1568