xref: /freebsd-13-stable/sys/net/route/route_ctl.c (revision b297093ebab6a16f23cebeeb1dbd7470c13db004)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 #include "opt_route.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/mbuf.h>
37 #include <sys/socket.h>
38 #include <sys/sysctl.h>
39 #include <sys/syslog.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/rmlock.h>
43 
44 #include <net/if.h>
45 #include <net/if_var.h>
46 #include <net/if_dl.h>
47 #include <net/vnet.h>
48 #include <net/route.h>
49 #include <net/route/route_ctl.h>
50 #include <net/route/route_var.h>
51 #include <net/route/nhop_utils.h>
52 #include <net/route/nhop.h>
53 #include <net/route/nhop_var.h>
54 #include <netinet/in.h>
55 #include <netinet6/scope6_var.h>
56 #include <netinet6/in6_var.h>
57 
58 #define	DEBUG_MOD_NAME	route_ctl
59 #define	DEBUG_MAX_LEVEL	LOG_DEBUG
60 #include <net/route/route_debug.h>
61 _DECLARE_DEBUG(LOG_INFO);
62 
/*
 * This file contains the control plane functions for the routing tables.
 *
 * All functions assume they are called within the net epoch.
 */
68 
69 union sockaddr_union {
70 	struct sockaddr		sa;
71 	struct sockaddr_in	sin;
72 	struct sockaddr_in6	sin6;
73 	char			_buf[32];
74 };
75 
76 static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
77     struct rib_cmd_info *rc);
78 static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
79     struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
80     struct rib_cmd_info *rc);
81 
82 static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
83     struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
84 #ifdef ROUTE_MPATH
85 static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
86     struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
87     int op_flags, struct rib_cmd_info *rc);
88 #endif
89 
90 static int add_route(struct rib_head *rnh, struct rtentry *rt,
91     struct route_nhop_data *rnd, struct rib_cmd_info *rc);
92 static int delete_route(struct rib_head *rnh, struct rtentry *rt,
93     struct rib_cmd_info *rc);
94 static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
95     int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);
96 
97 static int get_prio_from_info(const struct rt_addrinfo *info);
98 static int nhop_get_prio(const struct nhop_object *nh);
99 
100 #ifdef ROUTE_MPATH
101 static bool rib_can_multipath(struct rib_head *rh);
102 #endif
103 
104 /* Per-vnet multipath routing configuration */
105 SYSCTL_DECL(_net_route);
106 #define	V_rib_route_multipath	VNET(rib_route_multipath)
107 #ifdef ROUTE_MPATH
108 #define _MP_FLAGS	CTLFLAG_RW
109 VNET_DEFINE(u_int, rib_route_multipath) = 1;
110 #else
111 #define _MP_FLAGS	CTLFLAG_RD
112 VNET_DEFINE(u_int, rib_route_multipath) = 0;
113 #endif
114 SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
115     &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
116 #undef _MP_FLAGS
117 
#ifdef ROUTE_MPATH
/*
 * Per-vnet flag (net.route.hash_outbound, read-only via sysctl), initially 0.
 */
VNET_DEFINE(u_int, fib_hash_outbound) = 0;
SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_VNET | CTLFLAG_RD,
    &VNET_NAME(fib_hash_outbound), 0,
    "Compute flowid for locally-originated packets");

/*
 * Default entropy to add to the hash calculation for the outbound
 * connections.
 */
uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
};
#endif
133 
#if defined(INET) && defined(INET6)
/*
 * Per-vnet knob (net.route.ipv6_nexthop), enabled by default, that gates
 * IPv4 routes using an IPv6 next hop address.
 */
FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
#define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(rib_route_ipv6_nexthop), 0,
    "Enable IPv4 route via IPv6 Next Hop address");
#endif
141 
142 /* Debug bits */
143 SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
144 
145 static struct rib_head *
get_rnh(uint32_t fibnum,const struct rt_addrinfo * info)146 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
147 {
148 	struct rib_head *rnh;
149 	struct sockaddr *dst;
150 
151 	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));
152 
153 	dst = info->rti_info[RTAX_DST];
154 	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
155 
156 	return (rnh);
157 }
158 
159 #if defined(INET) && defined(INET6)
160 bool
rib_can_4o6_nhop(void)161 rib_can_4o6_nhop(void)
162 {
163 	return (!!V_rib_route_ipv6_nexthop);
164 }
165 #endif
166 
167 #ifdef ROUTE_MPATH
168 static bool
rib_can_multipath(struct rib_head * rh)169 rib_can_multipath(struct rib_head *rh)
170 {
171 	int result;
172 
173 	CURVNET_SET(rh->rib_vnet);
174 	result = !!V_rib_route_multipath;
175 	CURVNET_RESTORE();
176 
177 	return (result);
178 }
179 
180 /*
181  * Check is nhop is multipath-eligible.
182  * Avoid nhops without gateways and redirects.
183  *
184  * Returns 1 for multipath-eligible nexthop,
185  * 0 otherwise.
186  */
187 bool
nhop_can_multipath(const struct nhop_object * nh)188 nhop_can_multipath(const struct nhop_object *nh)
189 {
190 
191 	if ((nh->nh_flags & NHF_MULTIPATH) != 0)
192 		return (1);
193 	if ((nh->nh_flags & NHF_GATEWAY) == 0)
194 		return (0);
195 	if ((nh->nh_flags & NHF_REDIRECT) != 0)
196 		return (0);
197 
198 	return (1);
199 }
200 #endif
201 
202 static int
get_info_weight(const struct rt_addrinfo * info,uint32_t default_weight)203 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
204 {
205 	uint32_t weight;
206 
207 	if (info->rti_mflags & RTV_WEIGHT)
208 		weight = info->rti_rmx->rmx_weight;
209 	else
210 		weight = default_weight;
211 	/* Keep upper 1 byte for adm distance purposes */
212 	if (weight > RT_MAX_WEIGHT)
213 		weight = RT_MAX_WEIGHT;
214 	else if (weight == 0)
215 		weight = default_weight;
216 
217 	return (weight);
218 }
219 
220 /*
221  * File-local concept for distingushing between the normal and
222  * RTF_PINNED routes tha can override the "normal" one.
223  */
224 #define	NH_PRIORITY_HIGH	2
225 #define	NH_PRIORITY_NORMAL	1
226 static int
get_prio_from_info(const struct rt_addrinfo * info)227 get_prio_from_info(const struct rt_addrinfo *info)
228 {
229 	if (info->rti_flags & RTF_PINNED)
230 		return (NH_PRIORITY_HIGH);
231 	return (NH_PRIORITY_NORMAL);
232 }
233 
234 static int
nhop_get_prio(const struct nhop_object * nh)235 nhop_get_prio(const struct nhop_object *nh)
236 {
237 	if (NH_IS_PINNED(nh))
238 		return (NH_PRIORITY_HIGH);
239 	return (NH_PRIORITY_NORMAL);
240 }
241 
242 /*
243  * Check if specified @gw matches gw data in the nexthop @nh.
244  *
245  * Returns true if matches, false otherwise.
246  */
247 bool
match_nhop_gw(const struct nhop_object * nh,const struct sockaddr * gw)248 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
249 {
250 
251 	if (nh->gw_sa.sa_family != gw->sa_family)
252 		return (false);
253 
254 	switch (gw->sa_family) {
255 	case AF_INET:
256 		return (nh->gw4_sa.sin_addr.s_addr ==
257 		    ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
258 	case AF_INET6:
259 		{
260 			const struct sockaddr_in6 *gw6;
261 			gw6 = (const struct sockaddr_in6 *)gw;
262 
263 			/*
264 			 * Currently (2020-09) IPv6 gws in kernel have their
265 			 * scope embedded. Once this becomes false, this code
266 			 * has to be revisited.
267 			 */
268 			if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
269 			    &gw6->sin6_addr))
270 				return (true);
271 			return (false);
272 		}
273 	case AF_LINK:
274 		{
275 			const struct sockaddr_dl *sdl;
276 			sdl = (const struct sockaddr_dl *)gw;
277 			return (nh->gwl_sa.sdl_index == sdl->sdl_index);
278 		}
279 	default:
280 		return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
281 	}
282 
283 	/* NOTREACHED */
284 	return (false);
285 }
286 
/*
 * Filter matching every nexthop whose gateway equals @gw_sa
 * (a struct sockaddr passed as opaque data).  @rt is not examined.
 * Can be used as rib_filter_f callback.
 */
int
rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa)
{
	return (match_nhop_gw(nh, (const struct sockaddr *)gw_sa));
}
298 
/* State carried across invocations of the match_gw_one() filter. */
struct gw_filter_data {
	const struct sockaddr	*gw;	/* gateway to look for */
	int			count;	/* number of matches seen so far */
};
303 
304 /*
305  * Matches first occurence of the gateway provided in @gwd
306  */
307 static int
match_gw_one(const struct rtentry * rt,const struct nhop_object * nh,void * _data)308 match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
309 {
310 	struct gw_filter_data *gwd = (struct gw_filter_data *)_data;
311 
312 	/* Return only first match to make rtsock happy */
313 	if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0)
314 		return (1);
315 	return (0);
316 }
317 
318 /*
319  * Checks if data in @info matches nexhop @nh.
320  *
321  * Returns 0 on success,
322  * ESRCH if not matched,
323  * ENOENT if filter function returned false
324  */
325 int
check_info_match_nhop(const struct rt_addrinfo * info,const struct rtentry * rt,const struct nhop_object * nh)326 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
327     const struct nhop_object *nh)
328 {
329 	const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
330 
331 	if (info->rti_filter != NULL) {
332 	    if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
333 		    return (ENOENT);
334 	    else
335 		    return (0);
336 	}
337 	if ((gw != NULL) && !match_nhop_gw(nh, gw))
338 		return (ESRCH);
339 
340 	return (0);
341 }
342 
343 /*
344  * Runs exact prefix match based on @dst and @netmask.
345  * Returns matched @rtentry if found or NULL.
346  * If rtentry was found, saves nexthop / weight value into @rnd.
347  */
348 static struct rtentry *
lookup_prefix_bysa(struct rib_head * rnh,const struct sockaddr * dst,const struct sockaddr * netmask,struct route_nhop_data * rnd)349 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
350     const struct sockaddr *netmask, struct route_nhop_data *rnd)
351 {
352 	struct rtentry *rt;
353 
354 	RIB_LOCK_ASSERT(rnh);
355 
356 	rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
357 	if (rt != NULL) {
358 		rnd->rnd_nhop = rt->rt_nhop;
359 		rnd->rnd_weight = rt->rt_weight;
360 	} else {
361 		rnd->rnd_nhop = NULL;
362 		rnd->rnd_weight = 0;
363 	}
364 
365 	return (rt);
366 }
367 
/*
 * Exact prefix match in @rnh using the key and mask of the rtentry @rt.
 * Result and @rnd handling are those of lookup_prefix_bysa().
 */
struct rtentry *
lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
    struct route_nhop_data *rnd)
{
	const struct sockaddr *key = rt_key_const(rt);
	const struct sockaddr *mask = rt_mask_const(rt);

	return (lookup_prefix_bysa(rnh, key, mask, rnd));
}
374 
375 /*
376  * Runs exact prefix match based on dst/netmask from @info.
377  * Assumes RIB lock is held.
378  * Returns matched @rtentry if found or NULL.
379  * If rtentry was found, saves nexthop / weight value into @rnd.
380  */
381 struct rtentry *
lookup_prefix(struct rib_head * rnh,const struct rt_addrinfo * info,struct route_nhop_data * rnd)382 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
383     struct route_nhop_data *rnd)
384 {
385 	struct rtentry *rt;
386 
387 	rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
388 	    info->rti_info[RTAX_NETMASK], rnd);
389 
390 	return (rt);
391 }
392 
/*
 * Builds the netmask for a @plen-bit prefix of address family @family and
 * applies it to the address stored in @_dst.
 *
 * @family: address family of @_dst (AF_INET / AF_INET6, when compiled in)
 * @plen: prefix length, or -1 when no mask applies
 * @_dst: destination sockaddr; its host bits are cleared in place
 * @pmask: on entry points to caller-provided mask storage; on return it
 *   either still points to that storage, now holding the mask, or is set
 *   to NULL when @plen is -1 or equals the full address width
 *
 * Returns true on success, false if @plen is out of range for @family or
 * @family is not handled.
 */
static bool
fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
    struct sockaddr **pmask)
{
	if (plen == -1) {
		*pmask = NULL;
		return (true);
	}

	switch (family) {
#ifdef INET
	case AF_INET:
		{
			struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask);
			struct sockaddr_in *dst = (struct sockaddr_in *)_dst;
			uint32_t maddr;

			if (plen < 0 || plen > 32)
				return (false);

			memset(mask, 0, sizeof(*mask));
			mask->sin_family = family;
			mask->sin_len = sizeof(*mask);
			if (plen == 32) {
				*pmask = NULL;
				return (true);
			}

			/*
			 * Shift an unsigned constant: "1 << 31" (plen == 1)
			 * would overflow a signed int.  plen is 0..31 here,
			 * and a shift by 32 (plen == 0) is avoided explicitly.
			 */
			maddr = (plen == 0) ? 0 :
			    htonl(0xffffffffU << (32 - plen));
			mask->sin_addr.s_addr = maddr;
			dst->sin_addr.s_addr &= maddr;
			return (true);
		}
#endif
#ifdef INET6
	case AF_INET6:
		{
			struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask);
			struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst;

			if (plen < 0 || plen > 128)
				return (false);

			memset(mask, 0, sizeof(*mask));
			mask->sin6_family = family;
			mask->sin6_len = sizeof(*mask);
			if (plen == 128) {
				*pmask = NULL;
				return (true);
			}

			ip6_writemask(&mask->sin6_addr, plen);
			IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr);
			return (true);
		}
#endif
	default:
		break;
	}

	return (false);
}
452 
453 /*
454  * Attempts to add @dst/plen prefix with nexthop/nexhopgroup data @rnd
455  * to the routing table.
456  *
457  * @fibnum: verified kernel rtable id to insert route to
458  * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
459  * @plen: prefix length (or -1 if host route or not applicable for AF)
460  * @op_flags: combination of RTM_F_ flags
461  * @rc: storage to report operation result
462  *
463  * Returns 0 on success.
464  */
465 int
rib_add_route_px(uint32_t fibnum,struct sockaddr * dst,int plen,struct route_nhop_data * rnd,int op_flags,struct rib_cmd_info * rc)466 rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
467     struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc)
468 {
469 	union sockaddr_union mask_storage;
470 	struct sockaddr *netmask = &mask_storage.sa;
471 	struct rtentry *rt = NULL;
472 
473 	NET_EPOCH_ASSERT();
474 
475 	bzero(rc, sizeof(struct rib_cmd_info));
476 	rc->rc_cmd = RTM_ADD;
477 
478 	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
479 	if (rnh == NULL)
480 		return (EAFNOSUPPORT);
481 
482 	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
483 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
484 		return (EINVAL);
485 	}
486 
487 	if (op_flags & RTM_F_CREATE) {
488 		if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) {
489 			FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed");
490 			return (ENOMEM);
491 		}
492 	} else {
493 		struct route_nhop_data rnd_tmp;
494 		RIB_RLOCK_TRACKER;
495 
496 		RIB_RLOCK(rnh);
497 		rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd_tmp);
498 		RIB_RUNLOCK(rnh);
499 
500 		if (rt == NULL)
501 			return (ESRCH);
502 	}
503 
504 	return (add_route_flags(rnh, rt, rnd, op_flags, rc));
505 }
506 
507 /*
508  * Attempts to delete @dst/plen prefix matching gateway @gw from the
509  *  routing rable.
510  *
511  * @fibnum: rtable id to remove route from
512  * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
513  * @plen: prefix length (or -1 if host route or not applicable for AF)
514  * @gw: gateway to match
515  * @op_flags: combination of RTM_F_ flags
516  * @rc: storage to report operation result
517  *
518  * Returns 0 on success.
519  */
520 int
rib_del_route_px_gw(uint32_t fibnum,struct sockaddr * dst,int plen,const struct sockaddr * gw,int op_flags,struct rib_cmd_info * rc)521 rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
522     const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
523 {
524 	struct gw_filter_data gwd = { .gw = gw };
525 
526 	return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
527 }
528 
529 /*
530  * Attempts to delete @dst/plen prefix matching @filter_func from the
531  *  routing rable.
532  *
533  * @fibnum: rtable id to remove route from
534  * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
535  * @plen: prefix length (or -1 if host route or not applicable for AF)
536  * @filter_func: func to be called for each nexthop of the prefix for matching
537  * @filter_arg: argument to pass to @filter_func
538  * @op_flags: combination of RTM_F_ flags
539  * @rc: storage to report operation result
540  *
541  * Returns 0 on success.
542  */
543 int
rib_del_route_px(uint32_t fibnum,struct sockaddr * dst,int plen,rib_filter_f_t * filter_func,void * filter_arg,int op_flags,struct rib_cmd_info * rc)544 rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
545     rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
546     struct rib_cmd_info *rc)
547 {
548 	union sockaddr_union mask_storage;
549 	struct sockaddr *netmask = &mask_storage.sa;
550 	int error;
551 
552 	NET_EPOCH_ASSERT();
553 
554 	bzero(rc, sizeof(struct rib_cmd_info));
555 	rc->rc_cmd = RTM_DELETE;
556 
557 	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
558 	if (rnh == NULL)
559 		return (EAFNOSUPPORT);
560 
561 	if (dst->sa_len > sizeof(mask_storage)) {
562 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
563 		return (EINVAL);
564 	}
565 
566 	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
567 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
568 		return (EINVAL);
569 	}
570 
571 	int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;
572 
573 	RIB_WLOCK(rnh);
574 	struct route_nhop_data rnd;
575 	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
576 	if (rt != NULL) {
577 		error = rt_delete_conditional(rnh, rt, prio, filter_func,
578 		    filter_arg, rc);
579 	} else
580 		error = ESRCH;
581 	RIB_WUNLOCK(rnh);
582 
583 	if (error != 0)
584 		return (error);
585 
586 	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
587 
588 	if (rc->rc_cmd == RTM_DELETE)
589 		rt_free(rc->rc_rt);
590 #ifdef ROUTE_MPATH
591 	else {
592 		/*
593 		 * Deleting 1 path may result in RTM_CHANGE to
594 		 * a different mpath group/nhop.
595 		 * Free old mpath group.
596 		 */
597 		nhop_free_any(rc->rc_nh_old);
598 	}
599 #endif
600 
601 	return (0);
602 }
603 
604 /*
605  * Tries to copy route @rt from one rtable to the rtable specified by @dst_rh.
606  * @rt: route to copy.
607  * @rnd_src: nhop and weight. Multipath routes are not supported
608  * @rh_dst: target rtable.
609  * @rc: operation result storage
610  *
611  * Return 0 on success.
612  */
613 int
rib_copy_route(struct rtentry * rt,const struct route_nhop_data * rnd_src,struct rib_head * rh_dst,struct rib_cmd_info * rc)614 rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src,
615     struct rib_head *rh_dst, struct rib_cmd_info *rc)
616 {
617 	struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop;
618 	int error;
619 
620 	MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0);
621 
622 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
623 		char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE];
624 		nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf));
625 		rt_print_buf(rt, rtbuf, sizeof(rtbuf));
626 		FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u",
627 		    rtbuf, nhbuf, nhop_get_fibnum(nh_src));
628 	}
629 	struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family);
630 	if (nh == NULL) {
631 		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop");
632 		return (ENOMEM);
633 	}
634 	nhop_copy(nh, rnd_src->rnd_nhop);
635 	nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop));
636 	nhop_set_fibnum(nh, rh_dst->rib_fibnum);
637 	nh = nhop_get_nhop_internal(rh_dst, nh, &error);
638 	if (error != 0) {
639 		FIB_RH_LOG(LOG_INFO, rh_dst,
640 		    "unable to finalize new nexthop: error %d", error);
641 		return (ENOMEM);
642 	}
643 
644 	struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt));
645 	if (rt_new == NULL) {
646 		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry");
647 		nhop_free(nh);
648 		return (ENOMEM);
649 	}
650 
651 	struct route_nhop_data rnd = {
652 		.rnd_nhop = nh,
653 		.rnd_weight = rnd_src->rnd_weight
654 	};
655 	int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0);
656 	error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc);
657 
658 	if (error != 0) {
659 		IF_DEBUG_LEVEL(LOG_DEBUG2) {
660 			char buf[NHOP_PRINT_BUFSIZE];
661 			rt_print_buf(rt_new, buf, sizeof(buf));
662 			FIB_RH_LOG(LOG_DEBUG, rh_dst,
663 			    "Unable to add route %s: error %d", buf, error);
664 		}
665 		nhop_free(nh);
666 		rt_free_immediate(rt_new);
667 	}
668 	return (error);
669 }
670 
671 /*
672  * Adds route defined by @info into the kernel table specified by @fibnum and
673  * sa_family in @info->rti_info[RTAX_DST].
674  *
675  * Returns 0 on success and fills in operation metadata into @rc.
676  */
677 int
rib_add_route(uint32_t fibnum,struct rt_addrinfo * info,struct rib_cmd_info * rc)678 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
679     struct rib_cmd_info *rc)
680 {
681 	struct rib_head *rnh;
682 	int error;
683 
684 	NET_EPOCH_ASSERT();
685 
686 	rnh = get_rnh(fibnum, info);
687 	if (rnh == NULL)
688 		return (EAFNOSUPPORT);
689 
690 	/*
691 	 * Check consistency between RTF_HOST flag and netmask
692 	 * existence.
693 	 */
694 	if (info->rti_flags & RTF_HOST)
695 		info->rti_info[RTAX_NETMASK] = NULL;
696 	else if (info->rti_info[RTAX_NETMASK] == NULL) {
697 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
698 		return (EINVAL);
699 	}
700 
701 	bzero(rc, sizeof(struct rib_cmd_info));
702 	rc->rc_cmd = RTM_ADD;
703 
704 	error = add_route_byinfo(rnh, info, rc);
705 	if (error == 0)
706 		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
707 
708 	return (error);
709 }
710 
711 static int
add_route_byinfo(struct rib_head * rnh,struct rt_addrinfo * info,struct rib_cmd_info * rc)712 add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
713     struct rib_cmd_info *rc)
714 {
715 	struct route_nhop_data rnd_add;
716 	struct nhop_object *nh;
717 	struct rtentry *rt;
718 	struct sockaddr *dst, *gateway, *netmask;
719 	int error;
720 
721 	dst = info->rti_info[RTAX_DST];
722 	gateway = info->rti_info[RTAX_GATEWAY];
723 	netmask = info->rti_info[RTAX_NETMASK];
724 
725 	if ((info->rti_flags & RTF_GATEWAY) && !gateway) {
726 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
727 		return (EINVAL);
728 	}
729 	if (dst && gateway && !nhop_check_gateway(dst->sa_family, gateway->sa_family)) {
730 		FIB_RH_LOG(LOG_DEBUG, rnh,
731 		    "error: invalid dst/gateway family combination (%d, %d)",
732 		    dst->sa_family, gateway->sa_family);
733 		return (EINVAL);
734 	}
735 
736 	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
737 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
738 		    dst->sa_len);
739 		return (EINVAL);
740 	}
741 
742 	if (info->rti_ifa == NULL) {
743 		error = rt_getifa_fib(info, rnh->rib_fibnum);
744 		if (error)
745 			return (error);
746 	}
747 
748 	if ((rt = rt_alloc(rnh, dst, netmask)) == NULL)
749 		return (ENOBUFS);
750 
751 	error = nhop_create_from_info(rnh, info, &nh);
752 	if (error != 0) {
753 		rt_free_immediate(rt);
754 		return (error);
755 	}
756 
757 	rnd_add.rnd_nhop = nh;
758 	rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);
759 
760 	int op_flags = RTM_F_CREATE;
761 
762 	/*
763 	 * Set the desired action when the route already exists:
764 	 * If RTF_PINNED is present, assume the direct kernel routes that cannot be multipath.
765 	 * Otherwise, append the path.
766 	 */
767 	op_flags |= (info->rti_flags & RTF_PINNED) ? RTM_F_REPLACE : RTM_F_APPEND;
768 
769 	return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc));
770 }
771 
772 static int
add_route_flags(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd_add,int op_flags,struct rib_cmd_info * rc)773 add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add,
774     int op_flags, struct rib_cmd_info *rc)
775 {
776 	struct route_nhop_data rnd_orig;
777 	struct nhop_object *nh;
778 	struct rtentry *rt_orig;
779 	int error = 0;
780 
781 	MPASS(rt != NULL);
782 
783 	nh = rnd_add->rnd_nhop;
784 
785 	RIB_WLOCK(rnh);
786 
787 	rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig);
788 
789 	if (rt_orig == NULL) {
790 		if (op_flags & RTM_F_CREATE)
791 			error = add_route(rnh, rt, rnd_add, rc);
792 		else
793 			error = ESRCH; /* no entry but creation was not required */
794 		RIB_WUNLOCK(rnh);
795 		if (error != 0)
796 			goto out;
797 		return (0);
798 	}
799 
800 	if (op_flags & RTM_F_EXCL) {
801 		/* We have existing route in the RIB but not allowed to replace. */
802 		RIB_WUNLOCK(rnh);
803 		error = EEXIST;
804 		goto out;
805 	}
806 
807 	/* Now either append or replace */
808 	if (op_flags & RTM_F_REPLACE) {
809 		if (nhop_get_prio(rnd_orig.rnd_nhop) == NH_PRIORITY_HIGH) {
810 			/* Old path is "better" (e.g. has PINNED flag set) */
811 			RIB_WUNLOCK(rnh);
812 			error = EEXIST;
813 			goto out;
814 		}
815 		change_route(rnh, rt_orig, rnd_add, rc);
816 		RIB_WUNLOCK(rnh);
817 		nh = rc->rc_nh_old;
818 		goto out;
819 	}
820 
821 	RIB_WUNLOCK(rnh);
822 
823 #ifdef ROUTE_MPATH
824 	if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) &&
825 	    nhop_can_multipath(rnd_add->rnd_nhop) &&
826 	    nhop_can_multipath(rnd_orig.rnd_nhop)) {
827 
828 		for (int i = 0; i < RIB_MAX_RETRIES; i++) {
829 			error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig,
830 			    op_flags, rc);
831 			if (error != EAGAIN)
832 				break;
833 			RTSTAT_INC(rts_add_retry);
834 		}
835 
836 		/*
837 		 *  Original nhop reference is unused in any case.
838 		 */
839 		nhop_free_any(rnd_add->rnd_nhop);
840 		if (op_flags & RTM_F_CREATE) {
841 			if (error != 0 || rc->rc_cmd != RTM_ADD)
842 				rt_free_immediate(rt);
843 		}
844 		return (error);
845 	}
846 #endif
847 	/* Out of options - free state and return error */
848 	error = EEXIST;
849 out:
850 	if (op_flags & RTM_F_CREATE)
851 		rt_free_immediate(rt);
852 	nhop_free_any(nh);
853 
854 	return (error);
855 }
856 
857 #ifdef ROUTE_MPATH
858 static int
add_route_flags_mpath(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd_add,struct route_nhop_data * rnd_orig,int op_flags,struct rib_cmd_info * rc)859 add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
860     struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
861     int op_flags, struct rib_cmd_info *rc)
862 {
863 	RIB_RLOCK_TRACKER;
864 	struct route_nhop_data rnd_new;
865 	int error = 0;
866 
867 	error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new);
868 	if (error != 0) {
869 		if (error == EAGAIN) {
870 			/*
871 			 * Group creation failed, most probably because
872 			 * @rnd_orig data got scheduled for deletion.
873 			 * Refresh @rnd_orig data and retry.
874 			 */
875 			RIB_RLOCK(rnh);
876 			lookup_prefix_rt(rnh, rt, rnd_orig);
877 			RIB_RUNLOCK(rnh);
878 			if (rnd_orig == NULL && !(op_flags & RTM_F_CREATE)) {
879 				/* In this iteration route doesn't exist */
880 				error = ENOENT;
881 			}
882 		}
883 		return (error);
884 	}
885 	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
886 	if (error != 0)
887 		return (error);
888 
889 	if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) {
890 		/*
891 		 * First multipath route got installed. Enable local
892 		 * outbound connections hashing.
893 		 */
894 		if (bootverbose)
895 			printf("FIB: enabled flowid calculation for locally-originated packets\n");
896 		V_fib_hash_outbound = 1;
897 	}
898 
899 	return (0);
900 }
901 #endif
902 
903 /*
904  * Removes route defined by @info from the kernel table specified by @fibnum and
905  * sa_family in @info->rti_info[RTAX_DST].
906  *
907  * Returns 0 on success and fills in operation metadata into @rc.
908  */
909 int
rib_del_route(uint32_t fibnum,struct rt_addrinfo * info,struct rib_cmd_info * rc)910 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
911 {
912 	struct rib_head *rnh;
913 	struct sockaddr *dst, *netmask;
914 	struct sockaddr_storage mdst;
915 	int error;
916 
917 	NET_EPOCH_ASSERT();
918 
919 	rnh = get_rnh(fibnum, info);
920 	if (rnh == NULL)
921 		return (EAFNOSUPPORT);
922 
923 	bzero(rc, sizeof(struct rib_cmd_info));
924 	rc->rc_cmd = RTM_DELETE;
925 
926 	dst = info->rti_info[RTAX_DST];
927 	netmask = info->rti_info[RTAX_NETMASK];
928 
929 	if (netmask != NULL) {
930 		/* Ensure @dst is always properly masked */
931 		if (dst->sa_len > sizeof(mdst)) {
932 			FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
933 			return (EINVAL);
934 		}
935 		rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
936 		dst = (struct sockaddr *)&mdst;
937 	}
938 
939 	rib_filter_f_t *filter_func = NULL;
940 	void *filter_arg = NULL;
941 	struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] };
942 
943 	if (info->rti_filter != NULL) {
944 		filter_func = info->rti_filter;
945 		filter_arg = info->rti_filterdata;
946 	} else if (gwd.gw != NULL) {
947 		filter_func = match_gw_one;
948 		filter_arg = &gwd;
949 	}
950 
951 	int prio = get_prio_from_info(info);
952 
953 	RIB_WLOCK(rnh);
954 	struct route_nhop_data rnd;
955 	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
956 	if (rt != NULL) {
957 		error = rt_delete_conditional(rnh, rt, prio, filter_func,
958 		    filter_arg, rc);
959 	} else
960 		error = ESRCH;
961 	RIB_WUNLOCK(rnh);
962 
963 	if (error != 0)
964 		return (error);
965 
966 	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
967 
968 	if (rc->rc_cmd == RTM_DELETE)
969 		rt_free(rc->rc_rt);
970 #ifdef ROUTE_MPATH
971 	else {
972 		/*
973 		 * Deleting 1 path may result in RTM_CHANGE to
974 		 * a different mpath group/nhop.
975 		 * Free old mpath group.
976 		 */
977 		nhop_free_any(rc->rc_nh_old);
978 	}
979 #endif
980 
981 	return (0);
982 }
983 
984 /*
985  * Conditionally unlinks rtentry paths from @rnh matching @cb.
986  * Returns 0 on success with operation result stored in @rc.
987  * On error, returns:
988  * ESRCH - if prefix was not found or filter function failed to match
989  * EADDRINUSE - if trying to delete higher priority route.
990  */
991 static int
rt_delete_conditional(struct rib_head * rnh,struct rtentry * rt,int prio,rib_filter_f_t * cb,void * cbdata,struct rib_cmd_info * rc)992 rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
993     int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc)
994 {
995 	struct nhop_object *nh = rt->rt_nhop;
996 
997 #ifdef ROUTE_MPATH
998 	if (NH_IS_NHGRP(nh)) {
999 		struct nhgrp_object *nhg = (struct nhgrp_object *)nh;
1000 		struct route_nhop_data rnd;
1001 		int error;
1002 
1003 		if (cb == NULL)
1004 			return (ESRCH);
1005 		error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd);
1006 		if (error == 0) {
1007 			if (rnd.rnd_nhgrp == nhg) {
1008 				/* No match, unreference new group and return. */
1009 				nhop_free_any(rnd.rnd_nhop);
1010 				return (ESRCH);
1011 			}
1012 			error = change_route(rnh, rt, &rnd, rc);
1013 		}
1014 		return (error);
1015 	}
1016 #endif
1017 	if (cb != NULL && !cb(rt, nh, cbdata))
1018 		return (ESRCH);
1019 
1020 	if (prio < nhop_get_prio(nh))
1021 		return (EADDRINUSE);
1022 
1023 	return (delete_route(rnh, rt, rc));
1024 }
1025 
1026 int
rib_change_route(uint32_t fibnum,struct rt_addrinfo * info,struct rib_cmd_info * rc)1027 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
1028     struct rib_cmd_info *rc)
1029 {
1030 	RIB_RLOCK_TRACKER;
1031 	struct route_nhop_data rnd_orig;
1032 	struct rib_head *rnh;
1033 	struct rtentry *rt;
1034 	int error;
1035 
1036 	NET_EPOCH_ASSERT();
1037 
1038 	rnh = get_rnh(fibnum, info);
1039 	if (rnh == NULL)
1040 		return (EAFNOSUPPORT);
1041 
1042 	bzero(rc, sizeof(struct rib_cmd_info));
1043 	rc->rc_cmd = RTM_CHANGE;
1044 
1045 	/* Check if updated gateway exists */
1046 	if ((info->rti_flags & RTF_GATEWAY) &&
1047 	    (info->rti_info[RTAX_GATEWAY] == NULL)) {
1048 
1049 		/*
1050 		 * route(8) adds RTF_GATEWAY flag if -interface is not set.
1051 		 * Remove RTF_GATEWAY to enforce consistency and maintain
1052 		 * compatibility..
1053 		 */
1054 		info->rti_flags &= ~RTF_GATEWAY;
1055 	}
1056 
1057 	/*
1058 	 * route change is done in multiple steps, with dropping and
1059 	 * reacquiring lock. In the situations with multiple processes
1060 	 * changes the same route in can lead to the case when route
1061 	 * is changed between the steps. Address it by retrying the operation
1062 	 * multiple times before failing.
1063 	 */
1064 
1065 	RIB_RLOCK(rnh);
1066 	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
1067 	    info->rti_info[RTAX_NETMASK], &rnh->head);
1068 
1069 	if (rt == NULL) {
1070 		RIB_RUNLOCK(rnh);
1071 		return (ESRCH);
1072 	}
1073 
1074 	rnd_orig.rnd_nhop = rt->rt_nhop;
1075 	rnd_orig.rnd_weight = rt->rt_weight;
1076 
1077 	RIB_RUNLOCK(rnh);
1078 
1079 	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
1080 		error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
1081 		if (error != EAGAIN)
1082 			break;
1083 	}
1084 
1085 	return (error);
1086 }
1087 
1088 static int
change_nhop(struct rib_head * rnh,struct rt_addrinfo * info,struct nhop_object * nh_orig,struct nhop_object ** nh_new)1089 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
1090     struct nhop_object *nh_orig, struct nhop_object **nh_new)
1091 {
1092 	int error;
1093 
1094 	/*
1095 	 * New gateway could require new ifaddr, ifp;
1096 	 * flags may also be different; ifp may be specified
1097 	 * by ll sockaddr when protocol address is ambiguous
1098 	 */
1099 	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
1100 	    info->rti_info[RTAX_GATEWAY] != NULL) ||
1101 	    info->rti_info[RTAX_IFP] != NULL ||
1102 	    (info->rti_info[RTAX_IFA] != NULL &&
1103 	     !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
1104 		error = rt_getifa_fib(info, rnh->rib_fibnum);
1105 
1106 		if (error != 0) {
1107 			info->rti_ifa = NULL;
1108 			return (error);
1109 		}
1110 	}
1111 
1112 	error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
1113 	info->rti_ifa = NULL;
1114 
1115 	return (error);
1116 }
1117 
#ifdef ROUTE_MPATH
/*
 * Changes one path of the multipath route @rt.
 *
 * Finds the first nexthop of the group in @rnd_orig that matches @info,
 * builds a replacement nexthop for it via change_nhop(), obtains a
 * nexthop group in which that path is substituted (its weight taken from
 * @info or inherited from the old path) and tries to switch @rt to it.
 *
 * Returns 0 on success with the result stored in @rc.
 * Returns ESRCH if no path matches @info and EAGAIN if the temporary
 * path array cannot be allocated; other errors are passed through from
 * the helpers.
 */
static int
change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_new = {};
	const struct weightened_nhop *paths;
	struct weightened_nhop *paths_new;
	struct nhop_object *nh_match, *nh_repl;
	uint32_t num_paths;
	int error, match_idx;

	paths = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_paths);

	/* Locate the path the request refers to. */
	nh_match = NULL;
	match_idx = 0;
	for (int i = 0; i < num_paths; i++) {
		if (check_info_match_nhop(info, NULL, paths[i].nh) != 0)
			continue;
		nh_match = paths[i].nh;
		match_idx = i;
		break;
	}
	if (nh_match == NULL)
		return (ESRCH);

	error = change_nhop(rnh, info, nh_match, &nh_repl);
	if (error != 0)
		return (error);

	/* Work on a private copy of the path list. */
	paths_new = mallocarray(num_paths, sizeof(*paths_new), M_TEMP,
	    M_NOWAIT | M_ZERO);
	if (paths_new == NULL) {
		nhop_free(nh_repl);
		return (EAGAIN);
	}

	memcpy(paths_new, paths, num_paths * sizeof(*paths_new));
	paths_new[match_idx].nh = nh_repl;
	paths_new[match_idx].weight = get_info_weight(info,
	    paths[match_idx].weight);

	error = nhgrp_get_group(rnh, paths_new, num_paths, 0,
	    &rnd_new.rnd_nhgrp);

	/* Neither temporary is needed past this point, success or not. */
	nhop_free(nh_repl);
	free(paths_new, M_TEMP);

	if (error != 0)
		return (error);

	return (change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc));
}
#endif
1170 
1171 static int
change_route_byinfo(struct rib_head * rnh,struct rtentry * rt,struct rt_addrinfo * info,struct route_nhop_data * rnd_orig,struct rib_cmd_info * rc)1172 change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
1173     struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
1174     struct rib_cmd_info *rc)
1175 {
1176 	int error = 0;
1177 	struct nhop_object *nh_orig;
1178 	struct route_nhop_data rnd_new;
1179 
1180 	nh_orig = rnd_orig->rnd_nhop;
1181 	if (nh_orig == NULL)
1182 		return (ESRCH);
1183 
1184 #ifdef ROUTE_MPATH
1185 	if (NH_IS_NHGRP(nh_orig))
1186 		return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
1187 #endif
1188 
1189 	rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
1190 	error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
1191 	if (error != 0)
1192 		return (error);
1193 	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
1194 
1195 	return (error);
1196 }
1197 
1198 /*
1199  * Insert @rt with nhop data from @rnd_new to @rnh.
1200  * Returns 0 on success and stores operation results in @rc.
1201  */
1202 static int
add_route(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd,struct rib_cmd_info * rc)1203 add_route(struct rib_head *rnh, struct rtentry *rt,
1204     struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1205 {
1206 	struct radix_node *rn;
1207 
1208 	RIB_WLOCK_ASSERT(rnh);
1209 
1210 	rt->rt_nhop = rnd->rnd_nhop;
1211 	rt->rt_weight = rnd->rnd_weight;
1212 	rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);
1213 
1214 	if (rn != NULL) {
1215 		if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1216 			tmproutes_update(rnh, rt, rnd->rnd_nhop);
1217 
1218 		/* Finalize notification */
1219 		rib_bump_gen(rnh);
1220 		rnh->rnh_prefixes++;
1221 
1222 		rc->rc_cmd = RTM_ADD;
1223 		rc->rc_rt = rt;
1224 		rc->rc_nh_old = NULL;
1225 		rc->rc_nh_new = rnd->rnd_nhop;
1226 		rc->rc_nh_weight = rnd->rnd_weight;
1227 
1228 		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1229 		return (0);
1230 	}
1231 
1232 	/* Existing route or memory allocation failure. */
1233 	return (EEXIST);
1234 }
1235 
1236 /*
1237  * Unconditionally deletes @rt from @rnh.
1238  */
1239 static int
delete_route(struct rib_head * rnh,struct rtentry * rt,struct rib_cmd_info * rc)1240 delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
1241 {
1242 	RIB_WLOCK_ASSERT(rnh);
1243 
1244 	/* Route deletion requested. */
1245 	struct radix_node *rn;
1246 
1247 	rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
1248 	if (rn == NULL)
1249 		return (ESRCH);
1250 	rt = RNTORT(rn);
1251 	rt->rte_flags &= ~RTF_UP;
1252 
1253 	rib_bump_gen(rnh);
1254 	rnh->rnh_prefixes--;
1255 
1256 	rc->rc_cmd = RTM_DELETE;
1257 	rc->rc_rt = rt;
1258 	rc->rc_nh_old = rt->rt_nhop;
1259 	rc->rc_nh_new = NULL;
1260 	rc->rc_nh_weight = rt->rt_weight;
1261 
1262 	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1263 
1264 	return (0);
1265 }
1266 
1267 /*
1268  * Switch @rt nhop/weigh to the ones specified in @rnd.
1269  * Returns 0 on success.
1270  */
1271 int
change_route(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd,struct rib_cmd_info * rc)1272 change_route(struct rib_head *rnh, struct rtentry *rt,
1273     struct route_nhop_data *rnd, struct rib_cmd_info *rc)
1274 {
1275 	struct nhop_object *nh_orig;
1276 
1277 	RIB_WLOCK_ASSERT(rnh);
1278 
1279 	nh_orig = rt->rt_nhop;
1280 
1281 	if (rnd->rnd_nhop == NULL)
1282 		return (delete_route(rnh, rt, rc));
1283 
1284 	/* Changing nexthop & weight to a new one */
1285 	rt->rt_nhop = rnd->rnd_nhop;
1286 	rt->rt_weight = rnd->rnd_weight;
1287 	if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
1288 		tmproutes_update(rnh, rt, rnd->rnd_nhop);
1289 
1290 	/* Finalize notification */
1291 	rib_bump_gen(rnh);
1292 	rc->rc_cmd = RTM_CHANGE;
1293 	rc->rc_rt = rt;
1294 	rc->rc_nh_old = nh_orig;
1295 	rc->rc_nh_new = rnd->rnd_nhop;
1296 	rc->rc_nh_weight = rnd->rnd_weight;
1297 
1298 	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
1299 
1300 	return (0);
1301 }
1302 
1303 /*
1304  * Conditionally update route nhop/weight IFF data in @nhd_orig is
1305  *  consistent with the current route data.
1306  * Nexthop in @nhd_new is consumed.
1307  */
1308 int
change_route_conditional(struct rib_head * rnh,struct rtentry * rt,struct route_nhop_data * rnd_orig,struct route_nhop_data * rnd_new,struct rib_cmd_info * rc)1309 change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
1310     struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
1311     struct rib_cmd_info *rc)
1312 {
1313 	struct rtentry *rt_new;
1314 	int error = 0;
1315 
1316 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
1317 		char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
1318 		nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
1319 		nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
1320 		FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
1321 		    "trying change %s -> %s", buf_old, buf_new);
1322 	}
1323 	RIB_WLOCK(rnh);
1324 
1325 	struct route_nhop_data rnd;
1326 	rt_new = lookup_prefix_rt(rnh, rt, &rnd);
1327 
1328 	if (rt_new == NULL) {
1329 		if (rnd_orig->rnd_nhop == NULL)
1330 			error = add_route(rnh, rt, rnd_new, rc);
1331 		else {
1332 			/*
1333 			 * Prefix does not exist, which was not our assumption.
1334 			 * Update @rnd_orig with the new data and return
1335 			 */
1336 			rnd_orig->rnd_nhop = NULL;
1337 			rnd_orig->rnd_weight = 0;
1338 			error = EAGAIN;
1339 		}
1340 	} else {
1341 		/* Prefix exists, try to update */
1342 		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
1343 			/*
1344 			 * Nhop/mpath group hasn't changed. Flip
1345 			 * to the new precalculated one and return
1346 			 */
1347 			error = change_route(rnh, rt_new, rnd_new, rc);
1348 		} else {
1349 			/* Update and retry */
1350 			rnd_orig->rnd_nhop = rt_new->rt_nhop;
1351 			rnd_orig->rnd_weight = rt_new->rt_weight;
1352 			error = EAGAIN;
1353 		}
1354 	}
1355 
1356 	RIB_WUNLOCK(rnh);
1357 
1358 	if (error == 0) {
1359 		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
1360 
1361 		if (rnd_orig->rnd_nhop != NULL)
1362 			nhop_free_any(rnd_orig->rnd_nhop);
1363 
1364 	} else {
1365 		if (rnd_new->rnd_nhop != NULL)
1366 			nhop_free_any(rnd_new->rnd_nhop);
1367 	}
1368 
1369 	return (error);
1370 }
1371 
1372 /*
1373  * Performs modification of routing table specificed by @action.
1374  * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
1375  * Needs to be run in network epoch.
1376  *
1377  * Returns 0 on success and fills in @rc with action result.
1378  */
1379 int
rib_action(uint32_t fibnum,int action,struct rt_addrinfo * info,struct rib_cmd_info * rc)1380 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
1381     struct rib_cmd_info *rc)
1382 {
1383 	int error;
1384 
1385 	switch (action) {
1386 	case RTM_ADD:
1387 		error = rib_add_route(fibnum, info, rc);
1388 		break;
1389 	case RTM_DELETE:
1390 		error = rib_del_route(fibnum, info, rc);
1391 		break;
1392 	case RTM_CHANGE:
1393 		error = rib_change_route(fibnum, info, rc);
1394 		break;
1395 	default:
1396 		error = ENOTSUP;
1397 	}
1398 
1399 	return (error);
1400 }
1401 
1402 struct rt_delinfo
1403 {
1404 	struct rib_head *rnh;
1405 	struct rtentry *head;
1406 	rib_filter_f_t *filter_f;
1407 	void *filter_arg;
1408 	int prio;
1409 	struct rib_cmd_info rc;
1410 };
1411 
1412 /*
1413  * Conditionally unlinks rtenties or paths from radix tree based
1414  * on the callback data passed in @arg.
1415  */
1416 static int
rt_checkdelroute(struct radix_node * rn,void * arg)1417 rt_checkdelroute(struct radix_node *rn, void *arg)
1418 {
1419 	struct rt_delinfo *di = (struct rt_delinfo *)arg;
1420 	struct rtentry *rt = (struct rtentry *)rn;
1421 
1422 	if (rt_delete_conditional(di->rnh, rt, di->prio,
1423 	    di->filter_f, di->filter_arg, &di->rc) != 0)
1424 		return (0);
1425 
1426 	/*
1427 	 * Add deleted rtentries to the list to GC them
1428 	 *  after dropping the lock.
1429 	 *
1430 	 * XXX: Delayed notifications not implemented
1431 	 *  for nexthop updates.
1432 	 */
1433 	if (di->rc.rc_cmd == RTM_DELETE) {
1434 		/* Add to the list and return */
1435 		rt->rt_chain = di->head;
1436 		di->head = rt;
1437 #ifdef ROUTE_MPATH
1438 	} else {
1439 		/*
1440 		 * RTM_CHANGE to a different nexthop or nexthop group.
1441 		 * Free old multipath group.
1442 		 */
1443 		nhop_free_any(di->rc.rc_nh_old);
1444 #endif
1445 	}
1446 
1447 	return (0);
1448 }
1449 
1450 /*
1451  * Iterates over a routing table specified by @fibnum and @family and
1452  *  deletes elements marked by @filter_f.
1453  * @fibnum: rtable id
1454  * @family: AF_ address family
1455  * @filter_f: function returning non-zero value for items to delete
1456  * @arg: data to pass to the @filter_f function
1457  * @report: true if rtsock notification is needed.
1458  */
1459 void
rib_walk_del(u_int fibnum,int family,rib_filter_f_t * filter_f,void * filter_arg,bool report)1460 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
1461     bool report)
1462 {
1463 	struct rib_head *rnh;
1464 	struct rtentry *rt;
1465 	struct nhop_object *nh;
1466 	struct epoch_tracker et;
1467 
1468 	rnh = rt_tables_get_rnh(fibnum, family);
1469 	if (rnh == NULL)
1470 		return;
1471 
1472 	struct rt_delinfo di = {
1473 		.rnh = rnh,
1474 		.filter_f = filter_f,
1475 		.filter_arg = filter_arg,
1476 		.prio = NH_PRIORITY_NORMAL,
1477 	};
1478 
1479 	NET_EPOCH_ENTER(et);
1480 
1481 	RIB_WLOCK(rnh);
1482 	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
1483 	RIB_WUNLOCK(rnh);
1484 
1485 	/* We might have something to reclaim. */
1486 	bzero(&di.rc, sizeof(di.rc));
1487 	di.rc.rc_cmd = RTM_DELETE;
1488 	while (di.head != NULL) {
1489 		rt = di.head;
1490 		di.head = rt->rt_chain;
1491 		rt->rt_chain = NULL;
1492 		nh = rt->rt_nhop;
1493 
1494 		di.rc.rc_rt = rt;
1495 		di.rc.rc_nh_old = nh;
1496 		rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);
1497 
1498 		if (report) {
1499 #ifdef ROUTE_MPATH
1500 			struct nhgrp_object *nhg;
1501 			const struct weightened_nhop *wn;
1502 			uint32_t num_nhops;
1503 			if (NH_IS_NHGRP(nh)) {
1504 				nhg = (struct nhgrp_object *)nh;
1505 				wn = nhgrp_get_nhops(nhg, &num_nhops);
1506 				for (int i = 0; i < num_nhops; i++)
1507 					rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
1508 			} else
1509 #endif
1510 			rt_routemsg(RTM_DELETE, rt, nh, fibnum);
1511 		}
1512 		rt_free(rt);
1513 	}
1514 
1515 	NET_EPOCH_EXIT(et);
1516 }
1517 
1518 static int
rt_delete_unconditional(struct radix_node * rn,void * arg)1519 rt_delete_unconditional(struct radix_node *rn, void *arg)
1520 {
1521 	struct rtentry *rt = RNTORT(rn);
1522 	struct rib_head *rnh = (struct rib_head *)arg;
1523 
1524 	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
1525 	if (RNTORT(rn) == rt)
1526 		rt_free(rt);
1527 
1528 	return (0);
1529 }
1530 
1531 /*
1532  * Removes all routes from the routing table without executing notifications.
1533  * rtentres will be removed after the end of a current epoch.
1534  */
1535 static void
rib_flush_routes(struct rib_head * rnh)1536 rib_flush_routes(struct rib_head *rnh)
1537 {
1538 	RIB_WLOCK(rnh);
1539 	rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
1540 	RIB_WUNLOCK(rnh);
1541 }
1542 
1543 void
rib_flush_routes_family(int family)1544 rib_flush_routes_family(int family)
1545 {
1546 	struct rib_head *rnh;
1547 
1548 	for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
1549 		if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
1550 			rib_flush_routes(rnh);
1551 	}
1552 }
1553 
1554 const char *
rib_print_family(int family)1555 rib_print_family(int family)
1556 {
1557 	switch (family) {
1558 	case AF_INET:
1559 		return ("inet");
1560 	case AF_INET6:
1561 		return ("inet6");
1562 	case AF_LINK:
1563 		return ("link");
1564 	}
1565 	return ("unknown");
1566 }
1567 
1568