1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2020 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27 #include "opt_inet.h"
28 #include "opt_route.h"
29
30 #include <sys/cdefs.h>
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/lock.h>
34 #include <sys/rmlock.h>
35 #include <sys/malloc.h>
36 #include <sys/mbuf.h>
37 #include <sys/refcount.h>
38 #include <sys/socket.h>
39 #include <sys/sysctl.h>
40 #include <sys/kernel.h>
41 #include <sys/epoch.h>
42
43 #include <net/if.h>
44 #include <net/if_var.h>
45 #include <net/route.h>
46 #include <net/route/route_ctl.h>
47 #include <net/route/route_var.h>
48 #include <net/vnet.h>
49
50 #include <netinet/in.h>
51 #include <netinet/in_var.h>
52 #include <netinet/in_fib.h>
53
54 #include <net/route/nhop_utils.h>
55 #include <net/route/nhop.h>
56 #include <net/route/nhop_var.h>
57 #include <net/route/nhgrp_var.h>
58
/*
 * Debug logging setup for this file.  The module name and the maximum
 * debug level are defined ahead of the route_debug.h include, which
 * presumably consumes them (confirm against route_debug.h).
 */
#define DEBUG_MOD_NAME nhgrp_ctl
#define DEBUG_MAX_LEVEL LOG_DEBUG
#include <net/route/route_debug.h>
/* NOTE(review): LOG_INFO looks like the default runtime log level -- confirm */
_DECLARE_DEBUG(LOG_INFO);
63
64 /*
65 * This file contains the supporting functions for creating multipath groups
66 * and compiling their dataplane parts.
67 */
68
/* MPF_MULTIPATH must be the same as NHF_MULTIPATH for nhop selection to work */
_Static_assert(MPF_MULTIPATH == NHF_MULTIPATH,
    "MPF_MULTIPATH must be the same as NHF_MULTIPATH");
/*
 * Offset and size of the flags field have to be the same for nexthops and
 * nexthop groups.
 */
CHK_STRUCT_FIELD_GENERIC(struct nhop_object, nh_flags, struct nhgrp_object, nhg_flags);
/* Cap multipath to 64, as the larger values would break rib_cmd_info bmasks */
CTASSERT(RIB_MAX_MPATH_WIDTH <= 64);

/*
 * Forward declarations of helpers that are used before their definition
 * further down in this file.
 */

/* qsort() comparator ordering weightened nexthops by nexthop index */
static int wn_cmp_idx(const void *a, const void *b);
/* Sorts @wn in place by nexthop index, ascending */
static void sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops);

/* Creates a group for @wn or returns a matching existing one */
static struct nhgrp_priv *get_nhgrp(struct nh_control *ctl,
    struct weightened_nhop *wn, int num_nhops, uint32_t uidx, int *perror);
/* Drops the nexthop references held by the group and frees its memory */
static void destroy_nhgrp(struct nhgrp_priv *nhg_priv);
/* Epoch callback wrapper around destroy_nhgrp() */
static void destroy_nhgrp_epoch(epoch_context_t ctx);
/* Releases one reference on every nexthop listed in the group */
static void free_nhgrp_nhops(struct nhgrp_priv *nhg_priv);
85
86 static int
wn_cmp_idx(const void * a,const void * b)87 wn_cmp_idx(const void *a, const void *b)
88 {
89 const struct weightened_nhop *w_a = a;
90 const struct weightened_nhop *w_b = b;
91 uint32_t a_idx = w_a->nh->nh_priv->nh_idx;
92 uint32_t b_idx = w_b->nh->nh_priv->nh_idx;
93
94 if (a_idx < b_idx)
95 return (-1);
96 else if (a_idx > b_idx)
97 return (1);
98 else
99 return (0);
100 }
101
102 /*
103 * Perform in-place sorting for array of nexthops in @wn.
104 * Sort by nexthop index ascending.
105 */
106 static void
sort_weightened_nhops(struct weightened_nhop * wn,int num_nhops)107 sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops)
108 {
109
110 qsort(wn, num_nhops, sizeof(struct weightened_nhop), wn_cmp_idx);
111 }
112
113 /*
114 * In order to determine the minimum weight difference in the array
115 * of weights, create a sorted array of weights, using spare "storage"
116 * field in the `struct weightened_nhop`.
117 * Assume weights to be (mostly) the same and use insertion sort to
118 * make it sorted.
119 */
120 static void
sort_weightened_nhops_weights(struct weightened_nhop * wn,int num_items)121 sort_weightened_nhops_weights(struct weightened_nhop *wn, int num_items)
122 {
123 wn[0].storage = wn[0].weight;
124 for (int i = 1, j = 0; i < num_items; i++) {
125 uint32_t weight = wn[i].weight; // read from 'weight' as it's not reordered
126 /* Move all weights > weight 1 position right */
127 for (j = i - 1; j >= 0 && wn[j].storage > weight; j--)
128 wn[j + 1].storage = wn[j].storage;
129 wn[j + 1].storage = weight;
130 }
131 }
132
133 /*
134 * Calculate minimum number of slots required to fit the existing
135 * set of weights in the common use case where weights are "easily"
136 * comparable.
137 * Assumes @wn is sorted by weight ascending and each weight is > 0.
138 * Returns number of slots or 0 if precise calculation failed.
139 *
140 * Some examples:
141 * note: (i, X) pair means (nhop=i, weight=X):
142 * (1, 1) (2, 2) -> 3 slots [1, 2, 2]
143 * (1, 100), (2, 200) -> 3 slots [1, 2, 2]
144 * (1, 100), (2, 200), (3, 400) -> 7 slots [1, 2, 2, 3, 3, 3]
145 */
146 static uint32_t
calc_min_mpath_slots_fast(struct weightened_nhop * wn,size_t num_items,uint64_t * ptotal)147 calc_min_mpath_slots_fast(struct weightened_nhop *wn, size_t num_items,
148 uint64_t *ptotal)
149 {
150 uint32_t i, last, xmin;
151 uint64_t total = 0;
152
153 // Get sorted array of weights in .storage field
154 sort_weightened_nhops_weights(wn, num_items);
155
156 last = 0;
157 xmin = wn[0].storage;
158 for (i = 0; i < num_items; i++) {
159 total += wn[i].storage;
160 if ((wn[i].storage != last) &&
161 ((wn[i].storage - last < xmin) || xmin == 0)) {
162 xmin = wn[i].storage - last;
163 }
164 last = wn[i].storage;
165 }
166 *ptotal = total;
167 /* xmin is the minimum unit of desired capacity */
168 if ((total % xmin) != 0)
169 return (0);
170 for (i = 0; i < num_items; i++) {
171 if ((wn[i].weight % xmin) != 0)
172 return (0);
173 }
174
175 return ((uint32_t)(total / xmin));
176 }
177
178 /*
179 * Calculate minimum number of slots required to fit the existing
180 * set of weights while maintaining weight coefficients.
181 *
182 * Assume @wn is sorted by weight ascending and each weight is > 0.
183 *
184 * Tries to find simple precise solution first and falls back to
185 * RIB_MAX_MPATH_WIDTH in case of any failure.
186 */
187 static uint32_t
calc_min_mpath_slots(struct weightened_nhop * wn,size_t num_items)188 calc_min_mpath_slots(struct weightened_nhop *wn, size_t num_items)
189 {
190 uint32_t v;
191 uint64_t total;
192
193 v = calc_min_mpath_slots_fast(wn, num_items, &total);
194 if (total == 0)
195 return (0);
196 if ((v == 0) || (v > RIB_MAX_MPATH_WIDTH))
197 v = RIB_MAX_MPATH_WIDTH;
198
199 return (v);
200 }
201
202 /*
203 * Nexthop group data consists of
204 * 1) dataplane part, with nhgrp_object as a header followed by an
205 * arbitrary number of nexthop pointers.
206 * 2) control plane part, with nhgrp_priv as a header, followed by
207 * an arbirtrary number of 'struct weightened_nhop' object.
208 *
209 * Given nexthop groups are (mostly) immutable, allocate all data
210 * in one go.
211 *
212 */
213 __noinline static size_t
get_nhgrp_alloc_size(uint32_t nhg_size,uint32_t num_nhops)214 get_nhgrp_alloc_size(uint32_t nhg_size, uint32_t num_nhops)
215 {
216 size_t sz;
217
218 sz = sizeof(struct nhgrp_object);
219 sz += nhg_size * sizeof(struct nhop_object *);
220 sz += sizeof(struct nhgrp_priv);
221 sz += num_nhops * sizeof(struct weightened_nhop);
222 return (sz);
223 }
224
225 /*
226 * Compile actual list of nexthops to be used by datapath from
227 * the nexthop group @dst.
228 *
229 * For example, compiling control plane list of 2 nexthops
230 * [(200, A), (100, B)] would result in the datapath array
231 * [A, A, B]
232 */
233 static void
compile_nhgrp(struct nhgrp_priv * dst_priv,const struct weightened_nhop * x,uint32_t num_slots)234 compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x,
235 uint32_t num_slots)
236 {
237 struct nhgrp_object *dst;
238 int i, slot_idx, remaining_slots;
239 uint64_t remaining_sum, nh_weight, nh_slots;
240
241 slot_idx = 0;
242 dst = dst_priv->nhg;
243 /* Calculate sum of all weights */
244 remaining_sum = 0;
245 for (i = 0; i < dst_priv->nhg_nh_count; i++)
246 remaining_sum += x[i].weight;
247 remaining_slots = num_slots;
248 FIB_NH_LOG(LOG_DEBUG3, x[0].nh, "sum: %lu, slots: %d",
249 remaining_sum, remaining_slots);
250 for (i = 0; i < dst_priv->nhg_nh_count; i++) {
251 /* Calculate number of slots for the current nexthop */
252 if (remaining_sum > 0) {
253 nh_weight = (uint64_t)x[i].weight;
254 nh_slots = (nh_weight * remaining_slots / remaining_sum);
255 } else
256 nh_slots = 0;
257
258 remaining_sum -= x[i].weight;
259 remaining_slots -= nh_slots;
260
261 FIB_NH_LOG(LOG_DEBUG3, x[0].nh,
262 " rem_sum: %lu, rem_slots: %d nh_slots: %d, slot_idx: %d",
263 remaining_sum, remaining_slots, (int)nh_slots, slot_idx);
264
265 KASSERT((slot_idx + nh_slots <= num_slots),
266 ("index overflow during nhg compilation"));
267 while (nh_slots-- > 0)
268 dst->nhops[slot_idx++] = x[i].nh;
269 }
270 }
271
272 /*
273 * Allocates new nexthop group for the list of weightened nexthops.
274 * Assume sorted list.
275 * Does NOT reference any nexthops in the group.
276 * Returns group with refcount=1 or NULL.
277 */
278 static struct nhgrp_priv *
alloc_nhgrp(struct weightened_nhop * wn,int num_nhops)279 alloc_nhgrp(struct weightened_nhop *wn, int num_nhops)
280 {
281 uint32_t nhgrp_size;
282 struct nhgrp_object *nhg;
283 struct nhgrp_priv *nhg_priv;
284
285 nhgrp_size = calc_min_mpath_slots(wn, num_nhops);
286 if (nhgrp_size == 0) {
287 /* Zero weights, abort */
288 return (NULL);
289 }
290
291 size_t sz = get_nhgrp_alloc_size(nhgrp_size, num_nhops);
292 nhg = malloc(sz, M_NHOP, M_NOWAIT | M_ZERO);
293 if (nhg == NULL) {
294 FIB_NH_LOG(LOG_INFO, wn[0].nh,
295 "unable to allocate group with num_nhops %d (compiled %u)",
296 num_nhops, nhgrp_size);
297 return (NULL);
298 }
299
300 /* Has to be the first to make NHGRP_PRIV() work */
301 nhg->nhg_size = nhgrp_size;
302 nhg->nhg_flags = MPF_MULTIPATH;
303
304 nhg_priv = NHGRP_PRIV(nhg);
305 nhg_priv->nhg_nh_count = num_nhops;
306 refcount_init(&nhg_priv->nhg_refcount, 1);
307
308 /* Please see nhgrp_free() comments on the initial value */
309 refcount_init(&nhg_priv->nhg_linked, 2);
310
311 nhg_priv->nhg = nhg;
312 memcpy(&nhg_priv->nhg_nh_weights[0], wn,
313 num_nhops * sizeof(struct weightened_nhop));
314
315 FIB_NH_LOG(LOG_DEBUG, wn[0].nh, "num_nhops: %d, compiled_nhop: %u",
316 num_nhops, nhgrp_size);
317
318 compile_nhgrp(nhg_priv, wn, nhg->nhg_size);
319
320 return (nhg_priv);
321 }
322
323 void
nhgrp_ref_object(struct nhgrp_object * nhg)324 nhgrp_ref_object(struct nhgrp_object *nhg)
325 {
326 struct nhgrp_priv *nhg_priv;
327 u_int old __diagused;
328
329 nhg_priv = NHGRP_PRIV(nhg);
330 old = refcount_acquire(&nhg_priv->nhg_refcount);
331 KASSERT(old > 0, ("%s: nhgrp object %p has 0 refs", __func__, nhg));
332 }
333
334 void
nhgrp_free(struct nhgrp_object * nhg)335 nhgrp_free(struct nhgrp_object *nhg)
336 {
337 struct nhgrp_priv *nhg_priv;
338 struct nh_control *ctl;
339 struct epoch_tracker et;
340
341 nhg_priv = NHGRP_PRIV(nhg);
342
343 if (!refcount_release(&nhg_priv->nhg_refcount))
344 return;
345
346 /*
347 * group objects don't have an explicit lock attached to it.
348 * As groups are reclaimed based on reference count, it is possible
349 * that some groups will persist after vnet destruction callback
350 * called. Given that, handle scenario with nhgrp_free_group() being
351 * called either after or simultaneously with nhgrp_ctl_unlink_all()
352 * by using another reference counter: nhg_linked.
353 *
354 * There are only 2 places, where nhg_linked can be decreased:
355 * rib destroy (nhgrp_ctl_unlink_all) and this function.
356 * nhg_link can never be increased.
357 *
358 * Hence, use initial value of 2 to make use of
359 * refcount_release_if_not_last().
360 *
361 * There can be two scenarious when calling this function:
362 *
363 * 1) nhg_linked value is 2. This means that either
364 * nhgrp_ctl_unlink_all() has not been called OR it is running,
365 * but we are guaranteed that nh_control won't be freed in
366 * this epoch. Hence, nexthop can be safely unlinked.
367 *
368 * 2) nh_linked value is 1. In that case, nhgrp_ctl_unlink_all()
369 * has been called and nhgrp unlink can be skipped.
370 */
371
372 NET_EPOCH_ENTER(et);
373 if (refcount_release_if_not_last(&nhg_priv->nhg_linked)) {
374 ctl = nhg_priv->nh_control;
375 if (unlink_nhgrp(ctl, nhg_priv) == NULL) {
376 /* Do not try to reclaim */
377 RT_LOG(LOG_INFO, "Failed to unlink nexhop group %p",
378 nhg_priv);
379 NET_EPOCH_EXIT(et);
380 return;
381 }
382 }
383 NET_EPOCH_EXIT(et);
384
385 KASSERT((nhg_priv->nhg_idx == 0), ("gr_idx != 0"));
386 NET_EPOCH_CALL(destroy_nhgrp_epoch, &nhg_priv->nhg_epoch_ctx);
387 }
388
389 /*
390 * Destroys all local resources belonging to @nhg_priv.
391 */
392 __noinline static void
destroy_nhgrp_int(struct nhgrp_priv * nhg_priv)393 destroy_nhgrp_int(struct nhgrp_priv *nhg_priv)
394 {
395
396 free(nhg_priv->nhg, M_NHOP);
397 }
398
399 __noinline static void
destroy_nhgrp(struct nhgrp_priv * nhg_priv)400 destroy_nhgrp(struct nhgrp_priv *nhg_priv)
401 {
402
403 KASSERT((nhg_priv->nhg_refcount == 0), ("nhg_refcount != 0"));
404 KASSERT((nhg_priv->nhg_idx == 0), ("gr_idx != 0"));
405
406 IF_DEBUG_LEVEL(LOG_DEBUG2) {
407 char nhgbuf[NHOP_PRINT_BUFSIZE] __unused;
408 FIB_NH_LOG(LOG_DEBUG2, nhg_priv->nhg_nh_weights[0].nh,
409 "destroying %s", nhgrp_print_buf(nhg_priv->nhg,
410 nhgbuf, sizeof(nhgbuf)));
411 }
412
413 free_nhgrp_nhops(nhg_priv);
414 destroy_nhgrp_int(nhg_priv);
415 }
416
417 /*
418 * Epoch callback indicating group is safe to destroy
419 */
420 static void
destroy_nhgrp_epoch(epoch_context_t ctx)421 destroy_nhgrp_epoch(epoch_context_t ctx)
422 {
423 struct nhgrp_priv *nhg_priv;
424
425 nhg_priv = __containerof(ctx, struct nhgrp_priv, nhg_epoch_ctx);
426
427 destroy_nhgrp(nhg_priv);
428 }
429
430 static bool
ref_nhgrp_nhops(struct nhgrp_priv * nhg_priv)431 ref_nhgrp_nhops(struct nhgrp_priv *nhg_priv)
432 {
433
434 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) {
435 if (nhop_try_ref_object(nhg_priv->nhg_nh_weights[i].nh) != 0)
436 continue;
437
438 /*
439 * Failed to ref the nexthop, b/c it's deleted.
440 * Need to rollback references back.
441 */
442 for (int j = 0; j < i; j++)
443 nhop_free(nhg_priv->nhg_nh_weights[j].nh);
444 return (false);
445 }
446
447 return (true);
448 }
449
450 static void
free_nhgrp_nhops(struct nhgrp_priv * nhg_priv)451 free_nhgrp_nhops(struct nhgrp_priv *nhg_priv)
452 {
453
454 for (int i = 0; i < nhg_priv->nhg_nh_count; i++)
455 nhop_free(nhg_priv->nhg_nh_weights[i].nh);
456 }
457
458 /*
459 * Allocate nexthop group of size @num_nhops with nexthops specified by
460 * @wn. Nexthops have to be unique and match the fibnum/family of the group.
461 * Returns unlinked nhgrp object on success or NULL and non-zero perror.
462 */
463 struct nhgrp_object *
nhgrp_alloc(uint32_t fibnum,int family,struct weightened_nhop * wn,int num_nhops,int * perror)464 nhgrp_alloc(uint32_t fibnum, int family, struct weightened_nhop *wn, int num_nhops,
465 int *perror)
466 {
467 struct rib_head *rh = rt_tables_get_rnh(fibnum, family);
468 struct nhgrp_priv *nhg_priv;
469 struct nh_control *ctl;
470
471 if (rh == NULL) {
472 *perror = E2BIG;
473 return (NULL);
474 }
475
476 ctl = rh->nh_control;
477
478 if (num_nhops > RIB_MAX_MPATH_WIDTH) {
479 *perror = E2BIG;
480 return (NULL);
481 }
482
483 if (ctl->gr_head.hash_size == 0) {
484 /* First multipath request. Bootstrap mpath datastructures. */
485 if (nhgrp_ctl_alloc_default(ctl, M_NOWAIT) == 0) {
486 *perror = ENOMEM;
487 return (NULL);
488 }
489 }
490
491 /* Sort nexthops & check there are no duplicates */
492 sort_weightened_nhops(wn, num_nhops);
493 uint32_t last_id = 0;
494 for (int i = 0; i < num_nhops; i++) {
495 if (wn[i].nh->nh_priv->nh_control != ctl) {
496 *perror = EINVAL;
497 return (NULL);
498 }
499 if (wn[i].nh->nh_priv->nh_idx == last_id) {
500 *perror = EEXIST;
501 return (NULL);
502 }
503 last_id = wn[i].nh->nh_priv->nh_idx;
504 }
505
506 if ((nhg_priv = alloc_nhgrp(wn, num_nhops)) == NULL) {
507 *perror = ENOMEM;
508 return (NULL);
509 }
510 nhg_priv->nh_control = ctl;
511
512 *perror = 0;
513 return (nhg_priv->nhg);
514 }
515
516 /*
517 * Finds an existing group matching @nhg or links @nhg to the tree.
518 * Returns the referenced group or NULL and non-zero @perror.
519 */
520 struct nhgrp_object *
nhgrp_get_nhgrp(struct nhgrp_object * nhg,int * perror)521 nhgrp_get_nhgrp(struct nhgrp_object *nhg, int *perror)
522 {
523 struct nhgrp_priv *nhg_priv, *key = NHGRP_PRIV(nhg);
524 struct nh_control *ctl = key->nh_control;
525
526 nhg_priv = find_nhgrp(ctl, key);
527 if (nhg_priv != NULL) {
528 /*
529 * Free originally-created group. As it hasn't been linked
530 * and the dependent nexhops haven't been referenced, just free
531 * the group.
532 */
533 destroy_nhgrp_int(key);
534 *perror = 0;
535 return (nhg_priv->nhg);
536 } else {
537 /* No existing group, try to link the new one */
538 if (!ref_nhgrp_nhops(key)) {
539 /*
540 * Some of the nexthops have been scheduled for deletion.
541 * As the group hasn't been linked / no nexhops have been
542 * referenced, call the final destructor immediately.
543 */
544 destroy_nhgrp_int(key);
545 *perror = EAGAIN;
546 return (NULL);
547 }
548 if (link_nhgrp(ctl, key) == 0) {
549 /* Unable to allocate index? */
550 *perror = EAGAIN;
551 free_nhgrp_nhops(key);
552 destroy_nhgrp_int(key);
553 return (NULL);
554 }
555 *perror = 0;
556 return (nhg);
557 }
558
559 /* NOTREACHED */
560 }
561
562 /*
563 * Creates or looks up an existing nexthop group based on @wn and @num_nhops.
564 *
565 * Returns referenced nhop group or NULL, passing error code in @perror.
566 */
567 struct nhgrp_priv *
get_nhgrp(struct nh_control * ctl,struct weightened_nhop * wn,int num_nhops,uint32_t uidx,int * perror)568 get_nhgrp(struct nh_control *ctl, struct weightened_nhop *wn, int num_nhops,
569 uint32_t uidx, int *perror)
570 {
571 struct nhgrp_object *nhg;
572
573 nhg = nhgrp_alloc(ctl->ctl_rh->rib_fibnum, ctl->ctl_rh->rib_family,
574 wn, num_nhops, perror);
575 if (nhg == NULL)
576 return (NULL);
577 nhgrp_set_uidx(nhg, uidx);
578 nhg = nhgrp_get_nhgrp(nhg, perror);
579 if (nhg != NULL)
580 return (NHGRP_PRIV(nhg));
581 return (NULL);
582 }
583
584
585 /*
586 * Appends one or more nexthops denoted by @wm to the nexthop group @gr_orig.
587 *
588 * Returns referenced nexthop group or NULL. In the latter case, @perror is
589 * filled with an error code.
590 * Note that function does NOT care if the next nexthops already exists
591 * in the @gr_orig. As a result, they will be added, resulting in the
592 * same nexthop being present multiple times in the new group.
593 */
594 static struct nhgrp_priv *
append_nhops(struct nh_control * ctl,const struct nhgrp_object * gr_orig,struct weightened_nhop * wn,int num_nhops,int * perror)595 append_nhops(struct nh_control *ctl, const struct nhgrp_object *gr_orig,
596 struct weightened_nhop *wn, int num_nhops, int *perror)
597 {
598 char storage[64];
599 struct weightened_nhop *pnhops;
600 struct nhgrp_priv *nhg_priv;
601 const struct nhgrp_priv *src_priv;
602 size_t sz;
603 int curr_nhops;
604
605 src_priv = NHGRP_PRIV_CONST(gr_orig);
606 curr_nhops = src_priv->nhg_nh_count;
607
608 *perror = 0;
609
610 sz = (src_priv->nhg_nh_count + num_nhops) * (sizeof(struct weightened_nhop));
611 /* optimize for <= 4 paths, each path=16 bytes */
612 if (sz <= sizeof(storage))
613 pnhops = (struct weightened_nhop *)&storage[0];
614 else {
615 pnhops = malloc(sz, M_TEMP, M_NOWAIT);
616 if (pnhops == NULL) {
617 *perror = ENOMEM;
618 return (NULL);
619 }
620 }
621
622 /* Copy nhops from original group first */
623 memcpy(pnhops, src_priv->nhg_nh_weights,
624 curr_nhops * sizeof(struct weightened_nhop));
625 memcpy(&pnhops[curr_nhops], wn, num_nhops * sizeof(struct weightened_nhop));
626 curr_nhops += num_nhops;
627
628 nhg_priv = get_nhgrp(ctl, pnhops, curr_nhops, 0, perror);
629
630 if (pnhops != (struct weightened_nhop *)&storage[0])
631 free(pnhops, M_TEMP);
632
633 if (nhg_priv == NULL)
634 return (NULL);
635
636 return (nhg_priv);
637 }
638
639
640 /*
641 * Creates/finds nexthop group based on @wn and @num_nhops.
642 * Returns 0 on success with referenced group in @rnd, or
643 * errno.
644 *
645 * If the error is EAGAIN, then the operation can be retried.
646 */
647 int
nhgrp_get_group(struct rib_head * rh,struct weightened_nhop * wn,int num_nhops,uint32_t uidx,struct nhgrp_object ** pnhg)648 nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn, int num_nhops,
649 uint32_t uidx, struct nhgrp_object **pnhg)
650 {
651 struct nh_control *ctl = rh->nh_control;
652 struct nhgrp_priv *nhg_priv;
653 int error;
654
655 nhg_priv = get_nhgrp(ctl, wn, num_nhops, uidx, &error);
656 if (nhg_priv != NULL)
657 *pnhg = nhg_priv->nhg;
658
659 return (error);
660 }
661
662 /*
663 * Creates new nexthop group based on @src group without the nexthops
664 * chosen by @flt_func.
665 * Returns 0 on success, storring the reference nhop group/object in @rnd.
666 */
667 int
nhgrp_get_filtered_group(struct rib_head * rh,const struct rtentry * rt,const struct nhgrp_object * src,rib_filter_f_t flt_func,void * flt_data,struct route_nhop_data * rnd)668 nhgrp_get_filtered_group(struct rib_head *rh, const struct rtentry *rt,
669 const struct nhgrp_object *src, rib_filter_f_t flt_func, void *flt_data,
670 struct route_nhop_data *rnd)
671 {
672 char storage[64];
673 struct nh_control *ctl = rh->nh_control;
674 struct weightened_nhop *pnhops;
675 const struct nhgrp_priv *mp_priv, *src_priv;
676 size_t sz;
677 int error, i, num_nhops;
678
679 src_priv = NHGRP_PRIV_CONST(src);
680
681 sz = src_priv->nhg_nh_count * (sizeof(struct weightened_nhop));
682 /* optimize for <= 4 paths, each path=16 bytes */
683 if (sz <= sizeof(storage))
684 pnhops = (struct weightened_nhop *)&storage[0];
685 else {
686 if ((pnhops = malloc(sz, M_TEMP, M_NOWAIT)) == NULL)
687 return (ENOMEM);
688 }
689
690 /* Filter nexthops */
691 error = 0;
692 num_nhops = 0;
693 for (i = 0; i < src_priv->nhg_nh_count; i++) {
694 if (flt_func(rt, src_priv->nhg_nh_weights[i].nh, flt_data))
695 continue;
696 memcpy(&pnhops[num_nhops++], &src_priv->nhg_nh_weights[i],
697 sizeof(struct weightened_nhop));
698 }
699
700 if (num_nhops == 0) {
701 rnd->rnd_nhgrp = NULL;
702 rnd->rnd_weight = 0;
703 } else if (num_nhops == 1) {
704 rnd->rnd_nhop = pnhops[0].nh;
705 rnd->rnd_weight = pnhops[0].weight;
706 if (nhop_try_ref_object(rnd->rnd_nhop) == 0)
707 error = EAGAIN;
708 } else {
709 mp_priv = get_nhgrp(ctl, pnhops, num_nhops, 0, &error);
710 if (mp_priv != NULL)
711 rnd->rnd_nhgrp = mp_priv->nhg;
712 rnd->rnd_weight = 0;
713 }
714
715 if (pnhops != (struct weightened_nhop *)&storage[0])
716 free(pnhops, M_TEMP);
717
718 return (error);
719 }
720
721 /*
722 * Creates new multipath group based on existing group/nhop in @rnd_orig and
723 * to-be-added nhop @wn_add.
724 * Returns 0 on success and stores result in @rnd_new.
725 */
726 int
nhgrp_get_addition_group(struct rib_head * rh,struct route_nhop_data * rnd_orig,struct route_nhop_data * rnd_add,struct route_nhop_data * rnd_new)727 nhgrp_get_addition_group(struct rib_head *rh, struct route_nhop_data *rnd_orig,
728 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_new)
729 {
730 struct nh_control *ctl = rh->nh_control;
731 struct nhgrp_priv *nhg_priv;
732 struct weightened_nhop wn[2] = {};
733 int error;
734
735 if (rnd_orig->rnd_nhop == NULL) {
736 /* No paths to add to, just reference current nhop */
737 *rnd_new = *rnd_add;
738 if (nhop_try_ref_object(rnd_new->rnd_nhop) == 0)
739 return (EAGAIN);
740 return (0);
741 }
742
743 wn[0].nh = rnd_add->rnd_nhop;
744 wn[0].weight = rnd_add->rnd_weight;
745
746 if (!NH_IS_NHGRP(rnd_orig->rnd_nhop)) {
747 /* Simple merge of 2 non-multipath nexthops */
748 wn[1].nh = rnd_orig->rnd_nhop;
749 wn[1].weight = rnd_orig->rnd_weight;
750 nhg_priv = get_nhgrp(ctl, wn, 2, 0, &error);
751 } else {
752 /* Get new nhop group with @rt->rt_nhop as an additional nhop */
753 nhg_priv = append_nhops(ctl, rnd_orig->rnd_nhgrp, &wn[0], 1,
754 &error);
755 }
756
757 if (nhg_priv == NULL)
758 return (error);
759 rnd_new->rnd_nhgrp = nhg_priv->nhg;
760 rnd_new->rnd_weight = 0;
761
762 return (0);
763 }
764
765 /*
766 * Returns pointer to array of nexthops with weights for
767 * given @nhg. Stores number of items in the array into @pnum_nhops.
768 */
769 const struct weightened_nhop *
nhgrp_get_nhops(const struct nhgrp_object * nhg,uint32_t * pnum_nhops)770 nhgrp_get_nhops(const struct nhgrp_object *nhg, uint32_t *pnum_nhops)
771 {
772 const struct nhgrp_priv *nhg_priv;
773
774 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath"));
775
776 nhg_priv = NHGRP_PRIV_CONST(nhg);
777 *pnum_nhops = nhg_priv->nhg_nh_count;
778
779 return (nhg_priv->nhg_nh_weights);
780 }
781
782 void
nhgrp_set_uidx(struct nhgrp_object * nhg,uint32_t uidx)783 nhgrp_set_uidx(struct nhgrp_object *nhg, uint32_t uidx)
784 {
785 struct nhgrp_priv *nhg_priv;
786
787 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath"));
788
789 nhg_priv = NHGRP_PRIV(nhg);
790
791 nhg_priv->nhg_uidx = uidx;
792 }
793
794 uint32_t
nhgrp_get_uidx(const struct nhgrp_object * nhg)795 nhgrp_get_uidx(const struct nhgrp_object *nhg)
796 {
797 const struct nhgrp_priv *nhg_priv;
798
799 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath"));
800
801 nhg_priv = NHGRP_PRIV_CONST(nhg);
802 return (nhg_priv->nhg_uidx);
803 }
804
805 /*
806 * Prints nexhop group @nhg data in the provided @buf.
807 * Example: nhg#33/sz=3:[#1:100,#2:100,#3:100]
808 * Example: nhg#33/sz=5:[#1:100,#2:100,..]
809 */
810 char *
nhgrp_print_buf(const struct nhgrp_object * nhg,char * buf,size_t bufsize)811 nhgrp_print_buf(const struct nhgrp_object *nhg, char *buf, size_t bufsize)
812 {
813 const struct nhgrp_priv *nhg_priv = NHGRP_PRIV_CONST(nhg);
814
815 int off = snprintf(buf, bufsize, "nhg#%u/sz=%u:[", nhg_priv->nhg_idx,
816 nhg_priv->nhg_nh_count);
817
818 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) {
819 const struct weightened_nhop *wn = &nhg_priv->nhg_nh_weights[i];
820 int len = snprintf(&buf[off], bufsize - off, "#%u:%u,",
821 wn->nh->nh_priv->nh_idx, wn->weight);
822 if (len + off + 3 >= bufsize) {
823 int len = snprintf(&buf[off], bufsize - off, "...");
824 off += len;
825 break;
826 }
827 off += len;
828 }
829 if (off > 0)
830 off--; // remove last ","
831 if (off + 1 < bufsize)
832 snprintf(&buf[off], bufsize - off, "]");
833 return buf;
834 }
835
836 __noinline static int
dump_nhgrp_entry(struct rib_head * rh,const struct nhgrp_priv * nhg_priv,char * buffer,size_t buffer_size,struct sysctl_req * w)837 dump_nhgrp_entry(struct rib_head *rh, const struct nhgrp_priv *nhg_priv,
838 char *buffer, size_t buffer_size, struct sysctl_req *w)
839 {
840 struct rt_msghdr *rtm;
841 struct nhgrp_external *nhge;
842 struct nhgrp_container *nhgc;
843 const struct nhgrp_object *nhg;
844 struct nhgrp_nhop_external *ext;
845 int error;
846 size_t sz;
847
848 nhg = nhg_priv->nhg;
849
850 sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external);
851 /* controlplane nexthops */
852 sz += sizeof(struct nhgrp_container);
853 sz += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count;
854 /* dataplane nexthops */
855 sz += sizeof(struct nhgrp_container);
856 sz += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size;
857
858 KASSERT(sz <= buffer_size, ("increase nhgrp buffer size"));
859
860 bzero(buffer, sz);
861
862 rtm = (struct rt_msghdr *)buffer;
863 rtm->rtm_msglen = sz;
864 rtm->rtm_version = RTM_VERSION;
865 rtm->rtm_type = RTM_GET;
866
867 nhge = (struct nhgrp_external *)(rtm + 1);
868
869 nhge->nhg_idx = nhg_priv->nhg_idx;
870 nhge->nhg_refcount = nhg_priv->nhg_refcount;
871
872 /* fill in control plane nexthops firs */
873 nhgc = (struct nhgrp_container *)(nhge + 1);
874 nhgc->nhgc_type = NHG_C_TYPE_CNHOPS;
875 nhgc->nhgc_subtype = 0;
876 nhgc->nhgc_len = sizeof(struct nhgrp_container);
877 nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count;
878 nhgc->nhgc_count = nhg_priv->nhg_nh_count;
879
880 ext = (struct nhgrp_nhop_external *)(nhgc + 1);
881 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) {
882 ext[i].nh_idx = nhg_priv->nhg_nh_weights[i].nh->nh_priv->nh_idx;
883 ext[i].nh_weight = nhg_priv->nhg_nh_weights[i].weight;
884 }
885
886 /* fill in dataplane nexthops */
887 nhgc = (struct nhgrp_container *)(&ext[nhg_priv->nhg_nh_count]);
888 nhgc->nhgc_type = NHG_C_TYPE_DNHOPS;
889 nhgc->nhgc_subtype = 0;
890 nhgc->nhgc_len = sizeof(struct nhgrp_container);
891 nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size;
892 nhgc->nhgc_count = nhg->nhg_size;
893
894 ext = (struct nhgrp_nhop_external *)(nhgc + 1);
895 for (int i = 0; i < nhg->nhg_size; i++) {
896 ext[i].nh_idx = nhg->nhops[i]->nh_priv->nh_idx;
897 ext[i].nh_weight = 0;
898 }
899
900 error = SYSCTL_OUT(w, buffer, sz);
901
902 return (error);
903 }
904
905 uint32_t
nhgrp_get_idx(const struct nhgrp_object * nhg)906 nhgrp_get_idx(const struct nhgrp_object *nhg)
907 {
908 const struct nhgrp_priv *nhg_priv;
909
910 nhg_priv = NHGRP_PRIV_CONST(nhg);
911 return (nhg_priv->nhg_idx);
912 }
913
914 uint8_t
nhgrp_get_origin(const struct nhgrp_object * nhg)915 nhgrp_get_origin(const struct nhgrp_object *nhg)
916 {
917 return (NHGRP_PRIV_CONST(nhg)->nhg_origin);
918 }
919
920 void
nhgrp_set_origin(struct nhgrp_object * nhg,uint8_t origin)921 nhgrp_set_origin(struct nhgrp_object *nhg, uint8_t origin)
922 {
923 NHGRP_PRIV(nhg)->nhg_origin = origin;
924 }
925
926 uint32_t
nhgrp_get_count(struct rib_head * rh)927 nhgrp_get_count(struct rib_head *rh)
928 {
929 struct nh_control *ctl;
930 uint32_t count;
931
932 ctl = rh->nh_control;
933
934 NHOPS_RLOCK(ctl);
935 count = ctl->gr_head.items_count;
936 NHOPS_RUNLOCK(ctl);
937
938 return (count);
939 }
940
941 int
nhgrp_dump_sysctl(struct rib_head * rh,struct sysctl_req * w)942 nhgrp_dump_sysctl(struct rib_head *rh, struct sysctl_req *w)
943 {
944 struct nh_control *ctl = rh->nh_control;
945 struct epoch_tracker et;
946 struct nhgrp_priv *nhg_priv;
947 char *buffer;
948 size_t sz;
949 int error = 0;
950
951 if (ctl->gr_head.items_count == 0)
952 return (0);
953
954 /* Calculate the maximum nhop group size in bytes */
955 sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external);
956 sz += 2 * sizeof(struct nhgrp_container);
957 sz += 2 * sizeof(struct nhgrp_nhop_external) * RIB_MAX_MPATH_WIDTH;
958 buffer = malloc(sz, M_TEMP, M_NOWAIT);
959 if (buffer == NULL)
960 return (ENOMEM);
961
962 NET_EPOCH_ENTER(et);
963 NHOPS_RLOCK(ctl);
964 CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) {
965 error = dump_nhgrp_entry(rh, nhg_priv, buffer, sz, w);
966 if (error != 0)
967 break;
968 } CHT_SLIST_FOREACH_END;
969 NHOPS_RUNLOCK(ctl);
970 NET_EPOCH_EXIT(et);
971
972 free(buffer, M_TEMP);
973
974 return (error);
975 }
976