1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2020 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include "opt_inet.h"
29 #include "opt_route.h"
30
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/lock.h>
35 #include <sys/rmlock.h>
36 #include <sys/rwlock.h>
37 #include <sys/malloc.h>
38 #include <sys/mbuf.h>
39 #include <sys/refcount.h>
40 #include <sys/socket.h>
41 #include <sys/sysctl.h>
42 #include <sys/kernel.h>
43
44 #include <net/if.h>
45 #include <net/if_var.h>
46 #include <net/if_dl.h>
47 #include <net/route.h>
48 #include <net/route/route_ctl.h>
49 #include <net/route/route_var.h>
50 #include <net/vnet.h>
51
52 #include <netinet/in.h>
53 #include <netinet/in_var.h>
54 #include <netinet/in_fib.h>
55
56 #include <net/route/nhop_utils.h>
57 #include <net/route/nhop.h>
58 #include <net/route/nhop_var.h>
59 #include <net/route/nhgrp_var.h>
60
61 #define DEBUG_MOD_NAME nhgrp
62 #define DEBUG_MAX_LEVEL LOG_DEBUG
63 #include <net/route/route_debug.h>
64 _DECLARE_DEBUG(LOG_INFO);
65
/*
 * This file contains the data structure management logic for the nexthop
 * groups ("nhgrp") route subsystem.
 *
 * Nexthop groups are used to store multiple routes available for a specific
 * prefix. Nexthop groups are immutable and can be shared across multiple
 * prefixes.
 *
 * Each group consists of a control plane part and a data plane part.
 * The control plane is basically a collection of nexthop objects with
 * weights and a refcount.
 *
 * The data plane consists of an array of nexthop pointers, compiled from the
 * control plane data to support O(1) nexthop selection.
 *
 * For example, consider the following group:
 * [(nh1, weight=100), (nh2, weight=200)]
 * It will compile to the following array:
 * [nh1, nh2, nh2]
 *
 */
87
/*
 * Forward declarations for helpers that are defined later in this file.
 *
 * consider_resize() is called from link_nhgrp() before its definition
 * appears. The parameter names below match the ones used by the definition.
 */
static void consider_resize(struct nh_control *ctl, uint32_t new_gr_bucket,
    uint32_t new_idx_items);

static int cmp_nhgrp(const struct nhgrp_priv *a, const struct nhgrp_priv *b);
static unsigned int hash_nhgrp(const struct nhgrp_priv *obj);
93
/*
 * Multiply-by-33-and-XOR hash over a byte buffer.
 *
 * Starting from zero, every one of the first `len` bytes at `h` is folded
 * into the accumulator as (acc * 33) ^ byte. Arithmetic is done in
 * unsigned int and therefore wraps. A `len` of zero or less returns 0.
 */
static unsigned
djb_hash(const unsigned char *h, const int len)
{
	unsigned int acc = 0;
	int pos = 0;

	while (pos < len) {
		acc = (acc * 33) ^ h[pos];
		pos++;
	}

	return (acc);
}
105
106 static int
cmp_nhgrp(const struct nhgrp_priv * a,const struct nhgrp_priv * b)107 cmp_nhgrp(const struct nhgrp_priv *a, const struct nhgrp_priv *b)
108 {
109
110 /*
111 * In case of consistent hashing, there can be multiple nexthop groups
112 * with the same "control plane" list of nexthops with weights and a
113 * different set of "data plane" nexthops.
114 * For now, ignore the data plane and focus on the control plane list.
115 */
116 if (a->nhg_nh_count != b->nhg_nh_count || a->nhg_uidx != b->nhg_uidx)
117 return (0);
118 return !memcmp(a->nhg_nh_weights, b->nhg_nh_weights,
119 sizeof(struct weightened_nhop) * a->nhg_nh_count);
120 }
121
122 /*
123 * Hash callback: calculate hash of an object
124 */
125 static unsigned int
hash_nhgrp(const struct nhgrp_priv * obj)126 hash_nhgrp(const struct nhgrp_priv *obj)
127 {
128 const unsigned char *key;
129
130 key = (const unsigned char *)obj->nhg_nh_weights;
131
132 return (djb_hash(key, sizeof(struct weightened_nhop) * obj->nhg_nh_count));
133 }
134
135 /*
136 * Returns object referenced and unlocked
137 */
138 struct nhgrp_priv *
find_nhgrp(struct nh_control * ctl,const struct nhgrp_priv * key)139 find_nhgrp(struct nh_control *ctl, const struct nhgrp_priv *key)
140 {
141 struct nhgrp_priv *priv_ret;
142
143 NHOPS_RLOCK(ctl);
144 CHT_SLIST_FIND_BYOBJ(&ctl->gr_head, mpath, key, priv_ret);
145 if (priv_ret != NULL) {
146 if (refcount_acquire_if_not_zero(&priv_ret->nhg_refcount) == 0) {
147 /* refcount is 0 -> group is being deleted */
148 priv_ret = NULL;
149 }
150 }
151 NHOPS_RUNLOCK(ctl);
152
153 return (priv_ret);
154 }
155
156 int
link_nhgrp(struct nh_control * ctl,struct nhgrp_priv * grp_priv)157 link_nhgrp(struct nh_control *ctl, struct nhgrp_priv *grp_priv)
158 {
159 uint16_t idx;
160 uint32_t new_num_buckets, new_num_items;
161
162 NHOPS_WLOCK(ctl);
163 /* Check if we need to resize hash and index */
164 new_num_buckets = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->gr_head);
165 new_num_items = bitmask_get_resize_items(&ctl->nh_idx_head);
166
167 if (bitmask_alloc_idx(&ctl->nh_idx_head, &idx) != 0) {
168 NHOPS_WUNLOCK(ctl);
169 FIB_RH_LOG(LOG_DEBUG, ctl->ctl_rh, "Unable to allocate nhg index");
170 consider_resize(ctl, new_num_buckets, new_num_items);
171 return (0);
172 }
173
174 grp_priv->nhg_idx = idx;
175 grp_priv->nh_control = ctl;
176 CHT_SLIST_INSERT_HEAD(&ctl->gr_head, mpath, grp_priv);
177
178 NHOPS_WUNLOCK(ctl);
179
180 IF_DEBUG_LEVEL(LOG_DEBUG2) {
181 char nhgrp_buf[NHOP_PRINT_BUFSIZE] __unused;
182 FIB_RH_LOG(LOG_DEBUG2, ctl->ctl_rh, "linked %s",
183 nhgrp_print_buf(grp_priv->nhg, nhgrp_buf, sizeof(nhgrp_buf)));
184 }
185 consider_resize(ctl, new_num_buckets, new_num_items);
186
187 return (1);
188 }
189
190 struct nhgrp_priv *
unlink_nhgrp(struct nh_control * ctl,struct nhgrp_priv * key)191 unlink_nhgrp(struct nh_control *ctl, struct nhgrp_priv *key)
192 {
193 struct nhgrp_priv *nhg_priv_ret;
194 int idx;
195
196 NHOPS_WLOCK(ctl);
197
198 CHT_SLIST_REMOVE(&ctl->gr_head, mpath, key, nhg_priv_ret);
199
200 if (nhg_priv_ret == NULL) {
201 FIB_RH_LOG(LOG_DEBUG, ctl->ctl_rh, "Unable to find nhg");
202 NHOPS_WUNLOCK(ctl);
203 return (NULL);
204 }
205
206 idx = nhg_priv_ret->nhg_idx;
207 bitmask_free_idx(&ctl->nh_idx_head, idx);
208 nhg_priv_ret->nhg_idx = 0;
209 nhg_priv_ret->nh_control = NULL;
210
211 NHOPS_WUNLOCK(ctl);
212
213 IF_DEBUG_LEVEL(LOG_DEBUG2) {
214 char nhgrp_buf[NHOP_PRINT_BUFSIZE];
215 nhgrp_print_buf(nhg_priv_ret->nhg, nhgrp_buf, sizeof(nhgrp_buf));
216 FIB_RH_LOG(LOG_DEBUG2, ctl->ctl_rh, "unlinked idx#%d %s", idx,
217 nhgrp_buf);
218 }
219
220 return (nhg_priv_ret);
221 }
222
223 /*
224 * Checks if hash needs resizing and performs this resize if necessary
225 *
226 */
227 static void
consider_resize(struct nh_control * ctl,uint32_t new_gr_bucket,uint32_t new_idx_items)228 consider_resize(struct nh_control *ctl, uint32_t new_gr_bucket, uint32_t new_idx_items)
229 {
230 void *gr_ptr, *gr_idx_ptr;
231 void *old_idx_ptr;
232 size_t alloc_size;
233
234 gr_ptr = NULL ;
235 if (new_gr_bucket != 0) {
236 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_gr_bucket);
237 gr_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
238 }
239
240 gr_idx_ptr = NULL;
241 if (new_idx_items != 0) {
242 alloc_size = bitmask_get_size(new_idx_items);
243 gr_idx_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
244 }
245
246 if (gr_ptr == NULL && gr_idx_ptr == NULL) {
247 /* Either resize is not required or allocations have failed. */
248 return;
249 }
250
251 FIB_RH_LOG(LOG_DEBUG, ctl->ctl_rh,
252 "going to resize nhg hash: [ptr:%p sz:%u] idx:[ptr:%p sz:%u]",
253 gr_ptr, new_gr_bucket, gr_idx_ptr, new_idx_items);
254
255 old_idx_ptr = NULL;
256
257 NHOPS_WLOCK(ctl);
258 if (gr_ptr != NULL) {
259 CHT_SLIST_RESIZE(&ctl->gr_head, mpath, gr_ptr, new_gr_bucket);
260 }
261 if (gr_idx_ptr != NULL) {
262 if (bitmask_copy(&ctl->nh_idx_head, gr_idx_ptr, new_idx_items) == 0)
263 bitmask_swap(&ctl->nh_idx_head, gr_idx_ptr, new_idx_items, &old_idx_ptr);
264 }
265 NHOPS_WUNLOCK(ctl);
266
267 if (gr_ptr != NULL)
268 free(gr_ptr, M_NHOP);
269 if (old_idx_ptr != NULL)
270 free(old_idx_ptr, M_NHOP);
271 }
272
273 /*
274 * Function allocating the necessary group data structures.
275 */
276 bool
nhgrp_ctl_alloc_default(struct nh_control * ctl,int malloc_flags)277 nhgrp_ctl_alloc_default(struct nh_control *ctl, int malloc_flags)
278 {
279 size_t alloc_size;
280 uint32_t num_buckets;
281 void *cht_ptr;
282
283 malloc_flags = (malloc_flags & (M_NOWAIT | M_WAITOK)) | M_ZERO;
284
285 num_buckets = 8;
286 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets);
287 cht_ptr = malloc(alloc_size, M_NHOP, malloc_flags);
288
289 if (cht_ptr == NULL) {
290 FIB_RH_LOG(LOG_WARNING, ctl->ctl_rh, "multipath init failed");
291 return (false);
292 }
293
294 NHOPS_WLOCK(ctl);
295
296 if (ctl->gr_head.hash_size == 0) {
297 /* Init hash and bitmask */
298 CHT_SLIST_INIT(&ctl->gr_head, cht_ptr, num_buckets);
299 NHOPS_WUNLOCK(ctl);
300 } else {
301 /* Other thread has already initiliazed hash/bitmask */
302 NHOPS_WUNLOCK(ctl);
303 free(cht_ptr, M_NHOP);
304 }
305
306 FIB_RH_LOG(LOG_DEBUG, ctl->ctl_rh, "multipath init done");
307
308 return (true);
309 }
310
311 int
nhgrp_ctl_init(struct nh_control * ctl)312 nhgrp_ctl_init(struct nh_control *ctl)
313 {
314
315 /*
316 * By default, do not allocate datastructures as multipath
317 * routes will not be necessarily used.
318 */
319 CHT_SLIST_INIT(&ctl->gr_head, NULL, 0);
320 return (0);
321 }
322
323 void
nhgrp_ctl_free(struct nh_control * ctl)324 nhgrp_ctl_free(struct nh_control *ctl)
325 {
326 if (ctl->gr_head.ptr != NULL)
327 free(ctl->gr_head.ptr, M_NHOP);
328 }
329
330 void
nhgrp_ctl_unlink_all(struct nh_control * ctl)331 nhgrp_ctl_unlink_all(struct nh_control *ctl)
332 {
333 struct nhgrp_priv *nhg_priv;
334
335 NHOPS_WLOCK_ASSERT(ctl);
336
337 CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) {
338 IF_DEBUG_LEVEL(LOG_DEBUG2) {
339 char nhgbuf[NHOP_PRINT_BUFSIZE] __unused;
340 FIB_RH_LOG(LOG_DEBUG2, ctl->ctl_rh, "marking %s unlinked",
341 nhgrp_print_buf(nhg_priv->nhg, nhgbuf, sizeof(nhgbuf)));
342 }
343 refcount_release(&nhg_priv->nhg_linked);
344 } CHT_SLIST_FOREACH_END;
345 }
346
347