xref: /freebsd-13-stable/sys/netinet/in_pcbgroup.c (revision ecf688348bd2c1e292b64a1a37b1d0f545aaa11e)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2010-2011 Juniper Networks, Inc.
5  * All rights reserved.
6  *
7  * This software was developed by Robert N. M. Watson under contract
8  * to Juniper Networks, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 
33 #include "opt_inet6.h"
34 #include "opt_rss.h"
35 
36 #include <sys/param.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/mbuf.h>
40 #include <sys/mutex.h>
41 #include <sys/smp.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 
45 #include <net/rss_config.h>
46 
47 #include <netinet/in.h>
48 
49 #include <netinet/in_pcb.h>
50 #include <netinet/in_rss.h>
51 #ifdef INET6
52 #include <netinet6/in6_pcb.h>
53 #endif /* INET6 */
54 
55 /*
56  * pcbgroups, or "connection groups" are based on Willman, Rixner, and Cox's
57  * 2006 USENIX paper, "An Evaluation of Network Stack Parallelization
58  * Strategies in Modern Operating Systems".  This implementation differs
59  * significantly from that described in the paper, in that it attempts to
60  * introduce not just notions of affinity for connections and distribute work
61  * so as to reduce lock contention, but also align those notions with
62  * hardware work distribution strategies such as RSS.  In this construction,
63  * connection groups supplement, rather than replace, existing reservation
64  * tables for protocol 4-tuples, offering CPU-affine lookup tables with
65  * minimal cache line migration and lock contention during steady state
66  * operation.
67  *
68  * Hardware-offloaded checksums are often inefficient in software -- for
69  * example, Toeplitz, specified by RSS, introduced a significant overhead if
70  * performed during per-packet processing.  It is therefore desirable to fall
71  * back on traditional reservation table lookups without affinity where
72  * hardware-offloaded checksums aren't available, such as for traffic over
73  * non-RSS interfaces.
74  *
75  * Internet protocols, such as UDP and TCP, register to use connection groups
76  * by providing an ipi_hashfields value other than IPI_HASHFIELDS_NONE; this
77  * indicates to the connection group code whether a 2-tuple or 4-tuple is
78  * used as an argument to hashes that assign a connection to a particular
79  * group.  This must be aligned with any hardware offloaded distribution
80  * model, such as RSS or similar approaches taken in embedded network boards.
81  * Wildcard sockets require special handling, as in Willman 2006, and are
82  * shared between connection groups -- while being protected by group-local
83  * locks.  This means that connection establishment and teardown can be
84  * significantly more expensive than without connection groups, but that
85  * steady-state processing can be significantly faster.
86  *
87  * When RSS is used, certain connection group parameters, such as the number
88  * of groups, are provided by the RSS implementation, found in in_rss.c.
89  * Otherwise, in_pcbgroup.c selects possible sensible parameters
90  * corresponding to the degree of parallelism exposed by netisr.
91  *
92  * Most of the implementation of connection groups is in this file; however,
93  * connection group lookup is implemented in in_pcb.c alongside reservation
94  * table lookups -- see in_pcblookup_group().
95  *
96  * TODO:
97  *
98  * Implement dynamic rebalancing of buckets with connection groups; when
99  * load is unevenly distributed, search for more optimal balancing on
100  * demand.  This might require scaling up the number of connection groups
101  * by <<1.
102  *
103  * Provide an IP 2-tuple or 4-tuple netisr m2cpu handler based on connection
104  * groups for ip_input and ip6_input, allowing non-offloaded work
105  * distribution.
106  *
107  * Expose effective CPU affinity of connections to userspace using socket
108  * options.
109  *
110  * Investigate per-connection affinity overrides based on socket options; an
111  * option could be set, certainly resulting in work being distributed
112  * differently in software, and possibly propagated to supporting hardware
113  * with TCAMs or hardware hash tables.  This might require connections to
114  * exist in more than one connection group at a time.
115  *
116  * Hook netisr thread reconfiguration events, and propagate those to RSS so
117  * that rebalancing can occur when the thread pool grows or shrinks.
118  *
119  * Expose per-pcbgroup statistics to userspace monitoring tools such as
120  * netstat, in order to allow better debugging and profiling.
121  */
122 
123 void
in_pcbgroup_init(struct inpcbinfo * pcbinfo,u_int hashfields,int hash_nelements)124 in_pcbgroup_init(struct inpcbinfo *pcbinfo, u_int hashfields,
125     int hash_nelements)
126 {
127 	struct inpcbgroup *pcbgroup;
128 	u_int numpcbgroups, pgn;
129 
130 	/*
131 	 * Only enable connection groups for a protocol if it has been
132 	 * specifically requested.
133 	 */
134 	if (hashfields == IPI_HASHFIELDS_NONE)
135 		return;
136 
137 	/*
138 	 * Connection groups are about multi-processor load distribution,
139 	 * lock contention, and connection CPU affinity.  As such, no point
140 	 * in turning them on for a uniprocessor machine, it only wastes
141 	 * memory.
142 	 */
143 	if (mp_ncpus == 1)
144 		return;
145 
146 #ifdef RSS
147 	/*
148 	 * If we're using RSS, then RSS determines the number of connection
149 	 * groups to use: one connection group per RSS bucket.  If for some
150 	 * reason RSS isn't able to provide a number of buckets, disable
151 	 * connection groups entirely.
152 	 *
153 	 * XXXRW: Can this ever happen?
154 	 */
155 	numpcbgroups = rss_getnumbuckets();
156 	if (numpcbgroups == 0)
157 		return;
158 #else
159 	/*
160 	 * Otherwise, we'll just use one per CPU for now.  If we decide to
161 	 * do dynamic rebalancing a la RSS, we'll need similar logic here.
162 	 */
163 	numpcbgroups = mp_ncpus;
164 #endif
165 
166 	pcbinfo->ipi_hashfields = hashfields;
167 	pcbinfo->ipi_pcbgroups = malloc(numpcbgroups *
168 	    sizeof(*pcbinfo->ipi_pcbgroups), M_PCB, M_WAITOK | M_ZERO);
169 	pcbinfo->ipi_npcbgroups = numpcbgroups;
170 	pcbinfo->ipi_wildbase = hashinit(hash_nelements, M_PCB,
171 	    &pcbinfo->ipi_wildmask);
172 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
173 		pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
174 		pcbgroup->ipg_hashbase = hashinit(hash_nelements, M_PCB,
175 		    &pcbgroup->ipg_hashmask);
176 		INP_GROUP_LOCK_INIT(pcbgroup, "pcbgroup");
177 
178 		/*
179 		 * Initialise notional affinity of the pcbgroup -- for RSS,
180 		 * we want the same notion of affinity as NICs to be used.  In
181 		 * the non-RSS case, just round robin for the time being.
182 		 *
183 		 * XXXRW: The notion of a bucket to CPU mapping is common at
184 		 * both pcbgroup and RSS layers -- does that mean that we
185 		 * should migrate it all from RSS to here, and just leave RSS
186 		 * responsible only for providing hashing and mapping functions?
187 		 */
188 #ifdef RSS
189 		pcbgroup->ipg_cpu = rss_getcpu(pgn);
190 #else
191 		pcbgroup->ipg_cpu = (pgn % mp_ncpus);
192 #endif
193 	}
194 }
195 
196 void
in_pcbgroup_destroy(struct inpcbinfo * pcbinfo)197 in_pcbgroup_destroy(struct inpcbinfo *pcbinfo)
198 {
199 	struct inpcbgroup *pcbgroup;
200 	u_int pgn;
201 
202 	if (pcbinfo->ipi_npcbgroups == 0)
203 		return;
204 
205 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
206 		pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
207 		KASSERT(CK_LIST_EMPTY(pcbinfo->ipi_listhead),
208 		    ("in_pcbinfo_destroy: listhead not empty"));
209 		INP_GROUP_LOCK_DESTROY(pcbgroup);
210 		hashdestroy(pcbgroup->ipg_hashbase, M_PCB,
211 		    pcbgroup->ipg_hashmask);
212 	}
213 	hashdestroy(pcbinfo->ipi_wildbase, M_PCB, pcbinfo->ipi_wildmask);
214 	free(pcbinfo->ipi_pcbgroups, M_PCB);
215 	pcbinfo->ipi_pcbgroups = NULL;
216 	pcbinfo->ipi_npcbgroups = 0;
217 	pcbinfo->ipi_hashfields = 0;
218 }
219 
220 /*
221  * Given a hash of whatever the covered tuple might be, return a pcbgroup
222  * index.  Where RSS is supported, try to align bucket selection with RSS CPU
223  * affinity strategy.
224  */
225 static __inline u_int
in_pcbgroup_getbucket(struct inpcbinfo * pcbinfo,uint32_t hash)226 in_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash)
227 {
228 
229 #ifdef RSS
230 	return (rss_getbucket(hash));
231 #else
232 	return (hash % pcbinfo->ipi_npcbgroups);
233 #endif
234 }
235 
236 /*
237  * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash
238  * information is insufficient to identify the pcbgroup.  This might occur if
239  * a TCP packet turns up with a 2-tuple hash, or if an RSS hash is present but
240  * RSS is not compiled into the kernel.
241  */
242 struct inpcbgroup *
in_pcbgroup_byhash(struct inpcbinfo * pcbinfo,u_int hashtype,uint32_t hash)243 in_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash)
244 {
245 
246 #ifdef RSS
247 	if ((pcbinfo->ipi_hashfields == IPI_HASHFIELDS_4TUPLE &&
248 	    hashtype == M_HASHTYPE_RSS_TCP_IPV4) ||
249 	    (pcbinfo->ipi_hashfields == IPI_HASHFIELDS_4TUPLE &&
250 	    hashtype == M_HASHTYPE_RSS_UDP_IPV4) ||
251 	    (pcbinfo->ipi_hashfields == IPI_HASHFIELDS_2TUPLE &&
252 	    hashtype == M_HASHTYPE_RSS_IPV4))
253 		return (&pcbinfo->ipi_pcbgroups[
254 		    in_pcbgroup_getbucket(pcbinfo, hash)]);
255 #endif
256 	return (NULL);
257 }
258 
259 static struct inpcbgroup *
in_pcbgroup_bymbuf(struct inpcbinfo * pcbinfo,struct mbuf * m)260 in_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m)
261 {
262 
263 	return (in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
264 	    m->m_pkthdr.flowid));
265 }
266 
267 struct inpcbgroup *
in_pcbgroup_bytuple(struct inpcbinfo * pcbinfo,struct in_addr laddr,u_short lport,struct in_addr faddr,u_short fport)268 in_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, struct in_addr laddr,
269     u_short lport, struct in_addr faddr, u_short fport)
270 {
271 	uint32_t hash;
272 
273 	/*
274 	 * RSS note: we pass foreign addr/port as source, and local addr/port
275 	 * as destination, as we want to align with what the hardware is
276 	 * doing.
277 	 */
278 	switch (pcbinfo->ipi_hashfields) {
279 	case IPI_HASHFIELDS_4TUPLE:
280 #ifdef RSS
281 		hash = rss_hash_ip4_4tuple(faddr, fport, laddr, lport);
282 #else
283 		hash = faddr.s_addr ^ fport;
284 #endif
285 		break;
286 
287 	case IPI_HASHFIELDS_2TUPLE:
288 #ifdef RSS
289 		hash = rss_hash_ip4_2tuple(faddr, laddr);
290 #else
291 		hash = faddr.s_addr ^ laddr.s_addr;
292 #endif
293 		break;
294 
295 	default:
296 		hash = 0;
297 	}
298 	return (&pcbinfo->ipi_pcbgroups[in_pcbgroup_getbucket(pcbinfo,
299 	    hash)]);
300 }
301 
302 struct inpcbgroup *
in_pcbgroup_byinpcb(struct inpcb * inp)303 in_pcbgroup_byinpcb(struct inpcb *inp)
304 {
305 #ifdef	RSS
306 	/*
307 	 * Listen sockets with INP_RSS_BUCKET_SET set have a pre-determined
308 	 * RSS bucket and thus we should use this pcbgroup, rather than
309 	 * using a tuple or hash.
310 	 *
311 	 * XXX should verify that there's actually pcbgroups and inp_rss_listen_bucket
312 	 * fits in that!
313 	 */
314 	if (inp->inp_flags2 & INP_RSS_BUCKET_SET)
315 		return (&inp->inp_pcbinfo->ipi_pcbgroups[inp->inp_rss_listen_bucket]);
316 #endif
317 
318 	return (in_pcbgroup_bytuple(inp->inp_pcbinfo, inp->inp_laddr,
319 	    inp->inp_lport, inp->inp_faddr, inp->inp_fport));
320 }
321 
322 static void
in_pcbwild_add(struct inpcb * inp)323 in_pcbwild_add(struct inpcb *inp)
324 {
325 	struct inpcbinfo *pcbinfo;
326 	struct inpcbhead *head;
327 	u_int pgn;
328 
329 	INP_WLOCK_ASSERT(inp);
330 	KASSERT(!(inp->inp_flags2 & INP_PCBGROUPWILD),
331 	    ("%s: is wild",__func__));
332 
333 	pcbinfo = inp->inp_pcbinfo;
334 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
335 		INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
336 	head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, inp->inp_lport,
337 	    0, pcbinfo->ipi_wildmask)];
338 	CK_LIST_INSERT_HEAD(head, inp, inp_pcbgroup_wild);
339 	inp->inp_flags2 |= INP_PCBGROUPWILD;
340 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
341 		INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
342 }
343 
344 static void
in_pcbwild_remove(struct inpcb * inp)345 in_pcbwild_remove(struct inpcb *inp)
346 {
347 	struct inpcbinfo *pcbinfo;
348 	u_int pgn;
349 
350 	INP_WLOCK_ASSERT(inp);
351 	KASSERT((inp->inp_flags2 & INP_PCBGROUPWILD),
352 	    ("%s: not wild", __func__));
353 
354 	pcbinfo = inp->inp_pcbinfo;
355 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
356 		INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
357 	CK_LIST_REMOVE(inp, inp_pcbgroup_wild);
358 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
359 		INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
360 	inp->inp_flags2 &= ~INP_PCBGROUPWILD;
361 }
362 
363 static __inline int
in_pcbwild_needed(struct inpcb * inp)364 in_pcbwild_needed(struct inpcb *inp)
365 {
366 #ifdef	RSS
367 	/*
368 	 * If it's a listen socket and INP_RSS_BUCKET_SET is set,
369 	 * it's a wildcard socket _but_ it's in a specific pcbgroup.
370 	 * Thus we don't treat it as a pcbwild inp.
371 	 */
372 	if (inp->inp_flags2 & INP_RSS_BUCKET_SET)
373 		return (0);
374 #endif
375 
376 #ifdef INET6
377 	if (inp->inp_vflag & INP_IPV6)
378 		return (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr));
379 	else
380 #endif
381 		return (inp->inp_faddr.s_addr == htonl(INADDR_ANY));
382 }
383 
384 static void
in_pcbwild_update_internal(struct inpcb * inp)385 in_pcbwild_update_internal(struct inpcb *inp)
386 {
387 	int wildcard_needed;
388 
389 	wildcard_needed = in_pcbwild_needed(inp);
390 	if (wildcard_needed && !(inp->inp_flags2 & INP_PCBGROUPWILD))
391 		in_pcbwild_add(inp);
392 	else if (!wildcard_needed && (inp->inp_flags2 & INP_PCBGROUPWILD))
393 		in_pcbwild_remove(inp);
394 }
395 
396 /*
397  * Update the pcbgroup of an inpcb, which might include removing an old
398  * pcbgroup reference and/or adding a new one.  Wildcard processing is not
399  * performed here, although ideally we'll never install a pcbgroup for a
400  * wildcard inpcb (asserted below).
401  */
402 static void
in_pcbgroup_update_internal(struct inpcbinfo * pcbinfo,struct inpcbgroup * newpcbgroup,struct inpcb * inp)403 in_pcbgroup_update_internal(struct inpcbinfo *pcbinfo,
404     struct inpcbgroup *newpcbgroup, struct inpcb *inp)
405 {
406 	struct inpcbgroup *oldpcbgroup;
407 	struct inpcbhead *pcbhash;
408 	uint32_t hashkey_faddr;
409 
410 	INP_WLOCK_ASSERT(inp);
411 
412 	oldpcbgroup = inp->inp_pcbgroup;
413 	if (oldpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
414 		INP_GROUP_LOCK(oldpcbgroup);
415 		CK_LIST_REMOVE(inp, inp_pcbgrouphash);
416 		inp->inp_pcbgroup = NULL;
417 		INP_GROUP_UNLOCK(oldpcbgroup);
418 	}
419 	if (newpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
420 #ifdef INET6
421 		if (inp->inp_vflag & INP_IPV6)
422 			hashkey_faddr = INP6_PCBHASHKEY(&inp->in6p_faddr);
423 		else
424 #endif
425 			hashkey_faddr = inp->inp_faddr.s_addr;
426 		INP_GROUP_LOCK(newpcbgroup);
427 		/*
428 		 * If the inp is an RSS bucket wildcard entry, ensure
429 		 * that the PCB hash is calculated correctly.
430 		 *
431 		 * The wildcard hash calculation differs from the
432 		 * non-wildcard definition.  The source address is
433 		 * INADDR_ANY and the far port is 0.
434 		 */
435 		if (inp->inp_flags2 & INP_RSS_BUCKET_SET) {
436 			pcbhash = &newpcbgroup->ipg_hashbase[
437 			    INP_PCBHASH(INADDR_ANY, inp->inp_lport, 0,
438 			    newpcbgroup->ipg_hashmask)];
439 		} else {
440 			pcbhash = &newpcbgroup->ipg_hashbase[
441 			    INP_PCBHASH(hashkey_faddr, inp->inp_lport,
442 			    inp->inp_fport,
443 			    newpcbgroup->ipg_hashmask)];
444 		}
445 		CK_LIST_INSERT_HEAD(pcbhash, inp, inp_pcbgrouphash);
446 		inp->inp_pcbgroup = newpcbgroup;
447 		INP_GROUP_UNLOCK(newpcbgroup);
448 	}
449 
450 	KASSERT(!(newpcbgroup != NULL && in_pcbwild_needed(inp)),
451 	    ("%s: pcbgroup and wildcard!", __func__));
452 }
453 
454 /*
455  * Two update paths: one in which the 4-tuple on an inpcb has been updated
456  * and therefore connection groups may need to change (or a wildcard entry
457  * may needed to be installed), and another in which the 4-tuple has been
458  * set as a result of a packet received, in which case we may be able to use
459  * the hash on the mbuf to avoid doing a software hash calculation for RSS.
460  *
461  * In each case: first, let the wildcard code have a go at placing it as a
462  * wildcard socket.  If it was a wildcard, or if the connection has been
463  * dropped, then no pcbgroup is required (so potentially clear it);
464  * otherwise, calculate and update the pcbgroup for the inpcb.
465  */
466 void
in_pcbgroup_update(struct inpcb * inp)467 in_pcbgroup_update(struct inpcb *inp)
468 {
469 	struct inpcbinfo *pcbinfo;
470 	struct inpcbgroup *newpcbgroup;
471 
472 	INP_WLOCK_ASSERT(inp);
473 
474 	pcbinfo = inp->inp_pcbinfo;
475 	if (!in_pcbgroup_enabled(pcbinfo))
476 		return;
477 
478 	in_pcbwild_update_internal(inp);
479 	if (!(inp->inp_flags2 & INP_PCBGROUPWILD) &&
480 	    !(inp->inp_flags & INP_DROPPED)) {
481 #ifdef INET6
482 		if (inp->inp_vflag & INP_IPV6)
483 			newpcbgroup = in6_pcbgroup_byinpcb(inp);
484 		else
485 #endif
486 			newpcbgroup = in_pcbgroup_byinpcb(inp);
487 	} else
488 		newpcbgroup = NULL;
489 	in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp);
490 }
491 
492 void
in_pcbgroup_update_mbuf(struct inpcb * inp,struct mbuf * m)493 in_pcbgroup_update_mbuf(struct inpcb *inp, struct mbuf *m)
494 {
495 	struct inpcbinfo *pcbinfo;
496 	struct inpcbgroup *newpcbgroup;
497 
498 	INP_WLOCK_ASSERT(inp);
499 
500 	pcbinfo = inp->inp_pcbinfo;
501 	if (!in_pcbgroup_enabled(pcbinfo))
502 		return;
503 
504 	/*
505 	 * Possibly should assert !INP_PCBGROUPWILD rather than testing for
506 	 * it; presumably this function should never be called for anything
507 	 * other than non-wildcard socket?
508 	 */
509 	in_pcbwild_update_internal(inp);
510 	if (!(inp->inp_flags2 & INP_PCBGROUPWILD) &&
511 	    !(inp->inp_flags & INP_DROPPED)) {
512 		newpcbgroup = in_pcbgroup_bymbuf(pcbinfo, m);
513 #ifdef INET6
514 		if (inp->inp_vflag & INP_IPV6) {
515 			if (newpcbgroup == NULL)
516 				newpcbgroup = in6_pcbgroup_byinpcb(inp);
517 		} else {
518 #endif
519 			if (newpcbgroup == NULL)
520 				newpcbgroup = in_pcbgroup_byinpcb(inp);
521 #ifdef INET6
522 		}
523 #endif
524 	} else
525 		newpcbgroup = NULL;
526 	in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp);
527 }
528 
529 /*
530  * Remove pcbgroup entry and optional pcbgroup wildcard entry for this inpcb.
531  */
532 void
in_pcbgroup_remove(struct inpcb * inp)533 in_pcbgroup_remove(struct inpcb *inp)
534 {
535 	struct inpcbgroup *pcbgroup;
536 
537 	INP_WLOCK_ASSERT(inp);
538 
539 	if (!in_pcbgroup_enabled(inp->inp_pcbinfo))
540 		return;
541 
542 	if (inp->inp_flags2 & INP_PCBGROUPWILD)
543 		in_pcbwild_remove(inp);
544 
545 	pcbgroup = inp->inp_pcbgroup;
546 	if (pcbgroup != NULL) {
547 		INP_GROUP_LOCK(pcbgroup);
548 		CK_LIST_REMOVE(inp, inp_pcbgrouphash);
549 		inp->inp_pcbgroup = NULL;
550 		INP_GROUP_UNLOCK(pcbgroup);
551 	}
552 }
553 
554 /*
555  * Query whether or not it is appropriate to use pcbgroups to look up inpcbs
556  * for a protocol.
557  */
558 int
in_pcbgroup_enabled(struct inpcbinfo * pcbinfo)559 in_pcbgroup_enabled(struct inpcbinfo *pcbinfo)
560 {
561 
562 	return (pcbinfo->ipi_npcbgroups > 0);
563 }
564