xref: /freebsd-11-stable/sys/netinet/in_pcbgroup.c (revision d60840138f6292c1ceeb177ebe797eca0b2749da)
1 /*-
2  * Copyright (c) 2010-2011 Juniper Networks, Inc.
3  * All rights reserved.
4  *
5  * This software was developed by Robert N. M. Watson under contract
6  * to Juniper Networks, Inc.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 
32 __FBSDID("$FreeBSD$");
33 
34 #include "opt_inet6.h"
35 #include "opt_rss.h"
36 
37 #include <sys/param.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/mutex.h>
42 #include <sys/smp.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
45 
46 #include <net/rss_config.h>
47 
48 #include <netinet/in.h>
49 
50 #include <netinet/in_pcb.h>
51 #include <netinet/in_rss.h>
52 #ifdef INET6
53 #include <netinet6/in6_pcb.h>
54 #endif /* INET6 */
55 
56 /*
57  * pcbgroups, or "connection groups" are based on Willman, Rixner, and Cox's
58  * 2006 USENIX paper, "An Evaluation of Network Stack Parallelization
59  * Strategies in Modern Operating Systems".  This implementation differs
60  * significantly from that described in the paper, in that it attempts to
61  * introduce not just notions of affinity for connections and distribute work
62  * so as to reduce lock contention, but also align those notions with
63  * hardware work distribution strategies such as RSS.  In this construction,
64  * connection groups supplement, rather than replace, existing reservation
65  * tables for protocol 4-tuples, offering CPU-affine lookup tables with
66  * minimal cache line migration and lock contention during steady state
67  * operation.
68  *
69  * Hardware-offloaded checksums are often inefficient in software -- for
70  * example, Toeplitz, specified by RSS, introduced a significant overhead if
 * performed during per-packet processing.  It is therefore desirable to fall
72  * back on traditional reservation table lookups without affinity where
73  * hardware-offloaded checksums aren't available, such as for traffic over
74  * non-RSS interfaces.
75  *
76  * Internet protocols, such as UDP and TCP, register to use connection groups
77  * by providing an ipi_hashfields value other than IPI_HASHFIELDS_NONE; this
78  * indicates to the connection group code whether a 2-tuple or 4-tuple is
79  * used as an argument to hashes that assign a connection to a particular
80  * group.  This must be aligned with any hardware offloaded distribution
81  * model, such as RSS or similar approaches taken in embedded network boards.
82  * Wildcard sockets require special handling, as in Willman 2006, and are
83  * shared between connection groups -- while being protected by group-local
84  * locks.  This means that connection establishment and teardown can be
 * significantly more expensive than without connection groups, but that
86  * steady-state processing can be significantly faster.
87  *
88  * When RSS is used, certain connection group parameters, such as the number
89  * of groups, are provided by the RSS implementation, found in in_rss.c.
90  * Otherwise, in_pcbgroup.c selects possible sensible parameters
91  * corresponding to the degree of parallelism exposed by netisr.
92  *
93  * Most of the implementation of connection groups is in this file; however,
94  * connection group lookup is implemented in in_pcb.c alongside reservation
95  * table lookups -- see in_pcblookup_group().
96  *
97  * TODO:
98  *
99  * Implement dynamic rebalancing of buckets with connection groups; when
100  * load is unevenly distributed, search for more optimal balancing on
101  * demand.  This might require scaling up the number of connection groups
102  * by <<1.
103  *
104  * Provide an IP 2-tuple or 4-tuple netisr m2cpu handler based on connection
105  * groups for ip_input and ip6_input, allowing non-offloaded work
106  * distribution.
107  *
108  * Expose effective CPU affinity of connections to userspace using socket
109  * options.
110  *
111  * Investigate per-connection affinity overrides based on socket options; an
112  * option could be set, certainly resulting in work being distributed
113  * differently in software, and possibly propagated to supporting hardware
114  * with TCAMs or hardware hash tables.  This might require connections to
115  * exist in more than one connection group at a time.
116  *
117  * Hook netisr thread reconfiguration events, and propagate those to RSS so
118  * that rebalancing can occur when the thread pool grows or shrinks.
119  *
120  * Expose per-pcbgroup statistics to userspace monitoring tools such as
121  * netstat, in order to allow better debugging and profiling.
122  */
123 
124 void
in_pcbgroup_init(struct inpcbinfo * pcbinfo,u_int hashfields,int hash_nelements)125 in_pcbgroup_init(struct inpcbinfo *pcbinfo, u_int hashfields,
126     int hash_nelements)
127 {
128 	struct inpcbgroup *pcbgroup;
129 	u_int numpcbgroups, pgn;
130 
131 	/*
132 	 * Only enable connection groups for a protocol if it has been
133 	 * specifically requested.
134 	 */
135 	if (hashfields == IPI_HASHFIELDS_NONE)
136 		return;
137 
138 	/*
139 	 * Connection groups are about multi-processor load distribution,
140 	 * lock contention, and connection CPU affinity.  As such, no point
141 	 * in turning them on for a uniprocessor machine, it only wastes
142 	 * memory.
143 	 */
144 	if (mp_ncpus == 1)
145 		return;
146 
147 #ifdef RSS
148 	/*
149 	 * If we're using RSS, then RSS determines the number of connection
150 	 * groups to use: one connection group per RSS bucket.  If for some
151 	 * reason RSS isn't able to provide a number of buckets, disable
152 	 * connection groups entirely.
153 	 *
154 	 * XXXRW: Can this ever happen?
155 	 */
156 	numpcbgroups = rss_getnumbuckets();
157 	if (numpcbgroups == 0)
158 		return;
159 #else
160 	/*
161 	 * Otherwise, we'll just use one per CPU for now.  If we decide to
162 	 * do dynamic rebalancing a la RSS, we'll need similar logic here.
163 	 */
164 	numpcbgroups = mp_ncpus;
165 #endif
166 
167 	pcbinfo->ipi_hashfields = hashfields;
168 	pcbinfo->ipi_pcbgroups = malloc(numpcbgroups *
169 	    sizeof(*pcbinfo->ipi_pcbgroups), M_PCB, M_WAITOK | M_ZERO);
170 	pcbinfo->ipi_npcbgroups = numpcbgroups;
171 	pcbinfo->ipi_wildbase = hashinit(hash_nelements, M_PCB,
172 	    &pcbinfo->ipi_wildmask);
173 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
174 		pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
175 		pcbgroup->ipg_hashbase = hashinit(hash_nelements, M_PCB,
176 		    &pcbgroup->ipg_hashmask);
177 		INP_GROUP_LOCK_INIT(pcbgroup, "pcbgroup");
178 
179 		/*
180 		 * Initialise notional affinity of the pcbgroup -- for RSS,
181 		 * we want the same notion of affinity as NICs to be used.  In
182 		 * the non-RSS case, just round robin for the time being.
183 		 *
184 		 * XXXRW: The notion of a bucket to CPU mapping is common at
185 		 * both pcbgroup and RSS layers -- does that mean that we
186 		 * should migrate it all from RSS to here, and just leave RSS
187 		 * responsible only for providing hashing and mapping funtions?
188 		 */
189 #ifdef RSS
190 		pcbgroup->ipg_cpu = rss_getcpu(pgn);
191 #else
192 		pcbgroup->ipg_cpu = (pgn % mp_ncpus);
193 #endif
194 	}
195 }
196 
197 void
in_pcbgroup_destroy(struct inpcbinfo * pcbinfo)198 in_pcbgroup_destroy(struct inpcbinfo *pcbinfo)
199 {
200 	struct inpcbgroup *pcbgroup;
201 	u_int pgn;
202 
203 	if (pcbinfo->ipi_npcbgroups == 0)
204 		return;
205 
206 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
207 		pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
208 		KASSERT(LIST_EMPTY(pcbinfo->ipi_listhead),
209 		    ("in_pcbinfo_destroy: listhead not empty"));
210 		INP_GROUP_LOCK_DESTROY(pcbgroup);
211 		hashdestroy(pcbgroup->ipg_hashbase, M_PCB,
212 		    pcbgroup->ipg_hashmask);
213 	}
214 	hashdestroy(pcbinfo->ipi_wildbase, M_PCB, pcbinfo->ipi_wildmask);
215 	free(pcbinfo->ipi_pcbgroups, M_PCB);
216 	pcbinfo->ipi_pcbgroups = NULL;
217 	pcbinfo->ipi_npcbgroups = 0;
218 	pcbinfo->ipi_hashfields = 0;
219 }
220 
221 /*
222  * Given a hash of whatever the covered tuple might be, return a pcbgroup
223  * index.  Where RSS is supported, try to align bucket selection with RSS CPU
224  * affinity strategy.
225  */
226 static __inline u_int
in_pcbgroup_getbucket(struct inpcbinfo * pcbinfo,uint32_t hash)227 in_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash)
228 {
229 
230 #ifdef RSS
231 	return (rss_getbucket(hash));
232 #else
233 	return (hash % pcbinfo->ipi_npcbgroups);
234 #endif
235 }
236 
237 /*
238  * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash
239  * information is insufficient to identify the pcbgroup.  This might occur if
240  * a TCP packet turns up with a 2-tuple hash, or if an RSS hash is present but
241  * RSS is not compiled into the kernel.
242  */
243 struct inpcbgroup *
in_pcbgroup_byhash(struct inpcbinfo * pcbinfo,u_int hashtype,uint32_t hash)244 in_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash)
245 {
246 
247 #ifdef RSS
248 	if ((pcbinfo->ipi_hashfields == IPI_HASHFIELDS_4TUPLE &&
249 	    hashtype == M_HASHTYPE_RSS_TCP_IPV4) ||
250 	    (pcbinfo->ipi_hashfields == IPI_HASHFIELDS_4TUPLE &&
251 	    hashtype == M_HASHTYPE_RSS_UDP_IPV4) ||
252 	    (pcbinfo->ipi_hashfields == IPI_HASHFIELDS_2TUPLE &&
253 	    hashtype == M_HASHTYPE_RSS_IPV4))
254 		return (&pcbinfo->ipi_pcbgroups[
255 		    in_pcbgroup_getbucket(pcbinfo, hash)]);
256 #endif
257 	return (NULL);
258 }
259 
260 static struct inpcbgroup *
in_pcbgroup_bymbuf(struct inpcbinfo * pcbinfo,struct mbuf * m)261 in_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m)
262 {
263 
264 	return (in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
265 	    m->m_pkthdr.flowid));
266 }
267 
268 struct inpcbgroup *
in_pcbgroup_bytuple(struct inpcbinfo * pcbinfo,struct in_addr laddr,u_short lport,struct in_addr faddr,u_short fport)269 in_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, struct in_addr laddr,
270     u_short lport, struct in_addr faddr, u_short fport)
271 {
272 	uint32_t hash;
273 
274 	/*
275 	 * RSS note: we pass foreign addr/port as source, and local addr/port
276 	 * as destination, as we want to align with what the hardware is
277 	 * doing.
278 	 */
279 	switch (pcbinfo->ipi_hashfields) {
280 	case IPI_HASHFIELDS_4TUPLE:
281 #ifdef RSS
282 		hash = rss_hash_ip4_4tuple(faddr, fport, laddr, lport);
283 #else
284 		hash = faddr.s_addr ^ fport;
285 #endif
286 		break;
287 
288 	case IPI_HASHFIELDS_2TUPLE:
289 #ifdef RSS
290 		hash = rss_hash_ip4_2tuple(faddr, laddr);
291 #else
292 		hash = faddr.s_addr ^ laddr.s_addr;
293 #endif
294 		break;
295 
296 	default:
297 		hash = 0;
298 	}
299 	return (&pcbinfo->ipi_pcbgroups[in_pcbgroup_getbucket(pcbinfo,
300 	    hash)]);
301 }
302 
303 struct inpcbgroup *
in_pcbgroup_byinpcb(struct inpcb * inp)304 in_pcbgroup_byinpcb(struct inpcb *inp)
305 {
306 #ifdef	RSS
307 	/*
308 	 * Listen sockets with INP_RSS_BUCKET_SET set have a pre-determined
309 	 * RSS bucket and thus we should use this pcbgroup, rather than
310 	 * using a tuple or hash.
311 	 *
312 	 * XXX should verify that there's actually pcbgroups and inp_rss_listen_bucket
313 	 * fits in that!
314 	 */
315 	if (inp->inp_flags2 & INP_RSS_BUCKET_SET)
316 		return (&inp->inp_pcbinfo->ipi_pcbgroups[inp->inp_rss_listen_bucket]);
317 #endif
318 
319 	return (in_pcbgroup_bytuple(inp->inp_pcbinfo, inp->inp_laddr,
320 	    inp->inp_lport, inp->inp_faddr, inp->inp_fport));
321 }
322 
323 static void
in_pcbwild_add(struct inpcb * inp)324 in_pcbwild_add(struct inpcb *inp)
325 {
326 	struct inpcbinfo *pcbinfo;
327 	struct inpcbhead *head;
328 	u_int pgn;
329 
330 	INP_WLOCK_ASSERT(inp);
331 	KASSERT(!(inp->inp_flags2 & INP_PCBGROUPWILD),
332 	    ("%s: is wild",__func__));
333 
334 	pcbinfo = inp->inp_pcbinfo;
335 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
336 		INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
337 	head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, inp->inp_lport,
338 	    0, pcbinfo->ipi_wildmask)];
339 	LIST_INSERT_HEAD(head, inp, inp_pcbgroup_wild);
340 	inp->inp_flags2 |= INP_PCBGROUPWILD;
341 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
342 		INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
343 }
344 
345 static void
in_pcbwild_remove(struct inpcb * inp)346 in_pcbwild_remove(struct inpcb *inp)
347 {
348 	struct inpcbinfo *pcbinfo;
349 	u_int pgn;
350 
351 	INP_WLOCK_ASSERT(inp);
352 	KASSERT((inp->inp_flags2 & INP_PCBGROUPWILD),
353 	    ("%s: not wild", __func__));
354 
355 	pcbinfo = inp->inp_pcbinfo;
356 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
357 		INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
358 	LIST_REMOVE(inp, inp_pcbgroup_wild);
359 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
360 		INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
361 	inp->inp_flags2 &= ~INP_PCBGROUPWILD;
362 }
363 
364 static __inline int
in_pcbwild_needed(struct inpcb * inp)365 in_pcbwild_needed(struct inpcb *inp)
366 {
367 #ifdef	RSS
368 	/*
369 	 * If it's a listen socket and INP_RSS_BUCKET_SET is set,
370 	 * it's a wildcard socket _but_ it's in a specific pcbgroup.
371 	 * Thus we don't treat it as a pcbwild inp.
372 	 */
373 	if (inp->inp_flags2 & INP_RSS_BUCKET_SET)
374 		return (0);
375 #endif
376 
377 #ifdef INET6
378 	if (inp->inp_vflag & INP_IPV6)
379 		return (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr));
380 	else
381 #endif
382 		return (inp->inp_faddr.s_addr == htonl(INADDR_ANY));
383 }
384 
385 static void
in_pcbwild_update_internal(struct inpcb * inp)386 in_pcbwild_update_internal(struct inpcb *inp)
387 {
388 	int wildcard_needed;
389 
390 	wildcard_needed = in_pcbwild_needed(inp);
391 	if (wildcard_needed && !(inp->inp_flags2 & INP_PCBGROUPWILD))
392 		in_pcbwild_add(inp);
393 	else if (!wildcard_needed && (inp->inp_flags2 & INP_PCBGROUPWILD))
394 		in_pcbwild_remove(inp);
395 }
396 
397 /*
398  * Update the pcbgroup of an inpcb, which might include removing an old
399  * pcbgroup reference and/or adding a new one.  Wildcard processing is not
400  * performed here, although ideally we'll never install a pcbgroup for a
401  * wildcard inpcb (asserted below).
402  */
403 static void
in_pcbgroup_update_internal(struct inpcbinfo * pcbinfo,struct inpcbgroup * newpcbgroup,struct inpcb * inp)404 in_pcbgroup_update_internal(struct inpcbinfo *pcbinfo,
405     struct inpcbgroup *newpcbgroup, struct inpcb *inp)
406 {
407 	struct inpcbgroup *oldpcbgroup;
408 	struct inpcbhead *pcbhash;
409 	uint32_t hashkey_faddr;
410 
411 	INP_WLOCK_ASSERT(inp);
412 
413 	oldpcbgroup = inp->inp_pcbgroup;
414 	if (oldpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
415 		INP_GROUP_LOCK(oldpcbgroup);
416 		LIST_REMOVE(inp, inp_pcbgrouphash);
417 		inp->inp_pcbgroup = NULL;
418 		INP_GROUP_UNLOCK(oldpcbgroup);
419 	}
420 	if (newpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
421 #ifdef INET6
422 		if (inp->inp_vflag & INP_IPV6)
423 			hashkey_faddr = INP6_PCBHASHKEY(&inp->in6p_faddr);
424 		else
425 #endif
426 			hashkey_faddr = inp->inp_faddr.s_addr;
427 		INP_GROUP_LOCK(newpcbgroup);
428 		/*
429 		 * If the inp is an RSS bucket wildcard entry, ensure
430 		 * that the PCB hash is calculated correctly.
431 		 *
432 		 * The wildcard hash calculation differs from the
433 		 * non-wildcard definition.  The source address is
434 		 * INADDR_ANY and the far port is 0.
435 		 */
436 		if (inp->inp_flags2 & INP_RSS_BUCKET_SET) {
437 			pcbhash = &newpcbgroup->ipg_hashbase[
438 			    INP_PCBHASH(INADDR_ANY, inp->inp_lport, 0,
439 			    newpcbgroup->ipg_hashmask)];
440 		} else {
441 			pcbhash = &newpcbgroup->ipg_hashbase[
442 			    INP_PCBHASH(hashkey_faddr, inp->inp_lport,
443 			    inp->inp_fport,
444 			    newpcbgroup->ipg_hashmask)];
445 		}
446 		LIST_INSERT_HEAD(pcbhash, inp, inp_pcbgrouphash);
447 		inp->inp_pcbgroup = newpcbgroup;
448 		INP_GROUP_UNLOCK(newpcbgroup);
449 	}
450 
451 	KASSERT(!(newpcbgroup != NULL && in_pcbwild_needed(inp)),
452 	    ("%s: pcbgroup and wildcard!", __func__));
453 }
454 
455 /*
456  * Two update paths: one in which the 4-tuple on an inpcb has been updated
457  * and therefore connection groups may need to change (or a wildcard entry
458  * may needed to be installed), and another in which the 4-tuple has been
459  * set as a result of a packet received, in which case we may be able to use
460  * the hash on the mbuf to avoid doing a software hash calculation for RSS.
461  *
462  * In each case: first, let the wildcard code have a go at placing it as a
463  * wildcard socket.  If it was a wildcard, or if the connection has been
464  * dropped, then no pcbgroup is required (so potentially clear it);
465  * otherwise, calculate and update the pcbgroup for the inpcb.
466  */
467 void
in_pcbgroup_update(struct inpcb * inp)468 in_pcbgroup_update(struct inpcb *inp)
469 {
470 	struct inpcbinfo *pcbinfo;
471 	struct inpcbgroup *newpcbgroup;
472 
473 	INP_WLOCK_ASSERT(inp);
474 
475 	pcbinfo = inp->inp_pcbinfo;
476 	if (!in_pcbgroup_enabled(pcbinfo))
477 		return;
478 
479 	in_pcbwild_update_internal(inp);
480 	if (!(inp->inp_flags2 & INP_PCBGROUPWILD) &&
481 	    !(inp->inp_flags & INP_DROPPED)) {
482 #ifdef INET6
483 		if (inp->inp_vflag & INP_IPV6)
484 			newpcbgroup = in6_pcbgroup_byinpcb(inp);
485 		else
486 #endif
487 			newpcbgroup = in_pcbgroup_byinpcb(inp);
488 	} else
489 		newpcbgroup = NULL;
490 	in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp);
491 }
492 
493 void
in_pcbgroup_update_mbuf(struct inpcb * inp,struct mbuf * m)494 in_pcbgroup_update_mbuf(struct inpcb *inp, struct mbuf *m)
495 {
496 	struct inpcbinfo *pcbinfo;
497 	struct inpcbgroup *newpcbgroup;
498 
499 	INP_WLOCK_ASSERT(inp);
500 
501 	pcbinfo = inp->inp_pcbinfo;
502 	if (!in_pcbgroup_enabled(pcbinfo))
503 		return;
504 
505 	/*
506 	 * Possibly should assert !INP_PCBGROUPWILD rather than testing for
507 	 * it; presumably this function should never be called for anything
508 	 * other than non-wildcard socket?
509 	 */
510 	in_pcbwild_update_internal(inp);
511 	if (!(inp->inp_flags2 & INP_PCBGROUPWILD) &&
512 	    !(inp->inp_flags & INP_DROPPED)) {
513 		newpcbgroup = in_pcbgroup_bymbuf(pcbinfo, m);
514 #ifdef INET6
515 		if (inp->inp_vflag & INP_IPV6) {
516 			if (newpcbgroup == NULL)
517 				newpcbgroup = in6_pcbgroup_byinpcb(inp);
518 		} else {
519 #endif
520 			if (newpcbgroup == NULL)
521 				newpcbgroup = in_pcbgroup_byinpcb(inp);
522 #ifdef INET6
523 		}
524 #endif
525 	} else
526 		newpcbgroup = NULL;
527 	in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp);
528 }
529 
530 /*
531  * Remove pcbgroup entry and optional pcbgroup wildcard entry for this inpcb.
532  */
533 void
in_pcbgroup_remove(struct inpcb * inp)534 in_pcbgroup_remove(struct inpcb *inp)
535 {
536 	struct inpcbgroup *pcbgroup;
537 
538 	INP_WLOCK_ASSERT(inp);
539 
540 	if (!in_pcbgroup_enabled(inp->inp_pcbinfo))
541 		return;
542 
543 	if (inp->inp_flags2 & INP_PCBGROUPWILD)
544 		in_pcbwild_remove(inp);
545 
546 	pcbgroup = inp->inp_pcbgroup;
547 	if (pcbgroup != NULL) {
548 		INP_GROUP_LOCK(pcbgroup);
549 		LIST_REMOVE(inp, inp_pcbgrouphash);
550 		inp->inp_pcbgroup = NULL;
551 		INP_GROUP_UNLOCK(pcbgroup);
552 	}
553 }
554 
555 /*
556  * Query whether or not it is appropriate to use pcbgroups to look up inpcbs
557  * for a protocol.
558  */
559 int
in_pcbgroup_enabled(struct inpcbinfo * pcbinfo)560 in_pcbgroup_enabled(struct inpcbinfo *pcbinfo)
561 {
562 
563 	return (pcbinfo->ipi_npcbgroups > 0);
564 }
565