1 /* $FreeBSD: stable/9/sys/contrib/altq/altq/altq_subr.c 220433 2011-04-07 23:28:28Z jkim $ */
2 /* $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ */
3
4 /*
5 * Copyright (C) 1997-2003
6 * Sony Computer Science Laboratories Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #if defined(__FreeBSD__) || defined(__NetBSD__)
31 #include "opt_altq.h"
32 #include "opt_inet.h"
33 #ifdef __FreeBSD__
34 #include "opt_inet6.h"
35 #endif
36 #endif /* __FreeBSD__ || __NetBSD__ */
37
38 #include <sys/param.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/systm.h>
42 #include <sys/proc.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
45 #include <sys/kernel.h>
46 #include <sys/errno.h>
47 #include <sys/syslog.h>
48 #include <sys/sysctl.h>
49 #include <sys/queue.h>
50
51 #include <net/if.h>
52 #include <net/if_dl.h>
53 #include <net/if_types.h>
54 #ifdef __FreeBSD__
55 #include <net/vnet.h>
56 #endif
57
58 #include <netinet/in.h>
59 #include <netinet/in_systm.h>
60 #include <netinet/ip.h>
61 #ifdef INET6
62 #include <netinet/ip6.h>
63 #endif
64 #include <netinet/tcp.h>
65 #include <netinet/udp.h>
66
67 #include <net/pfvar.h>
68 #include <altq/altq.h>
69 #ifdef ALTQ3_COMPAT
70 #include <altq/altq_conf.h>
71 #endif
72
73 /* machine dependent clock related includes */
74 #ifdef __FreeBSD__
75 #include <sys/bus.h>
76 #include <sys/cpu.h>
77 #include <sys/eventhandler.h>
78 #include <machine/clock.h>
79 #endif
80 #if defined(__amd64__) || defined(__i386__)
81 #include <machine/cpufunc.h> /* for pentium tsc */
82 #include <machine/specialreg.h> /* for CPUID_TSC */
83 #ifdef __FreeBSD__
84 #include <machine/md_var.h> /* for cpu_feature */
85 #elif defined(__NetBSD__) || defined(__OpenBSD__)
86 #include <machine/cpu.h> /* for cpu_feature */
87 #endif
88 #endif /* __amd64 || __i386__ */
89
90 /*
91 * internal function prototypes
92 */
93 static void tbr_timeout(void *);
94 int (*altq_input)(struct mbuf *, int) = NULL;
95 static struct mbuf *tbr_dequeue(struct ifaltq *, int);
96 static int tbr_timer = 0; /* token bucket regulator timer */
97 #if !defined(__FreeBSD__) || (__FreeBSD_version < 600000)
98 static struct callout tbr_callout = CALLOUT_INITIALIZER;
99 #else
100 static struct callout tbr_callout;
101 #endif
102
#ifdef ALTQ3_CLFIER_COMPAT
/* port extraction */
static int	extract_ports4(struct mbuf *, struct ip *,
		    struct flowinfo_in *);

/* IPv4 filter matching */
static int	apply_filter4(u_int32_t, struct flow_filter *,
		    struct flowinfo_in *);
static int	apply_ppfilter4(u_int32_t, struct flow_filter *,
		    struct flowinfo_in *);
static int	apply_tosfilter4(u_int32_t, struct flow_filter *,
		    struct flowinfo_in *);

#ifdef INET6
/* IPv6 counterparts */
static int	extract_ports6(struct mbuf *, struct ip6_hdr *,
		    struct flowinfo_in6 *);
static int	apply_filter6(u_int32_t, struct flow_filter6 *,
		    struct flowinfo_in6 *);
#endif

/* filter handle bookkeeping */
static u_long	get_filt_handle(struct acc_classifier *, int);
static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
static u_int32_t filt2fibmask(struct flow_filter *);

/* IPv4 fragment cache */
static int	ip4f_init(void);
static struct ip4_frag *ip4f_alloc(void);
static void	ip4f_free(struct ip4_frag *);
static void	ip4f_cache(struct ip *, struct flowinfo_in *);
static int	ip4f_lookup(struct ip *, struct flowinfo_in *);
#endif /* ALTQ3_CLFIER_COMPAT */
129
130 /*
131 * alternate queueing support routines
132 */
133
134 /* look up the queue state by the interface name and the queueing type. */
135 void *
altq_lookup(name,type)136 altq_lookup(name, type)
137 char *name;
138 int type;
139 {
140 struct ifnet *ifp;
141
142 if ((ifp = ifunit(name)) != NULL) {
143 /* read if_snd unlocked */
144 if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
145 return (ifp->if_snd.altq_disc);
146 }
147
148 return NULL;
149 }
150
151 int
altq_attach(ifq,type,discipline,enqueue,dequeue,request,clfier,classify)152 altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
153 struct ifaltq *ifq;
154 int type;
155 void *discipline;
156 int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
157 struct mbuf *(*dequeue)(struct ifaltq *, int);
158 int (*request)(struct ifaltq *, int, void *);
159 void *clfier;
160 void *(*classify)(void *, struct mbuf *, int);
161 {
162 IFQ_LOCK(ifq);
163 if (!ALTQ_IS_READY(ifq)) {
164 IFQ_UNLOCK(ifq);
165 return ENXIO;
166 }
167
168 #ifdef ALTQ3_COMPAT
169 /*
170 * pfaltq can override the existing discipline, but altq3 cannot.
171 * check these if clfier is not NULL (which implies altq3).
172 */
173 if (clfier != NULL) {
174 if (ALTQ_IS_ENABLED(ifq)) {
175 IFQ_UNLOCK(ifq);
176 return EBUSY;
177 }
178 if (ALTQ_IS_ATTACHED(ifq)) {
179 IFQ_UNLOCK(ifq);
180 return EEXIST;
181 }
182 }
183 #endif
184 ifq->altq_type = type;
185 ifq->altq_disc = discipline;
186 ifq->altq_enqueue = enqueue;
187 ifq->altq_dequeue = dequeue;
188 ifq->altq_request = request;
189 ifq->altq_clfier = clfier;
190 ifq->altq_classify = classify;
191 ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
192 #ifdef ALTQ3_COMPAT
193 #ifdef ALTQ_KLD
194 altq_module_incref(type);
195 #endif
196 #endif
197 IFQ_UNLOCK(ifq);
198 return 0;
199 }
200
201 int
altq_detach(ifq)202 altq_detach(ifq)
203 struct ifaltq *ifq;
204 {
205 IFQ_LOCK(ifq);
206
207 if (!ALTQ_IS_READY(ifq)) {
208 IFQ_UNLOCK(ifq);
209 return ENXIO;
210 }
211 if (ALTQ_IS_ENABLED(ifq)) {
212 IFQ_UNLOCK(ifq);
213 return EBUSY;
214 }
215 if (!ALTQ_IS_ATTACHED(ifq)) {
216 IFQ_UNLOCK(ifq);
217 return (0);
218 }
219 #ifdef ALTQ3_COMPAT
220 #ifdef ALTQ_KLD
221 altq_module_declref(ifq->altq_type);
222 #endif
223 #endif
224
225 ifq->altq_type = ALTQT_NONE;
226 ifq->altq_disc = NULL;
227 ifq->altq_enqueue = NULL;
228 ifq->altq_dequeue = NULL;
229 ifq->altq_request = NULL;
230 ifq->altq_clfier = NULL;
231 ifq->altq_classify = NULL;
232 ifq->altq_flags &= ALTQF_CANTCHANGE;
233
234 IFQ_UNLOCK(ifq);
235 return 0;
236 }
237
238 int
altq_enable(ifq)239 altq_enable(ifq)
240 struct ifaltq *ifq;
241 {
242 int s;
243
244 IFQ_LOCK(ifq);
245
246 if (!ALTQ_IS_READY(ifq)) {
247 IFQ_UNLOCK(ifq);
248 return ENXIO;
249 }
250 if (ALTQ_IS_ENABLED(ifq)) {
251 IFQ_UNLOCK(ifq);
252 return 0;
253 }
254
255 #ifdef __NetBSD__
256 s = splnet();
257 #else
258 s = splimp();
259 #endif
260 IFQ_PURGE_NOLOCK(ifq);
261 ASSERT(ifq->ifq_len == 0);
262 ifq->ifq_drv_maxlen = 0; /* disable bulk dequeue */
263 ifq->altq_flags |= ALTQF_ENABLED;
264 if (ifq->altq_clfier != NULL)
265 ifq->altq_flags |= ALTQF_CLASSIFY;
266 splx(s);
267
268 IFQ_UNLOCK(ifq);
269 return 0;
270 }
271
272 int
altq_disable(ifq)273 altq_disable(ifq)
274 struct ifaltq *ifq;
275 {
276 int s;
277
278 IFQ_LOCK(ifq);
279 if (!ALTQ_IS_ENABLED(ifq)) {
280 IFQ_UNLOCK(ifq);
281 return 0;
282 }
283
284 #ifdef __NetBSD__
285 s = splnet();
286 #else
287 s = splimp();
288 #endif
289 IFQ_PURGE_NOLOCK(ifq);
290 ASSERT(ifq->ifq_len == 0);
291 ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
292 splx(s);
293
294 IFQ_UNLOCK(ifq);
295 return 0;
296 }
297
#ifdef ALTQ_DEBUG
/*
 * Print the failed expression together with its source location,
 * then panic.
 */
void
altq_assert(file, line, failedexpr)
	const char *file;
	const char *failedexpr;
	int line;
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
	    failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
#endif /* ALTQ_DEBUG */
310
/*
 * token bucket parameters are kept as 64-bit fixed-point values with
 * TBR_SHIFT fractional bits:
 *	rate:	bytes per machine clock tick, scaled
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	bytes, scaled (byte << 32)
 */
#define	TBR_SHIFT	32
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
321
322 static struct mbuf *
tbr_dequeue(ifq,op)323 tbr_dequeue(ifq, op)
324 struct ifaltq *ifq;
325 int op;
326 {
327 struct tb_regulator *tbr;
328 struct mbuf *m;
329 int64_t interval;
330 u_int64_t now;
331
332 IFQ_LOCK_ASSERT(ifq);
333 tbr = ifq->altq_tbr;
334 if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
335 /* if this is a remove after poll, bypass tbr check */
336 } else {
337 /* update token only when it is negative */
338 if (tbr->tbr_token <= 0) {
339 now = read_machclk();
340 interval = now - tbr->tbr_last;
341 if (interval >= tbr->tbr_filluptime)
342 tbr->tbr_token = tbr->tbr_depth;
343 else {
344 tbr->tbr_token += interval * tbr->tbr_rate;
345 if (tbr->tbr_token > tbr->tbr_depth)
346 tbr->tbr_token = tbr->tbr_depth;
347 }
348 tbr->tbr_last = now;
349 }
350 /* if token is still negative, don't allow dequeue */
351 if (tbr->tbr_token <= 0)
352 return (NULL);
353 }
354
355 if (ALTQ_IS_ENABLED(ifq))
356 m = (*ifq->altq_dequeue)(ifq, op);
357 else {
358 if (op == ALTDQ_POLL)
359 _IF_POLL(ifq, m);
360 else
361 _IF_DEQUEUE(ifq, m);
362 }
363
364 if (m != NULL && op == ALTDQ_REMOVE)
365 tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
366 tbr->tbr_lastop = op;
367 return (m);
368 }
369
370 /*
371 * set a token bucket regulator.
372 * if the specified rate is zero, the token bucket regulator is deleted.
373 */
374 int
tbr_set(ifq,profile)375 tbr_set(ifq, profile)
376 struct ifaltq *ifq;
377 struct tb_profile *profile;
378 {
379 struct tb_regulator *tbr, *otbr;
380
381 if (tbr_dequeue_ptr == NULL)
382 tbr_dequeue_ptr = tbr_dequeue;
383
384 if (machclk_freq == 0)
385 init_machclk();
386 if (machclk_freq == 0) {
387 printf("tbr_set: no cpu clock available!\n");
388 return (ENXIO);
389 }
390
391 IFQ_LOCK(ifq);
392 if (profile->rate == 0) {
393 /* delete this tbr */
394 if ((tbr = ifq->altq_tbr) == NULL) {
395 IFQ_UNLOCK(ifq);
396 return (ENOENT);
397 }
398 ifq->altq_tbr = NULL;
399 free(tbr, M_DEVBUF);
400 IFQ_UNLOCK(ifq);
401 return (0);
402 }
403
404 IFQ_UNLOCK(ifq);
405 tbr = malloc(sizeof(struct tb_regulator),
406 M_DEVBUF, M_WAITOK);
407 if (tbr == NULL) { /* can not happen */
408 IFQ_UNLOCK(ifq);
409 return (ENOMEM);
410 }
411 bzero(tbr, sizeof(struct tb_regulator));
412
413 tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
414 tbr->tbr_depth = TBR_SCALE(profile->depth);
415 if (tbr->tbr_rate > 0)
416 tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
417 else
418 tbr->tbr_filluptime = 0xffffffffffffffffLL;
419 tbr->tbr_token = tbr->tbr_depth;
420 tbr->tbr_last = read_machclk();
421 tbr->tbr_lastop = ALTDQ_REMOVE;
422
423 IFQ_LOCK(ifq);
424 otbr = ifq->altq_tbr;
425 ifq->altq_tbr = tbr; /* set the new tbr */
426
427 if (otbr != NULL)
428 free(otbr, M_DEVBUF);
429 else {
430 if (tbr_timer == 0) {
431 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
432 tbr_timer = 1;
433 }
434 }
435 IFQ_UNLOCK(ifq);
436 return (0);
437 }
438
439 /*
440 * tbr_timeout goes through the interface list, and kicks the drivers
441 * if necessary.
442 *
443 * MPSAFE
444 */
445 static void
tbr_timeout(arg)446 tbr_timeout(arg)
447 void *arg;
448 {
449 #ifdef __FreeBSD__
450 VNET_ITERATOR_DECL(vnet_iter);
451 #endif
452 struct ifnet *ifp;
453 int active, s;
454
455 active = 0;
456 #ifdef __NetBSD__
457 s = splnet();
458 #else
459 s = splimp();
460 #endif
461 #ifdef __FreeBSD__
462 IFNET_RLOCK_NOSLEEP();
463 VNET_LIST_RLOCK_NOSLEEP();
464 VNET_FOREACH(vnet_iter) {
465 CURVNET_SET(vnet_iter);
466 #endif
467 for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
468 ifp = TAILQ_NEXT(ifp, if_list)) {
469 /* read from if_snd unlocked */
470 if (!TBR_IS_ENABLED(&ifp->if_snd))
471 continue;
472 active++;
473 if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
474 ifp->if_start != NULL)
475 (*ifp->if_start)(ifp);
476 }
477 #ifdef __FreeBSD__
478 CURVNET_RESTORE();
479 }
480 VNET_LIST_RUNLOCK_NOSLEEP();
481 IFNET_RUNLOCK_NOSLEEP();
482 #endif
483 splx(s);
484 if (active > 0)
485 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
486 else
487 tbr_timer = 0; /* don't need tbr_timer anymore */
488 }
489
490 /*
491 * get token bucket regulator profile
492 */
493 int
tbr_get(ifq,profile)494 tbr_get(ifq, profile)
495 struct ifaltq *ifq;
496 struct tb_profile *profile;
497 {
498 struct tb_regulator *tbr;
499
500 IFQ_LOCK(ifq);
501 if ((tbr = ifq->altq_tbr) == NULL) {
502 profile->rate = 0;
503 profile->depth = 0;
504 } else {
505 profile->rate =
506 (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
507 profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
508 }
509 IFQ_UNLOCK(ifq);
510 return (0);
511 }
512
513 /*
514 * attach a discipline to the interface. if one already exists, it is
515 * overridden.
516 * Locking is done in the discipline specific attach functions. Basically
517 * they call back to altq_attach which takes care of the attach and locking.
518 */
519 int
altq_pfattach(struct pf_altq * a)520 altq_pfattach(struct pf_altq *a)
521 {
522 int error = 0;
523
524 switch (a->scheduler) {
525 case ALTQT_NONE:
526 break;
527 #ifdef ALTQ_CBQ
528 case ALTQT_CBQ:
529 error = cbq_pfattach(a);
530 break;
531 #endif
532 #ifdef ALTQ_PRIQ
533 case ALTQT_PRIQ:
534 error = priq_pfattach(a);
535 break;
536 #endif
537 #ifdef ALTQ_HFSC
538 case ALTQT_HFSC:
539 error = hfsc_pfattach(a);
540 break;
541 #endif
542 default:
543 error = ENXIO;
544 }
545
546 return (error);
547 }
548
549 /*
550 * detach a discipline from the interface.
551 * it is possible that the discipline was already overridden by another
552 * discipline.
553 */
554 int
altq_pfdetach(struct pf_altq * a)555 altq_pfdetach(struct pf_altq *a)
556 {
557 struct ifnet *ifp;
558 int s, error = 0;
559
560 if ((ifp = ifunit(a->ifname)) == NULL)
561 return (EINVAL);
562
563 /* if this discipline is no longer referenced, just return */
564 /* read unlocked from if_snd */
565 if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
566 return (0);
567
568 #ifdef __NetBSD__
569 s = splnet();
570 #else
571 s = splimp();
572 #endif
573 /* read unlocked from if_snd, _disable and _detach take care */
574 if (ALTQ_IS_ENABLED(&ifp->if_snd))
575 error = altq_disable(&ifp->if_snd);
576 if (error == 0)
577 error = altq_detach(&ifp->if_snd);
578 splx(s);
579
580 return (error);
581 }
582
583 /*
584 * add a discipline or a queue
585 * Locking is done in the discipline specific functions with regards to
586 * malloc with WAITOK, also it is not yet clear which lock to use.
587 */
588 int
altq_add(struct pf_altq * a)589 altq_add(struct pf_altq *a)
590 {
591 int error = 0;
592
593 if (a->qname[0] != 0)
594 return (altq_add_queue(a));
595
596 if (machclk_freq == 0)
597 init_machclk();
598 if (machclk_freq == 0)
599 panic("altq_add: no cpu clock");
600
601 switch (a->scheduler) {
602 #ifdef ALTQ_CBQ
603 case ALTQT_CBQ:
604 error = cbq_add_altq(a);
605 break;
606 #endif
607 #ifdef ALTQ_PRIQ
608 case ALTQT_PRIQ:
609 error = priq_add_altq(a);
610 break;
611 #endif
612 #ifdef ALTQ_HFSC
613 case ALTQT_HFSC:
614 error = hfsc_add_altq(a);
615 break;
616 #endif
617 default:
618 error = ENXIO;
619 }
620
621 return (error);
622 }
623
624 /*
625 * remove a discipline or a queue
626 * It is yet unclear what lock to use to protect this operation, the
627 * discipline specific functions will determine and grab it
628 */
629 int
altq_remove(struct pf_altq * a)630 altq_remove(struct pf_altq *a)
631 {
632 int error = 0;
633
634 if (a->qname[0] != 0)
635 return (altq_remove_queue(a));
636
637 switch (a->scheduler) {
638 #ifdef ALTQ_CBQ
639 case ALTQT_CBQ:
640 error = cbq_remove_altq(a);
641 break;
642 #endif
643 #ifdef ALTQ_PRIQ
644 case ALTQT_PRIQ:
645 error = priq_remove_altq(a);
646 break;
647 #endif
648 #ifdef ALTQ_HFSC
649 case ALTQT_HFSC:
650 error = hfsc_remove_altq(a);
651 break;
652 #endif
653 default:
654 error = ENXIO;
655 }
656
657 return (error);
658 }
659
660 /*
661 * add a queue to the discipline
662 * It is yet unclear what lock to use to protect this operation, the
663 * discipline specific functions will determine and grab it
664 */
665 int
altq_add_queue(struct pf_altq * a)666 altq_add_queue(struct pf_altq *a)
667 {
668 int error = 0;
669
670 switch (a->scheduler) {
671 #ifdef ALTQ_CBQ
672 case ALTQT_CBQ:
673 error = cbq_add_queue(a);
674 break;
675 #endif
676 #ifdef ALTQ_PRIQ
677 case ALTQT_PRIQ:
678 error = priq_add_queue(a);
679 break;
680 #endif
681 #ifdef ALTQ_HFSC
682 case ALTQT_HFSC:
683 error = hfsc_add_queue(a);
684 break;
685 #endif
686 default:
687 error = ENXIO;
688 }
689
690 return (error);
691 }
692
693 /*
694 * remove a queue from the discipline
695 * It is yet unclear what lock to use to protect this operation, the
696 * discipline specific functions will determine and grab it
697 */
698 int
altq_remove_queue(struct pf_altq * a)699 altq_remove_queue(struct pf_altq *a)
700 {
701 int error = 0;
702
703 switch (a->scheduler) {
704 #ifdef ALTQ_CBQ
705 case ALTQT_CBQ:
706 error = cbq_remove_queue(a);
707 break;
708 #endif
709 #ifdef ALTQ_PRIQ
710 case ALTQT_PRIQ:
711 error = priq_remove_queue(a);
712 break;
713 #endif
714 #ifdef ALTQ_HFSC
715 case ALTQT_HFSC:
716 error = hfsc_remove_queue(a);
717 break;
718 #endif
719 default:
720 error = ENXIO;
721 }
722
723 return (error);
724 }
725
726 /*
727 * get queue statistics
728 * Locking is done in the discipline specific functions with regards to
729 * copyout operations, also it is not yet clear which lock to use.
730 */
731 int
altq_getqstats(struct pf_altq * a,void * ubuf,int * nbytes)732 altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
733 {
734 int error = 0;
735
736 switch (a->scheduler) {
737 #ifdef ALTQ_CBQ
738 case ALTQT_CBQ:
739 error = cbq_getqstats(a, ubuf, nbytes);
740 break;
741 #endif
742 #ifdef ALTQ_PRIQ
743 case ALTQT_PRIQ:
744 error = priq_getqstats(a, ubuf, nbytes);
745 break;
746 #endif
747 #ifdef ALTQ_HFSC
748 case ALTQT_HFSC:
749 error = hfsc_getqstats(a, ubuf, nbytes);
750 break;
751 #endif
752 default:
753 error = ENXIO;
754 }
755
756 return (error);
757 }
758
759 /*
760 * read and write diffserv field in IPv4 or IPv6 header
761 */
762 u_int8_t
read_dsfield(m,pktattr)763 read_dsfield(m, pktattr)
764 struct mbuf *m;
765 struct altq_pktattr *pktattr;
766 {
767 struct mbuf *m0;
768 u_int8_t ds_field = 0;
769
770 if (pktattr == NULL ||
771 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
772 return ((u_int8_t)0);
773
774 /* verify that pattr_hdr is within the mbuf data */
775 for (m0 = m; m0 != NULL; m0 = m0->m_next)
776 if ((pktattr->pattr_hdr >= m0->m_data) &&
777 (pktattr->pattr_hdr < m0->m_data + m0->m_len))
778 break;
779 if (m0 == NULL) {
780 /* ick, pattr_hdr is stale */
781 pktattr->pattr_af = AF_UNSPEC;
782 #ifdef ALTQ_DEBUG
783 printf("read_dsfield: can't locate header!\n");
784 #endif
785 return ((u_int8_t)0);
786 }
787
788 if (pktattr->pattr_af == AF_INET) {
789 struct ip *ip = (struct ip *)pktattr->pattr_hdr;
790
791 if (ip->ip_v != 4)
792 return ((u_int8_t)0); /* version mismatch! */
793 ds_field = ip->ip_tos;
794 }
795 #ifdef INET6
796 else if (pktattr->pattr_af == AF_INET6) {
797 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
798 u_int32_t flowlabel;
799
800 flowlabel = ntohl(ip6->ip6_flow);
801 if ((flowlabel >> 28) != 6)
802 return ((u_int8_t)0); /* version mismatch! */
803 ds_field = (flowlabel >> 20) & 0xff;
804 }
805 #endif
806 return (ds_field);
807 }
808
809 void
write_dsfield(struct mbuf * m,struct altq_pktattr * pktattr,u_int8_t dsfield)810 write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
811 {
812 struct mbuf *m0;
813
814 if (pktattr == NULL ||
815 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
816 return;
817
818 /* verify that pattr_hdr is within the mbuf data */
819 for (m0 = m; m0 != NULL; m0 = m0->m_next)
820 if ((pktattr->pattr_hdr >= m0->m_data) &&
821 (pktattr->pattr_hdr < m0->m_data + m0->m_len))
822 break;
823 if (m0 == NULL) {
824 /* ick, pattr_hdr is stale */
825 pktattr->pattr_af = AF_UNSPEC;
826 #ifdef ALTQ_DEBUG
827 printf("write_dsfield: can't locate header!\n");
828 #endif
829 return;
830 }
831
832 if (pktattr->pattr_af == AF_INET) {
833 struct ip *ip = (struct ip *)pktattr->pattr_hdr;
834 u_int8_t old;
835 int32_t sum;
836
837 if (ip->ip_v != 4)
838 return; /* version mismatch! */
839 old = ip->ip_tos;
840 dsfield |= old & 3; /* leave CU bits */
841 if (old == dsfield)
842 return;
843 ip->ip_tos = dsfield;
844 /*
845 * update checksum (from RFC1624)
846 * HC' = ~(~HC + ~m + m')
847 */
848 sum = ~ntohs(ip->ip_sum) & 0xffff;
849 sum += 0xff00 + (~old & 0xff) + dsfield;
850 sum = (sum >> 16) + (sum & 0xffff);
851 sum += (sum >> 16); /* add carry */
852
853 ip->ip_sum = htons(~sum & 0xffff);
854 }
855 #ifdef INET6
856 else if (pktattr->pattr_af == AF_INET6) {
857 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
858 u_int32_t flowlabel;
859
860 flowlabel = ntohl(ip6->ip6_flow);
861 if ((flowlabel >> 28) != 6)
862 return; /* version mismatch! */
863 flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
864 ip6->ip6_flow = htonl(flowlabel);
865 }
866 #endif
867 return;
868 }
869
870
871 /*
872 * high resolution clock support taking advantage of a machine dependent
873 * high resolution time counter (e.g., timestamp counter of intel pentium).
874 * we assume
875 * - 64-bit-long monotonically-increasing counter
876 * - frequency range is 100M-4GHz (CPU speed)
877 */
878 /* if pcc is not available or disabled, emulate 256MHz using microtime() */
879 #define MACHCLK_SHIFT 8
880
881 int machclk_usepcc;
882 u_int32_t machclk_freq;
883 u_int32_t machclk_per_tick;
884
885 #if defined(__i386__) && defined(__NetBSD__)
886 extern u_int64_t cpu_tsc_freq;
887 #endif
888
#if (__FreeBSD_version >= 700035)
/*
 * cpufreq_post_change handler: refresh the machine clock frequency
 * after a CPU frequency transition.  `arg' and `level' are not used.
 */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	/* Only a transition that completed without error is acted upon. */
	if (status == 0) {
#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
		/* A P-state invariant TSC keeps its frequency. */
		if (tsc_is_invariant)
			return;
#endif
		/* Recompute machclk_freq and machclk_per_tick. */
		init_machclk();
	}
}
EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
    EVENTHANDLER_PRI_LAST);
#endif /* __FreeBSD_version >= 700035 */
910
911 static void
init_machclk_setup(void)912 init_machclk_setup(void)
913 {
914 #if (__FreeBSD_version >= 600000)
915 callout_init(&tbr_callout, 0);
916 #endif
917
918 machclk_usepcc = 1;
919
920 #if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
921 machclk_usepcc = 0;
922 #endif
923 #if defined(__FreeBSD__) && defined(SMP)
924 machclk_usepcc = 0;
925 #endif
926 #if defined(__NetBSD__) && defined(MULTIPROCESSOR)
927 machclk_usepcc = 0;
928 #endif
929 #if defined(__amd64__) || defined(__i386__)
930 /* check if TSC is available */
931 #ifdef __FreeBSD__
932 if ((cpu_feature & CPUID_TSC) == 0 ||
933 atomic_load_acq_64(&tsc_freq) == 0)
934 #else
935 if ((cpu_feature & CPUID_TSC) == 0)
936 #endif
937 machclk_usepcc = 0;
938 #endif
939 }
940
941 void
init_machclk(void)942 init_machclk(void)
943 {
944 static int called;
945
946 /* Call one-time initialization function. */
947 if (!called) {
948 init_machclk_setup();
949 called = 1;
950 }
951
952 if (machclk_usepcc == 0) {
953 /* emulate 256MHz using microtime() */
954 machclk_freq = 1000000 << MACHCLK_SHIFT;
955 machclk_per_tick = machclk_freq / hz;
956 #ifdef ALTQ_DEBUG
957 printf("altq: emulate %uHz cpu clock\n", machclk_freq);
958 #endif
959 return;
960 }
961
962 /*
963 * if the clock frequency (of Pentium TSC or Alpha PCC) is
964 * accessible, just use it.
965 */
966 #if defined(__amd64__) || defined(__i386__)
967 #ifdef __FreeBSD__
968 machclk_freq = atomic_load_acq_64(&tsc_freq);
969 #elif defined(__NetBSD__)
970 machclk_freq = (u_int32_t)cpu_tsc_freq;
971 #elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU))
972 machclk_freq = pentium_mhz * 1000000;
973 #endif
974 #endif
975
976 /*
977 * if we don't know the clock frequency, measure it.
978 */
979 if (machclk_freq == 0) {
980 static int wait;
981 struct timeval tv_start, tv_end;
982 u_int64_t start, end, diff;
983 int timo;
984
985 microtime(&tv_start);
986 start = read_machclk();
987 timo = hz; /* 1 sec */
988 (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
989 microtime(&tv_end);
990 end = read_machclk();
991 diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
992 + tv_end.tv_usec - tv_start.tv_usec;
993 if (diff != 0)
994 machclk_freq = (u_int)((end - start) * 1000000 / diff);
995 }
996
997 machclk_per_tick = machclk_freq / hz;
998
999 #ifdef ALTQ_DEBUG
1000 printf("altq: CPU clock: %uHz\n", machclk_freq);
1001 #endif
1002 }
1003
#if defined(__OpenBSD__) && defined(__i386__)
/*
 * return the 64-bit value produced by the instruction encoded as the
 * bytes 0x0f 0x31 (presumably RDTSC, going by the function name).
 */
static __inline u_int64_t
rdtsc(void)
{
	u_int64_t tsc;

	__asm __volatile(".byte 0x0f, 0x31" : "=A" (tsc));
	return (tsc);
}
#endif /* __OpenBSD__ && __i386__ */
1013
1014 u_int64_t
read_machclk(void)1015 read_machclk(void)
1016 {
1017 u_int64_t val;
1018
1019 if (machclk_usepcc) {
1020 #if defined(__amd64__) || defined(__i386__)
1021 val = rdtsc();
1022 #else
1023 panic("read_machclk");
1024 #endif
1025 } else {
1026 struct timeval tv;
1027
1028 microtime(&tv);
1029 val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
1030 + tv.tv_usec) << MACHCLK_SHIFT);
1031 }
1032 return (val);
1033 }
1034
1035 #ifdef ALTQ3_CLFIER_COMPAT
1036
/* IPsec protocol numbers, defined here only if the headers lack them */
#ifndef IPPROTO_AH
#define	IPPROTO_AH	51		/* authentication header */
#endif
#ifndef IPPROTO_ESP
#define	IPPROTO_ESP	50		/* encapsulating security payload */
#endif
1043
1044 /*
1045 * extract flow information from a given packet.
1046 * filt_mask shows flowinfo fields required.
1047 * we assume the ip header is in one mbuf, and addresses and ports are
1048 * in network byte order.
1049 */
1050 int
altq_extractflow(m,af,flow,filt_bmask)1051 altq_extractflow(m, af, flow, filt_bmask)
1052 struct mbuf *m;
1053 int af;
1054 struct flowinfo *flow;
1055 u_int32_t filt_bmask;
1056 {
1057
1058 switch (af) {
1059 case PF_INET: {
1060 struct flowinfo_in *fin;
1061 struct ip *ip;
1062
1063 ip = mtod(m, struct ip *);
1064
1065 if (ip->ip_v != 4)
1066 break;
1067
1068 fin = (struct flowinfo_in *)flow;
1069 fin->fi_len = sizeof(struct flowinfo_in);
1070 fin->fi_family = AF_INET;
1071
1072 fin->fi_proto = ip->ip_p;
1073 fin->fi_tos = ip->ip_tos;
1074
1075 fin->fi_src.s_addr = ip->ip_src.s_addr;
1076 fin->fi_dst.s_addr = ip->ip_dst.s_addr;
1077
1078 if (filt_bmask & FIMB4_PORTS)
1079 /* if port info is required, extract port numbers */
1080 extract_ports4(m, ip, fin);
1081 else {
1082 fin->fi_sport = 0;
1083 fin->fi_dport = 0;
1084 fin->fi_gpi = 0;
1085 }
1086 return (1);
1087 }
1088
1089 #ifdef INET6
1090 case PF_INET6: {
1091 struct flowinfo_in6 *fin6;
1092 struct ip6_hdr *ip6;
1093
1094 ip6 = mtod(m, struct ip6_hdr *);
1095 /* should we check the ip version? */
1096
1097 fin6 = (struct flowinfo_in6 *)flow;
1098 fin6->fi6_len = sizeof(struct flowinfo_in6);
1099 fin6->fi6_family = AF_INET6;
1100
1101 fin6->fi6_proto = ip6->ip6_nxt;
1102 fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
1103
1104 fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
1105 fin6->fi6_src = ip6->ip6_src;
1106 fin6->fi6_dst = ip6->ip6_dst;
1107
1108 if ((filt_bmask & FIMB6_PORTS) ||
1109 ((filt_bmask & FIMB6_PROTO)
1110 && ip6->ip6_nxt > IPPROTO_IPV6))
1111 /*
1112 * if port info is required, or proto is required
1113 * but there are option headers, extract port
1114 * and protocol numbers.
1115 */
1116 extract_ports6(m, ip6, fin6);
1117 else {
1118 fin6->fi6_sport = 0;
1119 fin6->fi6_dport = 0;
1120 fin6->fi6_gpi = 0;
1121 }
1122 return (1);
1123 }
1124 #endif /* INET6 */
1125
1126 default:
1127 break;
1128 }
1129
1130 /* failed */
1131 flow->fi_len = sizeof(struct flowinfo);
1132 flow->fi_family = AF_UNSPEC;
1133 return (0);
1134 }
1135
1136 /*
1137 * helper routine to extract port numbers
1138 */
1139 /* structure for ipsec and ipv6 option header template */
1140 struct _opt6 {
1141 u_int8_t opt6_nxt; /* next header */
1142 u_int8_t opt6_hlen; /* header extension length */
1143 u_int16_t _pad;
1144 u_int32_t ah_spi; /* security parameter index
1145 for authentication header */
1146 };
1147
1148 /*
1149 * extract port numbers from a ipv4 packet.
1150 */
1151 static int
extract_ports4(m,ip,fin)1152 extract_ports4(m, ip, fin)
1153 struct mbuf *m;
1154 struct ip *ip;
1155 struct flowinfo_in *fin;
1156 {
1157 struct mbuf *m0;
1158 u_short ip_off;
1159 u_int8_t proto;
1160 int off;
1161
1162 fin->fi_sport = 0;
1163 fin->fi_dport = 0;
1164 fin->fi_gpi = 0;
1165
1166 ip_off = ntohs(ip->ip_off);
1167 /* if it is a fragment, try cached fragment info */
1168 if (ip_off & IP_OFFMASK) {
1169 ip4f_lookup(ip, fin);
1170 return (1);
1171 }
1172
1173 /* locate the mbuf containing the protocol header */
1174 for (m0 = m; m0 != NULL; m0 = m0->m_next)
1175 if (((caddr_t)ip >= m0->m_data) &&
1176 ((caddr_t)ip < m0->m_data + m0->m_len))
1177 break;
1178 if (m0 == NULL) {
1179 #ifdef ALTQ_DEBUG
1180 printf("extract_ports4: can't locate header! ip=%p\n", ip);
1181 #endif
1182 return (0);
1183 }
1184 off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
1185 proto = ip->ip_p;
1186
1187 #ifdef ALTQ_IPSEC
1188 again:
1189 #endif
1190 while (off >= m0->m_len) {
1191 off -= m0->m_len;
1192 m0 = m0->m_next;
1193 if (m0 == NULL)
1194 return (0); /* bogus ip_hl! */
1195 }
1196 if (m0->m_len < off + 4)
1197 return (0);
1198
1199 switch (proto) {
1200 case IPPROTO_TCP:
1201 case IPPROTO_UDP: {
1202 struct udphdr *udp;
1203
1204 udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
1205 fin->fi_sport = udp->uh_sport;
1206 fin->fi_dport = udp->uh_dport;
1207 fin->fi_proto = proto;
1208 }
1209 break;
1210
1211 #ifdef ALTQ_IPSEC
1212 case IPPROTO_ESP:
1213 if (fin->fi_gpi == 0){
1214 u_int32_t *gpi;
1215
1216 gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
1217 fin->fi_gpi = *gpi;
1218 }
1219 fin->fi_proto = proto;
1220 break;
1221
1222 case IPPROTO_AH: {
1223 /* get next header and header length */
1224 struct _opt6 *opt6;
1225
1226 opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
1227 proto = opt6->opt6_nxt;
1228 off += 8 + (opt6->opt6_hlen * 4);
1229 if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
1230 fin->fi_gpi = opt6->ah_spi;
1231 }
1232 /* goto the next header */
1233 goto again;
1234 #endif /* ALTQ_IPSEC */
1235
1236 default:
1237 fin->fi_proto = proto;
1238 return (0);
1239 }
1240
1241 /* if this is a first fragment, cache it. */
1242 if (ip_off & IP_MF)
1243 ip4f_cache(ip, fin);
1244
1245 return (1);
1246 }
1247
1248 #ifdef INET6
/*
 * extract_ports6: walk the headers that follow an IPv6 header and copy
 * the port numbers (or the IPsec SPI) of the packet into fin6.
 *
 *	m	mbuf chain that is searched for the header ip6 points to
 *	ip6	IPv6 header, expected to point into one of the mbufs of m
 *	fin6	flow info to fill in; fi6_gpi, fi6_sport and fi6_dport
 *		are cleared first
 *
 * returns 1 when a TCP, UDP or ESP header was reached.
 * returns 0 when ip6 is not found in the chain, when the chain is too
 * short, or when a next-header value that is not handled here is met;
 * fi6_proto is set only in the last of these failure cases.
 */
static int
extract_ports6(m, ip6, fin6)
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct flowinfo_in6 *fin6;
{
	struct mbuf *m0;
	int	off;
	u_int8_t proto;

	fin6->fi6_gpi   = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip6 >= m0->m_data) &&
		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	/* off: offset, from the data start of m0, of the header to examine */
	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		/* move to the mbuf that holds the byte at offset off */
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		/*
		 * every case below reads at least the first 4 bytes of
		 * the header; they must be contiguous in this mbuf.
		 */
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			/* only the two leading port fields are read here */
			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
			}
			return (1);

		case IPPROTO_ESP:
			/* keep a gpi that an earlier AH header already set */
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
				fin6->fi6_gpi   = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			/* the spi is read only if 8 bytes are available */
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			/* length field counts 4-byte units beyond the first 8 bytes */
			off += 8 + (opt6->opt6_hlen * 4);
			/* goto the next header */
			break;
			}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			/* length field counts 8-byte units beyond the first 8 bytes */
			off += (opt6->opt6_hlen + 1) * 8;
			/* goto the next header */
			break;
			}

		case IPPROTO_FRAGMENT:
			/* ipv6 fragmentations are not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
1344 #endif /* INET6 */
1345
/*
 * altq common classifier
 */

/*
 * acc_add_filter: copy *filter into a newly allocated acc_filter bound
 * to class, normalize its address masks, and link it into one of the
 * hash buckets of classifier.
 *
 * on success the handle of the new filter is stored in *phandle and 0
 * is returned.  returns EINVAL when the address family of the filter is
 * not supported and ENOMEM when the allocation fails.
 */
int
acc_add_filter(classifier, filter, class, phandle)
	struct acc_classifier *classifier;
	struct flow_filter *filter;
	void	*class;
	u_long	*phandle;
{
	struct acc_filter *afp, *prev, *tmp;
	int	i, s;

#ifdef INET6
	if (filter->ff_flow.fi_family != AF_INET &&
	    filter->ff_flow.fi_family != AF_INET6)
		return (EINVAL);
#else
	if (filter->ff_flow.fi_family != AF_INET)
		return (EINVAL);
#endif

	afp = malloc(sizeof(struct acc_filter),
	       M_DEVBUF, M_WAITOK);
	if (afp == NULL)
		return (ENOMEM);
	bzero(afp, sizeof(struct acc_filter));

	/* work on a private copy; the caller's filter is not modified */
	afp->f_filter = *filter;
	afp->f_class = class;

	i = ACC_WILDCARD_INDEX;
	if (filter->ff_flow.fi_family == AF_INET) {
		struct flow_filter *filter4 = &afp->f_filter;

		/*
		 * if address is 0, it's a wildcard.  if address mask
		 * isn't set, use full mask.
		 */
		if (filter4->ff_flow.fi_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0;
		else if (filter4->ff_mask.mask_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
		if (filter4->ff_flow.fi_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0;
		else if (filter4->ff_mask.mask_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0xffffffff;

		/* clear extra bits in addresses  */
		   filter4->ff_flow.fi_dst.s_addr &=
		       filter4->ff_mask.mask_dst.s_addr;
		   filter4->ff_flow.fi_src.s_addr &=
		       filter4->ff_mask.mask_src.s_addr;

		/*
		 * if dst address is a wildcard, use hash-entry
		 * ACC_WILDCARD_INDEX.
		 * (anything but a full destination mask counts as a
		 * wildcard here.)
		 */
		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
	}
#ifdef INET6
	else if (filter->ff_flow.fi_family == AF_INET6) {
		struct flow_filter6 *filter6 =
			(struct flow_filter6 *)&afp->f_filter;
#ifndef IN6MASK0 /* taken from kame ipv6 */
#define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
#define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
		const struct in6_addr in6mask0 = IN6MASK0;
		const struct in6_addr in6mask128 = IN6MASK128;
#endif

		/* same rules as for IPv4: unspecified address is a wildcard */
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
			filter6->ff_mask6.mask6_dst = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
			filter6->ff_mask6.mask6_dst = in6mask128;
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
			filter6->ff_mask6.mask6_src = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
			filter6->ff_mask6.mask6_src = in6mask128;

		/* clear extra bits in addresses  */
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
			    filter6->ff_mask6.mask6_dst.s6_addr[i];
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_src.s6_addr[i] &=
			    filter6->ff_mask6.mask6_src.s6_addr[i];

		/* IPv6 filters are hashed on the flow label, 0 is a wildcard */
		if (filter6->ff_flow6.fi6_flowlabel == 0)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
	}
#endif /* INET6 */

	afp->f_handle = get_filt_handle(classifier, i);

	/*
	 * update filter bitmask
	 * (computed from the caller's filter, not from the normalized copy)
	 */
	afp->f_fbmask = filt2fibmask(filter);
	classifier->acc_fbmask |= afp->f_fbmask;

	/*
	 * add this filter to the filter list.
	 * filters are ordered from the highest rule number.
	 */
#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	/* prev: last entry whose rule number is higher than the new one */
	prev = NULL;
	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
			prev = tmp;
		else
			break;
	}
	if (prev == NULL)
		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
	else
		LIST_INSERT_AFTER(prev, afp, f_chain);
	splx(s);

	*phandle = afp->f_handle;
	return (0);
}
1475
1476 int
acc_delete_filter(classifier,handle)1477 acc_delete_filter(classifier, handle)
1478 struct acc_classifier *classifier;
1479 u_long handle;
1480 {
1481 struct acc_filter *afp;
1482 int s;
1483
1484 if ((afp = filth_to_filtp(classifier, handle)) == NULL)
1485 return (EINVAL);
1486
1487 #ifdef __NetBSD__
1488 s = splnet();
1489 #else
1490 s = splimp();
1491 #endif
1492 LIST_REMOVE(afp, f_chain);
1493 splx(s);
1494
1495 free(afp, M_DEVBUF);
1496
1497 /* todo: update filt_bmask */
1498
1499 return (0);
1500 }
1501
1502 /*
1503 * delete filters referencing to the specified class.
1504 * if the all flag is not 0, delete all the filters.
1505 */
1506 int
acc_discard_filters(classifier,class,all)1507 acc_discard_filters(classifier, class, all)
1508 struct acc_classifier *classifier;
1509 void *class;
1510 int all;
1511 {
1512 struct acc_filter *afp;
1513 int i, s;
1514
1515 #ifdef __NetBSD__
1516 s = splnet();
1517 #else
1518 s = splimp();
1519 #endif
1520 for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
1521 do {
1522 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1523 if (all || afp->f_class == class) {
1524 LIST_REMOVE(afp, f_chain);
1525 free(afp, M_DEVBUF);
1526 /* start again from the head */
1527 break;
1528 }
1529 } while (afp != NULL);
1530 }
1531 splx(s);
1532
1533 if (all)
1534 classifier->acc_fbmask = 0;
1535
1536 return (0);
1537 }
1538
/*
 * acc_classify: extract the flow info of the packet in m and return the
 * class of the first filter of the classifier that matches it.
 *
 *	clfier	the classifier, passed as a struct acc_classifier pointer
 *	m	packet to classify
 *	af	address family, handed on to altq_extractflow()
 *
 * returns the f_class of the matching filter, or NULL when no filter
 * matches.
 */
void *
acc_classify(clfier, m, af)
	void *clfier;
	struct mbuf *m;
	int af;
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int	i;

	classifier = (struct acc_classifier *)clfier;
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
						     &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
						    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
					     f_chain)
					if (apply_filter4(afp->f_fbmask,
							  &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
				if (i != ACC_WILDCARD_INDEX)
					i = ACC_WILDCARD_INDEX;
				else
					break;
			} while (1);
		}
	}
#ifdef INET6
	else if (flow.fi_family == AF_INET6) {
		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;

		/* get the filter hash entry from its flow ID */
		if (fp6->fi6_flowlabel != 0)
			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
		else
			/* flowlabel can be zero */
			i = ACC_WILDCARD_INDEX;

		/* go through this loop twice.  first for flow hash, second
		   for wildcards. */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (apply_filter6(afp->f_fbmask,
					(struct flow_filter6 *)&afp->f_filter,
					fp6))
					/* filter matched */
					return (afp->f_class);

			/*
			 * check again for filters with a wildcard.
			 */
			if (i != ACC_WILDCARD_INDEX)
				i = ACC_WILDCARD_INDEX;
			else
				break;
		} while (1);
	}
#endif /* INET6 */

	/* no filter matched */
	return (NULL);
}
1638
1639 static int
apply_filter4(fbmask,filt,pkt)1640 apply_filter4(fbmask, filt, pkt)
1641 u_int32_t fbmask;
1642 struct flow_filter *filt;
1643 struct flowinfo_in *pkt;
1644 {
1645 if (filt->ff_flow.fi_family != AF_INET)
1646 return (0);
1647 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
1648 return (0);
1649 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
1650 return (0);
1651 if ((fbmask & FIMB4_DADDR) &&
1652 filt->ff_flow.fi_dst.s_addr !=
1653 (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
1654 return (0);
1655 if ((fbmask & FIMB4_SADDR) &&
1656 filt->ff_flow.fi_src.s_addr !=
1657 (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
1658 return (0);
1659 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
1660 return (0);
1661 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
1662 (pkt->fi_tos & filt->ff_mask.mask_tos))
1663 return (0);
1664 if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
1665 return (0);
1666 /* match */
1667 return (1);
1668 }
1669
1670 /*
1671 * filter matching function optimized for a common case that checks
1672 * only protocol and port numbers
1673 */
1674 static int
apply_ppfilter4(fbmask,filt,pkt)1675 apply_ppfilter4(fbmask, filt, pkt)
1676 u_int32_t fbmask;
1677 struct flow_filter *filt;
1678 struct flowinfo_in *pkt;
1679 {
1680 if (filt->ff_flow.fi_family != AF_INET)
1681 return (0);
1682 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
1683 return (0);
1684 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
1685 return (0);
1686 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
1687 return (0);
1688 /* match */
1689 return (1);
1690 }
1691
1692 /*
1693 * filter matching function only for tos field.
1694 */
1695 static int
apply_tosfilter4(fbmask,filt,pkt)1696 apply_tosfilter4(fbmask, filt, pkt)
1697 u_int32_t fbmask;
1698 struct flow_filter *filt;
1699 struct flowinfo_in *pkt;
1700 {
1701 if (filt->ff_flow.fi_family != AF_INET)
1702 return (0);
1703 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
1704 (pkt->fi_tos & filt->ff_mask.mask_tos))
1705 return (0);
1706 /* match */
1707 return (1);
1708 }
1709
1710 #ifdef INET6
1711 static int
apply_filter6(fbmask,filt,pkt)1712 apply_filter6(fbmask, filt, pkt)
1713 u_int32_t fbmask;
1714 struct flow_filter6 *filt;
1715 struct flowinfo_in6 *pkt;
1716 {
1717 int i;
1718
1719 if (filt->ff_flow6.fi6_family != AF_INET6)
1720 return (0);
1721 if ((fbmask & FIMB6_FLABEL) &&
1722 filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
1723 return (0);
1724 if ((fbmask & FIMB6_PROTO) &&
1725 filt->ff_flow6.fi6_proto != pkt->fi6_proto)
1726 return (0);
1727 if ((fbmask & FIMB6_SPORT) &&
1728 filt->ff_flow6.fi6_sport != pkt->fi6_sport)
1729 return (0);
1730 if ((fbmask & FIMB6_DPORT) &&
1731 filt->ff_flow6.fi6_dport != pkt->fi6_dport)
1732 return (0);
1733 if (fbmask & FIMB6_SADDR) {
1734 for (i = 0; i < 4; i++)
1735 if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
1736 (pkt->fi6_src.s6_addr32[i] &
1737 filt->ff_mask6.mask6_src.s6_addr32[i]))
1738 return (0);
1739 }
1740 if (fbmask & FIMB6_DADDR) {
1741 for (i = 0; i < 4; i++)
1742 if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
1743 (pkt->fi6_dst.s6_addr32[i] &
1744 filt->ff_mask6.mask6_dst.s6_addr32[i]))
1745 return (0);
1746 }
1747 if ((fbmask & FIMB6_TCLASS) &&
1748 filt->ff_flow6.fi6_tclass !=
1749 (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
1750 return (0);
1751 if ((fbmask & FIMB6_GPI) &&
1752 filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
1753 return (0);
1754 /* match */
1755 return (1);
1756 }
1757 #endif /* INET6 */
1758
1759 /*
1760 * filter handle:
1761 * bit 20-28: index to the filter hash table
1762 * bit 0-19: unique id in the hash bucket.
1763 */
1764 static u_long
get_filt_handle(classifier,i)1765 get_filt_handle(classifier, i)
1766 struct acc_classifier *classifier;
1767 int i;
1768 {
1769 static u_long handle_number = 1;
1770 u_long handle;
1771 struct acc_filter *afp;
1772
1773 while (1) {
1774 handle = handle_number++ & 0x000fffff;
1775
1776 if (LIST_EMPTY(&classifier->acc_filters[i]))
1777 break;
1778
1779 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1780 if ((afp->f_handle & 0x000fffff) == handle)
1781 break;
1782 if (afp == NULL)
1783 break;
1784 /* this handle is already used, try again */
1785 }
1786
1787 return ((i << 20) | handle);
1788 }
1789
1790 /* convert filter handle to filter pointer */
1791 static struct acc_filter *
filth_to_filtp(classifier,handle)1792 filth_to_filtp(classifier, handle)
1793 struct acc_classifier *classifier;
1794 u_long handle;
1795 {
1796 struct acc_filter *afp;
1797 int i;
1798
1799 i = ACC_GET_HINDEX(handle);
1800
1801 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1802 if (afp->f_handle == handle)
1803 return (afp);
1804
1805 return (NULL);
1806 }
1807
1808 /* create flowinfo bitmask */
1809 static u_int32_t
filt2fibmask(filt)1810 filt2fibmask(filt)
1811 struct flow_filter *filt;
1812 {
1813 u_int32_t mask = 0;
1814 #ifdef INET6
1815 struct flow_filter6 *filt6;
1816 #endif
1817
1818 switch (filt->ff_flow.fi_family) {
1819 case AF_INET:
1820 if (filt->ff_flow.fi_proto != 0)
1821 mask |= FIMB4_PROTO;
1822 if (filt->ff_flow.fi_tos != 0)
1823 mask |= FIMB4_TOS;
1824 if (filt->ff_flow.fi_dst.s_addr != 0)
1825 mask |= FIMB4_DADDR;
1826 if (filt->ff_flow.fi_src.s_addr != 0)
1827 mask |= FIMB4_SADDR;
1828 if (filt->ff_flow.fi_sport != 0)
1829 mask |= FIMB4_SPORT;
1830 if (filt->ff_flow.fi_dport != 0)
1831 mask |= FIMB4_DPORT;
1832 if (filt->ff_flow.fi_gpi != 0)
1833 mask |= FIMB4_GPI;
1834 break;
1835 #ifdef INET6
1836 case AF_INET6:
1837 filt6 = (struct flow_filter6 *)filt;
1838
1839 if (filt6->ff_flow6.fi6_proto != 0)
1840 mask |= FIMB6_PROTO;
1841 if (filt6->ff_flow6.fi6_tclass != 0)
1842 mask |= FIMB6_TCLASS;
1843 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
1844 mask |= FIMB6_DADDR;
1845 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
1846 mask |= FIMB6_SADDR;
1847 if (filt6->ff_flow6.fi6_sport != 0)
1848 mask |= FIMB6_SPORT;
1849 if (filt6->ff_flow6.fi6_dport != 0)
1850 mask |= FIMB6_DPORT;
1851 if (filt6->ff_flow6.fi6_gpi != 0)
1852 mask |= FIMB6_GPI;
1853 if (filt6->ff_flow6.fi6_flowlabel != 0)
1854 mask |= FIMB6_FLABEL;
1855 break;
1856 #endif /* INET6 */
1857 }
1858 return (mask);
1859 }
1860
1861
/*
 * helper functions to handle IPv4 fragments.
 * currently only in-sequence fragments are handled.
 *	- fragment info is cached in a LRU list.
 *	- when a first fragment is found, cache its flow info.
 *	- when a non-first fragment is found, lookup the cache.
 */

/* one entry of the IPv4 fragment cache */
struct ip4_frag {
    TAILQ_ENTRY(ip4_frag) ip4f_chain;	/* linkage on ip4f_list */
    char    ip4f_valid;			/* 1 while in use, 0 when free */
    u_short ip4f_id;			/* ip_id of the cached datagram */
    struct flowinfo_in ip4f_info;	/* proto, addresses, ports, gpi */
};

/*
 * entries in use are kept at the head of the list and free entries at
 * the tail (see ip4f_alloc() and ip4f_free()).
 */
static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */

#define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */
1880
1881
1882 static void
ip4f_cache(ip,fin)1883 ip4f_cache(ip, fin)
1884 struct ip *ip;
1885 struct flowinfo_in *fin;
1886 {
1887 struct ip4_frag *fp;
1888
1889 if (TAILQ_EMPTY(&ip4f_list)) {
1890 /* first time call, allocate fragment cache entries. */
1891 if (ip4f_init() < 0)
1892 /* allocation failed! */
1893 return;
1894 }
1895
1896 fp = ip4f_alloc();
1897 fp->ip4f_id = ip->ip_id;
1898 fp->ip4f_info.fi_proto = ip->ip_p;
1899 fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
1900 fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;
1901
1902 /* save port numbers */
1903 fp->ip4f_info.fi_sport = fin->fi_sport;
1904 fp->ip4f_info.fi_dport = fin->fi_dport;
1905 fp->ip4f_info.fi_gpi = fin->fi_gpi;
1906 }
1907
1908 static int
ip4f_lookup(ip,fin)1909 ip4f_lookup(ip, fin)
1910 struct ip *ip;
1911 struct flowinfo_in *fin;
1912 {
1913 struct ip4_frag *fp;
1914
1915 for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
1916 fp = TAILQ_NEXT(fp, ip4f_chain))
1917 if (ip->ip_id == fp->ip4f_id &&
1918 ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
1919 ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
1920 ip->ip_p == fp->ip4f_info.fi_proto) {
1921
1922 /* found the matching entry */
1923 fin->fi_sport = fp->ip4f_info.fi_sport;
1924 fin->fi_dport = fp->ip4f_info.fi_dport;
1925 fin->fi_gpi = fp->ip4f_info.fi_gpi;
1926
1927 if ((ntohs(ip->ip_off) & IP_MF) == 0)
1928 /* this is the last fragment,
1929 release the entry. */
1930 ip4f_free(fp);
1931
1932 return (1);
1933 }
1934
1935 /* no matching entry found */
1936 return (0);
1937 }
1938
1939 static int
ip4f_init(void)1940 ip4f_init(void)
1941 {
1942 struct ip4_frag *fp;
1943 int i;
1944
1945 TAILQ_INIT(&ip4f_list);
1946 for (i=0; i<IP4F_TABSIZE; i++) {
1947 fp = malloc(sizeof(struct ip4_frag),
1948 M_DEVBUF, M_NOWAIT);
1949 if (fp == NULL) {
1950 printf("ip4f_init: can't alloc %dth entry!\n", i);
1951 if (i == 0)
1952 return (-1);
1953 return (0);
1954 }
1955 fp->ip4f_valid = 0;
1956 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
1957 }
1958 return (0);
1959 }
1960
1961 static struct ip4_frag *
ip4f_alloc(void)1962 ip4f_alloc(void)
1963 {
1964 struct ip4_frag *fp;
1965
1966 /* reclaim an entry at the tail, put it at the head */
1967 fp = TAILQ_LAST(&ip4f_list, ip4f_list);
1968 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
1969 fp->ip4f_valid = 1;
1970 TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
1971 return (fp);
1972 }
1973
1974 static void
ip4f_free(fp)1975 ip4f_free(fp)
1976 struct ip4_frag *fp;
1977 {
1978 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
1979 fp->ip4f_valid = 0;
1980 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
1981 }
1982
1983 #endif /* ALTQ3_CLFIER_COMPAT */
1984