1 /*        $NetBSD: pfctl_altq.c,v 1.11 2021/07/24 21:31:31 andvar Exp $         */
2 /*        $OpenBSD: pfctl_altq.c,v 1.92 2007/05/27 05:15:17 claudio Exp $       */
3 
4 /*
5  * Copyright (c) 2002
6  *        Sony Computer Science Laboratories Inc.
7  * Copyright (c) 2002, 2003 Henning Brauer <henning@openbsd.org>
8  *
9  * Permission to use, copy, modify, and distribute this software for any
10  * purpose with or without fee is hereby granted, provided that the above
11  * copyright notice and this permission notice appear in all copies.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
14  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
15  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
16  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
17  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
18  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
19  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20  */
21 
22 #include <sys/types.h>
23 #include <sys/ioctl.h>
24 #include <sys/socket.h>
25 #ifdef __NetBSD__
26 #include <sys/param.h>
27 #include <sys/mbuf.h>
28 #endif
29 
30 #include <net/if.h>
31 #include <netinet/in.h>
32 #include <net/pfvar.h>
33 
34 #include <err.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <math.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42 
43 #include <altq/altq.h>
44 #include <altq/altq_cbq.h>
45 #include <altq/altq_priq.h>
46 #include <altq/altq_hfsc.h>
47 
48 #include "pfctl_parser.h"
49 #include "pfctl.h"
50 
51 #define is_sc_null(sc)        (((sc) == NULL) || ((sc)->m1 == 0 && (sc)->m2 == 0))
52 
53 TAILQ_HEAD(altqs, pf_altq) altqs = TAILQ_HEAD_INITIALIZER(altqs);
54 LIST_HEAD(gen_sc, segment) rtsc, lssc;
55 
56 struct pf_altq      *qname_to_pfaltq(const char *, const char *);
57 u_int32_t  qname_to_qid(const char *);
58 
59 static int          eval_pfqueue_cbq(struct pfctl *, struct pf_altq *);
60 static int          cbq_compute_idletime(struct pfctl *, struct pf_altq *);
61 static int          check_commit_cbq(int, int, struct pf_altq *);
62 static int          print_cbq_opts(const struct pf_altq *);
63 
64 static int          eval_pfqueue_priq(struct pfctl *, struct pf_altq *);
65 static int          check_commit_priq(int, int, struct pf_altq *);
66 static int          print_priq_opts(const struct pf_altq *);
67 
68 static int          eval_pfqueue_hfsc(struct pfctl *, struct pf_altq *);
69 static int          check_commit_hfsc(int, int, struct pf_altq *);
70 static int          print_hfsc_opts(const struct pf_altq *,
71                         const struct node_queue_opt *);
72 
73 static void                    gsc_add_sc(struct gen_sc *, struct service_curve *);
74 static int                     is_gsc_under_sc(struct gen_sc *,
75                                    struct service_curve *);
76 static void                    gsc_destroy(struct gen_sc *);
77 static struct segment         *gsc_getentry(struct gen_sc *, double);
78 static int                     gsc_add_seg(struct gen_sc *, double, double, double,
79                                    double);
80 static double                  sc_x2y(struct service_curve *, double);
81 
82 u_int32_t  getifspeed(char *);
83 u_long               getifmtu(char *);
84 int                  eval_queue_opts(struct pf_altq *, struct node_queue_opt *,
85                          u_int32_t);
86 u_int32_t  eval_bwspec(struct node_queue_bw *, u_int32_t);
87 void                 print_hfsc_sc(const char *, u_int, u_int, u_int,
88                          const struct node_hfsc_sc *);
89 
90 void
pfaltq_store(struct pf_altq * a)91 pfaltq_store(struct pf_altq *a)
92 {
93           struct pf_altq      *altq;
94 
95           if ((altq = malloc(sizeof(*altq))) == NULL)
96                     err(1, "malloc");
97           memcpy(altq, a, sizeof(struct pf_altq));
98           TAILQ_INSERT_TAIL(&altqs, altq, entries);
99 }
100 
101 struct pf_altq *
pfaltq_lookup(const char * ifname)102 pfaltq_lookup(const char *ifname)
103 {
104           struct pf_altq      *altq;
105 
106           TAILQ_FOREACH(altq, &altqs, entries) {
107                     if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
108                         altq->qname[0] == 0)
109                               return (altq);
110           }
111           return (NULL);
112 }
113 
114 struct pf_altq *
qname_to_pfaltq(const char * qname,const char * ifname)115 qname_to_pfaltq(const char *qname, const char *ifname)
116 {
117           struct pf_altq      *altq;
118 
119           TAILQ_FOREACH(altq, &altqs, entries) {
120                     if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
121                         strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
122                               return (altq);
123           }
124           return (NULL);
125 }
126 
127 u_int32_t
qname_to_qid(const char * qname)128 qname_to_qid(const char *qname)
129 {
130           struct pf_altq      *altq;
131 
132           /*
133            * We guarantee that same named queues on different interfaces
134            * have the same qid, so we do NOT need to limit matching on
135            * one interface!
136            */
137 
138           TAILQ_FOREACH(altq, &altqs, entries) {
139                     if (strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
140                               return (altq->qid);
141           }
142           return (0);
143 }
144 
145 void
print_altq(const struct pf_altq * a,unsigned level,struct node_queue_bw * bw,struct node_queue_opt * qopts)146 print_altq(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw,
147           struct node_queue_opt *qopts)
148 {
149           if (a->qname[0] != 0) {
150                     print_queue(a, level, bw, 1, qopts);
151                     return;
152           }
153 
154           printf("altq on %s ", a->ifname);
155 
156           switch (a->scheduler) {
157           case ALTQT_CBQ:
158                     if (!print_cbq_opts(a))
159                               printf("cbq ");
160                     break;
161           case ALTQT_PRIQ:
162                     if (!print_priq_opts(a))
163                               printf("priq ");
164                     break;
165           case ALTQT_HFSC:
166                     if (!print_hfsc_opts(a, qopts))
167                               printf("hfsc ");
168                     break;
169           }
170 
171           if (bw != NULL && bw->bw_percent > 0) {
172                     if (bw->bw_percent < 100)
173                               printf("bandwidth %u%% ", bw->bw_percent);
174           } else
175                     printf("bandwidth %s ", rate2str((double)a->ifbandwidth));
176 
177           if (a->qlimit != DEFAULT_QLIMIT)
178                     printf("qlimit %u ", a->qlimit);
179           printf("tbrsize %u ", a->tbrsize);
180 }
181 
182 void
print_queue(const struct pf_altq * a,unsigned level,struct node_queue_bw * bw,int print_interface,struct node_queue_opt * qopts)183 print_queue(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw,
184     int print_interface, struct node_queue_opt *qopts)
185 {
186           unsigned  i;
187 
188           printf("queue ");
189           for (i = 0; i < level; ++i)
190                     printf(" ");
191           printf("%s ", a->qname);
192           if (print_interface)
193                     printf("on %s ", a->ifname);
194           if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC) {
195                     if (bw != NULL && bw->bw_percent > 0) {
196                               if (bw->bw_percent < 100)
197                                         printf("bandwidth %u%% ", bw->bw_percent);
198                     } else
199                               printf("bandwidth %s ", rate2str((double)a->bandwidth));
200           }
201           if (a->priority != DEFAULT_PRIORITY)
202                     printf("priority %u ", a->priority);
203           if (a->qlimit != DEFAULT_QLIMIT)
204                     printf("qlimit %u ", a->qlimit);
205           switch (a->scheduler) {
206           case ALTQT_CBQ:
207                     print_cbq_opts(a);
208                     break;
209           case ALTQT_PRIQ:
210                     print_priq_opts(a);
211                     break;
212           case ALTQT_HFSC:
213                     print_hfsc_opts(a, qopts);
214                     break;
215           }
216 }
217 
218 /*
219  * eval_pfaltq computes the discipline parameters.
220  */
221 int
eval_pfaltq(struct pfctl * pf,struct pf_altq * pa,struct node_queue_bw * bw,struct node_queue_opt * opts)222 eval_pfaltq(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
223     struct node_queue_opt *opts)
224 {
225           u_int     rate, size, errors = 0;
226 
227           if (bw->bw_absolute > 0)
228                     pa->ifbandwidth = bw->bw_absolute;
229           else
230                     if ((rate = getifspeed(pa->ifname)) == 0) {
231                               fprintf(stderr, "interface %s does not know its bandwidth, "
232                                   "please specify an absolute bandwidth\n",
233                                   pa->ifname);
234                               errors++;
235                     } else if ((pa->ifbandwidth = eval_bwspec(bw, rate)) == 0)
236                               pa->ifbandwidth = rate;
237 
238           errors += eval_queue_opts(pa, opts, pa->ifbandwidth);
239 
240           /* if tbrsize is not specified, use heuristics */
241           if (pa->tbrsize == 0) {
242                     rate = pa->ifbandwidth;
243                     if (rate <= 1 * 1000 * 1000)
244                               size = 1;
245                     else if (rate <= 10 * 1000 * 1000)
246                               size = 4;
247                     else if (rate <= 200 * 1000 * 1000)
248                               size = 8;
249                     else
250                               size = 24;
251                     size = size * getifmtu(pa->ifname);
252                     if (size > 0xffff)
253                               size = 0xffff;
254                     pa->tbrsize = size;
255           }
256           return (errors);
257 }
258 
259 /*
260  * check_commit_altq does consistency check for each interface
261  */
262 int
check_commit_altq(int dev,int opts)263 check_commit_altq(int dev, int opts)
264 {
265           struct pf_altq      *altq;
266           int                  error = 0;
267 
268           /* call the discipline check for each interface. */
269           TAILQ_FOREACH(altq, &altqs, entries) {
270                     if (altq->qname[0] == 0) {
271                               switch (altq->scheduler) {
272                               case ALTQT_CBQ:
273                                         error = check_commit_cbq(dev, opts, altq);
274                                         break;
275                               case ALTQT_PRIQ:
276                                         error = check_commit_priq(dev, opts, altq);
277                                         break;
278                               case ALTQT_HFSC:
279                                         error = check_commit_hfsc(dev, opts, altq);
280                                         break;
281                               default:
282                                         break;
283                               }
284                     }
285           }
286           return (error);
287 }
288 
289 /*
290  * eval_pfqueue computes the queue parameters.
291  */
292 int
eval_pfqueue(struct pfctl * pf,struct pf_altq * pa,struct node_queue_bw * bw,struct node_queue_opt * opts)293 eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
294     struct node_queue_opt *opts)
295 {
296           /* should be merged with expand_queue */
297           struct pf_altq      *if_pa, *parent, *altq;
298           u_int32_t  bwsum;
299           int                  error = 0;
300 
301           /* find the corresponding interface and copy fields used by queues */
302           if ((if_pa = pfaltq_lookup(pa->ifname)) == NULL) {
303                     fprintf(stderr, "altq not defined on %s\n", pa->ifname);
304                     return (1);
305           }
306           pa->scheduler = if_pa->scheduler;
307           pa->ifbandwidth = if_pa->ifbandwidth;
308 
309           if (qname_to_pfaltq(pa->qname, pa->ifname) != NULL) {
310                     fprintf(stderr, "queue %s already exists on interface %s\n",
311                         pa->qname, pa->ifname);
312                     return (1);
313           }
314           pa->qid = qname_to_qid(pa->qname);
315 
316           parent = NULL;
317           if (pa->parent[0] != 0) {
318                     parent = qname_to_pfaltq(pa->parent, pa->ifname);
319                     if (parent == NULL) {
320                               fprintf(stderr, "parent %s not found for %s\n",
321                                   pa->parent, pa->qname);
322                               return (1);
323                     }
324                     pa->parent_qid = parent->qid;
325           }
326           if (pa->qlimit == 0)
327                     pa->qlimit = DEFAULT_QLIMIT;
328 
329           if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC) {
330                     pa->bandwidth = eval_bwspec(bw,
331                         parent == NULL ? 0 : parent->bandwidth);
332 
333                     if (pa->bandwidth > pa->ifbandwidth) {
334                               fprintf(stderr, "bandwidth for %s higher than "
335                                   "interface\n", pa->qname);
336                               return (1);
337                     }
338                     /* check the sum of the child bandwidth is under parent's */
339                     if (parent != NULL) {
340                               if (pa->bandwidth > parent->bandwidth) {
341                                         warnx("bandwidth for %s higher than parent",
342                                             pa->qname);
343                                         return (1);
344                               }
345                               bwsum = 0;
346                               TAILQ_FOREACH(altq, &altqs, entries) {
347                                         if (strncmp(altq->ifname, pa->ifname,
348                                             IFNAMSIZ) == 0 &&
349                                             altq->qname[0] != 0 &&
350                                             strncmp(altq->parent, pa->parent,
351                                             PF_QNAME_SIZE) == 0)
352                                                   bwsum += altq->bandwidth;
353                               }
354                               bwsum += pa->bandwidth;
355                               if (bwsum > parent->bandwidth) {
356                                         warnx("the sum of the child bandwidth higher"
357                                             " than parent \"%s\"", parent->qname);
358                               }
359                     }
360           }
361 
362           if (eval_queue_opts(pa, opts, parent == NULL? 0 : parent->bandwidth))
363                     return (1);
364 
365           switch (pa->scheduler) {
366           case ALTQT_CBQ:
367                     error = eval_pfqueue_cbq(pf, pa);
368                     break;
369           case ALTQT_PRIQ:
370                     error = eval_pfqueue_priq(pf, pa);
371                     break;
372           case ALTQT_HFSC:
373                     error = eval_pfqueue_hfsc(pf, pa);
374                     break;
375           default:
376                     break;
377           }
378           return (error);
379 }
380 
381 /*
382  * CBQ support functions
383  */
384 #define   RM_FILTER_GAIN      5         /* log2 of gain, e.g., 5 => 31/32 */
385 #define   RM_NS_PER_SEC       (1000000000)
386 
387 static int
eval_pfqueue_cbq(struct pfctl * pf,struct pf_altq * pa)388 eval_pfqueue_cbq(struct pfctl *pf, struct pf_altq *pa)
389 {
390           struct cbq_opts     *opts;
391           u_int                ifmtu;
392 
393           if (pa->priority >= CBQ_MAXPRI) {
394                     warnx("priority out of range: max %d", CBQ_MAXPRI - 1);
395                     return (-1);
396           }
397 
398           ifmtu = getifmtu(pa->ifname);
399           opts = &pa->pq_u.cbq_opts;
400 
401           if (opts->pktsize == 0) {     /* use default */
402                     opts->pktsize = ifmtu;
403                     if (opts->pktsize > MCLBYTES) /* do what TCP does */
404                               opts->pktsize &= ~MCLBYTES;
405           } else if (opts->pktsize > ifmtu)
406                     opts->pktsize = ifmtu;
407           if (opts->maxpktsize == 0)    /* use default */
408                     opts->maxpktsize = ifmtu;
409           else if (opts->maxpktsize > ifmtu)
410                     opts->pktsize = ifmtu;
411 
412           if (opts->pktsize > opts->maxpktsize)
413                     opts->pktsize = opts->maxpktsize;
414 
415           if (pa->parent[0] == 0)
416                     opts->flags |= (CBQCLF_ROOTCLASS | CBQCLF_WRR);
417 
418           cbq_compute_idletime(pf, pa);
419           return (0);
420 }
421 
422 /*
423  * compute ns_per_byte, maxidle, minidle, and offtime
424  */
425 static int
cbq_compute_idletime(struct pfctl * pf,struct pf_altq * pa)426 cbq_compute_idletime(struct pfctl *pf, struct pf_altq *pa)
427 {
428           struct cbq_opts     *opts;
429           double               maxidle_s, maxidle, minidle;
430           double               offtime, nsPerByte, ifnsPerByte, ptime, cptime;
431           double               z, g, f, gton, gtom;
432           u_int                minburst, maxburst;
433 
434           opts = &pa->pq_u.cbq_opts;
435           ifnsPerByte = (1.0 / (double)pa->ifbandwidth) * RM_NS_PER_SEC * 8;
436           minburst = opts->minburst;
437           maxburst = opts->maxburst;
438 
439           if (pa->bandwidth == 0)
440                     f = 0.0001;         /* small enough? */
441           else
442                     f = ((double) pa->bandwidth / (double) pa->ifbandwidth);
443 
444           nsPerByte = ifnsPerByte / f;
445           ptime = (double)opts->pktsize * ifnsPerByte;
446           cptime = ptime * (1.0 - f) / f;
447 
448           if (nsPerByte * (double)opts->maxpktsize > (double)INT_MAX) {
449                     /*
450                      * this causes integer overflow in kernel!
451                      * (bandwidth < 6Kbps when max_pkt_size=1500)
452                      */
453                     if (pa->bandwidth != 0 && (pf->opts & PF_OPT_QUIET) == 0) {
454                               warnx("queue bandwidth must be larger than %s",
455                                   rate2str(ifnsPerByte * (double)opts->maxpktsize /
456                                   (double)INT_MAX * (double)pa->ifbandwidth));
457                               fprintf(stderr, "cbq: queue %s is too slow!\n",
458                                   pa->qname);
459                     }
460                     nsPerByte = (double)(INT_MAX / opts->maxpktsize);
461           }
462 
463           if (maxburst == 0) {  /* use default */
464                     if (cptime > 10.0 * 1000000)
465                               maxburst = 4;
466                     else
467                               maxburst = 16;
468           }
469           if (minburst == 0)  /* use default */
470                     minburst = 2;
471           if (minburst > maxburst)
472                     minburst = maxburst;
473 
474           z = (double)(1 << RM_FILTER_GAIN);
475           g = (1.0 - 1.0 / z);
476           gton = pow(g, (double)maxburst);
477           gtom = pow(g, (double)(minburst-1));
478           maxidle = ((1.0 / f - 1.0) * ((1.0 - gton) / gton));
479           maxidle_s = (1.0 - g);
480           if (maxidle > maxidle_s)
481                     maxidle = ptime * maxidle;
482           else
483                     maxidle = ptime * maxidle_s;
484           offtime = cptime * (1.0 + 1.0/(1.0 - g) * (1.0 - gtom) / gtom);
485           minidle = -((double)opts->maxpktsize * (double)nsPerByte);
486 
487           /* scale parameters */
488           maxidle = ((maxidle * 8.0) / nsPerByte) *
489               pow(2.0, (double)RM_FILTER_GAIN);
490           offtime = (offtime * 8.0) / nsPerByte *
491               pow(2.0, (double)RM_FILTER_GAIN);
492           minidle = ((minidle * 8.0) / nsPerByte) *
493               pow(2.0, (double)RM_FILTER_GAIN);
494 
495           maxidle = maxidle / 1000.0;
496           offtime = offtime / 1000.0;
497           minidle = minidle / 1000.0;
498 
499           opts->minburst = minburst;
500           opts->maxburst = maxburst;
501           opts->ns_per_byte = (u_int)nsPerByte;
502           opts->maxidle = (u_int)fabs(maxidle);
503           opts->minidle = (int)minidle;
504           opts->offtime = (u_int)fabs(offtime);
505 
506           return (0);
507 }
508 
509 static int
check_commit_cbq(int dev,int opts,struct pf_altq * pa)510 check_commit_cbq(int dev, int opts, struct pf_altq *pa)
511 {
512           struct pf_altq      *altq;
513           int                  root_class, default_class;
514           int                  error = 0;
515 
516           /*
517            * check if cbq has one root queue and one default queue
518            * for this interface
519            */
520           root_class = default_class = 0;
521           TAILQ_FOREACH(altq, &altqs, entries) {
522                     if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
523                               continue;
524                     if (altq->qname[0] == 0)  /* this is for interface */
525                               continue;
526                     if (altq->pq_u.cbq_opts.flags & CBQCLF_ROOTCLASS)
527                               root_class++;
528                     if (altq->pq_u.cbq_opts.flags & CBQCLF_DEFCLASS)
529                               default_class++;
530           }
531           if (root_class != 1) {
532                     warnx("should have one root queue on %s", pa->ifname);
533                     error++;
534           }
535           if (default_class != 1) {
536                     warnx("should have one default queue on %s", pa->ifname);
537                     error++;
538           }
539           return (error);
540 }
541 
542 static int
print_cbq_opts(const struct pf_altq * a)543 print_cbq_opts(const struct pf_altq *a)
544 {
545           const struct cbq_opts         *opts;
546 
547           opts = &a->pq_u.cbq_opts;
548           if (opts->flags) {
549                     printf("cbq(");
550                     if (opts->flags & CBQCLF_RED)
551                               printf(" red");
552                     if (opts->flags & CBQCLF_ECN)
553                               printf(" ecn");
554                     if (opts->flags & CBQCLF_RIO)
555                               printf(" rio");
556                     if (opts->flags & CBQCLF_CLEARDSCP)
557                               printf(" cleardscp");
558                     if (opts->flags & CBQCLF_FLOWVALVE)
559                               printf(" flowvalve");
560 #ifdef CBQCLF_BORROW
561                     if (opts->flags & CBQCLF_BORROW)
562                               printf(" borrow");
563 #endif
564                     if (opts->flags & CBQCLF_WRR)
565                               printf(" wrr");
566                     if (opts->flags & CBQCLF_EFFICIENT)
567                               printf(" efficient");
568                     if (opts->flags & CBQCLF_ROOTCLASS)
569                               printf(" root");
570                     if (opts->flags & CBQCLF_DEFCLASS)
571                               printf(" default");
572                     printf(" ) ");
573 
574                     return (1);
575           } else
576                     return (0);
577 }
578 
579 /*
580  * PRIQ support functions
581  */
582 static int
eval_pfqueue_priq(struct pfctl * pf,struct pf_altq * pa)583 eval_pfqueue_priq(struct pfctl *pf, struct pf_altq *pa)
584 {
585           struct pf_altq      *altq;
586 
587           if (pa->priority >= PRIQ_MAXPRI) {
588                     warnx("priority out of range: max %d", PRIQ_MAXPRI - 1);
589                     return (-1);
590           }
591           /* the priority should be unique for the interface */
592           TAILQ_FOREACH(altq, &altqs, entries) {
593                     if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) == 0 &&
594                         altq->qname[0] != 0 && altq->priority == pa->priority) {
595                               warnx("%s and %s have the same priority",
596                                   altq->qname, pa->qname);
597                               return (-1);
598                     }
599           }
600 
601           return (0);
602 }
603 
604 static int
check_commit_priq(int dev,int opts,struct pf_altq * pa)605 check_commit_priq(int dev, int opts, struct pf_altq *pa)
606 {
607           struct pf_altq      *altq;
608           int                  default_class;
609           int                  error = 0;
610 
611           /*
612            * check if priq has one default class for this interface
613            */
614           default_class = 0;
615           TAILQ_FOREACH(altq, &altqs, entries) {
616                     if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
617                               continue;
618                     if (altq->qname[0] == 0)  /* this is for interface */
619                               continue;
620                     if (altq->pq_u.priq_opts.flags & PRCF_DEFAULTCLASS)
621                               default_class++;
622           }
623           if (default_class != 1) {
624                     warnx("should have one default queue on %s", pa->ifname);
625                     error++;
626           }
627           return (error);
628 }
629 
630 static int
print_priq_opts(const struct pf_altq * a)631 print_priq_opts(const struct pf_altq *a)
632 {
633           const struct priq_opts        *opts;
634 
635           opts = &a->pq_u.priq_opts;
636 
637           if (opts->flags) {
638                     printf("priq(");
639                     if (opts->flags & PRCF_RED)
640                               printf(" red");
641                     if (opts->flags & PRCF_ECN)
642                               printf(" ecn");
643                     if (opts->flags & PRCF_RIO)
644                               printf(" rio");
645                     if (opts->flags & PRCF_CLEARDSCP)
646                               printf(" cleardscp");
647                     if (opts->flags & PRCF_DEFAULTCLASS)
648                               printf(" default");
649                     printf(" ) ");
650 
651                     return (1);
652           } else
653                     return (0);
654 }
655 
656 /*
657  * HFSC support functions
658  */
659 static int
eval_pfqueue_hfsc(struct pfctl * pf,struct pf_altq * pa)660 eval_pfqueue_hfsc(struct pfctl *pf, struct pf_altq *pa)
661 {
662           struct pf_altq                *altq, *parent;
663           struct hfsc_opts    *opts;
664           struct service_curve           sc;
665 
666           opts = &pa->pq_u.hfsc_opts;
667 
668           if (pa->parent[0] == 0) {
669                     /* root queue */
670                     opts->lssc_m1 = pa->ifbandwidth;
671                     opts->lssc_m2 = pa->ifbandwidth;
672                     opts->lssc_d = 0;
673                     return (0);
674           }
675 
676           LIST_INIT(&rtsc);
677           LIST_INIT(&lssc);
678 
679           /* if link_share is not specified, use bandwidth */
680           if (opts->lssc_m2 == 0)
681                     opts->lssc_m2 = pa->bandwidth;
682 
683           if ((opts->rtsc_m1 > 0 && opts->rtsc_m2 == 0) ||
684               (opts->lssc_m1 > 0 && opts->lssc_m2 == 0) ||
685               (opts->ulsc_m1 > 0 && opts->ulsc_m2 == 0)) {
686                     warnx("m2 is zero for %s", pa->qname);
687                     return (-1);
688           }
689 
690           if ((opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) ||
691               (opts->lssc_m1 < opts->lssc_m2 && opts->lssc_m1 != 0) ||
692               (opts->ulsc_m1 < opts->ulsc_m2 && opts->ulsc_m1 != 0)) {
693                     warnx("m1 must be zero for convex curve: %s", pa->qname);
694                     return (-1);
695           }
696 
697           /*
698            * admission control:
699            * for the real-time service curve, the sum of the service curves
700            * should not exceed 80% of the interface bandwidth.  20% is reserved
701            * not to over-commit the actual interface bandwidth.
702            * for the linkshare service curve, the sum of the child service
703            * curve should not exceed the parent service curve.
704            * for the upper-limit service curve, the assigned bandwidth should
705            * be smaller than the interface bandwidth, and the upper-limit should
706            * be larger than the real-time service curve when both are defined.
707            */
708           parent = qname_to_pfaltq(pa->parent, pa->ifname);
709           if (parent == NULL)
710                     errx(1, "parent %s not found for %s", pa->parent, pa->qname);
711 
712           TAILQ_FOREACH(altq, &altqs, entries) {
713                     if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
714                               continue;
715                     if (altq->qname[0] == 0)  /* this is for interface */
716                               continue;
717 
718                     /* if the class has a real-time service curve, add it. */
719                     if (opts->rtsc_m2 != 0 && altq->pq_u.hfsc_opts.rtsc_m2 != 0) {
720                               sc.m1 = altq->pq_u.hfsc_opts.rtsc_m1;
721                               sc.d = altq->pq_u.hfsc_opts.rtsc_d;
722                               sc.m2 = altq->pq_u.hfsc_opts.rtsc_m2;
723                               gsc_add_sc(&rtsc, &sc);
724                     }
725 
726                     if (strncmp(altq->parent, pa->parent, PF_QNAME_SIZE) != 0)
727                               continue;
728 
729                     /* if the class has a linkshare service curve, add it. */
730                     if (opts->lssc_m2 != 0 && altq->pq_u.hfsc_opts.lssc_m2 != 0) {
731                               sc.m1 = altq->pq_u.hfsc_opts.lssc_m1;
732                               sc.d = altq->pq_u.hfsc_opts.lssc_d;
733                               sc.m2 = altq->pq_u.hfsc_opts.lssc_m2;
734                               gsc_add_sc(&lssc, &sc);
735                     }
736           }
737 
738           /* check the real-time service curve.  reserve 20% of interface bw */
739           if (opts->rtsc_m2 != 0) {
740                     /* add this queue to the sum */
741                     sc.m1 = opts->rtsc_m1;
742                     sc.d = opts->rtsc_d;
743                     sc.m2 = opts->rtsc_m2;
744                     gsc_add_sc(&rtsc, &sc);
745                     /* compare the sum with 80% of the interface */
746                     sc.m1 = 0;
747                     sc.d = 0;
748                     sc.m2 = pa->ifbandwidth / 100 * 80;
749                     if (!is_gsc_under_sc(&rtsc, &sc)) {
750                               warnx("real-time sc exceeds 80%% of the interface "
751                                   "bandwidth (%s)", rate2str((double)sc.m2));
752                               goto err_ret;
753                     }
754           }
755 
756           /* check the linkshare service curve. */
757           if (opts->lssc_m2 != 0) {
758                     /* add this queue to the child sum */
759                     sc.m1 = opts->lssc_m1;
760                     sc.d = opts->lssc_d;
761                     sc.m2 = opts->lssc_m2;
762                     gsc_add_sc(&lssc, &sc);
763                     /* compare the sum of the children with parent's sc */
764                     sc.m1 = parent->pq_u.hfsc_opts.lssc_m1;
765                     sc.d = parent->pq_u.hfsc_opts.lssc_d;
766                     sc.m2 = parent->pq_u.hfsc_opts.lssc_m2;
767                     if (!is_gsc_under_sc(&lssc, &sc)) {
768                               warnx("linkshare sc exceeds parent's sc");
769                               goto err_ret;
770                     }
771           }
772 
773           /* check the upper-limit service curve. */
774           if (opts->ulsc_m2 != 0) {
775                     if (opts->ulsc_m1 > pa->ifbandwidth ||
776                         opts->ulsc_m2 > pa->ifbandwidth) {
777                               warnx("upper-limit larger than interface bandwidth");
778                               goto err_ret;
779                     }
780                     if (opts->rtsc_m2 != 0 && opts->rtsc_m2 > opts->ulsc_m2) {
781                               warnx("upper-limit sc smaller than real-time sc");
782                               goto err_ret;
783                     }
784           }
785 
786           gsc_destroy(&rtsc);
787           gsc_destroy(&lssc);
788 
789           return (0);
790 
791 err_ret:
792           gsc_destroy(&rtsc);
793           gsc_destroy(&lssc);
794           return (-1);
795 }
796 
797 static int
check_commit_hfsc(int dev,int opts,struct pf_altq * pa)798 check_commit_hfsc(int dev, int opts, struct pf_altq *pa)
799 {
800           struct pf_altq      *altq, *def = NULL;
801           int                  default_class;
802           int                  error = 0;
803 
804           /* check if hfsc has one default queue for this interface */
805           default_class = 0;
806           TAILQ_FOREACH(altq, &altqs, entries) {
807                     if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
808                               continue;
809                     if (altq->qname[0] == 0)  /* this is for interface */
810                               continue;
811                     if (altq->parent[0] == 0)  /* dummy root */
812                               continue;
813                     if (altq->pq_u.hfsc_opts.flags & HFCF_DEFAULTCLASS) {
814                               default_class++;
815                               def = altq;
816                     }
817           }
818           if (default_class != 1) {
819                     warnx("should have one default queue on %s", pa->ifname);
820                     return (1);
821           }
822           /* make sure the default queue is a leaf */
823           TAILQ_FOREACH(altq, &altqs, entries) {
824                     if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
825                               continue;
826                     if (altq->qname[0] == 0)  /* this is for interface */
827                               continue;
828                     if (strncmp(altq->parent, def->qname, PF_QNAME_SIZE) == 0) {
829                               warnx("default queue is not a leaf");
830                               error++;
831                     }
832           }
833           return (error);
834 }
835 
836 static int
print_hfsc_opts(const struct pf_altq * a,const struct node_queue_opt * qopts)837 print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts)
838 {
839           const struct hfsc_opts                  *opts;
840           const struct node_hfsc_sc     *rtsc, *lssc, *ulsc;
841 
842           opts = &a->pq_u.hfsc_opts;
843           if (qopts == NULL)
844                     rtsc = lssc = ulsc = NULL;
845           else {
846                     rtsc = &qopts->data.hfsc_opts.realtime;
847                     lssc = &qopts->data.hfsc_opts.linkshare;
848                     ulsc = &qopts->data.hfsc_opts.upperlimit;
849           }
850 
851           if (opts->flags || opts->rtsc_m2 != 0 || opts->ulsc_m2 != 0 ||
852               (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
853               opts->lssc_d != 0))) {
854                     printf("hfsc(");
855                     if (opts->flags & HFCF_RED)
856                               printf(" red");
857                     if (opts->flags & HFCF_ECN)
858                               printf(" ecn");
859                     if (opts->flags & HFCF_RIO)
860                               printf(" rio");
861                     if (opts->flags & HFCF_CLEARDSCP)
862                               printf(" cleardscp");
863                     if (opts->flags & HFCF_DEFAULTCLASS)
864                               printf(" default");
865                     if (opts->rtsc_m2 != 0)
866                               print_hfsc_sc("realtime", opts->rtsc_m1, opts->rtsc_d,
867                                   opts->rtsc_m2, rtsc);
868                     if (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
869                         opts->lssc_d != 0))
870                               print_hfsc_sc("linkshare", opts->lssc_m1, opts->lssc_d,
871                                   opts->lssc_m2, lssc);
872                     if (opts->ulsc_m2 != 0)
873                               print_hfsc_sc("upperlimit", opts->ulsc_m1, opts->ulsc_d,
874                                   opts->ulsc_m2, ulsc);
875                     printf(" ) ");
876 
877                     return (1);
878           } else
879                     return (0);
880 }
881 
882 /*
883  * admission control using generalized service curve
884  */
885 
886 /* add a new service curve to a generalized service curve */
887 static void
gsc_add_sc(struct gen_sc * gsc,struct service_curve * sc)888 gsc_add_sc(struct gen_sc *gsc, struct service_curve *sc)
889 {
890           if (is_sc_null(sc))
891                     return;
892           if (sc->d != 0)
893                     gsc_add_seg(gsc, 0.0, 0.0, (double)sc->d, (double)sc->m1);
894           gsc_add_seg(gsc, (double)sc->d, 0.0, HUGE_VAL, (double)sc->m2);
895 }
896 
897 /*
898  * check whether all points of a generalized service curve have
899  * their y-coordinates no larger than a given two-piece linear
900  * service curve.
901  */
902 static int
is_gsc_under_sc(struct gen_sc * gsc,struct service_curve * sc)903 is_gsc_under_sc(struct gen_sc *gsc, struct service_curve *sc)
904 {
905           struct segment      *s, *last, *end;
906           double               y;
907 
908           if (is_sc_null(sc)) {
909                     if (LIST_EMPTY(gsc))
910                               return (1);
911                     LIST_FOREACH(s, gsc, _next) {
912                               if (s->m != 0)
913                                         return (0);
914                     }
915                     return (1);
916           }
917           /*
918            * gsc has a dummy entry at the end with x = HUGE_VAL.
919            * loop through up to this dummy entry.
920            */
921           end = gsc_getentry(gsc, HUGE_VAL);
922           if (end == NULL)
923                     return (1);
924           last = NULL;
925           for (s = LIST_FIRST(gsc); s != end; s = LIST_NEXT(s, _next)) {
926                     if (s->y > sc_x2y(sc, s->x))
927                               return (0);
928                     last = s;
929           }
930           /* last now holds the real last segment */
931           if (last == NULL)
932                     return (1);
933           if (last->m > sc->m2)
934                     return (0);
935           if (last->x < sc->d && last->m > sc->m1) {
936                     y = last->y + (sc->d - last->x) * last->m;
937                     if (y > sc_x2y(sc, sc->d))
938                               return (0);
939           }
940           return (1);
941 }
942 
943 static void
gsc_destroy(struct gen_sc * gsc)944 gsc_destroy(struct gen_sc *gsc)
945 {
946           struct segment      *s;
947 
948           while ((s = LIST_FIRST(gsc)) != NULL) {
949                     LIST_REMOVE(s, _next);
950                     free(s);
951           }
952 }
953 
954 /*
955  * return a segment entry starting at x.
956  * if gsc has no entry starting at x, a new entry is created at x.
957  */
958 static struct segment *
gsc_getentry(struct gen_sc * gsc,double x)959 gsc_getentry(struct gen_sc *gsc, double x)
960 {
961           struct segment      *new, *prev, *s;
962 
963           prev = NULL;
964           LIST_FOREACH(s, gsc, _next) {
965                     if (s->x == x)
966                               return (s);         /* matching entry found */
967                     else if (s->x < x)
968                               prev = s;
969                     else
970                               break;
971           }
972 
973           /* we have to create a new entry */
974           if ((new = calloc(1, sizeof(struct segment))) == NULL)
975                     return (NULL);
976 
977           new->x = x;
978           if (x == HUGE_VAL || s == NULL)
979                     new->d = 0;
980           else if (s->x == HUGE_VAL)
981                     new->d = HUGE_VAL;
982           else
983                     new->d = s->x - x;
984           if (prev == NULL) {
985                     /* insert the new entry at the head of the list */
986                     new->y = 0;
987                     new->m = 0;
988                     LIST_INSERT_HEAD(gsc, new, _next);
989           } else {
990                     /*
991                      * the start point intersects with the segment pointed by
992                      * prev.  divide prev into 2 segments
993                      */
994                     if (x == HUGE_VAL) {
995                               prev->d = HUGE_VAL;
996                               if (prev->m == 0)
997                                         new->y = prev->y;
998                               else
999                                         new->y = HUGE_VAL;
1000                     } else {
1001                               prev->d = x - prev->x;
1002                               new->y = prev->d * prev->m + prev->y;
1003                     }
1004                     new->m = prev->m;
1005                     LIST_INSERT_AFTER(prev, new, _next);
1006           }
1007           return (new);
1008 }
1009 
1010 /* add a segment to a generalized service curve */
1011 static int
gsc_add_seg(struct gen_sc * gsc,double x,double y,double d,double m)1012 gsc_add_seg(struct gen_sc *gsc, double x, double y, double d, double m)
1013 {
1014           struct segment      *start, *end, *s;
1015           double               x2;
1016 
1017           if (d == HUGE_VAL)
1018                     x2 = HUGE_VAL;
1019           else
1020                     x2 = x + d;
1021           start = gsc_getentry(gsc, x);
1022           end = gsc_getentry(gsc, x2);
1023           if (start == NULL || end == NULL)
1024                     return (-1);
1025 
1026           for (s = start; s != end; s = LIST_NEXT(s, _next)) {
1027                     s->m += m;
1028                     s->y += y + (s->x - x) * m;
1029           }
1030 
1031           end = gsc_getentry(gsc, HUGE_VAL);
1032           for (; s != end; s = LIST_NEXT(s, _next)) {
1033                     s->y += m * d;
1034           }
1035 
1036           return (0);
1037 }
1038 
1039 /* get y-projection of a service curve */
1040 static double
sc_x2y(struct service_curve * sc,double x)1041 sc_x2y(struct service_curve *sc, double x)
1042 {
1043           double    y;
1044 
1045           if (x <= (double)sc->d)
1046                     /* y belongs to the 1st segment */
1047                     y = x * (double)sc->m1;
1048           else
1049                     /* y belongs to the 2nd segment */
1050                     y = (double)sc->d * (double)sc->m1
1051                               + (x - (double)sc->d) * (double)sc->m2;
1052           return (y);
1053 }
1054 
1055 /*
1056  * misc utilities
1057  */
#define	R2S_BUFS	8
#define	RATESTR_MAX	16

/*
 * Format a bit rate as a short human-readable string, e.g. "999 b",
 * "1.50Kb", "100Mb" or "2.50Gb".
 *
 * The rate is scaled down by factors of 1000 through the units ' ', 'K',
 * 'M' and 'G'.  Scaling stops at 'G', so larger rates are reported as a
 * multiple of Gb ("1000Gb").  Two decimals are printed unless the scaled
 * value has no fractional hundredths, in which case it is printed as an
 * integer.
 *
 * The result lives in one of R2S_BUFS static buffers used round-robin,
 * so up to R2S_BUFS results may be in use at once (for instance as
 * several arguments of one printf call); the caller must not free it.
 * Output longer than RATESTR_MAX - 1 characters is truncated.
 */
char *
rate2str(double rate)
{
	static char		 r2sbuf[R2S_BUFS][RATESTR_MAX];	/* ring buffer */
	static int		 idx = 0;
	static const char	 unit[] = " KMG";
	/* index of the last real unit; sizeof also counts the NUL */
	const int		 last_unit = (int)sizeof(unit) - 2;
	char			*buf;
	double			 hundredths;
	int			 i;

	buf = r2sbuf[idx++];
	if (idx == R2S_BUFS)
		idx = 0;

	/*
	 * Never step past 'G': the next element of unit[] is the string
	 * terminator, which would cut the formatted result short.
	 */
	for (i = 0; rate >= 1000 && i < last_unit; i++)
		rate /= 1000;

	/*
	 * Only convert to int when the value is known to fit; anything
	 * outside that range (or NaN) is printed with the %.2f form.
	 */
	hundredths = rate * 100;
	if (!(hundredths < (double)INT_MAX && hundredths > (double)INT_MIN) ||
	    (int)hundredths % 100)
		snprintf(buf, RATESTR_MAX, "%.2f%cb", rate, unit[i]);
	else
		snprintf(buf, RATESTR_MAX, "%d%cb", (int)rate, unit[i]);

	return (buf);
}
1084 
1085 u_int32_t
getifspeed(char * ifname)1086 getifspeed(char *ifname)
1087 {
1088 #ifdef __NetBSD__
1089           int                            s;
1090           struct ifdatareq     ifdr;
1091           struct if_data                *ifrdat;
1092 
1093           if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1094                     err(1, "getifspeed: socket");
1095           memset(&ifdr, 0, sizeof(ifdr));
1096           if (strlcpy(ifdr.ifdr_name, ifname, sizeof(ifdr.ifdr_name)) >=
1097               sizeof(ifdr.ifdr_name))
1098                     errx(1, "getifspeed: strlcpy");
1099           if (ioctl(s, SIOCGIFDATA, &ifdr) == -1)
1100                     err(1, "getifspeed: SIOCGIFDATA");
1101           ifrdat = &ifdr.ifdr_data;
1102           if (close(s) == -1)
1103                     err(1, "getifspeed: close");
1104           return ((u_int32_t)ifrdat->ifi_baudrate);
1105 #else
1106           int                 s;
1107           struct ifreq        ifr;
1108           struct if_data      ifrdat;
1109 
1110           if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1111                     err(1, "socket");
1112           bzero(&ifr, sizeof(ifr));
1113           if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
1114               sizeof(ifr.ifr_name))
1115                     errx(1, "getifspeed: strlcpy");
1116           ifr.ifr_data = (caddr_t)&ifrdat;
1117           if (ioctl(s, SIOCGIFDATA, (caddr_t)&ifr) == -1)
1118                     err(1, "SIOCGIFDATA");
1119           if (close(s))
1120                     err(1, "close");
1121           return ((u_int32_t)ifrdat.ifi_baudrate);
1122 #endif /* !__NetBSD__ */
1123 }
1124 
1125 u_long
getifmtu(char * ifname)1126 getifmtu(char *ifname)
1127 {
1128           int                 s;
1129           struct ifreq        ifr;
1130 
1131           if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1132                     err(1, "socket");
1133           bzero(&ifr, sizeof(ifr));
1134           if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
1135               sizeof(ifr.ifr_name))
1136                     errx(1, "getifmtu: strlcpy");
1137           if (ioctl(s, SIOCGIFMTU, (caddr_t)&ifr) == -1)
1138                     err(1, "SIOCGIFMTU");
1139           if (close(s) == -1)
1140                     err(1, "close");
1141           if (ifr.ifr_mtu > 0)
1142                     return (ifr.ifr_mtu);
1143           else {
1144                     warnx("could not get mtu for %s, assuming 1500", ifname);
1145                     return (1500);
1146           }
1147 }
1148 
1149 int
eval_queue_opts(struct pf_altq * pa,struct node_queue_opt * opts,u_int32_t ref_bw)1150 eval_queue_opts(struct pf_altq *pa, struct node_queue_opt *opts,
1151     u_int32_t ref_bw)
1152 {
1153           int       errors = 0;
1154 
1155           switch (pa->scheduler) {
1156           case ALTQT_CBQ:
1157                     pa->pq_u.cbq_opts = opts->data.cbq_opts;
1158                     break;
1159           case ALTQT_PRIQ:
1160                     pa->pq_u.priq_opts = opts->data.priq_opts;
1161                     break;
1162           case ALTQT_HFSC:
1163                     pa->pq_u.hfsc_opts.flags = opts->data.hfsc_opts.flags;
1164                     if (opts->data.hfsc_opts.linkshare.used) {
1165                               pa->pq_u.hfsc_opts.lssc_m1 =
1166                                   eval_bwspec(&opts->data.hfsc_opts.linkshare.m1,
1167                                   ref_bw);
1168                               pa->pq_u.hfsc_opts.lssc_m2 =
1169                                   eval_bwspec(&opts->data.hfsc_opts.linkshare.m2,
1170                                   ref_bw);
1171                               pa->pq_u.hfsc_opts.lssc_d =
1172                                   opts->data.hfsc_opts.linkshare.d;
1173                     }
1174                     if (opts->data.hfsc_opts.realtime.used) {
1175                               pa->pq_u.hfsc_opts.rtsc_m1 =
1176                                   eval_bwspec(&opts->data.hfsc_opts.realtime.m1,
1177                                   ref_bw);
1178                               pa->pq_u.hfsc_opts.rtsc_m2 =
1179                                   eval_bwspec(&opts->data.hfsc_opts.realtime.m2,
1180                                   ref_bw);
1181                               pa->pq_u.hfsc_opts.rtsc_d =
1182                                   opts->data.hfsc_opts.realtime.d;
1183                     }
1184                     if (opts->data.hfsc_opts.upperlimit.used) {
1185                               pa->pq_u.hfsc_opts.ulsc_m1 =
1186                                   eval_bwspec(&opts->data.hfsc_opts.upperlimit.m1,
1187                                   ref_bw);
1188                               pa->pq_u.hfsc_opts.ulsc_m2 =
1189                                   eval_bwspec(&opts->data.hfsc_opts.upperlimit.m2,
1190                                   ref_bw);
1191                               pa->pq_u.hfsc_opts.ulsc_d =
1192                                   opts->data.hfsc_opts.upperlimit.d;
1193                     }
1194                     break;
1195           default:
1196                     warnx("eval_queue_opts: unknown scheduler type %u",
1197                         opts->qtype);
1198                     errors++;
1199                     break;
1200           }
1201 
1202           return (errors);
1203 }
1204 
1205 u_int32_t
eval_bwspec(struct node_queue_bw * bw,u_int32_t ref_bw)1206 eval_bwspec(struct node_queue_bw *bw, u_int32_t ref_bw)
1207 {
1208           if (bw->bw_absolute > 0)
1209                     return (bw->bw_absolute);
1210 
1211           if (bw->bw_percent > 0)
1212                     return (ref_bw / 100 * bw->bw_percent);
1213 
1214           return (0);
1215 }
1216 
1217 void
print_hfsc_sc(const char * scname,u_int m1,u_int d,u_int m2,const struct node_hfsc_sc * sc)1218 print_hfsc_sc(const char *scname, u_int m1, u_int d, u_int m2,
1219     const struct node_hfsc_sc *sc)
1220 {
1221           printf(" %s", scname);
1222 
1223           if (d != 0) {
1224                     printf("(");
1225                     if (sc != NULL && sc->m1.bw_percent > 0)
1226                               printf("%u%%", sc->m1.bw_percent);
1227                     else
1228                               printf("%s", rate2str((double)m1));
1229                     printf(" %u", d);
1230           }
1231 
1232           if (sc != NULL && sc->m2.bw_percent > 0)
1233                     printf(" %u%%", sc->m2.bw_percent);
1234           else
1235                     printf(" %s", rate2str((double)m2));
1236 
1237           if (d != 0)
1238                     printf(")");
1239 }
1240