1 /*-
2 * Copyright (C) 1997-2003
3 * Sony Computer Science Laboratories Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27 /*-
28 * Copyright (c) 1990-1994 Regents of the University of California.
29 * All rights reserved.
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 * notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 * notice, this list of conditions and the following disclaimer in the
38 * documentation and/or other materials provided with the distribution.
39 * 3. All advertising materials mentioning features or use of this software
40 * must display the following acknowledgement:
41 * This product includes software developed by the Computer Systems
42 * Engineering Group at Lawrence Berkeley Laboratory.
43 * 4. Neither the name of the University nor of the Laboratory may be used
44 * to endorse or promote products derived from this software without
45 * specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $
60 * $FreeBSD$
61 */
62
63 #include "opt_altq.h"
64 #include "opt_inet.h"
65 #include "opt_inet6.h"
66 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
67
68 #include <sys/param.h>
69 #include <sys/malloc.h>
70 #include <sys/mbuf.h>
71 #include <sys/socket.h>
72 #include <sys/systm.h>
73 #include <sys/errno.h>
74 #if 1 /* ALTQ3_COMPAT */
75 #include <sys/sockio.h>
76 #include <sys/proc.h>
77 #include <sys/kernel.h>
78 #ifdef ALTQ_FLOWVALVE
79 #include <sys/queue.h>
80 #include <sys/time.h>
81 #endif
82 #endif /* ALTQ3_COMPAT */
83
84 #include <net/if.h>
85 #include <net/if_var.h>
86
87 #include <netinet/in.h>
88 #include <netinet/in_systm.h>
89 #include <netinet/ip.h>
90 #ifdef INET6
91 #include <netinet/ip6.h>
92 #endif
93
94 #include <netpfil/pf/pf.h>
95 #include <netpfil/pf/pf_altq.h>
96 #include <netpfil/pf/pf_mtag.h>
97 #include <net/altq/altq.h>
98 #include <net/altq/altq_red.h>
99 #ifdef ALTQ3_COMPAT
100 #include <net/altq/altq_conf.h>
101 #ifdef ALTQ_FLOWVALVE
102 #include <net/altq/altq_flowvalve.h>
103 #endif
104 #endif
105
106 /*
107 * ALTQ/RED (Random Early Detection) implementation using 32-bit
108 * fixed-point calculation.
109 *
110 * written by kjc using the ns code as a reference.
111 * you can learn more about red and ns from Sally's home page at
112 * http://www-nrg.ee.lbl.gov/floyd/
113 *
114 * most of the red parameter values are fixed in this implementation
115 * to prevent fixed-point overflow/underflow.
116 * if you change the parameters, watch out for overflow/underflow!
117 *
118 * the parameters used are recommended values by Sally.
119 * the corresponding ns config looks:
120 * q_weight=0.00195
121 * minthresh=5 maxthresh=15 queue-size=60
122 * linterm=30
123 * dropmech=drop-tail
124 * bytes=false (can't be handled by 32-bit fixed-point)
125 * doubleq=false dqthresh=false
126 * wait=true
127 */
128 /*
129 * alternative red parameters for a slow link.
130 *
131 * assume the queue length becomes from zero to L and keeps L, it takes
132 * N packets for q_avg to reach 63% of L.
133 * when q_weight is 0.002, N is about 500 packets.
134 * for a slow link like dial-up, 500 packets takes more than 1 minute!
135 * when q_weight is 0.008, N is about 127 packets.
136 * when q_weight is 0.016, N is about 63 packets.
137 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
138 * are allowed for 0.016.
139 * see Sally's paper for more details.
140 */
141 /* normal red parameters */
142 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
143 /* q_weight = 0.00195 */
144
145 /* red parameters for a slow link */
146 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
147 /* q_weight = 0.0078125 */
148
149 /* red parameters for a very slow link (e.g., dialup) */
150 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
151 /* q_weight = 0.015625 */
152
153 /* fixed-point uses 12-bit decimal places */
154 #define FP_SHIFT 12 /* fixed-point shift */
155
156 /* red parameters for drop probability */
157 #define INV_P_MAX 10 /* inverse of max drop probability */
158 #define TH_MIN 5 /* min threshold */
159 #define TH_MAX 15 /* max threshold */
160
161 #define RED_LIMIT 60 /* default max queue lenght */
162 #define RED_STATS /* collect statistics */
163
164 /*
165 * our default policy for forced-drop is drop-tail.
166 * (in altq-1.1.2 or earlier, the default was random-drop.
167 * but it makes more sense to punish the cause of the surge.)
168 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
169 */
170
171 #ifdef ALTQ3_COMPAT
172 #ifdef ALTQ_FLOWVALVE
173 /*
174 * flow-valve is an extention to protect red from unresponsive flows
175 * and to promote end-to-end congestion control.
176 * flow-valve observes the average drop rates of the flows that have
177 * experienced packet drops in the recent past.
178 * when the average drop rate exceeds the threshold, the flow is
179 * blocked by the flow-valve. the trapped flow should back off
180 * exponentially to escape from the flow-valve.
181 */
182 #ifdef RED_RANDOM_DROP
183 #error "random-drop can't be used with flow-valve!"
184 #endif
185 #endif /* ALTQ_FLOWVALVE */
186
187 /* red_list keeps all red_queue_t's allocated. */
188 static red_queue_t *red_list = NULL;
189
190 #endif /* ALTQ3_COMPAT */
191
192 /* default red parameter values */
193 static int default_th_min = TH_MIN;
194 static int default_th_max = TH_MAX;
195 static int default_inv_pmax = INV_P_MAX;
196
197 #ifdef ALTQ3_COMPAT
198 /* internal function prototypes */
199 static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
200 static struct mbuf *red_dequeue(struct ifaltq *, int);
201 static int red_request(struct ifaltq *, int, void *);
202 static void red_purgeq(red_queue_t *);
203 static int red_detach(red_queue_t *);
204 #ifdef ALTQ_FLOWVALVE
205 static __inline struct fve *flowlist_lookup(struct flowvalve *,
206 struct altq_pktattr *, struct timeval *);
207 static __inline struct fve *flowlist_reclaim(struct flowvalve *,
208 struct altq_pktattr *);
209 static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
210 static __inline int fv_p2f(struct flowvalve *, int);
211 #if 0 /* XXX: make the compiler happy (fv_alloc unused) */
212 static struct flowvalve *fv_alloc(struct red *);
213 #endif
214 static void fv_destroy(struct flowvalve *);
215 static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
216 struct fve **);
217 static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
218 struct fve *);
219 #endif
220 #endif /* ALTQ3_COMPAT */
221
222 /*
223 * red support routines
224 */
225 red_t *
red_alloc(int weight,int inv_pmax,int th_min,int th_max,int flags,int pkttime)226 red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
227 int pkttime)
228 {
229 red_t *rp;
230 int w, i;
231 int npkts_per_sec;
232
233 rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO);
234 if (rp == NULL)
235 return (NULL);
236
237 if (weight == 0)
238 rp->red_weight = W_WEIGHT;
239 else
240 rp->red_weight = weight;
241
242 /* allocate weight table */
243 rp->red_wtab = wtab_alloc(rp->red_weight);
244 if (rp->red_wtab == NULL) {
245 free(rp, M_DEVBUF);
246 return (NULL);
247 }
248
249 rp->red_avg = 0;
250 rp->red_idle = 1;
251
252 if (inv_pmax == 0)
253 rp->red_inv_pmax = default_inv_pmax;
254 else
255 rp->red_inv_pmax = inv_pmax;
256 if (th_min == 0)
257 rp->red_thmin = default_th_min;
258 else
259 rp->red_thmin = th_min;
260 if (th_max == 0)
261 rp->red_thmax = default_th_max;
262 else
263 rp->red_thmax = th_max;
264
265 rp->red_flags = flags;
266
267 if (pkttime == 0)
268 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
269 rp->red_pkttime = 800;
270 else
271 rp->red_pkttime = pkttime;
272
273 if (weight == 0) {
274 /* when the link is very slow, adjust red parameters */
275 npkts_per_sec = 1000000 / rp->red_pkttime;
276 if (npkts_per_sec < 50) {
277 /* up to about 400Kbps */
278 rp->red_weight = W_WEIGHT_2;
279 } else if (npkts_per_sec < 300) {
280 /* up to about 2.4Mbps */
281 rp->red_weight = W_WEIGHT_1;
282 }
283 }
284
285 /* calculate wshift. weight must be power of 2 */
286 w = rp->red_weight;
287 for (i = 0; w > 1; i++)
288 w = w >> 1;
289 rp->red_wshift = i;
290 w = 1 << rp->red_wshift;
291 if (w != rp->red_weight) {
292 printf("invalid weight value %d for red! use %d\n",
293 rp->red_weight, w);
294 rp->red_weight = w;
295 }
296
297 /*
298 * thmin_s and thmax_s are scaled versions of th_min and th_max
299 * to be compared with avg.
300 */
301 rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
302 rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
303
304 /*
305 * precompute probability denominator
306 * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
307 */
308 rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
309 * rp->red_inv_pmax) << FP_SHIFT;
310
311 microtime(&rp->red_last);
312 return (rp);
313 }
314
315 void
red_destroy(red_t * rp)316 red_destroy(red_t *rp)
317 {
318 #ifdef ALTQ3_COMPAT
319 #ifdef ALTQ_FLOWVALVE
320 if (rp->red_flowvalve != NULL)
321 fv_destroy(rp->red_flowvalve);
322 #endif
323 #endif /* ALTQ3_COMPAT */
324 wtab_destroy(rp->red_wtab);
325 free(rp, M_DEVBUF);
326 }
327
328 void
red_getstats(red_t * rp,struct redstats * sp)329 red_getstats(red_t *rp, struct redstats *sp)
330 {
331 sp->q_avg = rp->red_avg >> rp->red_wshift;
332 sp->xmit_cnt = rp->red_stats.xmit_cnt;
333 sp->drop_cnt = rp->red_stats.drop_cnt;
334 sp->drop_forced = rp->red_stats.drop_forced;
335 sp->drop_unforced = rp->red_stats.drop_unforced;
336 sp->marked_packets = rp->red_stats.marked_packets;
337 }
338
339 int
red_addq(red_t * rp,class_queue_t * q,struct mbuf * m,struct altq_pktattr * pktattr)340 red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
341 struct altq_pktattr *pktattr)
342 {
343 int avg, droptype;
344 int n;
345 #ifdef ALTQ3_COMPAT
346 #ifdef ALTQ_FLOWVALVE
347 struct fve *fve = NULL;
348
349 if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
350 if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
351 m_freem(m);
352 return (-1);
353 }
354 #endif
355 #endif /* ALTQ3_COMPAT */
356
357 avg = rp->red_avg;
358
359 /*
360 * if we were idle, we pretend that n packets arrived during
361 * the idle period.
362 */
363 if (rp->red_idle) {
364 struct timeval now;
365 int t;
366
367 rp->red_idle = 0;
368 microtime(&now);
369 t = (now.tv_sec - rp->red_last.tv_sec);
370 if (t > 60) {
371 /*
372 * being idle for more than 1 minute, set avg to zero.
373 * this prevents t from overflow.
374 */
375 avg = 0;
376 } else {
377 t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
378 n = t / rp->red_pkttime - 1;
379
380 /* the following line does (avg = (1 - Wq)^n * avg) */
381 if (n > 0)
382 avg = (avg >> FP_SHIFT) *
383 pow_w(rp->red_wtab, n);
384 }
385 }
386
387 /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
388 avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
389 rp->red_avg = avg; /* save the new value */
390
391 /*
392 * red_count keeps a tally of arriving traffic that has not
393 * been dropped.
394 */
395 rp->red_count++;
396
397 /* see if we drop early */
398 droptype = DTYPE_NODROP;
399 if (avg >= rp->red_thmin_s && qlen(q) > 1) {
400 if (avg >= rp->red_thmax_s) {
401 /* avg >= th_max: forced drop */
402 droptype = DTYPE_FORCED;
403 } else if (rp->red_old == 0) {
404 /* first exceeds th_min */
405 rp->red_count = 1;
406 rp->red_old = 1;
407 } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
408 rp->red_probd, rp->red_count)) {
409 /* mark or drop by red */
410 if ((rp->red_flags & REDF_ECN) &&
411 mark_ecn(m, pktattr, rp->red_flags)) {
412 /* successfully marked. do not drop. */
413 rp->red_count = 0;
414 #ifdef RED_STATS
415 rp->red_stats.marked_packets++;
416 #endif
417 } else {
418 /* unforced drop by red */
419 droptype = DTYPE_EARLY;
420 }
421 }
422 } else {
423 /* avg < th_min */
424 rp->red_old = 0;
425 }
426
427 /*
428 * if the queue length hits the hard limit, it's a forced drop.
429 */
430 if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
431 droptype = DTYPE_FORCED;
432
433 #ifdef RED_RANDOM_DROP
434 /* if successful or forced drop, enqueue this packet. */
435 if (droptype != DTYPE_EARLY)
436 _addq(q, m);
437 #else
438 /* if successful, enqueue this packet. */
439 if (droptype == DTYPE_NODROP)
440 _addq(q, m);
441 #endif
442 if (droptype != DTYPE_NODROP) {
443 if (droptype == DTYPE_EARLY) {
444 /* drop the incoming packet */
445 #ifdef RED_STATS
446 rp->red_stats.drop_unforced++;
447 #endif
448 } else {
449 /* forced drop, select a victim packet in the queue. */
450 #ifdef RED_RANDOM_DROP
451 m = _getq_random(q);
452 #endif
453 #ifdef RED_STATS
454 rp->red_stats.drop_forced++;
455 #endif
456 }
457 #ifdef RED_STATS
458 PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
459 #endif
460 rp->red_count = 0;
461 #ifdef ALTQ3_COMPAT
462 #ifdef ALTQ_FLOWVALVE
463 if (rp->red_flowvalve != NULL)
464 fv_dropbyred(rp->red_flowvalve, pktattr, fve);
465 #endif
466 #endif /* ALTQ3_COMPAT */
467 m_freem(m);
468 return (-1);
469 }
470 /* successfully queued */
471 #ifdef RED_STATS
472 PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
473 #endif
474 return (0);
475 }
476
477 /*
478 * early-drop probability is calculated as follows:
479 * prob = p_max * (avg - th_min) / (th_max - th_min)
480 * prob_a = prob / (2 - count*prob)
481 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
482 * here prob_a increases as successive undrop count increases.
483 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
484 * becomes 1 when (count >= (2 / prob))).
485 */
486 int
drop_early(int fp_len,int fp_probd,int count)487 drop_early(int fp_len, int fp_probd, int count)
488 {
489 int d; /* denominator of drop-probability */
490
491 d = fp_probd - count * fp_len;
492 if (d <= 0)
493 /* count exceeds the hard limit: drop or mark */
494 return (1);
495
496 /*
497 * now the range of d is [1..600] in fixed-point. (when
498 * th_max-th_min=10 and p_max=1/30)
499 * drop probability = (avg - TH_MIN) / d
500 */
501
502 if ((arc4random() % d) < fp_len) {
503 /* drop or mark */
504 return (1);
505 }
506 /* no drop/mark */
507 return (0);
508 }
509
510 /*
511 * try to mark CE bit to the packet.
512 * returns 1 if successfully marked, 0 otherwise.
513 */
514 int
mark_ecn(struct mbuf * m,struct altq_pktattr * pktattr,int flags)515 mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
516 {
517 struct mbuf *m0;
518 struct pf_mtag *at;
519 void *hdr;
520
521 at = pf_find_mtag(m);
522 if (at != NULL) {
523 hdr = at->hdr;
524 #ifdef ALTQ3_COMPAT
525 } else if (pktattr != NULL) {
526 af = pktattr->pattr_af;
527 hdr = pktattr->pattr_hdr;
528 #endif /* ALTQ3_COMPAT */
529 } else
530 return (0);
531
532 /* verify that pattr_hdr is within the mbuf data */
533 for (m0 = m; m0 != NULL; m0 = m0->m_next)
534 if (((caddr_t)hdr >= m0->m_data) &&
535 ((caddr_t)hdr < m0->m_data + m0->m_len))
536 break;
537 if (m0 == NULL) {
538 /* ick, tag info is stale */
539 return (0);
540 }
541
542 switch (((struct ip *)hdr)->ip_v) {
543 case IPVERSION:
544 if (flags & REDF_ECN4) {
545 struct ip *ip = hdr;
546 u_int8_t otos;
547 int sum;
548
549 if (ip->ip_v != 4)
550 return (0); /* version mismatch! */
551
552 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
553 return (0); /* not-ECT */
554 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
555 return (1); /* already marked */
556
557 /*
558 * ecn-capable but not marked,
559 * mark CE and update checksum
560 */
561 otos = ip->ip_tos;
562 ip->ip_tos |= IPTOS_ECN_CE;
563 /*
564 * update checksum (from RFC1624)
565 * HC' = ~(~HC + ~m + m')
566 */
567 sum = ~ntohs(ip->ip_sum) & 0xffff;
568 sum += (~otos & 0xffff) + ip->ip_tos;
569 sum = (sum >> 16) + (sum & 0xffff);
570 sum += (sum >> 16); /* add carry */
571 ip->ip_sum = htons(~sum & 0xffff);
572 return (1);
573 }
574 break;
575 #ifdef INET6
576 case (IPV6_VERSION >> 4):
577 if (flags & REDF_ECN6) {
578 struct ip6_hdr *ip6 = hdr;
579 u_int32_t flowlabel;
580
581 flowlabel = ntohl(ip6->ip6_flow);
582 if ((flowlabel >> 28) != 6)
583 return (0); /* version mismatch! */
584 if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
585 (IPTOS_ECN_NOTECT << 20))
586 return (0); /* not-ECT */
587 if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
588 (IPTOS_ECN_CE << 20))
589 return (1); /* already marked */
590 /*
591 * ecn-capable but not marked, mark CE
592 */
593 flowlabel |= (IPTOS_ECN_CE << 20);
594 ip6->ip6_flow = htonl(flowlabel);
595 return (1);
596 }
597 break;
598 #endif /* INET6 */
599 }
600
601 /* not marked */
602 return (0);
603 }
604
605 struct mbuf *
red_getq(rp,q)606 red_getq(rp, q)
607 red_t *rp;
608 class_queue_t *q;
609 {
610 struct mbuf *m;
611
612 if ((m = _getq(q)) == NULL) {
613 if (rp->red_idle == 0) {
614 rp->red_idle = 1;
615 microtime(&rp->red_last);
616 }
617 return NULL;
618 }
619
620 rp->red_idle = 0;
621 return (m);
622 }
623
624 /*
625 * helper routine to calibrate avg during idle.
626 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
627 * here Wq = 1/weight and the code assumes Wq is close to zero.
628 *
629 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
630 */
631 static struct wtab *wtab_list = NULL; /* pointer to wtab list */
632
633 struct wtab *
wtab_alloc(int weight)634 wtab_alloc(int weight)
635 {
636 struct wtab *w;
637 int i;
638
639 for (w = wtab_list; w != NULL; w = w->w_next)
640 if (w->w_weight == weight) {
641 w->w_refcount++;
642 return (w);
643 }
644
645 w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO);
646 if (w == NULL)
647 return (NULL);
648 w->w_weight = weight;
649 w->w_refcount = 1;
650 w->w_next = wtab_list;
651 wtab_list = w;
652
653 /* initialize the weight table */
654 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
655 for (i = 1; i < 32; i++) {
656 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
657 if (w->w_tab[i] == 0 && w->w_param_max == 0)
658 w->w_param_max = 1 << i;
659 }
660
661 return (w);
662 }
663
664 int
wtab_destroy(struct wtab * w)665 wtab_destroy(struct wtab *w)
666 {
667 struct wtab *prev;
668
669 if (--w->w_refcount > 0)
670 return (0);
671
672 if (wtab_list == w)
673 wtab_list = w->w_next;
674 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
675 if (prev->w_next == w) {
676 prev->w_next = w->w_next;
677 break;
678 }
679
680 free(w, M_DEVBUF);
681 return (0);
682 }
683
684 int32_t
pow_w(struct wtab * w,int n)685 pow_w(struct wtab *w, int n)
686 {
687 int i, bit;
688 int32_t val;
689
690 if (n >= w->w_param_max)
691 return (0);
692
693 val = 1 << FP_SHIFT;
694 if (n <= 0)
695 return (val);
696
697 bit = 1;
698 i = 0;
699 while (n) {
700 if (n & bit) {
701 val = (val * w->w_tab[i]) >> FP_SHIFT;
702 n &= ~bit;
703 }
704 i++;
705 bit <<= 1;
706 }
707 return (val);
708 }
709
710 #ifdef ALTQ3_COMPAT
711 /*
712 * red device interface
713 */
714 altqdev_decl(red);
715
716 int
redopen(dev,flag,fmt,p)717 redopen(dev, flag, fmt, p)
718 dev_t dev;
719 int flag, fmt;
720 #if (__FreeBSD_version > 500000)
721 struct thread *p;
722 #else
723 struct proc *p;
724 #endif
725 {
726 /* everything will be done when the queueing scheme is attached. */
727 return 0;
728 }
729
730 int
redclose(dev,flag,fmt,p)731 redclose(dev, flag, fmt, p)
732 dev_t dev;
733 int flag, fmt;
734 #if (__FreeBSD_version > 500000)
735 struct thread *p;
736 #else
737 struct proc *p;
738 #endif
739 {
740 red_queue_t *rqp;
741 int err, error = 0;
742
743 while ((rqp = red_list) != NULL) {
744 /* destroy all */
745 err = red_detach(rqp);
746 if (err != 0 && error == 0)
747 error = err;
748 }
749
750 return error;
751 }
752
753 int
redioctl(dev,cmd,addr,flag,p)754 redioctl(dev, cmd, addr, flag, p)
755 dev_t dev;
756 ioctlcmd_t cmd;
757 caddr_t addr;
758 int flag;
759 #if (__FreeBSD_version > 500000)
760 struct thread *p;
761 #else
762 struct proc *p;
763 #endif
764 {
765 red_queue_t *rqp;
766 struct red_interface *ifacep;
767 struct ifnet *ifp;
768 int error = 0;
769
770 /* check super-user privilege */
771 switch (cmd) {
772 case RED_GETSTATS:
773 break;
774 default:
775 #if (__FreeBSD_version > 700000)
776 if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
777 #elsif (__FreeBSD_version > 400000)
778 if ((error = suser(p)) != 0)
779 #else
780 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
781 #endif
782 return (error);
783 break;
784 }
785
786 switch (cmd) {
787
788 case RED_ENABLE:
789 ifacep = (struct red_interface *)addr;
790 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
791 error = EBADF;
792 break;
793 }
794 error = altq_enable(rqp->rq_ifq);
795 break;
796
797 case RED_DISABLE:
798 ifacep = (struct red_interface *)addr;
799 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
800 error = EBADF;
801 break;
802 }
803 error = altq_disable(rqp->rq_ifq);
804 break;
805
806 case RED_IF_ATTACH:
807 ifp = ifunit(((struct red_interface *)addr)->red_ifname);
808 if (ifp == NULL) {
809 error = ENXIO;
810 break;
811 }
812
813 /* allocate and initialize red_queue_t */
814 rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
815 if (rqp == NULL) {
816 error = ENOMEM;
817 break;
818 }
819 bzero(rqp, sizeof(red_queue_t));
820
821 rqp->rq_q = malloc(sizeof(class_queue_t),
822 M_DEVBUF, M_WAITOK);
823 if (rqp->rq_q == NULL) {
824 free(rqp, M_DEVBUF);
825 error = ENOMEM;
826 break;
827 }
828 bzero(rqp->rq_q, sizeof(class_queue_t));
829
830 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
831 if (rqp->rq_red == NULL) {
832 free(rqp->rq_q, M_DEVBUF);
833 free(rqp, M_DEVBUF);
834 error = ENOMEM;
835 break;
836 }
837
838 rqp->rq_ifq = &ifp->if_snd;
839 qtail(rqp->rq_q) = NULL;
840 qlen(rqp->rq_q) = 0;
841 qlimit(rqp->rq_q) = RED_LIMIT;
842 qtype(rqp->rq_q) = Q_RED;
843
844 /*
845 * set RED to this ifnet structure.
846 */
847 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
848 red_enqueue, red_dequeue, red_request,
849 NULL, NULL);
850 if (error) {
851 red_destroy(rqp->rq_red);
852 free(rqp->rq_q, M_DEVBUF);
853 free(rqp, M_DEVBUF);
854 break;
855 }
856
857 /* add this state to the red list */
858 rqp->rq_next = red_list;
859 red_list = rqp;
860 break;
861
862 case RED_IF_DETACH:
863 ifacep = (struct red_interface *)addr;
864 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
865 error = EBADF;
866 break;
867 }
868 error = red_detach(rqp);
869 break;
870
871 case RED_GETSTATS:
872 do {
873 struct red_stats *q_stats;
874 red_t *rp;
875
876 q_stats = (struct red_stats *)addr;
877 if ((rqp = altq_lookup(q_stats->iface.red_ifname,
878 ALTQT_RED)) == NULL) {
879 error = EBADF;
880 break;
881 }
882
883 q_stats->q_len = qlen(rqp->rq_q);
884 q_stats->q_limit = qlimit(rqp->rq_q);
885
886 rp = rqp->rq_red;
887 q_stats->q_avg = rp->red_avg >> rp->red_wshift;
888 q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
889 q_stats->drop_cnt = rp->red_stats.drop_cnt;
890 q_stats->drop_forced = rp->red_stats.drop_forced;
891 q_stats->drop_unforced = rp->red_stats.drop_unforced;
892 q_stats->marked_packets = rp->red_stats.marked_packets;
893
894 q_stats->weight = rp->red_weight;
895 q_stats->inv_pmax = rp->red_inv_pmax;
896 q_stats->th_min = rp->red_thmin;
897 q_stats->th_max = rp->red_thmax;
898
899 #ifdef ALTQ_FLOWVALVE
900 if (rp->red_flowvalve != NULL) {
901 struct flowvalve *fv = rp->red_flowvalve;
902 q_stats->fv_flows = fv->fv_flows;
903 q_stats->fv_pass = fv->fv_stats.pass;
904 q_stats->fv_predrop = fv->fv_stats.predrop;
905 q_stats->fv_alloc = fv->fv_stats.alloc;
906 q_stats->fv_escape = fv->fv_stats.escape;
907 } else {
908 #endif /* ALTQ_FLOWVALVE */
909 q_stats->fv_flows = 0;
910 q_stats->fv_pass = 0;
911 q_stats->fv_predrop = 0;
912 q_stats->fv_alloc = 0;
913 q_stats->fv_escape = 0;
914 #ifdef ALTQ_FLOWVALVE
915 }
916 #endif /* ALTQ_FLOWVALVE */
917 } while (/*CONSTCOND*/ 0);
918 break;
919
920 case RED_CONFIG:
921 do {
922 struct red_conf *fc;
923 red_t *new;
924 int s, limit;
925
926 fc = (struct red_conf *)addr;
927 if ((rqp = altq_lookup(fc->iface.red_ifname,
928 ALTQT_RED)) == NULL) {
929 error = EBADF;
930 break;
931 }
932 new = red_alloc(fc->red_weight,
933 fc->red_inv_pmax,
934 fc->red_thmin,
935 fc->red_thmax,
936 fc->red_flags,
937 fc->red_pkttime);
938 if (new == NULL) {
939 error = ENOMEM;
940 break;
941 }
942
943 s = splnet();
944 red_purgeq(rqp);
945 limit = fc->red_limit;
946 if (limit < fc->red_thmax)
947 limit = fc->red_thmax;
948 qlimit(rqp->rq_q) = limit;
949 fc->red_limit = limit; /* write back the new value */
950
951 red_destroy(rqp->rq_red);
952 rqp->rq_red = new;
953
954 splx(s);
955
956 /* write back new values */
957 fc->red_limit = limit;
958 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
959 fc->red_thmin = rqp->rq_red->red_thmin;
960 fc->red_thmax = rqp->rq_red->red_thmax;
961
962 } while (/*CONSTCOND*/ 0);
963 break;
964
965 case RED_SETDEFAULTS:
966 do {
967 struct redparams *rp;
968
969 rp = (struct redparams *)addr;
970
971 default_th_min = rp->th_min;
972 default_th_max = rp->th_max;
973 default_inv_pmax = rp->inv_pmax;
974 } while (/*CONSTCOND*/ 0);
975 break;
976
977 default:
978 error = EINVAL;
979 break;
980 }
981 return error;
982 }
983
984 static int
red_detach(rqp)985 red_detach(rqp)
986 red_queue_t *rqp;
987 {
988 red_queue_t *tmp;
989 int error = 0;
990
991 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
992 altq_disable(rqp->rq_ifq);
993
994 if ((error = altq_detach(rqp->rq_ifq)))
995 return (error);
996
997 if (red_list == rqp)
998 red_list = rqp->rq_next;
999 else {
1000 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
1001 if (tmp->rq_next == rqp) {
1002 tmp->rq_next = rqp->rq_next;
1003 break;
1004 }
1005 if (tmp == NULL)
1006 printf("red_detach: no state found in red_list!\n");
1007 }
1008
1009 red_destroy(rqp->rq_red);
1010 free(rqp->rq_q, M_DEVBUF);
1011 free(rqp, M_DEVBUF);
1012 return (error);
1013 }
1014
1015 /*
1016 * enqueue routine:
1017 *
1018 * returns: 0 when successfully queued.
1019 * ENOBUFS when drop occurs.
1020 */
1021 static int
red_enqueue(ifq,m,pktattr)1022 red_enqueue(ifq, m, pktattr)
1023 struct ifaltq *ifq;
1024 struct mbuf *m;
1025 struct altq_pktattr *pktattr;
1026 {
1027 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1028
1029 IFQ_LOCK_ASSERT(ifq);
1030
1031 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1032 return ENOBUFS;
1033 ifq->ifq_len++;
1034 return 0;
1035 }
1036
1037 /*
1038 * dequeue routine:
1039 * must be called in splimp.
1040 *
1041 * returns: mbuf dequeued.
1042 * NULL when no packet is available in the queue.
1043 */
1044
1045 static struct mbuf *
red_dequeue(ifq,op)1046 red_dequeue(ifq, op)
1047 struct ifaltq *ifq;
1048 int op;
1049 {
1050 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1051 struct mbuf *m;
1052
1053 IFQ_LOCK_ASSERT(ifq);
1054
1055 if (op == ALTDQ_POLL)
1056 return qhead(rqp->rq_q);
1057
1058 /* op == ALTDQ_REMOVE */
1059 m = red_getq(rqp->rq_red, rqp->rq_q);
1060 if (m != NULL)
1061 ifq->ifq_len--;
1062 return (m);
1063 }
1064
1065 static int
red_request(ifq,req,arg)1066 red_request(ifq, req, arg)
1067 struct ifaltq *ifq;
1068 int req;
1069 void *arg;
1070 {
1071 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1072
1073 IFQ_LOCK_ASSERT(ifq);
1074
1075 switch (req) {
1076 case ALTRQ_PURGE:
1077 red_purgeq(rqp);
1078 break;
1079 }
1080 return (0);
1081 }
1082
1083 static void
red_purgeq(rqp)1084 red_purgeq(rqp)
1085 red_queue_t *rqp;
1086 {
1087 _flushq(rqp->rq_q);
1088 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1089 rqp->rq_ifq->ifq_len = 0;
1090 }
1091
1092 #ifdef ALTQ_FLOWVALVE
1093
1094 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
1095 #define FV_PSCALE(x) ((x) << FV_PSHIFT)
1096 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
1097 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
1098 #define FV_FSCALE(x) ((x) << FV_FSHIFT)
1099 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
1100
1101 #define FV_TIMER (3 * hz) /* timer value for garbage collector */
1102 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
1103
1104 #define FV_N 10 /* update fve_f every FV_N packets */
1105
1106 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
1107 #define FV_TTHRESH 3 /* time threshold to delete fve */
1108 #define FV_ALPHA 5 /* extra packet count */
1109
1110 #define FV_STATS
1111
1112 #if (__FreeBSD_version > 300000)
1113 #define FV_TIMESTAMP(tp) getmicrotime(tp)
1114 #else
1115 #define FV_TIMESTAMP(tp) { (*(tp)) = time; }
1116 #endif
1117
1118 /*
1119 * Brtt table: 127 entry table to convert drop rate (p) to
1120 * the corresponding bandwidth fraction (f)
1121 * the following equation is implemented to use scaled values,
1122 * fve_p and fve_f, in the fixed point format.
1123 *
1124 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1125 * f = Brtt(p) / (max_th + alpha)
1126 */
1127 #define BRTT_SIZE 128
1128 #define BRTT_SHIFT 12
1129 #define BRTT_MASK 0x0007f000
1130 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
1131
1132 const int brtt_tab[BRTT_SIZE] = {
1133 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
1134 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
1135 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
1136 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
1137 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
1138 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
1139 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
1140 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
1141 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
1142 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
1143 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
1144 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
1145 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
1146 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
1147 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
1148 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
1149 };
1150
1151 static __inline struct fve *
flowlist_lookup(fv,pktattr,now)1152 flowlist_lookup(fv, pktattr, now)
1153 struct flowvalve *fv;
1154 struct altq_pktattr *pktattr;
1155 struct timeval *now;
1156 {
1157 struct fve *fve;
1158 int flows;
1159 struct ip *ip;
1160 #ifdef INET6
1161 struct ip6_hdr *ip6;
1162 #endif
1163 struct timeval tthresh;
1164
1165 if (pktattr == NULL)
1166 return (NULL);
1167
1168 tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
1169 flows = 0;
1170 /*
1171 * search the flow list
1172 */
1173 switch (pktattr->pattr_af) {
1174 case AF_INET:
1175 ip = (struct ip *)pktattr->pattr_hdr;
1176 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
1177 if (fve->fve_lastdrop.tv_sec == 0)
1178 break;
1179 if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
1180 fve->fve_lastdrop.tv_sec = 0;
1181 break;
1182 }
1183 if (fve->fve_flow.flow_af == AF_INET &&
1184 fve->fve_flow.flow_ip.ip_src.s_addr ==
1185 ip->ip_src.s_addr &&
1186 fve->fve_flow.flow_ip.ip_dst.s_addr ==
1187 ip->ip_dst.s_addr)
1188 return (fve);
1189 flows++;
1190 }
1191 break;
1192 #ifdef INET6
1193 case AF_INET6:
1194 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1195 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
1196 if (fve->fve_lastdrop.tv_sec == 0)
1197 break;
1198 if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
1199 fve->fve_lastdrop.tv_sec = 0;
1200 break;
1201 }
1202 if (fve->fve_flow.flow_af == AF_INET6 &&
1203 IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
1204 &ip6->ip6_src) &&
1205 IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
1206 &ip6->ip6_dst))
1207 return (fve);
1208 flows++;
1209 }
1210 break;
1211 #endif /* INET6 */
1212
1213 default:
1214 /* unknown protocol. no drop. */
1215 return (NULL);
1216 }
1217 fv->fv_flows = flows; /* save the number of active fve's */
1218 return (NULL);
1219 }
1220
1221 static __inline struct fve *
flowlist_reclaim(fv,pktattr)1222 flowlist_reclaim(fv, pktattr)
1223 struct flowvalve *fv;
1224 struct altq_pktattr *pktattr;
1225 {
1226 struct fve *fve;
1227 struct ip *ip;
1228 #ifdef INET6
1229 struct ip6_hdr *ip6;
1230 #endif
1231
1232 /*
1233 * get an entry from the tail of the LRU list.
1234 */
1235 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1236
1237 switch (pktattr->pattr_af) {
1238 case AF_INET:
1239 ip = (struct ip *)pktattr->pattr_hdr;
1240 fve->fve_flow.flow_af = AF_INET;
1241 fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1242 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1243 break;
1244 #ifdef INET6
1245 case AF_INET6:
1246 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1247 fve->fve_flow.flow_af = AF_INET6;
1248 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1249 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1250 break;
1251 #endif
1252 }
1253
1254 fve->fve_state = Green;
1255 fve->fve_p = 0.0;
1256 fve->fve_f = 0.0;
1257 fve->fve_ifseq = fv->fv_ifseq - 1;
1258 fve->fve_count = 0;
1259
1260 fv->fv_flows++;
1261 #ifdef FV_STATS
1262 fv->fv_stats.alloc++;
1263 #endif
1264 return (fve);
1265 }
1266
1267 static __inline void
flowlist_move_to_head(fv,fve)1268 flowlist_move_to_head(fv, fve)
1269 struct flowvalve *fv;
1270 struct fve *fve;
1271 {
1272 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1273 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1274 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1275 }
1276 }
1277
1278 #if 0 /* XXX: make the compiler happy (fv_alloc unused) */
1279 /*
1280 * allocate flowvalve structure
1281 */
1282 static struct flowvalve *
1283 fv_alloc(rp)
1284 struct red *rp;
1285 {
1286 struct flowvalve *fv;
1287 struct fve *fve;
1288 int i, num;
1289
1290 num = FV_FLOWLISTSIZE;
1291 fv = malloc(sizeof(struct flowvalve),
1292 M_DEVBUF, M_WAITOK);
1293 if (fv == NULL)
1294 return (NULL);
1295 bzero(fv, sizeof(struct flowvalve));
1296
1297 fv->fv_fves = malloc(sizeof(struct fve) * num,
1298 M_DEVBUF, M_WAITOK);
1299 if (fv->fv_fves == NULL) {
1300 free(fv, M_DEVBUF);
1301 return (NULL);
1302 }
1303 bzero(fv->fv_fves, sizeof(struct fve) * num);
1304
1305 fv->fv_flows = 0;
1306 TAILQ_INIT(&fv->fv_flowlist);
1307 for (i = 0; i < num; i++) {
1308 fve = &fv->fv_fves[i];
1309 fve->fve_lastdrop.tv_sec = 0;
1310 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
1311 }
1312
1313 /* initialize drop rate threshold in scaled fixed-point */
1314 fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
1315
1316 /* initialize drop rate to fraction table */
1317 fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE,
1318 M_DEVBUF, M_WAITOK);
1319 if (fv->fv_p2ftab == NULL) {
1320 free(fv->fv_fves, M_DEVBUF);
1321 free(fv, M_DEVBUF);
1322 return (NULL);
1323 }
1324 /*
1325 * create the p2f table.
1326 * (shift is used to keep the precision)
1327 */
1328 for (i = 1; i < BRTT_SIZE; i++) {
1329 int f;
1330
1331 f = brtt_tab[i] << 8;
1332 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
1333 }
1334
1335 return (fv);
1336 }
1337 #endif
1338
fv_destroy(fv)1339 static void fv_destroy(fv)
1340 struct flowvalve *fv;
1341 {
1342 free(fv->fv_p2ftab, M_DEVBUF);
1343 free(fv->fv_fves, M_DEVBUF);
1344 free(fv, M_DEVBUF);
1345 }
1346
1347 static __inline int
fv_p2f(fv,p)1348 fv_p2f(fv, p)
1349 struct flowvalve *fv;
1350 int p;
1351 {
1352 int val, f;
1353
1354 if (p >= BRTT_PMAX)
1355 f = fv->fv_p2ftab[BRTT_SIZE-1];
1356 else if ((val = (p & BRTT_MASK)))
1357 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1358 else
1359 f = fv->fv_p2ftab[1];
1360 return (f);
1361 }
1362
1363 /*
1364 * check if an arriving packet should be pre-dropped.
1365 * called from red_addq() when a packet arrives.
1366 * returns 1 when the packet should be pre-dropped.
1367 * should be called in splimp.
1368 */
1369 static int
fv_checkflow(fv,pktattr,fcache)1370 fv_checkflow(fv, pktattr, fcache)
1371 struct flowvalve *fv;
1372 struct altq_pktattr *pktattr;
1373 struct fve **fcache;
1374 {
1375 struct fve *fve;
1376 struct timeval now;
1377
1378 fv->fv_ifseq++;
1379 FV_TIMESTAMP(&now);
1380
1381 if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1382 /* no matching entry in the flowlist */
1383 return (0);
1384
1385 *fcache = fve;
1386
1387 /* update fraction f for every FV_N packets */
1388 if (++fve->fve_count == FV_N) {
1389 /*
1390 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
1391 */
1392 fve->fve_f =
1393 (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
1394 + fve->fve_f - FV_FUNSCALE(fve->fve_f);
1395 fve->fve_ifseq = fv->fv_ifseq;
1396 fve->fve_count = 0;
1397 }
1398
1399 /*
1400 * overpumping test
1401 */
1402 if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
1403 int fthresh;
1404
1405 /* calculate a threshold */
1406 fthresh = fv_p2f(fv, fve->fve_p);
1407 if (fve->fve_f > fthresh)
1408 fve->fve_state = Red;
1409 }
1410
1411 if (fve->fve_state == Red) {
1412 /*
1413 * backoff test
1414 */
1415 if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
1416 /* no drop for at least FV_BACKOFFTHRESH sec */
1417 fve->fve_p = 0;
1418 fve->fve_state = Green;
1419 #ifdef FV_STATS
1420 fv->fv_stats.escape++;
1421 #endif
1422 } else {
1423 /* block this flow */
1424 flowlist_move_to_head(fv, fve);
1425 fve->fve_lastdrop = now;
1426 #ifdef FV_STATS
1427 fv->fv_stats.predrop++;
1428 #endif
1429 return (1);
1430 }
1431 }
1432
1433 /*
1434 * p = (1 - Wp) * p
1435 */
1436 fve->fve_p -= FV_PUNSCALE(fve->fve_p);
1437 if (fve->fve_p < 0)
1438 fve->fve_p = 0;
1439 #ifdef FV_STATS
1440 fv->fv_stats.pass++;
1441 #endif
1442 return (0);
1443 }
1444
1445 /*
1446 * called from red_addq when a packet is dropped by red.
1447 * should be called in splimp.
1448 */
fv_dropbyred(fv,pktattr,fcache)1449 static void fv_dropbyred(fv, pktattr, fcache)
1450 struct flowvalve *fv;
1451 struct altq_pktattr *pktattr;
1452 struct fve *fcache;
1453 {
1454 struct fve *fve;
1455 struct timeval now;
1456
1457 if (pktattr == NULL)
1458 return;
1459 FV_TIMESTAMP(&now);
1460
1461 if (fcache != NULL)
1462 /* the fve of this packet is already cached */
1463 fve = fcache;
1464 else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1465 fve = flowlist_reclaim(fv, pktattr);
1466
1467 flowlist_move_to_head(fv, fve);
1468
1469 /*
1470 * update p: the following line cancels the update
1471 * in fv_checkflow() and calculate
1472 * p = Wp + (1 - Wp) * p
1473 */
1474 fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
1475
1476 fve->fve_lastdrop = now;
1477 }
1478
1479 #endif /* ALTQ_FLOWVALVE */
1480
1481 #ifdef KLD_MODULE
1482
1483 static struct altqsw red_sw =
1484 {"red", redopen, redclose, redioctl};
1485
1486 ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
1487 MODULE_VERSION(altq_red, 1);
1488
1489 #endif /* KLD_MODULE */
1490 #endif /* ALTQ3_COMPAT */
1491
1492 #endif /* ALTQ_RED */
1493