1 /*
2 * CoDel - The Controlled-Delay Active Queue Management algorithm
3 *
4 * Copyright (C) 2013 Ermal Luçi <eri@FreeBSD.org>
5 * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
6 * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
7 * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
8 * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions, and the following disclaimer,
15 * without modification.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote products
20 * derived from this software without specific prior written permission.
21 *
22 * Alternatively, provided that this notice is retained in full, this
23 * software may be distributed under the terms of the GNU General
24 * Public License ("GPL") version 2, in which case the provisions of the
25 * GPL apply INSTEAD OF those given above.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38 * DAMAGE.
39 */
40 #include "opt_altq.h"
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43
44 #ifdef ALTQ_CODEL /* CoDel is enabled by ALTQ_CODEL option in opt_altq.h */
45
46 #include <sys/param.h>
47 #include <sys/malloc.h>
48 #include <sys/mbuf.h>
49 #include <sys/socket.h>
50 #include <sys/systm.h>
51
52 #include <net/if.h>
53 #include <net/if_var.h>
54 #include <netinet/in.h>
55
56 #include <netpfil/pf/pf.h>
57 #include <netpfil/pf/pf_altq.h>
58 #include <net/altq/if_altq.h>
59 #include <net/altq/altq.h>
60 #include <net/altq/altq_codel.h>
61
62 static int codel_should_drop(struct codel *, class_queue_t *,
63 struct mbuf *, u_int64_t);
64 static void codel_Newton_step(struct codel_vars *);
65 static u_int64_t codel_control_law(u_int64_t t, u_int64_t, u_int32_t);
66
67 #define codel_time_after(a, b) ((int64_t)(a) - (int64_t)(b) > 0)
68 #define codel_time_after_eq(a, b) ((int64_t)(a) - (int64_t)(b) >= 0)
69 #define codel_time_before(a, b) ((int64_t)(a) - (int64_t)(b) < 0)
70 #define codel_time_before_eq(a, b) ((int64_t)(a) - (int64_t)(b) <= 0)
71
72 static int codel_request(struct ifaltq *, int, void *);
73
74 static int codel_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
75 static struct mbuf *codel_dequeue(struct ifaltq *, int);
76
77 int
codel_pfattach(struct pf_altq * a)78 codel_pfattach(struct pf_altq *a)
79 {
80 struct ifnet *ifp;
81
82 if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
83 return (EINVAL);
84
85 return (altq_attach(&ifp->if_snd, ALTQT_CODEL, a->altq_disc,
86 codel_enqueue, codel_dequeue, codel_request, NULL, NULL));
87 }
88
89 int
codel_add_altq(struct ifnet * ifp,struct pf_altq * a)90 codel_add_altq(struct ifnet *ifp, struct pf_altq *a)
91 {
92 struct codel_if *cif;
93 struct codel_opts *opts;
94
95 if (ifp == NULL)
96 return (EINVAL);
97 if (!ALTQ_IS_READY(&ifp->if_snd))
98 return (ENODEV);
99
100 opts = &a->pq_u.codel_opts;
101
102 cif = malloc(sizeof(struct codel_if), M_DEVBUF, M_NOWAIT | M_ZERO);
103 if (cif == NULL)
104 return (ENOMEM);
105 cif->cif_bandwidth = a->ifbandwidth;
106 cif->cif_ifq = &ifp->if_snd;
107
108 cif->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
109 if (cif->cl_q == NULL) {
110 free(cif, M_DEVBUF);
111 return (ENOMEM);
112 }
113
114 if (a->qlimit == 0)
115 a->qlimit = 50; /* use default. */
116 qlimit(cif->cl_q) = a->qlimit;
117 qtype(cif->cl_q) = Q_CODEL;
118 qlen(cif->cl_q) = 0;
119 qsize(cif->cl_q) = 0;
120
121 if (opts->target == 0)
122 opts->target = 5;
123 if (opts->interval == 0)
124 opts->interval = 100;
125 cif->codel.params.target = machclk_freq * opts->target / 1000;
126 cif->codel.params.interval = machclk_freq * opts->interval / 1000;
127 cif->codel.params.ecn = opts->ecn;
128 cif->codel.stats.maxpacket = 256;
129
130 cif->cl_stats.qlength = qlen(cif->cl_q);
131 cif->cl_stats.qlimit = qlimit(cif->cl_q);
132
133 /* keep the state in pf_altq */
134 a->altq_disc = cif;
135
136 return (0);
137 }
138
139 int
codel_remove_altq(struct pf_altq * a)140 codel_remove_altq(struct pf_altq *a)
141 {
142 struct codel_if *cif;
143
144 if ((cif = a->altq_disc) == NULL)
145 return (EINVAL);
146 a->altq_disc = NULL;
147
148 if (cif->cl_q)
149 free(cif->cl_q, M_DEVBUF);
150 free(cif, M_DEVBUF);
151
152 return (0);
153 }
154
155 int
codel_getqstats(struct pf_altq * a,void * ubuf,int * nbytes,int version)156 codel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version)
157 {
158 struct codel_if *cif;
159 struct codel_ifstats stats;
160 int error = 0;
161
162 if ((cif = altq_lookup(a->ifname, ALTQT_CODEL)) == NULL)
163 return (EBADF);
164
165 if (*nbytes < sizeof(stats))
166 return (EINVAL);
167
168 stats = cif->cl_stats;
169 stats.stats = cif->codel.stats;
170
171 if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
172 return (error);
173 *nbytes = sizeof(stats);
174
175 return (0);
176 }
177
178 static int
codel_request(struct ifaltq * ifq,int req,void * arg)179 codel_request(struct ifaltq *ifq, int req, void *arg)
180 {
181 struct codel_if *cif = (struct codel_if *)ifq->altq_disc;
182 struct mbuf *m;
183
184 IFQ_LOCK_ASSERT(ifq);
185
186 switch (req) {
187 case ALTRQ_PURGE:
188 if (!ALTQ_IS_ENABLED(cif->cif_ifq))
189 break;
190
191 if (qempty(cif->cl_q))
192 break;
193
194 while ((m = _getq(cif->cl_q)) != NULL) {
195 PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m));
196 m_freem(m);
197 IFQ_DEC_LEN(cif->cif_ifq);
198 }
199 cif->cif_ifq->ifq_len = 0;
200 break;
201 }
202
203 return (0);
204 }
205
206 static int
codel_enqueue(struct ifaltq * ifq,struct mbuf * m,struct altq_pktattr * pktattr)207 codel_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
208 {
209
210 struct codel_if *cif = (struct codel_if *) ifq->altq_disc;
211
212 IFQ_LOCK_ASSERT(ifq);
213
214 /* grab class set by classifier */
215 if ((m->m_flags & M_PKTHDR) == 0) {
216 /* should not happen */
217 printf("altq: packet for %s does not have pkthdr\n",
218 ifq->altq_ifp->if_xname);
219 m_freem(m);
220 PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m));
221 return (ENOBUFS);
222 }
223
224 if (codel_addq(&cif->codel, cif->cl_q, m)) {
225 PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m));
226 return (ENOBUFS);
227 }
228 IFQ_INC_LEN(ifq);
229
230 return (0);
231 }
232
233 static struct mbuf *
codel_dequeue(struct ifaltq * ifq,int op)234 codel_dequeue(struct ifaltq *ifq, int op)
235 {
236 struct codel_if *cif = (struct codel_if *)ifq->altq_disc;
237 struct mbuf *m;
238
239 IFQ_LOCK_ASSERT(ifq);
240
241 if (IFQ_IS_EMPTY(ifq))
242 return (NULL);
243
244 if (op == ALTDQ_POLL)
245 return (qhead(cif->cl_q));
246
247 m = codel_getq(&cif->codel, cif->cl_q);
248 if (m != NULL) {
249 IFQ_DEC_LEN(ifq);
250 PKTCNTR_ADD(&cif->cl_stats.cl_xmitcnt, m_pktlen(m));
251 return (m);
252 }
253
254 return (NULL);
255 }
256
257 struct codel *
codel_alloc(int target,int interval,int ecn)258 codel_alloc(int target, int interval, int ecn)
259 {
260 struct codel *c;
261
262 c = malloc(sizeof(*c), M_DEVBUF, M_NOWAIT | M_ZERO);
263 if (c != NULL) {
264 c->params.target = machclk_freq * target / 1000;
265 c->params.interval = machclk_freq * interval / 1000;
266 c->params.ecn = ecn;
267 c->stats.maxpacket = 256;
268 }
269
270 return (c);
271 }
272
273 void
codel_destroy(struct codel * c)274 codel_destroy(struct codel *c)
275 {
276
277 free(c, M_DEVBUF);
278 }
279
280 #define MTAG_CODEL 1438031249
281 int
codel_addq(struct codel * c,class_queue_t * q,struct mbuf * m)282 codel_addq(struct codel *c, class_queue_t *q, struct mbuf *m)
283 {
284 struct m_tag *mtag;
285 uint64_t *enqueue_time;
286
287 if (qlen(q) < qlimit(q)) {
288 mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL);
289 if (mtag == NULL)
290 mtag = m_tag_alloc(MTAG_CODEL, 0, sizeof(uint64_t),
291 M_NOWAIT);
292 if (mtag == NULL) {
293 m_freem(m);
294 return (-1);
295 }
296 enqueue_time = (uint64_t *)(mtag + 1);
297 *enqueue_time = read_machclk();
298 m_tag_prepend(m, mtag);
299 _addq(q, m);
300 return (0);
301 }
302 c->drop_overlimit++;
303 m_freem(m);
304
305 return (-1);
306 }
307
308 static int
codel_should_drop(struct codel * c,class_queue_t * q,struct mbuf * m,u_int64_t now)309 codel_should_drop(struct codel *c, class_queue_t *q, struct mbuf *m,
310 u_int64_t now)
311 {
312 struct m_tag *mtag;
313 uint64_t *enqueue_time;
314
315 if (m == NULL) {
316 c->vars.first_above_time = 0;
317 return (0);
318 }
319
320 mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL);
321 if (mtag == NULL) {
322 /* Only one warning per second. */
323 if (ppsratecheck(&c->last_log, &c->last_pps, 1))
324 printf("%s: could not found the packet mtag!\n",
325 __func__);
326 c->vars.first_above_time = 0;
327 return (0);
328 }
329 enqueue_time = (uint64_t *)(mtag + 1);
330 c->vars.ldelay = now - *enqueue_time;
331 c->stats.maxpacket = MAX(c->stats.maxpacket, m_pktlen(m));
332
333 if (codel_time_before(c->vars.ldelay, c->params.target) ||
334 qsize(q) <= c->stats.maxpacket) {
335 /* went below - stay below for at least interval */
336 c->vars.first_above_time = 0;
337 return (0);
338 }
339 if (c->vars.first_above_time == 0) {
340 /* just went above from below. If we stay above
341 * for at least interval we'll say it's ok to drop
342 */
343 c->vars.first_above_time = now + c->params.interval;
344 return (0);
345 }
346 if (codel_time_after(now, c->vars.first_above_time))
347 return (1);
348
349 return (0);
350 }
351
352 /*
353 * Run a Newton method step:
354 * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
355 *
356 * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
357 */
358 static void
codel_Newton_step(struct codel_vars * vars)359 codel_Newton_step(struct codel_vars *vars)
360 {
361 uint32_t invsqrt, invsqrt2;
362 uint64_t val;
363
364 /* sizeof_in_bits(rec_inv_sqrt) */
365 #define REC_INV_SQRT_BITS (8 * sizeof(u_int16_t))
366 /* needed shift to get a Q0.32 number from rec_inv_sqrt */
367 #define REC_INV_SQRT_SHIFT (32 - REC_INV_SQRT_BITS)
368
369 invsqrt = ((u_int32_t)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT;
370 invsqrt2 = ((u_int64_t)invsqrt * invsqrt) >> 32;
371 val = (3LL << 32) - ((u_int64_t)vars->count * invsqrt2);
372 val >>= 2; /* avoid overflow in following multiply */
373 val = (val * invsqrt) >> (32 - 2 + 1);
374
375 vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT;
376 }
377
378 static u_int64_t
codel_control_law(u_int64_t t,u_int64_t interval,u_int32_t rec_inv_sqrt)379 codel_control_law(u_int64_t t, u_int64_t interval, u_int32_t rec_inv_sqrt)
380 {
381
382 return (t + (u_int32_t)(((u_int64_t)interval *
383 (rec_inv_sqrt << REC_INV_SQRT_SHIFT)) >> 32));
384 }
385
386 struct mbuf *
codel_getq(struct codel * c,class_queue_t * q)387 codel_getq(struct codel *c, class_queue_t *q)
388 {
389 struct mbuf *m;
390 u_int64_t now;
391 int drop;
392
393 if ((m = _getq(q)) == NULL) {
394 c->vars.dropping = 0;
395 return (m);
396 }
397
398 now = read_machclk();
399 drop = codel_should_drop(c, q, m, now);
400 if (c->vars.dropping) {
401 if (!drop) {
402 /* sojourn time below target - leave dropping state */
403 c->vars.dropping = 0;
404 } else if (codel_time_after_eq(now, c->vars.drop_next)) {
405 /* It's time for the next drop. Drop the current
406 * packet and dequeue the next. The dequeue might
407 * take us out of dropping state.
408 * If not, schedule the next drop.
409 * A large backlog might result in drop rates so high
410 * that the next drop should happen now,
411 * hence the while loop.
412 */
413 while (c->vars.dropping &&
414 codel_time_after_eq(now, c->vars.drop_next)) {
415 c->vars.count++; /* don't care of possible wrap
416 * since there is no more
417 * divide */
418 codel_Newton_step(&c->vars);
419 /* TODO ECN */
420 PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m));
421 m_freem(m);
422 m = _getq(q);
423 if (!codel_should_drop(c, q, m, now))
424 /* leave dropping state */
425 c->vars.dropping = 0;
426 else
427 /* and schedule the next drop */
428 c->vars.drop_next =
429 codel_control_law(c->vars.drop_next,
430 c->params.interval,
431 c->vars.rec_inv_sqrt);
432 }
433 }
434 } else if (drop) {
435 /* TODO ECN */
436 PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m));
437 m_freem(m);
438
439 m = _getq(q);
440 drop = codel_should_drop(c, q, m, now);
441
442 c->vars.dropping = 1;
443 /* if min went above target close to when we last went below it
444 * assume that the drop rate that controlled the queue on the
445 * last cycle is a good starting point to control it now.
446 */
447 if (codel_time_before(now - c->vars.drop_next,
448 16 * c->params.interval)) {
449 c->vars.count = (c->vars.count - c->vars.lastcount) | 1;
450 /* we dont care if rec_inv_sqrt approximation
451 * is not very precise :
452 * Next Newton steps will correct it quadratically.
453 */
454 codel_Newton_step(&c->vars);
455 } else {
456 c->vars.count = 1;
457 c->vars.rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
458 }
459 c->vars.lastcount = c->vars.count;
460 c->vars.drop_next = codel_control_law(now, c->params.interval,
461 c->vars.rec_inv_sqrt);
462 }
463
464 return (m);
465 }
466
467 void
codel_getstats(struct codel * c,struct codel_stats * s)468 codel_getstats(struct codel *c, struct codel_stats *s)
469 {
470 *s = c->stats;
471 }
472
473 #endif /* ALTQ_CODEL */
474