1 /* $OpenBSD: ifq.c,v 1.56 2025/02/03 08:58:52 mvs Exp $ */
2
3 /*
4 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include "bpfilter.h"
20 #include "kstat.h"
21
22 #include <sys/param.h>
23 #include <sys/systm.h>
24 #include <sys/socket.h>
25 #include <sys/mbuf.h>
26 #include <sys/proc.h>
27 #include <sys/sysctl.h>
28
29 #include <net/if.h>
30 #include <net/if_var.h>
31
32 #if NBPFILTER > 0
33 #include <net/bpf.h>
34 #endif
35
36 #if NKSTAT > 0
37 #include <sys/kstat.h>
38 #endif
39
40 /*
41 * priq glue
42 */
43 unsigned int priq_idx(unsigned int, const struct mbuf *);
44 struct mbuf *priq_enq(struct ifqueue *, struct mbuf *);
45 struct mbuf *priq_deq_begin(struct ifqueue *, void **);
46 void priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
47 void priq_purge(struct ifqueue *, struct mbuf_list *);
48
49 void *priq_alloc(unsigned int, void *);
50 void priq_free(unsigned int, void *);
51
52 const struct ifq_ops priq_ops = {
53 priq_idx,
54 priq_enq,
55 priq_deq_begin,
56 priq_deq_commit,
57 priq_purge,
58 priq_alloc,
59 priq_free,
60 };
61
62 const struct ifq_ops * const ifq_priq_ops = &priq_ops;
63
64 /*
65 * priq internal structures
66 */
67
68 struct priq {
69 struct mbuf_list pq_lists[IFQ_NQUEUES];
70 };
71
72 /*
73 * ifqueue serialiser
74 */
75
/* Task callbacks; each takes the pointer registered with the task. */
void	ifq_start_task(void *p);
void	ifq_restart_task(void *p);
void	ifq_barrier_task(void *p);
void	ifq_bundle_task(void *p);
80
81 static inline void
ifq_run_start(struct ifqueue * ifq)82 ifq_run_start(struct ifqueue *ifq)
83 {
84 ifq_serialize(ifq, &ifq->ifq_start);
85 }
86
87 void
ifq_serialize(struct ifqueue * ifq,struct task * t)88 ifq_serialize(struct ifqueue *ifq, struct task *t)
89 {
90 struct task work;
91
92 if (ISSET(t->t_flags, TASK_ONQUEUE))
93 return;
94
95 mtx_enter(&ifq->ifq_task_mtx);
96 if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
97 SET(t->t_flags, TASK_ONQUEUE);
98 TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
99 }
100
101 if (ifq->ifq_serializer == NULL) {
102 ifq->ifq_serializer = curcpu();
103
104 while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
105 TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
106 CLR(t->t_flags, TASK_ONQUEUE);
107 work = *t; /* copy to caller to avoid races */
108
109 mtx_leave(&ifq->ifq_task_mtx);
110
111 (*work.t_func)(work.t_arg);
112
113 mtx_enter(&ifq->ifq_task_mtx);
114 }
115
116 ifq->ifq_serializer = NULL;
117 }
118 mtx_leave(&ifq->ifq_task_mtx);
119 }
120
121 void
ifq_start(struct ifqueue * ifq)122 ifq_start(struct ifqueue *ifq)
123 {
124 if (ifq_len(ifq) >= min(ifq->ifq_if->if_txmit, ifq->ifq_maxlen)) {
125 task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
126 ifq_run_start(ifq);
127 } else
128 task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
129 }
130
131 void
ifq_start_task(void * p)132 ifq_start_task(void *p)
133 {
134 struct ifqueue *ifq = p;
135 struct ifnet *ifp = ifq->ifq_if;
136
137 if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
138 ifq_empty(ifq) || ifq_is_oactive(ifq))
139 return;
140
141 ifp->if_qstart(ifq);
142 }
143
144 void
ifq_set_oactive(struct ifqueue * ifq)145 ifq_set_oactive(struct ifqueue *ifq)
146 {
147 if (ifq->ifq_oactive)
148 return;
149
150 mtx_enter(&ifq->ifq_mtx);
151 if (!ifq->ifq_oactive) {
152 ifq->ifq_oactive = 1;
153 ifq->ifq_oactives++;
154 }
155 mtx_leave(&ifq->ifq_mtx);
156 }
157
158 void
ifq_deq_set_oactive(struct ifqueue * ifq)159 ifq_deq_set_oactive(struct ifqueue *ifq)
160 {
161 MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);
162
163 if (!ifq->ifq_oactive) {
164 ifq->ifq_oactive = 1;
165 ifq->ifq_oactives++;
166 }
167 }
168
169 void
ifq_restart_task(void * p)170 ifq_restart_task(void *p)
171 {
172 struct ifqueue *ifq = p;
173 struct ifnet *ifp = ifq->ifq_if;
174
175 ifq_clr_oactive(ifq);
176 ifp->if_qstart(ifq);
177 }
178
/* Bundle task body: p is the ifqueue; run its start task serialised. */
void
ifq_bundle_task(void *p)
{
	ifq_run_start((struct ifqueue *)p);
}
186
187 void
ifq_barrier(struct ifqueue * ifq)188 ifq_barrier(struct ifqueue *ifq)
189 {
190 struct cond c = COND_INITIALIZER();
191 struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);
192
193 task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
194
195 if (ifq->ifq_serializer == NULL)
196 return;
197
198 ifq_serialize(ifq, &t);
199
200 cond_wait(&c, "ifqbar");
201 }
202
/* Barrier task body: p is the struct cond that ifq_barrier() waits on. */
void
ifq_barrier_task(void *p)
{
	cond_signal((struct cond *)p);
}
210
211 /*
212 * ifqueue mbuf queue API
213 */
214
215 #if NKSTAT > 0
216 struct ifq_kstat_data {
217 struct kstat_kv kd_packets;
218 struct kstat_kv kd_bytes;
219 struct kstat_kv kd_qdrops;
220 struct kstat_kv kd_errors;
221 struct kstat_kv kd_qlen;
222 struct kstat_kv kd_maxqlen;
223 struct kstat_kv kd_oactive;
224 struct kstat_kv kd_oactives;
225 };
226
227 static const struct ifq_kstat_data ifq_kstat_tpl = {
228 KSTAT_KV_UNIT_INITIALIZER("packets",
229 KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
230 KSTAT_KV_UNIT_INITIALIZER("bytes",
231 KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES),
232 KSTAT_KV_UNIT_INITIALIZER("qdrops",
233 KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
234 KSTAT_KV_UNIT_INITIALIZER("errors",
235 KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
236 KSTAT_KV_UNIT_INITIALIZER("qlen",
237 KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),
238 KSTAT_KV_UNIT_INITIALIZER("maxqlen",
239 KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),
240 KSTAT_KV_INITIALIZER("oactive", KSTAT_KV_T_BOOL),
241 KSTAT_KV_INITIALIZER("oactives", KSTAT_KV_T_COUNTER32),
242 };
243
244 int
ifq_kstat_copy(struct kstat * ks,void * dst)245 ifq_kstat_copy(struct kstat *ks, void *dst)
246 {
247 struct ifqueue *ifq = ks->ks_softc;
248 struct ifq_kstat_data *kd = dst;
249
250 *kd = ifq_kstat_tpl;
251 kstat_kv_u64(&kd->kd_packets) = ifq->ifq_packets;
252 kstat_kv_u64(&kd->kd_bytes) = ifq->ifq_bytes;
253 kstat_kv_u64(&kd->kd_qdrops) = ifq->ifq_qdrops;
254 kstat_kv_u64(&kd->kd_errors) = ifq->ifq_errors;
255 kstat_kv_u32(&kd->kd_qlen) = ifq->ifq_len;
256 kstat_kv_u32(&kd->kd_maxqlen) = ifq->ifq_maxlen;
257 kstat_kv_bool(&kd->kd_oactive) = ifq->ifq_oactive;
258 kstat_kv_u32(&kd->kd_oactives) = ifq->ifq_oactives;
259
260 return (0);
261 }
262 #endif
263
264 void
ifq_init(struct ifqueue * ifq,struct ifnet * ifp,unsigned int idx)265 ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
266 {
267 ifq->ifq_if = ifp;
268 ifq->ifq_softnet = net_tq(idx);
269 ifq->ifq_softc = NULL;
270
271 mtx_init(&ifq->ifq_mtx, IPL_NET);
272
273 /* default to priq */
274 ifq->ifq_ops = &priq_ops;
275 ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);
276
277 ml_init(&ifq->ifq_free);
278 ifq->ifq_len = 0;
279
280 ifq->ifq_packets = 0;
281 ifq->ifq_bytes = 0;
282 ifq->ifq_qdrops = 0;
283 ifq->ifq_errors = 0;
284 ifq->ifq_mcasts = 0;
285
286 mtx_init(&ifq->ifq_task_mtx, IPL_NET);
287 TAILQ_INIT(&ifq->ifq_task_list);
288 ifq->ifq_serializer = NULL;
289 task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);
290
291 task_set(&ifq->ifq_start, ifq_start_task, ifq);
292 task_set(&ifq->ifq_restart, ifq_restart_task, ifq);
293
294 if (ifq->ifq_maxlen == 0)
295 ifq_init_maxlen(ifq, IFQ_MAXLEN);
296
297 ifq->ifq_idx = idx;
298
299 #if NKSTAT > 0
300 /* XXX xname vs driver name and unit */
301 ifq->ifq_kstat = kstat_create(ifp->if_xname, 0,
302 "txq", ifq->ifq_idx, KSTAT_T_KV, 0);
303 KASSERT(ifq->ifq_kstat != NULL);
304 kstat_set_mutex(ifq->ifq_kstat, &ifq->ifq_mtx);
305 ifq->ifq_kstat->ks_softc = ifq;
306 ifq->ifq_kstat->ks_datalen = sizeof(ifq_kstat_tpl);
307 ifq->ifq_kstat->ks_copy = ifq_kstat_copy;
308 kstat_install(ifq->ifq_kstat);
309 #endif
310 }
311
312 void
ifq_attach(struct ifqueue * ifq,const struct ifq_ops * newops,void * opsarg)313 ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
314 {
315 struct mbuf_list ml = MBUF_LIST_INITIALIZER();
316 struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
317 struct mbuf *m;
318 const struct ifq_ops *oldops;
319 void *newq, *oldq;
320
321 newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);
322
323 mtx_enter(&ifq->ifq_mtx);
324 ifq->ifq_ops->ifqop_purge(ifq, &ml);
325 ifq->ifq_len = 0;
326
327 oldops = ifq->ifq_ops;
328 oldq = ifq->ifq_q;
329
330 ifq->ifq_ops = newops;
331 ifq->ifq_q = newq;
332
333 while ((m = ml_dequeue(&ml)) != NULL) {
334 m = ifq->ifq_ops->ifqop_enq(ifq, m);
335 if (m != NULL) {
336 ifq->ifq_qdrops++;
337 ml_enqueue(&free_ml, m);
338 } else
339 ifq->ifq_len++;
340 }
341 mtx_leave(&ifq->ifq_mtx);
342
343 oldops->ifqop_free(ifq->ifq_idx, oldq);
344
345 ml_purge(&free_ml);
346 }
347
348 void
ifq_destroy(struct ifqueue * ifq)349 ifq_destroy(struct ifqueue *ifq)
350 {
351 struct mbuf_list ml = MBUF_LIST_INITIALIZER();
352
353 #if NKSTAT > 0
354 kstat_destroy(ifq->ifq_kstat);
355 #endif
356
357 NET_ASSERT_UNLOCKED();
358 if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle))
359 taskq_barrier(ifq->ifq_softnet);
360
361 /* don't need to lock because this is the last use of the ifq */
362
363 ifq->ifq_ops->ifqop_purge(ifq, &ml);
364 ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);
365
366 ml_purge(&ml);
367 }
368
369 void
ifq_add_data(struct ifqueue * ifq,struct if_data * data)370 ifq_add_data(struct ifqueue *ifq, struct if_data *data)
371 {
372 mtx_enter(&ifq->ifq_mtx);
373 data->ifi_opackets += ifq->ifq_packets;
374 data->ifi_obytes += ifq->ifq_bytes;
375 data->ifi_oqdrops += ifq->ifq_qdrops;
376 data->ifi_omcasts += ifq->ifq_mcasts;
377 /* ifp->if_data.ifi_oerrors */
378 mtx_leave(&ifq->ifq_mtx);
379 }
380
381 int
ifq_enqueue(struct ifqueue * ifq,struct mbuf * m)382 ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
383 {
384 struct mbuf *dm;
385
386 mtx_enter(&ifq->ifq_mtx);
387 dm = ifq->ifq_ops->ifqop_enq(ifq, m);
388 if (dm != m) {
389 ifq->ifq_packets++;
390 ifq->ifq_bytes += m->m_pkthdr.len;
391 if (ISSET(m->m_flags, M_MCAST))
392 ifq->ifq_mcasts++;
393 }
394
395 if (dm == NULL)
396 ifq->ifq_len++;
397 else
398 ifq->ifq_qdrops++;
399 mtx_leave(&ifq->ifq_mtx);
400
401 if (dm != NULL)
402 m_freem(dm);
403
404 return (dm == m ? ENOBUFS : 0);
405 }
406
407 static inline void
ifq_deq_enter(struct ifqueue * ifq)408 ifq_deq_enter(struct ifqueue *ifq)
409 {
410 mtx_enter(&ifq->ifq_mtx);
411 }
412
413 static inline void
ifq_deq_leave(struct ifqueue * ifq)414 ifq_deq_leave(struct ifqueue *ifq)
415 {
416 struct mbuf_list ml;
417
418 ml = ifq->ifq_free;
419 ml_init(&ifq->ifq_free);
420
421 mtx_leave(&ifq->ifq_mtx);
422
423 if (!ml_empty(&ml))
424 ml_purge(&ml);
425 }
426
427 struct mbuf *
ifq_deq_begin(struct ifqueue * ifq)428 ifq_deq_begin(struct ifqueue *ifq)
429 {
430 struct mbuf *m = NULL;
431 void *cookie;
432
433 ifq_deq_enter(ifq);
434 if (ifq->ifq_len == 0 ||
435 (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
436 ifq_deq_leave(ifq);
437 return (NULL);
438 }
439
440 m->m_pkthdr.ph_cookie = cookie;
441
442 return (m);
443 }
444
445 void
ifq_deq_commit(struct ifqueue * ifq,struct mbuf * m)446 ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
447 {
448 void *cookie;
449
450 KASSERT(m != NULL);
451 cookie = m->m_pkthdr.ph_cookie;
452
453 ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
454 ifq->ifq_len--;
455 ifq_deq_leave(ifq);
456 }
457
458 void
ifq_deq_rollback(struct ifqueue * ifq,struct mbuf * m)459 ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
460 {
461 KASSERT(m != NULL);
462
463 ifq_deq_leave(ifq);
464 }
465
466 struct mbuf *
ifq_dequeue(struct ifqueue * ifq)467 ifq_dequeue(struct ifqueue *ifq)
468 {
469 struct mbuf *m;
470
471 m = ifq_deq_begin(ifq);
472 if (m == NULL)
473 return (NULL);
474
475 ifq_deq_commit(ifq, m);
476
477 return (m);
478 }
479
480 int
ifq_deq_sleep(struct ifqueue * ifq,struct mbuf ** mp,int nbio,int priority,const char * wmesg,volatile unsigned int * sleeping,volatile unsigned int * alive)481 ifq_deq_sleep(struct ifqueue *ifq, struct mbuf **mp, int nbio, int priority,
482 const char *wmesg, volatile unsigned int *sleeping,
483 volatile unsigned int *alive)
484 {
485 struct mbuf *m;
486 void *cookie;
487 int error = 0;
488
489 ifq_deq_enter(ifq);
490 if (ifq->ifq_len == 0 && nbio)
491 error = EWOULDBLOCK;
492 else {
493 for (;;) {
494 m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie);
495 if (m != NULL) {
496 ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
497 ifq->ifq_len--;
498 *mp = m;
499 break;
500 }
501
502 (*sleeping)++;
503 error = msleep_nsec(ifq, &ifq->ifq_mtx,
504 priority, wmesg, INFSLP);
505 (*sleeping)--;
506 if (error != 0)
507 break;
508 if (!(*alive)) {
509 error = EIO;
510 break;
511 }
512 }
513 }
514 ifq_deq_leave(ifq);
515
516 return (error);
517 }
518
519 int
ifq_hdatalen(struct ifqueue * ifq)520 ifq_hdatalen(struct ifqueue *ifq)
521 {
522 struct mbuf *m;
523 int len = 0;
524
525 if (ifq_empty(ifq))
526 return (0);
527
528 m = ifq_deq_begin(ifq);
529 if (m != NULL) {
530 len = m->m_pkthdr.len;
531 ifq_deq_rollback(ifq, m);
532 }
533
534 return (len);
535 }
536
537 void
ifq_init_maxlen(struct ifqueue * ifq,unsigned int maxlen)538 ifq_init_maxlen(struct ifqueue *ifq, unsigned int maxlen)
539 {
540 /* this is not MP safe, use only during attach */
541 ifq->ifq_maxlen = maxlen;
542 }
543
544 unsigned int
ifq_purge(struct ifqueue * ifq)545 ifq_purge(struct ifqueue *ifq)
546 {
547 struct mbuf_list ml = MBUF_LIST_INITIALIZER();
548 unsigned int rv;
549
550 mtx_enter(&ifq->ifq_mtx);
551 ifq->ifq_ops->ifqop_purge(ifq, &ml);
552 rv = ifq->ifq_len;
553 ifq->ifq_len = 0;
554 ifq->ifq_qdrops += rv;
555 mtx_leave(&ifq->ifq_mtx);
556
557 KASSERT(rv == ml_len(&ml));
558
559 ml_purge(&ml);
560
561 return (rv);
562 }
563
564 void *
ifq_q_enter(struct ifqueue * ifq,const struct ifq_ops * ops)565 ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
566 {
567 mtx_enter(&ifq->ifq_mtx);
568 if (ifq->ifq_ops == ops)
569 return (ifq->ifq_q);
570
571 mtx_leave(&ifq->ifq_mtx);
572
573 return (NULL);
574 }
575
576 void
ifq_q_leave(struct ifqueue * ifq,void * q)577 ifq_q_leave(struct ifqueue *ifq, void *q)
578 {
579 KASSERT(q == ifq->ifq_q);
580 mtx_leave(&ifq->ifq_mtx);
581 }
582
583 void
ifq_mfreem(struct ifqueue * ifq,struct mbuf * m)584 ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
585 {
586 MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);
587
588 ifq->ifq_len--;
589 ifq->ifq_qdrops++;
590 ml_enqueue(&ifq->ifq_free, m);
591 }
592
593 void
ifq_mfreeml(struct ifqueue * ifq,struct mbuf_list * ml)594 ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
595 {
596 MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);
597
598 ifq->ifq_len -= ml_len(ml);
599 ifq->ifq_qdrops += ml_len(ml);
600 ml_enlist(&ifq->ifq_free, ml);
601 }
602
603 /*
604 * ifiq
605 */
606
607 #if NKSTAT > 0
608 struct ifiq_kstat_data {
609 struct kstat_kv kd_packets;
610 struct kstat_kv kd_bytes;
611 struct kstat_kv kd_fdrops;
612 struct kstat_kv kd_qdrops;
613 struct kstat_kv kd_errors;
614 struct kstat_kv kd_qlen;
615
616 struct kstat_kv kd_enqueues;
617 struct kstat_kv kd_dequeues;
618 };
619
620 static const struct ifiq_kstat_data ifiq_kstat_tpl = {
621 KSTAT_KV_UNIT_INITIALIZER("packets",
622 KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
623 KSTAT_KV_UNIT_INITIALIZER("bytes",
624 KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES),
625 KSTAT_KV_UNIT_INITIALIZER("fdrops",
626 KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
627 KSTAT_KV_UNIT_INITIALIZER("qdrops",
628 KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
629 KSTAT_KV_UNIT_INITIALIZER("errors",
630 KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
631 KSTAT_KV_UNIT_INITIALIZER("qlen",
632 KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),
633
634 KSTAT_KV_INITIALIZER("enqueues",
635 KSTAT_KV_T_COUNTER64),
636 KSTAT_KV_INITIALIZER("dequeues",
637 KSTAT_KV_T_COUNTER64),
638 };
639
640 int
ifiq_kstat_copy(struct kstat * ks,void * dst)641 ifiq_kstat_copy(struct kstat *ks, void *dst)
642 {
643 struct ifiqueue *ifiq = ks->ks_softc;
644 struct ifiq_kstat_data *kd = dst;
645
646 *kd = ifiq_kstat_tpl;
647 kstat_kv_u64(&kd->kd_packets) = ifiq->ifiq_packets;
648 kstat_kv_u64(&kd->kd_bytes) = ifiq->ifiq_bytes;
649 kstat_kv_u64(&kd->kd_fdrops) = ifiq->ifiq_fdrops;
650 kstat_kv_u64(&kd->kd_qdrops) = ifiq->ifiq_qdrops;
651 kstat_kv_u64(&kd->kd_errors) = ifiq->ifiq_errors;
652 kstat_kv_u32(&kd->kd_qlen) = ml_len(&ifiq->ifiq_ml);
653
654 kstat_kv_u64(&kd->kd_enqueues) = ifiq->ifiq_enqueues;
655 kstat_kv_u64(&kd->kd_dequeues) = ifiq->ifiq_dequeues;
656
657 return (0);
658 }
659 #endif
660
661 static void ifiq_process(void *);
662
663 void
ifiq_init(struct ifiqueue * ifiq,struct ifnet * ifp,unsigned int idx)664 ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
665 {
666 ifiq->ifiq_if = ifp;
667 ifiq->ifiq_softnet = net_tq(idx);
668 ifiq->ifiq_softc = NULL;
669
670 mtx_init(&ifiq->ifiq_mtx, IPL_NET);
671 ml_init(&ifiq->ifiq_ml);
672 task_set(&ifiq->ifiq_task, ifiq_process, ifiq);
673 ifiq->ifiq_pressure = 0;
674
675 ifiq->ifiq_packets = 0;
676 ifiq->ifiq_bytes = 0;
677 ifiq->ifiq_fdrops = 0;
678 ifiq->ifiq_qdrops = 0;
679 ifiq->ifiq_errors = 0;
680
681 ifiq->ifiq_idx = idx;
682
683 #if NKSTAT > 0
684 /* XXX xname vs driver name and unit */
685 ifiq->ifiq_kstat = kstat_create(ifp->if_xname, 0,
686 "rxq", ifiq->ifiq_idx, KSTAT_T_KV, 0);
687 KASSERT(ifiq->ifiq_kstat != NULL);
688 kstat_set_mutex(ifiq->ifiq_kstat, &ifiq->ifiq_mtx);
689 ifiq->ifiq_kstat->ks_softc = ifiq;
690 ifiq->ifiq_kstat->ks_datalen = sizeof(ifiq_kstat_tpl);
691 ifiq->ifiq_kstat->ks_copy = ifiq_kstat_copy;
692 kstat_install(ifiq->ifiq_kstat);
693 #endif
694 }
695
696 void
ifiq_destroy(struct ifiqueue * ifiq)697 ifiq_destroy(struct ifiqueue *ifiq)
698 {
699 #if NKSTAT > 0
700 kstat_destroy(ifiq->ifiq_kstat);
701 #endif
702
703 NET_ASSERT_UNLOCKED();
704 if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
705 taskq_barrier(ifiq->ifiq_softnet);
706
707 /* don't need to lock because this is the last use of the ifiq */
708 ml_purge(&ifiq->ifiq_ml);
709 }
710
/*
 * Backlog thresholds used by ifiq_input(), in packets already waiting on
 * the ifiq list: above ifiq_maxlen_drop new input is dropped, above
 * ifiq_maxlen_return ifiq_input() returns non-zero to its caller.
 */
unsigned int ifiq_maxlen_drop = 2048 * 5;
unsigned int ifiq_maxlen_return = 2048 * 3;
713
714 int
ifiq_input(struct ifiqueue * ifiq,struct mbuf_list * ml)715 ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml)
716 {
717 struct ifnet *ifp = ifiq->ifiq_if;
718 struct mbuf *m;
719 uint64_t packets;
720 uint64_t bytes = 0;
721 uint64_t fdrops = 0;
722 unsigned int len;
723 #if NBPFILTER > 0
724 caddr_t if_bpf;
725 #endif
726
727 if (ml_empty(ml))
728 return (0);
729
730 MBUF_LIST_FOREACH(ml, m) {
731 m->m_pkthdr.ph_ifidx = ifp->if_index;
732 m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
733 bytes += m->m_pkthdr.len;
734 }
735 packets = ml_len(ml);
736
737 #if NBPFILTER > 0
738 if_bpf = ifp->if_bpf;
739 if (if_bpf) {
740 struct mbuf_list ml0 = *ml;
741
742 ml_init(ml);
743
744 while ((m = ml_dequeue(&ml0)) != NULL) {
745 if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
746 m_freem(m);
747 fdrops++;
748 } else
749 ml_enqueue(ml, m);
750 }
751
752 if (ml_empty(ml)) {
753 mtx_enter(&ifiq->ifiq_mtx);
754 ifiq->ifiq_packets += packets;
755 ifiq->ifiq_bytes += bytes;
756 ifiq->ifiq_fdrops += fdrops;
757 mtx_leave(&ifiq->ifiq_mtx);
758
759 return (0);
760 }
761 }
762 #endif
763
764 mtx_enter(&ifiq->ifiq_mtx);
765 ifiq->ifiq_packets += packets;
766 ifiq->ifiq_bytes += bytes;
767 ifiq->ifiq_fdrops += fdrops;
768
769 len = ml_len(&ifiq->ifiq_ml);
770 if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR))) {
771 if (len > ifiq_maxlen_drop)
772 ifiq->ifiq_qdrops += ml_len(ml);
773 else {
774 ifiq->ifiq_enqueues++;
775 ml_enlist(&ifiq->ifiq_ml, ml);
776 }
777 }
778 mtx_leave(&ifiq->ifiq_mtx);
779
780 if (ml_empty(ml))
781 task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
782 else
783 ml_purge(ml);
784
785 return (len > ifiq_maxlen_return);
786 }
787
788 void
ifiq_add_data(struct ifiqueue * ifiq,struct if_data * data)789 ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
790 {
791 mtx_enter(&ifiq->ifiq_mtx);
792 data->ifi_ipackets += ifiq->ifiq_packets;
793 data->ifi_ibytes += ifiq->ifiq_bytes;
794 data->ifi_iqdrops += ifiq->ifiq_qdrops;
795 mtx_leave(&ifiq->ifiq_mtx);
796 }
797
798 int
ifiq_enqueue_qlim(struct ifiqueue * ifiq,struct mbuf * m,unsigned int qlim)799 ifiq_enqueue_qlim(struct ifiqueue *ifiq, struct mbuf *m, unsigned int qlim)
800 {
801 struct ifnet *ifp = ifiq->ifiq_if;
802 unsigned int len;
803 #if NBPFILTER > 0
804 caddr_t if_bpf = ifp->if_bpf;
805 #endif
806
807 m->m_pkthdr.ph_ifidx = ifp->if_index;
808 m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
809
810 #if NBPFILTER > 0
811 if_bpf = ifp->if_bpf;
812 if (if_bpf) {
813 if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
814 mtx_enter(&ifiq->ifiq_mtx);
815 ifiq->ifiq_packets++;
816 ifiq->ifiq_bytes += m->m_pkthdr.len;
817 ifiq->ifiq_fdrops++;
818 mtx_leave(&ifiq->ifiq_mtx);
819
820 m_freem(m);
821 return (0);
822 }
823 }
824 #endif
825
826 mtx_enter(&ifiq->ifiq_mtx);
827 ifiq->ifiq_packets++;
828 ifiq->ifiq_bytes += m->m_pkthdr.len;
829
830 if (qlim && ((len = ml_len(&ifiq->ifiq_ml) >= qlim))) {
831 ifiq->ifiq_qdrops++;
832 } else {
833 ifiq->ifiq_enqueues++;
834 ml_enqueue(&ifiq->ifiq_ml, m);
835 m = NULL;
836 }
837
838 mtx_leave(&ifiq->ifiq_mtx);
839
840 if (m) {
841 m_freem(m);
842 return (0);
843 }
844
845 task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
846
847 return (0);
848 }
849
850 static void
ifiq_process(void * arg)851 ifiq_process(void *arg)
852 {
853 struct ifiqueue *ifiq = arg;
854 struct mbuf_list ml;
855
856 if (ifiq_empty(ifiq))
857 return;
858
859 mtx_enter(&ifiq->ifiq_mtx);
860 ifiq->ifiq_dequeues++;
861 ml = ifiq->ifiq_ml;
862 ml_init(&ifiq->ifiq_ml);
863 mtx_leave(&ifiq->ifiq_mtx);
864
865 if_input_process(ifiq->ifiq_if, &ml);
866 }
867
/*
 * sysctl handler for the ifiq tunables.  The code that handled the
 * pressure knobs is compiled out, so every request currently returns
 * EOPNOTSUPP and none of the arguments are looked at.
 */
int
net_ifiq_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int error = EOPNOTSUPP;

	/* pressure is disabled for 6.6-release */
#if 0
	int val;

	if (namelen != 1)
		return (EISDIR);

	switch (name[0]) {
	case NET_LINK_IFRXQ_PRESSURE_RETURN:
		val = ifiq_pressure_return;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (val < 1 || val > ifiq_pressure_drop)
			return (EINVAL);
		ifiq_pressure_return = val;
		break;
	case NET_LINK_IFRXQ_PRESSURE_DROP:
		val = ifiq_pressure_drop;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (ifiq_pressure_return > val)
			return (EINVAL);
		ifiq_pressure_drop = val;
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
#endif

	return (error);
}
907
908 /*
909 * priq implementation
910 */
911
912 unsigned int
priq_idx(unsigned int nqueues,const struct mbuf * m)913 priq_idx(unsigned int nqueues, const struct mbuf *m)
914 {
915 unsigned int flow = 0;
916
917 if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
918 flow = m->m_pkthdr.ph_flowid;
919
920 return (flow % nqueues);
921 }
922
923 void *
priq_alloc(unsigned int idx,void * null)924 priq_alloc(unsigned int idx, void *null)
925 {
926 struct priq *pq;
927 int i;
928
929 pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
930 for (i = 0; i < IFQ_NQUEUES; i++)
931 ml_init(&pq->pq_lists[i]);
932 return (pq);
933 }
934
935 void
priq_free(unsigned int idx,void * pq)936 priq_free(unsigned int idx, void *pq)
937 {
938 free(pq, M_DEVBUF, sizeof(struct priq));
939 }
940
941 struct mbuf *
priq_enq(struct ifqueue * ifq,struct mbuf * m)942 priq_enq(struct ifqueue *ifq, struct mbuf *m)
943 {
944 struct priq *pq;
945 struct mbuf_list *pl;
946 struct mbuf *n = NULL;
947 unsigned int prio;
948
949 pq = ifq->ifq_q;
950 KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);
951
952 /* Find a lower priority queue to drop from */
953 if (ifq_len(ifq) >= ifq->ifq_maxlen) {
954 for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
955 pl = &pq->pq_lists[prio];
956 if (ml_len(pl) > 0) {
957 n = ml_dequeue(pl);
958 goto enqueue;
959 }
960 }
961 /*
962 * There's no lower priority queue that we can
963 * drop from so don't enqueue this one.
964 */
965 return (m);
966 }
967
968 enqueue:
969 pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
970 ml_enqueue(pl, m);
971
972 return (n);
973 }
974
975 struct mbuf *
priq_deq_begin(struct ifqueue * ifq,void ** cookiep)976 priq_deq_begin(struct ifqueue *ifq, void **cookiep)
977 {
978 struct priq *pq = ifq->ifq_q;
979 struct mbuf_list *pl;
980 unsigned int prio = nitems(pq->pq_lists);
981 struct mbuf *m;
982
983 do {
984 pl = &pq->pq_lists[--prio];
985 m = MBUF_LIST_FIRST(pl);
986 if (m != NULL) {
987 *cookiep = pl;
988 return (m);
989 }
990 } while (prio > 0);
991
992 return (NULL);
993 }
994
/*
 * Remove m from the list recorded in cookie by priq_deq_begin().  m must
 * still be the first packet on that list.
 */
void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *list = cookie;

	KASSERT(MBUF_LIST_FIRST(list) == m);

	ml_dequeue(list);
}
1004
1005 void
priq_purge(struct ifqueue * ifq,struct mbuf_list * ml)1006 priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
1007 {
1008 struct priq *pq = ifq->ifq_q;
1009 struct mbuf_list *pl;
1010 unsigned int prio = nitems(pq->pq_lists);
1011
1012 do {
1013 pl = &pq->pq_lists[--prio];
1014 ml_enlist(ml, pl);
1015 } while (prio > 0);
1016 }
1017