1 /*        $NetBSD: mbuf.h,v 1.240 2024/05/12 10:34:56 rillig Exp $    */
2 
3 /*
4  * Copyright (c) 1996, 1997, 1999, 2001, 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center and Matt Thomas of 3am Software Foundry.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1988, 1993
35  *        The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *        @(#)mbuf.h          8.5 (Berkeley) 2/19/95
62  */
63 
64 #ifndef _SYS_MBUF_H_
65 #define _SYS_MBUF_H_
66 
67 #ifdef _KERNEL_OPT
68 #include "opt_mbuftrace.h"
69 #endif
70 
71 #ifndef M_WAITOK
72 #include <sys/malloc.h>
73 #endif
74 #include <sys/pool.h>
75 #include <sys/queue.h>
76 #if defined(_KERNEL)
77 #include <sys/percpu_types.h>
78 #include <sys/socket.h>       /* for AF_UNSPEC */
79 #include <sys/psref.h>
80 #endif /* defined(_KERNEL) */
81 
82 /* For offsetof() */
83 #if defined(_KERNEL) || defined(_STANDALONE)
84 #include <sys/systm.h>
85 #else
86 #include <stddef.h>
87 #endif
88 
89 #include <uvm/uvm_param.h>    /* for MIN_PAGE_SIZE */
90 
91 #include <net/if.h>
92 
93 /*
94  * Mbufs are of a single size, MSIZE (machine/param.h), which
95  * includes overhead.  An mbuf may add a single "mbuf cluster" of size
96  * MCLBYTES (also in machine/param.h), which has no additional overhead
97  * and is used instead of the internal data area; this is done when
98  * at least MINCLSIZE of data must be stored.
99  */
100 
/*
 * Packet tags structure: header of one tag on a packet's tag list
 * (the "tags" SLIST head in struct pkthdr).
 */
struct m_tag {
	SLIST_ENTRY(m_tag)	m_tag_link;	/* List of packet tags */
	uint16_t		m_tag_id;	/* Tag ID */
	uint16_t		m_tag_len;	/* Length of data */
};
107 
/*
 * mbuf ownership structure: a named owner that an mbuf can point at
 * through the mh_owner member of struct m_hdr.
 */
struct mowner {
	char mo_name[16];		/* owner name (fxp0) */
	char mo_descr[16];		/* owner description (input) */
	LIST_ENTRY(mowner) mo_link;	/* list linkage between owners */
	struct percpu *mo_counters;	/* per-CPU counters; presumably
					 * struct mowner_counter -- confirm */
};
115 
/*
 * Static initializer for a struct mowner: x initializes mo_name and y
 * initializes mo_descr; the members not named are zero-initialized.
 */
#define MOWNER_INIT(x, y) { .mo_name = x, .mo_descr = y }
117 
/*
 * Slot numbers for the per-owner counter arrays (mc_counter[] in the
 * kernel, mo_counter[] in struct mowner_user).  The values are written
 * out because they determine the layout of those arrays.
 */
enum mowner_counter_index {
	MOWNER_COUNTER_CLAIMS		= 0,	/* # of small mbuf claimed */
	MOWNER_COUNTER_RELEASES		= 1,	/* # of small mbuf released */
	MOWNER_COUNTER_CLUSTER_CLAIMS	= 2,	/* # of cluster mbuf claimed */
	MOWNER_COUNTER_CLUSTER_RELEASES	= 3,	/* # of cluster mbuf released */
	MOWNER_COUNTER_EXT_CLAIMS	= 4,	/* # of M_EXT mbuf claimed */
	MOWNER_COUNTER_EXT_RELEASES	= 5,	/* # of M_EXT mbuf released */

	MOWNER_COUNTER_NCOUNTERS	= 6,	/* number of slots; keep last */
};
128 
#if defined(_KERNEL)
/*
 * Kernel-only counter set for an mbuf owner: one u_long slot per
 * enum mowner_counter_index value.
 */
struct mowner_counter {
	u_long mc_counter[MOWNER_COUNTER_NCOUNTERS];
};
#endif
134 
/*
 * userland-exported version of struct mowner: same leading members, but
 * the counters are stored inline instead of behind a percpu pointer.
 */
struct mowner_user {
	char mo_name[16];		/* owner name (fxp0) */
	char mo_descr[16];		/* owner description (input) */
	LIST_ENTRY(mowner) mo_link;	/* unused padding; for compatibility */
	u_long mo_counter[MOWNER_COUNTER_NCOUNTERS]; /* counters */
};
142 
143 /*
144  * Macros for type conversion
145  * mtod(m,t) -      convert mbuf pointer to data pointer of correct type
146  */
147 #define mtod(m, t)  ((t)((m)->m_data))
148 
/*
 * header at beginning of each mbuf
 *
 * The mh_* members are reached through the m_* accessor macros (m_next,
 * m_len, m_data, ...) defined after MBUF_DEFINE.
 */
struct m_hdr {
	struct	mbuf *mh_next;		/* next buffer in chain */
	struct	mbuf *mh_nextpkt;	/* next chain in queue/record */
	char	*mh_data;		/* location of data */
	struct	mowner *mh_owner;	/* mbuf owner */
	int	mh_len;			/* amount of data in this mbuf */
	int	mh_flags;		/* flags; see below */
	paddr_t	mh_paddr;		/* physical address of mbuf */
	short	mh_type;		/* type of data in this mbuf */
};
160 
161 /*
162  * record/packet header in first mbuf of chain; valid if M_PKTHDR set
163  *
164  * A note about csum_data:
165  *
 *  o For the out-bound direction, the low 16 bits indicate the offset
 *    within the L4 header at which the final L4 checksum value is to be
 *    stored, and the high 16 bits are the length of the L3 header (the
 *    start of the data to be checksummed).
170  *
171  *  o For the in-bound direction, it is only valid if the M_CSUM_DATA flag is
172  *    set. In this case, an L4 checksum has been calculated by hardware and
173  *    is stored in csum_data, but it is up to software to perform final
174  *    verification.
175  *
176  * Note for in-bound TCP/UDP checksums: we expect the csum_data to NOT
177  * be bit-wise inverted (the final step in the calculation of an IP
178  * checksum) -- this is so we can accumulate the checksum for fragmented
179  * packets during reassembly.
180  *
181  * Size ILP32: 40
182  *       LP64: 56
183  */
struct pkthdr {
	/* ctx and index share storage; only one of them is usable at a time. */
	union {
		void		*ctx;		/* for M_GETCTX/M_SETCTX */
		if_index_t	index;		/* rcv interface index */
	} _rcvif;
#define rcvif_index		_rcvif.index
	SLIST_HEAD(packet_tags, m_tag) tags;	/* list of packet tags */
	int		len;			/* total packet length */
	int		csum_flags;		/* checksum flags (M_CSUM_*) */
	uint32_t	csum_data;		/* checksum data */
	u_int		segsz;			/* segment size */
	uint16_t	ether_vtag;		/* ethernet 802.1p+q vlan tag;
						 * valid when M_VLANTAG is set */
	uint16_t	pkthdr_flags;		/* flags for pkthdr, see below */
#define PKTHDR_FLAG_IPSEC_SKIP_PFIL	0x0001	/* skip pfil_run_hooks() after ipsec decrypt */

	/*
	 * Following three fields are open-coded struct altq_pktattr
	 * to rearrange struct pkthdr fields flexibly.
	 */
	int	pattr_af;		/* ALTQ: address family */
	void	*pattr_class;		/* ALTQ: sched class set by classifier */
	void	*pattr_hdr;		/* ALTQ: saved header position in mbuf */
};
207 
/*
 * Checksumming flags (csum_flags).
 *
 * Bit positions must stay in sync with the names listed in M_CSUM_BITS.
 * NOTE(review): M_CSUM_NO_PSEUDOHDR is 0x80000000, which does not fit in
 * a non-negative int, while pkthdr.csum_flags is declared int -- confirm
 * that users are fine with the implied signed/unsigned conversion.
 */
#define M_CSUM_TCPv4		0x00000001	/* TCP header/payload */
#define M_CSUM_UDPv4		0x00000002	/* UDP header/payload */
#define M_CSUM_TCP_UDP_BAD	0x00000004	/* TCP/UDP checksum bad */
#define M_CSUM_DATA		0x00000008	/* consult csum_data */
#define M_CSUM_TCPv6		0x00000010	/* IPv6 TCP header/payload */
#define M_CSUM_UDPv6		0x00000020	/* IPv6 UDP header/payload */
#define M_CSUM_IPv4		0x00000040	/* IPv4 header */
#define M_CSUM_IPv4_BAD		0x00000080	/* IPv4 header checksum bad */
#define M_CSUM_TSOv4		0x00000100	/* TCPv4 segmentation offload */
#define M_CSUM_TSOv6		0x00000200	/* TCPv6 segmentation offload */

/* Checksum-assist quirks: keep separate from jump-table bits. */
#define M_CSUM_BLANK		0x40000000	/* csum is missing */
#define M_CSUM_NO_PSEUDOHDR	0x80000000	/* Rx csum_data does not include
						 * the UDP/TCP pseudo-hdr, and
						 * is not yet 1s-complemented.
						 */
226 
/*
 * Names of the csum_flags bits as a bit-description string in the
 * snprintb(3) old-style layout: one leading base byte, then a
 * <bit number><name> pair per flag (bit numbers are 1-origin and written
 * as octal escapes).  One literal per flag; the concatenated string is
 * what matters.
 */
#define M_CSUM_BITS							\
	"\20"								\
	"\1TCPv4" "\2UDPv4" "\3TCP_UDP_BAD" "\4DATA"			\
	"\5TCPv6" "\6UDPv6" "\7IPv4" "\10IPv4_BAD"			\
	"\11TSOv4" "\12TSOv6"						\
	"\37BLANK" "\40NO_PSEUDOHDR"
230 
231 /*
232  * Macros for manipulating csum_data on outgoing packets. These are
233  * used to pass information down from the L4/L3 to the L2.
234  *
235  *   _IPHL:   Length of the IPv{4/6} header, plus the options; in other
236  *            words the offset of the UDP/TCP header in the packet.
237  *   _OFFSET: Offset of the checksum field in the UDP/TCP header.
238  */
239 #define M_CSUM_DATA_IPv4_IPHL(x)        ((x) >> 16)
240 #define M_CSUM_DATA_IPv4_OFFSET(x)      ((x) & 0xffff)
241 #define M_CSUM_DATA_IPv6_IPHL(x)        ((x) >> 16)
242 #define M_CSUM_DATA_IPv6_OFFSET(x)      ((x) & 0xffff)
/*
 * Store v as the L3 header length (upper 16 bits) of the csum_data lvalue
 * x, keeping the checksum-field offset held in the low 16 bits.
 *
 * The expansion is parenthesized as a whole so it behaves as a single
 * expression wherever it is used, and v is converted to uint32_t before
 * the shift so that a value with bit 15 set does not overflow a signed
 * int.  x is evaluated twice.
 */
#define M_CSUM_DATA_IPv6_SET(x, v)					\
	((x) = ((x) & 0xffff) | ((uint32_t)(v) << 16))
244 
245 /*
246  * Max # of pages we can attach to m_ext.  This is carefully chosen
247  * to be able to handle SOSEND_LOAN_CHUNK with our minimum sized page.
248  */
249 #ifdef MIN_PAGE_SIZE
250 #define M_EXT_MAXPAGES                  ((65536 / MIN_PAGE_SIZE) + 1)
251 #endif
252 
253 /*
254  * Description of external storage mapped into mbuf, valid if M_EXT set.
255  */
256 struct _m_ext_storage {
257           unsigned int ext_refcnt;
258           char *ext_buf;                          /* start of buffer */
259           void (*ext_free)              /* free routine if not the usual */
260                     (struct mbuf *, void *, size_t, void *);
261           void *ext_arg;                          /* argument for ext_free */
262           size_t ext_size;              /* size of buffer, for ext_free */
263 
264           union {
265                     /* M_EXT_CLUSTER: physical address */
266                     paddr_t extun_paddr;
267 #ifdef M_EXT_MAXPAGES
268                     /* M_EXT_PAGES: pages */
269                     struct vm_page *extun_pgs[M_EXT_MAXPAGES];
270 #endif
271           } ext_un;
272 #define ext_paddr   ext_un.extun_paddr
273 #define ext_pgs               ext_un.extun_pgs
274 };
275 
/*
 * External-storage part of an mbuf.  ext_ref names the mbuf whose
 * ext_storage describes the buffer; the m_ext accessor always goes
 * through it, and MCLINITREFERENCE points it at the mbuf itself.
 */
struct _m_ext {
	struct mbuf *ext_ref;
	struct _m_ext_storage ext_storage;
};
280 
/*
 * mbuf-layer name for POOL_PADDR_INVALID (presumably provided by
 * <sys/pool.h> -- confirm).
 */
#define M_PADDR_INVALID		POOL_PADDR_INVALID
282 
283 /*
284  * Definition of "struct mbuf".
285  * Don't change this without understanding how MHLEN/MLEN are defined.
286  */
287 #define MBUF_DEFINE(name, mhlen, mlen)                                          \
288           struct name {                                                                   \
289                     struct m_hdr m_hdr;                                         \
290                     union {                                                               \
291                               struct {                                          \
292                                         struct pkthdr MH_pkthdr;                \
293                                         union {                                           \
294                                                   struct _m_ext MH_ext;                   \
295                                                   char MH_databuf[(mhlen)];     \
296                                         } MH_dat;                               \
297                               } MH;                                                       \
298                               char M_databuf[(mlen)];                                     \
299                     } M_dat;                                                    \
300           }
/*
 * Short member names for the structure declared by MBUF_DEFINE.
 *
 * Note that m_ext is not the mbuf's own descriptor: it expands through
 * m_ext_ref, so it names the ext_storage of whichever mbuf m_ext_ref
 * points at.  m_ext_storage is the mbuf's own copy.
 */
#define m_next		m_hdr.mh_next
#define m_len		m_hdr.mh_len
#define m_data		m_hdr.mh_data
#define m_owner		m_hdr.mh_owner
#define m_type		m_hdr.mh_type
#define m_flags		m_hdr.mh_flags
#define m_nextpkt	m_hdr.mh_nextpkt
#define m_paddr		m_hdr.mh_paddr
#define m_pkthdr	M_dat.MH.MH_pkthdr
#define m_ext_storage	M_dat.MH.MH_dat.MH_ext.ext_storage
#define m_ext_ref	M_dat.MH.MH_dat.MH_ext.ext_ref
#define m_ext		m_ext_ref->m_ext_storage
#define m_pktdat	M_dat.MH.MH_dat.MH_databuf
#define m_dat		M_dat.M_databuf
315 
316 /*
317  * Dummy mbuf structure to calculate the right values for MLEN/MHLEN, taking
318  * into account inter-structure padding.
319  */
320 MBUF_DEFINE(_mbuf_dummy, 1, 1);
321 
/*
 * Data area sizes: whatever is left of the MSIZE bytes of an mbuf after
 * the offset at which the respective data area starts.
 */
/* normal data len */
#define MLEN		((int)(MSIZE - offsetof(struct _mbuf_dummy, m_dat)))
/* data len w/pkthdr */
#define MHLEN		((int)(MSIZE - offsetof(struct _mbuf_dummy, m_pktdat)))

/* One byte more than MHLEN and MLEN combined. */
#define MINCLSIZE	(MHLEN+MLEN+1)	/* smallest amount to put in cluster */
328 
329 /*
330  * The *real* struct mbuf
331  */
332 MBUF_DEFINE(mbuf, MHLEN, MLEN);
333 
/*
 * mbuf flags (m_flags).  Keep the bit positions in sync with the names
 * in M_FLAGS_BITS.  0x00000080 is not assigned here.
 */
#define M_EXT		0x00000001	/* has associated external storage */
#define M_PKTHDR	0x00000002	/* start of record */
#define M_EOR		0x00000004	/* end of record */
#define M_PROTO1	0x00000008	/* protocol-specific */

/* mbuf pkthdr flags, also in m_flags */
#define M_AUTHIPHDR	0x00000010	/* authenticated (IPsec) */
#define M_DECRYPTED	0x00000020	/* decrypted (IPsec) */
#define M_LOOP		0x00000040	/* received on loopback */
#define M_BCAST		0x00000100	/* send/received as L2 broadcast */
#define M_MCAST		0x00000200	/* send/received as L2 multicast */
#define M_CANFASTFWD	0x00000400	/* packet can be fast-forwarded */
#define M_ANYCAST6	0x00000800	/* received as IPv6 anycast */

#define M_LINK0		0x00001000	/* link layer specific flag */
#define M_LINK1		0x00002000	/* link layer specific flag */
#define M_LINK2		0x00004000	/* link layer specific flag */
#define M_LINK3		0x00008000	/* link layer specific flag */
#define M_LINK4		0x00010000	/* link layer specific flag */
#define M_LINK5		0x00020000	/* link layer specific flag */
#define M_LINK6		0x00040000	/* link layer specific flag */
#define M_LINK7		0x00080000	/* link layer specific flag */

#define M_VLANTAG	0x00100000	/* ether_vtag is valid */
359 
/*
 * additional flags for M_EXT mbufs
 *
 * M_EXT_FLAGS is the mask of the top eight bits of m_flags; the four
 * flags below are the bits of that mask currently assigned.
 */
#define M_EXT_FLAGS	0xff000000
#define M_EXT_CLUSTER	0x01000000	/* ext is a cluster */
#define M_EXT_PAGES	0x02000000	/* ext_pgs is valid */
#define M_EXT_ROMAP	0x04000000	/* ext mapping is r-o at MMU */
#define M_EXT_RW	0x08000000	/* ext storage is writable */
366 
/* for source-level compatibility: M_NOTIFICATION is the same bit as M_PROTO1 */
#define M_NOTIFICATION	M_PROTO1
369 
/*
 * Names of the m_flags bits as a bit-description string in the
 * snprintb(3) old-style layout: one leading base byte, then a
 * <bit number><name> pair per flag (bit numbers are 1-origin and written
 * as octal escapes).  Bit 8 (0x80) has no flag and is labelled "NONE".
 */
#define M_FLAGS_BITS							\
	"\20"								\
	"\1EXT" "\2PKTHDR" "\3EOR" "\4PROTO1"				\
	"\5AUTHIPHDR" "\6DECRYPTED" "\7LOOP" "\10NONE"			\
	"\11BCAST" "\12MCAST" "\13CANFASTFWD" "\14ANYCAST6"		\
	"\15LINK0" "\16LINK1" "\17LINK2" "\20LINK3"			\
	"\21LINK4" "\22LINK5" "\23LINK6" "\24LINK7"			\
	"\25VLANTAG"							\
	"\31EXT_CLUSTER" "\32EXT_PAGES" "\33EXT_ROMAP" "\34EXT_RW"
376 
/*
 * Flags carried over when a packet header is copied, listed in order of
 * bit value.  M_EXT, M_PROTO1, M_LINK3..M_LINK7 and the M_EXT_* flags are
 * not part of the set.
 */
#define M_COPYFLAGS							\
	(M_PKTHDR | M_EOR |						\
	 M_AUTHIPHDR | M_DECRYPTED | M_LOOP |				\
	 M_BCAST | M_MCAST | M_CANFASTFWD | M_ANYCAST6 |		\
	 M_LINK0 | M_LINK1 | M_LINK2 |					\
	 M_VLANTAG)
381 
/*
 * flag copied when shallow-copying external storage: M_EXT itself plus
 * every bit covered by the M_EXT_FLAGS mask.
 */
#define M_EXTCOPYFLAGS	(M_EXT|M_EXT_FLAGS)
384 
/*
 * mbuf types (m_type).  The values double as indices into mbuftypes[],
 * so the two lists have to be kept in the same order.
 */
#define MT_FREE		0	/* should be on free list */
#define MT_DATA		1	/* dynamic (data) allocation */
#define MT_HEADER	2	/* packet header */
#define MT_SONAME	3	/* socket name */
#define MT_SOOPTS	4	/* socket options */
#define MT_FTABLE	5	/* fragment reassembly header */
#define MT_CONTROL	6	/* extra-data protocol message */
#define MT_OOBDATA	7	/* expedited data  */
394 
#ifdef MBUFTYPES
/*
 * Name strings for the MT_* mbuf types, keyed by type value.  The table
 * is defined only in a translation unit that defines MBUFTYPES before
 * including this header; everyone else gets the declaration.
 */
const char * const mbuftypes[] = {
	[MT_FREE]	= "mbfree",
	[MT_DATA]	= "mbdata",
	[MT_HEADER]	= "mbheader",
	[MT_SONAME]	= "mbsoname",
	[MT_SOOPTS]	= "mbsopts",
	[MT_FTABLE]	= "mbftable",
	[MT_CONTROL]	= "mbcontrol",
	[MT_OOBDATA]	= "mboobdata",
};
#else
extern const char * const mbuftypes[];
#endif
409 
/*
 * flags to m_get/MGET
 *
 * These are the mbuf-layer spellings of M_NOWAIT / M_WAITOK (the header
 * pulls in <sys/malloc.h> above when M_WAITOK is not yet defined).
 */
#define M_DONTWAIT	M_NOWAIT
#define M_WAIT		M_WAITOK
413 
#ifdef MBUFTRACE
/*
 * Mbuf allocation tracing.
 *
 * With MBUFTRACE the hooks are real functions declared here and defined
 * elsewhere; without it every hook is a macro expanding to __nothing, so
 * call sites need no #ifdef of their own.
 */
void mowner_init_owner(struct mowner *, const char *, const char *);
void mowner_init(struct mbuf *, int);
void mowner_ref(struct mbuf *, int);
void m_claim(struct mbuf *, struct mowner *);
void mowner_revoke(struct mbuf *, bool, int);
void mowner_attach(struct mowner *);
void mowner_detach(struct mowner *);
void m_claimm(struct mbuf *, struct mowner *);
#else
/*
 * Tracing disabled: the arguments do not appear in the expansion and are
 * therefore never evaluated.  __nothing is defined outside this file
 * (presumably an empty expression -- confirm).
 */
#define mowner_init_owner(mo, n, d)	__nothing
#define mowner_init(m, type)		__nothing
#define mowner_ref(m, flags)		__nothing
#define mowner_revoke(m, all, flags)	__nothing
#define m_claim(m, mowner)		__nothing
#define mowner_attach(mo)		__nothing
#define mowner_detach(mo)		__nothing
#define m_claimm(m, mo)			__nothing
#endif
434 
/*
 * Upper-case spellings of the tracing hooks; each forwards its arguments
 * unchanged to m_claim() / mowner_attach() / mowner_detach().
 */
#define MCLAIM(m, mo)		m_claim((m), (mo))
#define MOWNER_ATTACH(mo)	mowner_attach(mo)
#define MOWNER_DETACH(mo)	mowner_detach(mo)
438 
439 /*
440  * mbuf allocation/deallocation macros:
441  *
442  *        MGET(struct mbuf *m, int how, int type)
443  * allocates an mbuf and initializes it to contain internal data.
444  *
445  *        MGETHDR(struct mbuf *m, int how, int type)
446  * allocates an mbuf and initializes it to contain a packet header
447  * and internal data.
448  *
449  * If 'how' is M_WAIT, these macros (and the corresponding functions)
450  * are guaranteed to return successfully.
451  */
452 #define MGET(m, how, type)    m = m_get((how), (type))
453 #define MGETHDR(m, how, type) m = m_gethdr((how), (type))
454 
455 #if defined(_KERNEL)
456 
/*
 * MCLINITREFERENCE(m): make m refer to its own external-storage
 * descriptor and start that descriptor's reference count at 1.
 *
 * m must not have M_EXT set yet (asserted).  m_ext_ref has to be assigned
 * before ext_refcnt, because the m_ext accessor expands to
 * m_ext_ref->m_ext_storage.
 */
#define MCLINITREFERENCE(m)						\
do {									\
	KASSERT(((m)->m_flags & M_EXT) == 0);				\
	(m)->m_ext_ref = (m);						\
	(m)->m_ext.ext_refcnt = 1;					\
} while (0)
463 
464 /*
465  * Macros for mbuf external storage.
466  *
467  * MCLGET allocates and adds an mbuf cluster to a normal mbuf;
468  * the flag M_EXT is set upon success.
469  *
470  * MEXTMALLOC allocates external storage and adds it to
471  * a normal mbuf; the flag M_EXT is set upon success.
472  *
473  * MEXTADD adds pre-allocated external storage to
474  * a normal mbuf; the flag M_EXT is set upon success.
475  */
476 
/* MCLGET(m, how): forwards to m_clget(), which is declared elsewhere. */
#define MCLGET(m, how)	m_clget((m), (how))
478 
/*
 * MEXTMALLOC(m, size, how): allocate size bytes with malloc() and attach
 * them to m as writable external storage.
 *
 * The malloc() result is stored in m's own ext_buf slot (m_ext_storage),
 * since m_ext_ref is not valid until MCLINITREFERENCE has run.  The
 * second malloc() argument is passed as 0.
 *
 * On success m_data points at the new buffer, the previous M_EXT/M_EXT_*
 * bits are replaced by M_EXT|M_EXT_RW, and ext_free/ext_arg are cleared.
 * On failure nothing but that ext_buf slot has been written and m_flags
 * is untouched.  m and size are evaluated more than once.
 */
#define MEXTMALLOC(m, size, how)					\
do {									\
	(m)->m_ext_storage.ext_buf = malloc((size), 0, (how));		\
	if ((m)->m_ext_storage.ext_buf != NULL) {			\
		MCLINITREFERENCE(m);					\
		(m)->m_data = (m)->m_ext.ext_buf;			\
		(m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) |	\
				M_EXT|M_EXT_RW;				\
		(m)->m_ext.ext_size = (size);				\
		(m)->m_ext.ext_free = NULL;				\
		(m)->m_ext.ext_arg = NULL;				\
		mowner_ref((m), M_EXT);					\
	}								\
} while (0)
493 
/*
 * MEXTADD(m, buf, size, type, free, arg): attach the caller-supplied
 * buffer buf of size bytes to m as external storage.
 *
 *   free/arg: stored as ext_free/ext_arg
 *   type:     not referenced by the expansion
 *
 * m_data is pointed at buf and the previous M_EXT/M_EXT_* bits are
 * replaced by M_EXT alone; M_EXT_RW is not set here.  There is no
 * failure path.  m is evaluated more than once.
 */
#define MEXTADD(m, buf, size, type, free, arg)				\
do {									\
	MCLINITREFERENCE(m);						\
	(m)->m_data = (m)->m_ext.ext_buf = (char *)(buf);		\
	(m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | M_EXT;	\
	(m)->m_ext.ext_size = (size);					\
	(m)->m_ext.ext_free = (free);					\
	(m)->m_ext.ext_arg = (arg);					\
	mowner_ref((m), M_EXT);						\
} while (0)
504 
/*
 * M_BUFADDR(m): start of the buffer that holds m's data -- the external
 * buffer if M_EXT is set, else the data area after the packet header if
 * M_PKTHDR is set, else the plain internal data area.
 */
#define M_BUFADDR(m)							\
	(((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf :			\
	    ((m)->m_flags & M_PKTHDR) ? (m)->m_pktdat : (m)->m_dat)
508 
/*
 * M_BUFSIZE(m): size of the buffer selected by M_BUFADDR(m) -- ext_size,
 * MHLEN or MLEN.  ext_size is a size_t while MHLEN/MLEN are int, so the
 * conditional expression takes the converted common type of the two.
 */
#define M_BUFSIZE(m)							\
	(((m)->m_flags & M_EXT) ? (m)->m_ext.ext_size :			\
	    ((m)->m_flags & M_PKTHDR) ? MHLEN : MLEN)
512 
/*
 * MRESETDATA(m): point m_data back at the start of m's buffer.  m_len is
 * not touched.  Expands to a bare assignment; use it as a statement.
 */
#define MRESETDATA(m)	(m)->m_data = M_BUFADDR(m)
514 
515 /*
516  * Compute the offset of the beginning of the data buffer of a non-ext
517  * mbuf.
518  */
519 #define M_BUFOFFSET(m)                                                                    \
520           (((m)->m_flags & M_PKTHDR) ?                                          \
521            offsetof(struct mbuf, m_pktdat) : offsetof(struct mbuf, m_dat))
522 
523 /*
524  * Determine if an mbuf's data area is read-only.  This is true
525  * if external storage is read-only mapped, or not marked as R/W,
526  * or referenced by more than one mbuf.
527  */
528 #define M_READONLY(m)                                                                     \
529           (((m)->m_flags & M_EXT) != 0 &&                                                 \
530             (((m)->m_flags & (M_EXT_ROMAP|M_EXT_RW)) != M_EXT_RW ||   \
531             (m)->m_ext.ext_refcnt > 1))
532 
/*
 * M_UNWRITABLE(m, len): true if the first mbuf holds fewer than len bytes
 * (m_len < len) or its data area is read-only per M_READONLY().
 */
#define M_UNWRITABLE(__m, __len)					\
	((__m)->m_len < (__len) || M_READONLY((__m)))
535 
536 /*
537  * Determine if an mbuf's data area is read-only at the MMU.
538  */
539 #define M_ROMAP(m)                                                              \
540           (((m)->m_flags & (M_EXT|M_EXT_ROMAP)) == (M_EXT|M_EXT_ROMAP))
541 
542 /*
543  * Compute the amount of space available before the current start of
544  * data in an mbuf.
545  */
546 #define M_LEADINGSPACE(m)                                                       \
547           (M_READONLY((m)) ? 0 : ((m)->m_data - M_BUFADDR(m)))
548 
549 /*
550  * Compute the amount of space available
551  * after the end of data in an mbuf.
552  */
553 #define _M_TRAILINGSPACE(m)                                                     \
554           ((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size - \
555            ((m)->m_data + (m)->m_len) :                                         \
556            &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len))
557 
/*
 * M_TRAILINGSPACE(m): like _M_TRAILINGSPACE(), but reports 0 for an mbuf
 * whose data area is read-only.
 */
#define M_TRAILINGSPACE(m)						\
	(M_READONLY((m)) ? 0 : _M_TRAILINGSPACE((m)))
560 
561 /*
562  * Arrange to prepend space of size plen to mbuf m.
563  * If a new mbuf must be allocated, how specifies whether to wait.
564  * If how is M_DONTWAIT and allocation fails, the original mbuf chain
565  * is freed and m is set to NULL.
566  */
567 #define M_PREPEND(m, plen, how)                                                           \
568 do {                                                                                      \
569           if (M_LEADINGSPACE(m) >= (plen)) {                                    \
570                     (m)->m_data -= (plen);                                                \
571                     (m)->m_len += (plen);                                                 \
572           } else                                                                          \
573                     (m) = m_prepend((m), (plen), (how));                        \
574           if ((m) && (m)->m_flags & M_PKTHDR)                                   \
575                     (m)->m_pkthdr.len += (plen);                                \
576 } while (0)
577 
/*
 * Re-type mbuf m as t: the per-type statistics are moved from the old
 * type to the new one, then m_type is updated.  Changing to MT_FREE is
 * not allowed (asserted).
 */
#define MCHTYPE(m, t)							\
do {									\
	KASSERT((t) != MT_FREE);					\
	mbstat_type_add((m)->m_type, -1);				\
	mbstat_type_add((t), 1);					\
	(m)->m_type = (t);						\
} while (0)
586 
/*
 * M_VERIFY_PACKET(m): calls m_verify_packet() in DIAGNOSTIC kernels and
 * expands to __nothing (m is not evaluated) otherwise.
 */
#ifdef DIAGNOSTIC
#define M_VERIFY_PACKET(m)	m_verify_packet(m)
#else
#define M_VERIFY_PACKET(m)	__nothing
#endif
592 
/* Special length value meaning "copy all". */
#define M_COPYALL	(-1)
595 
596 /*
597  * Allow drivers and/or protocols to store private context information.
598  */
599 #define M_GETCTX(m, t)                  ((t)(m)->m_pkthdr._rcvif.ctx)
600 #define M_SETCTX(m, c)                  ((void)((m)->m_pkthdr._rcvif.ctx = (void *)(c)))
601 #define M_CLEARCTX(m)                   M_SETCTX((m), NULL)
602 
603 /*
604  * M_REGION_GET ensures that the "len"-sized region of type "typ" starting
605  * from "off" within "m" is located in a single mbuf, contiguously.
606  *
607  * The pointer to the region will be returned to pointer variable "val".
608  */
609 #define M_REGION_GET(val, typ, m, off, len) \
610 do {                                                                                      \
611           struct mbuf *_t;                                                      \
612           int _tmp;                                                             \
613           if ((m)->m_len >= (off) + (len))                                      \
614                     (val) = (typ)(mtod((m), char *) + (off));                   \
615           else {                                                                          \
616                     _t = m_pulldown((m), (off), (len), &_tmp);                  \
617                     if (_t) {                                                   \
618                               if (_t->m_len < _tmp + (len))                     \
619                                         panic("m_pulldown malfunction");        \
620                               (val) = (typ)(mtod(_t, char *) + _tmp); \
621                     } else {                                                    \
622                               (val) = (typ)NULL;                                \
623                               (m) = NULL;                                                 \
624                     }                                                                     \
625           }                                                                               \
626 } while (0)
627 
628 #endif /* defined(_KERNEL) */
629 
630 /*
631  * Simple mbuf queueing system
632  *
633  * this is basically a SIMPLEQ adapted to mbuf use (ie using
634  * m_nextpkt instead of field.sqe_next).
635  *
636  * m_next is ignored, so queueing chains of mbufs is possible
637  */
638 #define MBUFQ_HEAD(name)                                              \
639 struct name {                                                                   \
640           struct mbuf *mq_first;                                                \
641           struct mbuf **mq_last;                                                \
642 }
643 
/*
 * MBUFQ_INIT(q): make q the empty queue -- no first packet, and the tail
 * slot is the head's own mq_first.
 */
#define MBUFQ_INIT(q)		do {					\
	(q)->mq_first = NULL;						\
	(q)->mq_last = &(q)->mq_first;					\
} while (0)
648 
/*
 * MBUFQ_ENQUEUE(q, m): append packet m to the tail of q.  m's m_nextpkt
 * is cleared first, m is stored in the current tail slot, and m's
 * m_nextpkt becomes the new tail slot.  m is evaluated more than once.
 */
#define MBUFQ_ENQUEUE(q, m)	do {					\
	(m)->m_nextpkt = NULL;						\
	*(q)->mq_last = (m);						\
	(q)->mq_last = &(m)->m_nextpkt;					\
} while (0)
654 
/*
 * Insert packet `m' at the head of `q'.  m's m_nextpkt is set to the old
 * first packet; if there was none (queue was empty) the tail link slot
 * is moved to m's m_nextpkt.  Both arguments are expanded more than once.
 */
#define MBUFQ_PREPEND(q, m)	do {				\
	if (((m)->m_nextpkt = (q)->mq_first) == NULL)		\
		(q)->mq_last = &(m)->m_nextpkt;			\
	(q)->mq_first = (m);					\
} while (0)
660 
/*
 * Remove the first packet from `q' and assign it to `m' (which must be an
 * lvalue); `m' is NULL if the queue was empty.  If the removed packet was
 * the only one, the tail link slot is reset to mq_first; otherwise the
 * removed packet's m_nextpkt is cleared so it no longer refers to the
 * queue.  Both arguments are expanded more than once.
 */
#define MBUFQ_DEQUEUE(q, m)	do {				\
	if (((m) = (q)->mq_first) != NULL) {			\
		if (((q)->mq_first = (m)->m_nextpkt) == NULL)	\
			(q)->mq_last = &(q)->mq_first;		\
		else						\
			(m)->m_nextpkt = NULL;			\
	}							\
} while (0)
669 
/*
 * Empty `q': unlink each packet in turn, pass it to m_freem(), and
 * finally reset the tail link slot to mq_first.  The unlink happens
 * before the m_freem() call, so the freed packet is never read again.
 */
#define MBUFQ_DRAIN(q)		do {				\
	struct mbuf *__m0;					\
	while ((__m0 = (q)->mq_first) != NULL) {		\
		(q)->mq_first = __m0->m_nextpkt;		\
		m_freem(__m0);					\
	}							\
	(q)->mq_last = &(q)->mq_first;				\
} while (0)
678 
/*
 * Accessors.
 *
 * MBUFQ_FIRST: first packet on `q' (NULL when empty).
 * MBUFQ_NEXT:  packet linked after `m' via m_nextpkt.
 * MBUFQ_LAST:  contents of the tail link slot.
 *
 * NOTE(review): with MBUFQ_INIT/MBUFQ_ENQUEUE as defined in this file the
 * tail link slot always holds NULL, so MBUFQ_LAST appears to evaluate to
 * NULL rather than to the last packet -- confirm before relying on it.
 */
#define MBUFQ_FIRST(q)		((q)->mq_first)
#define MBUFQ_NEXT(m)		((m)->m_nextpkt)
#define MBUFQ_LAST(q)		(*(q)->mq_last)
682 
683 /*
684  * Mbuf statistics.
685  * For statistics related to mbuf and cluster allocations, see also the
686  * pool headers (mb_cache and mcl_cache).
687  */
/*
 * Global mbuf counters.  The leading _m_spare* members are placeholders
 * for fields that are no longer used; presumably they are kept so the
 * structure layout stays stable -- confirm before removing any.
 */
struct mbstat {
	u_long	_m_spare;	/* formerly m_mbufs */
	u_long	_m_spare1;	/* formerly m_clusters */
	u_long	_m_spare2;	/* spare field */
	u_long	_m_spare3;	/* formerly m_clfree - free clusters */
	u_long	m_drops;	/* times failed to find space */
	u_long	m_wait;		/* times waited for space */
	u_long	m_drain;	/* times drained protocols for space */
	u_short	m_mtypes[256];	/* type specific mbuf allocations */
};
698 
/*
 * Per-type allocation counters only, with u_int elements (struct mbstat
 * uses u_short for the array of the same name).
 * NOTE(review): the name suggests one instance per CPU -- confirm.
 */
struct mbstat_cpu {
	u_int	m_mtypes[256];	/* type specific mbuf allocations */
};
702 
/*
 * Mbuf sysctl variables.
 *
 * Each identifier's comment gives the type of the value, followed by a
 * short description of what it is.
 */
#define MBUF_MSIZE		1	/* int: mbuf base size */
#define MBUF_MCLBYTES		2	/* int: mbuf cluster size */
#define MBUF_NMBCLUSTERS	3	/* int: limit on the # of clusters */
#define MBUF_MBLOWAT		4	/* int: mbuf low water mark */
#define MBUF_MCLLOWAT		5	/* int: mbuf cluster low water mark */
#define MBUF_STATS		6	/* struct: mbstat */
#define MBUF_MOWNERS		7	/* struct: m_owner[] */
#define MBUF_NMBCLUSTERS_LIMIT	8	/* int: limit of nmbclusters */
714 
715 #ifdef _KERNEL
/* Kernel-wide mbuf state; only declared here. */
extern struct mbstat mbstat;	/* global mbuf statistics */
extern int nmbclusters;		/* limit on the # of clusters */
extern int mblowat;		/* mbuf low water mark */
extern int mcllowat;		/* mbuf cluster low water mark */
extern int max_linkhdr;		/* largest link-level header */
extern int max_protohdr;	/* largest protocol header */
extern int max_hdr;		/* largest link+protocol header */
extern int max_datalen;		/* MHLEN - max_hdr */
extern const int msize;		/* mbuf base size */
extern const int mclbytes;	/* mbuf cluster size */
extern pool_cache_t mb_cache;	/* mbuf pool cache */
#ifdef MBUFTRACE
/* Owner-tracking state; only declared when MBUFTRACE is defined. */
LIST_HEAD(mownerhead, mowner);
extern struct mownerhead mowners;
extern struct mowner unknown_mowners[];
extern struct mowner revoked_mowner;
#endif
733 
/* Malloc types used by the mbuf and socket-name code. */
MALLOC_DECLARE(M_MBUF);
MALLOC_DECLARE(M_SONAME);

/*
 * Mbuf routines.  Prototypes only; the implementations are not part of
 * this header.
 */
struct	mbuf *m_copym(struct mbuf *, int, int, int);
struct	mbuf *m_copypacket(struct mbuf *, int);
struct	mbuf *m_devget(char *, int, int, struct ifnet *);
struct	mbuf *m_dup(struct mbuf *, int, int, int);
struct	mbuf *m_get(int, int);
struct	mbuf *m_gethdr(int, int);
struct	mbuf *m_get_n(int, int, size_t, size_t);
struct	mbuf *m_gethdr_n(int, int, size_t, size_t);
struct	mbuf *m_prepend(struct mbuf *,int, int);
struct	mbuf *m_pulldown(struct mbuf *, int, int, int *);
struct	mbuf *m_pullup(struct mbuf *, int);
struct	mbuf *m_copyup(struct mbuf *, int, int);
struct	mbuf *m_split(struct mbuf *,int, int);
struct	mbuf *m_getptr(struct mbuf *, int, int *);
void	m_adj(struct mbuf *, int);
struct	mbuf *m_defrag(struct mbuf *, int);
/* The callback is followed by a void * argument for the caller's use. */
int	m_apply(struct mbuf *, int, int,
    int (*)(void *, void *, unsigned int), void *);
void	m_cat(struct mbuf *,struct mbuf *);
void	m_clget(struct mbuf *, int);
void	m_copyback(struct mbuf *, int, int, const void *);
struct	mbuf *m_copyback_cow(struct mbuf *, int, int, const void *, int);
int	m_makewritable(struct mbuf **, int, int, int);
struct	mbuf *m_getcl(int, int, int);
void	m_copydata(struct mbuf *, int, int, void *);
void	m_verify_packet(struct mbuf *);
struct	mbuf *m_free(struct mbuf *);
void	m_freem(struct mbuf *);
void	mbinit(void);
void	m_remove_pkthdr(struct mbuf *);
void	m_copy_pkthdr(struct mbuf *, struct mbuf *);
void	m_move_pkthdr(struct mbuf *, struct mbuf *);
void	m_align(struct mbuf *, int);

bool	m_ensure_contig(struct mbuf **, int);
struct mbuf *m_add(struct mbuf *, struct mbuf *);
773 
/*
 * Inline routines.  m_length() is declared here with __unused and
 * defined further down in this header.
 */
static __inline u_int m_length(const struct mbuf *) __unused;

/* Statistics */
void mbstat_type_add(int, int);

/* Packet tag routines (prototypes only) */
struct	m_tag *m_tag_get(int, int, int);
void	m_tag_free(struct m_tag *);
void	m_tag_prepend(struct mbuf *, struct m_tag *);
void	m_tag_unlink(struct mbuf *, struct m_tag *);
void	m_tag_delete(struct mbuf *, struct m_tag *);
void	m_tag_delete_chain(struct mbuf *);
struct	m_tag *m_tag_find(const struct mbuf *, int);
struct	m_tag *m_tag_copy(struct m_tag *);
int	m_tag_copy_chain(struct mbuf *, struct mbuf *);
790 
/*
 * Packet tag types.
 *
 * The numbering is not contiguous.
 * NOTE(review): the gaps are presumably retired tag types -- confirm
 * before reusing any unused number.
 */
#define PACKET_TAG_NONE			0  /* Nothing */
#define PACKET_TAG_SO			4  /* sending socket pointer */
#define PACKET_TAG_NPF			10 /* packet filter */
#define PACKET_TAG_PF			11 /* packet filter */
#define PACKET_TAG_ALTQ_QID		12 /* ALTQ queue id */
#define PACKET_TAG_IPSEC_OUT_DONE	18
#define PACKET_TAG_IPSEC_NAT_T_PORTS	25 /* two uint16_t */
#define PACKET_TAG_INET6		26 /* IPv6 info */
#define PACKET_TAG_TUNNEL_INFO		28 /* tunnel identification and
					    * protocol callback, for loop
					    * detection/recovery
					    */
#define PACKET_TAG_MPLS			29 /* Indicate it's for MPLS */
#define PACKET_TAG_SRCROUTE		30 /* IPv4 source routing */
#define PACKET_TAG_ETHERNET_SRC		31 /* Ethernet source address */
807 
808 /*
809  * Return the number of bytes in the mbuf chain, m.
810  */
811 static __inline u_int
m_length(const struct mbuf * m)812 m_length(const struct mbuf *m)
813 {
814           const struct mbuf *m0;
815           u_int pktlen;
816 
817           if ((m->m_flags & M_PKTHDR) != 0)
818                     return m->m_pkthdr.len;
819 
820           pktlen = 0;
821           for (m0 = m; m0 != NULL; m0 = m0->m_next)
822                     pktlen += m0->m_len;
823           return pktlen;
824 }
825 
826 static __inline void
m_set_rcvif(struct mbuf * m,const struct ifnet * ifp)827 m_set_rcvif(struct mbuf *m, const struct ifnet *ifp)
828 {
829           KASSERT(m->m_flags & M_PKTHDR);
830           m->m_pkthdr.rcvif_index = ifp->if_index;
831 }
832 
833 static __inline void
m_reset_rcvif(struct mbuf * m)834 m_reset_rcvif(struct mbuf *m)
835 {
836           KASSERT(m->m_flags & M_PKTHDR);
837           /* A caller may expect whole _rcvif union is zeroed */
838           /* m->m_pkthdr.rcvif_index = 0; */
839           m->m_pkthdr._rcvif.ctx = NULL;
840 }
841 
842 static __inline void
m_copy_rcvif(struct mbuf * m,const struct mbuf * n)843 m_copy_rcvif(struct mbuf *m, const struct mbuf *n)
844 {
845           KASSERT(m->m_flags & M_PKTHDR);
846           KASSERT(n->m_flags & M_PKTHDR);
847           m->m_pkthdr.rcvif_index = n->m_pkthdr.rcvif_index;
848 }
849 
850 #define M_GET_ALIGNED_HDR(m, type, linkhdr) \
851     m_get_aligned_hdr((m), __alignof(type) - 1, sizeof(type), (linkhdr))
852 
853 static __inline int
m_get_aligned_hdr(struct mbuf ** m,int mask,size_t hlen,bool linkhdr)854 m_get_aligned_hdr(struct mbuf **m, int mask, size_t hlen, bool linkhdr)
855 {
856 #ifndef __NO_STRICT_ALIGNMENT
857           if (((uintptr_t)mtod(*m, void *) & mask) != 0)
858                     *m = m_copyup(*m, hlen,
859                           linkhdr ? (max_linkhdr + mask) & ~mask : 0);
860           else
861 #endif
862           if (__predict_false((size_t)(*m)->m_len < hlen))
863                     *m = m_pullup(*m, hlen);
864 
865           return *m == NULL;
866 }
867 
/*
 * Debug printing.  Every routine below takes an output callback as its
 * last parameter; the callback is checked as printf-like via
 * __printflike(1, 2).
 */
void m_print(const struct mbuf *, const char *, void (*)(const char *, ...)
    __printflike(1, 2));

/* from uipc_mbufdebug.c */
void	m_examine(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));

/* parsers for m_examine(); all share m_examine()'s signature */
void m_examine_ether(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_pppoe(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_ppp(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_arp(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_ip(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_icmp(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_ip6(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_icmp6(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_tcp(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_udp(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_hex(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
898 
899 /*
900  * Get rcvif of a mbuf.
901  *
902  * The caller must call m_put_rcvif after using rcvif if the returned rcvif
903  * isn't NULL. If the returned rcvif is NULL, the caller doesn't need to call
904  * m_put_rcvif (although calling it is safe).
905  *
906  * The caller must not block or sleep while using rcvif. The API ensures a
907  * returned rcvif isn't freed until m_put_rcvif is called.
908  */
909 static __inline struct ifnet *
m_get_rcvif(const struct mbuf * m,int * s)910 m_get_rcvif(const struct mbuf *m, int *s)
911 {
912           struct ifnet *ifp;
913 
914           KASSERT(m->m_flags & M_PKTHDR);
915           *s = pserialize_read_enter();
916           ifp = if_byindex(m->m_pkthdr.rcvif_index);
917           if (__predict_false(ifp == NULL))
918                     pserialize_read_exit(*s);
919 
920           return ifp;
921 }
922 
923 static __inline void
m_put_rcvif(struct ifnet * ifp,int * s)924 m_put_rcvif(struct ifnet *ifp, int *s)
925 {
926 
927           if (ifp == NULL)
928                     return;
929           pserialize_read_exit(*s);
930 }
931 
932 /*
933  * Get rcvif of a mbuf.
934  *
935  * The caller must call m_put_rcvif_psref after using rcvif. The API ensures
936  * a got rcvif isn't be freed until m_put_rcvif_psref is called.
937  */
938 static __inline struct ifnet *
m_get_rcvif_psref(const struct mbuf * m,struct psref * psref)939 m_get_rcvif_psref(const struct mbuf *m, struct psref *psref)
940 {
941           KASSERT(m->m_flags & M_PKTHDR);
942           return if_get_byindex(m->m_pkthdr.rcvif_index, psref);
943 }
944 
945 static __inline void
m_put_rcvif_psref(struct ifnet * ifp,struct psref * psref)946 m_put_rcvif_psref(struct ifnet *ifp, struct psref *psref)
947 {
948 
949           if (ifp == NULL)
950                     return;
951           if_put(ifp, psref);
952 }
953 
954 /*
955  * Get rcvif of a mbuf.
956  *
957  * This is NOT an MP-safe API and shouldn't be used at where we want MP-safe.
958  */
959 static __inline struct ifnet *
m_get_rcvif_NOMPSAFE(const struct mbuf * m)960 m_get_rcvif_NOMPSAFE(const struct mbuf *m)
961 {
962           KASSERT(m->m_flags & M_PKTHDR);
963           return if_byindex(m->m_pkthdr.rcvif_index);
964 }
965 
966 #endif /* _KERNEL */
967 #endif /* !_SYS_MBUF_H_ */
968