1 /*        $NetBSD: pool.h,v 1.96 2021/12/22 16:57:28 thorpej Exp $    */
2 
3 /*-
4  * Copyright (c) 1997, 1998, 1999, 2000, 2007, 2020
5  *     The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef _SYS_POOL_H_
35 #define _SYS_POOL_H_
36 
37 #include <sys/stdbool.h>
38 #include <sys/stdint.h>
39 
/*
 * Flat record of one pool's parameters and counters.
 *
 * Apart from the 16-byte name buffer every member is a uint64_t, so the
 * layout does not depend on the native width of int or long.
 *
 * NOTE(review): the member names mirror fields of struct pool and the
 * per-CPU cache counters; presumably this is the record handed out via
 * sysctl -- confirm against the code that fills it in.
 */
struct pool_sysctl {
	char		pr_wchan[16];
	uint64_t	pr_flags;
	uint64_t	pr_size;
	uint64_t	pr_pagesize;
	uint64_t	pr_itemsperpage;
	uint64_t	pr_nitems;
	uint64_t	pr_nout;
	uint64_t	pr_hardlimit;
	uint64_t	pr_npages;
	uint64_t	pr_minpages;
	uint64_t	pr_maxpages;

	/* Same names as the instrumentation members of struct pool. */
	uint64_t	pr_nget;
	uint64_t	pr_nfail;
	uint64_t	pr_nput;
	uint64_t	pr_npagealloc;
	uint64_t	pr_npagefree;
	uint64_t	pr_hiwat;
	uint64_t	pr_nidle;

	/* pr_cache_*: presumably pool cache statistics (TODO confirm). */
	uint64_t	pr_cache_meta_size;
	uint64_t	pr_cache_nfull;
	uint64_t	pr_cache_npartial;
	uint64_t	pr_cache_nempty;
	uint64_t	pr_cache_ncontended;
	uint64_t	pr_cache_nmiss_global;
	uint64_t	pr_cache_nhit_global;
	uint64_t	pr_cache_nmiss_pcpu;
	uint64_t	pr_cache_nhit_pcpu;
};
71 
/*
 * Kernel builds always see the full pool definitions that follow.  Any
 * other consumer sees them only if __POOL_EXPOSE is already defined
 * when this header is included.
 */
#if defined(_KERNEL)
#define	__POOL_EXPOSE
#endif
75 
76 #ifdef __POOL_EXPOSE
77 #include <sys/param.h>
78 #include <sys/mutex.h>
79 #include <sys/condvar.h>
80 #include <sys/queue.h>
81 #include <sys/time.h>
82 #include <sys/tree.h>
83 #include <sys/callback.h>
84 
85 #ifdef _KERNEL_OPT
86 #include "opt_pool.h"
87 #endif
88 
/*
 * Sentinel paddr_t value, (paddr_t)-1.  pool_cache_put() passes it as
 * the physical-address argument of pool_cache_put_paddr().
 */
#define	POOL_PADDR_INVALID	((paddr_t)-1)
90 
91 struct pool;
92 
93 struct pool_allocator {
94           void                *(*pa_alloc)(struct pool *, int);
95           void                (*pa_free)(struct pool *, void *);
96           unsigned int        pa_pagesz;
97 
98           /* The following fields are for internal use only. */
99           kmutex_t  pa_lock;
100           TAILQ_HEAD(, pool) pa_list;   /* list of pools using this allocator */
101           uint32_t  pa_refcnt;          /* number of pools using this allocator */
102           int                 pa_pagemask;
103           int                 pa_pageshift;
104 };
105 
/*
 * Head types for collections of struct pool_item_header: a list head
 * (struct pool_pagelist) and a splay-tree head (struct phtree).  Both
 * are embedded in struct pool.
 */
LIST_HEAD(pool_pagelist, pool_item_header);
SPLAY_HEAD(phtree, pool_item_header);
108 
/* Number of slots in a pool_quar_t. */
#define	POOL_QUARANTINE_DEPTH	128

/*
 * Quarantine state; struct pool embeds one as pr_quar when
 * POOL_QUARANTINE is defined.  Holds POOL_QUARANTINE_DEPTH intptr_t
 * slots plus an index.
 * NOTE(review): "rotor" is presumably the next slot to use in a ring --
 * confirm against the implementation.
 */
typedef struct {
	size_t		rotor;
	intptr_t	list[POOL_QUARANTINE_DEPTH];
} pool_quar_t;
114 
115 struct pool {
116           TAILQ_ENTRY(pool)
117                               pr_poollist;
118           struct pool_pagelist
119                               pr_emptypages;      /* Empty pages */
120           struct pool_pagelist
121                               pr_fullpages;       /* Full pages */
122           struct pool_pagelist
123                               pr_partpages;       /* Partially-allocated pages */
124           struct pool_item_header       *pr_curpage;
125           struct pool         *pr_phpool;         /* Pool item header pool */
126           struct pool_cache *pr_cache;  /* Cache for this pool */
127           unsigned int        pr_size;  /* Size of item */
128           unsigned int        pr_align; /* Requested alignment, must be 2^n */
129           unsigned int        pr_itemoffset;      /* offset of the item space */
130           unsigned int        pr_minitems;        /* minimum # of free items to keep */
131           unsigned int        pr_maxitems;        /* maximum # of free items to keep */
132           unsigned int        pr_minpages;        /* minimum # of pages to keep */
133           unsigned int        pr_maxpages;        /* maximum # of pages to keep */
134           unsigned int        pr_npages;          /* # of pages allocated */
135           unsigned int        pr_itemsperpage;/* # items that fit in a page */
136           unsigned int        pr_poolid;          /* id of the pool */
137           unsigned int        pr_nitems;          /* number of free items in pool */
138           unsigned int        pr_nout;  /* # items currently allocated */
139           unsigned int        pr_hardlimit;       /* hard limit to number of allocated
140                                                      items */
141           unsigned int        pr_refcnt;          /* ref count for pagedaemon, etc */
142           struct pool_allocator *pr_alloc;/* back-end allocator */
143           TAILQ_ENTRY(pool) pr_alloc_list;/* link on allocator's pool list */
144 
145           /* Drain hook. */
146           void                (*pr_drain_hook)(void *, int);
147           void                *pr_drain_hook_arg;
148 
149           const char          *pr_wchan;          /* tsleep(9) identifier */
150           unsigned int        pr_flags; /* r/w flags */
151           unsigned int        pr_roflags;         /* r/o flags */
152 #define PR_WAITOK   0x01      /* Note: matches KM_SLEEP */
153 #define PR_NOWAIT   0x02      /* Note: matches KM_NOSLEEP */
154 #define PR_WANTED   0x04      /* waiting for free objects */
155 #define PR_PHINPAGE 0x40      /* page header in page */
156 #define PR_LIMITFAIL          0x100     /* even if waiting, fail if we hit limit */
157 #define PR_RECURSIVE          0x200     /* pool contains pools, for vmstat(8) */
158 #define PR_NOTOUCH  0x400     /* don't use free items to keep internal state*/
159 #define PR_NOALIGN  0x800     /* don't assume backend alignment */
160 #define PR_LARGECACHE         0x1000    /* use large cache groups */
161 #define PR_GROWING  0x2000    /* pool_grow in progress */
162 #define PR_GROWINGNOWAIT 0x4000         /* pool_grow in progress by PR_NOWAIT alloc */
163 #define PR_ZERO               0x8000    /* zero data before returning */
164 #define PR_USEBMAP  0x10000   /* use a bitmap to manage freed items */
165 #define PR_PSERIALIZE         0x20000   /* needs pserialize sync point before free */
166 
167           /*
168            * `pr_lock' protects the pool's data structures when removing
169            * items from or returning items to the pool, or when reading
170            * or updating read/write fields in the pool descriptor.
171            *
172            * We assume back-end page allocators provide their own locking
173            * scheme.  They will be called with the pool descriptor _unlocked_,
174            * since the page allocators may block.
175            */
176           kmutex_t  pr_lock;
177           kcondvar_t          pr_cv;
178           int                 pr_ipl;
179 
180           struct phtree       pr_phtree;
181 
182           int                 pr_maxcolor;        /* Cache colouring */
183           int                 pr_curcolor;
184           int                 pr_phoffset;        /* unused */
185 
186           /*
187            * Warning message to be issued, and a per-time-delta rate cap,
188            * if the hard limit is reached.
189            */
190           const char          *pr_hardlimit_warning;
191           struct timeval      pr_hardlimit_ratecap;
192           struct timeval      pr_hardlimit_warning_last;
193 
194           /*
195            * Instrumentation
196            */
197           unsigned long       pr_nget;  /* # of successful requests */
198           unsigned long       pr_nfail; /* # of unsuccessful requests */
199           unsigned long       pr_nput;  /* # of releases */
200           unsigned long       pr_npagealloc;      /* # of pages allocated */
201           unsigned long       pr_npagefree;       /* # of pages released */
202           unsigned int        pr_hiwat; /* max # of pages in pool */
203           unsigned long       pr_nidle; /* # of idle pages */
204 
205           /*
206            * Diagnostic aides.
207            */
208           void                *pr_freecheck;
209           void                *pr_qcache;
210           bool                pr_redzone;
211           size_t              pr_reqsize;
212           size_t              pr_reqsize_with_redzone;
213 #ifdef POOL_QUARANTINE
214           pool_quar_t         pr_quar;
215 #endif
216 };
217 
/*
 * Objects per cache group, for normal and large groups.  The byte
 * totals quoted next to each value assume a 4-byte paddr_t on !_LP64.
 * Every group is aligned to COHERENCY_UNIT.
 */
#if !defined(_LP64)
#define	PCG_NOBJECTS_NORMAL	14	/* 124 byte group */
#define	PCG_NOBJECTS_LARGE	62	/* 508 byte group */
#else
#define	PCG_NOBJECTS_NORMAL	15	/* 256 byte group */
#define	PCG_NOBJECTS_LARGE	63	/* 1024 byte group */
#endif
229 
230 typedef struct pcgpair {
231           void      *pcgo_va;           /* object virtual address */
232           paddr_t   pcgo_pa;            /* object physical address */
233 } pcgpair_t;
234 
235 /* The pool cache group. */
236 typedef struct pool_cache_group {
237           struct pool_cache_group       *pcg_next;          /* link to next group */
238           u_int                         pcg_avail;          /* # available objects */
239           u_int                         pcg_size; /* max number objects */
240           pcgpair_t                     pcg_objects[1];     /* the objects */
241 } pcg_t;
242 
243 /* Pool cache CPU.  Sized to 64 bytes on _LP64. */
244 typedef struct pool_cache_cpu {
245           struct pool_cache_group       *cc_current;
246           struct pool_cache_group       *cc_previous;
247           pcg_t *volatile     *cc_pcgcache;
248           uint64_t            cc_misses;
249           uint64_t            cc_hits;
250           uint64_t            cc_pcmisses;
251           uint64_t            cc_contended;
252           uint32_t            cc_nfull;
253           uint32_t            cc_npart;
254 } pool_cache_cpu_t;
255 
256 struct pool_cache {
257           /* Pool layer. */
258           struct pool         pc_pool;
259 
260           /* Cache layer. */
261           TAILQ_ENTRY(pool_cache)
262                               pc_cachelist;       /* entry on global cache list */
263           struct pool         *pc_pcgpool;        /* Pool of cache groups */
264           pcg_t *volatile *pc_pcgcache; /* list of empty cache groups */
265           int                 pc_pcgsize;         /* Use large cache groups? */
266           int                 pc_ncpu;  /* number cpus set up */
267           int                 (*pc_ctor)(void *, void *, int);
268           void                (*pc_dtor)(void *, void *);
269           void                *pc_arg;  /* for ctor/dtor */
270           unsigned int        pc_refcnt;          /* ref count for pagedaemon, etc */
271           unsigned int        pc_roflags;         /* r/o cache flags */
272           void                *pc_cpus[MAXCPUS];
273 
274           /* Diagnostic aides. */
275           void                *pc_freecheck;
276           bool                pc_redzone;
277           size_t              pc_reqsize;
278 
279           /* Hot items. */
280           pcg_t *volatile pc_fullgroups /* list of full cache groups */
281               __aligned(CACHE_LINE_SIZE);
282           pcg_t *volatile pc_partgroups;          /* groups for reclamation */
283 
284           /* Boot cpu. */
285           pool_cache_cpu_t pc_cpu0 __aligned(CACHE_LINE_SIZE);
286 };
287 
288 #endif /* __POOL_EXPOSE */
289 
/*
 * Handle type for pool caches.  Declared outside the __POOL_EXPOSE
 * section, so it is usable even where struct pool_cache is incomplete.
 */
typedef struct pool_cache	*pool_cache_t;
291 
292 #ifdef _KERNEL
/*
 * Allocators defined elsewhere in the kernel:
 *
 *	pool_allocator_kmem	the default every pool gets unless
 *				another allocator is specified.
 *	pool_allocator_nointr	for pools that know they will never be
 *				accessed in interrupt context.
 *	pool_allocator_meta	declared here without further description.
 */
extern struct pool_allocator	pool_allocator_kmem;
extern struct pool_allocator	pool_allocator_nointr;
extern struct pool_allocator	pool_allocator_meta;
302 
303 void                pool_subsystem_init(void);
304 
305 void                pool_init(struct pool *, size_t, u_int, u_int,
306                         int, const char *, struct pool_allocator *, int);
307 void                pool_destroy(struct pool *);
308 
309 void                pool_set_drain_hook(struct pool *,
310                         void (*)(void *, int), void *);
311 
312 void                *pool_get(struct pool *, int);
313 void                pool_put(struct pool *, void *);
314 int                 pool_reclaim(struct pool *);
315 
316 void                pool_prime(struct pool *, int);
317 void                pool_setlowat(struct pool *, int);
318 void                pool_sethiwat(struct pool *, int);
319 void                pool_sethardlimit(struct pool *, int, const char *, int);
320 bool                pool_drain(struct pool **);
321 int                 pool_totalpages(void);
322 int                 pool_totalpages_locked(void);
323 
324 unsigned int        pool_nget(struct pool *);
325 unsigned int        pool_nput(struct pool *);
326 
327 /*
328  * Debugging and diagnostic aides.
329  */
330 void                pool_printit(struct pool *, const char *,
331     void (*)(const char *, ...) __printflike(1, 2));
332 void                pool_printall(const char *, void (*)(const char *, ...)
333     __printflike(1, 2));
334 int                 pool_chk(struct pool *, const char *);
335 
336 /*
337  * Pool cache routines.
338  */
339 pool_cache_t        pool_cache_init(size_t, u_int, u_int, u_int, const char *,
340                         struct pool_allocator *, int, int (*)(void *, void *, int),
341                         void (*)(void *, void *), void *);
342 void                pool_cache_bootstrap(pool_cache_t, size_t, u_int, u_int, u_int,
343                         const char *, struct pool_allocator *, int,
344                         int (*)(void *, void *, int), void (*)(void *, void *),
345                         void *);
346 void                pool_cache_destroy(pool_cache_t);
347 void                pool_cache_bootstrap_destroy(pool_cache_t);
348 void                *pool_cache_get_paddr(pool_cache_t, int, paddr_t *);
349 void                pool_cache_put_paddr(pool_cache_t, void *, paddr_t);
350 void                pool_cache_destruct_object(pool_cache_t, void *);
351 void                pool_cache_invalidate(pool_cache_t);
352 bool                pool_cache_reclaim(pool_cache_t);
353 void                pool_cache_set_drain_hook(pool_cache_t,
354                         void (*)(void *, int), void *);
355 void                pool_cache_setlowat(pool_cache_t, int);
356 void                pool_cache_sethiwat(pool_cache_t, int);
357 void                pool_cache_sethardlimit(pool_cache_t, int, const char *, int);
358 void                pool_cache_prime(pool_cache_t, int);
359 void                pool_cache_cpu_init(struct cpu_info *);
360 
361 unsigned int        pool_cache_nget(pool_cache_t);
362 unsigned int        pool_cache_nput(pool_cache_t);
363 
/*
 * Shorthand for callers that do not deal with physical addresses:
 * pool_cache_get() passes NULL for the paddr_t * argument of
 * pool_cache_get_paddr(), and pool_cache_put() passes
 * POOL_PADDR_INVALID as the paddr_t argument of pool_cache_put_paddr().
 */
#define	pool_cache_get(pc, f)	pool_cache_get_paddr((pc), (f), NULL)
#define	pool_cache_put(pc, o)	\
	pool_cache_put_paddr((pc), (o), POOL_PADDR_INVALID)
367 
368 void                pool_whatis(uintptr_t, void (*)(const char *, ...)
369     __printflike(1, 2));
370 #endif /* _KERNEL */
371 
372 #endif /* _SYS_POOL_H_ */
373