xref: /dragonfly/sys/kern/kern_sysref.c (revision dd0e3cd77910a3aedf956ab0d5656eeef6c20577)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * System resource control module for all cluster-addressable system resource
36  * structures.
37  *
38  * This module implements the core ref counting, sysid registration, and
39  * objcache-backed allocation mechanism for all major system resource
40  * structures.
41  *
42  * sysid registrations operate via the objcache ctor/dtor mechanism and
43  * sysids will be reused if the resource is not explicitly accessed via
44  * its sysid.  This removes all RB tree handling overhead from the critical
45  * path for locally used resources.
46  */
47 
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/malloc.h>
52 #include <sys/tree.h>
53 #include <sys/spinlock.h>
54 #include <machine/atomic.h>
55 #include <machine/cpufunc.h>
56 
57 #include <sys/spinlock2.h>
58 #include <sys/sysref2.h>
59 
60 static boolean_t sysref_ctor(void *data, void *privdata, int ocflags);
61 static void sysref_dtor(void *data, void *privdata);
62 
63 /*
64  * Red-Black tree support
65  */
66 static int rb_sysref_compare(struct sysref *sr1, struct sysref *sr2);
67 RB_GENERATE2(sysref_rb_tree, sysref, rbnode, rb_sysref_compare, sysid_t, sysid);
68 
69 static struct srpercpu {
70           struct sysref_rb_tree rbtree;
71           struct spinlock spin;
72 } sysref_array[MAXCPU];
73 
74 static void
sysrefbootinit(void * dummy __unused)75 sysrefbootinit(void *dummy __unused)
76 {
77           struct srpercpu *sa;
78           int i;
79 
80           for (i = 0; i < ncpus; ++i) {
81                     sa = &sysref_array[i];
82                     spin_init(&sa->spin, "sysrefbootinit");
83                     RB_INIT(&sa->rbtree);
84           }
85 }
86 
87 SYSINIT(sysref, SI_BOOT2_MACHDEP, SI_ORDER_ANY, sysrefbootinit, NULL);
88 
89 static
90 int
rb_sysref_compare(struct sysref * sr1,struct sysref * sr2)91 rb_sysref_compare(struct sysref *sr1, struct sysref *sr2)
92 {
93           if (sr1->sysid < sr2->sysid)
94                     return(-1);
95           if (sr1->sysid > sr2->sysid)
96                     return(1);
97           return(0);
98 }
99 
100 /*
101  * Manual initialization of a resource structure's sysref, only used during
102  * booting to set up certain statically declared resources which cannot
103  * be deallocated.
104  */
105 void
sysref_init(struct sysref * sr,struct sysref_class * srclass)106 sysref_init(struct sysref *sr, struct sysref_class *srclass)
107 {
108           struct srpercpu *sa;
109           globaldata_t gd;
110 
111           gd = mycpu;
112           crit_enter_gd(gd);
113           gd->gd_sysid_alloc += ncpus_fit; /* next unique sysid */
114           sr->sysid = gd->gd_sysid_alloc;
115           KKASSERT(((int)sr->sysid & ncpus_fit_mask) == gd->gd_cpuid);
116           sr->refcnt = -0x40000000;
117           sr->flags = 0;
118           sr->srclass = srclass;
119 
120           sa = &sysref_array[gd->gd_cpuid];
121           spin_lock(&sa->spin);
122           sysref_rb_tree_RB_INSERT(&sa->rbtree, sr);
123           spin_unlock(&sa->spin);
124           crit_exit_gd(gd);
125 }
126 
127 /*
128  * Allocate a resource structure of the specified class, initialize a
129  * sysid and add the resource to the RB tree.  The caller must complete
130  * initialization of the resource and call sysref_activate() to activate it.
131  */
132 void *
sysref_alloc(struct sysref_class * srclass)133 sysref_alloc(struct sysref_class *srclass)
134 {
135           struct sysref *sr;
136           char *data;
137           int n;
138 
139           /*
140            * Create the object cache backing store.
141            */
142           if (srclass->oc == NULL) {
143                     KKASSERT(srclass->mtype != NULL);
144                     srclass->oc = objcache_create_mbacked(
145                                         srclass->mtype, srclass->objsize,
146                                         0, srclass->nom_cache,
147                                         sysref_ctor, sysref_dtor, srclass);
148           }
149 
150           /*
151            * Allocate the resource.
152            */
153           data = objcache_get(srclass->oc, M_WAITOK);
154           sr = (struct sysref *)(data + srclass->offset);
155           KKASSERT(sr->flags & SRF_PUTAWAY);
156           sr->flags &= ~SRF_PUTAWAY;
157 
158           /*
159            * Refcnt isn't touched while it is zero.  The objcache ctor
160            * function has already allocated a sysid and emplaced the
161            * structure in the RB tree.
162            */
163           KKASSERT(sr->refcnt == 0);
164           sr->refcnt = -0x40000000;
165 
166           /*
167            * Clean out the structure unless the caller wants to deal with
168            * it (e.g. like the vmspace code).
169            */
170           if ((srclass->flags & SRC_MANAGEDINIT) == 0) {
171                     if (srclass->offset != 0)
172                               bzero(data, srclass->offset);
173                     n = srclass->offset + sizeof(struct sysref);
174                     KKASSERT(n <= srclass->objsize);
175                     if (n != srclass->objsize)
176                               bzero(data + n, srclass->objsize - n);
177           }
178           return(data);
179 }
180 
181 /*
182  * Object cache backing store ctor function.
183  *
184  * This allocates the sysid and associates the structure with the
185  * red-black tree, allowing it to be looked up.  The actual resource
186  * structure has NOT yet been allocated so it is marked free.
187  *
188  * If the sysid is not used to access the resource, we will just
189  * allow the sysid to be reused when the resource structure is reused,
190  * allowing the RB tree operation to be 'cached'.  This results in
191  * virtually no performance penalty for using the sysref facility.
192  */
193 static
194 boolean_t
sysref_ctor(void * data,void * privdata,int ocflags)195 sysref_ctor(void *data, void *privdata, int ocflags)
196 {
197           globaldata_t gd;
198           struct srpercpu *sa;
199           struct sysref_class *srclass = privdata;
200           struct sysref *sr = (void *)((char *)data + srclass->offset);
201 
202           /*
203            * Resource structures need to be cleared when allocating from
204            * malloc backing store.  This is different from the zeroing
205            * that we do in sysref_alloc().
206            */
207           bzero(data, srclass->objsize);
208 
209           /*
210            * Resources managed by our objcache do the sysid and RB tree
211            * handling in the objcache ctor/dtor, so we can reuse the
212            * structure without re-treeing it over and over again.
213            */
214           gd = mycpu;
215           crit_enter_gd(gd);
216           gd->gd_sysid_alloc += ncpus_fit; /* next unique sysid */
217           sr->sysid = gd->gd_sysid_alloc;
218           KKASSERT(((int)sr->sysid & ncpus_fit_mask) == gd->gd_cpuid);
219           /* sr->refcnt= 0; already zero */
220           sr->flags = SRF_ALLOCATED | SRF_PUTAWAY;
221           sr->srclass = srclass;
222 
223           sa = &sysref_array[gd->gd_cpuid];
224           spin_lock(&sa->spin);
225           sysref_rb_tree_RB_INSERT(&sa->rbtree, sr);
226           spin_unlock(&sa->spin);
227           crit_exit_gd(gd);
228 
229           /*
230            * Execute the class's ctor function, if any.  NOTE: The class
231            * should not try to zero out the structure, we've already handled
232            * that and preinitialized the sysref.
233            *
234            * XXX ignores return value for now
235            */
236           if (srclass->ctor)
237                     srclass->ctor(data, privdata, ocflags);
238           return TRUE;
239 }
240 
241 /*
242  * Object cache destructor, allowing the structure to be returned
243  * to the system memory pool.  The resource structure must be
244  * removed from the RB tree.  All other references have already
245  * been destroyed and the RB tree will not create any new references
246  * to the structure in its current state.
247  */
248 static
249 void
sysref_dtor(void * data,void * privdata)250 sysref_dtor(void *data, void *privdata)
251 {
252           struct srpercpu *sa;
253           struct sysref_class *srclass = privdata;
254           struct sysref *sr = (void *)((char *)data + srclass->offset);
255 
256           KKASSERT(sr->refcnt == 0);
257           sa = &sysref_array[(int)sr->sysid & ncpus_fit_mask];
258           spin_lock(&sa->spin);
259           sysref_rb_tree_RB_REMOVE(&sa->rbtree, sr);
260           spin_unlock(&sa->spin);
261           if (srclass->dtor)
262                     srclass->dtor(data, privdata);
263 }
264 
265 /*
266  * Activate or reactivate a resource. 0x40000001 is added to the ref count
267  * so -0x40000000 (during initialization) will translate to a ref count of 1.
268  * Any references made during initialization will translate to additional
269  * positive ref counts.
270  *
271  * MPSAFE
272  */
273 void
sysref_activate(struct sysref * sr)274 sysref_activate(struct sysref *sr)
275 {
276           int count;
277 
278           for (;;) {
279                     count = sr->refcnt;
280                     KASSERT(count < 0 && count + 0x40000001 > 0,
281                               ("sysref_activate: bad count %08x", count));
282                     if (atomic_cmpset_int(&sr->refcnt, count, count + 0x40000001))
283                               break;
284                     cpu_pause();
285           }
286 }
287 
288 /*
289  * Release a reference under special circumstances.  This call is made
290  * from the sysref_put() inline from sys/sysref2.h for any 1->0 transitions,
291  * negative->negative 'termination in progress' transitions, and when the
292  * cmpset instruction fails during a normal transition.
293  *
294  * This function is called from the sysref_put() inline in sys/sysref2.h,
295  * but handles all cases regardless.
296  */
297 void
_sysref_put(struct sysref * sr)298 _sysref_put(struct sysref *sr)
299 {
300           int count;
301           void *data;
302 
303           KKASSERT((sr->flags & SRF_PUTAWAY) == 0);
304 
305           for (;;) {
306                     count = sr->refcnt;
307                     if (count > 1) {
308                               /*
309                                * release 1 count, nominal case, active resource
310                                * structure, no other action required.
311                                */
312                               if (atomic_cmpset_int(&sr->refcnt, count, count - 1))
313                                         break;
314                     } else if (count == 1) {
315                               /*
316                                * 1->0 transitions transition to -0x40000000 instead,
317                                * placing the resource structure into a termination-
318                                * in-progress state.  The termination function is
319                                * then called.
320                                */
321                               data = (char *)sr - sr->srclass->offset;
322                               sr->srclass->ops.lock(data);
323                               if (atomic_cmpset_int(&sr->refcnt, count, -0x40000000)) {
324                                         sr->srclass->ops.terminate(data);
325                                         /* callback unlocks */
326                                         break;
327                               }
328                               sr->srclass->ops.unlock(data);
329                     } else if (count > -0x40000000) {
330                               /*
331                                * release 1 count, nominal case, resource undergoing
332                                * termination.  The Resource can be ref'd and
333                                * deref'd while undergoing termination.
334                                */
335                               if (atomic_cmpset_int(&sr->refcnt, count, count - 1))
336                                         break;
337                     } else {
338                               /*
339                                * Final release, set refcnt to 0.
340                                * Resource must have been allocated.
341                                *
342                                * If SRF_SYSIDUSED is not set just objcache_put() the
343                                * resource, otherwise objcache_dtor() the resource.
344                                */
345                               KKASSERT(count == -0x40000000);
346                               if (atomic_cmpset_int(&sr->refcnt, count, 0)) {
347                                         KKASSERT(sr->flags & SRF_ALLOCATED);
348                                         sr->flags |= SRF_PUTAWAY;
349                                         data = (char *)sr - sr->srclass->offset;
350                                         if (sr->flags & SRF_SYSIDUSED)
351                                                   objcache_dtor(sr->srclass->oc, data);
352                                         else
353                                                   objcache_put(sr->srclass->oc, data);
354                                         break;
355                               }
356                     }
357                     /* loop until the cmpset succeeds */
358                     cpu_pause();
359           }
360 }
361 
362 sysid_t
allocsysid(void)363 allocsysid(void)
364 {
365           globaldata_t gd = mycpu;
366           sysid_t sysid;
367 
368           crit_enter_gd(gd);
369           gd->gd_sysid_alloc += ncpus_fit;
370           sysid = gd->gd_sysid_alloc;
371           crit_exit_gd(gd);
372           return(sysid);
373 }
374 
375