1 /*        $NetBSD: pthread_atfork.c,v 1.27 2025/04/09 22:10:59 kre Exp $        */
2 
3 /*-
4  * Copyright (c) 2002 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Nathan J. Williams.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #if defined(LIBC_SCCS) && !defined(lint)
34 __RCSID("$NetBSD: pthread_atfork.c,v 1.27 2025/04/09 22:10:59 kre Exp $");
35 #endif /* LIBC_SCCS and not lint */
36 
37 #include "namespace.h"
38 
39 #include <errno.h>
40 #include <stdlib.h>
41 #include <unistd.h>
42 
43 #include <sys/mman.h>
44 #include <sys/param.h>
45 #include <sys/queue.h>
46 #include <sys/sysctl.h>
47 
48 #include "extern.h"
49 #include "reentrant.h"
50 
/*
 * Where the toolchain provides __weak_alias, publish pthread_atfork and
 * fork as aliases of _pthread_atfork and _fork.
 * NOTE(review): presumably "namespace.h" maps the function names defined
 * in this file onto the underscore-prefixed symbols -- confirm there.
 */
#ifdef __weak_alias
__weak_alias(pthread_atfork, _pthread_atfork)
__weak_alias(fork, _fork)
#endif /* __weak_alias */
55 
/*
 * __locked_fork --
 *	Perform the actual fork by calling __fork() and hand its result
 *	straight back to the caller.
 *
 *	my_errno is accepted but not used by this implementation.
 */
pid_t
__locked_fork(int *my_errno)
{
	pid_t pid;

	pid = __fork();
	return pid;
}
61 
/*
 * One registered fork handler: the function to call plus the linkage for
 * whichever of the prepare/parent/child queues it is inserted into.
 */
struct atfork_callback {
	SIMPLEQ_ENTRY(atfork_callback) next;	/* queue linkage */
	void (*fn)(void);	/* handler; NULL marks an unused builtin slot */
};
66 
/*
 * Bookkeeping kept in the first slot of an mmap(2)ed page of callbacks
 * (see af_alloc()).
 */
struct atfork_cb_header {
	uint16_t  entries;	/* total slots in the page, header slot included */
	uint16_t  used;		/* slots consumed so far; starts at 1 for the header */
};
71 
/*
 * A slot in an mmap(2)ed page, viewed either as a callback or as the page
 * header.  af_alloc() uses the first slot of each page as the header and
 * hands out the slots after it as callbacks.
 */
struct atfork_cb_block {
	union {
		struct atfork_callback block;	/* slot seen as a callback */
		struct atfork_cb_header hdr;	/* slot seen as the page header */
	} u;
};
78 
/*
 * Accessors for a struct atfork_cb_block pointer `bp':
 *   cb_blocks(bp)  address of the slot viewed as a struct atfork_callback
 *   cb_ents(bp)    header's entry count (usable as an lvalue)
 *   cb_used(bp)    header's used count  (usable as an lvalue)
 */
#define	cb_blocks(bp)	(&((bp)->u.block))
#define	cb_ents(bp)	((bp)->u.hdr.entries)
#define	cb_used(bp)	((bp)->u.hdr.used)
82 
/*
 * We need to keep a cache of at least 6 entries: one each for prepare,
 * parent and child, times 2 because of the two uses in the libpthread
 * (pthread_init, pthread_tsd_init) constructors, where it is too early to
 * call malloc(3).
 * This does not guarantee that we will have enough, because other libraries
 * can also call pthread_atfork() from their own constructors, so this is not
 * a complete solution and will need to be fixed properly. For now we keep
 * space for 16 since it is just 256 bytes.
 */
/* Static pool of callback slots that af_alloc() tries first. */
static struct atfork_callback atfork_builtin[16];
/* Current mmap(2)ed page of slots; NULL until af_alloc() has mapped one. */
static struct atfork_cb_block *atfork_storage = NULL;
/* Page size as reported by sysctl(); 0 means it has not been fetched yet. */
static int hw_pagesize = 0;

/* MIB { CTL_HW, HW_PAGESIZE } passed to sysctl() to fill in hw_pagesize. */
static const int hw_pagesize_sysctl[2] = { CTL_HW, HW_PAGESIZE };
97 
98 /*
99  * Hypothetically, we could protect the queues with a rwlock which is
100  * write-locked by pthread_atfork() and read-locked by fork(), but
101  * since the intended use of the functions is obtaining locks to hold
102  * across the fork, forking is going to be serialized anyway.
103  */
#ifdef _REENTRANT
/* Taken by both pthread_atfork() and fork() around their use of the queues. */
static mutex_t atfork_lock = MUTEX_INITIALIZER;
#endif
SIMPLEQ_HEAD(atfork_callback_q, atfork_callback);

/*
 * Handler queues walked by fork():
 *   prepareq  before the fork; pthread_atfork() inserts at the head
 *   parentq   after the fork in the parent; inserted at the tail
 *   childq    after the fork in the child; inserted at the tail
 */
static struct atfork_callback_q prepareq = SIMPLEQ_HEAD_INITIALIZER(prepareq);
static struct atfork_callback_q parentq = SIMPLEQ_HEAD_INITIALIZER(parentq);
static struct atfork_callback_q childq = SIMPLEQ_HEAD_INITIALIZER(childq);
112 
113 /*
114  * Nb: nothing allocated by this allocator is ever freed.
115  * (there is no API to free anything, and no need for one)
116  *
117  * The code relies upon this.
118  */
119 static struct atfork_callback *
af_alloc(unsigned int blocks)120 af_alloc(unsigned int blocks)
121 {
122           struct atfork_callback *result;
123 
124           if (__predict_false(blocks == 0))
125                     return NULL;
126 
127           if (__predict_true(atfork_storage == NULL)) {
128                     for (size_t i = 0; i < __arraycount(atfork_builtin); i++) {
129                               if (atfork_builtin[i].fn == NULL) {
130                                         if (i + blocks <= __arraycount(atfork_builtin))
131                                                   return &atfork_builtin[i];
132                                         else
133                                                   break;
134                               }
135                     }
136           }
137 
138           if (__predict_false(atfork_storage == NULL ||
139               cb_used(atfork_storage) + blocks > cb_ents(atfork_storage))) {
140                     if (__predict_false(hw_pagesize == 0)) {
141                               size_t len = sizeof(hw_pagesize);
142 
143                               if (sysctl(hw_pagesize_sysctl, 2, &hw_pagesize,
144                                   &len, NULL, 0) != 0)
145                                         return NULL;
146                               if (len != sizeof(hw_pagesize))
147                                         return NULL;
148                               if (hw_pagesize == 0 || (hw_pagesize & 0xFF) != 0)
149                                         return NULL;
150                     }
151                     atfork_storage = mmap(0, hw_pagesize, PROT_READ|PROT_WRITE,
152                         MAP_PRIVATE | MAP_ANON, -1, 0);
153                     if (__predict_false(atfork_storage == NULL))
154                               return NULL;
155                     cb_used(atfork_storage) = 1;
156                     cb_ents(atfork_storage) =
157                         (uint16_t)(hw_pagesize / sizeof(struct atfork_cb_block));
158                     if (__predict_false(cb_ents(atfork_storage) < blocks + 1))
159                               return NULL;
160           }
161 
162           result = cb_blocks(atfork_storage) + cb_used(atfork_storage);
163           cb_used(atfork_storage) += blocks;
164 
165           return result;
166 }
167 
168 int
pthread_atfork(void (* prepare)(void),void (* parent)(void),void (* child)(void))169 pthread_atfork(void (*prepare)(void), void (*parent)(void),
170     void (*child)(void))
171 {
172           struct atfork_callback *newprepare, *newparent, *newchild;
173           sigset_t mask, omask;
174           int error;
175 
176           sigfillset(&mask);
177           thr_sigsetmask(SIG_SETMASK, &mask, &omask);
178 
179           mutex_lock(&atfork_lock);
180 
181           /*
182            * Note here that we either get all the blocks
183            * we need, in one call, or we get NULL.
184            *
185            * Note also that a NULL return is not an error
186            * if no blocks were required (all args == NULL)
187            */
188           newprepare = af_alloc((prepare != NULL) +
189               (parent != NULL) + (child != NULL));
190 
191           error = ENOMEM;               /* in case of "goto out" */
192 
193           newparent = newprepare;
194           if (prepare != NULL) {
195                     if (__predict_false(newprepare == NULL))
196                               goto out;
197                     newprepare->fn = prepare;
198                     newparent++;
199           }
200 
201           newchild = newparent;
202           if (parent != NULL) {
203                     if (__predict_false(newparent == NULL))
204                               goto out;
205                     newparent->fn = parent;
206                     newchild++;
207           }
208 
209           if (child != NULL) {
210                     if (__predict_false(newchild == NULL))
211                               goto out;
212                     newchild->fn = child;
213           }
214 
215           /*
216            * The order in which the functions are called is specified as
217            * LIFO for the prepare handler and FIFO for the others; insert
218            * at the head and tail as appropriate so that SIMPLEQ_FOREACH()
219            * produces the right order.
220            */
221           if (prepare)
222                     SIMPLEQ_INSERT_HEAD(&prepareq, newprepare, next);
223           if (parent)
224                     SIMPLEQ_INSERT_TAIL(&parentq, newparent, next);
225           if (child)
226                     SIMPLEQ_INSERT_TAIL(&childq, newchild, next);
227 
228           error = 0;
229 
230  out:;
231           mutex_unlock(&atfork_lock);
232           thr_sigsetmask(SIG_SETMASK, &omask, NULL);
233           return error;
234 }
235 
236 pid_t
fork(void)237 fork(void)
238 {
239           struct atfork_callback *iter;
240           pid_t ret;
241 
242           mutex_lock(&atfork_lock);
243           SIMPLEQ_FOREACH(iter, &prepareq, next)
244                     (*iter->fn)();
245           _malloc_prefork();
246 
247           ret = __locked_fork(&errno);
248 
249           if (ret != 0) {
250                     /*
251                      * We are the parent. It doesn't matter here whether
252                      * the fork call succeeded or failed.
253                      */
254                     _malloc_postfork();
255                     SIMPLEQ_FOREACH(iter, &parentq, next)
256                               (*iter->fn)();
257                     mutex_unlock(&atfork_lock);
258           } else {
259                     /* We are the child */
260                     _malloc_postfork_child();
261                     SIMPLEQ_FOREACH(iter, &childq, next)
262                               (*iter->fn)();
263                     /*
264                      * Note: We are explicitly *not* unlocking
265                      * atfork_lock.  Unlocking atfork_lock is problematic,
266                      * because if any threads in the parent blocked on it
267                      * between the initial lock and the fork() syscall,
268                      * unlocking in the child will try to schedule
269                      * threads, and either the internal mutex interlock or
270                      * the runqueue spinlock could have been held at the
271                      * moment of fork(). Since the other threads do not
272                      * exist in this process, the spinlock will never be
273                      * unlocked, and we would wedge.
274                      * Instead, we reinitialize atfork_lock, since we know
275                      * that the state of the atfork lists is consistent here,
276                      * and that there are no other threads to be affected by
277                      * the forcible cleaning of the queue.
278                      * This permits double-forking to work, although
279                      * it requires knowing that it's "safe" to initialize
280                      * a locked mutex in this context.
281                      *
282                      * The problem exists for users of this interface,
283                      * too, since the intended use of pthread_atfork() is
284                      * to acquire locks across the fork call to ensure
285                      * that the child sees consistent state. There's not
286                      * much that can usefully be done in a child handler,
287                      * and conventional wisdom discourages using them, but
288                      * they're part of the interface, so here we are...
289                      */
290                     mutex_init(&atfork_lock, NULL);
291           }
292 
293           return ret;
294 }
295