1 /*        $NetBSD: slmdb.c,v 1.5 2025/02/25 19:15:52 christos Exp $   */
2 
3 /*++
4 /* NAME
5 /*        slmdb 3
6 /* SUMMARY
7 /*        Simplified LMDB API
8 /* SYNOPSIS
9 /*        #include <slmdb.h>
10 /*
11 /*        int       slmdb_init(slmdb, curr_limit, size_incr, hard_limit)
12 /*        SLMDB     *slmdb;
13 /*        size_t    curr_limit;
14 /*        int       size_incr;
15 /*        size_t    hard_limit;
16 /*
17 /*        int       slmdb_open(slmdb, path, open_flags, lmdb_flags, slmdb_flags)
18 /*        SLMDB     *slmdb;
19 /*        const char *path;
20 /*        int       open_flags;
21 /*        int       lmdb_flags;
22 /*        int       slmdb_flags;
23 /*
24 /*        int       slmdb_close(slmdb)
25 /*        SLMDB     *slmdb;
26 /*
27 /*        int       slmdb_get(slmdb, mdb_key, mdb_value)
28 /*        SLMDB     *slmdb;
29 /*        MDB_val   *mdb_key;
30 /*        MDB_val   *mdb_value;
31 /*
32 /*        int       slmdb_put(slmdb, mdb_key, mdb_value, flags)
33 /*        SLMDB     *slmdb;
34 /*        MDB_val   *mdb_key;
35 /*        MDB_val   *mdb_value;
36 /*        int       flags;
37 /*
38 /*        int       slmdb_del(slmdb, mdb_key)
39 /*        SLMDB     *slmdb;
40 /*        MDB_val   *mdb_key;
41 /*
42 /*        int       slmdb_cursor_get(slmdb, mdb_key, mdb_value, op)
43 /*        SLMDB     *slmdb;
44 /*        MDB_val   *mdb_key;
45 /*        MDB_val   *mdb_value;
46 /*        MDB_cursor_op op;
47 /* AUXILIARY FUNCTIONS
48 /*        int       slmdb_fd(slmdb)
49 /*        SLMDB     *slmdb;
50 /*
51 /*        size_t    slmdb_curr_limit(slmdb)
52 /*        SLMDB     *slmdb;
53 /*
54 /*        int       slmdb_control(slmdb, request, ...)
55 /*        SLMDB     *slmdb;
56 /*        int       request;
57 /* DESCRIPTION
58 /*        This module simplifies the LMDB API by hiding recoverable
59 /*        errors from the application.  Details are given in the
60 /*        section "ERROR RECOVERY".
61 /*
62 /*        slmdb_init() performs mandatory initialization before opening
63 /*        an LMDB database. The result value is an LMDB status code
64 /*        (zero in case of success).
65 /*
66 /*        slmdb_open() opens an LMDB database.  The result value is
67 /*        an LMDB status code (zero in case of success).
68 /*
69 /*        slmdb_close() finalizes an optional bulk-mode transaction
70 /*        and closes a successfully-opened LMDB database.  The result
71 /*        value is an LMDB status code (zero in case of success).
72 /*
73 /*        slmdb_get() is an mdb_get() wrapper with automatic error
74 /*        recovery.  The result value is an LMDB status code (zero
75 /*        in case of success).
76 /*
77 /*        slmdb_put() is an mdb_put() wrapper with automatic error
78 /*        recovery.  The result value is an LMDB status code (zero
79 /*        in case of success).
80 /*
81 /*        slmdb_del() is an mdb_del() wrapper with automatic error
82 /*        recovery.  The result value is an LMDB status code (zero
83 /*        in case of success).
84 /*
85 /*        slmdb_cursor_get() is an mdb_cursor_get() wrapper with
86 /*        automatic error recovery.  The result value is an LMDB
87 /*        status code (zero in case of success). This wrapper supports
88 /*        only one cursor per database.
89 /*
90 /*        slmdb_fd() returns the file descriptor for the specified
91 /*        database.  This may be used for file status queries or
92 /*        application-controlled locking.
93 /*
94 /*        slmdb_curr_limit() returns the current database size limit
95 /*        for the specified database.
96 /*
97 /*        slmdb_control() specifies optional features. The result is
98 /*        an LMDB status code (zero in case of success).
99 /*
100 /*        Arguments:
101 /* .IP slmdb
102 /*        Pointer to caller-provided storage.
103 /* .IP curr_limit
104 /*        The initial memory mapping size limit. This limit is
105 /*        automatically increased when the database becomes full.
106 /* .IP size_incr
107 /*        An integer factor by which the memory mapping size limit
108 /*        is increased when the database becomes full.
109 /* .IP hard_limit
110 /*        The upper bound for the memory mapping size limit.
111 /* .IP path
112 /*        LMDB database pathname.
113 /* .IP open_flags
114 /*        Flags that control file open operations. Do not specify
115 /*        locking flags here.
116 /* .IP lmdb_flags
117 /*        Flags that control the LMDB environment. If MDB_NOLOCK is
118 /*        specified, then each slmdb_get() or slmdb_cursor_get() call
119 /*        must be protected with a shared (or exclusive) external lock,
120 /*        and each slmdb_put() or slmdb_del() call must be protected
121 /*        with an exclusive external lock. A lock may be released
122 /*        after the call returns. A writer may atomically downgrade
123 /*        an exclusive lock to shared, but it must obtain an exclusive
124 /*        lock before making another slmdb(3) write request.
125 /* .sp
126 /*        Note: when a database is opened with MDB_NOLOCK, external
127 /*        locks such as fcntl() do not protect slmdb(3) requests
128 /*        within the same process against each other.  If a program
129 /*        cannot avoid making simultaneous slmdb(3) requests, then
130 /*        it must synchronize these requests with in-process locks,
131 /*        in addition to the per-process fcntl(2) locks.
132 /* .IP slmdb_flags
133 /*        Bit-wise OR of zero or more of the following:
134 /* .RS
135 /* .IP SLMDB_FLAG_BULK
136 /*        Open the database and create a "bulk" transaction that is
137 /*        committed when the database is closed. If MDB_NOLOCK is
138 /*        specified, then the entire transaction must be protected
139 /*        with a persistent external lock.  All slmdb_get(), slmdb_put()
140 /*        and slmdb_del() requests will be directed to the "bulk"
141 /*        transaction.
142 /* .RE
143 /* .IP mdb_key
144 /*        Pointer to caller-provided lookup key storage.
145 /* .IP mdb_value
146 /*        Pointer to caller-provided value storage.
147 /* .IP op
148 /*        LMDB cursor operation.
149 /* .IP request
150 /*        The start of a list of (name, value) pairs, terminated with
151 /*        CA_SLMDB_CTL_END.  The following text enumerates the symbolic
152 /*        request names and the corresponding argument types.
153 /* .RS
154 /* .IP "CA_SLMDB_CTL_LONGJMP_FN(void (*)(void *, int))"
155 /*        Call-back function pointer. The function is called to repeat
156 /*        a failed bulk-mode transaction from the start. The arguments
157 /*        are the application context and the setjmp() or sigsetjmp()
158 /*        result value.
159 /* .IP "CA_SLMDB_CTL_NOTIFY_FN(void (*)(void *, int, ...))"
160 /*        Call-back function pointer. The function is called to report
161 /*        successful error recovery. The arguments are the application
162 /*        context, the MDB error code, and additional arguments that
163 /*        depend on the error code.  Details are given in the section
164 /*        "ERROR RECOVERY".
165 /* .IP "CA_SLMDB_CTL_ASSERT_FN(void (*)(void *, const char *))"
166 /*        Call-back function pointer.  The function is called to
167 /*        report an LMDB internal assertion failure. The arguments
168 /*        are the application context, and text that describes the
169 /*        problem.
170 /* .IP "CA_SLMDB_CTL_CB_CONTEXT(void *)"
171 /*        Application context that is passed in call-back function
172 /*        calls.
173 /* .IP "CA_SLMDB_CTL_API_RETRY_LIMIT(int)"
174 /*        How many times to recover from LMDB errors within the
175 /*        execution of a single slmdb(3) API call before giving up.
176 /* .IP "CA_SLMDB_CTL_BULK_RETRY_LIMIT(int)"
177 /*        How many times to recover from a bulk-mode transaction
178 /*        before giving up.
179 /* .RE
180 /* ERROR RECOVERY
181 /* .ad
182 /* .fi
183 /*        This module automatically repeats failed requests after
184 /*        recoverable errors, up to the limits specified with
185 /*        slmdb_control().
186 /*
187 /*        Recoverable errors are reported through an optional
188 /*        notification function specified with slmdb_control().  With
189 /*        recoverable MDB_MAP_FULL and MDB_MAP_RESIZED errors, the
190 /*        additional argument is a size_t value with the updated
191 /*        current database size limit; with recoverable MDB_READERS_FULL
192 /*        errors there is no additional argument.
193 /* BUGS
194 /*        Recovery from MDB_MAP_FULL involves resizing the database
195 /*        memory mapping.  According to LMDB documentation this
196 /*        requires that there is no concurrent activity in the same
197 /*        database by other threads in the same memory address space.
198 /* SEE ALSO
199 /*        lmdb(3) API manpage (currently, non-existent).
200 /* AUTHOR(S)
201 /*        Howard Chu
202 /*        Symas Corporation
203 /*
204 /*        Wietse Venema
205 /*        IBM T.J. Watson Research
206 /*        P.O. Box 704
207 /*        Yorktown Heights, NY 10598, USA
208 /*
209 /*        Wietse Venema
210 /*        Google, Inc.
211 /*        111 8th Avenue
212 /*        New York, NY 10011, USA
213 /*--*/
214 
215  /*
216   * DO NOT include other Postfix-specific header files. This LMDB wrapper
217   * must be usable outside Postfix.
218   */
219 
220 #ifdef HAS_LMDB
221 
222 /* System library. */
223 
224 #include <sys/stat.h>
225 #include <errno.h>
226 #include <fcntl.h>
227 #include <string.h>
228 #include <unistd.h>
229 #include <limits.h>
230 #include <stdarg.h>
231 #include <string.h>
232 #include <stdlib.h>
233 
234 /* Application-specific. */
235 
236 #include <slmdb.h>
237 
238  /*
239   * Minimum LMDB patchlevel.
240   *
241   * LMDB 0.9.11 allows Postfix daemons to log an LMDB error message instead of
242   * falling out of the sky without any explanation. Without such logging,
243   * Postfix with LMDB would be too hard to support.
244   *
245   * LMDB 0.9.10 fixes an information leak where LMDB wrote chunks of up to 4096
246   * bytes of uninitialized heap memory to a database. This was a security
247   * violation because it made information persistent that was not meant to be
248   * persisted, or it was sharing information that was not meant to be shared.
249   *
250   * LMDB 0.9.9 allows Postfix to use external (fcntl()-based) locks, instead of
251   * having to use world-writable LMDB lock files.
252   *
253   * LMDB 0.9.8 allows Postfix to update the database size limit on-the-fly, so
254   * that it can recover from an MDB_MAP_FULL error without having to close
255   * the database. It also allows an application to "pick up" a new database
256   * size limit on-the-fly, so that it can recover from an MDB_MAP_RESIZED
257   * error without having to close the database.
258   *
259   * The database size limit that remains is imposed by the hardware memory
260   * address space (31 or 47 bits, typically) or file system. The LMDB
261   * implementation is supposed to handle databases larger than physical
262   * memory. However, this is not necessarily guaranteed for (bulk)
263   * transactions larger than physical memory.
264   */
265 #if MDB_VERSION_FULL < MDB_VERINT(0, 9, 11)
266 #error "This Postfix version requires LMDB version 0.9.11 or later"
267 #endif
268 
269  /*
270   * Error recovery.
271   *
272   * The purpose of the slmdb(3) API is to hide LMDB quirks (recoverable
273   * MDB_MAP_FULL, MDB_MAP_RESIZED, or MDB_READERS_FULL errors). With these out of the
274   * way, applications can pretend that those quirks don't exist, and focus on
275   * their own job.
276   *
277   * - To recover from a single-transaction LMDB error, each wrapper function
278   * uses tail recursion instead of goto. Since LMDB errors are rare, code
279   * clarity is more important than speed.
280   *
281   * - To recover from a bulk-transaction LMDB error, the error-recovery code
282   * triggers a long jump back into the caller to some pre-arranged point (the
283   * closest thing that C has to exception handling). The application is then
284   * expected to repeat the bulk transaction from scratch.
285   *
286   * When any code aborts a bulk transaction, it must reset slmdb->txn to null to
287   * avoid a use-after-free problem in slmdb_close().
288   */
289 
290  /*
291   * Our default retry attempt limits. We allow a few retries per slmdb(3) API
292   * call for non-bulk transactions. We allow a number of bulk-transaction
293   * retries that is proportional to the memory address space.
294   */
295 #define SLMDB_DEF_API_RETRY_LIMIT 30    /* Retries per slmdb(3) API call */
296 #define SLMDB_DEF_BULK_RETRY_LIMIT \
297         (2 * sizeof(size_t) * CHAR_BIT) /* Retries per bulk-mode transaction */
298 
299  /*
300   * We increment the recursion counter each time we try to recover from
301   * error, and reset the recursion counter when returning to the application
302   * from the slmdb(3) API.
303   */
304 #define SLMDB_API_RETURN(slmdb, status) do { \
305           (slmdb)->api_retry_count = 0; \
306           return (status); \
307     } while (0)
308 
309  /*
310   * With MDB_NOLOCK, the application uses an external lock for inter-process
311   * synchronization. Because the caller may release the external lock after
312   * an SLMDB API call, each SLMDB API function must use a short-lived
313   * transaction unless the transaction is a bulk-mode transaction.
314   */
315 
316 /* slmdb_cursor_close - close cursor and its read transaction */
317 
slmdb_cursor_close(SLMDB * slmdb)318 static void slmdb_cursor_close(SLMDB *slmdb)
319 {
320     MDB_txn *txn;
321 
322     /*
323      * Close the cursor and its read transaction. We can restore it later
324      * from the saved key information.
325      */
326     txn = mdb_cursor_txn(slmdb->cursor);
327     mdb_cursor_close(slmdb->cursor);
328     slmdb->cursor = 0;
329     mdb_txn_abort(txn);
330 }
331 
332 /* slmdb_saved_key_init - initialize saved key info */
333 
slmdb_saved_key_init(SLMDB * slmdb)334 static void slmdb_saved_key_init(SLMDB *slmdb)
335 {
336     slmdb->saved_key.mv_data = 0;
337     slmdb->saved_key.mv_size = 0;
338     slmdb->saved_key_size = 0;
339 }
340 
341 /* slmdb_saved_key_free - destroy saved key info */
342 
slmdb_saved_key_free(SLMDB * slmdb)343 static void slmdb_saved_key_free(SLMDB *slmdb)
344 {
345     free(slmdb->saved_key.mv_data);
346     slmdb_saved_key_init(slmdb);
347 }
348 
/* True when a saved-key buffer is present. */
#define HAVE_SLMDB_SAVED_KEY(db) ((db)->saved_key.mv_data != NULL)
350 
351 /* slmdb_saved_key_assign - copy the saved key */
352 
slmdb_saved_key_assign(SLMDB * slmdb,MDB_val * key_val)353 static int slmdb_saved_key_assign(SLMDB *slmdb, MDB_val *key_val)
354 {
355 
356     /*
357      * Extend the buffer to fit the key, so that we can avoid malloc()
358      * overhead most of the time.
359      */
360     if (slmdb->saved_key_size < key_val->mv_size) {
361           if (slmdb->saved_key.mv_data == 0)
362               slmdb->saved_key.mv_data = malloc(key_val->mv_size);
363           else
364               slmdb->saved_key.mv_data =
365                     realloc(slmdb->saved_key.mv_data, key_val->mv_size);
366           if (slmdb->saved_key.mv_data == 0) {
367               slmdb_saved_key_init(slmdb);
368               return (ENOMEM);
369           } else {
370               slmdb->saved_key_size = key_val->mv_size;
371           }
372     }
373 
374     /*
375      * Copy the key under the cursor.
376      */
377     memcpy(slmdb->saved_key.mv_data, key_val->mv_data, key_val->mv_size);
378     slmdb->saved_key.mv_size = key_val->mv_size;
379     return (0);
380 }
381 
382 /* slmdb_prepare - LMDB-specific (re)initialization before actual access */
383 
slmdb_prepare(SLMDB * slmdb)384 static int slmdb_prepare(SLMDB *slmdb)
385 {
386     int     status = 0;
387 
388     /*
389      * This is called before accessing the database, or after recovery from
390      * an LMDB error. Note: this code cannot recover from errors itself.
391      * slmdb->txn is either the database open() transaction or a
392      * freshly-created bulk-mode transaction. When slmdb_prepare() commits or
393      * aborts commits a transaction, it must set slmdb->txn to null to avoid
394      * a use-after-free error in slmdb_close().
395      *
396      * - With O_TRUNC we make a "drop" request before updating the database.
397      *
398      * - With a bulk-mode transaction we commit when the database is closed.
399      */
400     if (slmdb->open_flags & O_TRUNC) {
401           if ((status = mdb_drop(slmdb->txn, slmdb->dbi, 0)) != 0) {
402               mdb_txn_abort(slmdb->txn);
403               slmdb->txn = 0;
404               return (status);
405           }
406           if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) == 0) {
407               status = mdb_txn_commit(slmdb->txn);
408               slmdb->txn = 0;
409               if (status != 0)
410                     return (status);
411           }
412     } else if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) == 0) {
413           mdb_txn_abort(slmdb->txn);
414           slmdb->txn = 0;
415     }
416     slmdb->api_retry_count = 0;
417     return (status);
418 }
419 
420 /* slmdb_recover - recover from LMDB errors */
421 
slmdb_recover(SLMDB * slmdb,int status)422 static int slmdb_recover(SLMDB *slmdb, int status)
423 {
424     MDB_envinfo info;
425     int     original_status = status;
426 
427     /*
428      * This may be needed in non-MDB_NOLOCK mode. Recovery is rare enough
429      * that we don't care about a few wasted cycles.
430      */
431     if (slmdb->cursor != 0)
432           slmdb_cursor_close(slmdb);
433 
434     /*
435      * Limit the number of recovery attempts per slmdb(3) API request.
436      */
437     if ((slmdb->api_retry_count += 1) >= slmdb->api_retry_limit)
438           return (status);
439 
440     /*
441      * Limit the number of bulk transaction recovery attempts.
442      */
443     if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) != 0
444           && (slmdb->bulk_retry_count += 1) > slmdb->bulk_retry_limit)
445           return (status);
446 
447     /*
448      * Try to clear the error condition.
449      */
450     switch (status) {
451 
452           /*
453            * As of LMDB 0.9.8 when a non-bulk update runs into a "map full"
454            * error, we can resize the environment's memory map and clear the
455            * error condition. The caller should retry immediately.
456            */
457     case MDB_MAP_FULL:
458           /* Can we increase the memory map? Give up if we can't. */
459           if (slmdb->curr_limit < slmdb->hard_limit / slmdb->size_incr) {
460               slmdb->curr_limit = slmdb->curr_limit * slmdb->size_incr;
461           } else if (slmdb->curr_limit < slmdb->hard_limit) {
462               slmdb->curr_limit = slmdb->hard_limit;
463           } else {
464               /* Sorry, we are already maxed out. */
465               break;
466           }
467           if (slmdb->notify_fn)
468               slmdb->notify_fn(slmdb->cb_context, MDB_MAP_FULL,
469                                    slmdb->curr_limit);
470           status = mdb_env_set_mapsize(slmdb->env, slmdb->curr_limit);
471           break;
472 
473           /*
474            * When a writer resizes the database, read-only applications must
475            * increase their LMDB memory map size limit, too. Otherwise, they
476            * won't be able to read a table after it grows.
477            *
478            * As of LMDB 0.9.8 we can import the new memory map size limit into the
479            * database environment by calling mdb_env_set_mapsize() with a zero
480            * size argument. Then we extract the map size limit for later use.
481            * The caller should retry immediately.
482            */
483     case MDB_MAP_RESIZED:
484           if ((status = mdb_env_set_mapsize(slmdb->env, 0)) == 0) {
485               /* Do not panic. Maps may shrink after bulk update. */
486               mdb_env_info(slmdb->env, &info);
487               slmdb->curr_limit = info.me_mapsize;
488               if (slmdb->notify_fn)
489                     slmdb->notify_fn(slmdb->cb_context, MDB_MAP_RESIZED,
490                                          slmdb->curr_limit);
491           }
492           break;
493 
494           /*
495            * What is it with these built-in hard limits that cause systems to
496            * stop when demand is at its highest? When the system is under
497            * stress it should slow down and keep making progress.
498            */
499     case MDB_READERS_FULL:
500           if (slmdb->notify_fn)
501               slmdb->notify_fn(slmdb->cb_context, MDB_READERS_FULL);
502           sleep(1);
503           status = 0;
504           break;
505 
506           /*
507            * We can't solve this problem. The application should terminate with
508            * a fatal run-time error and the program should be re-run later.
509            */
510     default:
511           break;
512     }
513 
514     /*
515      * If we cleared the error condition for a non-bulk transaction, return a
516      * success status. The caller should retry the failed operation
517      * immediately.
518      */
519     if (status == 0 && (slmdb->slmdb_flags & SLMDB_FLAG_BULK) != 0) {
520 
521           /*
522            * We cleared the error condition for a bulk transaction. If the
523            * transaction is not restartable, return the original error. The
524            * caller should terminate with a fatal run-time error, and the
525            * program should be re-run later.
526            */
527           if (slmdb->longjmp_fn == 0)
528               return (original_status);
529 
530           /*
531            * Rebuild a bulk transaction from scratch, by making a long jump
532            * back into the caller at some pre-arranged point. In MDB_NOLOCK
533            * mode, there is no need to upgrade a lock to "exclusive", because a
534            * failed write transaction has no side effects.
535            */
536           if ((status = mdb_txn_begin(slmdb->env, (MDB_txn *) 0,
537                                             slmdb->lmdb_flags & MDB_RDONLY,
538                                             &slmdb->txn)) == 0
539               && (status = slmdb_prepare(slmdb)) == 0)
540               slmdb->longjmp_fn(slmdb->cb_context, 1);
541     }
542     return (status);
543 }
544 
545 /* slmdb_txn_begin - mdb_txn_begin() wrapper with LMDB error recovery */
546 
slmdb_txn_begin(SLMDB * slmdb,int rdonly,MDB_txn ** txn)547 static int slmdb_txn_begin(SLMDB *slmdb, int rdonly, MDB_txn **txn)
548 {
549     int     status;
550 
551     if ((status = mdb_txn_begin(slmdb->env, (MDB_txn *) 0, rdonly, txn)) != 0
552           && (status = slmdb_recover(slmdb, status)) == 0)
553           status = slmdb_txn_begin(slmdb, rdonly, txn);
554 
555     return (status);
556 }
557 
558 /* slmdb_get - mdb_get() wrapper with LMDB error recovery */
559 
slmdb_get(SLMDB * slmdb,MDB_val * mdb_key,MDB_val * mdb_value)560 int     slmdb_get(SLMDB *slmdb, MDB_val *mdb_key, MDB_val *mdb_value)
561 {
562     MDB_txn *txn;
563     int     status;
564 
565     /*
566      * Start a read transaction if there's no bulk-mode txn.
567      */
568     if (slmdb->txn)
569           txn = slmdb->txn;
570     else if ((status = slmdb_txn_begin(slmdb, MDB_RDONLY, &txn)) != 0)
571           SLMDB_API_RETURN(slmdb, status);
572 
573     /*
574      * Do the lookup.
575      */
576     if ((status = mdb_get(txn, slmdb->dbi, mdb_key, mdb_value)) != 0
577           && status != MDB_NOTFOUND) {
578           mdb_txn_abort(txn);
579           if (txn == slmdb->txn)
580               slmdb->txn = 0;
581           if ((status = slmdb_recover(slmdb, status)) == 0)
582               status = slmdb_get(slmdb, mdb_key, mdb_value);
583           SLMDB_API_RETURN(slmdb, status);
584     }
585 
586     /*
587      * Close the read txn if it's not the bulk-mode txn.
588      */
589     if (slmdb->txn == 0)
590           mdb_txn_abort(txn);
591 
592     SLMDB_API_RETURN(slmdb, status);
593 }
594 
595 /* slmdb_put - mdb_put() wrapper with LMDB error recovery */
596 
slmdb_put(SLMDB * slmdb,MDB_val * mdb_key,MDB_val * mdb_value,int flags)597 int     slmdb_put(SLMDB *slmdb, MDB_val *mdb_key,
598                               MDB_val *mdb_value, int flags)
599 {
600     MDB_txn *txn;
601     int     status;
602 
603     /*
604      * Start a write transaction if there's no bulk-mode txn.
605      */
606     if (slmdb->txn)
607           txn = slmdb->txn;
608     else if ((status = slmdb_txn_begin(slmdb, 0, &txn)) != 0)
609           SLMDB_API_RETURN(slmdb, status);
610 
611     /*
612      * Do the update.
613      */
614     if ((status = mdb_put(txn, slmdb->dbi, mdb_key, mdb_value, flags)) != 0) {
615           if (status != MDB_KEYEXIST) {
616               mdb_txn_abort(txn);
617               if (txn == slmdb->txn)
618                     slmdb->txn = 0;
619               if ((status = slmdb_recover(slmdb, status)) == 0)
620                     status = slmdb_put(slmdb, mdb_key, mdb_value, flags);
621               SLMDB_API_RETURN(slmdb, status);
622           } else {
623               /* Abort non-bulk transaction only. */
624               if (slmdb->txn == 0)
625                     mdb_txn_abort(txn);
626           }
627     }
628 
629     /*
630      * Commit the transaction if it's not the bulk-mode txn.
631      */
632     if (status == 0 && slmdb->txn == 0 && (status = mdb_txn_commit(txn)) != 0
633           && (status = slmdb_recover(slmdb, status)) == 0)
634           status = slmdb_put(slmdb, mdb_key, mdb_value, flags);
635 
636     SLMDB_API_RETURN(slmdb, status);
637 }
638 
639 /* slmdb_del - mdb_del() wrapper with LMDB error recovery */
640 
slmdb_del(SLMDB * slmdb,MDB_val * mdb_key)641 int     slmdb_del(SLMDB *slmdb, MDB_val *mdb_key)
642 {
643     MDB_txn *txn;
644     int     status;
645 
646     /*
647      * Start a write transaction if there's no bulk-mode txn.
648      */
649     if (slmdb->txn)
650           txn = slmdb->txn;
651     else if ((status = slmdb_txn_begin(slmdb, 0, &txn)) != 0)
652           SLMDB_API_RETURN(slmdb, status);
653 
654     /*
655      * Do the update.
656      */
657     if ((status = mdb_del(txn, slmdb->dbi, mdb_key, (MDB_val *) 0)) != 0) {
658           if (status != MDB_NOTFOUND) {
659               mdb_txn_abort(txn);
660               if (txn == slmdb->txn)
661                     slmdb->txn = 0;
662               if ((status = slmdb_recover(slmdb, status)) == 0)
663                     status = slmdb_del(slmdb, mdb_key);
664               SLMDB_API_RETURN(slmdb, status);
665           } else {
666               /* Abort non-bulk transaction only. */
667               if (slmdb->txn == 0)
668                     mdb_txn_abort(txn);
669           }
670     }
671 
672     /*
673      * Commit the transaction if it's not the bulk-mode txn.
674      */
675     if (status == 0 && slmdb->txn == 0 && (status = mdb_txn_commit(txn)) != 0
676           && (status = slmdb_recover(slmdb, status)) == 0)
677           status = slmdb_del(slmdb, mdb_key);
678 
679     SLMDB_API_RETURN(slmdb, status);
680 }
681 
682 /* slmdb_cursor_get - mdb_cursor_get() wrapper with LMDB error recovery */
683 
slmdb_cursor_get(SLMDB * slmdb,MDB_val * mdb_key,MDB_val * mdb_value,MDB_cursor_op op)684 int     slmdb_cursor_get(SLMDB *slmdb, MDB_val *mdb_key,
685                                        MDB_val *mdb_value, MDB_cursor_op op)
686 {
687     MDB_txn *txn;
688     int     status = 0;
689 
690     /*
691      * TODO: figure how we would recover a failing bulk transaction.
692      */
693     if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) != 0) {
694           if (slmdb->assert_fn)
695               slmdb->assert_fn(slmdb->cb_context,
696                          "slmdb_cursor_get: bulk transaction is not supported");
697           return (MDB_PANIC);
698     }
699 
700     /*
701      * Open a read transaction and cursor if needed.
702      */
703     if (slmdb->cursor == 0) {
704           if ((status = slmdb_txn_begin(slmdb, MDB_RDONLY, &txn)) != 0)
705               SLMDB_API_RETURN(slmdb, status);
706           if ((status = mdb_cursor_open(txn, slmdb->dbi, &slmdb->cursor)) != 0) {
707               mdb_txn_abort(txn);
708               if ((status = slmdb_recover(slmdb, status)) == 0)
709                     status = slmdb_cursor_get(slmdb, mdb_key, mdb_value, op);
710               SLMDB_API_RETURN(slmdb, status);
711           }
712 
713           /*
714            * Restore the cursor position from the saved key information.
715            */
716           if (HAVE_SLMDB_SAVED_KEY(slmdb) && op != MDB_FIRST)
717               status = mdb_cursor_get(slmdb->cursor, &slmdb->saved_key,
718                                             (MDB_val *) 0, MDB_SET);
719     }
720 
721     /*
722      * Database lookup.
723      */
724     if (status == 0)
725           status = mdb_cursor_get(slmdb->cursor, mdb_key, mdb_value, op);
726 
727     /*
728      * Save the cursor position if successful. This can fail only with
729      * ENOMEM.
730      *
731      * Close the cursor read transaction if in MDB_NOLOCK mode, because the
732      * caller may release the external lock after we return.
733      */
734     if (status == 0) {
735           status = slmdb_saved_key_assign(slmdb, mdb_key);
736           if (slmdb->lmdb_flags & MDB_NOLOCK)
737               slmdb_cursor_close(slmdb);
738     }
739 
740     /*
741      * Handle end-of-database or other error.
742      */
743     else {
744           /* Do not hand-optimize out the slmdb_cursor_close() calls below. */
745           if (status == MDB_NOTFOUND) {
746               slmdb_cursor_close(slmdb);
747               if (HAVE_SLMDB_SAVED_KEY(slmdb))
748                     slmdb_saved_key_free(slmdb);
749           } else {
750               slmdb_cursor_close(slmdb);
751               if ((status = slmdb_recover(slmdb, status)) == 0)
752                     status = slmdb_cursor_get(slmdb, mdb_key, mdb_value, op);
753               SLMDB_API_RETURN(slmdb, status);
754               /* Do not hand-optimize out the above return statement. */
755           }
756     }
757     SLMDB_API_RETURN(slmdb, status);
758 }
759 
760 /* slmdb_assert_cb - report LMDB assertion failure */
761 
slmdb_assert_cb(MDB_env * env,const char * text)762 static void slmdb_assert_cb(MDB_env *env, const char *text)
763 {
764     SLMDB  *slmdb = (SLMDB *) mdb_env_get_userctx(env);
765 
766     if (slmdb->assert_fn)
767           slmdb->assert_fn(slmdb->cb_context, text);
768 }
769 
770 /* slmdb_control - control optional settings */
771 
slmdb_control(SLMDB * slmdb,int first,...)772 int     slmdb_control(SLMDB *slmdb, int first,...)
773 {
774     va_list ap;
775     int     status = 0;
776     int     reqno;
777     int     rc;
778 
779     va_start(ap, first);
780     for (reqno = first; status == 0 && reqno != SLMDB_CTL_END; reqno = va_arg(ap, int)) {
781           switch (reqno) {
782           case SLMDB_CTL_LONGJMP_FN:
783               slmdb->longjmp_fn = va_arg(ap, SLMDB_LONGJMP_FN);
784               break;
785           case SLMDB_CTL_NOTIFY_FN:
786               slmdb->notify_fn = va_arg(ap, SLMDB_NOTIFY_FN);
787               break;
788           case SLMDB_CTL_ASSERT_FN:
789               slmdb->assert_fn = va_arg(ap, SLMDB_ASSERT_FN);
790               if ((rc = mdb_env_set_userctx(slmdb->env, (void *) slmdb)) != 0
791                || (rc = mdb_env_set_assert(slmdb->env, slmdb_assert_cb)) != 0)
792                     status = rc;
793               break;
794           case SLMDB_CTL_CB_CONTEXT:
795               slmdb->cb_context = va_arg(ap, void *);
796               break;
797           case SLMDB_CTL_API_RETRY_LIMIT:
798               slmdb->api_retry_limit = va_arg(ap, int);
799               break;
800           case SLMDB_CTL_BULK_RETRY_LIMIT:
801               slmdb->bulk_retry_limit = va_arg(ap, int);
802               break;
803           default:
804               status = errno = EINVAL;
805               break;
806           }
807     }
808     va_end(ap);
809     return (status);
810 }
811 
812 /* slmdb_close - wrapper with LMDB error recovery */
813 
slmdb_close(SLMDB * slmdb)814 int     slmdb_close(SLMDB *slmdb)
815 {
816     int     status = 0;
817 
818     /*
819      * Finish an open bulk transaction. If slmdb_recover() returns after a
820      * bulk-transaction error, then it was unable to clear the error
821      * condition, or unable to restart the bulk transaction.
822      */
823     if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) != 0 && slmdb->txn != 0
824           && (status = mdb_txn_commit(slmdb->txn)) != 0)
825           status = slmdb_recover(slmdb, status);
826 
827     /*
828      * Clean up after an unfinished sequence() operation.
829      */
830     if (slmdb->cursor != 0)
831           slmdb_cursor_close(slmdb);
832 
833     mdb_env_close(slmdb->env);
834 
835     /*
836      * Clean up the saved key information.
837      */
838     if (HAVE_SLMDB_SAVED_KEY(slmdb))
839           slmdb_saved_key_free(slmdb);
840 
841     SLMDB_API_RETURN(slmdb, status);
842 }
843 
844 /* slmdb_init - mandatory initialization */
845 
slmdb_init(SLMDB * slmdb,size_t curr_limit,int size_incr,size_t hard_limit)846 int     slmdb_init(SLMDB *slmdb, size_t curr_limit, int size_incr,
847                                size_t hard_limit)
848 {
849 
850     /*
851      * This is a separate operation to keep the slmdb_open() API simple.
852      * Don't allocate resources here. Just store control information,
853      */
854     slmdb->curr_limit = curr_limit;
855     slmdb->size_incr = size_incr;
856     slmdb->hard_limit = hard_limit;
857 
858     return (MDB_SUCCESS);
859 }
860 
861 /* slmdb_open - open wrapped LMDB database */
862 
slmdb_open(SLMDB * slmdb,const char * path,int open_flags,int lmdb_flags,int slmdb_flags)863 int     slmdb_open(SLMDB *slmdb, const char *path, int open_flags,
864                                int lmdb_flags, int slmdb_flags)
865 {
866     struct stat st;
867     MDB_env *env;
868     MDB_txn *txn;
869     MDB_dbi dbi;
870     int     db_fd;
871     int     status;
872 
873     /*
874      * Create LMDB environment.
875      */
876     if ((status = mdb_env_create(&env)) != 0)
877           return (status);
878 
879     /*
880      * Make sure that the memory map has room to store and commit an initial
881      * "drop" transaction as well as fixed database metadata. We have no way
882      * to recover from errors before the first application-level I/O request.
883      */
884 #define SLMDB_FUDGE      10240
885 
886     if (slmdb->curr_limit < SLMDB_FUDGE)
887           slmdb->curr_limit = SLMDB_FUDGE;
888     if (stat(path, &st) == 0
889           && st.st_size > slmdb->curr_limit - SLMDB_FUDGE) {
890           if (st.st_size > slmdb->hard_limit)
891               slmdb->hard_limit = st.st_size;
892           if (st.st_size < slmdb->hard_limit - SLMDB_FUDGE)
893               slmdb->curr_limit = st.st_size + SLMDB_FUDGE;
894           else
895               slmdb->curr_limit = slmdb->hard_limit;
896     }
897 
898     /*
899      * mdb_open() requires a txn, but since the default DB always exists in
900      * an LMDB environment, we usually don't need to do anything else with
901      * the txn. It is currently used for truncate and for bulk transactions.
902      */
903     if ((status = mdb_env_set_mapsize(env, slmdb->curr_limit)) != 0
904           || (status = mdb_env_open(env, path, lmdb_flags, 0644)) != 0
905           || (status = mdb_txn_begin(env, (MDB_txn *) 0,
906                                            lmdb_flags & MDB_RDONLY, &txn)) != 0
907           || (status = mdb_open(txn, (const char *) 0, 0, &dbi)) != 0
908           || (status = mdb_env_get_fd(env, &db_fd)) != 0) {
909           mdb_env_close(env);
910           return (status);
911     }
912 
913     /*
914      * Bundle up.
915      */
916     slmdb->open_flags = open_flags;
917     slmdb->lmdb_flags = lmdb_flags;
918     slmdb->slmdb_flags = slmdb_flags;
919     slmdb->env = env;
920     slmdb->dbi = dbi;
921     slmdb->db_fd = db_fd;
922     slmdb->cursor = 0;
923     slmdb_saved_key_init(slmdb);
924     slmdb->api_retry_count = 0;
925     slmdb->bulk_retry_count = 0;
926     slmdb->api_retry_limit = SLMDB_DEF_API_RETRY_LIMIT;
927     slmdb->bulk_retry_limit = SLMDB_DEF_BULK_RETRY_LIMIT;
928     slmdb->longjmp_fn = 0;
929     slmdb->notify_fn = 0;
930     slmdb->assert_fn = 0;
931     slmdb->cb_context = 0;
932     slmdb->txn = txn;
933 
934     if ((status = slmdb_prepare(slmdb)) != 0)
935           mdb_env_close(env);
936 
937     return (status);
938 }
939 
940 #endif
941