1 /*        Id: dbm.c,v 1.6 2018/11/19 19:22:07 schwarze Exp  */
2 /*
3  * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  *
17  * Map-based version of the mandoc database, for read-only access.
18  * The interface is defined in "dbm.h".
19  */
20 #include "config.h"
21 
22 #include <assert.h>
23 #if HAVE_ENDIAN
24 #include <endian.h>
25 #elif HAVE_SYS_ENDIAN
26 #include <sys/endian.h>
27 #elif HAVE_NTOHL
28 #include <arpa/inet.h>
29 #endif
30 #if HAVE_ERR
31 #include <err.h>
32 #endif
33 #include <errno.h>
34 #include <regex.h>
35 #include <stdint.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 
40 #include "mansearch.h"
41 #include "dbm_map.h"
42 #include "dbm.h"
43 
/*
 * One entry of a per-macro value table inside the mapped database.
 * Both members are stored offsets; this file only ever hands them
 * to dbm_get() to obtain usable pointers.
 */
struct macro {
	int32_t	 value;	/* dbm_get() yields the value string */
	int32_t	 pages;	/* dbm_get() yields a 0-terminated int32_t list */
};
48 
/*
 * One entry of the page table inside the mapped database.
 * Every member is a stored offset that is resolved with dbm_get();
 * an arch member of 0 means that the page has no architecture list.
 */
struct page {
	int32_t	 name;	/* list of names, each preceded by one byte */
	int32_t	 sect;	/* list of section strings */
	int32_t	 arch;	/* list of architecture strings, or 0 */
	int32_t	 desc;	/* single description string */
	int32_t	 file;	/* file name data */
};
56 
/*
 * Kind of page iteration currently in progress.
 * dbm_page_next() uses it to pick the worker function to resume.
 */
enum iter {
	ITER_NONE  = 0,	/* no iteration active */
	ITER_NAME  = 1,	/* page_bytitle() over names */
	ITER_SECT  = 2,	/* page_bytitle() over sections */
	ITER_ARCH  = 3,	/* page_byarch() */
	ITER_DESC  = 4,	/* page_bytitle() over descriptions */
	ITER_MACRO = 5	/* page_bymacro() */
};
65 
66 static struct macro *macros[MACRO_MAX];
67 static int32_t                 nvals[MACRO_MAX];
68 static struct page  *pages;
69 static int32_t                 npages;
70 static enum iter     iteration;
71 
/*
 * Iteration workers.  Each one starts a new iteration when given
 * a non-NULL match (macro_bypage: a non-zero page) and otherwise
 * continues the iteration started earlier.
 */
static struct dbm_res	 page_bytitle(enum iter arg_iter,
			    const struct dbm_match *arg_match);
static struct dbm_res	 page_byarch(const struct dbm_match *arg_match);
static struct dbm_res	 page_bymacro(int32_t arg_im,
			    const struct dbm_match *arg_match);
static char		*macro_bypage(int32_t arg_im, int32_t arg_ip);
76 
77 
78 /*** top level functions **********************************************/
79 
80 /*
81  * Open a disk-based mandoc database for read-only access.
82  * Map the pages and macros[] arrays.
83  * Return 0 on success.  Return -1 and set errno on failure.
84  */
85 int
dbm_open(const char * fname)86 dbm_open(const char *fname)
87 {
88           const int32_t       *mp, *ep;
89           int32_t              im;
90 
91           if (dbm_map(fname) == -1)
92                     return -1;
93 
94           if ((npages = be32toh(*dbm_getint(4))) < 0) {
95                     warnx("dbm_open(%s): Invalid number of pages: %d",
96                         fname, npages);
97                     goto fail;
98           }
99           pages = (struct page *)dbm_getint(5);
100 
101           if ((mp = dbm_get(*dbm_getint(2))) == NULL) {
102                     warnx("dbm_open(%s): Invalid offset of macros array", fname);
103                     goto fail;
104           }
105           if (be32toh(*mp) != MACRO_MAX) {
106                     warnx("dbm_open(%s): Invalid number of macros: %d",
107                         fname, be32toh(*mp));
108                     goto fail;
109           }
110           for (im = 0; im < MACRO_MAX; im++) {
111                     if ((ep = dbm_get(*++mp)) == NULL) {
112                               warnx("dbm_open(%s): Invalid offset of macro %d",
113                                   fname, im);
114                               goto fail;
115                     }
116                     nvals[im] = be32toh(*ep);
117                     macros[im] = (struct macro *)__UNCONST(++ep);
118           }
119           return 0;
120 
121 fail:
122           dbm_unmap();
123           errno = EFTYPE;
124           return -1;
125 }
126 
127 void
dbm_close(void)128 dbm_close(void)
129 {
130           dbm_unmap();
131 }
132 
133 
134 /*** functions for handling pages *************************************/
135 
136 int32_t
dbm_page_count(void)137 dbm_page_count(void)
138 {
139           return npages;
140 }
141 
142 /*
143  * Give the caller pointers to the data for one manual page.
144  */
145 struct dbm_page *
dbm_page_get(int32_t ip)146 dbm_page_get(int32_t ip)
147 {
148           static struct dbm_page         res;
149 
150           assert(ip >= 0);
151           assert(ip < npages);
152           res.name = dbm_get(pages[ip].name);
153           if (res.name == NULL)
154                     res.name = "(NULL)\0";
155           res.sect = dbm_get(pages[ip].sect);
156           if (res.sect == NULL)
157                     res.sect = "(NULL)\0";
158           res.arch = pages[ip].arch ? dbm_get(pages[ip].arch) : NULL;
159           res.desc = dbm_get(pages[ip].desc);
160           if (res.desc == NULL)
161                     res.desc = "(NULL)";
162           res.file = dbm_get(pages[ip].file);
163           if (res.file == NULL)
164                     res.file = " (NULL)\0";
165           res.addr = dbm_addr(pages + ip);
166           return &res;
167 }
168 
169 /*
170  * Functions to start filtered iterations over manual pages.
171  */
172 void
dbm_page_byname(const struct dbm_match * match)173 dbm_page_byname(const struct dbm_match *match)
174 {
175           assert(match != NULL);
176           page_bytitle(ITER_NAME, match);
177 }
178 
179 void
dbm_page_bysect(const struct dbm_match * match)180 dbm_page_bysect(const struct dbm_match *match)
181 {
182           assert(match != NULL);
183           page_bytitle(ITER_SECT, match);
184 }
185 
/*
 * Start iterating over the pages having an architecture accepted
 * by match.  Results are retrieved with dbm_page_next().
 */
void
dbm_page_byarch(const struct dbm_match *match)
{
	assert(match != NULL);
	/* The start call never finds a page; ignore its result. */
	(void)page_byarch(match);
}
192 
193 void
dbm_page_bydesc(const struct dbm_match * match)194 dbm_page_bydesc(const struct dbm_match *match)
195 {
196           assert(match != NULL);
197           page_bytitle(ITER_DESC, match);
198 }
199 
200 void
dbm_page_bymacro(int32_t im,const struct dbm_match * match)201 dbm_page_bymacro(int32_t im, const struct dbm_match *match)
202 {
203           assert(im >= 0);
204           assert(im < MACRO_MAX);
205           assert(match != NULL);
206           page_bymacro(im, match);
207 }
208 
209 /*
210  * Return the number of the next manual page in the current iteration.
211  */
212 struct dbm_res
dbm_page_next(void)213 dbm_page_next(void)
214 {
215           struct dbm_res                           res = {-1, 0};
216 
217           switch(iteration) {
218           case ITER_NONE:
219                     return res;
220           case ITER_ARCH:
221                     return page_byarch(NULL);
222           case ITER_MACRO:
223                     return page_bymacro(0, NULL);
224           default:
225                     return page_bytitle(iteration, NULL);
226           }
227 }
228 
229 /*
230  * Functions implementing the iteration over manual pages.
231  */
232 static struct dbm_res
page_bytitle(enum iter arg_iter,const struct dbm_match * arg_match)233 page_bytitle(enum iter arg_iter, const struct dbm_match *arg_match)
234 {
235           static const struct dbm_match *match;
236           static const char             *cp;
237           static int32_t                           ip;
238           struct dbm_res                           res = {-1, 0};
239 
240           assert(arg_iter == ITER_NAME || arg_iter == ITER_DESC ||
241               arg_iter == ITER_SECT);
242 
243           /* Initialize for a new iteration. */
244 
245           if (arg_match != NULL) {
246                     iteration = arg_iter;
247                     match = arg_match;
248                     switch (iteration) {
249                     case ITER_NAME:
250                               cp = dbm_get(pages[0].name);
251                               break;
252                     case ITER_SECT:
253                               cp = dbm_get(pages[0].sect);
254                               break;
255                     case ITER_DESC:
256                               cp = dbm_get(pages[0].desc);
257                               break;
258                     default:
259                               abort();
260                     }
261                     if (cp == NULL) {
262                               iteration = ITER_NONE;
263                               match = NULL;
264                               cp = NULL;
265                               ip = npages;
266                     } else
267                               ip = 0;
268                     return res;
269           }
270 
271           /* Search for a name. */
272 
273           while (ip < npages) {
274                     if (iteration == ITER_NAME)
275                               cp++;
276                     if (dbm_match(match, cp))
277                               break;
278                     cp = strchr(cp, '\0') + 1;
279                     if (iteration == ITER_DESC)
280                               ip++;
281                     else if (*cp == '\0') {
282                               cp++;
283                               ip++;
284                     }
285           }
286 
287           /* Reached the end without a match. */
288 
289           if (ip == npages) {
290                     iteration = ITER_NONE;
291                     match = NULL;
292                     cp = NULL;
293                     return res;
294           }
295 
296           /* Found a match; save the quality for later retrieval. */
297 
298           res.page = ip;
299           res.bits = iteration == ITER_NAME ? cp[-1] : 0;
300 
301           /* Skip the remaining names of this page. */
302 
303           if (++ip < npages) {
304                     do {
305                               cp++;
306                     } while (cp[-1] != '\0' ||
307                         (iteration != ITER_DESC && cp[-2] != '\0'));
308           }
309           return res;
310 }
311 
312 static struct dbm_res
page_byarch(const struct dbm_match * arg_match)313 page_byarch(const struct dbm_match *arg_match)
314 {
315           static const struct dbm_match *match;
316           struct dbm_res                           res = {-1, 0};
317           static int32_t                           ip;
318           const char                              *cp;
319 
320           /* Initialize for a new iteration. */
321 
322           if (arg_match != NULL) {
323                     iteration = ITER_ARCH;
324                     match = arg_match;
325                     ip = 0;
326                     return res;
327           }
328 
329           /* Search for an architecture. */
330 
331           for ( ; ip < npages; ip++)
332                     if (pages[ip].arch)
333                               for (cp = dbm_get(pages[ip].arch);
334                                   *cp != '\0';
335                                   cp = strchr(cp, '\0') + 1)
336                                         if (dbm_match(match, cp)) {
337                                                   res.page = ip++;
338                                                   return res;
339                                         }
340 
341           /* Reached the end without a match. */
342 
343           iteration = ITER_NONE;
344           match = NULL;
345           return res;
346 }
347 
348 static struct dbm_res
page_bymacro(int32_t arg_im,const struct dbm_match * arg_match)349 page_bymacro(int32_t arg_im, const struct dbm_match *arg_match)
350 {
351           static const struct dbm_match *match;
352           static const int32_t                    *pp;
353           static const char             *cp;
354           static int32_t                           im, iv;
355           struct dbm_res                           res = {-1, 0};
356 
357           assert(im >= 0);
358           assert(im < MACRO_MAX);
359 
360           /* Initialize for a new iteration. */
361 
362           if (arg_match != NULL) {
363                     iteration = ITER_MACRO;
364                     match = arg_match;
365                     im = arg_im;
366                     cp = nvals[im] ? dbm_get(macros[im]->value) : NULL;
367                     pp = NULL;
368                     iv = -1;
369                     return res;
370           }
371           if (iteration != ITER_MACRO)
372                     return res;
373 
374           /* Find the next matching macro value. */
375 
376           while (pp == NULL || *pp == 0) {
377                     if (++iv == nvals[im]) {
378                               iteration = ITER_NONE;
379                               return res;
380                     }
381                     if (iv)
382                               cp = strchr(cp, '\0') + 1;
383                     if (dbm_match(match, cp))
384                               pp = dbm_get(macros[im][iv].pages);
385           }
386 
387           /* Found a matching page. */
388 
389           res.page = (struct page *)dbm_get(*pp++) - pages;
390           return res;
391 }
392 
393 
394 /*** functions for handling macros ************************************/
395 
396 int32_t
dbm_macro_count(int32_t im)397 dbm_macro_count(int32_t im)
398 {
399           assert(im >= 0);
400           assert(im < MACRO_MAX);
401           return nvals[im];
402 }
403 
404 struct dbm_macro *
dbm_macro_get(int32_t im,int32_t iv)405 dbm_macro_get(int32_t im, int32_t iv)
406 {
407           static struct dbm_macro macro;
408 
409           assert(im >= 0);
410           assert(im < MACRO_MAX);
411           assert(iv >= 0);
412           assert(iv < nvals[im]);
413           macro.value = dbm_get(macros[im][iv].value);
414           macro.pp = dbm_get(macros[im][iv].pages);
415           return &macro;
416 }
417 
418 /*
419  * Filtered iteration over macro entries.
420  */
421 void
dbm_macro_bypage(int32_t im,int32_t ip)422 dbm_macro_bypage(int32_t im, int32_t ip)
423 {
424           assert(im >= 0);
425           assert(im < MACRO_MAX);
426           assert(ip != 0);
427           macro_bypage(im, ip);
428 }
429 
430 char *
dbm_macro_next(void)431 dbm_macro_next(void)
432 {
433           return macro_bypage(MACRO_MAX, 0);
434 }
435 
436 static char *
macro_bypage(int32_t arg_im,int32_t arg_ip)437 macro_bypage(int32_t arg_im, int32_t arg_ip)
438 {
439           static const int32_t          *pp;
440           static int32_t                 im, ip, iv;
441 
442           /* Initialize for a new iteration. */
443 
444           if (arg_im < MACRO_MAX && arg_ip != 0) {
445                     im = arg_im;
446                     ip = arg_ip;
447                     pp = dbm_get(macros[im]->pages);
448                     iv = 0;
449                     return NULL;
450           }
451           if (im >= MACRO_MAX)
452                     return NULL;
453 
454           /* Search for the next value. */
455 
456           while (iv < nvals[im]) {
457                     if (*pp == ip)
458                               break;
459                     if (*pp == 0)
460                               iv++;
461                     pp++;
462           }
463 
464           /* Reached the end without a match. */
465 
466           if (iv == nvals[im]) {
467                     im = MACRO_MAX;
468                     ip = 0;
469                     pp = NULL;
470                     return NULL;
471           }
472 
473           /* Found a match; skip the remaining pages of this entry. */
474 
475           if (++iv < nvals[im])
476                     while (*pp++ != 0)
477                               continue;
478 
479           return dbm_get(macros[im][iv - 1].value);
480 }
481