xref: /dragonfly/contrib/mdocml/dba.c (revision 54ba96075f5891e4574304da6ba88f1a1afe520b)
1 /*        $Id: dba.c,v 1.10 2017/02/17 14:43:54 schwarze Exp $ */
2 /*
3  * Copyright (c) 2016, 2017 Ingo Schwarze <schwarze@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  *
17  * Allocation-based version of the mandoc database, for read-write access.
18  * The interface is defined in "dba.h".
19  */
20 #include "config.h"
21 
22 #include <sys/types.h>
23 #if HAVE_ENDIAN
24 #include <endian.h>
25 #elif HAVE_SYS_ENDIAN
26 #include <sys/endian.h>
27 #elif HAVE_NTOHL
28 #include <arpa/inet.h>
29 #endif
30 #include <errno.h>
31 #include <stddef.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 
37 #include "mandoc_aux.h"
38 #include "mandoc_ohash.h"
39 #include "mansearch.h"
40 #include "dba_write.h"
41 #include "dba_array.h"
42 #include "dba.h"
43 
/*
 * One distinct value of a macro key.  Entries live in the per-macro
 * hash table, which is keyed on the inline string at the end.
 */
struct macro_entry {
	/* Pages that reference this value. */
	struct dba_array	*pages;
	/* NUL-terminated macro value; allocated together with the entry. */
	char			 value[];
};
48 
/* Helpers for the pages table. */
static void	*prepend(const char *instr, char inbyte);
static void	 dba_pages_write(struct dba_array *pages);
static int	 compare_names(const void *vp1, const void *vp2);
static int	 compare_strings(const void *vp1, const void *vp2);

/* Helpers for the macro tables. */
static struct macro_entry
		*get_macro_entry(struct ohash *macro, const char *value,
		    int32_t np);
static void	 dba_macros_write(struct dba_array *macros);
static void	 dba_macro_write(struct ohash *macro);
static int	 compare_entries(const void *vp1, const void *vp2);
59 
60 
61 /*** top-level functions **********************************************/
62 
63 struct dba *
dba_new(int32_t npages)64 dba_new(int32_t npages)
65 {
66           struct dba          *dba;
67           struct ohash        *macro;
68           int32_t              im;
69 
70           dba = mandoc_malloc(sizeof(*dba));
71           dba->pages = dba_array_new(npages, DBA_GROW);
72           dba->macros = dba_array_new(MACRO_MAX, 0);
73           for (im = 0; im < MACRO_MAX; im++) {
74                     macro = mandoc_malloc(sizeof(*macro));
75                     mandoc_ohash_init(macro, 4,
76                         offsetof(struct macro_entry, value));
77                     dba_array_set(dba->macros, im, macro);
78           }
79           return dba;
80 }
81 
82 void
dba_free(struct dba * dba)83 dba_free(struct dba *dba)
84 {
85           struct dba_array    *page;
86           struct ohash                  *macro;
87           struct macro_entry  *entry;
88           unsigned int                   slot;
89 
90           dba_array_FOREACH(dba->macros, macro) {
91                     for (entry = ohash_first(macro, &slot); entry != NULL;
92                          entry = ohash_next(macro, &slot)) {
93                               dba_array_free(entry->pages);
94                               free(entry);
95                     }
96                     ohash_delete(macro);
97                     free(macro);
98           }
99           dba_array_free(dba->macros);
100 
101           dba_array_undel(dba->pages);
102           dba_array_FOREACH(dba->pages, page) {
103                     dba_array_free(dba_array_get(page, DBP_NAME));
104                     dba_array_free(dba_array_get(page, DBP_SECT));
105                     dba_array_free(dba_array_get(page, DBP_ARCH));
106                     free(dba_array_get(page, DBP_DESC));
107                     dba_array_free(dba_array_get(page, DBP_FILE));
108                     dba_array_free(page);
109           }
110           dba_array_free(dba->pages);
111 
112           free(dba);
113 }
114 
115 /*
116  * Write the complete mandoc database to disk; the format is:
117  * - One integer each for magic and version.
118  * - One pointer each to the macros table and to the final magic.
119  * - The pages table.
120  * - The macros table.
121  * - And at the very end, the magic integer again.
122  */
123 int
dba_write(const char * fname,struct dba * dba)124 dba_write(const char *fname, struct dba *dba)
125 {
126           int        save_errno;
127           int32_t    pos_end, pos_macros, pos_macros_ptr;
128 
129           if (dba_open(fname) == -1)
130                     return -1;
131           dba_int_write(MANDOCDB_MAGIC);
132           dba_int_write(MANDOCDB_VERSION);
133           pos_macros_ptr = dba_skip(1, 2);
134           dba_pages_write(dba->pages);
135           pos_macros = dba_tell();
136           dba_macros_write(dba->macros);
137           pos_end = dba_tell();
138           dba_int_write(MANDOCDB_MAGIC);
139           dba_seek(pos_macros_ptr);
140           dba_int_write(pos_macros);
141           dba_int_write(pos_end);
142           if (dba_close() == -1) {
143                     save_errno = errno;
144                     unlink(fname);
145                     errno = save_errno;
146                     return -1;
147           }
148           return 0;
149 }
150 
151 
152 /*** functions for handling pages *************************************/
153 
154 /*
155  * Create a new page and append it to the pages table.
156  */
157 struct dba_array *
dba_page_new(struct dba_array * pages,const char * arch,const char * desc,const char * file,enum form form)158 dba_page_new(struct dba_array *pages, const char *arch,
159     const char *desc, const char *file, enum form form)
160 {
161           struct dba_array *page, *entry;
162 
163           page = dba_array_new(DBP_MAX, 0);
164           entry = dba_array_new(1, DBA_STR | DBA_GROW);
165           dba_array_add(page, entry);
166           entry = dba_array_new(1, DBA_STR | DBA_GROW);
167           dba_array_add(page, entry);
168           if (arch != NULL && *arch != '\0') {
169                     entry = dba_array_new(1, DBA_STR | DBA_GROW);
170                     dba_array_add(entry, (void *)arch);
171           } else
172                     entry = NULL;
173           dba_array_add(page, entry);
174           dba_array_add(page, mandoc_strdup(desc));
175           entry = dba_array_new(1, DBA_STR | DBA_GROW);
176           dba_array_add(entry, prepend(file, form));
177           dba_array_add(page, entry);
178           dba_array_add(pages, page);
179           return page;
180 }
181 
182 /*
183  * Add a section, architecture, or file name to an existing page.
184  * Passing the NULL pointer for the architecture makes the page MI.
185  * In that case, any earlier or later architectures are ignored.
186  */
187 void
dba_page_add(struct dba_array * page,int32_t ie,const char * str)188 dba_page_add(struct dba_array *page, int32_t ie, const char *str)
189 {
190           struct dba_array    *entries;
191           char                          *entry;
192 
193           entries = dba_array_get(page, ie);
194           if (ie == DBP_ARCH) {
195                     if (entries == NULL)
196                               return;
197                     if (str == NULL || *str == '\0') {
198                               dba_array_free(entries);
199                               dba_array_set(page, DBP_ARCH, NULL);
200                               return;
201                     }
202           }
203           if (*str == '\0')
204                     return;
205           dba_array_FOREACH(entries, entry) {
206                     if (ie == DBP_FILE && *entry < ' ')
207                               entry++;
208                     if (strcmp(entry, str) == 0)
209                               return;
210           }
211           dba_array_add(entries, (void *)str);
212 }
213 
214 /*
215  * Add an additional name to an existing page.
216  */
217 void
dba_page_alias(struct dba_array * page,const char * name,uint64_t mask)218 dba_page_alias(struct dba_array *page, const char *name, uint64_t mask)
219 {
220           struct dba_array    *entries;
221           char                          *entry;
222           char                           maskbyte;
223 
224           if (*name == '\0')
225                     return;
226           maskbyte = mask & NAME_MASK;
227           entries = dba_array_get(page, DBP_NAME);
228           dba_array_FOREACH(entries, entry) {
229                     if (strcmp(entry + 1, name) == 0) {
230                               *entry |= maskbyte;
231                               return;
232                     }
233           }
234           dba_array_add(entries, prepend(name, maskbyte));
235 }
236 
/*
 * Return instr with inbyte put in front of it.
 * The result lives in one function-static buffer that is reused and
 * overwritten by every call, so it is only valid until the next call.
 */
static void *
prepend(const char *instr, char inbyte)
{
	static char	*buf = NULL;
	static size_t	 cap = 0;
	size_t		 need;

	/* Length of instr including its terminating NUL. */
	need = strlen(instr) + 1;
	if (cap < need) {
		/* One extra byte for inbyte. */
		buf = mandoc_realloc(buf, need + 1);
		cap = need;
	}
	buf[0] = inbyte;
	memcpy(&buf[1], instr, need);
	return buf;
}
256 
257 /*
258  * Write the pages table to disk; the format is:
259  * - One integer containing the number of pages.
260  * - For each page, five pointers to the names, sections,
261  *   architectures, description, and file names of the page.
262  *   MI pages write 0 instead of the architecture pointer.
263  * - One list each for names, sections, architectures, descriptions and
264  *   file names.  The description for each page ends with a NUL byte.
265  *   For all the other lists, each string ends with a NUL byte,
266  *   and the last string for a page ends with two NUL bytes.
267  * - To assure alignment of following integers,
268  *   the end is padded with NUL bytes up to a multiple of four bytes.
269  */
270 static void
dba_pages_write(struct dba_array * pages)271 dba_pages_write(struct dba_array *pages)
272 {
273           struct dba_array    *page, *entry;
274           int32_t                        pos_pages, pos_end;
275 
276           pos_pages = dba_array_writelen(pages, 5);
277           dba_array_FOREACH(pages, page) {
278                     dba_array_setpos(page, DBP_NAME, dba_tell());
279                     entry = dba_array_get(page, DBP_NAME);
280                     dba_array_sort(entry, compare_names);
281                     dba_array_writelst(entry);
282           }
283           dba_array_FOREACH(pages, page) {
284                     dba_array_setpos(page, DBP_SECT, dba_tell());
285                     entry = dba_array_get(page, DBP_SECT);
286                     dba_array_sort(entry, compare_strings);
287                     dba_array_writelst(entry);
288           }
289           dba_array_FOREACH(pages, page) {
290                     if ((entry = dba_array_get(page, DBP_ARCH)) != NULL) {
291                               dba_array_setpos(page, DBP_ARCH, dba_tell());
292                               dba_array_sort(entry, compare_strings);
293                               dba_array_writelst(entry);
294                     } else
295                               dba_array_setpos(page, DBP_ARCH, 0);
296           }
297           dba_array_FOREACH(pages, page) {
298                     dba_array_setpos(page, DBP_DESC, dba_tell());
299                     dba_str_write(dba_array_get(page, DBP_DESC));
300           }
301           dba_array_FOREACH(pages, page) {
302                     dba_array_setpos(page, DBP_FILE, dba_tell());
303                     dba_array_writelst(dba_array_get(page, DBP_FILE));
304           }
305           pos_end = dba_align();
306           dba_seek(pos_pages);
307           dba_array_FOREACH(pages, page)
308                     dba_array_writepos(page);
309           dba_seek(pos_end);
310 }
311 
/*
 * Sort callback for name entries, whose first byte is a bits byte
 * followed by the name itself.  Entries with a larger first byte
 * sort first; ties are broken by the name, ignoring case.
 */
static int
compare_names(const void *vp1, const void *vp2)
{
	const char	*lhs = *(const char * const *)vp1;
	const char	*rhs = *(const char * const *)vp2;
	int		 by_bits;

	/* Reversed operands: descending order on the first byte. */
	by_bits = rhs[0] - lhs[0];
	if (by_bits != 0)
		return by_bits;
	return strcasecmp(lhs + 1, rhs + 1);
}
323 
/*
 * Sort callback for arrays of C strings: plain strcmp() order.
 */
static int
compare_strings(const void *vp1, const void *vp2)
{
	const char * const	*lhs = vp1;
	const char * const	*rhs = vp2;

	return strcmp(*lhs, *rhs);
}
333 
334 /*** functions for handling macros ************************************/
335 
336 /*
337  * In the hash table for a single macro, look up an entry by
338  * the macro value or add an empty one if it doesn't exist yet.
339  */
340 static struct macro_entry *
get_macro_entry(struct ohash * macro,const char * value,int32_t np)341 get_macro_entry(struct ohash *macro, const char *value, int32_t np)
342 {
343           struct macro_entry  *entry;
344           size_t                         len;
345           unsigned int                   slot;
346 
347           slot = ohash_qlookup(macro, value);
348           if ((entry = ohash_find(macro, slot)) == NULL) {
349                     len = strlen(value) + 1;
350                     entry = mandoc_malloc(sizeof(*entry) + len);
351                     memcpy(&entry->value, value, len);
352                     entry->pages = dba_array_new(np, DBA_GROW);
353                     ohash_insert(macro, slot, entry);
354           }
355           return entry;
356 }
357 
358 /*
359  * In addition to get_macro_entry(), add multiple page references,
360  * converting them from the on-disk format (byte offsets in the file)
361  * to page pointers in memory.
362  */
363 void
dba_macro_new(struct dba * dba,int32_t im,const char * value,const int32_t * pp)364 dba_macro_new(struct dba *dba, int32_t im, const char *value,
365     const int32_t *pp)
366 {
367           struct macro_entry  *entry;
368           const int32_t                 *ip;
369           int32_t                        np;
370 
371           np = 0;
372           for (ip = pp; *ip; ip++)
373                     np++;
374 
375           entry = get_macro_entry(dba_array_get(dba->macros, im), value, np);
376           for (ip = pp; *ip; ip++)
377                     dba_array_add(entry->pages, dba_array_get(dba->pages,
378                         be32toh(*ip) / 5 / sizeof(*ip) - 1));
379 }
380 
381 /*
382  * In addition to get_macro_entry(), add one page reference,
383  * directly taking the in-memory page pointer as an argument.
384  */
385 void
dba_macro_add(struct dba_array * macros,int32_t im,const char * value,struct dba_array * page)386 dba_macro_add(struct dba_array *macros, int32_t im, const char *value,
387     struct dba_array *page)
388 {
389           struct macro_entry  *entry;
390 
391           if (*value == '\0')
392                     return;
393           entry = get_macro_entry(dba_array_get(macros, im), value, 1);
394           dba_array_add(entry->pages, page);
395 }
396 
397 /*
398  * Write the macros table to disk; the format is:
399  * - The number of macro tables (actually, MACRO_MAX).
400  * - That number of pointers to the individual macro tables.
401  * - The individual macro tables.
402  */
403 static void
dba_macros_write(struct dba_array * macros)404 dba_macros_write(struct dba_array *macros)
405 {
406           struct ohash                  *macro;
407           int32_t                        im, pos_macros, pos_end;
408 
409           pos_macros = dba_array_writelen(macros, 1);
410           im = 0;
411           dba_array_FOREACH(macros, macro) {
412                     dba_array_setpos(macros, im++, dba_tell());
413                     dba_macro_write(macro);
414           }
415           pos_end = dba_tell();
416           dba_seek(pos_macros);
417           dba_array_writepos(macros);
418           dba_seek(pos_end);
419 }
420 
421 /*
422  * Write one individual macro table to disk; the format is:
423  * - The number of entries in the table.
424  * - For each entry, two pointers, the first one to the value
425  *   and the second one to the list of pages.
426  * - A list of values, each ending in a NUL byte.
427  * - To assure alignment of following integers,
428  *   padding with NUL bytes up to a multiple of four bytes.
429  * - A list of pointers to pages, each list ending in a 0 integer.
430  */
431 static void
dba_macro_write(struct ohash * macro)432 dba_macro_write(struct ohash *macro)
433 {
434           struct macro_entry  **entries, *entry;
435           struct dba_array     *page;
436           int32_t                        *kpos, *dpos;
437           unsigned int                    ie, ne, slot;
438           int                             use;
439           int32_t                         addr, pos_macro, pos_end;
440 
441           /* Temporary storage for filtering and sorting. */
442 
443           ne = ohash_entries(macro);
444           entries = mandoc_reallocarray(NULL, ne, sizeof(*entries));
445           kpos = mandoc_reallocarray(NULL, ne, sizeof(*kpos));
446           dpos = mandoc_reallocarray(NULL, ne, sizeof(*dpos));
447 
448           /* Build a list of non-empty entries and sort it. */
449 
450           ne = 0;
451           for (entry = ohash_first(macro, &slot); entry != NULL;
452                entry = ohash_next(macro, &slot)) {
453                     use = 0;
454                     dba_array_FOREACH(entry->pages, page)
455                               if (dba_array_getpos(page))
456                                         use = 1;
457                     if (use)
458                               entries[ne++] = entry;
459           }
460           qsort(entries, ne, sizeof(*entries), compare_entries);
461 
462           /* Number of entries, and space for the pointer pairs. */
463 
464           dba_int_write(ne);
465           pos_macro = dba_skip(2, ne);
466 
467           /* String table. */
468 
469           for (ie = 0; ie < ne; ie++) {
470                     kpos[ie] = dba_tell();
471                     dba_str_write(entries[ie]->value);
472           }
473           dba_align();
474 
475           /* Pages table. */
476 
477           for (ie = 0; ie < ne; ie++) {
478                     dpos[ie] = dba_tell();
479                     dba_array_FOREACH(entries[ie]->pages, page)
480                               if ((addr = dba_array_getpos(page)))
481                                         dba_int_write(addr);
482                     dba_int_write(0);
483           }
484           pos_end = dba_tell();
485 
486           /* Fill in the pointer pairs. */
487 
488           dba_seek(pos_macro);
489           for (ie = 0; ie < ne; ie++) {
490                     dba_int_write(kpos[ie]);
491                     dba_int_write(dpos[ie]);
492           }
493           dba_seek(pos_end);
494 
495           free(entries);
496           free(kpos);
497           free(dpos);
498 }
499 
500 static int
compare_entries(const void * vp1,const void * vp2)501 compare_entries(const void *vp1, const void *vp2)
502 {
503           const struct macro_entry *ep1, *ep2;
504 
505           ep1 = *(const struct macro_entry * const *)vp1;
506           ep2 = *(const struct macro_entry * const *)vp2;
507           return strcmp(ep1->value, ep2->value);
508 }
509