1 /*        $NetBSD: dict_thash.c,v 1.5 2025/02/25 19:15:51 christos Exp $        */
2 
3 /*++
4 /* NAME
5 /*        dict_thash 3
6 /* SUMMARY
7 /*        dictionary manager interface to hashed flat text files
8 /* SYNOPSIS
9 /*        #include <dict_thash.h>
10 /*
11 /*        DICT      *dict_thash_open(path, open_flags, dict_flags)
/*        const char *path;
14 /*        int       open_flags;
15 /*        int       dict_flags;
16 /* DESCRIPTION
17 /*        dict_thash_open() opens the named flat text file, creates
18 /*        an in-memory hash table, and makes it available via the
19 /*        generic interface described in dict_open(3). The input
20 /*        format is as with postmap(1).
21 /* DIAGNOSTICS
22 /*        Fatal errors: cannot open file, out of memory.
23 /* SEE ALSO
24 /*        dict(3) generic dictionary manager
25 /* LICENSE
26 /* .ad
27 /* .fi
28 /*        The Secure Mailer license must be distributed with this software.
29 /* AUTHOR(S)
30 /*        Wietse Venema
31 /*        IBM T.J. Watson Research
32 /*        P.O. Box 704
33 /*        Yorktown Heights, NY 10598, USA
34 /*
35 /*        Wietse Venema
36 /*        Google, Inc.
37 /*        111 8th Avenue
38 /*        New York, NY 10011, USA
39 /*--*/
40 
41 /* System library. */
42 
43 #include <sys_defs.h>
44 #include <sys/stat.h>
45 #include <ctype.h>
46 #include <string.h>
47 
48 /* Utility library. */
49 
50 #include <msg.h>
51 #include <mymalloc.h>
52 #include <iostuff.h>
53 #include <vstring.h>
54 #include <stringops.h>
55 #include <readlline.h>
56 #include <dict.h>
57 #include <dict_ht.h>
58 #include <dict_thash.h>
59 
60 /* Application-specific. */
61 
62 #define STR         vstring_str
63 #define LEN         VSTRING_LEN
64 
65 /* dict_thash_open - open flat text data base */
66 
dict_thash_open(const char * path,int open_flags,int dict_flags)67 DICT   *dict_thash_open(const char *path, int open_flags, int dict_flags)
68 {
69     DICT   *dict;
70     VSTREAM *fp = 0;                              /* DICT_THASH_OPEN_RETURN() */
71     struct stat st;
72     time_t  before;
73     time_t  after;
74     VSTRING *line_buffer = 0;           /* DICT_THASH_OPEN_RETURN() */
75     int     lineno;
76     int     last_line;
77     char   *key;
78     char   *value;
79 
80     /*
81      * Let the optimizer worry about eliminating redundant code.
82      */
83 #define DICT_THASH_OPEN_RETURN(d) do { \
84           DICT *__d = (d); \
85           if (fp != 0) \
86               vstream_fclose(fp); \
87           if (line_buffer != 0) \
88               vstring_free(line_buffer); \
89           return (__d); \
90     } while (0)
91 
92     /*
93      * Sanity checks.
94      */
95     if (open_flags != O_RDONLY)
96           DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
97                                                         open_flags, dict_flags,
98                                           "%s:%s map requires O_RDONLY access mode",
99                                                         DICT_TYPE_THASH, path));
100 
101     /*
102      * Read the flat text file into in-memory hash. Read the file again if it
103      * may have changed while we were reading.
104      */
105     for (before = time((time_t *) 0); /* see below */ ; before = after) {
106           if ((fp = vstream_fopen(path, open_flags, 0644)) == 0) {
107               DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
108                                                               open_flags, dict_flags,
109                                                        "open database %s: %m", path));
110           }
111 
112           /*
113            * Reuse the "internal" dictionary type.
114            */
115           dict = dict_open3(DICT_TYPE_HT, path, open_flags, dict_flags);
116           dict_type_override(dict, DICT_TYPE_THASH);
117 
118           /*
119            * XXX This duplicates the parser in postmap.c.
120            */
121           if (line_buffer == 0)
122               line_buffer = vstring_alloc(100);
123           last_line = 0;
124           while (readllines(line_buffer, fp, &last_line, &lineno)) {
125               int     in_quotes = 0;
126 
127               /*
128                * First some UTF-8 checks sans casefolding.
129                */
130               if ((dict->flags & DICT_FLAG_UTF8_ACTIVE)
131                     && allascii(STR(line_buffer)) == 0
132                     && valid_utf8_stringz(STR(line_buffer)) == 0) {
133                     msg_warn("%s, line %d: non-UTF-8 input \"%s\""
134                                " -- ignoring this line",
135                                VSTREAM_PATH(fp), lineno, STR(line_buffer));
136                     continue;
137               }
138 
139               /*
140                * Split on the first whitespace character, then trim leading and
141                * trailing whitespace from key and value.
142                */
143               for (value = STR(line_buffer); *value; value++) {
144                     if (*value == '\\') {
145                         if (*++value == 0)
146                               break;
147                     } else if (ISSPACE(*value)) {
148                         if (!in_quotes)
149                               break;
150                     } else if (*value == '"') {
151                         in_quotes = !in_quotes;
152                     }
153               }
154               if (in_quotes) {
155                     msg_warn("%s, line %d: unbalanced '\"' in '%s'"
156                                " -- ignoring this line",
157                                VSTREAM_PATH(fp), lineno, STR(line_buffer));
158                     continue;
159               }
160               if (*value)
161                     *value++ = 0;
162               while (ISSPACE(*value))
163                     value++;
164               trimblanks(value, 0)[0] = 0;
165 
166               /*
167                * Leave the key in quoted form, for consistency with postmap.c
168                * and dict_inline.c.
169                */
170               key = STR(line_buffer);
171 
172               /*
173                * Enforce the "key whitespace value" format. Disallow missing
174                * keys or missing values.
175                */
176               if (*key == 0 || *value == 0) {
177                     msg_warn("%s, line %d: expected format: key whitespace value"
178                                " -- ignoring this line", path, lineno);
179                     continue;
180               }
181               if (key[strlen(key) - 1] == ':')
182                     msg_warn("%s, line %d: record is in \"key: value\" format;"
183                                " is this an alias file?", path, lineno);
184 
185               /*
186                * Optionally treat the value as a filename, and replace the
187                * value with the BASE64-encoded content of the named file.
188                */
189               if (dict_flags & DICT_FLAG_SRC_RHS_IS_FILE) {
190                     VSTRING *base64_buf;
191                     char   *err;
192 
193                     if ((base64_buf = dict_file_to_b64(dict, value)) == 0) {
194                         err = dict_file_get_error(dict);
195                         msg_warn("%s, line %d: %s: skipping this entry",
196                                    VSTREAM_PATH(fp), lineno, err);
197                         myfree(err);
198                         continue;
199                     }
200                     value = vstring_str(base64_buf);
201               }
202 
203               /*
204                * Store the value under the key. Handle duplicates
205                * appropriately. XXX Move this into dict_ht, but 1) that map
206                * ignores duplicates by default and we would have to check that
207                * we won't break existing code that depends on such behavior; 2)
208                * by inlining the checks here we can degrade gracefully instead
209                * of terminating with a fatal error. See comment in
210                * dict_inline.c.
211                */
212               if (dict->lookup(dict, key) != 0) {
213                     if (dict_flags & DICT_FLAG_DUP_IGNORE) {
214                          /* void */ ;
215                     } else if (dict_flags & DICT_FLAG_DUP_REPLACE) {
216                         dict->update(dict, key, value);
217                     } else if (dict_flags & DICT_FLAG_DUP_WARN) {
218                         msg_warn("%s, line %d: duplicate entry: \"%s\"",
219                                    path, lineno, key);
220                     } else {
221                         dict->close(dict);
222                         DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
223                                                                  open_flags, dict_flags,
224                                              "%s, line %d: duplicate entry: \"%s\"",
225                                                                       path, lineno, key));
226                     }
227               } else {
228                     dict->update(dict, key, value);
229               }
230           }
231 
232           /*
233            * See if the source file is hot.
234            */
235           if (fstat(vstream_fileno(fp), &st) < 0)
236               msg_fatal("fstat %s: %m", path);
237           if (vstream_fclose(fp))
238               msg_fatal("read %s: %m", path);
239           fp = 0;                                           /* DICT_THASH_OPEN_RETURN() */
240           after = time((time_t *) 0);
241           if (st.st_mtime < before - 1 || st.st_mtime > after)
242               break;
243 
244           /*
245            * Yes, it is hot. Discard the result and read the file again.
246            */
247           dict->close(dict);
248           if (msg_verbose > 1)
249               msg_info("pausing to let file %s cool down", path);
250           doze(300000);
251     }
252 
253     dict->owner.uid = st.st_uid;
254     dict->owner.status = (st.st_uid != 0);
255 
256     DICT_THASH_OPEN_RETURN(DICT_DEBUG (dict));
257 }
258