xref: /dragonfly/contrib/file/src/is_json.c (revision 739f0ef867128a933e021db3d831e906fcafd825)
1 /*-
2  * Copyright (c) 2018 Christos Zoulas
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24  * POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /*
28  * Parse JSON object serialization format (RFC-7159)
29  */
30 
31 #ifndef TEST
32 #include "file.h"
33 
34 #ifndef lint
35 FILE_RCSID("@(#)$File: is_json.c,v 1.26 2022/09/13 18:46:07 christos Exp $")
36 #endif
37 
38 #include "magic.h"
39 #else
40 #include <stdio.h>
41 #include <stddef.h>
42 #endif
43 #include <string.h>
44 
#ifdef DEBUG
#include <stdio.h>
/*
 * Debug trace helper.  Prints the message (a), the byte at the cursor (b)
 * in hex and as a character, and the text between the start pointer (c)
 * and the cursor, indented by the `lvl' variable that must be in scope at
 * the point of use.  The arguments are parenthesized so that arbitrary
 * pointer expressions can be passed safely.
 */
#define DPRINTF(a, b, c)	\
    printf("%*s%s [%.2x/%c] %.*s\n", (int)lvl, "", (a), *(b), *(b), \
	(int)((b) - (c)), (const char *)(c))
/* `lvl' is consumed by DPRINTF in debug builds, so no attribute is needed. */
#define __file_debugused
#else
/* Tracing compiled out: DPRINTF is a no-op statement. */
#define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
/* `lvl' is otherwise unused in non-debug builds; silence the warning. */
#define __file_debugused __attribute__((__unused__))
#endif
55 
/*
 * Indices into the statistics array (size_t st[JSON_MAX]) that the
 * parser fills in.
 */
#define JSON_ARRAY	0	/* bumped by json_parse() for a good '[' value */
#define JSON_CONSTANT	1	/* bumped for a good true/false/null value */
#define JSON_NUMBER	2	/* bumped for a good numeric value */
#define JSON_OBJECT	3	/* bumped for a good '{' value */
#define JSON_STRING	4	/* bumped for a good '"' value */
#define JSON_ARRAYN	5	/* bumped by json_parse_array() at each ']' */
#define JSON_MAX	6	/* number of slots in the array */
63 
64 /*
65  * if JSON_COUNT != 0:
66  *        count all the objects, require that we have the whole data file
67  * otherwise:
68  *        stop if we find an object or an array
69  */
70 #ifndef JSON_COUNT
71 #define JSON_COUNT 0
72 #endif
73 
74 static int json_parse(const unsigned char **, const unsigned char *, size_t *,
75           size_t);
76 
/*
 * Return 1 if uc is one of the four whitespace bytes this parser skips
 * (space, line feed, carriage return, horizontal tab), otherwise 0.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
90 
/*
 * Return 1 if uc is a decimal digit '0'..'9', otherwise 0.
 * The C standard guarantees that the digit characters are contiguous,
 * so a range comparison is equivalent to listing them.
 */
static int
json_isdigit(unsigned char uc)
{
	return uc >= '0' && uc <= '9';
}
102 
/*
 * Return 1 if uc is a hexadecimal digit (0-9, a-f, A-F), otherwise 0.
 */
static int
json_isxdigit(unsigned char uc)
{
	/* The terminating NUL is deliberately excluded from the search. */
	static const char hexdigits[] = "0123456789abcdefABCDEF";

	return memchr(hexdigits, uc, sizeof(hexdigits) - 1) != NULL;
}
116 
/*
 * Advance past any run of whitespace (as defined by json_isspace) that
 * starts at uc, never going beyond ue.  Returns the first position that
 * is not whitespace, or ue if the rest of the buffer is whitespace.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	const unsigned char *p;

	for (p = uc; p < ue; p++) {
		if (!json_isspace(*p))
			break;
	}
	return p;
}
124 
125 /*ARGSUSED*/
126 static int
json_parse_string(const unsigned char ** ucp,const unsigned char * ue,size_t lvl __file_debugused)127 json_parse_string(const unsigned char **ucp, const unsigned char *ue,
128     size_t lvl __file_debugused)
129 {
130           const unsigned char *uc = *ucp;
131           size_t i;
132 
133           DPRINTF("Parse string: ", uc, *ucp);
134           while (uc < ue) {
135                     switch (*uc++) {
136                     case '\0':
137                               goto out;
138                     case '\\':
139                               if (uc == ue)
140                                         goto out;
141                               switch (*uc++) {
142                               case '\0':
143                                         goto out;
144                               case '"':
145                               case '\\':
146                               case '/':
147                               case 'b':
148                               case 'f':
149                               case 'n':
150                               case 'r':
151                               case 't':
152                                         continue;
153                               case 'u':
154                                         if (ue - uc < 4) {
155                                                   uc = ue;
156                                                   goto out;
157                                         }
158                                         for (i = 0; i < 4; i++)
159                                                   if (!json_isxdigit(*uc++))
160                                                             goto out;
161                                         continue;
162                               default:
163                                         goto out;
164                               }
165                     case '"':
166                               DPRINTF("Good string: ", uc, *ucp);
167                               *ucp = uc;
168                               return 1;
169                     default:
170                               continue;
171                     }
172           }
173 out:
174           DPRINTF("Bad string: ", uc, *ucp);
175           *ucp = uc;
176           return 0;
177 }
178 
179 static int
json_parse_array(const unsigned char ** ucp,const unsigned char * ue,size_t * st,size_t lvl)180 json_parse_array(const unsigned char **ucp, const unsigned char *ue,
181           size_t *st, size_t lvl)
182 {
183           const unsigned char *uc = *ucp;
184 
185           DPRINTF("Parse array: ", uc, *ucp);
186           while (uc < ue) {
187                     uc = json_skip_space(uc, ue);
188                     if (uc == ue)
189                               goto out;
190                     if (*uc == ']')
191                               goto done;
192                     if (!json_parse(&uc, ue, st, lvl + 1))
193                               goto out;
194                     if (uc == ue)
195                               goto out;
196                     switch (*uc) {
197                     case ',':
198                               uc++;
199                               continue;
200                     case ']':
201                     done:
202                               st[JSON_ARRAYN]++;
203                               DPRINTF("Good array: ", uc, *ucp);
204                               *ucp = uc + 1;
205                               return 1;
206                     default:
207                               goto out;
208                     }
209           }
210 out:
211           DPRINTF("Bad array: ", uc,  *ucp);
212           *ucp = uc;
213           return 0;
214 }
215 
/*
 * Parse the members of a JSON object.  On entry *ucp points just past
 * the opening brace.
 *
 * Each member is a quoted key (json_parse_string), optional whitespace,
 * a ':', and a value parsed by json_parse() at lvl + 1, followed by ','
 * or the closing brace.  A closing brace is also accepted where a key
 * could start, so an empty object and a trailing comma both succeed.
 *
 * Returns 1 with *ucp just past the closing brace, or 0 with *ucp set to
 * the position where parsing stopped.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl)
{
	const unsigned char *cur = *ucp;
	unsigned char ch;

	DPRINTF("Parse object: ", cur, *ucp);
	while (cur < ue) {
		cur = json_skip_space(cur, ue);
		if (cur == ue)
			goto bad;

		/* Either the end of the object or the start of a key. */
		ch = *cur++;
		if (ch == '}')
			goto good;
		if (ch != '"') {
			DPRINTF("not string", cur, *ucp);
			goto bad;
		}
		DPRINTF("next field", cur, *ucp);
		if (!json_parse_string(&cur, ue, lvl)) {
			DPRINTF("not string", cur, *ucp);
			goto bad;
		}

		cur = json_skip_space(cur, ue);
		if (cur == ue)
			goto bad;
		if (*cur++ != ':') {
			DPRINTF("not colon", cur, *ucp);
			goto bad;
		}

		if (!json_parse(&cur, ue, st, lvl + 1)) {
			DPRINTF("not json", cur, *ucp);
			goto bad;
		}
		if (cur == ue)
			goto bad;

		ch = *cur++;
		if (ch == ',')
			continue;
		if (ch == '}') /* { */
			goto good;

		DPRINTF("not more", cur, *ucp);
		/*
		 * This store is overwritten at the `bad' label below; it
		 * only affects what the debug trace there prints.
		 */
		*ucp = cur - 1;
		goto bad;
	}
bad:
	DPRINTF("Bad object: ", cur, *ucp);
	*ucp = cur;
	return 0;
good:
	DPRINTF("Good object: ", cur, *ucp);
	*ucp = cur;
	return 1;
}
271 
272 /*ARGSUSED*/
273 static int
json_parse_number(const unsigned char ** ucp,const unsigned char * ue,size_t lvl __file_debugused)274 json_parse_number(const unsigned char **ucp, const unsigned char *ue,
275     size_t lvl __file_debugused)
276 {
277           const unsigned char *uc = *ucp;
278           int got = 0;
279 
280           DPRINTF("Parse number: ", uc, *ucp);
281           if (uc == ue)
282                     return 0;
283           if (*uc == '-')
284                     uc++;
285 
286           for (; uc < ue; uc++) {
287                     if (!json_isdigit(*uc))
288                               break;
289                     got = 1;
290           }
291           if (uc == ue)
292                     goto out;
293           if (*uc == '.')
294                     uc++;
295           for (; uc < ue; uc++) {
296                     if (!json_isdigit(*uc))
297                               break;
298                     got = 1;
299           }
300           if (uc == ue)
301                     goto out;
302           if (got && (*uc == 'e' || *uc == 'E')) {
303                     uc++;
304                     got = 0;
305                     if (uc == ue)
306                               goto out;
307                     if (*uc == '+' || *uc == '-')
308                               uc++;
309                     for (; uc < ue; uc++) {
310                               if (!json_isdigit(*uc))
311                                         break;
312                               got = 1;
313                     }
314           }
315 out:
316           if (!got)
317                     DPRINTF("Bad number: ", uc, *ucp);
318           else
319                     DPRINTF("Good number: ", uc, *ucp);
320           *ucp = uc;
321           return got;
322 }
323 
324 /*ARGSUSED*/
325 static int
json_parse_const(const unsigned char ** ucp,const unsigned char * ue,const char * str,size_t len,size_t lvl __file_debugused)326 json_parse_const(const unsigned char **ucp, const unsigned char *ue,
327     const char *str, size_t len, size_t lvl __file_debugused)
328 {
329           const unsigned char *uc = *ucp;
330 
331           DPRINTF("Parse const: ", uc, *ucp);
332           *ucp += --len - 1;
333           if (*ucp > ue)
334                     *ucp = ue;
335           for (; uc < ue && --len;) {
336                     if (*uc++ != *++str) {
337                               DPRINTF("Bad const: ", uc, *ucp);
338                               return 0;
339                     }
340           }
341           DPRINTF("Good const: ", uc, *ucp);
342           return 1;
343 }
344 
345 static int
json_parse(const unsigned char ** ucp,const unsigned char * ue,size_t * st,size_t lvl)346 json_parse(const unsigned char **ucp, const unsigned char *ue,
347     size_t *st, size_t lvl)
348 {
349           const unsigned char *uc, *ouc;
350           int rv = 0;
351           int t;
352 
353           ouc = uc = json_skip_space(*ucp, ue);
354           if (uc == ue)
355                     goto out;
356 
357           // Avoid recursion
358           if (lvl > 500) {
359                     DPRINTF("Too many levels", uc, *ucp);
360                     return 0;
361           }
362 #if JSON_COUNT
363           /* bail quickly if not counting */
364           if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
365                     return 1;
366 #endif
367 
368           DPRINTF("Parse general: ", uc, *ucp);
369           switch (*uc++) {
370           case '"':
371                     rv = json_parse_string(&uc, ue, lvl + 1);
372                     t = JSON_STRING;
373                     break;
374           case '[':
375                     rv = json_parse_array(&uc, ue, st, lvl + 1);
376                     t = JSON_ARRAY;
377                     break;
378           case '{': /* '}' */
379                     rv = json_parse_object(&uc, ue, st, lvl + 1);
380                     t = JSON_OBJECT;
381                     break;
382           case 't':
383                     rv = json_parse_const(&uc, ue, "true", sizeof("true"), lvl + 1);
384                     t = JSON_CONSTANT;
385                     break;
386           case 'f':
387                     rv = json_parse_const(&uc, ue, "false", sizeof("false"),
388                         lvl + 1);
389                     t = JSON_CONSTANT;
390                     break;
391           case 'n':
392                     rv = json_parse_const(&uc, ue, "null", sizeof("null"), lvl + 1);
393                     t = JSON_CONSTANT;
394                     break;
395           default:
396                     --uc;
397                     rv = json_parse_number(&uc, ue, lvl + 1);
398                     t = JSON_NUMBER;
399                     break;
400           }
401           if (rv)
402                     st[t]++;
403           uc = json_skip_space(uc, ue);
404 out:
405           DPRINTF("End general: ", uc, *ucp);
406           *ucp = uc;
407           if (lvl == 0) {
408                     if (!rv)
409                               return 0;
410                     if (uc == ue)
411                               return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 1 : 0;
412                     if (*ouc == *uc && json_parse(&uc, ue, st, 1))
413                               return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 2 : 0;
414                     else
415                               return 0;
416           }
417           return rv;
418 }
419 
420 #ifndef TEST
421 int
file_is_json(struct magic_set * ms,const struct buffer * b)422 file_is_json(struct magic_set *ms, const struct buffer *b)
423 {
424           const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
425           const unsigned char *ue = uc + b->flen;
426           size_t st[JSON_MAX];
427           int mime = ms->flags & MAGIC_MIME;
428           int jt;
429 
430 
431           if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
432                     return 0;
433 
434           memset(st, 0, sizeof(st));
435 
436           if ((jt = json_parse(&uc, ue, st, 0)) == 0)
437                     return 0;
438 
439           if (mime == MAGIC_MIME_ENCODING)
440                     return 1;
441           if (mime) {
442                     if (file_printf(ms, "application/%s",
443                         jt == 1 ? "json" : "x-ndjason") == -1)
444                               return -1;
445                     return 1;
446           }
447           if (file_printf(ms, "%sJSON text data",
448               jt == 1 ? "" : "New Line Delimited ") == -1)
449                     return -1;
450 #if JSON_COUNT
451 #define P(n) st[n], st[n] > 1 ? "s" : ""
452           if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
453               "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
454               "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
455               "u >1array%s)",
456               P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
457               P(JSON_NUMBER), P(JSON_ARRAYN))
458               == -1)
459                     return -1;
460 #endif
461           return 1;
462 }
463 
464 #else
465 
466 #include <sys/types.h>
467 #include <sys/stat.h>
468 #include <stdio.h>
469 #include <fcntl.h>
470 #include <unistd.h>
471 #include <stdlib.h>
472 #include <stdint.h>
473 #include <err.h>
474 
475 int
main(int argc,char * argv[])476 main(int argc, char *argv[])
477 {
478           int fd, rv;
479           struct stat st;
480           unsigned char *p;
481           size_t stats[JSON_MAX];
482 
483           if ((fd = open(argv[1], O_RDONLY)) == -1)
484                     err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
485 
486           if (fstat(fd, &st) == -1)
487                     err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
488 
489           if ((p = CAST(char *, malloc(st.st_size))) == NULL)
490                     err(EXIT_FAILURE, "Can't allocate %jd bytes",
491                         (intmax_t)st.st_size);
492           if (read(fd, p, st.st_size) != st.st_size)
493                     err(EXIT_FAILURE, "Can't read %jd bytes",
494                         (intmax_t)st.st_size);
495           memset(stats, 0, sizeof(stats));
496           printf("is json %d\n", json_parse((const unsigned char **)&p,
497               p + st.st_size, stats, 0));
498           return 0;
499 }
500 #endif
501