/*        $NetBSD: printable.c,v 1.4 2025/02/25 19:15:52 christos Exp $         */

/*++
/* NAME
/*        printable 3
/* SUMMARY
/*        mask non-printable characters
/* SYNOPSIS
/*        #include <stringops.h>
/*
/*        int       util_utf8_enable;
/*
/*        char      *printable(buffer, replacement)
/*        char      *buffer;
/*        int       replacement;
/*
/*        char      *printable_except(buffer, replacement, except)
/*        char      *buffer;
/*        int       replacement;
/*        const char *except;
/* DESCRIPTION
/*        printable() replaces non-printable characters
/*        in its input with the given replacement.
/*
/*        printable_except() does the same, but does not replace
/*        the ASCII characters that are listed in \fIexcept\fR.
/*
/*        Both functions modify the buffer in place and return
/*        a pointer to it.
/*
/*        util_utf8_enable controls whether UTF8 is considered printable.
/*        With util_utf8_enable equal to zero, non-ASCII text is replaced.
/*
/*        Arguments:
/* .IP buffer
/*        The null-terminated input string.
/* .IP replacement
/*        Replacement value for characters in \fIbuffer\fR that do not
/*        pass the ASCII isprint(3) test or that are not valid UTF8.
/* .IP except
/*        Null-terminated sequence of non-replaced ASCII characters.
/*        A null pointer means that there are no exceptions.
/* LICENSE
/* .ad
/* .fi
/*        The Secure Mailer license must be distributed with this software.
/* AUTHOR(S)
/*        Wietse Venema
/*        IBM T.J. Watson Research
/*        P.O. Box 704
/*        Yorktown Heights, NY 10598, USA
/*
/*        Wietse Venema
/*        Google, Inc.
/*        111 8th Avenue
/*        New York, NY 10011, USA
/*
/*        Wietse Venema
/*        porcupine.org
/*        Amawalk, NY 10501, USA
/*--*/
55 
56 /* System library. */
57 
58 #include "sys_defs.h"
59 #include <ctype.h>
60 #include <string.h>
61 
62 /* Utility library. */
63 
64 #include "stringops.h"
65 #include "parse_utf8_char.h"
66 
/* Non-zero: valid multi-byte UTF8 is left alone; zero: only ASCII passes. */
int     util_utf8_enable = 0;
68 
/* printable -  binary compatibility */

/*
 * NOTE(review): the #undef suggests that stringops.h also provides a macro
 * named printable; removing it here lets this file emit a real function with
 * that name. Confirm against stringops.h.
 */
#undef printable

char   *printable(char *, int);

/*
 * Replace the non-printable content of string in place, with no exception
 * list, and return string. All the work is delegated to printable_except().
 */
char   *printable(char *string, int replacement)
{
    return (printable_except(string, replacement, (char *) 0));
}
79 
80 /* printable_except -  pass through printable or other preserved characters */
81 
printable_except(char * string,int replacement,const char * except)82 char   *printable_except(char *string, int replacement, const char *except)
83 {
84     char   *cp;
85     char   *last;
86     int     ch;
87 
88     /*
89      * In case of a non-UTF8 sequence (bad leader byte, bad non-leader byte,
90      * over-long encodings, out-of-range code points, etc), replace the first
91      * byte, and try to resynchronize at the next byte.
92      */
93 #define PRINT_OR_EXCEPT(ch) (ISPRINT(ch) || (except && strchr(except, ch)))
94 
95     for (cp = string; (ch = *(unsigned char *) cp) != 0; cp++) {
96           if (util_utf8_enable == 0) {
97               if (ISASCII(ch) && PRINT_OR_EXCEPT(ch))
98                     continue;
99           } else if ((last = parse_utf8_char(cp, 0)) == cp) {         /* ASCII */
100               if (PRINT_OR_EXCEPT(ch))
101                     continue;
102           } else if (last != 0) {                           /* Other UTF8 */
103               cp = last;
104               continue;
105           }
106           *cp = replacement;
107     }
108     return (string);
109 }
110 
111 #ifdef TEST
112 
113 #include <stdlib.h>
114 #include <string.h>
115 #include <msg.h>
116 #include <msg_vstream.h>
117 #include <mymalloc.h>
118 #include <vstream.h>
119 
120  /*
121   * Test cases for 1-, 2-, and 3-byte encodings. Originally contributed by
122   * Viktor Dukhovni, and annotated using translate.google.com.
123   *
124   * See valid_utf8_string.c for single-error tests.
125   *
126   * XXX Need a test for 4-byte encodings, preferably with strings that can be
127   * displayed.
128   */
129 struct testcase {
130     const char *name;
131     const char *input;
132     const char *expected;;
133 };
134 static const struct testcase testcases[] = {
135     {"Printable ASCII",
136           "printable", "printable"
137     },
138     {"ASCII with control character",
139           "non\bn-printable", "non?n-printable"
140     },
141     {"Latin accented text, no error",
142           "na\303\257ve", "na\303\257ve"
143     },
144     {"Latin text, with error",
145           "na\303ve", "na?ve"
146     },
147     {"Viktor, Cyrillic, no error",
148           "\320\262\320\270\320\272\321\202\320\276\321\200",
149           "\320\262\320\270\320\272\321\202\320\276\321\200"
150     },
151     {"Viktor, Cyrillic, two errors",
152           "\320\262\320\320\272\272\321\202\320\276\321\200",
153           "\320\262?\320\272?\321\202\320\276\321\200"
154     },
155     {"Viktor, Hebrew, no error",
156           "\327\225\327\231\327\247\327\230\327\225\326\274\327\250",
157           "\327\225\327\231\327\247\327\230\327\225\326\274\327\250"
158     },
159     {"Viktor, Hebrew, with error",
160           "\327\225\231\327\247\327\230\327\225\326\274\327\250",
161           "\327\225?\327\247\327\230\327\225\326\274\327\250"
162     },
163     {"Chinese (Simplified), no error",
164           "\344\270\255\345\233\275\344\272\222\350\201\224\347\275\221\347"
165           "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
166           "\237\350\256\241\346\212\245\345\221\212",
167           "\344\270\255\345\233\275\344\272\222\350\201\224\347\275\221\347"
168           "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
169           "\237\350\256\241\346\212\245\345\221\212"
170     },
171     {"Chinese (Simplified), with errors",
172           "\344\270\255\345\344\272\222\350\224\347\275\221\347"
173           "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
174           "\237\350\256\241\346\212\245\345",
175           "\344\270\255?\344\272\222??\347\275\221\347"
176           "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
177           "\237\350\256\241\346\212\245?"
178     },
179 };
180 
main(int argc,char ** argv)181 int     main(int argc, char **argv)
182 {
183     const struct testcase *tp;
184     int     pass;
185     int     fail;
186 
187 #define NUM_TESTS   sizeof(testcases)/sizeof(testcases[0])
188 
189     msg_vstream_init(basename(argv[0]), VSTREAM_ERR);
190     util_utf8_enable = 1;
191 
192     for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) {
193           char   *input;
194           char   *actual;
195           int     ok = 0;
196 
197           /*
198            * Notes:
199            *
200            * - The input is modified, therefore it must be copied.
201            *
202            * - The msg(3) functions use printable() which interferes when logging
203            * inputs and outputs. Use vstream_fprintf() instead.
204            */
205           vstream_fprintf(VSTREAM_ERR, "RUN  %s\n", tp->name);
206           input = mystrdup(tp->input);
207           actual = printable(input, '?');
208 
209           if (strcmp(actual, tp->expected) != 0) {
210               vstream_fprintf(VSTREAM_ERR, "input: >%s<, got: >%s<, want: >%s<\n",
211                                   tp->input, actual, tp->expected);
212           } else {
213               vstream_fprintf(VSTREAM_ERR, "input: >%s<, got and want: >%s<\n",
214                                   tp->input, actual);
215               ok = 1;
216           }
217           if (ok) {
218               vstream_fprintf(VSTREAM_ERR, "PASS %s\n", tp->name);
219               pass++;
220           } else {
221               vstream_fprintf(VSTREAM_ERR, "FAIL %s\n", tp->name);
222               fail++;
223           }
224           myfree(input);
225     }
226     msg_info("PASS=%d FAIL=%d", pass, fail);
227     return (fail > 0);
228 }
229 
230 #endif
231