1 /* $NetBSD: printable.c,v 1.4 2025/02/25 19:15:52 christos Exp $ */
2
3 /*++
4 /* NAME
5 /* printable 3
6 /* SUMMARY
7 /* mask non-printable characters
8 /* SYNOPSIS
9 /* #include <stringops.h>
10 /*
11 /* int util_utf8_enable;
12 /*
13 /* char *printable(buffer, replacement)
14 /* char *buffer;
15 /* int replacement;
16 /*
17 /* char *printable_except(buffer, replacement, except)
18 /* char *buffer;
19 /* int replacement;
20 /* const char *except;
21 /* DESCRIPTION
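/* printable() replaces non-printable characters
/* in its input with the given replacement.
/*
/* printable_except() does the same, but does not replace
/* the ASCII characters that are listed in \fIexcept\fR.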
24 /*
25 /* util_utf8_enable controls whether UTF8 is considered printable.
26 /* With util_utf8_enable equal to zero, non-ASCII text is replaced.
27 /*
28 /* Arguments:
29 /* .IP buffer
30 /* The null-terminated input string.
31 /* .IP replacement
32 /* Replacement value for characters in \fIbuffer\fR that do not
33 /* pass the ASCII isprint(3) test or that are not valid UTF8.
34 /* .IP except
35 /* Null-terminated sequence of non-replaced ASCII characters.
36 /* LICENSE
37 /* .ad
38 /* .fi
39 /* The Secure Mailer license must be distributed with this software.
40 /* AUTHOR(S)
41 /* Wietse Venema
42 /* IBM T.J. Watson Research
43 /* P.O. Box 704
44 /* Yorktown Heights, NY 10598, USA
45 /*
46 /* Wietse Venema
47 /* Google, Inc.
48 /* 111 8th Avenue
49 /* New York, NY 10011, USA
50 /*
51 /* Wietse Venema
52 /* porcupine.org
53 /* Amawalk, NY 10501, USA
54 /*--*/
55
56 /* System library. */
57
58 #include "sys_defs.h"
59 #include <ctype.h>
60 #include <string.h>
61
62 /* Utility library. */
63
64 #include "stringops.h"
65 #include "parse_utf8_char.h"
66
/*
 * Global switch consulted by printable_except(): while it is zero (the
 * default) only ASCII bytes can be kept; when it is non-zero, input is
 * additionally checked with parse_utf8_char().
 */
int util_utf8_enable = 0;
68
69 /* printable - binary compatibility */
70
71 #undef printable
72
73 char *printable(char *, int);
74
/*
 * Real (non-macro) entry point with the historical two-argument signature.
 * It masks non-printable content in "string" with "replacement" by
 * delegating to printable_except() with no exception list, and returns
 * whatever that function returns.
 */
char   *printable(char *string, int replacement)
{
    const char *no_exceptions = NULL;

    return (printable_except(string, replacement, no_exceptions));
}
79
80 /* printable_except - pass through printable or other preserved characters */
81
printable_except(char * string,int replacement,const char * except)82 char *printable_except(char *string, int replacement, const char *except)
83 {
84 char *cp;
85 char *last;
86 int ch;
87
88 /*
89 * In case of a non-UTF8 sequence (bad leader byte, bad non-leader byte,
90 * over-long encodings, out-of-range code points, etc), replace the first
91 * byte, and try to resynchronize at the next byte.
92 */
93 #define PRINT_OR_EXCEPT(ch) (ISPRINT(ch) || (except && strchr(except, ch)))
94
95 for (cp = string; (ch = *(unsigned char *) cp) != 0; cp++) {
96 if (util_utf8_enable == 0) {
97 if (ISASCII(ch) && PRINT_OR_EXCEPT(ch))
98 continue;
99 } else if ((last = parse_utf8_char(cp, 0)) == cp) { /* ASCII */
100 if (PRINT_OR_EXCEPT(ch))
101 continue;
102 } else if (last != 0) { /* Other UTF8 */
103 cp = last;
104 continue;
105 }
106 *cp = replacement;
107 }
108 return (string);
109 }
110
111 #ifdef TEST
112
113 #include <stdlib.h>
114 #include <string.h>
115 #include <msg.h>
116 #include <msg_vstream.h>
117 #include <mymalloc.h>
118 #include <vstream.h>
119
120 /*
121 * Test cases for 1-, 2-, and 3-byte encodings. Originally contributed by
122 * Viktor Dukhovni, and annotated using translate.google.com.
123 *
124 * See valid_utf8_string.c for single-error tests.
125 *
126 * XXX Need a test for 4-byte encodings, preferably with strings that can be
127 * displayed.
128 */
/*
 * One table-driven test: a human-readable label, the string that is handed
 * (as a writable copy) to printable(), and the string that must come back.
 */
struct testcase {
    const char *name;			/* label shown in RUN/PASS/FAIL lines */
    const char *input;			/* text before masking */
    const char *expected;		/* text after masking */
};
134 static const struct testcase testcases[] = {
135 {"Printable ASCII",
136 "printable", "printable"
137 },
138 {"ASCII with control character",
139 "non\bn-printable", "non?n-printable"
140 },
141 {"Latin accented text, no error",
142 "na\303\257ve", "na\303\257ve"
143 },
144 {"Latin text, with error",
145 "na\303ve", "na?ve"
146 },
147 {"Viktor, Cyrillic, no error",
148 "\320\262\320\270\320\272\321\202\320\276\321\200",
149 "\320\262\320\270\320\272\321\202\320\276\321\200"
150 },
151 {"Viktor, Cyrillic, two errors",
152 "\320\262\320\320\272\272\321\202\320\276\321\200",
153 "\320\262?\320\272?\321\202\320\276\321\200"
154 },
155 {"Viktor, Hebrew, no error",
156 "\327\225\327\231\327\247\327\230\327\225\326\274\327\250",
157 "\327\225\327\231\327\247\327\230\327\225\326\274\327\250"
158 },
159 {"Viktor, Hebrew, with error",
160 "\327\225\231\327\247\327\230\327\225\326\274\327\250",
161 "\327\225?\327\247\327\230\327\225\326\274\327\250"
162 },
163 {"Chinese (Simplified), no error",
164 "\344\270\255\345\233\275\344\272\222\350\201\224\347\275\221\347"
165 "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
166 "\237\350\256\241\346\212\245\345\221\212",
167 "\344\270\255\345\233\275\344\272\222\350\201\224\347\275\221\347"
168 "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
169 "\237\350\256\241\346\212\245\345\221\212"
170 },
171 {"Chinese (Simplified), with errors",
172 "\344\270\255\345\344\272\222\350\224\347\275\221\347"
173 "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
174 "\237\350\256\241\346\212\245\345",
175 "\344\270\255?\344\272\222??\347\275\221\347"
176 "\273\234\345\217\221\345\261\225\347\212\266\345\206\265\347\273"
177 "\237\350\256\241\346\212\245?"
178 },
179 };
180
main(int argc,char ** argv)181 int main(int argc, char **argv)
182 {
183 const struct testcase *tp;
184 int pass;
185 int fail;
186
187 #define NUM_TESTS sizeof(testcases)/sizeof(testcases[0])
188
189 msg_vstream_init(basename(argv[0]), VSTREAM_ERR);
190 util_utf8_enable = 1;
191
192 for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) {
193 char *input;
194 char *actual;
195 int ok = 0;
196
197 /*
198 * Notes:
199 *
200 * - The input is modified, therefore it must be copied.
201 *
202 * - The msg(3) functions use printable() which interferes when logging
203 * inputs and outputs. Use vstream_fprintf() instead.
204 */
205 vstream_fprintf(VSTREAM_ERR, "RUN %s\n", tp->name);
206 input = mystrdup(tp->input);
207 actual = printable(input, '?');
208
209 if (strcmp(actual, tp->expected) != 0) {
210 vstream_fprintf(VSTREAM_ERR, "input: >%s<, got: >%s<, want: >%s<\n",
211 tp->input, actual, tp->expected);
212 } else {
213 vstream_fprintf(VSTREAM_ERR, "input: >%s<, got and want: >%s<\n",
214 tp->input, actual);
215 ok = 1;
216 }
217 if (ok) {
218 vstream_fprintf(VSTREAM_ERR, "PASS %s\n", tp->name);
219 pass++;
220 } else {
221 vstream_fprintf(VSTREAM_ERR, "FAIL %s\n", tp->name);
222 fail++;
223 }
224 myfree(input);
225 }
226 msg_info("PASS=%d FAIL=%d", pass, fail);
227 return (fail > 0);
228 }
229
230 #endif
231