1 /*
2  * Copyright (c) 1993, 1994, 1995, 1996, 1997, 1998
3  *        The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *        This product includes software developed by the Computer Systems
16  *        Engineering Group at Lawrence Berkeley Laboratory.
17  * 4. Neither the name of the University nor of the Laboratory may be used
18  *    to endorse or promote products derived from this software without
19  *    specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 /*
35  * Utilities for message formatting used both by libpcap and rpcapd.
36  */
37 
38 #include <config.h>
39 
40 #include "ftmacros.h"
41 
42 #include <stddef.h>
43 #include <stdarg.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <errno.h>
47 
48 #include "pcap-int.h"
49 
50 #include "portability.h"
51 
52 #include "fmtutils.h"
53 
54 #ifdef _WIN32
55 #include "charconv.h"
56 #endif
57 
58 /*
59  * Set the encoding.
60  */
61 #ifdef _WIN32
62 /*
63  * True if we should use UTF-8.
64  */
65 static int use_utf_8;
66 
67 void
pcapint_fmt_set_encoding(unsigned int opts)68 pcapint_fmt_set_encoding(unsigned int opts)
69 {
70           if (opts == PCAP_CHAR_ENC_UTF_8)
71                     use_utf_8 = 1;
72 }
73 #else
/*
 * Variant built when _WIN32 is not defined: the argument is accepted
 * but not used (it carries the _U_ annotation, presumably the
 * project's unused-parameter marker), and no state is changed.
 */
void
pcapint_fmt_set_encoding(unsigned int opts _U_)
{
	/*
	 * Nothing to do here.
	 */
}
81 #endif
82 
83 #ifdef _WIN32
/*
 * Convert a null-terminated UTF-16LE string to UTF-8, putting it into
 * a buffer starting at the specified location and stopping if we go
 * past the specified size.  This will only put out complete UTF-8
 * sequences.
 *
 * We do this ourselves because Microsoft doesn't offer a "convert and
 * stop at a UTF-8 character boundary if we run out of space" routine.
 *
 * Parameters:
 *   utf_16    - null-terminated sequence of UTF-16 code units to convert.
 *   utf_8     - start of the output buffer.
 *   utf_8_len - total size of the output buffer, in bytes, including
 *               the space needed for the terminating '\0'.
 *
 * Returns a pointer to the terminating '\0' that was written, so the
 * caller can append more text there.  If utf_8_len is 0, nothing at
 * all is written and utf_8 itself is returned.
 *
 * Ill-formed input is handled as follows, each case producing one
 * U+FFFD REPLACEMENT CHARACTER:
 *   - a leading surrogate at the very end of the string;
 *   - a leading surrogate followed by something that is not a
 *     trailing surrogate (both code units are consumed);
 *   - a trailing surrogate with no leading surrogate before it.
 */
#define IS_LEADING_SURROGATE(c) \
	((c) >= 0xd800 && (c) < 0xdc00)
#define IS_TRAILING_SURROGATE(c) \
	((c) >= 0xdc00 && (c) < 0xe000)
#define SURROGATE_VALUE(leading, trailing) \
	(((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000)
#define REPLACEMENT_CHARACTER 0x0FFFD

static char *
utf_16le_to_utf_8_truncated(const wchar_t *utf_16, char *utf_8,
    size_t utf_8_len)
{
	wchar_t c, c2;
	uint32_t uc;
	size_t nbytes;

	if (utf_8_len == 0) {
		/*
		 * Not even enough room for a trailing '\0'.
		 * Don't put anything into the buffer.
		 */
		return (utf_8);
	}

	while ((c = *utf_16++) != '\0') {
		/*
		 * Step 1: decode one Unicode code point from the
		 * UTF-16 input.
		 */
		if (IS_LEADING_SURROGATE(c)) {
			/*
			 * Leading surrogate.  Must be followed by
			 * a trailing surrogate.
			 */
			c2 = *utf_16;
			if (c2 == '\0') {
				/*
				 * Oops, string ends with a lead
				 * surrogate.  Try to drop in
				 * a REPLACEMENT CHARACTER, and
				 * don't move the string pointer,
				 * so on the next trip through
				 * the loop we grab the terminating
				 * '\0' and quit.
				 */
				uc = REPLACEMENT_CHARACTER;
			} else {
				/*
				 * OK, we can consume this 2-octet
				 * value.
				 */
				utf_16++;
				if (IS_TRAILING_SURROGATE(c2)) {
					/*
					 * Trailing surrogate.
					 * For c a leading surrogate
					 * and c2 a trailing surrogate,
					 * this produces a value between
					 * 0x10000 and 0x10ffff, so it's
					 * always going to be a valid
					 * Unicode code point.
					 */
					uc = (uint32_t)SURROGATE_VALUE(c, c2);
				} else {
					/*
					 * Not a trailing surrogate;
					 * try to drop in a
					 * REPLACEMENT CHARACTER.
					 */
					uc = REPLACEMENT_CHARACTER;
				}
			}
		} else if (IS_TRAILING_SURROGATE(c)) {
			/*
			 * Trailing surrogate without a preceding
			 * leading surrogate.  Try to drop in a
			 * REPLACEMENT CHARACTER.
			 */
			uc = REPLACEMENT_CHARACTER;
		} else {
			/*
			 * This is a valid BMP character; drop it in.
			 */
			uc = (uint32_t)c;
		}

		/*
		 * Step 2: uc is a valid Unicode code point; how many
		 * bytes of UTF-8 does it require?
		 */
		if (uc < 0x0080)
			nbytes = 1;
		else if (uc < 0x0800)
			nbytes = 2;
		else if (uc < 0x010000)
			nbytes = 3;
		else
			nbytes = 4;

		/*
		 * Step 3: stop, without emitting a partial sequence,
		 * if there isn't room for those bytes plus the
		 * trailing '\0'.
		 */
		if (utf_8_len < nbytes + 1)
			break;

		/*
		 * Step 4: emit the sequence.
		 */
		switch (nbytes) {

		case 1:
			*utf_8++ = (char)uc;
			break;

		case 2:
			*utf_8++ = (char)(((uc >> 6) & 0x1F) | 0xC0);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			break;

		case 3:
			*utf_8++ = (char)(((uc >> 12) & 0x0F) | 0xE0);
			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			break;

		default:
			/*
			 * 4 bytes.  The lead byte carries the top
			 * three bits of the code point, so the mask
			 * must be 0x07; code points from 0x100000
			 * upwards have the value 4 there.
			 */
			*utf_8++ = (char)(((uc >> 18) & 0x07) | 0xF0);
			*utf_8++ = (char)(((uc >> 12) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			break;
		}

		/*
		 * Account for exactly as many bytes as were written,
		 * so the space check above stays accurate.
		 */
		utf_8_len -= nbytes;
	}

	/*
	 * OK, we have enough room for (at least) a trailing '\0'.
	 * (We started out with enough room, thanks to the test
	 * for a zero-length buffer at the beginning, and if
	 * there wasn't enough room for any character we wanted
	 * to put into the buffer *plus* a trailing '\0',
	 * we'd have quit before putting it into the buffer,
	 * and thus would have left enough room for the trailing
	 * '\0'.)
	 *
	 * Drop it in.
	 */
	*utf_8 = '\0';

	/*
	 * Return a pointer to the terminating '\0', in case we
	 * want to drop something in after that.
	 */
	return (utf_8);
}
260 #endif /* _WIN32 */
261 
/*
 * Build an error message in errbuf from a printf-style format and its
 * arguments, followed by a message describing the errno value errnum.
 *
 * This is the variadic front end: it gathers its trailing arguments
 * into a va_list and hands the work to pcapint_vfmt_errmsg_for_errno().
 *
 *   errbuf    - buffer that receives the message.
 *   errbuflen - size of errbuf, in bytes.
 *   errnum    - errno value to describe.
 *   fmt, ...  - printf-style format and its arguments.
 */
void
pcapint_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
    const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	pcapint_vfmt_errmsg_for_errno(errbuf, errbuflen, errnum, fmt, args);
	va_end(args);
}
276 
/*
 * va_list form of pcapint_fmt_errmsg_for_errno().
 *
 * Formats fmt/ap into errbuf, then, if there is room, appends ": "
 * followed by a message for the errno value errnum.  If there is no
 * room for ": " plus a terminating '\0', only the formatted text is
 * left in errbuf.
 *
 *   errbuf    - buffer that receives the message.
 *   errbuflen - size of errbuf, in bytes; if 0, nothing is done.
 *   errnum    - errno value to describe.
 *   fmt, ap   - printf-style format and its arguments.
 */
void
pcapint_vfmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
    const char *fmt, va_list ap)
{
	size_t msglen;
	char *p;
	size_t errbuflen_remaining;

	if (errbuflen == 0) {
		/*
		 * With a zero-length buffer vsnprintf() stores nothing,
		 * not even a '\0', so the strlen() below would read
		 * memory we never wrote.  There's nowhere to put a
		 * message anyway.
		 */
		return;
	}

	(void)vsnprintf(errbuf, errbuflen, fmt, ap);
	msglen = strlen(errbuf);

	/*
	 * Do we have enough space to append ": "?
	 * Including the terminating '\0', that's 3 bytes.
	 */
	if (msglen + 3 > errbuflen) {
		/* No - just give them what we've produced. */
		return;
	}
	p = errbuf + msglen;
	errbuflen_remaining = errbuflen - msglen;
	*p++ = ':';
	*p++ = ' ';
	*p = '\0';
	errbuflen_remaining -= 2;

	/*
	 * Now append the string for the error code.
	 */
#if defined(HAVE__WCSERROR_S)
	/*
	 * We have a Windows-style _wcserror_s().
	 * Generate a UTF-16LE error message.
	 */
	wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
	errno_t err = _wcserror_s(utf_16_errbuf, PCAP_ERRBUF_SIZE, errnum);
	if (err != 0) {
		/*
		 * It doesn't appear to be documented anywhere obvious
		 * what the error returns from _wcserror_s().
		 * Fall back to reporting the bare error number.
		 */
		snprintf(p, errbuflen_remaining, "Error %d", errnum);
		return;
	}

	/*
	 * Now convert it from UTF-16LE to UTF-8, dropping it in the
	 * remaining space in the buffer, and truncating it - cleanly,
	 * on a UTF-8 character boundary - if it doesn't fit.
	 */
	utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);

	/*
	 * Now, if we're not in UTF-8 mode, convert errbuf to the
	 * local code page.
	 */
	if (!use_utf_8)
		utf_8_to_acp_truncated(errbuf);
#else
	/*
	 * Either Windows without _wcserror_s() or not Windows.  Let pcap_strerror()
	 * solve the non-UTF-16 part of this problem space.
	 */
	snprintf(p, errbuflen_remaining, "%s", pcap_strerror(errnum));
#endif
}
343 
344 #ifdef _WIN32
345 /*
346  * Generate an error message based on a format, arguments, and a
347  * Win32 error, with a message for the Win32 error after the formatted output.
348  */
349 void
pcapint_fmt_errmsg_for_win32_err(char * errbuf,size_t errbuflen,DWORD errnum,const char * fmt,...)350 pcapint_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
351     const char *fmt, ...)
352 {
353           va_list ap;
354 
355           va_start(ap, fmt);
356           pcapint_vfmt_errmsg_for_win32_err(errbuf, errbuflen, errnum, fmt, ap);
357           va_end(ap);
358 }
359 
360 void
pcapint_vfmt_errmsg_for_win32_err(char * errbuf,size_t errbuflen,DWORD errnum,const char * fmt,va_list ap)361 pcapint_vfmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
362     const char *fmt, va_list ap)
363 {
364           size_t msglen;
365           char *p;
366           size_t errbuflen_remaining;
367           DWORD retval;
368           wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
369           size_t utf_8_len;
370 
371           vsnprintf(errbuf, errbuflen, fmt, ap);
372           msglen = strlen(errbuf);
373 
374           /*
375            * Do we have enough space to append ": "?
376            * Including the terminating '\0', that's 3 bytes.
377            */
378           if (msglen + 3 > errbuflen) {
379                     /* No - just give them what we've produced. */
380                     return;
381           }
382           p = errbuf + msglen;
383           errbuflen_remaining = errbuflen - msglen;
384           *p++ = ':';
385           *p++ = ' ';
386           *p = '\0';
387           msglen += 2;
388           errbuflen_remaining -= 2;
389 
390           /*
391            * Now append the string for the error code.
392            *
393            * XXX - what language ID to use?
394            *
395            * For UN*Xes, pcap_strerror() may or may not return localized
396            * strings.
397            *
398            * We currently don't have localized messages for libpcap, but
399            * we might want to do so.  On the other hand, if most of these
400            * messages are going to be read by libpcap developers and
401            * perhaps by developers of libpcap-based applications, English
402            * might be a better choice, so the developer doesn't have to
403            * get the message translated if it's in a language they don't
404            * happen to understand.
405            */
406           retval = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,
407               NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
408               utf_16_errbuf, PCAP_ERRBUF_SIZE, NULL);
409           if (retval == 0) {
410                     /*
411                      * Failed.
412                      */
413                     snprintf(p, errbuflen_remaining,
414                         "Couldn't get error message for error (%lu)", errnum);
415                     return;
416           }
417 
418           /*
419            * Now convert it from UTF-16LE to UTF-8.
420            */
421           p = utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
422 
423           /*
424            * Now append the error number, if it fits.
425            */
426           utf_8_len = p - errbuf;
427           errbuflen_remaining -= utf_8_len;
428           if (utf_8_len == 0) {
429                     /* The message was empty. */
430                     snprintf(p, errbuflen_remaining, "(%lu)", errnum);
431           } else
432                     snprintf(p, errbuflen_remaining, " (%lu)", errnum);
433 
434           /*
435            * Now, if we're not in UTF-8 mode, convert errbuf to the
436            * local code page.
437            */
438           if (!use_utf_8)
439                     utf_8_to_acp_truncated(errbuf);
440 }
441 #endif
442