xref: /NextBSD/contrib/subversion/subversion/libsvn_subr/utf.c (revision 84d351007654069f9643c8e4b4802a7f5f08ee42)
1 /*
2  * utf.c:  UTF-8 conversion routines
3  *
4  * ====================================================================
5  *    Licensed to the Apache Software Foundation (ASF) under one
6  *    or more contributor license agreements.  See the NOTICE file
7  *    distributed with this work for additional information
8  *    regarding copyright ownership.  The ASF licenses this file
9  *    to you under the Apache License, Version 2.0 (the
10  *    "License"); you may not use this file except in compliance
11  *    with the License.  You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  *    Unless required by applicable law or agreed to in writing,
16  *    software distributed under the License is distributed on an
17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18  *    KIND, either express or implied.  See the License for the
19  *    specific language governing permissions and limitations
20  *    under the License.
21  * ====================================================================
22  */
23 
24 
25 
26 #include <stdlib.h>
27 #include <string.h>
28 #include <assert.h>
29 
30 #include <apr_strings.h>
31 #include <apr_lib.h>
32 #include <apr_xlate.h>
33 #include <apr_atomic.h>
34 
35 #include "svn_hash.h"
36 #include "svn_string.h"
37 #include "svn_error.h"
38 #include "svn_pools.h"
39 #include "svn_ctype.h"
40 #include "svn_utf.h"
41 #include "svn_private_config.h"
42 #include "win32_xlate.h"
43 
44 #include "private/svn_utf_private.h"
45 #include "private/svn_dep_compat.h"
46 #include "private/svn_string_private.h"
47 #include "private/svn_mutex.h"
48 
49 
50 
/* Use these static strings to maximize performance on standard conversions.
 * Any strings on other locations are still valid, however.
 *
 * The fast paths in get_xlate_handle_node() and put_xlate_handle_node()
 * recognize these two keys by comparing pointers, not string contents.
 */
static const char *SVN_UTF_NTOU_XLATE_HANDLE = "svn-utf-ntou-xlate-handle";
static const char *SVN_UTF_UTON_XLATE_HANDLE = "svn-utf-uton-xlate-handle";

/* Charset name used for the UTF-8 side of every conversion in this file. */
static const char *SVN_APR_UTF8_CHARSET = "UTF-8";

/* Mutex taken around accesses to xlate_handle_hash; created by
   svn_utf_initialize2() in the same subpool as the hash. */
static svn_mutex__t *xlate_handle_mutex = NULL;

/* When TRUE, the native<->UTF-8 handle getters open UTF-8 to UTF-8
   translations instead of using SVN_APR_LOCALE_CHARSET.  Set by
   svn_utf_initialize2(). */
static svn_boolean_t assume_native_charset_is_utf8 = FALSE;

/* The platform specific translation handle type. */
#if defined(WIN32)
typedef svn_subr__win32_xlate_t xlate_handle_t;
#else
typedef apr_xlate_t xlate_handle_t;
#endif
67 
/* The xlate handle cache is a global hash table with linked lists of xlate
 * handles.  In multi-threaded environments, a thread "borrows" an xlate
 * handle from the cache during a translation and puts it back afterwards.
 * This avoids holding a global lock for all translations.
 * If there is no handle for a particular key when needed, a new
 * handle is created and put in the cache after use.
 * This means that there will be at most N handles open for a key, where N
 * is the number of simultaneous handles in use for that key. */

typedef struct xlate_handle_node_t {
  /* The translation handle, or NULL if xlate_alloc_handle() could not
     open one because the conversion is invalid or not implemented. */
  xlate_handle_t *handle;
  /* FALSE if the handle is not valid, since its pool is being
     destroyed. */
  svn_boolean_t valid;
  /* The name of a char encoding or APR_LOCALE_CHARSET. */
  const char *frompage, *topage;
  /* Next node cached under the same key; NULL while the node is borrowed. */
  struct xlate_handle_node_t *next;
} xlate_handle_node_t;
86 
/* This maps const char * userdata_key strings to xlate_handle_node_t **
   handles to the first entry in the linked list of xlate handles.  We don't
   store the pointer to the list head directly in the hash table, since we
   remove/insert entries at the head in the list in the code below, and
   we can't use apr_hash_set() in each character translation because that
   function allocates memory in each call where the value is non-NULL.
   Since these allocations take place in a global pool, this would be a
   memory leak.

   NULL until svn_utf_initialize2() has run, and again after
   xlate_cleanup(). */
static apr_hash_t *xlate_handle_hash = NULL;

/* "1st level cache" to standard conversion maps. We may access these
 * using atomic xchange ops, i.e. without further thread synchronization.
 * If the respective item is NULL, fallback to hash lookup.
 *
 * Each slot holds at most one xlate_handle_node_t * for the native->UTF-8
 * and UTF-8->native conversion, respectively.
 */
static void * volatile xlat_ntou_static_handle = NULL;
static void * volatile xlat_uton_static_handle = NULL;
103 
104 /* Clean up the xlate handle cache. */
105 static apr_status_t
xlate_cleanup(void * arg)106 xlate_cleanup(void *arg)
107 {
108   /* We set the cache variables to NULL so that translation works in other
109      cleanup functions, even if it isn't cached then. */
110   xlate_handle_hash = NULL;
111 
112   /* ensure no stale objects get accessed */
113   xlat_ntou_static_handle = NULL;
114   xlat_uton_static_handle = NULL;
115 
116   return APR_SUCCESS;
117 }
118 
119 /* Set the handle of ARG to NULL. */
120 static apr_status_t
xlate_handle_node_cleanup(void * arg)121 xlate_handle_node_cleanup(void *arg)
122 {
123   xlate_handle_node_t *node = arg;
124 
125   node->valid = FALSE;
126   return APR_SUCCESS;
127 }
128 
129 void
svn_utf_initialize2(svn_boolean_t assume_native_utf8,apr_pool_t * pool)130 svn_utf_initialize2(svn_boolean_t assume_native_utf8,
131                     apr_pool_t *pool)
132 {
133   if (!xlate_handle_hash)
134     {
135       /* We create our own subpool, which we protect with the mutex.
136          We can't use the pool passed to us by the caller, since we will
137          use it for xlate handle allocations, possibly in multiple threads,
138          and pool allocation is not thread-safe. */
139       apr_pool_t *subpool = svn_pool_create(pool);
140       svn_mutex__t *mutex;
141       svn_error_t *err = svn_mutex__init(&mutex, TRUE, subpool);
142       if (err)
143         {
144           svn_error_clear(err);
145           return;
146         }
147 
148       xlate_handle_mutex = mutex;
149       xlate_handle_hash = apr_hash_make(subpool);
150 
151       apr_pool_cleanup_register(subpool, NULL, xlate_cleanup,
152                                 apr_pool_cleanup_null);
153     }
154 
155     if (!assume_native_charset_is_utf8)
156       assume_native_charset_is_utf8 = assume_native_utf8;
157 }
158 
159 /* Return a unique string key based on TOPAGE and FROMPAGE.  TOPAGE and
160  * FROMPAGE can be any valid arguments of the same name to
161  * apr_xlate_open().  Allocate the returned string in POOL. */
162 static const char*
get_xlate_key(const char * topage,const char * frompage,apr_pool_t * pool)163 get_xlate_key(const char *topage,
164               const char *frompage,
165               apr_pool_t *pool)
166 {
167   /* In the cases of SVN_APR_LOCALE_CHARSET and SVN_APR_DEFAULT_CHARSET
168    * topage/frompage is really an int, not a valid string.  So generate a
169    * unique key accordingly. */
170   if (frompage == SVN_APR_LOCALE_CHARSET)
171     frompage = "APR_LOCALE_CHARSET";
172   else if (frompage == SVN_APR_DEFAULT_CHARSET)
173     frompage = "APR_DEFAULT_CHARSET";
174 
175   if (topage == SVN_APR_LOCALE_CHARSET)
176     topage = "APR_LOCALE_CHARSET";
177   else if (topage == SVN_APR_DEFAULT_CHARSET)
178     topage = "APR_DEFAULT_CHARSET";
179 
180   return apr_pstrcat(pool, "svn-utf-", frompage, "to", topage,
181                      "-xlate-handle", SVN_VA_NULL);
182 }
183 
184 /* Atomically replace the content in *MEM with NEW_VALUE and return
185  * the previous content of *MEM. If atomicy cannot be guaranteed,
186  * *MEM will not be modified and NEW_VALUE is simply returned to
187  * the caller.
188  */
189 static APR_INLINE void*
atomic_swap(void * volatile * mem,void * new_value)190 atomic_swap(void * volatile * mem, void *new_value)
191 {
192 #if APR_HAS_THREADS
193   /* Cast is necessary because of APR bug:
194      https://issues.apache.org/bugzilla/show_bug.cgi?id=50731 */
195    return apr_atomic_xchgptr((volatile void **)mem, new_value);
196 #else
197    /* no threads - no sync. necessary */
198    void *old_value = (void*)*mem;
199    *mem = new_value;
200    return old_value;
201 #endif
202 }
203 
204 /* Set *RET to a newly created handle node for converting from FROMPAGE
205    to TOPAGE, If apr_xlate_open() returns APR_EINVAL or APR_ENOTIMPL, set
206    (*RET)->handle to NULL.  If fail for any other reason, return the error.
207    Allocate *RET and its xlate handle in POOL. */
208 static svn_error_t *
xlate_alloc_handle(xlate_handle_node_t ** ret,const char * topage,const char * frompage,apr_pool_t * pool)209 xlate_alloc_handle(xlate_handle_node_t **ret,
210                    const char *topage, const char *frompage,
211                    apr_pool_t *pool)
212 {
213   apr_status_t apr_err;
214   xlate_handle_t *handle;
215   const char *name;
216 
217   /* The error handling doesn't support the following cases, since we don't
218      use them currently.  Catch this here. */
219   SVN_ERR_ASSERT(frompage != SVN_APR_DEFAULT_CHARSET
220                  && topage != SVN_APR_DEFAULT_CHARSET
221                  && (frompage != SVN_APR_LOCALE_CHARSET
222                      || topage != SVN_APR_LOCALE_CHARSET));
223 
224   /* Try to create a handle. */
225 #if defined(WIN32)
226   apr_err = svn_subr__win32_xlate_open(&handle, topage,
227                                        frompage, pool);
228   name = "win32-xlate: ";
229 #else
230   apr_err = apr_xlate_open(&handle, topage, frompage, pool);
231   name = "APR: ";
232 #endif
233 
234   if (APR_STATUS_IS_EINVAL(apr_err) || APR_STATUS_IS_ENOTIMPL(apr_err))
235     handle = NULL;
236   else if (apr_err != APR_SUCCESS)
237     {
238       const char *errstr;
239       char apr_strerr[512];
240 
241       /* Can't use svn_error_wrap_apr here because it calls functions in
242          this file, leading to infinite recursion. */
243       if (frompage == SVN_APR_LOCALE_CHARSET)
244         errstr = apr_psprintf(pool,
245                               _("Can't create a character converter from "
246                                 "native encoding to '%s'"), topage);
247       else if (topage == SVN_APR_LOCALE_CHARSET)
248         errstr = apr_psprintf(pool,
249                               _("Can't create a character converter from "
250                                 "'%s' to native encoding"), frompage);
251       else
252         errstr = apr_psprintf(pool,
253                               _("Can't create a character converter from "
254                                 "'%s' to '%s'"), frompage, topage);
255 
256       /* Just put the error on the stack, since svn_error_create duplicates it
257          later.  APR_STRERR will be in the local encoding, not in UTF-8, though.
258        */
259       svn_strerror(apr_err, apr_strerr, sizeof(apr_strerr));
260       return svn_error_createf(SVN_ERR_PLUGIN_LOAD_FAILURE,
261                                svn_error_create(apr_err, NULL, apr_strerr),
262                                "%s%s", name, errstr);
263     }
264 
265   /* Allocate and initialize the node. */
266   *ret = apr_palloc(pool, sizeof(xlate_handle_node_t));
267   (*ret)->handle = handle;
268   (*ret)->valid = TRUE;
269   (*ret)->frompage = ((frompage != SVN_APR_LOCALE_CHARSET)
270                       ? apr_pstrdup(pool, frompage) : frompage);
271   (*ret)->topage = ((topage != SVN_APR_LOCALE_CHARSET)
272                     ? apr_pstrdup(pool, topage) : topage);
273   (*ret)->next = NULL;
274 
275   /* If we are called from inside a pool cleanup handler, the just created
276      xlate handle will be closed when that handler returns by a newly
277      registered cleanup handler, however, the handle is still cached by us.
278      To prevent this, we register a cleanup handler that will reset the valid
279      flag of our node, so we don't use an invalid handle. */
280   if (handle)
281     apr_pool_cleanup_register(pool, *ret, xlate_handle_node_cleanup,
282                               apr_pool_cleanup_null);
283 
284   return SVN_NO_ERROR;
285 }
286 
287 /* Extend xlate_alloc_handle by using USERDATA_KEY as a key in our
288    global hash map, if available.
289 
290    Allocate *RET and its xlate handle in POOL if svn_utf_initialize()
291    hasn't been called or USERDATA_KEY is NULL.  Else, allocate them
292    in the pool of xlate_handle_hash.
293 
294    Note: this function is not thread-safe. Call get_xlate_handle_node
295    instead. */
296 static svn_error_t *
get_xlate_handle_node_internal(xlate_handle_node_t ** ret,const char * topage,const char * frompage,const char * userdata_key,apr_pool_t * pool)297 get_xlate_handle_node_internal(xlate_handle_node_t **ret,
298                                const char *topage, const char *frompage,
299                                const char *userdata_key, apr_pool_t *pool)
300 {
301   /* If we already have a handle, just return it. */
302   if (userdata_key && xlate_handle_hash)
303     {
304       xlate_handle_node_t *old_node = NULL;
305 
306       /* 2nd level: hash lookup */
307       xlate_handle_node_t **old_node_p = svn_hash_gets(xlate_handle_hash,
308                                                        userdata_key);
309       if (old_node_p)
310         old_node = *old_node_p;
311       if (old_node)
312         {
313           /* Ensure that the handle is still valid. */
314           if (old_node->valid)
315             {
316               /* Remove from the list. */
317               *old_node_p = old_node->next;
318               old_node->next = NULL;
319               *ret = old_node;
320               return SVN_NO_ERROR;
321             }
322         }
323     }
324 
325   /* Note that we still have the mutex locked (if it is initialized), so we
326      can use the global pool for creating the new xlate handle. */
327 
328   /* Use the correct pool for creating the handle. */
329   pool = apr_hash_pool_get(xlate_handle_hash);
330 
331   return xlate_alloc_handle(ret, topage, frompage, pool);
332 }
333 
334 /* Set *RET to a handle node for converting from FROMPAGE to TOPAGE,
335    creating the handle node if it doesn't exist in USERDATA_KEY.
336    If a node is not cached and apr_xlate_open() returns APR_EINVAL or
337    APR_ENOTIMPL, set (*RET)->handle to NULL.  If fail for any other
338    reason, return the error.
339 
340    Allocate *RET and its xlate handle in POOL if svn_utf_initialize()
341    hasn't been called or USERDATA_KEY is NULL.  Else, allocate them
342    in the pool of xlate_handle_hash. */
343 static svn_error_t *
get_xlate_handle_node(xlate_handle_node_t ** ret,const char * topage,const char * frompage,const char * userdata_key,apr_pool_t * pool)344 get_xlate_handle_node(xlate_handle_node_t **ret,
345                       const char *topage, const char *frompage,
346                       const char *userdata_key, apr_pool_t *pool)
347 {
348   xlate_handle_node_t *old_node = NULL;
349 
350   /* If we already have a handle, just return it. */
351   if (userdata_key)
352     {
353       if (xlate_handle_hash)
354         {
355           /* 1st level: global, static items */
356           if (userdata_key == SVN_UTF_NTOU_XLATE_HANDLE)
357             old_node = atomic_swap(&xlat_ntou_static_handle, NULL);
358           else if (userdata_key == SVN_UTF_UTON_XLATE_HANDLE)
359             old_node = atomic_swap(&xlat_uton_static_handle, NULL);
360 
361           if (old_node && old_node->valid)
362             {
363               *ret = old_node;
364               return SVN_NO_ERROR;
365             }
366         }
367       else
368         {
369           void *p;
370           /* We fall back on a per-pool cache instead. */
371           apr_pool_userdata_get(&p, userdata_key, pool);
372           old_node = p;
373           /* Ensure that the handle is still valid. */
374           if (old_node && old_node->valid)
375             {
376               *ret = old_node;
377               return SVN_NO_ERROR;
378             }
379 
380           return xlate_alloc_handle(ret, topage, frompage, pool);
381         }
382     }
383 
384   SVN_MUTEX__WITH_LOCK(xlate_handle_mutex,
385                        get_xlate_handle_node_internal(ret,
386                                                       topage,
387                                                       frompage,
388                                                       userdata_key,
389                                                       pool));
390 
391   return SVN_NO_ERROR;
392 }
393 
394 /* Put back NODE into the xlate handle cache for use by other calls.
395 
396    Note: this function is not thread-safe. Call put_xlate_handle_node
397    instead. */
398 static svn_error_t *
put_xlate_handle_node_internal(xlate_handle_node_t * node,const char * userdata_key)399 put_xlate_handle_node_internal(xlate_handle_node_t *node,
400                                const char *userdata_key)
401 {
402   xlate_handle_node_t **node_p = svn_hash_gets(xlate_handle_hash, userdata_key);
403   if (node_p == NULL)
404     {
405       userdata_key = apr_pstrdup(apr_hash_pool_get(xlate_handle_hash),
406                                   userdata_key);
407       node_p = apr_palloc(apr_hash_pool_get(xlate_handle_hash),
408                           sizeof(*node_p));
409       *node_p = NULL;
410       svn_hash_sets(xlate_handle_hash, userdata_key, node_p);
411     }
412   node->next = *node_p;
413   *node_p = node;
414 
415   return SVN_NO_ERROR;
416 }
417 
418 /* Put back NODE into the xlate handle cache for use by other calls.
419    If there is no global cache, store the handle in POOL.
420    Ignore errors related to locking/unlocking the mutex. */
421 static svn_error_t *
put_xlate_handle_node(xlate_handle_node_t * node,const char * userdata_key,apr_pool_t * pool)422 put_xlate_handle_node(xlate_handle_node_t *node,
423                       const char *userdata_key,
424                       apr_pool_t *pool)
425 {
426   assert(node->next == NULL);
427   if (!userdata_key)
428     return SVN_NO_ERROR;
429 
430   /* push previous global node to the hash */
431   if (xlate_handle_hash)
432     {
433       /* 1st level: global, static items */
434       if (userdata_key == SVN_UTF_NTOU_XLATE_HANDLE)
435         node = atomic_swap(&xlat_ntou_static_handle, node);
436       else if (userdata_key == SVN_UTF_UTON_XLATE_HANDLE)
437         node = atomic_swap(&xlat_uton_static_handle, node);
438       if (node == NULL)
439         return SVN_NO_ERROR;
440 
441       SVN_MUTEX__WITH_LOCK(xlate_handle_mutex,
442                            put_xlate_handle_node_internal(node,
443                                                           userdata_key));
444     }
445   else
446     {
447       /* Store it in the per-pool cache. */
448       apr_pool_userdata_set(node, userdata_key, apr_pool_cleanup_null, pool);
449     }
450 
451   return SVN_NO_ERROR;
452 }
453 
454 /* Return the apr_xlate handle for converting native characters to UTF-8. */
455 static svn_error_t *
get_ntou_xlate_handle_node(xlate_handle_node_t ** ret,apr_pool_t * pool)456 get_ntou_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool)
457 {
458   return get_xlate_handle_node(ret, SVN_APR_UTF8_CHARSET,
459                                assume_native_charset_is_utf8
460                                  ? SVN_APR_UTF8_CHARSET
461                                  : SVN_APR_LOCALE_CHARSET,
462                                SVN_UTF_NTOU_XLATE_HANDLE, pool);
463 }
464 
465 
466 /* Return the apr_xlate handle for converting UTF-8 to native characters.
467    Create one if it doesn't exist.  If unable to find a handle, or
468    unable to create one because apr_xlate_open returned APR_EINVAL, then
469    set *RET to null and return SVN_NO_ERROR; if fail for some other
470    reason, return error. */
471 static svn_error_t *
get_uton_xlate_handle_node(xlate_handle_node_t ** ret,apr_pool_t * pool)472 get_uton_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool)
473 {
474   return get_xlate_handle_node(ret,
475                                assume_native_charset_is_utf8
476                                  ? SVN_APR_UTF8_CHARSET
477                                  : SVN_APR_LOCALE_CHARSET,
478                                SVN_APR_UTF8_CHARSET,
479                                SVN_UTF_UTON_XLATE_HANDLE, pool);
480 }
481 
482 
483 /* Convert SRC_LENGTH bytes of SRC_DATA in NODE->handle, store the result
484    in *DEST, which is allocated in POOL. */
485 static svn_error_t *
convert_to_stringbuf(xlate_handle_node_t * node,const char * src_data,apr_size_t src_length,svn_stringbuf_t ** dest,apr_pool_t * pool)486 convert_to_stringbuf(xlate_handle_node_t *node,
487                      const char *src_data,
488                      apr_size_t src_length,
489                      svn_stringbuf_t **dest,
490                      apr_pool_t *pool)
491 {
492 #ifdef WIN32
493   apr_status_t apr_err;
494 
495   apr_err = svn_subr__win32_xlate_to_stringbuf(node->handle, src_data,
496                                                src_length, dest, pool);
497 #else
498   apr_size_t buflen = src_length * 2;
499   apr_status_t apr_err;
500   apr_size_t srclen = src_length;
501   apr_size_t destlen = buflen;
502 
503   /* Initialize *DEST to an empty stringbuf.
504      A 1:2 ratio of input bytes to output bytes (as assigned above)
505      should be enough for most translations, and if it turns out not
506      to be enough, we'll grow the buffer again, sizing it based on a
507      1:3 ratio of the remainder of the string. */
508   *dest = svn_stringbuf_create_ensure(buflen + 1, pool);
509 
510   /* Not only does it not make sense to convert an empty string, but
511      apr-iconv is quite unreasonable about not allowing that. */
512   if (src_length == 0)
513     return SVN_NO_ERROR;
514 
515   do
516     {
517       /* Set up state variables for xlate. */
518       destlen = buflen - (*dest)->len;
519 
520       /* Attempt the conversion. */
521       apr_err = apr_xlate_conv_buffer(node->handle,
522                                       src_data + (src_length - srclen),
523                                       &srclen,
524                                       (*dest)->data + (*dest)->len,
525                                       &destlen);
526 
527       /* Now, update the *DEST->len to track the amount of output data
528          churned out so far from this loop. */
529       (*dest)->len += ((buflen - (*dest)->len) - destlen);
530       buflen += srclen * 3; /* 3 is middle ground, 2 wasn't enough
531                                for all characters in the buffer, 4 is
532                                maximum character size (currently) */
533 
534 
535     } while (apr_err == APR_SUCCESS && srclen != 0);
536 #endif
537 
538   /* If we exited the loop with an error, return the error. */
539   if (apr_err)
540     {
541       const char *errstr;
542       svn_error_t *err;
543 
544       /* Can't use svn_error_wrap_apr here because it calls functions in
545          this file, leading to infinite recursion. */
546       if (node->frompage == SVN_APR_LOCALE_CHARSET)
547         errstr = apr_psprintf
548           (pool, _("Can't convert string from native encoding to '%s':"),
549            node->topage);
550       else if (node->topage == SVN_APR_LOCALE_CHARSET)
551         errstr = apr_psprintf
552           (pool, _("Can't convert string from '%s' to native encoding:"),
553            node->frompage);
554       else
555         errstr = apr_psprintf
556           (pool, _("Can't convert string from '%s' to '%s':"),
557            node->frompage, node->topage);
558 
559       err = svn_error_create(
560           apr_err, NULL, svn_utf__fuzzy_escape(src_data, src_length, pool));
561       return svn_error_create(apr_err, err, errstr);
562     }
563   /* Else, exited due to success.  Trim the result buffer down to the
564      right length. */
565   (*dest)->data[(*dest)->len] = '\0';
566 
567   return SVN_NO_ERROR;
568 }
569 
570 
571 /* Return APR_EINVAL if the first LEN bytes of DATA contain anything
572    other than seven-bit, non-control (except for whitespace) ASCII
573    characters, finding the error pool from POOL.  Otherwise, return
574    SVN_NO_ERROR. */
575 static svn_error_t *
check_non_ascii(const char * data,apr_size_t len,apr_pool_t * pool)576 check_non_ascii(const char *data, apr_size_t len, apr_pool_t *pool)
577 {
578   const char *data_start = data;
579 
580   for (; len > 0; --len, data++)
581     {
582       if ((! svn_ctype_isascii(*data))
583           || ((! svn_ctype_isspace(*data))
584               && svn_ctype_iscntrl(*data)))
585         {
586           /* Show the printable part of the data, followed by the
587              decimal code of the questionable character.  Because if a
588              user ever gets this error, she's going to have to spend
589              time tracking down the non-ASCII data, so we want to help
590              as much as possible.  And yes, we just call the unsafe
591              data "non-ASCII", even though the actual constraint is
592              somewhat more complex than that. */
593 
594           if (data - data_start)
595             {
596               const char *error_data
597                 = apr_pstrndup(pool, data_start, (data - data_start));
598 
599               return svn_error_createf
600                 (APR_EINVAL, NULL,
601                  _("Safe data '%s' was followed by non-ASCII byte %d: "
602                    "unable to convert to/from UTF-8"),
603                  error_data, *((const unsigned char *) data));
604             }
605           else
606             {
607               return svn_error_createf
608                 (APR_EINVAL, NULL,
609                  _("Non-ASCII character (code %d) detected, "
610                    "and unable to convert to/from UTF-8"),
611                  *((const unsigned char *) data));
612             }
613         }
614     }
615 
616   return SVN_NO_ERROR;
617 }
618 
619 /* Construct an error with code APR_EINVAL and with a suitable message
620  * to describe the invalid UTF-8 sequence DATA of length LEN (which
621  * may have embedded NULLs).  We can't simply print the data, almost
622  * by definition we don't really know how it is encoded.
623  */
624 static svn_error_t *
invalid_utf8(const char * data,apr_size_t len,apr_pool_t * pool)625 invalid_utf8(const char *data, apr_size_t len, apr_pool_t *pool)
626 {
627   const char *last = svn_utf__last_valid(data, len);
628   const char *valid_txt = "", *invalid_txt = "";
629   apr_size_t i;
630   size_t valid, invalid;
631 
632   /* We will display at most 24 valid octets (this may split a leading
633      multi-byte character) as that should fit on one 80 character line. */
634   valid = last - data;
635   if (valid > 24)
636     valid = 24;
637   for (i = 0; i < valid; ++i)
638     valid_txt = apr_pstrcat(pool, valid_txt,
639                             apr_psprintf(pool, " %02x",
640                                          (unsigned char)last[i-valid]),
641                                          SVN_VA_NULL);
642 
643   /* 4 invalid octets will guarantee that the faulty octet is displayed */
644   invalid = data + len - last;
645   if (invalid > 4)
646     invalid = 4;
647   for (i = 0; i < invalid; ++i)
648     invalid_txt = apr_pstrcat(pool, invalid_txt,
649                               apr_psprintf(pool, " %02x",
650                                            (unsigned char)last[i]),
651                                            SVN_VA_NULL);
652 
653   return svn_error_createf(APR_EINVAL, NULL,
654                            _("Valid UTF-8 data\n(hex:%s)\n"
655                              "followed by invalid UTF-8 sequence\n(hex:%s)"),
656                            valid_txt, invalid_txt);
657 }
658 
659 /* Verify that the sequence DATA of length LEN is valid UTF-8.
660    If it is not, return an error with code APR_EINVAL. */
661 static svn_error_t *
check_utf8(const char * data,apr_size_t len,apr_pool_t * pool)662 check_utf8(const char *data, apr_size_t len, apr_pool_t *pool)
663 {
664   if (! svn_utf__is_valid(data, len))
665     return invalid_utf8(data, len, pool);
666   return SVN_NO_ERROR;
667 }
668 
669 /* Verify that the NULL terminated sequence DATA is valid UTF-8.
670    If it is not, return an error with code APR_EINVAL. */
671 static svn_error_t *
check_cstring_utf8(const char * data,apr_pool_t * pool)672 check_cstring_utf8(const char *data, apr_pool_t *pool)
673 {
674 
675   if (! svn_utf__cstring_is_valid(data))
676     return invalid_utf8(data, strlen(data), pool);
677   return SVN_NO_ERROR;
678 }
679 
680 
681 svn_error_t *
svn_utf_stringbuf_to_utf8(svn_stringbuf_t ** dest,const svn_stringbuf_t * src,apr_pool_t * pool)682 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest,
683                           const svn_stringbuf_t *src,
684                           apr_pool_t *pool)
685 {
686   xlate_handle_node_t *node;
687   svn_error_t *err;
688 
689   SVN_ERR(get_ntou_xlate_handle_node(&node, pool));
690 
691   if (node->handle)
692     {
693       err = convert_to_stringbuf(node, src->data, src->len, dest, pool);
694       if (! err)
695         err = check_utf8((*dest)->data, (*dest)->len, pool);
696     }
697   else
698     {
699       err = check_non_ascii(src->data, src->len, pool);
700       if (! err)
701         *dest = svn_stringbuf_dup(src, pool);
702     }
703 
704   return svn_error_compose_create(err,
705                                   put_xlate_handle_node
706                                      (node,
707                                       SVN_UTF_NTOU_XLATE_HANDLE,
708                                       pool));
709 }
710 
711 
712 svn_error_t *
svn_utf_string_to_utf8(const svn_string_t ** dest,const svn_string_t * src,apr_pool_t * pool)713 svn_utf_string_to_utf8(const svn_string_t **dest,
714                        const svn_string_t *src,
715                        apr_pool_t *pool)
716 {
717   svn_stringbuf_t *destbuf;
718   xlate_handle_node_t *node;
719   svn_error_t *err;
720 
721   SVN_ERR(get_ntou_xlate_handle_node(&node, pool));
722 
723   if (node->handle)
724     {
725       err = convert_to_stringbuf(node, src->data, src->len, &destbuf, pool);
726       if (! err)
727         err = check_utf8(destbuf->data, destbuf->len, pool);
728       if (! err)
729         *dest = svn_stringbuf__morph_into_string(destbuf);
730     }
731   else
732     {
733       err = check_non_ascii(src->data, src->len, pool);
734       if (! err)
735         *dest = svn_string_dup(src, pool);
736     }
737 
738   return svn_error_compose_create(err,
739                                   put_xlate_handle_node
740                                      (node,
741                                       SVN_UTF_NTOU_XLATE_HANDLE,
742                                       pool));
743 }
744 
745 
746 /* Common implementation for svn_utf_cstring_to_utf8,
747    svn_utf_cstring_to_utf8_ex, svn_utf_cstring_from_utf8 and
748    svn_utf_cstring_from_utf8_ex. Convert SRC to DEST using NODE->handle as
749    the translator and allocating from POOL. */
750 static svn_error_t *
convert_cstring(const char ** dest,const char * src,xlate_handle_node_t * node,apr_pool_t * pool)751 convert_cstring(const char **dest,
752                 const char *src,
753                 xlate_handle_node_t *node,
754                 apr_pool_t *pool)
755 {
756   if (node->handle)
757     {
758       svn_stringbuf_t *destbuf;
759       SVN_ERR(convert_to_stringbuf(node, src, strlen(src),
760                                    &destbuf, pool));
761       *dest = destbuf->data;
762     }
763   else
764     {
765       apr_size_t len = strlen(src);
766       SVN_ERR(check_non_ascii(src, len, pool));
767       *dest = apr_pstrmemdup(pool, src, len);
768     }
769   return SVN_NO_ERROR;
770 }
771 
772 
773 svn_error_t *
svn_utf_cstring_to_utf8(const char ** dest,const char * src,apr_pool_t * pool)774 svn_utf_cstring_to_utf8(const char **dest,
775                         const char *src,
776                         apr_pool_t *pool)
777 {
778   xlate_handle_node_t *node;
779   svn_error_t *err;
780 
781   SVN_ERR(get_ntou_xlate_handle_node(&node, pool));
782   err = convert_cstring(dest, src, node, pool);
783   SVN_ERR(svn_error_compose_create(err,
784                                    put_xlate_handle_node
785                                       (node,
786                                        SVN_UTF_NTOU_XLATE_HANDLE,
787                                        pool)));
788   return check_cstring_utf8(*dest, pool);
789 }
790 
791 
792 svn_error_t *
svn_utf_cstring_to_utf8_ex2(const char ** dest,const char * src,const char * frompage,apr_pool_t * pool)793 svn_utf_cstring_to_utf8_ex2(const char **dest,
794                             const char *src,
795                             const char *frompage,
796                             apr_pool_t *pool)
797 {
798   xlate_handle_node_t *node;
799   svn_error_t *err;
800   const char *convset_key = get_xlate_key(SVN_APR_UTF8_CHARSET, frompage,
801                                           pool);
802 
803   SVN_ERR(get_xlate_handle_node(&node, SVN_APR_UTF8_CHARSET, frompage,
804                                 convset_key, pool));
805   err = convert_cstring(dest, src, node, pool);
806   SVN_ERR(svn_error_compose_create(err,
807                                    put_xlate_handle_node
808                                       (node,
809                                        SVN_UTF_NTOU_XLATE_HANDLE,
810                                        pool)));
811 
812   return check_cstring_utf8(*dest, pool);
813 }
814 
815 
816 svn_error_t *
svn_utf_cstring_to_utf8_ex(const char ** dest,const char * src,const char * frompage,const char * convset_key,apr_pool_t * pool)817 svn_utf_cstring_to_utf8_ex(const char **dest,
818                            const char *src,
819                            const char *frompage,
820                            const char *convset_key,
821                            apr_pool_t *pool)
822 {
823   return svn_utf_cstring_to_utf8_ex2(dest, src, frompage, pool);
824 }
825 
826 
827 svn_error_t *
svn_utf_stringbuf_from_utf8(svn_stringbuf_t ** dest,const svn_stringbuf_t * src,apr_pool_t * pool)828 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest,
829                             const svn_stringbuf_t *src,
830                             apr_pool_t *pool)
831 {
832   xlate_handle_node_t *node;
833   svn_error_t *err;
834 
835   SVN_ERR(get_uton_xlate_handle_node(&node, pool));
836 
837   if (node->handle)
838     {
839       err = check_utf8(src->data, src->len, pool);
840       if (! err)
841         err = convert_to_stringbuf(node, src->data, src->len, dest, pool);
842     }
843   else
844     {
845       err = check_non_ascii(src->data, src->len, pool);
846       if (! err)
847         *dest = svn_stringbuf_dup(src, pool);
848     }
849 
850   err = svn_error_compose_create(
851           err,
852           put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool));
853 
854   return err;
855 }
856 
857 
858 svn_error_t *
svn_utf_string_from_utf8(const svn_string_t ** dest,const svn_string_t * src,apr_pool_t * pool)859 svn_utf_string_from_utf8(const svn_string_t **dest,
860                          const svn_string_t *src,
861                          apr_pool_t *pool)
862 {
863   svn_stringbuf_t *dbuf;
864   xlate_handle_node_t *node;
865   svn_error_t *err;
866 
867   SVN_ERR(get_uton_xlate_handle_node(&node, pool));
868 
869   if (node->handle)
870     {
871       err = check_utf8(src->data, src->len, pool);
872       if (! err)
873         err = convert_to_stringbuf(node, src->data, src->len,
874                                    &dbuf, pool);
875       if (! err)
876         *dest = svn_stringbuf__morph_into_string(dbuf);
877     }
878   else
879     {
880       err = check_non_ascii(src->data, src->len, pool);
881       if (! err)
882         *dest = svn_string_dup(src, pool);
883     }
884 
885   err = svn_error_compose_create(
886           err,
887           put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool));
888 
889   return err;
890 }
891 
892 
893 svn_error_t *
svn_utf_cstring_from_utf8(const char ** dest,const char * src,apr_pool_t * pool)894 svn_utf_cstring_from_utf8(const char **dest,
895                           const char *src,
896                           apr_pool_t *pool)
897 {
898   xlate_handle_node_t *node;
899   svn_error_t *err;
900 
901   SVN_ERR(check_cstring_utf8(src, pool));
902 
903   SVN_ERR(get_uton_xlate_handle_node(&node, pool));
904   err = convert_cstring(dest, src, node, pool);
905   err = svn_error_compose_create(
906           err,
907           put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool));
908 
909   return err;
910 }
911 
912 
913 svn_error_t *
svn_utf_cstring_from_utf8_ex2(const char ** dest,const char * src,const char * topage,apr_pool_t * pool)914 svn_utf_cstring_from_utf8_ex2(const char **dest,
915                               const char *src,
916                               const char *topage,
917                               apr_pool_t *pool)
918 {
919   xlate_handle_node_t *node;
920   svn_error_t *err;
921   const char *convset_key = get_xlate_key(topage, SVN_APR_UTF8_CHARSET,
922                                           pool);
923 
924   SVN_ERR(check_cstring_utf8(src, pool));
925 
926   SVN_ERR(get_xlate_handle_node(&node, topage, SVN_APR_UTF8_CHARSET,
927                                 convset_key, pool));
928   err = convert_cstring(dest, src, node, pool);
929   err = svn_error_compose_create(
930           err,
931           put_xlate_handle_node(node, convset_key, pool));
932 
933   return err;
934 }
935 
936 const char *
svn_utf__cstring_from_utf8_fuzzy(const char * src,apr_pool_t * pool,svn_error_t * (* convert_from_utf8)(const char **,const char *,apr_pool_t *))937 svn_utf__cstring_from_utf8_fuzzy(const char *src,
938                                  apr_pool_t *pool,
939                                  svn_error_t *(*convert_from_utf8)
940                                  (const char **, const char *, apr_pool_t *))
941 {
942   const char *escaped, *converted;
943   svn_error_t *err;
944 
945   escaped = svn_utf__fuzzy_escape(src, strlen(src), pool);
946 
947   /* Okay, now we have a *new* UTF-8 string, one that's guaranteed to
948      contain only 7-bit bytes :-).  Recode to native... */
949   err = convert_from_utf8(((const char **) &converted), escaped, pool);
950 
951   if (err)
952     {
953       svn_error_clear(err);
954       return escaped;
955     }
956   else
957     return converted;
958 
959   /* ### Check the client locale, maybe we can avoid that second
960    * conversion!  See Ulrich Drepper's patch at
961    * http://subversion.tigris.org/issues/show_bug.cgi?id=807.
962    */
963 }
964 
965 
966 const char *
svn_utf_cstring_from_utf8_fuzzy(const char * src,apr_pool_t * pool)967 svn_utf_cstring_from_utf8_fuzzy(const char *src,
968                                 apr_pool_t *pool)
969 {
970   return svn_utf__cstring_from_utf8_fuzzy(src, pool,
971                                           svn_utf_cstring_from_utf8);
972 }
973 
974 
975 svn_error_t *
svn_utf_cstring_from_utf8_stringbuf(const char ** dest,const svn_stringbuf_t * src,apr_pool_t * pool)976 svn_utf_cstring_from_utf8_stringbuf(const char **dest,
977                                     const svn_stringbuf_t *src,
978                                     apr_pool_t *pool)
979 {
980   svn_stringbuf_t *destbuf;
981 
982   SVN_ERR(svn_utf_stringbuf_from_utf8(&destbuf, src, pool));
983   *dest = destbuf->data;
984 
985   return SVN_NO_ERROR;
986 }
987 
988 
989 svn_error_t *
svn_utf_cstring_from_utf8_string(const char ** dest,const svn_string_t * src,apr_pool_t * pool)990 svn_utf_cstring_from_utf8_string(const char **dest,
991                                  const svn_string_t *src,
992                                  apr_pool_t *pool)
993 {
994   svn_stringbuf_t *dbuf;
995   xlate_handle_node_t *node;
996   svn_error_t *err;
997 
998   SVN_ERR(get_uton_xlate_handle_node(&node, pool));
999 
1000   if (node->handle)
1001     {
1002       err = check_utf8(src->data, src->len, pool);
1003       if (! err)
1004         err = convert_to_stringbuf(node, src->data, src->len,
1005                                    &dbuf, pool);
1006       if (! err)
1007         *dest = dbuf->data;
1008     }
1009   else
1010     {
1011       err = check_non_ascii(src->data, src->len, pool);
1012       if (! err)
1013         *dest = apr_pstrmemdup(pool, src->data, src->len);
1014     }
1015 
1016   err = svn_error_compose_create(
1017           err,
1018           put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool));
1019 
1020   return err;
1021 }
1022 
1023 
1024 /* Insert the given UCS-4 VALUE into BUF at the given OFFSET. */
1025 static void
membuf_insert_ucs4(svn_membuf_t * buf,apr_size_t offset,apr_int32_t value)1026 membuf_insert_ucs4(svn_membuf_t *buf, apr_size_t offset, apr_int32_t value)
1027 {
1028   svn_membuf__resize(buf, (offset + 1) * sizeof(value));
1029   ((apr_int32_t*)buf->data)[offset] = value;
1030 }
1031 
/* TODO: Use compiler intrinsics for byte swaps. */

/* Exchange the two low-order bytes of X.  X is evaluated more than once,
   so it must not have side effects. */
#define SWAP_SHORT(x)  ((((x) & 0xff) << 8) | (((x) >> 8) & 0xff))

/* Reverse the order of the four low-order bytes of X.  The arithmetic is
   carried out in unsigned long (at least 32 bits wide) so that moving a
   low byte >= 0x80 into the top byte never shifts into the sign bit of a
   signed operand, which would be undefined behaviour.  The result has
   type unsigned long and always fits in 32 bits.  X is evaluated more
   than once, so it must not have side effects. */
#define SWAP_LONG(x)   ((((unsigned long)(x) & 0xff) << 24)             \
                        | (((unsigned long)(x) & 0xff00) << 8)          \
                        | (((unsigned long)(x) >> 8) & 0xff00)          \
                        | (((unsigned long)(x) >> 24) & 0xff))

/* True if the 16-bit code unit C is a UTF-16 lead (high) surrogate,
   respectively a trail (low) surrogate. */
#define IS_UTF16_LEAD_SURROGATE(c)   ((c) >= 0xd800 && (c) <= 0xdbff)
#define IS_UTF16_TRAIL_SURROGATE(c)  ((c) >= 0xdc00 && (c) <= 0xdfff)
1039 
1040 svn_error_t *
svn_utf__utf16_to_utf8(const svn_string_t ** result,const apr_uint16_t * utf16str,apr_size_t utf16len,svn_boolean_t big_endian,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1041 svn_utf__utf16_to_utf8(const svn_string_t **result,
1042                        const apr_uint16_t *utf16str,
1043                        apr_size_t utf16len,
1044                        svn_boolean_t big_endian,
1045                        apr_pool_t *result_pool,
1046                        apr_pool_t *scratch_pool)
1047 {
1048   static const apr_uint16_t endiancheck = 0xa55a;
1049   const svn_boolean_t arch_big_endian =
1050     (((const char*)&endiancheck)[sizeof(endiancheck) - 1] == '\x5a');
1051   const svn_boolean_t swap_order = (!big_endian != !arch_big_endian);
1052 
1053   apr_uint16_t lead_surrogate;
1054   apr_size_t length;
1055   apr_size_t offset;
1056   svn_membuf_t ucs4buf;
1057   svn_membuf_t resultbuf;
1058   svn_string_t *res;
1059 
1060   if (utf16len == SVN_UTF__UNKNOWN_LENGTH)
1061     {
1062       const apr_uint16_t *endp = utf16str;
1063       while (*endp++)
1064         ;
1065       utf16len = (endp - utf16str);
1066     }
1067 
1068   svn_membuf__create(&ucs4buf, utf16len * sizeof(apr_int32_t), scratch_pool);
1069 
1070   for (lead_surrogate = 0, length = 0, offset = 0;
1071        offset < utf16len; ++offset)
1072     {
1073       const apr_uint16_t code =
1074         (swap_order ? SWAP_SHORT(utf16str[offset]) : utf16str[offset]);
1075 
1076       if (lead_surrogate)
1077         {
1078           if (IS_UTF16_TRAIL_SURROGATE(code))
1079             {
1080               /* Combine the lead and trail currogates into a 32-bit code. */
1081               membuf_insert_ucs4(&ucs4buf, length++,
1082                                  (0x010000
1083                                   + (((lead_surrogate & 0x03ff) << 10)
1084                                      | (code & 0x03ff))));
1085               lead_surrogate = 0;
1086               continue;
1087             }
1088           else
1089             {
1090               /* If we didn't find a surrogate pair, just dump the
1091                  lead surrogate into the stream. */
1092               membuf_insert_ucs4(&ucs4buf, length++, lead_surrogate);
1093               lead_surrogate = 0;
1094             }
1095         }
1096 
1097       if ((offset + 1) < utf16len && IS_UTF16_LEAD_SURROGATE(code))
1098         {
1099           /* Store a lead surrogate that is followed by at least one
1100              code for the next iteration. */
1101           lead_surrogate = code;
1102           continue;
1103         }
1104       else
1105         membuf_insert_ucs4(&ucs4buf, length++, code);
1106     }
1107 
1108   /* Convert the UCS-4 buffer to UTF-8, assuming an average of 2 bytes
1109      per code point for encoding. The buffer will grow as
1110      necessary. */
1111   svn_membuf__create(&resultbuf, length * 2, result_pool);
1112   SVN_ERR(svn_utf__encode_ucs4_string(
1113               &resultbuf, ucs4buf.data, length, &length));
1114 
1115   res = apr_palloc(result_pool, sizeof(*res));
1116   res->data = resultbuf.data;
1117   res->len = length;
1118   *result = res;
1119   return SVN_NO_ERROR;
1120 }
1121 
1122 
1123 svn_error_t *
svn_utf__utf32_to_utf8(const svn_string_t ** result,const apr_int32_t * utf32str,apr_size_t utf32len,svn_boolean_t big_endian,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1124 svn_utf__utf32_to_utf8(const svn_string_t **result,
1125                        const apr_int32_t *utf32str,
1126                        apr_size_t utf32len,
1127                        svn_boolean_t big_endian,
1128                        apr_pool_t *result_pool,
1129                        apr_pool_t *scratch_pool)
1130 {
1131   static const apr_int32_t endiancheck = 0xa5cbbc5a;
1132   const svn_boolean_t arch_big_endian =
1133     (((const char*)&endiancheck)[sizeof(endiancheck) - 1] == '\x5a');
1134   const svn_boolean_t swap_order = (!big_endian != !arch_big_endian);
1135 
1136   apr_size_t length;
1137   svn_membuf_t resultbuf;
1138   svn_string_t *res;
1139 
1140   if (utf32len == SVN_UTF__UNKNOWN_LENGTH)
1141     {
1142       const apr_int32_t *endp = utf32str;
1143       while (*endp++)
1144         ;
1145       utf32len = (endp - utf32str);
1146     }
1147 
1148   if (swap_order)
1149     {
1150       apr_size_t offset;
1151       svn_membuf_t ucs4buf;
1152 
1153       svn_membuf__create(&ucs4buf, utf32len * sizeof(apr_int32_t),
1154                          scratch_pool);
1155 
1156       for (offset = 0; offset < utf32len; ++offset)
1157         {
1158           const apr_int32_t code = SWAP_LONG(utf32str[offset]);
1159           membuf_insert_ucs4(&ucs4buf, offset, code);
1160         }
1161       utf32str = ucs4buf.data;
1162     }
1163 
1164   /* Convert the UCS-4 buffer to UTF-8, assuming an average of 2 bytes
1165      per code point for encoding. The buffer will grow as
1166      necessary. */
1167   svn_membuf__create(&resultbuf, utf32len * 2, result_pool);
1168   SVN_ERR(svn_utf__encode_ucs4_string(
1169               &resultbuf, utf32str, utf32len, &length));
1170 
1171   res = apr_palloc(result_pool, sizeof(*res));
1172   res->data = resultbuf.data;
1173   res->len = length;
1174   *result = res;
1175   return SVN_NO_ERROR;
1176 }
1177 
1178 
1179 #ifdef WIN32
1180 
1181 
1182 svn_error_t *
svn_utf__win32_utf8_to_utf16(const WCHAR ** result,const char * src,const WCHAR * prefix,apr_pool_t * result_pool)1183 svn_utf__win32_utf8_to_utf16(const WCHAR **result,
1184                              const char *src,
1185                              const WCHAR *prefix,
1186                              apr_pool_t *result_pool)
1187 {
1188   const int utf8_count = strlen(src);
1189   const int prefix_len = (prefix ? lstrlenW(prefix) : 0);
1190   WCHAR *wide_str;
1191   int wide_count;
1192 
1193   if (0 == prefix_len + utf8_count)
1194     {
1195       *result = L"";
1196       return SVN_NO_ERROR;
1197     }
1198 
1199   wide_count = MultiByteToWideChar(CP_UTF8, 0, src, utf8_count, NULL, 0);
1200   if (wide_count == 0)
1201     return svn_error_wrap_apr(apr_get_os_error(),
1202                               _("Conversion to UTF-16 failed"));
1203 
1204   wide_str = apr_palloc(result_pool,
1205                         (prefix_len + wide_count + 1) * sizeof(*wide_str));
1206   if (prefix_len)
1207     memcpy(wide_str, prefix, prefix_len * sizeof(*wide_str));
1208   if (0 == MultiByteToWideChar(CP_UTF8, 0, src, utf8_count,
1209                                wide_str + prefix_len, wide_count))
1210     return svn_error_wrap_apr(apr_get_os_error(),
1211                               _("Conversion to UTF-16 failed"));
1212 
1213   wide_str[prefix_len + wide_count] = 0;
1214   *result = wide_str;
1215 
1216   return SVN_NO_ERROR;
1217 }
1218 
1219 svn_error_t *
svn_utf__win32_utf16_to_utf8(const char ** result,const WCHAR * src,const char * prefix,apr_pool_t * result_pool)1220 svn_utf__win32_utf16_to_utf8(const char **result,
1221                              const WCHAR *src,
1222                              const char *prefix,
1223                              apr_pool_t *result_pool)
1224 {
1225   const int wide_count = lstrlenW(src);
1226   const int prefix_len = (prefix ? strlen(prefix) : 0);
1227   char *utf8_str;
1228   int utf8_count;
1229 
1230   if (0 == prefix_len + wide_count)
1231     {
1232       *result = "";
1233       return SVN_NO_ERROR;
1234     }
1235 
1236   utf8_count = WideCharToMultiByte(CP_UTF8, 0, src, wide_count,
1237                                    NULL, 0, NULL, FALSE);
1238   if (utf8_count == 0)
1239     return svn_error_wrap_apr(apr_get_os_error(),
1240                               _("Conversion from UTF-16 failed"));
1241 
1242   utf8_str = apr_palloc(result_pool,
1243                         (prefix_len + utf8_count + 1) * sizeof(*utf8_str));
1244   if (prefix_len)
1245     memcpy(utf8_str, prefix, prefix_len * sizeof(*utf8_str));
1246   if (0 == WideCharToMultiByte(CP_UTF8, 0, src, wide_count,
1247                                utf8_str + prefix_len, utf8_count,
1248                                NULL, FALSE))
1249     return svn_error_wrap_apr(apr_get_os_error(),
1250                               _("Conversion from UTF-16 failed"));
1251 
1252   utf8_str[prefix_len + utf8_count] = 0;
1253   *result = utf8_str;
1254 
1255   return SVN_NO_ERROR;
1256 }
1257 
1258 #endif /* WIN32 */
1259