1 /** 2 * @copyright 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 * @endcopyright 22 * 23 * @file svn_utf.h 24 * @brief UTF-8 conversion routines 25 * 26 * Whenever a conversion routine cannot convert to or from UTF-8, the 27 * error returned has code @c APR_EINVAL. 28 */ 29 30 31 32 #ifndef SVN_UTF_H 33 #define SVN_UTF_H 34 35 #include <apr_pools.h> 36 #include <apr_xlate.h> /* for APR_*_CHARSET */ 37 38 #include "svn_types.h" 39 #include "svn_string.h" 40 41 #ifdef __cplusplus 42 extern "C" { 43 #endif /* __cplusplus */ 44 45 #define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET 46 #define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET 47 48 /** 49 * Initialize the UTF-8 encoding/decoding routines. 50 * Allocate cached translation handles in a subpool of @a pool. 51 * 52 * If @a assume_native_utf8 is TRUE, the native character set is 53 * assumed to be UTF-8, i.e. conversion is a no-op. This is useful 54 * in contexts where the native character set is ASCII but UTF-8 55 * should be used regardless (e.g. for mod_dav_svn which runs within 56 * httpd and always uses the "C" locale). 57 * 58 * @note It is optional to call this function, but if it is used, no other 59 * svn function may be in use in other threads during the call of this 60 * function or when @a pool is cleared or destroyed. 61 * Initializing the UTF-8 routines will improve performance. 62 * 63 * @since New in 1.8. 64 */ 65 void 66 svn_utf_initialize2(svn_boolean_t assume_native_utf8, 67 apr_pool_t *pool); 68 69 /** 70 * Like svn_utf_initialize2() but without the ability to force the 71 * native encoding to UTF-8. 72 * 73 * @deprecated Provided for backward compatibility with the 1.7 API. 74 */ 75 SVN_DEPRECATED 76 void 77 svn_utf_initialize(apr_pool_t *pool); 78 79 /** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src; 80 * allocate @a *dest in @a pool. 81 */ 82 svn_error_t * 83 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest, 84 const svn_stringbuf_t *src, 85 apr_pool_t *pool); 86 87 88 /** Set @a *dest to a utf8-encoded string from native string @a src; allocate 89 * @a *dest in @a pool. 90 */ 91 svn_error_t * 92 svn_utf_string_to_utf8(const svn_string_t **dest, 93 const svn_string_t *src, 94 apr_pool_t *pool); 95 96 97 /** Set @a *dest to a utf8-encoded C string from native C string @a src; 98 * allocate @a *dest in @a pool. 99 */ 100 svn_error_t * 101 svn_utf_cstring_to_utf8(const char **dest, 102 const char *src, 103 apr_pool_t *pool); 104 105 106 /** Set @a *dest to a utf8 encoded C string from @a frompage encoded C 107 * string @a src; allocate @a *dest in @a pool. 108 * 109 * @since New in 1.4. 110 */ 111 svn_error_t * 112 svn_utf_cstring_to_utf8_ex2(const char **dest, 113 const char *src, 114 const char *frompage, 115 apr_pool_t *pool); 116 117 118 /** Like svn_utf_cstring_to_utf8_ex2() but with @a convset_key which is 119 * ignored. 120 * 121 * @deprecated Provided for backward compatibility with the 1.3 API. 122 */ 123 SVN_DEPRECATED 124 svn_error_t * 125 svn_utf_cstring_to_utf8_ex(const char **dest, 126 const char *src, 127 const char *frompage, 128 const char *convset_key, 129 apr_pool_t *pool); 130 131 132 /** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src; 133 * allocate @a *dest in @a pool. 134 */ 135 svn_error_t * 136 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest, 137 const svn_stringbuf_t *src, 138 apr_pool_t *pool); 139 140 141 /** Set @a *dest to a natively-encoded string from utf8 string @a src; 142 * allocate @a *dest in @a pool. 143 */ 144 svn_error_t * 145 svn_utf_string_from_utf8(const svn_string_t **dest, 146 const svn_string_t *src, 147 apr_pool_t *pool); 148 149 150 /** Set @a *dest to a natively-encoded C string from utf8 C string @a src; 151 * allocate @a *dest in @a pool. 152 */ 153 svn_error_t * 154 svn_utf_cstring_from_utf8(const char **dest, 155 const char *src, 156 apr_pool_t *pool); 157 158 159 /** Set @a *dest to a @a topage encoded C string from utf8 encoded C string 160 * @a src; allocate @a *dest in @a pool. 161 * 162 * @since New in 1.4. 163 */ 164 svn_error_t * 165 svn_utf_cstring_from_utf8_ex2(const char **dest, 166 const char *src, 167 const char *topage, 168 apr_pool_t *pool); 169 170 171 /** Like svn_utf_cstring_from_utf8_ex2() but with @a convset_key which is 172 * ignored. 173 * 174 * @deprecated Provided for backward compatibility with the 1.3 API. 175 */ 176 SVN_DEPRECATED 177 svn_error_t * 178 svn_utf_cstring_from_utf8_ex(const char **dest, 179 const char *src, 180 const char *topage, 181 const char *convset_key, 182 apr_pool_t *pool); 183 184 185 /** Return a fuzzily native-encoded C string from utf8 C string @a src, 186 * allocated in @a pool. A fuzzy recoding leaves all 7-bit ascii 187 * characters the same, and substitutes "?\\XXX" for others, where XXX 188 * is the unsigned decimal code for that character. 189 * 190 * This function cannot error; it is guaranteed to return something. 191 * First it will recode as described above and then attempt to convert 192 * the (new) 7-bit UTF-8 string to native encoding. If that fails, it 193 * will return the raw fuzzily recoded string, which may or may not be 194 * meaningful in the client's locale, but is (presumably) better than 195 * nothing. 196 * 197 * ### Notes: 198 * 199 * Improvement is possible, even imminent. The original problem was 200 * that if you converted a UTF-8 string (say, a log message) into a 201 * locale that couldn't represent all the characters, you'd just get a 202 * static placeholder saying "[unconvertible log message]". Then 203 * Justin Erenkrantz pointed out how on platforms that didn't support 204 * conversion at all, "svn log" would still fail completely when it 205 * encountered unconvertible data. 206 * 207 * Now for both cases, the caller can at least fall back on this 208 * function, which converts the message as best it can, substituting 209 * "?\\XXX" escape codes for the non-ascii characters. 210 * 211 * Ultimately, some callers may prefer the iconv "//TRANSLIT" option, 212 * so when we can detect that at configure time, things will change. 213 * Also, this should (?) be moved to apr/apu eventually. 214 * 215 * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for 216 * details. 217 */ 218 const char * 219 svn_utf_cstring_from_utf8_fuzzy(const char *src, 220 apr_pool_t *pool); 221 222 223 /** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src; 224 * allocate @a *dest in @a pool. 225 */ 226 svn_error_t * 227 svn_utf_cstring_from_utf8_stringbuf(const char **dest, 228 const svn_stringbuf_t *src, 229 apr_pool_t *pool); 230 231 232 /** Set @a *dest to a natively-encoded C string from utf8 string @a src; 233 * allocate @a *dest in @a pool. 234 */ 235 svn_error_t * 236 svn_utf_cstring_from_utf8_string(const char **dest, 237 const svn_string_t *src, 238 apr_pool_t *pool); 239 240 /** Return the display width of UTF-8-encoded C string @a cstr. 241 * If the string is not printable or invalid UTF-8, return -1. 242 * 243 * @since New in 1.8. 244 */ 245 int 246 svn_utf_cstring_utf8_width(const char *cstr); 247 248 #ifdef __cplusplus 249 } 250 #endif /* __cplusplus */ 251 252 #endif /* SVN_UTF_H */ 253