idn.c 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. /***************************************************************************
  2. * _ _ ____ _
  3. * Project ___| | | | _ \| |
  4. * / __| | | | |_) | |
  5. * | (__| |_| | _ <| |___
  6. * \___|\___/|_| \_\_____|
  7. *
  8. * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
  9. *
  10. * This software is licensed as described in the file COPYING, which
  11. * you should have received as part of this distribution. The terms
  12. * are also available at https://curl.se/docs/copyright.html.
  13. *
  14. * You may opt to use, copy, modify, merge, publish, distribute and/or sell
  15. * copies of the Software, and permit persons to whom the Software is
  16. * furnished to do so, under the terms of the COPYING file.
  17. *
  18. * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  19. * KIND, either express or implied.
  20. *
  21. * SPDX-License-Identifier: curl
  22. *
  23. ***************************************************************************/
  24. /*
  25. * IDN conversions
  26. */
  27. #include "curl_setup.h"
  28. #include "urldata.h"
  29. #include "idn.h"
  30. #include "sendf.h"
  31. #include "curl_multibyte.h"
  32. #include "warnless.h"
  33. #ifdef USE_LIBIDN2
  34. #include <idn2.h>
  35. #if defined(_WIN32) && defined(UNICODE)
  36. #define IDN2_LOOKUP(name, host, flags) \
  37. idn2_lookup_u8((const uint8_t *)name, (uint8_t **)host, flags)
  38. #else
  39. #define IDN2_LOOKUP(name, host, flags) \
  40. idn2_lookup_ul((const char *)name, (char **)host, flags)
  41. #endif
  42. #endif /* USE_LIBIDN2 */
  43. /* The last 3 #include files should be in this order */
  44. #include "curl_printf.h"
  45. #include "curl_memory.h"
  46. #include "memdebug.h"
  47. /* for macOS and iOS targets */
  48. #if defined(USE_APPLE_IDN)
  49. #include <unicode/uidna.h>
  50. #define MAX_HOST_LENGTH 512
  51. static CURLcode mac_idn_to_ascii(const char *in, char **out)
  52. {
  53. size_t inlen = strlen(in);
  54. if(inlen < MAX_HOST_LENGTH) {
  55. UErrorCode err = U_ZERO_ERROR;
  56. UIDNA* idna = uidna_openUTS46(
  57. UIDNA_CHECK_BIDI|UIDNA_NONTRANSITIONAL_TO_ASCII, &err);
  58. if(!U_FAILURE(err)) {
  59. UIDNAInfo info = UIDNA_INFO_INITIALIZER;
  60. char buffer[MAX_HOST_LENGTH] = {0};
  61. (void)uidna_nameToASCII_UTF8(idna, in, -1, buffer,
  62. sizeof(buffer) - 1, &info, &err);
  63. uidna_close(idna);
  64. if(!U_FAILURE(err)) {
  65. *out = strdup(buffer);
  66. if(*out)
  67. return CURLE_OK;
  68. else
  69. return CURLE_OUT_OF_MEMORY;
  70. }
  71. }
  72. }
  73. return CURLE_URL_MALFORMAT;
  74. }
  75. static CURLcode mac_ascii_to_idn(const char *in, char **out)
  76. {
  77. size_t inlen = strlen(in);
  78. if(inlen < MAX_HOST_LENGTH) {
  79. UErrorCode err = U_ZERO_ERROR;
  80. UIDNA* idna = uidna_openUTS46(
  81. UIDNA_CHECK_BIDI|UIDNA_NONTRANSITIONAL_TO_UNICODE, &err);
  82. if(!U_FAILURE(err)) {
  83. UIDNAInfo info = UIDNA_INFO_INITIALIZER;
  84. char buffer[MAX_HOST_LENGTH] = {0};
  85. (void)uidna_nameToUnicodeUTF8(idna, in, -1, buffer,
  86. sizeof(buffer) - 1, &info, &err);
  87. uidna_close(idna);
  88. if(!U_FAILURE(err)) {
  89. *out = strdup(buffer);
  90. if(*out)
  91. return CURLE_OK;
  92. else
  93. return CURLE_OUT_OF_MEMORY;
  94. }
  95. }
  96. }
  97. return CURLE_URL_MALFORMAT;
  98. }
  99. #endif
  100. #ifdef USE_WIN32_IDN
  101. /* using Windows kernel32 and normaliz libraries. */
  102. #if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x600
  103. WINBASEAPI int WINAPI IdnToAscii(DWORD dwFlags,
  104. const WCHAR *lpUnicodeCharStr,
  105. int cchUnicodeChar,
  106. WCHAR *lpASCIICharStr,
  107. int cchASCIIChar);
  108. WINBASEAPI int WINAPI IdnToUnicode(DWORD dwFlags,
  109. const WCHAR *lpASCIICharStr,
  110. int cchASCIIChar,
  111. WCHAR *lpUnicodeCharStr,
  112. int cchUnicodeChar);
  113. #endif
  114. #define IDN_MAX_LENGTH 255
  115. static CURLcode win32_idn_to_ascii(const char *in, char **out)
  116. {
  117. wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
  118. *out = NULL;
  119. if(in_w) {
  120. wchar_t punycode[IDN_MAX_LENGTH];
  121. int chars = IdnToAscii(0, in_w, (int)(wcslen(in_w) + 1), punycode,
  122. IDN_MAX_LENGTH);
  123. curlx_unicodefree(in_w);
  124. if(chars) {
  125. char *mstr = curlx_convert_wchar_to_UTF8(punycode);
  126. if(mstr) {
  127. *out = strdup(mstr);
  128. curlx_unicodefree(mstr);
  129. if(!*out)
  130. return CURLE_OUT_OF_MEMORY;
  131. }
  132. else
  133. return CURLE_OUT_OF_MEMORY;
  134. }
  135. else
  136. return CURLE_URL_MALFORMAT;
  137. }
  138. else
  139. return CURLE_URL_MALFORMAT;
  140. return CURLE_OK;
  141. }
  142. static CURLcode win32_ascii_to_idn(const char *in, char **output)
  143. {
  144. char *out = NULL;
  145. wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
  146. if(in_w) {
  147. WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */
  148. int chars = IdnToUnicode(0, in_w, (int)(wcslen(in_w) + 1), idn,
  149. IDN_MAX_LENGTH);
  150. if(chars) {
  151. /* 'chars' is "the number of characters retrieved" */
  152. char *mstr = curlx_convert_wchar_to_UTF8(idn);
  153. if(mstr) {
  154. out = strdup(mstr);
  155. curlx_unicodefree(mstr);
  156. if(!out)
  157. return CURLE_OUT_OF_MEMORY;
  158. }
  159. }
  160. else
  161. return CURLE_URL_MALFORMAT;
  162. }
  163. else
  164. return CURLE_URL_MALFORMAT;
  165. *output = out;
  166. return CURLE_OK;
  167. }
  168. #endif /* USE_WIN32_IDN */
  169. /*
  170. * Helpers for IDNA conversions.
  171. */
  172. bool Curl_is_ASCII_name(const char *hostname)
  173. {
  174. /* get an UNSIGNED local version of the pointer */
  175. const unsigned char *ch = (const unsigned char *)hostname;
  176. if(!hostname) /* bad input, consider it ASCII! */
  177. return TRUE;
  178. while(*ch) {
  179. if(*ch++ & 0x80)
  180. return FALSE;
  181. }
  182. return TRUE;
  183. }
  184. #ifdef USE_IDN
  185. /*
  186. * Curl_idn_decode() returns an allocated IDN decoded string if it was
  187. * possible. NULL on error.
  188. *
  189. * CURLE_URL_MALFORMAT - the hostname could not be converted
  190. * CURLE_OUT_OF_MEMORY - memory problem
  191. *
  192. */
  193. static CURLcode idn_decode(const char *input, char **output)
  194. {
  195. char *decoded = NULL;
  196. CURLcode result = CURLE_OK;
  197. #ifdef USE_LIBIDN2
  198. if(idn2_check_version(IDN2_VERSION)) {
  199. int flags = IDN2_NFC_INPUT
  200. #if IDN2_VERSION_NUMBER >= 0x00140000
  201. /* IDN2_NFC_INPUT: Normalize input string using normalization form C.
  202. IDN2_NONTRANSITIONAL: Perform Unicode TR46 non-transitional
  203. processing. */
  204. | IDN2_NONTRANSITIONAL
  205. #endif
  206. ;
  207. int rc = IDN2_LOOKUP(input, &decoded, flags);
  208. if(rc != IDN2_OK)
  209. /* fallback to TR46 Transitional mode for better IDNA2003
  210. compatibility */
  211. rc = IDN2_LOOKUP(input, &decoded, IDN2_TRANSITIONAL);
  212. if(rc != IDN2_OK)
  213. result = CURLE_URL_MALFORMAT;
  214. }
  215. else
  216. /* a too old libidn2 version */
  217. result = CURLE_NOT_BUILT_IN;
  218. #elif defined(USE_WIN32_IDN)
  219. result = win32_idn_to_ascii(input, &decoded);
  220. #elif defined(USE_APPLE_IDN)
  221. result = mac_idn_to_ascii(input, &decoded);
  222. #endif
  223. if(!result)
  224. *output = decoded;
  225. return result;
  226. }
  227. static CURLcode idn_encode(const char *puny, char **output)
  228. {
  229. char *enc = NULL;
  230. #ifdef USE_LIBIDN2
  231. int rc = idn2_to_unicode_8z8z(puny, &enc, 0);
  232. if(rc != IDNA_SUCCESS)
  233. return rc == IDNA_MALLOC_ERROR ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT;
  234. #elif defined(USE_WIN32_IDN)
  235. CURLcode result = win32_ascii_to_idn(puny, &enc);
  236. if(result)
  237. return result;
  238. #elif defined(USE_APPLE_IDN)
  239. CURLcode result = mac_ascii_to_idn(puny, &enc);
  240. if(result)
  241. return result;
  242. #endif
  243. *output = enc;
  244. return CURLE_OK;
  245. }
  246. CURLcode Curl_idn_decode(const char *input, char **output)
  247. {
  248. char *d = NULL;
  249. CURLcode result = idn_decode(input, &d);
  250. #ifdef USE_LIBIDN2
  251. if(!result) {
  252. char *c = strdup(d);
  253. idn2_free(d);
  254. if(c)
  255. d = c;
  256. else
  257. result = CURLE_OUT_OF_MEMORY;
  258. }
  259. #endif
  260. if(!result)
  261. *output = d;
  262. return result;
  263. }
  264. CURLcode Curl_idn_encode(const char *puny, char **output)
  265. {
  266. char *d = NULL;
  267. CURLcode result = idn_encode(puny, &d);
  268. #ifdef USE_LIBIDN2
  269. if(!result) {
  270. char *c = strdup(d);
  271. idn2_free(d);
  272. if(c)
  273. d = c;
  274. else
  275. result = CURLE_OUT_OF_MEMORY;
  276. }
  277. #endif
  278. if(!result)
  279. *output = d;
  280. return result;
  281. }
/*
 * Curl_free_idnconverted_hostname() releases the converted hostname that
 * Curl_idnconvert_hostname() stored in host->encalloc.
 *
 * Only host->encalloc is handled here; host->name and host->dispname are
 * not modified by this function.
 */
void Curl_free_idnconverted_hostname(struct hostname *host)
{
/* hand the allocated name, if any, to Curl_safefree() */
Curl_safefree(host->encalloc);
}
  289. #endif /* USE_IDN */
  290. /*
  291. * Perform any necessary IDN conversion of hostname
  292. */
  293. CURLcode Curl_idnconvert_hostname(struct hostname *host)
  294. {
  295. /* set the name we use to display the hostname */
  296. host->dispname = host->name;
  297. #ifdef USE_IDN
  298. /* Check name for non-ASCII and convert hostname if we can */
  299. if(!Curl_is_ASCII_name(host->name)) {
  300. char *decoded;
  301. CURLcode result = Curl_idn_decode(host->name, &decoded);
  302. if(result)
  303. return result;
  304. /* successful */
  305. host->name = host->encalloc = decoded;
  306. }
  307. #endif
  308. return CURLE_OK;
  309. }