idn.c 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. /***************************************************************************
  2. * _ _ ____ _
  3. * Project ___| | | | _ \| |
  4. * / __| | | | |_) | |
  5. * | (__| |_| | _ <| |___
  6. * \___|\___/|_| \_\_____|
  7. *
  8. * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
  9. *
  10. * This software is licensed as described in the file COPYING, which
  11. * you should have received as part of this distribution. The terms
  12. * are also available at https://curl.se/docs/copyright.html.
  13. *
  14. * You may opt to use, copy, modify, merge, publish, distribute and/or sell
  15. * copies of the Software, and permit persons to whom the Software is
  16. * furnished to do so, under the terms of the COPYING file.
  17. *
  18. * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  19. * KIND, either express or implied.
  20. *
  21. * SPDX-License-Identifier: curl
  22. *
  23. ***************************************************************************/
  24. /*
  25. * IDN conversions
  26. */
  27. #include "curl_setup.h"
  28. #include "urldata.h"
  29. #include "idn.h"
  30. #include "sendf.h"
  31. #include "curl_multibyte.h"
  32. #include "warnless.h"
  #ifdef USE_LIBIDN2
  #include <idn2.h>

  /*
   * IDN2_LOOKUP() wraps the libidn2 lookup call used for converting a name.
   * Windows UNICODE builds call idn2_lookup_u8(), every other build calls
   * idn2_lookup_ul(). The arguments are cast to the pointer types used by the
   * selected function; each macro parameter is parenthesized so that the cast
   * applies to the whole argument expression.
   */
  #if defined(_WIN32) && defined(UNICODE)
  #define IDN2_LOOKUP(name, host, flags)                                   \
    idn2_lookup_u8((const uint8_t *)(name), (uint8_t **)(host), (flags))
  #else
  #define IDN2_LOOKUP(name, host, flags)                                   \
    idn2_lookup_ul((const char *)(name), (char **)(host), (flags))
  #endif
  #endif /* USE_LIBIDN2 */
  43. /* The last 3 #include files should be in this order */
  44. #include "curl_printf.h"
  45. #include "curl_memory.h"
  46. #include "memdebug.h"
  47. /* for macOS and iOS targets */
  48. #if defined(USE_APPLE_IDN)
  49. #include <unicode/uidna.h>
  50. #include <iconv.h>
  51. #include <langinfo.h>
  52. #define MAX_HOST_LENGTH 512
  53. static CURLcode iconv_to_utf8(const char *in, size_t inlen,
  54. char **out, size_t *outlen)
  55. {
  56. iconv_t cd = iconv_open("UTF-8", nl_langinfo(CODESET));
  57. if(cd != (iconv_t)-1) {
  58. size_t iconv_outlen = *outlen;
  59. char *iconv_in = (char *)in;
  60. size_t iconv_inlen = inlen;
  61. size_t iconv_result = iconv(cd, &iconv_in, &iconv_inlen,
  62. out, &iconv_outlen);
  63. *outlen -= iconv_outlen;
  64. iconv_close(cd);
  65. if(iconv_result == (size_t)-1) {
  66. if(errno == ENOMEM)
  67. return CURLE_OUT_OF_MEMORY;
  68. else
  69. return CURLE_URL_MALFORMAT;
  70. }
  71. return CURLE_OK;
  72. }
  73. else {
  74. if(errno == ENOMEM)
  75. return CURLE_OUT_OF_MEMORY;
  76. else
  77. return CURLE_FAILED_INIT;
  78. }
  79. }
  80. static CURLcode mac_idn_to_ascii(const char *in, char **out)
  81. {
  82. size_t inlen = strlen(in);
  83. if(inlen < MAX_HOST_LENGTH) {
  84. char iconv_buffer[MAX_HOST_LENGTH] = {0};
  85. char *iconv_outptr = iconv_buffer;
  86. size_t iconv_outlen = sizeof(iconv_buffer);
  87. CURLcode iconv_result = iconv_to_utf8(in, inlen,
  88. &iconv_outptr, &iconv_outlen);
  89. if(!iconv_result) {
  90. UErrorCode err = U_ZERO_ERROR;
  91. UIDNA* idna = uidna_openUTS46(
  92. UIDNA_CHECK_BIDI|UIDNA_NONTRANSITIONAL_TO_ASCII, &err);
  93. if(!U_FAILURE(err)) {
  94. UIDNAInfo info = UIDNA_INFO_INITIALIZER;
  95. char buffer[MAX_HOST_LENGTH] = {0};
  96. (void)uidna_nameToASCII_UTF8(idna, iconv_buffer, (int)iconv_outlen,
  97. buffer, sizeof(buffer) - 1, &info, &err);
  98. uidna_close(idna);
  99. if(!U_FAILURE(err) && !info.errors) {
  100. *out = strdup(buffer);
  101. if(*out)
  102. return CURLE_OK;
  103. else
  104. return CURLE_OUT_OF_MEMORY;
  105. }
  106. }
  107. }
  108. else
  109. return iconv_result;
  110. }
  111. return CURLE_URL_MALFORMAT;
  112. }
  113. static CURLcode mac_ascii_to_idn(const char *in, char **out)
  114. {
  115. size_t inlen = strlen(in);
  116. if(inlen < MAX_HOST_LENGTH) {
  117. UErrorCode err = U_ZERO_ERROR;
  118. UIDNA* idna = uidna_openUTS46(
  119. UIDNA_CHECK_BIDI|UIDNA_NONTRANSITIONAL_TO_UNICODE, &err);
  120. if(!U_FAILURE(err)) {
  121. UIDNAInfo info = UIDNA_INFO_INITIALIZER;
  122. char buffer[MAX_HOST_LENGTH] = {0};
  123. (void)uidna_nameToUnicodeUTF8(idna, in, -1, buffer,
  124. sizeof(buffer) - 1, &info, &err);
  125. uidna_close(idna);
  126. if(!U_FAILURE(err)) {
  127. *out = strdup(buffer);
  128. if(*out)
  129. return CURLE_OK;
  130. else
  131. return CURLE_OUT_OF_MEMORY;
  132. }
  133. }
  134. }
  135. return CURLE_URL_MALFORMAT;
  136. }
  137. #endif
  138. #ifdef USE_WIN32_IDN
  139. /* using Windows kernel32 and normaliz libraries. */
  140. #if (!defined(_WIN32_WINNT) || _WIN32_WINNT < 0x600) && \
  141. (!defined(WINVER) || WINVER < 0x600)
  142. WINBASEAPI int WINAPI IdnToAscii(DWORD dwFlags,
  143. const WCHAR *lpUnicodeCharStr,
  144. int cchUnicodeChar,
  145. WCHAR *lpASCIICharStr,
  146. int cchASCIIChar);
  147. WINBASEAPI int WINAPI IdnToUnicode(DWORD dwFlags,
  148. const WCHAR *lpASCIICharStr,
  149. int cchASCIIChar,
  150. WCHAR *lpUnicodeCharStr,
  151. int cchUnicodeChar);
  152. #endif
  153. #define IDN_MAX_LENGTH 255
  154. static CURLcode win32_idn_to_ascii(const char *in, char **out)
  155. {
  156. wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
  157. *out = NULL;
  158. if(in_w) {
  159. wchar_t punycode[IDN_MAX_LENGTH];
  160. int chars = IdnToAscii(0, in_w, (int)(wcslen(in_w) + 1), punycode,
  161. IDN_MAX_LENGTH);
  162. curlx_unicodefree(in_w);
  163. if(chars) {
  164. char *mstr = curlx_convert_wchar_to_UTF8(punycode);
  165. if(mstr) {
  166. *out = strdup(mstr);
  167. curlx_unicodefree(mstr);
  168. if(!*out)
  169. return CURLE_OUT_OF_MEMORY;
  170. }
  171. else
  172. return CURLE_OUT_OF_MEMORY;
  173. }
  174. else
  175. return CURLE_URL_MALFORMAT;
  176. }
  177. else
  178. return CURLE_URL_MALFORMAT;
  179. return CURLE_OK;
  180. }
  181. static CURLcode win32_ascii_to_idn(const char *in, char **output)
  182. {
  183. char *out = NULL;
  184. wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
  185. if(in_w) {
  186. WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */
  187. int chars = IdnToUnicode(0, in_w, (int)(wcslen(in_w) + 1), idn,
  188. IDN_MAX_LENGTH);
  189. if(chars) {
  190. /* 'chars' is "the number of characters retrieved" */
  191. char *mstr = curlx_convert_wchar_to_UTF8(idn);
  192. if(mstr) {
  193. out = strdup(mstr);
  194. curlx_unicodefree(mstr);
  195. if(!out)
  196. return CURLE_OUT_OF_MEMORY;
  197. }
  198. }
  199. else
  200. return CURLE_URL_MALFORMAT;
  201. }
  202. else
  203. return CURLE_URL_MALFORMAT;
  204. *output = out;
  205. return CURLE_OK;
  206. }
  207. #endif /* USE_WIN32_IDN */
  208. /*
  209. * Helpers for IDNA conversions.
  210. */
  /*
   * Curl_is_ASCII_name() tells whether 'hostname' consists of 7-bit bytes
   * only. A NULL pointer is bad input and is treated as ASCII.
   */
  bool Curl_is_ASCII_name(const char *hostname)
  {
    /* walk the name as UNSIGNED bytes so the high bit can be tested */
    const unsigned char *p = (const unsigned char *)hostname;

    if(p) {
      /* skip every byte that has the high bit clear */
      while(*p && !(*p & 0x80))
        p++;
    }

    /* ASCII when there was no name at all, or when the scan got all the way
       to the terminating null byte */
    return !p || !*p;
  }
  223. #ifdef USE_IDN
  224. /*
  225. * Curl_idn_decode() returns an allocated IDN decoded string if it was
  226. * possible. NULL on error.
  227. *
  228. * CURLE_URL_MALFORMAT - the hostname could not be converted
  229. * CURLE_OUT_OF_MEMORY - memory problem
  230. *
  231. */
  232. static CURLcode idn_decode(const char *input, char **output)
  233. {
  234. char *decoded = NULL;
  235. CURLcode result = CURLE_OK;
  236. #ifdef USE_LIBIDN2
  237. if(idn2_check_version(IDN2_VERSION)) {
  238. int flags = IDN2_NFC_INPUT
  239. #if IDN2_VERSION_NUMBER >= 0x00140000
  240. /* IDN2_NFC_INPUT: Normalize input string using normalization form C.
  241. IDN2_NONTRANSITIONAL: Perform Unicode TR46 non-transitional
  242. processing. */
  243. | IDN2_NONTRANSITIONAL
  244. #endif
  245. ;
  246. int rc = IDN2_LOOKUP(input, &decoded, flags);
  247. if(rc != IDN2_OK)
  248. /* fallback to TR46 Transitional mode for better IDNA2003
  249. compatibility */
  250. rc = IDN2_LOOKUP(input, &decoded, IDN2_TRANSITIONAL);
  251. if(rc != IDN2_OK)
  252. result = CURLE_URL_MALFORMAT;
  253. }
  254. else
  255. /* a too old libidn2 version */
  256. result = CURLE_NOT_BUILT_IN;
  257. #elif defined(USE_WIN32_IDN)
  258. result = win32_idn_to_ascii(input, &decoded);
  259. #elif defined(USE_APPLE_IDN)
  260. result = mac_idn_to_ascii(input, &decoded);
  261. #endif
  262. if(!result)
  263. *output = decoded;
  264. return result;
  265. }
  266. static CURLcode idn_encode(const char *puny, char **output)
  267. {
  268. char *enc = NULL;
  269. #ifdef USE_LIBIDN2
  270. int rc = idn2_to_unicode_8z8z(puny, &enc, 0);
  271. if(rc != IDNA_SUCCESS)
  272. return rc == IDNA_MALLOC_ERROR ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT;
  273. #elif defined(USE_WIN32_IDN)
  274. CURLcode result = win32_ascii_to_idn(puny, &enc);
  275. if(result)
  276. return result;
  277. #elif defined(USE_APPLE_IDN)
  278. CURLcode result = mac_ascii_to_idn(puny, &enc);
  279. if(result)
  280. return result;
  281. #endif
  282. *output = enc;
  283. return CURLE_OK;
  284. }
  285. CURLcode Curl_idn_decode(const char *input, char **output)
  286. {
  287. char *d = NULL;
  288. CURLcode result = idn_decode(input, &d);
  289. #ifdef USE_LIBIDN2
  290. if(!result) {
  291. char *c = strdup(d);
  292. idn2_free(d);
  293. if(c)
  294. d = c;
  295. else
  296. result = CURLE_OUT_OF_MEMORY;
  297. }
  298. #endif
  299. if(!result)
  300. *output = d;
  301. return result;
  302. }
  303. CURLcode Curl_idn_encode(const char *puny, char **output)
  304. {
  305. char *d = NULL;
  306. CURLcode result = idn_encode(puny, &d);
  307. #ifdef USE_LIBIDN2
  308. if(!result) {
  309. char *c = strdup(d);
  310. idn2_free(d);
  311. if(c)
  312. d = c;
  313. else
  314. result = CURLE_OUT_OF_MEMORY;
  315. }
  316. #endif
  317. if(!result)
  318. *output = d;
  319. return result;
  320. }
  321. /*
  322. * Frees data allocated by idnconvert_hostname()
  323. */
  324. void Curl_free_idnconverted_hostname(struct hostname *host)
  325. {
  326. Curl_safefree(host->encalloc);
  327. }
  328. #endif /* USE_IDN */
  329. /*
  330. * Perform any necessary IDN conversion of hostname
  331. */
  332. CURLcode Curl_idnconvert_hostname(struct hostname *host)
  333. {
  334. /* set the name we use to display the hostname */
  335. host->dispname = host->name;
  336. #ifdef USE_IDN
  337. /* Check name for non-ASCII and convert hostname if we can */
  338. if(!Curl_is_ASCII_name(host->name)) {
  339. char *decoded;
  340. CURLcode result = Curl_idn_decode(host->name, &decoded);
  341. if(result)
  342. return result;
  343. /* successful */
  344. host->name = host->encalloc = decoded;
  345. }
  346. #endif
  347. return CURLE_OK;
  348. }