a_utf8.c 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. /*
  2. * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
  3. *
  4. * Licensed under the Apache License 2.0 (the "License"). You may not use
  5. * this file except in compliance with the License. You can obtain a copy
  6. * in the file LICENSE in the source distribution or at
  7. * https://www.openssl.org/source/license.html
  8. */
  9. #include <stdio.h>
  10. #include "internal/cryptlib.h"
  11. #include <openssl/asn1.h>
  12. /* UTF8 utilities */
  /*-
   * Parses a UTF8 string one character at a time. It is passed a pointer to
   * the string and the number of bytes available in it. On success the decoded
   * character is stored in '*val' and the number of bytes consumed (1 to 6) is
   * returned. '*val' is not written unless a character was decoded.
   *
   * Returns 0 if 'len' is zero or negative, or a negative error code:
   * -1 = string too short
   * -2 = illegal character (invalid lead byte, or a UTF-16 surrogate code
   *      point in the range U+D800..U+DFFF)
   * -3 = subsequent characters not of the form 10xxxxxx
   * -4 = character encoded incorrectly (not minimal length).
   *
   * The legacy 5 and 6 byte forms are accepted, matching what UTF8_putc()
   * can emit.
   */
  int UTF8_getc(const unsigned char *str, int len, unsigned long *val)
  {
      enum { SURROGATE_MIN = 0xd800, SURROGATE_MAX = 0xdfff };
      unsigned long value;
      unsigned long min_value;    /* smallest value needing 'nbytes' bytes */
      int nbytes;
      int i;

      if (len <= 0)
          return 0;

      /* The lead byte fixes the sequence length and holds the top bits. */
      if ((str[0] & 0x80) == 0) {
          nbytes = 1;
          value = str[0] & 0x7f;
          min_value = 0;
      } else if ((str[0] & 0xe0) == 0xc0) {
          nbytes = 2;
          value = str[0] & 0x1f;
          min_value = 0x80;
      } else if ((str[0] & 0xf0) == 0xe0) {
          nbytes = 3;
          value = str[0] & 0x0f;
          min_value = 0x800;
      } else if ((str[0] & 0xf8) == 0xf0) {
          nbytes = 4;
          value = str[0] & 0x07;
          min_value = 0x10000;
      } else if ((str[0] & 0xfc) == 0xf8) {
          nbytes = 5;
          value = str[0] & 0x03;
          min_value = 0x200000;
      } else if ((str[0] & 0xfe) == 0xfc) {
          nbytes = 6;
          value = str[0] & 0x01;
          min_value = 0x4000000;
      } else {
          /* A stray continuation byte (10xxxxxx) or 0xfe/0xff. */
          return -2;
      }

      /* Check the length before touching any continuation byte. */
      if (len < nbytes)
          return -1;

      /*
       * Every continuation byte must look like 10xxxxxx and contributes six
       * payload bits. 'value' is only published once the whole sequence has
       * been validated, so bailing out part way through is harmless.
       */
      for (i = 1; i < nbytes; i++) {
          if ((str[i] & 0xc0) != 0x80)
              return -3;
          value = (value << 6) | (unsigned long)(str[i] & 0x3f);
      }

      /* Reject overlong encodings: the value would have fitted in fewer bytes. */
      if (value < min_value)
          return -4;

      /*
       * UTF-16 surrogates are not characters and must never appear in UTF-8
       * (RFC 3629, sections 3 and 10). Accepting them lets one string have
       * several distinct encodings, which undermines comparisons and filters.
       */
      if (value >= SURROGATE_MIN && value <= SURROGATE_MAX)
          return -2;

      *val = value;
      return nbytes;
  }
  /*
   * Encodes the character 'value' as UTF8 into 'str', a buffer with room for
   * 'len' bytes, and returns the number of bytes the encoding occupies (1 to
   * 6). Returns -1 if 'str' is non-NULL and 'len' is too small (including
   * zero or negative). If 'str' is NULL nothing is written, 'len' is ignored
   * and only the required size is returned.
   *
   * Only the low 31 bits of 'value' are encoded; any higher bits are dropped.
   */
  int UTF8_putc(unsigned char *str, int len, unsigned long value)
  {
      /* Indexed by sequence length; entry 0 is unused. */
      static const unsigned char lead_tag[] = {
          0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
      };
      static const unsigned char lead_mask[] = {
          0x00, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01
      };
      int nbytes;
      int pos;

      if (str != NULL && len <= 0)
          return -1;

      /* Pick the shortest form able to hold the value. */
      if (value < 0x80)
          nbytes = 1;
      else if (value < 0x800)
          nbytes = 2;
      else if (value < 0x10000)
          nbytes = 3;
      else if (value < 0x200000)
          nbytes = 4;
      else if (value < 0x4000000)
          nbytes = 5;
      else
          nbytes = 6;

      /* Size query only: report the length without writing anything. */
      if (str == NULL)
          return nbytes;

      if (len < nbytes)
          return -1;

      /* Fill the continuation bytes from the end, six bits at a time. */
      for (pos = nbytes - 1; pos > 0; pos--) {
          str[pos] = (unsigned char)((value & 0x3f) | 0x80);
          value >>= 6;
      }

      /* Whatever bits remain go into the lead byte under its length tag. */
      str[0] = (unsigned char)((value & lead_mask[nbytes]) | lead_tag[nbytes]);
      return nbytes;
  }