rune 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. .TH RUNE 2
  2. .SH NAME
  3. runetochar, chartorune, runelen, runenlen, fullrune, utfecpy, utflen, utfnlen, utfrune, utfrrune, utfutf \- rune/UTF conversion
  4. .SH SYNOPSIS
  5. .ta \w'\fLchar*xx'u
  6. .B #include <u.h>
  7. .br
  8. .B #include <libc.h>
  9. .PP
  10. .B
  11. int runetochar(char *s, Rune *r)
  12. .PP
  13. .B
  14. int chartorune(Rune *r, char *s)
  15. .PP
  16. .B
  17. int runelen(long r)
  18. .PP
  19. .B
  20. int runenlen(Rune *r, int n)
  21. .PP
  22. .B
  23. int fullrune(char *s, int n)
  24. .PP
  25. .B
  26. char* utfecpy(char *s1, char *es1, char *s2)
  27. .PP
  28. .B
  29. int utflen(char *s)
  30. .PP
  31. .B
  32. int utfnlen(char *s, long n)
  33. .PP
  34. .B
  35. char* utfrune(char *s, long c)
  36. .PP
  37. .B
  38. char* utfrrune(char *s, long c)
  39. .PP
  40. .B
  41. char* utfutf(char *s1, char *s2)
  42. .SH DESCRIPTION
  43. These routines convert between a
  44. .SM UTF
  45. byte stream and runes.
  46. .PP
  47. .I Runetochar
  48. copies one rune at
  49. .I r
  50. to at most
  51. .B UTFmax
  52. bytes starting at
  53. .I s
  54. and returns the number of bytes copied.
  55. .BR UTFmax ,
  56. defined as
  57. .B 3
  58. in
  59. .BR <libc.h> ,
  60. is the maximum number of bytes required to represent a rune.
  61. .PP
  62. .I Chartorune
  63. copies at most
  64. .B UTFmax
  65. bytes starting at
  66. .I s
  67. to one rune at
  68. .I r
  69. and returns the number of bytes copied.
  70. If the input is not exactly in
  71. .SM UTF
  72. format,
  73. .I chartorune
  74. will convert to
  75. .B Runeerror
  76. (0xFFFD)
  77. and return 1.
  78. .PP
  79. .I Runelen
  80. returns the number of bytes
  81. required to convert
  82. .I r
  83. into
  84. .SM UTF.
  85. .PP
  86. .I Runenlen
  87. returns the number of bytes
  88. required to convert the
  89. .I n
  90. runes pointed to by
  91. .I r
  92. into
  93. .SM UTF.
  94. .PP
  95. .I Fullrune
  96. returns 1 if the string
  97. .I s
  98. of length
  99. .I n
  100. is long enough to be decoded by
  101. .I chartorune
  102. and 0 otherwise.
  103. This does not guarantee that the string
  104. contains a legal
  105. .SM UTF
  106. encoding.
  107. This routine is used by programs that
  108. obtain input a byte at
  109. a time and need to know when a full rune
  110. has arrived.
  111. .PP
  112. The following routines are analogous to the
  113. corresponding string routines with
  114. .B utf
  115. substituted for
  116. .B str
  117. and
  118. .B rune
  119. substituted for
  120. .BR chr .
  121. .PP
  122. .I Utfecpy
  123. copies UTF sequences until a null sequence has been copied, but writes no
  124. sequences beyond
  125. .IR es1 .
  126. If any sequences are copied,
  127. .I s1
  128. is terminated by a null sequence, and a pointer to that sequence is returned.
  129. Otherwise, the original
  130. .I s1
  131. is returned.
  132. .PP
  133. .I Utflen
  134. returns the number of runes that
  135. are represented by the
  136. .SM UTF
  137. string
  138. .IR s .
  139. .PP
  140. .I Utfnlen
  141. returns the number of complete runes that
  142. are represented by the first
  143. .I n
  144. bytes of
  145. .SM UTF
  146. string
  147. .IR s .
  148. If the last few bytes of the string contain an incompletely coded rune,
  149. .I utfnlen
  150. will not count them; in this way, it differs from
  151. .IR utflen ,
  152. which includes every byte of the string.
  153. .PP
  154. .I Utfrune
  155. .RI ( utfrrune )
  156. returns a pointer to the first (last)
  157. occurrence of rune
  158. .I c
  159. in the
  160. .SM UTF
  161. string
  162. .IR s ,
  163. or 0 if
  164. .I c
  165. does not occur in the string.
  166. The NUL byte terminating a string is considered to
  167. be part of the string
  168. .IR s .
  169. .PP
  170. .I Utfutf
  171. returns a pointer to the first occurrence of
  172. the
  173. .SM UTF
  174. string
  175. .I s2
  176. as a
  177. .SM UTF
  178. substring of
  179. .IR s1 ,
  180. or 0 if there is none.
  181. If
  182. .I s2
  183. is the null string,
  184. .I utfutf
  185. returns
  186. .IR s1 .
  187. .SH SOURCE
  188. .B /sys/src/libc/port/rune.c
  189. .br
  190. .B /sys/src/libc/port/utfrune.c
  191. .SH SEE ALSO
  192. .IR utf (6),
  193. .IR tcs (1)