rune 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. .TH RUNE 2
  2. .SH NAME
  3. runetochar, chartorune, runelen, runenlen, fullrune, utfecpy, utflen, utfnlen, utfrune, utfrrune, utfutf \- rune/UTF conversion
  4. .SH SYNOPSIS
  5. .ta \w'\fLchar*xx'u
  6. .B #include <u.h>
  7. .br
  8. .B #include <libc.h>
  9. .PP
  10. .B
  11. int runetochar(char *s, Rune *r)
  12. .PP
  13. .B
  14. int chartorune(Rune *r, char *s)
  15. .PP
  16. .B
  17. int runelen(long r)
  18. .PP
  19. .B
  20. int runenlen(Rune *r, int n)
  21. .PP
  22. .B
  23. int fullrune(char *s, int n)
  24. .PP
  25. .B
  26. char* utfecpy(char *s1, char *es1, char *s2)
  27. .PP
  28. .B
  29. int utflen(char *s)
  30. .PP
  31. .B
  32. int utfnlen(char *s, long n)
  33. .PP
  34. .B
  35. char* utfrune(char *s, long c)
  36. .PP
  37. .B
  38. char* utfrrune(char *s, long c)
  39. .PP
  40. .B
  41. char* utfutf(char *s1, char *s2)
  42. .SH DESCRIPTION
  43. These routines convert between a
  44. .SM UTF
  45. byte stream and runes.
  46. .PP
  47. .I Runetochar
  48. copies one rune at
  49. .I r
  50. to at most
  51. .B UTFmax
  52. bytes starting at
  53. .I s
  54. and returns the number of bytes copied.
  55. .BR UTFmax ,
  56. defined as
  57. .B 3
  58. in
  59. .BR <libc.h> ,
  60. is the maximum number of bytes required to represent a rune.
  61. .PP
  62. .I Chartorune
  63. copies at most
  64. .B UTFmax
  65. bytes starting at
  66. .I s
  67. to one rune at
  68. .I r
  69. and returns the number of bytes copied.
  70. If the input is not exactly in
  71. .SM UTF
  72. format,
  73. .I chartorune
  74. will produce the rune 0x80 and return 1.
  75. .PP
  76. .I Runelen
  77. returns the number of bytes
  78. required to convert
  79. .I r
  80. into
  81. .SM UTF.
  82. .PP
  83. .I Runenlen
  84. returns the number of bytes
  85. required to convert the
  86. .I n
  87. runes pointed to by
  88. .I r
  89. into
  90. .SM UTF.
  91. .PP
  92. .I Fullrune
  93. returns 1 if the string
  94. .I s
  95. of length
  96. .I n
  97. is long enough to be decoded by
  98. .I chartorune
  99. and 0 otherwise.
  100. This does not guarantee that the string
  101. contains a legal
  102. .SM UTF
  103. encoding.
  104. This routine is used by programs that
  105. obtain input a byte at
  106. a time and need to know when a full rune
  107. has arrived.
  108. .PP
  109. The following routines are analogous to the
  110. corresponding string routines with
  111. .B utf
  112. substituted for
  113. .B str
  114. and
  115. .B rune
  116. substituted for
  117. .BR chr .
  118. .PP
  119. .I Utfecpy
  120. copies UTF sequences from \fIs2\fP to \fIs1\fP until a null sequence has been copied, but writes no
  121. sequences beyond
  122. .IR es1 .
  123. If any sequences are copied,
  124. .I s1
  125. is terminated by a null sequence, and a pointer to that sequence is returned.
  126. Otherwise, the original
  127. .I s1
  128. is returned.
  129. .PP
  130. .I Utflen
  131. returns the number of runes that
  132. are represented by the
  133. .SM UTF
  134. string
  135. .IR s .
  136. .PP
  137. .I Utfnlen
  138. returns the number of complete runes that
  139. are represented by the first
  140. .I n
  141. bytes of
  142. .SM UTF
  143. string
  144. .IR s .
  145. If the last few bytes of the string contain an incompletely coded rune,
  146. .I utfnlen
  147. will not count them; in this way, it differs from
  148. .IR utflen ,
  149. which includes every byte of the string.
  150. .PP
  151. .I Utfrune
  152. .RI ( utfrrune )
  153. returns a pointer to the first (last)
  154. occurrence of rune
  155. .I c
  156. in the
  157. .SM UTF
  158. string
  159. .IR s ,
  160. or 0 if
  161. .I c
  162. does not occur in the string.
  163. The NUL byte terminating a string is considered to
  164. be part of the string
  165. .IR s .
  166. .PP
  167. .I Utfutf
  168. returns a pointer to the first occurrence of
  169. the
  170. .SM UTF
  171. string
  172. .I s2
  173. as a
  174. .SM UTF
  175. substring of
  176. .IR s1 ,
  177. or 0 if there is none.
  178. If
  179. .I s2
  180. is the null string,
  181. .I utfutf
  182. returns
  183. .IR s1 .
  184. .SH SOURCE
  185. .B /sys/src/libc/port/rune.c
  186. .br
  187. .B /sys/src/libc/port/utfrune.c
  188. .SH SEE ALSO
  189. .IR utf (6),
  190. .IR tcs (1)