123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190 |
- .TH RUNE 2
- .SH NAME
- runetochar, chartorune, runelen, runenlen, fullrune, utfecpy, utflen, utfnlen, utfrune, utfrrune, utfutf \- rune/UTF conversion
- .SH SYNOPSIS
- .ta \w'\fLchar*xx'u
- .B #include <u.h>
- .br
- .B #include <libc.h>
- .PP
- .B
- int runetochar(char *s, Rune *r)
- .PP
- .B
- int chartorune(Rune *r, char *s)
- .PP
- .B
- int runelen(long r)
- .PP
- .B
- int runenlen(Rune *r, int n)
- .PP
- .B
- int fullrune(char *s, int n)
- .PP
- .B
- char* utfecpy(char *s1, char *es1, char *s2)
- .PP
- .B
- int utflen(char *s)
- .PP
- .B
- int utfnlen(char *s, long n)
- .PP
- .B
- char* utfrune(char *s, long c)
- .PP
- .B
- char* utfrrune(char *s, long c)
- .PP
- .B
- char* utfutf(char *s1, char *s2)
- .SH DESCRIPTION
- These routines convert between a
- .SM UTF
- byte stream and runes.
- .PP
- .I Runetochar
- copies one rune at
- .I r
- to at most
- .B UTFmax
- bytes starting at
- .I s
- and returns the number of bytes copied.
- .BR UTFmax ,
- defined as
- .B 3
- in
- .BR <libc.h> ,
- is the maximum number of bytes required to represent a rune.
- .PP
- .I Chartorune
- copies at most
- .B UTFmax
- bytes starting at
- .I s
- to one rune at
- .I r
- and returns the number of bytes copied.
- If the input is not exactly in
- .SM UTF
- format,
- .I chartorune
- will convert to 0x80 and return 1.
- .PP
- .I Runelen
- returns the number of bytes
- required to convert
- .I r
- into
- .SM UTF.
- .PP
- .I Runenlen
- returns the number of bytes
- required to convert the
- .I n
- runes pointed to by
- .I r
- into
- .SM UTF.
- .PP
- .I Fullrune
- returns 1 if the string
- .I s
- of length
- .I n
- is long enough to be decoded by
- .I chartorune
- and 0 otherwise.
- This does not guarantee that the string
- contains a legal
- .SM UTF
- encoding.
- This routine is used by programs that
- obtain input a byte at
- a time and need to know when a full rune
- has arrived.
- .PP
- The following routines are analogous to the
- corresponding string routines with
- .B utf
- substituted for
- .B str
- and
- .B rune
- substituted for
- .BR chr .
- .PP
- .I Utfecpy
- copies UTF sequences from
- .I s2
- to
- .I s1
- until a null sequence has been copied, but writes no
- sequences beyond
- .IR es1 .
- If any sequences are copied,
- .I s1
- is terminated by a null sequence, and a pointer to that sequence is returned.
- Otherwise, the original
- .I s1
- is returned.
- .PP
- .I Utflen
- returns the number of runes that
- are represented by the
- .SM UTF
- string
- .IR s .
- .PP
- .I Utfnlen
- returns the number of complete runes that
- are represented by the first
- .I n
- bytes of
- .SM UTF
- string
- .IR s .
- If the last few bytes of the string contain an incompletely coded rune,
- .I utfnlen
- will not count them; in this way, it differs from
- .IR utflen ,
- which includes every byte of the string.
- .PP
- .I Utfrune
- .RI ( utfrrune )
- returns a pointer to the first (last)
- occurrence of rune
- .I c
- in the
- .SM UTF
- string
- .IR s ,
- or 0 if
- .I c
- does not occur in the string.
- The NUL byte terminating a string is considered to
- be part of the string
- .IR s .
- .PP
- .I Utfutf
- returns a pointer to the first occurrence of
- the
- .SM UTF
- string
- .I s2
- as a
- .SM UTF
- substring of
- .IR s1 ,
- or 0 if there is none.
- If
- .I s2
- is the null string,
- .I utfutf
- returns
- .IR s1 .
- .SH SOURCE
- .B /sys/src/libc/port/rune.c
- .br
- .B /sys/src/libc/port/utfrune.c
- .SH SEE ALSO
- .IR utf (6),
- .IR tcs (1)
|