123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591 |
- #include <u.h>
- #include <libc.h>
- #include <draw.h>
- #include <html.h>
- #include "impl.h"
- Rune* whitespace = L" \t\n\r";
- Rune* notwhitespace = L"^ \t\n\r";
- // All lists start out like List structure.
- // List itself can be used as list of int.
- int
- _listlen(List* l)
- {
- int n = 0;
- while(l != nil) {
- l = l->next;
- n++;
- }
- return n;
- }
- // Cons
- List*
- _newlist(int val, List* rest)
- {
- List* ans;
- ans = (List*)emalloc(sizeof(List));
- ans->val = val;
- ans->next = rest;
- return ans;
- }
- // Reverse a list in place
- List*
- _revlist(List* l)
- {
- List* newl;
- List* nextl;
- newl = nil;
- while(l != nil) {
- nextl = l->next;
- l->next = newl;
- newl = l;
- l = nextl;
- }
- return newl;
- }
- // The next few routines take a "character class" as argument.
- // e.g., "a-zA-Z", or "^ \t\n"
- // (ranges indicated by - except in first position;
- // ^ is first position means "not in" the following class)
- // Splitl splits s[0:n] just before first character of class cl.
- // Answers go in (p1, n1) and (p2, n2).
- // If no split, the whole thing goes in the first component.
- // Note: answers contain pointers into original string.
- void
- _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
- {
- Rune* p;
- p = _Strnclass(s, cl, n);
- *p1 = s;
- if(p == nil) {
- *n1 = n;
- *p2 = nil;
- *n2 = 0;
- }
- else {
- *p2 = p;
- *n1 = p-s;
- *n2 = n-*n1;
- }
- }
- // Splitr splits s[0:n] just after last character of class cl.
- // Answers go in (p1, n1) and (p2, n2).
- // If no split, the whole thing goes in the last component.
- // Note: answers contain pointers into original string.
- void
- _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
- {
- Rune* p;
- p = _Strnrclass(s, cl, n);
- if(p == nil) {
- *p1 = nil;
- *n1 = 0;
- *p2 = s;
- *n2 = n;
- }
- else {
- *p1 = s;
- *p2 = p+1;
- *n1 = *p2-s;
- *n2 = n-*n1;
- }
- }
- // Splitall splits s[0:n] into parts that are separated by characters from class cl.
- // Each part will have nonzero length.
- // At most alen parts are found, and pointers to their starts go into
- // the strarr array, while their lengths go into the lenarr array.
- // The return value is the number of parts found.
- int
- _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen)
- {
- int i;
- Rune* p;
- Rune* q;
- Rune* slast;
- if(s == nil || n == 0)
- return 0;
- i = 0;
- p = s;
- slast = s+n;
- while(p < slast && i < alen) {
- while(p < slast && _inclass(*p, cl))
- p++;
- if(p == slast)
- break;
- q = _Strnclass(p, cl, slast-p);
- if(q == nil)
- q = slast;
- assert(q > p && q <= slast);
- strarr[i] = p;
- lenarr[i] = q-p;
- i++;
- p = q;
- }
- return i;
- }
- // Find part of s that excludes leading and trailing whitespace,
- // and return that part in *pans (and its length in *panslen).
- void
- _trimwhite(Rune* s, int n, Rune** pans, int* panslen)
- {
- Rune* p;
- Rune* q;
- p = nil;
- if(n > 0) {
- p = _Strnclass(s, notwhitespace, n);
- if(p != nil) {
- q = _Strnrclass(s, notwhitespace, n);
- assert(q != nil);
- n = q+1-p;
- }
- }
- *pans = p;
- *panslen = n;
- }
- // _Strclass returns a pointer to the first element of s that is
- // a member of class cl, nil if none.
- Rune*
- _Strclass(Rune* s, Rune* cl)
- {
- Rune* p;
- for(p = s; *p != 0; p++)
- if(_inclass(*p, cl))
- return p;
- return nil;
- }
- // _Strnclass returns a pointer to the first element of s[0:n] that is
- // a member of class cl, nil if none.
- Rune*
- _Strnclass(Rune* s, Rune* cl, int n)
- {
- Rune* p;
- for(p = s; n-- && *p != 0; p++)
- if(_inclass(*p, cl))
- return p;
- return nil;
- }
- // _Strrclass returns a pointer to the last element of s that is
- // a member of class cl, nil if none
- Rune*
- _Strrclass(Rune* s, Rune* cl)
- {
- Rune* p;
- if(s == nil || *s == 0)
- return nil;
- p = s + runestrlen(s) - 1;
- while(p >= s) {
- if(_inclass(*p, cl))
- return p;
- p--;
- };
- return nil;
- }
- // _Strnrclass returns a pointer to the last element of s[0:n] that is
- // a member of class cl, nil if none
- Rune*
- _Strnrclass(Rune* s, Rune* cl, int n)
- {
- Rune* p;
- if(s == nil || *s == 0 || n == 0)
- return nil;
- p = s + n - 1;
- while(p >= s) {
- if(_inclass(*p, cl))
- return p;
- p--;
- };
- return nil;
- }
- // Is c in the class cl?
- int
- _inclass(Rune c, Rune* cl)
- {
- int n;
- int ans;
- int negate;
- int i;
- n = _Strlen(cl);
- if(n == 0)
- return 0;
- ans = 0;
- negate = 0;
- if(cl[0] == '^') {
- negate = 1;
- cl++;
- n--;
- }
- for(i = 0; i < n; i++) {
- if(cl[i] == '-' && i > 0 && i < n - 1) {
- if(c >= cl[i - 1] && c <= cl[i + 1]) {
- ans = 1;
- break;
- }
- i++;
- }
- else if(c == cl[i]) {
- ans = 1;
- break;
- }
- }
- if(negate)
- ans = !ans;
- return ans;
- }
- // Is pre a prefix of s?
- int
- _prefix(Rune* pre, Rune* s)
- {
- int ns;
- int n;
- int k;
- ns = _Strlen(s);
- n = _Strlen(pre);
- if(ns < n)
- return 0;
- for(k = 0; k < n; k++) {
- if(pre[k] != s[k])
- return 0;
- }
- return 1;
- }
- // Number of runes in (null-terminated) s
- int
- _Strlen(Rune* s)
- {
- if(s == nil)
- return 0;
- return runestrlen(s);
- }
- // -1, 0, 1 as s1 is lexicographically less, equal greater than s2
- int
- _Strcmp(Rune *s1, Rune *s2)
- {
- if(s1 == nil)
- return (s2 == nil || *s2 == 0) ? 0 : -1;
- if(s2 == nil)
- return (*s1 == 0) ? 0 : 1;
- return runestrcmp(s1, s2);
- }
- // Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars).
- // Also, do a case-insensitive match, assuming s2
- // has no chars in [A-Z], only their lowercase versions.
- // (This routine is used for in-place keyword lookup, where s2 is in a keyword
- // list and s1 is some substring, possibly mixed-case, in a buffer.)
- int
- _Strncmpci(Rune *s1, int n1, Rune *s2)
- {
- Rune c1, c2;
- for(;;) {
- if(n1-- == 0) {
- if(*s2 == 0)
- return 0;
- return -1;
- }
- c1 = *s1++;
- c2 = *s2++;
- if(c1 >= 'A' && c1 <= 'Z')
- c1 = c1 - 'A' + 'a';
- if(c1 != c2) {
- if(c1 > c2)
- return 1;
- return -1;
- }
- }
- }
- // emalloc and copy
- Rune*
- _Strdup(Rune* s)
- {
- if(s == nil)
- return nil;
- return _Strndup(s, runestrlen(s));
- }
- // emalloc and copy n chars of s (assume s is at least that long),
- // and add 0 terminator.
- // Return nil if n==0.
- Rune*
- _Strndup(Rune* s, int n)
- {
- Rune* ans;
- if(n <= 0)
- return nil;
- ans = _newstr(n);
- memmove(ans, s, n*sizeof(Rune));
- ans[n] = 0;
- return ans;
- }
- // emalloc enough room for n Runes, plus 1 null terminator.
- // (Not initialized to anything.)
- Rune*
- _newstr(int n)
- {
- return (Rune*)emalloc((n+1)*sizeof(Rune));
- }
- // emalloc and copy s+t
- Rune*
- _Strdup2(Rune* s, Rune* t)
- {
- int ns, nt;
- Rune* ans;
- Rune* p;
- ns = _Strlen(s);
- nt = _Strlen(t);
- if(ns+nt == 0)
- return nil;
- ans = _newstr(ns+nt);
- p = _Stradd(ans, s, ns);
- p = _Stradd(p, t, nt);
- *p = 0;
- return ans;
- }
- // Return emalloc'd substring s[start:stop],
- Rune*
- _Strsubstr(Rune* s, int start, int stop)
- {
- Rune* t;
- if(start == stop)
- return nil;
- t = _Strndup(s+start, stop-start);
- return t;
- }
- // Copy n chars to s1 from s2, and return s1+n
- Rune*
- _Stradd(Rune* s1, Rune* s2, int n)
- {
- if(n == 0)
- return s1;
- memmove(s1, s2, n*sizeof(Rune));
- return s1+n;
- }
- // Like strtol, but converting from Rune* string
- #define LONG_MAX 2147483647L
- #define LONG_MIN -2147483648L
- long
- _Strtol(Rune* nptr, Rune** endptr, int base)
- {
- Rune* p;
- long n, nn;
- int c, ovfl, v, neg, ndig;
- p = nptr;
- neg = 0;
- n = 0;
- ndig = 0;
- ovfl = 0;
- /*
- * White space
- */
- for(;;p++){
- switch(*p){
- case ' ':
- case '\t':
- case '\n':
- case '\f':
- case '\r':
- case '\v':
- continue;
- }
- break;
- }
- /*
- * Sign
- */
- if(*p=='-' || *p=='+')
- if(*p++ == '-')
- neg = 1;
- /*
- * Base
- */
- if(base==0){
- if(*p != '0')
- base = 10;
- else{
- base = 8;
- if(p[1]=='x' || p[1]=='X'){
- p += 2;
- base = 16;
- }
- }
- }else if(base==16 && *p=='0'){
- if(p[1]=='x' || p[1]=='X')
- p += 2;
- }else if(base<0 || 36<base)
- goto Return;
- /*
- * Non-empty sequence of digits
- */
- for(;; p++,ndig++){
- c = *p;
- v = base;
- if('0'<=c && c<='9')
- v = c - '0';
- else if('a'<=c && c<='z')
- v = c - 'a' + 10;
- else if('A'<=c && c<='Z')
- v = c - 'A' + 10;
- if(v >= base)
- break;
- nn = n*base + v;
- if(nn < n)
- ovfl = 1;
- n = nn;
- }
- Return:
- if(ndig == 0)
- p = nptr;
- if(endptr)
- *endptr = p;
- if(ovfl){
- if(neg)
- return LONG_MIN;
- return LONG_MAX;
- }
- if(neg)
- return -n;
- return n;
- }
- // Convert buf[0:n], bytes whose character set is chset,
- // into a emalloc'd null-terminated Unicode string.
- Rune*
- toStr(uchar* buf, int n, int chset)
- {
- int i;
- int m;
- Rune ch;
- Rune* ans;
- switch(chset) {
- case US_Ascii:
- case ISO_8859_1:
- ans = (Rune*)emalloc((n+1)*sizeof(Rune));
- for(i = 0; i < n; i++)
- ans[i] = buf[i];
- ans[n] = 0;
- break;
- case UTF_8:
- m = 0;
- for(i = 0; i < n; ) {
- i += chartorune(&ch, (char*)(buf+i));
- m++;
- }
- ans = (Rune*)emalloc((m+1)*sizeof(Rune));
- m = 0;
- for(i = 0; i < n; ) {
- i += chartorune(&ch, (char*)(buf+i));
- ans[m++] = ch;
- }
- ans[m] = 0;
- break;
- default:
- ans = nil;
- assert(0);
- }
- return ans;
- }
- // Convert buf[0:n], Unicode characters,
- // into an emalloc'd null-terminated string in character set chset.
- // Use 0x80 for unconvertable characters.
- uchar*
- fromStr(Rune* buf, int n, int chset)
- {
- uchar* ans;
- int i, lim, m;
- Rune ch;
- uchar* p;
- uchar s[UTFmax];
- ans = nil;
- switch(chset) {
- case US_Ascii:
- case ISO_8859_1:
- ans = (uchar*)emalloc(n+1);
- lim = (chset==US_Ascii)? 127 : 255;
- for(i = 0; i < n; i++) {
- ch = buf[i];
- if(ch > lim)
- ch = 0x80;
- ans[i] = ch;
- }
- ans[n] = 0;
- break;
- case UTF_8:
- m = 0;
- for(i = 0; i < n; i++) {
- m += runetochar((char*)s, &buf[i]);
- }
- ans = (uchar*)emalloc(m+1);
- p = ans;
- for(i = 0; i < n; i++)
- p += runetochar((char*)p, &buf[i]);
- *p = 0;
- break;
- default:
- assert(0);
- }
- return ans;
- }
- // Convert n to emalloc'd String.
- Rune*
- _ltoStr(int n)
- {
- int m;
- uchar buf[20];
- m = snprint((char*)buf, sizeof(buf), "%d", n);
- return toStr(buf, m, US_Ascii);
- }
|