utils.c 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include <u.h>
  10. #include <libc.h>
  11. #include <draw.h>
  12. #include <html.h>
  13. #include "impl.h"
  14. Rune* whitespace = L" \t\n\r";
  15. Rune* notwhitespace = L"^ \t\n\r";
  16. // All lists start out like List structure.
  17. // List itself can be used as list of int.
  18. int
  19. _listlen(List* l)
  20. {
  21. int n = 0;
  22. while(l != nil) {
  23. l = l->next;
  24. n++;
  25. }
  26. return n;
  27. }
  28. // Cons
  29. List*
  30. _newlist(int val, List* rest)
  31. {
  32. List* ans;
  33. ans = (List*)emalloc(sizeof(List));
  34. ans->val = val;
  35. ans->next = rest;
  36. return ans;
  37. }
  38. // Reverse a list in place
  39. List*
  40. _revlist(List* l)
  41. {
  42. List* newl;
  43. List* nextl;
  44. newl = nil;
  45. while(l != nil) {
  46. nextl = l->next;
  47. l->next = newl;
  48. newl = l;
  49. l = nextl;
  50. }
  51. return newl;
  52. }
  53. // The next few routines take a "character class" as argument.
  54. // e.g., "a-zA-Z", or "^ \t\n"
  55. // (ranges indicated by - except in first position;
  56. // ^ is first position means "not in" the following class)
  57. // Splitl splits s[0:n] just before first character of class cl.
  58. // Answers go in (p1, n1) and (p2, n2).
  59. // If no split, the whole thing goes in the first component.
  60. // Note: answers contain pointers into original string.
  61. void
  62. _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
  63. {
  64. Rune* p;
  65. p = _Strnclass(s, cl, n);
  66. *p1 = s;
  67. if(p == nil) {
  68. *n1 = n;
  69. *p2 = nil;
  70. *n2 = 0;
  71. }
  72. else {
  73. *p2 = p;
  74. *n1 = p-s;
  75. *n2 = n-*n1;
  76. }
  77. }
  78. // Splitr splits s[0:n] just after last character of class cl.
  79. // Answers go in (p1, n1) and (p2, n2).
  80. // If no split, the whole thing goes in the last component.
  81. // Note: answers contain pointers into original string.
  82. void
  83. _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
  84. {
  85. Rune* p;
  86. p = _Strnrclass(s, cl, n);
  87. if(p == nil) {
  88. *p1 = nil;
  89. *n1 = 0;
  90. *p2 = s;
  91. *n2 = n;
  92. }
  93. else {
  94. *p1 = s;
  95. *p2 = p+1;
  96. *n1 = *p2-s;
  97. *n2 = n-*n1;
  98. }
  99. }
  100. // Splitall splits s[0:n] into parts that are separated by characters from class cl.
  101. // Each part will have nonzero length.
  102. // At most alen parts are found, and pointers to their starts go into
  103. // the strarr array, while their lengths go into the lenarr array.
  104. // The return value is the number of parts found.
  105. int
  106. _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen)
  107. {
  108. int i;
  109. Rune* p;
  110. Rune* q;
  111. Rune* slast;
  112. if(s == nil || n == 0)
  113. return 0;
  114. i = 0;
  115. p = s;
  116. slast = s+n;
  117. while(p < slast && i < alen) {
  118. while(p < slast && _inclass(*p, cl))
  119. p++;
  120. if(p == slast)
  121. break;
  122. q = _Strnclass(p, cl, slast-p);
  123. if(q == nil)
  124. q = slast;
  125. assert(q > p && q <= slast);
  126. strarr[i] = p;
  127. lenarr[i] = q-p;
  128. i++;
  129. p = q;
  130. }
  131. return i;
  132. }
  133. // Find part of s that excludes leading and trailing whitespace,
  134. // and return that part in *pans (and its length in *panslen).
  135. void
  136. _trimwhite(Rune* s, int n, Rune** pans, int* panslen)
  137. {
  138. Rune* p;
  139. Rune* q;
  140. p = nil;
  141. if(n > 0) {
  142. p = _Strnclass(s, notwhitespace, n);
  143. if(p != nil) {
  144. q = _Strnrclass(s, notwhitespace, n);
  145. assert(q != nil);
  146. n = q+1-p;
  147. }
  148. }
  149. *pans = p;
  150. *panslen = n;
  151. }
  152. // _Strclass returns a pointer to the first element of s that is
  153. // a member of class cl, nil if none.
  154. Rune*
  155. _Strclass(Rune* s, Rune* cl)
  156. {
  157. Rune* p;
  158. for(p = s; *p != 0; p++)
  159. if(_inclass(*p, cl))
  160. return p;
  161. return nil;
  162. }
  163. // _Strnclass returns a pointer to the first element of s[0:n] that is
  164. // a member of class cl, nil if none.
  165. Rune*
  166. _Strnclass(Rune* s, Rune* cl, int n)
  167. {
  168. Rune* p;
  169. for(p = s; n-- && *p != 0; p++)
  170. if(_inclass(*p, cl))
  171. return p;
  172. return nil;
  173. }
  174. // _Strrclass returns a pointer to the last element of s that is
  175. // a member of class cl, nil if none
  176. Rune*
  177. _Strrclass(Rune* s, Rune* cl)
  178. {
  179. Rune* p;
  180. if(s == nil || *s == 0)
  181. return nil;
  182. p = s + runestrlen(s) - 1;
  183. while(p >= s) {
  184. if(_inclass(*p, cl))
  185. return p;
  186. p--;
  187. };
  188. return nil;
  189. }
  190. // _Strnrclass returns a pointer to the last element of s[0:n] that is
  191. // a member of class cl, nil if none
  192. Rune*
  193. _Strnrclass(Rune* s, Rune* cl, int n)
  194. {
  195. Rune* p;
  196. if(s == nil || *s == 0 || n == 0)
  197. return nil;
  198. p = s + n - 1;
  199. while(p >= s) {
  200. if(_inclass(*p, cl))
  201. return p;
  202. p--;
  203. };
  204. return nil;
  205. }
  206. // Is c in the class cl?
  207. int
  208. _inclass(Rune c, Rune* cl)
  209. {
  210. int n;
  211. int ans;
  212. int negate;
  213. int i;
  214. n = _Strlen(cl);
  215. if(n == 0)
  216. return 0;
  217. ans = 0;
  218. negate = 0;
  219. if(cl[0] == '^') {
  220. negate = 1;
  221. cl++;
  222. n--;
  223. }
  224. for(i = 0; i < n; i++) {
  225. if(cl[i] == '-' && i > 0 && i < n - 1) {
  226. if(c >= cl[i - 1] && c <= cl[i + 1]) {
  227. ans = 1;
  228. break;
  229. }
  230. i++;
  231. }
  232. else if(c == cl[i]) {
  233. ans = 1;
  234. break;
  235. }
  236. }
  237. if(negate)
  238. ans = !ans;
  239. return ans;
  240. }
  241. // Is pre a prefix of s?
  242. int
  243. _prefix(Rune* pre, Rune* s)
  244. {
  245. int ns;
  246. int n;
  247. int k;
  248. ns = _Strlen(s);
  249. n = _Strlen(pre);
  250. if(ns < n)
  251. return 0;
  252. for(k = 0; k < n; k++) {
  253. if(pre[k] != s[k])
  254. return 0;
  255. }
  256. return 1;
  257. }
  258. // Number of runes in (null-terminated) s
  259. int
  260. _Strlen(Rune* s)
  261. {
  262. if(s == nil)
  263. return 0;
  264. return runestrlen(s);
  265. }
  266. // -1, 0, 1 as s1 is lexicographically less, equal greater than s2
  267. int
  268. _Strcmp(Rune *s1, Rune *s2)
  269. {
  270. if(s1 == nil)
  271. return (s2 == nil || *s2 == 0) ? 0 : -1;
  272. if(s2 == nil)
  273. return (*s1 == 0) ? 0 : 1;
  274. return runestrcmp(s1, s2);
  275. }
  276. // Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars).
  277. // Also, do a case-insensitive match, assuming s2
  278. // has no chars in [A-Z], only their lowercase versions.
  279. // (This routine is used for in-place keyword lookup, where s2 is in a keyword
  280. // list and s1 is some substring, possibly mixed-case, in a buffer.)
  281. int
  282. _Strncmpci(Rune *s1, int n1, Rune *s2)
  283. {
  284. Rune c1, c2;
  285. for(;;) {
  286. if(n1-- == 0) {
  287. if(*s2 == 0)
  288. return 0;
  289. return -1;
  290. }
  291. c1 = *s1++;
  292. c2 = *s2++;
  293. if(c1 >= 'A' && c1 <= 'Z')
  294. c1 = c1 - 'A' + 'a';
  295. if(c1 != c2) {
  296. if(c1 > c2)
  297. return 1;
  298. return -1;
  299. }
  300. }
  301. }
  302. // emalloc and copy
  303. Rune*
  304. _Strdup(Rune* s)
  305. {
  306. if(s == nil)
  307. return nil;
  308. return _Strndup(s, runestrlen(s));
  309. }
  310. // emalloc and copy n chars of s (assume s is at least that long),
  311. // and add 0 terminator.
  312. // Return nil if n==0.
  313. Rune*
  314. _Strndup(Rune* s, int n)
  315. {
  316. Rune* ans;
  317. if(n <= 0)
  318. return nil;
  319. ans = _newstr(n);
  320. memmove(ans, s, n*sizeof(Rune));
  321. ans[n] = 0;
  322. return ans;
  323. }
  324. // emalloc enough room for n Runes, plus 1 null terminator.
  325. // (Not initialized to anything.)
  326. Rune*
  327. _newstr(int n)
  328. {
  329. return (Rune*)emalloc((n+1)*sizeof(Rune));
  330. }
  331. // emalloc and copy s+t
  332. Rune*
  333. _Strdup2(Rune* s, Rune* t)
  334. {
  335. int ns, nt;
  336. Rune* ans;
  337. Rune* p;
  338. ns = _Strlen(s);
  339. nt = _Strlen(t);
  340. if(ns+nt == 0)
  341. return nil;
  342. ans = _newstr(ns+nt);
  343. p = _Stradd(ans, s, ns);
  344. p = _Stradd(p, t, nt);
  345. *p = 0;
  346. return ans;
  347. }
  348. // Return emalloc'd substring s[start:stop],
  349. Rune*
  350. _Strsubstr(Rune* s, int start, int stop)
  351. {
  352. Rune* t;
  353. if(start == stop)
  354. return nil;
  355. t = _Strndup(s+start, stop-start);
  356. return t;
  357. }
  358. // Copy n chars to s1 from s2, and return s1+n
  359. Rune*
  360. _Stradd(Rune* s1, Rune* s2, int n)
  361. {
  362. if(n == 0)
  363. return s1;
  364. memmove(s1, s2, n*sizeof(Rune));
  365. return s1+n;
  366. }
  367. // Like strtol, but converting from Rune* string
  368. #define LONG_MAX 2147483647L
  369. #define LONG_MIN -2147483648L
  370. int32_t
  371. _Strtol(Rune* nptr, Rune** endptr, int base)
  372. {
  373. Rune* p;
  374. int32_t n, nn;
  375. int c, ovfl, v, neg, ndig;
  376. p = nptr;
  377. neg = 0;
  378. n = 0;
  379. ndig = 0;
  380. ovfl = 0;
  381. /*
  382. * White space
  383. */
  384. for(;;p++){
  385. switch(*p){
  386. case ' ':
  387. case '\t':
  388. case '\n':
  389. case '\f':
  390. case '\r':
  391. case '\v':
  392. continue;
  393. }
  394. break;
  395. }
  396. /*
  397. * Sign
  398. */
  399. if(*p=='-' || *p=='+')
  400. if(*p++ == '-')
  401. neg = 1;
  402. /*
  403. * Base
  404. */
  405. if(base==0){
  406. if(*p != '0')
  407. base = 10;
  408. else{
  409. base = 8;
  410. if(p[1]=='x' || p[1]=='X'){
  411. p += 2;
  412. base = 16;
  413. }
  414. }
  415. }else if(base==16 && *p=='0'){
  416. if(p[1]=='x' || p[1]=='X')
  417. p += 2;
  418. }else if(base<0 || 36<base)
  419. goto Return;
  420. /*
  421. * Non-empty sequence of digits
  422. */
  423. for(;; p++,ndig++){
  424. c = *p;
  425. v = base;
  426. if('0'<=c && c<='9')
  427. v = c - '0';
  428. else if('a'<=c && c<='z')
  429. v = c - 'a' + 10;
  430. else if('A'<=c && c<='Z')
  431. v = c - 'A' + 10;
  432. if(v >= base)
  433. break;
  434. nn = n*base + v;
  435. if(nn < n)
  436. ovfl = 1;
  437. n = nn;
  438. }
  439. Return:
  440. if(ndig == 0)
  441. p = nptr;
  442. if(endptr)
  443. *endptr = p;
  444. if(ovfl){
  445. if(neg)
  446. return LONG_MIN;
  447. return LONG_MAX;
  448. }
  449. if(neg)
  450. return -n;
  451. return n;
  452. }
  453. // Convert buf[0:n], bytes whose character set is chset,
  454. // into a emalloc'd null-terminated Unicode string.
  455. Rune*
  456. toStr(uint8_t* buf, int n, int chset)
  457. {
  458. int i;
  459. int m;
  460. Rune ch;
  461. Rune* ans;
  462. switch(chset) {
  463. case US_Ascii:
  464. case ISO_8859_1:
  465. ans = (Rune*)emalloc((n+1)*sizeof(Rune));
  466. for(i = 0; i < n; i++)
  467. ans[i] = buf[i];
  468. ans[n] = 0;
  469. break;
  470. case UTF_8:
  471. m = 0;
  472. for(i = 0; i < n; ) {
  473. i += chartorune(&ch, (char*)(buf+i));
  474. m++;
  475. }
  476. ans = (Rune*)emalloc((m+1)*sizeof(Rune));
  477. m = 0;
  478. for(i = 0; i < n; ) {
  479. i += chartorune(&ch, (char*)(buf+i));
  480. ans[m++] = ch;
  481. }
  482. ans[m] = 0;
  483. break;
  484. default:
  485. ans = nil;
  486. assert(0);
  487. }
  488. return ans;
  489. }
  490. // Convert buf[0:n], Unicode characters,
  491. // into an emalloc'd null-terminated string in character set chset.
  492. // Use 0x80 for unconvertable characters.
  493. uint8_t*
  494. fromStr(Rune* buf, int n, int chset)
  495. {
  496. uint8_t* ans;
  497. int i, lim, m;
  498. Rune ch;
  499. uint8_t* p;
  500. uint8_t s[UTFmax];
  501. ans = nil;
  502. switch(chset) {
  503. case US_Ascii:
  504. case ISO_8859_1:
  505. ans = (uint8_t*)emalloc(n+1);
  506. lim = (chset==US_Ascii)? 127 : 255;
  507. for(i = 0; i < n; i++) {
  508. ch = buf[i];
  509. if(ch > lim)
  510. ch = 0x80;
  511. ans[i] = ch;
  512. }
  513. ans[n] = 0;
  514. break;
  515. case UTF_8:
  516. m = 0;
  517. for(i = 0; i < n; i++) {
  518. m += runetochar((char*)s, &buf[i]);
  519. }
  520. ans = (uint8_t*)emalloc(m+1);
  521. p = ans;
  522. for(i = 0; i < n; i++)
  523. p += runetochar((char*)p, &buf[i]);
  524. *p = 0;
  525. break;
  526. default:
  527. assert(0);
  528. }
  529. return ans;
  530. }