utils.c 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <draw.h>
  4. #include <html.h>
  5. #include "impl.h"
  6. Rune* whitespace = L" \t\n\r";
  7. Rune* notwhitespace = L"^ \t\n\r";
  8. // All lists start out like List structure.
  9. // List itself can be used as list of int.
  10. int
  11. _listlen(List* l)
  12. {
  13. int n = 0;
  14. while(l != nil) {
  15. l = l->next;
  16. n++;
  17. }
  18. return n;
  19. }
  20. // Cons
  21. List*
  22. _newlist(int val, List* rest)
  23. {
  24. List* ans;
  25. ans = (List*)emalloc(sizeof(List));
  26. ans->val = val;
  27. ans->next = rest;
  28. return ans;
  29. }
  30. // Reverse a list in place
  31. List*
  32. _revlist(List* l)
  33. {
  34. List* newl;
  35. List* nextl;
  36. newl = nil;
  37. while(l != nil) {
  38. nextl = l->next;
  39. l->next = newl;
  40. newl = l;
  41. l = nextl;
  42. }
  43. return newl;
  44. }
  // The next few routines take a "character class" as argument,
  // e.g., "a-zA-Z" or "^ \t\n".
  // (Ranges are indicated by '-', except when it is the first or last
  // character of the class; '^' in first position means "not in" the
  // class that follows.)
  49. // Splitl splits s[0:n] just before first character of class cl.
  50. // Answers go in (p1, n1) and (p2, n2).
  51. // If no split, the whole thing goes in the first component.
  52. // Note: answers contain pointers into original string.
  53. void
  54. _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
  55. {
  56. Rune* p;
  57. p = _Strnclass(s, cl, n);
  58. *p1 = s;
  59. if(p == nil) {
  60. *n1 = n;
  61. *p2 = nil;
  62. *n2 = 0;
  63. }
  64. else {
  65. *p2 = p;
  66. *n1 = p-s;
  67. *n2 = n-*n1;
  68. }
  69. }
  70. // Splitr splits s[0:n] just after last character of class cl.
  71. // Answers go in (p1, n1) and (p2, n2).
  72. // If no split, the whole thing goes in the last component.
  73. // Note: answers contain pointers into original string.
  74. void
  75. _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
  76. {
  77. Rune* p;
  78. p = _Strnrclass(s, cl, n);
  79. if(p == nil) {
  80. *p1 = nil;
  81. *n1 = 0;
  82. *p2 = s;
  83. *n2 = n;
  84. }
  85. else {
  86. *p1 = s;
  87. *p2 = p+1;
  88. *n1 = *p2-s;
  89. *n2 = n-*n1;
  90. }
  91. }
  92. // Splitall splits s[0:n] into parts that are separated by characters from class cl.
  93. // Each part will have nonzero length.
  94. // At most alen parts are found, and pointers to their starts go into
  95. // the strarr array, while their lengths go into the lenarr array.
  96. // The return value is the number of parts found.
  97. int
  98. _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen)
  99. {
  100. int i;
  101. Rune* p;
  102. Rune* q;
  103. Rune* slast;
  104. if(s == nil || n == 0)
  105. return 0;
  106. i = 0;
  107. p = s;
  108. slast = s+n;
  109. while(p < slast && i < alen) {
  110. while(p < slast && _inclass(*p, cl))
  111. p++;
  112. if(p == slast)
  113. break;
  114. q = _Strnclass(p, cl, slast-p);
  115. if(q == nil)
  116. q = slast;
  117. assert(q > p && q <= slast);
  118. strarr[i] = p;
  119. lenarr[i] = q-p;
  120. i++;
  121. p = q;
  122. }
  123. return i;
  124. }
  125. // Find part of s that excludes leading and trailing whitespace,
  126. // and return that part in *pans (and its length in *panslen).
  127. void
  128. _trimwhite(Rune* s, int n, Rune** pans, int* panslen)
  129. {
  130. Rune* p;
  131. Rune* q;
  132. p = nil;
  133. if(n > 0) {
  134. p = _Strnclass(s, notwhitespace, n);
  135. if(p != nil) {
  136. q = _Strnrclass(s, notwhitespace, n);
  137. assert(q != nil);
  138. n = q+1-p;
  139. }
  140. }
  141. *pans = p;
  142. *panslen = n;
  143. }
  144. // _Strclass returns a pointer to the first element of s that is
  145. // a member of class cl, nil if none.
  146. Rune*
  147. _Strclass(Rune* s, Rune* cl)
  148. {
  149. Rune* p;
  150. for(p = s; *p != 0; p++)
  151. if(_inclass(*p, cl))
  152. return p;
  153. return nil;
  154. }
  155. // _Strnclass returns a pointer to the first element of s[0:n] that is
  156. // a member of class cl, nil if none.
  157. Rune*
  158. _Strnclass(Rune* s, Rune* cl, int n)
  159. {
  160. Rune* p;
  161. for(p = s; n-- && *p != 0; p++)
  162. if(_inclass(*p, cl))
  163. return p;
  164. return nil;
  165. }
  166. // _Strrclass returns a pointer to the last element of s that is
  167. // a member of class cl, nil if none
  168. Rune*
  169. _Strrclass(Rune* s, Rune* cl)
  170. {
  171. Rune* p;
  172. if(s == nil || *s == 0)
  173. return nil;
  174. p = s + runestrlen(s) - 1;
  175. while(p >= s) {
  176. if(_inclass(*p, cl))
  177. return p;
  178. p--;
  179. };
  180. return nil;
  181. }
  182. // _Strnrclass returns a pointer to the last element of s[0:n] that is
  183. // a member of class cl, nil if none
  184. Rune*
  185. _Strnrclass(Rune* s, Rune* cl, int n)
  186. {
  187. Rune* p;
  188. if(s == nil || *s == 0 || n == 0)
  189. return nil;
  190. p = s + n - 1;
  191. while(p >= s) {
  192. if(_inclass(*p, cl))
  193. return p;
  194. p--;
  195. };
  196. return nil;
  197. }
  198. // Is c in the class cl?
  199. int
  200. _inclass(Rune c, Rune* cl)
  201. {
  202. int n;
  203. int ans;
  204. int negate;
  205. int i;
  206. n = _Strlen(cl);
  207. if(n == 0)
  208. return 0;
  209. ans = 0;
  210. negate = 0;
  211. if(cl[0] == '^') {
  212. negate = 1;
  213. cl++;
  214. n--;
  215. }
  216. for(i = 0; i < n; i++) {
  217. if(cl[i] == '-' && i > 0 && i < n - 1) {
  218. if(c >= cl[i - 1] && c <= cl[i + 1]) {
  219. ans = 1;
  220. break;
  221. }
  222. i++;
  223. }
  224. else if(c == cl[i]) {
  225. ans = 1;
  226. break;
  227. }
  228. }
  229. if(negate)
  230. ans = !ans;
  231. return ans;
  232. }
  233. // Is pre a prefix of s?
  234. int
  235. _prefix(Rune* pre, Rune* s)
  236. {
  237. int ns;
  238. int n;
  239. int k;
  240. ns = _Strlen(s);
  241. n = _Strlen(pre);
  242. if(ns < n)
  243. return 0;
  244. for(k = 0; k < n; k++) {
  245. if(pre[k] != s[k])
  246. return 0;
  247. }
  248. return 1;
  249. }
  250. // Number of runes in (null-terminated) s
  251. int
  252. _Strlen(Rune* s)
  253. {
  254. if(s == nil)
  255. return 0;
  256. return runestrlen(s);
  257. }
  258. // -1, 0, 1 as s1 is lexicographically less, equal greater than s2
  259. int
  260. _Strcmp(Rune *s1, Rune *s2)
  261. {
  262. if(s1 == nil)
  263. return (s2 == nil || *s2 == 0) ? 0 : -1;
  264. if(s2 == nil)
  265. return (*s1 == 0) ? 0 : 1;
  266. return runestrcmp(s1, s2);
  267. }
  268. // Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars).
  269. // Also, do a case-insensitive match, assuming s2
  270. // has no chars in [A-Z], only their lowercase versions.
  271. // (This routine is used for in-place keyword lookup, where s2 is in a keyword
  272. // list and s1 is some substring, possibly mixed-case, in a buffer.)
  273. int
  274. _Strncmpci(Rune *s1, int n1, Rune *s2)
  275. {
  276. Rune c1, c2;
  277. for(;;) {
  278. if(n1-- == 0) {
  279. if(*s2 == 0)
  280. return 0;
  281. return -1;
  282. }
  283. c1 = *s1++;
  284. c2 = *s2++;
  285. if(c1 >= 'A' && c1 <= 'Z')
  286. c1 = c1 - 'A' + 'a';
  287. if(c1 != c2) {
  288. if(c1 > c2)
  289. return 1;
  290. return -1;
  291. }
  292. }
  293. }
  294. // emalloc and copy
  295. Rune*
  296. _Strdup(Rune* s)
  297. {
  298. if(s == nil)
  299. return nil;
  300. return _Strndup(s, runestrlen(s));
  301. }
  302. // emalloc and copy n chars of s (assume s is at least that long),
  303. // and add 0 terminator.
  304. // Return nil if n==0.
  305. Rune*
  306. _Strndup(Rune* s, int n)
  307. {
  308. Rune* ans;
  309. if(n <= 0)
  310. return nil;
  311. ans = _newstr(n);
  312. memmove(ans, s, n*sizeof(Rune));
  313. ans[n] = 0;
  314. return ans;
  315. }
  316. // emalloc enough room for n Runes, plus 1 null terminator.
  317. // (Not initialized to anything.)
  318. Rune*
  319. _newstr(int n)
  320. {
  321. return (Rune*)emalloc((n+1)*sizeof(Rune));
  322. }
  323. // emalloc and copy s+t
  324. Rune*
  325. _Strdup2(Rune* s, Rune* t)
  326. {
  327. int ns, nt;
  328. Rune* ans;
  329. Rune* p;
  330. ns = _Strlen(s);
  331. nt = _Strlen(t);
  332. if(ns+nt == 0)
  333. return nil;
  334. ans = _newstr(ns+nt);
  335. p = _Stradd(ans, s, ns);
  336. p = _Stradd(p, t, nt);
  337. *p = 0;
  338. return ans;
  339. }
  340. // Return emalloc'd substring s[start:stop],
  341. Rune*
  342. _Strsubstr(Rune* s, int start, int stop)
  343. {
  344. Rune* t;
  345. if(start == stop)
  346. return nil;
  347. t = _Strndup(s+start, stop-start);
  348. return t;
  349. }
  350. // Copy n chars to s1 from s2, and return s1+n
  351. Rune*
  352. _Stradd(Rune* s1, Rune* s2, int n)
  353. {
  354. if(n == 0)
  355. return s1;
  356. memmove(s1, s2, n*sizeof(Rune));
  357. return s1+n;
  358. }
  359. // Like strtol, but converting from Rune* string
  360. #define LONG_MAX 2147483647L
  361. #define LONG_MIN -2147483648L
  362. long
  363. _Strtol(Rune* nptr, Rune** endptr, int base)
  364. {
  365. Rune* p;
  366. long n, nn;
  367. int c, ovfl, v, neg, ndig;
  368. p = nptr;
  369. neg = 0;
  370. n = 0;
  371. ndig = 0;
  372. ovfl = 0;
  373. /*
  374. * White space
  375. */
  376. for(;;p++){
  377. switch(*p){
  378. case ' ':
  379. case '\t':
  380. case '\n':
  381. case '\f':
  382. case '\r':
  383. case '\v':
  384. continue;
  385. }
  386. break;
  387. }
  388. /*
  389. * Sign
  390. */
  391. if(*p=='-' || *p=='+')
  392. if(*p++ == '-')
  393. neg = 1;
  394. /*
  395. * Base
  396. */
  397. if(base==0){
  398. if(*p != '0')
  399. base = 10;
  400. else{
  401. base = 8;
  402. if(p[1]=='x' || p[1]=='X'){
  403. p += 2;
  404. base = 16;
  405. }
  406. }
  407. }else if(base==16 && *p=='0'){
  408. if(p[1]=='x' || p[1]=='X')
  409. p += 2;
  410. }else if(base<0 || 36<base)
  411. goto Return;
  412. /*
  413. * Non-empty sequence of digits
  414. */
  415. for(;; p++,ndig++){
  416. c = *p;
  417. v = base;
  418. if('0'<=c && c<='9')
  419. v = c - '0';
  420. else if('a'<=c && c<='z')
  421. v = c - 'a' + 10;
  422. else if('A'<=c && c<='Z')
  423. v = c - 'A' + 10;
  424. if(v >= base)
  425. break;
  426. nn = n*base + v;
  427. if(nn < n)
  428. ovfl = 1;
  429. n = nn;
  430. }
  431. Return:
  432. if(ndig == 0)
  433. p = nptr;
  434. if(endptr)
  435. *endptr = p;
  436. if(ovfl){
  437. if(neg)
  438. return LONG_MIN;
  439. return LONG_MAX;
  440. }
  441. if(neg)
  442. return -n;
  443. return n;
  444. }
  445. // Convert buf[0:n], bytes whose character set is chset,
  446. // into a emalloc'd null-terminated Unicode string.
  447. Rune*
  448. toStr(uchar* buf, int n, int chset)
  449. {
  450. int i;
  451. int m;
  452. Rune ch;
  453. Rune* ans;
  454. switch(chset) {
  455. case US_Ascii:
  456. case ISO_8859_1:
  457. ans = (Rune*)emalloc((n+1)*sizeof(Rune));
  458. for(i = 0; i < n; i++)
  459. ans[i] = buf[i];
  460. ans[n] = 0;
  461. break;
  462. case UTF_8:
  463. m = 0;
  464. for(i = 0; i < n; ) {
  465. i += chartorune(&ch, (char*)(buf+i));
  466. m++;
  467. }
  468. ans = (Rune*)emalloc((m+1)*sizeof(Rune));
  469. m = 0;
  470. for(i = 0; i < n; ) {
  471. i += chartorune(&ch, (char*)(buf+i));
  472. ans[m++] = ch;
  473. }
  474. ans[m] = 0;
  475. break;
  476. default:
  477. ans = nil;
  478. assert(0);
  479. }
  480. return ans;
  481. }
  482. // Convert buf[0:n], Unicode characters,
  483. // into an emalloc'd null-terminated string in character set chset.
  484. // Use 0x80 for unconvertable characters.
  485. uchar*
  486. fromStr(Rune* buf, int n, int chset)
  487. {
  488. uchar* ans;
  489. int i, lim, m;
  490. Rune ch;
  491. uchar* p;
  492. uchar s[UTFmax];
  493. ans = nil;
  494. switch(chset) {
  495. case US_Ascii:
  496. case ISO_8859_1:
  497. ans = (uchar*)emalloc(n+1);
  498. lim = (chset==US_Ascii)? 127 : 255;
  499. for(i = 0; i < n; i++) {
  500. ch = buf[i];
  501. if(ch > lim)
  502. ch = 0x80;
  503. ans[i] = ch;
  504. }
  505. ans[n] = 0;
  506. break;
  507. case UTF_8:
  508. m = 0;
  509. for(i = 0; i < n; i++) {
  510. m += runetochar((char*)s, &buf[i]);
  511. }
  512. ans = (uchar*)emalloc(m+1);
  513. p = ans;
  514. for(i = 0; i < n; i++)
  515. p += runetochar((char*)p, &buf[i]);
  516. *p = 0;
  517. break;
  518. default:
  519. assert(0);
  520. }
  521. return ans;
  522. }
  523. // Convert n to emalloc'd String.
  524. Rune*
  525. _ltoStr(int n)
  526. {
  527. int m;
  528. uchar buf[20];
  529. m = snprint((char*)buf, sizeof(buf), "%d", n);
  530. return toStr(buf, m, US_Ascii);
  531. }