utils.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. #include "dict.h"
/*
 * Table of the dictionaries this program knows about.
 * Each entry lists, in order: a short name, a descriptive title,
 * two file paths (judging by the names, the dictionary data file
 * followed by its index file), and three format-specific routines
 * (named ...nextoff, ...printentry/...entry, ...printkey).
 * See the Dict declaration in dict.h for the authoritative field
 * meanings.  Several entries share one data file and differ only
 * in the index they use.
 * The table is terminated by an all-zero entry.
 */
Dict dicts[] = {
	{"oed", "Oxford English Dictionary, 2nd Ed.",
	"/lib/dict/oed2", "/lib/dict/oed2index",
	oednextoff, oedprintentry, oedprintkey},
	{"ahd", "American Heritage Dictionary, 2nd College Ed.",
	"/lib/ahd/DICT.DB", "/lib/ahd/index",
	ahdnextoff, ahdprintentry, ahdprintkey},
	{"pgw", "Project Gutenberg Webster Dictionary",
	"/lib/dict/pgw", "/lib/dict/pgwindex",
	pgwnextoff, pgwprintentry, pgwprintkey},
	{"thesaurus", "Collins Thesaurus",
	"/lib/dict/thesaurus", "/lib/dict/thesindex",
	thesnextoff, thesprintentry, thesprintkey},
	{"roget", "Project Gutenberg Roget's Thesaurus",
	"/lib/dict/roget", "/lib/dict/rogetindex",
	rogetnextoff, rogetprintentry, rogetprintkey},
	{"ce", "Gendai Chinese->English",
	"/lib/dict/world/sansdata/sandic24.dat",
	"/lib/dict/world/sansdata/ceindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"ceh", "Gendai Chinese->English (Hanzi index)",
	"/lib/dict/world/sansdata/sandic24.dat",
	"/lib/dict/world/sansdata/cehindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"ec", "Gendai English->Chinese",
	"/lib/dict/world/sansdata/sandic24.dat",
	"/lib/dict/world/sansdata/ecindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"dae", "Gyldendal Danish->English",
	"/lib/dict/world/gylddata/sandic30.dat",
	"/lib/dict/world/gylddata/daeindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"eda", "Gyldendal English->Danish",
	"/lib/dict/world/gylddata/sandic29.dat",
	"/lib/dict/world/gylddata/edaindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"due", "Wolters-Noordhoff Dutch->English",
	"/lib/dict/world/woltdata/sandic07.dat",
	"/lib/dict/world/woltdata/deindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"edu", "Wolters-Noordhoff English->Dutch",
	"/lib/dict/world/woltdata/sandic06.dat",
	"/lib/dict/world/woltdata/edindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"fie", "WSOY Finnish->English",
	"/lib/dict/world/werndata/sandic32.dat",
	"/lib/dict/world/werndata/fieindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"efi", "WSOY English->Finnish",
	"/lib/dict/world/werndata/sandic31.dat",
	"/lib/dict/world/werndata/efiindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"fe", "Collins French->English",
	"/lib/dict/fe", "/lib/dict/feindex",
	pcollnextoff, pcollprintentry, pcollprintkey},
	{"ef", "Collins English->French",
	"/lib/dict/ef", "/lib/dict/efindex",
	pcollnextoff, pcollprintentry, pcollprintkey},
	{"ge", "Collins German->English",
	"/lib/dict/ge", "/lib/dict/geindex",
	pcollgnextoff, pcollgprintentry, pcollgprintkey},
	{"eg", "Collins English->German",
	"/lib/dict/eg", "/lib/dict/egindex",
	pcollgnextoff, pcollgprintentry, pcollgprintkey},
	{"ie", "Collins Italian->English",
	"/lib/dict/ie", "/lib/dict/ieindex",
	pcollnextoff, pcollprintentry, pcollprintkey},
	{"ei", "Collins English->Italian",
	"/lib/dict/ei", "/lib/dict/eiindex",
	pcollnextoff, pcollprintentry, pcollprintkey},
	{"je", "Sanshusha Japanese->English",
	"/lib/dict/world/sansdata/sandic18.dat",
	"/lib/dict/world/sansdata/jeindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"jek", "Sanshusha Japanese->English (Kanji index)",
	"/lib/dict/world/sansdata/sandic18.dat",
	"/lib/dict/world/sansdata/jekindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"ej", "Sanshusha English->Japanese",
	"/lib/dict/world/sansdata/sandic18.dat",
	"/lib/dict/world/sansdata/ejindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"tjeg", "Sanshusha technical Japanese->English,German",
	"/lib/dict/world/sansdata/sandic16.dat",
	"/lib/dict/world/sansdata/tjegindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"tjegk", "Sanshusha technical Japanese->English,German (Kanji index)",
	"/lib/dict/world/sansdata/sandic16.dat",
	"/lib/dict/world/sansdata/tjegkindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"tegj", "Sanshusha technical English->German,Japanese",
	"/lib/dict/world/sansdata/sandic16.dat",
	"/lib/dict/world/sansdata/tegjindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"tgje", "Sanshusha technical German->Japanese,English",
	"/lib/dict/world/sansdata/sandic16.dat",
	"/lib/dict/world/sansdata/tgjeindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"ne", "Kunnskapforlaget Norwegian->English",
	"/lib/dict/world/kunndata/sandic28.dat",
	"/lib/dict/world/kunndata/neindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"en", "Kunnskapforlaget English->Norwegian",
	"/lib/dict/world/kunndata/sandic27.dat",
	"/lib/dict/world/kunndata/enindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"re", "Leon Ungier Russian->English",
	"/lib/dict/re", "/lib/dict/reindex",
	simplenextoff, simpleprintentry, simpleprintkey},
	{"er", "Leon Ungier English->Russian",
	"/lib/dict/re", "/lib/dict/erindex",
	simplenextoff, simpleprintentry, simpleprintkey},
	{"se", "Collins Spanish->English",
	"/lib/dict/se", "/lib/dict/seindex",
	pcollnextoff, pcollprintentry, pcollprintkey},
	{"es", "Collins English->Spanish",
	"/lib/dict/es", "/lib/dict/esindex",
	pcollnextoff, pcollprintentry, pcollprintkey},
	{"swe", "Esselte Studium Swedish->English",
	"/lib/dict/world/essedata/sandic34.dat",
	"/lib/dict/world/essedata/sweindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"esw", "Esselte Studium English->Swedish",
	"/lib/dict/world/essedata/sandic33.dat",
	"/lib/dict/world/essedata/eswindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"movie", "Movies -- by title",
	"/lib/movie/data", "/lib/dict/movtindex",
	movienextoff, movieprintentry, movieprintkey},
	{"moviea", "Movies -- by actor",
	"/lib/movie/data", "/lib/dict/movaindex",
	movienextoff, movieprintentry, movieprintkey},
	{"movied", "Movies -- by director",
	"/lib/movie/data", "/lib/dict/movdindex",
	movienextoff, movieprintentry, movieprintkey},
	{"slang", "English Slang",
	"/lib/dict/slang", "/lib/dict/slangindex",
	slangnextoff, slangprintentry, slangprintkey},
	{"robert", "Robert Électronique",
	"/lib/dict/robert/_pointers", "/lib/dict/robert/_index",
	robertnextoff, robertindexentry, robertprintkey},
	{"robertv", "Robert Électronique - formes des verbes",
	"/lib/dict/robert/flex.rob", "/lib/dict/robert/_flexindex",
	robertnextflex, robertflexentry, robertprintkey},
	/* all-zero terminator */
	{0, 0, 0, 0, 0}
};
/*
 * One row of the accent table: an accent together with the
 * characters it can combine with.  pairs is a null-terminated
 * Rune string read two runes at a time (plain character, then
 * its accented form).
 */
typedef struct Lig Lig;
struct Lig {
	Rune start; /* accent rune */
	Rune *pairs; /* <char,accented version> pairs */
};
/*
 * Accent table, indexed by (accent code - LIGS); liglookup()
 * scans the pairs string of the selected row.
 * Rows with an empty pairs string have no precomposed forms here.
 * Every pairs string must hold an even number of runes, because
 * liglookup() steps through it two runes at a time.
 */
static Lig ligtab[Nligs] = {
	[LACU-LIGS] {L'´', L"AÁaáCĆcćEÉeégģIÍiíıíLĹlĺNŃnńOÓoóRŔrŕSŚsśUÚuúYÝyýZŹzź"},
	[LGRV-LIGS] {L'ˋ', L"AÀaàEÈeèIÌiìıìOÒoòUÙuù"},
	[LUML-LIGS] {L'¨', L"AÄaäEËeëIÏiïOÖoöUÜuüYŸyÿ"},
	[LCED-LIGS] {L'¸', L"CÇcçGĢKĶkķLĻlļNŅnņRŖrŗSŞsşTŢtţ"},
	[LTIL-LIGS] {L'˜', L"AÃaãIĨiĩıĩNÑnñOÕoõUŨuũ"},
	[LBRV-LIGS] {L'˘', L"AĂaăEĔeĕGĞgğIĬiĭıĭOŎoŏUŬuŭ"},
	[LRNG-LIGS] {L'˚', L"AÅaåUŮuů"},
	[LDOT-LIGS] {L'˙', L"CĊcċEĖeėGĠgġIİLĿlŀZŻzż"},
	[LDTB-LIGS] {L'.', L""},
	[LFRN-LIGS] {L'⌢', L"AÂaâCĈcĉEÊeêGĜgĝHĤhĥIÎiîıîJĴjĵOÔoôSŜsŝUÛuûWŴwŵYŶyŷ"},
	[LFRB-LIGS] {L'̯', L""},
	[LOGO-LIGS] {L'˛', L"AĄaąEĘeęIĮiįıįUŲuų"},
	[LMAC-LIGS] {L'¯', L"AĀaāEĒeēIĪiīıīOŌoōUŪuū"},
	[LHCK-LIGS] {L'ˇ', L"CČcčDĎdďEĚeěLĽlľNŇnňRŘrřSŠsšTŤtťZŽzž"},
	[LASP-LIGS] {L'ʽ', L""},
	[LLEN-LIGS] {L'ʼ', L""},
	[LBRB-LIGS] {L'̮', L""}
};
/*
 * Table of multi-rune strings, indexed by (code - MULTI).
 * Each entry is the null-terminated Rune string associated with
 * one M* code; presumably the expansion written out when that
 * code is encountered -- confirm against the users of multitab.
 */
Rune *multitab[Nmulti] = {
	[MAAS-MULTI] L"ʽα",
	[MALN-MULTI] L"ʼα",
	[MAND-MULTI] L"and",
	[MAOQ-MULTI] L"a/q",
	[MBRA-MULTI] L"<|",
	[MDD-MULTI] L"..",
	[MDDD-MULTI] L"...",
	[MEAS-MULTI] L"ʽε",
	[MELN-MULTI] L"ʼε",
	[MEMM-MULTI] L"——",
	[MHAS-MULTI] L"ʽη",
	[MHLN-MULTI] L"ʼη",
	[MIAS-MULTI] L"ʽι",
	[MILN-MULTI] L"ʼι",
	[MLCT-MULTI] L"ct",
	[MLFF-MULTI] L"ff",
	[MLFFI-MULTI] L"ffi",
	[MLFFL-MULTI] L"ffl",
	[MLFL-MULTI] L"fl",
	[MLFI-MULTI] L"fi",
	[MLLS-MULTI] L"ɫɫ",
	[MLST-MULTI] L"st",
	[MOAS-MULTI] L"ʽο",
	[MOLN-MULTI] L"ʼο",
	[MOR-MULTI] L"or",
	[MRAS-MULTI] L"ʽρ",
	[MRLN-MULTI] L"ʼρ",
	[MTT-MULTI] L"~~",
	[MUAS-MULTI] L"ʽυ",
	[MULN-MULTI] L"ʼυ",
	[MWAS-MULTI] L"ʽω",
	[MWLN-MULTI] L"ʼω",
	[MOE-MULTI] L"oe",
	[MES-MULTI] L" ",
};
/*
 * Translation table stack maintained by changett():
 * ttabstack holds the saved tables, ntt is the number of
 * entries currently on the stack.
 */
static Rune *ttabstack[20];
static int ntt;
  213. /*
  214. * tab is an array of n Assoc's, sorted by key.
  215. * Look for key in tab, and return corresponding val
  216. * or -1 if not there
  217. */
  218. long
  219. lookassoc(Assoc *tab, int n, char *key)
  220. {
  221. Assoc *q;
  222. long i, low, high;
  223. int r;
  224. for(low = -1, high = n; high > low+1; ){
  225. i = (high+low)/2;
  226. q = &tab[i];
  227. if((r=strcmp(key, q->key))<0)
  228. high = i;
  229. else if(r == 0)
  230. return q->val;
  231. else
  232. low=i;
  233. }
  234. return -1;
  235. }
  236. long
  237. looknassoc(Nassoc *tab, int n, long key)
  238. {
  239. Nassoc *q;
  240. long i, low, high;
  241. for(low = -1, high = n; high > low+1; ){
  242. i = (high+low)/2;
  243. q = &tab[i];
  244. if(key < q->key)
  245. high = i;
  246. else if(key == q->key)
  247. return q->val;
  248. else
  249. low=i;
  250. }
  251. return -1;
  252. }
  253. void
  254. err(char *fmt, ...)
  255. {
  256. char buf[1000];
  257. va_list v;
  258. va_start(v, fmt);
  259. vsnprint(buf, sizeof(buf), fmt, v);
  260. va_end(v);
  261. fprint(2, "%s: %s\n", argv0, buf);
  262. }
  263. /*
  264. * Write the rune r to bout, keeping track of line length
  265. * and breaking the lines (at blanks) when they get too long
  266. */
  267. void
  268. outrune(long r)
  269. {
  270. if(outinhibit)
  271. return;
  272. if(++linelen > breaklen && r == L' ') {
  273. Bputc(bout, '\n');
  274. linelen = 0;
  275. } else
  276. Bputrune(bout, r);
  277. }
  278. void
  279. outrunes(Rune *rp)
  280. {
  281. Rune r;
  282. while((r = *rp++) != 0)
  283. outrune(r);
  284. }
  285. /* like outrune, but when arg is know to be a char */
  286. void
  287. outchar(int c)
  288. {
  289. if(outinhibit)
  290. return;
  291. if(++linelen > breaklen && c == ' ') {
  292. c ='\n';
  293. linelen = 0;
  294. }
  295. Bputc(bout, c);
  296. }
  297. void
  298. outchars(char *s)
  299. {
  300. char c;
  301. while((c = *s++) != 0)
  302. outchar(c);
  303. }
  304. void
  305. outprint(char *fmt, ...)
  306. {
  307. char buf[1000];
  308. va_list v;
  309. va_start(v, fmt);
  310. vsnprint(buf, sizeof(buf), fmt, v);
  311. va_end(v);
  312. outchars(buf);
  313. }
  314. void
  315. outpiece(char *b, char *e)
  316. {
  317. int c, lastc;
  318. lastc = 0;
  319. while(b < e) {
  320. c = *b++;
  321. if(c == '\n')
  322. c = ' ';
  323. if(!(c == ' ' && lastc == ' '))
  324. outchar(c);
  325. lastc = c;
  326. }
  327. }
  328. /*
  329. * Go to new line if not already there; indent if ind != 0.
  330. * If ind > 1, leave a blank line too.
  331. * Slight hack: assume if current line is only one or two
  332. * characters long, then they were spaces.
  333. */
  334. void
  335. outnl(int ind)
  336. {
  337. if(outinhibit)
  338. return;
  339. if(ind) {
  340. if(ind > 1) {
  341. if(linelen > 2)
  342. Bputc(bout, '\n');
  343. Bprint(bout, "\n ");
  344. } else if(linelen == 0)
  345. Bprint(bout, " ");
  346. else if(linelen == 1)
  347. Bputc(bout, ' ');
  348. else if(linelen != 2)
  349. Bprint(bout, "\n ");
  350. linelen = 2;
  351. } else {
  352. if(linelen) {
  353. Bputc(bout, '\n');
  354. linelen = 0;
  355. }
  356. }
  357. }
  358. /*
  359. * Fold the runes in null-terminated rp.
  360. * Use the sort(1) definition of folding (uppercase to lowercase,
  361. * accented characters to corresponding unaccented chars)
  362. */
  363. void
  364. fold(Rune *rp)
  365. {
  366. Rune r;
  367. while((r = *rp) != 0) {
  368. r = tobaserune(r);
  369. if(isupperrune(r))
  370. r = tolowerrune(r);
  371. *rp++ = r;
  372. }
  373. }
  374. /*
  375. * Like fold, but put folded result into new
  376. * (assumed to have enough space).
  377. * old is a regular expression, but we know that
  378. * metacharacters aren't affected
  379. */
  380. void
  381. foldre(char *new, char *old)
  382. {
  383. Rune r;
  384. while(*old) {
  385. old += chartorune(&r, old);
  386. r = tobaserune(r);
  387. if(isupperrune(r))
  388. r = tolowerrune(r);
  389. new += runetochar(new, &r);
  390. }
  391. *new = 0;
  392. }
  393. /*
  394. * acomp(s, t) returns:
  395. * -2 if s strictly precedes t
  396. * -1 if s is a prefix of t
  397. * 0 if s is the same as t
  398. * 1 if t is a prefix of s
  399. * 2 if t strictly precedes s
  400. */
  401. int
  402. acomp(Rune *s, Rune *t)
  403. {
  404. int cs, ct;
  405. for(;;) {
  406. cs = *s;
  407. ct = *t;
  408. if(cs != ct)
  409. break;
  410. if(cs == 0)
  411. return 0;
  412. s++;
  413. t++;
  414. }
  415. if(cs == 0)
  416. return -1;
  417. if(ct == 0)
  418. return 1;
  419. if(cs < ct)
  420. return -2;
  421. return 2;
  422. }
  423. /*
  424. * Copy null terminated Runes from 'from' to 'to'.
  425. */
  426. void
  427. runescpy(Rune *to, Rune *from)
  428. {
  429. while((*to++ = *from++) != 0)
  430. continue;
  431. }
  432. /*
  433. * Conversion of unsigned number to long, no overflow detection
  434. */
  435. long
  436. runetol(Rune *r)
  437. {
  438. int c;
  439. long n;
  440. n = 0;
  441. for(;; r++){
  442. c = *r;
  443. if(L'0'<=c && c<=L'9')
  444. c -= '0';
  445. else
  446. break;
  447. n = n*10 + c;
  448. }
  449. return n;
  450. }
  451. /*
  452. * See if there is a rune corresponding to the accented
  453. * version of r with accent acc (acc in [LIGS..LIGE-1]),
  454. * and return it if so, else return NONE.
  455. */
  456. Rune
  457. liglookup(Rune acc, Rune r)
  458. {
  459. Rune *p;
  460. if(acc < LIGS || acc >= LIGE)
  461. return NONE;
  462. for(p = ligtab[acc-LIGS].pairs; *p; p += 2)
  463. if(*p == r)
  464. return *(p+1);
  465. return NONE;
  466. }
  467. /*
  468. * Maintain a translation table stack (a translation table
  469. * is an array of Runes indexed by bytes or 7-bit bytes).
  470. * If starting is true, push the curtab onto the stack
  471. * and return newtab; else pop the top of the stack and
  472. * return it.
  473. * If curtab is 0, initialize the stack and return.
  474. */
  475. Rune *
  476. changett(Rune *curtab, Rune *newtab, int starting)
  477. {
  478. if(curtab == 0) {
  479. ntt = 0;
  480. return 0;
  481. }
  482. if(starting) {
  483. if(ntt >= asize(ttabstack)) {
  484. if(debug)
  485. err("translation stack overflow");
  486. return curtab;
  487. }
  488. ttabstack[ntt++] = curtab;
  489. return newtab;
  490. } else {
  491. if(ntt == 0) {
  492. if(debug)
  493. err("translation stack underflow");
  494. return curtab;
  495. }
  496. return ttabstack[--ntt];
  497. }
  498. }