look.c 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include <u.h>
  10. #include <libc.h>
  11. #include <bio.h>
  12. /* Macros for Rune support of ctype.h-like functions */
  13. #define isupper(r) (L'A' <= (r) && (r) <= L'Z')
  14. #define islower(r) (L'a' <= (r) && (r) <= L'z')
  15. #define isalpha(r) (isupper(r) || islower(r))
  16. #define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF)
  17. #define isdigit(r) (L'0' <= (r) && (r) <= L'9')
  18. #define isalnum(r) (isalpha(r) || isdigit(r))
  19. #define isspace(r) ((r) == L' ' || (r) == L'\t' \
  20. || (0x0A <= (r) && (r) <= 0x0D))
  21. #define tolower(r) ((r)-'A'+'a')
  22. #define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))
  23. #define WORDSIZ 4000
  24. char *filename = "/lib/words";
  25. Biobuf *dfile;
  26. Biobuf bout;
  27. Biobuf bin;
  28. int fold;
  29. int direc;
  30. int exact;
  31. int iflag;
  32. int rev = 1; /*-1 for reverse-ordered file, not implemented*/
  33. int (*compare)(Rune*, Rune*);
  34. Rune tab = '\t';
  35. Rune entry[WORDSIZ];
  36. Rune word[WORDSIZ];
  37. Rune key[50], orig[50];
  38. Rune latin_fold_tab[] =
  39. {
  40. /* Table to fold latin 1 characters to ASCII equivalents
  41. based at Rune value 0xc0
  42. À Á Â Ã Ä Å Æ Ç
  43. È É Ê Ë Ì Í Î Ï
  44. Ð Ñ Ò Ó Ô Õ Ö ×
  45. Ø Ù Ú Û Ü Ý Þ ß
  46. à á â ã ä å æ ç
  47. è é ê ë ì í î ï
  48. ð ñ ò ó ô õ ö ÷
  49. ø ù ú û ü ý þ ÿ
  50. */
  51. 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
  52. 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
  53. 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
  54. 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,
  55. 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
  56. 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
  57. 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
  58. 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y',
  59. };
  60. int locate(void);
  61. int acomp(Rune*, Rune*);
  62. int getword(Biobuf*, Rune *rp, int n);
  63. void torune(char*, Rune*);
  64. void rcanon(Rune*, Rune*);
  65. int ncomp(Rune*, Rune*);
  66. void
  67. usage(void)
  68. {
  69. fprint(2, "usage: %s [-dfinx] [-t c] [string] [file]\n", argv0);
  70. exits("usage");
  71. }
  72. void
  73. main(int argc, char *argv[])
  74. {
  75. int n;
  76. Binit(&bin, 0, OREAD);
  77. Binit(&bout, 1, OWRITE);
  78. compare = acomp;
  79. ARGBEGIN{
  80. case 'd':
  81. direc++;
  82. break;
  83. case 'f':
  84. fold++;
  85. break;
  86. case 'i':
  87. iflag++;
  88. break;
  89. case 'n':
  90. compare = ncomp;
  91. break;
  92. case 't':
  93. chartorune(&tab, EARGF(usage()));
  94. break;
  95. case 'x':
  96. exact++;
  97. break;
  98. default:
  99. fprint(2, "%s: bad option %c\n", argv0, ARGC());
  100. usage();
  101. } ARGEND
  102. if(!iflag){
  103. if(argc >= 1) {
  104. torune(argv[0], orig);
  105. argv++;
  106. argc--;
  107. } else
  108. iflag++;
  109. }
  110. if(argc < 1) {
  111. direc++;
  112. fold++;
  113. } else
  114. filename = argv[0];
  115. if (!iflag)
  116. rcanon(orig, key);
  117. dfile = Bopen(filename, OREAD);
  118. if(dfile == 0) {
  119. fprint(2, "look: can't open %s\n", filename);
  120. exits("no dictionary");
  121. }
  122. if(!iflag)
  123. if(!locate())
  124. exits("not found");
  125. do {
  126. if(iflag) {
  127. Bflush(&bout);
  128. if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
  129. exits(0);
  130. rcanon(orig, key);
  131. if(!locate())
  132. continue;
  133. }
  134. if (!exact || !acomp(word, key))
  135. Bprint(&bout, "%S\n", entry);
  136. while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
  137. rcanon(entry, word);
  138. n = compare(key, word);
  139. switch(n) {
  140. case -1:
  141. if(exact)
  142. break;
  143. case 0:
  144. if (!exact || !acomp(word, orig))
  145. Bprint(&bout, "%S\n", entry);
  146. continue;
  147. }
  148. break;
  149. }
  150. } while(iflag);
  151. exits(0);
  152. }
  153. int
  154. locate(void)
  155. {
  156. int64_t top, bot, mid;
  157. int32_t c;
  158. int n;
  159. bot = 0;
  160. top = Bseek(dfile, 0, 2);
  161. for(;;) {
  162. mid = (top+bot) / 2;
  163. Bseek(dfile, mid, 0);
  164. do
  165. c = Bgetrune(dfile);
  166. while(c>=0 && c!='\n');
  167. mid = Boffset(dfile);
  168. if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
  169. break;
  170. rcanon(entry, word);
  171. n = compare(key, word);
  172. switch(n) {
  173. case -2:
  174. case -1:
  175. case 0:
  176. if(top <= mid)
  177. break;
  178. top = mid;
  179. continue;
  180. case 1:
  181. case 2:
  182. bot = mid;
  183. continue;
  184. }
  185. break;
  186. }
  187. Bseek(dfile, bot, 0);
  188. while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
  189. rcanon(entry, word);
  190. n = compare(key, word);
  191. switch(n) {
  192. case -2:
  193. return 0;
  194. case -1:
  195. if(exact)
  196. return 0;
  197. case 0:
  198. return 1;
  199. case 1:
  200. case 2:
  201. continue;
  202. }
  203. }
  204. return 0;
  205. }
  206. /*
  207. * acomp(s, t) returns:
  208. * -2 if s strictly precedes t
  209. * -1 if s is a prefix of t
  210. * 0 if s is the same as t
  211. * 1 if t is a prefix of s
  212. * 2 if t strictly precedes s
  213. */
  214. int
  215. acomp(Rune *s, Rune *t)
  216. {
  217. int cs, ct;
  218. for(;;) {
  219. cs = *s;
  220. ct = *t;
  221. if(cs != ct)
  222. break;
  223. if(cs == 0)
  224. return 0;
  225. s++;
  226. t++;
  227. }
  228. if(cs == 0)
  229. return -1;
  230. if(ct == 0)
  231. return 1;
  232. if(cs < ct)
  233. return -2;
  234. return 2;
  235. }
  236. void
  237. torune(char *old, Rune *new)
  238. {
  239. do old += chartorune(new, old);
  240. while(*new++);
  241. }
  242. void
  243. rcanon(Rune *old, Rune *new)
  244. {
  245. Rune r;
  246. while((r = *old++) && r != tab) {
  247. if (islatin1(r) && latin_fold_tab[r-0xc0])
  248. r = latin_fold_tab[r-0xc0];
  249. if(direc)
  250. if(!(isalnum(r) || r == L' ' || r == L'\t'))
  251. continue;
  252. if(fold)
  253. if(isupper(r))
  254. r = tolower(r);
  255. *new++ = r;
  256. }
  257. *new = 0;
  258. }
  259. int
  260. ncomp(Rune *s, Rune *t)
  261. {
  262. Rune *is, *it, *js, *jt;
  263. int a, b;
  264. int ssgn, tsgn;
  265. while(isspace(*s))
  266. s++;
  267. while(isspace(*t))
  268. t++;
  269. ssgn = tsgn = -2*rev;
  270. if(*s == '-') {
  271. s++;
  272. ssgn = -ssgn;
  273. }
  274. if(*t == '-') {
  275. t++;
  276. tsgn = -tsgn;
  277. }
  278. for(is = s; isdigit(*is); is++)
  279. ;
  280. for(it = t; isdigit(*it); it++)
  281. ;
  282. js = is;
  283. jt = it;
  284. a = 0;
  285. if(ssgn == tsgn)
  286. while(it>t && is>s)
  287. if((b = *--it - *--is) != 0)
  288. a = b;
  289. while(is > s)
  290. if(*--is != '0')
  291. return -ssgn;
  292. while(it > t)
  293. if(*--it != '0')
  294. return tsgn;
  295. if(a)
  296. return sgn(a)*ssgn;
  297. if(*(s=js) == '.')
  298. s++;
  299. if(*(t=jt) == '.')
  300. t++;
  301. if(ssgn == tsgn)
  302. while(isdigit(*s) && isdigit(*t))
  303. if((a = *t++ - *s++) != 0)
  304. return sgn(a)*ssgn;
  305. while(isdigit(*s))
  306. if(*s++ != '0')
  307. return -ssgn;
  308. while(isdigit(*t))
  309. if(*t++ != '0')
  310. return tsgn;
  311. return 0;
  312. }
  313. int
  314. getword(Biobuf *f, Rune *rp, int n)
  315. {
  316. int32_t c;
  317. while(n-- > 0) {
  318. c = Bgetrune(f);
  319. if(c < 0)
  320. return 0;
  321. if(c == '\n') {
  322. *rp = L'\0';
  323. return 1;
  324. }
  325. *rp++ = c;
  326. }
  327. fprint(2, "Look: word too int32_t. Bailing out.\n");
  328. return 0;
  329. }