/* look.c - print the lines of a sorted word list that begin with a given key */
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. /* Macros for Rune support of ctype.h-like functions */
  5. #define isupper(r) (L'A' <= (r) && (r) <= L'Z')
  6. #define islower(r) (L'a' <= (r) && (r) <= L'z')
  7. #define isalpha(r) (isupper(r) || islower(r))
  8. #define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF)
  9. #define isdigit(r) (L'0' <= (r) && (r) <= L'9')
  10. #define isalnum(r) (isalpha(r) || isdigit(r))
  11. #define isspace(r) ((r) == L' ' || (r) == L'\t' \
  12. || (0x0A <= (r) && (r) <= 0x0D))
  13. #define tolower(r) ((r)-'A'+'a')
  14. #define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))
  15. #define WORDSIZ 4000
  16. char *filename = "/lib/words";
  17. Biobuf *dfile;
  18. Biobuf bout;
  19. Biobuf bin;
  20. int fold;
  21. int direc;
  22. int exact;
  23. int iflag;
  24. int rev = 1; /*-1 for reverse-ordered file, not implemented*/
  25. int (*compare)(Rune*, Rune*);
  26. Rune tab = '\t';
  27. Rune entry[WORDSIZ];
  28. Rune word[WORDSIZ];
  29. Rune key[50], orig[50];
  30. Rune latin_fold_tab[] =
  31. {
  32. /* Table to fold latin 1 characters to ASCII equivalents
  33. based at Rune value 0xc0
  34. À Á Â Ã Ä Å Æ Ç
  35. È É Ê Ë Ì Í Î Ï
  36. Ð Ñ Ò Ó Ô Õ Ö ×
  37. Ø Ù Ú Û Ü Ý Þ ß
  38. à á â ã ä å æ ç
  39. è é ê ë ì í î ï
  40. ð ñ ò ó ô õ ö ÷
  41. ø ù ú û ü ý þ ÿ
  42. */
  43. 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
  44. 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
  45. 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
  46. 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,
  47. 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
  48. 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
  49. 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
  50. 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y',
  51. };
  52. int locate(void);
  53. int acomp(Rune*, Rune*);
  54. int getword(Biobuf*, Rune *rp, int n);
  55. void torune(char*, Rune*);
  56. void rcanon(Rune*, Rune*);
  57. int ncomp(Rune*, Rune*);
  58. void
  59. usage(void)
  60. {
  61. fprint(2, "usage: %s [-dfinx] [-t c] [string] [file]\n", argv0);
  62. exits("usage");
  63. }
  64. void
  65. main(int argc, char *argv[])
  66. {
  67. int n;
  68. Binit(&bin, 0, OREAD);
  69. Binit(&bout, 1, OWRITE);
  70. compare = acomp;
  71. ARGBEGIN{
  72. case 'd':
  73. direc++;
  74. break;
  75. case 'f':
  76. fold++;
  77. break;
  78. case 'i':
  79. iflag++;
  80. break;
  81. case 'n':
  82. compare = ncomp;
  83. break;
  84. case 't':
  85. chartorune(&tab, EARGF(usage()));
  86. break;
  87. case 'x':
  88. exact++;
  89. break;
  90. default:
  91. fprint(2, "%s: bad option %c\n", argv0, ARGC());
  92. usage();
  93. } ARGEND
  94. if(!iflag){
  95. if(argc >= 1) {
  96. torune(argv[0], orig);
  97. argv++;
  98. argc--;
  99. } else
  100. iflag++;
  101. }
  102. if(argc < 1) {
  103. direc++;
  104. fold++;
  105. } else
  106. filename = argv[0];
  107. if (!iflag)
  108. rcanon(orig, key);
  109. dfile = Bopen(filename, OREAD);
  110. if(dfile == 0) {
  111. fprint(2, "look: can't open %s\n", filename);
  112. exits("no dictionary");
  113. }
  114. if(!iflag)
  115. if(!locate())
  116. exits("not found");
  117. do {
  118. if(iflag) {
  119. Bflush(&bout);
  120. if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
  121. exits(0);
  122. rcanon(orig, key);
  123. if(!locate())
  124. continue;
  125. }
  126. if (!exact || !acomp(word, key))
  127. Bprint(&bout, "%S\n", entry);
  128. while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
  129. rcanon(entry, word);
  130. n = compare(key, word);
  131. switch(n) {
  132. case -1:
  133. if(exact)
  134. break;
  135. case 0:
  136. if (!exact || !acomp(word, orig))
  137. Bprint(&bout, "%S\n", entry);
  138. continue;
  139. }
  140. break;
  141. }
  142. } while(iflag);
  143. exits(0);
  144. }
  145. int
  146. locate(void)
  147. {
  148. vlong top, bot, mid;
  149. long c;
  150. int n;
  151. bot = 0;
  152. top = Bseek(dfile, 0, 2);
  153. for(;;) {
  154. mid = (top+bot) / 2;
  155. Bseek(dfile, mid, 0);
  156. do
  157. c = Bgetrune(dfile);
  158. while(c>=0 && c!='\n');
  159. mid = Boffset(dfile);
  160. if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
  161. break;
  162. rcanon(entry, word);
  163. n = compare(key, word);
  164. switch(n) {
  165. case -2:
  166. case -1:
  167. case 0:
  168. if(top <= mid)
  169. break;
  170. top = mid;
  171. continue;
  172. case 1:
  173. case 2:
  174. bot = mid;
  175. continue;
  176. }
  177. break;
  178. }
  179. Bseek(dfile, bot, 0);
  180. while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
  181. rcanon(entry, word);
  182. n = compare(key, word);
  183. switch(n) {
  184. case -2:
  185. return 0;
  186. case -1:
  187. if(exact)
  188. return 0;
  189. case 0:
  190. return 1;
  191. case 1:
  192. case 2:
  193. continue;
  194. }
  195. }
  196. return 0;
  197. }
  198. /*
  199. * acomp(s, t) returns:
  200. * -2 if s strictly precedes t
  201. * -1 if s is a prefix of t
  202. * 0 if s is the same as t
  203. * 1 if t is a prefix of s
  204. * 2 if t strictly precedes s
  205. */
  206. int
  207. acomp(Rune *s, Rune *t)
  208. {
  209. int cs, ct;
  210. for(;;) {
  211. cs = *s;
  212. ct = *t;
  213. if(cs != ct)
  214. break;
  215. if(cs == 0)
  216. return 0;
  217. s++;
  218. t++;
  219. }
  220. if(cs == 0)
  221. return -1;
  222. if(ct == 0)
  223. return 1;
  224. if(cs < ct)
  225. return -2;
  226. return 2;
  227. }
  228. void
  229. torune(char *old, Rune *new)
  230. {
  231. do old += chartorune(new, old);
  232. while(*new++);
  233. }
  234. void
  235. rcanon(Rune *old, Rune *new)
  236. {
  237. Rune r;
  238. while((r = *old++) && r != tab) {
  239. if (islatin1(r) && latin_fold_tab[r-0xc0])
  240. r = latin_fold_tab[r-0xc0];
  241. if(direc)
  242. if(!(isalnum(r) || r == L' ' || r == L'\t'))
  243. continue;
  244. if(fold)
  245. if(isupper(r))
  246. r = tolower(r);
  247. *new++ = r;
  248. }
  249. *new = 0;
  250. }
  251. int
  252. ncomp(Rune *s, Rune *t)
  253. {
  254. Rune *is, *it, *js, *jt;
  255. int a, b;
  256. int ssgn, tsgn;
  257. while(isspace(*s))
  258. s++;
  259. while(isspace(*t))
  260. t++;
  261. ssgn = tsgn = -2*rev;
  262. if(*s == '-') {
  263. s++;
  264. ssgn = -ssgn;
  265. }
  266. if(*t == '-') {
  267. t++;
  268. tsgn = -tsgn;
  269. }
  270. for(is = s; isdigit(*is); is++)
  271. ;
  272. for(it = t; isdigit(*it); it++)
  273. ;
  274. js = is;
  275. jt = it;
  276. a = 0;
  277. if(ssgn == tsgn)
  278. while(it>t && is>s)
  279. if(b = *--it - *--is)
  280. a = b;
  281. while(is > s)
  282. if(*--is != '0')
  283. return -ssgn;
  284. while(it > t)
  285. if(*--it != '0')
  286. return tsgn;
  287. if(a)
  288. return sgn(a)*ssgn;
  289. if(*(s=js) == '.')
  290. s++;
  291. if(*(t=jt) == '.')
  292. t++;
  293. if(ssgn == tsgn)
  294. while(isdigit(*s) && isdigit(*t))
  295. if(a = *t++ - *s++)
  296. return sgn(a)*ssgn;
  297. while(isdigit(*s))
  298. if(*s++ != '0')
  299. return -ssgn;
  300. while(isdigit(*t))
  301. if(*t++ != '0')
  302. return tsgn;
  303. return 0;
  304. }
  305. int
  306. getword(Biobuf *f, Rune *rp, int n)
  307. {
  308. long c;
  309. while(n-- > 0) {
  310. c = Bgetrune(f);
  311. if(c < 0)
  312. return 0;
  313. if(c == '\n') {
  314. *rp = L'\0';
  315. return 1;
  316. }
  317. *rp++ = c;
  318. }
  319. fprint(2, "Look: word too long. Bailing out.\n");
  320. return 0;
  321. }