look.c 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. /* Macros for Rune support of ctype.h-like functions */
  5. #define isupper(r) (L'A' <= (r) && (r) <= L'Z')
  6. #define islower(r) (L'a' <= (r) && (r) <= L'z')
  7. #define isalpha(r) (isupper(r) || islower(r))
  8. #define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF)
  9. #define isdigit(r) (L'0' <= (r) && (r) <= L'9')
  10. #define isalnum(r) (isalpha(r) || isdigit(r))
  11. #define isspace(r) ((r) == L' ' || (r) == L'\t' \
  12. || (0x0A <= (r) && (r) <= 0x0D))
  13. #define tolower(r) ((r)-'A'+'a')
  14. #define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))
  15. #define WORDSIZ 4000
  16. char *filename = "/lib/words";
  17. Biobuf *dfile;
  18. Biobuf bout;
  19. Biobuf bin;
  20. int fold;
  21. int direc;
  22. int exact;
  23. int iflag;
  24. int rev = 1; /*-1 for reverse-ordered file, not implemented*/
  25. int (*compare)(Rune*, Rune*);
  26. Rune tab = '\t';
  27. Rune entry[WORDSIZ];
  28. Rune word[WORDSIZ];
  29. Rune key[50], orig[50];
  30. Rune latin_fold_tab[] =
  31. {
  32. /* Table to fold latin 1 characters to ASCII equivalents
  33. based at Rune value 0xc0
  34. À Á Â Ã Ä Å Æ Ç
  35. È É Ê Ë Ì Í Î Ï
  36. Ð Ñ Ò Ó Ô Õ Ö ×
  37. Ø Ù Ú Û Ü Ý Þ ß
  38. à á â ã ä å æ ç
  39. è é ê ë ì í î ï
  40. ð ñ ò ó ô õ ö ÷
  41. ø ù ú û ü ý þ ÿ
  42. */
  43. 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
  44. 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
  45. 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
  46. 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,
  47. 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
  48. 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
  49. 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
  50. 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y',
  51. };
  52. int locate(void);
  53. int acomp(Rune*, Rune*);
  54. int getword(Biobuf*, Rune *rp, int n);
  55. void torune(char*, Rune*);
  56. void rcanon(Rune*, Rune*);
  57. int ncomp(Rune*, Rune*);
  58. void
  59. main(int argc, char *argv[])
  60. {
  61. int n;
  62. Binit(&bin, 0, OREAD);
  63. Binit(&bout, 1, OWRITE);
  64. compare = acomp;
  65. ARGBEGIN{
  66. case 'd':
  67. direc++;
  68. break;
  69. case 'f':
  70. fold++;
  71. break;
  72. case 'i':
  73. iflag++;
  74. break;
  75. case 'n':
  76. compare = ncomp;
  77. break;
  78. case 't':
  79. chartorune(&tab,ARGF());
  80. break;
  81. case 'x':
  82. exact++;
  83. break;
  84. default:
  85. fprint(2, "%s: bad option %c\n", argv0, ARGC());
  86. fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0);
  87. exits("usage");
  88. } ARGEND
  89. if(!iflag){
  90. if(argc >= 1) {
  91. torune(argv[0], orig);
  92. argv++;
  93. argc--;
  94. } else
  95. iflag++;
  96. }
  97. if(argc < 1) {
  98. direc++;
  99. fold++;
  100. } else
  101. filename = argv[0];
  102. if (!iflag)
  103. rcanon(orig, key);
  104. dfile = Bopen(filename, OREAD);
  105. if(dfile == 0) {
  106. fprint(2, "look: can't open %s\n", filename);
  107. exits("no dictionary");
  108. }
  109. if(!iflag)
  110. if(!locate())
  111. exits("not found");
  112. do {
  113. if(iflag) {
  114. Bflush(&bout);
  115. if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
  116. exits(0);
  117. rcanon(orig, key);
  118. if(!locate())
  119. continue;
  120. }
  121. if (!exact || !acomp(word, key))
  122. Bprint(&bout, "%S\n", entry);
  123. while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
  124. rcanon(entry, word);
  125. n = compare(key, word);
  126. switch(n) {
  127. case -1:
  128. if(exact)
  129. break;
  130. case 0:
  131. if (!exact || !acomp(word, orig))
  132. Bprint(&bout, "%S\n", entry);
  133. continue;
  134. }
  135. break;
  136. }
  137. } while(iflag);
  138. exits(0);
  139. }
  140. int
  141. locate(void)
  142. {
  143. long top, bot, mid;
  144. long c;
  145. int n;
  146. bot = 0;
  147. top = Bseek(dfile, 0L, 2);
  148. for(;;) {
  149. mid = (top+bot) / 2;
  150. Bseek(dfile, mid, 0);
  151. do
  152. c = Bgetrune(dfile);
  153. while(c>=0 && c!='\n');
  154. mid = Boffset(dfile);
  155. if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
  156. break;
  157. rcanon(entry, word);
  158. n = compare(key, word);
  159. switch(n) {
  160. case -2:
  161. case -1:
  162. case 0:
  163. if(top <= mid)
  164. break;
  165. top = mid;
  166. continue;
  167. case 1:
  168. case 2:
  169. bot = mid;
  170. continue;
  171. }
  172. break;
  173. }
  174. Bseek(dfile, bot, 0);
  175. while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
  176. rcanon(entry, word);
  177. n = compare(key, word);
  178. switch(n) {
  179. case -2:
  180. return 0;
  181. case -1:
  182. if(exact)
  183. return 0;
  184. case 0:
  185. return 1;
  186. case 1:
  187. case 2:
  188. continue;
  189. }
  190. }
  191. return 0;
  192. }
  193. /*
  194. * acomp(s, t) returns:
  195. * -2 if s strictly precedes t
  196. * -1 if s is a prefix of t
  197. * 0 if s is the same as t
  198. * 1 if t is a prefix of s
  199. * 2 if t strictly precedes s
  200. */
  201. int
  202. acomp(Rune *s, Rune *t)
  203. {
  204. int cs, ct;
  205. for(;;) {
  206. cs = *s;
  207. ct = *t;
  208. if(cs != ct)
  209. break;
  210. if(cs == 0)
  211. return 0;
  212. s++;
  213. t++;
  214. }
  215. if(cs == 0)
  216. return -1;
  217. if(ct == 0)
  218. return 1;
  219. if(cs < ct)
  220. return -2;
  221. return 2;
  222. }
  223. void
  224. torune(char *old, Rune *new)
  225. {
  226. do old += chartorune(new, old);
  227. while(*new++);
  228. }
  229. void
  230. rcanon(Rune *old, Rune *new)
  231. {
  232. Rune r;
  233. while((r = *old++) && r != tab) {
  234. if (islatin1(r) && latin_fold_tab[r-0xc0])
  235. r = latin_fold_tab[r-0xc0];
  236. if(direc)
  237. if(!(isalnum(r) || r == L' ' || r == L'\t'))
  238. continue;
  239. if(fold)
  240. if(isupper(r))
  241. r = tolower(r);
  242. *new++ = r;
  243. }
  244. *new = 0;
  245. }
  246. int
  247. ncomp(Rune *s, Rune *t)
  248. {
  249. Rune *is, *it, *js, *jt;
  250. int a, b;
  251. int ssgn, tsgn;
  252. while(isspace(*s))
  253. s++;
  254. while(isspace(*t))
  255. t++;
  256. ssgn = tsgn = -2*rev;
  257. if(*s == '-') {
  258. s++;
  259. ssgn = -ssgn;
  260. }
  261. if(*t == '-') {
  262. t++;
  263. tsgn = -tsgn;
  264. }
  265. for(is = s; isdigit(*is); is++)
  266. ;
  267. for(it = t; isdigit(*it); it++)
  268. ;
  269. js = is;
  270. jt = it;
  271. a = 0;
  272. if(ssgn == tsgn)
  273. while(it>t && is>s)
  274. if(b = *--it - *--is)
  275. a = b;
  276. while(is > s)
  277. if(*--is != '0')
  278. return -ssgn;
  279. while(it > t)
  280. if(*--it != '0')
  281. return tsgn;
  282. if(a)
  283. return sgn(a)*ssgn;
  284. if(*(s=js) == '.')
  285. s++;
  286. if(*(t=jt) == '.')
  287. t++;
  288. if(ssgn == tsgn)
  289. while(isdigit(*s) && isdigit(*t))
  290. if(a = *t++ - *s++)
  291. return sgn(a)*ssgn;
  292. while(isdigit(*s))
  293. if(*s++ != '0')
  294. return -ssgn;
  295. while(isdigit(*t))
  296. if(*t++ != '0')
  297. return tsgn;
  298. return 0;
  299. }
  300. int
  301. getword(Biobuf *f, Rune *rp, int n)
  302. {
  303. long c;
  304. while(n-- > 0) {
  305. c = Bgetrune(f);
  306. if(c < 0)
  307. return 0;
  308. if(c == '\n') {
  309. *rp = L'\0';
  310. return 1;
  311. }
  312. *rp++ = c;
  313. }
  314. fprint(2, "Look: word too long. Bailing out.\n");
  315. return 0;
  316. }