ptx1.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  /*
  Permuted title index.

  ptx [-t] [-i ignore] [-o only] [-w num] [-r]
      [-c commands] [-g gap] [-h hole] [-b breaks] [-f] [input]

  Ptx reads the input file and permutes on the words in it.
  It excludes all words in the ignore file (-i).
  Alternatively, it includes only the words in the only file (-o).
  If neither is given, it excludes the words in
  /sys/lib/man/permind/ignore.
  The width of the output line (except for the -r field)
  can be changed to num with -w;
  num is a troff width measure, in ens by default.
  With no -w, num is 72n, or 100n under -t.
  The -f flag tells the program to fold case when sorting the output.
  The -t flag says the output is for troff.
  Font specifier -F implies -t (note: the option switch in this file
  has no -F case).
  -g sets the gutter.
  -h sets the hole between wrapped segments.
  -b names a file whose characters are added to the default
  break characters (blank, tab and newline).
  -r takes the first word on each line and makes it
  into a fifth field.
  -c inserts troff commands for font-setting etc. at the beginning.
  */
  31. #include <u.h>
  32. #include <libc.h>
  33. #include <stdio.h>
  34. #include <ctype.h>
  /* Ignore-word file used when neither -i nor -o is given. */
  #define DEFLTX "/sys/lib/man/permind/ignore"
  #define TILDE 0177 /* actually RUBOUT, not ~ */
  #define N 30
  /* Parenthesized so MAX stays a single operand inside larger expressions. */
  #define MAX (N*BUFSIZ)
  #define LMAX 2048
  #define MAXT 2048
  #define MASK 03777 /* MAXT-1: keeps hash values inside hasht[] */
  #define ON 1
  /*
   * Break-character test.  The index goes through unsigned char so that a
   * plain (possibly signed) char holding a byte >= 0x80 cannot produce a
   * negative index into btable[].
   */
  #define isabreak(c) (btable[(unsigned char)(c)])
  /* input scanning and word selection */
  char *getline(void);
  void cmpline(char *pend);
  int cmpword(char *cpp, char *pend, char *hpp);
  void putline(char *strt, char *end);

  /* diagnostics */
  void msg(char *s, char *arg);
  void extra(int c);
  void diag(char *s, char *arg);

  /* roff width pass and final formatting */
  void makek(void);
  void getsort(void);
  char *rtrim(char *, char *, int);
  char *ltrim(char *, char *, int);
  void putout(char *strt, char *end);
  void setlen(void);
  void getlen(void);

  /* ignore/only word hash table */
  int hash(char *strtp, char *endp);
  int storeh(int num, char *strtp);
  60. int status;
  61. char *hasht[MAXT];
  62. char line[LMAX];
  63. char mark[LMAX];
  64. struct word {
  65. char *p;
  66. int w;
  67. } word[LMAX/2];
  68. char btable[256];
  69. int ignore;
  70. int only;
  71. char *lenarg;
  72. char *gutarg;
  73. char *holarg;
  74. int llen;
  75. int spacesl;
  76. int gutter;
  77. int hole;
  78. int mlen = LMAX;
  79. int halflen;
  80. int rflag;
  81. char *strtbufp, *endbufp;
  82. char *empty = "";
  83. char *font = "R";
  84. char *roff = "/bin/nroff";
  85. char *troff = "/bin/troff";
  86. char *infile = "/fd/0";
  87. FILE *inptr;
  88. FILE *outptr = stdout;
  89. char *sortfile = "ptxsort"; /* output of sort program */
  90. char nofold[] = {'-', 'd', 't', TILDE, 0};
  91. char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
  92. char *sortopt = nofold;
  93. FILE *sortptr;
  94. char *kfile = "ptxmark"; /* ptxsort + troff goo for widths */
  95. FILE *kptr;
  96. char *wfile = "ptxwidth"; /* widths of words in ptxsort */
  97. FILE *wptr;
  98. char *bfile; /*contains user supplied break chars */
  99. FILE *bptr;
  100. char *cmds;
  101. main(int argc, char **argv)
  102. {
  103. int c;
  104. char *bufp;
  105. char *pend;
  106. char *xfile;
  107. FILE *xptr;
  108. Waitmsg *w;
  109. /* argument decoding */
  110. xfile = DEFLTX;
  111. ARGBEGIN {
  112. case 'r':
  113. rflag = 1;
  114. break;
  115. case 'f':
  116. sortopt = fold;
  117. break;
  118. case 'w':
  119. if(lenarg)
  120. extra(ARGC());
  121. lenarg = ARGF();
  122. break;
  123. case 'c':
  124. if(cmds)
  125. extra(ARGC());
  126. cmds = ARGF();
  127. case 't':
  128. roff = troff;
  129. break;
  130. case 'g':
  131. if(gutarg)
  132. extra(ARGC());
  133. gutarg = ARGF();
  134. break;
  135. case 'h':
  136. if(holarg)
  137. extra(ARGC());
  138. holarg = ARGF();
  139. break;
  140. case 'i':
  141. if(only|ignore)
  142. extra(ARGC());
  143. ignore++;
  144. xfile = ARGF();
  145. break;
  146. case 'o':
  147. if(only|ignore)
  148. extra(ARGC());
  149. only++;
  150. xfile = ARGF();
  151. break;
  152. case 'b':
  153. if(bfile)
  154. extra(ARGC());
  155. bfile = ARGF();
  156. break;
  157. default:
  158. diag("Illegal argument:",*argv);
  159. } ARGEND
  160. if(lenarg == 0)
  161. lenarg = troff? "100n": "72n";
  162. if(gutarg == 0)
  163. gutarg = "3n";
  164. if(holarg == 0)
  165. holarg = gutarg;
  166. if(argc > 1)
  167. diag("Too many filenames",empty);
  168. if(argc == 1)
  169. infile = *argv;
  170. /* Default breaks of blank, tab and newline */
  171. btable[' '] = ON;
  172. btable['\t'] = ON;
  173. btable['\n'] = ON;
  174. if(bfile) {
  175. if((bptr = fopen(bfile,"r")) == NULL)
  176. diag("Cannot open break char file",bfile);
  177. while((c = getc(bptr)) != EOF)
  178. btable[c] = ON;
  179. }
  180. /*
  181. Allocate space for a buffer. If only or ignore file present
  182. read it into buffer. Else read in default ignore file
  183. and put resulting words in buffer.
  184. */
  185. if((strtbufp = calloc(N,BUFSIZ)) == NULL)
  186. diag("Out of memory space",empty);
  187. bufp = strtbufp;
  188. endbufp = strtbufp+MAX;
  189. if((xptr = fopen(xfile,"r")) == NULL)
  190. diag("Cannot open file",xfile);
  191. while(bufp < endbufp && (c = getc(xptr)) != EOF)
  192. if(isabreak(c)) {
  193. if(storeh(hash(strtbufp,bufp),strtbufp))
  194. diag("Too many words",xfile);
  195. *bufp++ = '\0';
  196. strtbufp = bufp;
  197. } else
  198. *bufp++ = (isupper(c)?tolower(c):c);
  199. if (bufp >= endbufp)
  200. diag("Too many words in file",xfile);
  201. endbufp = --bufp;
  202. /* open output file for sorting */
  203. if((sortptr = fopen(sortfile, "w")) == NULL)
  204. diag("Cannot open output for sorting:",sortfile);
  205. /*
  206. get a line of data and compare each word for
  207. inclusion or exclusion in the sort phase
  208. */
  209. if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
  210. diag("Cannot open data: ",infile);
  211. while((pend = getline()) != NULL)
  212. cmpline(pend);
  213. fclose(sortptr);
  214. if(fork()==0){
  215. execl("/bin/sort", "sort", sortopt, "+0", "-1", "+1",
  216. sortfile, "-o", sortfile, 0);
  217. diag("Sort exec failed","");
  218. }
  219. if((w = wait()) == NULL || w->msg[0] != '\0')
  220. diag("Sort failed","");
  221. free(w);
  222. makek();
  223. if(fork()==0){
  224. if(dup(create(wfile,OWRITE|OTRUNC,0666),1) == -1)
  225. diag("Cannot create width file:",wfile);
  226. execl(roff, roff, "-a", kfile, 0);
  227. diag("Sort exec failed","");
  228. }
  229. if((w = wait()) == NULL || w->msg[0] != '\0')
  230. diag("Sort failed","");
  231. free(w);
  232. getsort();
  233. /*
  234. remove(sortfile);
  235. remove(kfile);
  236. */
  237. fflush(0);
  238. _exits(0);
  239. /* I don't know what's wrong with the atexit func... */
  240. /* exits(0); */
  241. }
  /*
   * Print one diagnostic line on standard error:
   * the program name, the message s, a blank, then arg.
   */
  void
  msg(char *s, char *arg)
  {
      FILE *err = stderr;

      fprintf(err, "ptx: %s %s\n", s, arg);
  }
  /*
   * Complain that option letter c was given more than once.
   * Builds the text "-c." and hands it to diag(), which exits.
   */
  void
  extra(int c)
  {
      char opt[4];

      opt[0] = '-';
      opt[1] = c;
      opt[2] = '.';
      opt[3] = '\0';
      diag("Extra option", opt);
  }
  /*
   * Fatal error: report s and arg through msg(), then terminate the
   * process with s as the exit string.  The temporary files are left
   * in place (their removal is disabled below).
   */
  void
  diag(char *s, char *arg)
  {
      msg(s, arg);
      /*
      remove(sortfile);
      remove(kfile);
      */
      exits(s);
  }
  264. char*
  265. getline(void)
  266. {
  267. int c;
  268. char *linep;
  269. char *endlinep;
  270. endlinep= line + mlen;
  271. linep = line;
  272. /* Throw away leading white space */
  273. while(isspace(c = getc(inptr)))
  274. ;
  275. if(c==EOF)
  276. return(0);
  277. ungetc(c,inptr);
  278. while((c = getc(inptr)) != EOF)
  279. switch (c) {
  280. case '\t':
  281. if(linep<endlinep)
  282. *linep++ = ' ';
  283. break;
  284. case '\n':
  285. while(isspace(*--linep))
  286. ;
  287. *++linep = '\n';
  288. return(linep);
  289. default:
  290. if(linep < endlinep)
  291. *linep++ = c;
  292. break;
  293. }
  294. return(0);
  295. }
  296. void
  297. cmpline(char *pend)
  298. {
  299. char *pstrt, *pchar, *cp;
  300. char **hp;
  301. int flag;
  302. pchar = line;
  303. if(rflag)
  304. while(pchar < pend && !isspace(*pchar))
  305. pchar++;
  306. while(pchar < pend){
  307. /* eliminate white space */
  308. if(isabreak(*pchar++))
  309. continue;
  310. pstrt = --pchar;
  311. flag = 1;
  312. while(flag){
  313. if(isabreak(*pchar)) {
  314. hp = &hasht[hash(pstrt,pchar)];
  315. pchar--;
  316. while(cp = *hp++){
  317. if(hp == &hasht[MAXT])
  318. hp = hasht;
  319. /* possible match */
  320. if(cmpword(pstrt,pchar,cp)){
  321. /* exact match */
  322. if(!ignore && only)
  323. putline(pstrt,pend);
  324. flag = 0;
  325. break;
  326. }
  327. }
  328. /* no match */
  329. if(flag){
  330. if(ignore || !only)
  331. putline(pstrt,pend);
  332. flag = 0;
  333. }
  334. }
  335. pchar++;
  336. }
  337. }
  338. }
  /*
   * Case-insensitive comparison of a word in the text with a stored word.
   *
   * cpp  - first character of the word in the text
   * pend - last character of that word (not one past it)
   * hpp  - NUL-terminated stored word, already in lower case
   *
   * Returns 1 when the text word, with upper case folded to lower,
   * equals hpp and has the same length; 0 otherwise.
   */
  int
  cmpword(char *cpp, char *pend, char *hpp)
  {
      unsigned char have, want;

      while(*hpp != '\0'){
          /* <ctype.h> functions are only defined for unsigned char values and EOF */
          have = (unsigned char)*cpp++;
          want = (unsigned char)*hpp++;
          if((isupper(have) ? tolower(have) : have) != want)
              return(0);
      }
      /* all of hpp matched: equal only if the scan ended on the last character */
      if(--cpp == pend)
          return(1);
      return(0);
  }
  352. void
  353. putline(char *strt, char *end)
  354. {
  355. char *cp;
  356. for(cp=strt; cp<end; cp++)
  357. putc(*cp, sortptr);
  358. /* Add extra blank before TILDE to sort correctly with -fd option */
  359. putc(' ',sortptr);
  360. putc(TILDE,sortptr);
  361. for (cp=line; cp<strt; cp++)
  362. putc(*cp,sortptr);
  363. putc('\n',sortptr);
  364. }
  365. void
  366. makek(void)
  367. {
  368. int i, c;
  369. int nr = 0;
  370. if((sortptr = fopen(sortfile,"r")) == NULL)
  371. diag("Cannot open sorted data:",sortfile);
  372. if((kptr = fopen(kfile,"w")) == NULL)
  373. diag("Cannot create mark file:",kfile);
  374. if(cmds)
  375. fprintf(kptr,"%s\n",cmds);
  376. fprintf(kptr,
  377. ".nf\n"
  378. ".pl 1\n"
  379. ".tr %c\\&\n", TILDE);
  380. setlen();
  381. while((c = getc(sortptr)) != EOF) {
  382. if(nr == 0) {
  383. fprintf(kptr,".di xx\n");
  384. nr++;
  385. }
  386. if(c == '\n') {
  387. fprintf(kptr,"\n.di\n");
  388. for(i=1; i<nr; i++)
  389. fprintf(kptr,"\\n(%.2d ",i);
  390. fprintf(kptr,"\n");
  391. nr = 0;
  392. continue;
  393. }
  394. if(isspace(c))
  395. fprintf(kptr,"\\k(%.2d",nr++);
  396. putc(c,kptr);
  397. }
  398. fclose(sortptr);
  399. fclose(kptr);
  400. }
  401. void
  402. getsort(void)
  403. {
  404. char *tilde, *linep, *markp;
  405. int i0, i1, i2, i3, i4, i5, i6, i7, w0, w6;
  406. if((sortptr = fopen(sortfile, "r")) == NULL)
  407. diag("Cannot open sorted data:", sortfile);
  408. if((wptr = fopen(wfile, "r")) == NULL)
  409. diag("Cannot open width file:", wfile);
  410. getlen();
  411. halflen = (llen-gutter)/2;
  412. while(fgets(line, sizeof(line), sortptr) != NULL) {
  413. if(fgets(mark, sizeof(mark), wptr) == NULL)
  414. diag("Phase error 1: premature EOF on width file",
  415. wfile);
  416. linep = line;
  417. markp = mark;
  418. i3 = i7 = 0;
  419. word[i7].p = linep;
  420. word[i7].w = 0;
  421. for(linep=line; *linep; linep++) {
  422. if(*linep == TILDE)
  423. i3 = i7;
  424. else if(*linep == '\n')
  425. break;
  426. else if(isspace(*linep)) {
  427. i7++;
  428. word[i7].p = linep;
  429. if(!markp)
  430. diag("Phase error 2: no widths for summary",
  431. line);
  432. word[i7].w = atoi(markp);
  433. markp = strchr(markp+1, ' ');
  434. }
  435. }
  436. i0 = 0;
  437. for(i1=i0; i1<i3; i1++)
  438. if(word[i1+1].w - word[i0].w >= halflen - spacesl)
  439. break;
  440. w0 = word[i1].w - word[i0].w;
  441. i4 = i3 + rflag;
  442. for(i6 = i7; i6>i4; i6--)
  443. if(word[i7].w - word[i6-1].w >= halflen)
  444. break;
  445. w6 = word[i7].w - word[i6].w - spacesl;
  446. for(i2=i1 ; i2<i3; i2++)
  447. if(word[i2+1].w - word[i1].w + w6 >= halflen-hole)
  448. break;
  449. for(i5=i6; i5>i4; i5--)
  450. if(word[i6].w - word[i5-1].w + w0 >= halflen-hole)
  451. break;
  452. printf(".xx \"");
  453. putout(word[i1].p+1,word[i2].p);
  454. if(i1<i2 && i2<i3) putchar('/');
  455. printf("\" \"");
  456. if(i5>i4 && i6==i5) putchar('/');
  457. putout(word[i6].p+1+(i6==i3),word[i7].p);
  458. printf("\" \"");
  459. putout(word[i0].p,word[i1].p);
  460. if(i2<i3 && i1==i2) putchar('/');
  461. printf("\" \"");
  462. if(i5>i4 && i6>i5) putchar('/');
  463. putout(word[i5].p+1+(i5==i3),word[i6].p);
  464. if(rflag) {
  465. printf("\" \"");
  466. putout(word[i3].p+2,word[i4].p);
  467. }
  468. printf("\"\n");
  469. }
  470. }
  471. void
  472. putout(char *strt, char *end)
  473. {
  474. char *cp;
  475. for(cp=strt; cp<end; )
  476. putc(*cp++,outptr);
  477. }
  478. void
  479. setlen(void)
  480. {
  481. fprintf(kptr,
  482. "\\w'\\h'%s''\n"
  483. "\\w' /'\n"
  484. "\\w'\\h'%s''\n"
  485. "\\w'\\h'%s''\n",lenarg,gutarg,holarg);
  486. }
  487. void
  488. getlen(void)
  489. {
  490. char s[128];
  491. s[0] = '\0';
  492. fgets(s,sizeof(s),kptr);
  493. llen = atoi(s);
  494. fgets(s,sizeof(s),kptr);
  495. spacesl = atoi(s);
  496. fgets(s,sizeof(s),kptr);
  497. gutter = atoi(s);
  498. fgets(s,sizeof(s),kptr);
  499. hole = atoi(s);
  500. if(hole < 2*spacesl)
  501. hole = 2*spacesl;
  502. }
  503. int
  504. hash(char *strtp, char *endp)
  505. {
  506. char *cp, c;
  507. int i, j, k;
  508. /* Return zero hash number for single letter words */
  509. if((endp - strtp) == 1)
  510. return(0);
  511. cp = strtp;
  512. c = *cp++;
  513. i = (isupper(c)?tolower(c):c);
  514. c = *cp;
  515. j = (isupper(c)?tolower(c):c);
  516. i = i*j;
  517. cp = --endp;
  518. c = *cp--;
  519. k = (isupper(c)?tolower(c):c);
  520. c = *cp;
  521. j = (isupper(c)?tolower(c):c);
  522. j = k*j;
  523. return (i ^ (j>>2)) & MASK;
  524. }
  525. int
  526. storeh(int num, char *strtp)
  527. {
  528. int i;
  529. for(i=num; i<MAXT; i++)
  530. if(hasht[i] == 0) {
  531. hasht[i] = strtp;
  532. return(0);
  533. }
  534. for(i=0; i<num; i++)
  535. if(hasht[i] == 0) {
  536. hasht[i] = strtp;
  537. return(0);
  538. }
  539. return(1);
  540. }