ptx1.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605
  1. /*
  2. permuted title index
  3. ptx [-t] [-i ignore] [-o only] [-w num] [-r]
  4. [-c commands] [-g gap] [-h hole] [-b breakfile] [-f] [input]
  5. Ptx reads the input file and permutes on words in it.
  6. It excludes all words in the ignore file.
  7. Alternately it includes words in the only file.
  8. if neither is given it excludes the words in
  9. /sys/lib/man/permind/ignore.
  10. The width of the output line (except for -r field)
  11. can be changed to num,
  12. which is a troff width measure, ens by default.
  13. with no -w, num is 72n, or 100n under -t.
  14. the -f flag tells the program to fold the output
  15. the -t flag says the output is for troff
  16. the -c flag implies -t (no -F font option is implemented).
  17. -g sets the gutter
  18. -h sets the hole between wrapped segments
  19. -r takes the first word on each line and makes it
  20. into a fifth field.
  21. -c inserts troff commands for font-setting etc at beginning
  22. */
  23. #include <u.h>
  24. #include <libc.h>
  25. #include <stdio.h>
  26. #include <ctype.h>
  27. #define DEFLTX "/sys/lib/man/permind/ignore"
  28. #define TILDE 0177 /* actually RUBOUT, not ~ */
  29. #define N 30
  30. #define MAX N*BUFSIZ
  31. #define LMAX 2048
  32. #define MAXT 2048
  33. #define MASK 03777
  34. #define ON 1
  35. #define isabreak(c) (btable[c])
  36. char *getline(void);
  37. void msg(char *, char *);
  38. void extra(int);
  39. void diag(char *, char *);
  40. void cmpline(char *);
  41. int cmpword(char *, char *, char *);
  42. void putline(char *, char *);
  43. void makek(void);
  44. void getsort(void);
  45. char *rtrim(char *, char *, int);
  46. char *ltrim(char *, char *, int);
  47. void putout(char *, char *);
  48. void setlen(void);
  49. void getlen(void);
  50. int hash(char *, char *);
  51. int storeh(int, char *);
  52. int status;
  53. char *hasht[MAXT];
  54. char line[LMAX];
  55. char mark[LMAX];
  56. struct word {
  57. char *p;
  58. int w;
  59. } word[LMAX/2];
  60. char btable[256];
  61. int ignore;
  62. int only;
  63. char *lenarg;
  64. char *gutarg;
  65. char *holarg;
  66. int llen;
  67. int spacesl;
  68. int gutter;
  69. int hole;
  70. int mlen = LMAX;
  71. int halflen;
  72. int rflag;
  73. char *strtbufp, *endbufp;
  74. char *empty = "";
  75. char *font = "R";
  76. char *roff = "/bin/nroff";
  77. char *troff = "/bin/troff";
  78. char *infile = "/fd/0";
  79. FILE *inptr;
  80. FILE *outptr = stdout;
  81. char *sortfile = "ptxsort"; /* output of sort program */
  82. char nofold[] = {'-', 'd', 't', TILDE, 0};
  83. char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
  84. char *sortopt = nofold;
  85. FILE *sortptr;
  86. char *kfile = "ptxmark"; /* ptxsort + troff goo for widths */
  87. FILE *kptr;
  88. char *wfile = "ptxwidth"; /* widths of words in ptxsort */
  89. FILE *wptr;
  90. char *bfile; /*contains user supplied break chars */
  91. FILE *bptr;
  92. char *cmds;
  93. main(int argc, char **argv)
  94. {
  95. int c;
  96. char *bufp;
  97. char *pend;
  98. char *xfile;
  99. FILE *xptr;
  100. Waitmsg *w;
  101. /* argument decoding */
  102. xfile = DEFLTX;
  103. ARGBEGIN {
  104. case 'r':
  105. rflag = 1;
  106. break;
  107. case 'f':
  108. sortopt = fold;
  109. break;
  110. case 'w':
  111. if(lenarg)
  112. extra(ARGC());
  113. lenarg = ARGF();
  114. break;
  115. case 'c':
  116. if(cmds)
  117. extra(ARGC());
  118. cmds = ARGF();
  119. case 't':
  120. roff = troff;
  121. break;
  122. case 'g':
  123. if(gutarg)
  124. extra(ARGC());
  125. gutarg = ARGF();
  126. break;
  127. case 'h':
  128. if(holarg)
  129. extra(ARGC());
  130. holarg = ARGF();
  131. break;
  132. case 'i':
  133. if(only|ignore)
  134. extra(ARGC());
  135. ignore++;
  136. xfile = ARGF();
  137. break;
  138. case 'o':
  139. if(only|ignore)
  140. extra(ARGC());
  141. only++;
  142. xfile = ARGF();
  143. break;
  144. case 'b':
  145. if(bfile)
  146. extra(ARGC());
  147. bfile = ARGF();
  148. break;
  149. default:
  150. diag("Illegal argument:",*argv);
  151. } ARGEND
  152. if(lenarg == 0)
  153. lenarg = troff? "100n": "72n";
  154. if(gutarg == 0)
  155. gutarg = "3n";
  156. if(holarg == 0)
  157. holarg = gutarg;
  158. if(argc > 1)
  159. diag("Too many filenames",empty);
  160. if(argc == 1)
  161. infile = *argv;
  162. /* Default breaks of blank, tab and newline */
  163. btable[' '] = ON;
  164. btable['\t'] = ON;
  165. btable['\n'] = ON;
  166. if(bfile) {
  167. if((bptr = fopen(bfile,"r")) == NULL)
  168. diag("Cannot open break char file",bfile);
  169. while((c = getc(bptr)) != EOF)
  170. btable[c] = ON;
  171. }
  172. /*
  173. Allocate space for a buffer. If only or ignore file present
  174. read it into buffer. Else read in default ignore file
  175. and put resulting words in buffer.
  176. */
  177. if((strtbufp = calloc(N,BUFSIZ)) == NULL)
  178. diag("Out of memory space",empty);
  179. bufp = strtbufp;
  180. endbufp = strtbufp+MAX;
  181. if((xptr = fopen(xfile,"r")) == NULL)
  182. diag("Cannot open file",xfile);
  183. while(bufp < endbufp && (c = getc(xptr)) != EOF)
  184. if(isabreak(c)) {
  185. if(storeh(hash(strtbufp,bufp),strtbufp))
  186. diag("Too many words",xfile);
  187. *bufp++ = '\0';
  188. strtbufp = bufp;
  189. } else
  190. *bufp++ = (isupper(c)?tolower(c):c);
  191. if (bufp >= endbufp)
  192. diag("Too many words in file",xfile);
  193. endbufp = --bufp;
  194. /* open output file for sorting */
  195. if((sortptr = fopen(sortfile, "w")) == NULL)
  196. diag("Cannot open output for sorting:",sortfile);
  197. /*
  198. get a line of data and compare each word for
  199. inclusion or exclusion in the sort phase
  200. */
  201. if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
  202. diag("Cannot open data: ",infile);
  203. while((pend = getline()) != NULL)
  204. cmpline(pend);
  205. fclose(sortptr);
  206. if(fork()==0){
  207. execl("/bin/sort", "sort", sortopt, "+0", "-1", "+1",
  208. sortfile, "-o", sortfile, 0);
  209. diag("Sort exec failed","");
  210. }
  211. if((w = wait()) == NULL || w->msg[0] != '\0')
  212. diag("Sort failed","");
  213. free(w);
  214. makek();
  215. if(fork()==0){
  216. if(dup(create(wfile,OWRITE|OTRUNC,0666),1) == -1)
  217. diag("Cannot create width file:",wfile);
  218. execl(roff, roff, "-a", kfile, 0);
  219. diag("Sort exec failed","");
  220. }
  221. if((w = wait()) == NULL || w->msg[0] != '\0')
  222. diag("Sort failed","");
  223. free(w);
  224. getsort();
  225. /*
  226. remove(sortfile);
  227. remove(kfile);
  228. */
  229. fflush(0);
  230. _exits(0);
  231. /* I don't know what's wrong with the atexit func... */
  232. /* exits(0); */
  233. }
  /*
   * msg: print one diagnostic line on standard error, prefixed with the
   * program name; `s' is the message and `arg' its detail.
   */
  void
  msg(char *s, char *arg)
  {
      fprintf(stderr,
          "ptx: %s %s\n",
          s, arg);
  }
  /*
   * extra: complain through diag() that option letter `c' was given
   * more than once (or conflicts with another option).
   */
  void
  extra(int c)
  {
      char opt[] = "-x.";

      /* overwrite the placeholder with the offending option letter */
      opt[1] = c;
      diag("Extra option", opt);
  }
  /*
   * diag: report a fatal error with msg() and terminate the process,
   * using the message text `s' as the exit status.
   * Removal of sortfile and kfile on this path is disabled.
   */
  void
  diag(char *s, char *arg)
  {
      msg(s, arg);
      exits(s);
  }
  256. char*
  257. getline(void)
  258. {
  259. int c;
  260. char *linep;
  261. char *endlinep;
  262. endlinep= line + mlen;
  263. linep = line;
  264. /* Throw away leading white space */
  265. while(isspace(c = getc(inptr)))
  266. ;
  267. if(c==EOF)
  268. return(0);
  269. ungetc(c,inptr);
  270. while((c = getc(inptr)) != EOF)
  271. switch (c) {
  272. case '\t':
  273. if(linep<endlinep)
  274. *linep++ = ' ';
  275. break;
  276. case '\n':
  277. while(isspace(*--linep))
  278. ;
  279. *++linep = '\n';
  280. return(linep);
  281. default:
  282. if(linep < endlinep)
  283. *linep++ = c;
  284. break;
  285. }
  286. return(0);
  287. }
  288. void
  289. cmpline(char *pend)
  290. {
  291. char *pstrt, *pchar, *cp;
  292. char **hp;
  293. int flag;
  294. pchar = line;
  295. if(rflag)
  296. while(pchar < pend && !isspace(*pchar))
  297. pchar++;
  298. while(pchar < pend){
  299. /* eliminate white space */
  300. if(isabreak(*pchar++))
  301. continue;
  302. pstrt = --pchar;
  303. flag = 1;
  304. while(flag){
  305. if(isabreak(*pchar)) {
  306. hp = &hasht[hash(pstrt,pchar)];
  307. pchar--;
  308. while(cp = *hp++){
  309. if(hp == &hasht[MAXT])
  310. hp = hasht;
  311. /* possible match */
  312. if(cmpword(pstrt,pchar,cp)){
  313. /* exact match */
  314. if(!ignore && only)
  315. putline(pstrt,pend);
  316. flag = 0;
  317. break;
  318. }
  319. }
  320. /* no match */
  321. if(flag){
  322. if(ignore || !only)
  323. putline(pstrt,pend);
  324. flag = 0;
  325. }
  326. }
  327. pchar++;
  328. }
  329. }
  330. }
  /*
   * cmpword: compare the word starting at `cpp' with the NUL-terminated
   * word `hpp', folding upper case in the `cpp' text to lower case.
   * `pend' points at the last character of the `cpp' word.
   * Returns 1 when every character of `hpp' matches and the match ends
   * exactly at `pend'; returns 0 otherwise.
   */
  int
  cmpword(char *cpp, char *pend, char *hpp)
  {
      char ch;

      for(; *hpp != '\0'; hpp++, cpp++) {
          ch = *cpp;
          if(isupper(ch))
              ch = tolower(ch);
          if(ch != *hpp)
              return(0);
      }
      /* cpp is now one past the last compared character */
      if(cpp - 1 != pend)
          return(0);
      return(1);
  }
  344. void
  345. putline(char *strt, char *end)
  346. {
  347. char *cp;
  348. for(cp=strt; cp<end; cp++)
  349. putc(*cp, sortptr);
  350. /* Add extra blank before TILDE to sort correctly with -fd option */
  351. putc(' ',sortptr);
  352. putc(TILDE,sortptr);
  353. for (cp=line; cp<strt; cp++)
  354. putc(*cp,sortptr);
  355. putc('\n',sortptr);
  356. }
  357. void
  358. makek(void)
  359. {
  360. int i, c;
  361. int nr = 0;
  362. if((sortptr = fopen(sortfile,"r")) == NULL)
  363. diag("Cannot open sorted data:",sortfile);
  364. if((kptr = fopen(kfile,"w")) == NULL)
  365. diag("Cannot create mark file:",kfile);
  366. if(cmds)
  367. fprintf(kptr,"%s\n",cmds);
  368. fprintf(kptr,
  369. ".nf\n"
  370. ".pl 1\n"
  371. ".tr %c\\&\n", TILDE);
  372. setlen();
  373. while((c = getc(sortptr)) != EOF) {
  374. if(nr == 0) {
  375. fprintf(kptr,".di xx\n");
  376. nr++;
  377. }
  378. if(c == '\n') {
  379. fprintf(kptr,"\n.di\n");
  380. for(i=1; i<nr; i++)
  381. fprintf(kptr,"\\n(%.2d ",i);
  382. fprintf(kptr,"\n");
  383. nr = 0;
  384. continue;
  385. }
  386. if(isspace(c))
  387. fprintf(kptr,"\\k(%.2d",nr++);
  388. putc(c,kptr);
  389. }
  390. fclose(sortptr);
  391. fclose(kptr);
  392. }
  393. void
  394. getsort(void)
  395. {
  396. char *tilde, *linep, *markp;
  397. int i0, i1, i2, i3, i4, i5, i6, i7, w0, w6;
  398. if((sortptr = fopen(sortfile, "r")) == NULL)
  399. diag("Cannot open sorted data:", sortfile);
  400. if((wptr = fopen(wfile, "r")) == NULL)
  401. diag("Cannot open width file:", wfile);
  402. getlen();
  403. halflen = (llen-gutter)/2;
  404. while(fgets(line, sizeof(line), sortptr) != NULL) {
  405. if(fgets(mark, sizeof(mark), wptr) == NULL)
  406. diag("Phase error 1: premature EOF on width file",
  407. wfile);
  408. linep = line;
  409. markp = mark;
  410. i3 = i7 = 0;
  411. word[i7].p = linep;
  412. word[i7].w = 0;
  413. for(linep=line; *linep; linep++) {
  414. if(*linep == TILDE)
  415. i3 = i7;
  416. else if(*linep == '\n')
  417. break;
  418. else if(isspace(*linep)) {
  419. i7++;
  420. word[i7].p = linep;
  421. if(!markp)
  422. diag("Phase error 2: no widths for summary",
  423. line);
  424. word[i7].w = atoi(markp);
  425. markp = strchr(markp+1, ' ');
  426. }
  427. }
  428. i0 = 0;
  429. for(i1=i0; i1<i3; i1++)
  430. if(word[i1+1].w - word[i0].w >= halflen - spacesl)
  431. break;
  432. w0 = word[i1].w - word[i0].w;
  433. i4 = i3 + rflag;
  434. for(i6 = i7; i6>i4; i6--)
  435. if(word[i7].w - word[i6-1].w >= halflen)
  436. break;
  437. w6 = word[i7].w - word[i6].w - spacesl;
  438. for(i2=i1 ; i2<i3; i2++)
  439. if(word[i2+1].w - word[i1].w + w6 >= halflen-hole)
  440. break;
  441. for(i5=i6; i5>i4; i5--)
  442. if(word[i6].w - word[i5-1].w + w0 >= halflen-hole)
  443. break;
  444. printf(".xx \"");
  445. putout(word[i1].p+1,word[i2].p);
  446. if(i1<i2 && i2<i3) putchar('/');
  447. printf("\" \"");
  448. if(i5>i4 && i6==i5) putchar('/');
  449. putout(word[i6].p+1+(i6==i3),word[i7].p);
  450. printf("\" \"");
  451. putout(word[i0].p,word[i1].p);
  452. if(i2<i3 && i1==i2) putchar('/');
  453. printf("\" \"");
  454. if(i5>i4 && i6>i5) putchar('/');
  455. putout(word[i5].p+1+(i5==i3),word[i6].p);
  456. if(rflag) {
  457. printf("\" \"");
  458. putout(word[i3].p+2,word[i4].p);
  459. }
  460. printf("\"\n");
  461. }
  462. }
  463. void
  464. putout(char *strt, char *end)
  465. {
  466. char *cp;
  467. for(cp=strt; cp<end; )
  468. putc(*cp++,outptr);
  469. }
  470. void
  471. setlen(void)
  472. {
  473. fprintf(kptr,
  474. "\\w'\\h'%s''\n"
  475. "\\w' /'\n"
  476. "\\w'\\h'%s''\n"
  477. "\\w'\\h'%s''\n",lenarg,gutarg,holarg);
  478. }
  479. void
  480. getlen(void)
  481. {
  482. char s[128];
  483. s[0] = '\0';
  484. fgets(s,sizeof(s),kptr);
  485. llen = atoi(s);
  486. fgets(s,sizeof(s),kptr);
  487. spacesl = atoi(s);
  488. fgets(s,sizeof(s),kptr);
  489. gutter = atoi(s);
  490. fgets(s,sizeof(s),kptr);
  491. hole = atoi(s);
  492. if(hole < 2*spacesl)
  493. hole = 2*spacesl;
  494. }
  495. int
  496. hash(char *strtp, char *endp)
  497. {
  498. char *cp, c;
  499. int i, j, k;
  500. /* Return zero hash number for single letter words */
  501. if((endp - strtp) == 1)
  502. return(0);
  503. cp = strtp;
  504. c = *cp++;
  505. i = (isupper(c)?tolower(c):c);
  506. c = *cp;
  507. j = (isupper(c)?tolower(c):c);
  508. i = i*j;
  509. cp = --endp;
  510. c = *cp--;
  511. k = (isupper(c)?tolower(c):c);
  512. c = *cp;
  513. j = (isupper(c)?tolower(c):c);
  514. j = k*j;
  515. return (i ^ (j>>2)) & MASK;
  516. }
  517. int
  518. storeh(int num, char *strtp)
  519. {
  520. int i;
  521. for(i=num; i<MAXT; i++)
  522. if(hasht[i] == 0) {
  523. hasht[i] = strtp;
  524. return(0);
  525. }
  526. for(i=0; i<num; i++)
  527. if(hasht[i] == 0) {
  528. hasht[i] = strtp;
  529. return(0);
  530. }
  531. return(1);
  532. }