ptx1.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617
  1. /* permuted title index
  2. ptx [-t] [-i ignore] [-o only] [-w num] [-r] [-b breaks]
  3. [-c commands] [-g gap] [-h hole] [-f] [input]
  4. Ptx reads the input file and permutes on words in it.
  5. It excludes all words in the ignore file.
  6. Alternately it includes words in the only file.
  7. If neither is given, it excludes the words in
  8. /sys/lib/man/permind/ignore.
  9. The width of the output line (except for -r field)
  10. can be changed to num,
  11. which is a troff width measure, ens by default.
  12. with no -w, num is 72n, or 100n under -t.
  13. the -f flag tells the program to fold the output
  14. the -t flag says the output is for troff
  15. font specifier -F implies -t.
  16. -g sets the gutter
  17. -h sets the hole between wrapped segments
  18. -r takes the first word on each line and makes it
  19. into a fifth field.
  20. -c inserts troff commands for font-setting etc at beginning
  21. */
  22. #include <u.h>
  23. #include <libc.h>
  24. #include <stdio.h>
  25. #include <ctype.h>
  26. #define DEFLTX "/sys/lib/man/permind/ignore"
  27. #define TILDE 0177
  28. #define N 30
  29. #define MAX N*BUFSIZ
  30. #define LMAX 2048
  31. #define MAXT 2048
  32. #define MASK 03777
  33. #define ON 1
  34. #define isabreak(c) (btable[c])
  35. char *getline(void);
  36. void msg(char *, char *);
  37. void extra(int);
  38. void diag(char *, char *);
  39. void cmpline(char *);
  40. int cmpword(char *, char *, char *);
  41. void putline(char *, char *);
  42. void makek(void);
  43. void getsort(void);
  44. char *rtrim(char *, char *, int);
  45. char *ltrim(char *, char *, int);
  46. void putout(char *, char *);
  47. void setlen(void);
  48. void getlen(void);
  49. int hash(char *, char *);
  50. int storeh(int, char *);
  51. int status;
  52. char *hasht[MAXT];
  53. char line[LMAX];
  54. char mark[LMAX];
  55. struct word {
  56. char *p;
  57. int w;
  58. } word[LMAX/2];
  59. char btable[256];
  60. int ignore;
  61. int only;
  62. char *lenarg;
  63. char *gutarg;
  64. char *holarg;
  65. int llen;
  66. int spacesl;
  67. int gutter;
  68. int hole;
  69. int mlen = LMAX;
  70. int halflen;
  71. int rflag;
  72. char *strtbufp, *endbufp;
  73. char *empty = "";
  74. char *font = "R";
  75. char *roff = "/bin/nroff";
  76. char *troff = "/bin/troff";
  77. char *infile = "/fd/0";
  78. FILE *inptr;
  79. FILE *outptr = stdout;
  80. char *sortfile = "ptxsort"; /* output of sort program */
  81. char nofold[] = {'-', 'd', 't', TILDE, 0};
  82. char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
  83. char *sortopt = nofold;
  84. FILE *sortptr;
  85. char *kfile = "ptxmark"; /* ptxsort + troff goo for widths */
  86. FILE *kptr;
  87. char *wfile = "ptxwidth"; /* widths of words in ptxsort */
  88. FILE *wptr;
  89. char *bfile; /*contains user supplied break chars */
  90. FILE *bptr;
  91. char *cmds;
  92. main(int argc, char **argv)
  93. {
  94. int c;
  95. char *bufp;
  96. char *pend;
  97. char *xfile;
  98. FILE *xptr;
  99. Waitmsg *w;
  100. /* argument decoding */
  101. xfile = DEFLTX;
  102. ARGBEGIN {
  103. case 'r':
  104. rflag = 1;
  105. break;
  106. case 'f':
  107. sortopt = fold;
  108. break;
  109. case 'w':
  110. if(lenarg)
  111. extra(ARGC());
  112. lenarg = ARGF();
  113. break;
  114. case 'c':
  115. if(cmds)
  116. extra(ARGC());
  117. cmds = ARGF();
  118. case 't':
  119. roff = troff;
  120. break;
  121. case 'g':
  122. if(gutarg)
  123. extra(ARGC());
  124. gutarg = ARGF();
  125. break;
  126. case 'h':
  127. if(holarg)
  128. extra(ARGC());
  129. holarg = ARGF();
  130. break;
  131. case 'i':
  132. if(only|ignore)
  133. extra(ARGC());
  134. ignore++;
  135. xfile = ARGF();
  136. break;
  137. case 'o':
  138. if(only|ignore)
  139. extra(ARGC());
  140. only++;
  141. xfile = ARGF();
  142. break;
  143. case 'b':
  144. if(bfile)
  145. extra(ARGC());
  146. bfile = ARGF();
  147. break;
  148. default:
  149. diag("Illegal argument:",*argv);
  150. } ARGEND
  151. if(lenarg == 0)
  152. lenarg = troff? "100n": "72n";
  153. if(gutarg == 0)
  154. gutarg = "3n";
  155. if(holarg == 0)
  156. holarg = gutarg;
  157. if(argc > 1)
  158. diag("Too many filenames",empty);
  159. if(argc == 1)
  160. infile = *argv;
  161. /* Default breaks of blank, tab and newline */
  162. btable[' '] = ON;
  163. btable['\t'] = ON;
  164. btable['\n'] = ON;
  165. if(bfile) {
  166. if((bptr = fopen(bfile,"r")) == NULL)
  167. diag("Cannot open break char file",bfile);
  168. while((c = getc(bptr)) != EOF)
  169. btable[c] = ON;
  170. }
  171. /* Allocate space for a buffer. If only or ignore file present
  172. read it into buffer. Else read in default ignore file
  173. and put resulting words in buffer.
  174. */
  175. if((strtbufp = calloc(N,BUFSIZ)) == NULL)
  176. diag("Out of memory space",empty);
  177. bufp = strtbufp;
  178. endbufp = strtbufp+MAX;
  179. if((xptr = fopen(xfile,"r")) == NULL)
  180. diag("Cannot open file",xfile);
  181. while(bufp < endbufp && (c = getc(xptr)) != EOF) {
  182. if(isabreak(c)) {
  183. if(storeh(hash(strtbufp,bufp),strtbufp))
  184. diag("Too many words",xfile);
  185. *bufp++ = '\0';
  186. strtbufp = bufp;
  187. }
  188. else {
  189. *bufp++ = (isupper(c)?tolower(c):c);
  190. }
  191. }
  192. if (bufp >= endbufp)
  193. diag("Too many words in file",xfile);
  194. endbufp = --bufp;
  195. /* open output file for sorting */
  196. if((sortptr = fopen(sortfile, "w")) == NULL)
  197. diag("Cannot open output for sorting:",sortfile);
  198. /* get a line of data and compare each word for
  199. inclusion or exclusion in the sort phase
  200. */
  201. if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
  202. diag("Cannot open data: ",infile);
  203. while(pend=getline())
  204. cmpline(pend);
  205. fclose(sortptr);
  206. if(fork()==0){
  207. execl("/bin/sort", "sort", sortopt, "+0", "-1", "+1",
  208. sortfile, "-o", sortfile, 0);
  209. diag("Sort exec failed","");
  210. }
  211. if((w=wait())==nil || w->msg[0]!=0)
  212. diag("Sort failed","");
  213. free(w);
  214. makek();
  215. if(fork()==0){
  216. if(dup(create(wfile,OWRITE|OTRUNC,0666),1) == -1)
  217. diag("Cannot create width file:",wfile);
  218. execl(roff, roff, "-a", kfile, 0);
  219. diag("Sort exec failed","");
  220. }
  221. if((w=wait())==nil || w->msg[0]!=0)
  222. diag("Sort failed","");
  223. free(w);
  224. getsort();
  225. /* remove(sortfile);
  226. remove(kfile);*/
  227. fflush(0);
  228. _exits(0);
  229. /* I don't know what's wrong with the atexit func... */
  230. /* exits(0); */
  231. }
  232. void
  233. msg(char *s, char *arg)
  234. {
  235. fprintf(stderr,"ptx: %s %s\n",s,arg);
  236. return;
  237. }
  238. void
  239. extra(int c)
  240. {
  241. char s[] = "-x.";
  242. s[1] = c;
  243. diag("Extra option", s);
  244. }
  245. void
  246. diag(char *s, char *arg)
  247. {
  248. msg(s,arg);
  249. /*
  250. remove(sortfile);
  251. remove(kfile);
  252. */
  253. exits(s);
  254. }
  255. char*
  256. getline(void)
  257. {
  258. int c;
  259. char *linep;
  260. char *endlinep;
  261. endlinep= line + mlen;
  262. linep = line;
  263. /* Throw away leading white space */
  264. while(isspace(c=getc(inptr)))
  265. ;
  266. if(c==EOF)
  267. return(0);
  268. ungetc(c,inptr);
  269. while(( c=getc(inptr)) != EOF) {
  270. switch (c) {
  271. case '\t':
  272. if(linep<endlinep)
  273. *linep++ = ' ';
  274. break;
  275. case '\n':
  276. while(isspace(*--linep));
  277. *++linep = '\n';
  278. return(linep);
  279. default:
  280. if(linep < endlinep)
  281. *linep++ = c;
  282. }
  283. }
  284. return(0);
  285. }
  286. void
  287. cmpline(char *pend)
  288. {
  289. char *pstrt, *pchar, *cp;
  290. char **hp;
  291. int flag;
  292. pchar = line;
  293. if(rflag)
  294. while(pchar<pend&&!isspace(*pchar))
  295. pchar++;
  296. while(pchar<pend){
  297. /* eliminate white space */
  298. if(isabreak(*pchar++))
  299. continue;
  300. pstrt = --pchar;
  301. flag = 1;
  302. while(flag){
  303. if(isabreak(*pchar)) {
  304. hp = &hasht[hash(pstrt,pchar)];
  305. pchar--;
  306. while(cp = *hp++){
  307. if(hp == &hasht[MAXT])
  308. hp = hasht;
  309. /* possible match */
  310. if(cmpword(pstrt,pchar,cp)){
  311. /* exact match */
  312. if(!ignore && only)
  313. putline(pstrt,pend);
  314. flag = 0;
  315. break;
  316. }
  317. }
  318. /* no match */
  319. if(flag){
  320. if(ignore || !only)
  321. putline(pstrt,pend);
  322. flag = 0;
  323. }
  324. }
  325. pchar++;
  326. }
  327. }
  328. }
  329. int
  330. cmpword(char *cpp, char *pend, char *hpp)
  331. {
  332. char c;
  333. while(*hpp != '\0'){
  334. c = *cpp++;
  335. if((isupper(c)?tolower(c):c) != *hpp++)
  336. return(0);
  337. }
  338. if(--cpp == pend) return(1);
  339. return(0);
  340. }
  341. void
  342. putline(char *strt, char *end)
  343. {
  344. char *cp;
  345. for(cp=strt; cp<end; cp++)
  346. putc(*cp, sortptr);
  347. /* Add extra blank before TILDE to sort correctly
  348. with -fd option */
  349. putc(' ',sortptr);
  350. putc(TILDE,sortptr);
  351. for (cp=line; cp<strt; cp++)
  352. putc(*cp,sortptr);
  353. putc('\n',sortptr);
  354. }
  355. void
  356. makek(void)
  357. {
  358. int i, c;
  359. int nr = 0;
  360. if((sortptr = fopen(sortfile,"r")) == NULL)
  361. diag("Cannot open sorted data:",sortfile);
  362. if((kptr = fopen(kfile,"w")) == NULL)
  363. diag("Cannot create mark file:",kfile);
  364. if(cmds)
  365. fprintf(kptr,"%s\n",cmds);
  366. fprintf(kptr,
  367. ".nf\n"
  368. ".pl 1\n"
  369. ".tr %c\\&\n", TILDE);
  370. setlen();
  371. while((c = getc(sortptr)) != EOF) {
  372. if(nr == 0) {
  373. fprintf(kptr,".di xx\n");
  374. nr++;
  375. }
  376. if(c == '\n') {
  377. fprintf(kptr,"\n.di\n");
  378. for(i=1; i<nr; i++)
  379. fprintf(kptr,"\\n(%.2d ",i);
  380. fprintf(kptr,"\n");
  381. nr = 0;
  382. continue;
  383. }
  384. if(isspace(c))
  385. fprintf(kptr,"\\k(%.2d",nr++);
  386. putc(c,kptr);
  387. }
  388. fclose(sortptr);
  389. fclose(kptr);
  390. }
  391. void
  392. getsort(void)
  393. {
  394. char *tilde, *linep, *markp;
  395. int i0, i1, i2, i3, i4, i5, i6, i7;
  396. int w0, w6;
  397. if((sortptr = fopen(sortfile,"r")) == NULL)
  398. diag("Cannot open sorted data:",sortfile);
  399. if((wptr = fopen(wfile,"r")) == NULL)
  400. diag("Cannot open width file:",wfile);
  401. getlen();
  402. halflen = (llen-gutter)/2;
  403. while(fgets(line,sizeof(line),sortptr) != 0) {
  404. if(fgets(mark,sizeof(mark),wptr) == 0)
  405. diag("Phase error 1","");
  406. linep = line;
  407. markp = mark;
  408. i3 = i7 = 0;
  409. word[i7].p = linep;
  410. word[i7].w = 0;
  411. for(linep=line; *linep; linep++) {
  412. if(*linep == TILDE)
  413. i3 = i7;
  414. else if(*linep == '\n')
  415. break;
  416. else if(isspace(*linep)) {
  417. i7++;
  418. word[i7].p = linep;
  419. if(!markp) {
  420. diag("Phase error 2","");
  421. }
  422. word[i7].w = atoi(markp);
  423. markp = strchr(markp+1,' ');
  424. }
  425. }
  426. i0 = 0;
  427. for(i1=i0; i1<i3; i1++)
  428. if(word[i1+1].w - word[i0].w >= halflen - spacesl)
  429. break;
  430. w0 = word[i1].w - word[i0].w;
  431. i4 = i3 + rflag;
  432. for(i6 = i7; i6>i4; i6--)
  433. if(word[i7].w - word[i6-1].w >= halflen)
  434. break;
  435. w6 = word[i7].w - word[i6].w - spacesl;
  436. for(i2=i1 ; i2<i3; i2++)
  437. if(word[i2+1].w - word[i1].w + w6 >= halflen-hole)
  438. break;
  439. for(i5=i6; i5>i4; i5--)
  440. if(word[i6].w - word[i5-1].w + w0 >= halflen-hole)
  441. break;
  442. printf(".xx \"");
  443. putout(word[i1].p+1,word[i2].p);
  444. if(i1<i2 && i2<i3) putchar('/');
  445. printf("\" \"");
  446. if(i5>i4 && i6==i5) putchar('/');
  447. putout(word[i6].p+1+(i6==i3),word[i7].p);
  448. printf("\" \"");
  449. putout(word[i0].p,word[i1].p);
  450. if(i2<i3 && i1==i2) putchar('/');
  451. printf("\" \"");
  452. if(i5>i4 && i6>i5) putchar('/');
  453. putout(word[i5].p+1+(i5==i3),word[i6].p);
  454. if(rflag) {
  455. printf("\" \"");
  456. putout(word[i3].p+2,word[i4].p);
  457. }
  458. printf("\"\n");
  459. }
  460. }
  461. void
  462. putout(char *strt, char *end)
  463. {
  464. char *cp;
  465. for(cp=strt; cp<end; )
  466. putc(*cp++,outptr);
  467. }
  468. void
  469. setlen(void)
  470. {
  471. fprintf(kptr,
  472. "\\w'\\h'%s''\n"
  473. "\\w' /'\n"
  474. "\\w'\\h'%s''\n"
  475. "\\w'\\h'%s''\n",lenarg,gutarg,holarg);
  476. }
  477. void
  478. getlen(void)
  479. {
  480. char s[20];
  481. fgets(s,sizeof(s),kptr);
  482. llen = atoi(s);
  483. fgets(s,sizeof(s),kptr);
  484. spacesl = atoi(s);
  485. fgets(s,sizeof(s),kptr);
  486. gutter = atoi(s);
  487. fgets(s,sizeof(s),kptr);
  488. hole = atoi(s);
  489. if(hole < 2*spacesl)
  490. hole = 2*spacesl;
  491. }
  492. int
  493. hash(char *strtp, char *endp)
  494. {
  495. char *cp, c;
  496. int i, j, k;
  497. /* Return zero hash number for single letter words */
  498. if((endp - strtp) == 1)
  499. return(0);
  500. cp = strtp;
  501. c = *cp++;
  502. i = (isupper(c)?tolower(c):c);
  503. c = *cp;
  504. j = (isupper(c)?tolower(c):c);
  505. i = i*j;
  506. cp = --endp;
  507. c = *cp--;
  508. k = (isupper(c)?tolower(c):c);
  509. c = *cp;
  510. j = (isupper(c)?tolower(c):c);
  511. j = k*j;
  512. k = (i ^ (j>>2)) & MASK;
  513. return(k);
  514. }
  515. int
  516. storeh(int num, char *strtp)
  517. {
  518. int i;
  519. for(i=num; i<MAXT; i++) {
  520. if(hasht[i] == 0) {
  521. hasht[i] = strtp;
  522. return(0);
  523. }
  524. }
  525. for(i=0; i<num; i++) {
  526. if(hasht[i] == 0) {
  527. hasht[i] = strtp;
  528. return(0);
  529. }
  530. }
  531. return(1);
  532. }