123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198 |
- #include <u.h>
- #include <libc.h>
- #include <bio.h>
- /* automatically generated; do not edit. */
- typedef struct Fibhdr Fibhdr;
- struct Fibhdr {
- ushort wIdent;
- ushort nFib;
- ushort nProduct;
- ushort lid;
- short pnNext;
- uchar fDot;
- uchar fGlsy;
- uchar fComplex;
- uchar fHasPic;
- uchar cQuickSaves;
- uchar fEncrypted;
- uchar fWhichTblStm;
- uchar fReadOnlyRecommended;
- uchar fWriteReservation;
- uchar fExtChar;
- uchar fLoadOverride;
- uchar fFarEast;
- uchar fCrypto;
- ushort nFibBack;
- ulong lKey;
- uchar envr;
- uchar fMac;
- uchar fEmptySpecial;
- uchar fLoadOverridePage;
- uchar fFutureSavedUndo;
- uchar fWord97Saved;
- ushort chs;
- ushort chsTables;
- long fcMin;
- long fcMac;
- ushort csw;
- };
- enum { bcFibhdr = 0x22 };
- /* automatically generated; do not edit. */
- void
- readFibhdr(Fibhdr *s, uchar *v, int nv)
- {
- if(nv < bcFibhdr) sysfatal("not enough data for Fibhdr");
- s->wIdent = v[0x0] | (v[0x0+1] << 8);
- s->nFib = v[0x2] | (v[0x2+1] << 8);
- s->nProduct = v[0x4] | (v[0x4+1] << 8);
- s->lid = v[0x6] | (v[0x6+1] << 8);
- s->pnNext = v[0x8] | (v[0x8+1] << 8);
- s->fDot = ((v[0xA]) & 0x1) >> 0;
- s->fGlsy = ((v[0xA]) & 0x2) >> 1;
- s->fComplex = ((v[0xA]) & 0x4) >> 2;
- s->fHasPic = ((v[0xA]) & 0x8) >> 3;
- s->cQuickSaves = ((v[0xA]) & 0x240) >> 4;
- s->fEncrypted = ((v[0xB]) & 0x1) >> 0;
- s->fWhichTblStm = ((v[0xB]) & 0x2) >> 1;
- s->fReadOnlyRecommended = ((v[0xB]) & 0x4) >> 2;
- s->fWriteReservation = ((v[0xB]) & 0x8) >> 3;
- s->fExtChar = ((v[0xB]) & 0x16) >> 4;
- s->fLoadOverride = ((v[0xB]) & 0x32) >> 5;
- s->fFarEast = ((v[0xB]) & 0x64) >> 6;
- s->fCrypto = ((v[0xB]) & 0x128) >> 7;
- s->nFibBack = v[0xC] | (v[0xC+1] << 8);
- s->lKey = v[0xE] | (v[0xE+1] << 8)| (v[0xE+2] << 16) | (v[0xE+3] << 24);
- s->envr = v[0x12];
- s->fMac = ((v[0x13]) & 0x1) >> 0;
- s->fEmptySpecial = ((v[0x13]) & 0x2) >> 1;
- s->fLoadOverridePage = ((v[0x13]) & 0x4) >> 2;
- s->fFutureSavedUndo = ((v[0x13]) & 0x8) >> 3;
- s->fWord97Saved = ((v[0x13]) & 0x16) >> 4;
- s->chs = v[0x14] | (v[0x14+1] << 8);
- s->chsTables = v[0x16] | (v[0x16+1] << 8);
- s->fcMin = v[0x18] | (v[0x18+1] << 8)| (v[0x18+2] << 16) | (v[0x18+3] << 24);
- s->fcMac = v[0x1C] | (v[0x1C+1] << 8)| (v[0x1C+2] << 16) | (v[0x1C+3] << 24);
- s->csw = v[0x20] | (v[0x20+1] << 8);
- }
/*
 * Print the command synopsis on file descriptor 2, then terminate
 * the process with exit status "usage".  Does not return.
 */
void
usage(void)
{
	fprint(2, "usage: wordtext /mnt/doc/WordDocument\n");
	exits("usage");
}
- void
- main(int argc, char **argv)
- {
- Biobuf *b;
- Biobuf bout;
- uchar buf[512];
- Fibhdr f;
- int i, c, n;
- ARGBEGIN{
- default:
- usage();
- }ARGEND
- if(argc != 1)
- usage();
- Binit(&bout, 1, OWRITE);
- b = Bopen(argv[0], OREAD);
- if(b == nil) {
- fprint(2, "couldn't open file: %r\n");
- exits("word");
- }
- n = Bread(b, buf, sizeof buf);
- if(n < sizeof buf) {
- fprint(2, "short read: %r\n");
- exits("read");
- }
- readFibhdr(&f, buf, sizeof buf);
- // printFibhdr(&f);
- Bseek(b, f.fcMin, 0);
- n = f.fcMac - f.fcMin;
- for(i=0; i<n; i++) {
- c = Bgetc(b);
- if(c < 0)
- break;
- switch(c) {
- default:
- Bputc(&bout, c);
- break;
- case '\\': Bprint(&bout, "\\"); break; /* field escape */
- case 7: Bprint(&bout, "\n"); break; /* cell, row mark */
- case 9: Bprint(&bout, "\t"); break; /* tab */
- case 11: Bprint(&bout, "\n"); break; /* hard line break */
- case 12: Bprint(&bout, "\n\n\n\n"); break; /* page break */
- case 13: Bprint(&bout, "\n\n"); break; /* paragraph end */
- case 14: break; /* column break */
- case 19: Bprint(&bout, "<"); break; /* field begin */
- case 20: Bprint(&bout, ":"); break; /* field sep */
- case 21: Bprint(&bout, ">"); break; /* field end */
- case 30: Bprint(&bout, "-"); break; /* non-breaking hyphen */
- case 31: break; /* non-required hyphen */
- /* case 45: Bprint(&bout, "-"); break; /* breaking hyphen */
- case 160: Bprint(&bout, " "); break; /* non-breaking space */
- /*
- * these are only supposed to get used when special is set, but we
- * never see these ascii values otherwise anyway.
- */
- /*
- * Empirically, some documents have sections of text where
- * every character is followed by a zero byte. Some have sections
- * of text where there are no zero bytes. Still others have both
- * types and alternate between them. Until we parse which
- * characters are ``special'', page numbers lose out.
- */
- case 0: /* Bprint(&bout, "<pageno>"); */ break;
- case 1: Bprint(&bout, "<picture>"); break;
- case 2: Bprint(&bout, "<footnote>"); break;
- case 3: Bprint(&bout, "<footnote sep>"); break;
- case 4: Bprint(&bout, "<footnote cont>"); break;
- case 5: Bprint(&bout, "<animation>"); break;
- case 6: Bprint(&bout, "<lineno>"); break;
- /* case 7: Bprint(&bout, "<hand picture>"); break; */
- case 8: Bprint(&bout, "<drawn object>"); break;
- case 10: Bprint(&bout, "<abbrev date>"); break;
- /* case 11: Bprint(&bout, "<hh:mm:ss>"); break; */
- /* case 12: Bprint(&bout, "<section no>"); break; */
- /* case 14: Bprint(&bout, "<Thu>"); break; */
- case 15: Bprint(&bout, "<Thursday>"); break;
- case 16: Bprint(&bout, "<day of month>"); break;
- case 22: Bprint(&bout, "<hour>"); break;
- case 23: Bprint(&bout, "<hour hh>"); break;
- case 24: Bprint(&bout, "<minute>"); break;
- case 25: Bprint(&bout, "<minute mm>"); break;
- case 26: Bprint(&bout, "<seconds>"); break;
- case 27: Bprint(&bout, "<AM/PM>"); break;
- case 28: Bprint(&bout, "<hh:mm:ss>"); break;
- case 29: Bprint(&bout, "<date>"); break;
- /* printable ascii begins hereish */
- /*
- case 30: Bprint(&bout, "<mm/dd/yy>"); break;
- case 33: Bprint(&bout, "<mm>"); break;
- case 34: Bprint(&bout, "<yyyy>"); break;
- case 35: Bprint(&bout, "<yy>"); break;
- case 36: Bprint(&bout, "<Feb>"); break;
- case 37: Bprint(&bout, "<February>"); break;
- case 38: Bprint(&bout, "<hh:mm>"); break;
- case 39: Bprint(&bout, "<long date>"); break;
- case 41: break; */
- }
- }
- Bprint(&bout, "\n");
- }
|