mswordstrings.c 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. /* automatically generated; do not edit. */
  5. typedef struct Fibhdr Fibhdr;
  6. struct Fibhdr {
  7. ushort wIdent;
  8. ushort nFib;
  9. ushort nProduct;
  10. ushort lid;
  11. short pnNext;
  12. uchar fDot;
  13. uchar fGlsy;
  14. uchar fComplex;
  15. uchar fHasPic;
  16. uchar cQuickSaves;
  17. uchar fEncrypted;
  18. uchar fWhichTblStm;
  19. uchar fReadOnlyRecommended;
  20. uchar fWriteReservation;
  21. uchar fExtChar;
  22. uchar fLoadOverride;
  23. uchar fFarEast;
  24. uchar fCrypto;
  25. ushort nFibBack;
  26. ulong lKey;
  27. uchar envr;
  28. uchar fMac;
  29. uchar fEmptySpecial;
  30. uchar fLoadOverridePage;
  31. uchar fFutureSavedUndo;
  32. uchar fWord97Saved;
  33. ushort chs;
  34. ushort chsTables;
  35. long fcMin;
  36. long fcMac;
  37. ushort csw;
  38. };
  39. enum { bcFibhdr = 0x22 };
  40. /* automatically generated; do not edit. */
  41. void
  42. readFibhdr(Fibhdr *s, uchar *v, int nv)
  43. {
  44. if(nv < bcFibhdr) sysfatal("not enough data for Fibhdr");
  45. s->wIdent = v[0x0] | (v[0x0+1] << 8);
  46. s->nFib = v[0x2] | (v[0x2+1] << 8);
  47. s->nProduct = v[0x4] | (v[0x4+1] << 8);
  48. s->lid = v[0x6] | (v[0x6+1] << 8);
  49. s->pnNext = v[0x8] | (v[0x8+1] << 8);
  50. s->fDot = ((v[0xA]) & 0x1) >> 0;
  51. s->fGlsy = ((v[0xA]) & 0x2) >> 1;
  52. s->fComplex = ((v[0xA]) & 0x4) >> 2;
  53. s->fHasPic = ((v[0xA]) & 0x8) >> 3;
  54. s->cQuickSaves = ((v[0xA]) & 0x240) >> 4;
  55. s->fEncrypted = ((v[0xB]) & 0x1) >> 0;
  56. s->fWhichTblStm = ((v[0xB]) & 0x2) >> 1;
  57. s->fReadOnlyRecommended = ((v[0xB]) & 0x4) >> 2;
  58. s->fWriteReservation = ((v[0xB]) & 0x8) >> 3;
  59. s->fExtChar = ((v[0xB]) & 0x16) >> 4;
  60. s->fLoadOverride = ((v[0xB]) & 0x32) >> 5;
  61. s->fFarEast = ((v[0xB]) & 0x64) >> 6;
  62. s->fCrypto = ((v[0xB]) & 0x128) >> 7;
  63. s->nFibBack = v[0xC] | (v[0xC+1] << 8);
  64. s->lKey = v[0xE] | (v[0xE+1] << 8)| (v[0xE+2] << 16) | (v[0xE+3] << 24);
  65. s->envr = v[0x12];
  66. s->fMac = ((v[0x13]) & 0x1) >> 0;
  67. s->fEmptySpecial = ((v[0x13]) & 0x2) >> 1;
  68. s->fLoadOverridePage = ((v[0x13]) & 0x4) >> 2;
  69. s->fFutureSavedUndo = ((v[0x13]) & 0x8) >> 3;
  70. s->fWord97Saved = ((v[0x13]) & 0x16) >> 4;
  71. s->chs = v[0x14] | (v[0x14+1] << 8);
  72. s->chsTables = v[0x16] | (v[0x16+1] << 8);
  73. s->fcMin = v[0x18] | (v[0x18+1] << 8)| (v[0x18+2] << 16) | (v[0x18+3] << 24);
  74. s->fcMac = v[0x1C] | (v[0x1C+1] << 8)| (v[0x1C+2] << 16) | (v[0x1C+3] << 24);
  75. s->csw = v[0x20] | (v[0x20+1] << 8);
  76. }
  /*
   * usage writes the command synopsis to file descriptor 2 and
   * terminates the program with exit status "usage".
   */
  void
  usage(void)
  {
      static char synopsis[] = "usage: wordtext /mnt/doc/WordDocument\n";

      fprint(2, "%s", synopsis);
      exits("usage");
  }
  83. void
  84. main(int argc, char **argv)
  85. {
  86. Biobuf *b;
  87. Biobuf bout;
  88. uchar buf[512];
  89. Fibhdr f;
  90. int i, c, n;
  91. ARGBEGIN{
  92. default:
  93. usage();
  94. }ARGEND
  95. if(argc != 1)
  96. usage();
  97. Binit(&bout, 1, OWRITE);
  98. b = Bopen(argv[0], OREAD);
  99. if(b == nil) {
  100. fprint(2, "couldn't open file: %r\n");
  101. exits("word");
  102. }
  103. n = Bread(b, buf, sizeof buf);
  104. if(n < sizeof buf) {
  105. fprint(2, "short read: %r\n");
  106. exits("read");
  107. }
  108. readFibhdr(&f, buf, sizeof buf);
  109. // printFibhdr(&f);
  110. Bseek(b, f.fcMin, 0);
  111. n = f.fcMac - f.fcMin;
  112. for(i=0; i<n; i++) {
  113. c = Bgetc(b);
  114. if(c < 0)
  115. break;
  116. switch(c) {
  117. default:
  118. Bputc(&bout, c);
  119. break;
  120. case '\\': Bprint(&bout, "\\"); break; /* field escape */
  121. case 7: Bprint(&bout, "\n"); break; /* cell, row mark */
  122. case 9: Bprint(&bout, "\t"); break; /* tab */
  123. case 11: Bprint(&bout, "\n"); break; /* hard line break */
  124. case 12: Bprint(&bout, "\n\n\n\n"); break; /* page break */
  125. case 13: Bprint(&bout, "\n\n"); break; /* paragraph end */
  126. case 14: break; /* column break */
  127. case 19: Bprint(&bout, "<"); break; /* field begin */
  128. case 20: Bprint(&bout, ":"); break; /* field sep */
  129. case 21: Bprint(&bout, ">"); break; /* field end */
  130. case 30: Bprint(&bout, "-"); break; /* non-breaking hyphen */
  131. case 31: break; /* non-required hyphen */
  132. /* case 45: Bprint(&bout, "-"); break; /* breaking hyphen */
  133. case 160: Bprint(&bout, " "); break; /* non-breaking space */
  134. /*
  135. * these are only supposed to get used when special is set, but we
  136. * never see these ascii values otherwise anyway.
  137. */
  138. /*
  139. * Empirically, some documents have sections of text where
  140. * every character is followed by a zero byte. Some have sections
  141. * of text where there are no zero bytes. Still others have both
  142. * types and alternate between them. Until we parse which
  143. * characters are ``special'', page numbers lose out.
  144. */
  145. case 0: /* Bprint(&bout, "<pageno>"); */ break;
  146. case 1: Bprint(&bout, "<picture>"); break;
  147. case 2: Bprint(&bout, "<footnote>"); break;
  148. case 3: Bprint(&bout, "<footnote sep>"); break;
  149. case 4: Bprint(&bout, "<footnote cont>"); break;
  150. case 5: Bprint(&bout, "<animation>"); break;
  151. case 6: Bprint(&bout, "<lineno>"); break;
  152. /* case 7: Bprint(&bout, "<hand picture>"); break; */
  153. case 8: Bprint(&bout, "<drawn object>"); break;
  154. case 10: Bprint(&bout, "<abbrev date>"); break;
  155. /* case 11: Bprint(&bout, "<hh:mm:ss>"); break; */
  156. /* case 12: Bprint(&bout, "<section no>"); break; */
  157. /* case 14: Bprint(&bout, "<Thu>"); break; */
  158. case 15: Bprint(&bout, "<Thursday>"); break;
  159. case 16: Bprint(&bout, "<day of month>"); break;
  160. case 22: Bprint(&bout, "<hour>"); break;
  161. case 23: Bprint(&bout, "<hour hh>"); break;
  162. case 24: Bprint(&bout, "<minute>"); break;
  163. case 25: Bprint(&bout, "<minute mm>"); break;
  164. case 26: Bprint(&bout, "<seconds>"); break;
  165. case 27: Bprint(&bout, "<AM/PM>"); break;
  166. case 28: Bprint(&bout, "<hh:mm:ss>"); break;
  167. case 29: Bprint(&bout, "<date>"); break;
  168. /* printable ascii begins hereish */
  169. /*
  170. case 30: Bprint(&bout, "<mm/dd/yy>"); break;
  171. case 33: Bprint(&bout, "<mm>"); break;
  172. case 34: Bprint(&bout, "<yyyy>"); break;
  173. case 35: Bprint(&bout, "<yy>"); break;
  174. case 36: Bprint(&bout, "<Feb>"); break;
  175. case 37: Bprint(&bout, "<February>"); break;
  176. case 38: Bprint(&bout, "<hh:mm>"); break;
  177. case 39: Bprint(&bout, "<long date>"); break;
  178. case 41: break; */
  179. }
  180. }
  181. Bprint(&bout, "\n");
  182. }