mswordstrings.c 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include <u.h>
  10. #include <libc.h>
  11. #include <bio.h>
  12. /* automatically generated; do not edit. */
  13. typedef struct Fibhdr Fibhdr;
  14. struct Fibhdr {
  15. uint16_t wIdent;
  16. uint16_t nFib;
  17. uint16_t nProduct;
  18. uint16_t lid;
  19. int16_t pnNext;
  20. uint8_t fDot;
  21. uint8_t fGlsy;
  22. uint8_t fComplex;
  23. uint8_t fHasPic;
  24. uint8_t cQuickSaves;
  25. uint8_t fEncrypted;
  26. uint8_t fWhichTblStm;
  27. uint8_t fReadOnlyRecommended;
  28. uint8_t fWriteReservation;
  29. uint8_t fExtChar;
  30. uint8_t fLoadOverride;
  31. uint8_t fFarEast;
  32. uint8_t fCrypto;
  33. uint16_t nFibBack;
  34. uint32_t lKey;
  35. uint8_t envr;
  36. uint8_t fMac;
  37. uint8_t fEmptySpecial;
  38. uint8_t fLoadOverridePage;
  39. uint8_t fFutureSavedUndo;
  40. uint8_t fWord97Saved;
  41. uint16_t chs;
  42. uint16_t chsTables;
  43. int32_t fcMin;
  44. int32_t fcMac;
  45. uint16_t csw;
  46. };
  47. enum { bcFibhdr = 0x22 };
  48. /* automatically generated; do not edit. */
  49. void
  50. readFibhdr(Fibhdr *s, uint8_t *v, int nv)
  51. {
  52. if(nv < bcFibhdr) sysfatal("not enough data for Fibhdr");
  53. s->wIdent = v[0x0] | (v[0x0+1] << 8);
  54. s->nFib = v[0x2] | (v[0x2+1] << 8);
  55. s->nProduct = v[0x4] | (v[0x4+1] << 8);
  56. s->lid = v[0x6] | (v[0x6+1] << 8);
  57. s->pnNext = v[0x8] | (v[0x8+1] << 8);
  58. s->fDot = ((v[0xA]) & 0x1) >> 0;
  59. s->fGlsy = ((v[0xA]) & 0x2) >> 1;
  60. s->fComplex = ((v[0xA]) & 0x4) >> 2;
  61. s->fHasPic = ((v[0xA]) & 0x8) >> 3;
  62. s->cQuickSaves = ((v[0xA]) & 0x240) >> 4;
  63. s->fEncrypted = ((v[0xB]) & 0x1) >> 0;
  64. s->fWhichTblStm = ((v[0xB]) & 0x2) >> 1;
  65. s->fReadOnlyRecommended = ((v[0xB]) & 0x4) >> 2;
  66. s->fWriteReservation = ((v[0xB]) & 0x8) >> 3;
  67. s->fExtChar = ((v[0xB]) & 0x16) >> 4;
  68. s->fLoadOverride = ((v[0xB]) & 0x32) >> 5;
  69. s->fFarEast = ((v[0xB]) & 0x64) >> 6;
  70. s->fCrypto = ((v[0xB]) & 0x128) >> 7;
  71. s->nFibBack = v[0xC] | (v[0xC+1] << 8);
  72. s->lKey = v[0xE] | (v[0xE + 1] << 8)| (v[0xE + 2] << 16) | (v[0xE + 3] << 24);
  73. s->envr = v[0x12];
  74. s->fMac = ((v[0x13]) & 0x1) >> 0;
  75. s->fEmptySpecial = ((v[0x13]) & 0x2) >> 1;
  76. s->fLoadOverridePage = ((v[0x13]) & 0x4) >> 2;
  77. s->fFutureSavedUndo = ((v[0x13]) & 0x8) >> 3;
  78. s->fWord97Saved = ((v[0x13]) & 0x16) >> 4;
  79. s->chs = v[0x14] | (v[0x14+1] << 8);
  80. s->chsTables = v[0x16] | (v[0x16+1] << 8);
  81. s->fcMin = v[0x18] | (v[0x18+1] << 8)| (v[0x18+2] << 16) | (v[0x18+3] << 24);
  82. s->fcMac = v[0x1C] | (v[0x1C+1] << 8)| (v[0x1C+2] << 16) | (v[0x1C+3] << 24);
  83. s->csw = v[0x20] | (v[0x20+1] << 8);
  84. }
  85. void
  86. usage(void)
  87. {
  88. fprint(2, "usage: wordtext /mnt/doc/WordDocument\n");
  89. exits("usage");
  90. }
  91. void
  92. main(int argc, char **argv)
  93. {
  94. Biobuf *b;
  95. Biobuf bout;
  96. unsigned char buf[512];
  97. Fibhdr f;
  98. int i, c, n;
  99. ARGBEGIN{
  100. default:
  101. usage();
  102. }ARGEND
  103. if(argc != 1)
  104. usage();
  105. Binit(&bout, 1, OWRITE);
  106. b = Bopen(argv[0], OREAD);
  107. if(b == nil) {
  108. fprint(2, "couldn't open file: %r\n");
  109. exits("word");
  110. }
  111. n = Bread(b, buf, sizeof buf);
  112. if(n < sizeof buf) {
  113. fprint(2, "short read: %r\n");
  114. exits("read");
  115. }
  116. readFibhdr(&f, buf, sizeof buf);
  117. // printFibhdr(&f);
  118. Bseek(b, f.fcMin, 0);
  119. n = f.fcMac - f.fcMin;
  120. for(i=0; i<n; i++) {
  121. c = Bgetc(b);
  122. if(c < 0)
  123. break;
  124. switch(c) {
  125. default:
  126. Bputc(&bout, c);
  127. break;
  128. case '\\': Bprint(&bout, "\\"); break; /* field escape */
  129. case 7: Bprint(&bout, "\n"); break; /* cell, row mark */
  130. case 9: Bprint(&bout, "\t"); break; /* tab */
  131. case 11: Bprint(&bout, "\n"); break; /* hard line break */
  132. case 12: Bprint(&bout, "\n\n\n\n"); break; /* page break */
  133. case 13: Bprint(&bout, "\n\n"); break; /* paragraph end */
  134. case 14: break; /* column break */
  135. case 19: Bprint(&bout, "<"); break; /* field begin */
  136. case 20: Bprint(&bout, ":"); break; /* field sep */
  137. case 21: Bprint(&bout, ">"); break; /* field end */
  138. case 30: Bprint(&bout, "-"); break; /* non-breaking hyphen */
  139. case 31: break; /* non-required hyphen */
  140. /* case 45: Bprint(&bout, "-"); break; *//* breaking hyphen */
  141. case 160: Bprint(&bout, " "); break; /* non-breaking space */
  142. /*
  143. * these are only supposed to get used when special is set, but we
  144. * never see these ascii values otherwise anyway.
  145. */
  146. /*
  147. * Empirically, some documents have sections of text where
  148. * every character is followed by a zero byte. Some have sections
  149. * of text where there are no zero bytes. Still others have both
  150. * types and alternate between them. Until we parse which
  151. * characters are ``special'', page numbers lose out.
  152. */
  153. case 0: /* Bprint(&bout, "<pageno>"); */ break;
  154. case 1: Bprint(&bout, "<picture>"); break;
  155. case 2: Bprint(&bout, "<footnote>"); break;
  156. case 3: Bprint(&bout, "<footnote sep>"); break;
  157. case 4: Bprint(&bout, "<footnote cont>"); break;
  158. case 5: Bprint(&bout, "<animation>"); break;
  159. case 6: Bprint(&bout, "<lineno>"); break;
  160. /* case 7: Bprint(&bout, "<hand picture>"); break; */
  161. case 8: Bprint(&bout, "<drawn object>"); break;
  162. case 10: Bprint(&bout, "<abbrev date>"); break;
  163. /* case 11: Bprint(&bout, "<hh:mm:ss>"); break; */
  164. /* case 12: Bprint(&bout, "<section no>"); break; */
  165. /* case 14: Bprint(&bout, "<Thu>"); break; */
  166. case 15: Bprint(&bout, "<Thursday>"); break;
  167. case 16: Bprint(&bout, "<day of month>"); break;
  168. case 22: Bprint(&bout, "<hour>"); break;
  169. case 23: Bprint(&bout, "<hour hh>"); break;
  170. case 24: Bprint(&bout, "<minute>"); break;
  171. case 25: Bprint(&bout, "<minute mm>"); break;
  172. case 26: Bprint(&bout, "<seconds>"); break;
  173. case 27: Bprint(&bout, "<AM/PM>"); break;
  174. case 28: Bprint(&bout, "<hh:mm:ss>"); break;
  175. case 29: Bprint(&bout, "<date>"); break;
  176. /* printable ascii begins hereish */
  177. /*
  178. case 30: Bprint(&bout, "<mm/dd/yy>"); break;
  179. case 33: Bprint(&bout, "<mm>"); break;
  180. case 34: Bprint(&bout, "<yyyy>"); break;
  181. case 35: Bprint(&bout, "<yy>"); break;
  182. case 36: Bprint(&bout, "<Feb>"); break;
  183. case 37: Bprint(&bout, "<February>"); break;
  184. case 38: Bprint(&bout, "<hh:mm>"); break;
  185. case 39: Bprint(&bout, "<long date>"); break;
  186. case 41: break; */
  187. }
  188. }
  189. Bprint(&bout, "\n");
  190. }