worddos.c 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. /*
  2. * worddos.c
  3. * Copyright (C) 2002-2005 A.J. van Os; Released under GNU GPL
  4. *
  5. * Description:
  6. * Deal with the DOS internals of a MS Word file
  7. */
  8. #include "antiword.h"
  9. /*
  10. * bGetDocumentText - make a list of the text blocks of a Word document
  11. *
  12. * Return TRUE when succesful, otherwise FALSE
  13. */
  14. static BOOL
  15. bGetDocumentText(FILE *pFile, long lFilesize, const UCHAR *aucHeader)
  16. {
  17. text_block_type tTextBlock;
  18. ULONG ulTextLen;
  19. BOOL bFastSaved;
  20. UCHAR ucDocStatus, ucVersion;
  21. fail(pFile == NULL);
  22. fail(lFilesize < 128);
  23. fail(aucHeader == NULL);
  24. /* Get the status flags from the header */
  25. ucDocStatus = ucGetByte(0x75, aucHeader);
  26. DBG_HEX(ucDocStatus);
  27. bFastSaved = (ucDocStatus & BIT(1)) != 0;
  28. DBG_MSG_C(bFastSaved, "This document is Fast Saved");
  29. ucVersion = ucGetByte(0x74, aucHeader);
  30. DBG_DEC(ucVersion);
  31. DBG_MSG_C(ucVersion == 0, "Written by Word 4.0 or earlier");
  32. DBG_MSG_C(ucVersion == 3, "Word 5.0 format, but not written by Word");
  33. DBG_MSG_C(ucVersion == 4, "Written by Word 5.x");
  34. if (bFastSaved) {
  35. werr(0, "Word for DOS: autosave documents are not supported");
  36. return FALSE;
  37. }
  38. /* Get length information */
  39. ulTextLen = ulGetLong(0x0e, aucHeader);
  40. DBG_HEX(ulTextLen);
  41. ulTextLen -= 128;
  42. DBG_DEC(ulTextLen);
  43. tTextBlock.ulFileOffset = 128;
  44. tTextBlock.ulCharPos = 128;
  45. tTextBlock.ulLength = ulTextLen;
  46. tTextBlock.bUsesUnicode = FALSE;
  47. tTextBlock.usPropMod = IGNORE_PROPMOD;
  48. if (!bAdd2TextBlockList(&tTextBlock)) {
  49. DBG_HEX(tTextBlock.ulFileOffset);
  50. DBG_HEX(tTextBlock.ulCharPos);
  51. DBG_DEC(tTextBlock.ulLength);
  52. DBG_DEC(tTextBlock.bUsesUnicode);
  53. DBG_DEC(tTextBlock.usPropMod);
  54. return FALSE;
  55. }
  56. return TRUE;
  57. } /* end of bGetDocumentText */
  58. /*
  59. * iInitDocumentDOS - initialize an DOS document
  60. *
  61. * Returns the version of Word that made the document or -1
  62. */
  63. int
  64. iInitDocumentDOS(FILE *pFile, long lFilesize)
  65. {
  66. int iWordVersion;
  67. BOOL bSuccess;
  68. USHORT usIdent;
  69. UCHAR aucHeader[128];
  70. fail(pFile == NULL);
  71. if (lFilesize < 128) {
  72. return -1;
  73. }
  74. /* Read the headerblock */
  75. if (!bReadBytes(aucHeader, 128, 0x00, pFile)) {
  76. return -1;
  77. }
  78. /* Get the "magic number" from the header */
  79. usIdent = usGetWord(0x00, aucHeader);
  80. DBG_HEX(usIdent);
  81. fail(usIdent != 0xbe31); /* Word for DOS */
  82. iWordVersion = iGetVersionNumber(aucHeader);
  83. if (iWordVersion != 0) {
  84. werr(0, "This file is not from 'Word for DOS'.");
  85. return -1;
  86. }
  87. bSuccess = bGetDocumentText(pFile, lFilesize, aucHeader);
  88. if (bSuccess) {
  89. vGetPropertyInfo(pFile, NULL,
  90. NULL, 0, NULL, 0,
  91. aucHeader, iWordVersion);
  92. vSetDefaultTabWidth(pFile, NULL,
  93. NULL, 0, NULL, 0,
  94. aucHeader, iWordVersion);
  95. vGetNotesInfo(pFile, NULL,
  96. NULL, 0, NULL, 0,
  97. aucHeader, iWordVersion);
  98. }
  99. return bSuccess ? iWordVersion : -1;
  100. } /* end of iInitDocumentDOS */