/*
 * wordole.c
 * Copyright (C) 1998-2004 A.J. van Os; Released under GPL
 *
 * Description:
 * Deal with the OLE internals of a MS Word file
 */
  8. #include <string.h>
  9. #include "antiword.h"
/*
 * Private type for Property Set Storage entries
 *
 * bGetPPS fills one of these for every 'PROPERTY_SET_STORAGE_SIZE' bytes
 * it reads; the offsets mentioned below are offsets within that raw entry.
 */
typedef struct pps_entry_tag {
	ULONG	ulNext;		/* link read from 0x48, or PPS_NUMBER_INVALID */
	ULONG	ulPrevious;	/* link read from 0x44, or PPS_NUMBER_INVALID */
	ULONG	ulDir;		/* link read from 0x4c, or PPS_NUMBER_INVALID */
	ULONG	ulSB;		/* value read from 0x74 (presumably start block) */
	ULONG	ulSize;		/* value read from 0x78 */
	int	iLevel;		/* INT_MAX until set by vComputePPSlevels */
	char	szName[32];	/* name as converted by vName2String */
	UCHAR	ucType;		/* byte read from 0x42; bGetPPS tests for 5 and 2 */
} pps_entry_type;
/*
 * Show that a PPS number or index should not be used
 *
 * A link field (ulNext, ulPrevious or ulDir) with this value is not
 * followed and is exempt from the range check on link fields.
 */
#define PPS_NUMBER_INVALID	0xffffffffUL
/*
 * Macro to make sure all such statements will be identical
 *
 * Calls vDestroySmallBlockList and then hands each of the five block
 * index arrays to xfree, storing the value xfree returns back into the
 * pointer. The macro refers to aulRootList, aulSbdList, aulBbdList,
 * aulSBD and aulBBD by name, so variables with exactly those names must
 * be in scope wherever it is expanded.
 */
#define FREE_ALL()		\
	do {\
		vDestroySmallBlockList();\
		aulRootList = xfree(aulRootList);\
		aulSbdList = xfree(aulSbdList);\
		aulBbdList = xfree(aulBbdList);\
		aulSBD = xfree(aulSBD);\
		aulBBD = xfree(aulBBD);\
	} while(0)
  33. /*
  34. * ulReadLong - read four bytes from the given file and offset
  35. */
  36. static ULONG
  37. ulReadLong(FILE *pFile, ULONG ulOffset)
  38. {
  39. UCHAR aucBytes[4];
  40. fail(pFile == NULL);
  41. if (!bReadBytes(aucBytes, 4, ulOffset, pFile)) {
  42. werr(1, "Read long 0x%lx not possible", ulOffset);
  43. }
  44. return ulGetLong(0, aucBytes);
  45. } /* end of ulReadLong */
  46. /*
  47. * vName2String - turn the name into a proper string.
  48. */
  49. static void
  50. vName2String(char *szName, const UCHAR *aucBytes, size_t tNameSize)
  51. {
  52. char *pcChar;
  53. size_t tIndex;
  54. fail(aucBytes == NULL || szName == NULL);
  55. if (tNameSize < 2) {
  56. szName[0] = '\0';
  57. return;
  58. }
  59. for (tIndex = 0, pcChar = szName;
  60. tIndex < 2 * tNameSize;
  61. tIndex += 2, pcChar++) {
  62. *pcChar = (char)aucBytes[tIndex];
  63. }
  64. szName[tNameSize - 1] = '\0';
  65. } /* end of vName2String */
  66. /*
  67. * tReadBlockIndices - read the Big/Small Block Depot indices
  68. *
  69. * Returns the number of indices read
  70. */
  71. static size_t
  72. tReadBlockIndices(FILE *pFile, ULONG *aulBlockDepot,
  73. size_t tMaxRec, ULONG ulOffset)
  74. {
  75. size_t tDone;
  76. int iIndex;
  77. UCHAR aucBytes[BIG_BLOCK_SIZE];
  78. fail(pFile == NULL || aulBlockDepot == NULL);
  79. fail(tMaxRec == 0);
  80. /* Read a big block with BBD or SBD indices */
  81. if (!bReadBytes(aucBytes, BIG_BLOCK_SIZE, ulOffset, pFile)) {
  82. werr(0, "Reading big block from 0x%lx is not possible",
  83. ulOffset);
  84. return 0;
  85. }
  86. /* Split the big block into indices, an index is four bytes */
  87. tDone = min(tMaxRec, (size_t)BIG_BLOCK_SIZE / 4);
  88. for (iIndex = 0; iIndex < (int)tDone; iIndex++) {
  89. aulBlockDepot[iIndex] = ulGetLong(4 * iIndex, aucBytes);
  90. NO_DBG_DEC(aulBlockDepot[iIndex]);
  91. }
  92. return tDone;
  93. } /* end of tReadBlockIndices */
  94. /*
  95. * bGetBBD - get the Big Block Depot indices from the index-blocks
  96. */
  97. static BOOL
  98. bGetBBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen,
  99. ULONG *aulBBD, size_t tBBDLen)
  100. {
  101. ULONG ulBegin;
  102. size_t tToGo, tDone;
  103. int iIndex;
  104. fail(pFile == NULL || aulDepot == NULL || aulBBD == NULL);
  105. DBG_MSG("bGetBBD");
  106. tToGo = tBBDLen;
  107. for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) {
  108. ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE;
  109. NO_DBG_HEX(ulBegin);
  110. tDone = tReadBlockIndices(pFile, aulBBD, tToGo, ulBegin);
  111. fail(tDone > tToGo);
  112. if (tDone == 0) {
  113. return FALSE;
  114. }
  115. aulBBD += tDone;
  116. tToGo -= tDone;
  117. }
  118. return tToGo == 0;
  119. } /* end of bGetBBD */
  120. /*
  121. * bGetSBD - get the Small Block Depot indices from the index-blocks
  122. */
  123. static BOOL
  124. bGetSBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen,
  125. ULONG *aulSBD, size_t tSBDLen)
  126. {
  127. ULONG ulBegin;
  128. size_t tToGo, tDone;
  129. int iIndex;
  130. fail(pFile == NULL || aulDepot == NULL || aulSBD == NULL);
  131. DBG_MSG("bGetSBD");
  132. tToGo = tSBDLen;
  133. for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) {
  134. fail(aulDepot[iIndex] >= ULONG_MAX / BIG_BLOCK_SIZE);
  135. ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE;
  136. NO_DBG_HEX(ulBegin);
  137. tDone = tReadBlockIndices(pFile, aulSBD, tToGo, ulBegin);
  138. fail(tDone > tToGo);
  139. if (tDone == 0) {
  140. return FALSE;
  141. }
  142. aulSBD += tDone;
  143. tToGo -= tDone;
  144. }
  145. return tToGo == 0;
  146. } /* end of bGetSBD */
  147. /*
  148. * vComputePPSlevels - compute the levels of the Property Set Storage entries
  149. */
  150. static void
  151. vComputePPSlevels(pps_entry_type *atPPSlist, pps_entry_type *pNode,
  152. int iLevel, int iRecursionLevel)
  153. {
  154. fail(atPPSlist == NULL || pNode == NULL);
  155. fail(iLevel < 0 || iRecursionLevel < 0);
  156. if (iRecursionLevel > 25) {
  157. /* This removes the possibility of an infinite recursion */
  158. DBG_DEC(iRecursionLevel);
  159. return;
  160. }
  161. if (pNode->iLevel <= iLevel) {
  162. /* Avoid entering a loop */
  163. DBG_DEC(iLevel);
  164. DBG_DEC(pNode->iLevel);
  165. return;
  166. }
  167. pNode->iLevel = iLevel;
  168. if (pNode->ulDir != PPS_NUMBER_INVALID) {
  169. vComputePPSlevels(atPPSlist,
  170. &atPPSlist[pNode->ulDir],
  171. iLevel + 1,
  172. iRecursionLevel + 1);
  173. }
  174. if (pNode->ulNext != PPS_NUMBER_INVALID) {
  175. vComputePPSlevels(atPPSlist,
  176. &atPPSlist[pNode->ulNext],
  177. iLevel,
  178. iRecursionLevel + 1);
  179. }
  180. if (pNode->ulPrevious != PPS_NUMBER_INVALID) {
  181. vComputePPSlevels(atPPSlist,
  182. &atPPSlist[pNode->ulPrevious],
  183. iLevel,
  184. iRecursionLevel + 1);
  185. }
  186. } /* end of vComputePPSlevels */
  187. /*
  188. * bGetPPS - search the Property Set Storage for three sets
  189. *
  190. * Return TRUE if the WordDocument PPS is found
  191. */
  192. static BOOL
  193. bGetPPS(FILE *pFile,
  194. const ULONG *aulRootList, size_t tRootListLen, pps_info_type *pPPS)
  195. {
  196. pps_entry_type *atPPSlist;
  197. ULONG ulBegin, ulOffset, ulTmp;
  198. size_t tNbrOfPPS, tNameSize;
  199. int iIndex, iStartBlock, iRootIndex;
  200. BOOL bWord, bExcel;
  201. UCHAR aucBytes[PROPERTY_SET_STORAGE_SIZE];
  202. fail(pFile == NULL || aulRootList == NULL || pPPS == NULL);
  203. DBG_MSG("bGetPPS");
  204. NO_DBG_DEC(tRootListLen);
  205. bWord = FALSE;
  206. bExcel = FALSE;
  207. (void)memset(pPPS, 0, sizeof(*pPPS));
  208. /* Read and store all the Property Set Storage entries */
  209. tNbrOfPPS = tRootListLen * BIG_BLOCK_SIZE / PROPERTY_SET_STORAGE_SIZE;
  210. atPPSlist = xcalloc(tNbrOfPPS, sizeof(pps_entry_type));
  211. iRootIndex = 0;
  212. for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
  213. ulTmp = (ULONG)iIndex * PROPERTY_SET_STORAGE_SIZE;
  214. iStartBlock = (int)(ulTmp / BIG_BLOCK_SIZE);
  215. ulOffset = ulTmp % BIG_BLOCK_SIZE;
  216. ulBegin = (aulRootList[iStartBlock] + 1) * BIG_BLOCK_SIZE +
  217. ulOffset;
  218. NO_DBG_HEX(ulBegin);
  219. if (!bReadBytes(aucBytes, PROPERTY_SET_STORAGE_SIZE,
  220. ulBegin, pFile)) {
  221. werr(0, "Reading PPS %d is not possible", iIndex);
  222. atPPSlist = xfree(atPPSlist);
  223. return FALSE;
  224. }
  225. tNameSize = (size_t)usGetWord(0x40, aucBytes);
  226. tNameSize = (tNameSize + 1) / 2;
  227. vName2String(atPPSlist[iIndex].szName, aucBytes, tNameSize);
  228. atPPSlist[iIndex].ucType = ucGetByte(0x42, aucBytes);
  229. if (atPPSlist[iIndex].ucType == 5) {
  230. iRootIndex = iIndex;
  231. }
  232. atPPSlist[iIndex].ulPrevious = ulGetLong(0x44, aucBytes);
  233. atPPSlist[iIndex].ulNext = ulGetLong(0x48, aucBytes);
  234. atPPSlist[iIndex].ulDir = ulGetLong(0x4c, aucBytes);
  235. atPPSlist[iIndex].ulSB = ulGetLong(0x74, aucBytes);
  236. atPPSlist[iIndex].ulSize = ulGetLong(0x78, aucBytes);
  237. atPPSlist[iIndex].iLevel = INT_MAX;
  238. if ((atPPSlist[iIndex].ulPrevious >= (ULONG)tNbrOfPPS &&
  239. atPPSlist[iIndex].ulPrevious != PPS_NUMBER_INVALID) ||
  240. (atPPSlist[iIndex].ulNext >= (ULONG)tNbrOfPPS &&
  241. atPPSlist[iIndex].ulNext != PPS_NUMBER_INVALID) ||
  242. (atPPSlist[iIndex].ulDir >= (ULONG)tNbrOfPPS &&
  243. atPPSlist[iIndex].ulDir != PPS_NUMBER_INVALID)) {
  244. DBG_DEC(iIndex);
  245. DBG_DEC(atPPSlist[iIndex].ulPrevious);
  246. DBG_DEC(atPPSlist[iIndex].ulNext);
  247. DBG_DEC(atPPSlist[iIndex].ulDir);
  248. DBG_DEC(tNbrOfPPS);
  249. werr(0, "The Property Set Storage is damaged");
  250. atPPSlist = xfree(atPPSlist);
  251. return FALSE;
  252. }
  253. }
  254. #if 0 /* defined(DEBUG) */
  255. DBG_MSG("Before");
  256. for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
  257. DBG_MSG(atPPSlist[iIndex].szName);
  258. DBG_HEX(atPPSlist[iIndex].ulDir);
  259. DBG_HEX(atPPSlist[iIndex].ulPrevious);
  260. DBG_HEX(atPPSlist[iIndex].ulNext);
  261. DBG_DEC(atPPSlist[iIndex].ulSB);
  262. DBG_HEX(atPPSlist[iIndex].ulSize);
  263. DBG_DEC(atPPSlist[iIndex].iLevel);
  264. }
  265. #endif /* DEBUG */
  266. /* Add level information to each entry */
  267. vComputePPSlevels(atPPSlist, &atPPSlist[iRootIndex], 0, 0);
  268. /* Check the entries on level 1 for the required information */
  269. NO_DBG_MSG("After");
  270. for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
  271. #if 0 /* defined(DEBUG) */
  272. DBG_MSG(atPPSlist[iIndex].szName);
  273. DBG_HEX(atPPSlist[iIndex].ulDir);
  274. DBG_HEX(atPPSlist[iIndex].ulPrevious);
  275. DBG_HEX(atPPSlist[iIndex].ulNext);
  276. DBG_DEC(atPPSlist[iIndex].ulSB);
  277. DBG_HEX(atPPSlist[iIndex].ulSize);
  278. DBG_DEC(atPPSlist[iIndex].iLevel);
  279. #endif /* DEBUG */
  280. if (atPPSlist[iIndex].iLevel != 1 ||
  281. atPPSlist[iIndex].ucType != 2 ||
  282. atPPSlist[iIndex].szName[0] == '\0' ||
  283. atPPSlist[iIndex].ulSize == 0) {
  284. /* This entry can be ignored */
  285. continue;
  286. }
  287. if (pPPS->tWordDocument.ulSize == 0 &&
  288. STREQ(atPPSlist[iIndex].szName, "WordDocument")) {
  289. pPPS->tWordDocument.ulSB = atPPSlist[iIndex].ulSB;
  290. pPPS->tWordDocument.ulSize = atPPSlist[iIndex].ulSize;
  291. bWord = TRUE;
  292. } else if (pPPS->tData.ulSize == 0 &&
  293. STREQ(atPPSlist[iIndex].szName, "Data")) {
  294. pPPS->tData.ulSB = atPPSlist[iIndex].ulSB;
  295. pPPS->tData.ulSize = atPPSlist[iIndex].ulSize;
  296. } else if (pPPS->t0Table.ulSize == 0 &&
  297. STREQ(atPPSlist[iIndex].szName, "0Table")) {
  298. pPPS->t0Table.ulSB = atPPSlist[iIndex].ulSB;
  299. pPPS->t0Table.ulSize = atPPSlist[iIndex].ulSize;
  300. } else if (pPPS->t1Table.ulSize == 0 &&
  301. STREQ(atPPSlist[iIndex].szName, "1Table")) {
  302. pPPS->t1Table.ulSB = atPPSlist[iIndex].ulSB;
  303. pPPS->t1Table.ulSize = atPPSlist[iIndex].ulSize;
  304. } else if (pPPS->tSummaryInfo.ulSize == 0 &&
  305. STREQ(atPPSlist[iIndex].szName,
  306. "\005SummaryInformation")) {
  307. pPPS->tSummaryInfo.ulSB = atPPSlist[iIndex].ulSB;
  308. pPPS->tSummaryInfo.ulSize = atPPSlist[iIndex].ulSize;
  309. } else if (pPPS->tDocSummaryInfo.ulSize == 0 &&
  310. STREQ(atPPSlist[iIndex].szName,
  311. "\005DocumentSummaryInformation")) {
  312. pPPS->tDocSummaryInfo.ulSB = atPPSlist[iIndex].ulSB;
  313. pPPS->tDocSummaryInfo.ulSize = atPPSlist[iIndex].ulSize;
  314. } else if (STREQ(atPPSlist[iIndex].szName, "Book") ||
  315. STREQ(atPPSlist[iIndex].szName, "Workbook")) {
  316. bExcel = TRUE;
  317. }
  318. }
  319. /* Free the space for the Property Set Storage entries */
  320. atPPSlist = xfree(atPPSlist);
  321. /* Draw your conclusions */
  322. if (bWord) {
  323. return TRUE;
  324. }
  325. if (bExcel) {
  326. werr(0, "Sorry, but this is an Excel spreadsheet");
  327. } else {
  328. werr(0, "This OLE file does not contain a Word document");
  329. }
  330. return FALSE;
  331. } /* end of bGetPPS */
  332. /*
  333. * vGetBbdList - make a list of the places to find big blocks
  334. */
  335. static void
  336. vGetBbdList(FILE *pFile, int iNbr, ULONG *aulBbdList, ULONG ulOffset)
  337. {
  338. int iIndex;
  339. fail(pFile == NULL);
  340. fail(iNbr > 127);
  341. fail(aulBbdList == NULL);
  342. NO_DBG_DEC(iNbr);
  343. for (iIndex = 0; iIndex < iNbr; iIndex++) {
  344. aulBbdList[iIndex] =
  345. ulReadLong(pFile, ulOffset + 4 * (ULONG)iIndex);
  346. NO_DBG_DEC(iIndex);
  347. NO_DBG_HEX(aulBbdList[iIndex]);
  348. }
  349. } /* end of vGetBbdList */
  350. /*
  351. * bGetDocumentText - make a list of the text blocks of a Word document
  352. *
  353. * Return TRUE when succesful, otherwise FALSE
  354. */
  355. static BOOL
  356. bGetDocumentText(FILE *pFile, const pps_info_type *pPPS,
  357. const ULONG *aulBBD, size_t tBBDLen,
  358. const ULONG *aulSBD, size_t tSBDLen,
  359. const UCHAR *aucHeader, int iWordVersion)
  360. {
  361. ULONG ulBeginOfText;
  362. ULONG ulTextLen, ulFootnoteLen, ulEndnoteLen;
  363. ULONG ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
  364. ULONG ulTextBoxLen, ulHdrTextBoxLen;
  365. UINT uiQuickSaves;
  366. BOOL bFarEastWord, bTemplate, bFastSaved, bEncrypted, bSuccess;
  367. USHORT usIdent, usDocStatus;
  368. fail(pFile == NULL || pPPS == NULL);
  369. fail(aulBBD == NULL);
  370. fail(aulSBD == NULL);
  371. DBG_MSG("bGetDocumentText");
  372. /* Get the "magic number" from the header */
  373. usIdent = usGetWord(0x00, aucHeader);
  374. DBG_HEX(usIdent);
  375. bFarEastWord = usIdent == 0x8098 || usIdent == 0x8099 ||
  376. usIdent == 0xa697 || usIdent == 0xa699;
  377. /* Get the status flags from the header */
  378. usDocStatus = usGetWord(0x0a, aucHeader);
  379. DBG_HEX(usDocStatus);
  380. bTemplate = (usDocStatus & BIT(0)) != 0;
  381. DBG_MSG_C(bTemplate, "This document is a Template");
  382. bFastSaved = (usDocStatus & BIT(2)) != 0;
  383. uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
  384. DBG_MSG_C(bFastSaved, "This document is Fast Saved");
  385. DBG_DEC_C(bFastSaved, uiQuickSaves);
  386. bEncrypted = (usDocStatus & BIT(8)) != 0;
  387. if (bEncrypted) {
  388. werr(0, "Encrypted documents are not supported");
  389. return FALSE;
  390. }
  391. /* Get length information */
  392. ulBeginOfText = ulGetLong(0x18, aucHeader);
  393. DBG_HEX(ulBeginOfText);
  394. switch (iWordVersion) {
  395. case 6:
  396. case 7:
  397. ulTextLen = ulGetLong(0x34, aucHeader);
  398. ulFootnoteLen = ulGetLong(0x38, aucHeader);
  399. ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
  400. ulMacroLen = ulGetLong(0x40, aucHeader);
  401. ulAnnotationLen = ulGetLong(0x44, aucHeader);
  402. ulEndnoteLen = ulGetLong(0x48, aucHeader);
  403. ulTextBoxLen = ulGetLong(0x4c, aucHeader);
  404. ulHdrTextBoxLen = ulGetLong(0x50, aucHeader);
  405. break;
  406. case 8:
  407. ulTextLen = ulGetLong(0x4c, aucHeader);
  408. ulFootnoteLen = ulGetLong(0x50, aucHeader);
  409. ulHdrFtrLen = ulGetLong(0x54, aucHeader);
  410. ulMacroLen = ulGetLong(0x58, aucHeader);
  411. ulAnnotationLen = ulGetLong(0x5c, aucHeader);
  412. ulEndnoteLen = ulGetLong(0x60, aucHeader);
  413. ulTextBoxLen = ulGetLong(0x64, aucHeader);
  414. ulHdrTextBoxLen = ulGetLong(0x68, aucHeader);
  415. break;
  416. default:
  417. werr(0, "This version of Word is not supported");
  418. return FALSE;
  419. }
  420. DBG_DEC(ulTextLen);
  421. DBG_DEC(ulFootnoteLen);
  422. DBG_DEC(ulHdrFtrLen);
  423. DBG_DEC(ulMacroLen);
  424. DBG_DEC(ulAnnotationLen);
  425. DBG_DEC(ulEndnoteLen);
  426. DBG_DEC(ulTextBoxLen);
  427. DBG_DEC(ulHdrTextBoxLen);
  428. /* Make a list of the text blocks */
  429. switch (iWordVersion) {
  430. case 6:
  431. case 7:
  432. if (bFastSaved) {
  433. bSuccess = bGet6DocumentText(pFile,
  434. bFarEastWord,
  435. pPPS->tWordDocument.ulSB,
  436. aulBBD, tBBDLen,
  437. aucHeader);
  438. } else {
  439. bSuccess = bAddTextBlocks(ulBeginOfText,
  440. ulTextLen +
  441. ulFootnoteLen +
  442. ulHdrFtrLen +
  443. ulMacroLen + ulAnnotationLen +
  444. ulEndnoteLen +
  445. ulTextBoxLen + ulHdrTextBoxLen,
  446. bFarEastWord,
  447. IGNORE_PROPMOD,
  448. pPPS->tWordDocument.ulSB,
  449. aulBBD, tBBDLen);
  450. }
  451. break;
  452. case 8:
  453. bSuccess = bGet8DocumentText(pFile,
  454. pPPS,
  455. aulBBD, tBBDLen, aulSBD, tSBDLen,
  456. aucHeader);
  457. break;
  458. default:
  459. werr(0, "This version of Word is not supported");
  460. bSuccess = FALSE;
  461. break;
  462. }
  463. if (bSuccess) {
  464. vSplitBlockList(pFile,
  465. ulTextLen,
  466. ulFootnoteLen,
  467. ulHdrFtrLen,
  468. ulMacroLen,
  469. ulAnnotationLen,
  470. ulEndnoteLen,
  471. ulTextBoxLen,
  472. ulHdrTextBoxLen,
  473. !bFastSaved && iWordVersion == 8);
  474. } else {
  475. vDestroyTextBlockList();
  476. werr(0, "I can't find the text of this document");
  477. }
  478. return bSuccess;
  479. } /* end of bGetDocumentText */
  480. /*
  481. * vGetDocumentData - make a list of the data blocks of a Word document
  482. */
  483. static void
  484. vGetDocumentData(FILE *pFile, const pps_info_type *pPPS,
  485. const ULONG *aulBBD, size_t tBBDLen,
  486. const UCHAR *aucHeader, int iWordVersion)
  487. {
  488. options_type tOptions;
  489. ULONG ulBeginOfText;
  490. BOOL bFastSaved, bHasImages, bSuccess;
  491. USHORT usDocStatus;
  492. fail(pFile == NULL);
  493. fail(pPPS == NULL);
  494. fail(aulBBD == NULL);
  495. /* Get the options */
  496. vGetOptions(&tOptions);
  497. /* Get the status flags from the header */
  498. usDocStatus = usGetWord(0x0a, aucHeader);
  499. DBG_HEX(usDocStatus);
  500. bFastSaved = (usDocStatus & BIT(2)) != 0;
  501. bHasImages = (usDocStatus & BIT(3)) != 0;
  502. if (!bHasImages ||
  503. tOptions.eConversionType == conversion_text ||
  504. tOptions.eConversionType == conversion_fmt_text ||
  505. tOptions.eConversionType == conversion_xml ||
  506. tOptions.eImageLevel == level_no_images) {
  507. /*
  508. * No images in the document or text-only output or
  509. * no images wanted, so no data blocks will be needed
  510. */
  511. vDestroyDataBlockList();
  512. return;
  513. }
  514. /* Get length information */
  515. ulBeginOfText = ulGetLong(0x18, aucHeader);
  516. DBG_HEX(ulBeginOfText);
  517. /* Make a list of the data blocks */
  518. switch (iWordVersion) {
  519. case 6:
  520. case 7:
  521. /*
  522. * The data blocks are in the text stream. The text stream
  523. * is in "fast saved" format or "normal saved" format
  524. */
  525. if (bFastSaved) {
  526. bSuccess = bGet6DocumentData(pFile,
  527. pPPS->tWordDocument.ulSB,
  528. aulBBD, tBBDLen,
  529. aucHeader);
  530. } else {
  531. bSuccess = bAddDataBlocks(ulBeginOfText,
  532. (ULONG)LONG_MAX,
  533. pPPS->tWordDocument.ulSB,
  534. aulBBD, tBBDLen);
  535. }
  536. break;
  537. case 8:
  538. /*
  539. * The data blocks are in the data stream. The data stream
  540. * is always in "normal saved" format
  541. */
  542. bSuccess = bAddDataBlocks(0, (ULONG)LONG_MAX,
  543. pPPS->tData.ulSB, aulBBD, tBBDLen);
  544. break;
  545. default:
  546. werr(0, "This version of Word is not supported");
  547. bSuccess = FALSE;
  548. break;
  549. }
  550. if (!bSuccess) {
  551. vDestroyDataBlockList();
  552. werr(0, "I can't find the data of this document");
  553. }
  554. } /* end of vGetDocumentData */
  555. /*
  556. * iInitDocumentOLE - initialize an OLE document
  557. *
  558. * Returns the version of Word that made the document or -1
  559. */
  560. int
  561. iInitDocumentOLE(FILE *pFile, long lFilesize)
  562. {
  563. pps_info_type PPS_info;
  564. ULONG *aulBBD, *aulSBD;
  565. ULONG *aulRootList, *aulBbdList, *aulSbdList;
  566. ULONG ulBdbListStart, ulAdditionalBBDlist;
  567. ULONG ulRootStartblock, ulSbdStartblock, ulSBLstartblock;
  568. ULONG ulStart, ulTmp;
  569. long lMaxBlock;
  570. size_t tBBDLen, tSBDLen, tNumBbdBlocks, tRootListLen;
  571. int iWordVersion, iIndex, iToGo;
  572. BOOL bSuccess;
  573. USHORT usIdent, usDocStatus;
  574. UCHAR aucHeader[HEADER_SIZE];
  575. fail(pFile == NULL);
  576. lMaxBlock = lFilesize / BIG_BLOCK_SIZE - 2;
  577. DBG_DEC(lMaxBlock);
  578. if (lMaxBlock < 1) {
  579. return -1;
  580. }
  581. tBBDLen = (size_t)(lMaxBlock + 1);
  582. tNumBbdBlocks = (size_t)ulReadLong(pFile, 0x2c);
  583. DBG_DEC(tNumBbdBlocks);
  584. ulRootStartblock = ulReadLong(pFile, 0x30);
  585. DBG_DEC(ulRootStartblock);
  586. ulSbdStartblock = ulReadLong(pFile, 0x3c);
  587. DBG_DEC(ulSbdStartblock);
  588. ulAdditionalBBDlist = ulReadLong(pFile, 0x44);
  589. DBG_HEX(ulAdditionalBBDlist);
  590. ulSBLstartblock = ulReadLong(pFile,
  591. (ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x74);
  592. DBG_DEC(ulSBLstartblock);
  593. tSBDLen = (size_t)(ulReadLong(pFile,
  594. (ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x78) /
  595. SMALL_BLOCK_SIZE);
  596. /* All to be xcalloc-ed pointers to NULL */
  597. aulRootList = NULL;
  598. aulSbdList = NULL;
  599. aulBbdList = NULL;
  600. aulSBD = NULL;
  601. aulBBD = NULL;
  602. /* Big Block Depot */
  603. aulBbdList = xcalloc(tNumBbdBlocks, sizeof(ULONG));
  604. aulBBD = xcalloc(tBBDLen, sizeof(ULONG));
  605. iToGo = (int)tNumBbdBlocks;
  606. vGetBbdList(pFile, min(iToGo, 109), aulBbdList, 0x4c);
  607. ulStart = 109;
  608. iToGo -= 109;
  609. while (ulAdditionalBBDlist != END_OF_CHAIN && iToGo > 0) {
  610. ulBdbListStart = (ulAdditionalBBDlist + 1) * BIG_BLOCK_SIZE;
  611. vGetBbdList(pFile, min(iToGo, 127),
  612. aulBbdList + ulStart, ulBdbListStart);
  613. ulAdditionalBBDlist = ulReadLong(pFile,
  614. ulBdbListStart + 4 * 127);
  615. DBG_DEC(ulAdditionalBBDlist);
  616. DBG_HEX(ulAdditionalBBDlist);
  617. ulStart += 127;
  618. iToGo -= 127;
  619. }
  620. if (!bGetBBD(pFile, aulBbdList, tNumBbdBlocks, aulBBD, tBBDLen)) {
  621. FREE_ALL();
  622. return -1;
  623. }
  624. aulBbdList = xfree(aulBbdList);
  625. /* Small Block Depot */
  626. aulSbdList = xcalloc(tBBDLen, sizeof(ULONG));
  627. aulSBD = xcalloc(tSBDLen, sizeof(ULONG));
  628. for (iIndex = 0, ulTmp = ulSbdStartblock;
  629. iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
  630. iIndex++, ulTmp = aulBBD[ulTmp]) {
  631. if (ulTmp >= (ULONG)tBBDLen) {
  632. DBG_DEC(ulTmp);
  633. DBG_DEC(tBBDLen);
  634. werr(1, "The Big Block Depot is damaged");
  635. }
  636. aulSbdList[iIndex] = ulTmp;
  637. NO_DBG_HEX(aulSbdList[iIndex]);
  638. }
  639. if (!bGetSBD(pFile, aulSbdList, tBBDLen, aulSBD, tSBDLen)) {
  640. FREE_ALL();
  641. return -1;
  642. }
  643. aulSbdList = xfree(aulSbdList);
  644. /* Root list */
  645. for (tRootListLen = 0, ulTmp = ulRootStartblock;
  646. tRootListLen < tBBDLen && ulTmp != END_OF_CHAIN;
  647. tRootListLen++, ulTmp = aulBBD[ulTmp]) {
  648. if (ulTmp >= (ULONG)tBBDLen) {
  649. DBG_DEC(ulTmp);
  650. DBG_DEC(tBBDLen);
  651. werr(1, "The Big Block Depot is damaged");
  652. }
  653. }
  654. if (tRootListLen == 0) {
  655. werr(0, "No Rootlist found");
  656. FREE_ALL();
  657. return -1;
  658. }
  659. aulRootList = xcalloc(tRootListLen, sizeof(ULONG));
  660. for (iIndex = 0, ulTmp = ulRootStartblock;
  661. iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
  662. iIndex++, ulTmp = aulBBD[ulTmp]) {
  663. if (ulTmp >= (ULONG)tBBDLen) {
  664. DBG_DEC(ulTmp);
  665. DBG_DEC(tBBDLen);
  666. werr(1, "The Big Block Depot is damaged");
  667. }
  668. aulRootList[iIndex] = ulTmp;
  669. NO_DBG_DEC(aulRootList[iIndex]);
  670. }
  671. fail(tRootListLen != (size_t)iIndex);
  672. bSuccess = bGetPPS(pFile, aulRootList, tRootListLen, &PPS_info);
  673. aulRootList = xfree(aulRootList);
  674. if (!bSuccess) {
  675. FREE_ALL();
  676. return -1;
  677. }
  678. /* Small block list */
  679. if (!bCreateSmallBlockList(ulSBLstartblock, aulBBD, tBBDLen)) {
  680. FREE_ALL();
  681. return -1;
  682. }
  683. if (PPS_info.tWordDocument.ulSize < MIN_SIZE_FOR_BBD_USE) {
  684. DBG_DEC(PPS_info.tWordDocument.ulSize);
  685. FREE_ALL();
  686. werr(0, "I'm afraid the text stream of this file "
  687. "is too small to handle.");
  688. return -1;
  689. }
  690. /* Read the headerblock */
  691. if (!bReadBuffer(pFile, PPS_info.tWordDocument.ulSB,
  692. aulBBD, tBBDLen, BIG_BLOCK_SIZE,
  693. aucHeader, 0, HEADER_SIZE)) {
  694. FREE_ALL();
  695. return -1;
  696. }
  697. usIdent = usGetWord(0x00, aucHeader);
  698. DBG_HEX(usIdent);
  699. fail(usIdent != 0x8098 && /* Word 7 for oriental languages */
  700. usIdent != 0x8099 && /* Word 7 for oriental languages */
  701. usIdent != 0xa5dc && /* Word 6 & 7 */
  702. usIdent != 0xa5ec && /* Word 7 & 97 & 98 */
  703. usIdent != 0xa697 && /* Word 7 for oriental languages */
  704. usIdent != 0xa699); /* Word 7 for oriental languages */
  705. iWordVersion = iGetVersionNumber(aucHeader);
  706. if (iWordVersion < 6) {
  707. FREE_ALL();
  708. werr(0, "This file is from a version of Word before Word 6.");
  709. return -1;
  710. }
  711. /* Get the status flags from the header */
  712. usDocStatus = usGetWord(0x0a, aucHeader);
  713. if (usDocStatus & BIT(9)) {
  714. PPS_info.tTable = PPS_info.t1Table;
  715. } else {
  716. PPS_info.tTable = PPS_info.t0Table;
  717. }
  718. /* Clean the entries that should not be used */
  719. memset(&PPS_info.t0Table, 0, sizeof(PPS_info.t0Table));
  720. memset(&PPS_info.t1Table, 0, sizeof(PPS_info.t1Table));
  721. bSuccess = bGetDocumentText(pFile, &PPS_info,
  722. aulBBD, tBBDLen, aulSBD, tSBDLen,
  723. aucHeader, iWordVersion);
  724. if (bSuccess) {
  725. vGetDocumentData(pFile, &PPS_info,
  726. aulBBD, tBBDLen, aucHeader, iWordVersion);
  727. vGetPropertyInfo(pFile, &PPS_info,
  728. aulBBD, tBBDLen, aulSBD, tSBDLen,
  729. aucHeader, iWordVersion);
  730. vSetDefaultTabWidth(pFile, &PPS_info,
  731. aulBBD, tBBDLen, aulSBD, tSBDLen,
  732. aucHeader, iWordVersion);
  733. vGetNotesInfo(pFile, &PPS_info,
  734. aulBBD, tBBDLen, aulSBD, tSBDLen,
  735. aucHeader, iWordVersion);
  736. }
  737. FREE_ALL();
  738. return bSuccess ? iWordVersion : -1;
  739. } /* end of iInitDocumentOLE */