prop0.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489
  1. /*
  2. * prop0.c
  3. * Copyright (C) 2002-2004 A.J. van Os; Released under GNU GPL
  4. *
  5. * Description:
  6. * Read the property information from a Word for DOS file
  7. */
#include <ctype.h>
#include <string.h>
#include <time.h>
#include "antiword.h"
  11. /*
  12. * tConvertDosDate - convert DOS date format
  13. *
  14. * returns Unix time_t or -1
  15. */
  16. static time_t
  17. tConvertDosDate(const char *szDosDate)
  18. {
  19. struct tm tTime;
  20. const char *pcTmp;
  21. time_t tResult;
  22. memset(&tTime, 0, sizeof(tTime));
  23. pcTmp = szDosDate;
  24. /* Get the month */
  25. if (!isdigit(*pcTmp)) {
  26. return (time_t)-1;
  27. }
  28. tTime.tm_mon = (int)(*pcTmp - '0');
  29. pcTmp++;
  30. if (isdigit(*pcTmp)) {
  31. tTime.tm_mon *= 10;
  32. tTime.tm_mon += (int)(*pcTmp - '0');
  33. pcTmp++;
  34. }
  35. /* Get the first separater */
  36. if (isalnum(*pcTmp)) {
  37. return (time_t)-1;
  38. }
  39. pcTmp++;
  40. /* Get the day */
  41. if (!isdigit(*pcTmp)) {
  42. return (time_t)-1;
  43. }
  44. tTime.tm_mday = (int)(*pcTmp - '0');
  45. pcTmp++;
  46. if (isdigit(*pcTmp)) {
  47. tTime.tm_mday *= 10;
  48. tTime.tm_mday += (int)(*pcTmp - '0');
  49. pcTmp++;
  50. }
  51. /* Get the second separater */
  52. if (isalnum(*pcTmp)) {
  53. return (time_t)-1;
  54. }
  55. pcTmp++;
  56. /* Get the year */
  57. if (!isdigit(*pcTmp)) {
  58. return (time_t)-1;
  59. }
  60. tTime.tm_year = (int)(*pcTmp - '0');
  61. pcTmp++;
  62. if (isdigit(*pcTmp)) {
  63. tTime.tm_year *= 10;
  64. tTime.tm_year += (int)(*pcTmp - '0');
  65. pcTmp++;
  66. }
  67. /* Check the values */
  68. if (tTime.tm_mon == 0 || tTime.tm_mday == 0 || tTime.tm_mday > 31) {
  69. return (time_t)-1;
  70. }
  71. /* Correct the values */
  72. tTime.tm_mon--; /* From 01-12 to 00-11 */
  73. if (tTime.tm_year < 80) {
  74. tTime.tm_year += 100; /* 00 means 2000 is 100 */
  75. }
  76. tTime.tm_isdst = -1;
  77. tResult = mktime(&tTime);
  78. NO_DBG_MSG(ctime(&tResult));
  79. return tResult;
  80. } /* end of tConvertDosDate */
  81. /*
  82. * Build the lists with Document Property Information for Word for DOS files
  83. */
  84. void
  85. vGet0DopInfo(FILE *pFile, const UCHAR *aucHeader)
  86. {
  87. document_block_type tDocument;
  88. UCHAR *aucBuffer;
  89. ULONG ulBeginSumdInfo, ulBeginNextBlock;
  90. size_t tLen;
  91. USHORT usOffset;
  92. tDocument.ucHdrFtrSpecification = 0;
  93. tDocument.usDefaultTabWidth = usGetWord(0x70, aucHeader); /* dxaTab */
  94. tDocument.tCreateDate = (time_t)-1;
  95. tDocument.tRevisedDate = (time_t)-1;
  96. ulBeginSumdInfo = 128 * (ULONG)usGetWord(0x1c, aucHeader);
  97. DBG_HEX(ulBeginSumdInfo);
  98. ulBeginNextBlock = 128 * (ULONG)usGetWord(0x6a, aucHeader);
  99. DBG_HEX(ulBeginNextBlock);
  100. if (ulBeginSumdInfo < ulBeginNextBlock && ulBeginNextBlock != 0) {
  101. /* There is a summary information block */
  102. tLen = (size_t)(ulBeginNextBlock - ulBeginSumdInfo);
  103. aucBuffer = xmalloc(tLen);
  104. /* Read the summary information block */
  105. if (bReadBytes(aucBuffer, tLen, ulBeginSumdInfo, pFile)) {
  106. usOffset = usGetWord(12, aucBuffer);
  107. if (aucBuffer[usOffset] != 0) {
  108. NO_DBG_STRN(aucBuffer + usOffset, 8);
  109. tDocument.tRevisedDate =
  110. tConvertDosDate((char *)aucBuffer + usOffset);
  111. }
  112. usOffset = usGetWord(14, aucBuffer);
  113. if (aucBuffer[usOffset] != 0) {
  114. NO_DBG_STRN(aucBuffer + usOffset, 8);
  115. tDocument.tCreateDate =
  116. tConvertDosDate((char *)aucBuffer + usOffset);
  117. }
  118. }
  119. aucBuffer = xfree(aucBuffer);
  120. }
  121. vCreateDocumentInfoList(&tDocument);
  122. } /* end of vGet0DopInfo */
  123. /*
  124. * Fill the section information block with information
  125. * from a Word for DOS file.
  126. */
  127. static void
  128. vGet0SectionInfo(const UCHAR *aucGrpprl, size_t tBytes,
  129. section_block_type *pSection)
  130. {
  131. USHORT usCcol;
  132. UCHAR ucTmp;
  133. fail(aucGrpprl == NULL || pSection == NULL);
  134. if (tBytes < 2) {
  135. return;
  136. }
  137. /* bkc */
  138. ucTmp = ucGetByte(1, aucGrpprl);
  139. DBG_HEX(ucTmp);
  140. ucTmp &= 0x07;
  141. DBG_HEX(ucTmp);
  142. pSection->bNewPage = ucTmp != 0 && ucTmp != 1;
  143. if (tBytes < 18) {
  144. return;
  145. }
  146. /* ccolM1 */
  147. usCcol = (USHORT)ucGetByte(17, aucGrpprl);
  148. DBG_DEC(usCcol);
  149. } /* end of vGet0SectionInfo */
  150. /*
  151. * Build the lists with Section Property Information for Word for DOS files
  152. */
  153. void
  154. vGet0SepInfo(FILE *pFile, const UCHAR *aucHeader)
  155. {
  156. section_block_type tSection;
  157. UCHAR *aucBuffer;
  158. ULONG ulBeginOfText, ulTextOffset, ulBeginSectInfo;
  159. ULONG ulCharPos, ulSectPage, ulBeginNextBlock;
  160. size_t tSectInfoLen, tIndex, tSections, tBytes;
  161. UCHAR aucTmp[2], aucFpage[35];
  162. fail(pFile == NULL || aucHeader == NULL);
  163. ulBeginOfText = 128;
  164. NO_DBG_HEX(ulBeginOfText);
  165. ulBeginSectInfo = 128 * (ULONG)usGetWord(0x18, aucHeader);
  166. DBG_HEX(ulBeginSectInfo);
  167. ulBeginNextBlock = 128 * (ULONG)usGetWord(0x1a, aucHeader);
  168. DBG_HEX(ulBeginNextBlock);
  169. if (ulBeginSectInfo == ulBeginNextBlock) {
  170. /* There is no section information block */
  171. return;
  172. }
  173. /* Get the the number of sections */
  174. if (!bReadBytes(aucTmp, 2, ulBeginSectInfo, pFile)) {
  175. return;
  176. }
  177. tSections = (size_t)usGetWord(0, aucTmp);
  178. NO_DBG_DEC(tSections);
  179. /* Read the Section Descriptors */
  180. tSectInfoLen = 10 * tSections;
  181. NO_DBG_DEC(tSectInfoLen);
  182. aucBuffer = xmalloc(tSectInfoLen);
  183. if (!bReadBytes(aucBuffer, tSectInfoLen, ulBeginSectInfo + 4, pFile)) {
  184. aucBuffer = xfree(aucBuffer);
  185. return;
  186. }
  187. NO_DBG_PRINT_BLOCK(aucBuffer, tSectInfoLen);
  188. /* Read the Section Properties */
  189. for (tIndex = 0; tIndex < tSections; tIndex++) {
  190. ulTextOffset = ulGetLong(10 * tIndex, aucBuffer);
  191. NO_DBG_HEX(ulTextOffset);
  192. ulCharPos = ulBeginOfText + ulTextOffset;
  193. NO_DBG_HEX(ulTextOffset);
  194. ulSectPage = ulGetLong(10 * tIndex + 6, aucBuffer);
  195. NO_DBG_HEX(ulSectPage);
  196. if (ulSectPage == FC_INVALID || /* Must use defaults */
  197. ulSectPage < 128 || /* Should not happen */
  198. ulSectPage >= ulBeginSectInfo) { /* Should not happen */
  199. DBG_HEX_C(ulSectPage != FC_INVALID, ulSectPage);
  200. vDefault2SectionInfoList(ulCharPos);
  201. continue;
  202. }
  203. /* Get the number of bytes to read */
  204. if (!bReadBytes(aucTmp, 1, ulSectPage, pFile)) {
  205. continue;
  206. }
  207. tBytes = 1 + (size_t)ucGetByte(0, aucTmp);
  208. NO_DBG_DEC(tBytes);
  209. if (tBytes > sizeof(aucFpage)) {
  210. DBG_DEC(tBytes);
  211. tBytes = sizeof(aucFpage);
  212. }
  213. /* Read the bytes */
  214. if (!bReadBytes(aucFpage, tBytes, ulSectPage, pFile)) {
  215. continue;
  216. }
  217. NO_DBG_PRINT_BLOCK(aucFpage, tBytes);
  218. /* Process the bytes */
  219. vGetDefaultSection(&tSection);
  220. vGet0SectionInfo(aucFpage + 1, tBytes - 1, &tSection);
  221. vAdd2SectionInfoList(&tSection, ulCharPos);
  222. }
  223. /* Clean up before you leave */
  224. aucBuffer = xfree(aucBuffer);
  225. } /* end of vGet0SepInfo */
  226. /*
  227. * Fill the style information block with information
  228. * from a Word for DOS file.
  229. */
  230. static void
  231. vGet0StyleInfo(int iFodo, const UCHAR *aucGrpprl, style_block_type *pStyle)
  232. {
  233. int iBytes;
  234. UCHAR ucTmp;
  235. fail(iFodo <= 0 || aucGrpprl == NULL || pStyle == NULL);
  236. pStyle->usIstdNext = ISTD_NORMAL;
  237. iBytes = (int)ucGetByte(iFodo, aucGrpprl);
  238. if (iBytes < 1) {
  239. return;
  240. }
  241. /* stc if styled */
  242. ucTmp = ucGetByte(iFodo + 1, aucGrpprl);
  243. if ((ucTmp & BIT(0)) != 0) {
  244. ucTmp >>= 1;
  245. if (ucTmp >= 88 && ucTmp <= 94) {
  246. /* Header levels 1 through 7 */
  247. pStyle->usIstd = ucTmp - 87;
  248. pStyle->ucNumLevel = 1;
  249. }
  250. }
  251. if (iBytes < 2) {
  252. return;
  253. }
  254. /* jc */
  255. ucTmp = ucGetByte(iFodo + 2, aucGrpprl);
  256. pStyle->ucAlignment = ucTmp & 0x02;
  257. if (iBytes < 3) {
  258. return;
  259. }
  260. /* stc */
  261. ucTmp = ucGetByte(iFodo + 3, aucGrpprl);
  262. ucTmp &= 0x7f;
  263. if (ucTmp >= 88 && ucTmp <= 94) {
  264. /* Header levels 1 through 7 */
  265. pStyle->usIstd = ucTmp - 87;
  266. pStyle->ucNumLevel = 1;
  267. }
  268. if (iBytes < 6) {
  269. return;
  270. }
  271. /* dxaRight */
  272. pStyle->sRightIndent = (short)usGetWord(iFodo + 5, aucGrpprl);
  273. NO_DBG_DEC(pStyle->sRightIndent);
  274. if (iBytes < 8) {
  275. return;
  276. }
  277. /* dxaLeft */
  278. pStyle->sLeftIndent = (short)usGetWord(iFodo + 7, aucGrpprl);
  279. NO_DBG_DEC(pStyle->sLeftIndent);
  280. if (iBytes < 10) {
  281. return;
  282. }
  283. /* dxaLeft1 */
  284. pStyle->sLeftIndent1 = (short)usGetWord(iFodo + 9, aucGrpprl);
  285. NO_DBG_DEC(pStyle->sLeftIndent1);
  286. if (iBytes < 14) {
  287. return;
  288. }
  289. /* dyaBefore */
  290. pStyle->usBeforeIndent = usGetWord(iFodo + 13, aucGrpprl);
  291. NO_DBG_DEC(pStyle->usBeforeIndent);
  292. if (iBytes < 16) {
  293. return;
  294. }
  295. /* dyaAfter */
  296. pStyle->usAfterIndent = usGetWord(iFodo + 15, aucGrpprl);
  297. NO_DBG_DEC(pStyle->usAfterIndent);
  298. } /* end of vGet0StyleInfo */
  299. /*
  300. * Build the lists with Paragraph Information for Word for DOS files
  301. */
  302. void
  303. vGet0PapInfo(FILE *pFile, const UCHAR *aucHeader)
  304. {
  305. style_block_type tStyle;
  306. ULONG ulBeginParfInfo, ulCharPos, ulCharPosNext;
  307. int iIndex, iRun, iFodo;
  308. UCHAR aucFpage[128];
  309. fail(pFile == NULL || aucHeader == NULL);
  310. ulBeginParfInfo = 128 * (ULONG)usGetWord(0x12, aucHeader);
  311. NO_DBG_HEX(ulBeginParfInfo);
  312. do {
  313. if (!bReadBytes(aucFpage, 128, ulBeginParfInfo, pFile)) {
  314. return;
  315. }
  316. NO_DBG_PRINT_BLOCK(aucFpage, 128);
  317. ulCharPosNext = ulGetLong(0, aucFpage);
  318. iRun = (int)ucGetByte(0x7f, aucFpage);
  319. NO_DBG_DEC(iRun);
  320. for (iIndex = 0; iIndex < iRun; iIndex++) {
  321. iFodo = (int)usGetWord(6 * iIndex + 8, aucFpage);
  322. if (iFodo <= 0 || iFodo > 0x79) {
  323. DBG_DEC_C(iFodo != (int)0xffff, iFodo);
  324. continue;
  325. }
  326. vFillStyleFromStylesheet(0, &tStyle);
  327. vGet0StyleInfo(iFodo, aucFpage + 4, &tStyle);
  328. ulCharPos = ulCharPosNext;
  329. ulCharPosNext = ulGetLong(6 * iIndex + 4, aucFpage);
  330. tStyle.ulFileOffset = ulCharPos;
  331. vAdd2StyleInfoList(&tStyle);
  332. }
  333. ulBeginParfInfo += 128;
  334. } while (ulCharPosNext == ulBeginParfInfo);
  335. } /* end of vGet0PapInfo */
  336. /*
  337. * Fill the font information block with information
  338. * from a Word for DOS file.
  339. */
  340. static void
  341. vGet0FontInfo(int iFodo, const UCHAR *aucGrpprl, font_block_type *pFont)
  342. {
  343. int iBytes;
  344. UCHAR ucTmp;
  345. fail(iFodo <= 0 || aucGrpprl == NULL || pFont == NULL);
  346. iBytes = (int)ucGetByte(iFodo, aucGrpprl);
  347. if (iBytes < 2) {
  348. return;
  349. }
  350. /* fBold, fItalic, cFtc */
  351. ucTmp = ucGetByte(iFodo + 2, aucGrpprl);
  352. if ((ucTmp & BIT(0)) != 0) {
  353. pFont->usFontStyle |= FONT_BOLD;
  354. }
  355. if ((ucTmp & BIT(1)) != 0) {
  356. pFont->usFontStyle |= FONT_ITALIC;
  357. }
  358. pFont->ucFontNumber = ucTmp >> 2;
  359. NO_DBG_DEC(pFont->ucFontNumber);
  360. if (iBytes < 3) {
  361. return;
  362. }
  363. /* cHps */
  364. pFont->usFontSize = (USHORT)ucGetByte(iFodo + 3, aucGrpprl);
  365. NO_DBG_DEC(pFont->usFontSize);
  366. if (iBytes < 4) {
  367. return;
  368. }
  369. /* cKul, fStrike, fCaps, fSmallCaps, fVanish */
  370. ucTmp = ucGetByte(iFodo + 4, aucGrpprl);
  371. if ((ucTmp & BIT(0)) != 0 || (ucTmp & BIT(2)) != 0) {
  372. pFont->usFontStyle |= FONT_UNDERLINE;
  373. }
  374. if ((ucTmp & BIT(1)) != 0) {
  375. pFont->usFontStyle |= FONT_STRIKE;
  376. }
  377. if ((ucTmp & BIT(4)) != 0) {
  378. pFont->usFontStyle |= FONT_CAPITALS;
  379. }
  380. if ((ucTmp & BIT(5)) != 0) {
  381. pFont->usFontStyle |= FONT_SMALL_CAPITALS;
  382. }
  383. if ((ucTmp & BIT(7)) != 0) {
  384. pFont->usFontStyle |= FONT_HIDDEN;
  385. }
  386. DBG_HEX(pFont->usFontStyle);
  387. if (iBytes < 6) {
  388. return;
  389. }
  390. /* cIss */
  391. ucTmp = ucGetByte(iFodo + 6, aucGrpprl);
  392. if (ucTmp != 0) {
  393. if (ucTmp < 128) {
  394. pFont->usFontStyle |= FONT_SUPERSCRIPT;
  395. DBG_MSG("Superscript");
  396. } else {
  397. pFont->usFontStyle |= FONT_SUBSCRIPT;
  398. DBG_MSG("Subscript");
  399. }
  400. }
  401. if (iBytes < 7) {
  402. return;
  403. }
  404. /* cIco */
  405. ucTmp = ucGetByte(iFodo + 7, aucGrpprl);
  406. switch (ucTmp & 0x07) {
  407. case 0: pFont->ucFontColor = FONT_COLOR_BLACK; break;
  408. case 1: pFont->ucFontColor = FONT_COLOR_RED; break;
  409. case 2: pFont->ucFontColor = FONT_COLOR_GREEN; break;
  410. case 3: pFont->ucFontColor = FONT_COLOR_BLUE; break;
  411. case 4: pFont->ucFontColor = FONT_COLOR_CYAN; break;
  412. case 5: pFont->ucFontColor = FONT_COLOR_MAGENTA; break;
  413. case 6: pFont->ucFontColor = FONT_COLOR_YELLOW; break;
  414. case 7: pFont->ucFontColor = FONT_COLOR_WHITE; break;
  415. default:pFont->ucFontColor = FONT_COLOR_BLACK; break;
  416. }
  417. NO_DBG_DEC(pFont->ucFontColor);
  418. } /* end of vGet0FontInfo */
  419. /*
  420. * Build the lists with Character Information for Word for DOS files
  421. */
  422. void
  423. vGet0ChrInfo(FILE *pFile, const UCHAR *aucHeader)
  424. {
  425. font_block_type tFont;
  426. ULONG ulBeginCharInfo, ulCharPos, ulCharPosNext;
  427. int iIndex, iRun, iFodo;
  428. UCHAR aucFpage[128];
  429. fail(pFile == NULL || aucHeader == NULL);
  430. ulBeginCharInfo = ulGetLong(0x0e, aucHeader);
  431. NO_DBG_HEX(ulBeginCharInfo);
  432. ulBeginCharInfo = ROUND128(ulBeginCharInfo);
  433. NO_DBG_HEX(ulBeginCharInfo);
  434. do {
  435. if (!bReadBytes(aucFpage, 128, ulBeginCharInfo, pFile)) {
  436. return;
  437. }
  438. NO_DBG_PRINT_BLOCK(aucFpage, 128);
  439. ulCharPosNext = ulGetLong(0, aucFpage);
  440. iRun = (int)ucGetByte(0x7f, aucFpage);
  441. NO_DBG_DEC(iRun);
  442. for (iIndex = 0; iIndex < iRun; iIndex++) {
  443. iFodo = (int)usGetWord(6 * iIndex + 8, aucFpage);
  444. if (iFodo <= 0 || iFodo > 0x79) {
  445. DBG_DEC_C(iFodo != (int)0xffff, iFodo);
  446. continue;
  447. }
  448. vFillFontFromStylesheet(0, &tFont);
  449. vGet0FontInfo(iFodo, aucFpage + 4, &tFont);
  450. ulCharPos = ulCharPosNext;
  451. ulCharPosNext = ulGetLong(6 * iIndex + 4, aucFpage);
  452. tFont.ulFileOffset = ulCharPos;
  453. vAdd2FontInfoList(&tFont);
  454. }
  455. ulBeginCharInfo += 128;
  456. } while (ulCharPosNext == ulBeginCharInfo);
  457. } /* end of vGet0ChrInfo */