stylelist.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. /*
  2. * stylelist.c
  3. * Copyright (C) 1998-2005 A.J. van Os; Released under GNU GPL
  4. *
  5. * Description:
  6. * Build, read and destroy a list of Word style information
  7. */
  8. #include <stdlib.h>
  9. #include <stddef.h>
  10. #include <ctype.h>
  11. #include "antiword.h"
  12. /*
  13. * Private structure to hide the way the information
  14. * is stored from the rest of the program
  15. */
  16. typedef struct style_mem_tag {
  17. style_block_type tInfo;
  18. ULONG ulSequenceNumber;
  19. struct style_mem_tag *pNext;
  20. } style_mem_type;
  21. /* Variables needed to write the Style Information List */
  22. static style_mem_type *pAnchor = NULL;
  23. static style_mem_type *pStyleLast = NULL;
  24. /* The type of conversion */
  25. static conversion_type eConversionType = conversion_unknown;
  26. /* The character set encoding */
  27. static encoding_type eEncoding = encoding_neutral;
  28. /* Values for efficiency reasons */
  29. static const style_mem_type *pMidPtr = NULL;
  30. static BOOL bMoveMidPtr = FALSE;
  31. static BOOL bInSequence = TRUE;
  32. /*
  33. * vDestroyStyleInfoList - destroy the Style Information List
  34. */
  35. void
  36. vDestroyStyleInfoList(void)
  37. {
  38. style_mem_type *pCurr, *pNext;
  39. DBG_MSG("vDestroyStyleInfoList");
  40. /* Free the Style Information List */
  41. pCurr = pAnchor;
  42. while (pCurr != NULL) {
  43. pNext = pCurr->pNext;
  44. pCurr = xfree(pCurr);
  45. pCurr = pNext;
  46. }
  47. pAnchor = NULL;
  48. /* Reset all control variables */
  49. pStyleLast = NULL;
  50. pMidPtr = NULL;
  51. bMoveMidPtr = FALSE;
  52. bInSequence = TRUE;
  53. } /* end of vDestroyStyleInfoList */
  54. /*
  55. * vConvertListCharacter - convert the list character
  56. */
  57. static void
  58. vConvertListCharacter(UCHAR ucNFC, USHORT usListChar, char *szListChar)
  59. {
  60. options_type tOptions;
  61. size_t tLen;
  62. fail(szListChar == NULL);
  63. fail(szListChar[0] != '\0');
  64. if (usListChar < 0x80 && isprint((int)usListChar)) {
  65. DBG_CHR_C(isalnum((int)usListChar), usListChar);
  66. szListChar[0] = (char)usListChar;
  67. szListChar[1] = '\0';
  68. return;
  69. }
  70. if (ucNFC != LIST_SPECIAL &&
  71. ucNFC != LIST_SPECIAL2 &&
  72. ucNFC != LIST_BULLETS) {
  73. szListChar[0] = '.';
  74. szListChar[1] = '\0';
  75. return;
  76. }
  77. if (eConversionType == conversion_unknown ||
  78. eEncoding == encoding_neutral) {
  79. vGetOptions(&tOptions);
  80. eConversionType = tOptions.eConversionType;
  81. eEncoding = tOptions.eEncoding;
  82. }
  83. switch (usListChar) {
  84. case 0x0000: case 0x00b7: case 0x00fe: case 0xf021: case 0xf043:
  85. case 0xf06c: case 0xf093: case 0xf0b7:
  86. usListChar = 0x2022; /* BULLET */
  87. break;
  88. case 0x0096: case 0xf02d:
  89. usListChar = 0x2013; /* EN DASH */
  90. break;
  91. case 0x00a8:
  92. usListChar = 0x2666; /* BLACK DIAMOND SUIT */
  93. break;
  94. case 0x00de:
  95. usListChar = 0x21d2; /* RIGHTWARDS DOUBLE ARROW */
  96. break;
  97. case 0x00e0: case 0xf074:
  98. usListChar = 0x25ca; /* LOZENGE */
  99. break;
  100. case 0x00e1:
  101. usListChar = 0x2329; /* LEFT ANGLE BRACKET */
  102. break;
  103. case 0xf020:
  104. usListChar = 0x0020; /* SPACE */
  105. break;
  106. case 0xf041:
  107. usListChar = 0x270c; /* VICTORY HAND */
  108. break;
  109. case 0xf066:
  110. usListChar = 0x03d5; /* GREEK PHI SYMBOL */
  111. break;
  112. case 0xf06e:
  113. usListChar = 0x25a0; /* BLACK SQUARE */
  114. break;
  115. case 0xf06f: case 0xf070: case 0xf0a8:
  116. usListChar = 0x25a1; /* WHITE SQUARE */
  117. break;
  118. case 0xf071:
  119. usListChar = 0x2751; /* LOWER RIGHT SHADOWED WHITE SQUARE */
  120. break;
  121. case 0xf075: case 0xf077:
  122. usListChar = 0x25c6; /* BLACK DIAMOND */
  123. break;
  124. case 0xf076:
  125. usListChar = 0x2756; /* BLACK DIAMOND MINUS WHITE X */
  126. break;
  127. case 0xf0a7:
  128. usListChar = 0x25aa; /* BLACK SMALL SQUARE */
  129. break;
  130. case 0xf0d8:
  131. usListChar = 0x27a2; /* RIGHTWARDS ARROWHEAD */
  132. break;
  133. case 0xf0e5:
  134. usListChar = 0x2199; /* SOUTH WEST ARROW */
  135. break;
  136. case 0xf0f0:
  137. usListChar = 0x21e8; /* RIGHTWARDS WHITE ARROW */
  138. break;
  139. case 0xf0fc:
  140. usListChar = 0x2713; /* CHECK MARK */
  141. break;
  142. default:
  143. if ((usListChar >= 0xe000 && usListChar < 0xf900) ||
  144. (usListChar < 0x80 && !isprint((int)usListChar))) {
  145. /*
  146. * All remaining private area characters and all
  147. * remaining non-printable ASCII characters to their
  148. * default bullet character
  149. */
  150. DBG_HEX(usListChar);
  151. DBG_FIXME();
  152. if (ucNFC == LIST_SPECIAL || ucNFC == LIST_SPECIAL2) {
  153. usListChar = 0x2190; /* LEFTWARDS ARROW */
  154. } else {
  155. usListChar = 0x2022; /* BULLET */
  156. }
  157. }
  158. break;
  159. }
  160. if (eEncoding == encoding_utf_8) {
  161. tLen = tUcs2Utf8(usListChar, szListChar, 4);
  162. szListChar[tLen] = '\0';
  163. } else {
  164. switch (usListChar) {
  165. case 0x03d5: case 0x25a1: case 0x25c6: case 0x25ca:
  166. case 0x2751:
  167. szListChar[0] = 'o';
  168. break;
  169. case 0x2013: case 0x2500:
  170. szListChar[0] = '-';
  171. break;
  172. case 0x2190: case 0x2199: case 0x2329:
  173. szListChar[0] = '<';
  174. break;
  175. case 0x21d2:
  176. szListChar[0] = '=';
  177. break;
  178. case 0x21e8: case 0x27a2:
  179. szListChar[0] = '>';
  180. break;
  181. case 0x25a0: case 0x25aa:
  182. szListChar[0] = '.';
  183. break;
  184. case 0x2666:
  185. szListChar[0] = OUR_DIAMOND;
  186. break;
  187. case 0x270c:
  188. szListChar[0] = 'x';
  189. break;
  190. case 0x2713:
  191. szListChar[0] = 'V';
  192. break;
  193. case 0x2756:
  194. szListChar[0] = '*';
  195. break;
  196. case 0x2022:
  197. default:
  198. vGetBulletValue(eConversionType, eEncoding,
  199. szListChar, 2);
  200. break;
  201. }
  202. tLen = 1;
  203. }
  204. szListChar[tLen] = '\0';
  205. } /* end of vConvertListCharacter */
  206. /*
  207. * eGetNumType - get the level type from the given level number
  208. *
  209. * Returns the level type
  210. */
  211. level_type_enum
  212. eGetNumType(UCHAR ucNumLevel)
  213. {
  214. switch (ucNumLevel) {
  215. case 1: case 2: case 3: case 4: case 5:
  216. case 6: case 7: case 8: case 9:
  217. return level_type_outline;
  218. case 10:
  219. return level_type_numbering;
  220. case 11:
  221. return level_type_sequence;
  222. case 12:
  223. return level_type_pause;
  224. default:
  225. return level_type_none;
  226. }
  227. } /* end of eGetNumType */
  228. /*
  229. * vCorrectStyleValues - correct style values that Antiword can't use
  230. */
  231. void
  232. vCorrectStyleValues(style_block_type *pStyleBlock)
  233. {
  234. if (pStyleBlock->usBeforeIndent > 0x7fff) {
  235. pStyleBlock->usBeforeIndent = 0;
  236. } else if (pStyleBlock->usBeforeIndent > 2160) {
  237. /* 2160 twips = 1.5 inches or 38.1 mm */
  238. DBG_DEC(pStyleBlock->usBeforeIndent);
  239. pStyleBlock->usBeforeIndent = 2160;
  240. }
  241. if (pStyleBlock->usIstd >= 1 &&
  242. pStyleBlock->usIstd <= 9 &&
  243. pStyleBlock->usBeforeIndent < HEADING_GAP) {
  244. NO_DBG_DEC(pStyleBlock->usBeforeIndent);
  245. pStyleBlock->usBeforeIndent = HEADING_GAP;
  246. }
  247. if (pStyleBlock->usAfterIndent > 0x7fff) {
  248. pStyleBlock->usAfterIndent = 0;
  249. } else if (pStyleBlock->usAfterIndent > 2160) {
  250. /* 2160 twips = 1.5 inches or 38.1 mm */
  251. DBG_DEC(pStyleBlock->usAfterIndent);
  252. pStyleBlock->usAfterIndent = 2160;
  253. }
  254. if (pStyleBlock->usIstd >= 1 &&
  255. pStyleBlock->usIstd <= 9 &&
  256. pStyleBlock->usAfterIndent < HEADING_GAP) {
  257. NO_DBG_DEC(pStyleBlock->usAfterIndent);
  258. pStyleBlock->usAfterIndent = HEADING_GAP;
  259. }
  260. if (pStyleBlock->sLeftIndent < 0) {
  261. pStyleBlock->sLeftIndent = 0;
  262. }
  263. if (pStyleBlock->sRightIndent > 0) {
  264. pStyleBlock->sRightIndent = 0;
  265. }
  266. vConvertListCharacter(pStyleBlock->ucNFC,
  267. pStyleBlock->usListChar,
  268. pStyleBlock->szListChar);
  269. } /* end of vCorrectStyleValues */
  270. /*
  271. * vAdd2StyleInfoList - Add an element to the Style Information List
  272. */
  273. void
  274. vAdd2StyleInfoList(const style_block_type *pStyleBlock)
  275. {
  276. style_mem_type *pListMember;
  277. fail(pStyleBlock == NULL);
  278. NO_DBG_MSG("bAdd2StyleInfoList");
  279. if (pStyleBlock->ulFileOffset == FC_INVALID) {
  280. NO_DBG_DEC(pStyleBlock->usIstd);
  281. return;
  282. }
  283. NO_DBG_HEX(pStyleBlock->ulFileOffset);
  284. NO_DBG_DEC_C(pStyleBlock->sLeftIndent != 0,
  285. pStyleBlock->sLeftIndent);
  286. NO_DBG_DEC_C(pStyleBlock->sRightIndent != 0,
  287. pStyleBlock->sRightIndent);
  288. NO_DBG_DEC_C(pStyleBlock->bNumPause, pStyleBlock->bNumPause);
  289. NO_DBG_DEC_C(pStyleBlock->usIstd != 0, pStyleBlock->usIstd);
  290. NO_DBG_DEC_C(pStyleBlock->usStartAt != 1, pStyleBlock->usStartAt);
  291. NO_DBG_DEC_C(pStyleBlock->usAfterIndent != 0,
  292. pStyleBlock->usAfterIndent);
  293. NO_DBG_DEC_C(pStyleBlock->ucAlignment != 0, pStyleBlock->ucAlignment);
  294. NO_DBG_DEC(pStyleBlock->ucNFC);
  295. NO_DBG_HEX(pStyleBlock->usListChar);
  296. if (pStyleLast != NULL &&
  297. pStyleLast->tInfo.ulFileOffset == pStyleBlock->ulFileOffset) {
  298. /*
  299. * If two consecutive styles share the same
  300. * offset, remember only the last style
  301. */
  302. fail(pStyleLast->pNext != NULL);
  303. pStyleLast->tInfo = *pStyleBlock;
  304. /* Correct the values where needed */
  305. vCorrectStyleValues(&pStyleLast->tInfo);
  306. return;
  307. }
  308. /* Create list member */
  309. pListMember = xmalloc(sizeof(style_mem_type));
  310. /* Fill the list member */
  311. pListMember->tInfo = *pStyleBlock;
  312. pListMember->pNext = NULL;
  313. /* Add the sequence number */
  314. pListMember->ulSequenceNumber =
  315. ulGetSeqNumber(pListMember->tInfo.ulFileOffset);
  316. /* Correct the values where needed */
  317. vCorrectStyleValues(&pListMember->tInfo);
  318. /* Add the new member to the list */
  319. if (pAnchor == NULL) {
  320. pAnchor = pListMember;
  321. /* For efficiency */
  322. pMidPtr = pAnchor;
  323. bMoveMidPtr = FALSE;
  324. bInSequence = TRUE;
  325. } else {
  326. fail(pStyleLast == NULL);
  327. pStyleLast->pNext = pListMember;
  328. /* For efficiency */
  329. if (bMoveMidPtr) {
  330. pMidPtr = pMidPtr->pNext;
  331. bMoveMidPtr = FALSE;
  332. } else {
  333. bMoveMidPtr = TRUE;
  334. }
  335. if (bInSequence) {
  336. bInSequence = pListMember->ulSequenceNumber >
  337. pStyleLast->ulSequenceNumber;
  338. }
  339. }
  340. pStyleLast = pListMember;
  341. } /* end of vAdd2StyleInfoList */
  342. /*
  343. * Get the record that follows the given recored in the Style Information List
  344. */
  345. const style_block_type *
  346. pGetNextStyleInfoListItem(const style_block_type *pCurr)
  347. {
  348. const style_mem_type *pRecord;
  349. size_t tOffset;
  350. if (pCurr == NULL) {
  351. if (pAnchor == NULL) {
  352. /* There are no records */
  353. return NULL;
  354. }
  355. /* The first record is the only one without a predecessor */
  356. return &pAnchor->tInfo;
  357. }
  358. tOffset = offsetof(style_mem_type, tInfo);
  359. /* Many casts to prevent alignment warnings */
  360. pRecord = (style_mem_type *)(void *)((char *)pCurr - tOffset);
  361. fail(pCurr != &pRecord->tInfo);
  362. if (pRecord->pNext == NULL) {
  363. /* The last record has no successor */
  364. return NULL;
  365. }
  366. return &pRecord->pNext->tInfo;
  367. } /* end of pGetNextStyleInfoListItem */
  368. /*
  369. * Get the next text style
  370. */
  371. const style_block_type *
  372. pGetNextTextStyle(const style_block_type *pCurr)
  373. {
  374. const style_block_type *pRecord;
  375. pRecord = pCurr;
  376. do {
  377. pRecord = pGetNextStyleInfoListItem(pRecord);
  378. } while (pRecord != NULL &&
  379. (pRecord->eListID == hdrftr_list ||
  380. pRecord->eListID == macro_list ||
  381. pRecord->eListID == annotation_list));
  382. return pRecord;
  383. } /* end of pGetNextTextStyle */
  384. /*
  385. * usGetIstd - get the istd that belongs to the given file offset
  386. */
  387. USHORT
  388. usGetIstd(ULONG ulFileOffset)
  389. {
  390. const style_mem_type *pCurr, *pBest, *pStart;
  391. ULONG ulSeq, ulBest;
  392. ulSeq = ulGetSeqNumber(ulFileOffset);
  393. if (ulSeq == FC_INVALID) {
  394. return ISTD_NORMAL;
  395. }
  396. NO_DBG_HEX(ulFileOffset);
  397. NO_DBG_DEC(ulSeq);
  398. if (bInSequence &&
  399. pMidPtr != NULL &&
  400. ulSeq > pMidPtr->ulSequenceNumber) {
  401. /* The istd is in the second half of the chained list */
  402. pStart = pMidPtr;
  403. } else {
  404. pStart = pAnchor;
  405. }
  406. pBest = NULL;
  407. ulBest = 0;
  408. for (pCurr = pStart; pCurr != NULL; pCurr = pCurr->pNext) {
  409. if (pCurr->ulSequenceNumber != FC_INVALID &&
  410. (pBest == NULL || pCurr->ulSequenceNumber > ulBest) &&
  411. pCurr->ulSequenceNumber <= ulSeq) {
  412. pBest = pCurr;
  413. ulBest = pCurr->ulSequenceNumber;
  414. }
  415. if (bInSequence && pCurr->ulSequenceNumber > ulSeq) {
  416. break;
  417. }
  418. }
  419. NO_DBG_DEC(ulBest);
  420. if (pBest == NULL) {
  421. return ISTD_NORMAL;
  422. }
  423. NO_DBG_DEC(pBest->tInfo.usIstd);
  424. return pBest->tInfo.usIstd;
  425. } /* end of usGetIstd */
  426. /*
  427. * bStyleImpliesList - does style info implies being part of a list
  428. *
  429. * Decide whether the style information implies that the given paragraph is
  430. * part of a list
  431. *
  432. * Returns TRUE when the paragraph is part of a list, otherwise FALSE
  433. */
  434. BOOL
  435. bStyleImpliesList(const style_block_type *pStyle, int iWordVersion)
  436. {
  437. fail(pStyle == NULL);
  438. fail(iWordVersion < 0);
  439. if (pStyle->usIstd >= 1 && pStyle->usIstd <= 9) {
  440. /* These are heading levels */
  441. return FALSE;
  442. }
  443. if (iWordVersion < 8) {
  444. /* Check for old style lists */
  445. return pStyle->ucNumLevel != 0;
  446. }
  447. /* Check for new style lists */
  448. return pStyle->usListIndex != 0;
  449. } /* end of bStyleImpliesList */