123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487 |
- /*
- * stylelist.c
- * Copyright (C) 1998-2005 A.J. van Os; Released under GNU GPL
- *
- * Description:
- * Build, read and destroy a list of Word style information
- */
- #include <stdlib.h>
- #include <stddef.h>
- #include <ctype.h>
- #include "antiword.h"
- /*
- * Private structure to hide the way the information
- * is stored from the rest of the program
- *
- * The Style Information List is a singly linked chain of these members.
- * Each member carries one style block, the sequence number that
- * ulGetSeqNumber() reported for the file offset of that style block and
- * a link to the member that was added after it.
- */
- typedef struct style_mem_tag {
- style_block_type tInfo; /* The (corrected) style information */
- ULONG ulSequenceNumber; /* Sequence number of tInfo.ulFileOffset */
- struct style_mem_tag *pNext; /* Successor, NULL for the last member */
- } style_mem_type;
- /* Variables needed to write the Style Information List
- * pAnchor is the first member of the list (NULL when the list is empty),
- * pStyleLast is the member that was added most recently */
- static style_mem_type *pAnchor = NULL;
- static style_mem_type *pStyleLast = NULL;
- /* The type of conversion
- * Filled in from the options the first time a list character
- * needs to be converted */
- static conversion_type eConversionType = conversion_unknown;
- /* The character set encoding
- * Filled in from the options together with eConversionType */
- static encoding_type eEncoding = encoding_neutral;
- /* Values for efficiency reasons
- * pMidPtr advances one member for every second member that is appended
- * and serves as an alternative starting point for searches,
- * bMoveMidPtr tells whether the next append must advance pMidPtr,
- * bInSequence stays TRUE as long as every appended member has a larger
- * sequence number than its predecessor */
- static const style_mem_type *pMidPtr = NULL;
- static BOOL bMoveMidPtr = FALSE;
- static BOOL bInSequence = TRUE;
- /*
- * vDestroyStyleInfoList - destroy the Style Information List
- */
- void
- vDestroyStyleInfoList(void)
- {
- style_mem_type *pCurr, *pNext;
- DBG_MSG("vDestroyStyleInfoList");
- /* Free the Style Information List */
- pCurr = pAnchor;
- while (pCurr != NULL) {
- pNext = pCurr->pNext;
- pCurr = xfree(pCurr);
- pCurr = pNext;
- }
- pAnchor = NULL;
- /* Reset all control variables */
- pStyleLast = NULL;
- pMidPtr = NULL;
- bMoveMidPtr = FALSE;
- bInSequence = TRUE;
- } /* end of vDestroyStyleInfoList */
- /*
- * vConvertListCharacter - convert the list character
- */
- static void
- vConvertListCharacter(UCHAR ucNFC, USHORT usListChar, char *szListChar)
- {
- options_type tOptions;
- size_t tLen;
- fail(szListChar == NULL);
- fail(szListChar[0] != '\0');
- if (usListChar < 0x80 && isprint((int)usListChar)) {
- DBG_CHR_C(isalnum((int)usListChar), usListChar);
- szListChar[0] = (char)usListChar;
- szListChar[1] = '\0';
- return;
- }
- if (ucNFC != LIST_SPECIAL &&
- ucNFC != LIST_SPECIAL2 &&
- ucNFC != LIST_BULLETS) {
- szListChar[0] = '.';
- szListChar[1] = '\0';
- return;
- }
- if (eConversionType == conversion_unknown ||
- eEncoding == encoding_neutral) {
- vGetOptions(&tOptions);
- eConversionType = tOptions.eConversionType;
- eEncoding = tOptions.eEncoding;
- }
- switch (usListChar) {
- case 0x0000: case 0x00b7: case 0x00fe: case 0xf021: case 0xf043:
- case 0xf06c: case 0xf093: case 0xf0b7:
- usListChar = 0x2022; /* BULLET */
- break;
- case 0x0096: case 0xf02d:
- usListChar = 0x2013; /* EN DASH */
- break;
- case 0x00a8:
- usListChar = 0x2666; /* BLACK DIAMOND SUIT */
- break;
- case 0x00de:
- usListChar = 0x21d2; /* RIGHTWARDS DOUBLE ARROW */
- break;
- case 0x00e0: case 0xf074:
- usListChar = 0x25ca; /* LOZENGE */
- break;
- case 0x00e1:
- usListChar = 0x2329; /* LEFT ANGLE BRACKET */
- break;
- case 0xf020:
- usListChar = 0x0020; /* SPACE */
- break;
- case 0xf041:
- usListChar = 0x270c; /* VICTORY HAND */
- break;
- case 0xf066:
- usListChar = 0x03d5; /* GREEK PHI SYMBOL */
- break;
- case 0xf06e:
- usListChar = 0x25a0; /* BLACK SQUARE */
- break;
- case 0xf06f: case 0xf070: case 0xf0a8:
- usListChar = 0x25a1; /* WHITE SQUARE */
- break;
- case 0xf071:
- usListChar = 0x2751; /* LOWER RIGHT SHADOWED WHITE SQUARE */
- break;
- case 0xf075: case 0xf077:
- usListChar = 0x25c6; /* BLACK DIAMOND */
- break;
- case 0xf076:
- usListChar = 0x2756; /* BLACK DIAMOND MINUS WHITE X */
- break;
- case 0xf0a7:
- usListChar = 0x25aa; /* BLACK SMALL SQUARE */
- break;
- case 0xf0d8:
- usListChar = 0x27a2; /* RIGHTWARDS ARROWHEAD */
- break;
- case 0xf0e5:
- usListChar = 0x2199; /* SOUTH WEST ARROW */
- break;
- case 0xf0f0:
- usListChar = 0x21e8; /* RIGHTWARDS WHITE ARROW */
- break;
- case 0xf0fc:
- usListChar = 0x2713; /* CHECK MARK */
- break;
- default:
- if ((usListChar >= 0xe000 && usListChar < 0xf900) ||
- (usListChar < 0x80 && !isprint((int)usListChar))) {
- /*
- * All remaining private area characters and all
- * remaining non-printable ASCII characters to their
- * default bullet character
- */
- DBG_HEX(usListChar);
- DBG_FIXME();
- if (ucNFC == LIST_SPECIAL || ucNFC == LIST_SPECIAL2) {
- usListChar = 0x2190; /* LEFTWARDS ARROW */
- } else {
- usListChar = 0x2022; /* BULLET */
- }
- }
- break;
- }
- if (eEncoding == encoding_utf_8) {
- tLen = tUcs2Utf8(usListChar, szListChar, 4);
- szListChar[tLen] = '\0';
- } else {
- switch (usListChar) {
- case 0x03d5: case 0x25a1: case 0x25c6: case 0x25ca:
- case 0x2751:
- szListChar[0] = 'o';
- break;
- case 0x2013: case 0x2500:
- szListChar[0] = '-';
- break;
- case 0x2190: case 0x2199: case 0x2329:
- szListChar[0] = '<';
- break;
- case 0x21d2:
- szListChar[0] = '=';
- break;
- case 0x21e8: case 0x27a2:
- szListChar[0] = '>';
- break;
- case 0x25a0: case 0x25aa:
- szListChar[0] = '.';
- break;
- case 0x2666:
- szListChar[0] = OUR_DIAMOND;
- break;
- case 0x270c:
- szListChar[0] = 'x';
- break;
- case 0x2713:
- szListChar[0] = 'V';
- break;
- case 0x2756:
- szListChar[0] = '*';
- break;
- case 0x2022:
- default:
- vGetBulletValue(eConversionType, eEncoding,
- szListChar, 2);
- break;
- }
- tLen = 1;
- }
- szListChar[tLen] = '\0';
- } /* end of vConvertListCharacter */
- /*
- * eGetNumType - get the level type from the given level number
- *
- * Returns the level type
- */
- level_type_enum
- eGetNumType(UCHAR ucNumLevel)
- {
- switch (ucNumLevel) {
- case 1: case 2: case 3: case 4: case 5:
- case 6: case 7: case 8: case 9:
- return level_type_outline;
- case 10:
- return level_type_numbering;
- case 11:
- return level_type_sequence;
- case 12:
- return level_type_pause;
- default:
- return level_type_none;
- }
- } /* end of eGetNumType */
- /*
- * vCorrectStyleValues - correct style values that Antiword can't use
- */
- void
- vCorrectStyleValues(style_block_type *pStyleBlock)
- {
- if (pStyleBlock->usBeforeIndent > 0x7fff) {
- pStyleBlock->usBeforeIndent = 0;
- } else if (pStyleBlock->usBeforeIndent > 2160) {
- /* 2160 twips = 1.5 inches or 38.1 mm */
- DBG_DEC(pStyleBlock->usBeforeIndent);
- pStyleBlock->usBeforeIndent = 2160;
- }
- if (pStyleBlock->usIstd >= 1 &&
- pStyleBlock->usIstd <= 9 &&
- pStyleBlock->usBeforeIndent < HEADING_GAP) {
- NO_DBG_DEC(pStyleBlock->usBeforeIndent);
- pStyleBlock->usBeforeIndent = HEADING_GAP;
- }
- if (pStyleBlock->usAfterIndent > 0x7fff) {
- pStyleBlock->usAfterIndent = 0;
- } else if (pStyleBlock->usAfterIndent > 2160) {
- /* 2160 twips = 1.5 inches or 38.1 mm */
- DBG_DEC(pStyleBlock->usAfterIndent);
- pStyleBlock->usAfterIndent = 2160;
- }
- if (pStyleBlock->usIstd >= 1 &&
- pStyleBlock->usIstd <= 9 &&
- pStyleBlock->usAfterIndent < HEADING_GAP) {
- NO_DBG_DEC(pStyleBlock->usAfterIndent);
- pStyleBlock->usAfterIndent = HEADING_GAP;
- }
- if (pStyleBlock->sLeftIndent < 0) {
- pStyleBlock->sLeftIndent = 0;
- }
- if (pStyleBlock->sRightIndent > 0) {
- pStyleBlock->sRightIndent = 0;
- }
- vConvertListCharacter(pStyleBlock->ucNFC,
- pStyleBlock->usListChar,
- pStyleBlock->szListChar);
- } /* end of vCorrectStyleValues */
- /*
- * vAdd2StyleInfoList - Add an element to the Style Information List
- */
- void
- vAdd2StyleInfoList(const style_block_type *pStyleBlock)
- {
- style_mem_type *pListMember;
- fail(pStyleBlock == NULL);
- NO_DBG_MSG("bAdd2StyleInfoList");
- if (pStyleBlock->ulFileOffset == FC_INVALID) {
- NO_DBG_DEC(pStyleBlock->usIstd);
- return;
- }
- NO_DBG_HEX(pStyleBlock->ulFileOffset);
- NO_DBG_DEC_C(pStyleBlock->sLeftIndent != 0,
- pStyleBlock->sLeftIndent);
- NO_DBG_DEC_C(pStyleBlock->sRightIndent != 0,
- pStyleBlock->sRightIndent);
- NO_DBG_DEC_C(pStyleBlock->bNumPause, pStyleBlock->bNumPause);
- NO_DBG_DEC_C(pStyleBlock->usIstd != 0, pStyleBlock->usIstd);
- NO_DBG_DEC_C(pStyleBlock->usStartAt != 1, pStyleBlock->usStartAt);
- NO_DBG_DEC_C(pStyleBlock->usAfterIndent != 0,
- pStyleBlock->usAfterIndent);
- NO_DBG_DEC_C(pStyleBlock->ucAlignment != 0, pStyleBlock->ucAlignment);
- NO_DBG_DEC(pStyleBlock->ucNFC);
- NO_DBG_HEX(pStyleBlock->usListChar);
- if (pStyleLast != NULL &&
- pStyleLast->tInfo.ulFileOffset == pStyleBlock->ulFileOffset) {
- /*
- * If two consecutive styles share the same
- * offset, remember only the last style
- */
- fail(pStyleLast->pNext != NULL);
- pStyleLast->tInfo = *pStyleBlock;
- /* Correct the values where needed */
- vCorrectStyleValues(&pStyleLast->tInfo);
- return;
- }
- /* Create list member */
- pListMember = xmalloc(sizeof(style_mem_type));
- /* Fill the list member */
- pListMember->tInfo = *pStyleBlock;
- pListMember->pNext = NULL;
- /* Add the sequence number */
- pListMember->ulSequenceNumber =
- ulGetSeqNumber(pListMember->tInfo.ulFileOffset);
- /* Correct the values where needed */
- vCorrectStyleValues(&pListMember->tInfo);
- /* Add the new member to the list */
- if (pAnchor == NULL) {
- pAnchor = pListMember;
- /* For efficiency */
- pMidPtr = pAnchor;
- bMoveMidPtr = FALSE;
- bInSequence = TRUE;
- } else {
- fail(pStyleLast == NULL);
- pStyleLast->pNext = pListMember;
- /* For efficiency */
- if (bMoveMidPtr) {
- pMidPtr = pMidPtr->pNext;
- bMoveMidPtr = FALSE;
- } else {
- bMoveMidPtr = TRUE;
- }
- if (bInSequence) {
- bInSequence = pListMember->ulSequenceNumber >
- pStyleLast->ulSequenceNumber;
- }
- }
- pStyleLast = pListMember;
- } /* end of vAdd2StyleInfoList */
- /*
- * Get the record that follows the given recored in the Style Information List
- */
- const style_block_type *
- pGetNextStyleInfoListItem(const style_block_type *pCurr)
- {
- const style_mem_type *pRecord;
- size_t tOffset;
- if (pCurr == NULL) {
- if (pAnchor == NULL) {
- /* There are no records */
- return NULL;
- }
- /* The first record is the only one without a predecessor */
- return &pAnchor->tInfo;
- }
- tOffset = offsetof(style_mem_type, tInfo);
- /* Many casts to prevent alignment warnings */
- pRecord = (style_mem_type *)(void *)((char *)pCurr - tOffset);
- fail(pCurr != &pRecord->tInfo);
- if (pRecord->pNext == NULL) {
- /* The last record has no successor */
- return NULL;
- }
- return &pRecord->pNext->tInfo;
- } /* end of pGetNextStyleInfoListItem */
- /*
- * Get the next text style
- */
- const style_block_type *
- pGetNextTextStyle(const style_block_type *pCurr)
- {
- const style_block_type *pRecord;
- pRecord = pCurr;
- do {
- pRecord = pGetNextStyleInfoListItem(pRecord);
- } while (pRecord != NULL &&
- (pRecord->eListID == hdrftr_list ||
- pRecord->eListID == macro_list ||
- pRecord->eListID == annotation_list));
- return pRecord;
- } /* end of pGetNextTextStyle */
- /*
- * usGetIstd - get the istd that belongs to the given file offset
- */
- USHORT
- usGetIstd(ULONG ulFileOffset)
- {
- const style_mem_type *pCurr, *pBest, *pStart;
- ULONG ulSeq, ulBest;
- ulSeq = ulGetSeqNumber(ulFileOffset);
- if (ulSeq == FC_INVALID) {
- return ISTD_NORMAL;
- }
- NO_DBG_HEX(ulFileOffset);
- NO_DBG_DEC(ulSeq);
- if (bInSequence &&
- pMidPtr != NULL &&
- ulSeq > pMidPtr->ulSequenceNumber) {
- /* The istd is in the second half of the chained list */
- pStart = pMidPtr;
- } else {
- pStart = pAnchor;
- }
- pBest = NULL;
- ulBest = 0;
- for (pCurr = pStart; pCurr != NULL; pCurr = pCurr->pNext) {
- if (pCurr->ulSequenceNumber != FC_INVALID &&
- (pBest == NULL || pCurr->ulSequenceNumber > ulBest) &&
- pCurr->ulSequenceNumber <= ulSeq) {
- pBest = pCurr;
- ulBest = pCurr->ulSequenceNumber;
- }
- if (bInSequence && pCurr->ulSequenceNumber > ulSeq) {
- break;
- }
- }
- NO_DBG_DEC(ulBest);
- if (pBest == NULL) {
- return ISTD_NORMAL;
- }
- NO_DBG_DEC(pBest->tInfo.usIstd);
- return pBest->tInfo.usIstd;
- } /* end of usGetIstd */
- /*
- * bStyleImpliesList - does style info implies being part of a list
- *
- * Decide whether the style information implies that the given paragraph is
- * part of a list
- *
- * Returns TRUE when the paragraph is part of a list, otherwise FALSE
- */
- BOOL
- bStyleImpliesList(const style_block_type *pStyle, int iWordVersion)
- {
- fail(pStyle == NULL);
- fail(iWordVersion < 0);
- if (pStyle->usIstd >= 1 && pStyle->usIstd <= 9) {
- /* These are heading levels */
- return FALSE;
- }
- if (iWordVersion < 8) {
- /* Check for old style lists */
- return pStyle->ucNumLevel != 0;
- }
- /* Check for new style lists */
- return pStyle->usListIndex != 0;
- } /* end of bStyleImpliesList */
|