parseInstance.C 36 KB


  1. /*
  2. * CDE - Common Desktop Environment
  3. *
  4. * Copyright (c) 1993-2012, The Open Group. All rights reserved.
  5. *
  6. * These libraries and programs are free software; you can
  7. * redistribute them and/or modify them under the terms of the GNU
  8. * Lesser General Public License as published by the Free Software
  9. * Foundation; either version 2 of the License, or (at your option)
  10. * any later version.
  11. *
  12. * These libraries and programs are distributed in the hope that
  13. * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14. * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15. * PURPOSE. See the GNU Lesser General Public License for more
  16. * details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with these libraries and programs; if not, write
  20. * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21. * Floor, Boston, MA 02110-1301 USA
  22. */
  23. /* $XConsortium: parseInstance.C /main/2 1996/08/12 14:05:40 mgreess $ */
  24. // Copyright (c) 1994 James Clark
  25. // See the file COPYING for copying permission.
  26. #include "splib.h"
  27. #include "Parser.h"
  28. #include "ParserMessages.C"
  29. #include "MessageArg.h"
  30. #include "TokenMessageArg.h"
  31. #include "StringVectorMessageArg.h"
  32. #include "token.h"
  33. #include "macros.h"
  34. #ifdef SP_NAMESPACE
  35. namespace SP_NAMESPACE {
  36. #endif
  37. void Parser::doInstanceStart()
  38. {
  39. if (cancelled()) {
  40. allDone();
  41. return;
  42. }
  43. // FIXME check here that we have a valid dtd
  44. compileInstanceModes();
  45. setPhase(contentPhase);
  46. Token token = getToken(currentMode());
  47. switch (token) {
  48. case tokenEe:
  49. case tokenStagoNameStart:
  50. case tokenStagoTagc:
  51. case tokenStagoGrpo:
  52. case tokenEtagoNameStart:
  53. case tokenEtagoTagc:
  54. case tokenEtagoGrpo:
  55. break;
  56. default:
  57. if (sd().omittag()) {
  58. unsigned startImpliedCount = 0;
  59. unsigned attributeListIndex = 0;
  60. IList<Undo> undoList;
  61. IList<Event> eventList;
  62. if (!tryImplyTag(currentLocation(),
  63. startImpliedCount,
  64. attributeListIndex,
  65. undoList,
  66. eventList))
  67. CANNOT_HAPPEN();
  68. queueElementEvents(eventList);
  69. }
  70. else
  71. message(ParserMessages::instanceStartOmittag);
  72. }
  73. currentInput()->ungetToken();
  74. }
  75. void Parser::endInstance()
  76. {
  77. // Do checking before popping entity stack so that there's a
  78. // current location for error messages.
  79. endAllElements();
  80. while (markedSectionLevel() > 0) {
  81. message(ParserMessages::unclosedMarkedSection,
  82. currentMarkedSectionStartLocation());
  83. endMarkedSection();
  84. }
  85. checkIdrefs();
  86. popInputStack();
  87. allDone();
  88. }
  89. void Parser::checkIdrefs()
  90. {
  91. IdTableIter iter(idTableIter());
  92. Id *id;
  93. while ((id = iter.next()) != 0) {
  94. for (size_t i = 0; i < id->pendingRefs().size(); i++) {
  95. Messenger::setNextLocation(id->pendingRefs()[i]);
  96. message(ParserMessages::missingId, StringMessageArg(id->name()));
  97. }
  98. }
  99. }
// Main content loop.  Repeatedly reads a token in the current mode and
// dispatches on it, until the event queue is no longer empty.  Returns
// early when parsing has been cancelled or when the end of the outermost
// input (input level 1) has been reached.
void Parser::doContent()
{
  do {
    if (cancelled()) {
      allDone();
      return;
    }
    Token token = getToken(currentMode());
    switch (token) {
    case tokenEe:
      // Entity end.  At input level 1 this is the end of the instance.
      if (inputLevel() == 1) {
        endInstance();
        return;
      }
      if (inputLevel() == specialParseInputLevel()) {
        // FIXME have separate messages for each type of special parse
        // perhaps force end of marked section or element
        message(ParserMessages::specialParseEntityEnd);
      }
      if (eventsWanted().wantInstanceMarkup())
        eventHandler().entityEnd(new (eventAllocator())
                                 EntityEndEvent(currentLocation()));
      if (afterDocumentElement())
        message(ParserMessages::afterDocumentElementEntityEnd);
      popInputStack();
      break;
    case tokenCroDigit:
      // Numeric character reference: on success the character is
      // delivered as a one-character data event.
      {
        if (afterDocumentElement())
          message(ParserMessages::characterReferenceAfterDocumentElement);
        Char ch;
        Location loc;
        if (parseNumericCharRef(ch, loc)) {
          acceptPcdata(loc);
          noteData();
          eventHandler().data(new (eventAllocator())
                              ImmediateDataEvent(Event::characterData,
                                                 &ch, 1, loc, 1));
          break;
        }
      }
      break;
    case tokenCroNameStart:
      // Named character reference.
      if (afterDocumentElement())
        message(ParserMessages::characterReferenceAfterDocumentElement);
      parseNamedCharRef();
      break;
    case tokenEroGrpo:
    case tokenEroNameStart:
      // General entity reference, with or without a name group.
      {
        if (afterDocumentElement())
          message(ParserMessages::entityReferenceAfterDocumentElement);
        ConstPtr<Entity> entity;
        Ptr<EntityOrigin> origin;
        if (parseEntityReference(0, token == tokenEroGrpo, entity, origin)) {
          if (!entity.isNull()) {
            if (entity->isCharacterData())
              acceptPcdata(Location(origin.pointer(), 0));
            // At the special-parse input level the entity is referenced
            // via rcdataReference instead of contentReference.
            if (inputLevel() == specialParseInputLevel())
              entity->rcdataReference(*this, origin);
            else
              entity->contentReference(*this, origin);
          }
        }
        noteMarkup();
      }
      break;
    case tokenEtagoNameStart:
      parseEndTag();
      break;
    case tokenEtagoTagc:
      parseEmptyEndTag();
      break;
    case tokenEtagoGrpo:
      parseGroupEndTag();
      break;
    case tokenMdoNameStart:
      // Markup declaration inside the instance.  Only USEMAP and USELINK
      // are parsed; every other declaration name is reported and the
      // declaration is skipped.
      if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation()))
        currentMarkup()->addDelim(Syntax::dMDO);
      Syntax::ReservedName name;
      Boolean result;
      unsigned startLevel;
      // Remember the input level so skipDeclaration knows where the
      // declaration started.
      startLevel = inputLevel();
      if (parseDeclarationName(&name)) {
        switch (name) {
        case Syntax::rUSEMAP:
          if (afterDocumentElement())
            message(ParserMessages::declarationAfterDocumentElement,
                    StringMessageArg(syntax().reservedName(name)));
          result = parseUsemapDecl();
          break;
        case Syntax::rUSELINK:
          if (afterDocumentElement())
            message(ParserMessages::declarationAfterDocumentElement,
                    StringMessageArg(syntax().reservedName(name)));
          result = parseUselinkDecl();
          break;
        case Syntax::rDOCTYPE:
        case Syntax::rLINKTYPE:
        case Syntax::rELEMENT:
        case Syntax::rATTLIST:
        case Syntax::rENTITY:
        case Syntax::rNOTATION:
        case Syntax::rSHORTREF:
        case Syntax::rLINK:
        case Syntax::rIDLINK:
          // Known declaration types that are reported as not allowed here.
          message(ParserMessages::instanceDeclaration,
                  StringMessageArg(syntax().reservedName(name)));
          result = 0;
          break;
        default:
          message(ParserMessages::noSuchDeclarationType,
                  StringMessageArg(syntax().reservedName(name)));
          result = 0;
          break;
        }
      }
      else
        result = 0;
      // On any failure, skip the rest of the declaration.
      if (!result)
        skipDeclaration(startLevel);
      noteMarkup();
      break;
    case tokenMdoMdc:
      // empty comment
      emptyCommentDecl();
      noteMarkup();
      break;
    case tokenMdoCom:
      parseCommentDecl();
      noteMarkup();
      break;
    case tokenMdoDso:
      // Start of a marked section declaration.
      if (afterDocumentElement())
        message(ParserMessages::markedSectionAfterDocumentElement);
      parseMarkedSectionDeclStart();
      noteMarkup();
      break;
    case tokenMscMdc:
      // End of a marked section.
      handleMarkedSectionEnd();
      noteMarkup();
      break;
    case tokenNet:
      parseNullEndTag();
      break;
    case tokenPio:
      parseProcessingInstruction();
      break;
    case tokenStagoNameStart:
      parseStartTag();
      break;
    case tokenStagoTagc:
      parseEmptyStartTag();
      break;
    case tokenStagoGrpo:
      parseGroupStartTag();
      break;
    case tokenRe:
      // Record end: accepted as pcdata and queued rather than delivered
      // immediately.
      acceptPcdata(currentLocation());
      queueRe(currentLocation());
      break;
    case tokenRs:
      // Record start: accepted as pcdata but only reported as an
      // ignoredRs event, and only when instance markup is wanted.
      acceptPcdata(currentLocation());
      noteRs();
      if (eventsWanted().wantInstanceMarkup())
        eventHandler().ignoredRs(new (eventAllocator())
                                 IgnoredRsEvent(currentChar(),
                                                currentLocation()));
      break;
    case tokenS:
      // Separator characters: extend the token over the following ones
      // and report them as a single sSep event if markup is wanted.
      extendContentS();
      if (eventsWanted().wantInstanceMarkup())
        eventHandler().sSep(new (eventAllocator())
                            SSepEvent(currentInput()->currentTokenStart(),
                                      currentInput()->currentTokenLength(),
                                      currentLocation(),
                                      0));
      break;
    case tokenIgnoredChar:
      // Ignored characters: reported only when marked-section events are
      // wanted.
      extendData();
      if (eventsWanted().wantMarkedSections())
        eventHandler().ignoredChars(new (eventAllocator())
                                    IgnoredCharsEvent(currentInput()->currentTokenStart(),
                                                      currentInput()->currentTokenLength(),
                                                      currentLocation(),
                                                      0));
      break;
    case tokenUnrecognized:
      // Report the character, then handle it like ordinary data.
      reportNonSgmlCharacter();
      // fall through
    case tokenChar:
      parsePcdata();
      break;
    default:
      // Every remaining token value is a short reference; its index is
      // the offset from tokenFirstShortref.
      ASSERT(token >= tokenFirstShortref);
      handleShortref(token - tokenFirstShortref);
      break;
    }
  } while (eventQueueEmpty());
}
  300. void Parser::skipDeclaration(unsigned startLevel)
  301. {
  302. const unsigned skipMax = 250;
  303. unsigned skipCount = 0;
  304. for (;;) {
  305. Token token = getToken(mdMode);
  306. if (inputLevel() == startLevel)
  307. skipCount++;
  308. switch (token) {
  309. case tokenUnrecognized:
  310. (void)getChar();
  311. break;
  312. case tokenEe:
  313. if (inputLevel() <= startLevel)
  314. return;
  315. popInputStack();
  316. return;
  317. case tokenMdc:
  318. if (inputLevel() == startLevel)
  319. return;
  320. break;
  321. case tokenS:
  322. if (inputLevel() == startLevel && skipCount >= skipMax
  323. && currentChar() == syntax().standardFunction(Syntax::fRE))
  324. return;
  325. break;
  326. default:
  327. break;
  328. }
  329. }
  330. }
  331. void Parser::handleShortref(int index)
  332. {
  333. const ConstPtr<Entity> &entity
  334. = currentElement().map()->entity(index);
  335. if (!entity.isNull()) {
  336. Owner<Markup> markupPtr;
  337. if (eventsWanted().wantInstanceMarkup()) {
  338. markupPtr = new Markup;
  339. markupPtr->addShortref(currentInput());
  340. }
  341. Ptr<EntityOrigin> origin
  342. = new (internalAllocator())
  343. EntityOrigin(entity,
  344. currentLocation(),
  345. currentInput()->currentTokenLength(),
  346. markupPtr);
  347. entity->contentReference(*this, origin);
  348. return;
  349. }
  350. InputSource *in = currentInput();
  351. size_t length = in->currentTokenLength();
  352. const Char *s = in->currentTokenStart();
  353. size_t i = 0;
  354. if (currentMode() == econMode || currentMode() == econnetMode) {
  355. // FIXME do this in advance (what about B sequence?)
  356. for (i = 0; i < length && syntax().isS(s[i]); i++)
  357. ;
  358. if (i > 0 && eventsWanted().wantInstanceMarkup())
  359. eventHandler().sSep(new (eventAllocator())
  360. SSepEvent(s, i, currentLocation(), 0));
  361. }
  362. if (i < length) {
  363. Location location(currentLocation());
  364. location += i;
  365. s += i;
  366. length -= i;
  367. acceptPcdata(location);
  368. // FIXME speed this up
  369. for (; length > 0; location += 1, length--, s++) {
  370. if (*s == syntax().standardFunction(Syntax::fRS)) {
  371. noteRs();
  372. if (eventsWanted().wantInstanceMarkup())
  373. eventHandler().ignoredRs(new (eventAllocator())
  374. IgnoredRsEvent(*s, location));
  375. }
  376. else if (*s == syntax().standardFunction(Syntax::fRE))
  377. queueRe(location);
  378. else {
  379. noteData();
  380. eventHandler().data(new (eventAllocator())
  381. ImmediateDataEvent(Event::characterData, s, 1,
  382. location, 0));
  383. }
  384. }
  385. }
  386. }
  387. void Parser::parsePcdata()
  388. {
  389. extendData();
  390. acceptPcdata(currentLocation());
  391. noteData();
  392. eventHandler().data(new (eventAllocator())
  393. ImmediateDataEvent(Event::characterData,
  394. currentInput()->currentTokenStart(),
  395. currentInput()->currentTokenLength(),
  396. currentLocation(),
  397. 0));
  398. }
  399. void Parser::parseStartTag()
  400. {
  401. InputSource *in = currentInput();
  402. Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
  403. in->currentLocation());
  404. in->discardInitial();
  405. extendNameToken(syntax().namelen(), ParserMessages::nameLength);
  406. if (markup) {
  407. markup->addDelim(Syntax::dSTAGO);
  408. markup->addName(in);
  409. }
  410. StringC &name = nameBuffer();
  411. getCurrentToken(syntax().generalSubstTable(), name);
  412. const ElementType *e = currentDtd().lookupElementType(name);
  413. if (sd().rank()) {
  414. if (!e)
  415. e = completeRankStem(name);
  416. else if (e->isRankedElement())
  417. handleRankedElement(e);
  418. }
  419. if (!e)
  420. e = lookupCreateUndefinedElement(name, currentLocation());
  421. Boolean netEnabling;
  422. AttributeList *attributes = allocAttributeList(e->attributeDef(), 0);
  423. Token closeToken = getToken(tagMode);
  424. if (closeToken == tokenTagc) {
  425. if (name.size() > syntax().taglen())
  426. checkTaglen(markupLocation().index());
  427. attributes->finish(*this);
  428. netEnabling = 0;
  429. if (markup)
  430. markup->addDelim(Syntax::dTAGC);
  431. }
  432. else {
  433. in->ungetToken();
  434. if (parseAttributeSpec(0, *attributes, netEnabling)) {
  435. // The difference between the indices will be the difference
  436. // in offsets plus 1 for each named character reference.
  437. if (in->currentLocation().index() - markupLocation().index()
  438. > syntax().taglen())
  439. checkTaglen(markupLocation().index());
  440. }
  441. else
  442. netEnabling = 0;
  443. }
  444. acceptStartTag(e,
  445. new (eventAllocator())
  446. StartElementEvent(e,
  447. currentDtdPointer(),
  448. attributes,
  449. markupLocation(),
  450. markup),
  451. netEnabling);
  452. }
  453. const ElementType *Parser::completeRankStem(const StringC &name)
  454. {
  455. const RankStem *rankStem = currentDtd().lookupRankStem(name);
  456. if (rankStem) {
  457. StringC name(rankStem->name());
  458. if (!appendCurrentRank(name, rankStem))
  459. message(ParserMessages::noCurrentRank, StringMessageArg(name));
  460. else
  461. return currentDtd().lookupElementType(name);
  462. }
  463. return 0;
  464. }
  465. void Parser::handleRankedElement(const ElementType *e)
  466. {
  467. StringC rankSuffix(e->definition()->rankSuffix());
  468. const RankStem *rankStem = e->rankedElementRankStem();
  469. for (size_t i = 0; i < rankStem->nDefinitions(); i++) {
  470. const ElementDefinition *def = rankStem->definition(i);
  471. for (size_t j = 0; j < def->nRankStems(); j++)
  472. setCurrentRank(def->rankStem(j), rankSuffix);
  473. }
  474. }
  475. void Parser::checkTaglen(Index tagStartIndex)
  476. {
  477. const InputSourceOrigin *origin
  478. = currentLocation().origin()->asInputSourceOrigin();
  479. ASSERT(origin != 0);
  480. if (origin->startOffset(currentLocation().index())
  481. - origin->startOffset(tagStartIndex
  482. + syntax().delimGeneral(Syntax::dSTAGO).size())
  483. > syntax().taglen())
  484. message(ParserMessages::taglen, NumberMessageArg(syntax().taglen()));
  485. }
  486. void Parser::parseEmptyStartTag()
  487. {
  488. if (options().warnEmptyTag)
  489. message(ParserMessages::emptyStartTag);
  490. // FIXME error if not in base.
  491. const ElementType *e = 0;
  492. if (!sd().omittag())
  493. e = lastEndedElementType();
  494. else if (tagLevel() > 0)
  495. e = currentElement().type();
  496. if (!e)
  497. e = currentDtd().documentElementType();
  498. AttributeList *attributes = allocAttributeList(e->attributeDef(), 0);
  499. attributes->finish(*this);
  500. Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
  501. currentLocation());
  502. if (markup) {
  503. markup->addDelim(Syntax::dSTAGO);
  504. markup->addDelim(Syntax::dTAGC);
  505. }
  506. acceptStartTag(e,
  507. new (eventAllocator())
  508. StartElementEvent(e,
  509. currentDtdPointer(),
  510. attributes,
  511. markupLocation(),
  512. markup),
  513. 0);
  514. }
  515. void Parser::parseGroupStartTag()
  516. {
  517. if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation())) {
  518. currentMarkup()->addDelim(Syntax::dSTAGO);
  519. currentMarkup()->addDelim(Syntax::dGRPO);
  520. }
  521. Boolean active;
  522. if (!parseTagNameGroup(active))
  523. return;
  524. InputSource *in = currentInput();
  525. // Location startLocation = in->currentLocation();
  526. in->startToken();
  527. Xchar c = in->tokenChar(messenger());
  528. if (!syntax().isNameStartCharacter(c)) {
  529. message(ParserMessages::startTagMissingName);
  530. return;
  531. }
  532. in->discardInitial();
  533. extendNameToken(syntax().namelen(), ParserMessages::nameLength);
  534. if (currentMarkup())
  535. currentMarkup()->addName(currentInput());
  536. skipAttributeSpec();
  537. if (currentMarkup())
  538. eventHandler().ignoredMarkup(new (eventAllocator())
  539. IgnoredMarkupEvent(markupLocation(),
  540. currentMarkup()));
  541. noteMarkup();
  542. }
  543. void Parser::parseGroupEndTag()
  544. {
  545. if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation())) {
  546. currentMarkup()->addDelim(Syntax::dSTAGO);
  547. currentMarkup()->addDelim(Syntax::dGRPO);
  548. }
  549. Boolean active;
  550. if (!parseTagNameGroup(active))
  551. return;
  552. InputSource *in = currentInput();
  553. // Location startLocation = in->currentLocation();
  554. in->startToken();
  555. Xchar c = in->tokenChar(messenger());
  556. if (!syntax().isNameStartCharacter(c)) {
  557. message(ParserMessages::endTagMissingName);
  558. return;
  559. }
  560. in->discardInitial();
  561. extendNameToken(syntax().namelen(), ParserMessages::nameLength);
  562. if (currentMarkup())
  563. currentMarkup()->addName(currentInput());
  564. parseEndTagClose();
  565. if (currentMarkup())
  566. eventHandler().ignoredMarkup(new (eventAllocator())
  567. IgnoredMarkupEvent(markupLocation(),
  568. currentMarkup()));
  569. noteMarkup();
  570. }
  571. void Parser::acceptPcdata(const Location &startLocation)
  572. {
  573. if (currentElement().tryTransitionPcdata())
  574. return;
  575. // Need to test here since implying tags may turn off pcdataRecovering.
  576. if (pcdataRecovering())
  577. return;
  578. IList<Undo> undoList;
  579. IList<Event> eventList;
  580. unsigned startImpliedCount = 0;
  581. unsigned attributeListIndex = 0;
  582. keepMessages();
  583. while (tryImplyTag(startLocation, startImpliedCount, attributeListIndex,
  584. undoList, eventList))
  585. if (currentElement().tryTransitionPcdata()) {
  586. queueElementEvents(eventList);
  587. return;
  588. }
  589. discardKeptMessages();
  590. undo(undoList);
  591. message(ParserMessages::pcdataNotAllowed);
  592. pcdataRecover();
  593. }
  594. void Parser::acceptStartTag(const ElementType *e,
  595. StartElementEvent *event,
  596. Boolean netEnabling)
  597. {
  598. if (e->definition()->undefined()) {
  599. message(ParserMessages::undefinedElement, StringMessageArg(e->name()));
  600. pushElementCheck(e, event, netEnabling);
  601. return;
  602. }
  603. if (elementIsExcluded(e)) {
  604. keepMessages();
  605. checkExclusion(e);
  606. }
  607. else {
  608. if (currentElement().tryTransition(e)) {
  609. pushElementCheck(e, event, netEnabling);
  610. return;
  611. }
  612. if (elementIsIncluded(e)) {
  613. event->setIncluded();
  614. pushElementCheck(e, event, netEnabling);
  615. return;
  616. }
  617. keepMessages();
  618. }
  619. IList<Undo> undoList;
  620. IList<Event> eventList;
  621. unsigned startImpliedCount = 0;
  622. unsigned attributeListIndex = 1;
  623. while (tryImplyTag(event->location(), startImpliedCount,
  624. attributeListIndex, undoList, eventList))
  625. if (tryStartTag(e, event, netEnabling, eventList))
  626. return;
  627. discardKeptMessages();
  628. undo(undoList);
  629. handleBadStartTag(e, event, netEnabling);
  630. }
  631. void Parser::undo(IList<Undo> &undoList)
  632. {
  633. while (!undoList.empty()) {
  634. Undo *p = undoList.get();
  635. p->undo(this);
  636. delete p;
  637. }
  638. }
  639. void Parser::queueElementEvents(IList<Event> &events)
  640. {
  641. releaseKeptMessages();
  642. // FIXME provide IList<T>::reverse function
  643. // reverse it
  644. IList<Event> tem;
  645. while (!events.empty())
  646. tem.insert(events.get());
  647. while (!tem.empty()) {
  648. Event *e = tem.get();
  649. if (e->type() == Event::startElement) {
  650. noteStartElement(((StartElementEvent *)e)->included());
  651. eventHandler().startElement((StartElementEvent *)e);
  652. }
  653. else {
  654. noteEndElement(((EndElementEvent *)e)->included());
  655. eventHandler().endElement((EndElementEvent *)e);
  656. }
  657. }
  658. }
  659. void Parser::checkExclusion(const ElementType *e)
  660. {
  661. const LeafContentToken *token = currentElement().invalidExclusion(e);
  662. if (token)
  663. message(ParserMessages::invalidExclusion,
  664. OrdinalMessageArg(token->typeIndex() + 1),
  665. StringMessageArg(token->elementType()->name()),
  666. StringMessageArg(currentElement().type()->name()));
  667. }
  668. Boolean Parser::tryStartTag(const ElementType *e,
  669. StartElementEvent *event,
  670. Boolean netEnabling,
  671. IList<Event> &impliedEvents)
  672. {
  673. if (elementIsExcluded(e)) {
  674. checkExclusion(e);
  675. return 0;
  676. }
  677. if (currentElement().tryTransition(e)) {
  678. queueElementEvents(impliedEvents);
  679. pushElementCheck(e, event, netEnabling);
  680. return 1;
  681. }
  682. if (elementIsIncluded(e)) {
  683. queueElementEvents(impliedEvents);
  684. event->setIncluded();
  685. pushElementCheck(e, event, netEnabling);
  686. return 1;
  687. }
  688. return 0;
  689. }
// Tries to imply one tag at loc: an end-tag if the current element is
// finished, otherwise the start-tag returned by impliedStartTag().
//
// loc                 location given to the implied event
// startImpliedCount   number of start-tags implied so far; incremented
//                     for an implied start-tag, decremented (with a
//                     startTagEmptyElement message) when an end-tag is
//                     implied while it is non-zero
// attributeListIndex  index passed to allocAttributeList; post-incremented
//                     for each implied start-tag
// undo                receives the Undo records needed to revert the step
// eventList           receives the implied event(s)
//
// Returns 1 if a tag was implied, 0 otherwise.  Always returns 0 when
// OMITTAG is off.
Boolean Parser::tryImplyTag(const Location &loc,
                            unsigned &startImpliedCount,
                            unsigned &attributeListIndex,
                            IList<Undo> &undo,
                            IList<Event> &eventList)
{
  if (!sd().omittag())
    return 0;
  if (currentElement().isFinished()) {
    // Nothing to end when no element is open.
    if (tagLevel() == 0)
      return 0;
#if 1
    // Refuse to imply the end-tag when the element's definition does not
    // allow it to be omitted.
    const ElementDefinition *def = currentElement().type()->definition();
    if (def && !def->canOmitEndTag())
      return 0;
#endif
    // imply an end tag
    if (startImpliedCount > 0) {
      // The element being ended was itself started by implication.
      message(ParserMessages::startTagEmptyElement,
              StringMessageArg(currentElement().type()->name()));
      startImpliedCount--;
    }
#if 0
    const ElementDefinition *def = currentElement().type()->definition();
    if (def && !def->canOmitEndTag())
      message(ParserMessages::omitEndTagDeclare,
              StringMessageArg(currentElement().type()->name()),
              currentElement().startLocation());
#endif
    EndElementEvent *event
      = new (eventAllocator()) EndElementEvent(currentElement().type(),
                                               currentDtdPointer(),
                                               loc,
                                               0);
    eventList.insert(event);
    // The popped element is kept in the undo record.
    undo.insert(new (internalAllocator()) UndoEndTag(popSaveElement()));
    return 1;
  }
  // The current element is not finished: see whether a start-tag can be
  // implied.
  const LeafContentToken *token = currentElement().impliedStartTag();
  if (!token)
    return 0;
  const ElementType *e = token->elementType();
  if (elementIsExcluded(e))
    message(ParserMessages::requiredElementExcluded,
            OrdinalMessageArg(token->typeIndex() + 1),
            StringMessageArg(e->name()),
            StringMessageArg(currentElement().type()->name()));
  // Save the match state for undo before making the transition; this is
  // skipped at tag level 0.
  if (tagLevel() != 0)
    undo.insert(new (internalAllocator())
                UndoTransition(currentElement().matchState()));
  currentElement().doRequiredTransition();
  const ElementDefinition *def = e->definition();
  if (def->declaredContent() != ElementDefinition::modelGroup
      && def->declaredContent() != ElementDefinition::any)
    message(ParserMessages::omitStartTagDeclaredContent,
            StringMessageArg(e->name()));
  if (def->undefined())
    message(ParserMessages::undefinedElement, StringMessageArg(e->name()));
  else if (!def->canOmitStartTag())
    message(ParserMessages::omitStartTagDeclare, StringMessageArg(e->name()));
  AttributeList *attributes
    = allocAttributeList(e->attributeDef(),
                         attributeListIndex++);
  // this will give an error if the element has a required attribute
  attributes->finish(*this);
  startImpliedCount++;
  StartElementEvent *event
    = new (eventAllocator()) StartElementEvent(e,
                                               currentDtdPointer(),
                                               attributes,
                                               loc,
                                               0);
  pushElementCheck(e, event, undo, eventList);
  // Beyond this many implied start-tags, checkImplyLoop decides whether
  // implication may continue.
  const int implyCheckLimit = 30; // this is fairly arbitrary
  if (startImpliedCount > implyCheckLimit
      && !checkImplyLoop(startImpliedCount))
    return 0;
  return 1;
}
  769. void Parser::pushElementCheck(const ElementType *e, StartElementEvent *event,
  770. Boolean netEnabling)
  771. {
  772. if (tagLevel() == syntax().taglvl())
  773. message(ParserMessages::taglvlOpenElements, NumberMessageArg(syntax().taglvl()));
  774. noteStartElement(event->included());
  775. if (event->mustOmitEnd()) {
  776. EndElementEvent *end
  777. = new (eventAllocator()) EndElementEvent(e,
  778. currentDtdPointer(),
  779. event->location(),
  780. 0);
  781. if (event->included()) {
  782. end->setIncluded();
  783. noteEndElement(1);
  784. }
  785. else
  786. noteEndElement(0);
  787. eventHandler().startElement(event);
  788. eventHandler().endElement(end);
  789. }
  790. else {
  791. const ShortReferenceMap *map = e->map();
  792. if (!map)
  793. map = currentElement().map();
  794. pushElement(new (internalAllocator()) OpenElement(e,
  795. netEnabling,
  796. event->included(),
  797. map,
  798. event->location()));
  799. // Can't access event after it's passed to the event handler.
  800. eventHandler().startElement(event);
  801. }
  802. }
  803. void Parser::pushElementCheck(const ElementType *e, StartElementEvent *event,
  804. IList<Undo> &undoList,
  805. IList<Event> &eventList)
  806. {
  807. if (tagLevel() == syntax().taglvl())
  808. message(ParserMessages::taglvlOpenElements, NumberMessageArg(syntax().taglvl()));
  809. eventList.insert(event);
  810. if (event->mustOmitEnd()) {
  811. EndElementEvent *end
  812. = new (eventAllocator()) EndElementEvent(e,
  813. currentDtdPointer(),
  814. event->location(),
  815. 0);
  816. if (event->included())
  817. end->setIncluded();
  818. eventList.insert(end);
  819. }
  820. else {
  821. undoList.insert(new (internalAllocator()) UndoStartTag);
  822. const ShortReferenceMap *map = e->map();
  823. if (!map)
  824. map = currentElement().map();
  825. pushElement(new (internalAllocator()) OpenElement(e,
  826. 0,
  827. event->included(),
  828. map,
  829. event->location()));
  830. }
  831. }
  832. void Parser::parseEndTag()
  833. {
  834. Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
  835. currentLocation());
  836. currentInput()->discardInitial();
  837. extendNameToken(syntax().namelen(), ParserMessages::nameLength);
  838. if (markup) {
  839. markup->addDelim(Syntax::dETAGO);
  840. markup->addName(currentInput());
  841. }
  842. StringC &name = nameBuffer();
  843. getCurrentToken(syntax().generalSubstTable(), name);
  844. const ElementType *e = currentDtd().lookupElementType(name);
  845. if (sd().rank()) {
  846. if (!e)
  847. e = completeRankStem(name);
  848. }
  849. if (!e)
  850. e = lookupCreateUndefinedElement(name, currentLocation());
  851. parseEndTagClose();
  852. acceptEndTag(e,
  853. new (eventAllocator())
  854. EndElementEvent(e,
  855. currentDtdPointer(),
  856. markupLocation(),
  857. markup));
  858. }
  859. void Parser::parseEndTagClose()
  860. {
  861. for (;;) {
  862. Token token = getToken(tagMode);
  863. switch (token) {
  864. case tokenUnrecognized:
  865. if (!reportNonSgmlCharacter())
  866. message(ParserMessages::endTagCharacter, StringMessageArg(currentToken()));
  867. return;
  868. case tokenEe:
  869. message(ParserMessages::endTagEntityEnd);
  870. return;
  871. case tokenEtago:
  872. case tokenStago:
  873. if (!sd().shorttag())
  874. message(ParserMessages::minimizedEndTag);
  875. else if (options().warnUnclosedTag)
  876. message(ParserMessages::unclosedEndTag);
  877. currentInput()->ungetToken();
  878. return;
  879. case tokenTagc:
  880. if (currentMarkup())
  881. currentMarkup()->addDelim(Syntax::dTAGC);
  882. return;
  883. case tokenS:
  884. if (currentMarkup())
  885. currentMarkup()->addS(currentChar());
  886. break;
  887. default:
  888. message(ParserMessages::endTagInvalidToken,
  889. TokenMessageArg(token, tagMode, syntaxPointer(), sdPointer()));
  890. return;
  891. }
  892. }
  893. }
  894. void Parser::parseEmptyEndTag()
  895. {
  896. if (options().warnEmptyTag)
  897. message(ParserMessages::emptyEndTag);
  898. // FIXME what to do if not in base
  899. if (tagLevel() == 0)
  900. message(ParserMessages::emptyEndTagNoOpenElements);
  901. else {
  902. Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
  903. currentLocation());
  904. if (markup) {
  905. markup->addDelim(Syntax::dETAGO);
  906. markup->addDelim(Syntax::dTAGC);
  907. }
  908. acceptEndTag(currentElement().type(),
  909. new (eventAllocator()) EndElementEvent(currentElement().type(),
  910. currentDtdPointer(),
  911. currentLocation(),
  912. markup));
  913. }
  914. }
  915. void Parser::parseNullEndTag()
  916. {
  917. if (options().warnNet)
  918. message(ParserMessages::nullEndTag);
  919. // If a null end tag was recognized, then there must be a net enabling
  920. // element on the stack.
  921. for (;;) {
  922. ASSERT(tagLevel() > 0);
  923. if (currentElement().netEnabling())
  924. break;
  925. if (!currentElement().isFinished())
  926. message(ParserMessages::elementNotFinished,
  927. StringMessageArg(currentElement().type()->name()));
  928. implyCurrentElementEnd(currentLocation());
  929. }
  930. if (!currentElement().isFinished())
  931. message(ParserMessages::elementEndTagNotFinished,
  932. StringMessageArg(currentElement().type()->name()));
  933. Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
  934. currentLocation());
  935. if (markup)
  936. markup->addDelim(Syntax::dNET);
  937. acceptEndTag(currentElement().type(),
  938. new (eventAllocator()) EndElementEvent(currentElement().type(),
  939. currentDtdPointer(),
  940. currentLocation(),
  941. markup));
  942. }
  943. void Parser::endAllElements()
  944. {
  945. while (tagLevel() > 0) {
  946. if (!currentElement().isFinished())
  947. message(ParserMessages::elementNotFinishedDocumentEnd,
  948. StringMessageArg(currentElement().type()->name()));
  949. implyCurrentElementEnd(currentLocation());
  950. }
  951. if (!currentElement().isFinished())
  952. message(ParserMessages::noDocumentElement);
  953. }
  954. void Parser::acceptEndTag(const ElementType *e,
  955. EndElementEvent *event)
  956. {
  957. if (!elementIsOpen(e)) {
  958. message(ParserMessages::elementNotOpen, StringMessageArg(e->name()));
  959. delete event;
  960. return;
  961. }
  962. for (;;){
  963. if (currentElement().type() == e)
  964. break;
  965. if (!currentElement().isFinished())
  966. message(ParserMessages::elementNotFinished,
  967. StringMessageArg(currentElement().type()->name()));
  968. implyCurrentElementEnd(event->location());
  969. }
  970. if (!currentElement().isFinished())
  971. message(ParserMessages::elementEndTagNotFinished,
  972. StringMessageArg(currentElement().type()->name()));
  973. if (currentElement().included())
  974. event->setIncluded();
  975. noteEndElement(event->included());
  976. eventHandler().endElement(event);
  977. popElement();
  978. }
  979. void Parser::implyCurrentElementEnd(const Location &loc)
  980. {
  981. if (!sd().omittag())
  982. message(ParserMessages::omitEndTagOmittag,
  983. StringMessageArg(currentElement().type()->name()),
  984. currentElement().startLocation());
  985. else {
  986. const ElementDefinition *def = currentElement().type()->definition();
  987. if (def && !def->canOmitEndTag())
  988. message(ParserMessages::omitEndTagDeclare,
  989. StringMessageArg(currentElement().type()->name()),
  990. currentElement().startLocation());
  991. }
  992. EndElementEvent *event
  993. = new (eventAllocator()) EndElementEvent(currentElement().type(),
  994. currentDtdPointer(),
  995. loc,
  996. 0);
  997. if (currentElement().included())
  998. event->setIncluded();
  999. noteEndElement(event->included());
  1000. eventHandler().endElement(event);
  1001. popElement();
  1002. }
  1003. void Parser::extendData()
  1004. {
  1005. XcharMap<PackedBoolean> isNormal(normalMap());
  1006. InputSource *in = currentInput();
  1007. size_t length = in->currentTokenLength();
  1008. // This is one of the parser's inner loops, so it needs to be fast.
  1009. while (isNormal[in->tokenChar(messenger())])
  1010. length++;
  1011. in->endToken(length);
  1012. }
  1013. void Parser::extendContentS()
  1014. {
  1015. InputSource *in = currentInput();
  1016. size_t length = in->currentTokenLength();
  1017. XcharMap<PackedBoolean> isNormal(normalMap());
  1018. for (;;) {
  1019. Xchar ch = in->tokenChar(messenger());
  1020. if (!syntax().isS(ch) || !isNormal[ch])
  1021. break;
  1022. length++;
  1023. }
  1024. in->endToken(length);
  1025. }
  1026. void Parser::handleBadStartTag(const ElementType *e,
  1027. StartElementEvent *event,
  1028. Boolean netEnabling)
  1029. {
  1030. IList<Undo> undoList;
  1031. IList<Event> eventList;
  1032. keepMessages();
  1033. for (;;) {
  1034. Vector<const ElementType *> missing;
  1035. findMissingTag(e, missing);
  1036. if (missing.size() == 1) {
  1037. queueElementEvents(eventList);
  1038. const ElementType *m = missing[0];
  1039. message(ParserMessages::missingElementInferred,
  1040. StringMessageArg(e->name()),
  1041. StringMessageArg(m->name()));
  1042. AttributeList *attributes
  1043. = allocAttributeList(m->attributeDef(), 1);
  1044. // this will give an error if the element has a required attribute
  1045. attributes->finish(*this);
  1046. StartElementEvent *inferEvent
  1047. = new (eventAllocator()) StartElementEvent(m,
  1048. currentDtdPointer(),
  1049. attributes,
  1050. event->location(),
  1051. 0);
  1052. if (!currentElement().tryTransition(m))
  1053. inferEvent->setIncluded();
  1054. pushElementCheck(m, inferEvent, 0);
  1055. if (!currentElement().tryTransition(e))
  1056. event->setIncluded();
  1057. pushElementCheck(e, event, netEnabling);
  1058. return;
  1059. }
  1060. if (missing.size() > 0) {
  1061. queueElementEvents(eventList);
  1062. Vector<StringC> missingNames;
  1063. for (size_t i = 0; i < missing.size(); i++)
  1064. missingNames.push_back(missing[i]->name());
  1065. message(ParserMessages::missingElementMultiple,
  1066. StringMessageArg(e->name()),
  1067. StringVectorMessageArg(missingNames));
  1068. pushElementCheck(e, event, netEnabling);
  1069. return;
  1070. }
  1071. if (!sd().omittag()
  1072. || !currentElement().isFinished()
  1073. || tagLevel() == 0
  1074. || !currentElement().type()->definition()->canOmitEndTag())
  1075. break;
  1076. EndElementEvent *endEvent
  1077. = new (eventAllocator()) EndElementEvent(currentElement().type(),
  1078. currentDtdPointer(),
  1079. event->location(),
  1080. 0);
  1081. eventList.insert(endEvent);
  1082. undoList.insert(new (internalAllocator()) UndoEndTag(popSaveElement()));
  1083. }
  1084. discardKeptMessages();
  1085. undo(undoList);
  1086. message(ParserMessages::elementNotAllowed, StringMessageArg(e->name()));
  1087. // If element couldn't occur because it was excluded, then
  1088. // do the transition here.
  1089. (void)currentElement().tryTransition(e);
  1090. pushElementCheck(e, event, netEnabling);
  1091. }
  1092. void Parser::findMissingTag(const ElementType *e,
  1093. Vector<const ElementType *> &v)
  1094. {
  1095. size_t i;
  1096. if (!currentElement().currentPosition()) {
  1097. if (!e)
  1098. v.push_back((const ElementType *)0);
  1099. return;
  1100. }
  1101. if (elementIsExcluded(e))
  1102. return;
  1103. size_t newSize = 0;
  1104. currentElement().matchState().possibleTransitions(v);
  1105. // FIXME also get currentInclusions
  1106. for (i = 0; i < v.size(); i++) {
  1107. if (v[i] && !elementIsExcluded(v[i])) {
  1108. Boolean success = 0;
  1109. switch (v[i]->definition()->declaredContent()) {
  1110. case ElementDefinition::modelGroup:
  1111. {
  1112. const CompiledModelGroup *grp
  1113. = v[i]->definition()->compiledModelGroup();
  1114. MatchState state(grp);
  1115. if (!e) {
  1116. if (state.tryTransitionPcdata())
  1117. success = 1;
  1118. }
  1119. else {
  1120. if (state.tryTransition(e))
  1121. success = 1;
  1122. if (!success) {
  1123. for (size_t j = 0; j < v[i]->definition()->nInclusions(); j++)
  1124. if (v[i]->definition()->inclusion(j) == e) {
  1125. success = 1;
  1126. break;
  1127. }
  1128. }
  1129. if (success) {
  1130. for (size_t j = 0; j < v[i]->definition()->nExclusions(); j++)
  1131. if (v[i]->definition()->exclusion(j) == e) {
  1132. success = 0;
  1133. break;
  1134. }
  1135. }
  1136. }
  1137. }
  1138. break;
  1139. #if 0
  1140. case ElementDefinition::any:
  1141. success = 1;
  1142. break;
  1143. #endif
  1144. case ElementDefinition::cdata:
  1145. case ElementDefinition::rcdata:
  1146. if (e == 0)
  1147. success = 1;
  1148. break;
  1149. default:
  1150. break;
  1151. }
  1152. if (success)
  1153. v[newSize++] = v[i];
  1154. }
  1155. }
  1156. v.resize(newSize);
  1157. // Sort them according to the order of their occurrence in the DTD.
  1158. // Do an insertion sort.
  1159. for (i = 1; i < v.size(); i++) {
  1160. const ElementType *tem = v[i];
  1161. size_t j;
  1162. for (j = i; j > 0 && v[j - 1]->index() > tem->index(); j--)
  1163. v[j] = v[j - 1];
  1164. v[j] = tem;
  1165. }
  1166. }
  #if 0
  // Disabled: this produces messages that are too verbose.
  //
  // Collects into v the element types reachable from the current position,
  // following tags that can be implied; a null entry (0) stands for #pcdata.
  // Excluded, undefined and duplicate entries are removed at the end.
  // This doesn't try to be very efficient.
  void Parser::getAllowedElementTypes(Vector<const ElementType *> &v)
  {
    v.clear();
    // FIXME get a list of all inclusions first
    // getCurrentInclusions(v);
    // excluded[i] says whether v[i] was excluded
    Vector<PackedBoolean> excluded(v.size(), 0);
    unsigned startImpliedCount = 0;
    IList<Undo> undoList;
    for (;;) {
      if (!currentElement().currentPosition()) {
        // must be allowed #pcdata
        v.push_back((const ElementType *)0);
        excluded.push_back((PackedBoolean)0);
        break;
      }

      // have a model group
      size_t firstNew = v.size();
      currentElement().matchState().possibleTransitions(v);
      excluded.resize(v.size());
      for (size_t j = firstNew; j < v.size(); j++)
        excluded[j] = (v[j] && elementIsExcluded(v[j]));
      if (!sd().omittag())
        break;

      // Try to imply a tag
      if (currentElement().isFinished()) {
        // Imply an end tag, but only outside any implied start tag.
        if (tagLevel() == 0 || startImpliedCount)
          break;
        const ElementDefinition *def = currentElement().type()->definition();
        if (!def || !def->canOmitEndTag())
          break;
        undoList.insert(new (internalAllocator())
                        UndoEndTag(popSaveElement()));
        continue;
      }

      // Imply a start tag.
      const LeafContentToken *token = currentElement().impliedStartTag();
      if (!token)
        break;
      const ElementType *e = token->elementType();
      if (elementIsExcluded(e))
        break;
      const ElementDefinition *def = e->definition();
      if (!def
          || def->undefined()
          || (def->declaredContent() != ElementDefinition::modelGroup
              && def->declaredContent() != ElementDefinition::any)
          || !def->canOmitStartTag())
        break;
      undoList.insert(new (internalAllocator()) UndoStartTag);
      startImpliedCount++;
      pushElement(new (internalAllocator()) OpenElement(e,
                                                        0,
                                                        0,
                                                        0,
                                                        Location()));
      if (checkImplyLoop(startImpliedCount))
        break;
      for (size_t k = 0; k < def->nInclusions(); k++) {
        if (!elementIsExcluded(def->inclusion(k))) {
          v.push_back(def->inclusion(k));
          excluded.push_back(0);
        }
      }
    }
    undo(undoList);

    // Remove exclusions and duplicates and undefined
    size_t kept = 0;
    for (size_t i = 0; i < v.size(); i++) {
      if (excluded[i])
        continue;
      if (v[i] && v[i]->definition()->undefined())
        continue;
      // Look for an earlier kept entry equal to v[i].
      size_t j = 0;
      while (j < kept && v[i] != v[j])
        j++;
      if (j == kept)
        v[kept++] = v[i];
    }
    v.resize(kept);
  }
  #endif
  1257. #ifdef SP_NAMESPACE
  1258. }
  1259. #endif