12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843 |
- /*
- * CDE - Common Desktop Environment
- *
- * Copyright (c) 1993-2012, The Open Group. All rights reserved.
- *
- * These libraries and programs are free software; you can
- * redistribute them and/or modify them under the terms of the GNU
- * Lesser General Public License as published by the Free Software
- * Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * These libraries and programs are distributed in the hope that
- * they will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU Lesser General Public License for more
- * details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with these libraries and programs; if not, write
- * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
- * Floor, Boston, MA 02110-1301 USA
- */
- /* $XConsortium: parseSd.C /main/2 1996/08/12 15:47:30 mgreess $ */
- // Copyright (c) 1994, 1995 James Clark
- // See the file COPYING for copying permission.
- #include "splib.h"
- #include "Parser.h"
- #include "macros.h"
- #include "SdFormalError.h"
- #include "MessageBuilder.h"
- #include "ParserMessages.h"
- #include "MessageArg.h"
- #include "CharsetRegistry.h"
- #include "ISetIter.h"
- #include "token.h"
- #include "TokenMessageArg.h"
- #include "constant.h"
- #include "SdText.h"
- #include "NumericCharRefOrigin.h"
- #ifdef SP_NAMESPACE
- namespace SP_NAMESPACE {
- #endif
- class CharSwitcher {
- public:
- CharSwitcher();
- void addSwitch(WideChar from, WideChar to);
- SyntaxChar subst(WideChar c);
- size_t nSwitches() const;
- Boolean switchUsed(size_t i) const;
- WideChar switchFrom(size_t i) const;
- WideChar switchTo(size_t i) const;
- private:
- Vector<PackedBoolean> switchUsed_;
- Vector<WideChar> switches_;
- };
- // Information about the SGML declaration being built.
- struct SdBuilder {
- SdBuilder();
- void addFormalError(const Location &, const MessageType1 &, const StringC &);
- Ptr<Sd> sd;
- Ptr<Syntax> syntax;
- CharsetDecl syntaxCharsetDecl;
- CharsetInfo syntaxCharset;
- CharSwitcher switcher;
- Boolean externalSyntax;
- Boolean valid;
- IList<SdFormalError> formalErrorList;
- };
- class CharsetMessageArg : public MessageArg {
- public:
- CharsetMessageArg(const ISet<WideChar> &set);
- MessageArg *copy() const;
- void append(MessageBuilder &) const;
- private:
- ISet<WideChar> set_;
- };
- struct SdParam {
- typedef unsigned char Type;
- enum {
- invalid,
- eE,
- minimumLiteral,
- mdc,
- ellipsis,
- number,
- capacityName,
- name,
- paramLiteral,
- generalDelimiterName,
- referenceReservedName,
- quantityName,
- reservedName // Sd::ReservedName is added to this
- };
- Type type;
- StringC token;
- Text literalText;
- String<SyntaxChar> paramLiteralText;
- union {
- Number n;
- Sd::Capacity capacityIndex;
- Syntax::Quantity quantityIndex;
- Syntax::ReservedName reservedNameIndex;
- Syntax::DelimGeneral delimGeneralIndex;
- };
- };
- class AllowedSdParams {
- public:
- AllowedSdParams(SdParam::Type,
- SdParam::Type = SdParam::invalid,
- SdParam::Type = SdParam::invalid,
- SdParam::Type = SdParam::invalid,
- SdParam::Type = SdParam::invalid,
- SdParam::Type = SdParam::invalid);
- Boolean param(SdParam::Type) const;
- SdParam::Type get(int i) const;
- private:
- enum { maxAllow = 6 };
- SdParam::Type allow_[maxAllow];
- };
- class AllowedSdParamsMessageArg : public MessageArg {
- public:
- AllowedSdParamsMessageArg(const AllowedSdParams &allow,
- const ConstPtr<Sd> &sd);
- MessageArg *copy() const;
- void append(MessageBuilder &) const;
- private:
- AllowedSdParams allow_;
- ConstPtr<Sd> sd_;
- };
- struct StandardSyntaxSpec {
- struct AddedFunction {
- const char *name;
- Syntax::FunctionClass functionClass;
- SyntaxChar syntaxChar;
- };
- const AddedFunction *addedFunction;
- size_t nAddedFunction;
- Boolean shortref;
- };
- static StandardSyntaxSpec::AddedFunction coreFunctions[] = {
- { "TAB", Syntax::cSEPCHAR, 9 },
- };
- static StandardSyntaxSpec coreSyntax = {
- coreFunctions, SIZEOF(coreFunctions), 0
- };
- static StandardSyntaxSpec refSyntax = {
- coreFunctions, SIZEOF(coreFunctions), 1
- };
- void Parser::doInit()
- {
- if (cancelled()) {
- allDone();
- return;
- }
- // When document entity doesn't exist, don't give any errors
- // other than the cannot open error.
- if (currentInput()->get(messenger()) == InputSource::eE) {
- if (currentInput()->accessError()) {
- allDone();
- return;
- }
- }
- else
- currentInput()->ungetToken();
- const CharsetInfo &initCharset = sd().docCharset();
- ISet<WideChar> missing;
- findMissingMinimum(initCharset, missing);
- if (!missing.isEmpty()) {
- message(ParserMessages::sdMissingCharacters, CharsetMessageArg(missing));
- giveUp();
- return;
- }
- Boolean found = 0;
- StringC systemId;
- if (scanForSgmlDecl(initCharset))
- found = 1;
- else {
- currentInput()->ungetToken();
- if (entityCatalog().sgmlDecl(initCharset, messenger(), systemId)) {
- InputSource *in = entityManager().open(systemId,
- initCharset,
- new InputSourceOrigin,
- 0,
- messenger());
- if (in) {
- pushInput(in);
- if (scanForSgmlDecl(initCharset))
- found = 1;
- else {
- message(ParserMessages::badDefaultSgmlDecl);
- popInputStack();
- }
- }
- }
- }
- if (found) {
- if (startMarkup(eventsWanted().wantPrologMarkup(), currentLocation())) {
- size_t nS = currentInput()->currentTokenLength() - 6;
- for (size_t i = 0; i < nS; i++)
- currentMarkup()->addS(currentInput()->currentTokenStart()[i]);
- currentMarkup()->addDelim(Syntax::dMDO);
- currentMarkup()->addSdReservedName(Sd::rSGML,
- currentInput()->currentTokenStart()
- + (currentInput()->currentTokenLength() - 4),
- 4);
- }
- Syntax *syntaxp = new Syntax(sd());
- CharSwitcher switcher;
- if (!setStandardSyntax(*syntaxp, refSyntax, sd().docCharset(),
- switcher)) {
- giveUp();
- delete syntaxp;
- return;
- }
- syntaxp->implySgmlChar(sd().docCharset());
- setSyntax(syntaxp);
- compileSdModes();
- ConstPtr<Sd> refSd(sdPointer());
- ConstPtr<Syntax> refSyntax(syntaxPointer());
- if (!parseSgmlDecl()) {
- giveUp();
- return;
- }
- // queue an SGML declaration event
- eventHandler().sgmlDecl(new (eventAllocator())
- SgmlDeclEvent(sdPointer(),
- syntaxPointer(),
- instanceSyntaxPointer(),
- refSd,
- refSyntax,
- currentInput()->nextIndex(),
- systemId,
- markupLocation(),
- currentMarkup()));
- if (inputLevel() == 2) {
- // FIXME perhaps check for junk after SGML declaration
- popInputStack();
- }
- }
- else {
- if (!implySgmlDecl()) {
- giveUp();
- return;
- }
- // queue an SGML declaration event
- eventHandler().sgmlDecl(new (eventAllocator())
- SgmlDeclEvent(sdPointer(),
- syntaxPointer()));
- }
-
- // Now we have sd and syntax set up, prepare to parse the prolog.
- compilePrologModes();
- setPhase(prologPhase);
- }
- Boolean Parser::implySgmlDecl()
- {
- Syntax *syntaxp = new Syntax(sd());
- const StandardSyntaxSpec *spec;
- if (options().shortref)
- spec = &refSyntax;
- else
- spec = &coreSyntax;
- CharSwitcher switcher;
- if (!setStandardSyntax(*syntaxp, *spec, sd().docCharset(), switcher)) {
- delete syntaxp;
- return 0;
- }
- syntaxp->implySgmlChar(sd().docCharset());
- for (int i = 0; i < Syntax::nQuantity; i++)
- syntaxp->setQuantity(i, options().quantity[i]);
- setSyntax(syntaxp);
- return 1;
- }
- Boolean Parser::setStandardSyntax(Syntax &syn,
- const StandardSyntaxSpec &spec,
- const CharsetInfo &docCharset,
- CharSwitcher &switcher)
- {
- static UnivCharsetDesc::Range syntaxCharsetRanges[] = {
- { 0, 128, 0 },
- };
- static UnivCharsetDesc syntaxCharsetDesc(syntaxCharsetRanges,
- SIZEOF(syntaxCharsetRanges));
- static CharsetInfo syntaxCharset(syntaxCharsetDesc);
- Boolean valid = 1;
- if (!checkSwitches(switcher, syntaxCharset))
- valid = 0;
- size_t i;
- for (i = 0; i < switcher.nSwitches(); i++)
- if (switcher.switchTo(i) >= 128)
- message(ParserMessages::switchNotInCharset,
- NumberMessageArg(switcher.switchTo(i)));
- static const Char shunchar[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 127, 255
- };
-
- for (i = 0; i < SIZEOF(shunchar); i++)
- syn.addShunchar(shunchar[i]);
- syn.setShuncharControls();
- static Syntax::StandardFunction standardFunctions[3] = {
- Syntax::fRE, Syntax::fRS, Syntax::fSPACE
- };
- static SyntaxChar functionChars[3] = { 13, 10, 32 };
- for (i = 0; i < 3; i++) {
- Char docChar;
- if (translateSyntax(switcher,
- syntaxCharset,
- docCharset,
- functionChars[i],
- docChar)
- && checkNotFunction(syn, docChar))
- syn.setStandardFunction(standardFunctions[i], docChar);
- else
- valid = 0;
- }
- for (i = 0; i < spec.nAddedFunction; i++) {
- Char docChar;
- if (translateSyntax(switcher,
- syntaxCharset,
- docCharset,
- spec.addedFunction[i].syntaxChar,
- docChar)
- && checkNotFunction(syn, docChar))
- syn.addFunctionChar(docCharset.execToDesc(spec.addedFunction[i].name),
- spec.addedFunction[i].functionClass,
- docChar);
- else
- valid = 0;
- }
-
- static SyntaxChar nameChars[2] = { 45, 46 }; // '-' '.'
- ISet<Char> nameCharSet;
- for (i = 0; i < 2; i++) {
- Char docChar;
- if (translateSyntax(switcher,
- syntaxCharset,
- docCharset,
- nameChars[i],
- docChar))
- nameCharSet.add(docChar);
- else
- valid = 0;
- }
- if (!checkNmchars(nameCharSet, syn))
- valid = 0;
- else
- syn.addNameCharacters(nameCharSet);
- syn.setNamecaseGeneral(1);
- syn.setNamecaseEntity(0);
- if (!setRefDelimGeneral(syn, syntaxCharset, docCharset, switcher))
- valid = 0;
- setRefNames(syn, docCharset);
- syn.enterStandardFunctionNames();
- if (spec.shortref
- && !addRefDelimShortref(syn, syntaxCharset, docCharset, switcher))
- valid = 0;
- return valid;
- }
- Boolean Parser::setRefDelimGeneral(Syntax &syntax,
- const CharsetInfo &syntaxCharset,
- const CharsetInfo &docCharset,
- CharSwitcher &switcher)
- {
- // Column 3 from Figure 3
- static const char delims[][2] = {
- { 38 },
- { 45, 45 },
- { 38, 35 },
- { 93 },
- { 91 },
- { 93 },
- { 91 },
- { 38 },
- { 60, 47 },
- { 41 },
- { 40 },
- { 34 },
- { 39 },
- { 62 },
- { 60, 33 },
- { 45 },
- { 93, 93 },
- { 47 },
- { 63 },
- { 124 },
- { 37 },
- { 62 },
- { 60, 63 },
- { 43 },
- { 59 },
- { 42 },
- { 35 },
- { 44 },
- { 60 },
- { 62 },
- { 61 },
- };
- Boolean valid = 1;
- ISet<WideChar> missing;
- for (int i = 0; i < Syntax::nDelimGeneral; i++)
- if (syntax.delimGeneral(i).size() == 0) {
- StringC delim;
- size_t j;
- for (j = 0; j < 2 && delims[i][j] != '\0'; j++) {
- UnivChar univChar = translateUniv(delims[i][j], switcher,
- syntaxCharset);
- Char c;
- if (univToDescCheck(docCharset, univChar, c))
- delim += c;
- else {
- missing += univChar;
- valid = 0;
- }
- }
- if (delim.size() == j) {
- if (checkGeneralDelim(syntax, delim))
- syntax.setDelimGeneral(i, delim);
- else
- valid = 0;
- }
- }
- if (!missing.isEmpty())
- message(ParserMessages::missingSignificant646, CharsetMessageArg(missing));
- return valid;
- }
- void Parser::setRefNames(Syntax &syntax, const CharsetInfo &docCharset)
- {
- static const char *const referenceNames[] = {
- "ANY",
- "ATTLIST",
- "CDATA",
- "CONREF",
- "CURRENT",
- "DEFAULT",
- "DOCTYPE",
- "ELEMENT",
- "EMPTY",
- "ENDTAG",
- "ENTITIES",
- "ENTITY",
- "FIXED",
- "ID",
- "IDLINK",
- "IDREF",
- "IDREFS",
- "IGNORE",
- "IMPLIED",
- "INCLUDE",
- "INITIAL",
- "LINK",
- "LINKTYPE",
- "MD",
- "MS",
- "NAME",
- "NAMES",
- "NDATA",
- "NMTOKEN",
- "NMTOKENS",
- "NOTATION",
- "NUMBER",
- "NUMBERS",
- "NUTOKEN",
- "NUTOKENS",
- "O",
- "PCDATA",
- "PI",
- "POSTLINK",
- "PUBLIC",
- "RCDATA",
- "RE",
- "REQUIRED",
- "RESTORE",
- "RS",
- "SDATA",
- "SHORTREF",
- "SIMPLE",
- "SPACE",
- "STARTTAG",
- "SUBDOC",
- "SYSTEM",
- "TEMP",
- "USELINK",
- "USEMAP"
- };
- int i;
- for (i = 0; i < Syntax::nNames; i++) {
- StringC docName(docCharset.execToDesc(referenceNames[i]));
- Syntax::ReservedName tem;
- if (syntax.lookupReservedName(docName, &tem))
- message(ParserMessages::nameReferenceReservedName,
- StringMessageArg(docName));
- if (syntax.reservedName(Syntax::ReservedName(i)).size() == 0)
- syntax.setName(i, docName);
- }
- }
- Boolean Parser::addRefDelimShortref(Syntax &syntax,
- const CharsetInfo &syntaxCharset,
- const CharsetInfo &docCharset,
- CharSwitcher &switcher)
- {
- // Column 2 from Figure 4
- static const char delimShortref[][3] = {
- { 9 },
- { 13 },
- { 10 },
- { 10, 66 },
- { 10, 13 },
- { 10, 66, 13 },
- { 66, 13 },
- { 32 },
- { 66, 66 },
- { 34 },
- { 35 },
- { 37 },
- { 39 },
- { 40 },
- { 41 },
- { 42 },
- { 43 },
- { 44 },
- { 45 },
- { 45, 45 },
- { 58 },
- { 59 },
- { 61 },
- { 64 },
- { 91 },
- { 93 },
- { 94 },
- { 95 },
- { 123 },
- { 124 },
- { 125 },
- { 126 },
- };
- ISet<WideChar> missing;
- for (size_t i = 0; i < SIZEOF(delimShortref); i++) {
- StringC delim;
-
- size_t j;
- for (j = 0; j < 3 && delimShortref[i][j] != '\0'; j++) {
- Char c;
- UnivChar univChar = translateUniv(delimShortref[i][j], switcher,
- syntaxCharset);
- if (univToDescCheck(docCharset, univChar, c))
- delim += c;
- else
- missing += univChar;
- }
- if (delim.size() == j) {
- if (switcher.nSwitches() > 0 && syntax.isValidShortref(delim))
- message(ParserMessages::duplicateDelimShortref,
- StringMessageArg(delim));
- else
- syntax.addDelimShortref(delim, docCharset);
- }
- }
- if (!missing.isEmpty())
- message(ParserMessages::missingSignificant646, CharsetMessageArg(missing));
- return 1;
- }
- // Determine whether the document starts with an SGML declaration.
- // There is no current syntax at this point.
- Boolean Parser::scanForSgmlDecl(const CharsetInfo &initCharset)
- {
- Char rs;
- if (!univToDescCheck(initCharset, UnivCharsetDesc::rs, rs))
- return 0;
- Char re;
- if (!univToDescCheck(initCharset, UnivCharsetDesc::re, re))
- return 0;
- Char space;
- if (!univToDescCheck(initCharset, UnivCharsetDesc::space, space))
- return 0;
- Char tab;
- if (!univToDescCheck(initCharset, UnivCharsetDesc::tab, tab))
- return 0;
- InputSource *in = currentInput();
- Xchar c = in->get(messenger());
- while (c == rs || c == space || c == re || c == tab)
- c = in->tokenChar(messenger());
- if (c != initCharset.execToDesc('<'))
- return 0;
- if (in->tokenChar(messenger()) != initCharset.execToDesc('!'))
- return 0;
- c = in->tokenChar(messenger());
- if (c != initCharset.execToDesc('S')
- && c != initCharset.execToDesc('s'))
- return 0;
- c = in->tokenChar(messenger());
- if (c != initCharset.execToDesc('G')
- && c != initCharset.execToDesc('g'))
- return 0;
- c = in->tokenChar(messenger());
- if (c != initCharset.execToDesc('M')
- && c != initCharset.execToDesc('m'))
- return 0;
- c = in->tokenChar(messenger());
- if (c != initCharset.execToDesc('L')
- && c != initCharset.execToDesc('l'))
- return 0;
- c = in->tokenChar(messenger());
- // Don't recognize this if SGML is followed by a name character.
- if (c == InputSource::eE)
- return 1;
- in->endToken(in->currentTokenLength() - 1);
- if (c == initCharset.execToDesc('-'))
- return 0;
- if (c == initCharset.execToDesc('.'))
- return 0;
- UnivChar univ;
- if (!initCharset.descToUniv(c, univ))
- return 1;
- if (UnivCharsetDesc::a <= univ && univ < UnivCharsetDesc::a + 26)
- return 0;
- if (UnivCharsetDesc::A <= univ && univ < UnivCharsetDesc::A + 26)
- return 0;
- if (UnivCharsetDesc::zero <= univ && univ < UnivCharsetDesc::zero + 10)
- return 0;
- return 1;
- }
-
- void Parser::findMissingMinimum(const CharsetInfo &charset,
- ISet<WideChar> &missing)
- {
- Char to;
- size_t i;
- for (i = 0; i < 26; i++) {
- if (!univToDescCheck(charset, UnivCharsetDesc::A + i, to))
- missing += UnivCharsetDesc::A + i;
- if (!univToDescCheck(charset, UnivCharsetDesc::a + i, to))
- missing += UnivCharsetDesc::a + i;
- }
- for (i = 0; i < 10; i++) {
- Char to;
- if (!univToDescCheck(charset, UnivCharsetDesc::zero + i, to))
- missing += UnivCharsetDesc::zero + i;
- }
- static const UnivChar special[] = {
- 39, 40, 41, 43, 44, 45, 46, 47, 58, 61, 63
- };
- for (i = 0; i < SIZEOF(special); i++)
- if (!univToDescCheck(charset, special[i], to))
- missing += special[i];
- }
- Boolean Parser::parseSgmlDecl()
- {
- SdParam parm;
- SdBuilder sdBuilder;
- if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
- return 0;
- StringC version(sd().execToDoc("ISO 8879:1986"));
- if (parm.literalText.string() != version)
- message(ParserMessages::standardVersion,
- StringMessageArg(parm.literalText.string()));
- sdBuilder.sd = new Sd;
- typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &);
- static SdParser parsers[] = {
- &Parser::sdParseDocumentCharset,
- &Parser::sdParseCapacity,
- &Parser::sdParseScope,
- &Parser::sdParseSyntax,
- &Parser::sdParseFeatures,
- &Parser::sdParseAppinfo,
- };
- for (size_t i = 0; i < SIZEOF(parsers); i++) {
- if (!(this->*(parsers[i]))(sdBuilder, parm))
- return 0;
- if (!sdBuilder.valid)
- return 0;
- }
- if (!parseSdParam(AllowedSdParams(SdParam::mdc), parm))
- return 0;
- if (sdBuilder.sd->formal()) {
- while (!sdBuilder.formalErrorList.empty()) {
- SdFormalError *p = sdBuilder.formalErrorList.get();
- ParserState *state = this; // work around lcc 3.0 bug
- p->send(*state);
- delete p;
- }
- }
- setSd(sdBuilder.sd.pointer());
- if (sdBuilder.sd->scopeInstance()) {
- Syntax *proSyntax = new Syntax(sd());
- CharSwitcher switcher;
- setStandardSyntax(*proSyntax, refSyntax, sd().docCharset(), switcher);
- proSyntax->setSgmlChar(*sdBuilder.syntax->charSet(Syntax::sgmlChar));
- ISet<WideChar> invalidSgmlChar;
- proSyntax->checkSgmlChar(sdBuilder.sd->docCharset(),
- sdBuilder.syntax.pointer(),
- invalidSgmlChar);
- sdBuilder.syntax->checkSgmlChar(sdBuilder.sd->docCharset(),
- proSyntax,
- invalidSgmlChar);
- if (!invalidSgmlChar.isEmpty())
- message(ParserMessages::invalidSgmlChar, CharsetMessageArg(invalidSgmlChar));
- setSyntaxes(proSyntax, sdBuilder.syntax.pointer());
- }
- else
- setSyntax(sdBuilder.syntax.pointer());
- if (syntax().multicode())
- currentInput()->setMarkupScanTable(syntax().markupScanTable());
- return 1;
- }
- Boolean Parser::sdParseDocumentCharset(SdBuilder &sdBuilder, SdParam &parm)
- {
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rCHARSET),
- parm))
- return 0;
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET),
- parm))
- return 0;
- CharsetDecl decl;
- UnivCharsetDesc desc;
- if (!sdParseCharset(sdBuilder, parm, 1, decl, desc))
- return 0;
- ISet<WideChar> missing;
- findMissingMinimum(desc, missing);
- if (!missing.isEmpty()) {
- message(ParserMessages::missingMinimumChars,
- CharsetMessageArg(missing));
- return 0;
- }
- ISet<Char> sgmlChar;
- decl.usedSet(sgmlChar);
- sdBuilder.sd->setDocCharsetDesc(desc);
- sdBuilder.sd->setDocCharsetDecl(decl);
- sdBuilder.syntax = new Syntax(*sdBuilder.sd);
- sdBuilder.syntax->setSgmlChar(sgmlChar);
- return 1;
- }
- Boolean Parser::sdParseCharset(SdBuilder &sdBuilder,
- SdParam &parm,
- Boolean isDocument,
- CharsetDecl &decl,
- UnivCharsetDesc &desc)
- {
- decl.clear();
- ISet<WideChar> multiplyDeclared;
- // This is for checking whether the syntax reference character set
- // is ISO 646 when SCOPE is INSTANCE.
- Boolean maybeISO646 = 1;
- do {
- if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
- return 0;
- UnivCharsetDesc baseDesc;
- PublicId id;
- Boolean found;
- PublicId::TextClass textClass;
- const MessageType1 *err;
- if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
- sdBuilder.addFormalError(currentLocation(),
- *err,
- id.string());
- else if (id.getTextClass(textClass)
- && textClass != PublicId::CHARSET)
- sdBuilder.addFormalError(currentLocation(),
- ParserMessages::basesetTextClass,
- id.string());
- Boolean givenError;
- if (referencePublic(id, PublicId::CHARSET, givenError))
- found = sdParseExternalCharset(*sdBuilder.sd, baseDesc);
- else if (!givenError) {
- found = CharsetRegistry::findCharset(id, sd().docCharset(), baseDesc);
- if (!found && options().warnSgmlDecl)
- message(ParserMessages::unknownBaseset, StringMessageArg(id.string()));
- }
- else
- found = 0;
- if (!found)
- maybeISO646 = 0;
- decl.addSection(id);
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rDESCSET),
- parm))
- return 0;
- if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
- return 0;
- do {
- WideChar min = parm.n;
- if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
- return 0;
- Number count = parm.n;
- Number adjCount;
- if (options().warnSgmlDecl && count == 0)
- message(ParserMessages::zeroNumberOfCharacters);
- decl.rangeDeclared(min, count, multiplyDeclared);
- if (isDocument
- && count > 0
- && (min > charMax || count - 1 > charMax - min)) {
- message(ParserMessages::documentCharMax, NumberMessageArg(charMax));
- adjCount = min > charMax ? 0 : 1 + (charMax - min);
- sdBuilder.valid = 0;
- maybeISO646 = 0;
- }
- else
- adjCount = count;
- if (!parseSdParam(AllowedSdParams(SdParam::number,
- SdParam::minimumLiteral,
- SdParam::reservedName + Sd::rUNUSED),
- parm))
- return 0;
- switch (parm.type) {
- case SdParam::number:
- decl.addRange(min, count, parm.n);
- if (found && adjCount > 0) {
- ISet<WideChar> baseMissing;
- desc.addBaseRange(baseDesc, min, min + (adjCount - 1), parm.n,
- baseMissing);
- if (!baseMissing.isEmpty() && options().warnSgmlDecl)
- message(ParserMessages::basesetCharsMissing,
- CharsetMessageArg(baseMissing));
- }
- break;
- case SdParam::reservedName + Sd::rUNUSED:
- decl.addRange(min, count);
- break;
- case SdParam::minimumLiteral:
- {
- UnivChar c = sdBuilder.sd->nameToUniv(parm.literalText.string());
- if (adjCount > 256) {
- message(ParserMessages::tooManyCharsMinimumLiteral);
- adjCount = 256;
- }
- for (Number i = 0; i < adjCount; i++)
- desc.addRange(min + i, min + i, c);
- }
- maybeISO646 = 0;
- decl.addRange(min, count, parm.literalText.string());
- break;
- default:
- CANNOT_HAPPEN();
- }
- SdParam::Type follow = (isDocument
- ? SdParam::reservedName + Sd::rCAPACITY
- : SdParam::reservedName + Sd::rFUNCTION);
- if (!parseSdParam(AllowedSdParams(SdParam::number,
- SdParam::reservedName + Sd::rBASESET,
- follow),
- parm))
- return 0;
-
- } while (parm.type == SdParam::number);
- } while (parm.type == SdParam::reservedName + Sd::rBASESET);
- if (!multiplyDeclared.isEmpty())
- message(ParserMessages::duplicateCharNumbers,
- CharsetMessageArg(multiplyDeclared));
- ISet<WideChar> declaredSet;
- decl.declaredSet(declaredSet);
- ISetIter<WideChar> iter(declaredSet);
- WideChar min, max, lastMax;
- if (iter.next(min, max)) {
- ISet<WideChar> holes;
- lastMax = max;
- while (iter.next(min, max)) {
- if (min - lastMax > 1)
- holes.addRange(lastMax + 1, min - 1);
- lastMax = max;
- }
- if (!holes.isEmpty())
- message(ParserMessages::codeSetHoles, CharsetMessageArg(holes));
- }
- if (!isDocument && sdBuilder.sd->scopeInstance()) {
- // If scope is INSTANCE, syntax reference character set
- // must be same as reference.
- UnivCharsetDescIter iter(desc);
- WideChar descMin, descMax;
- UnivChar univMin;
- if (!iter.next(descMin, descMax, univMin)
- || descMin != 0
- || descMax != 127
- || univMin != 0
- || !maybeISO646)
- message(ParserMessages::scopeInstanceSyntaxCharset);
- }
- return 1;
- }
- Boolean Parser::sdParseExternalCharset(Sd &sd, UnivCharsetDesc &desc)
- {
- SdParam parm;
- for (;;) {
- if (!parseSdParam(AllowedSdParams(SdParam::number, SdParam::eE),
- parm))
- break;
- if (parm.type == SdParam::eE)
- return 1;
- WideChar min = parm.n;
- if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
- break;
- Number count = parm.n;
- if (!parseSdParam(AllowedSdParams(SdParam::number,
- SdParam::minimumLiteral,
- SdParam::reservedName + Sd::rUNUSED),
- parm))
- break;
- if (parm.type == SdParam::number) {
- if (count > 0)
- desc.addRange(min, min + (count - 1), parm.n);
- }
- else if (parm.type == SdParam::minimumLiteral) {
- UnivChar c = sd.nameToUniv(parm.literalText.string());
- if (count > 256) {
- message(ParserMessages::tooManyCharsMinimumLiteral);
- count = 256;
- }
- for (Number i = 0; i < count; i++)
- desc.addRange(min + i, min + i, c);
- }
- }
- popInputStack();
- return 0;
- }
- Boolean Parser::sdParseCapacity(SdBuilder &sdBuilder, SdParam &parm)
- {
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rPUBLIC,
- SdParam::reservedName + Sd::rSGMLREF),
- parm))
- return 0;
- Boolean pushed = 0;
- if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {
- if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
- return 0;
- PublicId id;
- PublicId::TextClass textClass;
- const MessageType1 *err;
- if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
- sdBuilder.addFormalError(currentLocation(),
- *err,
- id.string());
- else if (id.getTextClass(textClass)
- && textClass != PublicId::CAPACITY)
- sdBuilder.addFormalError(currentLocation(),
- ParserMessages::capacityTextClass,
- id.string());
- const StringC &str = id.string();
- if (str != sd().execToDoc("ISO 8879-1986//CAPACITY Reference//EN")
- && str != sd().execToDoc("ISO 8879:1986//CAPACITY Reference//EN")) {
- Boolean givenError;
- if (referencePublic(id, PublicId::CAPACITY, givenError))
- pushed = 1;
- else if (!givenError)
- message(ParserMessages::unknownCapacitySet, StringMessageArg(str));
- }
- if (!pushed)
- return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE),
- parm);
- }
- PackedBoolean capacitySpecified[Sd::nCapacity];
- int i;
- for (i = 0; i < Sd::nCapacity; i++)
- capacitySpecified[i] = 0;
- if (!parseSdParam(AllowedSdParams(SdParam::capacityName), parm))
- return 0;
- do {
- Sd::Capacity capacityIndex = parm.capacityIndex;
- if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
- return 0;
-
- if (!capacitySpecified[capacityIndex]) {
- sdBuilder.sd->setCapacity(capacityIndex, parm.n);
- capacitySpecified[capacityIndex] = 1;
- }
- else if (options().warnSgmlDecl)
- message(ParserMessages::duplicateCapacity,
- StringMessageArg(sd().capacityName(i)));
- int final = pushed ? int(SdParam::eE) : SdParam::reservedName + Sd::rSCOPE;
- if (!parseSdParam(AllowedSdParams(SdParam::capacityName, final),
- parm))
- return 0;
- } while (parm.type == SdParam::capacityName);
- Number totalcap = sdBuilder.sd->capacity(0);
- for (i = 1; i < Sd::nCapacity; i++)
- if (sdBuilder.sd->capacity(i) > totalcap)
- message(ParserMessages::capacityExceedsTotalcap,
- StringMessageArg(sd().capacityName(i)));
- if (pushed)
- return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE),
- parm);
- return 1;
- }
- Boolean Parser::referencePublic(const PublicId &id,
- PublicId::TextClass entityType,
- Boolean &givenError)
- {
- givenError = 0;
- StringC sysid;
- if (entityCatalog().lookupPublic(id.string(),
- sd().docCharset(),
- messenger(),
- sysid)) {
- Location loc = currentLocation();
- eventHandler().sgmlDeclEntity(new (eventAllocator())
- SgmlDeclEntityEvent(id,
- entityType,
- sysid,
- loc));
- Ptr<EntityOrigin> origin(new EntityOrigin(loc));
- if (currentMarkup())
- currentMarkup()->addEntityStart(origin);
- InputSource *in = entityManager().open(sysid,
- sd().docCharset(),
- origin.pointer(),
- 0,
- messenger());
- if (!in) {
- givenError = 1;
- return 0;
- }
- pushInput(in);
- return 1;
- }
- return 0;
- }
- Boolean Parser::sdParseScope(SdBuilder &sdBuilder, SdParam &parm)
- {
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rINSTANCE,
- SdParam::reservedName + Sd::rDOCUMENT),
- parm))
- return 0;
- if (parm.type == SdParam::reservedName + Sd::rINSTANCE)
- sdBuilder.sd->setScopeInstance();
- return 1;
- }
- Boolean Parser::sdParseSyntax(SdBuilder &sdBuilder, SdParam &parm)
- {
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSYNTAX),
- parm))
- return 0;
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSHUNCHAR,
- SdParam::reservedName + Sd::rPUBLIC),
- parm))
- return 0;
- if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {
- if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
- return 0;
- PublicId id;
- const MessageType1 *err;
- PublicId::TextClass textClass;
- if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
- sdBuilder.addFormalError(currentLocation(),
- *err,
- id.string());
- else if (id.getTextClass(textClass)
- && textClass != PublicId::SYNTAX)
- sdBuilder.addFormalError(currentLocation(),
- ParserMessages::syntaxTextClass,
- id.string());
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rFEATURES,
- SdParam::reservedName + Sd::rSWITCHES),
- parm))
- return 0;
- Vector<UnivChar> charSwitches;
- if (parm.type == SdParam::reservedName + Sd::rSWITCHES) {
- if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
- return 0;
- for (;;) {
- SyntaxChar c = parm.n;
- if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
- return 0;
- sdBuilder.switcher.addSwitch(c, parm.n);
- if (!parseSdParam(AllowedSdParams(SdParam::number,
- SdParam::reservedName
- + Sd::rFEATURES),
- parm))
- return 0;
- if (parm.type != SdParam::number)
- break;
- }
- }
- const StandardSyntaxSpec *spec = lookupSyntax(id);
- if (spec) {
- if (!setStandardSyntax(*sdBuilder.syntax,
- *spec,
- sdBuilder.sd->docCharset(),
- sdBuilder.switcher))
- sdBuilder.valid = 0;
- }
- else {
- Boolean givenError;
- if (referencePublic(id, PublicId::SYNTAX, givenError)) {
- sdBuilder.externalSyntax = 1;
- SdParam parm2;
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName
- + Sd::rSHUNCHAR),
- parm2))
- return 0;
- if (!sdParseExplicitSyntax(sdBuilder, parm2))
- return 0;
- }
- else {
- if (!givenError)
- message(ParserMessages::unknownPublicSyntax,
- StringMessageArg(id.string()));
- sdBuilder.valid = 0;
- }
- }
- }
- else {
- if (!sdParseExplicitSyntax(sdBuilder, parm))
- return 0;
- }
- if (!sdBuilder.sd->scopeInstance()) {
- // we know the significant chars now
- ISet<WideChar> invalidSgmlChar;
- sdBuilder.syntax->checkSgmlChar(sdBuilder.sd->docCharset(),
- 0,
- invalidSgmlChar);
- if (!invalidSgmlChar.isEmpty())
- message(ParserMessages::invalidSgmlChar, CharsetMessageArg(invalidSgmlChar));
- }
- checkSyntaxNamelen(*sdBuilder.syntax);
- checkSwitchesMarkup(sdBuilder.switcher);
- return 1;
- }
- Boolean Parser::sdParseExplicitSyntax(SdBuilder &sdBuilder,
- SdParam &parm)
- {
- typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &);
- static SdParser parsers[] = {
- &Parser::sdParseShunchar,
- &Parser::sdParseSyntaxCharset,
- &Parser::sdParseFunction,
- &Parser::sdParseNaming,
- &Parser::sdParseDelim,
- &Parser::sdParseNames,
- &Parser::sdParseQuantity
- };
- for (size_t i = 0; i < SIZEOF(parsers); i++)
- if (!(this->*(parsers[i]))(sdBuilder, parm))
- return 0;
- return 1;
- }
- const StandardSyntaxSpec *Parser::lookupSyntax(const PublicId &id)
- {
- PublicId::OwnerType ownerType;
- if (!id.getOwnerType(ownerType) || ownerType != PublicId::ISO)
- return 0;
- StringC str;
- if (!id.getOwner(str))
- return 0;
- if (str != sd().execToDoc("ISO 8879:1986")
- && str != sd().execToDoc("ISO 8879-1986"))
- return 0;
- PublicId::TextClass textClass;
- if (!id.getTextClass(textClass) || textClass != PublicId::SYNTAX)
- return 0;
- if (!id.getDescription(str))
- return 0;
- if (str == sd().execToDoc("Reference"))
- return &refSyntax;
- if (str == sd().execToDoc("Core"))
- return &coreSyntax;
- return 0;
- }
- Boolean Parser::sdParseSyntaxCharset(SdBuilder &sdBuilder, SdParam &parm)
- {
- UnivCharsetDesc desc;
- if (!sdParseCharset(sdBuilder, parm, 0, sdBuilder.syntaxCharsetDecl, desc))
- return 0;
- sdBuilder.syntaxCharset.set(desc);
- checkSwitches(sdBuilder.switcher, sdBuilder.syntaxCharset);
- for (size_t i = 0; i < sdBuilder.switcher.nSwitches(); i++)
- if (!sdBuilder.syntaxCharsetDecl.charDeclared(sdBuilder.switcher.switchTo(i)))
- message(ParserMessages::switchNotInCharset,
- NumberMessageArg(sdBuilder.switcher.switchTo(i)));
- ISet<WideChar> missing;
- findMissingMinimum(sdBuilder.syntaxCharset, missing);
- if (!missing.isEmpty())
- message(ParserMessages::missingMinimumChars,
- CharsetMessageArg(missing));
- return 1;
- }
- Boolean Parser::sdParseShunchar(SdBuilder &sdBuilder, SdParam &parm)
- {
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE,
- SdParam::reservedName + Sd::rCONTROLS,
- SdParam::number), parm))
- return 0;
- if (parm.type == SdParam::reservedName + Sd::rNONE) {
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET),
- parm))
- return 0;
- return 1;
- }
- if (parm.type == SdParam::reservedName + Sd::rCONTROLS)
- sdBuilder.syntax->setShuncharControls();
- else {
- if (parm.n <= charMax)
- sdBuilder.syntax->addShunchar(Char(parm.n));
- }
- for (;;) {
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET,
- SdParam::number), parm))
- return 0;
- if (parm.type != SdParam::number)
- break;
- if (parm.n <= charMax)
- sdBuilder.syntax->addShunchar(Char(parm.n));
- }
- return 1;
- }
// Parses the function-character part of an explicit concrete syntax.
//
// First the three standard functions RE, RS and SPACE are read, each as a
// keyword followed by a character number.  Then zero or more added
// functions follow, each as a name, a function class keyword (FUNCHAR,
// MSICHAR, MSOCHAR, MSSCHAR or SEPCHAR) and a character number.  The list
// ends when the LCNMSTRT keyword is read in place of a function class; the
// name read just before it is expected to be the NAMING reserved name.
//
// Returns 0 if a parameter could not be parsed, 1 otherwise.  Semantic
// problems are reported through message() and/or sdBuilder.valid.
- Boolean Parser::sdParseFunction(SdBuilder &sdBuilder, SdParam &parm)
- {
- static Sd::ReservedName standardNames[3] = {
- Sd::rRE, Sd::rRS, Sd::rSPACE
- };
// Standard functions: keyword, then the character number.
- for (int i = 0; i < 3; i++) {
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName
- + standardNames[i]),
- parm))
- return 0;
- if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
- return 0;
- Char c;
- if (translateSyntax(sdBuilder, parm.n, c)) {
// The loop index is used directly as the Syntax::StandardFunction value.
- if (checkNotFunction(*sdBuilder.syntax, c))
- sdBuilder.syntax->setStandardFunction(Syntax::StandardFunction(i), c);
- else
- sdBuilder.valid = 0;
- }
- }
// Track which markup-scan classes were declared for the check at the end.
- Boolean haveMsichar = 0;
- Boolean haveMsochar = 0;
// Added functions, until LCNMSTRT is seen.
- for (;;) {
// A parameter literal is accepted as the name only for an external syntax.
- if (!parseSdParam(sdBuilder.externalSyntax
- ? AllowedSdParams(SdParam::name, SdParam::paramLiteral)
- : AllowedSdParams(SdParam::name),
- parm))
- return 0;
- Boolean nameWasLiteral;
- Boolean invalidName = 0;
- StringC name;
- if (parm.type == SdParam::paramLiteral) {
- nameWasLiteral = 1;
- if (!translateSyntax(sdBuilder, parm.paramLiteralText, name))
- invalidName = 1;
- }
- else {
- parm.token.swap(name);
- nameWasLiteral = 0;
- }
// LCNMSTRT (the end of the list) is allowed only after a plain name,
// not after a literal.
- if (!parseSdParam(nameWasLiteral
- ? AllowedSdParams(SdParam::reservedName + Sd::rFUNCHAR,
- SdParam::reservedName + Sd::rMSICHAR,
- SdParam::reservedName + Sd::rMSOCHAR,
- SdParam::reservedName + Sd::rMSSCHAR,
- SdParam::reservedName + Sd::rSEPCHAR)
- : AllowedSdParams(SdParam::reservedName + Sd::rFUNCHAR,
- SdParam::reservedName + Sd::rMSICHAR,
- SdParam::reservedName + Sd::rMSOCHAR,
- SdParam::reservedName + Sd::rMSSCHAR,
- SdParam::reservedName + Sd::rSEPCHAR,
- SdParam::reservedName + Sd::rLCNMSTRT),
- parm))
- return 0;
- if (parm.type == SdParam::reservedName + Sd::rLCNMSTRT) {
// The name just read was not a function name; it should be NAMING.
- if (name != sd().reservedName(Sd::rNAMING))
- message(ParserMessages::namingBeforeLcnmstrt,
- StringMessageArg(name));
- break;
- }
// A plain name still has to go through translateName(); a literal was
// already translated above.
- if (!nameWasLiteral) {
- StringC tem;
- name.swap(tem);
- if (!translateName(sdBuilder, tem, name))
- invalidName = 1;
- }
- Syntax::FunctionClass functionClass;
- switch (parm.type) {
- case SdParam::reservedName + Sd::rFUNCHAR:
- functionClass = Syntax::cFUNCHAR;
- break;
- case SdParam::reservedName + Sd::rMSICHAR:
- haveMsichar = 1;
- functionClass = Syntax::cMSICHAR;
- break;
- case SdParam::reservedName + Sd::rMSOCHAR:
- haveMsochar = 1;
- functionClass = Syntax::cMSOCHAR;
- break;
- case SdParam::reservedName + Sd::rMSSCHAR:
- functionClass = Syntax::cMSSCHAR;
- break;
- case SdParam::reservedName + Sd::rSEPCHAR:
- functionClass = Syntax::cSEPCHAR;
- break;
- default:
- CANNOT_HAPPEN();
- }
- if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
- return 0;
- Char c;
// The function is added only when the character translates, passes
// checkNotFunction(), the name was valid, and the name is not already
// known as a function name.
- if (translateSyntax(sdBuilder, parm.n, c)
- && checkNotFunction(*sdBuilder.syntax, c)
- && !invalidName) {
- Char tem;
- if (sdBuilder.syntax->lookupFunctionChar(name, &tem))
- message(ParserMessages::duplicateFunctionName, StringMessageArg(name));
- else
- sdBuilder.syntax->addFunctionChar(name, functionClass, c);
- }
- }
// An MSOCHAR without any MSICHAR is reported.
- if (haveMsochar && !haveMsichar)
- message(ParserMessages::msocharRequiresMsichar);
- return 1;
- }
// Parses the naming rules of an explicit concrete syntax.
//
// The outer do-loop runs twice: first for name-start characters
// (isNamechar == 0: the LCNMSTRT values, ended by UCNMSTRT, then the
// UCNMSTRT values, ended by LCNMCHAR) and then for name characters
// (isNamechar == 1: the LCNMCHAR values, ended by UCNMCHAR, then the
// UCNMCHAR values, ended by NAMECASE).  The LCNMSTRT keyword itself has
// already been consumed by the caller's preceding step.
//
// In each pass the lower-case values are collected into `lc`, then every
// upper-case character is paired positionally with a lower-case one; both
// are added to the pass's character set and a lower->upper substitution is
// registered when they differ.  Finally NAMECASE GENERAL and ENTITY (each
// NO or YES) are read.
//
// Returns 0 if a parameter could not be parsed, 1 otherwise.
- Boolean Parser::sdParseNaming(SdBuilder &sdBuilder, SdParam &parm)
- {
- static Sd::ReservedName keys[4] = {
- Sd::rUCNMSTRT, Sd::rLCNMCHAR, Sd::rUCNMCHAR, Sd::rNAMECASE
- };
- int isNamechar = 0;
- ISet<Char> nameStartChar;
- ISet<Char> nameChar;
- do {
// lc: all lower-case characters read so far.  rangeIndex: positions in lc
// whose character starts a range that ends at the following lc character.
- String<SyntaxChar> lc;
- Vector<size_t> rangeIndex;
- Boolean first = 1;
- Boolean allowThrough = 0;
// --- Lower-case list -------------------------------------------------
- for (;;) {
// Numbers and ellipses are accepted only for an external syntax; otherwise
// exactly one parameter literal followed by the terminating keyword.
- if (!parseSdParam(sdBuilder.externalSyntax
- ? AllowedSdParams(SdParam::reservedName
- + keys[isNamechar * 2],
- SdParam::paramLiteral,
- SdParam::number,
- SdParam::ellipsis)
- : (first
- ? AllowedSdParams(SdParam::paramLiteral)
- : AllowedSdParams(SdParam::reservedName
- + keys[isNamechar * 2])),
- parm))
- return 0;
- first = 0;
- Boolean wasRange = 0;
// Presumably turns a number parameter into a one-character literal so
// both forms are handled alike below - confirm against its definition.
- sdParamConvertToLiteral(parm);
- if (parm.type == SdParam::ellipsis) {
// An ellipsis is valid only directly after a character (allowThrough).
- if (!allowThrough)
- message(ParserMessages::sdInvalidEllipsis);
- if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
- SdParam::number),
- parm))
- return 0;
- sdParamConvertToLiteral(parm);
- if (parm.paramLiteralText.size() == 0)
- message(ParserMessages::sdInvalidEllipsis);
- else if (allowThrough) {
// n is the range end; the range start is the last character in lc.
// A range is recorded only when it spans more than two adjacent values.
- SyntaxChar n = parm.paramLiteralText[0];
- if (n < lc[lc.size() - 1])
- message(ParserMessages::sdInvalidRange);
- else if (n > lc[lc.size() - 1] + 1)
- rangeIndex.push_back(lc.size() - 1);
- }
- wasRange = 1;
- }
- if (parm.type != SdParam::paramLiteral)
- break;
- lc += parm.paramLiteralText;
// A following ellipsis is allowed if this literal contributed a character
// beyond the range end (or any character when it was not a range end).
- allowThrough = (parm.paramLiteralText.size() - wasRange) > 0;
- }
// --- Upper-case list, paired against lc -------------------------------
// lcPos: next unread position in lc.  rangeLeft / nextRangeChar: state of
// the lower-case range currently being expanded.  runOut: set once lc has
// no more characters to pair with.
- size_t lcPos = 0;
- size_t rangeIndexPos = 0;
- unsigned long rangeLeft = 0;
- SyntaxChar nextRangeChar;
- ISet<Char> &set = isNamechar ? nameChar : nameStartChar;
// chars holds the literal read in the previous iteration; it is processed
// one iteration late so that a following ellipsis can be detected first.
- String<SyntaxChar> chars;
- Boolean runOut = 0;
- first = 1;
- for (;;) {
- if (!parseSdParam(sdBuilder.externalSyntax
- ? AllowedSdParams(SdParam::reservedName
- + keys[isNamechar * 2 + 1],
- SdParam::paramLiteral,
- SdParam::number,
- SdParam::ellipsis)
- : (first
- ? AllowedSdParams(SdParam::paramLiteral)
- : AllowedSdParams(SdParam::reservedName
- + keys[isNamechar * 2 + 1])),
- parm))
- return 0;
- sdParamConvertToLiteral(parm);
- first = 0;
- Boolean isRange = parm.type == SdParam::ellipsis;
// When an ellipsis follows, the last pending character is the range start
// and is handled by the range code below, not by this loop.
- size_t nChars = chars.size();
- if (nChars)
- nChars -= isRange;
- for (size_t i = 0; i < nChars; i++) {
// Begin expanding a lower-case range if one starts at lcPos.
- if (rangeLeft == 0
- && rangeIndexPos < rangeIndex.size()
- && rangeIndex[rangeIndexPos] == lcPos) {
- rangeLeft = 1 + lc[lcPos + 1] - lc[lcPos];
- nextRangeChar = lc[lcPos];
- lcPos += 2;
- rangeIndexPos += 1;
- }
// Pick the lower-case partner: next range value, next lc character, or
// (when lc is exhausted) the upper-case character itself.
- Char c;
- if (rangeLeft > 0) {
- rangeLeft--;
- c = nextRangeChar++;
- }
- else if (lcPos < lc.size())
- c = lc[lcPos++];
- else {
- runOut = 1;
- c = chars[i];
- }
- // map from c to chars[i]
- Char transLc, transUc;
- if (translateSyntax(sdBuilder, c, transLc)
- && translateSyntax(sdBuilder, chars[i], transUc)) {
- set.add(transLc);
- if (transLc != transUc) {
- set.add(transUc);
- sdBuilder.syntax->addSubst(transLc, transUc);
- }
- }
- }
- if (isRange) {
// Read the range end and expand the upper-case range start..end.
- if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
- SdParam::number),
- parm))
- return 0;
- sdParamConvertToLiteral(parm);
- if (chars.size() == 0 || parm.paramLiteralText.size() == 0)
- message(ParserMessages::sdInvalidEllipsis);
- else {
- SyntaxChar start = chars[chars.size() - 1];
- SyntaxChar end = parm.paramLiteralText[0];
- if (start > end)
- message(ParserMessages::sdInvalidRange);
- else {
- size_t count = end + 1 - start;
- while (count > 0) {
- if (rangeLeft == 0
- && rangeIndexPos < rangeIndex.size()
- && rangeIndex[rangeIndexPos] == lcPos) {
- rangeLeft = 1 + lc[lcPos + 1] - lc[lcPos];
- nextRangeChar = lc[lcPos];
- lcPos += 2;
- rangeIndexPos += 1;
- }
- Char c;
- if (rangeLeft > 0) {
- rangeLeft--;
- c = nextRangeChar++;
- }
- else if (lcPos < lc.size())
- c = lc[lcPos++];
- else {
- c = start;
- runOut = 1;
- }
// Identity mapping over a run: add the characters as a range instead of
// one at a time.
// NOTE(review): translateRange() is given the whole remaining range
// (start + count - 1) even when n < count, and rangeLeft / nextRangeChar
// are not advanced by the n - 1 extra characters consumed here - confirm
// whether this is intended.
- if (c == start && count > 1 && (runOut || rangeLeft > 0)) {
- size_t n;
- if (runOut)
- n = count;
- else if (rangeLeft < count)
- n = rangeLeft + 1;
- else
- n = count;
- translateRange(sdBuilder, start, start + (count - 1), set);
- count -= n;
- start += n;
- }
- else {
- Char transLc, transUc;
- if (translateSyntax(sdBuilder, c, transLc)
- && translateSyntax(sdBuilder, start, transUc)) {
- set.add(transLc);
- if (transLc != transUc) {
- set.add(transUc);
- sdBuilder.syntax->addSubst(transLc, transUc);
- }
- }
- count--;
- start++;
- }
- }
- }
- }
- chars.resize(0);
- if (parm.type != SdParam::paramLiteral)
- break;
// The first character of the literal was the range end; the rest become
// the pending characters for the next iteration.
- chars.append(parm.paramLiteralText.data() + 1,
- parm.paramLiteralText.size() - 1);
- }
- else if (parm.type == SdParam::paramLiteral)
- parm.paramLiteralText.swap(chars);
- else
- break;
- }
// Report a length mismatch between the lower- and upper-case lists.
// Running out of lower-case characters is tolerated for an external syntax.
- if ((runOut && !sdBuilder.externalSyntax)
- || rangeLeft > 0 || lcPos < lc.size())
- message(isNamechar
- ? ParserMessages::nmcharLength
- : ParserMessages::nmstrtLength);
- if (!checkNmchars(set, *sdBuilder.syntax))
- sdBuilder.valid = 0;
// Post-increment: loop again only after the first (name-start) pass.
- } while (!isNamechar++);
// A character may not be both a name-start and a name character.
- ISet<WideChar> bad;
- intersectCharSets(nameStartChar, nameChar, bad);
- if (!bad.isEmpty()) {
- sdBuilder.valid = 0;
- message(ParserMessages::nmcharNmstrt, CharsetMessageArg(bad));
- }
- sdBuilder.syntax->addNameStartCharacters(nameStartChar);
- sdBuilder.syntax->addNameCharacters(nameChar);
// NAMECASE (already read as the loop terminator): GENERAL NO|YES.
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rGENERAL),
- parm))
- return 0;
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
- SdParam::reservedName + Sd::rYES),
- parm))
- return 0;
- sdBuilder.syntax->setNamecaseGeneral(parm.type
- == SdParam::reservedName + Sd::rYES);
// ENTITY NO|YES.
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rENTITY),
- parm))
- return 0;
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
- SdParam::reservedName + Sd::rYES),
- parm))
- return 0;
- sdBuilder.syntax->setNamecaseEntity(parm.type
- == SdParam::reservedName + Sd::rYES);
- return 1;
- }
- Boolean Parser::checkNmchars(const ISet<Char> &set, const Syntax &syntax)
- {
- Boolean valid = 1;
- ISet<WideChar> bad;
- intersectCharSets(set, *syntax.charSet(Syntax::nameStart), bad);
- if (!bad.isEmpty()) {
- message(ParserMessages::nmcharLetter, CharsetMessageArg(bad));
- valid = 0;
- bad.clear();
- }
- intersectCharSets(set, *syntax.charSet(Syntax::digit), bad);
- if (!bad.isEmpty()) {
- message(ParserMessages::nmcharDigit, CharsetMessageArg(bad));
- valid = 0;
- bad.clear();
- }
- Char funChar;
- if (syntax.getStandardFunction(Syntax::fRE, funChar)
- && set.contains(funChar)) {
- message(ParserMessages::nmcharRe, NumberMessageArg(funChar));
- valid = 0;
- }
- if (syntax.getStandardFunction(Syntax::fRS, funChar)
- && set.contains(funChar)) {
- message(ParserMessages::nmcharRs, NumberMessageArg(funChar));
- valid = 0;
- }
- if (syntax.getStandardFunction(Syntax::fSPACE, funChar)
- && set.contains(funChar)) {
- message(ParserMessages::nmcharSpace, NumberMessageArg(funChar));
- valid = 0;
- }
- intersectCharSets(set, *syntax.charSet(Syntax::sepchar), bad);
- if (!bad.isEmpty()) {
- message(ParserMessages::nmcharSepchar, CharsetMessageArg(bad));
- valid = 0;
- }
- return valid;
- }
- // Result is a ISet<WideChar>, so it can be used with CharsetMessageArg.
- void Parser::intersectCharSets(const ISet<Char> &s1, const ISet<Char> &s2,
- ISet<WideChar> &inter)
- {
- ISetIter<Char> i1(s1);
- ISetIter<Char> i2(s2);
- Char min1, max1, min2, max2;
- if (!i1.next(min1, max1))
- return;
- if (!i2.next(min2, max2))
- return;
- for (;;) {
- if (max1 < min2) {
- if (!i1.next(min1, max1))
- break;
- }
- else if (max2 < min1) {
- if (!i2.next(min2, max2))
- break;
- }
- else {
- // min2 <= max1
- // min1 <= max2
- Char min = min1 > min2 ? min1 : min2;
- Char max = max1 < max2 ? max1 : max2;
- inter.addRange(min, max);
- if (!i1.next(min1, max1))
- break;
- if (!i2.next(min2, max2))
- break;
- }
- }
- }
// Parses the delimiter part of an explicit concrete syntax.
//
// Layout read here:
//   DELIM GENERAL SGMLREF { general-delimiter-name literal }*
//   SHORTREF (SGMLREF | NONE) { literal | number | literal ... literal }*
//   NAMES
// Numbers and ellipsis ranges are accepted only when
// sdBuilder.externalSyntax is set.
//
// Returns 0 if a parameter could not be parsed, 1 otherwise; on success
// `parm` holds the NAMES keyword.  Semantic problems are reported via
// message() and/or sdBuilder.valid.
- Boolean Parser::sdParseDelim(SdBuilder &sdBuilder, SdParam &parm)
- {
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rDELIM),
- parm))
- return 0;
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rGENERAL),
- parm))
- return 0;
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
- parm))
- return 0;
// One flag per general delimiter, used to detect repeated assignments.
- PackedBoolean delimGeneralSpecified[Syntax::nDelimGeneral];
- for (int i = 0; i < Syntax::nDelimGeneral; i++)
- delimGeneralSpecified[i] = 0;
// --- General delimiters, until SHORTREF ---------------------------------
- for (;;) {
- if (!parseSdParam(AllowedSdParams(SdParam::generalDelimiterName,
- SdParam::reservedName + Sd::rSHORTREF),
- parm))
- return 0;
- if (parm.type == SdParam::reservedName + Sd::rSHORTREF)
- break;
- Syntax::DelimGeneral delimGeneral = parm.delimGeneralIndex;
- if (delimGeneralSpecified[delimGeneral])
- message(ParserMessages::duplicateDelimGeneral,
- StringMessageArg(sd().generalDelimiterName(delimGeneral)));
- if (!parseSdParam(sdBuilder.externalSyntax
- ? AllowedSdParams(SdParam::paramLiteral,
- SdParam::number)
- : AllowedSdParams(SdParam::paramLiteral),
- parm))
- return 0;
- sdParamConvertToLiteral(parm);
- StringC str;
- if (parm.paramLiteralText.size() == 0)
- message(ParserMessages::sdEmptyDelimiter);
- else if (translateSyntax(sdBuilder, parm.paramLiteralText, str)) {
// Apply the general substitution table to every delimiter character.
- const SubstTable<Char> *table = sdBuilder.syntax->generalSubstTable();
- for (size_t i = 0; i < str.size(); i++)
- table->subst(str[i]);
// Only the first valid assignment of a delimiter is stored; an invalid
// or repeated one marks the declaration invalid.
- if (checkGeneralDelim(*sdBuilder.syntax, str)
- && !delimGeneralSpecified[delimGeneral])
- sdBuilder.syntax->setDelimGeneral(delimGeneral, str);
- else
- sdBuilder.valid = 0;
- }
- delimGeneralSpecified[delimGeneral] = 1;
- }
// Called after the explicit assignments; presumably fills in the
// reference delimiters for those not assigned above - confirm in
// setRefDelimGeneral().
- if (!setRefDelimGeneral(*sdBuilder.syntax,
- sdBuilder.syntaxCharset,
- sdBuilder.sd->docCharset(),
- sdBuilder.switcher))
- sdBuilder.valid = 0;
// --- Short reference delimiters -----------------------------------------
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF,
- SdParam::reservedName + Sd::rNONE),
- parm))
- return 0;
// SGMLREF adds the reference short reference delimiters; NONE adds nothing.
- if (parm.type == SdParam::reservedName + Sd::rSGMLREF) {
- if (!addRefDelimShortref(*sdBuilder.syntax,
- sdBuilder.syntaxCharset,
- sdBuilder.sd->docCharset(),
- sdBuilder.switcher))
- sdBuilder.valid = 0;
- }
// lastLiteral remembers the most recent literal so that a following
// ellipsis can use it as the start of a range.
- String<SyntaxChar> lastLiteral;
- for (;;) {
- if (!parseSdParam(sdBuilder.externalSyntax
- ? AllowedSdParams(SdParam::paramLiteral,
- SdParam::number,
- SdParam::ellipsis,
- SdParam::reservedName + Sd::rNAMES)
- : AllowedSdParams(SdParam::paramLiteral,
- SdParam::reservedName + Sd::rNAMES),
- parm))
- return 0;
- sdParamConvertToLiteral(parm);
- if (parm.type == SdParam::ellipsis) {
// Range form: both ends must be single characters, end >= start.
- if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
- SdParam::number),
- parm))
- return 0;
- sdParamConvertToLiteral(parm);
- if (parm.paramLiteralText.size() == 0)
- message(ParserMessages::sdEmptyDelimiter);
- else if (lastLiteral.size() != 1
- || parm.paramLiteralText.size() != 1)
- message(ParserMessages::sdInvalidEllipsis);
- else if (parm.paramLiteralText[0] < lastLiteral[0])
- message(ParserMessages::sdInvalidRange);
- else if (parm.paramLiteralText[0] != lastLiteral[0]) {
// The range start was already added as an ordinary literal, so only
// start + 1 .. end is translated and added here.
- ISet<Char> shortrefChars;
- translateRange(sdBuilder,
- lastLiteral[0] + 1,
- parm.paramLiteralText[0],
- shortrefChars);
// Collect characters that are already short references, either simple
// ones or single-character entries among the complex ones.
- ISet<WideChar> duplicates;
- intersectCharSets(shortrefChars,
- sdBuilder.syntax->delimShortrefSimple(),
- duplicates);
- int nComplexShortrefs = sdBuilder.syntax->nDelimShortrefComplex();
- for (int i = 0; i < nComplexShortrefs; i++) {
- const StringC &delim = sdBuilder.syntax->delimShortrefComplex(i);
- if (delim.size() == 1 && shortrefChars.contains(delim[0]))
- duplicates.add(delim[0]);
- }
- if (!duplicates.isEmpty())
- message(ParserMessages::duplicateDelimShortrefSet,
- CharsetMessageArg(duplicates));
- sdBuilder.syntax->addDelimShortrefs(shortrefChars,
- sdBuilder.sd->docCharset());
- }
// The range end cannot itself start another range.
- lastLiteral.resize(0);
- }
- else if (parm.type == SdParam::paramLiteral) {
- parm.paramLiteralText.swap(lastLiteral);
- StringC str;
- if (lastLiteral.size() == 0)
- message(ParserMessages::sdEmptyDelimiter);
- else if (translateSyntax(sdBuilder, lastLiteral, str)) {
- const SubstTable<Char> *table = sdBuilder.syntax->generalSubstTable();
- for (size_t i = 0; i < str.size(); i++)
- table->subst(str[i]);
// Single-character delimiters skip checkShortrefDelim().
- if (str.size() == 1
- || checkShortrefDelim(*sdBuilder.syntax,
- sdBuilder.sd->docCharset(),
- str)) {
- if (sdBuilder.syntax->isValidShortref(str))
- message(ParserMessages::duplicateDelimShortref,
- StringMessageArg(str));
- else
- sdBuilder.syntax->addDelimShortref(str,
- sdBuilder.sd->docCharset());
- }
- }
- }
- else
// NAMES keyword: end of the short reference list.
- break;
- }
- return 1;
- }
// Parses the reserved-name part of an explicit concrete syntax (the NAMES
// keyword itself has already been consumed by the preceding step).
//
// Reads SGMLREF, then pairs of reference reserved name and replacement
// name until the QUANTITY keyword.  A replacement is stored only if it is
// not already a reserved name, is a valid name in the declared syntax, and
// the reserved name has not been replaced before.  Afterwards
// setRefNames() is called, the RE/RS/SPACE names are checked against the
// function names, and the standard function names are entered.
//
// Returns 0 if a parameter could not be parsed, 1 otherwise; on success
// `parm` holds the QUANTITY keyword.
- Boolean Parser::sdParseNames(SdBuilder &sdBuilder, SdParam &parm)
- {
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
- parm))
- return 0;
- for (;;) {
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rQUANTITY,
- SdParam::referenceReservedName),
- parm))
- return 0;
- if (parm.type == SdParam::reservedName + Sd::rQUANTITY)
- break;
- Syntax::ReservedName reservedName = parm.reservedNameIndex;
// The replacement may be given as a literal only for an external syntax.
- if (!parseSdParam(sdBuilder.externalSyntax
- ? AllowedSdParams(SdParam::name, SdParam::paramLiteral)
- : AllowedSdParams(SdParam::name),
- parm))
- return 0;
- StringC transName;
- if (parm.type == SdParam::name
- ? translateName(sdBuilder, parm.token, transName)
- : translateSyntax(sdBuilder, parm.paramLiteralText, transName)) {
- Syntax::ReservedName tem;
- if (sdBuilder.syntax->lookupReservedName(transName, &tem))
- message(ParserMessages::ambiguousReservedName,
- StringMessageArg(transName));
- else {
// An invalid name is emptied so that the checks below skip it and the
// declaration is marked invalid at the end.
- if (transName.size() == 0
- || !sdBuilder.syntax->isNameStartCharacter(transName[0])) {
- message(ParserMessages::reservedNameSyntax,
- StringMessageArg(transName));
- transName.resize(0);
- }
- size_t i;
- // Check that it's a valid name in the declared syntax
- // (- and . might not be name characters).
- for (i = 1; i < transName.size(); i++)
- if (!sdBuilder.syntax->isNameCharacter(transName[i])) {
- message(ParserMessages::reservedNameSyntax,
- StringMessageArg(transName));
- transName.resize(0);
- break;
- }
// Apply the general substitution table to each character of the name.
- for (i = 0; i < transName.size(); i++)
- sdBuilder.syntax->generalSubstTable()->subst(transName[i]);
// A reserved name that already has a value in the syntax being built is
// a duplicate; the name shown in the message comes from syntax(), not
// from the syntax being built.
- if (sdBuilder.syntax->reservedName(reservedName).size() > 0)
- message(ParserMessages::duplicateReservedName,
- StringMessageArg(syntax().reservedName(reservedName)));
- else if (transName.size() > 0)
- sdBuilder.syntax->setName(reservedName, transName);
- else
- sdBuilder.valid = 0;
- }
- }
- }
// Called after the explicit replacements; presumably fills in the
// reference names for those not replaced above - confirm in setRefNames().
- setRefNames(*sdBuilder.syntax, sdBuilder.sd->docCharset());
// The names of RE, RS and SPACE must not also be added function names.
- static Syntax::ReservedName functionNameIndex[3] = {
- Syntax::rRE, Syntax::rRS, Syntax::rSPACE
- };
- for (int i = 0; i < 3; i++) {
- const StringC &functionName
- = sdBuilder.syntax->reservedName(functionNameIndex[i]);
- Char tem;
- if (sdBuilder.syntax->lookupFunctionChar(functionName, &tem))
- message(ParserMessages::duplicateFunctionName, StringMessageArg(functionName));
- }
- sdBuilder.syntax->enterStandardFunctionNames();
- return 1;
- }
- Boolean Parser::sdParseQuantity(SdBuilder &sdBuilder, SdParam &parm)
- {
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
- parm))
- return 0;
- for (;;) {
- int final = (sdBuilder.externalSyntax
- ? int(SdParam::eE)
- : SdParam::reservedName + Sd::rFEATURES);
- if (!parseSdParam(AllowedSdParams(SdParam::quantityName, final), parm))
- return 0;
- if (parm.type != SdParam::quantityName)
- break;
- Syntax::Quantity quantity = parm.quantityIndex;
- if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
- return 0;
- sdBuilder.syntax->setQuantity(quantity, parm.n);
- }
- if (sdBuilder.sd->scopeInstance()) {
- for (int i = 0; i < Syntax::nQuantity; i++)
- if (sdBuilder.syntax->quantity(Syntax::Quantity(i))
- < syntax().quantity(Syntax::Quantity(i)))
- message(ParserMessages::scopeInstanceQuantity,
- StringMessageArg(sd().quantityName(Syntax::Quantity(i))));
- }
- return 1;
- }
// Parses the FEATURES section of the SGML declaration.
//
// The section is a fixed sequence of keywords, described by the
// `features` table below.  Each entry is either a bare heading (__none), a
// keyword followed by NO or YES (__boolean), or a keyword followed by NO,
// or by YES and a number (__number).
//
// Returns 0 if a parameter could not be parsed, 1 otherwise.
- Boolean Parser::sdParseFeatures(SdBuilder &sdBuilder, SdParam &parm)
- {
- struct FeatureInfo {
- Sd::ReservedName name;
- enum {
- __none,
- __boolean,
- __number
- } arg;
- };
- static FeatureInfo features[] = {
- { Sd::rMINIMIZE, FeatureInfo::__none },
- { Sd::rDATATAG, FeatureInfo::__boolean },
- { Sd::rOMITTAG, FeatureInfo::__boolean },
- { Sd::rRANK, FeatureInfo::__boolean },
- { Sd::rSHORTTAG, FeatureInfo::__boolean },
- { Sd::rLINK, FeatureInfo::__none },
- { Sd::rSIMPLE, FeatureInfo::__number },
- { Sd::rIMPLICIT, FeatureInfo::__boolean },
- { Sd::rEXPLICIT, FeatureInfo::__number },
- { Sd::rOTHER, FeatureInfo::__none },
- { Sd::rCONCUR, FeatureInfo::__number },
- { Sd::rSUBDOC, FeatureInfo::__number },
- { Sd::rFORMAL, FeatureInfo::__boolean }
- };
// Running indices into the Sd boolean / number feature enumerations.  The
// k-th __boolean (resp. __number) table entry is stored as
// Sd::BooleanFeature(k) (resp. Sd::NumberFeature(k)), so the table order
// has to match those enumerations.
- int booleanFeature = 0;
- int numberFeature = 0;
- for (size_t i = 0; i < SIZEOF(features); i++) {
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName
- + features[i].name), parm))
- return 0;
- if (features[i].arg != FeatureInfo::__none) {
// Both __boolean and __number features start with NO or YES.
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
- SdParam::reservedName + Sd::rYES),
- parm))
- return 0;
- #if 0
- if (features[i].name == Sd::rDATATAG
- && parm.type == (SdParam::reservedName + Sd::rYES))
- message(ParserMessages::datatagNotImplemented);
- #endif
- if (features[i].arg == FeatureInfo::__number) {
// YES is followed by the number to store; NO stores 0.
- if (parm.type == SdParam::reservedName + Sd::rYES) {
- if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
- return 0;
- sdBuilder.sd->setNumberFeature(Sd::NumberFeature(numberFeature++),
- parm.n);
- }
- else
- sdBuilder.sd->setNumberFeature(Sd::NumberFeature(numberFeature++),
- 0);
- }
- else
- sdBuilder.sd->setBooleanFeature(Sd::BooleanFeature(booleanFeature++),
- parm.type == (SdParam::reservedName
- + Sd::rYES));
- }
- }
- return 1;
- }
- Boolean Parser::sdParseAppinfo(SdBuilder &, SdParam &parm)
- {
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rAPPINFO),
- parm))
- return 0;
- Location location(currentLocation());
- if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE,
- SdParam::minimumLiteral),
- parm))
- return 0;
- AppinfoEvent *event;
- if (parm.type == SdParam::minimumLiteral)
- event = new (eventAllocator()) AppinfoEvent(parm.literalText, location);
- else
- event = new (eventAllocator()) AppinfoEvent(location);
- eventHandler().appinfo(event);
- return 1;
- }
- Boolean Parser::translateSyntax(CharSwitcher &switcher,
- const CharsetInfo &syntaxCharset,
- const CharsetInfo &docCharset,
- WideChar syntaxChar,
- Char &docChar)
- {
- syntaxChar = switcher.subst(syntaxChar);
- UnivChar univChar;
- if (syntaxCharset.descToUniv(syntaxChar, univChar)
- && univToDescCheck(docCharset, univChar, docChar))
- return 1;
- message(ParserMessages::translateSyntaxChar, NumberMessageArg(syntaxChar));
- return 0;
- }
- void Parser::translateRange(SdBuilder &sdBuilder, SyntaxChar start,
- SyntaxChar end, ISet<Char> &chars)
- {
- #if 0
- do {
- Char docChar;
- if (!translateSyntax(sdBuilder, start, docChar))
- break;
- chars.add(docChar);
- } while (start++ != end);
- #endif
- for (;;) {
- SyntaxChar doneUpTo = end;
- Boolean gotSwitch = 0;
- WideChar firstSwitch;
- for (size_t i = 0; i < sdBuilder.switcher.nSwitches(); i++) {
- WideChar c = sdBuilder.switcher.switchFrom(i);
- if (start <= c && c <= end) {
- if (!gotSwitch) {
- gotSwitch = 1;
- firstSwitch = c;
- }
- else if (c < firstSwitch)
- firstSwitch = c;
- }
- }
- if (gotSwitch && firstSwitch == start) {
- doneUpTo = start;
- Char docChar;
- if (translateSyntax(sdBuilder, start, docChar))
- chars.add(docChar);
- }
- else {
- if (gotSwitch)
- doneUpTo = firstSwitch - 1;
- Char docChar;
- Number count;
- if (translateSyntaxNoSwitch(sdBuilder, start, docChar, count)) {
- if (count - 1 < doneUpTo - start)
- doneUpTo = start + (count - 1);
- chars.addRange(docChar, docChar + (doneUpTo - start));
- }
- }
- if (doneUpTo == end)
- break;
- start = doneUpTo + 1;
- }
- }
- Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
- WideChar syntaxChar, Char &docChar)
- {
- Number count;
- return translateSyntaxNoSwitch(sdBuilder,
- sdBuilder.switcher.subst(syntaxChar),
- docChar,
- count);
- }
- Boolean Parser::translateSyntaxNoSwitch(SdBuilder &sdBuilder,
- WideChar syntaxChar, Char &docChar,
- Number &count)
- {
- Number n;
- StringC str;
- CharsetDeclRange::Type type;
- const PublicId *id;
- if (sdBuilder.syntaxCharsetDecl.getCharInfo(syntaxChar,
- id,
- type,
- n,
- str,
- count)) {
- ISet<WideChar> docChars;
- switch (type) {
- case CharsetDeclRange::unused:
- break;
- case CharsetDeclRange::string:
- sdBuilder.sd->docCharsetDecl().stringToChar(str, docChars);
- break;
- case CharsetDeclRange::number:
- {
- Number count2;
- sdBuilder.sd->docCharsetDecl().numberToChar(id, n, docChars, count2);
- if (!docChars.isEmpty() && count2 < count)
- count = count2;
- }
- break;
- default:
- CANNOT_HAPPEN();
- }
- if (!docChars.isEmpty()) {
- if (!docChars.isSingleton() && options().warnSgmlDecl)
- message(ParserMessages::ambiguousDocCharacter,
- CharsetMessageArg(docChars));
- ISetIter<WideChar> iter(docChars);
- WideChar min, max;
- if (iter.next(min, max) && min <= charMax) {
- docChar = Char(min);
- return 1;
- }
- }
- }
- UnivChar univChar;
- WideChar alsoMax, count2;
- if (sdBuilder.syntaxCharset.descToUniv(syntaxChar, univChar, alsoMax)
- && univToDescCheck(sdBuilder.sd->docCharset(), univChar, docChar,
- count2)) {
- count = (alsoMax - syntaxChar) + 1;
- if (count2 < count)
- count = count2;
- return 1;
- }
- sdBuilder.valid = 0;
- message(ParserMessages::translateSyntaxChar, NumberMessageArg(syntaxChar));
- return 0;
- }
- Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
- const String<SyntaxChar> &syntaxString,
- StringC &docString)
- {
- docString.resize(0);
- int ret = 1;
- for (size_t i = 0; i < syntaxString.size(); i++) {
- Char c;
- if (translateSyntax(sdBuilder, syntaxString[i], c))
- docString += c;
- else
- ret = 0;
- }
- return ret;
- }
- Boolean Parser::translateName(SdBuilder &sdBuilder,
- const StringC &name,
- StringC &str)
- {
- str.resize(name.size());
- for (size_t i = 0; i < name.size(); i++) {
- UnivChar univChar;
- Boolean ret = sd().docCharset().descToUniv(name[i], univChar);
- // Might switch hyphen or period.
- univChar = translateUniv(univChar, sdBuilder.switcher,
- sdBuilder.syntaxCharset);
- ASSERT(ret != 0);
- if (!univToDescCheck(sdBuilder.sd->docCharset(), univChar, str[i])) {
- message(ParserMessages::translateDocChar, NumberMessageArg(univChar));
- sdBuilder.valid = 0;
- return 0;
- }
- }
- return 1;
- }
- UnivChar Parser::translateUniv(UnivChar univChar,
- CharSwitcher &switcher,
- const CharsetInfo &syntaxCharset)
- {
- WideChar syntaxChar;
- ISet<WideChar> syntaxChars;
- if (syntaxCharset.univToDesc(univChar, syntaxChar, syntaxChars) != 1) {
- message(ParserMessages::missingSyntaxChar,
- NumberMessageArg(univChar));
- return univChar;
- }
- SyntaxChar tem = switcher.subst(syntaxChar);
- if (tem != syntaxChar && !syntaxCharset.descToUniv(tem, univChar))
- message(ParserMessages::translateSyntaxChar, NumberMessageArg(tem));
- return univChar;
- }
- Boolean Parser::checkNotFunction(const Syntax &syn, Char c)
- {
- if (syn.charSet(Syntax::functionChar)->contains(c)) {
- message(ParserMessages::oneFunction, NumberMessageArg(c));
- return 0;
- }
- else
- return 1;
- }
- // Check that it has at most one B sequence and that it
- // is not adjacent to a blank sequence.
- Boolean Parser::checkShortrefDelim(const Syntax &syn,
- const CharsetInfo &charset,
- const StringC &delim)
- {
- Boolean hadB = 0;
- Char letterB = charset.execToDesc('B');
- const ISet<Char> *bSet = syn.charSet(Syntax::blank);
- for (size_t i = 0; i < delim.size(); i++)
- if (delim[i] == letterB) {
- if (hadB) {
- message(ParserMessages::multipleBSequence, StringMessageArg(delim));
- return 0;
- }
- hadB = 1;
- if (i > 0 && bSet->contains(delim[i - 1])) {
- message(ParserMessages::blankAdjacentBSequence,
- StringMessageArg(delim));
- return 0;
- }
- while (i + 1 < delim.size() && delim[i + 1] == letterB)
- i++;
- if (i < delim.size() - 1 && bSet->contains(delim[i + 1])) {
- message(ParserMessages::blankAdjacentBSequence,
- StringMessageArg(delim));
- return 0;
- }
- }
- return 1;
- }
- Boolean Parser::checkGeneralDelim(const Syntax &syn, const StringC &delim)
- {
- const ISet<Char> *functionSet = syn.charSet(Syntax::functionChar);
- if (delim.size() > 0) {
- Boolean allFunction = 1;
- for (size_t i = 0; i < delim.size(); i++)
- if (!functionSet->contains(delim[i]))
- allFunction = 0;
- if (allFunction) {
- message(ParserMessages::generalDelimAllFunction,
- StringMessageArg(delim));
- return 0;
- }
- }
- return 1;
- }
- Boolean Parser::checkSwitches(CharSwitcher &switcher,
- const CharsetInfo &syntaxCharset)
- {
- Boolean valid = 1;
- for (size_t i = 0; i < switcher.nSwitches(); i++) {
- WideChar c[2];
- c[0] = switcher.switchFrom(i);
- c[1] = switcher.switchTo(i);
- for (int j = 0; j < 2; j++) {
- UnivChar univChar;
- if (syntaxCharset.descToUniv(c[j], univChar)) {
- // Check that it is not Digit Lcletter or Ucletter
- if ((UnivCharsetDesc::a <= univChar
- && univChar < UnivCharsetDesc::a + 26)
- || (UnivCharsetDesc::A <= univChar
- && univChar < UnivCharsetDesc::A + 26)
- || (UnivCharsetDesc::zero <= univChar
- && univChar < UnivCharsetDesc::zero + 10)) {
- message(ParserMessages::switchLetterDigit,
- NumberMessageArg(univChar));
- valid = 0;
- }
- }
- }
- }
- return valid;
- }
- Boolean Parser::checkSwitchesMarkup(CharSwitcher &switcher)
- {
- Boolean valid = 1;
- size_t nSwitches = switcher.nSwitches();
- for (size_t i = 0; i < nSwitches; i++)
- if (!switcher.switchUsed(i)) {
- // If the switch wasn't used,
- // then the character wasn't a markup character.
- message(ParserMessages::switchNotMarkup,
- NumberMessageArg(switcher.switchFrom(i)));
- valid = 0;
- }
- return valid;
- }
- void Parser::checkSyntaxNamelen(const Syntax &syn)
- {
- size_t namelen = syn.namelen();
- int i;
- for (i = 0; i < Syntax::nDelimGeneral; i++)
- if (syn.delimGeneral(i).size() > namelen)
- message(ParserMessages::delimiterLength,
- StringMessageArg(syn.delimGeneral(i)),
- NumberMessageArg(namelen));
- for (i = 0; i < syn.nDelimShortrefComplex(); i++)
- if (syn.delimShortrefComplex(i).size() > namelen)
- message(ParserMessages::delimiterLength,
- StringMessageArg(syn.delimShortrefComplex(i)),
- NumberMessageArg(namelen));
- for (i = 0; i < Syntax::nNames; i++)
- if (syn.reservedName(Syntax::ReservedName(i)).size() > namelen
- && options().warnSgmlDecl)
- message(ParserMessages::reservedNameLength,
- StringMessageArg(syn.reservedName(Syntax::ReservedName(i))),
- NumberMessageArg(namelen));
- }
- Boolean Parser::univToDescCheck(const CharsetInfo &charset, UnivChar from,
- Char &to)
- {
- WideChar count;
- return univToDescCheck(charset, from, to, count);
- }
- Boolean Parser::univToDescCheck(const CharsetInfo &charset, UnivChar from,
- Char &to, WideChar &count)
- {
- WideChar c;
- ISet<WideChar> descSet;
- unsigned ret = charset.univToDesc(from, c, descSet, count);
- if (ret > 1) {
- if (options().warnSgmlDecl)
- message(ParserMessages::ambiguousDocCharacter,
- CharsetMessageArg(descSet));
- ret = 1;
- }
- if (ret && c <= charMax) {
- to = Char(c);
- return 1;
- }
- return 0;
- }
- Boolean Parser::parseSdParam(const AllowedSdParams &allow,
- SdParam &parm)
- {
- for (;;) {
- Token token = getToken(mdMode);
- switch (token) {
- case tokenUnrecognized:
- if (reportNonSgmlCharacter())
- break;
- {
- message(ParserMessages::markupDeclarationCharacter,
- StringMessageArg(currentToken()),
- AllowedSdParamsMessageArg(allow, sdPointer()));
- }
- return 0;
- case tokenEe:
- if (allow.param(SdParam::eE)) {
- parm.type = SdParam::eE;
- if (currentMarkup())
- currentMarkup()->addEntityEnd();
- popInputStack();
- return 1;
- }
- message(ParserMessages::sdEntityEnd,
- AllowedSdParamsMessageArg(allow, sdPointer()));
- return 0;
- case tokenS:
- if (currentMarkup())
- currentMarkup()->addS(currentChar());
- break;
- case tokenCom:
- if (!parseComment(sdcomMode))
- return 0;
- break;
- case tokenDso:
- case tokenGrpo:
- case tokenMinusGrpo:
- case tokenPlusGrpo:
- case tokenRni:
- case tokenPeroNameStart:
- case tokenPeroGrpo:
- sdParamInvalidToken(token, allow);
- return 0;
- case tokenLcUcNmchar:
- if (allow.param(SdParam::ellipsis)) {
- extendNameToken(syntax().namelen(), ParserMessages::nameLength);
- getCurrentToken(syntax().generalSubstTable(), parm.token);
- if (parm.token == sd().execToDoc("...")) {
- parm.type = SdParam::ellipsis;
- return 1;
- }
- message(ParserMessages::sdInvalidNameToken,
- StringMessageArg(parm.token),
- AllowedSdParamsMessageArg(allow, sdPointer()));
- }
- else {
- sdParamInvalidToken(token, allow);
- return 0;
- }
- case tokenLita:
- case tokenLit:
- {
- Boolean lita = (token == tokenLita);
- if (allow.param(SdParam::minimumLiteral)) {
- if (!parseMinimumLiteral(lita, parm.literalText))
- return 0;
- parm.type = SdParam::minimumLiteral;
- if (currentMarkup())
- currentMarkup()->addLiteral(parm.literalText);
- }
- else if (allow.param(SdParam::paramLiteral)) {
- if (!parseSdParamLiteral(lita, parm.paramLiteralText))
- return 0;
- parm.type = SdParam::paramLiteral;
- }
- else {
- sdParamInvalidToken(token, allow);
- return 0;
- }
- return 1;
- }
- case tokenMdc:
- if (allow.param(SdParam::mdc)) {
- parm.type = SdParam::mdc;
- if (currentMarkup())
- currentMarkup()->addDelim(Syntax::dMDC);
- return 1;
- }
- sdParamInvalidToken(tokenMdc, allow);
- return 0;
- case tokenNameStart:
- {
- extendNameToken(syntax().namelen(), ParserMessages::nameLength);
- getCurrentToken(syntax().generalSubstTable(), parm.token);
- if (allow.param(SdParam::capacityName)) {
- if (sd().lookupCapacityName(parm.token, parm.capacityIndex)) {
- parm.type = SdParam::capacityName;
- if (currentMarkup())
- currentMarkup()->addName(currentInput());
- return 1;
- }
- }
- if (allow.param(SdParam::referenceReservedName)) {
- if (syntax().lookupReservedName(parm.token,
- &parm.reservedNameIndex)) {
- parm.type = SdParam::referenceReservedName;
- if (currentMarkup())
- currentMarkup()->addName(currentInput());
- return 1;
- }
- }
- if (allow.param(SdParam::generalDelimiterName)) {
- if (sd().lookupGeneralDelimiterName(parm.token,
- parm.delimGeneralIndex)) {
- parm.type = SdParam::generalDelimiterName;
- if (currentMarkup())
- currentMarkup()->addName(currentInput());
- return 1;
- }
- }
- if (allow.param(SdParam::quantityName)) {
- if (sd().lookupQuantityName(parm.token, parm.quantityIndex)) {
- parm.type = SdParam::quantityName;
- if (currentMarkup())
- currentMarkup()->addName(currentInput());
- return 1;
- }
- }
- for (int i = 0;; i++) {
- SdParam::Type t = allow.get(i);
- if (t == SdParam::invalid)
- break;
- if (t >= SdParam::reservedName) {
- Sd::ReservedName sdReservedName
- = Sd::ReservedName(t - SdParam::reservedName);
- if (parm.token == sd().reservedName(sdReservedName)) {
- parm.type = t;
- if (currentMarkup())
- currentMarkup()->addSdReservedName(sdReservedName,
- currentInput());
- return 1;
- }
- }
- }
- if (allow.param(SdParam::name)) {
- parm.type = SdParam::name;
- if (currentMarkup())
- currentMarkup()->addName(currentInput());
- return 1;
- }
- {
- message(ParserMessages::sdInvalidNameToken,
- StringMessageArg(parm.token),
- AllowedSdParamsMessageArg(allow, sdPointer()));
- }
- return 0;
- }
- case tokenDigit:
- if (allow.param(SdParam::number)) {
- extendNumber(syntax().namelen(), ParserMessages::numberLength);
- parm.type = SdParam::number;
- unsigned long n;
- if (!stringToNumber(currentInput()->currentTokenStart(),
- currentInput()->currentTokenLength(),
- n)
- || n > Number(-1)) {
- message(ParserMessages::numberTooBig,
- StringMessageArg(currentToken()));
- parm.n = Number(-1);
- }
- else {
- if (currentMarkup())
- currentMarkup()->addNumber(currentInput());
- parm.n = Number(n);
- }
- Token token = getToken(mdMode);
- if (token == tokenNameStart)
- message(ParserMessages::psRequired);
- currentInput()->ungetToken();
- return 1;
- }
- sdParamInvalidToken(tokenDigit, allow);
- return 0;
- default:
- CANNOT_HAPPEN();
- }
- }
- }
- // This is a separate function, because we might want SyntaxChar
- // to be bigger than Char.
- Boolean Parser::parseSdParamLiteral(Boolean lita, String<SyntaxChar> &str)
- {
- Location loc(currentLocation());
- loc += 1;
- SdText text(loc, lita); // first character of content
- str.resize(0);
- const unsigned refLitlen = Syntax::referenceQuantity(Syntax::qLITLEN);
- Mode mode = lita ? sdplitaMode : sdplitMode;
- int done = 0;
- for (;;) {
- Token token = getToken(mode);
- switch (token) {
- case tokenEe:
- message(ParserMessages::literalLevel);
- return 0;
- case tokenUnrecognized:
- if (reportNonSgmlCharacter())
- break;
- if (options().errorSignificant)
- message(ParserMessages::sdLiteralSignificant,
- StringMessageArg(currentToken()));
- text.addChar(currentChar(), currentLocation());
- break;
- case tokenCroDigit:
- {
- InputSource *in = currentInput();
- Location startLocation = currentLocation();
- in->discardInitial();
- extendNumber(syntax().namelen(), ParserMessages::numberLength);
- unsigned long n;
- Boolean valid;
- if (!stringToNumber(in->currentTokenStart(),
- in->currentTokenLength(),
- n)
- || n > syntaxCharMax) {
- message(ParserMessages::syntaxCharacterNumber,
- StringMessageArg(currentToken()));
- valid = 0;
- }
- else
- valid = 1;
- Owner<Markup> markupPtr;
- if (eventsWanted().wantPrologMarkup()) {
- markupPtr = new Markup;
- markupPtr->addDelim(Syntax::dCRO);
- markupPtr->addNumber(in);
- switch (getToken(refMode)) {
- case tokenRefc:
- markupPtr->addDelim(Syntax::dREFC);
- break;
- case tokenRe:
- markupPtr->addRefEndRe();
- break;
- default:
- break;
- }
- }
- else
- (void)getToken(refMode);
- if (valid)
- text.addChar(SyntaxChar(n),
- Location(new NumericCharRefOrigin(startLocation,
- currentLocation().index()
- + currentInput()->currentTokenLength()
- - startLocation.index(),
- markupPtr),
- 0));
- }
- break;
- case tokenCroNameStart:
- if (!parseNamedCharRef())
- return 0;
- break;
- case tokenLit:
- case tokenLita:
- done = 1;
- break;
- case tokenPeroNameStart:
- case tokenPeroGrpo:
- message(ParserMessages::sdParameterEntity);
- {
- Location loc(currentLocation());
- const Char *p = currentInput()->currentTokenStart();
- for (size_t count = currentInput()->currentTokenLength();
- count > 0;
- count--) {
- text.addChar(*p++, loc);
- loc += 1;
- }
- }
- break;
- case tokenChar:
- if (text.string().size() > refLitlen
- && currentChar() == syntax().standardFunction(Syntax::fRE)) {
- message(ParserMessages::parameterLiteralLength, NumberMessageArg(refLitlen));
- // guess that the closing delimiter has been omitted
- message(ParserMessages::literalClosingDelimiter);
- return 0;
- }
- text.addChar(currentChar(), currentLocation());
- break;
- }
- if (done) break;
- }
- if (text.string().size() > refLitlen)
- message(ParserMessages::parameterLiteralLength,
- NumberMessageArg(refLitlen));
-
- str = text.string();
- if (currentMarkup())
- currentMarkup()->addSdLiteral(text);
- return 1;
- }
- Boolean Parser::stringToNumber(const Char *s, size_t length,
- unsigned long &result)
- {
- unsigned long n = 0;
- for (; length > 0; length--, s++) {
- int val = sd().digitWeight(*s);
- if (n <= ULONG_MAX/10 && (n *= 10) <= ULONG_MAX - val)
- n += val;
- else
- return 0;
- }
- result = n;
- return 1;
- }
- void Parser::sdParamInvalidToken(Token token,
- const AllowedSdParams &allow)
- {
- message(ParserMessages::sdParamInvalidToken,
- TokenMessageArg(token, mdMode, syntaxPointer(), sdPointer()),
- AllowedSdParamsMessageArg(allow, sdPointer()));
- }
- void Parser::sdParamConvertToLiteral(SdParam &parm)
- {
- if (parm.type == SdParam::number) {
- parm.type = SdParam::paramLiteral;
- parm.paramLiteralText.resize(1);
- parm.paramLiteralText[0] = parm.n;
- }
- }
- AllowedSdParams::AllowedSdParams(SdParam::Type arg1, SdParam::Type arg2,
- SdParam::Type arg3, SdParam::Type arg4,
- SdParam::Type arg5, SdParam::Type arg6)
- {
- allow_[0] = arg1;
- allow_[1] = arg2;
- allow_[2] = arg3;
- allow_[3] = arg4;
- allow_[4] = arg5;
- allow_[5] = arg6;
- }
- Boolean AllowedSdParams::param(SdParam::Type t) const
- {
- for (int i = 0; i < maxAllow && allow_[i] != SdParam::invalid; i++)
- if (t == allow_[i])
- return 1;
- return 0;
- }
- SdParam::Type AllowedSdParams::get(int i) const
- {
- return i < 0 || i >= maxAllow ? SdParam::Type(SdParam::invalid) : allow_[i];
- }
- AllowedSdParamsMessageArg::AllowedSdParamsMessageArg(
- const AllowedSdParams &allow,
- const ConstPtr<Sd> &sd)
- : allow_(allow), sd_(sd)
- {
- }
- MessageArg *AllowedSdParamsMessageArg::copy() const
- {
- return new AllowedSdParamsMessageArg(*this);
- }
- void AllowedSdParamsMessageArg::append(MessageBuilder &builder) const
- {
- for (int i = 0;; i++) {
- SdParam::Type type = allow_.get(i);
- if (type == SdParam::invalid)
- break;
- if (i != 0)
- builder.appendFragment(ParserMessages::listSep);
- switch (type) {
- case SdParam::eE:
- builder.appendFragment(ParserMessages::entityEnd);
- break;
- case SdParam::minimumLiteral:
- builder.appendFragment(ParserMessages::minimumLiteral);
- break;
- case SdParam::mdc:
- {
- builder.appendFragment(ParserMessages::delimStart);
- Char c = sd_->execToDoc('>');
- builder.appendChars(&c, 1);
- builder.appendFragment(ParserMessages::delimEnd);
- }
- break;
- case SdParam::number:
- builder.appendFragment(ParserMessages::number);
- break;
- case SdParam::name:
- builder.appendFragment(ParserMessages::name);
- break;
- case SdParam::paramLiteral:
- builder.appendFragment(ParserMessages::parameterLiteral);
- break;
- case SdParam::capacityName:
- builder.appendFragment(ParserMessages::capacityName);
- break;
- case SdParam::generalDelimiterName:
- builder.appendFragment(ParserMessages::generalDelimiteRoleName);
- break;
- case SdParam::referenceReservedName:
- builder.appendFragment(ParserMessages::referenceReservedName);
- break;
- case SdParam::quantityName:
- builder.appendFragment(ParserMessages::quantityName);
- break;
- case SdParam::ellipsis:
- {
- StringC str(sd_->execToDoc("..."));
- builder.appendChars(str.data(), str.size());
- break;
- }
- default:
- {
- StringC str(sd_->reservedName(type - SdParam::reservedName));
- builder.appendChars(str.data(), str.size());
- break;
- }
- }
- }
- }
- SdBuilder::SdBuilder()
- : valid(1), externalSyntax(0)
- {
- }
- void SdBuilder::addFormalError(const Location &location,
- const MessageType1 &message,
- const StringC &id)
- {
- formalErrorList.insert(new SdFormalError(location, message, id));
- }
- SdFormalError::SdFormalError(const Location &location,
- const MessageType1 &message,
- const StringC &id)
- : location_(location),
- message_(&message),
- id_(id)
- {
- }
- void SdFormalError::send(ParserState &parser)
- {
- parser.Messenger::setNextLocation(location_);
- parser.message(*message_, StringMessageArg(id_));
- }
- CharSwitcher::CharSwitcher()
- {
- }
- void CharSwitcher::addSwitch(WideChar from, WideChar to)
- {
- switches_.push_back(from);
- switches_.push_back(to);
- switchUsed_.push_back(0);
- }
- SyntaxChar CharSwitcher::subst(WideChar c)
- {
- for (size_t i = 0; i < switches_.size(); i += 2)
- if (switches_[i] == c) {
- switchUsed_[i/2] = 1;
- return switches_[i + 1];
- }
- return c;
- }
- size_t CharSwitcher::nSwitches() const
- {
- return switchUsed_.size();
- }
- Boolean CharSwitcher::switchUsed(size_t i) const
- {
- return switchUsed_[i];
- }
- WideChar CharSwitcher::switchFrom(size_t i) const
- {
- return switches_[i*2];
- }
- WideChar CharSwitcher::switchTo(size_t i) const
- {
- return switches_[i*2 + 1];
- }
- CharsetMessageArg::CharsetMessageArg(const ISet<WideChar> &set)
- : set_(set)
- {
- }
- MessageArg *CharsetMessageArg::copy() const
- {
- return new CharsetMessageArg(*this);
- }
- void CharsetMessageArg::append(MessageBuilder &builder) const
- {
- ISetIter<WideChar> iter(set_);
- WideChar min, max;
- Boolean first = 1;
- while (iter.next(min, max)) {
- if (first)
- first = 0;
- else
- builder.appendFragment(ParserMessages::listSep);
- builder.appendNumber(min);
- if (max != min) {
- builder.appendFragment(max == min + 1
- ? ParserMessages::listSep
- : ParserMessages::rangeSep);
- builder.appendNumber(max);
- }
- }
- }
- #ifdef SP_NAMESPACE
- }
- #endif
|