parseSd.C 78 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843
  1. /*
  2. * CDE - Common Desktop Environment
  3. *
  4. * Copyright (c) 1993-2012, The Open Group. All rights reserved.
  5. *
  6. * These libraries and programs are free software; you can
  7. * redistribute them and/or modify them under the terms of the GNU
  8. * Lesser General Public License as published by the Free Software
  9. * Foundation; either version 2 of the License, or (at your option)
  10. * any later version.
  11. *
  12. * These libraries and programs are distributed in the hope that
  13. * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14. * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15. * PURPOSE. See the GNU Lesser General Public License for more
  16. * details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with these libraries and programs; if not, write
  20. * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21. * Floor, Boston, MA 02110-1301 USA
  22. */
  23. /* $XConsortium: parseSd.C /main/2 1996/08/12 15:47:30 mgreess $ */
  24. // Copyright (c) 1994, 1995 James Clark
  25. // See the file COPYING for copying permission.
  26. #include "splib.h"
  27. #include "Parser.h"
  28. #include "macros.h"
  29. #include "SdFormalError.h"
  30. #include "MessageBuilder.h"
  31. #include "ParserMessages.h"
  32. #include "MessageArg.h"
  33. #include "CharsetRegistry.h"
  34. #include "ISetIter.h"
  35. #include "token.h"
  36. #include "TokenMessageArg.h"
  37. #include "constant.h"
  38. #include "SdText.h"
  39. #include "NumericCharRefOrigin.h"
  40. #ifdef SP_NAMESPACE
  41. namespace SP_NAMESPACE {
  42. #endif
  43. class CharSwitcher {
  44. public:
  45. CharSwitcher();
  46. void addSwitch(WideChar from, WideChar to);
  47. SyntaxChar subst(WideChar c);
  48. size_t nSwitches() const;
  49. Boolean switchUsed(size_t i) const;
  50. WideChar switchFrom(size_t i) const;
  51. WideChar switchTo(size_t i) const;
  52. private:
  53. Vector<PackedBoolean> switchUsed_;
  54. Vector<WideChar> switches_;
  55. };
  56. // Information about the SGML declaration being built.
  57. struct SdBuilder {
  58. SdBuilder();
  59. void addFormalError(const Location &, const MessageType1 &, const StringC &);
  60. Ptr<Sd> sd;
  61. Ptr<Syntax> syntax;
  62. CharsetDecl syntaxCharsetDecl;
  63. CharsetInfo syntaxCharset;
  64. CharSwitcher switcher;
  65. Boolean externalSyntax;
  66. Boolean valid;
  67. IList<SdFormalError> formalErrorList;
  68. };
  69. class CharsetMessageArg : public MessageArg {
  70. public:
  71. CharsetMessageArg(const ISet<WideChar> &set);
  72. MessageArg *copy() const;
  73. void append(MessageBuilder &) const;
  74. private:
  75. ISet<WideChar> set_;
  76. };
  77. struct SdParam {
  78. typedef unsigned char Type;
  79. enum {
  80. invalid,
  81. eE,
  82. minimumLiteral,
  83. mdc,
  84. ellipsis,
  85. number,
  86. capacityName,
  87. name,
  88. paramLiteral,
  89. generalDelimiterName,
  90. referenceReservedName,
  91. quantityName,
  92. reservedName // Sd::ReservedName is added to this
  93. };
  94. Type type;
  95. StringC token;
  96. Text literalText;
  97. String<SyntaxChar> paramLiteralText;
  98. union {
  99. Number n;
  100. Sd::Capacity capacityIndex;
  101. Syntax::Quantity quantityIndex;
  102. Syntax::ReservedName reservedNameIndex;
  103. Syntax::DelimGeneral delimGeneralIndex;
  104. };
  105. };
  106. class AllowedSdParams {
  107. public:
  108. AllowedSdParams(SdParam::Type,
  109. SdParam::Type = SdParam::invalid,
  110. SdParam::Type = SdParam::invalid,
  111. SdParam::Type = SdParam::invalid,
  112. SdParam::Type = SdParam::invalid,
  113. SdParam::Type = SdParam::invalid);
  114. Boolean param(SdParam::Type) const;
  115. SdParam::Type get(int i) const;
  116. private:
  117. enum { maxAllow = 6 };
  118. SdParam::Type allow_[maxAllow];
  119. };
  120. class AllowedSdParamsMessageArg : public MessageArg {
  121. public:
  122. AllowedSdParamsMessageArg(const AllowedSdParams &allow,
  123. const ConstPtr<Sd> &sd);
  124. MessageArg *copy() const;
  125. void append(MessageBuilder &) const;
  126. private:
  127. AllowedSdParams allow_;
  128. ConstPtr<Sd> sd_;
  129. };
  130. struct StandardSyntaxSpec {
  131. struct AddedFunction {
  132. const char *name;
  133. Syntax::FunctionClass functionClass;
  134. SyntaxChar syntaxChar;
  135. };
  136. const AddedFunction *addedFunction;
  137. size_t nAddedFunction;
  138. Boolean shortref;
  139. };
  140. static StandardSyntaxSpec::AddedFunction coreFunctions[] = {
  141. { "TAB", Syntax::cSEPCHAR, 9 },
  142. };
  143. static StandardSyntaxSpec coreSyntax = {
  144. coreFunctions, SIZEOF(coreFunctions), 0
  145. };
  146. static StandardSyntaxSpec refSyntax = {
  147. coreFunctions, SIZEOF(coreFunctions), 1
  148. };
  149. void Parser::doInit()
  150. {
  151. if (cancelled()) {
  152. allDone();
  153. return;
  154. }
  155. // When document entity doesn't exist, don't give any errors
  156. // other than the cannot open error.
  157. if (currentInput()->get(messenger()) == InputSource::eE) {
  158. if (currentInput()->accessError()) {
  159. allDone();
  160. return;
  161. }
  162. }
  163. else
  164. currentInput()->ungetToken();
  165. const CharsetInfo &initCharset = sd().docCharset();
  166. ISet<WideChar> missing;
  167. findMissingMinimum(initCharset, missing);
  168. if (!missing.isEmpty()) {
  169. message(ParserMessages::sdMissingCharacters, CharsetMessageArg(missing));
  170. giveUp();
  171. return;
  172. }
  173. Boolean found = 0;
  174. StringC systemId;
  175. if (scanForSgmlDecl(initCharset))
  176. found = 1;
  177. else {
  178. currentInput()->ungetToken();
  179. if (entityCatalog().sgmlDecl(initCharset, messenger(), systemId)) {
  180. InputSource *in = entityManager().open(systemId,
  181. initCharset,
  182. new InputSourceOrigin,
  183. 0,
  184. messenger());
  185. if (in) {
  186. pushInput(in);
  187. if (scanForSgmlDecl(initCharset))
  188. found = 1;
  189. else {
  190. message(ParserMessages::badDefaultSgmlDecl);
  191. popInputStack();
  192. }
  193. }
  194. }
  195. }
  196. if (found) {
  197. if (startMarkup(eventsWanted().wantPrologMarkup(), currentLocation())) {
  198. size_t nS = currentInput()->currentTokenLength() - 6;
  199. for (size_t i = 0; i < nS; i++)
  200. currentMarkup()->addS(currentInput()->currentTokenStart()[i]);
  201. currentMarkup()->addDelim(Syntax::dMDO);
  202. currentMarkup()->addSdReservedName(Sd::rSGML,
  203. currentInput()->currentTokenStart()
  204. + (currentInput()->currentTokenLength() - 4),
  205. 4);
  206. }
  207. Syntax *syntaxp = new Syntax(sd());
  208. CharSwitcher switcher;
  209. if (!setStandardSyntax(*syntaxp, refSyntax, sd().docCharset(),
  210. switcher)) {
  211. giveUp();
  212. delete syntaxp;
  213. return;
  214. }
  215. syntaxp->implySgmlChar(sd().docCharset());
  216. setSyntax(syntaxp);
  217. compileSdModes();
  218. ConstPtr<Sd> refSd(sdPointer());
  219. ConstPtr<Syntax> refSyntax(syntaxPointer());
  220. if (!parseSgmlDecl()) {
  221. giveUp();
  222. return;
  223. }
  224. // queue an SGML declaration event
  225. eventHandler().sgmlDecl(new (eventAllocator())
  226. SgmlDeclEvent(sdPointer(),
  227. syntaxPointer(),
  228. instanceSyntaxPointer(),
  229. refSd,
  230. refSyntax,
  231. currentInput()->nextIndex(),
  232. systemId,
  233. markupLocation(),
  234. currentMarkup()));
  235. if (inputLevel() == 2) {
  236. // FIXME perhaps check for junk after SGML declaration
  237. popInputStack();
  238. }
  239. }
  240. else {
  241. if (!implySgmlDecl()) {
  242. giveUp();
  243. return;
  244. }
  245. // queue an SGML declaration event
  246. eventHandler().sgmlDecl(new (eventAllocator())
  247. SgmlDeclEvent(sdPointer(),
  248. syntaxPointer()));
  249. }
  250. // Now we have sd and syntax set up, prepare to parse the prolog.
  251. compilePrologModes();
  252. setPhase(prologPhase);
  253. }
  254. Boolean Parser::implySgmlDecl()
  255. {
  256. Syntax *syntaxp = new Syntax(sd());
  257. const StandardSyntaxSpec *spec;
  258. if (options().shortref)
  259. spec = &refSyntax;
  260. else
  261. spec = &coreSyntax;
  262. CharSwitcher switcher;
  263. if (!setStandardSyntax(*syntaxp, *spec, sd().docCharset(), switcher)) {
  264. delete syntaxp;
  265. return 0;
  266. }
  267. syntaxp->implySgmlChar(sd().docCharset());
  268. for (int i = 0; i < Syntax::nQuantity; i++)
  269. syntaxp->setQuantity(i, options().quantity[i]);
  270. setSyntax(syntaxp);
  271. return 1;
  272. }
  273. Boolean Parser::setStandardSyntax(Syntax &syn,
  274. const StandardSyntaxSpec &spec,
  275. const CharsetInfo &docCharset,
  276. CharSwitcher &switcher)
  277. {
  278. static UnivCharsetDesc::Range syntaxCharsetRanges[] = {
  279. { 0, 128, 0 },
  280. };
  281. static UnivCharsetDesc syntaxCharsetDesc(syntaxCharsetRanges,
  282. SIZEOF(syntaxCharsetRanges));
  283. static CharsetInfo syntaxCharset(syntaxCharsetDesc);
  284. Boolean valid = 1;
  285. if (!checkSwitches(switcher, syntaxCharset))
  286. valid = 0;
  287. size_t i;
  288. for (i = 0; i < switcher.nSwitches(); i++)
  289. if (switcher.switchTo(i) >= 128)
  290. message(ParserMessages::switchNotInCharset,
  291. NumberMessageArg(switcher.switchTo(i)));
  292. static const Char shunchar[] = {
  293. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
  294. 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  295. 127, 255
  296. };
  297. for (i = 0; i < SIZEOF(shunchar); i++)
  298. syn.addShunchar(shunchar[i]);
  299. syn.setShuncharControls();
  300. static Syntax::StandardFunction standardFunctions[3] = {
  301. Syntax::fRE, Syntax::fRS, Syntax::fSPACE
  302. };
  303. static SyntaxChar functionChars[3] = { 13, 10, 32 };
  304. for (i = 0; i < 3; i++) {
  305. Char docChar;
  306. if (translateSyntax(switcher,
  307. syntaxCharset,
  308. docCharset,
  309. functionChars[i],
  310. docChar)
  311. && checkNotFunction(syn, docChar))
  312. syn.setStandardFunction(standardFunctions[i], docChar);
  313. else
  314. valid = 0;
  315. }
  316. for (i = 0; i < spec.nAddedFunction; i++) {
  317. Char docChar;
  318. if (translateSyntax(switcher,
  319. syntaxCharset,
  320. docCharset,
  321. spec.addedFunction[i].syntaxChar,
  322. docChar)
  323. && checkNotFunction(syn, docChar))
  324. syn.addFunctionChar(docCharset.execToDesc(spec.addedFunction[i].name),
  325. spec.addedFunction[i].functionClass,
  326. docChar);
  327. else
  328. valid = 0;
  329. }
  330. static SyntaxChar nameChars[2] = { 45, 46 }; // '-' '.'
  331. ISet<Char> nameCharSet;
  332. for (i = 0; i < 2; i++) {
  333. Char docChar;
  334. if (translateSyntax(switcher,
  335. syntaxCharset,
  336. docCharset,
  337. nameChars[i],
  338. docChar))
  339. nameCharSet.add(docChar);
  340. else
  341. valid = 0;
  342. }
  343. if (!checkNmchars(nameCharSet, syn))
  344. valid = 0;
  345. else
  346. syn.addNameCharacters(nameCharSet);
  347. syn.setNamecaseGeneral(1);
  348. syn.setNamecaseEntity(0);
  349. if (!setRefDelimGeneral(syn, syntaxCharset, docCharset, switcher))
  350. valid = 0;
  351. setRefNames(syn, docCharset);
  352. syn.enterStandardFunctionNames();
  353. if (spec.shortref
  354. && !addRefDelimShortref(syn, syntaxCharset, docCharset, switcher))
  355. valid = 0;
  356. return valid;
  357. }
  358. Boolean Parser::setRefDelimGeneral(Syntax &syntax,
  359. const CharsetInfo &syntaxCharset,
  360. const CharsetInfo &docCharset,
  361. CharSwitcher &switcher)
  362. {
  363. // Column 3 from Figure 3
  364. static const char delims[][2] = {
  365. { 38 },
  366. { 45, 45 },
  367. { 38, 35 },
  368. { 93 },
  369. { 91 },
  370. { 93 },
  371. { 91 },
  372. { 38 },
  373. { 60, 47 },
  374. { 41 },
  375. { 40 },
  376. { 34 },
  377. { 39 },
  378. { 62 },
  379. { 60, 33 },
  380. { 45 },
  381. { 93, 93 },
  382. { 47 },
  383. { 63 },
  384. { 124 },
  385. { 37 },
  386. { 62 },
  387. { 60, 63 },
  388. { 43 },
  389. { 59 },
  390. { 42 },
  391. { 35 },
  392. { 44 },
  393. { 60 },
  394. { 62 },
  395. { 61 },
  396. };
  397. Boolean valid = 1;
  398. ISet<WideChar> missing;
  399. for (int i = 0; i < Syntax::nDelimGeneral; i++)
  400. if (syntax.delimGeneral(i).size() == 0) {
  401. StringC delim;
  402. size_t j;
  403. for (j = 0; j < 2 && delims[i][j] != '\0'; j++) {
  404. UnivChar univChar = translateUniv(delims[i][j], switcher,
  405. syntaxCharset);
  406. Char c;
  407. if (univToDescCheck(docCharset, univChar, c))
  408. delim += c;
  409. else {
  410. missing += univChar;
  411. valid = 0;
  412. }
  413. }
  414. if (delim.size() == j) {
  415. if (checkGeneralDelim(syntax, delim))
  416. syntax.setDelimGeneral(i, delim);
  417. else
  418. valid = 0;
  419. }
  420. }
  421. if (!missing.isEmpty())
  422. message(ParserMessages::missingSignificant646, CharsetMessageArg(missing));
  423. return valid;
  424. }
  425. void Parser::setRefNames(Syntax &syntax, const CharsetInfo &docCharset)
  426. {
  427. static const char *const referenceNames[] = {
  428. "ANY",
  429. "ATTLIST",
  430. "CDATA",
  431. "CONREF",
  432. "CURRENT",
  433. "DEFAULT",
  434. "DOCTYPE",
  435. "ELEMENT",
  436. "EMPTY",
  437. "ENDTAG",
  438. "ENTITIES",
  439. "ENTITY",
  440. "FIXED",
  441. "ID",
  442. "IDLINK",
  443. "IDREF",
  444. "IDREFS",
  445. "IGNORE",
  446. "IMPLIED",
  447. "INCLUDE",
  448. "INITIAL",
  449. "LINK",
  450. "LINKTYPE",
  451. "MD",
  452. "MS",
  453. "NAME",
  454. "NAMES",
  455. "NDATA",
  456. "NMTOKEN",
  457. "NMTOKENS",
  458. "NOTATION",
  459. "NUMBER",
  460. "NUMBERS",
  461. "NUTOKEN",
  462. "NUTOKENS",
  463. "O",
  464. "PCDATA",
  465. "PI",
  466. "POSTLINK",
  467. "PUBLIC",
  468. "RCDATA",
  469. "RE",
  470. "REQUIRED",
  471. "RESTORE",
  472. "RS",
  473. "SDATA",
  474. "SHORTREF",
  475. "SIMPLE",
  476. "SPACE",
  477. "STARTTAG",
  478. "SUBDOC",
  479. "SYSTEM",
  480. "TEMP",
  481. "USELINK",
  482. "USEMAP"
  483. };
  484. int i;
  485. for (i = 0; i < Syntax::nNames; i++) {
  486. StringC docName(docCharset.execToDesc(referenceNames[i]));
  487. Syntax::ReservedName tem;
  488. if (syntax.lookupReservedName(docName, &tem))
  489. message(ParserMessages::nameReferenceReservedName,
  490. StringMessageArg(docName));
  491. if (syntax.reservedName(Syntax::ReservedName(i)).size() == 0)
  492. syntax.setName(i, docName);
  493. }
  494. }
  495. Boolean Parser::addRefDelimShortref(Syntax &syntax,
  496. const CharsetInfo &syntaxCharset,
  497. const CharsetInfo &docCharset,
  498. CharSwitcher &switcher)
  499. {
  500. // Column 2 from Figure 4
  501. static const char delimShortref[][3] = {
  502. { 9 },
  503. { 13 },
  504. { 10 },
  505. { 10, 66 },
  506. { 10, 13 },
  507. { 10, 66, 13 },
  508. { 66, 13 },
  509. { 32 },
  510. { 66, 66 },
  511. { 34 },
  512. { 35 },
  513. { 37 },
  514. { 39 },
  515. { 40 },
  516. { 41 },
  517. { 42 },
  518. { 43 },
  519. { 44 },
  520. { 45 },
  521. { 45, 45 },
  522. { 58 },
  523. { 59 },
  524. { 61 },
  525. { 64 },
  526. { 91 },
  527. { 93 },
  528. { 94 },
  529. { 95 },
  530. { 123 },
  531. { 124 },
  532. { 125 },
  533. { 126 },
  534. };
  535. ISet<WideChar> missing;
  536. for (size_t i = 0; i < SIZEOF(delimShortref); i++) {
  537. StringC delim;
  538. size_t j;
  539. for (j = 0; j < 3 && delimShortref[i][j] != '\0'; j++) {
  540. Char c;
  541. UnivChar univChar = translateUniv(delimShortref[i][j], switcher,
  542. syntaxCharset);
  543. if (univToDescCheck(docCharset, univChar, c))
  544. delim += c;
  545. else
  546. missing += univChar;
  547. }
  548. if (delim.size() == j) {
  549. if (switcher.nSwitches() > 0 && syntax.isValidShortref(delim))
  550. message(ParserMessages::duplicateDelimShortref,
  551. StringMessageArg(delim));
  552. else
  553. syntax.addDelimShortref(delim, docCharset);
  554. }
  555. }
  556. if (!missing.isEmpty())
  557. message(ParserMessages::missingSignificant646, CharsetMessageArg(missing));
  558. return 1;
  559. }
  560. // Determine whether the document starts with an SGML declaration.
  561. // There is no current syntax at this point.
  562. Boolean Parser::scanForSgmlDecl(const CharsetInfo &initCharset)
  563. {
  564. Char rs;
  565. if (!univToDescCheck(initCharset, UnivCharsetDesc::rs, rs))
  566. return 0;
  567. Char re;
  568. if (!univToDescCheck(initCharset, UnivCharsetDesc::re, re))
  569. return 0;
  570. Char space;
  571. if (!univToDescCheck(initCharset, UnivCharsetDesc::space, space))
  572. return 0;
  573. Char tab;
  574. if (!univToDescCheck(initCharset, UnivCharsetDesc::tab, tab))
  575. return 0;
  576. InputSource *in = currentInput();
  577. Xchar c = in->get(messenger());
  578. while (c == rs || c == space || c == re || c == tab)
  579. c = in->tokenChar(messenger());
  580. if (c != initCharset.execToDesc('<'))
  581. return 0;
  582. if (in->tokenChar(messenger()) != initCharset.execToDesc('!'))
  583. return 0;
  584. c = in->tokenChar(messenger());
  585. if (c != initCharset.execToDesc('S')
  586. && c != initCharset.execToDesc('s'))
  587. return 0;
  588. c = in->tokenChar(messenger());
  589. if (c != initCharset.execToDesc('G')
  590. && c != initCharset.execToDesc('g'))
  591. return 0;
  592. c = in->tokenChar(messenger());
  593. if (c != initCharset.execToDesc('M')
  594. && c != initCharset.execToDesc('m'))
  595. return 0;
  596. c = in->tokenChar(messenger());
  597. if (c != initCharset.execToDesc('L')
  598. && c != initCharset.execToDesc('l'))
  599. return 0;
  600. c = in->tokenChar(messenger());
  601. // Don't recognize this if SGML is followed by a name character.
  602. if (c == InputSource::eE)
  603. return 1;
  604. in->endToken(in->currentTokenLength() - 1);
  605. if (c == initCharset.execToDesc('-'))
  606. return 0;
  607. if (c == initCharset.execToDesc('.'))
  608. return 0;
  609. UnivChar univ;
  610. if (!initCharset.descToUniv(c, univ))
  611. return 1;
  612. if (UnivCharsetDesc::a <= univ && univ < UnivCharsetDesc::a + 26)
  613. return 0;
  614. if (UnivCharsetDesc::A <= univ && univ < UnivCharsetDesc::A + 26)
  615. return 0;
  616. if (UnivCharsetDesc::zero <= univ && univ < UnivCharsetDesc::zero + 10)
  617. return 0;
  618. return 1;
  619. }
  620. void Parser::findMissingMinimum(const CharsetInfo &charset,
  621. ISet<WideChar> &missing)
  622. {
  623. Char to;
  624. size_t i;
  625. for (i = 0; i < 26; i++) {
  626. if (!univToDescCheck(charset, UnivCharsetDesc::A + i, to))
  627. missing += UnivCharsetDesc::A + i;
  628. if (!univToDescCheck(charset, UnivCharsetDesc::a + i, to))
  629. missing += UnivCharsetDesc::a + i;
  630. }
  631. for (i = 0; i < 10; i++) {
  632. Char to;
  633. if (!univToDescCheck(charset, UnivCharsetDesc::zero + i, to))
  634. missing += UnivCharsetDesc::zero + i;
  635. }
  636. static const UnivChar special[] = {
  637. 39, 40, 41, 43, 44, 45, 46, 47, 58, 61, 63
  638. };
  639. for (i = 0; i < SIZEOF(special); i++)
  640. if (!univToDescCheck(charset, special[i], to))
  641. missing += special[i];
  642. }
  643. Boolean Parser::parseSgmlDecl()
  644. {
  645. SdParam parm;
  646. SdBuilder sdBuilder;
  647. if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
  648. return 0;
  649. StringC version(sd().execToDoc("ISO 8879:1986"));
  650. if (parm.literalText.string() != version)
  651. message(ParserMessages::standardVersion,
  652. StringMessageArg(parm.literalText.string()));
  653. sdBuilder.sd = new Sd;
  654. typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &);
  655. static SdParser parsers[] = {
  656. &Parser::sdParseDocumentCharset,
  657. &Parser::sdParseCapacity,
  658. &Parser::sdParseScope,
  659. &Parser::sdParseSyntax,
  660. &Parser::sdParseFeatures,
  661. &Parser::sdParseAppinfo,
  662. };
  663. for (size_t i = 0; i < SIZEOF(parsers); i++) {
  664. if (!(this->*(parsers[i]))(sdBuilder, parm))
  665. return 0;
  666. if (!sdBuilder.valid)
  667. return 0;
  668. }
  669. if (!parseSdParam(AllowedSdParams(SdParam::mdc), parm))
  670. return 0;
  671. if (sdBuilder.sd->formal()) {
  672. while (!sdBuilder.formalErrorList.empty()) {
  673. SdFormalError *p = sdBuilder.formalErrorList.get();
  674. ParserState *state = this; // work around lcc 3.0 bug
  675. p->send(*state);
  676. delete p;
  677. }
  678. }
  679. setSd(sdBuilder.sd.pointer());
  680. if (sdBuilder.sd->scopeInstance()) {
  681. Syntax *proSyntax = new Syntax(sd());
  682. CharSwitcher switcher;
  683. setStandardSyntax(*proSyntax, refSyntax, sd().docCharset(), switcher);
  684. proSyntax->setSgmlChar(*sdBuilder.syntax->charSet(Syntax::sgmlChar));
  685. ISet<WideChar> invalidSgmlChar;
  686. proSyntax->checkSgmlChar(sdBuilder.sd->docCharset(),
  687. sdBuilder.syntax.pointer(),
  688. invalidSgmlChar);
  689. sdBuilder.syntax->checkSgmlChar(sdBuilder.sd->docCharset(),
  690. proSyntax,
  691. invalidSgmlChar);
  692. if (!invalidSgmlChar.isEmpty())
  693. message(ParserMessages::invalidSgmlChar, CharsetMessageArg(invalidSgmlChar));
  694. setSyntaxes(proSyntax, sdBuilder.syntax.pointer());
  695. }
  696. else
  697. setSyntax(sdBuilder.syntax.pointer());
  698. if (syntax().multicode())
  699. currentInput()->setMarkupScanTable(syntax().markupScanTable());
  700. return 1;
  701. }
  702. Boolean Parser::sdParseDocumentCharset(SdBuilder &sdBuilder, SdParam &parm)
  703. {
  704. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rCHARSET),
  705. parm))
  706. return 0;
  707. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET),
  708. parm))
  709. return 0;
  710. CharsetDecl decl;
  711. UnivCharsetDesc desc;
  712. if (!sdParseCharset(sdBuilder, parm, 1, decl, desc))
  713. return 0;
  714. ISet<WideChar> missing;
  715. findMissingMinimum(desc, missing);
  716. if (!missing.isEmpty()) {
  717. message(ParserMessages::missingMinimumChars,
  718. CharsetMessageArg(missing));
  719. return 0;
  720. }
  721. ISet<Char> sgmlChar;
  722. decl.usedSet(sgmlChar);
  723. sdBuilder.sd->setDocCharsetDesc(desc);
  724. sdBuilder.sd->setDocCharsetDecl(decl);
  725. sdBuilder.syntax = new Syntax(*sdBuilder.sd);
  726. sdBuilder.syntax->setSgmlChar(sgmlChar);
  727. return 1;
  728. }
  // Parse a character set description.  On entry the first BASESET keyword
  // has already been consumed.  The description is one or more sections, each
  // consisting of a base set public identifier, the DESCSET keyword, and a
  // list of (first character number, count, target) triples, where the target
  // is a base character number, a minimum literal, or UNUSED.
  //
  // sdBuilder  - collects formal errors; its valid flag is cleared when a
  //              document range exceeds charMax.
  // parm       - current parameter; on return it holds the keyword that
  //              ended the list (CAPACITY if isDocument, else FUNCTION).
  // isDocument - true for the document character set; enables the charMax
  //              range check and selects the terminating keyword.
  // decl       - cleared, then filled with the declared sections and ranges.
  // desc       - receives the mapping for ranges whose base set was found
  //              and for ranges given by a minimum literal.
  //
  // Returns 0 if a parameter could not be parsed, 1 otherwise.  Semantic
  // problems are reported through message().
  729. Boolean Parser::sdParseCharset(SdBuilder &sdBuilder,
  730. SdParam &parm,
  731. Boolean isDocument,
  732. CharsetDecl &decl,
  733. UnivCharsetDesc &desc)
  734. {
  735. decl.clear();
  736. ISet<WideChar> multiplyDeclared;
  737. // This is for checking whether the syntax reference character set
  738. // is ISO 646 when SCOPE is INSTANCE.
  739. Boolean maybeISO646 = 1;
  // Outer loop: one iteration per BASESET section.
  740. do {
  741. if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
  742. return 0;
  743. UnivCharsetDesc baseDesc;
  744. PublicId id;
  745. Boolean found;
  746. PublicId::TextClass textClass;
  747. const MessageType1 *err;
  // Problems with the public identifier are queued as formal errors on
  // sdBuilder rather than reported immediately.
  748. if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
  749. sdBuilder.addFormalError(currentLocation(),
  750. *err,
  751. id.string());
  752. else if (id.getTextClass(textClass)
  753. && textClass != PublicId::CHARSET)
  754. sdBuilder.addFormalError(currentLocation(),
  755. ParserMessages::basesetTextClass,
  756. id.string());
  757. Boolean givenError;
  // Try to resolve the base set through referencePublic() first; if that
  // neither succeeds nor reports an error, fall back to CharsetRegistry.
  758. if (referencePublic(id, PublicId::CHARSET, givenError))
  759. found = sdParseExternalCharset(*sdBuilder.sd, baseDesc);
  760. else if (!givenError) {
  761. found = CharsetRegistry::findCharset(id, sd().docCharset(), baseDesc);
  762. if (!found && options().warnSgmlDecl)
  763. message(ParserMessages::unknownBaseset, StringMessageArg(id.string()));
  764. }
  765. else
  766. found = 0;
  767. if (!found)
  768. maybeISO646 = 0;
  769. decl.addSection(id);
  770. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rDESCSET),
  771. parm))
  772. return 0;
  773. if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
  774. return 0;
  // Inner loop: one iteration per described range.  At the top of each
  // iteration parm.n is the first character number of the range.
  775. do {
  776. WideChar min = parm.n;
  777. if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
  778. return 0;
  779. Number count = parm.n;
  // adjCount is the number of characters actually mapped into desc; it is
  // smaller than count when the range had to be clipped.
  780. Number adjCount;
  781. if (options().warnSgmlDecl && count == 0)
  782. message(ParserMessages::zeroNumberOfCharacters);
  783. decl.rangeDeclared(min, count, multiplyDeclared);
  // For the document character set, a range reaching past charMax is
  // reported, marks the declaration invalid, and is clipped to charMax.
  784. if (isDocument
  785. && count > 0
  786. && (min > charMax || count - 1 > charMax - min)) {
  787. message(ParserMessages::documentCharMax, NumberMessageArg(charMax));
  788. adjCount = min > charMax ? 0 : 1 + (charMax - min);
  789. sdBuilder.valid = 0;
  790. maybeISO646 = 0;
  791. }
  792. else
  793. adjCount = count;
  794. if (!parseSdParam(AllowedSdParams(SdParam::number,
  795. SdParam::minimumLiteral,
  796. SdParam::reservedName + Sd::rUNUSED),
  797. parm))
  798. return 0;
  799. switch (parm.type) {
  800. case SdParam::number:
  // Target is a base character number: map the range through baseDesc,
  // but only if the base set itself was found.
  801. decl.addRange(min, count, parm.n);
  802. if (found && adjCount > 0) {
  803. ISet<WideChar> baseMissing;
  804. desc.addBaseRange(baseDesc, min, min + (adjCount - 1), parm.n,
  805. baseMissing);
  806. if (!baseMissing.isEmpty() && options().warnSgmlDecl)
  807. message(ParserMessages::basesetCharsMissing,
  808. CharsetMessageArg(baseMissing));
  809. }
  810. break;
  811. case SdParam::reservedName + Sd::rUNUSED:
  // UNUSED ranges are recorded in decl only; nothing is added to desc.
  812. decl.addRange(min, count);
  813. break;
  814. case SdParam::minimumLiteral:
  // Target is a minimum literal: every character of the range is mapped
  // to the same universal character, for at most 256 characters.
  815. {
  816. UnivChar c = sdBuilder.sd->nameToUniv(parm.literalText.string());
  817. if (adjCount > 256) {
  818. message(ParserMessages::tooManyCharsMinimumLiteral);
  819. adjCount = 256;
  820. }
  821. for (Number i = 0; i < adjCount; i++)
  822. desc.addRange(min + i, min + i, c);
  823. }
  824. maybeISO646 = 0;
  825. decl.addRange(min, count, parm.literalText.string());
  826. break;
  827. default:
  828. CANNOT_HAPPEN();
  829. }
  // The list continues with another number, starts a new section with
  // BASESET, or ends with the keyword that follows this character set.
  830. SdParam::Type follow = (isDocument
  831. ? SdParam::reservedName + Sd::rCAPACITY
  832. : SdParam::reservedName + Sd::rFUNCTION);
  833. if (!parseSdParam(AllowedSdParams(SdParam::number,
  834. SdParam::reservedName + Sd::rBASESET,
  835. follow),
  836. parm))
  837. return 0;
  838. } while (parm.type == SdParam::number);
  839. } while (parm.type == SdParam::reservedName + Sd::rBASESET);
  840. if (!multiplyDeclared.isEmpty())
  841. message(ParserMessages::duplicateCharNumbers,
  842. CharsetMessageArg(multiplyDeclared));
  // Report gaps between consecutive declared ranges.
  843. ISet<WideChar> declaredSet;
  844. decl.declaredSet(declaredSet);
  845. ISetIter<WideChar> iter(declaredSet);
  846. WideChar min, max, lastMax;
  847. if (iter.next(min, max)) {
  848. ISet<WideChar> holes;
  849. lastMax = max;
  850. while (iter.next(min, max)) {
  851. if (min - lastMax > 1)
  852. holes.addRange(lastMax + 1, min - 1);
  853. lastMax = max;
  854. }
  855. if (!holes.isEmpty())
  856. message(ParserMessages::codeSetHoles, CharsetMessageArg(holes));
  857. }
  858. if (!isDocument && sdBuilder.sd->scopeInstance()) {
  859. // If scope is INSTANCE, syntax reference character set
  860. // must be same as reference.
  // The check below accepts only a first range in desc that maps
  // characters 0..127 to universal 0, with maybeISO646 still set.
  861. UnivCharsetDescIter iter(desc);
  862. WideChar descMin, descMax;
  863. UnivChar univMin;
  864. if (!iter.next(descMin, descMax, univMin)
  865. || descMin != 0
  866. || descMax != 127
  867. || univMin != 0
  868. || !maybeISO646)
  869. message(ParserMessages::scopeInstanceSyntaxCharset);
  870. }
  871. return 1;
  872. }
  873. Boolean Parser::sdParseExternalCharset(Sd &sd, UnivCharsetDesc &desc)
  874. {
  875. SdParam parm;
  876. for (;;) {
  877. if (!parseSdParam(AllowedSdParams(SdParam::number, SdParam::eE),
  878. parm))
  879. break;
  880. if (parm.type == SdParam::eE)
  881. return 1;
  882. WideChar min = parm.n;
  883. if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
  884. break;
  885. Number count = parm.n;
  886. if (!parseSdParam(AllowedSdParams(SdParam::number,
  887. SdParam::minimumLiteral,
  888. SdParam::reservedName + Sd::rUNUSED),
  889. parm))
  890. break;
  891. if (parm.type == SdParam::number) {
  892. if (count > 0)
  893. desc.addRange(min, min + (count - 1), parm.n);
  894. }
  895. else if (parm.type == SdParam::minimumLiteral) {
  896. UnivChar c = sd.nameToUniv(parm.literalText.string());
  897. if (count > 256) {
  898. message(ParserMessages::tooManyCharsMinimumLiteral);
  899. count = 256;
  900. }
  901. for (Number i = 0; i < count; i++)
  902. desc.addRange(min + i, min + i, c);
  903. }
  904. }
  905. popInputStack();
  906. return 0;
  907. }
  908. Boolean Parser::sdParseCapacity(SdBuilder &sdBuilder, SdParam &parm)
  909. {
  910. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rPUBLIC,
  911. SdParam::reservedName + Sd::rSGMLREF),
  912. parm))
  913. return 0;
  914. Boolean pushed = 0;
  915. if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {
  916. if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
  917. return 0;
  918. PublicId id;
  919. PublicId::TextClass textClass;
  920. const MessageType1 *err;
  921. if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
  922. sdBuilder.addFormalError(currentLocation(),
  923. *err,
  924. id.string());
  925. else if (id.getTextClass(textClass)
  926. && textClass != PublicId::CAPACITY)
  927. sdBuilder.addFormalError(currentLocation(),
  928. ParserMessages::capacityTextClass,
  929. id.string());
  930. const StringC &str = id.string();
  931. if (str != sd().execToDoc("ISO 8879-1986//CAPACITY Reference//EN")
  932. && str != sd().execToDoc("ISO 8879:1986//CAPACITY Reference//EN")) {
  933. Boolean givenError;
  934. if (referencePublic(id, PublicId::CAPACITY, givenError))
  935. pushed = 1;
  936. else if (!givenError)
  937. message(ParserMessages::unknownCapacitySet, StringMessageArg(str));
  938. }
  939. if (!pushed)
  940. return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE),
  941. parm);
  942. }
  943. PackedBoolean capacitySpecified[Sd::nCapacity];
  944. int i;
  945. for (i = 0; i < Sd::nCapacity; i++)
  946. capacitySpecified[i] = 0;
  947. if (!parseSdParam(AllowedSdParams(SdParam::capacityName), parm))
  948. return 0;
  949. do {
  950. Sd::Capacity capacityIndex = parm.capacityIndex;
  951. if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
  952. return 0;
  953. if (!capacitySpecified[capacityIndex]) {
  954. sdBuilder.sd->setCapacity(capacityIndex, parm.n);
  955. capacitySpecified[capacityIndex] = 1;
  956. }
  957. else if (options().warnSgmlDecl)
  958. message(ParserMessages::duplicateCapacity,
  959. StringMessageArg(sd().capacityName(i)));
  960. int final = pushed ? int(SdParam::eE) : SdParam::reservedName + Sd::rSCOPE;
  961. if (!parseSdParam(AllowedSdParams(SdParam::capacityName, final),
  962. parm))
  963. return 0;
  964. } while (parm.type == SdParam::capacityName);
  965. Number totalcap = sdBuilder.sd->capacity(0);
  966. for (i = 1; i < Sd::nCapacity; i++)
  967. if (sdBuilder.sd->capacity(i) > totalcap)
  968. message(ParserMessages::capacityExceedsTotalcap,
  969. StringMessageArg(sd().capacityName(i)));
  970. if (pushed)
  971. return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE),
  972. parm);
  973. return 1;
  974. }
  975. Boolean Parser::referencePublic(const PublicId &id,
  976. PublicId::TextClass entityType,
  977. Boolean &givenError)
  978. {
  979. givenError = 0;
  980. StringC sysid;
  981. if (entityCatalog().lookupPublic(id.string(),
  982. sd().docCharset(),
  983. messenger(),
  984. sysid)) {
  985. Location loc = currentLocation();
  986. eventHandler().sgmlDeclEntity(new (eventAllocator())
  987. SgmlDeclEntityEvent(id,
  988. entityType,
  989. sysid,
  990. loc));
  991. Ptr<EntityOrigin> origin(new EntityOrigin(loc));
  992. if (currentMarkup())
  993. currentMarkup()->addEntityStart(origin);
  994. InputSource *in = entityManager().open(sysid,
  995. sd().docCharset(),
  996. origin.pointer(),
  997. 0,
  998. messenger());
  999. if (!in) {
  1000. givenError = 1;
  1001. return 0;
  1002. }
  1003. pushInput(in);
  1004. return 1;
  1005. }
  1006. return 0;
  1007. }
  1008. Boolean Parser::sdParseScope(SdBuilder &sdBuilder, SdParam &parm)
  1009. {
  1010. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rINSTANCE,
  1011. SdParam::reservedName + Sd::rDOCUMENT),
  1012. parm))
  1013. return 0;
  1014. if (parm.type == SdParam::reservedName + Sd::rINSTANCE)
  1015. sdBuilder.sd->setScopeInstance();
  1016. return 1;
  1017. }
  // Parse the SYNTAX part of the SGML declaration.
  //
  // After the SYNTAX keyword comes either
  //  - PUBLIC, a public identifier and optionally SWITCHES with pairs of
  //    character numbers, ending at the FEATURES keyword; or
  //  - SHUNCHAR, which begins an explicitly declared concrete syntax that is
  //    parsed by sdParseExplicitSyntax().
  //
  // A public identifier is first matched against the built-in syntaxes with
  // lookupSyntax().  If that fails it is resolved with referencePublic() and
  // the referenced entity is parsed as an explicit syntax.
  //
  // Returns 0 if a parameter cannot be parsed.  Problems that allow parsing
  // to continue clear sdBuilder.valid instead.
  1018. Boolean Parser::sdParseSyntax(SdBuilder &sdBuilder, SdParam &parm)
  1019. {
  1020. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSYNTAX),
  1021. parm))
  1022. return 0;
  1023. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSHUNCHAR,
  1024. SdParam::reservedName + Sd::rPUBLIC),
  1025. parm))
  1026. return 0;
  1027. if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {
  1028. if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
  1029. return 0;
  1030. PublicId id;
  1031. const MessageType1 *err;
  1032. PublicId::TextClass textClass;
  // Problems with the public identifier are queued as formal errors.
  1033. if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
  1034. sdBuilder.addFormalError(currentLocation(),
  1035. *err,
  1036. id.string());
  1037. else if (id.getTextClass(textClass)
  1038. && textClass != PublicId::SYNTAX)
  1039. sdBuilder.addFormalError(currentLocation(),
  1040. ParserMessages::syntaxTextClass,
  1041. id.string());
  1042. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rFEATURES,
  1043. SdParam::reservedName + Sd::rSWITCHES),
  1044. parm))
  1045. return 0;
  // NOTE(review): charSwitches is never used in this function; the
  // switches are recorded in sdBuilder.switcher.
  1046. Vector<UnivChar> charSwitches;
  1047. if (parm.type == SdParam::reservedName + Sd::rSWITCHES) {
  1048. if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
  1049. return 0;
  // Read pairs of character numbers until FEATURES is seen.
  1050. for (;;) {
  1051. SyntaxChar c = parm.n;
  1052. if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
  1053. return 0;
  1054. sdBuilder.switcher.addSwitch(c, parm.n);
  1055. if (!parseSdParam(AllowedSdParams(SdParam::number,
  1056. SdParam::reservedName
  1057. + Sd::rFEATURES),
  1058. parm))
  1059. return 0;
  1060. if (parm.type != SdParam::number)
  1061. break;
  1062. }
  1063. }
  // At this point parm holds the FEATURES keyword.
  1064. const StandardSyntaxSpec *spec = lookupSyntax(id);
  1065. if (spec) {
  1066. if (!setStandardSyntax(*sdBuilder.syntax,
  1067. *spec,
  1068. sdBuilder.sd->docCharset(),
  1069. sdBuilder.switcher))
  1070. sdBuilder.valid = 0;
  1071. }
  1072. else {
  1073. Boolean givenError;
  1074. if (referencePublic(id, PublicId::SYNTAX, givenError)) {
  1075. sdBuilder.externalSyntax = 1;
  // The external entity is parsed with its own SdParam, so parm is left
  // untouched while the entity is read.
  1076. SdParam parm2;
  1077. if (!parseSdParam(AllowedSdParams(SdParam::reservedName
  1078. + Sd::rSHUNCHAR),
  1079. parm2))
  1080. return 0;
  1081. if (!sdParseExplicitSyntax(sdBuilder, parm2))
  1082. return 0;
  1083. }
  1084. else {
  // Report an unknown syntax only if referencePublic() did not already
  // report an error itself.
  1085. if (!givenError)
  1086. message(ParserMessages::unknownPublicSyntax,
  1087. StringMessageArg(id.string()));
  1088. sdBuilder.valid = 0;
  1089. }
  1090. }
  1091. }
  1092. else {
  1093. if (!sdParseExplicitSyntax(sdBuilder, parm))
  1094. return 0;
  1095. }
  1096. if (!sdBuilder.sd->scopeInstance()) {
  1097. // we know the significant chars now
  1098. ISet<WideChar> invalidSgmlChar;
  1099. sdBuilder.syntax->checkSgmlChar(sdBuilder.sd->docCharset(),
  1100. 0,
  1101. invalidSgmlChar);
  1102. if (!invalidSgmlChar.isEmpty())
  1103. message(ParserMessages::invalidSgmlChar, CharsetMessageArg(invalidSgmlChar));
  1104. }
  1105. checkSyntaxNamelen(*sdBuilder.syntax);
  1106. checkSwitchesMarkup(sdBuilder.switcher);
  1107. return 1;
  1108. }
  1109. Boolean Parser::sdParseExplicitSyntax(SdBuilder &sdBuilder,
  1110. SdParam &parm)
  1111. {
  1112. typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &);
  1113. static SdParser parsers[] = {
  1114. &Parser::sdParseShunchar,
  1115. &Parser::sdParseSyntaxCharset,
  1116. &Parser::sdParseFunction,
  1117. &Parser::sdParseNaming,
  1118. &Parser::sdParseDelim,
  1119. &Parser::sdParseNames,
  1120. &Parser::sdParseQuantity
  1121. };
  1122. for (size_t i = 0; i < SIZEOF(parsers); i++)
  1123. if (!(this->*(parsers[i]))(sdBuilder, parm))
  1124. return 0;
  1125. return 1;
  1126. }
  1127. const StandardSyntaxSpec *Parser::lookupSyntax(const PublicId &id)
  1128. {
  1129. PublicId::OwnerType ownerType;
  1130. if (!id.getOwnerType(ownerType) || ownerType != PublicId::ISO)
  1131. return 0;
  1132. StringC str;
  1133. if (!id.getOwner(str))
  1134. return 0;
  1135. if (str != sd().execToDoc("ISO 8879:1986")
  1136. && str != sd().execToDoc("ISO 8879-1986"))
  1137. return 0;
  1138. PublicId::TextClass textClass;
  1139. if (!id.getTextClass(textClass) || textClass != PublicId::SYNTAX)
  1140. return 0;
  1141. if (!id.getDescription(str))
  1142. return 0;
  1143. if (str == sd().execToDoc("Reference"))
  1144. return &refSyntax;
  1145. if (str == sd().execToDoc("Core"))
  1146. return &coreSyntax;
  1147. return 0;
  1148. }
  1149. Boolean Parser::sdParseSyntaxCharset(SdBuilder &sdBuilder, SdParam &parm)
  1150. {
  1151. UnivCharsetDesc desc;
  1152. if (!sdParseCharset(sdBuilder, parm, 0, sdBuilder.syntaxCharsetDecl, desc))
  1153. return 0;
  1154. sdBuilder.syntaxCharset.set(desc);
  1155. checkSwitches(sdBuilder.switcher, sdBuilder.syntaxCharset);
  1156. for (size_t i = 0; i < sdBuilder.switcher.nSwitches(); i++)
  1157. if (!sdBuilder.syntaxCharsetDecl.charDeclared(sdBuilder.switcher.switchTo(i)))
  1158. message(ParserMessages::switchNotInCharset,
  1159. NumberMessageArg(sdBuilder.switcher.switchTo(i)));
  1160. ISet<WideChar> missing;
  1161. findMissingMinimum(sdBuilder.syntaxCharset, missing);
  1162. if (!missing.isEmpty())
  1163. message(ParserMessages::missingMinimumChars,
  1164. CharsetMessageArg(missing));
  1165. return 1;
  1166. }
  1167. Boolean Parser::sdParseShunchar(SdBuilder &sdBuilder, SdParam &parm)
  1168. {
  1169. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE,
  1170. SdParam::reservedName + Sd::rCONTROLS,
  1171. SdParam::number), parm))
  1172. return 0;
  1173. if (parm.type == SdParam::reservedName + Sd::rNONE) {
  1174. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET),
  1175. parm))
  1176. return 0;
  1177. return 1;
  1178. }
  1179. if (parm.type == SdParam::reservedName + Sd::rCONTROLS)
  1180. sdBuilder.syntax->setShuncharControls();
  1181. else {
  1182. if (parm.n <= charMax)
  1183. sdBuilder.syntax->addShunchar(Char(parm.n));
  1184. }
  1185. for (;;) {
  1186. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET,
  1187. SdParam::number), parm))
  1188. return 0;
  1189. if (parm.type != SdParam::number)
  1190. break;
  1191. if (parm.n <= charMax)
  1192. sdBuilder.syntax->addShunchar(Char(parm.n));
  1193. }
  1194. return 1;
  1195. }
  // Parse the function character part of an explicit concrete syntax.
  //
  // First the three standard functions RE, RS and SPACE are read, each as a
  // keyword followed by a character number.  Then any number of added
  // functions follow, each as a name, a function class keyword (FUNCHAR,
  // MSICHAR, MSOCHAR, MSSCHAR or SEPCHAR) and a character number.  The list
  // ends when LCNMSTRT is read in place of a function class; the name read
  // just before it is expected to be NAMING.
  //
  // With an external syntax (sdBuilder.externalSyntax) a function name may
  // also be given as a parameter literal.
  //
  // Returns 0 if a parameter cannot be parsed.  A standard function whose
  // character fails checkNotFunction() clears sdBuilder.valid.
  1196. Boolean Parser::sdParseFunction(SdBuilder &sdBuilder, SdParam &parm)
  1197. {
  // The array index is also used as the Syntax::StandardFunction value.
  1198. static Sd::ReservedName standardNames[3] = {
  1199. Sd::rRE, Sd::rRS, Sd::rSPACE
  1200. };
  1201. for (int i = 0; i < 3; i++) {
  1202. if (!parseSdParam(AllowedSdParams(SdParam::reservedName
  1203. + standardNames[i]),
  1204. parm))
  1205. return 0;
  1206. if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
  1207. return 0;
  1208. Char c;
  1209. if (translateSyntax(sdBuilder, parm.n, c)) {
  1210. if (checkNotFunction(*sdBuilder.syntax, c))
  1211. sdBuilder.syntax->setStandardFunction(Syntax::StandardFunction(i), c);
  1212. else
  1213. sdBuilder.valid = 0;
  1214. }
  1215. }
  1216. Boolean haveMsichar = 0;
  1217. Boolean haveMsochar = 0;
  // One iteration per added function; the loop is left when LCNMSTRT is read.
  1218. for (;;) {
  1219. if (!parseSdParam(sdBuilder.externalSyntax
  1220. ? AllowedSdParams(SdParam::name, SdParam::paramLiteral)
  1221. : AllowedSdParams(SdParam::name),
  1222. parm))
  1223. return 0;
  1224. Boolean nameWasLiteral;
  1225. Boolean invalidName = 0;
  1226. StringC name;
  1227. if (parm.type == SdParam::paramLiteral) {
  1228. nameWasLiteral = 1;
  1229. if (!translateSyntax(sdBuilder, parm.paramLiteralText, name))
  1230. invalidName = 1;
  1231. }
  1232. else {
  // A plain name token is translated later, once it is known that it is
  // not the NAMING keyword.
  1233. parm.token.swap(name);
  1234. nameWasLiteral = 0;
  1235. }
  // LCNMSTRT is accepted here only if the name was not a literal.
  1236. if (!parseSdParam(nameWasLiteral
  1237. ? AllowedSdParams(SdParam::reservedName + Sd::rFUNCHAR,
  1238. SdParam::reservedName + Sd::rMSICHAR,
  1239. SdParam::reservedName + Sd::rMSOCHAR,
  1240. SdParam::reservedName + Sd::rMSSCHAR,
  1241. SdParam::reservedName + Sd::rSEPCHAR)
  1242. : AllowedSdParams(SdParam::reservedName + Sd::rFUNCHAR,
  1243. SdParam::reservedName + Sd::rMSICHAR,
  1244. SdParam::reservedName + Sd::rMSOCHAR,
  1245. SdParam::reservedName + Sd::rMSSCHAR,
  1246. SdParam::reservedName + Sd::rSEPCHAR,
  1247. SdParam::reservedName + Sd::rLCNMSTRT),
  1248. parm))
  1249. return 0;
  1250. if (parm.type == SdParam::reservedName + Sd::rLCNMSTRT) {
  // End of the function list: the preceding name should have been NAMING.
  1251. if (name != sd().reservedName(Sd::rNAMING))
  1252. message(ParserMessages::namingBeforeLcnmstrt,
  1253. StringMessageArg(name));
  1254. break;
  1255. }
  1256. if (!nameWasLiteral) {
  1257. StringC tem;
  1258. name.swap(tem);
  1259. if (!translateName(sdBuilder, tem, name))
  1260. invalidName = 1;
  1261. }
  1262. Syntax::FunctionClass functionClass;
  1263. switch (parm.type) {
  1264. case SdParam::reservedName + Sd::rFUNCHAR:
  1265. functionClass = Syntax::cFUNCHAR;
  1266. break;
  1267. case SdParam::reservedName + Sd::rMSICHAR:
  1268. haveMsichar = 1;
  1269. functionClass = Syntax::cMSICHAR;
  1270. break;
  1271. case SdParam::reservedName + Sd::rMSOCHAR:
  1272. haveMsochar = 1;
  1273. functionClass = Syntax::cMSOCHAR;
  1274. break;
  1275. case SdParam::reservedName + Sd::rMSSCHAR:
  1276. functionClass = Syntax::cMSSCHAR;
  1277. break;
  1278. case SdParam::reservedName + Sd::rSEPCHAR:
  1279. functionClass = Syntax::cSEPCHAR;
  1280. break;
  1281. default:
  // functionClass is not assigned on this path.
  1282. CANNOT_HAPPEN();
  1283. }
  1284. if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
  1285. return 0;
  1286. Char c;
  // The function is added only if its character translates, is not
  // rejected by checkNotFunction(), and the name was valid; a name
  // already registered is reported as a duplicate.
  1287. if (translateSyntax(sdBuilder, parm.n, c)
  1288. && checkNotFunction(*sdBuilder.syntax, c)
  1289. && !invalidName) {
  1290. Char tem;
  1291. if (sdBuilder.syntax->lookupFunctionChar(name, &tem))
  1292. message(ParserMessages::duplicateFunctionName, StringMessageArg(name));
  1293. else
  1294. sdBuilder.syntax->addFunctionChar(name, functionClass, c);
  1295. }
  1296. }
  1297. if (haveMsochar && !haveMsichar)
  1298. message(ParserMessages::msocharRequiresMsichar);
  1299. return 1;
  1300. }
  // Parse the naming rules of an explicit concrete syntax: the LCNMSTRT and
  // UCNMSTRT lists, the LCNMCHAR and UCNMCHAR lists, and then NAMECASE with
  // its GENERAL and ENTITY settings.  The first parameter expected here is
  // the content of the LCNMSTRT list, not the LCNMSTRT keyword itself.
  //
  // The outer do-loop runs twice: with isNamechar == 0 for the name start
  // characters and with isNamechar == 1 for the name characters.  In each
  // pass a first inner loop collects the lower-case list into lc, and a
  // second inner loop reads the upper-case list and pairs its characters,
  // in order, with those from lc.  Each pair is added to the pass's set and,
  // when the two differ, recorded as a substitution in sdBuilder.syntax.
  //
  // With an external syntax, list entries may also be numbers and ranges
  // written with an ellipsis; otherwise each list is a single literal.
  //
  // Returns 0 if a parameter cannot be parsed.  Characters rejected by
  // checkNmchars(), or present in both sets, clear sdBuilder.valid.
  1301. Boolean Parser::sdParseNaming(SdBuilder &sdBuilder, SdParam &parm)
  1302. {
  // keys[isNamechar * 2] ends the lower-case list of a pass and
  // keys[isNamechar * 2 + 1] ends its upper-case list.
  1303. static Sd::ReservedName keys[4] = {
  1304. Sd::rUCNMSTRT, Sd::rLCNMCHAR, Sd::rUCNMCHAR, Sd::rNAMECASE
  1305. };
  1306. int isNamechar = 0;
  1307. ISet<Char> nameStartChar;
  1308. ISet<Char> nameChar;
  1309. do {
  // lc accumulates the lower-case list.  rangeIndex holds the positions in
  // lc at which a range starts; the range's end is the next element of lc.
  1310. String<SyntaxChar> lc;
  1311. Vector<size_t> rangeIndex;
  1312. Boolean first = 1;
  // allowThrough is set when the previous literal left a character that an
  // ellipsis may follow.
  1313. Boolean allowThrough = 0;
  1314. for (;;) {
  1315. if (!parseSdParam(sdBuilder.externalSyntax
  1316. ? AllowedSdParams(SdParam::reservedName
  1317. + keys[isNamechar * 2],
  1318. SdParam::paramLiteral,
  1319. SdParam::number,
  1320. SdParam::ellipsis)
  1321. : (first
  1322. ? AllowedSdParams(SdParam::paramLiteral)
  1323. : AllowedSdParams(SdParam::reservedName
  1324. + keys[isNamechar * 2])),
  1325. parm))
  1326. return 0;
  1327. first = 0;
  1328. Boolean wasRange = 0;
  1329. sdParamConvertToLiteral(parm);
  1330. if (parm.type == SdParam::ellipsis) {
  1331. if (!allowThrough)
  1332. message(ParserMessages::sdInvalidEllipsis);
  1333. if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
  1334. SdParam::number),
  1335. parm))
  1336. return 0;
  1337. sdParamConvertToLiteral(parm);
  1338. if (parm.paramLiteralText.size() == 0)
  1339. message(ParserMessages::sdInvalidEllipsis);
  1340. else if (allowThrough) {
  // n is the end of the range; the start is the last character already
  // in lc.  A range is recorded only when the end is more than one above
  // the start.
  // NOTE(review): this indexes lc[lc.size() - 1]; confirm lc cannot be
  // empty here when allowThrough is set.
  1341. SyntaxChar n = parm.paramLiteralText[0];
  1342. if (n < lc[lc.size() - 1])
  1343. message(ParserMessages::sdInvalidRange);
  1344. else if (n > lc[lc.size() - 1] + 1)
  1345. rangeIndex.push_back(lc.size() - 1);
  1346. }
  1347. wasRange = 1;
  1348. }
  1349. if (parm.type != SdParam::paramLiteral)
  1350. break;
  1351. lc += parm.paramLiteralText;
  // NOTE(review): size() is unsigned, so an empty literal after an
  // ellipsis makes this subtraction wrap around rather than go negative.
  1352. allowThrough = (parm.paramLiteralText.size() - wasRange) > 0;
  1353. }
  // State for walking lc while the upper-case list is read:
  // lcPos is the next unread position in lc, rangeIndexPos the next
  // unread entry of rangeIndex, and rangeLeft/nextRangeChar describe the
  // part of the current lower-case range not yet consumed.
  1354. size_t lcPos = 0;
  1355. size_t rangeIndexPos = 0;
  1356. unsigned long rangeLeft = 0;
  1357. SyntaxChar nextRangeChar;
  1358. ISet<Char> &set = isNamechar ? nameChar : nameStartChar;
  // chars holds the literal read in the previous iteration, which is
  // processed only after the following parameter is known.
  1359. String<SyntaxChar> chars;
  // runOut is set when the lower-case list is exhausted before the
  // upper-case list.
  1360. Boolean runOut = 0;
  1361. first = 1;
  1362. for (;;) {
  1363. if (!parseSdParam(sdBuilder.externalSyntax
  1364. ? AllowedSdParams(SdParam::reservedName
  1365. + keys[isNamechar * 2 + 1],
  1366. SdParam::paramLiteral,
  1367. SdParam::number,
  1368. SdParam::ellipsis)
  1369. : (first
  1370. ? AllowedSdParams(SdParam::paramLiteral)
  1371. : AllowedSdParams(SdParam::reservedName
  1372. + keys[isNamechar * 2 + 1])),
  1373. parm))
  1374. return 0;
  1375. sdParamConvertToLiteral(parm);
  1376. first = 0;
  1377. Boolean isRange = parm.type == SdParam::ellipsis;
  // When an ellipsis follows, the last pending character is the start of
  // the range and is handled by the range code below.
  1378. size_t nChars = chars.size();
  1379. if (nChars)
  1380. nChars -= isRange;
  1381. for (size_t i = 0; i < nChars; i++) {
  // Start consuming a lower-case range if one begins at lcPos.
  1382. if (rangeLeft == 0
  1383. && rangeIndexPos < rangeIndex.size()
  1384. && rangeIndex[rangeIndexPos] == lcPos) {
  1385. rangeLeft = 1 + lc[lcPos + 1] - lc[lcPos];
  1386. nextRangeChar = lc[lcPos];
  1387. lcPos += 2;
  1388. rangeIndexPos += 1;
  1389. }
  1390. Char c;
  1391. if (rangeLeft > 0) {
  1392. rangeLeft--;
  1393. c = nextRangeChar++;
  1394. }
  1395. else if (lcPos < lc.size())
  1396. c = lc[lcPos++];
  1397. else {
  // No lower-case character left: pair the character with itself.
  1398. runOut = 1;
  1399. c = chars[i];
  1400. }
  1401. // map from c to chars[i]
  1402. Char transLc, transUc;
  1403. if (translateSyntax(sdBuilder, c, transLc)
  1404. && translateSyntax(sdBuilder, chars[i], transUc)) {
  1405. set.add(transLc);
  1406. if (transLc != transUc) {
  1407. set.add(transUc);
  1408. sdBuilder.syntax->addSubst(transLc, transUc);
  1409. }
  1410. }
  1411. }
  1412. if (isRange) {
  1413. if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
  1414. SdParam::number),
  1415. parm))
  1416. return 0;
  1417. sdParamConvertToLiteral(parm);
  1418. if (chars.size() == 0 || parm.paramLiteralText.size() == 0)
  1419. message(ParserMessages::sdInvalidEllipsis);
  1420. else {
  // The upper-case range runs from the last pending character to the
  // first character of the literal after the ellipsis.
  1421. SyntaxChar start = chars[chars.size() - 1];
  1422. SyntaxChar end = parm.paramLiteralText[0];
  1423. if (start > end)
  1424. message(ParserMessages::sdInvalidRange);
  1425. else {
  1426. size_t count = end + 1 - start;
  1427. while (count > 0) {
  1428. if (rangeLeft == 0
  1429. && rangeIndexPos < rangeIndex.size()
  1430. && rangeIndex[rangeIndexPos] == lcPos) {
  1431. rangeLeft = 1 + lc[lcPos + 1] - lc[lcPos];
  1432. nextRangeChar = lc[lcPos];
  1433. lcPos += 2;
  1434. rangeIndexPos += 1;
  1435. }
  1436. Char c;
  1437. if (rangeLeft > 0) {
  1438. rangeLeft--;
  1439. c = nextRangeChar++;
  1440. }
  1441. else if (lcPos < lc.size())
  1442. c = lc[lcPos++];
  1443. else {
  1444. c = start;
  1445. runOut = 1;
  1446. }
  // When the lower-case and upper-case characters coincide and more of
  // the range remains, a stretch of n characters is handled with a
  // single translateRange() call instead of one character at a time.
  // NOTE(review): the range passed to translateRange() extends to
  // start + (count - 1) although only n characters are consumed here;
  // confirm whether start + (n - 1) was intended.
  // NOTE(review): rangeLeft is not reduced after a stretch is consumed
  // this way; confirm that is intended.
  1447. if (c == start && count > 1 && (runOut || rangeLeft > 0)) {
  1448. size_t n;
  1449. if (runOut)
  1450. n = count;
  1451. else if (rangeLeft < count)
  1452. n = rangeLeft + 1;
  1453. else
  1454. n = count;
  1455. translateRange(sdBuilder, start, start + (count - 1), set);
  1456. count -= n;
  1457. start += n;
  1458. }
  1459. else {
  1460. Char transLc, transUc;
  1461. if (translateSyntax(sdBuilder, c, transLc)
  1462. && translateSyntax(sdBuilder, start, transUc)) {
  1463. set.add(transLc);
  1464. if (transLc != transUc) {
  1465. set.add(transUc);
  1466. sdBuilder.syntax->addSubst(transLc, transUc);
  1467. }
  1468. }
  1469. count--;
  1470. start++;
  1471. }
  1472. }
  1473. }
  1474. }
  // The first character of the literal after the ellipsis was the range
  // end and has been handled; keep only the rest as pending characters.
  1475. chars.resize(0);
  1476. if (parm.type != SdParam::paramLiteral)
  1477. break;
  1478. chars.append(parm.paramLiteralText.data() + 1,
  1479. parm.paramLiteralText.size() - 1);
  1480. }
  1481. else if (parm.type == SdParam::paramLiteral)
  1482. parm.paramLiteralText.swap(chars);
  1483. else
  1484. break;
  1485. }
  // The two lists must pair up exactly.  Running out of lower-case
  // characters is tolerated only for an external syntax.
  1486. if ((runOut && !sdBuilder.externalSyntax)
  1487. || rangeLeft > 0 || lcPos < lc.size())
  1488. message(isNamechar
  1489. ? ParserMessages::nmcharLength
  1490. : ParserMessages::nmstrtLength);
  1491. if (!checkNmchars(set, *sdBuilder.syntax))
  1492. sdBuilder.valid = 0;
  1493. } while (!isNamechar++);
  // No character may be both a name start character and a name character.
  1494. ISet<WideChar> bad;
  1495. intersectCharSets(nameStartChar, nameChar, bad);
  1496. if (!bad.isEmpty()) {
  1497. sdBuilder.valid = 0;
  1498. message(ParserMessages::nmcharNmstrt, CharsetMessageArg(bad));
  1499. }
  1500. sdBuilder.syntax->addNameStartCharacters(nameStartChar);
  1501. sdBuilder.syntax->addNameCharacters(nameChar);
  // The NAMECASE keyword was consumed by the loop above; read its
  // GENERAL and ENTITY settings.
  1502. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rGENERAL),
  1503. parm))
  1504. return 0;
  1505. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
  1506. SdParam::reservedName + Sd::rYES),
  1507. parm))
  1508. return 0;
  1509. sdBuilder.syntax->setNamecaseGeneral(parm.type
  1510. == SdParam::reservedName + Sd::rYES);
  1511. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rENTITY),
  1512. parm))
  1513. return 0;
  1514. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
  1515. SdParam::reservedName + Sd::rYES),
  1516. parm))
  1517. return 0;
  1518. sdBuilder.syntax->setNamecaseEntity(parm.type
  1519. == SdParam::reservedName + Sd::rYES);
  1520. return 1;
  1521. }
  1522. Boolean Parser::checkNmchars(const ISet<Char> &set, const Syntax &syntax)
  1523. {
  1524. Boolean valid = 1;
  1525. ISet<WideChar> bad;
  1526. intersectCharSets(set, *syntax.charSet(Syntax::nameStart), bad);
  1527. if (!bad.isEmpty()) {
  1528. message(ParserMessages::nmcharLetter, CharsetMessageArg(bad));
  1529. valid = 0;
  1530. bad.clear();
  1531. }
  1532. intersectCharSets(set, *syntax.charSet(Syntax::digit), bad);
  1533. if (!bad.isEmpty()) {
  1534. message(ParserMessages::nmcharDigit, CharsetMessageArg(bad));
  1535. valid = 0;
  1536. bad.clear();
  1537. }
  1538. Char funChar;
  1539. if (syntax.getStandardFunction(Syntax::fRE, funChar)
  1540. && set.contains(funChar)) {
  1541. message(ParserMessages::nmcharRe, NumberMessageArg(funChar));
  1542. valid = 0;
  1543. }
  1544. if (syntax.getStandardFunction(Syntax::fRS, funChar)
  1545. && set.contains(funChar)) {
  1546. message(ParserMessages::nmcharRs, NumberMessageArg(funChar));
  1547. valid = 0;
  1548. }
  1549. if (syntax.getStandardFunction(Syntax::fSPACE, funChar)
  1550. && set.contains(funChar)) {
  1551. message(ParserMessages::nmcharSpace, NumberMessageArg(funChar));
  1552. valid = 0;
  1553. }
  1554. intersectCharSets(set, *syntax.charSet(Syntax::sepchar), bad);
  1555. if (!bad.isEmpty()) {
  1556. message(ParserMessages::nmcharSepchar, CharsetMessageArg(bad));
  1557. valid = 0;
  1558. }
  1559. return valid;
  1560. }
  1561. // Result is a ISet<WideChar>, so it can be used with CharsetMessageArg.
  1562. void Parser::intersectCharSets(const ISet<Char> &s1, const ISet<Char> &s2,
  1563. ISet<WideChar> &inter)
  1564. {
  1565. ISetIter<Char> i1(s1);
  1566. ISetIter<Char> i2(s2);
  1567. Char min1, max1, min2, max2;
  1568. if (!i1.next(min1, max1))
  1569. return;
  1570. if (!i2.next(min2, max2))
  1571. return;
  1572. for (;;) {
  1573. if (max1 < min2) {
  1574. if (!i1.next(min1, max1))
  1575. break;
  1576. }
  1577. else if (max2 < min1) {
  1578. if (!i2.next(min2, max2))
  1579. break;
  1580. }
  1581. else {
  1582. // min2 <= max1
  1583. // min1 <= max2
  1584. Char min = min1 > min2 ? min1 : min2;
  1585. Char max = max1 < max2 ? max1 : max2;
  1586. inter.addRange(min, max);
  1587. if (!i1.next(min1, max1))
  1588. break;
  1589. if (!i2.next(min2, max2))
  1590. break;
  1591. }
  1592. }
  1593. }
  1594. Boolean Parser::sdParseDelim(SdBuilder &sdBuilder, SdParam &parm)
  1595. {
  1596. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rDELIM),
  1597. parm))
  1598. return 0;
  1599. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rGENERAL),
  1600. parm))
  1601. return 0;
  1602. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
  1603. parm))
  1604. return 0;
  1605. PackedBoolean delimGeneralSpecified[Syntax::nDelimGeneral];
  1606. for (int i = 0; i < Syntax::nDelimGeneral; i++)
  1607. delimGeneralSpecified[i] = 0;
  1608. for (;;) {
  1609. if (!parseSdParam(AllowedSdParams(SdParam::generalDelimiterName,
  1610. SdParam::reservedName + Sd::rSHORTREF),
  1611. parm))
  1612. return 0;
  1613. if (parm.type == SdParam::reservedName + Sd::rSHORTREF)
  1614. break;
  1615. Syntax::DelimGeneral delimGeneral = parm.delimGeneralIndex;
  1616. if (delimGeneralSpecified[delimGeneral])
  1617. message(ParserMessages::duplicateDelimGeneral,
  1618. StringMessageArg(sd().generalDelimiterName(delimGeneral)));
  1619. if (!parseSdParam(sdBuilder.externalSyntax
  1620. ? AllowedSdParams(SdParam::paramLiteral,
  1621. SdParam::number)
  1622. : AllowedSdParams(SdParam::paramLiteral),
  1623. parm))
  1624. return 0;
  1625. sdParamConvertToLiteral(parm);
  1626. StringC str;
  1627. if (parm.paramLiteralText.size() == 0)
  1628. message(ParserMessages::sdEmptyDelimiter);
  1629. else if (translateSyntax(sdBuilder, parm.paramLiteralText, str)) {
  1630. const SubstTable<Char> *table = sdBuilder.syntax->generalSubstTable();
  1631. for (size_t i = 0; i < str.size(); i++)
  1632. table->subst(str[i]);
  1633. if (checkGeneralDelim(*sdBuilder.syntax, str)
  1634. && !delimGeneralSpecified[delimGeneral])
  1635. sdBuilder.syntax->setDelimGeneral(delimGeneral, str);
  1636. else
  1637. sdBuilder.valid = 0;
  1638. }
  1639. delimGeneralSpecified[delimGeneral] = 1;
  1640. }
  1641. if (!setRefDelimGeneral(*sdBuilder.syntax,
  1642. sdBuilder.syntaxCharset,
  1643. sdBuilder.sd->docCharset(),
  1644. sdBuilder.switcher))
  1645. sdBuilder.valid = 0;
  1646. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF,
  1647. SdParam::reservedName + Sd::rNONE),
  1648. parm))
  1649. return 0;
  1650. if (parm.type == SdParam::reservedName + Sd::rSGMLREF) {
  1651. if (!addRefDelimShortref(*sdBuilder.syntax,
  1652. sdBuilder.syntaxCharset,
  1653. sdBuilder.sd->docCharset(),
  1654. sdBuilder.switcher))
  1655. sdBuilder.valid = 0;
  1656. }
  1657. String<SyntaxChar> lastLiteral;
  1658. for (;;) {
  1659. if (!parseSdParam(sdBuilder.externalSyntax
  1660. ? AllowedSdParams(SdParam::paramLiteral,
  1661. SdParam::number,
  1662. SdParam::ellipsis,
  1663. SdParam::reservedName + Sd::rNAMES)
  1664. : AllowedSdParams(SdParam::paramLiteral,
  1665. SdParam::reservedName + Sd::rNAMES),
  1666. parm))
  1667. return 0;
  1668. sdParamConvertToLiteral(parm);
  1669. if (parm.type == SdParam::ellipsis) {
  1670. if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
  1671. SdParam::number),
  1672. parm))
  1673. return 0;
  1674. sdParamConvertToLiteral(parm);
  1675. if (parm.paramLiteralText.size() == 0)
  1676. message(ParserMessages::sdEmptyDelimiter);
  1677. else if (lastLiteral.size() != 1
  1678. || parm.paramLiteralText.size() != 1)
  1679. message(ParserMessages::sdInvalidEllipsis);
  1680. else if (parm.paramLiteralText[0] < lastLiteral[0])
  1681. message(ParserMessages::sdInvalidRange);
  1682. else if (parm.paramLiteralText[0] != lastLiteral[0]) {
  1683. ISet<Char> shortrefChars;
  1684. translateRange(sdBuilder,
  1685. lastLiteral[0] + 1,
  1686. parm.paramLiteralText[0],
  1687. shortrefChars);
  1688. ISet<WideChar> duplicates;
  1689. intersectCharSets(shortrefChars,
  1690. sdBuilder.syntax->delimShortrefSimple(),
  1691. duplicates);
  1692. int nComplexShortrefs = sdBuilder.syntax->nDelimShortrefComplex();
  1693. for (int i = 0; i < nComplexShortrefs; i++) {
  1694. const StringC &delim = sdBuilder.syntax->delimShortrefComplex(i);
  1695. if (delim.size() == 1 && shortrefChars.contains(delim[0]))
  1696. duplicates.add(delim[0]);
  1697. }
  1698. if (!duplicates.isEmpty())
  1699. message(ParserMessages::duplicateDelimShortrefSet,
  1700. CharsetMessageArg(duplicates));
  1701. sdBuilder.syntax->addDelimShortrefs(shortrefChars,
  1702. sdBuilder.sd->docCharset());
  1703. }
  1704. lastLiteral.resize(0);
  1705. }
  1706. else if (parm.type == SdParam::paramLiteral) {
  1707. parm.paramLiteralText.swap(lastLiteral);
  1708. StringC str;
  1709. if (lastLiteral.size() == 0)
  1710. message(ParserMessages::sdEmptyDelimiter);
  1711. else if (translateSyntax(sdBuilder, lastLiteral, str)) {
  1712. const SubstTable<Char> *table = sdBuilder.syntax->generalSubstTable();
  1713. for (size_t i = 0; i < str.size(); i++)
  1714. table->subst(str[i]);
  1715. if (str.size() == 1
  1716. || checkShortrefDelim(*sdBuilder.syntax,
  1717. sdBuilder.sd->docCharset(),
  1718. str)) {
  1719. if (sdBuilder.syntax->isValidShortref(str))
  1720. message(ParserMessages::duplicateDelimShortref,
  1721. StringMessageArg(str));
  1722. else
  1723. sdBuilder.syntax->addDelimShortref(str,
  1724. sdBuilder.sd->docCharset());
  1725. }
  1726. }
  1727. }
  1728. else
  1729. break;
  1730. }
  1731. return 1;
  1732. }
  1733. Boolean Parser::sdParseNames(SdBuilder &sdBuilder, SdParam &parm)
  1734. {
  1735. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
  1736. parm))
  1737. return 0;
  1738. for (;;) {
  1739. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rQUANTITY,
  1740. SdParam::referenceReservedName),
  1741. parm))
  1742. return 0;
  1743. if (parm.type == SdParam::reservedName + Sd::rQUANTITY)
  1744. break;
  1745. Syntax::ReservedName reservedName = parm.reservedNameIndex;
  1746. if (!parseSdParam(sdBuilder.externalSyntax
  1747. ? AllowedSdParams(SdParam::name, SdParam::paramLiteral)
  1748. : AllowedSdParams(SdParam::name),
  1749. parm))
  1750. return 0;
  1751. StringC transName;
  1752. if (parm.type == SdParam::name
  1753. ? translateName(sdBuilder, parm.token, transName)
  1754. : translateSyntax(sdBuilder, parm.paramLiteralText, transName)) {
  1755. Syntax::ReservedName tem;
  1756. if (sdBuilder.syntax->lookupReservedName(transName, &tem))
  1757. message(ParserMessages::ambiguousReservedName,
  1758. StringMessageArg(transName));
  1759. else {
  1760. if (transName.size() == 0
  1761. || !sdBuilder.syntax->isNameStartCharacter(transName[0])) {
  1762. message(ParserMessages::reservedNameSyntax,
  1763. StringMessageArg(transName));
  1764. transName.resize(0);
  1765. }
  1766. size_t i;
  1767. // Check that its a valid name in the declared syntax
  1768. // (- and . might not be name characters).
  1769. for (i = 1; i < transName.size(); i++)
  1770. if (!sdBuilder.syntax->isNameCharacter(transName[i])) {
  1771. message(ParserMessages::reservedNameSyntax,
  1772. StringMessageArg(transName));
  1773. transName.resize(0);
  1774. break;
  1775. }
  1776. for (i = 0; i < transName.size(); i++)
  1777. sdBuilder.syntax->generalSubstTable()->subst(transName[i]);
  1778. if (sdBuilder.syntax->reservedName(reservedName).size() > 0)
  1779. message(ParserMessages::duplicateReservedName,
  1780. StringMessageArg(syntax().reservedName(reservedName)));
  1781. else if (transName.size() > 0)
  1782. sdBuilder.syntax->setName(reservedName, transName);
  1783. else
  1784. sdBuilder.valid = 0;
  1785. }
  1786. }
  1787. }
  1788. setRefNames(*sdBuilder.syntax, sdBuilder.sd->docCharset());
  1789. static Syntax::ReservedName functionNameIndex[3] = {
  1790. Syntax::rRE, Syntax::rRS, Syntax::rSPACE
  1791. };
  1792. for (int i = 0; i < 3; i++) {
  1793. const StringC &functionName
  1794. = sdBuilder.syntax->reservedName(functionNameIndex[i]);
  1795. Char tem;
  1796. if (sdBuilder.syntax->lookupFunctionChar(functionName, &tem))
  1797. message(ParserMessages::duplicateFunctionName, StringMessageArg(functionName));
  1798. }
  1799. sdBuilder.syntax->enterStandardFunctionNames();
  1800. return 1;
  1801. }
  1802. Boolean Parser::sdParseQuantity(SdBuilder &sdBuilder, SdParam &parm)
  1803. {
  1804. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
  1805. parm))
  1806. return 0;
  1807. for (;;) {
  1808. int final = (sdBuilder.externalSyntax
  1809. ? int(SdParam::eE)
  1810. : SdParam::reservedName + Sd::rFEATURES);
  1811. if (!parseSdParam(AllowedSdParams(SdParam::quantityName, final), parm))
  1812. return 0;
  1813. if (parm.type != SdParam::quantityName)
  1814. break;
  1815. Syntax::Quantity quantity = parm.quantityIndex;
  1816. if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
  1817. return 0;
  1818. sdBuilder.syntax->setQuantity(quantity, parm.n);
  1819. }
  1820. if (sdBuilder.sd->scopeInstance()) {
  1821. for (int i = 0; i < Syntax::nQuantity; i++)
  1822. if (sdBuilder.syntax->quantity(Syntax::Quantity(i))
  1823. < syntax().quantity(Syntax::Quantity(i)))
  1824. message(ParserMessages::scopeInstanceQuantity,
  1825. StringMessageArg(sd().quantityName(Syntax::Quantity(i))));
  1826. }
  1827. return 1;
  1828. }
  1829. Boolean Parser::sdParseFeatures(SdBuilder &sdBuilder, SdParam &parm)
  1830. {
  1831. struct FeatureInfo {
  1832. Sd::ReservedName name;
  1833. enum {
  1834. __none,
  1835. __boolean,
  1836. __number
  1837. } arg;
  1838. };
  1839. static FeatureInfo features[] = {
  1840. { Sd::rMINIMIZE, FeatureInfo::__none },
  1841. { Sd::rDATATAG, FeatureInfo::__boolean },
  1842. { Sd::rOMITTAG, FeatureInfo::__boolean },
  1843. { Sd::rRANK, FeatureInfo::__boolean },
  1844. { Sd::rSHORTTAG, FeatureInfo::__boolean },
  1845. { Sd::rLINK, FeatureInfo::__none },
  1846. { Sd::rSIMPLE, FeatureInfo::__number },
  1847. { Sd::rIMPLICIT, FeatureInfo::__boolean },
  1848. { Sd::rEXPLICIT, FeatureInfo::__number },
  1849. { Sd::rOTHER, FeatureInfo::__none },
  1850. { Sd::rCONCUR, FeatureInfo::__number },
  1851. { Sd::rSUBDOC, FeatureInfo::__number },
  1852. { Sd::rFORMAL, FeatureInfo::__boolean }
  1853. };
  1854. int booleanFeature = 0;
  1855. int numberFeature = 0;
  1856. for (size_t i = 0; i < SIZEOF(features); i++) {
  1857. if (!parseSdParam(AllowedSdParams(SdParam::reservedName
  1858. + features[i].name), parm))
  1859. return 0;
  1860. if (features[i].arg != FeatureInfo::__none) {
  1861. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
  1862. SdParam::reservedName + Sd::rYES),
  1863. parm))
  1864. return 0;
  1865. #if 0
  1866. if (features[i].name == Sd::rDATATAG
  1867. && parm.type == (SdParam::reservedName + Sd::rYES))
  1868. message(ParserMessages::datatagNotImplemented);
  1869. #endif
  1870. if (features[i].arg == FeatureInfo::__number) {
  1871. if (parm.type == SdParam::reservedName + Sd::rYES) {
  1872. if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
  1873. return 0;
  1874. sdBuilder.sd->setNumberFeature(Sd::NumberFeature(numberFeature++),
  1875. parm.n);
  1876. }
  1877. else
  1878. sdBuilder.sd->setNumberFeature(Sd::NumberFeature(numberFeature++),
  1879. 0);
  1880. }
  1881. else
  1882. sdBuilder.sd->setBooleanFeature(Sd::BooleanFeature(booleanFeature++),
  1883. parm.type == (SdParam::reservedName
  1884. + Sd::rYES));
  1885. }
  1886. }
  1887. return 1;
  1888. }
  1889. Boolean Parser::sdParseAppinfo(SdBuilder &, SdParam &parm)
  1890. {
  1891. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rAPPINFO),
  1892. parm))
  1893. return 0;
  1894. Location location(currentLocation());
  1895. if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE,
  1896. SdParam::minimumLiteral),
  1897. parm))
  1898. return 0;
  1899. AppinfoEvent *event;
  1900. if (parm.type == SdParam::minimumLiteral)
  1901. event = new (eventAllocator()) AppinfoEvent(parm.literalText, location);
  1902. else
  1903. event = new (eventAllocator()) AppinfoEvent(location);
  1904. eventHandler().appinfo(event);
  1905. return 1;
  1906. }
  1907. Boolean Parser::translateSyntax(CharSwitcher &switcher,
  1908. const CharsetInfo &syntaxCharset,
  1909. const CharsetInfo &docCharset,
  1910. WideChar syntaxChar,
  1911. Char &docChar)
  1912. {
  1913. syntaxChar = switcher.subst(syntaxChar);
  1914. UnivChar univChar;
  1915. if (syntaxCharset.descToUniv(syntaxChar, univChar)
  1916. && univToDescCheck(docCharset, univChar, docChar))
  1917. return 1;
  1918. message(ParserMessages::translateSyntaxChar, NumberMessageArg(syntaxChar));
  1919. return 0;
  1920. }
  1921. void Parser::translateRange(SdBuilder &sdBuilder, SyntaxChar start,
  1922. SyntaxChar end, ISet<Char> &chars)
  1923. {
  1924. #if 0
  1925. do {
  1926. Char docChar;
  1927. if (!translateSyntax(sdBuilder, start, docChar))
  1928. break;
  1929. chars.add(docChar);
  1930. } while (start++ != end);
  1931. #endif
  1932. for (;;) {
  1933. SyntaxChar doneUpTo = end;
  1934. Boolean gotSwitch = 0;
  1935. WideChar firstSwitch;
  1936. for (size_t i = 0; i < sdBuilder.switcher.nSwitches(); i++) {
  1937. WideChar c = sdBuilder.switcher.switchFrom(i);
  1938. if (start <= c && c <= end) {
  1939. if (!gotSwitch) {
  1940. gotSwitch = 1;
  1941. firstSwitch = c;
  1942. }
  1943. else if (c < firstSwitch)
  1944. firstSwitch = c;
  1945. }
  1946. }
  1947. if (gotSwitch && firstSwitch == start) {
  1948. doneUpTo = start;
  1949. Char docChar;
  1950. if (translateSyntax(sdBuilder, start, docChar))
  1951. chars.add(docChar);
  1952. }
  1953. else {
  1954. if (gotSwitch)
  1955. doneUpTo = firstSwitch - 1;
  1956. Char docChar;
  1957. Number count;
  1958. if (translateSyntaxNoSwitch(sdBuilder, start, docChar, count)) {
  1959. if (count - 1 < doneUpTo - start)
  1960. doneUpTo = start + (count - 1);
  1961. chars.addRange(docChar, docChar + (doneUpTo - start));
  1962. }
  1963. }
  1964. if (doneUpTo == end)
  1965. break;
  1966. start = doneUpTo + 1;
  1967. }
  1968. }
  1969. Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
  1970. WideChar syntaxChar, Char &docChar)
  1971. {
  1972. Number count;
  1973. return translateSyntaxNoSwitch(sdBuilder,
  1974. sdBuilder.switcher.subst(syntaxChar),
  1975. docChar,
  1976. count);
  1977. }
  1978. Boolean Parser::translateSyntaxNoSwitch(SdBuilder &sdBuilder,
  1979. WideChar syntaxChar, Char &docChar,
  1980. Number &count)
  1981. {
  1982. Number n;
  1983. StringC str;
  1984. CharsetDeclRange::Type type;
  1985. const PublicId *id;
  1986. if (sdBuilder.syntaxCharsetDecl.getCharInfo(syntaxChar,
  1987. id,
  1988. type,
  1989. n,
  1990. str,
  1991. count)) {
  1992. ISet<WideChar> docChars;
  1993. switch (type) {
  1994. case CharsetDeclRange::unused:
  1995. break;
  1996. case CharsetDeclRange::string:
  1997. sdBuilder.sd->docCharsetDecl().stringToChar(str, docChars);
  1998. break;
  1999. case CharsetDeclRange::number:
  2000. {
  2001. Number count2;
  2002. sdBuilder.sd->docCharsetDecl().numberToChar(id, n, docChars, count2);
  2003. if (!docChars.isEmpty() && count2 < count)
  2004. count = count2;
  2005. }
  2006. break;
  2007. default:
  2008. CANNOT_HAPPEN();
  2009. }
  2010. if (!docChars.isEmpty()) {
  2011. if (!docChars.isSingleton() && options().warnSgmlDecl)
  2012. message(ParserMessages::ambiguousDocCharacter,
  2013. CharsetMessageArg(docChars));
  2014. ISetIter<WideChar> iter(docChars);
  2015. WideChar min, max;
  2016. if (iter.next(min, max) && min <= charMax) {
  2017. docChar = Char(min);
  2018. return 1;
  2019. }
  2020. }
  2021. }
  2022. UnivChar univChar;
  2023. WideChar alsoMax, count2;
  2024. if (sdBuilder.syntaxCharset.descToUniv(syntaxChar, univChar, alsoMax)
  2025. && univToDescCheck(sdBuilder.sd->docCharset(), univChar, docChar,
  2026. count2)) {
  2027. count = (alsoMax - syntaxChar) + 1;
  2028. if (count2 < count)
  2029. count = count2;
  2030. return 1;
  2031. }
  2032. sdBuilder.valid = 0;
  2033. message(ParserMessages::translateSyntaxChar, NumberMessageArg(syntaxChar));
  2034. return 0;
  2035. }
  2036. Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
  2037. const String<SyntaxChar> &syntaxString,
  2038. StringC &docString)
  2039. {
  2040. docString.resize(0);
  2041. int ret = 1;
  2042. for (size_t i = 0; i < syntaxString.size(); i++) {
  2043. Char c;
  2044. if (translateSyntax(sdBuilder, syntaxString[i], c))
  2045. docString += c;
  2046. else
  2047. ret = 0;
  2048. }
  2049. return ret;
  2050. }
  2051. Boolean Parser::translateName(SdBuilder &sdBuilder,
  2052. const StringC &name,
  2053. StringC &str)
  2054. {
  2055. str.resize(name.size());
  2056. for (size_t i = 0; i < name.size(); i++) {
  2057. UnivChar univChar;
  2058. Boolean ret = sd().docCharset().descToUniv(name[i], univChar);
  2059. // Might switch hyphen or period.
  2060. univChar = translateUniv(univChar, sdBuilder.switcher,
  2061. sdBuilder.syntaxCharset);
  2062. ASSERT(ret != 0);
  2063. if (!univToDescCheck(sdBuilder.sd->docCharset(), univChar, str[i])) {
  2064. message(ParserMessages::translateDocChar, NumberMessageArg(univChar));
  2065. sdBuilder.valid = 0;
  2066. return 0;
  2067. }
  2068. }
  2069. return 1;
  2070. }
  2071. UnivChar Parser::translateUniv(UnivChar univChar,
  2072. CharSwitcher &switcher,
  2073. const CharsetInfo &syntaxCharset)
  2074. {
  2075. WideChar syntaxChar;
  2076. ISet<WideChar> syntaxChars;
  2077. if (syntaxCharset.univToDesc(univChar, syntaxChar, syntaxChars) != 1) {
  2078. message(ParserMessages::missingSyntaxChar,
  2079. NumberMessageArg(univChar));
  2080. return univChar;
  2081. }
  2082. SyntaxChar tem = switcher.subst(syntaxChar);
  2083. if (tem != syntaxChar && !syntaxCharset.descToUniv(tem, univChar))
  2084. message(ParserMessages::translateSyntaxChar, NumberMessageArg(tem));
  2085. return univChar;
  2086. }
  2087. Boolean Parser::checkNotFunction(const Syntax &syn, Char c)
  2088. {
  2089. if (syn.charSet(Syntax::functionChar)->contains(c)) {
  2090. message(ParserMessages::oneFunction, NumberMessageArg(c));
  2091. return 0;
  2092. }
  2093. else
  2094. return 1;
  2095. }
  2096. // Check that it has at most one B sequence and that it
  2097. // is not adjacent to a blank sequence.
  2098. Boolean Parser::checkShortrefDelim(const Syntax &syn,
  2099. const CharsetInfo &charset,
  2100. const StringC &delim)
  2101. {
  2102. Boolean hadB = 0;
  2103. Char letterB = charset.execToDesc('B');
  2104. const ISet<Char> *bSet = syn.charSet(Syntax::blank);
  2105. for (size_t i = 0; i < delim.size(); i++)
  2106. if (delim[i] == letterB) {
  2107. if (hadB) {
  2108. message(ParserMessages::multipleBSequence, StringMessageArg(delim));
  2109. return 0;
  2110. }
  2111. hadB = 1;
  2112. if (i > 0 && bSet->contains(delim[i - 1])) {
  2113. message(ParserMessages::blankAdjacentBSequence,
  2114. StringMessageArg(delim));
  2115. return 0;
  2116. }
  2117. while (i + 1 < delim.size() && delim[i + 1] == letterB)
  2118. i++;
  2119. if (i < delim.size() - 1 && bSet->contains(delim[i + 1])) {
  2120. message(ParserMessages::blankAdjacentBSequence,
  2121. StringMessageArg(delim));
  2122. return 0;
  2123. }
  2124. }
  2125. return 1;
  2126. }
  2127. Boolean Parser::checkGeneralDelim(const Syntax &syn, const StringC &delim)
  2128. {
  2129. const ISet<Char> *functionSet = syn.charSet(Syntax::functionChar);
  2130. if (delim.size() > 0) {
  2131. Boolean allFunction = 1;
  2132. for (size_t i = 0; i < delim.size(); i++)
  2133. if (!functionSet->contains(delim[i]))
  2134. allFunction = 0;
  2135. if (allFunction) {
  2136. message(ParserMessages::generalDelimAllFunction,
  2137. StringMessageArg(delim));
  2138. return 0;
  2139. }
  2140. }
  2141. return 1;
  2142. }
  2143. Boolean Parser::checkSwitches(CharSwitcher &switcher,
  2144. const CharsetInfo &syntaxCharset)
  2145. {
  2146. Boolean valid = 1;
  2147. for (size_t i = 0; i < switcher.nSwitches(); i++) {
  2148. WideChar c[2];
  2149. c[0] = switcher.switchFrom(i);
  2150. c[1] = switcher.switchTo(i);
  2151. for (int j = 0; j < 2; j++) {
  2152. UnivChar univChar;
  2153. if (syntaxCharset.descToUniv(c[j], univChar)) {
  2154. // Check that it is not Digit Lcletter or Ucletter
  2155. if ((UnivCharsetDesc::a <= univChar
  2156. && univChar < UnivCharsetDesc::a + 26)
  2157. || (UnivCharsetDesc::A <= univChar
  2158. && univChar < UnivCharsetDesc::A + 26)
  2159. || (UnivCharsetDesc::zero <= univChar
  2160. && univChar < UnivCharsetDesc::zero + 10)) {
  2161. message(ParserMessages::switchLetterDigit,
  2162. NumberMessageArg(univChar));
  2163. valid = 0;
  2164. }
  2165. }
  2166. }
  2167. }
  2168. return valid;
  2169. }
  2170. Boolean Parser::checkSwitchesMarkup(CharSwitcher &switcher)
  2171. {
  2172. Boolean valid = 1;
  2173. size_t nSwitches = switcher.nSwitches();
  2174. for (size_t i = 0; i < nSwitches; i++)
  2175. if (!switcher.switchUsed(i)) {
  2176. // If the switch wasn't used,
  2177. // then the character wasn't a markup character.
  2178. message(ParserMessages::switchNotMarkup,
  2179. NumberMessageArg(switcher.switchFrom(i)));
  2180. valid = 0;
  2181. }
  2182. return valid;
  2183. }
  2184. void Parser::checkSyntaxNamelen(const Syntax &syn)
  2185. {
  2186. size_t namelen = syn.namelen();
  2187. int i;
  2188. for (i = 0; i < Syntax::nDelimGeneral; i++)
  2189. if (syn.delimGeneral(i).size() > namelen)
  2190. message(ParserMessages::delimiterLength,
  2191. StringMessageArg(syn.delimGeneral(i)),
  2192. NumberMessageArg(namelen));
  2193. for (i = 0; i < syn.nDelimShortrefComplex(); i++)
  2194. if (syn.delimShortrefComplex(i).size() > namelen)
  2195. message(ParserMessages::delimiterLength,
  2196. StringMessageArg(syn.delimShortrefComplex(i)),
  2197. NumberMessageArg(namelen));
  2198. for (i = 0; i < Syntax::nNames; i++)
  2199. if (syn.reservedName(Syntax::ReservedName(i)).size() > namelen
  2200. && options().warnSgmlDecl)
  2201. message(ParserMessages::reservedNameLength,
  2202. StringMessageArg(syn.reservedName(Syntax::ReservedName(i))),
  2203. NumberMessageArg(namelen));
  2204. }
  2205. Boolean Parser::univToDescCheck(const CharsetInfo &charset, UnivChar from,
  2206. Char &to)
  2207. {
  2208. WideChar count;
  2209. return univToDescCheck(charset, from, to, count);
  2210. }
  2211. Boolean Parser::univToDescCheck(const CharsetInfo &charset, UnivChar from,
  2212. Char &to, WideChar &count)
  2213. {
  2214. WideChar c;
  2215. ISet<WideChar> descSet;
  2216. unsigned ret = charset.univToDesc(from, c, descSet, count);
  2217. if (ret > 1) {
  2218. if (options().warnSgmlDecl)
  2219. message(ParserMessages::ambiguousDocCharacter,
  2220. CharsetMessageArg(descSet));
  2221. ret = 1;
  2222. }
  2223. if (ret && c <= charMax) {
  2224. to = Char(c);
  2225. return 1;
  2226. }
  2227. return 0;
  2228. }
  2229. Boolean Parser::parseSdParam(const AllowedSdParams &allow,
  2230. SdParam &parm)
  2231. {
  2232. for (;;) {
  2233. Token token = getToken(mdMode);
  2234. switch (token) {
  2235. case tokenUnrecognized:
  2236. if (reportNonSgmlCharacter())
  2237. break;
  2238. {
  2239. message(ParserMessages::markupDeclarationCharacter,
  2240. StringMessageArg(currentToken()),
  2241. AllowedSdParamsMessageArg(allow, sdPointer()));
  2242. }
  2243. return 0;
  2244. case tokenEe:
  2245. if (allow.param(SdParam::eE)) {
  2246. parm.type = SdParam::eE;
  2247. if (currentMarkup())
  2248. currentMarkup()->addEntityEnd();
  2249. popInputStack();
  2250. return 1;
  2251. }
  2252. message(ParserMessages::sdEntityEnd,
  2253. AllowedSdParamsMessageArg(allow, sdPointer()));
  2254. return 0;
  2255. case tokenS:
  2256. if (currentMarkup())
  2257. currentMarkup()->addS(currentChar());
  2258. break;
  2259. case tokenCom:
  2260. if (!parseComment(sdcomMode))
  2261. return 0;
  2262. break;
  2263. case tokenDso:
  2264. case tokenGrpo:
  2265. case tokenMinusGrpo:
  2266. case tokenPlusGrpo:
  2267. case tokenRni:
  2268. case tokenPeroNameStart:
  2269. case tokenPeroGrpo:
  2270. sdParamInvalidToken(token, allow);
  2271. return 0;
  2272. case tokenLcUcNmchar:
  2273. if (allow.param(SdParam::ellipsis)) {
  2274. extendNameToken(syntax().namelen(), ParserMessages::nameLength);
  2275. getCurrentToken(syntax().generalSubstTable(), parm.token);
  2276. if (parm.token == sd().execToDoc("...")) {
  2277. parm.type = SdParam::ellipsis;
  2278. return 1;
  2279. }
  2280. message(ParserMessages::sdInvalidNameToken,
  2281. StringMessageArg(parm.token),
  2282. AllowedSdParamsMessageArg(allow, sdPointer()));
  2283. }
  2284. else {
  2285. sdParamInvalidToken(token, allow);
  2286. return 0;
  2287. }
  2288. case tokenLita:
  2289. case tokenLit:
  2290. {
  2291. Boolean lita = (token == tokenLita);
  2292. if (allow.param(SdParam::minimumLiteral)) {
  2293. if (!parseMinimumLiteral(lita, parm.literalText))
  2294. return 0;
  2295. parm.type = SdParam::minimumLiteral;
  2296. if (currentMarkup())
  2297. currentMarkup()->addLiteral(parm.literalText);
  2298. }
  2299. else if (allow.param(SdParam::paramLiteral)) {
  2300. if (!parseSdParamLiteral(lita, parm.paramLiteralText))
  2301. return 0;
  2302. parm.type = SdParam::paramLiteral;
  2303. }
  2304. else {
  2305. sdParamInvalidToken(token, allow);
  2306. return 0;
  2307. }
  2308. return 1;
  2309. }
  2310. case tokenMdc:
  2311. if (allow.param(SdParam::mdc)) {
  2312. parm.type = SdParam::mdc;
  2313. if (currentMarkup())
  2314. currentMarkup()->addDelim(Syntax::dMDC);
  2315. return 1;
  2316. }
  2317. sdParamInvalidToken(tokenMdc, allow);
  2318. return 0;
  2319. case tokenNameStart:
  2320. {
  2321. extendNameToken(syntax().namelen(), ParserMessages::nameLength);
  2322. getCurrentToken(syntax().generalSubstTable(), parm.token);
  2323. if (allow.param(SdParam::capacityName)) {
  2324. if (sd().lookupCapacityName(parm.token, parm.capacityIndex)) {
  2325. parm.type = SdParam::capacityName;
  2326. if (currentMarkup())
  2327. currentMarkup()->addName(currentInput());
  2328. return 1;
  2329. }
  2330. }
  2331. if (allow.param(SdParam::referenceReservedName)) {
  2332. if (syntax().lookupReservedName(parm.token,
  2333. &parm.reservedNameIndex)) {
  2334. parm.type = SdParam::referenceReservedName;
  2335. if (currentMarkup())
  2336. currentMarkup()->addName(currentInput());
  2337. return 1;
  2338. }
  2339. }
  2340. if (allow.param(SdParam::generalDelimiterName)) {
  2341. if (sd().lookupGeneralDelimiterName(parm.token,
  2342. parm.delimGeneralIndex)) {
  2343. parm.type = SdParam::generalDelimiterName;
  2344. if (currentMarkup())
  2345. currentMarkup()->addName(currentInput());
  2346. return 1;
  2347. }
  2348. }
  2349. if (allow.param(SdParam::quantityName)) {
  2350. if (sd().lookupQuantityName(parm.token, parm.quantityIndex)) {
  2351. parm.type = SdParam::quantityName;
  2352. if (currentMarkup())
  2353. currentMarkup()->addName(currentInput());
  2354. return 1;
  2355. }
  2356. }
  2357. for (int i = 0;; i++) {
  2358. SdParam::Type t = allow.get(i);
  2359. if (t == SdParam::invalid)
  2360. break;
  2361. if (t >= SdParam::reservedName) {
  2362. Sd::ReservedName sdReservedName
  2363. = Sd::ReservedName(t - SdParam::reservedName);
  2364. if (parm.token == sd().reservedName(sdReservedName)) {
  2365. parm.type = t;
  2366. if (currentMarkup())
  2367. currentMarkup()->addSdReservedName(sdReservedName,
  2368. currentInput());
  2369. return 1;
  2370. }
  2371. }
  2372. }
  2373. if (allow.param(SdParam::name)) {
  2374. parm.type = SdParam::name;
  2375. if (currentMarkup())
  2376. currentMarkup()->addName(currentInput());
  2377. return 1;
  2378. }
  2379. {
  2380. message(ParserMessages::sdInvalidNameToken,
  2381. StringMessageArg(parm.token),
  2382. AllowedSdParamsMessageArg(allow, sdPointer()));
  2383. }
  2384. return 0;
  2385. }
  2386. case tokenDigit:
  2387. if (allow.param(SdParam::number)) {
  2388. extendNumber(syntax().namelen(), ParserMessages::numberLength);
  2389. parm.type = SdParam::number;
  2390. unsigned long n;
  2391. if (!stringToNumber(currentInput()->currentTokenStart(),
  2392. currentInput()->currentTokenLength(),
  2393. n)
  2394. || n > Number(-1)) {
  2395. message(ParserMessages::numberTooBig,
  2396. StringMessageArg(currentToken()));
  2397. parm.n = Number(-1);
  2398. }
  2399. else {
  2400. if (currentMarkup())
  2401. currentMarkup()->addNumber(currentInput());
  2402. parm.n = Number(n);
  2403. }
  2404. Token token = getToken(mdMode);
  2405. if (token == tokenNameStart)
  2406. message(ParserMessages::psRequired);
  2407. currentInput()->ungetToken();
  2408. return 1;
  2409. }
  2410. sdParamInvalidToken(tokenDigit, allow);
  2411. return 0;
  2412. default:
  2413. CANNOT_HAPPEN();
  2414. }
  2415. }
  2416. }
  2417. // This is a separate function, because we might want SyntaxChar
  2418. // to be bigger than Char.
  2419. Boolean Parser::parseSdParamLiteral(Boolean lita, String<SyntaxChar> &str)
  2420. {
  2421. Location loc(currentLocation());
  2422. loc += 1;
  2423. SdText text(loc, lita); // first character of content
  2424. str.resize(0);
  2425. const unsigned refLitlen = Syntax::referenceQuantity(Syntax::qLITLEN);
  2426. Mode mode = lita ? sdplitaMode : sdplitMode;
  2427. int done = 0;
  2428. for (;;) {
  2429. Token token = getToken(mode);
  2430. switch (token) {
  2431. case tokenEe:
  2432. message(ParserMessages::literalLevel);
  2433. return 0;
  2434. case tokenUnrecognized:
  2435. if (reportNonSgmlCharacter())
  2436. break;
  2437. if (options().errorSignificant)
  2438. message(ParserMessages::sdLiteralSignificant,
  2439. StringMessageArg(currentToken()));
  2440. text.addChar(currentChar(), currentLocation());
  2441. break;
  2442. case tokenCroDigit:
  2443. {
  2444. InputSource *in = currentInput();
  2445. Location startLocation = currentLocation();
  2446. in->discardInitial();
  2447. extendNumber(syntax().namelen(), ParserMessages::numberLength);
  2448. unsigned long n;
  2449. Boolean valid;
  2450. if (!stringToNumber(in->currentTokenStart(),
  2451. in->currentTokenLength(),
  2452. n)
  2453. || n > syntaxCharMax) {
  2454. message(ParserMessages::syntaxCharacterNumber,
  2455. StringMessageArg(currentToken()));
  2456. valid = 0;
  2457. }
  2458. else
  2459. valid = 1;
  2460. Owner<Markup> markupPtr;
  2461. if (eventsWanted().wantPrologMarkup()) {
  2462. markupPtr = new Markup;
  2463. markupPtr->addDelim(Syntax::dCRO);
  2464. markupPtr->addNumber(in);
  2465. switch (getToken(refMode)) {
  2466. case tokenRefc:
  2467. markupPtr->addDelim(Syntax::dREFC);
  2468. break;
  2469. case tokenRe:
  2470. markupPtr->addRefEndRe();
  2471. break;
  2472. default:
  2473. break;
  2474. }
  2475. }
  2476. else
  2477. (void)getToken(refMode);
  2478. if (valid)
  2479. text.addChar(SyntaxChar(n),
  2480. Location(new NumericCharRefOrigin(startLocation,
  2481. currentLocation().index()
  2482. + currentInput()->currentTokenLength()
  2483. - startLocation.index(),
  2484. markupPtr),
  2485. 0));
  2486. }
  2487. break;
  2488. case tokenCroNameStart:
  2489. if (!parseNamedCharRef())
  2490. return 0;
  2491. break;
  2492. case tokenLit:
  2493. case tokenLita:
  2494. done = 1;
  2495. break;
  2496. case tokenPeroNameStart:
  2497. case tokenPeroGrpo:
  2498. message(ParserMessages::sdParameterEntity);
  2499. {
  2500. Location loc(currentLocation());
  2501. const Char *p = currentInput()->currentTokenStart();
  2502. for (size_t count = currentInput()->currentTokenLength();
  2503. count > 0;
  2504. count--) {
  2505. text.addChar(*p++, loc);
  2506. loc += 1;
  2507. }
  2508. }
  2509. break;
  2510. case tokenChar:
  2511. if (text.string().size() > refLitlen
  2512. && currentChar() == syntax().standardFunction(Syntax::fRE)) {
  2513. message(ParserMessages::parameterLiteralLength, NumberMessageArg(refLitlen));
  2514. // guess that the closing delimiter has been omitted
  2515. message(ParserMessages::literalClosingDelimiter);
  2516. return 0;
  2517. }
  2518. text.addChar(currentChar(), currentLocation());
  2519. break;
  2520. }
  2521. if (done) break;
  2522. }
  2523. if (text.string().size() > refLitlen)
  2524. message(ParserMessages::parameterLiteralLength,
  2525. NumberMessageArg(refLitlen));
  2526. str = text.string();
  2527. if (currentMarkup())
  2528. currentMarkup()->addSdLiteral(text);
  2529. return 1;
  2530. }
  2531. Boolean Parser::stringToNumber(const Char *s, size_t length,
  2532. unsigned long &result)
  2533. {
  2534. unsigned long n = 0;
  2535. for (; length > 0; length--, s++) {
  2536. int val = sd().digitWeight(*s);
  2537. if (n <= ULONG_MAX/10 && (n *= 10) <= ULONG_MAX - val)
  2538. n += val;
  2539. else
  2540. return 0;
  2541. }
  2542. result = n;
  2543. return 1;
  2544. }
  2545. void Parser::sdParamInvalidToken(Token token,
  2546. const AllowedSdParams &allow)
  2547. {
  2548. message(ParserMessages::sdParamInvalidToken,
  2549. TokenMessageArg(token, mdMode, syntaxPointer(), sdPointer()),
  2550. AllowedSdParamsMessageArg(allow, sdPointer()));
  2551. }
  2552. void Parser::sdParamConvertToLiteral(SdParam &parm)
  2553. {
  2554. if (parm.type == SdParam::number) {
  2555. parm.type = SdParam::paramLiteral;
  2556. parm.paramLiteralText.resize(1);
  2557. parm.paramLiteralText[0] = parm.n;
  2558. }
  2559. }
  2560. AllowedSdParams::AllowedSdParams(SdParam::Type arg1, SdParam::Type arg2,
  2561. SdParam::Type arg3, SdParam::Type arg4,
  2562. SdParam::Type arg5, SdParam::Type arg6)
  2563. {
  2564. allow_[0] = arg1;
  2565. allow_[1] = arg2;
  2566. allow_[2] = arg3;
  2567. allow_[3] = arg4;
  2568. allow_[4] = arg5;
  2569. allow_[5] = arg6;
  2570. }
  2571. Boolean AllowedSdParams::param(SdParam::Type t) const
  2572. {
  2573. for (int i = 0; i < maxAllow && allow_[i] != SdParam::invalid; i++)
  2574. if (t == allow_[i])
  2575. return 1;
  2576. return 0;
  2577. }
  2578. SdParam::Type AllowedSdParams::get(int i) const
  2579. {
  2580. return i < 0 || i >= maxAllow ? SdParam::Type(SdParam::invalid) : allow_[i];
  2581. }
  2582. AllowedSdParamsMessageArg::AllowedSdParamsMessageArg(
  2583. const AllowedSdParams &allow,
  2584. const ConstPtr<Sd> &sd)
  2585. : allow_(allow), sd_(sd)
  2586. {
  2587. }
  2588. MessageArg *AllowedSdParamsMessageArg::copy() const
  2589. {
  2590. return new AllowedSdParamsMessageArg(*this);
  2591. }
  2592. void AllowedSdParamsMessageArg::append(MessageBuilder &builder) const
  2593. {
  2594. for (int i = 0;; i++) {
  2595. SdParam::Type type = allow_.get(i);
  2596. if (type == SdParam::invalid)
  2597. break;
  2598. if (i != 0)
  2599. builder.appendFragment(ParserMessages::listSep);
  2600. switch (type) {
  2601. case SdParam::eE:
  2602. builder.appendFragment(ParserMessages::entityEnd);
  2603. break;
  2604. case SdParam::minimumLiteral:
  2605. builder.appendFragment(ParserMessages::minimumLiteral);
  2606. break;
  2607. case SdParam::mdc:
  2608. {
  2609. builder.appendFragment(ParserMessages::delimStart);
  2610. Char c = sd_->execToDoc('>');
  2611. builder.appendChars(&c, 1);
  2612. builder.appendFragment(ParserMessages::delimEnd);
  2613. }
  2614. break;
  2615. case SdParam::number:
  2616. builder.appendFragment(ParserMessages::number);
  2617. break;
  2618. case SdParam::name:
  2619. builder.appendFragment(ParserMessages::name);
  2620. break;
  2621. case SdParam::paramLiteral:
  2622. builder.appendFragment(ParserMessages::parameterLiteral);
  2623. break;
  2624. case SdParam::capacityName:
  2625. builder.appendFragment(ParserMessages::capacityName);
  2626. break;
  2627. case SdParam::generalDelimiterName:
  2628. builder.appendFragment(ParserMessages::generalDelimiteRoleName);
  2629. break;
  2630. case SdParam::referenceReservedName:
  2631. builder.appendFragment(ParserMessages::referenceReservedName);
  2632. break;
  2633. case SdParam::quantityName:
  2634. builder.appendFragment(ParserMessages::quantityName);
  2635. break;
  2636. case SdParam::ellipsis:
  2637. {
  2638. StringC str(sd_->execToDoc("..."));
  2639. builder.appendChars(str.data(), str.size());
  2640. break;
  2641. }
  2642. default:
  2643. {
  2644. StringC str(sd_->reservedName(type - SdParam::reservedName));
  2645. builder.appendChars(str.data(), str.size());
  2646. break;
  2647. }
  2648. }
  2649. }
  2650. }
  2651. SdBuilder::SdBuilder()
  2652. : valid(1), externalSyntax(0)
  2653. {
  2654. }
  2655. void SdBuilder::addFormalError(const Location &location,
  2656. const MessageType1 &message,
  2657. const StringC &id)
  2658. {
  2659. formalErrorList.insert(new SdFormalError(location, message, id));
  2660. }
  2661. SdFormalError::SdFormalError(const Location &location,
  2662. const MessageType1 &message,
  2663. const StringC &id)
  2664. : location_(location),
  2665. message_(&message),
  2666. id_(id)
  2667. {
  2668. }
  2669. void SdFormalError::send(ParserState &parser)
  2670. {
  2671. parser.Messenger::setNextLocation(location_);
  2672. parser.message(*message_, StringMessageArg(id_));
  2673. }
  2674. CharSwitcher::CharSwitcher()
  2675. {
  2676. }
  2677. void CharSwitcher::addSwitch(WideChar from, WideChar to)
  2678. {
  2679. switches_.push_back(from);
  2680. switches_.push_back(to);
  2681. switchUsed_.push_back(0);
  2682. }
  2683. SyntaxChar CharSwitcher::subst(WideChar c)
  2684. {
  2685. for (size_t i = 0; i < switches_.size(); i += 2)
  2686. if (switches_[i] == c) {
  2687. switchUsed_[i/2] = 1;
  2688. return switches_[i + 1];
  2689. }
  2690. return c;
  2691. }
  2692. size_t CharSwitcher::nSwitches() const
  2693. {
  2694. return switchUsed_.size();
  2695. }
  2696. Boolean CharSwitcher::switchUsed(size_t i) const
  2697. {
  2698. return switchUsed_[i];
  2699. }
  2700. WideChar CharSwitcher::switchFrom(size_t i) const
  2701. {
  2702. return switches_[i*2];
  2703. }
  2704. WideChar CharSwitcher::switchTo(size_t i) const
  2705. {
  2706. return switches_[i*2 + 1];
  2707. }
  2708. CharsetMessageArg::CharsetMessageArg(const ISet<WideChar> &set)
  2709. : set_(set)
  2710. {
  2711. }
  2712. MessageArg *CharsetMessageArg::copy() const
  2713. {
  2714. return new CharsetMessageArg(*this);
  2715. }
  2716. void CharsetMessageArg::append(MessageBuilder &builder) const
  2717. {
  2718. ISetIter<WideChar> iter(set_);
  2719. WideChar min, max;
  2720. Boolean first = 1;
  2721. while (iter.next(min, max)) {
  2722. if (first)
  2723. first = 0;
  2724. else
  2725. builder.appendFragment(ParserMessages::listSep);
  2726. builder.appendNumber(min);
  2727. if (max != min) {
  2728. builder.appendFragment(max == min + 1
  2729. ? ParserMessages::listSep
  2730. : ParserMessages::rangeSep);
  2731. builder.appendNumber(max);
  2732. }
  2733. }
  2734. }
  2735. #ifdef SP_NAMESPACE
  2736. }
  2737. #endif