Syntax.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. /*
  2. * CDE - Common Desktop Environment
  3. *
  4. * Copyright (c) 1993-2012, The Open Group. All rights reserved.
  5. *
  6. * These libraries and programs are free software; you can
  7. * redistribute them and/or modify them under the terms of the GNU
  8. * Lesser General Public License as published by the Free Software
  9. * Foundation; either version 2 of the License, or (at your option)
  10. * any later version.
  11. *
  12. * These libraries and programs are distributed in the hope that
  13. * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14. * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15. * PURPOSE. See the GNU Lesser General Public License for more
  16. * details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with these libraries and programs; if not, write
  20. * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21. * Floor, Boston, MA 02110-1301 USA
  22. */
  23. /* $XConsortium: Syntax.h /main/1 1996/07/29 17:06:04 cde-hp $ */
  24. // Copyright (c) 1994 James Clark
  25. // See the file COPYING for copying permission.
  26. #ifndef Syntax_INCLUDED
  27. #define Syntax_INCLUDED 1
  28. #ifdef __GNUG__
  29. #pragma interface
  30. #endif
  31. #include "types.h"
  32. #include "Boolean.h"
  33. #include "ISet.h"
  34. #include "StringC.h"
  35. #include "SubstTable.h"
  36. #include "HashTable.h"
  37. #include "Vector.h"
  38. #include "Resource.h"
  39. #include "XcharMap.h"
  40. #include "EntityCatalog.h"
  41. #ifdef SP_NAMESPACE
  42. namespace SP_NAMESPACE {
  43. #endif
  44. class Sd;
  45. class CharsetInfo;
  46. class SP_API Syntax : public Resource, public EntityCatalog::Syntax {
  47. public:
  48. enum ReservedName {
  49. rANY,
  50. rATTLIST,
  51. rCDATA,
  52. rCONREF,
  53. rCURRENT,
  54. rDEFAULT,
  55. rDOCTYPE,
  56. rELEMENT,
  57. rEMPTY,
  58. rENDTAG,
  59. rENTITIES,
  60. rENTITY,
  61. rFIXED,
  62. rID,
  63. rIDLINK,
  64. rIDREF,
  65. rIDREFS,
  66. rIGNORE,
  67. rIMPLIED,
  68. rINCLUDE,
  69. rINITIAL,
  70. rLINK,
  71. rLINKTYPE,
  72. rMD,
  73. rMS,
  74. rNAME,
  75. rNAMES,
  76. rNDATA,
  77. rNMTOKEN,
  78. rNMTOKENS,
  79. rNOTATION,
  80. rNUMBER,
  81. rNUMBERS,
  82. rNUTOKEN,
  83. rNUTOKENS,
  84. rO,
  85. rPCDATA,
  86. rPI,
  87. rPOSTLINK,
  88. rPUBLIC,
  89. rRCDATA,
  90. rRE,
  91. rREQUIRED,
  92. rRESTORE,
  93. rRS,
  94. rSDATA,
  95. rSHORTREF,
  96. rSIMPLE,
  97. rSPACE,
  98. rSTARTTAG,
  99. rSUBDOC,
  100. rSYSTEM,
  101. rTEMP,
  102. rUSELINK,
  103. rUSEMAP
  104. };
  105. enum { nNames = rUSEMAP + 1 };
  106. enum Quantity {
  107. qATTCNT,
  108. qATTSPLEN,
  109. qBSEQLEN,
  110. qDTAGLEN,
  111. qDTEMPLEN,
  112. qENTLVL,
  113. qGRPCNT,
  114. qGRPGTCNT,
  115. qGRPLVL,
  116. qLITLEN,
  117. qNAMELEN,
  118. qNORMSEP,
  119. qPILEN,
  120. qTAGLEN,
  121. qTAGLVL
  122. };
  123. enum { nQuantity = qTAGLVL + 1 };
  124. enum DelimGeneral {
  125. dAND,
  126. dCOM,
  127. dCRO,
  128. dDSC,
  129. dDSO,
  130. dDTGC,
  131. dDTGO,
  132. dERO,
  133. dETAGO,
  134. dGRPC,
  135. dGRPO,
  136. dLIT,
  137. dLITA,
  138. dMDC,
  139. dMDO,
  140. dMINUS,
  141. dMSC,
  142. dNET,
  143. dOPT,
  144. dOR,
  145. dPERO,
  146. dPIC,
  147. dPIO,
  148. dPLUS,
  149. dREFC,
  150. dREP,
  151. dRNI,
  152. dSEQ,
  153. dSTAGO,
  154. dTAGC,
  155. dVI
  156. };
  157. enum { nDelimGeneral = dVI + 1 };
  158. enum StandardFunction {
  159. fRE,
  160. fRS,
  161. fSPACE
  162. };
  163. enum FunctionClass {
  164. cFUNCHAR,
  165. cSEPCHAR,
  166. cMSOCHAR,
  167. cMSICHAR,
  168. cMSSCHAR
  169. };
  170. enum Set {
  171. nameStart,
  172. digit,
  173. nmchar, // LCNMCHAR or UCNMCHAR
  174. s,
  175. blank,
  176. sepchar,
  177. minimumData,
  178. significant,
  179. functionChar, // function character
  180. sgmlChar
  181. };
  182. enum { nSet = sgmlChar + 1 };
  183. enum Category {
  184. otherCategory = 0,
  185. sCategory = 01,
  186. nameStartCategory = 02,
  187. digitCategory = 04,
  188. otherNameCategory = 010
  189. };
  190. Syntax(const Sd &);
  191. Syntax(const Syntax &);
  192. Boolean lookupFunctionChar(const StringC &, Char *) const;
  193. Boolean charFunctionName(Char c, const StringC *&name) const;
  194. Boolean lookupReservedName(const StringC &, ReservedName *) const;
  195. const StringC &reservedName(ReservedName) const;
  196. StringC rniReservedName(ReservedName) const;
  197. Number quantity(Quantity) const;
  198. Char standardFunction(int) const;
  199. Boolean getStandardFunction(int, Char &) const;
  200. const StringC &delim() const;
  201. const ISet<Char> *charSet(int i) const;
  202. const SubstTable<Char> *generalSubstTable() const;
  203. const SubstTable<Char> *entitySubstTable() const;
  204. const SubstTable<Char> &upperSubstTable() const;
  205. Boolean namecaseGeneral() const;
  206. Boolean namecaseEntity() const;
  207. const StringC &peroDelim() const;
  208. const StringC &delimGeneral(int) const;
  209. const StringC &delimShortrefComplex(size_t) const;
  210. const ISet<Char> &delimShortrefSimple() const;
  211. int nDelimShortrefComplex() const;
  212. Boolean isValidShortref(const StringC &) const;
  213. Boolean hasShortrefs() const;
  214. Boolean isNameCharacter(Xchar) const;
  215. Boolean isNameStartCharacter(Xchar) const;
  216. Boolean isDigit(Xchar) const;
  217. Boolean isS(Xchar) const;
  218. Boolean isB(Xchar c) const;
  219. Category charCategory(Xchar) const;
  220. Boolean isSgmlChar(Xchar) const;
  221. size_t attcnt() const;
  222. size_t attsplen() const;
  223. size_t namelen() const;
  224. size_t penamelen() const;
  225. size_t litlen() const;
  226. size_t normsep() const;
  227. size_t dtemplen() const;
  228. size_t grpcnt() const;
  229. size_t grpgtcnt() const;
  230. size_t grplvl() const;
  231. size_t taglvl() const;
  232. size_t taglen() const;
  233. size_t entlvl() const;
  234. size_t pilen() const;
  235. Char space() const;
  236. void setStandardFunction(StandardFunction, Char);
  237. void enterStandardFunctionNames();
  238. void addFunctionChar(const StringC &, FunctionClass, Char);
  239. void setNamecaseGeneral(Boolean);
  240. void setNamecaseEntity(Boolean);
  241. void setDelimGeneral(int, const StringC &);
  242. void addDelimShortref(const StringC &, const CharsetInfo &);
  243. void addDelimShortrefs(const ISet<Char> &shortrefChars,
  244. const CharsetInfo &charset);
  245. void addNameCharacters(const ISet<Char> &);
  246. void addNameStartCharacters(const ISet<Char> &);
  247. void addSubst(Char lc, Char uc);
  248. void addShunchar(Char);
  249. void setShuncharControls();
  250. void setQuantity(int, Number);
  251. void setName(int, const StringC &);
  252. void setSgmlChar(const ISet<Char> &);
  253. void implySgmlChar(const CharsetInfo &docCharset);
  254. // :: is for Watcom 10.0a
  255. void checkSgmlChar(const CharsetInfo &docCharset,
  256. const /* ::SP_NAMESPACE_SCOPE */ Syntax *otherSyntax,
  257. ISet<WideChar> &invalid)
  258. const;
  259. static int referenceQuantity(Quantity);
  260. const XcharMap<unsigned char> &markupScanTable() const;
  261. Boolean multicode() const;
  262. private:
  263. void subst(Char, Char);
  264. void checkUnivControlChar(UnivChar univChar,
  265. const CharsetInfo &docCharset,
  266. const /* ::SP_NAMESPACE_SCOPE */ Syntax *otherSyntax,
  267. ISet<WideChar> &invalid) const;
  268. ISet<Char> shunchar_;
  269. PackedBoolean shuncharControls_;
  270. ISet<Char> set_[nSet];
  271. Char standardFunction_[3];
  272. PackedBoolean standardFunctionValid_[3];
  273. Boolean namecaseGeneral_;
  274. Boolean namecaseEntity_;
  275. StringC delimGeneral_[nDelimGeneral];
  276. Vector<StringC> delimShortrefComplex_;
  277. ISet<Char> delimShortrefSimple_;
  278. StringC names_[nNames];
  279. Number quantity_[nQuantity];
  280. HashTable<StringC,int> nameTable_;
  281. HashTable<StringC,Char> functionTable_;
  282. SubstTable<Char> upperSubst_;
  283. SubstTable<Char> identitySubst_;
  284. const SubstTable<Char> *generalSubst_;
  285. const SubstTable<Char> *entitySubst_;
  286. XcharMap<unsigned char> categoryTable_;
  287. Boolean multicode_;
  288. XcharMap<unsigned char> markupScanTable_;
  289. static const int referenceQuantity_[];
  290. };
  291. inline Number Syntax::quantity(Quantity q) const
  292. {
  293. return quantity_[q];
  294. }
  295. inline void Syntax::setQuantity(int i, Number n)
  296. {
  297. quantity_[i] = n;
  298. }
  299. inline const SubstTable<Char> *Syntax::generalSubstTable() const
  300. {
  301. return generalSubst_;
  302. }
  303. inline const SubstTable<Char> *Syntax::entitySubstTable() const
  304. {
  305. return entitySubst_;
  306. }
  307. inline int Syntax::nDelimShortrefComplex() const
  308. {
  309. return int(delimShortrefComplex_.size());
  310. }
  311. inline const StringC &Syntax::delimGeneral(int i) const
  312. {
  313. return delimGeneral_[i];
  314. }
  315. inline const StringC &Syntax::delimShortrefComplex(size_t i) const
  316. {
  317. return delimShortrefComplex_[i];
  318. }
  319. inline const ISet<Char> &Syntax::delimShortrefSimple() const
  320. {
  321. return delimShortrefSimple_;
  322. }
  323. inline Boolean Syntax::hasShortrefs() const
  324. {
  325. return delimShortrefComplex_.size() > 0 || !delimShortrefSimple_.isEmpty();
  326. }
  327. inline Char Syntax::standardFunction(int i) const
  328. {
  329. return standardFunction_[i];
  330. }
  331. inline Boolean Syntax::getStandardFunction(int i, Char &result) const
  332. {
  333. if (standardFunctionValid_[i]) {
  334. result = standardFunction_[i];
  335. return 1;
  336. }
  337. else
  338. return 0;
  339. }
  340. inline const ISet<Char> *Syntax::charSet(int i) const
  341. {
  342. return &set_[i];
  343. }
  344. inline Boolean Syntax::isNameCharacter(Xchar c) const
  345. {
  346. return categoryTable_[c] >= nameStartCategory;
  347. }
  348. inline Boolean Syntax::isNameStartCharacter(Xchar c) const
  349. {
  350. return categoryTable_[c] == nameStartCategory;
  351. }
  352. inline Boolean Syntax::isDigit(Xchar c) const
  353. {
  354. return categoryTable_[c] == digitCategory;
  355. }
  356. inline Boolean Syntax::isS(Xchar c) const
  357. {
  358. return categoryTable_[c] == sCategory;
  359. }
  360. inline Boolean Syntax::isB(Xchar c) const
  361. {
  362. return (categoryTable_[c] == sCategory
  363. && !(standardFunctionValid_[fRE] && c == standardFunction_[fRE])
  364. && !(standardFunctionValid_[fRS] && c == standardFunction_[fRS]));
  365. }
  366. inline Syntax::Category Syntax::charCategory(Xchar c) const
  367. {
  368. return Category(categoryTable_[c]);
  369. }
  370. inline Boolean Syntax::isSgmlChar(Xchar c) const
  371. {
  372. return c >= 0 && set_[sgmlChar].contains(Char(c));
  373. }
  374. inline const StringC &Syntax::reservedName(ReservedName i) const
  375. {
  376. return names_[i];
  377. }
  378. inline size_t Syntax::attcnt() const
  379. {
  380. return quantity(qATTCNT);
  381. }
  382. inline size_t Syntax::attsplen() const
  383. {
  384. return quantity(qATTSPLEN);
  385. }
  386. inline size_t Syntax::namelen() const
  387. {
  388. return quantity(qNAMELEN);
  389. }
  390. inline size_t Syntax::penamelen() const
  391. {
  392. return quantity(qNAMELEN) - delimGeneral(dPERO).size();
  393. }
  394. inline size_t Syntax::litlen() const
  395. {
  396. return quantity(qLITLEN);
  397. }
  398. inline size_t Syntax::normsep() const
  399. {
  400. return quantity(qNORMSEP);
  401. }
  402. inline size_t Syntax::dtemplen() const
  403. {
  404. return quantity(qDTEMPLEN);
  405. }
  406. inline size_t Syntax::grpcnt() const
  407. {
  408. return quantity(qGRPCNT);
  409. }
  410. inline size_t Syntax::grpgtcnt() const
  411. {
  412. return quantity(qGRPGTCNT);
  413. }
  414. inline size_t Syntax::grplvl() const
  415. {
  416. return quantity(qGRPLVL);
  417. }
  418. inline size_t Syntax::taglvl() const
  419. {
  420. return quantity(qTAGLVL);
  421. }
  422. inline size_t Syntax::taglen() const
  423. {
  424. return quantity(qTAGLEN);
  425. }
  426. inline size_t Syntax::entlvl() const
  427. {
  428. return quantity(qENTLVL);
  429. }
  430. inline size_t Syntax::pilen() const
  431. {
  432. return quantity(qPILEN);
  433. }
  434. inline Char Syntax::space() const
  435. {
  436. return standardFunction(fSPACE);
  437. }
  438. inline void Syntax::setSgmlChar(const ISet<Char> &set)
  439. {
  440. set_[sgmlChar] = set;
  441. }
  442. inline int Syntax::referenceQuantity(Quantity i)
  443. {
  444. return referenceQuantity_[i];
  445. }
  446. inline void Syntax::setShuncharControls()
  447. {
  448. shuncharControls_ = 1;
  449. }
  450. inline const XcharMap<unsigned char> &Syntax::markupScanTable() const
  451. {
  452. return markupScanTable_;
  453. }
  454. inline Boolean Syntax::multicode() const
  455. {
  456. return multicode_;
  457. }
  458. inline Boolean Syntax::namecaseGeneral() const
  459. {
  460. return namecaseGeneral_;
  461. }
  462. inline Boolean Syntax::namecaseEntity() const
  463. {
  464. return namecaseEntity_;
  465. }
  466. #ifdef SP_NAMESPACE
  467. }
  468. #endif
  469. #endif /* Syntax_INCLUDED */