Text.C 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. /*
  2. * CDE - Common Desktop Environment
  3. *
  4. * Copyright (c) 1993-2012, The Open Group. All rights reserved.
  5. *
  6. * These libraries and programs are free software; you can
  7. * redistribute them and/or modify them under the terms of the GNU
  8. * Lesser General Public License as published by the Free Software
  9. * Foundation; either version 2 of the License, or (at your option)
  10. * any later version.
  11. *
  12. * These libraries and programs are distributed in the hope that
  13. * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14. * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15. * PURPOSE. See the GNU Lesser General Public License for more
  16. * details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with these libraries and programs; if not, write
  20. * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21. * Floor, Boston, MA 02110-1301 USA
  22. */
  23. /* $XConsortium: Text.C /main/1 1996/07/29 17:06:09 cde-hp $ */
  24. // Copyright (c) 1994 James Clark
  25. // See the file COPYING for copying permission.
  26. #ifdef __GNUG__
  27. #pragma implementation
  28. #endif
  29. #include "splib.h"
  30. #include "Text.h"
  31. #include "Entity.h"
  32. // for memcmp()
  33. #include <string.h>
  34. #ifdef SP_NAMESPACE
  35. namespace SP_NAMESPACE {
  36. #endif
  37. Text::Text()
  38. {
  39. }
  40. void Text::addChar(Char c, const Location &loc)
  41. {
  42. if (items_.size() == 0
  43. || items_.back().type != TextItem::data
  44. || loc.origin().pointer() != items_.back().loc.origin().pointer()
  45. || loc.index() != (items_.back().loc.index()
  46. + (chars_.size() - items_.back().index))) {
  47. items_.resize(items_.size() + 1);
  48. items_.back().loc = loc;
  49. items_.back().type = TextItem::data;
  50. items_.back().index = chars_.size();
  51. }
  52. chars_ += c;
  53. }
  54. void Text::addChars(const Char *p, size_t length, const Location &loc)
  55. {
  56. if (items_.size() == 0
  57. || items_.back().type != TextItem::data
  58. || loc.origin().pointer() != items_.back().loc.origin().pointer()
  59. || loc.index() != (items_.back().loc.index()
  60. + (chars_.size() - items_.back().index))) {
  61. items_.resize(items_.size() + 1);
  62. items_.back().loc = loc;
  63. items_.back().type = TextItem::data;
  64. items_.back().index = chars_.size();
  65. }
  66. chars_.append(p, length);
  67. }
  68. void Text::addCdata(const InternalEntity *entity,
  69. const ConstPtr<Origin> &origin)
  70. {
  71. addSimple(TextItem::cdata, Location(origin, 0));
  72. chars_.append(entity->string().data(), entity->string().size());
  73. }
  74. void Text::addSdata(const InternalEntity *entity,
  75. const ConstPtr<Origin> &origin)
  76. {
  77. addSimple(TextItem::sdata, Location(origin, 0));
  78. chars_.append(entity->string().data(), entity->string().size());
  79. }
  80. void Text::addCharsTokenize(const Char *str, size_t n, const Location &loc,
  81. Char space)
  82. {
  83. Location loci(loc);
  84. // FIXME speed this up
  85. for (size_t i = 0; i < n; loci += 1, i++) {
  86. if (str[i] == space && (size() == 0 || lastChar() == space))
  87. ignoreChar(str[i], loci);
  88. else
  89. addChar(str[i], loci);
  90. }
  91. }
  92. void Text::tokenize(Char space, Text &text) const
  93. {
  94. TextIter iter(*this);
  95. TextItem::Type type;
  96. const Char *p;
  97. size_t n;
  98. const Location *loc;
  99. while (iter.next(type, p, n, loc)) {
  100. switch (type) {
  101. case TextItem::data:
  102. text.addCharsTokenize(p, n, *loc, space);
  103. break;
  104. case TextItem::sdata:
  105. case TextItem::cdata:
  106. {
  107. text.addEntityStart(*loc);
  108. text.addCharsTokenize(p, n, *loc, space);
  109. Location tem(*loc);
  110. tem += n;
  111. text.addEntityEnd(tem);
  112. }
  113. break;
  114. case TextItem::ignore:
  115. text.ignoreChar(*p, *loc);
  116. break;
  117. default:
  118. text.addSimple(type, *loc);
  119. break;
  120. }
  121. }
  122. if (text.size() > 0 && text.lastChar() == space)
  123. text.ignoreLastChar();
  124. }
  125. void Text::addSimple(TextItem::Type type, const Location &loc)
  126. {
  127. items_.resize(items_.size() + 1);
  128. items_.back().loc = loc;
  129. items_.back().type = type;
  130. items_.back().index = chars_.size();
  131. }
  132. void Text::ignoreChar(Char c, const Location &loc)
  133. {
  134. items_.resize(items_.size() + 1);
  135. items_.back().loc = loc;
  136. items_.back().type = TextItem::ignore;
  137. items_.back().c = c;
  138. items_.back().index = chars_.size();
  139. }
  140. void Text::ignoreLastChar()
  141. {
  142. size_t lastIndex = chars_.size() - 1;
  143. size_t i;
  144. for (i = items_.size() - 1; items_[i].index > lastIndex; i--)
  145. ;
  146. // lastIndex >= items_[i].index
  147. if (items_[i].index != lastIndex) {
  148. items_.resize(items_.size() + 1);
  149. i++;
  150. for (size_t j = items_.size() - 1; j > i; j--)
  151. items_[j] = items_[j - 1];
  152. items_[i].index = lastIndex;
  153. items_[i].loc = items_[i - 1].loc;
  154. items_[i].loc += lastIndex - items_[i - 1].index;
  155. }
  156. items_[i].c = chars_[chars_.size() - 1];
  157. items_[i].type = TextItem::ignore;
  158. for (size_t j = i + 1; j < items_.size(); j++)
  159. items_[j].index = lastIndex;
  160. chars_.resize(chars_.size() - 1);
  161. }
  162. // All characters other than spaces are substed.
  163. void Text::subst(const SubstTable<Char> &table, Char space)
  164. {
  165. for (size_t i = 0; i < items_.size(); i++)
  166. if (items_[i].type == TextItem::data) {
  167. size_t lim = (i + 1 < items_.size()
  168. ? items_[i + 1].index
  169. : chars_.size());
  170. size_t j;
  171. for (j = items_[i].index; j < lim; j++) {
  172. Char c = chars_[j];
  173. if (c != space && c != table[c])
  174. break;
  175. }
  176. if (j < lim) {
  177. size_t start = items_[i].index;
  178. StringC origChars(chars_.data() + start, lim - start);
  179. for (; j < lim; j++)
  180. if (chars_[j] != space)
  181. table.subst(chars_[j]);
  182. items_[i].loc = Location(new MultiReplacementOrigin(items_[i].loc,
  183. origChars),
  184. 0);
  185. }
  186. }
  187. }
  188. void Text::clear()
  189. {
  190. chars_.resize(0);
  191. items_.clear();
  192. }
  193. Boolean Text::startDelimLocation(Location &loc) const
  194. {
  195. if (items_.size() == 0 || items_[0].type != TextItem::startDelim)
  196. return 0;
  197. loc = items_[0].loc;
  198. return 1;
  199. }
  200. Boolean Text::endDelimLocation(Location &loc) const
  201. {
  202. if (items_.size() == 0)
  203. return 0;
  204. switch (items_.back().type) {
  205. case TextItem::endDelim:
  206. case TextItem::endDelimA:
  207. break;
  208. default:
  209. return 0;
  210. }
  211. loc = items_.back().loc;
  212. return 1;
  213. }
  214. Boolean Text::delimType(Boolean &lita) const
  215. {
  216. if (items_.size() == 0)
  217. return 0;
  218. switch (items_.back().type) {
  219. case TextItem::endDelim:
  220. lita = 0;
  221. return 1;
  222. case TextItem::endDelimA:
  223. lita = 1;
  224. return 1;
  225. default:
  226. break;
  227. }
  228. return 0;
  229. }
  230. TextItem::TextItem()
  231. : type(data), c(0), index(0)
  232. {
  233. }
  234. void Text::swap(Text &to)
  235. {
  236. items_.swap(to.items_);
  237. chars_.swap(to.chars_);
  238. }
  239. TextIter::TextIter(const Text &text)
  240. : ptr_(text.items_.begin()), text_(&text)
  241. {
  242. }
  243. const Char *TextIter::chars(size_t &length) const
  244. {
  245. if (ptr_->type == TextItem::ignore) {
  246. length = 1;
  247. return &ptr_->c;
  248. }
  249. else {
  250. const StringC &chars = text_->chars_;
  251. size_t charsIndex = ptr_->index;
  252. if (ptr_ + 1 != text_->items_.begin() + text_->items_.size())
  253. length = ptr_[1].index - charsIndex;
  254. else
  255. length = chars.size() - charsIndex;
  256. return chars.data() + charsIndex;
  257. }
  258. }
  259. Boolean TextIter::next(TextItem::Type &type, const Char *&str, size_t &length,
  260. const Location *&loc)
  261. {
  262. const TextItem *end = text_->items_.begin() + text_->items_.size();
  263. if (ptr_ == end)
  264. return 0;
  265. type = ptr_->type;
  266. loc = &ptr_->loc;
  267. if (type == TextItem::ignore) {
  268. str = &ptr_->c;
  269. length = 1;
  270. }
  271. else {
  272. const StringC &chars = text_->chars_;
  273. size_t charsIndex = ptr_->index;
  274. str = chars.data() + charsIndex;
  275. if (ptr_ + 1 != end)
  276. length = ptr_[1].index - charsIndex;
  277. else
  278. length = chars.size() - charsIndex;
  279. }
  280. ptr_++;
  281. return 1;
  282. }
  283. void Text::insertChars(const StringC &s, const Location &loc)
  284. {
  285. chars_.insert(0, s);
  286. items_.resize(items_.size() + 1);
  287. for (size_t i = items_.size() - 1; i > 0; i--) {
  288. items_[i] = items_[i - 1];
  289. items_[i].index += s.size();
  290. }
  291. items_[0].loc = loc;
  292. items_[0].type = TextItem::data;
  293. items_[0].index = 0;
  294. }
  295. size_t Text::nDataEntities() const
  296. {
  297. size_t n = 0;
  298. for (size_t i = 0; i < items_.size(); i++)
  299. switch (items_[i].type) {
  300. case TextItem::sdata:
  301. case TextItem::cdata:
  302. n++;
  303. break;
  304. default:
  305. break;
  306. }
  307. return n;
  308. }
  309. // This is used to determine for a FIXED CDATA attribute
  310. // whether a specified value if equal to the default value.
  311. Boolean Text::fixedEqual(const Text &text) const
  312. {
  313. if (string() != text.string())
  314. return 0;
  315. size_t j = 0;
  316. for (size_t i = 0; i < items_.size(); i++)
  317. switch (items_[i].type) {
  318. case TextItem::cdata:
  319. case TextItem::sdata:
  320. for (;;) {
  321. if (j >= text.items_.size())
  322. return 0;
  323. if (text.items_[j].type == TextItem::cdata
  324. || text.items_[j].type == TextItem::sdata)
  325. break;
  326. j++;
  327. }
  328. if (text.items_[j].index != items_[i].index
  329. || (text.items_[j].loc.origin()->asEntityOrigin()->entity()
  330. != items_[i].loc.origin()->asEntityOrigin()->entity()))
  331. return 0;
  332. break;
  333. default:
  334. break;
  335. }
  336. for (; j < text.items_.size(); j++)
  337. switch (text.items_[j].type) {
  338. case TextItem::cdata:
  339. case TextItem::sdata:
  340. return 0;
  341. default:
  342. break;
  343. }
  344. return 1;
  345. }
  346. Location Text::charLocation(size_t ind) const
  347. {
  348. // Find the last item whose index <= ind.
  349. // Invariant:
  350. // indexes < i implies index <= ind
  351. // indexes >= lim implies index > ind
  352. // The first item will always have index 0.
  353. size_t i = 1;
  354. size_t lim = items_.size();
  355. while (i < lim) {
  356. size_t mid = i + (lim - i)/2;
  357. if (items_[mid].index > ind)
  358. lim = mid;
  359. else
  360. i = mid + 1;
  361. }
  362. #if 0
  363. for (size_t i = 1; i < items_.size(); i++)
  364. if (items_[i].index > ind)
  365. break;
  366. #endif
  367. i--;
  368. Location loc;
  369. // If items_.size() == 0, then i == lim.
  370. if (i < lim) {
  371. loc = items_[i].loc;
  372. loc += ind - items_[i].index;
  373. }
  374. return loc;
  375. }
  376. #ifdef SP_NAMESPACE
  377. }
  378. #endif