123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408 |
- /*
- * CDE - Common Desktop Environment
- *
- * Copyright (c) 1993-2012, The Open Group. All rights reserved.
- *
- * These libraries and programs are free software; you can
- * redistribute them and/or modify them under the terms of the GNU
- * Lesser General Public License as published by the Free Software
- * Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * These libraries and programs are distributed in the hope that
- * they will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU Lesser General Public License for more
- * details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with these libraries and programs; if not, write
- * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
- * Floor, Boston, MA 02110-1301 USA
- */
- /* $XConsortium: Text.C /main/1 1996/07/29 17:06:09 cde-hp $ */
- // Copyright (c) 1994 James Clark
- // See the file COPYING for copying permission.
- #ifdef __GNUG__
- #pragma implementation
- #endif
- #include "splib.h"
- #include "Text.h"
- #include "Entity.h"
- // for memcmp()
- #include <string.h>
- #ifdef SP_NAMESPACE
- namespace SP_NAMESPACE {
- #endif
- Text::Text()
- {
- }
- void Text::addChar(Char c, const Location &loc)
- {
- if (items_.size() == 0
- || items_.back().type != TextItem::data
- || loc.origin().pointer() != items_.back().loc.origin().pointer()
- || loc.index() != (items_.back().loc.index()
- + (chars_.size() - items_.back().index))) {
- items_.resize(items_.size() + 1);
- items_.back().loc = loc;
- items_.back().type = TextItem::data;
- items_.back().index = chars_.size();
- }
- chars_ += c;
- }
- void Text::addChars(const Char *p, size_t length, const Location &loc)
- {
- if (items_.size() == 0
- || items_.back().type != TextItem::data
- || loc.origin().pointer() != items_.back().loc.origin().pointer()
- || loc.index() != (items_.back().loc.index()
- + (chars_.size() - items_.back().index))) {
- items_.resize(items_.size() + 1);
- items_.back().loc = loc;
- items_.back().type = TextItem::data;
- items_.back().index = chars_.size();
- }
- chars_.append(p, length);
- }
- void Text::addCdata(const InternalEntity *entity,
- const ConstPtr<Origin> &origin)
- {
- addSimple(TextItem::cdata, Location(origin, 0));
- chars_.append(entity->string().data(), entity->string().size());
- }
- void Text::addSdata(const InternalEntity *entity,
- const ConstPtr<Origin> &origin)
- {
- addSimple(TextItem::sdata, Location(origin, 0));
- chars_.append(entity->string().data(), entity->string().size());
- }
- void Text::addCharsTokenize(const Char *str, size_t n, const Location &loc,
- Char space)
- {
- Location loci(loc);
- // FIXME speed this up
- for (size_t i = 0; i < n; loci += 1, i++) {
- if (str[i] == space && (size() == 0 || lastChar() == space))
- ignoreChar(str[i], loci);
- else
- addChar(str[i], loci);
- }
- }
- void Text::tokenize(Char space, Text &text) const
- {
- TextIter iter(*this);
- TextItem::Type type;
- const Char *p;
- size_t n;
- const Location *loc;
- while (iter.next(type, p, n, loc)) {
- switch (type) {
- case TextItem::data:
- text.addCharsTokenize(p, n, *loc, space);
- break;
- case TextItem::sdata:
- case TextItem::cdata:
- {
- text.addEntityStart(*loc);
- text.addCharsTokenize(p, n, *loc, space);
- Location tem(*loc);
- tem += n;
- text.addEntityEnd(tem);
- }
- break;
- case TextItem::ignore:
- text.ignoreChar(*p, *loc);
- break;
- default:
- text.addSimple(type, *loc);
- break;
- }
- }
- if (text.size() > 0 && text.lastChar() == space)
- text.ignoreLastChar();
- }
- void Text::addSimple(TextItem::Type type, const Location &loc)
- {
- items_.resize(items_.size() + 1);
- items_.back().loc = loc;
- items_.back().type = type;
- items_.back().index = chars_.size();
- }
- void Text::ignoreChar(Char c, const Location &loc)
- {
- items_.resize(items_.size() + 1);
- items_.back().loc = loc;
- items_.back().type = TextItem::ignore;
- items_.back().c = c;
- items_.back().index = chars_.size();
- }
- void Text::ignoreLastChar()
- {
- size_t lastIndex = chars_.size() - 1;
- size_t i;
- for (i = items_.size() - 1; items_[i].index > lastIndex; i--)
- ;
- // lastIndex >= items_[i].index
- if (items_[i].index != lastIndex) {
- items_.resize(items_.size() + 1);
- i++;
- for (size_t j = items_.size() - 1; j > i; j--)
- items_[j] = items_[j - 1];
- items_[i].index = lastIndex;
- items_[i].loc = items_[i - 1].loc;
- items_[i].loc += lastIndex - items_[i - 1].index;
- }
-
- items_[i].c = chars_[chars_.size() - 1];
- items_[i].type = TextItem::ignore;
- for (size_t j = i + 1; j < items_.size(); j++)
- items_[j].index = lastIndex;
- chars_.resize(chars_.size() - 1);
- }
- // All characters other than spaces are substed.
- void Text::subst(const SubstTable<Char> &table, Char space)
- {
- for (size_t i = 0; i < items_.size(); i++)
- if (items_[i].type == TextItem::data) {
- size_t lim = (i + 1 < items_.size()
- ? items_[i + 1].index
- : chars_.size());
- size_t j;
- for (j = items_[i].index; j < lim; j++) {
- Char c = chars_[j];
- if (c != space && c != table[c])
- break;
- }
- if (j < lim) {
- size_t start = items_[i].index;
- StringC origChars(chars_.data() + start, lim - start);
- for (; j < lim; j++)
- if (chars_[j] != space)
- table.subst(chars_[j]);
- items_[i].loc = Location(new MultiReplacementOrigin(items_[i].loc,
- origChars),
- 0);
- }
- }
- }
- void Text::clear()
- {
- chars_.resize(0);
- items_.clear();
- }
- Boolean Text::startDelimLocation(Location &loc) const
- {
- if (items_.size() == 0 || items_[0].type != TextItem::startDelim)
- return 0;
- loc = items_[0].loc;
- return 1;
- }
- Boolean Text::endDelimLocation(Location &loc) const
- {
- if (items_.size() == 0)
- return 0;
- switch (items_.back().type) {
- case TextItem::endDelim:
- case TextItem::endDelimA:
- break;
- default:
- return 0;
- }
- loc = items_.back().loc;
- return 1;
- }
- Boolean Text::delimType(Boolean &lita) const
- {
- if (items_.size() == 0)
- return 0;
- switch (items_.back().type) {
- case TextItem::endDelim:
- lita = 0;
- return 1;
- case TextItem::endDelimA:
- lita = 1;
- return 1;
- default:
- break;
- }
- return 0;
- }
- TextItem::TextItem()
- : type(data), c(0), index(0)
- {
- }
- void Text::swap(Text &to)
- {
- items_.swap(to.items_);
- chars_.swap(to.chars_);
- }
- TextIter::TextIter(const Text &text)
- : ptr_(text.items_.begin()), text_(&text)
- {
- }
- const Char *TextIter::chars(size_t &length) const
- {
- if (ptr_->type == TextItem::ignore) {
- length = 1;
- return &ptr_->c;
- }
- else {
- const StringC &chars = text_->chars_;
- size_t charsIndex = ptr_->index;
- if (ptr_ + 1 != text_->items_.begin() + text_->items_.size())
- length = ptr_[1].index - charsIndex;
- else
- length = chars.size() - charsIndex;
- return chars.data() + charsIndex;
- }
- }
- Boolean TextIter::next(TextItem::Type &type, const Char *&str, size_t &length,
- const Location *&loc)
- {
- const TextItem *end = text_->items_.begin() + text_->items_.size();
- if (ptr_ == end)
- return 0;
- type = ptr_->type;
- loc = &ptr_->loc;
- if (type == TextItem::ignore) {
- str = &ptr_->c;
- length = 1;
- }
- else {
- const StringC &chars = text_->chars_;
- size_t charsIndex = ptr_->index;
- str = chars.data() + charsIndex;
- if (ptr_ + 1 != end)
- length = ptr_[1].index - charsIndex;
- else
- length = chars.size() - charsIndex;
- }
- ptr_++;
- return 1;
- }
- void Text::insertChars(const StringC &s, const Location &loc)
- {
- chars_.insert(0, s);
- items_.resize(items_.size() + 1);
- for (size_t i = items_.size() - 1; i > 0; i--) {
- items_[i] = items_[i - 1];
- items_[i].index += s.size();
- }
- items_[0].loc = loc;
- items_[0].type = TextItem::data;
- items_[0].index = 0;
- }
- size_t Text::nDataEntities() const
- {
- size_t n = 0;
- for (size_t i = 0; i < items_.size(); i++)
- switch (items_[i].type) {
- case TextItem::sdata:
- case TextItem::cdata:
- n++;
- break;
- default:
- break;
- }
- return n;
- }
- // This is used to determine for a FIXED CDATA attribute
- // whether a specified value if equal to the default value.
- Boolean Text::fixedEqual(const Text &text) const
- {
- if (string() != text.string())
- return 0;
- size_t j = 0;
- for (size_t i = 0; i < items_.size(); i++)
- switch (items_[i].type) {
- case TextItem::cdata:
- case TextItem::sdata:
- for (;;) {
- if (j >= text.items_.size())
- return 0;
- if (text.items_[j].type == TextItem::cdata
- || text.items_[j].type == TextItem::sdata)
- break;
- j++;
- }
- if (text.items_[j].index != items_[i].index
- || (text.items_[j].loc.origin()->asEntityOrigin()->entity()
- != items_[i].loc.origin()->asEntityOrigin()->entity()))
- return 0;
- break;
- default:
- break;
- }
- for (; j < text.items_.size(); j++)
- switch (text.items_[j].type) {
- case TextItem::cdata:
- case TextItem::sdata:
- return 0;
- default:
- break;
- }
- return 1;
- }
- Location Text::charLocation(size_t ind) const
- {
- // Find the last item whose index <= ind.
- // Invariant:
- // indexes < i implies index <= ind
- // indexes >= lim implies index > ind
- // The first item will always have index 0.
- size_t i = 1;
- size_t lim = items_.size();
- while (i < lim) {
- size_t mid = i + (lim - i)/2;
- if (items_[mid].index > ind)
- lim = mid;
- else
- i = mid + 1;
- }
- #if 0
- for (size_t i = 1; i < items_.size(); i++)
- if (items_[i].index > ind)
- break;
- #endif
- i--;
- Location loc;
- // If items_.size() == 0, then i == lim.
- if (i < lim) {
- loc = items_[i].loc;
- loc += ind - items_[i].index;
- }
- return loc;
- }
- #ifdef SP_NAMESPACE
- }
- #endif
|