123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275 |
- /*
- * CDE - Common Desktop Environment
- *
- * Copyright (c) 1993-2012, The Open Group. All rights reserved.
- *
- * These libraries and programs are free software; you can
- * redistribute them and/or modify them under the terms of the GNU
- * Lesser General Public License as published by the Free Software
- * Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * These libraries and programs are distributed in the hope that
- * they will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU Lesser General Public License for more
- * details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with these libraries and programs; if not, write
- * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
- * Floor, Boston, MA 02110-1301 USA
- */
- /* $XConsortium: TrieBuilder.C /main/1 1996/07/29 17:06:43 cde-hp $ */
- // Copyright (c) 1994 James Clark
- // See the file COPYING for copying permission.
- #ifdef __GNUG__
- #pragma implementation
- #endif
- #include "splib.h"
- #include "types.h"
- #include "macros.h"
- #include "StringOf.h"
- #include "Trie.h"
- #include "TrieBuilder.h"
- #include "Priority.h"
- #include <stdlib.h>
- #ifdef SP_NAMESPACE
- namespace SP_NAMESPACE {
- #endif
- Trie::~Trie()
- {
- if (next_)
- delete [] next_;
- }
- Trie::Trie(const Trie &t)
- : nCodes_(t.nCodes_),
- token_(t.token_),
- tokenLength_(t.tokenLength_),
- priority_(t.priority_),
- blank_(t.blank_)
- {
- if (t.next_) {
- next_ = new Trie[nCodes_];
- for (int i = 0; i < nCodes_; i++)
- next_[i] = t.next_[i];
- }
- else
- next_ = 0;
- }
- Trie &Trie::operator=(const Trie &t)
- {
- if (next_)
- delete [] next_;
- nCodes_ = t.nCodes_;
- token_ = t.token_;
- tokenLength_ = t.tokenLength_;
- priority_ = t.priority_;
- blank_ = t.blank_;
- if (t.next_) {
- next_ = new Trie[nCodes_];
- for (int i = 0; i < nCodes_; i++)
- next_[i] = t.next_[i];
- }
- else
- next_ = 0;
- return *this;
- }
- TrieBuilder::TrieBuilder(int nCodes)
- : nCodes_(nCodes), root_(new Trie)
- {
- root_->token_ = 0;
- root_->tokenLength_ = 0;
- root_->priority_ = Priority::data;
- root_->nCodes_ = nCodes;
- }
- void TrieBuilder::recognize(const String<EquivCode> &chars,
- Token t,
- Priority::Type priority,
- TokenVector &ambiguities)
- {
- setToken(extendTrie(root_.pointer(), chars), chars.size(), t, priority,
- ambiguities);
- }
- void TrieBuilder::recognize(const String<EquivCode> &chars,
- const String<EquivCode> &set,
- Token t,
- Priority::Type priority,
- TokenVector &ambiguities)
- {
- Trie *trie = extendTrie(root_.pointer(), chars);
- for (size_t i = 0; i < set.size(); i++)
- setToken(forceNext(trie, set[i]), chars.size() + 1, t, priority,
- ambiguities);
- }
- void TrieBuilder::recognizeB(const String<EquivCode> &chars,
- int bSequenceLength,
- size_t maxBlankSequence,
- const String<EquivCode> &blankCodes,
- const String<EquivCode> &chars2,
- Token token,
- TokenVector &ambiguities)
- {
- doB(extendTrie(root_.pointer(), chars),
- chars.size(),
- bSequenceLength,
- maxBlankSequence,
- blankCodes,
- chars2,
- token,
- Priority::blank(bSequenceLength),
- ambiguities);
- }
- void TrieBuilder::recognizeEE(EquivCode code, Token t)
- {
- Trie *trie = forceNext(root_.pointer(), code);
- trie->tokenLength_ = 0; // it has length 0 in the buffer
- trie->token_ = t;
- trie->priority_ = Priority::data;
- }
- void TrieBuilder::doB(Trie *trie,
- int tokenLength,
- int minBLength,
- size_t maxLength,
- const String<EquivCode> &blankCodes,
- const String<EquivCode> &chars2,
- Token token,
- Priority::Type pri,
- TokenVector &ambiguities)
- {
- if (minBLength == 0 && trie->next_ == 0) {
- if (!trie->blank_) {
- BlankTrie *b = new BlankTrie;
- trie->blank_ = b;
- b->maxBlanksToScan_ = maxLength;
- b->additionalLength_ = tokenLength;
- b->codeIsBlank_.assign(nCodes_, 0);
- for (size_t i = 0; i < blankCodes.size(); i++)
- b->codeIsBlank_[blankCodes[i]] = 1;
- b->tokenLength_ = 0;
- b->token_ = 0;
- b->priority_ = Priority::data;
- b->nCodes_ = nCodes_;
- }
- else {
- // A B sequence is not allowed to be adjacent to a character
- // that can occur in a blank sequence, so maxLength will be
- // the same at a node, no matter how we got there.
- ASSERT(trie->blank_->maxBlanksToScan_ == maxLength);
- ASSERT(trie->blank_->additionalLength_ == tokenLength);
- }
- if (chars2.size() == 0)
- setToken(trie, tokenLength, token, pri, ambiguities);
- else
- setToken(extendTrie(trie->blank_.pointer(), chars2),
- chars2.size(),
- token,
- pri,
- ambiguities);
- }
- else {
- if (minBLength == 0)
- setToken(extendTrie(trie, chars2), tokenLength + chars2.size(),
- token, pri, ambiguities);
- for (size_t i = 0; i < blankCodes.size(); i++)
- doB(forceNext(trie, blankCodes[i]),
- tokenLength + 1,
- minBLength == 0 ? 0 : minBLength - 1,
- maxLength - 1,
- blankCodes,
- chars2,
- token,
- pri,
- ambiguities);
- }
- }
- Trie *TrieBuilder::extendTrie(Trie *trie, const String<EquivCode> &s)
- {
- for (size_t i = 0; i < s.size(); i++)
- trie = forceNext(trie, s[i]);
- return trie;
- }
- void TrieBuilder::setToken(Trie *trie,
- int tokenLength,
- Token token,
- Priority::Type pri,
- TokenVector &ambiguities)
- {
- if (tokenLength > trie->tokenLength_
- || (tokenLength == trie->tokenLength_
- && pri > trie->priority_)) {
- trie->tokenLength_ = tokenLength;
- trie->token_ = token;
- trie->priority_ = pri;
- }
- else if (trie->tokenLength_ == tokenLength
- && trie->priority_ == pri
- && trie->token_ != token
- && trie->token_ != 0) {
- ambiguities.push_back(Token(trie->token_));
- ambiguities.push_back(token);
- }
- if (trie->hasNext()) {
- for (int i = 0; i < nCodes_; i++)
- setToken(&trie->next_[i], tokenLength, token, pri, ambiguities);
- }
- }
- void TrieBuilder::copyInto(Trie *into, const Trie *from, int additionalLength)
- {
- if (from->token_ != 0) {
- TokenVector ambiguities;
- setToken(into, from->tokenLength_ + additionalLength, from->token_,
- from->priority_, ambiguities);
- ASSERT(ambiguities.size() == 0);
- }
- if (from->hasNext())
- for (int i = 0; i < nCodes_; i++)
- copyInto(forceNext(into, i), &from->next_[i], additionalLength);
- }
- Trie *TrieBuilder::forceNext(Trie *trie, EquivCode c)
- {
- if (!trie->hasNext()) {
- trie->next_ = new Trie[nCodes_];
- if (trie->blank_) {
- trie->blank_->additionalLength_ += 1;
- trie->blank_->maxBlanksToScan_ -= 1;
- }
- Owner<BlankTrie> blankOwner(trie->blank_.extract());
- const BlankTrie *b = blankOwner.pointer();
- for (int i = 0; i < nCodes_; i++) {
- Trie *p = &trie->next_[i];
- if (b && b->codeIsBlank(i))
- trie->next_[i].blank_ = (blankOwner
- ? blankOwner.extract()
- : new BlankTrie(*b));
- p->token_ = trie->token_;
- p->tokenLength_ = trie->tokenLength_;
- p->priority_ = trie->priority_;
- p->nCodes_ = nCodes_;
- }
- if (b)
- // -1 because 1 was added above
- copyInto(trie, b, b->additionalLength_ - 1);
- }
- return &trie->next_[c];
- }
- #ifdef SP_NAMESPACE
- }
- #endif
|