TrieBuilder.C 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. /*
  2. * CDE - Common Desktop Environment
  3. *
  4. * Copyright (c) 1993-2012, The Open Group. All rights reserved.
  5. *
  6. * These libraries and programs are free software; you can
  7. * redistribute them and/or modify them under the terms of the GNU
  8. * Lesser General Public License as published by the Free Software
  9. * Foundation; either version 2 of the License, or (at your option)
  10. * any later version.
  11. *
  12. * These libraries and programs are distributed in the hope that
  13. * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14. * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15. * PURPOSE. See the GNU Lesser General Public License for more
  16. * details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with these libraries and programs; if not, write
  20. * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21. * Floor, Boston, MA 02110-1301 USA
  22. */
  23. /* $XConsortium: TrieBuilder.C /main/1 1996/07/29 17:06:43 cde-hp $ */
  24. // Copyright (c) 1994 James Clark
  25. // See the file COPYING for copying permission.
  26. #ifdef __GNUG__
  27. #pragma implementation
  28. #endif
  29. #include "splib.h"
  30. #include "types.h"
  31. #include "macros.h"
  32. #include "StringOf.h"
  33. #include "Trie.h"
  34. #include "TrieBuilder.h"
  35. #include "Priority.h"
  36. #include <stdlib.h>
  37. #ifdef SP_NAMESPACE
  38. namespace SP_NAMESPACE {
  39. #endif
  40. Trie::~Trie()
  41. {
  42. if (next_)
  43. delete [] next_;
  44. }
  45. Trie::Trie(const Trie &t)
  46. : nCodes_(t.nCodes_),
  47. token_(t.token_),
  48. tokenLength_(t.tokenLength_),
  49. priority_(t.priority_),
  50. blank_(t.blank_)
  51. {
  52. if (t.next_) {
  53. next_ = new Trie[nCodes_];
  54. for (int i = 0; i < nCodes_; i++)
  55. next_[i] = t.next_[i];
  56. }
  57. else
  58. next_ = 0;
  59. }
  60. Trie &Trie::operator=(const Trie &t)
  61. {
  62. if (next_)
  63. delete [] next_;
  64. nCodes_ = t.nCodes_;
  65. token_ = t.token_;
  66. tokenLength_ = t.tokenLength_;
  67. priority_ = t.priority_;
  68. blank_ = t.blank_;
  69. if (t.next_) {
  70. next_ = new Trie[nCodes_];
  71. for (int i = 0; i < nCodes_; i++)
  72. next_[i] = t.next_[i];
  73. }
  74. else
  75. next_ = 0;
  76. return *this;
  77. }
  78. TrieBuilder::TrieBuilder(int nCodes)
  79. : nCodes_(nCodes), root_(new Trie)
  80. {
  81. root_->token_ = 0;
  82. root_->tokenLength_ = 0;
  83. root_->priority_ = Priority::data;
  84. root_->nCodes_ = nCodes;
  85. }
  86. void TrieBuilder::recognize(const String<EquivCode> &chars,
  87. Token t,
  88. Priority::Type priority,
  89. TokenVector &ambiguities)
  90. {
  91. setToken(extendTrie(root_.pointer(), chars), chars.size(), t, priority,
  92. ambiguities);
  93. }
  94. void TrieBuilder::recognize(const String<EquivCode> &chars,
  95. const String<EquivCode> &set,
  96. Token t,
  97. Priority::Type priority,
  98. TokenVector &ambiguities)
  99. {
  100. Trie *trie = extendTrie(root_.pointer(), chars);
  101. for (size_t i = 0; i < set.size(); i++)
  102. setToken(forceNext(trie, set[i]), chars.size() + 1, t, priority,
  103. ambiguities);
  104. }
  105. void TrieBuilder::recognizeB(const String<EquivCode> &chars,
  106. int bSequenceLength,
  107. size_t maxBlankSequence,
  108. const String<EquivCode> &blankCodes,
  109. const String<EquivCode> &chars2,
  110. Token token,
  111. TokenVector &ambiguities)
  112. {
  113. doB(extendTrie(root_.pointer(), chars),
  114. chars.size(),
  115. bSequenceLength,
  116. maxBlankSequence,
  117. blankCodes,
  118. chars2,
  119. token,
  120. Priority::blank(bSequenceLength),
  121. ambiguities);
  122. }
  123. void TrieBuilder::recognizeEE(EquivCode code, Token t)
  124. {
  125. Trie *trie = forceNext(root_.pointer(), code);
  126. trie->tokenLength_ = 0; // it has length 0 in the buffer
  127. trie->token_ = t;
  128. trie->priority_ = Priority::data;
  129. }
  130. void TrieBuilder::doB(Trie *trie,
  131. int tokenLength,
  132. int minBLength,
  133. size_t maxLength,
  134. const String<EquivCode> &blankCodes,
  135. const String<EquivCode> &chars2,
  136. Token token,
  137. Priority::Type pri,
  138. TokenVector &ambiguities)
  139. {
  140. if (minBLength == 0 && trie->next_ == 0) {
  141. if (!trie->blank_) {
  142. BlankTrie *b = new BlankTrie;
  143. trie->blank_ = b;
  144. b->maxBlanksToScan_ = maxLength;
  145. b->additionalLength_ = tokenLength;
  146. b->codeIsBlank_.assign(nCodes_, 0);
  147. for (size_t i = 0; i < blankCodes.size(); i++)
  148. b->codeIsBlank_[blankCodes[i]] = 1;
  149. b->tokenLength_ = 0;
  150. b->token_ = 0;
  151. b->priority_ = Priority::data;
  152. b->nCodes_ = nCodes_;
  153. }
  154. else {
  155. // A B sequence is not allowed to be adjacent to a character
  156. // that can occur in a blank sequence, so maxLength will be
  157. // the same at a node, no matter how we got there.
  158. ASSERT(trie->blank_->maxBlanksToScan_ == maxLength);
  159. ASSERT(trie->blank_->additionalLength_ == tokenLength);
  160. }
  161. if (chars2.size() == 0)
  162. setToken(trie, tokenLength, token, pri, ambiguities);
  163. else
  164. setToken(extendTrie(trie->blank_.pointer(), chars2),
  165. chars2.size(),
  166. token,
  167. pri,
  168. ambiguities);
  169. }
  170. else {
  171. if (minBLength == 0)
  172. setToken(extendTrie(trie, chars2), tokenLength + chars2.size(),
  173. token, pri, ambiguities);
  174. for (size_t i = 0; i < blankCodes.size(); i++)
  175. doB(forceNext(trie, blankCodes[i]),
  176. tokenLength + 1,
  177. minBLength == 0 ? 0 : minBLength - 1,
  178. maxLength - 1,
  179. blankCodes,
  180. chars2,
  181. token,
  182. pri,
  183. ambiguities);
  184. }
  185. }
  186. Trie *TrieBuilder::extendTrie(Trie *trie, const String<EquivCode> &s)
  187. {
  188. for (size_t i = 0; i < s.size(); i++)
  189. trie = forceNext(trie, s[i]);
  190. return trie;
  191. }
  192. void TrieBuilder::setToken(Trie *trie,
  193. int tokenLength,
  194. Token token,
  195. Priority::Type pri,
  196. TokenVector &ambiguities)
  197. {
  198. if (tokenLength > trie->tokenLength_
  199. || (tokenLength == trie->tokenLength_
  200. && pri > trie->priority_)) {
  201. trie->tokenLength_ = tokenLength;
  202. trie->token_ = token;
  203. trie->priority_ = pri;
  204. }
  205. else if (trie->tokenLength_ == tokenLength
  206. && trie->priority_ == pri
  207. && trie->token_ != token
  208. && trie->token_ != 0) {
  209. ambiguities.push_back(Token(trie->token_));
  210. ambiguities.push_back(token);
  211. }
  212. if (trie->hasNext()) {
  213. for (int i = 0; i < nCodes_; i++)
  214. setToken(&trie->next_[i], tokenLength, token, pri, ambiguities);
  215. }
  216. }
  217. void TrieBuilder::copyInto(Trie *into, const Trie *from, int additionalLength)
  218. {
  219. if (from->token_ != 0) {
  220. TokenVector ambiguities;
  221. setToken(into, from->tokenLength_ + additionalLength, from->token_,
  222. from->priority_, ambiguities);
  223. ASSERT(ambiguities.size() == 0);
  224. }
  225. if (from->hasNext())
  226. for (int i = 0; i < nCodes_; i++)
  227. copyInto(forceNext(into, i), &from->next_[i], additionalLength);
  228. }
  229. Trie *TrieBuilder::forceNext(Trie *trie, EquivCode c)
  230. {
  231. if (!trie->hasNext()) {
  232. trie->next_ = new Trie[nCodes_];
  233. if (trie->blank_) {
  234. trie->blank_->additionalLength_ += 1;
  235. trie->blank_->maxBlanksToScan_ -= 1;
  236. }
  237. Owner<BlankTrie> blankOwner(trie->blank_.extract());
  238. const BlankTrie *b = blankOwner.pointer();
  239. for (int i = 0; i < nCodes_; i++) {
  240. Trie *p = &trie->next_[i];
  241. if (b && b->codeIsBlank(i))
  242. trie->next_[i].blank_ = (blankOwner
  243. ? blankOwner.extract()
  244. : new BlankTrie(*b));
  245. p->token_ = trie->token_;
  246. p->tokenLength_ = trie->tokenLength_;
  247. p->priority_ = trie->priority_;
  248. p->nCodes_ = nCodes_;
  249. }
  250. if (b)
  251. // -1 because 1 was added above
  252. copyInto(trie, b, b->additionalLength_ - 1);
  253. }
  254. return &trie->next_[c];
  255. }
  256. #ifdef SP_NAMESPACE
  257. }
  258. #endif