PathTable.C 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605
  1. /*
  2. * CDE - Common Desktop Environment
  3. *
  4. * Copyright (c) 1993-2012, The Open Group. All rights reserved.
  5. *
  6. * These libraries and programs are free software; you can
  7. * redistribute them and/or modify them under the terms of the GNU
  8. * Lesser General Public License as published by the Free Software
  9. * Foundation; either version 2 of the License, or (at your option)
  10. * any later version.
  11. *
  12. * These libraries and programs are distributed in the hope that
  13. * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14. * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15. * PURPOSE. See the GNU Lesser General Public License for more
  16. * details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with these libraries and programs; if not, write
  20. * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21. * Floor, Boston, MA 02110-1301 USA
  22. */
  23. // $XConsortium: PathTable.cc /main/4 1996/06/11 17:07:57 cde-hal $
  24. #include "PathTable.h"
  25. #include "Debug.h"
  26. #include "Feature.h"
  27. #include "utility/debug.h"
  28. extern SymbolTable* gElemSymTab;
  29. unsigned int letterHash(const LetterType& x)
  30. {
  31. return x;
  32. }
  33. EncodedPath::~EncodedPath()
  34. {
  35. delete [] f_array ;
  36. f_SVectors.clearAndDestroy();
  37. delete f_copyOfS;
  38. }
  39. EncodedPath::EncodedPath(SSPath* p, unsigned int asPattern) :
  40. f_size(p -> entries()), f_patternSize(0), f_SVectors(letterHash),
  41. f_wildCard(gElemSymTab -> wildCardId()),
  42. f_unlimitedWildCard(gElemSymTab -> unlimitedWildCardId()),
  43. f_copyOfS(new BitVector(WORD_SIZE, 0))
  44. {
  45. f_array = new LetterType[f_size];
  46. CC_TPtrDlistIterator<PathTerm> l_pathIter(*p);
  47. BitVector *l_bv = 0;
  48. int i = 0;
  49. while (++l_pathIter) {
  50. //
  51. // Count pattern size, excluding "*".
  52. //
  53. f_array[i] = (LetterType)((l_pathIter.key() -> symbol()).id());
  54. if ( f_array[i] != f_unlimitedWildCard )
  55. f_patternSize++;
  56. i++;
  57. }
  58. if ( asPattern == true ) {
  59. //
  60. // Compute the S arrays.
  61. // Examples pat = a b a c
  62. // S(a) = 1 0 1 0
  63. // S(b) = 0 0 0 0
  64. // S(c) = 0 0 0 1
  65. //
  66. // pat = a ? a c
  67. // S(a) = 1 1 1 0
  68. // S(c) = 0 1 0 1
  69. // S(?) = 0 1 0 0
  70. //
  71. // pat = a * a c
  72. // S(a) = 1 1 0
  73. // S(c) = 0 0 1
  74. // S(*) = 1 0 0
  75. //
  76. l_pathIter.reset();
  77. int j = patternLength()-1;
  78. LetterType l_id;
  79. // i records each PathTerm position in the list
  80. int i=0;
  81. while (++l_pathIter) {
  82. l_id = (LetterType)((l_pathIter.key() -> symbol()).id());
  83. l_bv = f_SVectors.findValue(&l_id);
  84. if ( l_bv == 0 ) {
  85. l_bv = new BitVector(patternLength(), 0);
  86. f_SVectors.insertKeyAndValue(new LetterType(l_id), l_bv);
  87. }
  88. ///////////////////////////////////////////////////
  89. // reverse the order in the bit vector:
  90. // MSB position ==> set LSB bit in the bit vector
  91. ///////////////////////////////////////////////////
  92. if ( l_id != f_unlimitedWildCard ) {
  93. l_bv -> setBitTo(j, 1);
  94. //////////////////////////////////////////////////
  95. // only set position when an expr is attatched
  96. // to the term
  97. //////////////////////////////////////////////////
  98. if ( l_pathIter.key() -> pqexpr() && l_id != f_wildCard ) {
  99. /*
  100. MESSAGE(cerr, "==========");
  101. debug(cerr, (void*)l_bv);
  102. debug(cerr, *l_bv);
  103. debug(cerr, i);
  104. debug(cerr, j);
  105. */
  106. l_bv -> recordPositions(i, j);
  107. }
  108. j--;
  109. } else {
  110. ///////////////////////////////////////////////////
  111. // do not set bits for unlimitedWildCards that
  112. // begin or end the pattern
  113. ///////////////////////////////////////////////////
  114. if ( j < patternLength() - 1 )
  115. l_bv -> setBitTo(j+1, 1);
  116. }
  117. i++;
  118. }
  119. ///////////////////////////////////////////////////
  120. // special treatment to the ? symbols:
  121. // OR S(?) back to each S's.
  122. ///////////////////////////////////////////////////
  123. BitVector* l_BitVectorWildCard = f_SVectors.findValue(&f_wildCard);
  124. hashTableIterator<LetterType, BitVector> l_SVIter(f_SVectors);
  125. if ( l_BitVectorWildCard ) {
  126. while (++l_SVIter) {
  127. if ( *l_SVIter.key() != f_wildCard &&
  128. *l_SVIter.key() != f_unlimitedWildCard
  129. )
  130. {
  131. (*l_SVIter.value()) |= (*l_BitVectorWildCard);
  132. }
  133. }
  134. }
  135. /*
  136. #ifdef DEBUG
  137. l_SVIter.reset();
  138. while (++l_SVIter) {
  139. debug(cerr, (*l_SVIter.key()));
  140. debug(cerr, (*l_SVIter.value()));
  141. }
  142. #endif
  143. */
  144. }
  145. }
  146. unsigned int
  147. _match(LetterType* pattern, int n,
  148. LetterType* text, int m,
  149. LetterType wildCard,
  150. LetterType unlimitedWildCard
  151. )
  152. {
  153. // A naive (brute force) string matching algorithm that handles
  154. // wild card (?) and unlimited wild card (*) symbols.
  155. unsigned int findMisMatch = false;
  156. for ( int i=0; i<n; i++ ) { // over the "text" string
  157. for ( int j=0; j<m; j++ ) { // over the pattern
  158. if ( pattern[j] == wildCard ) {
  159. continue;
  160. } else
  161. if ( pattern[j] == unlimitedWildCard ) {
  162. for (;;) {
  163. j++;
  164. if ( j == m )
  165. return true;
  166. if ( pattern[j] != unlimitedWildCard )
  167. break;
  168. }
  169. for ( int x=i+1; x<n; x++ ) {
  170. if ( text[x] == pattern[j] )
  171. if (
  172. _match(
  173. &pattern[j], m-j,
  174. &text[x], n-x,
  175. wildCard, unlimitedWildCard
  176. ) == true
  177. )
  178. return true;
  179. }
  180. } else {
  181. if ( pattern[j] != text[i] ) {
  182. findMisMatch = true;
  183. break;
  184. }
  185. }
  186. }
  187. if ( findMisMatch == false )
  188. return true;
  189. }
  190. return false;
  191. }
  192. unsigned int
  193. EncodedPath::match(EncodedPath& text, SSPath* patternPath, SSPath* elementPath)
  194. {
  195. ////////////////////////////////////////////////
  196. // text: the encoded string for the element path
  197. // elementPath: the element path
  198. //
  199. // this: the encoded string for the pattern string
  200. // patternPath: the pattern path
  201. ////////////////////////////////////////////////
  202. ////////////////////////////////////////////////
  203. // the unlimited wildcard vector
  204. ////////////////////////////////////////////////
  205. BitVector* l_U = f_SVectors.findValue(&f_unlimitedWildCard);
  206. if ( l_U && patternLength() == 0 )
  207. return true;
  208. ////////////////////////////////////////////////
  209. // the wildcard vector
  210. ////////////////////////////////////////////////
  211. BitVector* l_W = f_SVectors.findValue(&f_wildCard);
  212. // the S vector of each Letter, including that for '?'
  213. BitVector* l_S = 0;
  214. // the accumulated result vector
  215. BitVector l_R(patternLength(), 0);
  216. // placeholder for '*''s contribution
  217. BitVector l_I(patternLength(), 0);
  218. // hole position record of each path term in the pattern path
  219. posRecord l_pr;
  220. // expr pointer of each path term in the pattern path
  221. PQExpr* expr = 0;
  222. CC_TPtrDlistIterator<PathTerm>* elementPathNextPtr = 0;
  223. if ( elementPath )
  224. elementPathNextPtr = new CC_TPtrDlistIterator<PathTerm>(*elementPath);
  225. // loop over text string
  226. for ( int i=0; i<text.length(); i++) {
  227. if ( elementPath )
  228. ++(*elementPathNextPtr);
  229. //MESSAGE(cerr, "===================");
  230. //debug(cerr, text.f_array[i]);
  231. ////////////////////////////////////////////////
  232. // get this character's vector, including '?'s
  233. ////////////////////////////////////////////////
  234. l_S = f_SVectors.findValue(&text.f_array[i]);
  235. if ( l_S ) {
  236. //debug(cerr, (void*)l_S);
  237. //debug(cerr, *l_S);
  238. //MESSAGE(cerr, "checking qualifies");
  239. //////////////////////
  240. // check qualifies.
  241. //////////////////////
  242. if ( patternPath && l_S -> positionArray() ) {
  243. ////////////////////
  244. // get a copy of S
  245. ////////////////////
  246. f_copyOfS -> setTo(*l_S);
  247. //debug(cerr, *patternPath);
  248. positionArrayIteratorT l_positionNext(*(l_S -> positionArray()));
  249. while (++ l_positionNext) {
  250. l_pr = l_positionNext.key();
  251. //cerr << " calling patternPath -> fastGetAt(): " << (void*)patternPath << "l_pr.pathTermPos= " << l_pr.pathTermPos << endl;
  252. expr = patternPath -> fastGetAt(l_pr.pathTermPos) -> pqexpr();
  253. /*
  254. debug(cerr, int(expr));
  255. if ( expr ) {
  256. MESSAGE(cerr, "qualify checking is needed.");
  257. debug(cerr, *(elementPathNext -> key()));
  258. }
  259. */
  260. if ( expr &&
  261. expr->evaluate(elementPathNextPtr->key()->element()) == PQFalse )
  262. {
  263. //MESSAGE(cerr, form("set position %d to 0", l_pr.bitPos));
  264. f_copyOfS -> setBitTo(l_pr.bitPos, 0);
  265. }
  266. }
  267. /////////////////////////////////////////
  268. // make l_S point at its modified copy
  269. /////////////////////////////////////////
  270. l_S = f_copyOfS;
  271. }
  272. } else
  273. l_S = l_W;
  274. ////////////////////////////////////////////////
  275. // get unlimited wildcard's vector.
  276. ////////////////////////////////////////////////
  277. if ( l_U ) {
  278. l_I.setTo(l_R);
  279. l_I &= (*l_U);
  280. //MESSAGE(cerr, "l_I");
  281. //debug(cerr, l_I);
  282. }
  283. ////////////////////////////////////////////////
  284. // shift the partial result vector right once
  285. ////////////////////////////////////////////////
  286. l_R.shiftRightOneBit();
  287. //MESSAGE(cerr, "after l_R >> 1");
  288. //debug(cerr, l_R);
  289. ////////////////////////////////////////////////
  290. // AND in this character's vector
  291. ////////////////////////////////////////////////
  292. if ( l_S ) {
  293. l_R &= (*l_S);
  294. //debug(cerr, *l_S);
  295. //MESSAGE(cerr, "after AND with l_S");
  296. //debug(cerr, l_R);
  297. } else {
  298. ///////////////////////////////////////////////
  299. // this branch is impossible to reach.
  300. ///////////////////////////////////////////////
  301. //MESSAGE(cerr, "l_R set all bits to 0");
  302. l_R.setAllBitsTo(0);
  303. }
  304. ///////////////////////////////////////////////
  305. // OR in unlimited wild char's vector.
  306. ///////////////////////////////////////////////
  307. if ( l_U ) {
  308. l_R |= l_I;
  309. //MESSAGE(cerr, "after OR with l_I");
  310. //debug(cerr, l_R);
  311. }
  312. //debug(cerr, l_R);
  313. //MESSAGE(cerr, "===================");
  314. ///////////////////////////////////////////////
  315. // Use this test to get the first matched position
  316. // if ( l_R.getBit(0) == 1 )
  317. // return true;
  318. ///////////////////////////////////////////////
  319. }
  320. delete elementPathNextPtr;
  321. ///////////////////////////////////////////////
  322. //
  323. // the last symbol must match
  324. //
  325. ///////////////////////////////////////////////
  326. if ( l_R.getBit(0) == 1 )
  327. return true;
  328. else
  329. return false;
  330. }
  331. basePathFeature::~basePathFeature()
  332. {
  333. delete f_path;
  334. delete f_featureSet;
  335. }
  336. PathFeature::~PathFeature()
  337. {
  338. delete f_encodedPath ;
  339. }
  340. unsigned int basePathFeature::operator==(const basePathFeature& pf) const
  341. {
  342. cerr << "Warning: basePathFeature::operator==() called\n";
  343. return true;
  344. }
  345. unsigned int PathFeature::operator==(const PathFeature& pf) const
  346. {
  347. cerr << "Warning: PathFeature::operator==() called\n";
  348. return true;
  349. }
  350. void pathSelector(BitVector& bv)
  351. {
  352. }
  353. unsigned int
  354. PathFeature::match(SSPath& p)
  355. {
  356. EncodedPath l_ep(&p);
  357. if ( f_path -> containSelector() == false )
  358. return f_encodedPath -> match(l_ep, 0, 0);
  359. else
  360. return f_encodedPath -> match(l_ep, f_path, &p);
  361. }
  362. // /////////////////////////////////////////////////////////////////////////
  363. //
  364. // class PathTable
  365. //
  366. // /////////////////////////////////////////////////////////////////////////
  367. unsigned symHashFunc(const Symbol& s)
  368. {
  369. return s.hash();
  370. }
  371. PathTable::PathTable()
  372. : f_lastSymIndex(0),
  373. f_lastSymIndexCount(0)
  374. {
  375. }
  376. PathTable::~PathTable()
  377. {
  378. f_pathFeatureList.clearAndDestroy();
  379. for (unsigned int i = 0 ; i < f_lastSymIndexCount; i++) {
  380. //f_lastSymIndex[i].clearAndDestroy();
  381. f_lastSymIndex[i] -> clear();
  382. delete f_lastSymIndex[i];
  383. }
  384. delete f_lastSymIndex;
  385. }
  386. LetterType PathTable::findIndex(SSPath& p)
  387. {
  388. return (LetterType) ((p.last() -> symbol()).id());
  389. }
  390. void PathTable::initLastSymIndex()
  391. {
  392. f_lastSymIndexCount = gElemSymTab -> IdsAssigned()+1;
  393. f_lastSymIndex = new CC_TPtrDlist_PathFeature_Ptr_T[f_lastSymIndexCount];
  394. for (unsigned int i=0; i<f_lastSymIndexCount; i++)
  395. f_lastSymIndex[i] = new CC_TPtrDlist<PathFeature>;
  396. CC_TPtrDlistIterator<PathFeature> l_pfIter(f_pathFeatureList);
  397. PathFeature* l_pathFeature = 0;
  398. LetterType x;
  399. while ( ++l_pfIter ) {
  400. l_pathFeature = l_pfIter.key();
  401. x = findIndex( *(l_pathFeature->path()) );
  402. //f_lastSymIndex[x] -> prepend(l_pathFeature); // backward order
  403. f_lastSymIndex[x] -> append(l_pathFeature); // select the matching rule
  404. // in the same order rules
  405. // appear in the
  406. // stylesheet
  407. }
  408. }
  409. FeatureSet* PathTable::getFeatureSet(SSPath& p)
  410. {
  411. if ( f_lastSymIndex == 0 ) {
  412. initLastSymIndex();
  413. }
  414. int pids[3];
  415. FeatureSet* fs[3];
  416. fs[0] = getFeatureSet(findIndex(p), p, pids[0]);
  417. fs[1] = getFeatureSet(gElemSymTab -> wildCardId(), p, pids[1]);
  418. fs[2] = getFeatureSet(gElemSymTab -> unlimitedWildCardId(), p, pids[2]);
  419. int index = 0;
  420. int x = pids[0];
  421. for ( int i=1; i<3; i++ ) {
  422. if ( pids[i] > x )
  423. index = i;
  424. }
  425. return fs[index];
  426. }
  427. FeatureSet*
  428. PathTable::getFeatureSet(int bucketIndex, SSPath& p, int& pathId)
  429. {
  430. CC_TPtrDlistIterator<PathFeature> l_pathFeatureIter(*f_lastSymIndex[bucketIndex]);
  431. l_pathFeatureIter.reset();
  432. PathFeature* l_pathFeature = 0;
  433. while ( ++l_pathFeatureIter ) {
  434. l_pathFeature = l_pathFeatureIter.key();
  435. //debug(cerr, *(l_pathFeature->path()));
  436. //debug(cerr, *(l_pathFeature->featureSet()));
  437. if ( l_pathFeature -> match(p) ) {
  438. //MESSAGE(cerr, "match");
  439. pathId = l_pathFeature -> id();
  440. return l_pathFeature -> featureSet();
  441. }
  442. }
  443. pathId = 0;
  444. return 0;
  445. }
  446. void PathTable::addPathFeatureSet(PathFeature* x)
  447. {
  448. //MESSAGE(cerr, "addPathFeatureSet():");
  449. //debug(cerr, *(x->path()));
  450. //debug(cerr, *(x->featureSet()));
  451. if ( x -> path() -> containSelector() )
  452. x -> path() -> fastGetIndex();
  453. EncodedPath* l_epath = new EncodedPath(x -> path(), true);
  454. unsigned int id = f_pathFeatureList.entries()+1;
  455. x -> setEncodedPath(l_epath);
  456. x -> setID(id);
  457. f_pathFeatureList.insert(x);
  458. }
  459. ostream& operator<<(ostream& out, PathTable& pt)
  460. {
  461. CC_TPtrDlistIterator<PathFeature> l_pfIter(pt.f_pathFeatureList);
  462. PathFeature* l_pathFeature = 0;
  463. while ( ++l_pfIter ) {
  464. l_pathFeature = l_pfIter.key();
  465. out << *(l_pathFeature)->path() << ' ' << *(l_pathFeature->featureSet())
  466. << endl;
  467. #ifdef DEBUG
  468. // this will cause errors if gTopOfStack is not set properly
  469. FeatureSet *set = l_pathFeature->featureSet()->evaluate();
  470. out << *set << endl;
  471. delete set ;
  472. #endif
  473. }
  474. return out;
  475. }
  476. void PathFeatureList::appendList(PathFeatureList& list)
  477. {
  478. PathFeatureListIterator l_Iter(list);
  479. while ( ++ l_Iter ) {
  480. append(l_Iter.key());
  481. }
  482. }
  483. PathFeatureList::~PathFeatureList()
  484. {
  485. clearAndDestroy();
  486. }