mphf_funcs.C 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. /*
  2. * CDE - Common Desktop Environment
  3. *
  4. * Copyright (c) 1993-2012, The Open Group. All rights reserved.
  5. *
  6. * These libraries and programs are free software; you can
  7. * redistribute them and/or modify them under the terms of the GNU
  8. * Lesser General Public License as published by the Free Software
  9. * Foundation; either version 2 of the License, or (at your option)
  10. * any later version.
  11. *
  12. * These libraries and programs are distributed in the hope that
  13. * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14. * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15. * PURPOSE. See the GNU Lesser General Public License for more
  16. * details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with these libraries and programs; if not, write
  20. * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21. * Floor, Boston, MA 02110-1301 USA
  22. */
  23. /*
  24. * $XConsortium: mphf_funcs.cc /main/4 1996/07/18 14:33:08 drk $
  25. *
  26. * Copyright (c) 1993 HAL Computer Systems International, Ltd.
  27. * All rights reserved. Unpublished -- rights reserved under
  28. * the Copyright Laws of the United States. USE OF A COPYRIGHT
  29. * NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
  30. * OR DISCLOSURE.
  31. *
  32. * THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
  33. * SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
  34. * DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
  35. * PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
  36. * INTERNATIONAL, LTD.
  37. *
  38. * RESTRICTED RIGHTS LEGEND
  39. * Use, duplication, or disclosure by the Government is subject
  40. * to the restrictions as set forth in subparagraph (c)(l)(ii)
  41. * of the Rights in Technical Data and Computer Software clause
  42. * at DFARS 252.227-7013.
  43. *
  44. * HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
  45. * 1315 Dell Avenue
  46. * Campbell, CA 95008
  47. *
  48. */
  49. #include "hmphf/mphf_funcs.h"
  50. #define NO_BACKTRACKS 50
  51. #define NO_MAPPINGS 20
  52. #define NO_SEARCHINGS 10
  53. //compute_a_mphf(char* key_file, params& pms, char* mphf_spec_file)
  54. int compute_a_mphf(char** keys, params& pms, buffer& mphf_buffer)
  55. {
  56. mphf_hash_table ht(pms);
  57. int i, k;
  58. for ( k=0; k<NO_MAPPINGS; k++ ) {
  59. //buckets bs(key_file, pms);
  60. buckets bs(keys, pms);
  61. //MESSAGE(cerr, "buckets built");
  62. /* search for a MPHF */
  63. for ( i = 0; i<NO_SEARCHINGS; i++) {
  64. //MESSAGE(cerr, form("%dth search:", i+1));
  65. bs.set_control_bit(-1);
  66. ht.clear();
  67. if ( search(bs, ht, pms) == 0 ) {
  68. //MESSAGE(cerr, "search done");
  69. /* verify computed MPHF */
  70. if ( verify(bs, ht, pms) == 0 ) {
  71. /* output the computed MPHF */
  72. return write_spec(bs, pms, mphf_buffer);
  73. } else {
  74. return -1;
  75. }
  76. }
  77. }
  78. pms.v_r++;
  79. }
  80. return 1;
  81. }
  82. int search(buckets& bs, mphf_hash_table& ht, params& pms)
  83. {
  84. int i = 0,
  85. fails = 0,
  86. patternFit = 0;
  87. int num_backtracks = 0;
  88. int no_search_fails = bs.no_buckets() / 2;
  89. int_pattern new_pattern(bs.max_bucket_sz());
  90. while ( i < bs.no_buckets() && fails < no_search_fails ) {
  91. if ( bs[i] == 0 || bs[i] -> no_keys() == 0 ) {
  92. i++;
  93. continue;
  94. };
  95. patternFit = -1;
  96. //MESSAGE(cerr, "before fit");
  97. while ( bs.get_pattern(i, new_pattern, pms) == 0 ) {
  98. //debug(cerr, new_pattern);
  99. //cerr << new_pattern.no_elmts();
  100. patternFit = ht.fast_fit(new_pattern);
  101. if ( patternFit >= 0 ) {
  102. //MESSAGE(cerr, " fit");
  103. bs[i] -> set_g_value(patternFit);
  104. break;
  105. } else {
  106. //MESSAGE(cerr, " fail");
  107. fails++;
  108. }
  109. }
  110. if ( patternFit == -1 ) {
  111. //MESSAGE(cerr, "BACKTRACK");
  112. if ( i <= 0 ) break;
  113. i--;
  114. //fails = 0;
  115. if ( num_backtracks > bs.no_buckets() ) {
  116. return -2;
  117. } else
  118. num_backtracks++;
  119. } else {
  120. //MESSAGE(cerr, "increment i");
  121. i++;
  122. }
  123. }
  124. return ( patternFit >= 0 ) ? 0 : -1;
  125. }
  126. int verify(buckets& bs, mphf_hash_table& ht, params& pms)
  127. {
  128. int i;
  129. int_pattern new_pattern(bs.max_bucket_sz());
  130. //debug(cerr, ht.num_filled_slots());
  131. //debug(cerr, ht.no_slots());
  132. //debug(cerr, pms.n);
  133. if ( ht.num_filled_slots() != ht.no_slots() ) {
  134. MESSAGE(cerr, "panic: hash table not full or 'too' full");
  135. MESSAGE(cerr, form("filled_slots = %d\n", ht.num_filled_slots()));
  136. MESSAGE(cerr, form("no_slots = %d\n", ht.no_slots()));
  137. return -1;
  138. }
  139. ht.clear();
  140. for ( i = 0; i < bs.no_buckets(); i++ ) {
  141. if ( bs[i] == 0 || bs[i] -> no_keys() == 0 ) continue;
  142. bs.use_current_params(i, new_pattern, pms, true);
  143. //debug(cerr, bs[i] -> orig_pos());
  144. //debug(cerr, new_pattern);
  145. if ( ht.fit_hash_table(new_pattern) != 0 ) {
  146. MESSAGE(cerr, "panic: collision occurred");
  147. return -1 ;
  148. }
  149. }
  150. //debug(cerr, ht.num_filled_slots());
  151. //debug(cerr, ht.no_slots());
  152. if ( ht.num_filled_slots() != ht.no_slots() ) {
  153. MESSAGE(cerr, "panic: hash table not full after test insertion");
  154. MESSAGE(cerr, form("filled_slots = %d\n", ht.num_filled_slots()));
  155. MESSAGE(cerr, form("no_slots = %d\n", ht.no_slots()));
  156. return -1;
  157. } else {
  158. MESSAGE(cerr, "verifying OK");
  159. return 0;
  160. }
  161. }
  162. int write_spec(buckets& bs, params& pms, buffer& mphf_buffer)
  163. {
  164. unsigned int gv_bits = 0;
  165. if ( pms.v_n > 0 ) {
  166. gv_bits = (int)(flog2(pms.v_n)) + 1; /* bits of each g value.*/
  167. if ( floor(flog2(pms.v_n)) < flog2(pms.v_n) )
  168. gv_bits++;
  169. }
  170. int uints_of_cmpat_gv = gv_bits * pms.v_b;
  171. if ( uints_of_cmpat_gv % BITS_IN(unsigned) > 0 )
  172. uints_of_cmpat_gv += BITS_IN(unsigned);
  173. uints_of_cmpat_gv /= BITS_IN(unsigned);
  174. unsigned int *c_array = new unsigned[uints_of_cmpat_gv];
  175. compact(bs, c_array, gv_bits, mphf_buffer.get_swap_order());
  176. unsigned int g_array_bytes = sizeof(unsigned int) * uints_of_cmpat_gv;
  177. int spec_bytes = 7 * ( sizeof(unsigned int) + 1) + g_array_bytes + 1;
  178. mphf_buffer.expand_chunk(spec_bytes);
  179. ostringstream fout(mphf_buffer.get_base(), ios::out);
  180. fout << pms.v_n << "\n";
  181. fout << pms.v_b << "\n";
  182. fout << pms.v_p1 << "\n";
  183. fout << pms.v_p2 << "\n";
  184. fout << pms.v_r << "\n";
  185. fout << pms.v_seed << "\n";
  186. fout << g_array_bytes << '\t';
  187. fout.write((char*)c_array, g_array_bytes);
  188. fout << '\n';
  189. int fout_len = fout.str().size();
  190. mphf_buffer.set_content_sz(fout_len);
  191. memcpy(mphf_buffer.get_base(), fout.str().c_str(), fout_len);
  192. delete [] c_array;
  193. return 0;
  194. }
  195. int compact(buckets& bs, unsigned s[], int t, Boolean swap)
  196. {
  197. int target, k, i, remaining_bits, branch = 0;
  198. unsigned unsigned_g, high_part_bits;
  199. unsigned lower_part_bits = 0;
  200. remaining_bits = BITS_IN(unsigned);
  201. k = target = 0;
  202. unsigned* y = new unsigned[bs.no_buckets()];
  203. for ( i = 0; i < bs.no_buckets(); y[i++] = 0 );
  204. for ( i = 0; i < bs.no_buckets(); i++ ) {
  205. if ( bs[i] && bs[i] -> no_keys() > 0 ) {
  206. y[bs[i] -> orig_pos()] =
  207. ((bs[i] -> g_value()) << 1) + bs[i] -> control_bit();
  208. /*
  209. cerr << bs[i] -> orig_pos() << " ";
  210. cerr << bs[i] -> g_value() << " ";
  211. cerr << bs[i] -> control_bit() << " ";
  212. cerr << y[bs[i] -> orig_pos()] << "\n";
  213. */
  214. }
  215. }
  216. /*
  217. MESSAGE(cerr, "=======BIT ARRAY:");
  218. debug(cerr, bs.no_buckets());
  219. for ( i = 0; i < bs.no_buckets(); i++ )
  220. cerr << i << " " << y[i] << "\n";
  221. cerr << "=======\n";
  222. */
  223. //MESSAGE(cerr, "=======BIT ARRAY (before swap):");
  224. for ( i = 0; i < bs.no_buckets(); i++ ) {
  225. unsigned_g = y[i];
  226. /*
  227. debug(cerr, i);
  228. debug(cerr, hex(unsigned_g));
  229. */
  230. /*
  231. debug(cerr, form("%x", c_bit));
  232. debug(cerr, "=====");
  233. */
  234. if (remaining_bits >= t ) {
  235. unsigned_g <<= (remaining_bits -t);
  236. target |= unsigned_g;
  237. remaining_bits -= t;
  238. branch = 0;
  239. } else {
  240. high_part_bits = getbits(unsigned_g,t,remaining_bits);
  241. lower_part_bits = unsigned_g & ~(~0 << (t-remaining_bits));
  242. lower_part_bits <<= (BITS_IN(unsigned)- (t-remaining_bits));
  243. s[k++] = target | high_part_bits;
  244. #ifdef PORTABLE_DB
  245. if ( swap == true )
  246. ORDER_SWAP_UINT(s[k-1]);
  247. #endif
  248. target = lower_part_bits;
  249. remaining_bits = BITS_IN(unsigned) - ( t - remaining_bits );
  250. branch =1;
  251. }
  252. }
  253. if ( bs.no_buckets() > 0 ) {
  254. s[k] = ( branch == 0 ) ? target : lower_part_bits;
  255. #ifdef PORTABLE_DB
  256. if ( swap == true )
  257. ORDER_SWAP_UINT(s[k]);
  258. #endif
  259. }
  260. /*
  261. MESSAGE(cerr, "=======BIT ARRAY (after swap):");
  262. debug(cerr, k+1);
  263. for ( i = 0; i <= k; i++ )
  264. cerr << i << " " << s[i] << "\n";
  265. cerr << "=======\n";
  266. */
  267. delete [] y;
  268. return 0;
  269. }
  270. int wc(char* file_name, unsigned int& lines, unsigned int& max_length)
  271. {
  272. char buf[BUFSIZ];
  273. fstream in(file_name, ios::in);
  274. if ( !in ) {
  275. MESSAGE(cerr, "can not open key file");
  276. throw(streamException(in.rdstate()));
  277. }
  278. lines = 0;
  279. max_length = 0;
  280. while ( in.getline(buf, BUFSIZ) ) {
  281. max_length = MAX(strlen(buf)-1, max_length);
  282. lines++;
  283. }
  284. if ( lines == 0 ) {
  285. MESSAGE(cerr, "empty key file");
  286. return -1;
  287. } else
  288. return 0;
  289. }