hash.c 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. #include "hash.h"
  5. /***
  6. * String hash tables.
  7. */
  8. Stringtab *tfree;
  9. Stringtab*
  10. taballoc(void)
  11. {
  12. static Stringtab *t;
  13. static uint nt;
  14. if(tfree){
  15. Stringtab *tt = tfree;
  16. tfree = tt->link;
  17. return tt;
  18. }
  19. if(nt == 0){
  20. t = malloc(64000*sizeof(Stringtab));
  21. if(t == 0)
  22. sysfatal("out of memory");
  23. nt = 64000;
  24. }
  25. nt--;
  26. return t++;
  27. }
  28. void
  29. tabfree(Stringtab *tt)
  30. {
  31. tt->link = tfree;
  32. tfree = tt;
  33. }
  34. char*
  35. xstrdup(char *s, int len)
  36. {
  37. char *r;
  38. static char *t;
  39. static int nt;
  40. if(nt < len){
  41. t = malloc(512*1024+len);
  42. if(t == 0)
  43. sysfatal("out of memory");
  44. nt = 512*1024;
  45. }
  46. r = t;
  47. t += len;
  48. nt -= len;
  49. memmove(r, s, len);
  50. return r;
  51. }
  52. static uint
  53. hash(char *s, int n)
  54. {
  55. uint h;
  56. uchar *p, *ep;
  57. h = 0;
  58. for(p=(uchar*)s, ep=p+n; p<ep; p++)
  59. h = h*37 + *p;
  60. return h;
  61. }
  62. static void
  63. rehash(Hash *hh)
  64. {
  65. int h;
  66. Stringtab *s;
  67. if(hh->nstab == 0)
  68. hh->nstab = 1024;
  69. else
  70. hh->nstab = hh->ntab*2;
  71. free(hh->stab);
  72. hh->stab = mallocz(hh->nstab*sizeof(Stringtab*), 1);
  73. if(hh->stab == nil)
  74. sysfatal("out of memory");
  75. for(s=hh->all; s; s=s->link){
  76. h = hash(s->str, s->n) % hh->nstab;
  77. s->hash = hh->stab[h];
  78. hh->stab[h] = s;
  79. }
  80. }
  81. Stringtab*
  82. findstab(Hash *hh, char *str, int n, int create)
  83. {
  84. uint h;
  85. Stringtab *tab, **l;
  86. if(hh->nstab == 0)
  87. rehash(hh);
  88. h = hash(str, n) % hh->nstab;
  89. for(tab=hh->stab[h], l=&hh->stab[h]; tab; l=&tab->hash, tab=tab->hash)
  90. if(n==tab->n && memcmp(str, tab->str, n) == 0){
  91. *l = tab->hash;
  92. tab->hash = hh->stab[h];
  93. hh->stab[h] = tab;
  94. return tab;
  95. }
  96. if(!create)
  97. return nil;
  98. hh->sorted = 0;
  99. tab = taballoc();
  100. tab->str = xstrdup(str, n);
  101. tab->hash = hh->stab[h];
  102. tab->link = hh->all;
  103. hh->all = tab;
  104. tab->n = n;
  105. tab->count = 0;
  106. hh->stab[h] = tab;
  107. hh->ntab++;
  108. if(hh->ntab > 2*hh->nstab && !(hh->ntab&(hh->ntab-1)))
  109. rehash(hh);
  110. return tab;
  111. }
  112. int
  113. scmp(Stringtab *a, Stringtab *b)
  114. {
  115. int n, x;
  116. if(a == 0)
  117. return 1;
  118. if(b == 0)
  119. return -1;
  120. n = a->n;
  121. if(n > b->n)
  122. n = b->n;
  123. x = memcmp(a->str, b->str, n);
  124. if(x != 0)
  125. return x;
  126. if(a->n < b->n)
  127. return -1;
  128. if(a->n > b->n)
  129. return 1;
  130. return 0; /* shouldn't happen */
  131. }
  132. Stringtab*
  133. merge(Stringtab *a, Stringtab *b)
  134. {
  135. Stringtab *s, **l;
  136. l = &s;
  137. while(a || b){
  138. if(scmp(a, b) < 0){
  139. *l = a;
  140. l = &a->link;
  141. a = a->link;
  142. }else{
  143. *l = b;
  144. l = &b->link;
  145. b = b->link;
  146. }
  147. }
  148. *l = 0;
  149. return s;
  150. }
  151. Stringtab*
  152. mergesort(Stringtab *s)
  153. {
  154. Stringtab *a, *b;
  155. int delay;
  156. if(s == nil)
  157. return nil;
  158. if(s->link == nil)
  159. return s;
  160. a = b = s;
  161. delay = 1;
  162. while(a && b){
  163. if(delay) /* easy way to handle 2-element list */
  164. delay = 0;
  165. else
  166. a = a->link;
  167. if(b = b->link)
  168. b = b->link;
  169. }
  170. b = a->link;
  171. a->link = nil;
  172. a = mergesort(s);
  173. b = mergesort(b);
  174. return merge(a, b);
  175. }
  176. Stringtab*
  177. sortstab(Hash *hh)
  178. {
  179. if(!hh->sorted){
  180. hh->all = mergesort(hh->all);
  181. hh->sorted = 1;
  182. }
  183. return hh->all;
  184. }
  185. int
  186. Bwritehash(Biobuf *b, Hash *hh)
  187. {
  188. Stringtab *s;
  189. s = sortstab(hh);
  190. Bprint(b, "# hash table\n");
  191. for(; s; s=s->link){
  192. if(s->count <= 0)
  193. continue;
  194. Bwrite(b, s->str, s->n);
  195. Bprint(b, "\t%d\n", s->count);
  196. }
  197. if(Bflush(b) == Beof)
  198. return -1;
  199. return 0;
  200. }
  201. void
  202. Breadhash(Biobuf *b, Hash *hh, int scale)
  203. {
  204. char *s;
  205. char *t;
  206. int n;
  207. s = Brdstr(b, '\n', 1);
  208. if(s == nil)
  209. return;
  210. if(strcmp(s, "# hash table") != 0)
  211. sysfatal("bad hash table format");
  212. while(s = Brdline(b, '\n')){
  213. s[Blinelen(b)-1] = 0;
  214. t = strrchr(s, '\t');
  215. if(t == nil)
  216. sysfatal("bad hash table format");
  217. *t++ = '\0';
  218. if(*t < '0' || *t > '9')
  219. sysfatal("bad hash table format");
  220. n = strtol(t, &t, 10);
  221. if(*t != 0)
  222. sysfatal("bad hash table format");
  223. findstab(hh, s, strlen(s), 1)->count += n*scale;
  224. }
  225. }
  226. void
  227. freehash(Hash *h)
  228. {
  229. Stringtab *s, *next;
  230. for(s=h->all; s; s=next){
  231. next = s->link;
  232. tabfree(s);
  233. }
  234. free(h->stab);
  235. free(h);
  236. }
  237. Biobuf*
  238. Bopenlock(char *file, int mode)
  239. {
  240. int i;
  241. Biobuf *b;
  242. char err[ERRMAX];
  243. b = nil;
  244. for(i=0; i<120; i++){
  245. if((b = Bopen(file, mode)) != nil)
  246. break;
  247. rerrstr(err, sizeof err);
  248. if(strstr(err, "file is locked") == nil)
  249. break;
  250. sleep(1000);
  251. }
  252. return b;
  253. }