hash.c 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. #include "hash.h"
  5. /***
  6. * String hash tables.
  7. */
  8. Stringtab *tfree;
  9. Stringtab*
  10. taballoc(void)
  11. {
  12. static Stringtab *t;
  13. static uint nt;
  14. if(tfree){
  15. Stringtab *tt = tfree;
  16. tfree = tt->link;
  17. return tt;
  18. }
  19. if(nt == 0){
  20. t = malloc(64000*sizeof(Stringtab));
  21. if(t == 0)
  22. sysfatal("out of memory");
  23. nt = 64000;
  24. }
  25. nt--;
  26. return t++;
  27. }
  28. void
  29. tabfree(Stringtab *tt)
  30. {
  31. tt->link = tfree;
  32. tfree = tt;
  33. }
  34. char*
  35. xstrdup(char *s, int len)
  36. {
  37. char *r;
  38. static char *t;
  39. static int nt;
  40. if(nt < len){
  41. t = malloc(512*1024+len);
  42. if(t == 0)
  43. sysfatal("out of memory");
  44. nt = 512*1024;
  45. }
  46. r = t;
  47. t += len;
  48. nt -= len;
  49. memmove(r, s, len);
  50. return r;
  51. }
  52. static uint
  53. hash(char *s, int n)
  54. {
  55. uint h;
  56. uchar *p, *ep;
  57. h = 0;
  58. for(p=(uchar*)s, ep=p+n; p<ep; p++)
  59. h = h*37 + *p;
  60. return h;
  61. }
  62. static void
  63. rehash(Hash *hh)
  64. {
  65. int h;
  66. Stringtab *s;
  67. if(hh->nstab == 0)
  68. hh->nstab = 1024;
  69. else
  70. hh->nstab = hh->ntab*2;
  71. free(hh->stab);
  72. hh->stab = mallocz(hh->nstab*sizeof(Stringtab*), 1);
  73. if(hh->stab == nil)
  74. sysfatal("out of memory");
  75. for(s=hh->all; s; s=s->link){
  76. h = hash(s->str, s->n) % hh->nstab;
  77. s->hash = hh->stab[h];
  78. hh->stab[h] = s;
  79. }
  80. }
  81. Stringtab*
  82. findstab(Hash *hh, char *str, int n, int create)
  83. {
  84. uint h;
  85. Stringtab *tab, **l;
  86. if(hh->nstab == 0)
  87. rehash(hh);
  88. h = hash(str, n) % hh->nstab;
  89. for(tab=hh->stab[h], l=&hh->stab[h]; tab; l=&tab->hash, tab=tab->hash)
  90. if(n==tab->n && memcmp(str, tab->str, n) == 0){
  91. *l = tab->hash;
  92. tab->hash = hh->stab[h];
  93. hh->stab[h] = tab;
  94. return tab;
  95. }
  96. if(!create)
  97. return nil;
  98. hh->sorted = 0;
  99. tab = taballoc();
  100. tab->str = xstrdup(str, n);
  101. tab->hash = hh->stab[h];
  102. tab->link = hh->all;
  103. hh->all = tab;
  104. tab->n = n;
  105. tab->count = 0;
  106. tab->date = 0;
  107. hh->stab[h] = tab;
  108. hh->ntab++;
  109. if(hh->ntab > 2*hh->nstab && !(hh->ntab&(hh->ntab-1)))
  110. rehash(hh);
  111. return tab;
  112. }
  113. int
  114. scmp(Stringtab *a, Stringtab *b)
  115. {
  116. int n, x;
  117. if(a == 0)
  118. return 1;
  119. if(b == 0)
  120. return -1;
  121. n = a->n;
  122. if(n > b->n)
  123. n = b->n;
  124. x = memcmp(a->str, b->str, n);
  125. if(x != 0)
  126. return x;
  127. if(a->n < b->n)
  128. return -1;
  129. if(a->n > b->n)
  130. return 1;
  131. return 0; /* shouldn't happen */
  132. }
  133. Stringtab*
  134. merge(Stringtab *a, Stringtab *b)
  135. {
  136. Stringtab *s, **l;
  137. l = &s;
  138. while(a || b){
  139. if(scmp(a, b) < 0){
  140. *l = a;
  141. l = &a->link;
  142. a = a->link;
  143. }else{
  144. *l = b;
  145. l = &b->link;
  146. b = b->link;
  147. }
  148. }
  149. *l = 0;
  150. return s;
  151. }
  152. Stringtab*
  153. mergesort(Stringtab *s)
  154. {
  155. Stringtab *a, *b;
  156. int delay;
  157. if(s == nil)
  158. return nil;
  159. if(s->link == nil)
  160. return s;
  161. a = b = s;
  162. delay = 1;
  163. while(a && b){
  164. if(delay) /* easy way to handle 2-element list */
  165. delay = 0;
  166. else
  167. a = a->link;
  168. if(b = b->link)
  169. b = b->link;
  170. }
  171. b = a->link;
  172. a->link = nil;
  173. a = mergesort(s);
  174. b = mergesort(b);
  175. return merge(a, b);
  176. }
  177. Stringtab*
  178. sortstab(Hash *hh)
  179. {
  180. if(!hh->sorted){
  181. hh->all = mergesort(hh->all);
  182. hh->sorted = 1;
  183. }
  184. return hh->all;
  185. }
  186. int
  187. Bwritehash(Biobuf *b, Hash *hh)
  188. {
  189. Stringtab *s;
  190. int now;
  191. now = time(0);
  192. s = sortstab(hh);
  193. Bprint(b, "# hash table\n");
  194. for(; s; s=s->link){
  195. if(s->count <= 0)
  196. continue;
  197. /*
  198. * Entries that haven't been updated in thirty days get tossed.
  199. */
  200. if(s->date+30*86400 < now)
  201. continue;
  202. Bwrite(b, s->str, s->n);
  203. Bprint(b, "\t%d %d\n", s->count, s->date);
  204. }
  205. if(Bflush(b) == Beof)
  206. return -1;
  207. return 0;
  208. }
  209. void
  210. Breadhash(Biobuf *b, Hash *hh, int scale)
  211. {
  212. char *s;
  213. char *t;
  214. int n;
  215. int date;
  216. Stringtab *st;
  217. s = Brdstr(b, '\n', 1);
  218. if(s == nil)
  219. return;
  220. if(strcmp(s, "# hash table") != 0)
  221. sysfatal("bad hash table format");
  222. while(s = Brdline(b, '\n')){
  223. s[Blinelen(b)-1] = 0;
  224. t = strrchr(s, '\t');
  225. if(t == nil)
  226. sysfatal("bad hash table format");
  227. *t++ = '\0';
  228. if(*t < '0' || *t > '9')
  229. sysfatal("bad hash table format");
  230. n = strtol(t, &t, 10);
  231. date = time(0);
  232. if(*t != 0){
  233. if(*t == ' '){
  234. t++;
  235. date = strtol(t, &t, 10);
  236. }
  237. if(*t != 0)
  238. sysfatal("bad hash table format");
  239. }
  240. st = findstab(hh, s, strlen(s), 1);
  241. if(date > st->date)
  242. st->date = date;
  243. st->count += n*scale;
  244. }
  245. }
  246. void
  247. freehash(Hash *h)
  248. {
  249. Stringtab *s, *next;
  250. for(s=h->all; s; s=next){
  251. next = s->link;
  252. tabfree(s);
  253. }
  254. free(h->stab);
  255. free(h);
  256. }
  257. Biobuf*
  258. Bopenlock(char *file, int mode)
  259. {
  260. int i;
  261. Biobuf *b;
  262. char err[ERRMAX];
  263. b = nil;
  264. for(i=0; i<120; i++){
  265. if((b = Bopen(file, mode)) != nil)
  266. break;
  267. rerrstr(err, sizeof err);
  268. if(strstr(err, "file is locked")==nil && strstr(err, "exclusive lock")==nil)
  269. break;
  270. sleep(1000);
  271. }
  272. return b;
  273. }