hints.c 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. #include "httpd.h"
  5. #include "httpsrv.h"
  6. enum{ URLmax = 65536, HINTmax = 20 };
  7. #define RECIPLOG2 1.44269504089
  8. char **urlname; /* array of url strings 1,...,nurl */
  9. static int nurl;
  10. static uint urltab[URLmax]; /* hashstr(url) 1,...,nurl */
  11. static int urlnext[URLmax]; /* index urltab of next url in chain */
  12. static int urlhash[URLmax]; /* initially 0, meaning empty buckets */
  13. typedef struct Hint {
  14. ushort url;
  15. uchar prob;
  16. } Hint;
  17. Hint *hints[URLmax];
  18. uchar nhint[URLmax];
  19. vlong
  20. Bfilelen(void *vb)
  21. {
  22. Biobuf *b;
  23. vlong n;
  24. b = vb;
  25. n = Bseek(b, 0L, 2);
  26. Bseek(b, 0L, 0);
  27. return n;
  28. }
  29. static uint
  30. hashstr(char* key)
  31. {
  32. /* asu works better than pjw for urls */
  33. uchar *k = (unsigned char*)key;
  34. uint h = 0;
  35. while(*k!=0)
  36. h = 65599*h + *k++;
  37. return h;
  38. }
  39. static int
  40. urllookup(uint url)
  41. {
  42. /* returns +index into urltab, else -hash */
  43. int j, hash;
  44. hash = 1 + url%(URLmax-1);
  45. j = urlhash[hash];
  46. for(;;){
  47. if(j==0)
  48. return -hash;
  49. if(url==urltab[j])
  50. return j;
  51. j = urlnext[j];
  52. }
  53. }
  54. int
  55. Bage(Biobuf *b)
  56. {
  57. Dir *dir;
  58. long mtime;
  59. dir = dirfstat(Bfildes(b));
  60. if(dir != nil)
  61. mtime = dir->mtime;
  62. else
  63. mtime = 0;
  64. free(dir);
  65. return time(nil) - mtime;
  66. }
  67. void
  68. urlinit(void)
  69. {
  70. static Biobuf *b = nil;
  71. static vlong filelen = 0;
  72. vlong newlen;
  73. char *s, *arena;
  74. int i, j, n;
  75. uint url;
  76. char *file;
  77. if(filelen < 0)
  78. return;
  79. file = "/sys/log/httpd/url";
  80. if(b == nil){
  81. b = Bopen(file, OREAD); /* first time */
  82. if(b == nil){
  83. syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
  84. filelen = -1;
  85. return;
  86. }
  87. }
  88. newlen = Bfilelen(b); /* side effect: rewinds b */
  89. if(newlen == filelen || Bage(b)<300)
  90. return;
  91. filelen = newlen;
  92. if(filelen < 0)
  93. return;
  94. if(nurl){ /* free existing tables */
  95. free(urlname[0]); /* arena */
  96. memset(urlhash,0,sizeof urlhash);
  97. memset(urlnext,0,sizeof urlnext);
  98. nurl = 0;
  99. }
  100. if(urlname==nil)
  101. urlname = (char**)ezalloc(URLmax*sizeof(*urlname));
  102. arena = (char*)ezalloc(filelen); /* enough for all the strcpy below */
  103. i = 1;
  104. while((s=Brdline(b,'\n'))!=0){
  105. /* read lines of the form: 999 /url/path */
  106. n = Blinelen(b) - 1;
  107. if(n>2 && s[n]=='\n'){
  108. s[n] = '\0';
  109. }else{
  110. sysfatal("missing fields or newline in url-db");
  111. }
  112. j = strtoul(s,&s,10);
  113. while(*s==' ')
  114. s++;
  115. if(i++!=j)
  116. sysfatal("url-db synchronization error");
  117. url = hashstr(s);
  118. j = urllookup(url);
  119. if(j>=0)
  120. sysfatal("duplicate url");
  121. j = -j;
  122. nurl++;
  123. if(nurl>=URLmax){
  124. syslog(0, HTTPLOG, "urlinit overflow at %s",s);
  125. free(urlname[0]); /* arena */
  126. memset(urlhash,0,sizeof urlhash);
  127. memset(urlnext,0,sizeof urlnext);
  128. nurl = 0;
  129. return;
  130. }
  131. urltab[nurl] = url;
  132. urlnext[nurl] = urlhash[j];
  133. urlhash[j] = nurl;
  134. strcpy(arena,s);
  135. urlname[nurl] = arena;
  136. arena += strlen(s)+1;
  137. }
  138. syslog(0, HTTPLOG, "prefetch-hints url=%d (%.1fMB)", nurl, 1.e-6*(URLmax*sizeof(*urlname)+filelen));
  139. /* b is held open, because namespace will be chopped */
  140. }
  141. void
  142. statsinit(void)
  143. {
  144. static Biobuf *b = nil;
  145. static vlong filelen = 0;
  146. vlong newlen;
  147. int iq, n, i, nstats = 0;
  148. uchar *s, buf[3+HINTmax*3]; /* iq, n, (url,prob)... */
  149. Hint *arena, *h;
  150. char *file;
  151. static void *oldarena = nil;
  152. file = "/sys/log/httpd/pathstat";
  153. if(b == nil){
  154. if(filelen == -1)
  155. return; /* if failed first time */
  156. b = Bopen(file, OREAD); /* first time */
  157. if(b == nil){
  158. syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
  159. filelen = -1;
  160. return;
  161. }
  162. }
  163. newlen = Bfilelen(b); /* side effect: rewinds b */
  164. if(newlen == filelen || Bage(b)<300)
  165. return;
  166. filelen = newlen;
  167. if(oldarena){
  168. free(oldarena);
  169. memset(nhint,0,sizeof nhint);
  170. }
  171. arena = (Hint*)ezalloc((filelen/3)*sizeof(Hint));
  172. oldarena = arena;
  173. for(;;){
  174. i = Bread(b,buf,3);
  175. if(i<3)
  176. break;
  177. nstats++;
  178. iq = buf[0];
  179. iq = (iq<<8) | buf[1];
  180. n = buf[2];
  181. h = arena;
  182. arena += n;
  183. hints[iq] = h;
  184. nhint[iq] = n;
  185. if(Bread(b,buf,3*n)!=3*n)
  186. sysfatal("stats read error");
  187. for(i=0; i<n; i++){
  188. s = &buf[3*i];
  189. h[i].url = (s[0]<<8) | s[1];
  190. h[i].prob = s[2];
  191. }
  192. }
  193. syslog(0, HTTPLOG, "prefetch-hints stats=%d (%.1fMB)", nstats, 1.e-6*((filelen/3)*sizeof(Hint)));
  194. }
  195. void
  196. urlcanon(char *url)
  197. {
  198. /* all the changes here can be implemented by rewriting in-place */
  199. char *p, *q;
  200. /* remove extraneous '/' in the middle and at the end */
  201. p = url+1; /* first char needs no change */
  202. q = p;
  203. while(q[0]){
  204. if(q[0]=='/' && q[-1]=='/'){
  205. q++;
  206. continue;
  207. }
  208. *p++ = *q++;
  209. }
  210. if(q[-1]=='/'){ /* trailing '/' */
  211. p[-1] = '\0';
  212. }else{
  213. p[0] = '\0';
  214. }
  215. /* specific to the cm.bell-labs.com web site */
  216. if(strncmp(url,"/cm/",4)==0){
  217. if(strchr("cims",url[4]) && strncmp(url+5,"s/who/",6)==0)
  218. /* strip off /cm/cs */
  219. memmove(url,url+6,strlen(url+6)+1);
  220. else if(strncmp(url+4,"ms/what/wavelet",15)==0)
  221. /* /cm/ms/what */
  222. memmove(url,url+11,strlen(url+11)+1);
  223. }
  224. }
  225. void
  226. hintprint(HConnect *hc, Hio *hout, char *uri, int thresh, int havej)
  227. {
  228. int i, j, pr, prefix, fd, siz, havei, newhint = 0, n;
  229. char *query, *sf, etag[32], *wurl;
  230. Dir *dir;
  231. Hint *h, *haveh;
  232. query = hstrdup(hc, uri);
  233. urlcanon(query);
  234. j = urllookup(hashstr(query));
  235. if(j < 0)
  236. return;
  237. query = strrchr(uri,'/');
  238. if(!query)
  239. return; /* can't happen */
  240. prefix = query-uri+1; /* = strlen(dirname)+1 */
  241. h = hints[j];
  242. for(i=0; i<nhint[j]; i++){
  243. if(havej > 0 && havej < URLmax){ /* exclude hints client has */
  244. haveh = hints[havej];
  245. for(havei=0; havei<nhint[havej]; havei++)
  246. if( haveh[havei].url == h[i].url)
  247. goto continuei;
  248. }
  249. sf = urlname[h[i].url];
  250. pr = h[i].prob;
  251. if(pr<thresh)
  252. break;
  253. n = strlen(webroot) + strlen(sf) + 1;
  254. wurl = halloc(hc, n);
  255. strcpy(wurl, webroot);
  256. strcat(wurl, sf);
  257. fd = open(wurl, OREAD);
  258. if(fd<0)
  259. continue;
  260. dir = dirfstat(fd);
  261. if(dir == nil){
  262. close(fd);
  263. continue;
  264. }
  265. close(fd);
  266. snprint(etag, sizeof(etag), "\"%lluxv%lux\"", dir->qid.path, dir->qid.vers);
  267. siz = (int)( log((double)dir->length) * RECIPLOG2 + 0.9999);
  268. free(dir);
  269. if(strncmp(uri,sf,prefix)==0 && strchr(sf+prefix,'/')==0 && sf[prefix]!=0)
  270. sf = sf+prefix;
  271. hprint(hout, "Fresh: %d,%s,%d,%s\r\n", pr, etag, siz, sf);
  272. newhint++;
  273. continuei: ;
  274. }
  275. if(newhint)
  276. hprint(hout, "Fresh: have/%d\r\n", j);
  277. }