hints.c 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. #include "httpd.h"
  5. #include "httpsrv.h"
  6. enum{ URLmax = 65536, HINTmax = 20 };
  7. #define RECIPLOG2 1.44269504089
  8. char **urlname; /* array of url strings 1,...,nurl */
  9. static int nurl;
  10. static uint urltab[URLmax]; /* hashstr(url) 1,...,nurl */
  11. static int urlnext[URLmax]; /* index urltab of next url in chain */
  12. static int urlhash[URLmax]; /* initially 0, meaning empty buckets */
  13. typedef struct Hint {
  14. ushort url;
  15. uchar prob;
  16. } Hint;
  17. Hint *hints[URLmax];
  18. uchar nhint[URLmax];
  19. vlong
  20. Bfilelen(void *vb)
  21. {
  22. Biobuf *b;
  23. vlong n;
  24. b = vb;
  25. n = Bseek(b, 0L, 2);
  26. Bseek(b, 0L, 0);
  27. return n;
  28. }
  29. static uint
  30. hashstr(char* key)
  31. {
  32. /* asu works better than pjw for urls */
  33. uchar *k = (unsigned char*)key;
  34. uint h = 0;
  35. while(*k!=0)
  36. h = 65599*h + *k++;
  37. return h;
  38. }
  39. static int
  40. urllookup(uint url)
  41. {
  42. /* returns +index into urltab, else -hash */
  43. int j, hash;
  44. hash = 1 + url%(URLmax-1);
  45. j = urlhash[hash];
  46. for(;;){
  47. if(j==0)
  48. return -hash;
  49. if(url==urltab[j])
  50. return j;
  51. j = urlnext[j];
  52. }
  53. return 0; /* not reached */
  54. }
  55. int
  56. Bage(Biobuf *b)
  57. {
  58. Dir *dir;
  59. long mtime;
  60. dir = dirfstat(Bfildes(b));
  61. if(dir != nil)
  62. mtime = dir->mtime;
  63. else
  64. mtime = 0;
  65. free(dir);
  66. return time(nil) - mtime;
  67. }
  68. void
  69. urlinit(void)
  70. {
  71. static Biobuf *b = nil;
  72. static vlong filelen = 0;
  73. vlong newlen;
  74. char *s, *arena;
  75. int i, j, n;
  76. uint url;
  77. char *file;
  78. if(filelen < 0)
  79. return;
  80. file = "/sys/log/httpd/url";
  81. if(b == nil){
  82. b = Bopen(file, OREAD); /* first time */
  83. if(b == nil){
  84. syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
  85. filelen = -1;
  86. return;
  87. }
  88. }
  89. newlen = Bfilelen(b); /* side effect: rewinds b */
  90. if(newlen == filelen || Bage(b)<300)
  91. return;
  92. filelen = newlen;
  93. if(filelen < 0)
  94. return;
  95. if(nurl){ /* free existing tables */
  96. free(urlname[0]); /* arena */
  97. memset(urlhash,0,sizeof urlhash);
  98. memset(urlnext,0,sizeof urlnext);
  99. nurl = 0;
  100. }
  101. if(urlname==nil)
  102. urlname = (char**)ezalloc(URLmax*sizeof(*urlname));
  103. arena = (char*)ezalloc(filelen); /* enough for all the strcpy below */
  104. i = 1;
  105. while((s=Brdline(b,'\n'))!=0){
  106. /* read lines of the form: 999 /url/path */
  107. n = Blinelen(b) - 1;
  108. if(n>2 && s[n]=='\n'){
  109. s[n] = '\0';
  110. }else{
  111. sysfatal("missing fields or newline in url-db");
  112. }
  113. j = strtoul(s,&s,10);
  114. while(*s==' ')
  115. s++;
  116. if(i++!=j)
  117. sysfatal("url-db synchronization error");
  118. url = hashstr(s);
  119. j = urllookup(url);
  120. if(j>=0)
  121. sysfatal("duplicate url");
  122. j = -j;
  123. nurl++;
  124. if(nurl>=URLmax){
  125. syslog(0, HTTPLOG, "urlinit overflow at %s",s);
  126. free(urlname[0]); /* arena */
  127. memset(urlhash,0,sizeof urlhash);
  128. memset(urlnext,0,sizeof urlnext);
  129. nurl = 0;
  130. return;
  131. }
  132. urltab[nurl] = url;
  133. urlnext[nurl] = urlhash[j];
  134. urlhash[j] = nurl;
  135. strcpy(arena,s);
  136. urlname[nurl] = arena;
  137. arena += strlen(s)+1;
  138. }
  139. syslog(0, HTTPLOG, "prefetch-hints url=%d (%.1fMB)", nurl, 1.e-6*(URLmax*sizeof(*urlname)+filelen));
  140. /* b is held open, because namespace will be chopped */
  141. }
  142. void
  143. statsinit(void)
  144. {
  145. static Biobuf *b = nil;
  146. static vlong filelen = 0;
  147. vlong newlen;
  148. int iq, n, i, nstats = 0;
  149. uchar *s, buf[3+HINTmax*3]; /* iq, n, (url,prob)... */
  150. Hint *arena, *h;
  151. char *file;
  152. static void *oldarena = nil;
  153. file = "/sys/log/httpd/pathstat";
  154. if(b == nil){
  155. if(filelen == -1)
  156. return; /* if failed first time */
  157. b = Bopen(file, OREAD); /* first time */
  158. if(b == nil){
  159. syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
  160. filelen = -1;
  161. return;
  162. }
  163. }
  164. newlen = Bfilelen(b); /* side effect: rewinds b */
  165. if(newlen == filelen || Bage(b)<300)
  166. return;
  167. filelen = newlen;
  168. if(oldarena){
  169. free(oldarena);
  170. memset(nhint,0,sizeof nhint);
  171. }
  172. arena = (Hint*)ezalloc((filelen/3)*sizeof(Hint));
  173. oldarena = arena;
  174. for(;;){
  175. i = Bread(b,buf,3);
  176. if(i<3)
  177. break;
  178. nstats++;
  179. iq = buf[0];
  180. iq = (iq<<8) | buf[1];
  181. n = buf[2];
  182. h = arena;
  183. arena += n;
  184. hints[iq] = h;
  185. nhint[iq] = n;
  186. if(Bread(b,buf,3*n)!=3*n)
  187. sysfatal("stats read error");
  188. for(i=0; i<n; i++){
  189. s = &buf[3*i];
  190. h[i].url = (s[0]<<8) | s[1];
  191. h[i].prob = s[2];
  192. }
  193. }
  194. syslog(0, HTTPLOG, "prefetch-hints stats=%d (%.1fMB)", nstats, 1.e-6*((filelen/3)*sizeof(Hint)));
  195. }
  196. void
  197. urlcanon(char *url)
  198. {
  199. /* all the changes here can be implemented by rewriting in-place */
  200. char *p, *q;
  201. /* remove extraneous '/' in the middle and at the end */
  202. p = url+1; /* first char needs no change */
  203. q = p;
  204. while(q[0]){
  205. if(q[0]=='/' && q[-1]=='/'){
  206. q++;
  207. continue;
  208. }
  209. *p++ = *q++;
  210. }
  211. if(q[-1]=='/'){ /* trailing '/' */
  212. p[-1] = '\0';
  213. }else{
  214. p[0] = '\0';
  215. }
  216. /* specific to the cm.bell-labs.com web site */
  217. if(strncmp(url,"/cm/",4)==0){
  218. if(strchr("cims",url[4]) && strncmp(url+5,"s/who/",6)==0)
  219. /* strip off /cm/cs */
  220. memmove(url,url+6,strlen(url+6)+1);
  221. else if(strncmp(url+4,"ms/what/wavelet",15)==0)
  222. /* /cm/ms/what */
  223. memmove(url,url+11,strlen(url+11)+1);
  224. }
  225. }
  226. void
  227. hintprint(HConnect *hc, Hio *hout, char *uri, int thresh, int havej)
  228. {
  229. int i, j, pr, prefix, fd, siz, havei, newhint = 0, n;
  230. char *query, *sf, etag[32], *wurl;
  231. Dir *dir;
  232. Hint *h, *haveh;
  233. query = hstrdup(hc, uri);
  234. urlcanon(query);
  235. j = urllookup(hashstr(query));
  236. if(j < 0)
  237. return;
  238. query = strrchr(uri,'/');
  239. if(!query)
  240. return; /* can't happen */
  241. prefix = query-uri+1; /* = strlen(dirname)+1 */
  242. h = hints[j];
  243. for(i=0; i<nhint[j]; i++){
  244. if(havej > 0 && havej < URLmax){ /* exclude hints client has */
  245. haveh = hints[havej];
  246. for(havei=0; havei<nhint[havej]; havei++)
  247. if( haveh[havei].url == h[i].url)
  248. goto continuei;
  249. }
  250. sf = urlname[h[i].url];
  251. pr = h[i].prob;
  252. if(pr<thresh)
  253. break;
  254. n = strlen(webroot) + strlen(sf) + 1;
  255. wurl = halloc(hc, n);
  256. strcpy(wurl, webroot);
  257. strcat(wurl, sf);
  258. fd = open(wurl, OREAD);
  259. if(fd<0)
  260. continue;
  261. dir = dirfstat(fd);
  262. if(dir == nil){
  263. close(fd);
  264. continue;
  265. }
  266. close(fd);
  267. snprint(etag, sizeof(etag), "\"%lluxv%lux\"", dir->qid.path, dir->qid.vers);
  268. siz = (int)( log((double)dir->length) * RECIPLOG2 + 0.9999);
  269. free(dir);
  270. if(strncmp(uri,sf,prefix)==0 && strchr(sf+prefix,'/')==0 && sf[prefix]!=0)
  271. sf = sf+prefix;
  272. hprint(hout, "Fresh: %d,%s,%d,%s\r\n", pr, etag, siz, sf);
  273. newhint++;
  274. continuei: ;
  275. }
  276. if(newhint)
  277. hprint(hout, "Fresh: have/%d\r\n", j);
  278. }