dnresolve.c 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595
  1. /*
  2. * domain name resolvers, see rfcs 1035 and 1123
  3. */
  4. #include <u.h>
  5. #include <libc.h>
  6. #include <ip.h>
  7. #include <bio.h>
  8. #include <ndb.h>
  9. #include "dns.h"
  /* time-unit conversions (integer arithmetic; division truncates) */
#define NS2MS(ns)	((ns) / 1000000L)	/* nanoseconds to milliseconds */
#define S2MS(s)		((s) * 1000)		/* seconds to milliseconds */
#define MS2S(ms)	((ms) / 1000)		/* milliseconds to seconds */
  /* type names for the structures declared below */
typedef struct Dest Dest;
typedef struct Ipaddr Ipaddr;
typedef struct Query Query;

enum
{
	/* transport used for a query */
	Udp,
	Tcp,

	Maxdest=	24,	/* maximum destinations for a request message */
	Maxtrans=	3,	/* maximum transmissions to a server */

	/* stamps checked by destck() and queryck() */
	Destmagic=	0xcafebabe,
	Querymagic=	0xdeadbeef,
};

enum {
	Hurry,
	Patient,
};

enum {
	Outns,
	Inns,
};

enum {
	Remntretry = 15,	/* min. sec.s between remount attempts */
};
  27. struct Ipaddr {
  28. Ipaddr *next;
  29. uchar ip[IPaddrlen];
  30. };
  31. struct Dest
  32. {
  33. uchar a[IPaddrlen]; /* ip address */
  34. DN *s; /* name server */
  35. int nx; /* number of transmissions */
  36. int code; /* response code; used to clear dp->respcode */
  37. ulong magic;
  38. };
  39. /*
  40. * Query has a QLock in it, thus it can't be an automatic
  41. * variable, since each process would see a separate copy
  42. * of the lock on its stack.
  43. */
  44. struct Query {
  45. DN *dp; /* domain */
  46. ushort type; /* and type to look up */
  47. Request *req;
  48. RR *nsrp; /* name servers to consult */
  49. /* dest must not be on the stack due to forking in slave() */
  50. Dest *dest; /* array of destinations */
  51. Dest *curdest; /* pointer to one of them */
  52. int ndest;
  53. int udpfd;
  54. QLock tcplock; /* only one tcp call at a time per query */
  55. int tcpset;
  56. int tcpfd; /* if Tcp, read replies from here */
  57. int tcpctlfd;
  58. uchar tcpip[IPaddrlen];
  59. ulong magic;
  60. };
  61. /* estimated % probability of such a record existing at all */
  62. int likely[] = {
  63. [Ta] 95,
  64. [Taaaa] 10,
  65. [Tcname] 15,
  66. [Tmx] 60,
  67. [Tns] 90,
  68. [Tnull] 5,
  69. [Tptr] 35,
  70. [Tsoa] 90,
  71. [Tsrv] 60,
  72. [Ttxt] 15,
  73. [Tall] 95,
  74. };
  75. static RR* dnresolve1(char*, int, int, Request*, int, int);
  76. static int netquery(Query *, int);
  77. /*
  78. * reading /proc/pid/args yields either "name args" or "name [display args]",
  79. * so return only display args, if any.
  80. */
  81. static char *
  82. procgetname(void)
  83. {
  84. int fd, n;
  85. char *lp, *rp;
  86. char buf[256];
  87. snprint(buf, sizeof buf, "#p/%d/args", getpid());
  88. if((fd = open(buf, OREAD)) < 0)
  89. return strdup("");
  90. *buf = '\0';
  91. n = read(fd, buf, sizeof buf-1);
  92. close(fd);
  93. if (n >= 0)
  94. buf[n] = '\0';
  95. if ((lp = strchr(buf, '[')) == nil ||
  96. (rp = strrchr(buf, ']')) == nil)
  97. return strdup("");
  98. *rp = '\0';
  99. return strdup(lp+1);
  100. }
  101. /*
  102. * lookup 'type' info for domain name 'name'. If it doesn't exist, try
  103. * looking it up as a canonical name.
  104. */
  105. RR*
  106. dnresolve(char *name, int class, int type, Request *req, RR **cn, int depth,
  107. int recurse, int rooted, int *status)
  108. {
  109. RR *rp, *nrp, *drp;
  110. DN *dp;
  111. int loops;
  112. char *procname;
  113. char nname[Domlen];
  114. if(status)
  115. *status = 0;
  116. if(depth > 12) /* in a recursive loop? */
  117. return nil;
  118. procname = procgetname();
  119. /*
  120. * hack for systems that don't have resolve search
  121. * lists. Just look up the simple name in the database.
  122. */
  123. if(!rooted && strchr(name, '.') == nil){
  124. rp = nil;
  125. drp = domainlist(class);
  126. for(nrp = drp; rp == nil && nrp != nil; nrp = nrp->next){
  127. snprint(nname, sizeof nname, "%s.%s", name,
  128. nrp->ptr->name);
  129. rp = dnresolve(nname, class, type, req, cn, depth+1,
  130. recurse, rooted, status);
  131. lock(&dnlock);
  132. rrfreelist(rrremneg(&rp));
  133. unlock(&dnlock);
  134. }
  135. if(drp != nil)
  136. rrfreelist(drp);
  137. procsetname(procname);
  138. free(procname);
  139. return rp;
  140. }
  141. /*
  142. * try the name directly
  143. */
  144. rp = dnresolve1(name, class, type, req, depth, recurse);
  145. if(rp == nil) {
  146. /*
  147. * try it as a canonical name if we weren't told
  148. * that the name didn't exist
  149. */
  150. dp = dnlookup(name, class, 0);
  151. if(type != Tptr && dp->respcode != Rname)
  152. for(loops = 0; rp == nil && loops < 32; loops++){
  153. rp = dnresolve1(name, class, Tcname, req,
  154. depth, recurse);
  155. if(rp == nil)
  156. break;
  157. /* rp->host == nil shouldn't happen, but does */
  158. if(rp->negative || rp->host == nil){
  159. rrfreelist(rp);
  160. rp = nil;
  161. break;
  162. }
  163. name = rp->host->name;
  164. lock(&dnlock);
  165. if(cn)
  166. rrcat(cn, rp);
  167. else
  168. rrfreelist(rp);
  169. unlock(&dnlock);
  170. rp = dnresolve1(name, class, type, req,
  171. depth, recurse);
  172. }
  173. /* distinction between not found and not good */
  174. if(rp == nil && status != nil && dp->respcode != 0)
  175. *status = dp->respcode;
  176. }
  177. procsetname(procname);
  178. free(procname);
  179. return randomize(rp);
  180. }
  181. static void
  182. queryinit(Query *qp, DN *dp, int type, Request *req)
  183. {
  184. memset(qp, 0, sizeof *qp);
  185. qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
  186. qp->dp = dp;
  187. qp->type = type;
  188. if (qp->type != type)
  189. dnslog("queryinit: bogus type %d", type);
  190. qp->req = req;
  191. qp->nsrp = nil;
  192. qp->dest = qp->curdest = nil;
  193. qp->magic = Querymagic;
  194. }
  195. static void
  196. queryck(Query *qp)
  197. {
  198. assert(qp);
  199. assert(qp->magic == Querymagic);
  200. }
  201. static void
  202. querydestroy(Query *qp)
  203. {
  204. queryck(qp);
  205. /* leave udpfd open */
  206. if (qp->tcpfd > 0)
  207. close(qp->tcpfd);
  208. if (qp->tcpctlfd > 0) {
  209. hangup(qp->tcpctlfd);
  210. close(qp->tcpctlfd);
  211. }
  212. free(qp->dest);
  213. memset(qp, 0, sizeof *qp); /* prevent accidents */
  214. qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
  215. }
  216. static void
  217. destinit(Dest *p)
  218. {
  219. memset(p, 0, sizeof *p);
  220. p->magic = Destmagic;
  221. }
  222. static void
  223. destck(Dest *p)
  224. {
  225. assert(p);
  226. assert(p->magic == Destmagic);
  227. }
  228. static void
  229. destdestroy(Dest *p)
  230. {
  231. USED(p);
  232. }
  233. /*
  234. * if the response to a query hasn't arrived within 100 ms.,
  235. * it's unlikely to arrive at all. after 1 s., it's really unlikely.
  236. * queries for missing RRs are likely to produce time-outs rather than
  237. * negative responses, so cname and aaaa queries are likely to time out,
  238. * thus we don't wait very long for them.
  239. */
  240. static void
  241. notestats(vlong start, int tmout, int type)
  242. {
  243. qlock(&stats);
  244. if (tmout) {
  245. stats.tmout++;
  246. if (type == Taaaa)
  247. stats.tmoutv6++;
  248. else if (type == Tcname)
  249. stats.tmoutcname++;
  250. } else {
  251. long wait10ths = NS2MS(nsec() - start) / 100;
  252. if (wait10ths <= 0)
  253. stats.under10ths[0]++;
  254. else if (wait10ths >= nelem(stats.under10ths))
  255. stats.under10ths[nelem(stats.under10ths) - 1]++;
  256. else
  257. stats.under10ths[wait10ths]++;
  258. }
  259. qunlock(&stats);
  260. }
  261. static void
  262. noteinmem(void)
  263. {
  264. qlock(&stats);
  265. stats.answinmem++;
  266. qunlock(&stats);
  267. }
  268. static RR*
  269. issuequery(Query *qp, char *name, int class, int depth, int recurse)
  270. {
  271. char *cp;
  272. DN *nsdp;
  273. RR *rp, *nsrp, *dbnsrp;
  274. /*
  275. * if we're running as just a resolver, query our
  276. * designated name servers
  277. */
  278. if(cfg.resolver){
  279. nsrp = randomize(getdnsservers(class));
  280. if(nsrp != nil) {
  281. qp->nsrp = nsrp;
  282. if(netquery(qp, depth+1)){
  283. rrfreelist(nsrp);
  284. return rrlookup(qp->dp, qp->type, OKneg);
  285. }
  286. rrfreelist(nsrp);
  287. }
  288. }
  289. /*
  290. * walk up the domain name looking for
  291. * a name server for the domain.
  292. */
  293. for(cp = name; cp; cp = walkup(cp)){
  294. /*
  295. * if this is a local (served by us) domain,
  296. * return answer
  297. */
  298. dbnsrp = randomize(dblookup(cp, class, Tns, 0, 0));
  299. if(dbnsrp && dbnsrp->local){
  300. rp = dblookup(name, class, qp->type, 1, dbnsrp->ttl);
  301. rrfreelist(dbnsrp);
  302. return rp;
  303. }
  304. /*
  305. * if recursion isn't set, just accept local
  306. * entries
  307. */
  308. if(recurse == Dontrecurse){
  309. if(dbnsrp)
  310. rrfreelist(dbnsrp);
  311. continue;
  312. }
  313. /* look for ns in cache */
  314. nsdp = dnlookup(cp, class, 0);
  315. nsrp = nil;
  316. if(nsdp)
  317. nsrp = randomize(rrlookup(nsdp, Tns, NOneg));
  318. /* if the entry timed out, ignore it */
  319. if(nsrp && nsrp->ttl < now){
  320. rrfreelist(nsrp);
  321. nsrp = nil;
  322. }
  323. if(nsrp){
  324. rrfreelist(dbnsrp);
  325. /* query the name servers found in cache */
  326. qp->nsrp = nsrp;
  327. if(netquery(qp, depth+1)){
  328. rrfreelist(nsrp);
  329. return rrlookup(qp->dp, qp->type, OKneg);
  330. }
  331. rrfreelist(nsrp);
  332. continue;
  333. }
  334. /* use ns from db */
  335. if(dbnsrp){
  336. /* try the name servers found in db */
  337. qp->nsrp = dbnsrp;
  338. if(netquery(qp, depth+1)){
  339. /* we got an answer */
  340. rrfreelist(dbnsrp);
  341. return rrlookup(qp->dp, qp->type, NOneg);
  342. }
  343. rrfreelist(dbnsrp);
  344. }
  345. }
  346. return nil;
  347. }
  348. static RR*
  349. dnresolve1(char *name, int class, int type, Request *req, int depth,
  350. int recurse)
  351. {
  352. Area *area;
  353. DN *dp;
  354. RR *rp;
  355. Query *qp;
  356. if(debug)
  357. dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class);
  358. /* only class Cin implemented so far */
  359. if(class != Cin)
  360. return nil;
  361. dp = dnlookup(name, class, 1);
  362. /*
  363. * Try the cache first
  364. */
  365. rp = rrlookup(dp, type, OKneg);
  366. if(rp)
  367. if(rp->db){
  368. /* unauthoritative db entries are hints */
  369. if(rp->auth) {
  370. noteinmem();
  371. return rp;
  372. }
  373. } else
  374. /* cached entry must still be valid */
  375. if(rp->ttl > now)
  376. /* but Tall entries are special */
  377. if(type != Tall || rp->query == Tall) {
  378. noteinmem();
  379. return rp;
  380. }
  381. rrfreelist(rp);
  382. rp = nil; /* accident prevention */
  383. USED(rp);
  384. /*
  385. * try the cache for a canonical name. if found punt
  386. * since we'll find it during the canonical name search
  387. * in dnresolve().
  388. */
  389. if(type != Tcname){
  390. rp = rrlookup(dp, Tcname, NOneg);
  391. rrfreelist(rp);
  392. if(rp)
  393. return nil;
  394. }
  395. /*
  396. * if the domain name is within an area of ours,
  397. * we should have found its data in memory by now.
  398. */
  399. area = inmyarea(dp->name);
  400. if (area || strncmp(dp->name, "local#", 6) == 0) {
  401. // char buf[32];
  402. // dnslog("%s %s: no data in area %s", dp->name,
  403. // rrname(type, buf, sizeof buf), area->soarr->owner->name);
  404. return nil;
  405. }
  406. qp = emalloc(sizeof *qp);
  407. queryinit(qp, dp, type, req);
  408. rp = issuequery(qp, name, class, depth, recurse);
  409. querydestroy(qp);
  410. free(qp);
  411. if(rp)
  412. return rp;
  413. /* settle for a non-authoritative answer */
  414. rp = rrlookup(dp, type, OKneg);
  415. if(rp)
  416. return rp;
  417. /* noone answered. try the database, we might have a chance. */
  418. return dblookup(name, class, type, 0, 0);
  419. }
  /*
 * walk a domain name one element to the right.
 * return a pointer to that element.
 * in other words, return a pointer to the parent domain name.
 *
 * a name without a '.' has the empty name "" as its parent;
 * the empty name has none, for which 0 is returned.
 */
char*
walkup(char *name)
{
	char *dot;

	dot = strchr(name, '.');
	if(dot != 0)
		return dot + 1;
	if(*name == '\0')
		return 0;
	return "";
}
  437. /*
  438. * Get a udp port for sending requests and reading replies. Put the port
  439. * into "headers" mode.
  440. */
  441. static char *hmsg = "headers";
  442. int
  443. udpport(char *mtpt)
  444. {
  445. int fd, ctl;
  446. char ds[64], adir[64];
  447. /* get a udp port */
  448. snprint(ds, sizeof ds, "%s/udp!*!0", (mtpt? mtpt: "/net"));
  449. ctl = announce(ds, adir);
  450. if(ctl < 0){
  451. /* warning("can't get udp port"); */
  452. return -1;
  453. }
  454. /* turn on header style interface */
  455. if(write(ctl, hmsg, strlen(hmsg)) != strlen(hmsg)){
  456. close(ctl);
  457. warning(hmsg);
  458. return -1;
  459. }
  460. /* grab the data file */
  461. snprint(ds, sizeof ds, "%s/data", adir);
  462. fd = open(ds, ORDWR);
  463. close(ctl);
  464. if(fd < 0)
  465. warning("can't open udp port %s: %r", ds);
  466. return fd;
  467. }
  468. /* generate a DNS UDP query packet */
  469. int
  470. mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno)
  471. {
  472. DNSmsg m;
  473. int len;
  474. Udphdr *uh = (Udphdr*)buf;
  475. /* stuff port number into output buffer */
  476. memset(uh, 0, sizeof *uh);
  477. hnputs(uh->rport, 53);
  478. /* make request and convert it to output format */
  479. memset(&m, 0, sizeof m);
  480. m.flags = flags;
  481. m.id = reqno;
  482. m.qd = rralloc(type);
  483. m.qd->owner = dp;
  484. m.qd->type = type;
  485. if (m.qd->type != type)
  486. dnslog("mkreq: bogus type %d", type);
  487. len = convDNS2M(&m, &buf[Udphdrsize], Maxudp);
  488. rrfree(m.qd);
  489. memset(&m, 0, sizeof m); /* cause trouble */
  490. return len;
  491. }
  492. void
  493. freeanswers(DNSmsg *mp)
  494. {
  495. rrfreelist(mp->qd);
  496. rrfreelist(mp->an);
  497. rrfreelist(mp->ns);
  498. rrfreelist(mp->ar);
  499. mp->qd = mp->an = mp->ns = mp->ar = nil;
  500. }
  501. /* sets srcip */
  502. static int
  503. readnet(Query *qp, int medium, uchar *ibuf, ulong endtime, uchar **replyp,
  504. uchar *srcip)
  505. {
  506. int len, fd;
  507. long ms;
  508. vlong startns = nsec();
  509. uchar *reply;
  510. uchar lenbuf[2];
  511. /* timed read of reply */
  512. ms = S2MS(endtime) - NS2MS(startns);
  513. if (ms < 2000)
  514. ms = 2000; /* give the remote ns a fighting chance */
  515. reply = ibuf;
  516. len = -1; /* pessimism */
  517. memset(srcip, 0, IPaddrlen);
  518. if (medium == Udp)
  519. if (qp->udpfd <= 0)
  520. dnslog("readnet: qp->udpfd closed");
  521. else {
  522. alarm(ms);
  523. len = read(qp->udpfd, ibuf, Udphdrsize+Maxudpin);
  524. alarm(0);
  525. notestats(startns, len < 0, qp->type);
  526. if (len >= IPaddrlen)
  527. memmove(srcip, ibuf, IPaddrlen);
  528. if (len >= Udphdrsize) {
  529. len -= Udphdrsize;
  530. reply += Udphdrsize;
  531. }
  532. }
  533. else {
  534. if (!qp->tcpset)
  535. dnslog("readnet: tcp params not set");
  536. alarm(ms);
  537. fd = qp->tcpfd;
  538. if (fd <= 0)
  539. dnslog("readnet: %s: tcp fd unset for dest %I",
  540. qp->dp->name, qp->tcpip);
  541. else if (readn(fd, lenbuf, 2) != 2) {
  542. dnslog("readnet: short read of tcp size from %I",
  543. qp->tcpip);
  544. /* probably a time-out */
  545. notestats(startns, 1, qp->type);
  546. } else {
  547. len = lenbuf[0]<<8 | lenbuf[1];
  548. if (readn(fd, ibuf, len) != len) {
  549. dnslog("readnet: short read of tcp data from %I",
  550. qp->tcpip);
  551. /* probably a time-out */
  552. notestats(startns, 1, qp->type);
  553. len = -1;
  554. }
  555. }
  556. alarm(0);
  557. memmove(srcip, qp->tcpip, IPaddrlen);
  558. }
  559. *replyp = reply;
  560. return len;
  561. }
  562. /*
  563. * read replies to a request and remember the rrs in the answer(s).
  564. * ignore any of the wrong type.
  565. * wait at most until endtime.
  566. */
  567. static int
  568. readreply(Query *qp, int medium, ushort req, uchar *ibuf, DNSmsg *mp,
  569. ulong endtime)
  570. {
  571. int len, rv;
  572. char *err;
  573. char tbuf[32];
  574. uchar *reply;
  575. uchar srcip[IPaddrlen];
  576. RR *rp;
  577. queryck(qp);
  578. rv = 0;
  579. memset(mp, 0, sizeof *mp);
  580. if (time(nil) >= endtime)
  581. return -1; /* timed out before we started */
  582. memset(srcip, 0, sizeof srcip);
  583. if (0)
  584. len = -1;
  585. for (; time(nil) < endtime &&
  586. (len = readnet(qp, medium, ibuf, endtime, &reply, srcip)) >= 0;
  587. freeanswers(mp)){
  588. /* convert into internal format */
  589. memset(mp, 0, sizeof *mp);
  590. err = convM2DNS(reply, len, mp, nil);
  591. if (mp->flags & Ftrunc) {
  592. free(err);
  593. freeanswers(mp);
  594. /* notify our caller to retry the query via tcp. */
  595. return -1;
  596. } else if(err){
  597. dnslog("readreply: %s: input err, len %d: %s: %I",
  598. qp->dp->name, len, err, srcip);
  599. free(err);
  600. continue;
  601. }
  602. if(debug)
  603. logreply(qp->req->id, srcip, mp);
  604. /* answering the right question? */
  605. if(mp->id != req)
  606. dnslog("%d: id %d instead of %d: %I", qp->req->id,
  607. mp->id, req, srcip);
  608. else if(mp->qd == 0)
  609. dnslog("%d: no question RR: %I", qp->req->id, srcip);
  610. else if(mp->qd->owner != qp->dp)
  611. dnslog("%d: owner %s instead of %s: %I", qp->req->id,
  612. mp->qd->owner->name, qp->dp->name, srcip);
  613. else if(mp->qd->type != qp->type)
  614. dnslog("%d: qp->type %d instead of %d: %I",
  615. qp->req->id, mp->qd->type, qp->type, srcip);
  616. else {
  617. /* remember what request this is in answer to */
  618. for(rp = mp->an; rp; rp = rp->next)
  619. rp->query = qp->type;
  620. return rv;
  621. }
  622. }
  623. if (time(nil) >= endtime) {
  624. ; /* query expired */
  625. } else if (0) {
  626. /* this happens routinely when a read times out */
  627. dnslog("readreply: %s type %s: ns %I read error or eof "
  628. "(returned %d): %r", qp->dp->name, rrname(qp->type,
  629. tbuf, sizeof tbuf), srcip, len);
  630. if (medium == Udp)
  631. for (rp = qp->nsrp; rp != nil; rp = rp->next)
  632. if (rp->type == Tns)
  633. dnslog("readreply: %s: query sent to "
  634. "ns %s", qp->dp->name,
  635. rp->host->name);
  636. }
  637. return -1;
  638. }
  639. /*
  640. * return non-0 if first list includes second list
  641. */
  642. int
  643. contains(RR *rp1, RR *rp2)
  644. {
  645. RR *trp1, *trp2;
  646. for(trp2 = rp2; trp2; trp2 = trp2->next){
  647. for(trp1 = rp1; trp1; trp1 = trp1->next)
  648. if(trp1->type == trp2->type)
  649. if(trp1->host == trp2->host)
  650. if(trp1->owner == trp2->owner)
  651. break;
  652. if(trp1 == nil)
  653. return 0;
  654. }
  655. return 1;
  656. }
  657. /*
  658. * return multicast version if any
  659. */
  660. int
  661. ipisbm(uchar *ip)
  662. {
  663. if(isv4(ip)){
  664. if (ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0 ||
  665. ipcmp(ip, IPv4bcast) == 0)
  666. return 4;
  667. } else
  668. if(ip[0] == 0xff)
  669. return 6;
  670. return 0;
  671. }
  672. /*
  673. * Get next server address
  674. */
  675. static int
  676. serveraddrs(Query *qp, int nd, int depth)
  677. {
  678. RR *rp, *arp, *trp;
  679. Dest *cur;
  680. if(nd >= Maxdest)
  681. return 0;
  682. /*
  683. * look for a server whose address we already know.
  684. * if we find one, mark it so we ignore this on
  685. * subsequent passes.
  686. */
  687. arp = 0;
  688. for(rp = qp->nsrp; rp; rp = rp->next){
  689. assert(rp->magic == RRmagic);
  690. if(rp->marker)
  691. continue;
  692. arp = rrlookup(rp->host, Ta, NOneg);
  693. if(arp){
  694. rp->marker = 1;
  695. break;
  696. }
  697. arp = dblookup(rp->host->name, Cin, Ta, 0, 0);
  698. if(arp){
  699. rp->marker = 1;
  700. break;
  701. }
  702. }
  703. /*
  704. * if the cache and database lookup didn't find any new
  705. * server addresses, try resolving one via the network.
  706. * Mark any we try to resolve so we don't try a second time.
  707. */
  708. if(arp == 0)
  709. for(rp = qp->nsrp; rp; rp = rp->next){
  710. if(rp->marker)
  711. continue;
  712. rp->marker = 1;
  713. /*
  714. * avoid loops looking up a server under itself
  715. */
  716. if(subsume(rp->owner->name, rp->host->name))
  717. continue;
  718. arp = dnresolve(rp->host->name, Cin, Ta, qp->req, 0,
  719. depth+1, Recurse, 1, 0);
  720. lock(&dnlock);
  721. rrfreelist(rrremneg(&arp));
  722. unlock(&dnlock);
  723. if(arp)
  724. break;
  725. }
  726. /* use any addresses that we found */
  727. for(trp = arp; trp && nd < Maxdest; trp = trp->next){
  728. cur = &qp->dest[nd];
  729. parseip(cur->a, trp->ip->name);
  730. /*
  731. * straddling servers can reject all nameservers if they are all
  732. * inside, so be sure to list at least one outside ns at
  733. * the end of the ns list in /lib/ndb for `dom='.
  734. */
  735. if (ipisbm(cur->a) ||
  736. cfg.straddle && !insideaddr(qp->dp->name) && insidens(cur->a))
  737. continue;
  738. cur->nx = 0;
  739. cur->s = trp->owner;
  740. cur->code = Rtimeout;
  741. nd++;
  742. }
  743. rrfreelist(arp);
  744. return nd;
  745. }
  746. /*
  747. * cache negative responses
  748. */
  749. static void
  750. cacheneg(DN *dp, int type, int rcode, RR *soarr)
  751. {
  752. RR *rp;
  753. DN *soaowner;
  754. ulong ttl;
  755. stats.negcached++;
  756. /* no cache time specified, don't make anything up */
  757. if(soarr != nil){
  758. if(soarr->next != nil){
  759. rrfreelist(soarr->next);
  760. soarr->next = nil;
  761. }
  762. soaowner = soarr->owner;
  763. } else
  764. soaowner = nil;
  765. /* the attach can cause soarr to be freed so mine it now */
  766. if(soarr != nil && soarr->soa != nil)
  767. ttl = soarr->soa->minttl+now;
  768. else
  769. ttl = 5*Min;
  770. /* add soa and negative RR to the database */
  771. rrattach(soarr, Authoritative);
  772. rp = rralloc(type);
  773. rp->owner = dp;
  774. rp->negative = 1;
  775. rp->negsoaowner = soaowner;
  776. rp->negrcode = rcode;
  777. rp->ttl = ttl;
  778. rrattach(rp, Authoritative);
  779. }
  780. static int
  781. setdestoutns(Dest *p, int n)
  782. {
  783. uchar *outns = outsidens(n);
  784. destck(p);
  785. destinit(p);
  786. if (outns == nil) {
  787. if (n == 0)
  788. dnslog("[%d] no outside-ns in ndb", getpid());
  789. return -1;
  790. }
  791. memmove(p->a, outns, sizeof p->a);
  792. p->s = dnlookup("outside-ns-ips", Cin, 1);
  793. return 0;
  794. }
  795. /*
  796. * issue query via UDP or TCP as appropriate.
  797. * for TCP, returns with qp->tcpip set from udppkt header.
  798. */
  799. static int
  800. mydnsquery(Query *qp, int medium, uchar *udppkt, int len)
  801. {
  802. int rv = -1, nfd;
  803. char *domain;
  804. char conndir[40];
  805. uchar belen[2];
  806. NetConnInfo *nci;
  807. queryck(qp);
  808. domain = smprint("%I", udppkt);
  809. if (myaddr(domain)) {
  810. dnslog("mydnsquery: trying to send to myself (%s); bzzzt",
  811. domain);
  812. free(domain);
  813. return rv;
  814. }
  815. switch (medium) {
  816. case Udp:
  817. free(domain);
  818. nfd = dup(qp->udpfd, -1);
  819. if (nfd < 0) {
  820. warning("mydnsquery: qp->udpfd %d: %r", qp->udpfd);
  821. close(qp->udpfd); /* ensure it's closed */
  822. qp->udpfd = -1; /* poison it */
  823. return rv;
  824. }
  825. close(nfd);
  826. if (qp->udpfd <= 0)
  827. dnslog("mydnsquery: qp->udpfd %d closed", qp->udpfd);
  828. else {
  829. if (write(qp->udpfd, udppkt, len+Udphdrsize) !=
  830. len+Udphdrsize)
  831. warning("sending udp msg: %r");
  832. else {
  833. stats.qsent++;
  834. rv = 0;
  835. }
  836. }
  837. break;
  838. case Tcp:
  839. /* send via TCP & keep fd around for reply */
  840. alarm(10*1000);
  841. qp->tcpfd = rv = dial(netmkaddr(domain, "tcp", "dns"), nil,
  842. conndir, &qp->tcpctlfd);
  843. alarm(0);
  844. if (qp->tcpfd < 0) {
  845. dnslog("can't dial tcp!%s!dns: %r", domain);
  846. free(domain);
  847. break;
  848. }
  849. free(domain);
  850. nci = getnetconninfo(conndir, qp->tcpfd);
  851. if (nci) {
  852. parseip(qp->tcpip, nci->rsys);
  853. freenetconninfo(nci);
  854. } else
  855. dnslog("mydnsquery: getnetconninfo failed");
  856. qp->tcpset = 1;
  857. belen[0] = len >> 8;
  858. belen[1] = len;
  859. if (write(qp->tcpfd, belen, 2) != 2 ||
  860. write(qp->tcpfd, udppkt + Udphdrsize, len) != len)
  861. warning("sending tcp msg: %r");
  862. break;
  863. default:
  864. sysfatal("mydnsquery: bad medium");
  865. }
  866. return rv;
  867. }
  868. /*
  869. * send query to all UDP destinations or one TCP destination,
  870. * taken from obuf (udp packet) header
  871. */
  872. static int
  873. xmitquery(Query *qp, int medium, int depth, uchar *obuf, int inns, int len)
  874. {
  875. int j, n;
  876. char buf[32];
  877. Dest *p;
  878. queryck(qp);
  879. if(time(nil) >= qp->req->aborttime)
  880. return -1;
  881. /*
  882. * get a nameserver address if we need one.
  883. * serveraddrs populates qp->dest.
  884. */
  885. p = qp->dest;
  886. destck(p);
  887. if (qp->ndest < 0 || qp->ndest > Maxdest)
  888. dnslog("qp->ndest %d out of range", qp->ndest);
  889. if (qp->ndest > qp->curdest - p) {
  890. j = serveraddrs(qp, qp->curdest - p, depth);
  891. if (j < 0 || j >= Maxdest) {
  892. dnslog("serveraddrs() result %d out of range", j);
  893. abort();
  894. }
  895. qp->curdest = &qp->dest[j];
  896. }
  897. destck(qp->curdest);
  898. /* no servers, punt */
  899. if (qp->ndest == 0)
  900. if (cfg.straddle && cfg.inside) {
  901. /* get ips of "outside-ns-ips" */
  902. p = qp->curdest = qp->dest;
  903. for(n = 0; n < Maxdest; n++, qp->curdest++)
  904. if (setdestoutns(qp->curdest, n) < 0)
  905. break;
  906. } else {
  907. /* it's probably just a bogus domain, don't log it */
  908. // dnslog("xmitquery: %s: no nameservers", qp->dp->name);
  909. return -1;
  910. }
  911. /* send to first 'qp->ndest' destinations */
  912. j = 0;
  913. if (medium == Tcp) {
  914. j++;
  915. queryck(qp);
  916. assert(qp->dp);
  917. procsetname("tcp %sside query for %s %s", (inns? "in": "out"),
  918. qp->dp->name, rrname(qp->type, buf, sizeof buf));
  919. mydnsquery(qp, medium, obuf, len); /* sets qp->tcpip from obuf */
  920. if(debug)
  921. logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name,
  922. qp->type);
  923. } else
  924. for(; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){
  925. /* skip destinations we've finished with */
  926. if(p->nx >= Maxtrans)
  927. continue;
  928. j++;
  929. /* exponential backoff of requests */
  930. if((1<<p->nx) > qp->ndest)
  931. continue;
  932. procsetname("udp %sside query to %I/%s %s %s",
  933. (inns? "in": "out"), p->a, p->s->name,
  934. qp->dp->name, rrname(qp->type, buf, sizeof buf));
  935. if(debug)
  936. logsend(qp->req->id, depth, p->a, p->s->name,
  937. qp->dp->name, qp->type);
  938. /* fill in UDP destination addr & send it */
  939. memmove(obuf, p->a, sizeof p->a);
  940. mydnsquery(qp, medium, obuf, len);
  941. p->nx++;
  942. }
  943. if(j == 0) {
  944. // dnslog("xmitquery: %s: no destinations left", qp->dp->name);
  945. return -1;
  946. }
  947. return 0;
  948. }
  949. static int lckindex[Maxlcks] = {
  950. 0, /* all others map here */
  951. Ta,
  952. Tns,
  953. Tcname,
  954. Tsoa,
  955. Tptr,
  956. Tmx,
  957. Ttxt,
  958. Taaaa,
  959. };
  960. static int
  961. qtype2lck(int qtype) /* map query type to querylck index */
  962. {
  963. int i;
  964. for (i = 1; i < nelem(lckindex); i++)
  965. if (lckindex[i] == qtype)
  966. return i;
  967. return 0;
  968. }
  969. /* is mp a cachable negative response (with Rname set)? */
  970. static int
  971. isnegrname(DNSmsg *mp)
  972. {
  973. /* TODO: could add || cfg.justforw to RHS of && */
  974. return mp->an == nil && (mp->flags & Rmask) == Rname;
  975. }
  976. static int
  977. procansw(Query *qp, DNSmsg *mp, uchar *srcip, int depth, Dest *p)
  978. {
  979. int rv;
  980. // int lcktype;
  981. char buf[32];
  982. DN *ndp;
  983. Query *nqp;
  984. RR *tp, *soarr;
  985. if (mp->an == nil)
  986. stats.negans++;
  987. /* ignore any error replies */
  988. if((mp->flags & Rmask) == Rserver){
  989. stats.negserver++;
  990. freeanswers(mp);
  991. if(p != qp->curdest)
  992. p->code = Rserver;
  993. return -1;
  994. }
  995. /* ignore any bad delegations */
  996. if(mp->ns && baddelegation(mp->ns, qp->nsrp, srcip)){
  997. stats.negbaddeleg++;
  998. if(mp->an == nil){
  999. stats.negbdnoans++;
  1000. freeanswers(mp);
  1001. if(p != qp->curdest)
  1002. p->code = Rserver;
  1003. return -1;
  1004. }
  1005. rrfreelist(mp->ns);
  1006. mp->ns = nil;
  1007. }
  1008. /* remove any soa's from the authority section */
  1009. lock(&dnlock);
  1010. soarr = rrremtype(&mp->ns, Tsoa);
  1011. /* incorporate answers */
  1012. unique(mp->an);
  1013. unique(mp->ns);
  1014. unique(mp->ar);
  1015. unlock(&dnlock);
  1016. if(mp->an)
  1017. rrattach(mp->an, (mp->flags & Fauth) != 0);
  1018. if(mp->ar)
  1019. rrattach(mp->ar, Notauthoritative);
  1020. if(mp->ns && !cfg.justforw){
  1021. ndp = mp->ns->owner;
  1022. rrattach(mp->ns, Notauthoritative);
  1023. } else {
  1024. ndp = nil;
  1025. rrfreelist(mp->ns);
  1026. mp->ns = nil;
  1027. }
  1028. /* free the question */
  1029. if(mp->qd) {
  1030. rrfreelist(mp->qd);
  1031. mp->qd = nil;
  1032. }
  1033. /*
  1034. * Any reply from an authoritative server,
  1035. * or a positive reply terminates the search.
  1036. * A negative response now also terminates the search.
  1037. */
  1038. if(mp->an != nil || (mp->flags & Fauth)){
  1039. if(isnegrname(mp))
  1040. qp->dp->respcode = Rname;
  1041. else
  1042. qp->dp->respcode = 0;
  1043. /*
  1044. * cache any negative responses, free soarr.
  1045. * negative responses need not be authoritative:
  1046. * they can legitimately come from a cache.
  1047. */
  1048. if( /* (mp->flags & Fauth) && */ mp->an == nil)
  1049. cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
  1050. else
  1051. rrfreelist(soarr);
  1052. return 1;
  1053. } else if (isnegrname(mp)) {
  1054. qp->dp->respcode = Rname;
  1055. /*
  1056. * cache negative response.
  1057. * negative responses need not be authoritative:
  1058. * they can legitimately come from a cache.
  1059. */
  1060. cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
  1061. return 1;
  1062. }
  1063. stats.negnorname++;
  1064. rrfreelist(soarr);
  1065. /*
  1066. * if we've been given better name servers, recurse.
  1067. * if we're a pure resolver, don't recurse, we have
  1068. * to forward to a fixed set of named servers.
  1069. */
  1070. if(!mp->ns || cfg.resolver && cfg.justforw)
  1071. return 0;
  1072. tp = rrlookup(ndp, Tns, NOneg);
  1073. if(contains(qp->nsrp, tp)){
  1074. rrfreelist(tp);
  1075. return 0;
  1076. }
  1077. procsetname("recursive query for %s %s", qp->dp->name,
  1078. rrname(qp->type, buf, sizeof buf));
  1079. /*
  1080. * we're called from udpquery, called from
  1081. * netquery, which current holds qp->dp->querylck,
  1082. * so release it now and acquire it upon return.
  1083. */
  1084. // lcktype = qtype2lck(qp->type);
  1085. // qunlock(&qp->dp->querylck[lcktype]);
  1086. nqp = emalloc(sizeof *nqp);
  1087. queryinit(nqp, qp->dp, qp->type, qp->req);
  1088. nqp->nsrp = tp;
  1089. rv = netquery(nqp, depth+1);
  1090. // qlock(&qp->dp->querylck[lcktype]);
  1091. rrfreelist(nqp->nsrp);
  1092. querydestroy(nqp);
  1093. free(nqp);
  1094. return rv;
  1095. }
  1096. /*
  1097. * send a query via tcp to a single address (from ibuf's udp header)
  1098. * and read the answer(s) into mp->an.
  1099. */
  1100. static int
  1101. tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len,
  1102. int waitsecs, int inns, ushort req)
  1103. {
  1104. int rv = 0;
  1105. ulong endtime;
  1106. endtime = time(nil) + waitsecs;
  1107. if(endtime > qp->req->aborttime)
  1108. endtime = qp->req->aborttime;
  1109. if (0)
  1110. dnslog("%s: udp reply truncated; retrying query via tcp to %I",
  1111. qp->dp->name, qp->tcpip);
  1112. qlock(&qp->tcplock);
  1113. memmove(obuf, ibuf, IPaddrlen); /* send back to respondent */
  1114. /* sets qp->tcpip from obuf's udp header */
  1115. if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 ||
  1116. readreply(qp, Tcp, req, ibuf, mp, endtime) < 0)
  1117. rv = -1;
  1118. if (qp->tcpfd > 0) {
  1119. hangup(qp->tcpctlfd);
  1120. close(qp->tcpctlfd);
  1121. close(qp->tcpfd);
  1122. }
  1123. qp->tcpfd = qp->tcpctlfd = -1;
  1124. qunlock(&qp->tcplock);
  1125. return rv;
  1126. }
  1127. /*
  1128. * query name servers. If the name server returns a pointer to another
  1129. * name server, recurse.
  1130. */
  1131. static int
  1132. queryns(Query *qp, int depth, uchar *ibuf, uchar *obuf, int waitsecs, int inns)
  1133. {
  1134. int ndest, len, replywaits, rv;
  1135. ushort req;
  1136. ulong endtime;
  1137. char buf[12];
  1138. uchar srcip[IPaddrlen];
  1139. Dest *p, *np, *dest;
  1140. /* pack request into a udp message */
  1141. req = rand();
  1142. len = mkreq(qp->dp, qp->type, obuf, Frecurse|Oquery, req);
  1143. /* no server addresses yet */
  1144. queryck(qp);
  1145. dest = emalloc(Maxdest * sizeof *dest); /* dest can't be on stack */
  1146. for (p = dest; p < dest + Maxdest; p++)
  1147. destinit(p);
  1148. /* this dest array is local to this call of queryns() */
  1149. free(qp->dest);
  1150. qp->curdest = qp->dest = dest;
  1151. /*
  1152. * transmit udp requests and wait for answers.
  1153. * at most Maxtrans attempts to each address.
  1154. * each cycle send one more message than the previous.
  1155. * retry a query via tcp if its response is truncated.
  1156. */
  1157. for(ndest = 1; ndest < Maxdest; ndest++){
  1158. qp->ndest = ndest;
  1159. qp->tcpset = 0;
  1160. if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0)
  1161. break;
  1162. endtime = time(nil) + waitsecs;
  1163. if(endtime > qp->req->aborttime)
  1164. endtime = qp->req->aborttime;
  1165. for(replywaits = 0; replywaits < ndest; replywaits++){
  1166. DNSmsg m;
  1167. procsetname("reading %sside reply from %I: %s %s from %s",
  1168. (inns? "in": "out"), obuf, qp->dp->name,
  1169. rrname(qp->type, buf, sizeof buf), qp->req->from);
  1170. /* read udp answer into m */
  1171. if (readreply(qp, Udp, req, ibuf, &m, endtime) >= 0)
  1172. memmove(srcip, ibuf, IPaddrlen);
  1173. else if (!(m.flags & Ftrunc)) {
  1174. freeanswers(&m);
  1175. break; /* timed out on this dest */
  1176. } else {
  1177. /* whoops, it was truncated! ask again via tcp */
  1178. freeanswers(&m);
  1179. rv = tcpquery(qp, &m, depth, ibuf, obuf, len,
  1180. waitsecs, inns, req); /* answer in m */
  1181. if (rv < 0) {
  1182. freeanswers(&m);
  1183. break; /* failed via tcp too */
  1184. }
  1185. memmove(srcip, qp->tcpip, IPaddrlen);
  1186. }
  1187. /* find responder */
  1188. // dnslog("queryns got reply from %I", srcip);
  1189. for(p = qp->dest; p < qp->curdest; p++)
  1190. if(memcmp(p->a, srcip, sizeof p->a) == 0)
  1191. break;
  1192. /* remove all addrs of responding server from list */
  1193. for(np = qp->dest; np < qp->curdest; np++)
  1194. if(np->s == p->s)
  1195. p->nx = Maxtrans;
  1196. /* free or incorporate RRs in m */
  1197. rv = procansw(qp, &m, srcip, depth, p);
  1198. if (rv > 0) {
  1199. free(qp->dest);
  1200. qp->dest = qp->curdest = nil; /* prevent accidents */
  1201. return rv;
  1202. }
  1203. }
  1204. }
  1205. /* if all servers returned failure, propagate it */
  1206. qp->dp->respcode = Rserver;
  1207. for(p = dest; p < qp->curdest; p++) {
  1208. destck(p);
  1209. if(p->code != Rserver)
  1210. qp->dp->respcode = 0;
  1211. p->magic = 0; /* prevent accidents */
  1212. }
  1213. // if (qp->dp->respcode)
  1214. // dnslog("queryns setting Rserver for %s", qp->dp->name);
  1215. free(qp->dest);
  1216. qp->dest = qp->curdest = nil; /* prevent accidents */
  1217. return 0;
  1218. }
  1219. /*
  1220. * run a command with a supplied fd as standard input
  1221. */
  1222. char *
  1223. system(int fd, char *cmd)
  1224. {
  1225. int pid, p, i;
  1226. static Waitmsg msg;
  1227. if((pid = fork()) == -1)
  1228. sysfatal("fork failed: %r");
  1229. else if(pid == 0){
  1230. dup(fd, 0);
  1231. close(fd);
  1232. for (i = 3; i < 200; i++)
  1233. close(i); /* don't leak fds */
  1234. execl("/bin/rc", "rc", "-c", cmd, nil);
  1235. sysfatal("exec rc: %r");
  1236. }
  1237. for(p = waitpid(); p >= 0; p = waitpid())
  1238. if(p == pid)
  1239. return msg.msg;
  1240. return "lost child";
  1241. }
  /*
   * compute wait, weighted by probability of success, with minimum:
   * pcntprob percent of ms, but never less than Minwait.
   */
  static ulong
  weight(ulong ms, unsigned pcntprob)
  {
      enum { Minwait = 1500 };    /* floor, in the same units as ms */
      ulong scaled;

      scaled = (ms * pcntprob) / 100;
      return scaled < Minwait? Minwait: scaled;
  }
  1252. /*
  1253. * in principle we could use a single descriptor for a udp port
  1254. * to send all queries and receive all the answers to them,
  1255. * but we'd have to sort out the answers by dns-query id.
  1256. */
  1257. static int
  1258. udpquery(Query *qp, char *mntpt, int depth, int patient, int inns)
  1259. {
  1260. int fd, rv;
  1261. long now;
  1262. ulong pcntprob, wait, reqtm;
  1263. char *msg;
  1264. uchar *obuf, *ibuf;
  1265. static QLock mntlck;
  1266. static ulong lastmount;
  1267. /* use alloced buffers rather than ones from the stack */
  1268. // ibuf = emalloc(Maxudpin+Udphdrsize);
  1269. ibuf = emalloc(64*1024); /* max. tcp reply size */
  1270. obuf = emalloc(Maxudp+Udphdrsize);
  1271. fd = udpport(mntpt);
  1272. while (fd < 0 && cfg.straddle && strcmp(mntpt, "/net.alt") == 0) {
  1273. /* HACK: remount /net.alt */
  1274. now = time(nil);
  1275. if (now < lastmount + Remntretry)
  1276. sleep((lastmount + Remntretry - now)*1000);
  1277. qlock(&mntlck);
  1278. fd = udpport(mntpt); /* try again under lock */
  1279. if (fd < 0) {
  1280. dnslog("[%d] remounting /net.alt", getpid());
  1281. unmount(nil, "/net.alt");
  1282. msg = system(open("/dev/null", ORDWR), "outside");
  1283. lastmount = time(nil);
  1284. if (msg && *msg) {
  1285. dnslog("[%d] can't remount /net.alt: %s",
  1286. getpid(), msg);
  1287. sleep(10*1000); /* don't spin wildly */
  1288. } else
  1289. fd = udpport(mntpt);
  1290. }
  1291. qunlock(&mntlck);
  1292. }
  1293. if (fd < 0) {
  1294. dnslog("can't get udpport for %s query of name %s: %r",
  1295. mntpt, qp->dp->name);
  1296. sysfatal("out of udp conversations"); /* we're buggered */
  1297. }
  1298. /*
  1299. * Our QIP servers are busted, don't answer AAAA and
  1300. * take forever to answer CNAME if there isn't one.
  1301. * They rarely set Rname.
  1302. * make time-to-wait proportional to estimated probability of an
  1303. * RR of that type existing.
  1304. */
  1305. if (qp->type >= nelem(likely))
  1306. pcntprob = 35; /* unpopular query type */
  1307. else
  1308. pcntprob = likely[qp->type];
  1309. reqtm = (patient? 2*Maxreqtm: Maxreqtm);
  1310. /* time for a single outgoing udp query */
  1311. wait = weight(S2MS(reqtm)/3, pcntprob);
  1312. qp->req->aborttime = time(nil) + MS2S(3*wait); /* for all udp queries */
  1313. qp->udpfd = fd;
  1314. rv = queryns(qp, depth, ibuf, obuf, MS2S(wait), inns);
  1315. close(fd);
  1316. qp->udpfd = -1;
  1317. free(obuf);
  1318. free(ibuf);
  1319. return rv;
  1320. }
  1321. /* look up (qp->dp->name,qp->type) rr in dns, via *nsrp with results in *reqp */
  1322. static int
  1323. netquery(Query *qp, int depth)
  1324. {
  1325. int lock, rv, triedin, inname;
  1326. // char buf[32];
  1327. RR *rp;
  1328. DN *dp;
  1329. Querylck *qlp;
  1330. static int whined;
  1331. rv = 0; /* pessimism */
  1332. if(depth > 12) /* in a recursive loop? */
  1333. return 0;
  1334. slave(qp->req);
  1335. /*
  1336. * slave might have forked. if so, the parent process longjmped to
  1337. * req->mret; we're usually the child slave, but if there are too
  1338. * many children already, we're still the same process.
  1339. */
  1340. /*
  1341. * don't lock before call to slave so only children can block.
  1342. * just lock at top-level invocation.
  1343. */
  1344. lock = depth <= 1 && qp->req->isslave;
  1345. dp = qp->dp; /* ensure that it doesn't change underfoot */
  1346. qlp = nil;
  1347. if(lock) {
  1348. // procsetname("query lock wait: %s %s from %s", dp->name,
  1349. // rrname(qp->type, buf, sizeof buf), qp->req->from);
  1350. /*
  1351. * don't make concurrent queries for this name.
  1352. * dozens of processes blocking here probably indicates
  1353. * an error in our dns data that causes us to not
  1354. * recognise a zone (area) as one of our own, thus
  1355. * causing us to query other nameservers.
  1356. */
  1357. qlp = &dp->querylck[qtype2lck(qp->type)];
  1358. qlock(qlp);
  1359. if (qlp->Ref.ref > 10) {
  1360. qunlock(qlp);
  1361. if (!whined) {
  1362. whined = 1;
  1363. dnslog("too many outstanding queries for %s;"
  1364. " dropping this one; no further logging"
  1365. " of drops", dp->name);
  1366. }
  1367. return 0;
  1368. }
  1369. ++qlp->Ref.ref;
  1370. qunlock(qlp);
  1371. }
  1372. procsetname("netquery: %s", dp->name);
  1373. /* prepare server RR's for incremental lookup */
  1374. for(rp = qp->nsrp; rp; rp = rp->next)
  1375. rp->marker = 0;
  1376. triedin = 0;
  1377. /*
  1378. * normal resolvers and servers will just use mntpt for all addresses,
  1379. * even on the outside. straddling servers will use mntpt (/net)
  1380. * for inside addresses and /net.alt for outside addresses,
  1381. * thus bypassing other inside nameservers.
  1382. */
  1383. inname = insideaddr(dp->name);
  1384. if (!cfg.straddle || inname) {
  1385. rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns));
  1386. triedin = 1;
  1387. }
  1388. /*
  1389. * if we're still looking, are inside, and have an outside domain,
  1390. * try it on our outside interface, if any.
  1391. */
  1392. if (rv == 0 && cfg.inside && !inname) {
  1393. if (triedin)
  1394. dnslog(
  1395. "[%d] netquery: internal nameservers failed for %s; trying external",
  1396. getpid(), dp->name);
  1397. /* prepare server RR's for incremental lookup */
  1398. for(rp = qp->nsrp; rp; rp = rp->next)
  1399. rp->marker = 0;
  1400. rv = udpquery(qp, "/net.alt", depth, Patient, Outns);
  1401. }
  1402. // if (rv == 0) /* could ask /net.alt/dns directly */
  1403. // askoutdns(dp, qp->type);
  1404. if(lock && qlp) {
  1405. qlock(qlp);
  1406. assert(qlp->Ref.ref > 0);
  1407. qunlock(qlp);
  1408. decref(qlp);
  1409. }
  1410. return rv;
  1411. }
  1412. int
  1413. seerootns(void)
  1414. {
  1415. int rv;
  1416. char root[] = "";
  1417. Request req;
  1418. Query *qp;
  1419. memset(&req, 0, sizeof req);
  1420. req.isslave = 1;
  1421. req.aborttime = now + Maxreqtm;
  1422. req.from = "internal";
  1423. qp = emalloc(sizeof *qp);
  1424. queryinit(qp, dnlookup(root, Cin, 1), Tns, &req);
  1425. qp->nsrp = dblookup(root, Cin, Tns, 0, 0);
  1426. rv = netquery(qp, 0);
  1427. rrfreelist(qp->nsrp);
  1428. querydestroy(qp);
  1429. free(qp);
  1430. return rv;
  1431. }