dnresolve.c 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. /*
  10. * domain name resolvers, see rfcs 1035 and 1123
  11. */
  12. #include <u.h>
  13. #include <libc.h>
  14. #include <ip.h>
  15. #include <bio.h>
  16. #include <ndb.h>
  17. #include "dns.h"
  /* file-local structure names; the structures themselves are defined below */
  typedef struct Query Query;
  typedef struct Ipaddr Ipaddr;
  typedef struct Dest Dest;
  /*
   * resolver constants: transports, answer codes, limits, time-outs
   * and the magic numbers stamped into Dest and Query.
   */
  enum
  {
      /* transport media */
      Udp,
      Tcp,

      Answerr = -1,
      Answnone,

      Maxdest = 24,           /* maximum destinations for a request message */
      Maxoutstanding = 15,    /* max. outstanding queries per domain name */
      Remntretry = 15,        /* min. sec.s between /net.alt remount tries */

      /*
       * we're trying longer timeouts now, primarily for the benefit
       * of remote nameservers querying us during times of bad
       * connectivity.  the old values were Maxtrans 3, Maxretries 3,
       * Maxwaitms 1000 and Minwaitms 100.
       */
      Maxtrans = 5,           /* maximum transmissions to a server */
      Maxretries = 5,         /* cname+actual resends: was 32; have pity on user */
      Maxwaitms = 5000,       /* wait no longer for a remote dns query */
      Minwaitms = 500,        /* willing to wait for a remote dns query */

      Destmagic = 0xcafebabe,
      Querymagic = 0xdeadbeef,
  };

  enum
  {
      Hurry,
      Patient,
  };

  enum
  {
      Outns,
      Inns,
  };
  47. struct Ipaddr {
  48. Ipaddr *next;
  49. uint8_t ip[IPaddrlen];
  50. };
  51. struct Dest
  52. {
  53. uint8_t a[IPaddrlen]; /* ip address */
  54. DN *s; /* name server */
  55. int nx; /* number of transmissions */
  56. int code; /* response code; used to clear dp->respcode */
  57. uint32_t magic;
  58. };
  59. /*
  60. * Query has a QLock in it, thus it can't be an automatic
  61. * variable, since each process would see a separate copy
  62. * of the lock on its stack.
  63. */
  64. struct Query {
  65. DN *dp; /* domain */
  66. uint16_t type; /* and type to look up */
  67. Request *req;
  68. RR *nsrp; /* name servers to consult */
  69. /* dest must not be on the stack due to forking in slave() */
  70. Dest *dest; /* array of destinations */
  71. Dest *curdest; /* pointer to next to fill */
  72. int ndest; /* transmit to this many on this round */
  73. int udpfd;
  74. QLock tcplock; /* only one tcp call at a time per query */
  75. int tcpset;
  76. int tcpfd; /* if Tcp, read replies from here */
  77. int tcpctlfd;
  78. uint8_t tcpip[IPaddrlen];
  79. uint32_t magic;
  80. };
  81. /* estimated % probability of such a record existing at all */
  82. int likely[] = {
  83. [Ta] 95,
  84. [Taaaa] 10,
  85. [Tcname] 15,
  86. [Tmx] 60,
  87. [Tns] 90,
  88. [Tnull] 5,
  89. [Tptr] 35,
  90. [Tsoa] 90,
  91. [Tsrv] 60,
  92. [Ttxt] 15,
  93. [Tall] 95,
  94. };
  95. static RR* dnresolve1(char*, int, int, Request*, int, int);
  96. static int netquery(Query *, int);
  97. /*
  98. * reading /proc/pid/args yields either "name args" or "name [display args]",
  99. * so return only display args, if any.
  100. */
  101. static char *
  102. procgetname(void)
  103. {
  104. int fd, n;
  105. char *lp, *rp;
  106. char buf[256];
  107. snprint(buf, sizeof buf, "#p/%d/args", getpid());
  108. if((fd = open(buf, OREAD)) < 0)
  109. return strdup("");
  110. *buf = '\0';
  111. n = read(fd, buf, sizeof buf-1);
  112. close(fd);
  113. if (n >= 0)
  114. buf[n] = '\0';
  115. if ((lp = strchr(buf, '[')) == nil ||
  116. (rp = strrchr(buf, ']')) == nil)
  117. return strdup("");
  118. *rp = '\0';
  119. return strdup(lp+1);
  120. }
  121. void
  122. rrfreelistptr(RR **rpp)
  123. {
  124. RR *rp;
  125. if (rpp == nil || *rpp == nil)
  126. return;
  127. rp = *rpp;
  128. *rpp = nil; /* update pointer in memory before freeing list */
  129. rrfreelist(rp);
  130. }
  131. /*
  132. * lookup 'type' info for domain name 'name'. If it doesn't exist, try
  133. * looking it up as a canonical name.
  134. *
  135. * this process can be quite slow if time-outs are set too high when querying
  136. * nameservers that just don't respond to certain query types. in that case,
  137. * there will be multiple udp retries, multiple nameservers will be queried,
  138. * and this will be repeated for a cname query. the whole thing will be
  139. * retried several times until we get an answer or a time-out.
  140. */
  141. RR*
  142. dnresolve(char *name, int class, int type, Request *req, RR **cn,
  143. int depth,
  144. int recurse, int rooted, int *status)
  145. {
  146. RR *rp, *nrp, *drp;
  147. DN *dp;
  148. int loops;
  149. char *procname;
  150. char nname[Domlen];
  151. if(status)
  152. *status = 0;
  153. if(depth > 12) /* in a recursive loop? */
  154. return nil;
  155. procname = procgetname();
  156. /*
  157. * hack for systems that don't have resolve search
  158. * lists. Just look up the simple name in the database.
  159. */
  160. if(!rooted && strchr(name, '.') == nil){
  161. rp = nil;
  162. drp = domainlist(class);
  163. for(nrp = drp; rp == nil && nrp != nil; nrp = nrp->next){
  164. snprint(nname, sizeof nname, "%s.%s", name,
  165. nrp->ptr->name);
  166. rp = dnresolve(nname, class, type, req, cn, depth+1,
  167. recurse, rooted, status);
  168. lock(&dnlock);
  169. rrfreelist(rrremneg(&rp));
  170. unlock(&dnlock);
  171. }
  172. if(drp != nil)
  173. rrfreelist(drp);
  174. procsetname(procname);
  175. free(procname);
  176. return rp;
  177. }
  178. /*
  179. * try the name directly
  180. */
  181. rp = dnresolve1(name, class, type, req, depth, recurse);
  182. if(rp == nil) {
  183. /*
  184. * try it as a canonical name if we weren't told
  185. * that the name didn't exist
  186. */
  187. dp = dnlookup(name, class, 0);
  188. if(type != Tptr && dp->respcode != Rname)
  189. for(loops = 0; rp == nil && loops < Maxretries; loops++){
  190. /* retry cname, then the actual type */
  191. rp = dnresolve1(name, class, Tcname, req,
  192. depth, recurse);
  193. if(rp == nil)
  194. break;
  195. /* rp->host == nil shouldn't happen, but does */
  196. if(rp->negative || rp->host == nil){
  197. rrfreelist(rp);
  198. rp = nil;
  199. break;
  200. }
  201. name = rp->host->name;
  202. lock(&dnlock);
  203. if(cn)
  204. rrcat(cn, rp);
  205. else
  206. rrfreelist(rp);
  207. unlock(&dnlock);
  208. rp = dnresolve1(name, class, type, req,
  209. depth, recurse);
  210. }
  211. /* distinction between not found and not good */
  212. if(rp == nil && status != nil && dp->respcode != Rok)
  213. *status = dp->respcode;
  214. }
  215. procsetname(procname);
  216. free(procname);
  217. return randomize(rp);
  218. }
  219. static void
  220. queryinit(Query *qp, DN *dp, int type, Request *req)
  221. {
  222. memset(qp, 0, sizeof *qp);
  223. qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
  224. qp->dp = dp;
  225. qp->type = type;
  226. if (qp->type != type)
  227. dnslog("queryinit: bogus type %d", type);
  228. qp->req = req;
  229. qp->nsrp = nil;
  230. qp->dest = qp->curdest = nil;
  231. qp->magic = Querymagic;
  232. }
  233. static void
  234. queryck(Query *qp)
  235. {
  236. assert(qp);
  237. assert(qp->magic == Querymagic);
  238. }
  239. static void
  240. querydestroy(Query *qp)
  241. {
  242. queryck(qp);
  243. /* leave udpfd open */
  244. if (qp->tcpfd > 0)
  245. close(qp->tcpfd);
  246. if (qp->tcpctlfd > 0) {
  247. hangup(qp->tcpctlfd);
  248. close(qp->tcpctlfd);
  249. }
  250. free(qp->dest);
  251. memset(qp, 0, sizeof *qp); /* prevent accidents */
  252. qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
  253. }
  254. static void
  255. destinit(Dest *p)
  256. {
  257. memset(p, 0, sizeof *p);
  258. p->magic = Destmagic;
  259. }
  260. static void
  261. destck(Dest *p)
  262. {
  263. assert(p);
  264. assert(p->magic == Destmagic);
  265. }
  266. /*
  267. * if the response to a query hasn't arrived within 100 ms.,
  268. * it's unlikely to arrive at all. after 1 s., it's really unlikely.
  269. * queries for missing RRs are likely to produce time-outs rather than
  270. * negative responses, so cname and aaaa queries are likely to time out,
  271. * thus we don't wait very long for them.
  272. */
  273. static void
  274. notestats(int64_t start, int tmout, int type)
  275. {
  276. qlock(&stats);
  277. if (tmout) {
  278. stats.tmout++;
  279. if (type == Taaaa)
  280. stats.tmoutv6++;
  281. else if (type == Tcname)
  282. stats.tmoutcname++;
  283. } else {
  284. int32_t wait10ths = NS2MS(nsec() - start) / 100;
  285. if (wait10ths <= 0)
  286. stats.under10ths[0]++;
  287. else if (wait10ths >= nelem(stats.under10ths))
  288. stats.under10ths[nelem(stats.under10ths) - 1]++;
  289. else
  290. stats.under10ths[wait10ths]++;
  291. }
  292. qunlock(&stats);
  293. }
  294. static void
  295. noteinmem(void)
  296. {
  297. qlock(&stats);
  298. stats.answinmem++;
  299. qunlock(&stats);
  300. }
  301. /* netquery with given name servers, free ns rrs when done */
  302. static int
  303. netqueryns(Query *qp, int depth, RR *nsrp)
  304. {
  305. int rv;
  306. qp->nsrp = nsrp;
  307. rv = netquery(qp, depth);
  308. lock(&dnlock);
  309. rrfreelist(nsrp);
  310. unlock(&dnlock);
  311. return rv;
  312. }
  313. static RR*
  314. issuequery(Query *qp, char *name, int class, int depth, int recurse)
  315. {
  316. char *cp;
  317. DN *nsdp;
  318. RR *rp, *nsrp, *dbnsrp;
  319. /*
  320. * if we're running as just a resolver, query our
  321. * designated name servers
  322. */
  323. if(cfg.resolver){
  324. nsrp = randomize(getdnsservers(class));
  325. if(nsrp != nil)
  326. if(netqueryns(qp, depth+1, nsrp) > Answnone)
  327. return rrlookup(qp->dp, qp->type, OKneg);
  328. }
  329. /*
  330. * walk up the domain name looking for
  331. * a name server for the domain.
  332. */
  333. for(cp = name; cp; cp = walkup(cp)){
  334. /*
  335. * if this is a local (served by us) domain,
  336. * return answer
  337. */
  338. dbnsrp = randomize(dblookup(cp, class, Tns, 0, 0));
  339. if(dbnsrp && dbnsrp->local){
  340. rp = dblookup(name, class, qp->type, 1, dbnsrp->ttl);
  341. lock(&dnlock);
  342. rrfreelist(dbnsrp);
  343. unlock(&dnlock);
  344. return rp;
  345. }
  346. /*
  347. * if recursion isn't set, just accept local
  348. * entries
  349. */
  350. if(recurse == Dontrecurse){
  351. if(dbnsrp) {
  352. lock(&dnlock);
  353. rrfreelist(dbnsrp);
  354. unlock(&dnlock);
  355. }
  356. continue;
  357. }
  358. /* look for ns in cache */
  359. nsdp = dnlookup(cp, class, 0);
  360. nsrp = nil;
  361. if(nsdp)
  362. nsrp = randomize(rrlookup(nsdp, Tns, NOneg));
  363. /* if the entry timed out, ignore it */
  364. if(nsrp && nsrp->ttl < now){
  365. lock(&dnlock);
  366. rrfreelistptr(&nsrp);
  367. unlock(&dnlock);
  368. }
  369. if(nsrp){
  370. lock(&dnlock);
  371. rrfreelistptr(&dbnsrp);
  372. unlock(&dnlock);
  373. /* query the name servers found in cache */
  374. if(netqueryns(qp, depth+1, nsrp) > Answnone)
  375. return rrlookup(qp->dp, qp->type, OKneg);
  376. } else if(dbnsrp)
  377. /* try the name servers found in db */
  378. if(netqueryns(qp, depth+1, dbnsrp) > Answnone)
  379. return rrlookup(qp->dp, qp->type, NOneg);
  380. }
  381. return nil;
  382. }
  383. static RR*
  384. dnresolve1(char *name, int class, int type, Request *req, int depth,
  385. int recurse)
  386. {
  387. Area *area;
  388. DN *dp;
  389. RR *rp;
  390. Query *qp;
  391. if(debug)
  392. dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class);
  393. /* only class Cin implemented so far */
  394. if(class != Cin)
  395. return nil;
  396. dp = dnlookup(name, class, 1);
  397. /*
  398. * Try the cache first
  399. */
  400. rp = rrlookup(dp, type, OKneg);
  401. if(rp)
  402. if(rp->db){
  403. /* unauthoritative db entries are hints */
  404. if(rp->auth) {
  405. noteinmem();
  406. if(debug)
  407. dnslog("[%d] dnresolve1 %s %d %d: auth rr in db",
  408. getpid(), name, type, class);
  409. return rp;
  410. }
  411. } else
  412. /* cached entry must still be valid */
  413. if(rp->ttl > now)
  414. /* but Tall entries are special */
  415. if(type != Tall || rp->query == Tall) {
  416. noteinmem();
  417. if(debug)
  418. dnslog("[%d] dnresolve1 %s %d %d: rr not in db",
  419. getpid(), name, type, class);
  420. return rp;
  421. }
  422. lock(&dnlock);
  423. rrfreelist(rp);
  424. unlock(&dnlock);
  425. rp = nil; /* accident prevention */
  426. USED(rp);
  427. /*
  428. * try the cache for a canonical name. if found punt
  429. * since we'll find it during the canonical name search
  430. * in dnresolve().
  431. */
  432. if(type != Tcname){
  433. rp = rrlookup(dp, Tcname, NOneg);
  434. lock(&dnlock);
  435. rrfreelist(rp);
  436. unlock(&dnlock);
  437. if(rp){
  438. if(debug)
  439. dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup for non-cname",
  440. getpid(), name, type, class);
  441. return nil;
  442. }
  443. }
  444. /*
  445. * if the domain name is within an area of ours,
  446. * we should have found its data in memory by now.
  447. */
  448. area = inmyarea(dp->name);
  449. if (area || strncmp(dp->name, "local#", 6) == 0) {
  450. // char buf[32];
  451. // dnslog("%s %s: no data in area %s", dp->name,
  452. // rrname(type, buf, sizeof buf), area->soarr->owner->name);
  453. return nil;
  454. }
  455. qp = emalloc(sizeof *qp);
  456. queryinit(qp, dp, type, req);
  457. rp = issuequery(qp, name, class, depth, recurse);
  458. querydestroy(qp);
  459. free(qp);
  460. if(rp){
  461. if(debug)
  462. dnslog("[%d] dnresolve1 %s %d %d: rr from query",
  463. getpid(), name, type, class);
  464. return rp;
  465. }
  466. /* settle for a non-authoritative answer */
  467. rp = rrlookup(dp, type, OKneg);
  468. if(rp){
  469. if(debug)
  470. dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup",
  471. getpid(), name, type, class);
  472. return rp;
  473. }
  474. /* noone answered. try the database, we might have a chance. */
  475. rp = dblookup(name, class, type, 0, 0);
  476. if (rp) {
  477. if(debug)
  478. dnslog("[%d] dnresolve1 %s %d %d: rr from dblookup",
  479. getpid(), name, type, class);
  480. }else{
  481. if(debug)
  482. dnslog("[%d] dnresolve1 %s %d %d: no rr from dblookup; crapped out",
  483. getpid(), name, type, class);
  484. }
  485. return rp;
  486. }
  /*
   * walk a domain name one element to the right, i.e. return a
   * pointer to the parent domain name within name.
   * a name without a '.' has the root, "", as its parent;
   * the empty name has no parent and yields 0.
   */
  char*
  walkup(char *name)
  {
      char *dot;

      dot = strchr(name, '.');
      if(dot != 0)
          return dot + 1;
      if(*name == '\0')
          return 0;
      return "";
  }
  504. /*
  505. * Get a udp port for sending requests and reading replies. Put the port
  506. * into "headers" mode.
  507. */
  508. static char *hmsg = "headers";
  509. int
  510. udpport(char *mtpt)
  511. {
  512. int fd, ctl;
  513. char ds[64], adir[64];
  514. /* get a udp port */
  515. snprint(ds, sizeof ds, "%s/udp!*!0", (mtpt? mtpt: "/net"));
  516. ctl = announce(ds, adir);
  517. if(ctl < 0){
  518. /* warning("can't get udp port"); */
  519. return -1;
  520. }
  521. /* turn on header style interface */
  522. if(write(ctl, hmsg, strlen(hmsg)) != strlen(hmsg)){
  523. close(ctl);
  524. warning(hmsg);
  525. return -1;
  526. }
  527. /* grab the data file */
  528. snprint(ds, sizeof ds, "%s/data", adir);
  529. fd = open(ds, ORDWR);
  530. close(ctl);
  531. if(fd < 0)
  532. warning("can't open udp port %s: %r", ds);
  533. return fd;
  534. }
  535. void
  536. initdnsmsg(DNSmsg *mp, RR *rp, int flags, uint16_t reqno)
  537. {
  538. mp->flags = flags;
  539. mp->id = reqno;
  540. mp->qd = rp;
  541. if(rp != nil)
  542. mp->qdcount = 1;
  543. }
  544. DNSmsg *
  545. newdnsmsg(RR *rp, int flags, uint16_t reqno)
  546. {
  547. DNSmsg *mp;
  548. mp = emalloc(sizeof *mp);
  549. initdnsmsg(mp, rp, flags, reqno);
  550. return mp;
  551. }
  552. /* generate a DNS UDP query packet */
  553. int
  554. mkreq(DN *dp, int type, uint8_t *buf, int flags, uint16_t reqno)
  555. {
  556. DNSmsg m;
  557. int len;
  558. Udphdr *uh = (Udphdr*)buf;
  559. RR *rp;
  560. /* stuff port number into output buffer */
  561. memset(uh, 0, sizeof *uh);
  562. hnputs(uh->rport, Dnsport);
  563. /* make request and convert it to output format */
  564. memset(&m, 0, sizeof m);
  565. rp = rralloc(type);
  566. rp->owner = dp;
  567. initdnsmsg(&m, rp, flags, reqno);
  568. len = convDNS2M(&m, &buf[Udphdrsize], Maxdnspayload);
  569. rrfreelistptr(&m.qd);
  570. memset(&m, 0, sizeof m); /* cause trouble */
  571. return len;
  572. }
  573. void
  574. freeanswers(DNSmsg *mp)
  575. {
  576. lock(&dnlock);
  577. rrfreelistptr(&mp->qd);
  578. rrfreelistptr(&mp->an);
  579. rrfreelistptr(&mp->ns);
  580. rrfreelistptr(&mp->ar);
  581. unlock(&dnlock);
  582. mp->qdcount = mp->ancount = mp->nscount = mp->arcount = 0;
  583. }
  584. /* timed read of reply. sets srcip. ibuf must be 64K to handle tcp answers. */
  585. static int
  586. readnet(Query *qp, int medium, uint8_t *ibuf, uint64_t endms,
  587. uint8_t **replyp,
  588. uint8_t *srcip)
  589. {
  590. int len, fd;
  591. int32_t ms;
  592. int64_t startns = nsec();
  593. uint8_t *reply;
  594. uint8_t lenbuf[2];
  595. len = -1; /* pessimism */
  596. ms = endms - NS2MS(startns);
  597. if (ms <= 0)
  598. return -1; /* taking too long */
  599. reply = ibuf;
  600. memset(srcip, 0, IPaddrlen);
  601. alarm(ms);
  602. if (medium == Udp)
  603. if (qp->udpfd <= 0)
  604. dnslog("readnet: qp->udpfd closed");
  605. else {
  606. len = read(qp->udpfd, ibuf, Udphdrsize+Maxpayload);
  607. alarm(0);
  608. notestats(startns, len < 0, qp->type);
  609. if (len >= IPaddrlen)
  610. memmove(srcip, ibuf, IPaddrlen);
  611. if (len >= Udphdrsize) {
  612. len -= Udphdrsize;
  613. reply += Udphdrsize;
  614. }
  615. }
  616. else {
  617. if (!qp->tcpset)
  618. dnslog("readnet: tcp params not set");
  619. fd = qp->tcpfd;
  620. if (fd <= 0)
  621. dnslog("readnet: %s: tcp fd unset for dest %I",
  622. qp->dp->name, qp->tcpip);
  623. else if (readn(fd, lenbuf, 2) != 2) {
  624. dnslog("readnet: short read of 2-byte tcp msg size from %I",
  625. qp->tcpip);
  626. /* probably a time-out */
  627. notestats(startns, 1, qp->type);
  628. } else {
  629. len = lenbuf[0]<<8 | lenbuf[1];
  630. if (readn(fd, ibuf, len) != len) {
  631. dnslog("readnet: short read of tcp data from %I",
  632. qp->tcpip);
  633. /* probably a time-out */
  634. notestats(startns, 1, qp->type);
  635. len = -1;
  636. }
  637. }
  638. memmove(srcip, qp->tcpip, IPaddrlen);
  639. }
  640. alarm(0);
  641. *replyp = reply;
  642. return len;
  643. }
  644. /*
  645. * read replies to a request and remember the rrs in the answer(s).
  646. * ignore any of the wrong type.
  647. * wait at most until endms.
  648. */
  649. static int
  650. readreply(Query *qp, int medium, uint16_t req, uint8_t *ibuf, DNSmsg *mp,
  651. uint64_t endms)
  652. {
  653. int len;
  654. char *err;
  655. char tbuf[32];
  656. uint8_t *reply;
  657. uint8_t srcip[IPaddrlen];
  658. RR *rp;
  659. queryck(qp);
  660. memset(mp, 0, sizeof *mp);
  661. memset(srcip, 0, sizeof srcip);
  662. if (0)
  663. len = -1;
  664. for (; timems() < endms &&
  665. (len = readnet(qp, medium, ibuf, endms, &reply, srcip)) >= 0;
  666. freeanswers(mp)){
  667. /* convert into internal format */
  668. memset(mp, 0, sizeof *mp);
  669. err = convM2DNS(reply, len, mp, nil);
  670. if (mp->flags & Ftrunc) {
  671. free(err);
  672. freeanswers(mp);
  673. /* notify our caller to retry the query via tcp. */
  674. return -1;
  675. } else if(err){
  676. dnslog("readreply: %s: input err, len %d: %s: %I",
  677. qp->dp->name, len, err, srcip);
  678. free(err);
  679. continue;
  680. }
  681. if(debug)
  682. logreply(qp->req->id, srcip, mp);
  683. /* answering the right question? */
  684. if(mp->id != req)
  685. dnslog("%d: id %d instead of %d: %I", qp->req->id,
  686. mp->id, req, srcip);
  687. else if(mp->qd == 0)
  688. dnslog("%d: no question RR: %I", qp->req->id, srcip);
  689. else if(mp->qd->owner != qp->dp)
  690. dnslog("%d: owner %s instead of %s: %I", qp->req->id,
  691. mp->qd->owner->name, qp->dp->name, srcip);
  692. else if(mp->qd->type != qp->type)
  693. dnslog("%d: qp->type %d instead of %d: %I",
  694. qp->req->id, mp->qd->type, qp->type, srcip);
  695. else {
  696. /* remember what request this is in answer to */
  697. for(rp = mp->an; rp; rp = rp->next)
  698. rp->query = qp->type;
  699. return 0;
  700. }
  701. }
  702. if (timems() >= endms) {
  703. ; /* query expired */
  704. } else if (0) {
  705. /* this happens routinely when a read times out */
  706. dnslog("readreply: %s type %s: ns %I read error or eof "
  707. "(returned %d): %r", qp->dp->name, rrname(qp->type,
  708. tbuf, sizeof tbuf), srcip, len);
  709. if (medium == Udp)
  710. for (rp = qp->nsrp; rp != nil; rp = rp->next)
  711. if (rp->type == Tns)
  712. dnslog("readreply: %s: query sent to "
  713. "ns %s", qp->dp->name,
  714. rp->host->name);
  715. }
  716. return -1;
  717. }
  718. /*
  719. * return non-0 if first list includes second list
  720. */
  721. int
  722. contains(RR *rp1, RR *rp2)
  723. {
  724. RR *trp1, *trp2;
  725. for(trp2 = rp2; trp2; trp2 = trp2->next){
  726. for(trp1 = rp1; trp1; trp1 = trp1->next)
  727. if(trp1->type == trp2->type)
  728. if(trp1->host == trp2->host)
  729. if(trp1->owner == trp2->owner)
  730. break;
  731. if(trp1 == nil)
  732. return 0;
  733. }
  734. return 1;
  735. }
  736. /*
  737. * return multicast version if any
  738. */
  739. int
  740. ipisbm(uint8_t *ip)
  741. {
  742. if(isv4(ip)){
  743. if (ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0 ||
  744. ipcmp(ip, IPv4bcast) == 0)
  745. return 4;
  746. } else
  747. if(ip[0] == 0xff)
  748. return 6;
  749. return 0;
  750. }
  751. /*
  752. * Get next server address(es) into qp->dest[nd] and beyond
  753. */
  754. static int
  755. serveraddrs(Query *qp, int nd, int depth)
  756. {
  757. RR *rp, *arp, *trp;
  758. Dest *cur;
  759. if(nd >= Maxdest) /* dest array is full? */
  760. return Maxdest - 1;
  761. /*
  762. * look for a server whose address we already know.
  763. * if we find one, mark it so we ignore this on
  764. * subsequent passes.
  765. */
  766. arp = 0;
  767. for(rp = qp->nsrp; rp; rp = rp->next){
  768. assert(rp->magic == RRmagic);
  769. if(rp->marker)
  770. continue;
  771. arp = rrlookup(rp->host, Ta, NOneg);
  772. if(arp == nil)
  773. arp = rrlookup(rp->host, Taaaa, NOneg);
  774. if(arp){
  775. rp->marker = 1;
  776. break;
  777. }
  778. arp = dblookup(rp->host->name, Cin, Ta, 0, 0);
  779. if(arp == nil)
  780. arp = dblookup(rp->host->name, Cin, Taaaa, 0, 0);
  781. if(arp){
  782. rp->marker = 1;
  783. break;
  784. }
  785. }
  786. /*
  787. * if the cache and database lookup didn't find any new
  788. * server addresses, try resolving one via the network.
  789. * Mark any we try to resolve so we don't try a second time.
  790. */
  791. if(arp == 0)
  792. for(rp = qp->nsrp; rp; rp = rp->next){
  793. if(rp->marker)
  794. continue;
  795. rp->marker = 1;
  796. /*
  797. * avoid loops looking up a server under itself
  798. */
  799. if(subsume(rp->owner->name, rp->host->name))
  800. continue;
  801. arp = dnresolve(rp->host->name, Cin, Ta, qp->req, 0,
  802. depth+1, Recurse, 1, 0);
  803. if(arp == nil)
  804. arp = dnresolve(rp->host->name, Cin, Taaaa,
  805. qp->req, 0, depth+1, Recurse, 1, 0);
  806. lock(&dnlock);
  807. rrfreelist(rrremneg(&arp));
  808. unlock(&dnlock);
  809. if(arp)
  810. break;
  811. }
  812. /* use any addresses that we found */
  813. for(trp = arp; trp && nd < Maxdest; trp = trp->next){
  814. cur = &qp->dest[nd];
  815. parseip(cur->a, trp->ip->name);
  816. /*
  817. * straddling servers can reject all nameservers if they are all
  818. * inside, so be sure to list at least one outside ns at
  819. * the end of the ns list in /lib/ndb for `dom='.
  820. */
  821. if (ipisbm(cur->a) ||
  822. cfg.straddle && !insideaddr(qp->dp->name) && insidens(cur->a))
  823. continue;
  824. cur->nx = 0;
  825. cur->s = trp->owner;
  826. cur->code = Rtimeout;
  827. nd++;
  828. }
  829. lock(&dnlock);
  830. rrfreelist(arp);
  831. unlock(&dnlock);
  832. if(nd >= Maxdest) /* dest array is full? */
  833. return Maxdest - 1;
  834. return nd;
  835. }
  836. /*
  837. * cache negative responses
  838. */
  839. static void
  840. cacheneg(DN *dp, int type, int rcode, RR *soarr)
  841. {
  842. RR *rp;
  843. DN *soaowner;
  844. uint32_t ttl;
  845. stats.negcached++;
  846. /* no cache time specified, don't make anything up */
  847. if(soarr != nil){
  848. lock(&dnlock);
  849. if(soarr->next != nil)
  850. rrfreelistptr(&soarr->next);
  851. unlock(&dnlock);
  852. soaowner = soarr->owner;
  853. } else
  854. soaowner = nil;
  855. /* the attach can cause soarr to be freed so mine it now */
  856. if(soarr != nil && soarr->soa != nil)
  857. ttl = soarr->soa->minttl+now;
  858. else
  859. ttl = 5*Min;
  860. /* add soa and negative RR to the database */
  861. rrattach(soarr, Authoritative);
  862. rp = rralloc(type);
  863. rp->owner = dp;
  864. rp->negative = 1;
  865. rp->negsoaowner = soaowner;
  866. rp->negrcode = rcode;
  867. rp->ttl = ttl;
  868. rrattach(rp, Authoritative);
  869. }
  870. static int
  871. setdestoutns(Dest *p, int n)
  872. {
  873. uint8_t *outns = outsidens(n);
  874. destck(p);
  875. destinit(p);
  876. if (outns == nil) {
  877. if (n == 0)
  878. dnslog("[%d] no outside-ns in ndb", getpid());
  879. return -1;
  880. }
  881. memmove(p->a, outns, sizeof p->a);
  882. p->s = dnlookup("outside-ns-ips", Cin, 1);
  883. return 0;
  884. }
  885. /*
  886. * issue query via UDP or TCP as appropriate.
  887. * for TCP, returns with qp->tcpip set from udppkt header.
  888. */
  889. static int
  890. mydnsquery(Query *qp, int medium, uint8_t *udppkt, int len)
  891. {
  892. int rv = -1, nfd;
  893. char *domain;
  894. char conndir[NETPATHLEN], net[NETPATHLEN];
  895. uint8_t belen[2];
  896. NetConnInfo *nci;
  897. queryck(qp);
  898. domain = smprint("%I", udppkt);
  899. if (myaddr(domain)) {
  900. dnslog("mydnsquery: trying to send to myself (%s); bzzzt",
  901. domain);
  902. free(domain);
  903. return rv;
  904. }
  905. switch (medium) {
  906. case Udp:
  907. free(domain);
  908. nfd = dup(qp->udpfd, -1);
  909. if (nfd < 0) {
  910. warning("mydnsquery: qp->udpfd %d: %r", qp->udpfd);
  911. close(qp->udpfd); /* ensure it's closed */
  912. qp->udpfd = -1; /* poison it */
  913. return rv;
  914. }
  915. close(nfd);
  916. if (qp->udpfd <= 0)
  917. dnslog("mydnsquery: qp->udpfd %d closed", qp->udpfd);
  918. else {
  919. if (write(qp->udpfd, udppkt, len+Udphdrsize) !=
  920. len+Udphdrsize)
  921. warning("sending udp msg: %r");
  922. else {
  923. stats.qsent++;
  924. rv = 0;
  925. }
  926. }
  927. break;
  928. case Tcp:
  929. /* send via TCP & keep fd around for reply */
  930. snprint(net, sizeof net, "%s/tcp",
  931. (mntpt[0] != '\0'? mntpt: "/net"));
  932. alarm(10*1000);
  933. qp->tcpfd = rv = dial(netmkaddr(domain, net, "dns"), nil,
  934. conndir, &qp->tcpctlfd);
  935. alarm(0);
  936. if (qp->tcpfd < 0) {
  937. dnslog("can't dial tcp!%s!dns: %r", domain);
  938. free(domain);
  939. break;
  940. }
  941. free(domain);
  942. nci = getnetconninfo(conndir, qp->tcpfd);
  943. if (nci) {
  944. parseip(qp->tcpip, nci->rsys);
  945. freenetconninfo(nci);
  946. } else
  947. dnslog("mydnsquery: getnetconninfo failed");
  948. qp->tcpset = 1;
  949. belen[0] = len >> 8;
  950. belen[1] = len;
  951. if (write(qp->tcpfd, belen, 2) != 2 ||
  952. write(qp->tcpfd, udppkt + Udphdrsize, len) != len)
  953. warning("sending tcp msg: %r");
  954. break;
  955. default:
  956. sysfatal("mydnsquery: bad medium");
  957. }
  958. return rv;
  959. }
  960. /*
  961. * send query to all UDP destinations or one TCP destination,
  962. * taken from obuf (udp packet) header
  963. */
  964. static int
  965. xmitquery(Query *qp, int medium, int depth, uint8_t *obuf, int inns,
  966. int len)
  967. {
  968. int j, n;
  969. char buf[32];
  970. Dest *p;
  971. queryck(qp);
  972. if(timems() >= qp->req->aborttime)
  973. return -1;
  974. /*
  975. * get a nameserver address if we need one.
  976. * serveraddrs populates qp->dest.
  977. */
  978. p = qp->dest;
  979. destck(p);
  980. if (qp->ndest < 0 || qp->ndest > Maxdest) {
  981. dnslog("qp->ndest %d out of range", qp->ndest);
  982. abort();
  983. }
  984. /*
  985. * we're to transmit to more destinations than we currently have,
  986. * so get another.
  987. */
  988. if (qp->ndest > qp->curdest - p) {
  989. j = serveraddrs(qp, qp->curdest - p, depth);
  990. if (j < 0 || j >= Maxdest) {
  991. dnslog("serveraddrs() result %d out of range", j);
  992. abort();
  993. }
  994. qp->curdest = &qp->dest[j];
  995. }
  996. destck(qp->curdest);
  997. /* no servers, punt */
  998. if (qp->ndest == 0)
  999. if (cfg.straddle && cfg.inside) {
  1000. /* get ips of "outside-ns-ips" */
  1001. qp->curdest = qp->dest;
  1002. for(n = 0; n < Maxdest; n++, qp->curdest++)
  1003. if (setdestoutns(qp->curdest, n) < 0)
  1004. break;
  1005. if(n == 0)
  1006. dnslog("xmitquery: %s: no outside-ns nameservers",
  1007. qp->dp->name);
  1008. } else
  1009. /* it's probably just a bogus domain, don't log it */
  1010. return -1;
  1011. /* send to first 'qp->ndest' destinations */
  1012. j = 0;
  1013. if (medium == Tcp) {
  1014. j++;
  1015. queryck(qp);
  1016. assert(qp->dp);
  1017. procsetname("tcp %sside query for %s %s", (inns? "in": "out"),
  1018. qp->dp->name, rrname(qp->type, buf, sizeof buf));
  1019. mydnsquery(qp, medium, obuf, len); /* sets qp->tcpip from obuf */
  1020. if(debug)
  1021. logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name,
  1022. qp->type);
  1023. } else
  1024. for(; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){
  1025. /* skip destinations we've finished with */
  1026. if(p->nx >= Maxtrans)
  1027. continue;
  1028. j++;
  1029. /* exponential backoff of requests */
  1030. if((1<<p->nx) > qp->ndest)
  1031. continue;
  1032. if(memcmp(p->a, IPnoaddr, sizeof IPnoaddr) == 0)
  1033. continue; /* mistake */
  1034. procsetname("udp %sside query to %I/%s %s %s",
  1035. (inns? "in": "out"), p->a, p->s->name,
  1036. qp->dp->name, rrname(qp->type, buf, sizeof buf));
  1037. if(debug)
  1038. logsend(qp->req->id, depth, p->a, p->s->name,
  1039. qp->dp->name, qp->type);
  1040. /* fill in UDP destination addr & send it */
  1041. memmove(obuf, p->a, sizeof p->a);
  1042. mydnsquery(qp, medium, obuf, len);
  1043. p->nx++;
  1044. }
  1045. if(j == 0) {
  1046. return -1;
  1047. }
  1048. return 0;
  1049. }
  1050. static int lckindex[Maxlcks] = {
  1051. 0, /* all others map here */
  1052. Ta,
  1053. Tns,
  1054. Tcname,
  1055. Tsoa,
  1056. Tptr,
  1057. Tmx,
  1058. Ttxt,
  1059. Taaaa,
  1060. };
  1061. static int
  1062. qtype2lck(int qtype) /* map query type to querylck index */
  1063. {
  1064. int i;
  1065. for (i = 1; i < nelem(lckindex); i++)
  1066. if (lckindex[i] == qtype)
  1067. return i;
  1068. return 0;
  1069. }
  1070. /* is mp a cachable negative response (with Rname set)? */
  1071. static int
  1072. isnegrname(DNSmsg *mp)
  1073. {
  1074. /* TODO: could add || cfg.justforw to RHS of && */
  1075. return mp->an == nil && (mp->flags & Rmask) == Rname;
  1076. }
  1077. /* returns Answerr (-1) on errors, else number of answers, which can be zero. */
  1078. static int
  1079. procansw(Query *qp, DNSmsg *mp, uint8_t *srcip, int depth, Dest *p)
  1080. {
  1081. int rv;
  1082. // int lcktype;
  1083. char buf[32];
  1084. DN *ndp;
  1085. Query *nqp;
  1086. RR *tp, *soarr;
  1087. if (mp->an == nil)
  1088. stats.negans++;
  1089. /* ignore any error replies */
  1090. if((mp->flags & Rmask) == Rserver){
  1091. stats.negserver++;
  1092. freeanswers(mp);
  1093. if(p != qp->curdest)
  1094. p->code = Rserver;
  1095. return Answerr;
  1096. }
  1097. /* ignore any bad delegations */
  1098. if(mp->ns && baddelegation(mp->ns, qp->nsrp, srcip)){
  1099. stats.negbaddeleg++;
  1100. if(mp->an == nil){
  1101. stats.negbdnoans++;
  1102. freeanswers(mp);
  1103. if(p != qp->curdest)
  1104. p->code = Rserver;
  1105. dnslog(" and no answers");
  1106. return Answerr;
  1107. }
  1108. dnslog(" but has answers; ignoring ns");
  1109. lock(&dnlock);
  1110. rrfreelistptr(&mp->ns);
  1111. unlock(&dnlock);
  1112. mp->nscount = 0;
  1113. }
  1114. /* remove any soa's from the authority section */
  1115. lock(&dnlock);
  1116. soarr = rrremtype(&mp->ns, Tsoa);
  1117. /* incorporate answers */
  1118. unique(mp->an);
  1119. unique(mp->ns);
  1120. unique(mp->ar);
  1121. unlock(&dnlock);
  1122. if(mp->an)
  1123. rrattach(mp->an, (mp->flags & Fauth) != 0);
  1124. if(mp->ar)
  1125. rrattach(mp->ar, Notauthoritative);
  1126. if(mp->ns && !cfg.justforw){
  1127. ndp = mp->ns->owner;
  1128. rrattach(mp->ns, Notauthoritative);
  1129. } else {
  1130. ndp = nil;
  1131. lock(&dnlock);
  1132. rrfreelistptr(&mp->ns);
  1133. unlock(&dnlock);
  1134. mp->nscount = 0;
  1135. }
  1136. /* free the question */
  1137. if(mp->qd) {
  1138. lock(&dnlock);
  1139. rrfreelistptr(&mp->qd);
  1140. unlock(&dnlock);
  1141. mp->qdcount = 0;
  1142. }
  1143. /*
  1144. * Any reply from an authoritative server,
  1145. * or a positive reply terminates the search.
  1146. * A negative response now also terminates the search.
  1147. */
  1148. if(mp->an != nil || (mp->flags & Fauth)){
  1149. if(isnegrname(mp))
  1150. qp->dp->respcode = Rname;
  1151. else
  1152. qp->dp->respcode = Rok;
  1153. /*
  1154. * cache any negative responses, free soarr.
  1155. * negative responses need not be authoritative:
  1156. * they can legitimately come from a cache.
  1157. */
  1158. if( /* (mp->flags & Fauth) && */ mp->an == nil)
  1159. cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
  1160. else {
  1161. lock(&dnlock);
  1162. rrfreelist(soarr);
  1163. unlock(&dnlock);
  1164. }
  1165. return 1;
  1166. } else if (isnegrname(mp)) {
  1167. qp->dp->respcode = Rname;
  1168. /*
  1169. * cache negative response.
  1170. * negative responses need not be authoritative:
  1171. * they can legitimately come from a cache.
  1172. */
  1173. cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
  1174. return 1;
  1175. }
  1176. stats.negnorname++;
  1177. lock(&dnlock);
  1178. rrfreelist(soarr);
  1179. unlock(&dnlock);
  1180. /*
  1181. * if we've been given better name servers, recurse.
  1182. * if we're a pure resolver, don't recurse, we have
  1183. * to forward to a fixed set of named servers.
  1184. */
  1185. if(!mp->ns || cfg.resolver && cfg.justforw)
  1186. return Answnone;
  1187. tp = rrlookup(ndp, Tns, NOneg);
  1188. if(contains(qp->nsrp, tp)){
  1189. lock(&dnlock);
  1190. rrfreelist(tp);
  1191. unlock(&dnlock);
  1192. return Answnone;
  1193. }
  1194. procsetname("recursive query for %s %s", qp->dp->name,
  1195. rrname(qp->type, buf, sizeof buf));
  1196. /*
  1197. * we're called from udpquery, called from
  1198. * netquery, which current holds qp->dp->querylck,
  1199. * so release it now and acquire it upon return.
  1200. */
  1201. // lcktype = qtype2lck(qp->type); /* someday try this again */
  1202. // qunlock(&qp->dp->querylck[lcktype]);
  1203. nqp = emalloc(sizeof *nqp);
  1204. queryinit(nqp, qp->dp, qp->type, qp->req);
  1205. nqp->nsrp = tp;
  1206. rv = netquery(nqp, depth+1);
  1207. // qlock(&qp->dp->querylck[lcktype]);
  1208. rrfreelist(nqp->nsrp);
  1209. querydestroy(nqp);
  1210. free(nqp);
  1211. return rv;
  1212. }
  1213. /*
  1214. * send a query via tcp to a single address (from ibuf's udp header)
  1215. * and read the answer(s) into mp->an.
  1216. */
  1217. static int
  1218. tcpquery(Query *qp, DNSmsg *mp, int depth, uint8_t *ibuf, uint8_t *obuf,
  1219. int len,
  1220. uint32_t waitms, int inns, uint16_t req)
  1221. {
  1222. int rv = 0;
  1223. uint64_t endms;
  1224. endms = timems() + waitms;
  1225. if(endms > qp->req->aborttime)
  1226. endms = qp->req->aborttime;
  1227. if (0)
  1228. dnslog("%s: udp reply truncated; retrying query via tcp to %I",
  1229. qp->dp->name, qp->tcpip);
  1230. qlock(&qp->tcplock);
  1231. memmove(obuf, ibuf, IPaddrlen); /* send back to respondent */
  1232. /* sets qp->tcpip from obuf's udp header */
  1233. if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 ||
  1234. readreply(qp, Tcp, req, ibuf, mp, endms) < 0)
  1235. rv = -1;
  1236. if (qp->tcpfd > 0) {
  1237. hangup(qp->tcpctlfd);
  1238. close(qp->tcpctlfd);
  1239. close(qp->tcpfd);
  1240. }
  1241. qp->tcpfd = qp->tcpctlfd = -1;
  1242. qunlock(&qp->tcplock);
  1243. return rv;
  1244. }
  1245. /*
  1246. * query name servers. fill in obuf with on-the-wire representation of a
  1247. * DNSmsg derived from qp. if the name server returns a pointer to another
  1248. * name server, recurse.
  1249. */
  1250. static int
  1251. queryns(Query *qp, int depth, uint8_t *ibuf, uint8_t *obuf, uint32_t waitms,
  1252. int inns)
  1253. {
  1254. int ndest, len, replywaits, rv;
  1255. uint16_t req;
  1256. uint64_t endms;
  1257. char buf[12];
  1258. uint8_t srcip[IPaddrlen];
  1259. Dest *p, *np, *dest;
  1260. /* pack request into a udp message */
  1261. req = rand();
  1262. len = mkreq(qp->dp, qp->type, obuf, Frecurse|Oquery, req);
  1263. /* no server addresses yet */
  1264. queryck(qp);
  1265. dest = emalloc(Maxdest * sizeof *dest); /* dest can't be on stack */
  1266. for (p = dest; p < dest + Maxdest; p++)
  1267. destinit(p);
  1268. /* this dest array is local to this call of queryns() */
  1269. free(qp->dest);
  1270. qp->curdest = qp->dest = dest;
  1271. /*
  1272. * transmit udp requests and wait for answers.
  1273. * at most Maxtrans attempts to each address.
  1274. * each cycle send one more message than the previous.
  1275. * retry a query via tcp if its response is truncated.
  1276. */
  1277. for(ndest = 1; ndest < Maxdest; ndest++){
  1278. qp->ndest = ndest;
  1279. qp->tcpset = 0;
  1280. if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0)
  1281. break;
  1282. endms = timems() + waitms;
  1283. if(endms > qp->req->aborttime)
  1284. endms = qp->req->aborttime;
  1285. for(replywaits = 0; replywaits < ndest; replywaits++){
  1286. DNSmsg m;
  1287. procsetname("reading %sside reply from %I: %s %s from %s",
  1288. (inns? "in": "out"), obuf, qp->dp->name,
  1289. rrname(qp->type, buf, sizeof buf), qp->req->from);
  1290. /* read udp answer into m */
  1291. if (readreply(qp, Udp, req, ibuf, &m, endms) >= 0)
  1292. memmove(srcip, ibuf, IPaddrlen);
  1293. else if (!(m.flags & Ftrunc)) {
  1294. freeanswers(&m);
  1295. break; /* timed out on this dest */
  1296. } else {
  1297. /* whoops, it was truncated! ask again via tcp */
  1298. freeanswers(&m);
  1299. rv = tcpquery(qp, &m, depth, ibuf, obuf, len,
  1300. waitms, inns, req); /* answer in m */
  1301. if (rv < 0) {
  1302. freeanswers(&m);
  1303. break; /* failed via tcp too */
  1304. }
  1305. memmove(srcip, qp->tcpip, IPaddrlen);
  1306. }
  1307. /* find responder */
  1308. // dnslog("queryns got reply from %I", srcip);
  1309. for(p = qp->dest; p < qp->curdest; p++)
  1310. if(memcmp(p->a, srcip, sizeof p->a) == 0)
  1311. break;
  1312. /* remove all addrs of responding server from list */
  1313. for(np = qp->dest; np < qp->curdest; np++)
  1314. if(np->s == p->s)
  1315. np->nx = Maxtrans;
  1316. /* free or incorporate RRs in m */
  1317. rv = procansw(qp, &m, srcip, depth, p);
  1318. if (rv > Answnone) {
  1319. free(qp->dest);
  1320. qp->dest = qp->curdest = nil; /* prevent accidents */
  1321. return rv;
  1322. }
  1323. }
  1324. }
  1325. /* if all servers returned failure, propagate it */
  1326. qp->dp->respcode = Rserver;
  1327. for(p = dest; p < qp->curdest; p++) {
  1328. destck(p);
  1329. if(p->code != Rserver)
  1330. qp->dp->respcode = Rok;
  1331. p->magic = 0; /* prevent accidents */
  1332. }
  1333. // if (qp->dp->respcode)
  1334. // dnslog("queryns setting Rserver for %s", qp->dp->name);
  1335. free(qp->dest);
  1336. qp->dest = qp->curdest = nil; /* prevent accidents */
  1337. return Answnone;
  1338. }
  1339. /*
  1340. * run a command with a supplied fd as standard input
  1341. */
  1342. char *
  1343. system(int fd, char *cmd)
  1344. {
  1345. int pid, p, i;
  1346. static Waitmsg msg;
  1347. if((pid = fork()) == -1)
  1348. sysfatal("fork failed: %r");
  1349. else if(pid == 0){
  1350. dup(fd, 0);
  1351. close(fd);
  1352. for (i = 3; i < 200; i++)
  1353. close(i); /* don't leak fds */
  1354. execl("/bin/rc", "rc", "-c", cmd, nil);
  1355. sysfatal("exec rc: %r");
  1356. }
  1357. for(p = waitpid(); p >= 0; p = waitpid())
  1358. if(p == pid)
  1359. return msg.msg;
  1360. return "lost child";
  1361. }
  1362. /* compute wait, weighted by probability of success, with bounds */
  1363. static uint32_t
  1364. weight(uint32_t ms, unsigned pcntprob)
  1365. {
  1366. uint32_t wait;
  1367. wait = (ms * pcntprob) / 100;
  1368. if (wait < Minwaitms)
  1369. wait = Minwaitms;
  1370. if (wait > Maxwaitms)
  1371. wait = Maxwaitms;
  1372. return wait;
  1373. }
  1374. /*
  1375. * in principle we could use a single descriptor for a udp port
  1376. * to send all queries and receive all the answers to them,
  1377. * but we'd have to sort out the answers by dns-query id.
  1378. */
  1379. static int
  1380. udpquery(Query *qp, char *mntpt, int depth, int patient, int inns)
  1381. {
  1382. int fd, rv;
  1383. uint32_t now, pcntprob;
  1384. uint64_t wait, reqtm;
  1385. char *msg;
  1386. uint8_t *obuf, *ibuf;
  1387. static QLock mntlck;
  1388. static uint32_t lastmount;
  1389. /* use alloced buffers rather than ones from the stack */
  1390. ibuf = emalloc(64*1024); /* max. tcp reply size */
  1391. obuf = emalloc(Maxpayload+Udphdrsize);
  1392. fd = udpport(mntpt);
  1393. while (fd < 0 && cfg.straddle && strcmp(mntpt, "/net.alt") == 0) {
  1394. /* HACK: remount /net.alt */
  1395. now = time(nil);
  1396. if (now < lastmount + Remntretry)
  1397. sleep(S2MS(lastmount + Remntretry - now));
  1398. qlock(&mntlck);
  1399. fd = udpport(mntpt); /* try again under lock */
  1400. if (fd < 0) {
  1401. dnslog("[%d] remounting /net.alt", getpid());
  1402. unmount(nil, "/net.alt");
  1403. msg = system(open("/dev/null", ORDWR), "outside");
  1404. lastmount = time(nil);
  1405. if (msg && *msg) {
  1406. dnslog("[%d] can't remount /net.alt: %s",
  1407. getpid(), msg);
  1408. sleep(10*1000); /* don't spin remounting */
  1409. } else
  1410. fd = udpport(mntpt);
  1411. }
  1412. qunlock(&mntlck);
  1413. }
  1414. if (fd < 0) {
  1415. dnslog("can't get udpport for %s query of name %s: %r",
  1416. mntpt, qp->dp->name);
  1417. sysfatal("out of udp conversations"); /* we're buggered */
  1418. }
  1419. /*
  1420. * Our QIP servers are busted and respond to AAAA and CNAME queries
  1421. * with (sometimes malformed [too short] packets and) no answers and
  1422. * just NS RRs but not Rname errors. so make time-to-wait
  1423. * proportional to estimated probability of an RR of that type existing.
  1424. */
  1425. if (qp->type >= nelem(likely))
  1426. pcntprob = 35; /* unpopular query type */
  1427. else
  1428. pcntprob = likely[qp->type];
  1429. reqtm = (patient? 2 * Maxreqtm: Maxreqtm);
  1430. wait = weight(reqtm / 3, pcntprob); /* time for one udp query */
  1431. qp->req->aborttime = timems() + 3*wait; /* for all udp queries */
  1432. qp->udpfd = fd;
  1433. rv = queryns(qp, depth, ibuf, obuf, wait, inns);
  1434. close(fd);
  1435. qp->udpfd = -1;
  1436. free(obuf);
  1437. free(ibuf);
  1438. return rv;
  1439. }
  1440. /*
  1441. * look up (qp->dp->name, qp->type) rr in dns,
  1442. * using nameservers in qp->nsrp.
  1443. */
  1444. static int
  1445. netquery(Query *qp, int depth)
  1446. {
  1447. int lock, rv, triedin, inname;
  1448. char buf[32];
  1449. RR *rp;
  1450. DN *dp;
  1451. Querylck *qlp;
  1452. static int whined;
  1453. rv = Answnone; /* pessimism */
  1454. if(depth > 12) /* in a recursive loop? */
  1455. return Answnone;
  1456. slave(qp->req);
  1457. /*
  1458. * slave might have forked. if so, the parent process longjmped to
  1459. * req->mret; we're usually the child slave, but if there are too
  1460. * many children already, we're still the same process.
  1461. */
  1462. /*
  1463. * don't lock before call to slave so only children can block.
  1464. * just lock at top-level invocation.
  1465. */
  1466. lock = depth <= 1 && qp->req->isslave;
  1467. dp = qp->dp; /* ensure that it doesn't change underfoot */
  1468. qlp = nil;
  1469. if(lock) {
  1470. procsetname("query lock wait: %s %s from %s", dp->name,
  1471. rrname(qp->type, buf, sizeof buf), qp->req->from);
  1472. /*
  1473. * don't make concurrent queries for this name.
  1474. * dozens of processes blocking here probably indicates
  1475. * an error in our dns data that causes us to not
  1476. * recognise a zone (area) as one of our own, thus
  1477. * causing us to query other nameservers.
  1478. */
  1479. qlp = &dp->querylck[qtype2lck(qp->type)];
  1480. qlock(qlp);
  1481. if (qlp->Ref.ref > Maxoutstanding) {
  1482. qunlock(qlp);
  1483. if (!whined) {
  1484. whined = 1;
  1485. dnslog("too many outstanding queries for %s;"
  1486. " dropping this one; no further logging"
  1487. " of drops", dp->name);
  1488. }
  1489. return 0;
  1490. }
  1491. ++qlp->Ref.ref;
  1492. qunlock(qlp);
  1493. }
  1494. procsetname("netquery: %s", dp->name);
  1495. /* prepare server RR's for incremental lookup */
  1496. for(rp = qp->nsrp; rp; rp = rp->next)
  1497. rp->marker = 0;
  1498. triedin = 0;
  1499. /*
  1500. * normal resolvers and servers will just use mntpt for all addresses,
  1501. * even on the outside. straddling servers will use mntpt (/net)
  1502. * for inside addresses and /net.alt for outside addresses,
  1503. * thus bypassing other inside nameservers.
  1504. */
  1505. inname = insideaddr(dp->name);
  1506. if (!cfg.straddle || inname) {
  1507. rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns));
  1508. triedin = 1;
  1509. }
  1510. /*
  1511. * if we're still looking, are inside, and have an outside domain,
  1512. * try it on our outside interface, if any.
  1513. */
  1514. if (rv == Answnone && cfg.inside && !inname) {
  1515. if (triedin)
  1516. dnslog(
  1517. "[%d] netquery: internal nameservers failed for %s; trying external",
  1518. getpid(), dp->name);
  1519. /* prepare server RR's for incremental lookup */
  1520. for(rp = qp->nsrp; rp; rp = rp->next)
  1521. rp->marker = 0;
  1522. rv = udpquery(qp, "/net.alt", depth, Patient, Outns);
  1523. }
  1524. // if (rv == Answnone) /* could ask /net.alt/dns directly */
  1525. // askoutdns(dp, qp->type);
  1526. if(lock && qlp) {
  1527. qlock(qlp);
  1528. assert(qlp->Ref.ref > 0);
  1529. qunlock(qlp);
  1530. decref(qlp);
  1531. }
  1532. return rv;
  1533. }
  1534. int
  1535. seerootns(void)
  1536. {
  1537. int rv;
  1538. char root[] = "";
  1539. Request req;
  1540. RR *rr;
  1541. Query *qp;
  1542. memset(&req, 0, sizeof req);
  1543. req.isslave = 1;
  1544. req.aborttime = timems() + Maxreqtm;
  1545. req.from = "internal";
  1546. qp = emalloc(sizeof *qp);
  1547. queryinit(qp, dnlookup(root, Cin, 1), Tns, &req);
  1548. qp->nsrp = dblookup(root, Cin, Tns, 0, 0);
  1549. for (rr = qp->nsrp; rr != nil; rr = rr->next) /* DEBUG */
  1550. dnslog("seerootns query nsrp: %R", rr);
  1551. rv = netquery(qp, 0); /* lookup ". ns" using qp->nsrp */
  1552. rrfreelist(qp->nsrp);
  1553. querydestroy(qp);
  1554. free(qp);
  1555. return rv;
  1556. }