res_msend.c 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. #include <sys/socket.h>
  2. #include <netinet/in.h>
  3. #include <netinet/tcp.h>
  4. #include <netdb.h>
  5. #include <arpa/inet.h>
  6. #include <stdint.h>
  7. #include <string.h>
  8. #include <poll.h>
  9. #include <time.h>
  10. #include <ctype.h>
  11. #include <unistd.h>
  12. #include <errno.h>
  13. #include <pthread.h>
  14. #include "stdio_impl.h"
  15. #include "syscall.h"
  16. #include "lookup.h"
  17. static void cleanup(void *p)
  18. {
  19. struct pollfd *pfd = p;
  20. for (int i=0; pfd[i].fd >= -1; i++)
  21. if (pfd[i].fd >= 0) __syscall(SYS_close, pfd[i].fd);
  22. }
  /* Return a millisecond timestamp used for measuring query timeouts and
   * retry intervals. Only differences between two returned values are
   * meaningful.
   *
   * The monotonic clock is preferred so that a wall-clock adjustment made
   * while a lookup is in flight cannot stretch or cut short the timeout.
   * The realtime clock is used only as a fallback when the monotonic
   * clock cannot be read. */
  static unsigned long mtime(void)
  {
      struct timespec ts;

      if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
          clock_gettime(CLOCK_REALTIME, &ts);
      return (unsigned long)ts.tv_sec * 1000
          + ts.tv_nsec / 1000000;
  }
  /* Open a nonblocking, close-on-exec TCP socket of the given family and
   * start sending query q (ql bytes) to the address sa/sl. On the wire the
   * query is preceded by a 2-byte big-endian length.
   *
   * The new descriptor is stored in pfd->fd and pfd->events is set to
   * POLLOUT, or to POLLIN if the whole framed query was already sent.
   * TCP fast open is tried first; if the socket option cannot be set, or
   * the send fails with something other than EINPROGRESS, a plain
   * connect() is used instead.
   *
   * Returns the number of bytes of the framed query sent so far (0 if the
   * connection is still being established), or -1 on failure, in which
   * case the socket is closed and pfd->fd is set to -1. */
  static int start_tcp(struct pollfd *pfd, int family, const void *sa, socklen_t sl, const unsigned char *q, int ql)
  {
      uint8_t prefix[2] = { ql>>8, ql };
      struct iovec parts[2] = {
          { .iov_base = prefix, .iov_len = 2 },
          { .iov_base = (void *)q, .iov_len = ql }
      };
      struct msghdr mh = {
          .msg_name = (void *)sa,
          .msg_namelen = sl,
          .msg_iov = parts,
          .msg_iovlen = 2
      };
      int enable = 1;
      int sent;
      int sock = socket(family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);

      pfd->fd = sock;
      pfd->events = POLLOUT;

      if (setsockopt(sock, IPPROTO_TCP, TCP_FASTOPEN_CONNECT,
          &enable, sizeof enable) == 0) {
          sent = sendmsg(sock, &mh, MSG_FASTOPEN|MSG_NOSIGNAL);
          if (sent == ql+2) pfd->events = POLLIN;
          if (sent >= 0) return sent;
          if (errno == EINPROGRESS) return 0;
      }

      /* Fast open unavailable or failed: fall back to a normal connect. */
      if (connect(sock, sa, sl) == 0 || errno == EINPROGRESS) return 0;

      close(sock);
      pfd->fd = -1;
      return -1;
  }
  /* Advance the iovec list in mh so that it no longer covers its first n
   * bytes. Entries that are consumed entirely are dropped by bumping
   * msg_iov and decrementing msg_iovlen; the first entry that is only
   * partly consumed has its base and length adjusted in place. If n
   * covers the whole list, msg_iovlen ends up zero. */
  static void step_mh(struct msghdr *mh, size_t n)
  {
      for (;;) {
          if (!mh->msg_iovlen) return;

          struct iovec *cur = mh->msg_iov;
          if (n < cur->iov_len) {
              cur->iov_base = (char *)cur->iov_base + n;
              cur->iov_len -= n;
              return;
          }

          /* This entry is fully skipped; move on to the next one. */
          n -= cur->iov_len;
          mh->msg_iov++;
          mh->msg_iovlen--;
      }
  }
  69. /* Internal contract for __res_msend[_rc]: asize must be >=512, nqueries
  70. * must be sufficiently small to be safe as VLA size. In practice it's
  71. * either 1 or 2, anyway. */
  72. int __res_msend_rc(int nqueries, const unsigned char *const *queries,
  73. const int *qlens, unsigned char *const *answers, int *alens, int asize,
  74. const struct resolvconf *conf)
  75. {
  76. int fd;
  77. int timeout, attempts, retry_interval, servfail_retry;
  78. union {
  79. struct sockaddr_in sin;
  80. struct sockaddr_in6 sin6;
  81. } sa = {0}, ns[MAXNS] = {{0}};
  82. socklen_t sl = sizeof sa.sin;
  83. int nns = 0;
  84. int family = AF_INET;
  85. int rlen;
  86. int next;
  87. int i, j;
  88. int cs;
  89. struct pollfd pfd[nqueries+2];
  90. int qpos[nqueries], apos[nqueries];
  91. unsigned char alen_buf[nqueries][2];
  92. int r;
  93. unsigned long t0, t1, t2;
  94. pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
  95. timeout = 1000*conf->timeout;
  96. attempts = conf->attempts;
  97. for (nns=0; nns<conf->nns; nns++) {
  98. const struct address *iplit = &conf->ns[nns];
  99. if (iplit->family == AF_INET) {
  100. memcpy(&ns[nns].sin.sin_addr, iplit->addr, 4);
  101. ns[nns].sin.sin_port = htons(53);
  102. ns[nns].sin.sin_family = AF_INET;
  103. } else {
  104. sl = sizeof sa.sin6;
  105. memcpy(&ns[nns].sin6.sin6_addr, iplit->addr, 16);
  106. ns[nns].sin6.sin6_port = htons(53);
  107. ns[nns].sin6.sin6_scope_id = iplit->scopeid;
  108. ns[nns].sin6.sin6_family = family = AF_INET6;
  109. }
  110. }
  111. /* Get local address and open/bind a socket */
  112. fd = socket(family, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
  113. /* Handle case where system lacks IPv6 support */
  114. if (fd < 0 && family == AF_INET6 && errno == EAFNOSUPPORT) {
  115. for (i=0; i<nns && conf->ns[nns].family == AF_INET6; i++);
  116. if (i==nns) {
  117. pthread_setcancelstate(cs, 0);
  118. return -1;
  119. }
  120. fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
  121. family = AF_INET;
  122. sl = sizeof sa.sin;
  123. }
  124. sa.sin.sin_family = family;
  125. if (fd < 0 || bind(fd, (void *)&sa, sl) < 0) {
  126. if (fd >= 0) close(fd);
  127. pthread_setcancelstate(cs, 0);
  128. return -1;
  129. }
  130. /* Past this point, there are no errors. Each individual query will
  131. * yield either no reply (indicated by zero length) or an answer
  132. * packet which is up to the caller to interpret. */
  133. for (i=0; i<nqueries; i++) pfd[i].fd = -1;
  134. pfd[nqueries].fd = fd;
  135. pfd[nqueries].events = POLLIN;
  136. pfd[nqueries+1].fd = -2;
  137. pthread_cleanup_push(cleanup, pfd);
  138. pthread_setcancelstate(cs, 0);
  139. /* Convert any IPv4 addresses in a mixed environment to v4-mapped */
  140. if (family == AF_INET6) {
  141. setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &(int){0}, sizeof 0);
  142. for (i=0; i<nns; i++) {
  143. if (ns[i].sin.sin_family != AF_INET) continue;
  144. memcpy(ns[i].sin6.sin6_addr.s6_addr+12,
  145. &ns[i].sin.sin_addr, 4);
  146. memcpy(ns[i].sin6.sin6_addr.s6_addr,
  147. "\0\0\0\0\0\0\0\0\0\0\xff\xff", 12);
  148. ns[i].sin6.sin6_family = AF_INET6;
  149. ns[i].sin6.sin6_flowinfo = 0;
  150. ns[i].sin6.sin6_scope_id = 0;
  151. }
  152. }
  153. memset(alens, 0, sizeof *alens * nqueries);
  154. retry_interval = timeout / attempts;
  155. next = 0;
  156. t0 = t2 = mtime();
  157. t1 = t2 - retry_interval;
  158. for (; t2-t0 < timeout; t2=mtime()) {
  159. /* This is the loop exit condition: that all queries
  160. * have an accepted answer. */
  161. for (i=0; i<nqueries && alens[i]>0; i++);
  162. if (i==nqueries) break;
  163. if (t2-t1 >= retry_interval) {
  164. /* Query all configured namservers in parallel */
  165. for (i=0; i<nqueries; i++)
  166. if (!alens[i])
  167. for (j=0; j<nns; j++)
  168. sendto(fd, queries[i],
  169. qlens[i], MSG_NOSIGNAL,
  170. (void *)&ns[j], sl);
  171. t1 = t2;
  172. servfail_retry = 2 * nqueries;
  173. }
  174. /* Wait for a response, or until time to retry */
  175. if (poll(pfd, nqueries+1, t1+retry_interval-t2) <= 0) continue;
  176. while (next < nqueries) {
  177. struct msghdr mh = {
  178. .msg_name = (void *)&sa,
  179. .msg_namelen = sl,
  180. .msg_iovlen = 1,
  181. .msg_iov = (struct iovec []){
  182. { .iov_base = (void *)answers[next],
  183. .iov_len = asize }
  184. }
  185. };
  186. rlen = recvmsg(fd, &mh, 0);
  187. if (rlen < 0) break;
  188. /* Ignore non-identifiable packets */
  189. if (rlen < 4) continue;
  190. /* Ignore replies from addresses we didn't send to */
  191. for (j=0; j<nns && memcmp(ns+j, &sa, sl); j++);
  192. if (j==nns) continue;
  193. /* Find which query this answer goes with, if any */
  194. for (i=next; i<nqueries && (
  195. answers[next][0] != queries[i][0] ||
  196. answers[next][1] != queries[i][1] ); i++);
  197. if (i==nqueries) continue;
  198. if (alens[i]) continue;
  199. /* Only accept positive or negative responses;
  200. * retry immediately on server failure, and ignore
  201. * all other codes such as refusal. */
  202. switch (answers[next][3] & 15) {
  203. case 0:
  204. case 3:
  205. break;
  206. case 2:
  207. if (servfail_retry && servfail_retry--)
  208. sendto(fd, queries[i],
  209. qlens[i], MSG_NOSIGNAL,
  210. (void *)&ns[j], sl);
  211. default:
  212. continue;
  213. }
  214. /* Store answer in the right slot, or update next
  215. * available temp slot if it's already in place. */
  216. alens[i] = rlen;
  217. if (i == next)
  218. for (; next<nqueries && alens[next]; next++);
  219. else
  220. memcpy(answers[i], answers[next], rlen);
  221. /* Ignore further UDP if all slots full or TCP-mode */
  222. if (next == nqueries) pfd[nqueries].events = 0;
  223. /* If answer is truncated (TC bit), fallback to TCP */
  224. if ((answers[i][2] & 2) || (mh.msg_flags & MSG_TRUNC)) {
  225. alens[i] = -1;
  226. pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, 0);
  227. r = start_tcp(pfd+i, family, ns+j, sl, queries[i], qlens[i]);
  228. pthread_setcancelstate(cs, 0);
  229. if (r >= 0) {
  230. qpos[i] = r;
  231. apos[i] = 0;
  232. }
  233. continue;
  234. }
  235. }
  236. for (i=0; i<nqueries; i++) if (pfd[i].revents & POLLOUT) {
  237. struct msghdr mh = {
  238. .msg_iovlen = 2,
  239. .msg_iov = (struct iovec [2]){
  240. { .iov_base = (uint8_t[]){ qlens[i]>>8, qlens[i] }, .iov_len = 2 },
  241. { .iov_base = (void *)queries[i], .iov_len = qlens[i] } }
  242. };
  243. step_mh(&mh, qpos[i]);
  244. r = sendmsg(pfd[i].fd, &mh, MSG_NOSIGNAL);
  245. if (r < 0) goto out;
  246. qpos[i] += r;
  247. if (qpos[i] == qlens[i]+2)
  248. pfd[i].events = POLLIN;
  249. }
  250. for (i=0; i<nqueries; i++) if (pfd[i].revents & POLLIN) {
  251. struct msghdr mh = {
  252. .msg_iovlen = 2,
  253. .msg_iov = (struct iovec [2]){
  254. { .iov_base = alen_buf[i], .iov_len = 2 },
  255. { .iov_base = answers[i], .iov_len = asize } }
  256. };
  257. step_mh(&mh, apos[i]);
  258. r = recvmsg(pfd[i].fd, &mh, 0);
  259. if (r < 0) goto out;
  260. apos[i] += r;
  261. if (apos[i] < 2) continue;
  262. int alen = alen_buf[i][0]*256 + alen_buf[i][1];
  263. if (alen < 13) goto out;
  264. if (apos[i] < alen+2 && apos[i] < asize+2)
  265. continue;
  266. int rcode = answers[i][3] & 15;
  267. if (rcode != 0 && rcode != 3)
  268. goto out;
  269. /* Storing the length here commits the accepted answer.
  270. * Immediately close TCP socket so as not to consume
  271. * resources we no longer need. */
  272. alens[i] = alen;
  273. __syscall(SYS_close, pfd[i].fd);
  274. pfd[i].fd = -1;
  275. }
  276. }
  277. out:
  278. pthread_cleanup_pop(1);
  279. /* Disregard any incomplete TCP results */
  280. for (i=0; i<nqueries; i++) if (alens[i]<0) alens[i] = 0;
  281. return 0;
  282. }
  283. int __res_msend(int nqueries, const unsigned char *const *queries,
  284. const int *qlens, unsigned char *const *answers, int *alens, int asize)
  285. {
  286. struct resolvconf conf;
  287. if (__get_resolv_conf(&conf, 0, 0) < 0) return -1;
  288. return __res_msend_rc(nqueries, queries, qlens, answers, alens, asize, &conf);
  289. }