123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495 |
- From a799ca0c6314ad73a97bc6c89382d2712a9c0b0e Mon Sep 17 00:00:00 2001
- From: Simon Kelley <simon@thekelleys.org.uk>
- Date: Thu, 18 Oct 2018 19:35:29 +0100
- Subject: [PATCH 01/32] Improve cache behaviour for TCP connections.
- For ease of implementation, dnsmasq has always forked a new process to
- handle each incoming TCP connection. A side-effect of this is that any
- DNS queries answered from TCP connections are not cached: when TCP
- connections were rare, this was not a problem. With the coming of
- DNSSEC, it's now the case that some DNSSEC queries have answers which
- spill to TCP, and if, for instance, this applies to the keys for the
- root then those never get cached, and performance is very bad. This
- fix passes cache entries back from the TCP child process to the main
- server process, and fixes the problem.
- Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
- ---
- CHANGELOG | 14 ++++
- src/blockdata.c | 37 ++++++++-
- src/cache.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++--
- src/dnsmasq.c | 58 ++++++++++++--
- src/dnsmasq.h | 5 ++
- 5 files changed, 291 insertions(+), 19 deletions(-)
- --- a/CHANGELOG
- +++ b/CHANGELOG
- @@ -1,3 +1,17 @@
- +version 2.81
- + Impove cache behaviour for TCP connections. For ease of
- + implementaion, dnsmasq has always forked a new process to handle
- + each incoming TCP connection. A side-effect of this is that
- + any DNS queries answered from TCP connections are not cached:
- + when TCP connections were rare, this was not a problem.
- + With the coming of DNSSEC, it's now the case that some
- + DNSSEC queries have answers which spill to TCP, and if,
- + for instance, this applies to the keys for the root then
- + those never get cached, and performance is very bad.
- + This fix passes cache entries back from the TCP child process to
- + the main server process, and fixes the problem.
- +
- +
- version 2.80
- Add support for RFC 4039 DHCP rapid commit. Thanks to Ashram Method
- for the initial patch and motivation.
- --- a/src/blockdata.c
- +++ b/src/blockdata.c
- @@ -61,7 +61,7 @@ void blockdata_report(void)
- blockdata_alloced * sizeof(struct blockdata));
- }
-
- -struct blockdata *blockdata_alloc(char *data, size_t len)
- +static struct blockdata *blockdata_alloc_real(int fd, char *data, size_t len)
- {
- struct blockdata *block, *ret = NULL;
- struct blockdata **prev = &ret;
- @@ -89,8 +89,17 @@ struct blockdata *blockdata_alloc(char *
- blockdata_hwm = blockdata_count;
-
- blen = len > KEYBLOCK_LEN ? KEYBLOCK_LEN : len;
- - memcpy(block->key, data, blen);
- - data += blen;
- + if (data)
- + {
- + memcpy(block->key, data, blen);
- + data += blen;
- + }
- + else if (!read_write(fd, block->key, blen, 1))
- + {
- + /* failed read free partial chain */
- + blockdata_free(ret);
- + return NULL;
- + }
- len -= blen;
- *prev = block;
- prev = &block->next;
- @@ -100,6 +109,10 @@ struct blockdata *blockdata_alloc(char *
- return ret;
- }
-
- +struct blockdata *blockdata_alloc(char *data, size_t len)
- +{
- + return blockdata_alloc_real(0, data, len);
- +}
-
- void blockdata_free(struct blockdata *blocks)
- {
- @@ -148,5 +161,21 @@ void *blockdata_retrieve(struct blockdat
-
- return data;
- }
- -
- +
- +
- +void blockdata_write(struct blockdata *block, size_t len, int fd)
- +{
- + for (; len > 0 && block; block = block->next)
- + {
- + size_t blen = len > KEYBLOCK_LEN ? KEYBLOCK_LEN : len;
- + read_write(fd, block->key, blen, 0);
- + len -= blen;
- + }
- +}
- +
- +struct blockdata *blockdata_read(int fd, size_t len)
- +{
- + return blockdata_alloc_real(fd, NULL, len);
- +}
- +
- #endif
- --- a/src/cache.c
- +++ b/src/cache.c
- @@ -26,6 +26,8 @@ static union bigname *big_free = NULL;
- static int bignames_left, hash_size;
-
- static void make_non_terminals(struct crec *source);
- +static struct crec *really_insert(char *name, struct all_addr *addr,
- + time_t now, unsigned long ttl, unsigned short flags);
-
- /* type->string mapping: this is also used by the name-hash function as a mixing table. */
- static const struct {
- @@ -464,16 +466,10 @@ void cache_start_insert(void)
- new_chain = NULL;
- insert_error = 0;
- }
- -
- +
- struct crec *cache_insert(char *name, struct all_addr *addr,
- time_t now, unsigned long ttl, unsigned short flags)
- {
- - struct crec *new, *target_crec = NULL;
- - union bigname *big_name = NULL;
- - int freed_all = flags & F_REVERSE;
- - int free_avail = 0;
- - unsigned int target_uid;
- -
- /* Don't log DNSSEC records here, done elsewhere */
- if (flags & (F_IPV4 | F_IPV6 | F_CNAME))
- {
- @@ -484,7 +480,20 @@ struct crec *cache_insert(char *name, st
- if (daemon->min_cache_ttl != 0 && daemon->min_cache_ttl > ttl)
- ttl = daemon->min_cache_ttl;
- }
- +
- + return really_insert(name, addr, now, ttl, flags);
- +}
-
- +
- +static struct crec *really_insert(char *name, struct all_addr *addr,
- + time_t now, unsigned long ttl, unsigned short flags)
- +{
- + struct crec *new, *target_crec = NULL;
- + union bigname *big_name = NULL;
- + int freed_all = flags & F_REVERSE;
- + int free_avail = 0;
- + unsigned int target_uid;
- +
- /* if previous insertion failed give up now. */
- if (insert_error)
- return NULL;
- @@ -645,12 +654,185 @@ void cache_end_insert(void)
- cache_hash(new_chain);
- cache_link(new_chain);
- daemon->metrics[METRIC_DNS_CACHE_INSERTED]++;
- +
- + /* If we're a child process, send this cache entry up the pipe to the master.
- + The marshalling process is rather nasty. */
- + if (daemon->pipe_to_parent != -1)
- + {
- + char *name = cache_get_name(new_chain);
- + ssize_t m = strlen(name);
- + unsigned short flags = new_chain->flags;
- +#ifdef HAVE_DNSSEC
- + u16 class = new_chain->uid;
- +#endif
- +
- + read_write(daemon->pipe_to_parent, (unsigned char *)&m, sizeof(m), 0);
- + read_write(daemon->pipe_to_parent, (unsigned char *)name, m, 0);
- + read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->ttd, sizeof(new_chain->ttd), 0);
- + read_write(daemon->pipe_to_parent, (unsigned char *)&flags, sizeof(flags), 0);
- +
- + if (flags & (F_IPV4 | F_IPV6))
- + read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr, sizeof(new_chain->addr), 0);
- +#ifdef HAVE_DNSSEC
- + else if (flags & F_DNSKEY)
- + {
- + read_write(daemon->pipe_to_parent, (unsigned char *)&class, sizeof(class), 0);
- + read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.key.algo, sizeof(new_chain->addr.key.algo), 0);
- + read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.key.keytag, sizeof(new_chain->addr.key.keytag), 0);
- + read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.key.flags, sizeof(new_chain->addr.key.flags), 0);
- + read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.key.keylen, sizeof(new_chain->addr.key.keylen), 0);
- + blockdata_write(new_chain->addr.key.keydata, new_chain->addr.key.keylen, daemon->pipe_to_parent);
- + }
- + else if (flags & F_DS)
- + {
- + read_write(daemon->pipe_to_parent, (unsigned char *)&class, sizeof(class), 0);
- + /* A negative DS entry is possible and has no data, obviously. */
- + if (!(flags & F_NEG))
- + {
- + read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.ds.algo, sizeof(new_chain->addr.ds.algo), 0);
- + read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.ds.keytag, sizeof(new_chain->addr.ds.keytag), 0);
- + read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.ds.digest, sizeof(new_chain->addr.ds.digest), 0);
- + read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.ds.keylen, sizeof(new_chain->addr.ds.keylen), 0);
- + blockdata_write(new_chain->addr.ds.keydata, new_chain->addr.ds.keylen, daemon->pipe_to_parent);
- + }
- + }
- +#endif
- +
- + }
- }
- +
- new_chain = tmp;
- }
- +
- + /* signal end of cache insert in master process */
- + if (daemon->pipe_to_parent != -1)
- + {
- + ssize_t m = -1;
- + read_write(daemon->pipe_to_parent, (unsigned char *)&m, sizeof(m), 0);
- + }
- +
- new_chain = NULL;
- }
-
- +
- +/* A marshalled cache entry arrives on fd, read, unmarshall and insert into cache of master process. */
- +int cache_recv_insert(time_t now, int fd)
- +{
- + ssize_t m;
- + struct all_addr addr;
- + unsigned long ttl;
- + time_t ttd;
- + unsigned short flags;
- + struct crec *crecp = NULL;
- +
- + cache_start_insert();
- +
- + while(1)
- + {
- +
- + if (!read_write(fd, (unsigned char *)&m, sizeof(m), 1))
- + return 0;
- +
- + if (m == -1)
- + {
- + cache_end_insert();
- + return 1;
- + }
- +
- + if (!read_write(fd, (unsigned char *)daemon->namebuff, m, 1) ||
- + !read_write(fd, (unsigned char *)&ttd, sizeof(ttd), 1) ||
- + !read_write(fd, (unsigned char *)&flags, sizeof(flags), 1))
- + return 0;
- +
- + daemon->namebuff[m] = 0;
- +
- + ttl = difftime(ttd, now);
- +
- + if (flags & (F_IPV4 | F_IPV6))
- + {
- + if (!read_write(fd, (unsigned char *)&addr, sizeof(addr), 1))
- + return 0;
- + crecp = really_insert(daemon->namebuff, &addr, now, ttl, flags);
- + }
- + else if (flags & F_CNAME)
- + {
- + struct crec *newc = really_insert(daemon->namebuff, NULL, now, ttl, flags);
- + /* This relies on the fact the the target of a CNAME immediately preceeds
- + it because of the order of extraction in extract_addresses, and
- + the order reversal on the new_chain. */
- + if (newc)
- + {
- + if (!crecp)
- + {
- + newc->addr.cname.target.cache = NULL;
- + /* anything other than zero, to avoid being mistaken for CNAME to interface-name */
- + newc->addr.cname.uid = 1;
- + }
- + else
- + {
- + next_uid(crecp);
- + newc->addr.cname.target.cache = crecp;
- + newc->addr.cname.uid = crecp->uid;
- + }
- + }
- + }
- +#ifdef HAVE_DNSSEC
- + else if (flags & (F_DNSKEY | F_DS))
- + {
- + unsigned short class, keylen, keyflags, keytag;
- + unsigned char algo, digest;
- + struct blockdata *keydata;
- +
- + if (!read_write(fd, (unsigned char *)&class, sizeof(class), 1))
- + return 0;
- + /* Cache needs to known class for DNSSEC stuff */
- + addr.addr.dnssec.class = class;
- +
- + crecp = really_insert(daemon->namebuff, &addr, now, ttl, flags);
- +
- + if (flags & F_DNSKEY)
- + {
- + if (!read_write(fd, (unsigned char *)&algo, sizeof(algo), 1) ||
- + !read_write(fd, (unsigned char *)&keytag, sizeof(keytag), 1) ||
- + !read_write(fd, (unsigned char *)&keyflags, sizeof(keyflags), 1) ||
- + !read_write(fd, (unsigned char *)&keylen, sizeof(keylen), 1) ||
- + !(keydata = blockdata_read(fd, keylen)))
- + return 0;
- + }
- + else if (!(flags & F_NEG))
- + {
- + if (!read_write(fd, (unsigned char *)&algo, sizeof(algo), 1) ||
- + !read_write(fd, (unsigned char *)&keytag, sizeof(keytag), 1) ||
- + !read_write(fd, (unsigned char *)&digest, sizeof(digest), 1) ||
- + !read_write(fd, (unsigned char *)&keylen, sizeof(keylen), 1) ||
- + !(keydata = blockdata_read(fd, keylen)))
- + return 0;
- + }
- +
- + if (crecp)
- + {
- + if (flags & F_DNSKEY)
- + {
- + crecp->addr.key.algo = algo;
- + crecp->addr.key.keytag = keytag;
- + crecp->addr.key.flags = flags;
- + crecp->addr.key.keylen = keylen;
- + crecp->addr.key.keydata = keydata;
- + }
- + else if (!(flags & F_NEG))
- + {
- + crecp->addr.ds.algo = algo;
- + crecp->addr.ds.keytag = keytag;
- + crecp->addr.ds.digest = digest;
- + crecp->addr.ds.keylen = keylen;
- + crecp->addr.ds.keydata = keydata;
- + }
- + }
- + }
- +#endif
- + }
- +}
- +
- int cache_find_non_terminal(char *name, time_t now)
- {
- struct crec *crecp;
- --- a/src/dnsmasq.c
- +++ b/src/dnsmasq.c
- @@ -930,6 +930,10 @@ int main (int argc, char **argv)
- check_servers();
-
- pid = getpid();
- +
- + daemon->pipe_to_parent = -1;
- + for (i = 0; i < MAX_PROCS; i++)
- + daemon->tcp_pipes[i] = -1;
-
- #ifdef HAVE_INOTIFY
- /* Using inotify, have to select a resolv file at startup */
- @@ -1611,7 +1615,7 @@ static int set_dns_listeners(time_t now)
- we don't need to explicitly arrange to wake up here */
- if (listener->tcpfd != -1)
- for (i = 0; i < MAX_PROCS; i++)
- - if (daemon->tcp_pids[i] == 0)
- + if (daemon->tcp_pids[i] == 0 && daemon->tcp_pipes[i] == -1)
- {
- poll_listen(listener->tcpfd, POLLIN);
- break;
- @@ -1624,6 +1628,13 @@ static int set_dns_listeners(time_t now)
-
- }
-
- +#ifndef NO_FORK
- + if (!option_bool(OPT_DEBUG))
- + for (i = 0; i < MAX_PROCS; i++)
- + if (daemon->tcp_pipes[i] != -1)
- + poll_listen(daemon->tcp_pipes[i], POLLIN);
- +#endif
- +
- return wait;
- }
-
- @@ -1632,7 +1643,10 @@ static void check_dns_listeners(time_t n
- struct serverfd *serverfdp;
- struct listener *listener;
- int i;
- -
- +#ifndef NO_FORK
- + int pipefd[2];
- +#endif
- +
- for (serverfdp = daemon->sfds; serverfdp; serverfdp = serverfdp->next)
- if (poll_check(serverfdp->fd, POLLIN))
- reply_query(serverfdp->fd, serverfdp->source_addr.sa.sa_family, now);
- @@ -1642,7 +1656,26 @@ static void check_dns_listeners(time_t n
- if (daemon->randomsocks[i].refcount != 0 &&
- poll_check(daemon->randomsocks[i].fd, POLLIN))
- reply_query(daemon->randomsocks[i].fd, daemon->randomsocks[i].family, now);
- -
- +
- +#ifndef NO_FORK
- + /* Races. The child process can die before we read all of the data from the
- + pipe, or vice versa. Therefore send tcp_pids to zero when we wait() the
- + process, and tcp_pipes to -1 and close the FD when we read the last
- + of the data - indicated by cache_recv_insert returning zero.
- + The order of these events is indeterminate, and both are needed
- + to free the process slot. Once the child process has gone, poll()
- + returns POLLHUP, not POLLIN, so have to check for both here. */
- + if (!option_bool(OPT_DEBUG))
- + for (i = 0; i < MAX_PROCS; i++)
- + if (daemon->tcp_pipes[i] != -1 &&
- + poll_check(daemon->tcp_pipes[i], POLLIN | POLLHUP) &&
- + !cache_recv_insert(now, daemon->tcp_pipes[i]))
- + {
- + close(daemon->tcp_pipes[i]);
- + daemon->tcp_pipes[i] = -1;
- + }
- +#endif
- +
- for (listener = daemon->listeners; listener; listener = listener->next)
- {
- if (listener->fd != -1 && poll_check(listener->fd, POLLIN))
- @@ -1736,15 +1769,20 @@ static void check_dns_listeners(time_t n
- while (retry_send(close(confd)));
- }
- #ifndef NO_FORK
- - else if (!option_bool(OPT_DEBUG) && (p = fork()) != 0)
- + else if (!option_bool(OPT_DEBUG) && pipe(pipefd) == 0 && (p = fork()) != 0)
- {
- - if (p != -1)
- + close(pipefd[1]); /* parent needs read pipe end. */
- + if (p == -1)
- + close(pipefd[0]);
- + else
- {
- int i;
- +
- for (i = 0; i < MAX_PROCS; i++)
- - if (daemon->tcp_pids[i] == 0)
- + if (daemon->tcp_pids[i] == 0 && daemon->tcp_pipes[i] == -1)
- {
- daemon->tcp_pids[i] = p;
- + daemon->tcp_pipes[i] = pipefd[0];
- break;
- }
- }
- @@ -1761,7 +1799,7 @@ static void check_dns_listeners(time_t n
- int flags;
- struct in_addr netmask;
- int auth_dns;
- -
- +
- if (iface)
- {
- netmask = iface->netmask;
- @@ -1777,7 +1815,11 @@ static void check_dns_listeners(time_t n
- /* Arrange for SIGALRM after CHILD_LIFETIME seconds to
- terminate the process. */
- if (!option_bool(OPT_DEBUG))
- - alarm(CHILD_LIFETIME);
- + {
- + alarm(CHILD_LIFETIME);
- + close(pipefd[0]); /* close read end in child. */
- + daemon->pipe_to_parent = pipefd[1];
- + }
- #endif
-
- /* start with no upstream connections. */
- --- a/src/dnsmasq.h
- +++ b/src/dnsmasq.h
- @@ -1091,6 +1091,8 @@ extern struct daemon {
- size_t packet_len; /* " " */
- struct randfd *rfd_save; /* " " */
- pid_t tcp_pids[MAX_PROCS];
- + int tcp_pipes[MAX_PROCS];
- + int pipe_to_parent;
- struct randfd randomsocks[RANDOM_SOCKS];
- int v6pktinfo;
- struct addrlist *interface_addrs; /* list of all addresses/prefix lengths associated with all local interfaces */
- @@ -1152,6 +1154,7 @@ struct crec *cache_find_by_name(struct c
- char *name, time_t now, unsigned int prot);
- void cache_end_insert(void);
- void cache_start_insert(void);
- +int cache_recv_insert(time_t now, int fd);
- struct crec *cache_insert(char *name, struct all_addr *addr,
- time_t now, unsigned long ttl, unsigned short flags);
- void cache_reload(void);
- @@ -1174,6 +1177,8 @@ void blockdata_init(void);
- void blockdata_report(void);
- struct blockdata *blockdata_alloc(char *data, size_t len);
- void *blockdata_retrieve(struct blockdata *block, size_t len, void *data);
- +struct blockdata *blockdata_read(int fd, size_t len);
- +void blockdata_write(struct blockdata *block, size_t len, int fd);
- void blockdata_free(struct blockdata *blocks);
- #endif
-
|