diff options
author | Martin Pitt <martin.pitt@ubuntu.com> | 2016-06-24 08:01:49 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-06-24 08:01:49 +0200 |
commit | 54522e941df6a379d65ef1822af4215654fe1513 (patch) | |
tree | 47c4b4a39ca1dfea520e484b62502ed5528e3d34 | |
parent | 39c38ce17c3a15f7b709536a6d7f1f7e093ac450 (diff) | |
parent | b3c6b00a93cca0e9108cef0b63e11787c1e10fc3 (diff) |
Merge pull request #3594 from poettering/resolved-servfail
resolved fixes for handling SERVFAIL errors from servers
-rw-r--r-- | src/resolve/resolved-dns-cache.c | 12 | ||||
-rw-r--r-- | src/resolve/resolved-dns-server.c | 80 | ||||
-rw-r--r-- | src/resolve/resolved-dns-server.h | 3 | ||||
-rw-r--r-- | src/resolve/resolved-dns-transaction.c | 75 | ||||
-rw-r--r-- | src/resolve/resolved-dns-transaction.h | 3 |
5 files changed, 106 insertions, 67 deletions
diff --git a/src/resolve/resolved-dns-cache.c b/src/resolve/resolved-dns-cache.c index 0bb5a95188..87f7c21d03 100644 --- a/src/resolve/resolved-dns-cache.c +++ b/src/resolve/resolved-dns-cache.c @@ -624,6 +624,12 @@ int dns_cache_put( dns_cache_remove_previous(c, key, answer); + /* We only care for positive replies and NXDOMAINs, on all + * other replies we will simply flush the respective entries, + * and that's it */ + if (!IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN)) + return 0; + if (dns_answer_size(answer) <= 0) { char key_str[DNS_RESOURCE_KEY_STRING_MAX]; @@ -632,12 +638,6 @@ int dns_cache_put( return 0; } - /* We only care for positive replies and NXDOMAINs, on all - * other replies we will simply flush the respective entries, - * and that's it */ - if (!IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN)) - return 0; - cache_keys = dns_answer_size(answer); if (key) cache_keys++; diff --git a/src/resolve/resolved-dns-server.c b/src/resolve/resolved-dns-server.c index 7226111c07..9b7b471600 100644 --- a/src/resolve/resolved-dns-server.c +++ b/src/resolve/resolved-dns-server.c @@ -245,6 +245,26 @@ static void dns_server_verified(DnsServer *s, DnsServerFeatureLevel level) { assert_se(sd_event_now(s->manager->event, clock_boottime_or_monotonic(), &s->verified_usec) >= 0); } +static void dns_server_reset_counters(DnsServer *s) { + assert(s); + + s->n_failed_udp = 0; + s->n_failed_tcp = 0; + s->packet_truncated = false; + s->verified_usec = 0; + + /* Note that we do not reset s->packet_bad_opt and s->packet_rrsig_missing here. We reset them only when the + * grace period ends, but not when lowering the possible feature level, as a lower level feature level should + * not make RRSIGs appear or OPT appear, but rather make them disappear. If the reappear anyway, then that's + * indication for a differently broken OPT/RRSIG implementation, and we really don't want to support that + * either. + * + * This is particularly important to deal with certain Belkin routers which break OPT for certain lookups (A), + * but pass traffic through for others (AAAA). If we detect the broken behaviour on one lookup we should not + * reenable it for another, because we cannot validate things anyway, given that the RRSIG/OPT data will be + * incomplete. */ +} + void dns_server_packet_received(DnsServer *s, int protocol, DnsServerFeatureLevel level, usec_t rtt, size_t size) { assert(s); @@ -305,17 +325,6 @@ void dns_server_packet_lost(DnsServer *s, int protocol, DnsServerFeatureLevel le s->resend_timeout = MIN(s->resend_timeout * 2, DNS_TIMEOUT_MAX_USEC); } -void dns_server_packet_failed(DnsServer *s, DnsServerFeatureLevel level) { - assert(s); - - /* Invoked whenever we get a FORMERR, SERVFAIL or NOTIMP rcode from a server. */ - - if (s->possible_feature_level != level) - return; - - s->packet_failed = true; -} - void dns_server_packet_truncated(DnsServer *s, DnsServerFeatureLevel level) { assert(s); @@ -353,6 +362,24 @@ void dns_server_packet_bad_opt(DnsServer *s, DnsServerFeatureLevel level) { s->packet_bad_opt = true; } +void dns_server_packet_rcode_downgrade(DnsServer *s, DnsServerFeatureLevel level) { + assert(s); + + /* Invoked whenever we got a FORMERR, SERVFAIL or NOTIMP rcode from a server and downgrading the feature level + * for the transaction made it go away. In this case we immediately downgrade to the feature level that made + * things work. */ + + if (s->verified_feature_level > level) + s->verified_feature_level = level; + + if (s->possible_feature_level > level) { + s->possible_feature_level = level; + dns_server_reset_counters(s); + } + + log_debug("Downgrading transaction feature level fixed an RCODE error, downgrading server %s too.", dns_server_string(s)); +} + static bool dns_server_grace_period_expired(DnsServer *s) { usec_t ts; @@ -372,27 +399,6 @@ static bool dns_server_grace_period_expired(DnsServer *s) { return true; } -static void dns_server_reset_counters(DnsServer *s) { - assert(s); - - s->n_failed_udp = 0; - s->n_failed_tcp = 0; - s->packet_failed = false; - s->packet_truncated = false; - s->verified_usec = 0; - - /* Note that we do not reset s->packet_bad_opt and s->packet_rrsig_missing here. We reset them only when the - * grace period ends, but not when lowering the possible feature level, as a lower level feature level should - * not make RRSIGs appear or OPT appear, but rather make them disappear. If the reappear anyway, then that's - * indication for a differently broken OPT/RRSIG implementation, and we really don't want to support that - * either. - * - * This is particularly important to deal with certain Belkin routers which break OPT for certain lookups (A), - * but pass traffic through for others (AAAA). If we detect the broken behaviour on one lookup we should not - * reenable it for another, because we cannot validate things anyway, given that the RRSIG/OPT data will be - * incomplete. */ -} - DnsServerFeatureLevel dns_server_possible_feature_level(DnsServer *s) { assert(s); @@ -455,16 +461,6 @@ DnsServerFeatureLevel dns_server_possible_feature_level(DnsServer *s) { log_debug("Lost too many UDP packets, downgrading feature level..."); s->possible_feature_level--; - } else if (s->packet_failed && - s->possible_feature_level > DNS_SERVER_FEATURE_LEVEL_UDP) { - - /* We got a failure packet, and are at a feature level above UDP. Note that in this case we - * downgrade no further than UDP, under the assumption that a failure packet indicates an - * incompatible packet contents, but not a problem with the transport. */ - - log_debug("Got server failure, downgrading feature level..."); - s->possible_feature_level--; - } else if (s->n_failed_tcp >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS && s->packet_truncated && s->possible_feature_level > DNS_SERVER_FEATURE_LEVEL_UDP) { diff --git a/src/resolve/resolved-dns-server.h b/src/resolve/resolved-dns-server.h index 7d07fa3e29..c1732faffd 100644 --- a/src/resolve/resolved-dns-server.h +++ b/src/resolve/resolved-dns-server.h @@ -77,7 +77,6 @@ struct DnsServer { unsigned n_failed_udp; unsigned n_failed_tcp; - bool packet_failed:1; bool packet_truncated:1; bool packet_bad_opt:1; bool packet_rrsig_missing:1; @@ -113,10 +112,10 @@ void dns_server_move_back_and_unmark(DnsServer *s); void dns_server_packet_received(DnsServer *s, int protocol, DnsServerFeatureLevel level, usec_t rtt, size_t size); void dns_server_packet_lost(DnsServer *s, int protocol, DnsServerFeatureLevel level, usec_t usec); -void dns_server_packet_failed(DnsServer *s, DnsServerFeatureLevel level); void dns_server_packet_truncated(DnsServer *s, DnsServerFeatureLevel level); void dns_server_packet_rrsig_missing(DnsServer *s, DnsServerFeatureLevel level); void dns_server_packet_bad_opt(DnsServer *s, DnsServerFeatureLevel level); +void dns_server_packet_rcode_downgrade(DnsServer *s, DnsServerFeatureLevel level); DnsServerFeatureLevel dns_server_possible_feature_level(DnsServer *s); diff --git a/src/resolve/resolved-dns-transaction.c b/src/resolve/resolved-dns-transaction.c index 06e7145422..d455b6b1fa 100644 --- a/src/resolve/resolved-dns-transaction.c +++ b/src/resolve/resolved-dns-transaction.c @@ -214,6 +214,7 @@ int dns_transaction_new(DnsTransaction **ret, DnsScope *s, DnsResourceKey *key) t->answer_nsec_ttl = (uint32_t) -1; t->key = dns_resource_key_ref(key); t->current_feature_level = _DNS_SERVER_FEATURE_LEVEL_INVALID; + t->clamp_feature_level = _DNS_SERVER_FEATURE_LEVEL_INVALID; t->id = pick_new_id(s->manager); @@ -378,22 +379,38 @@ static int dns_transaction_pick_server(DnsTransaction *t) { assert(t); assert(t->scope->protocol == DNS_PROTOCOL_DNS); + /* Pick a DNS server and a feature level for it. */ + server = dns_scope_get_dns_server(t->scope); if (!server) return -ESRCH; + /* If we changed the server invalidate the feature level clamping, as the new server might have completely + * different properties. */ + if (server != t->server) + t->clamp_feature_level = _DNS_SERVER_FEATURE_LEVEL_INVALID; + t->current_feature_level = dns_server_possible_feature_level(server); + /* Clamp the feature level if that is requested. */ + if (t->clamp_feature_level != _DNS_SERVER_FEATURE_LEVEL_INVALID && + t->current_feature_level > t->clamp_feature_level) + t->current_feature_level = t->clamp_feature_level; + + log_debug("Using feature level %s for transaction %u.", dns_server_feature_level_to_string(t->current_feature_level), t->id); + if (server == t->server) return 0; dns_server_unref(t->server); t->server = dns_server_ref(server); + log_debug("Using DNS server %s for transaction %u.", dns_server_string(t->server), t->id); + return 1; } -static void dns_transaction_retry(DnsTransaction *t) { +static void dns_transaction_retry(DnsTransaction *t, bool next_server) { int r; assert(t); @@ -401,7 +418,8 @@ static void dns_transaction_retry(DnsTransaction *t) { log_debug("Retrying transaction %" PRIu16 ".", t->id); /* Before we try again, switch to a new server. */ - dns_scope_next_dns_server(t->scope); + if (next_server) + dns_scope_next_dns_server(t->scope); r = dns_transaction_go(t); if (r < 0) { @@ -467,7 +485,7 @@ static int on_stream_complete(DnsStream *s, int error) { assert_se(sd_event_now(t->scope->manager->event, clock_boottime_or_monotonic(), &usec) >= 0); dns_server_packet_lost(t->server, IPPROTO_TCP, t->current_feature_level, usec - t->start_usec); - dns_transaction_retry(t); + dns_transaction_retry(t, true); return 0; } if (error != 0) { @@ -645,14 +663,15 @@ static int dns_transaction_dnssec_ready(DnsTransaction *t) { return 0; case DNS_TRANSACTION_RCODE_FAILURE: - if (dt->answer_rcode != DNS_RCODE_NXDOMAIN) { + if (!IN_SET(dt->answer_rcode, DNS_RCODE_NXDOMAIN, DNS_RCODE_SERVFAIL)) { log_debug("Auxiliary DNSSEC RR query failed with rcode=%s.", dns_rcode_to_string(dt->answer_rcode)); goto fail; } - /* Fall-through: NXDOMAIN is good enough for us. This is because some DNS servers erronously - * return NXDOMAIN for empty non-terminals (Akamai...), and we need to handle that nicely, when - * asking for parent SOA or similar RRs to make unsigned proofs. */ + /* Fall-through: NXDOMAIN/SERVFAIL is good enough for us. This is because some DNS servers + * erronously return NXDOMAIN/SERVFAIL for empty non-terminals (Akamai...) or missing DS + * records (Facebook), and we need to handle that nicely, when asking for parent SOA or similar + * RRs to make unsigned proofs. */ case DNS_TRANSACTION_SUCCESS: /* All good. */ @@ -889,10 +908,22 @@ void dns_transaction_process_reply(DnsTransaction *t, DnsPacket *p) { if (IN_SET(DNS_PACKET_RCODE(p), DNS_RCODE_FORMERR, DNS_RCODE_SERVFAIL, DNS_RCODE_NOTIMP)) { /* Request failed, immediately try again with reduced features */ - log_debug("Server returned error: %s", dns_rcode_to_string(DNS_PACKET_RCODE(p))); - dns_server_packet_failed(t->server, t->current_feature_level); - dns_transaction_retry(t); + if (t->current_feature_level <= DNS_SERVER_FEATURE_LEVEL_WORST) { + /* This was already at the lowest possible feature level? If so, we can't downgrade + * this transaction anymore, hence let's process the response, and accept the rcode. */ + log_debug("Server returned error: %s", dns_rcode_to_string(DNS_PACKET_RCODE(p))); + break; + } + + /* Reduce this feature level by one and try again. */ + t->clamp_feature_level = t->current_feature_level - 1; + + log_debug("Server returned error %s, retrying transaction with reduced feature level %s.", + dns_rcode_to_string(DNS_PACKET_RCODE(p)), + dns_server_feature_level_to_string(t->clamp_feature_level)); + + dns_transaction_retry(t, false /* use the same server */); return; } else if (DNS_PACKET_TC(p)) dns_server_packet_truncated(t->server, t->current_feature_level); @@ -937,7 +968,7 @@ void dns_transaction_process_reply(DnsTransaction *t, DnsPacket *p) { goto fail; /* On DNS, couldn't send? Try immediately again, with a new server */ - dns_transaction_retry(t); + dns_transaction_retry(t, true); } return; @@ -950,11 +981,19 @@ void dns_transaction_process_reply(DnsTransaction *t, DnsPacket *p) { return; } - /* Report that the OPT RR was missing */ if (t->server) { + /* Report that we successfully received a valid packet with a good rcode after we initially got a bad + * rcode and subsequently downgraded the protocol */ + + if (IN_SET(DNS_PACKET_RCODE(p), DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN) && + t->clamp_feature_level != _DNS_SERVER_FEATURE_LEVEL_INVALID) + dns_server_packet_rcode_downgrade(t->server, t->clamp_feature_level); + + /* Report that the OPT RR was missing */ if (!p->opt) dns_server_packet_bad_opt(t->server, t->current_feature_level); + /* Report that we successfully received a packet */ dns_server_packet_received(t->server, p->ipproto, t->current_feature_level, ts - t->start_usec, p->size); } @@ -1041,7 +1080,7 @@ static int on_dns_packet(sd_event_source *s, int fd, uint32_t revents, void *use assert_se(sd_event_now(t->scope->manager->event, clock_boottime_or_monotonic(), &usec) >= 0); dns_server_packet_lost(t->server, IPPROTO_UDP, t->current_feature_level, usec - t->start_usec); - dns_transaction_retry(t); + dns_transaction_retry(t, true); return 0; } if (r < 0) { @@ -1150,7 +1189,7 @@ static int on_transaction_timeout(sd_event_source *s, usec_t usec, void *userdat log_debug("Timeout reached on transaction %" PRIu16 ".", t->id); - dns_transaction_retry(t); + dns_transaction_retry(t, true); return 0; } @@ -1781,8 +1820,10 @@ static bool dns_transaction_dnssec_supported(DnsTransaction *t) { if (!t->server) return true; - if (t->current_feature_level < DNS_SERVER_FEATURE_LEVEL_DO) - return false; + /* Note that we do not check the feature level actually used for the transaction but instead the feature level + * the server is known to support currently, as the transaction feature level might be lower than what the + * server actually supports, since we might have downgraded this transaction's feature level because we got a + * SERVFAIL earlier and wanted to check whether downgrading fixes it. */ return dns_server_dnssec_supported(t->server); } @@ -2874,7 +2915,7 @@ int dns_transaction_validate_dnssec(DnsTransaction *t) { if (!dns_transaction_dnssec_supported_full(t)) { /* The server does not support DNSSEC, or doesn't augment responses with RRSIGs. */ t->answer_dnssec_result = DNSSEC_INCOMPATIBLE_SERVER; - log_debug("Not validating response for %" PRIu16 ", server lacks DNSSEC support.", t->id); + log_debug("Not validating response for %" PRIu16 ", used server feature level does not support DNSSEC.", t->id); return 0; } diff --git a/src/resolve/resolved-dns-transaction.h b/src/resolve/resolved-dns-transaction.h index 46a934a39b..96b066845d 100644 --- a/src/resolve/resolved-dns-transaction.h +++ b/src/resolve/resolved-dns-transaction.h @@ -117,6 +117,9 @@ struct DnsTransaction { /* The features of the DNS server at time of transaction start */ DnsServerFeatureLevel current_feature_level; + /* If we got SERVFAIL back, we retry the lookup, using a lower feature level than we used before. */ + DnsServerFeatureLevel clamp_feature_level; + /* Query candidates this transaction is referenced by and that * shall be notified about this specific transaction * completing. */ |